def test_lazy_slicing(self):
    """Taking a slice of a Stream must not advance its cursor until consumed."""
    stream = Stream() << iters.range(10)
    self.assertEqual(stream.cursor(), 0)
    head = stream[:5]
    # The slice object alone must not force evaluation of the stream.
    self.assertEqual(stream.cursor(), 0)
    self.assertEqual(len(list(head)), 5)
def test_origin_param(self):
    """Stream(...) seeds the stream with its positional arguments, in order."""
    self.assertEqual([100], list(Stream(100)))
    self.assertEqual([1, 2, 3], list(Stream(1, 2, 3)))
    # Seeded streams can still be extended with << afterwards.
    extended = Stream(1, 2, 3) << [10, 20, 30]
    self.assertEqual([1, 2, 3, 10, 20, 30], list(extended))
def ref_with_vcf_dicts_strategy_factory(draw):
    '''
    Generate vcf records for randomish locations along a randomishly generated
    reference sequence. Each vcf record generator will have a randomish sized
    "chunk" of the reference to use.

    Returns (reference sequence(str), iterable(vcf dicts))

    Note: `draw` is the hypothesis draw function supplied to composite
    strategies; the order of draw() calls below is significant for shrinking.
    '''
    # Reference sequence: 10-20 bases over the DNA alphabet.
    seq = draw(st.text(alphabet='ACGT', min_size=10, max_size=20))
    size = len(seq)
    # This gets you a list of numbers that are randomish and increasing;
    # positions at or past the end of the reference are filtered out.
    ranges = draw(
        rolling_sum(1, 3, int(size / 2)).map(
            lambda xs: ifilter(lambda x: x < size, xs)))  #.filter(_not(bool)))
    # Stream lets you re-use a generator without draining it.
    # Pairs will hold start/stop values for each part of sequence.
    pairs = Stream() << partition(2, ranges)
    # POSs will contain the start position of each vcf row.
    POSs = Stream() << imap(operator.itemgetter(0), pairs)
    # VCF files start at index 1; python starts at 0, so shift both ends down.
    pairs_offset_1 = imap(lambda x: (x[0] - 1, x[1] - 1), pairs)
    # Grab the pieces of the reference to build our elts from.
    chunks = map(lambda x: seq[x[0]:x[1]], pairs_offset_1)
    # Random chromosome name shared by all records in this example.
    chrom = draw(st.text(string.ascii_letters))
    # Draw a new record for each of the positions we have made; each record
    # strategy gets (chrom, POS, chunk) via the composed partial.
    vcfs = map(compose(draw, partial(vcf_dict_strategy_factory, chrom)),
               POSs, chunks)
    #TODO: ranges must be non-empty. Assuming vcfs for now.
    # vcfs can be a generator
    #assume(len(vcfs) > 0)
    return (seq, vcfs)
def test_from_generator(self):
    """A generator function can feed a Stream and be chained with more data."""
    def numbers():
        for value in (1, 2, 3):
            yield value
    # Stream accepts the generator *function* itself, deferring the call.
    stream = Stream() << numbers << (4, 5)
    assert list(stream) == [1, 2, 3, 4, 5]
def test_fib_infinite_stream(self):
    """A Stream may reference itself to define the infinite Fibonacci sequence."""
    from operator import add
    feed = Stream()
    # fib = 0, 1, then the element-wise sum of fib and fib shifted by one.
    fib = feed << [0, 1] << iters.map(add, feed, iters.drop(1, feed))
    self.assertEqual([0, 1, 1, 2, 3, 5, 8, 13, 21, 34],
                     list(iters.take(10, fib)))
    self.assertEqual(6765, fib[20])
    self.assertEqual([832040, 1346269, 2178309, 3524578, 5702887],
                     list(fib[30:35]))
    # Slicing up to index 35 forces evaluation of exactly 35 elements.
    self.assertEqual(fib.cursor(), 35)
def combined_stats_over_time(
        label,
        runs,
        objective,
        worst,
        best,
):
    """
    Combine stats_over_time() vectors for multiple runs.

    Args:
      label: name of this data series (kept for interface compatibility).
      runs: iterable of (run, session) pairs passed to stats_over_time().
      objective, worst, best: unused here; kept for interface compatibility.

    Returns:
      (mean_values, percentile_values) — each a list of rows whose first
      column is elapsed seconds, followed by the derived statistics.
    """
    extract_fn = _.result.time
    combine_fn = min
    no_data = 999

    by_run = [
        stats_over_time(session, run, extract_fn, combine_fn, no_data)
        for run, session in runs
    ]
    # max() consumes map() lazily; no need to materialize a list first.
    max_len = max(map(len, by_run))

    # Pad every series to max_len by repeating its final value.
    by_run_streams = [
        Stream() << x << repeat(x[-1], max_len - len(x))
        for x in by_run
    ]
    # Materialize: by_quanta is iterated once per value_function below.
    by_quanta = list(zip(*by_run_streams))

    # TODO: Fix this, this variable should be configurable
    stats_quanta = 10

    def get_data(value_function):
        # One output row per quantum: [seconds] + derived statistics.
        final_values = []
        for quanta, values in enumerate(by_quanta):
            sec = quanta * stats_quanta
            final_values.append([sec] + value_function(values))
        return final_values

    mean_values = get_data(lambda values: [mean(values), stddev(values)])

    def extract_percentiles(values):
        values = sorted(values)
        return ([values[int(round(p * (len(values) - 1)))] for p in PCTSTEPS]
                + [mean(values)])

    percentile_values = get_data(extract_percentiles)
    return mean_values, percentile_values
def __iter__(self):
    """Iterate over the array contents, stopping at the first None entry.

    Nested instances of this class are spliced in as sub-streams; plain
    elements are wrapped in a single-item list before being appended.
    """
    stream = Stream()
    for element in takewhile(lambda el: el is not None, self.array):
        if isinstance(element, self.__class__):
            stream = stream << element
        else:
            stream = stream << [element]
    return iter(stream)
def __iter__(self):
    """Yield the root contents followed by the tail, lazily via a Stream."""
    combined = Stream()
    combined = combined << self.root
    combined = combined << self.tail
    return iter(combined)
def test_lazy_slicing_recursive(self):
    """Slicing a slice stays lazy and composes the index ranges correctly."""
    stream = Stream() << iters.range(10)
    nested = stream[1:3][0:2]
    # Neither slice may advance the cursor before consumption.
    self.assertEqual(stream.cursor(), 0)
    self.assertEqual(len(list(nested)), 2)
def combined_stats_over_time(self,
                             output_dir,
                             label,
                             runs,
                             objective,
                             worst,
                             best,
                             ):
    """
    combine stats_over_time() vectors for multiple runs

    Writes gnuplot data files ('<label>_mean.dat', '<label>_percentiles.dat')
    and matching plot scripts for *label* into *output_dir*.
    objective/worst/best are currently unused (see the commented extract_fn).
    """
    #extract_fn = lambda dr: objective.stats_quality_score(dr.result, worst, best)
    extract_fn = _.result.run_time
    combine_fn = min
    no_data = 999  # placeholder value for quanta with no measurements
    log.debug("writing stats for %s to %s", label, output_dir)
    by_run = [
        self.stats_over_time(session, run, extract_fn, combine_fn, no_data)
        for run, session in runs
    ]
    max_len = max(map(len, by_run))
    # Pad every per-run series to max_len by repeating its final value.
    by_run_streams = [
        Stream() << x << repeat(x[-1], max_len - len(x))
        for x in by_run
    ]
    # NOTE(review): under Python 3 zip() returns a one-shot iterator, so the
    # second data_file() call below would see no rows — this code appears to
    # target Python 2 (cf. xrange in the commented block); verify.
    by_quanta = zip(*by_run_streams[:])

    def data_file(suffix, headers, value_function):
        # Write one space-delimited row per quantum: seconds + derived values.
        with open(os.path.join(output_dir, label + suffix), 'w') as fd:
            out = csv.writer(fd, delimiter=' ', lineterminator='\n')
            out.writerow(['#sec'] + headers)
            for quanta, values in enumerate(by_quanta):
                sec = quanta * self.args.stats_quanta
                out.writerow([sec] + value_function(values))

    #data_file('_details.dat',
    #          map(lambda x: 'run%d'%x, xrange(max_len)),
    #          list)
    #self.gnuplot_file(output_dir,
    #                  label+'_details',
    #                  [('"'+label+'_details.dat"'
    #                    ' using 1:%d'%i +
    #                    ' with lines'
    #                    ' title "Run %d"'%i)
    #                   for i in xrange(max_len)])

    data_file('_mean.dat',
              ['#sec', 'mean', 'stddev'],
              lambda values: [mean(values), stddev(values)])
    self.gnuplot_file(
        output_dir,
        label + '_mean',
        ['"' + label + '_mean.dat" using 1:2 with lines title "Mean"'])

    def extract_percentiles(values):
        # Nearest-rank percentiles over the sorted values, plus the mean.
        values = sorted(values)
        return ([values[int(round(p * (len(values) - 1)))] for p in PCTSTEPS]
                + [mean(values)])

    data_file("_percentiles.dat", PCTSTEPS + ['mean'], extract_percentiles)
    # Plot a subset of percentile columns; alternating columns are left
    # commented out to keep the chart readable.
    self.gnuplot_file(
        output_dir,
        label + '_percentiles',
        reversed([
            '"' + label + '_percentiles.dat" using 1:2 with lines title "0%"',
            # '"" using 1:3 with lines title "5%"',
            '"" using 1:4 with lines title "10%"',
            # '"" using 1:5 with lines title "25%"',
            '"" using 1:6 with lines title "20%"',
            # '"" using 1:7 with lines title "35%"',
            '"" using 1:8 with lines title "30%"',
            # '"" using 1:9 with lines title "45%"',
            '"" using 1:10 with lines title "40%"',
            # '"" using 1:11 with lines title "55%"',
            '"" using 1:12 with lines title "50%"',
            # '"" using 1:13 with lines title "65%"',
            '"" using 1:14 with lines title "70%"',
            # '"" using 1:15 with lines title "75%"',
            '"" using 1:16 with lines title "80%"',
            # '"" using 1:17 with lines title "85%"',
            '"" using 1:18 with lines title "90%"',
            # '"" using 1:19 with lines title "95%"',
            '"' + label + '_percentiles.dat" using 1:20 with lines title "100%"',
        ]))
def test_origin_param_string(self):
    """A single string origin is kept as one element, not split into chars."""
    result = list(Stream("stream"))
    self.assertEqual(["stream"], result)
def test_from_iterator(self):
    """A range and a list chained with << yield their elements in order."""
    combined = Stream() << range(6) << [6, 7]
    self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7], list(combined))
def test_from_list(self):
    """A list-fed Stream supports full iteration, indexing, and slicing."""
    stream = Stream() << [1, 2, 3, 4, 5]
    self.assertEqual([1, 2, 3, 4, 5], list(stream))
    self.assertEqual(2, stream[1])
    self.assertEqual([1, 2], list(stream[0:2]))