Пример #1
0
    def test_lazy_slicing(self):
        """Taking a slice must leave the source stream's cursor at 0."""
        source = Stream() << iters.range(10)
        self.assertEqual(source.cursor(), 0)

        # Creating the slice is expected not to move the source cursor.
        first_five = source[:5]
        self.assertEqual(source.cursor(), 0)

        items = list(first_five)
        self.assertEqual(len(items), 5)
Пример #2
0
    def test_lazy_slicing(self):
        """Check that slicing a stream does not advance its cursor."""
        numbers = Stream() << iters.range(10)
        self.assertEqual(numbers.cursor(), 0)

        # The cursor is asserted to still be 0 once the slice exists.
        head = numbers[:5]
        self.assertEqual(numbers.cursor(), 0)

        head_items = list(head)
        self.assertEqual(len(head_items), 5)
Пример #3
0
 def test_origin_param(self):
     """Constructor arguments become the stream's leading items."""
     single = list(Stream(100))
     self.assertEqual([100], single)

     triple = list(Stream(1, 2, 3))
     self.assertEqual([1, 2, 3], triple)

     # Items pushed with << follow the constructor-supplied ones.
     extended = Stream(1, 2, 3) << [10, 20, 30]
     self.assertEqual([1, 2, 3, 10, 20, 30], list(extended))
Пример #4
0
def ref_with_vcf_dicts_strategy_factory(draw):
    '''
    Generate vcf records for randomish locations along a randomishly generated
    reference sequence. Each vcf record generator will have a randomish sized
    "chunk" of the reference to use.

    The reference is a 10-20 character string over the alphabet 'ACGT'.
    Positions drawn from rolling_sum() are kept only while they are smaller
    than the reference length, grouped by partition(2, ...), and each group is
    used both as a record position and as the bounds of a reference slice.

    :param draw: callable that turns a strategy into a concrete value
        (presumably the ``draw`` Hypothesis hands to a composite strategy;
        the decorator is not visible here -- TODO confirm).
    :return: (reference sequence(str), iterable(vcf dicts))
    '''
    # Reference sequence: 10 to 20 characters drawn from 'ACGT'.
    seq = draw(st.text(alphabet='ACGT', min_size=10, max_size=20))
    size = len(seq)
    # This gets you numbers that are randomish and increasing (per
    # rolling_sum, defined elsewhere); values >= size are filtered out so
    # that every remaining number is below the length of seq.
    ranges = draw(
        rolling_sum(1, 3, int(size / 2)).map(
            lambda xs: ifilter(lambda x: x < size, xs)))  #.filter(_not(bool)))
    # Stream lets you re-use a generator without draining it.
    # Pairs will hold start/stop values for each part of the sequence.
    pairs = Stream() << partition(2, ranges)
    # POSs will contain the start position (first item of each pair) of each
    # vcf row.
    POSs = Stream() << imap(operator.itemgetter(0), pairs)
    # VCF files start at index 1; python starts at 0, so both ends of every
    # pair are shifted down by one before slicing.
    pairs_offset_1 = imap(lambda x: (x[0] - 1, x[1] - 1), pairs)
    # Grab the pieces of the reference to build our elements from.
    chunks = map(lambda x: seq[x[0]:x[1]], pairs_offset_1)
    # Random chromosome name made of ASCII letters.
    chrom = draw(st.text(string.ascii_letters))
    # Draw a new record for each of the positions we have made: the mapped
    # callable receives one item from POSs and one from chunks per call.
    # NOTE(review): assumes compose() applies right-to-left, i.e.
    # draw(vcf_dict_strategy_factory(chrom, pos, chunk)) -- confirm.
    vcfs = map(compose(draw, partial(vcf_dict_strategy_factory, chrom)), POSs,
               chunks)
    # TODO: ranges must be non-empty. Assuming vcfs for now.
    # vcfs can be a generator, so its length cannot be checked here.
    #assume(len(vcfs) > 0)
    return (seq, vcfs)
Пример #5
0
    def test_from_generator(self):
        """A generator function and a tuple can both be fed in with <<."""
        def one_two_three():
            for number in (1, 2, 3):
                yield number

        stream = Stream() << one_two_three
        stream = stream << (4, 5)
        assert list(stream) == [1, 2, 3, 4, 5]
Пример #6
0
    def test_fib_infinite_stream(self):
        """A self-referential stream yields the Fibonacci numbers."""
        from operator import add

        base = Stream()
        seeded = base << [0, 1]
        # Every further item is the sum of the stream and the same stream
        # with its first item dropped.
        fib = seeded << iters.map(add, base, iters.drop(1, base))

        first_ten = list(iters.take(10, fib))
        self.assertEqual([0, 1, 1, 2, 3, 5, 8, 13, 21, 34], first_ten)
        self.assertEqual(6765, fib[20])

        window = list(fib[30:35])
        self.assertEqual(
            [832040, 1346269, 2178309, 3524578, 5702887], window)
        # 35 elements should be already evaluated
        self.assertEqual(fib.cursor(), 35)
Пример #7
0
def combined_stats_over_time(
    label,
    runs,
    objective,
    worst,
    best,
    stats_quanta=10,
):
    """
    Combine stats_over_time() vectors for multiple runs.

    One vector is computed per (run, session) pair, shorter vectors are
    padded to the longest one by repeating their last value, and the vectors
    are then read column-wise so that each time slot sees one value per run.

    :param label: unused by this function; kept for interface compatibility.
    :param runs: iterable of (run, session) pairs.
    :param objective: unused by this function.
    :param worst: unused by this function.
    :param best: unused by this function.
    :param stats_quanta: multiplier applied to the slot index to produce the
        first column of every row (defaults to 10, the value that used to be
        hard-coded).
    :return: tuple (mean_values, percentile_values). Every row of
        mean_values is [sec, mean, stddev]; every row of percentile_values
        is [sec], one value per entry of PCTSTEPS, then the mean. Both lists
        are empty when runs is empty.
    """
    extract_fn = _.result.time
    combine_fn = min
    no_data = 999

    by_run = [
        stats_over_time(session, run, extract_fn, combine_fn, no_data)
        for run, session in runs
    ]
    # max() raises ValueError on an empty sequence; with no runs there is
    # nothing to pad, so fall back to 0 and return empty results below.
    max_len = max(map(len, by_run)) if by_run else 0

    # Pad each vector up to max_len by repeating its last element.
    by_run_streams = [
        Stream() << x << repeat(x[-1], max_len - len(x)) for x in by_run
    ]
    # Transpose: one tuple per time slot holding the value from every run.
    # Materialized because get_data() walks it twice.
    by_quanta = list(zip(*by_run_streams))

    def get_data(value_function):
        """Build one [sec, ...] row per time slot using value_function."""
        return [
            [quanta * stats_quanta] + value_function(values)
            for quanta, values in enumerate(by_quanta)
        ]

    mean_values = get_data(lambda values: [mean(values), stddev(values)])

    def extract_percentiles(values):
        """Pick the value at each PCTSTEPS fraction, then append the mean."""
        values = sorted(values)
        return ([values[int(round(p * (len(values) - 1)))]
                 for p in PCTSTEPS] + [mean(values)])

    percentile_values = get_data(extract_percentiles)
    return mean_values, percentile_values
Пример #8
0
 def __iter__(self):
     """Iterate the entries of self.array that precede the first None.

     Entries that are instances of this class are fed into the stream
     directly; every other entry is wrapped in a one-element list first.
     """
     stream = Stream()
     for entry in self.array:
         if entry is None:
             # The first None marks the end of the entries to visit.
             break
         if isinstance(entry, self.__class__):
             piece = entry
         else:
             piece = [entry]
         stream = stream << piece
     return iter(stream)
Пример #9
0
 def __iter__(self):
     """Iterate a Stream fed with self.root first and self.tail second."""
     combined = Stream()
     combined = combined << self.root
     combined = combined << self.tail
     return iter(combined)
Пример #10
0
    def test_lazy_slicing_recursive(self):
        """Slicing a slice must still leave the source cursor at 0."""
        source = Stream() << iters.range(10)
        outer = source[1:3]
        nested = outer[0:2]

        self.assertEqual(source.cursor(), 0)

        nested_items = list(nested)
        self.assertEqual(len(nested_items), 2)
Пример #11
0
    def combined_stats_over_time(
        self,
        output_dir,
        label,
        runs,
        objective,
        worst,
        best,
    ):
        """
        Combine stats_over_time() vectors for multiple runs.

        One vector is computed per (run, session) pair, shorter vectors are
        padded to the longest one by repeating their last value, and two
        space-delimited files are written into output_dir:
        ``<label>_mean.dat`` (sec, mean, stddev) and
        ``<label>_percentiles.dat`` (sec, one column per PCTSTEPS entry,
        mean). self.gnuplot_file() is called once for each of them.

        :param output_dir: directory the data files are written to.
        :param label: prefix of the generated file names.
        :param runs: iterable of (run, session) pairs.
        :param objective: unused here (only referenced by commented-out code).
        :param worst: unused here (only referenced by commented-out code).
        :param best: unused here (only referenced by commented-out code).
        """

        #extract_fn = lambda dr: objective.stats_quality_score(dr.result, worst, best)
        extract_fn = _.result.run_time
        combine_fn = min
        no_data = 999

        log.debug("writing stats for %s to %s", label, output_dir)
        by_run = [
            self.stats_over_time(session, run, extract_fn, combine_fn, no_data)
            for run, session in runs
        ]
        max_len = max(map(len, by_run))

        # Pad each vector up to max_len by repeating its last element.
        by_run_streams = [
            Stream() << x << repeat(x[-1], max_len - len(x)) for x in by_run
        ]
        # Materialize the transposed data: data_file() walks by_quanta once
        # per output file, and a bare zip() iterator (Python 3) would already
        # be exhausted when the second file is written.
        by_quanta = list(zip(*by_run_streams))

        def data_file(suffix, headers, value_function):
            """Write label+suffix with a '#sec' header and one row per slot."""
            with open(os.path.join(output_dir, label + suffix), 'w') as fd:
                out = csv.writer(fd, delimiter=' ', lineterminator='\n')
                out.writerow(['#sec'] + headers)
                for quanta, values in enumerate(by_quanta):
                    sec = quanta * self.args.stats_quanta
                    out.writerow([sec] + value_function(values))

        #data_file('_details.dat',
        #          map(lambda x: 'run%d'%x, xrange(max_len)),
        #          list)
        #self.gnuplot_file(output_dir,
        #                  label+'_details',
        #                  [('"'+label+'_details.dat"'
        #                    ' using 1:%d'%i +
        #                    ' with lines'
        #                    ' title "Run %d"'%i)
        #                   for i in xrange(max_len)])

        # data_file() prepends '#sec' itself, so only the value columns are
        # named here (passing '#sec' again produced a four-name header over
        # three columns).
        data_file('_mean.dat', ['mean', 'stddev'],
                  lambda values: [mean(values), stddev(values)])
        self.gnuplot_file(
            output_dir, label + '_mean',
            ['"' + label + '_mean.dat" using 1:2 with lines title "Mean"'])

        def extract_percentiles(values):
            """Pick the value at each PCTSTEPS fraction, then the mean."""
            values = sorted(values)
            return (
                [values[int(round(p * (len(values) - 1)))]
                 for p in PCTSTEPS] + [mean(values)])

        data_file("_percentiles.dat", PCTSTEPS + ['mean'], extract_percentiles)
        # NOTE(review): the titles below go 0,10,...,50 and then 70,...,100
        # while the plotted column keeps advancing by 2, so "60%" never
        # appears -- confirm the titles against the actual PCTSTEPS layout.
        self.gnuplot_file(
            output_dir,
            label + '_percentiles',
            reversed([
                '"' + label +
                '_percentiles.dat" using 1:2  with lines title "0%"',
                # '""                          using 1:3  with lines title "5%"',
                '""                          using 1:4  with lines title "10%"',
                # '""                          using 1:5  with lines title "25%"',
                '""                          using 1:6  with lines title "20%"',
                # '""                          using 1:7  with lines title "35%"',
                '""                          using 1:8  with lines title "30%"',
                # '""                          using 1:9  with lines title "45%"',
                '""                          using 1:10 with lines title "40%"',
                # '""                          using 1:11 with lines title "55%"',
                '""                          using 1:12 with lines title "50%"',
                # '""                          using 1:13 with lines title "65%"',
                '""                          using 1:14 with lines title "70%"',
                # '""                          using 1:15 with lines title "75%"',
                '""                          using 1:16 with lines title "80%"',
                # '""                          using 1:17 with lines title "85%"',
                '""                          using 1:18 with lines title "90%"',
                # '""                          using 1:19 with lines title "95%"',
                '"' + label +
                '_percentiles.dat" using 1:20 with lines title "100%"',
            ]))
Пример #12
0
 def test_origin_param_string(self):
     """A string argument is kept as one item, not split into characters."""
     items = list(Stream("stream"))
     self.assertEqual(["stream"], items)
Пример #13
0
    def test_lazy_slicing_recursive(self):
        """Check that a slice of a slice does not advance the source cursor."""
        numbers = Stream() << iters.range(10)
        first_cut = numbers[1:3]
        second_cut = first_cut[0:2]

        self.assertEqual(numbers.cursor(), 0)

        remaining = list(second_cut)
        self.assertEqual(len(remaining), 2)
Пример #14
0
 def test_from_iterator(self):
     """A range followed by a list is iterated in the order it was fed."""
     stream = Stream()
     stream = stream << range(6)
     stream = stream << [6, 7]
     self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7], list(stream))
Пример #15
0
 def test_from_list(self):
     """A list-fed stream supports iteration, indexing and slicing."""
     stream = Stream() << [1, 2, 3, 4, 5]
     self.assertEqual([1, 2, 3, 4, 5], list(stream))

     second = stream[1]
     self.assertEqual(2, second)

     first_two = list(stream[0:2])
     self.assertEqual([1, 2], first_two)