Exemplo n.º 1
0
def test_to():
    """Check that a Stream converts into each supported container type."""
    expected = [5, 6, 7]

    def five_to_seven():
        # A fresh stream per assertion keeps the checks independent.
        return Stream.range(5, 8)

    assert five_to_seven().to_array() == Array(expected)
    assert Stream([(1, 2), (3, 4)]).to_dict() == {1: 2, 3: 4}
    assert five_to_seven().to_set() == {5, 6, 7}

    assert list(five_to_seven()) == expected
    assert five_to_seven().to_series().equals(pd.Series(expected))
Exemplo n.º 2
0
def test_filtering():
    """Check filter, filter_false and without on the range 5..9."""
    def is_even(n):
        return not n % 2

    def five_to_nine():
        return Stream.range(5, 10)

    odds = [5, 7, 9]

    kept = five_to_nine().filter(is_even).to_list()
    assert kept == [6, 8]

    rejected = five_to_nine().filter_false(is_even).to_list()
    assert rejected == odds

    remaining = five_to_nine().without(6, 8).to_list()
    assert remaining == odds
Exemplo n.º 3
0
def ndcg_score_of_prediction(y_truth, y_pred, group_size=20):
    """Return the mean NDCG of ``y_truth`` when ranked by ``y_pred``.

    Both inputs are split with ``Stream.chunk(group_size)`` and paired up
    group by group. Inside each group the truth values are reordered by
    descending prediction, scored with ``ndcg_at_k``, and the per-group
    scores are averaged.

    Args:
        y_truth: Iterable of ground-truth relevance values.
        y_pred: Iterable of predicted scores, aligned with ``y_truth``.
        group_size: Number of consecutive items forming one ranking group.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        The result of ``Stream.mean()`` over the per-group NDCG values.
    """
    def as_groups(values):
        return Stream(values).chunk(group_size).map(lambda row: row.to_list())

    def truth_ranked_by_prediction(pred_l, y_l):
        # The (prediction, truth) pairs are sorted as tuples in descending
        # order, so equal predictions are tie-broken by the truth value.
        return [y_e for _, y_e in sorted(zip(pred_l, y_l), reverse=True)]

    return (as_groups(y_pred)
            .zip(as_groups(y_truth))
            .starmap(truth_ranked_by_prediction)
            .map(ndcg_at_k)
            .mean())
Exemplo n.º 4
0
def test_slice():
    """Check take/drop and their predicate-driven ``while`` variants."""
    def below_three(n):
        return n < 3

    first_three = Stream.range(5, 10).take(3).to_list()
    assert first_three == [5, 6, 7]

    after_three = Stream.range(5, 10).drop(3).to_list()
    assert after_three == [8, 9]

    leading = Stream.range(0, 5).takewhile(below_three).to_list()
    assert leading == [0, 1, 2]

    trailing = Stream.range(0, 5).dropwhile(below_three).to_list()
    assert trailing == [3, 4]
Exemplo n.º 5
0
def test_groupby():
    """Check grouping 0..9 by integer division by three."""
    def third(n):
        return n // 3

    grouped_pairs = (
        Stream.range(10)
        .group_by_as_stream(third)
        .starmap(lambda k, vs: (k, list(vs)))
        .to_list()
    )
    assert grouped_pairs == [
        (0, [0, 1, 2]),
        (1, [3, 4, 5]),
        (2, [6, 7, 8]),
        (3, [9]),
    ]

    grouped_map = Stream.range(10).group_by_as_map(third)
    expected_map = {
        0: Array([0, 1, 2]),
        1: Array([3, 4, 5]),
        2: Array([6, 7, 8]),
        3: Array([9]),
    }
    assert grouped_map == expected_map
Exemplo n.º 6
0
def test_map():
    """Check map, flat_map, flatten and len on small streams."""
    def double(n):
        return n * 2

    def twice(v):
        return [v, v]

    pairs = [(5, 1), (3, 4)]

    doubled = Stream.range(10).map(double).to_list()
    assert doubled == list(range(0, 20, 2))

    repeated = Stream.range(3).flat_map(twice).to_list()
    assert repeated == [0, 0, 1, 1, 2, 2]

    flattened = Stream(pairs).flatten().to_list()
    assert flattened == [5, 1, 3, 4]

    assert Stream.range(10).len() == 10
    assert Stream(pairs).flatten().len() == 4
Exemplo n.º 7
0
def test_split():
    """Check split_before / split_after around multiples of three."""
    def is_multiple_of_three(n):
        return n % 3 == 0

    before = Stream.range(10).split_before(is_multiple_of_three).to_list()
    expected_before = [
        Array([0, 1, 2]),
        Array([3, 4, 5]),
        Array([6, 7, 8]),
        Array([9]),
    ]
    assert before == expected_before

    after = Stream.range(10).split_after(is_multiple_of_three).to_list()
    expected_after = [
        Array([0]),
        Array([1, 2, 3]),
        Array([4, 5, 6]),
        Array([7, 8, 9]),
    ]
    assert after == expected_after
Exemplo n.º 8
0
def test_pluck():
    """Check pluck, pluck_opt and pluck_attr."""
    complete_records = [{'a': 1, 'b': 2}, {'a': 4, 'b': 5}]
    assert Stream(complete_records).pluck('a').to_list() == [1, 4]

    # The middle record has no 'a' key; pluck_opt is expected to yield
    # Nothing for it instead of failing.
    partial_records = [{'a': 1, 'b': 2}, {'b': 3}, {'a': 4, 'b': 5}]
    optional_values = Stream(partial_records).pluck_opt('a').to_list()
    assert optional_values == [Some(1), Nothing, Some(4)]

    people = [Person('Johnny', 18), Person('Amy', 15)]
    names = Stream(people).pluck_attr('name').to_list()
    assert names == ['Johnny', 'Amy']
Exemplo n.º 9
0
def test_reduce():
    """Check the reducing operations on the stream 5, 6, 7."""
    def five_to_seven():
        return Stream.range(5, 8)

    assert five_to_seven().reduce(op.add) == 18
    assert five_to_seven().fold_left(op.add, 2) == 20
    assert five_to_seven().mean() == 6
    assert five_to_seven().sum() == 18

    running_sums = five_to_seven().accumulate().to_list()
    assert running_sums == [5, 11, 18]

    running_products = five_to_seven().accumulate(op.mul).to_list()
    assert running_products == [5, 30, 210]
Exemplo n.º 10
0
def test_zip():
    """Check the zip family: zip, zip_longest, zip_prev/next and zip_index.

    Every assertion builds its own ``Stream.range(5, 8)`` so that the checks
    do not depend on each other.
    """
    def five_to_seven():
        return Stream.range(5, 8)

    # Zipping with an endless counter stops when the stream is exhausted.
    # (This assertion used to appear twice verbatim; one copy is enough.)
    assert five_to_seven().zip(itt.count(1)).to_list() == [
        (5, 1), (6, 2), (7, 3)
    ]

    # zip_longest pads the shorter side: None by default, or ``fillvalue``.
    assert five_to_seven().zip_longest(range(1, 5)).to_list() == [
        (5, 1), (6, 2), (7, 3), (None, 4)
    ]
    assert five_to_seven().zip_longest(range(1, 5),
                                       fillvalue=100).to_list() == [
        (5, 1), (6, 2), (7, 3), (100, 4)
    ]

    # Without an argument the missing neighbour is None.
    assert five_to_seven().zip_prev().to_list() == [
        CurrPrev(5, None), CurrPrev(6, 5), CurrPrev(7, 6)
    ]
    assert five_to_seven().zip_next().to_list() == [
        CurrNext(5, 6), CurrNext(6, 7), CurrNext(7, None)
    ]

    # A positional argument replaces the missing neighbour.
    assert five_to_seven().zip_prev(99).to_list() == [
        CurrPrev(5, 99), CurrPrev(6, 5), CurrPrev(7, 6)
    ]
    assert five_to_seven().zip_next(99).to_list() == [
        CurrNext(5, 6), CurrNext(6, 7), CurrNext(7, 99)
    ]

    assert five_to_seven().zip_index().to_list() == [
        ValueIndex(5, 0), ValueIndex(6, 1), ValueIndex(7, 2)
    ]
Exemplo n.º 11
0
    def handle(self):
        """Parse the command inputs, run one sync, then keep syncing forever.

        The ``email_id_mappings`` argument holds ``email:trello_id`` strings;
        each is split on the first colon and stored as a Row on
        ``self.email_id_mappings``. The optional ``first-updated-min`` option
        is parsed with ``pdl.parse`` (None when absent) and used for the
        initial ``self.sync`` call. The method then loops without end,
        calling ``self.sync`` with a timestamp that advances one minute per
        iteration and sleeping 60 seconds between calls.
        """
        def to_mapping_row(s):
            return Row.from_values(s.split(':', 1),
                                   fields=('email', 'trello_id'))

        raw_mappings = self.argument('email_id_mappings')
        self.email_id_mappings = Stream(raw_mappings).map(
            to_mapping_row).to_list()

        raw_first_updated = self.option('first-updated-min')
        first_updated_min = (Optional.from_value(raw_first_updated)
                             .map(pdl.parse)
                             .get_or_none())
        self.sync(first_updated_min)

        # Start ten minutes before the current whole minute; the first loop
        # iteration therefore syncs from nine minutes ago.
        current_minute = pdl.now(tz='Asia/Taipei').set(second=0,
                                                       microsecond=0)
        cursor = current_minute.subtract(minutes=10)
        while True:
            cursor = cursor.add(minutes=1)
            self.sync(cursor)
            time.sleep(60)
Exemplo n.º 12
0
def test_general_case(ipsum, capsys):
    """End-to-end pipeline: the three word lengths with most distinct words.

    The ``ipsum`` fixture text is split into words, punctuation is stripped,
    duplicates are removed, and the words are grouped by length. ``tap``
    prints the first two (length, count) rows, which is checked through
    ``capsys``.
    """
    distinct_words = (
        Stream(ipsum.splitlines())
        .flat_map(lambda line: line.split(' '))
        .map(lambda word: word.strip(',.'))
        .filter(lambda word: len(word) > 0)
        .distincted()
    )

    length_counts = (
        distinct_words
        .sorted(key=len)
        .group_by_as_stream(len)
        .map(lambda kv: kv.transform(values=lambda stream: stream.to_array()))
        .map(lambda kv: Row(length=kv.key, count=kv.values.len()))
    )

    out = (
        length_counts
        .tap(tag='length count', n=2)
        .nlargest(3, key=lambda row: row.count)
        .pluck_attr('length')
        .to_list()
    )
    assert out == [6, 7, 8]

    captured = capsys.readouterr()
    assert captured.out == '''length count:0: Row(length=1, count=2)
length count:1: Row(length=2, count=10)
'''
    assert captured.err == ''
Exemplo n.º 13
0
def get_tfidf(joblist, tokens):
    """Return per-field relevance scores of ``tokens`` for each job.

    Args:
        joblist: List of document ids in the ``hack104`` index.
        tokens: List of query tokens; they are joined with spaces and
            analysed with the ``whitespace`` analyzer.

    Returns:
        A list with one tuple per entry of ``joblist``, in the same order:
        ``(job, score_for_field_1, score_for_field_2, ...)`` following the
        order of ``Job.tfidf_fields``. A field (or a whole job) that did not
        contribute to a hit gets ``0.0``.
    """
    fields = Job.tfidf_fields

    if not tokens:
        return [tuple([job] + [0.0 for _ in fields]) for job in joblist]

    q_multi_match = Q('multi_match',
                      query=' '.join(tokens),
                      type='most_fields',
                      fields=fields,
                      analyzer='whitespace')
    q_ids = Q('ids', values=joblist)
    q_overall = Q('bool', must=q_multi_match, filter=q_ids)
    client = connections.create_connection(hosts=['localhost:9201'],
                                           timeout=20)

    # Elasticsearch returns only 10 hits unless a size is given, so ask for
    # as many hits as there are candidate jobs.
    # NOTE(review): sizes above the index's max_result_window (10000 by
    # default) are rejected by the server - confirm joblist stays below it.
    search = (Search(using=client, index='hack104')
              .query(q_overall)
              .extra(explain=True, size=len(joblist)))

    def ugly_extract_fieldname(description):
        # Presumably the description looks like 'weight(<field>:<term> ...';
        # the slice drops the first 7 characters and stops at the colon.
        return description[7:description.find(':')]

    def extract_fields_scores(expl):
        return (Stream(
            expl['_explanation']['details'][0]['details']).map(lambda d: Row(
                field=ugly_extract_fieldname(d['details'][0]['description']),
                score=d['value'])).to_map())

    # Hits come back ordered by relevance, and jobs that match no token are
    # missing altogether, so scores must be looked up by document id rather
    # than paired with joblist by position. Ids are strings in the response.
    scores_by_id = {
        str(hit['_id']): extract_fields_scores(hit)
        for hit in search.execute().hits.hits
    }

    no_scores = {}
    return [
        tuple([job] + [
            scores_by_id.get(str(job), no_scores).get(field, 0.0)
            for field in fields
        ])
        for job in joblist
    ]
Exemplo n.º 14
0
def test_extend():
    """Check that extended/appended add items at the end of the stream."""
    with_iterable = Stream.range(5, 8).extended([1, 3]).to_list()
    assert with_iterable == [5, 6, 7, 1, 3]

    with_single_item = Stream.range(5, 8).appended(5).to_list()
    assert with_single_item == [5, 6, 7, 5]
Exemplo n.º 15
0
def ndcg_score_of_truth(y_truth, group_size=20):
    """Return the mean NDCG of ``y_truth`` taken in its given order.

    The values are split with ``Stream.chunk(group_size)``, each group is
    scored with ``ndcg_at_k``, and the per-group scores are averaged.

    Args:
        y_truth: Iterable of ground-truth relevance values.
        group_size: Number of consecutive items forming one ranking group.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        The result of ``Stream.mean()`` over the per-group NDCG values.
    """
    from carriage import Stream

    return (Stream(y_truth)
            .chunk(group_size)
            .map(lambda row: row.to_list())
            .map(ndcg_at_k)
            .mean())
Exemplo n.º 16
0
def test_access():
    """Check positional access: get, indexing, get_opt, first, first_opt."""
    def five_to_nine():
        return Stream.range(5, 10)

    def from_iterator():
        # A stream backed by a plain iterator instead of a range.
        return Stream(iter(range(5, 10)))

    # get: in range, out of range with a default, out of range without one.
    assert five_to_nine().get(3) == 8
    assert five_to_nine().get(5, 0) == 0
    assert five_to_nine().get(7, 0) == 0
    assert five_to_nine().get(6) is None
    with pytest.raises(ValueError):
        assert five_to_nine().get(-1) is None

    # Subscript access.
    assert five_to_nine()[3] == 8
    with pytest.raises(ValueError):
        assert from_iterator()[-1]
    with pytest.raises(IndexError):
        assert five_to_nine()[5]

    # get_opt wraps the outcome in Some / Nothing.
    assert five_to_nine().get_opt(3) == Some(8)
    assert five_to_nine().get_opt(5) is Nothing
    with pytest.raises(ValueError):
        assert from_iterator().get_opt(-1) is Nothing

    # first / first_opt, including empty streams.
    assert five_to_nine().first() == 5
    assert five_to_nine().first_opt() == Some(5)
    assert Stream([5]).first_opt() == Some(5)
    assert Stream([]).first_opt() is Nothing
    assert Stream(iter([])).first_opt() is Nothing
Exemplo n.º 17
0
def test_init():
    """Check building a Stream from a list, a range and Stream.range."""
    zero_to_nine = list(range(10))

    from_list = Stream([1, 2, 3]).to_list()
    assert from_list == [1, 2, 3]

    from_range = Stream(range(10)).to_list()
    assert from_range == zero_to_nine

    from_classmethod = Stream.range(10).to_list()
    assert from_classmethod == zero_to_nine
Exemplo n.º 18
0
def test_tuple_as_row():
    """Check that tuple_as_row turns tuples into Rows with given fields."""
    pairs = Stream([(1, 2), (3, 4)])
    expected = [Row(x=1, y=2), Row(x=3, y=4)]

    assert pairs.tuple_as_row(['x', 'y']).to_list() == expected
Exemplo n.º 19
0
def test_counter():
    """Check that value_counts agrees with collections.Counter."""
    values = [3, 1, 1, 3, 5, 4, 9, 3, 1, 5, 3]

    counts = Stream(values).value_counts()

    assert counts == Counter(values)
Exemplo n.º 20
0
def test_sliding_window():
    """Check that sliding_window(3) yields overlapping 3-tuples."""
    windows = Stream.range(5, 10).sliding_window(3).to_list()

    assert windows == [
        (5, 6, 7),
        (6, 7, 8),
        (7, 8, 9),
    ]
Exemplo n.º 21
0
 def extract_fields_scores(expl):
     """Build a field -> score mapping from one explained search hit.

     Reads the entries under ``expl['_explanation']['details'][0]['details']``
     and, for each, pairs the field name taken from its first nested
     description with the entry's ``value``.
     """
     per_field_details = expl['_explanation']['details'][0]['details']

     def to_field_score(d):
         description = d['details'][0]['description']
         return Row(field=ugly_extract_fieldname(description),
                    score=d['value'])

     return Stream(per_field_details).map(to_field_score).to_map()
Exemplo n.º 22
0
 def tlist_of_name_opt(self, name):
     """Search ``self.tlists`` for an entry whose ``name`` contains *name*.

     Returns whatever ``Stream.find_opt`` yields for that predicate.
     """
     def name_matches(tlist):
         return name in tlist.name

     return Stream(self.tlists).find_opt(name_matches)
Exemplo n.º 23
0
def test_interpose():
    """Check that interpose puts the separator between items only."""
    separated = Stream.range(5, 9).interpose(0).to_list()
    assert separated == [5, 0, 6, 0, 7, 0, 8]

    # An empty stream stays empty.
    nothing = Stream([]).interpose(0).to_list()
    assert nothing == []
Exemplo n.º 24
0
def test_ordering():
    """Check reversed and sorted, with and without a key function."""
    def unsorted():
        return Stream([1, 3, 2, 4])

    assert Stream.range(5, 8).reversed().to_list() == [7, 6, 5]

    ascending = unsorted().sorted().to_list()
    assert ascending == [1, 2, 3, 4]

    descending = unsorted().sorted(lambda x: -x).to_list()
    assert descending == [4, 3, 2, 1]

    # Even numbers first; original order is kept within each parity.
    by_parity = unsorted().sorted(lambda x: x % 2).to_list()
    assert by_parity == [2, 4, 1, 3]
Exemplo n.º 25
0
def test_distincted():
    """Check that distincted drops repeats and keeps first-seen order."""
    with_repeats = [1, 5, 1, 3, 3, 5, 6]

    unique = Stream(with_repeats).distincted().to_list()

    assert unique == [1, 5, 3, 6]
Exemplo n.º 26
0
def tokenize(text):
    """Split *text* into search tokens, or return None when it is None.

    The text is passed through ``openCC.convert`` and then
    ``jieba.cut_for_search``; tokens equal to a single space are dropped.
    """
    if text is None:
        return None

    converted = openCC.convert(text)
    raw_tokens = jieba.cut_for_search(converted)
    return Stream(raw_tokens).filter(X != ' ').to_list()