def test_join(transform):
    """Check ``b.join`` against ``join`` when the other side comes from ``transform``.

    ``transform`` is applied to ``[1, 2, 3]`` to build the collection that
    ``b`` is joined with; the expected values are computed by calling
    ``join`` directly on the plain list and on ``list(b)``.
    """
    other = transform([1, 2, 3])

    # Separate key functions for each side of the join.
    joined = b.join(other, on_self=isodd, on_other=iseven)
    expected_mixed_keys = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert_eq(joined, expected_mixed_keys)

    # Only one key function given positionally; the expected value uses
    # ``isodd`` as the key on both sides.
    joined_single_key = b.join(other, isodd)
    expected_single_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert_eq(joined_single_key, expected_single_key)

    # Building the same join a second time must yield the same ``name``.
    rebuilt = b.join(other, on_self=isodd, on_other=iseven)
    assert joined.name == rebuilt.name
def test_join():
    """Check ``b.join`` with a plain list against a direct call to ``join``."""
    # Separate key functions for each side of the join.
    joined = b.join([1, 2, 3], on_self=isodd, on_other=iseven)
    actual_mixed_keys = list(joined)
    expected_mixed_keys = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert actual_mixed_keys == expected_mixed_keys

    # Only one key function given positionally; the expected value uses
    # ``isodd`` as the key on both sides.
    actual_single_key = list(b.join([1, 2, 3], isodd))
    expected_single_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert actual_single_key == expected_single_key

    # Building the same join a second time must yield the same ``name``.
    rebuilt = b.join([1, 2, 3], on_self=isodd, on_other=iseven)
    assert joined.name == rebuilt.name
def compute_up(t, lhs, rhs, **kwargs):
    """Join operation for the Python streaming backend.

    A purely streaming join is challenging or impossible: any row of one
    sequence may match any row of the other, which requires complete access
    to both at once. This implementation compromises by fully realizing the
    LEFT sequence while letting the RIGHT sequence stream.

    Consequently, always put the bigger collection on the RIGHT side of the
    join.

    Parameters
    ----------
    t
        Join expression; ``lhs``/``rhs`` fields, ``on_left``, ``on_right``
        and ``how`` are read from it.
    lhs, rhs
        The left and right input sequences.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    A ``map`` of ``pair_assemble(t)`` over the joined pairs.
    """
    # When both inputs compare equal, split one into two independent
    # iterators so each side of the join is consumed separately.
    if lhs == rhs:
        lhs, rhs = itertools.tee(lhs, 2)

    # Translate the join column names into positional indices per side.
    left_keys = [t.lhs.fields.index(name) for name in listpack(t.on_left)]
    right_keys = [t.rhs.fields.index(name) for name in listpack(t.on_right)]

    # ``None`` acts as the fill value on a side for the join kinds below;
    # otherwise toolz's ``no_default`` sentinel is passed through.
    if t.how in ('right', 'outer'):
        fill_left = None
    else:
        fill_left = toolz.itertoolz.no_default
    if t.how in ('left', 'outer'):
        fill_right = None
    else:
        fill_right = toolz.itertoolz.no_default

    joined = toolz.join(
        left_keys, lhs,
        right_keys, rhs,
        left_default=fill_left,
        right_default=fill_right,
    )
    return map(pair_assemble(t), joined)
def compute_up(t, lhs, rhs, **kwargs):
    """Join operation for the Python streaming backend.

    A purely streaming join is challenging or impossible: any row of one
    sequence may match any row of the other, which requires complete access
    to both at once. This implementation compromises by fully realizing the
    LEFT sequence while letting the RIGHT sequence stream.

    Consequently, always put the bigger collection on the RIGHT side of the
    join.

    Parameters
    ----------
    t
        Join expression; ``lhs``/``rhs`` fields, ``on_left``, ``on_right``
        and ``how`` are read from it.
    lhs, rhs
        The left and right input sequences.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    A ``map`` of ``pair_assemble(t, on_left, on_right)`` over the joined
    pairs, where ``on_left``/``on_right`` are the positional key indices.
    """
    # When both inputs compare equal, split one into two independent
    # iterators so each side of the join is consumed separately.
    if lhs == rhs:
        lhs, rhs = itertools.tee(lhs, 2)

    # Translate the join column names into positional indices per side.
    left_keys = [t.lhs.fields.index(name) for name in listpack(t.on_left)]
    right_keys = [t.rhs.fields.index(name) for name in listpack(t.on_right)]

    # ``None`` acts as the fill value on a side for the join kinds below;
    # otherwise toolz's ``no_default`` sentinel is passed through.
    if t.how in ('right', 'outer'):
        fill_left = None
    else:
        fill_left = toolz.itertoolz.no_default
    if t.how in ('left', 'outer'):
        fill_right = None
    else:
        fill_right = toolz.itertoolz.no_default

    joined = toolz.join(
        left_keys, lhs,
        right_keys, rhs,
        left_default=fill_left,
        right_default=fill_right,
    )
    # The key indices are handed to the assembler along with the expression.
    return map(pair_assemble(t, left_keys, right_keys), joined)
def generate2(buckets1, buckets2):
    """Pair up the ``generate1`` output of two bucket collections.

    Both outputs are joined with ``first`` as the key on each side and
    ``None`` as the default for either side. Each joined pair is yielded as
    a 3-tuple ``(item[0], left item[1], right item[1])``, with ``0``
    substituted for the side that came back as ``None``.

    NOTE(review): presumably ``join``/``first`` are the toolz functions, which
    would make this a full outer join on the first element -- confirm.
    """
    paired = join(
        first, generate1(buckets1),
        first, generate1(buckets2),
        left_default=None,
        right_default=None,
    )
    for left, right in paired:
        if left is None:
            # Only the right side has this entry.
            yield right[0], 0, right[1]
            continue
        if right is None:
            # Only the left side has this entry.
            yield left[0], left[1], 0
            continue
        yield left[0], left[1], right[1]
def spacy_spanpostokenize(self, message: str, stopwords=None, pos_tag_type="simple"):
    """Combine the span tokens of ``message`` with their POS tags.

    Span tokens from ``self.span_tokenize`` and POS tokens from
    ``self.spacy_postokenize`` are joined on their first element, and each
    match contributes one ``(a, b, d)`` triple: both elements of the span
    token plus the second element of the POS token.

    Args:
        message: Text to tokenize.
        stopwords: Forwarded to ``self.span_tokenize``. ``None`` (the
            default) is treated as an empty list.
        pos_tag_type: Forwarded to ``self.spacy_postokenize``.

    Returns:
        A list of unique ``(a, b, d)`` triples, in set iteration order.
    """
    # A ``None`` sentinel replaces the former ``[]`` default so that no list
    # object is shared between calls.
    if stopwords is None:
        stopwords = []

    postokens = self.spacy_postokenize(message, pos_tag_type)
    spantokens = self.span_tokenize(message, stopwords)

    # NOTE(review): no defaults are passed to ``t.join``; if ``t`` is toolz,
    # this is an inner join (unmatched span tokens are dropped), not a left
    # join -- confirm which is intended.
    merged = t.join(0, spantokens, 0, postokens)

    # Both sides must be 2-tuples; the POS token's join key is not needed.
    return list({(a, b, d) for (a, b), (_, d) in merged})
def test_join():
    """Check ``b.join`` with a plain list against a direct call to ``join``."""
    # Separate key functions for each side of the join.
    actual_mixed_keys = list(b.join([1, 2, 3], on_self=isodd, on_other=iseven))
    expected_mixed_keys = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert actual_mixed_keys == expected_mixed_keys

    # Only one key function given positionally; the expected value uses
    # ``isodd`` as the key on both sides.
    actual_single_key = list(b.join([1, 2, 3], isodd))
    expected_single_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert actual_single_key == expected_single_key
def test_join():
    """Check ``b.join`` results and name stability against ``join``."""
    mixed_kwargs = dict(on_self=isodd, on_other=iseven)

    # Separate key functions for each side of the join.
    joined = b.join([1, 2, 3], **mixed_kwargs)
    actual_mixed_keys = list(joined)
    expected_mixed_keys = list(join(iseven, [1, 2, 3], isodd, list(b)))
    assert actual_mixed_keys == expected_mixed_keys

    # Only one key function given positionally; the expected value uses
    # ``isodd`` as the key on both sides.
    actual_single_key = list(b.join([1, 2, 3], isodd))
    expected_single_key = list(join(isodd, [1, 2, 3], isodd, list(b)))
    assert actual_single_key == expected_single_key

    # Building the same join a second time must yield the same ``name``.
    assert joined.name == b.join([1, 2, 3], **mixed_kwargs).name