def is_unordered_eq(pcollection, expect):
    """Check whether `pcollection` and `expect` hold the same elements.

    Every element of both sides is turned into an ``(element, None)`` pair,
    the two keyed collections are cogrouped, and for each key the number of
    occurrences on each side is compared. The per-key comparison results are
    then flattened and combined with ``and_all``.

    Args:
        pcollection: the collection under test; must provide ``map`` and a
            ``_pipeline`` attribute with ``parallelize``.
        expect: the expected content. Anything that is not a ``ptype.PType``
            is first passed through ``pcollection._pipeline.parallelize``.

    Returns:
        Whatever ``.apply(and_all)`` yields for the flattened per-key
        results (presumably a single truth value -- confirm against
        ``and_all``).
    """

    def _as_key(elem):
        # The element itself becomes the grouping key; the value is unused.
        return (elem, None)

    def _same_count(actual_group, expected_group):
        return actual_group.count() == expected_group.count()

    if not isinstance(expect, ptype.PType):
        expect = pcollection._pipeline.parallelize(expect)

    keyed_actual = pcollection.map(_as_key)
    keyed_expect = expect.map(_as_key)

    per_key_match = keyed_actual.cogroup(keyed_expect).apply_values(_same_count)
    return per_key_match.flatten_values().apply(and_all)
def map_on(pcollection, field_extractor, fn):
    """Map `pcollection` with the callable produced by ``on(field_extractor, fn)``.

    Args:
        pcollection: object providing a ``map`` method.
        field_extractor: forwarded as the first argument to ``on``.
        fn: forwarded as the second argument to ``on``.

    Returns:
        The result of ``pcollection.map`` applied to that callable.
    """
    mapper = on(field_extractor, fn)
    return pcollection.map(mapper)
def convert_to(pcollection, to_type):
    """Convert every element of `pcollection` by calling ``to_type`` on it.

    Args:
        pcollection: object providing a ``map`` method.
        to_type: callable invoked with one element at a time.

    Returns:
        The result of ``pcollection.map`` with the conversion applied.
    """

    def _convert(data):
        return to_type(data)

    return pcollection.map(_convert)
def select(pcollection, *field_extractors):
    """Project each element of `pcollection` through the given extractors.

    Args:
        pcollection: object providing a ``map`` method.
        *field_extractors: objects exposing ``get(element)``.

    Returns:
        The result of ``pcollection.map``. With exactly one extractor each
        element is mapped to that extractor's value; otherwise each element
        is mapped to a list of the extractors' values, in argument order
        (an empty list when no extractor is given).
    """
    if len(field_extractors) == 1:
        extractor = field_extractors[0]
        return pcollection.map(lambda inp: extractor.get(inp))

    # Build a concrete list per element: the built-in ``map`` returns a lazy,
    # one-shot iterator on Python 3, whereas a list comprehension yields a
    # real list on both Python 2 and Python 3.
    return pcollection.map(
        lambda inp: [extractor.get(inp) for extractor in field_extractors])