def test_error(self):
    """Expect ``run()`` to raise ``ValueError`` when the predicate fails.

    The filter predicate calls ``int()`` on the non-numeric strings
    supplied by the source, which raises ``ValueError``.
    """
    source = Iterable(['a', 'b', 'c'])
    numeric_filter = Filter(lambda item: int(item) > 1)
    sink = AsList()

    # Wire the pipeline: source -> filter -> list collector.
    source.add_child(numeric_filter)
    numeric_filter.add_child(sink)

    self.assertRaises(ValueError, source.run)
def test_iter_filter_list(self):
    """Check that source -> filter -> list collects only values above 1."""
    source = Iterable([1, 2, 3])
    above_one = Filter(lambda value: value > 1)
    collected = AsList()

    # Wire the pipeline: source -> filter -> list collector.
    source.add_child(above_one)
    above_one.add_child(collected)

    source.run()

    expected = [2, 3]
    self.assertEqual(collected.list, expected)
def test_iter_filter_map_list(self):
    """Check that source -> filter -> map -> list yields the mapped survivors.

    Values above 1 pass the filter and are then increased by 10, so the
    collector is expected to hold ``[12, 13]``.
    """
    producer = Iterable([1, 2, 3])
    flt = Filter(lambda x: x > 1)
    # Named ``mapper`` rather than ``map`` so the builtin is not shadowed.
    mapper = Map(lambda x: x + 10)
    tolist = AsList()

    # Wire the pipeline: source -> filter -> map -> list collector.
    producer.add_child(flt)
    flt.add_child(mapper)
    mapper.add_child(tolist)

    producer.run()

    self.assertEqual(tolist.list, [12, 13])
def test_two_lists(self):
    """Check two collectors fed by the same source.

    One collector is attached directly to the source and is expected to
    hold every value; the other sits behind a filter and is expected to
    hold only the values above 1.
    """
    source = Iterable([1, 2, 3])
    above_one = Filter(lambda value: value > 1)
    unfiltered = AsList()
    filtered = AsList()

    # The source has two children: the raw collector and the filter.
    source.add_child(unfiltered)
    source.add_child(above_one)
    above_one.add_child(filtered)

    source.run()

    self.assertEqual(unfiltered.list, [1, 2, 3])
    self.assertEqual(filtered.list, [2, 3])
def test_query_search_for_leaf_on_two_branches(self):
    """Check that ``query`` resolves a slash-separated path to each leaf.

    Two branches hang off the same root; querying each full path is
    expected to return the node whose ``name()`` matches the last
    path segment.
    """
    root = Iterable(range(10))
    map_branch = root | Map(lambda value: value + 1) | StdOut()
    filter_branch = root | Filter(lambda value: value > 5) | Assert(
        self, [6, 7, 8, 9])

    expectations = (
        ("Iterable/Map/StdOut", "StdOut"),
        ("Iterable/Filter/Assert", "Assert"),
    )
    for path, leaf_name in expectations:
        self.assertEqual(root.query(path).name(), leaf_name)
def test_leafs_2_breanches_balanced(self):
    """Check that ``leafs()`` reports the end node of each of two branches.

    Both branches are one node deep, so the leaf names are expected to
    be ``["Map", "Filter"]`` in that order.
    """
    root = Iterable(range(1000))
    map_branch = root | Map(lambda value: value + 1)
    filter_branch = root | Filter(lambda value: value > 500)

    leaf_names = [leaf.name() for leaf in root.leafs()]
    self.assertEqual(leaf_names, ["Map", "Filter"])
def test_dinasty_third_level_2_two_branches(self):
    """Check ``dinasty()`` on third-level leaves of two sibling branches.

    Each leaf is expected to report the slash-separated names of the
    nodes from the root down to itself.
    """
    root = Iterable(range(1000))
    map_leaf = root | Map(lambda value: value + 1) | StdOut()
    filter_leaf = root | Filter(lambda value: value > 500) | StdOut()

    self.assertEqual(map_leaf.dinasty(), "Iterable/Map/StdOut")
    self.assertEqual(filter_leaf.dinasty(), "Iterable/Filter/StdOut")
from pypelines import Map, Filter, Sum, StdOut
from pypelines.io import HTTPClient

# Example: count the words of a text fetched over HTTP.
# NOTE(review): readlines=True presumably makes the source emit one line
# at a time -- confirm against pypelines.io.HTTPClient.
workflow = (
    HTTPClient(
        'http://www.gutenberg.org/cache/epub/1232/pg1232.txt',
        readlines=True,
    )
    | Filter(lambda text_line: text_line != "")       # drop empty lines
    | Map(lambda text_line: text_line.split(' '))     # line -> words
    | Map(lambda tokens: len(tokens))                 # words -> word count
    | Sum()
    | StdOut()
)

workflow.run()
from pypelines import Map, Filter, Sum, StdOut, CountByKey, FlatMap, Sort, Head
from pypelines.io import HTTPClient, TextFile

# Example: one HTTP source feeding three branches (save, word count,
# word histogram).
# NOTE(review): readlines=True presumably makes the source emit one line
# at a time -- confirm against pypelines.io.HTTPClient.
workflow = (
    HTTPClient(
        'http://www.gutenberg.org/cache/epub/1232/pg1232.txt',
        readlines=True,
    )
    | Filter(lambda text_line: text_line != "")       # drop empty lines
)

# Branch 1: pass the non-empty lines to a TextFile node.
savefile = workflow | TextFile("macchiavelli.txt")

# Branch 2: per-line word counts, folded by Sum and sent to StdOut.
wordcount = (
    workflow
    | Map(lambda text_line: text_line.split(' '))
    | Map(lambda tokens: len(tokens))
    | Sum()
    | StdOut()
)

# Branch 3: (word, 1) pairs counted by key, sorted on the second element
# of each item in reverse order, then limited by Head(10).
histogram = (
    workflow
    | FlatMap(lambda text_line: text_line.split(' '))
    | Filter(lambda token: token != "")               # drop empty tokens
    | Map(lambda token: (token, 1))
    | CountByKey()
    | Sort(key_func=lambda pair: pair[1], reverse=True)
    | Head(10)
    | StdOut()
)

workflow.run()