Пример #1
0
    def test_split(self):
        """Push one two-field row through ``self.w`` and inspect both sinks.

        The assertions expect each target to receive exactly one row: the
        first target a one-element tuple holding the first field, the second
        target a one-element tuple holding the second field.
        """
        self.assertTrue(self.w.is_valid)
        rows = [('row_id', 'row_data')]
        first_sink, second_sink = [], []

        config = gibbon.Configuration()
        config.add_configuration('src',
                                 source=gibbon.SequenceWrapper,
                                 iterable=rows)
        # Both targets are configured identically apart from their name and
        # the list that collects their rows.
        for target_name, container in (('tgt1', first_sink),
                                       ('tgt2', second_sink)):
            config.add_configuration(target_name,
                                     target=gibbon.SequenceWrapper,
                                     container=container)

        async_executor = gibbon.get_async_executor(shutdown=True)
        self.w.prepare(config)
        self.w.run(async_executor)

        sinks = (first_sink, second_sink)
        # Sizes are checked for both sinks before any row content is read.
        for sink in sinks:
            self.assertEqual(len(sink), 1)
        for sink, expected_row in zip(sinks, (('row_id', ), ('row_data', ))):
            self.assertEqual(sink[0], expected_row)
Пример #2
0
    def test_concat_unbalanced(self):
        """Run ``self.w`` on a two-row source and a one-row source.

        The expected output has two rows: the first rows of both sources
        joined into a single tuple, followed by the remaining row of the
        longer source on its own.
        """
        self.assertTrue(self.w.is_valid)

        longer_rows = [('row_id', ), ('another_id', )]
        shorter_rows = [('row_data', )]
        collected = []

        config = gibbon.Configuration()
        for source_name, rows in (('src1', longer_rows),
                                  ('src2', shorter_rows)):
            config.add_configuration(source_name,
                                     source=gibbon.SequenceWrapper,
                                     iterable=rows)
        config.add_configuration('tgt',
                                 target=gibbon.SequenceWrapper,
                                 container=collected)

        async_executor = gibbon.get_async_executor(shutdown=True)
        self.w.prepare(config)
        self.w.run(async_executor)

        self.assertTrue(len(collected))
        self.assertEqual(len(collected), 2)
        expected_rows = (('row_id', 'row_data'), ('another_id', ))
        for position, expected_row in enumerate(expected_rows):
            self.assertEqual(collected[position], expected_row)
Пример #3
0
    def setUp(self):
        """Create the sample strings, the 'all_rows' workflow and an empty config.

        The workflow is wired as source 'src' -> 'filter' -> target 'tgt',
        with ``len_over_three`` as the filter condition. The sample strings
        have lengths 2, 3 and 4.
        """
        self.data = ['22', '333', '4444']

        workflow = gibbon.Workflow('all_rows')
        workflow.add_source('src')
        workflow.add_transformation('filter',
                                    gibbon.Filter,
                                    source='src',
                                    condition=len_over_three)
        workflow.add_target('tgt', source='filter')
        self.wk = workflow

        self.cfg = gibbon.Configuration()
Пример #4
0
    def setUp(self):
        """Create the enumerator workflow, configure its source and validate it.

        The workflow is wired as source 'src' -> 'enum' -> target 'tgt'.
        Each name in ``self.data`` is fed to the source as a one-element
        tuple.
        """
        self.data = ['Henry', 'Jane', 'Willy']

        workflow = gibbon.Workflow('test_enumerator')
        workflow.add_source('src')
        workflow.add_transformation('enum',
                                    gibbon.Enumerator,
                                    source='src',
                                    start_with=1,
                                    reset_after=1)
        workflow.add_target('tgt', source='enum')
        self.w = workflow

        self.cfg = gibbon.Configuration()
        # Wrap every name in its own one-element tuple.
        single_field_rows = [(name, ) for name in self.data]
        self.cfg.add_configuration('src',
                                   source=gibbon.SequenceWrapper,
                                   iterable=single_field_rows)
        self.w.validate(verbose=True)
Пример #5
0
    def test_csv_source(self):
        """Read ``self._filename`` through ``self.w`` into a list.

        Checks that at least one row was collected and that the first
        collected row is a tuple.
        """
        self.assertTrue(self.w.is_valid)

        results = []
        cfg = gibbon.Configuration()
        cfg.add_configuration('csv',
                              source=gibbon.CSVSourceFile,
                              filename=self._filename)
        cfg.add_configuration('list',
                              target=gibbon.SequenceWrapper,
                              container=results)

        self.w.prepare(cfg)
        self.w.run(gibbon.get_async_executor(shutdown=True))
        # assertGreater reports the actual length on failure, whereas
        # assertTrue(len(...) > 0) would only say "False is not true".
        self.assertGreater(len(results), 0)
        self.assertIsInstance(results[0], tuple)
Пример #6
0
 def test_sum(self):
     """Feed the rows (1,), (2,), (3,) to ``self.w`` and check the output.

     The target is expected to hold exactly one row: ('sum:', 6).
     """
     config = gibbon.Configuration()
     rows = [(value, ) for value in (1, 2, 3)]
     collected = []
     config.add_configuration('src',
                              source=gibbon.SequenceWrapper,
                              iterable=rows)
     config.add_configuration('tgt',
                              target=gibbon.SequenceWrapper,
                              container=collected)
     self.w.prepare(config)
     async_executor = gibbon.get_async_executor(shutdown=True)
     self.w.run(async_executor)
     expected_rows = [('sum:', 6)]
     self.assertSequenceEqual(collected, expected_rows)
Пример #7
0
 def test_group_by(self):
     """Run ``self.w`` on (key, value) rows and check the per-key results.

     The input holds two rows for key 'a' (values 1 and 3) and two rows for
     key 'b' (values 2 and 0). The target is expected to hold two
     (key, value) rows, with 4 for 'a' and 2 for 'b'.
     """
     cfg = gibbon.Configuration()
     data = list(zip(['a', 'b', 'a', 'b'], [1, 2, 3, 0]))
     sink = []
     cfg.add_configuration('src',
                           source=gibbon.SequenceWrapper,
                           iterable=data)
     cfg.add_configuration('tgt',
                           target=gibbon.SequenceWrapper,
                           container=sink)
     self.w.prepare(cfg)
     self.w.run(gibbon.get_async_executor(shutdown=True))
     self.assertEqual(len(sink), 2)
     # Build the lookup once instead of re-converting the sink for every
     # assertion.
     totals = dict(sink)
     # assertIn shows the missing key and the container on failure.
     self.assertIn('a', totals)
     self.assertIn('b', totals)
     self.assertEqual(totals['a'], 4)
     self.assertEqual(totals['b'], 2)
Пример #8
0
    def setUp(self):
        """Create sample rows, a result list, two Expression workflows and a config.

        Both workflows are wired as source 'src' -> 'expression' -> target
        'tgt'. The 'default' one passes no extra arguments to the
        expression step; the 'compute' one passes ``func=compute``.
        """
        self.data = [(0, ), (1, ), (-1, )]
        self.results = []

        default_workflow = gibbon.Workflow('default')
        default_workflow.add_source('src')
        default_workflow.add_transformation('expression',
                                            gibbon.Expression,
                                            source='src')
        default_workflow.add_target('tgt', source='expression')
        self.wk_default_expr = default_workflow

        compute_workflow = gibbon.Workflow('compute')
        compute_workflow.add_source('src')
        compute_workflow.add_transformation('expression',
                                            gibbon.Expression,
                                            source='src',
                                            func=compute)
        compute_workflow.add_target('tgt', source='expression')
        self.wk_compute_expr = compute_workflow

        self.cfg = gibbon.Configuration()
Пример #9
0
    def setUp(self):
        """Create sample rows, a result list, two Filter workflows and a config.

        Both workflows are wired as source 'src' -> 'filter' -> target 'tgt'.
        The 'all_rows' one passes no condition to the filter step; the
        'only_positive' one passes ``self.only_positive`` as its condition.
        """
        self.data = [(0, ), (1, ), (-1, )]
        self.results = []

        unfiltered = gibbon.Workflow('all_rows')
        unfiltered.add_source('src')
        unfiltered.add_transformation('filter',
                                      gibbon.Filter,
                                      source='src')
        unfiltered.add_target('tgt', source='filter')
        self.wk_all_rows = unfiltered

        positives = gibbon.Workflow('only_positive')
        positives.add_source('src')
        positives.add_transformation('filter',
                                     gibbon.Filter,
                                     source='src',
                                     condition=self.only_positive)
        positives.add_target('tgt', source='filter')
        self.wk_only_pos = positives

        self.cfg = gibbon.Configuration()
Пример #10
0
    def test_csv_target(self):
        """Send five two-field rows through ``self.w`` into a CSV target.

        The rows are written to ``self._filename``; one of them carries the
        string 'ERROR' in its second field instead of a number. The outcome
        is verified by ``self.assertFileContent()``.
        """
        self.assertTrue(self.w.is_valid)

        rows = [
            ('Brian', 23),
            ('Joe', 'ERROR'),
            ('Mary', 40),
            ('Alice', 25),
            ('Billy', 15),
        ]
        config = gibbon.Configuration()
        config.add_configuration('src',
                                 source=gibbon.SequenceWrapper,
                                 iterable=rows)
        config.add_configuration('csv',
                                 target=gibbon.CSVTargetFile,
                                 filename=self._filename)

        self.w.prepare(config)
        async_executor = gibbon.get_async_executor(shutdown=True)
        self.w.run(async_executor)
        self.assertFileContent()
Пример #11
0
    def setUp(self):
        """Create sample rows, a result list, two Sorter workflows and a config.

        Both workflows use the first field of each row as the sort key.
        'sort_asc' passes only the key; 'sort_desc' additionally passes
        ``reverse=True``.
        """
        self.data = [(0, ), (1, ), (-1, )]
        self.results = []

        ascending = gibbon.Workflow('sort_asc')
        ascending.add_source('src')
        ascending.add_transformation('sort_asc',
                                     gibbon.Sorter,
                                     source='src',
                                     key=lambda row: row[0])
        ascending.add_target('tgt', source='sort_asc')
        self.wk_sort_asc = ascending

        descending = gibbon.Workflow('sort_desc')
        descending.add_source('src')
        descending.add_transformation('sort_desc',
                                      gibbon.Sorter,
                                      source='src',
                                      key=lambda row: row[0],
                                      reverse=True)
        descending.add_target('tgt', source='sort_desc')
        self.wk_sort_desc = descending

        self.cfg = gibbon.Configuration()
Пример #12
0
    def test_union(self):
        """Run ``self.w`` on two three-row sources and compare with their concatenation.

        The target must hold as many rows as both sources together, and its
        keys (first fields) and values (second fields) must match those of
        the combined input, irrespective of row order.
        """
        self.w.validate()
        self.assertTrue(self.w.is_valid)

        first_rows = [('a', 1), ('b', 2), ('c', 3)]
        second_rows = [('e', 4), ('f', 5), ('g', 6)]
        collected = []

        config = gibbon.Configuration()
        for source_name, rows in (('src1', first_rows), ('src2', second_rows)):
            config.add_configuration(source_name,
                                     source=gibbon.SequenceWrapper,
                                     iterable=rows)
        config.add_configuration('tgt',
                                 target=gibbon.SequenceWrapper,
                                 container=collected)

        self.w.prepare(config)
        async_executor = gibbon.get_async_executor(shutdown=True)
        self.w.run(async_executor)

        self.assertGreater(len(collected), 0)
        self.assertEqual(len(collected), len(first_rows) + len(second_rows))
        actual = dict(collected)
        expected = dict(first_rows + second_rows)
        # Sorting removes any dependence on the order in which rows arrived.
        self.assertSequenceEqual(sorted(actual), sorted(expected))
        self.assertSequenceEqual(sorted(actual.values()),
                                 sorted(expected.values()))
Пример #13
0
    def setUp(self):
        """Create sample rows, seven Selector workflows and an empty config.

        Every workflow is wired as source 'src' -> 'sel' -> targets
        'tgt1'..'tgtN'. They differ only in name, condition tuple, number
        of targets and, for 'out_port_specified', an explicit
        ``out_ports=1`` argument.
        """
        self.data = [(0, ), (1, ), (-1, )]

        def build_selector(name, conditions, target_count, **selector_options):
            """Return a src -> Selector -> tgt1..tgtN workflow called *name*."""
            workflow = gibbon.Workflow(name)
            workflow.add_source('src')
            # Extra options are passed ahead of source/conditions.
            workflow.add_transformation('sel',
                                        gibbon.Selector,
                                        **selector_options,
                                        source='src',
                                        conditions=conditions)
            for number in range(1, target_count + 1):
                workflow.add_target('tgt{}'.format(number), source='sel')
            return workflow

        self.wk_sel_bin = build_selector('binary_selector',
                                         (is_positive_or_zero, is_negative),
                                         2)
        self.wk_sel_mul = build_selector('multiple_selector',
                                         (is_positive, is_negative, is_zero),
                                         3)
        self.wk_sel_shc = build_selector('short_circuit_selector',
                                         (is_positive_or_zero, is_zero),
                                         2)

        # The remaining workflows all use the same pair of conditions.
        positive_or_negative = (is_positive, is_negative)
        self.wk_sel_def = build_selector('default_selector',
                                         positive_or_negative,
                                         2)
        # Declares a single out port while still attaching two targets.
        self.wk_sel_osp1 = build_selector('out_port_specified',
                                          positive_or_negative,
                                          2,
                                          out_ports=1)
        self.wk_sel_osp2 = build_selector('no_out_ports_specified',
                                          positive_or_negative,
                                          2)
        self.wk_sel_wd = build_selector('with_default',
                                        positive_or_negative,
                                        2)

        self.cfg = gibbon.Configuration()