示例#1
0
    def test_header_only_input_one_output_file_with_header(self):
        """Splitting a header-only input still produces one file with the header."""
        source = ReaderWriter()
        source.writerow([u'a', u'b'])

        m.split(source, prefix='split.', chunk_size=1)

        self.assertEqual(u'a,b', header(u'split.0'))
示例#2
0
    def test_input_contain_zip_field_exception(self):
        """unzip refuses a zip_field name that is already an input column."""
        out_spec, out_unspec = ReaderWriter(), ReaderWriter()
        source = self.csv_header_a_b_c()

        with self.assertRaises(m.DuplicateFieldError):
            m.unzip(source, ['a'], out_spec, out_unspec, zip_field='a')
示例#3
0
    def test_header_only_input_one_output_file_with_header(self):
        """A lone header row is split into a single file carrying that header."""
        input_rows = ReaderWriter()
        input_rows.writerow(u'a b'.split())
        m.split(input_rows, prefix='split.', chunk_size=1)
        self.assertEqual(u'a,b', header(u'split.0'))
示例#4
0
    def test_zip_id_defaults_to_id(self):
        """Without an explicit zip_field, unzip names the reference column 'id'."""
        source = self.csv_header_a_b_c()
        spec_out = ReaderWriter()
        unspec_out = ReaderWriter()

        m.unzip(source, ['a'], spec_out, unspec_out)

        self.assertListEqual([['id', 'b', 'c']], unspec_out.rows)
示例#5
0
    def test_output_header(self):
        """Kept fields stay; mapped fields are replaced by the id field."""
        source = ReaderWriter()
        source.rows = [('aa', 'bb', 'cc', 'dd')]
        sink = ReaderWriter()

        m.ExtractMap('b=bb,c=cc', 'a=id').process(source, sink)

        self.assertEqual([('aa', 'dd', 'id')], sink.rows)
示例#6
0
    def test_multiple_output_files_have_same_header(self):
        """Every chunk file starts with a copy of the input header."""
        source = ReaderWriter()
        for row in (u'a b'.split(), [1, 2], [3, 4]):
            source.writerow(row)

        m.split(source, prefix='split.', chunk_size=1)

        for name in (u'split.0', u'split.1'):
            self.assertEqual(u'a,b', header(name))
示例#7
0
    def test_custom_zip_id_in_out_unspec(self):
        """An explicit zip_field name appears in the unspec output header."""
        source = self.csv_header_a_b_c()
        spec_out = ReaderWriter()
        unspec_out = ReaderWriter()

        m.unzip(source, ['a'], spec_out, unspec_out, zip_field='zip_id')

        self.assertListEqual([['zip_id', 'b', 'c']], unspec_out.rows)
示例#8
0
    def test_ids_are_converted_to_string(self):
        """Ids read from csv text are stored as ints keyed by value tuples."""
        source = ReaderWriter()
        source.rows = [('id', 'value'), ('1', 'one')]

        mapping = make_map('value', 'id')
        mapping.read(source)

        self.assertEqual({('one',): 1}, mapping.values)
示例#9
0
    def test_less_data_rows_than_chunk_size_one_file_created(self):
        """Two data rows with chunk_size=3 produce exactly one output file."""
        source = ReaderWriter()
        source.writerow(u'a b'.split())
        source.writerow([1, 2])
        source.writerow([3, 4])

        m.split(source, prefix='split.', chunk_size=3)

        self.assertTrue(os.path.exists(u'split.0'))
        self.assertFalse(os.path.exists(u'split.1'))
示例#10
0
    def test_header_is_output_field_names(self):
        """Transformer writes its output_field_names as the header row."""
        sink = ReaderWriter()
        source = ReaderWriter()
        source.rows = [('a', 'b')]

        transformer = m.Transformer()
        transformer.output_field_names = sentinel.output_field_names
        transformer.process(source, sink)

        self.assertEqual([sentinel.output_field_names], sink.rows)
示例#11
0
    def test_content_is_produced_by_process(self):
        """Each data row is passed through transform() into the output."""
        source = ReaderWriter()
        source.rows = [('a', 'b'), (1, 2), (1, 2)]
        sink = ReaderWriter()

        transformer = m.Transformer()
        transformer.transform = mock.Mock(
            transformer.transform, return_value=sentinel.output)
        transformer.process(source, sink)

        self.assertEqual(
            [sentinel.output, sentinel.output], sink.rows[1:])
示例#12
0
 def __init__(self):
     """Wire up an EntityExtractor fixture backed by in-memory ReaderWriters."""
     # in-memory sink the extractor appends output rows to
     self.appender = ReaderWriter()
     self.mapper_reader = self._mapper_reader()
     self.mapper_appender = ReaderWriter()
     self.reader = self._reader()
     self.extractor = m.EntityExtractor(
         #                 in/output: ab_id    entity-mapper: id
         ref_field_map=FieldsMap.parse('id=ab_id'),
         #                 in/output: a, b     entity-mapper: a, other
         fields_map=FieldsMap.parse('a,other=b'),
         keep_fields=True)
示例#13
0
    def test_keep_fields(self):
        """keep_fields=True appends the id to rows instead of replacing fields."""
        with open('map.csv', 'w') as f:
            f.write('id,a\n5,a')
        source = ReaderWriter()
        source.rows = [('a', 'b'), ('a', 'b'), ('c', 'd')]
        sink = ReaderWriter()

        m.extract_map(source, sink, 'map.csv', 'a', 'id', keep_fields=True)

        expected = [('a', 'b', 'id'), ('a', 'b', 5), ('c', 'd', 6)]
        self.assertEqual(expected, sink.rows)
示例#14
0
    def test_existing_map_used(self):
        """Ids from a pre-existing map file are reused; new values get new ids."""
        with open('map.csv', 'w') as f:
            f.write('id,a\n5,a')
        source = ReaderWriter()
        source.rows = [('a', 'b'), ('a', 'b'), ('c', 'd')]
        sink = ReaderWriter()

        m.extract_map(source, sink, 'map.csv', 'a', 'id')

        self.assertEqual(
            sorted([('b', 5), ('d', 6)]), sorted(sink.rows[1:]))
示例#15
0
    def test_11_data_rows_chunk_size_1_11_files_created(self):
        """Eleven rows with chunk_size=1 yield split.0 .. split.10 and no more."""
        source = ReaderWriter()
        source.writerow(u'a b'.split())
        for n in range(11):
            source.writerow([n, n + 1])

        m.split(source, prefix='split.', chunk_size=1)

        for name in (u'split.0', u'split.1', u'split.10'):
            self.assertTrue(os.path.exists(name))
        self.assertFalse(os.path.exists(u'split.11'))
示例#16
0
    def test_output_file_contains_rows_from_input(self):
        """The first chunk file holds the header plus the chunk's data rows."""
        source = ReaderWriter()
        source.writerow(u'a b'.split())
        source.writerow([1, 2])
        source.writerow([3, 4])

        m.split(source, prefix='split.', chunk_size=2)

        with codecs.open('split.0', encoding='utf8') as f:
            written = list(csv.reader(f))
        self.assertEqual(
            [[u'a', u'b'], [u'1', u'2'], [u'3', u'4']], written)
示例#17
0
    def test_changed_map_is_written_out(self):
        """New ids allocated during extract_map are persisted to the map file.

        The pre-existing mapping (5 -> 'a') is kept and the new value 'c'
        is appended with the next free id.
        """
        with open('map.csv', 'w') as f:
            f.write('id,a\n5,a')
        reader = ReaderWriter()
        reader.rows = [('a', 'b'), ('a', 'b'), ('c', 'd')]
        writer = ReaderWriter()

        m.extract_map(reader, writer, 'map.csv', 'a', 'id')

        with open('map.csv') as f:
            items = tuple(csv.reader(f))

        # list() materializes the map object: on Python 3 map() returns a
        # lazy iterator, and a bare map object never compares equal to a
        # list, so the original assertion would fail there unconditionally.
        self.assertEqual(
            sorted((('id', 'a'), ('5', 'a'), ('6', 'c'))),
            list(map(tuple, sorted(items))))
示例#18
0
    def test_map_header(self):
        """The written map header is the renamed fields with the id first."""
        source = ReaderWriter()
        source.rows = [('aa', 'bb', 'cc', 'dd'), ]
        sink = ReaderWriter()

        extractor = m.ExtractMap('b=bb,c=cc', 'a=id')
        extractor.process(source, sink)

        map_sink = ReaderWriter()
        extractor.map.write(map_sink)

        # all fields renamed, id comes first - for sorting?
        self.assertEqual([('a', 'b', 'c')], map_sink.rows)
示例#19
0
    def test(self):
        """RemoveFields drops the named columns from header and data rows."""
        sink = ReaderWriter()
        source = ReaderWriter()
        source.rows = [
            ('aa', 'bb', 'cc', 'dd'),
            (sentinel.aa1, sentinel.bb1, sentinel.cc1, sentinel.dd1),
            (sentinel.aa2, sentinel.bb2, sentinel.cc2, sentinel.dd2),
        ]

        m.RemoveFields(['cc', 'aa']).process(source, sink)

        expected = [
            ('bb', 'dd'),
            (sentinel.bb1, sentinel.dd1),
            (sentinel.bb2, sentinel.dd2),
        ]
        self.assertEqual(expected, sink.rows)
示例#20
0
    def test_new_map_can_be_used(self):
        """A freshly created Mapper hands out ids and appends the new row."""
        sink = ReaderWriter()
        mapper = m.Mapper.new('id', ['a', 'b'], appender=sink)

        new_id = mapper.map(('aa', 'bb'))

        self.assertEqual(1, new_id)
        self.assertEqual(2, len(sink.rows))
        self.assertListEqual([1, 'aa', 'bb'], sink.rows[1])
示例#21
0
    def test_output_values(self):
        """Rows with equal mapped fields receive the same id in the output."""
        source = ReaderWriter()
        source.rows = [
            ('aa', 'bb', 'cc', 'dd'),
            (sentinel.aa1, sentinel.bb1, sentinel.cc1, sentinel.dd1),
            (sentinel.aa2, sentinel.bb2, sentinel.cc2, sentinel.dd2),
            (sentinel.aa3, sentinel.bb1, sentinel.cc1, sentinel.dd3), ]
        sink = ReaderWriter()

        m.ExtractMap('b=bb,c=cc', 'a=id').process(source, sink)

        expected = sorted([
            (sentinel.aa1, sentinel.dd1, 0),
            (sentinel.aa2, sentinel.dd2, 1),
            (sentinel.aa3, sentinel.dd3, 0), ])
        self.assertEqual(expected, sorted(sink.rows[1:]))
示例#22
0
    def test_id_field_is_not_in_output(self):
        """csvzip drops the shared id column from the zipped header."""
        left = csv_reader('a,b,id')
        right = csv_reader('c,d,id')
        zipped = ReaderWriter()

        m.csvzip(left, right, zipped)

        self.assertNotIn('id', zipped.rows[0])
示例#23
0
    def test(self):
        """Removing 'cc' and 'aa' leaves only the 'bb' and 'dd' columns."""
        source = ReaderWriter()
        source.rows = [
            ('aa', 'bb', 'cc', 'dd'),
            (sentinel.aa1, sentinel.bb1, sentinel.cc1, sentinel.dd1),
            (sentinel.aa2, sentinel.bb2, sentinel.cc2, sentinel.dd2), ]
        sink = ReaderWriter()

        remover = m.RemoveFields(['cc', 'aa'])
        remover.process(source, sink)

        self.assertEqual(
            [('bb', 'dd'),
             (sentinel.bb1, sentinel.dd1),
             (sentinel.bb2, sentinel.dd2)],
            sink.rows)
示例#24
0
    def test_map_new_value(self):
        """An unseen value tuple gets the next id and is appended."""
        appender = ReaderWriter()
        mapper = m.Mapper('id', ['a', 'b'], self.map_reader(), appender)

        new_id = mapper.map(('aaa', 'bbb'))

        self.assertEqual(2, new_id)
        self.assertEqual(1, len(appender.rows))
        self.assertListEqual([2, 'aaa', 'bbb'], appender.rows[0])
示例#25
0
    def test_keep_fields(self):
        """With keep_fields=True all input columns survive and the id is appended."""
        source = ReaderWriter()
        source.rows = [
            ('aa', 'bb', 'cc', 'dd'),
            (sentinel.aa1, sentinel.bb1, sentinel.cc1, sentinel.dd1),
            (sentinel.aa2, sentinel.bb2, sentinel.cc2, sentinel.dd2),
            (sentinel.aa3, sentinel.bb1, sentinel.cc1, sentinel.dd3), ]
        sink = ReaderWriter()

        extractor = m.ExtractMap('b=bb,c=cc', 'a=id', keep_fields=True)
        extractor.process(source, sink)

        expected = [
            ('aa', 'bb', 'cc', 'dd', 'id'),
            (sentinel.aa1, sentinel.bb1, sentinel.cc1, sentinel.dd1, 0),
            (sentinel.aa2, sentinel.bb2, sentinel.cc2, sentinel.dd2, 1),
            (sentinel.aa3, sentinel.bb1, sentinel.cc1, sentinel.dd3, 0), ]
        self.assertEqual(expected, sink.rows)
示例#26
0
    def test_map_with_different_field_order_read_in_properly(self):
        """Mapper looks fields up by header name, not by column position."""
        source = csv_reader('''\
            b,id,a
            b,1,a
            ''')
        sink = ReaderWriter()
        mapper = m.Mapper('id', ['a', 'b'], source, sink)

        self.assertEqual(1, mapper.map(('a', 'b')))
示例#27
0
    def test_map_content(self):
        """The persisted map pairs each distinct (bb, cc) value with its id."""
        source = ReaderWriter()
        source.rows = [
            ('aa', 'bb', 'cc', 'dd'),
            (sentinel.aa1, sentinel.bb1, sentinel.cc1, sentinel.dd1),
            (sentinel.aa2, sentinel.bb2, sentinel.cc2, sentinel.dd2),
            (sentinel.aa3, sentinel.bb1, sentinel.cc1, sentinel.dd3), ]
        sink = ReaderWriter()

        extractor = m.ExtractMap('b=bb,c=cc', 'a=id')
        extractor.process(source, sink)

        map_sink = ReaderWriter()
        extractor.map.write(map_sink)

        # all fields renamed, id comes first - for sorting?
        expected = sorted([
            (0, sentinel.bb1, sentinel.cc1),
            (1, sentinel.bb2, sentinel.cc2), ])
        self.assertEqual(expected, sorted(map_sink.rows[1:]))
示例#28
0
    def test_unsorted_map_with_gaps_works_correctly(self):
        """The next id is one past the maximum existing id, not the row count."""
        source = csv_reader('''\
            id,a,b
            5,aaa,bbb
            1,aa,bb
            ''')
        sink = ReaderWriter()
        mapper = m.Mapper('id', ['a', 'b'], source, sink)

        self.assertEqual(6, mapper.map(('a3', 'b3')))
示例#29
0
    def test_map_with_different_field_order_is_written_properly(self):
        """Appended rows follow the column order of the existing map file."""
        source = csv_reader('''\
            b,id,a
            b,1,a
            ''')
        sink = ReaderWriter()
        mapper = m.Mapper('id', ['a', 'b'], source, sink)

        new_id = mapper.map(('aa', 'bb'))

        self.assertEqual(2, new_id)
        self.assertListEqual([['bb', 2, 'aa']], sink.rows)
示例#30
0
    def test_out_spec(self):
        """The spec output holds the zip id plus the extracted column."""
        source = ReaderWriter()
        for line in ('a  b  c', 'a1 b1 c1', 'a2 b2 c2'):
            source.writerow(line.split())
        spec_out = ReaderWriter()
        unspec_out = ReaderWriter()

        m.unzip(source, ['a'], spec_out, unspec_out)

        self.assertListEqual(
            [['id', 'a'], ['0', 'a1'], ['1', 'a2']],
            spec_out.rows)
示例#31
0
    def test_multiple_output_files_have_same_header(self):
        """Both chunk files repeat the input's u'a,b' header line."""
        input_rows = ReaderWriter()
        input_rows.writerow(u'a b'.split())
        input_rows.writerow([1, 2])
        input_rows.writerow([3, 4])
        m.split(input_rows, prefix='split.', chunk_size=1)
        self.assertEqual(u'a,b', header(u'split.0'))
        self.assertEqual(u'a,b', header(u'split.1'))
示例#32
0
    def test_less_data_rows_than_chunk_size_one_file_created(self):
        """A chunk_size larger than the data yields a single output file."""
        input_rows = ReaderWriter()
        for row in (u'a b'.split(), [1, 2], [3, 4]):
            input_rows.writerow(row)

        m.split(input_rows, prefix='split.', chunk_size=3)

        self.assertTrue(os.path.exists(u'split.0'))
        self.assertFalse(os.path.exists(u'split.1'))
示例#33
0
    def test_output_file_contains_rows_from_input(self):
        """split.0 contains the header plus both data rows, stringified."""
        input_rows = ReaderWriter()
        for row in (u'a b'.split(), [1, 2], [3, 4]):
            input_rows.writerow(row)

        m.split(input_rows, prefix='split.', chunk_size=2)

        with codecs.open('split.0', encoding='utf8') as f:
            self.assertEqual(
                [[u'a', u'b'], [u'1', u'2'], [u'3', u'4']],
                list(csv.reader(f)))
示例#34
0
    def test_mismatch_in_id_values_raises_error(self):
        """Diverging id values raise IdMismatch after the matching rows are zipped."""
        left = csv_reader('''\
            a,b,id
            a,b,1
            aa,bb,2''')
        right = csv_reader('''\
            c,d,id
            c,d,1
            cc,dd,3''')
        zipped = ReaderWriter()

        with self.assertRaises(m.IdMismatch):
            m.csvzip(left, right, zipped)

        self.assertEqual(2, len(zipped.rows))
        self.assertEqual('a b c d'.split(), zipped.rows[0])
        self.assertEqual('a b c d'.split(), zipped.rows[1])
示例#35
0
    def test_normal_case(self):
        """Inputs with matching id columns are zipped row by row, id dropped."""
        left = csv_reader('''\
            a,b,id
            a,b,1
            aa,bb,2''')
        right = csv_reader('''\
            c,d,id
            c,d,1
            cc,dd,2''')
        zipped = ReaderWriter()

        m.csvzip(left, right, zipped)

        self.assertEqual(3, len(zipped.rows))
        self.assertEqual('a b c d'.split(), zipped.rows[0])
        self.assertEqual('a b c d'.split(), zipped.rows[1])
        self.assertEqual('aa bb cc dd'.split(), zipped.rows[2])
示例#36
0
    def test_keep_id_id_field_is_in_output(self):
        """keep_id=True puts the shared id as the first output column."""
        left = csv_reader('''\
            a,b,id
            a,b,1
            aa,bb,2''')
        right = csv_reader('''\
            c,d,id
            c,d,1
            cc,dd,2''')
        zipped = ReaderWriter()

        m.csvzip(left, right, zipped, keep_id=True)

        self.assertEqual(3, len(zipped.rows))
        self.assertEqual('id a b c d'.split(), zipped.rows[0])
        self.assertEqual('1 a b c d'.split(), zipped.rows[1])
        self.assertEqual('2 aa bb cc dd'.split(), zipped.rows[2])
示例#37
0
    def test_11_data_rows_chunk_size_1_11_files_created(self):
        """Exactly eleven one-row chunk files are written, no twelfth."""
        input_rows = ReaderWriter()
        input_rows.writerow(u'a b'.split())
        for idx in range(11):
            input_rows.writerow([idx, idx + 1])

        m.split(input_rows, prefix='split.', chunk_size=1)

        self.assertTrue(os.path.exists(u'split.0'))
        self.assertTrue(os.path.exists(u'split.1'))
        # ...
        self.assertTrue(os.path.exists(u'split.10'))
        self.assertFalse(os.path.exists(u'split.11'))
示例#38
0
 def csv_header_a_b_c(self):
     """Return a ReaderWriter preloaded with an 'a b c' header row.

     The local is named ``rows`` rather than ``csv`` so it does not
     shadow the stdlib ``csv`` module used elsewhere in this file.
     """
     rows = ReaderWriter()
     rows.writerow('a b c'.split())
     return rows
示例#39
0
    def test_valuess_not_unique_dies(self):
        """Duplicate value tuples in the map data make read() raise."""
        reader = ReaderWriter()
        reader.rows = [self.header, (1, 1, 1), (2, 1, 1)]
        # named value_map to avoid shadowing the builtin map()
        value_map = make_map('aa,bb', 'id')

        self.assertRaises(Exception, lambda: value_map.read(reader))
示例#40
0
 def test_two_common_fields_zip_raises_error(self):
     """More than one shared column makes csvzip reject the inputs."""
     sink = ReaderWriter()
     left = csv_reader('a,b')
     right = csv_reader('a,b')
     with self.assertRaises(m.BadInput):
         m.csvzip(left, right, sink)
示例#41
0
    def test_missing_value_field(self):
        """A map header lacking one of the value fields makes read() raise."""
        reader = ReaderWriter()
        reader.rows = [('id', 'bb')]
        # named value_map to avoid shadowing the builtin map()
        value_map = make_map('aa,bb', 'id')

        self.assertRaises(Exception, lambda: value_map.read(reader))
示例#42
0
    def test_calls_bind_before_transform(self):
        """BindCheckerTransformer itself asserts bind() runs before transform()."""
        sink = ReaderWriter()
        source = ReaderWriter()
        source.rows = [('a', 'b'), (1, 2)]

        BindCheckerTransformer().process(source, sink)
示例#43
0
    def test_new_creates_header(self):
        """Mapper.new immediately writes the header row to the appender."""
        sink = ReaderWriter()
        m.Mapper.new('id', ['a', 'b'], appender=sink)

        self.assertEqual(1, len(sink.rows))
        self.assertListEqual(['id', 'a', 'b'], sink.rows[0])