Exemplo n.º 1
0
def append_files_to_dataset(files, dataset):
    """Write every row of the given CSV files to ``dataset``.

    The header map and target schema come from
    ``crp.build_new_schema(files, dataset.schema)``. Each file is read as a
    semicolon-delimited CSV, and each row is passed to ``dataset.write_row``
    as a dict holding one key per schema entry. Entries the row does not
    supply stay ``None``; supplied values are decoded from latin-1.

    NOTE(review): ``header_map`` is presumably keyed by the CSV column names
    and yields the matching schema names -- confirm against
    ``crp.build_new_schema``.

    This block relies on Python 2 builtins (``unicode``, ``dict.iteritems``).

    Args:
        files: iterable of paths accepted by ``open``.
        dataset: object exposing ``schema`` and ``write_row(row_dict)``.
    """
    header_map, schema = crp.build_new_schema(files, dataset.schema)
    # One slot per schema entry, all initially empty.
    blank_record = dict.fromkeys(schema)

    for path in files:
        with open(path, "rb") as source:
            reader = csv.DictReader(source, delimiter=";")
            for raw_row in reader:
                # Shallow copy so each written row starts from the empty template.
                record = copy.copy(blank_record)
                record.update(
                    (header_map[column], unicode(value, "latin-1"))
                    for column, value in raw_row.iteritems()
                )
                dataset.write_row(record)
    def test_build_new_schema(self, mock_open, mock_csv):
        """Check ``crp.build_new_schema`` against two files with identical headers.

        NOTE(review): ``mock_open`` and ``mock_csv`` are presumably injected by
        patch decorators that are not visible in this excerpt -- confirm.
        """
        # Every DictReader created through the mocked csv module is this same
        # object, reporting the same two column names.
        fake_reader = mock.Mock()
        fake_reader.fieldnames = ["attr1", "attr2"]
        mock_csv.DictReader = mock.Mock()
        mock_csv.DictReader.return_value = fake_reader

        # Two placeholder entries: only the number of files matters here.
        input_files = [None, None]
        starting_schema = ["attr1"]

        header_map, schema = crp.build_new_schema(input_files, starting_schema)

        # One open() call per input file.
        self.assertEqual(mock_open.call_count, 2)
        self.assertEqual(header_map, {"attr1": "attr1", "attr2": "attr2"})
        self.assertEqual(schema, ["attr1", "attr2"])