def data(args):
    """Validate every row of a CSV file against a model's mapping.

    The model is obtained from ``_validate_model(args.model)``. Each row
    read from ``args.csv_file`` is passed to ``convert_types``; for every
    row that raises ``Invalid`` one message per child error is printed.

    :param args: object exposing ``model`` and ``csv_file`` attributes.
    :returns: ``0`` when all rows convert cleanly; ``1`` when the model
        could not be validated, at least one row was invalid, or any other
        exception was raised while reading the file.
    """
    from colander import Invalid
    from openspending.validation.data import convert_types

    model = _validate_model(args.model)
    if model is None:
        return 1

    return_code = 0
    try:
        # The context manager closes the CSV handle on every exit path;
        # previously the handle was never closed.
        with open(args.csv_file, 'rb') as fh:
            for line in UnicodeDictReader(fh):
                try:
                    convert_types(model['mapping'], line)
                except Invalid as errors:
                    return_code = 1
                    for error in errors.children:
                        value = error.value
                        # Truncate long values so the report stays readable.
                        if value and len(value) > 70:
                            value = value[:66] + ' ...'
                        message = "[Column '%s' -> Attribute '%s' " \
                            "(%s)]\n\t%s\n\t(Value: %s)\n" % (
                                error.column, error.node.name,
                                error.datatype, error.msg, value)
                        # Single-argument parenthesised print is identical
                        # under the Python 2 print statement.
                        print(message.encode('utf-8'))
    except Exception as ex:
        print(unicode(ex).encode('utf-8'))
        return 1

    if not return_code:
        print("OK: data validates for the model.")
    return return_code
def process_line(self, line):
    """Convert one source row, then load it or dry-run check its key.

    In a dry run nothing is loaded; instead the values of the ``self.key``
    columns are joined into a string and looked up in ``self.unique_check``
    so that repeated keys are reported through ``self.log_exception``.

    ``Invalid`` errors are reported child by child via
    ``self.log_invalid_data``; any other exception goes to
    ``self.log_exception``. Both are re-raised only when
    ``self.raise_errors`` is set.
    """
    if self.row_number % 1000 == 0:
        log.info('Imported %s lines' % self.row_number)

    try:
        mapping = self.dataset.mapping.get('mapping', {})
        record = convert_types(mapping, line)
        if self.dry_run:
            # Build the composite key from the configured key columns.
            key_parts = [unicode(record[column]) for column in self.key]
            unique_value = ', '.join(key_parts)
            if unique_value in self.unique_check:
                # A key seen before violates the uniqueness constraint.
                self.log_exception(
                    ValueError("Unique key constraint not met"),
                    error="%s is not a unique key" % unique_value)
            self.unique_check[unique_value] = True
        else:
            self.source.model.load(record)
    except Invalid as validation_error:
        for child in validation_error.children:
            self.log_invalid_data(child)
        if self.raise_errors:
            raise
    except Exception as exc:
        self.log_exception(exc)
        if self.raise_errors:
            raise
def load_dataset(dataset):
    """Load the rows of the 'simple' data fixture into ``dataset.model``.

    Each CSV row is converted with ``convert_types`` using the 'simple'
    model fixture's ``mapping`` before being loaded.

    :param dataset: object whose ``model.load`` receives each converted row.
    """
    simple_model = model_fixture('simple')
    data = data_fixture('simple')
    try:
        for row in csv.DictReader(data):
            row = convert_types(simple_model['mapping'], row)
            dataset.model.load(row)
    finally:
        # Release the fixture handle even when a row fails to convert or
        # load; previously an exception skipped the close.
        data.close()
def load_dataset(dataset):
    """Feed the rows of ``TEST_DATA`` into ``dataset.load``.

    ``TEST_DATA`` is parsed as CSV and every row is converted with
    ``convert_types`` against ``SIMPLE_MODEL['mapping']`` before loading.

    :param dataset: object whose ``load`` receives each converted row.
    """
    from StringIO import StringIO
    import csv
    from openspending.validation.data import convert_types

    source = StringIO(TEST_DATA)
    for raw_row in csv.DictReader(source):
        dataset.load(convert_types(SIMPLE_MODEL['mapping'], raw_row))
def load_dataset(dataset):
    """Load the rows of the 'simple' data fixture into ``dataset``.

    Each CSV row is converted with ``convert_types`` using the 'simple'
    model fixture's ``mapping`` before being passed to ``dataset.load``.

    :param dataset: object whose ``load`` receives each converted row.
    """
    simple_model = model_fixture('simple')
    data = data_fixture('simple')
    try:
        for row in csv.DictReader(data):
            row = convert_types(simple_model['mapping'], row)
            dataset.load(row)
    finally:
        # Release the fixture handle even when a row fails to convert or
        # load; previously an exception skipped the close.
        data.close()
def test_convert_dates_custom_format(self):
    # A date attribute that declares its own "format" must be converted
    # according to that format string.
    attribute = {"column": "foo", "format": "%d.%m.%Y", "datatype": "date"}
    converted = convert_types({"foo": attribute}, {"foo": "7.5.2010"})
    expected = datetime.date(2010, 5, 7)
    assert converted['foo'] == expected
def test_convert_types_value(self):
    # A string attribute comes back under its attribute name, unchanged.
    attribute = {"column": "foo", "datatype": "string"}
    result = convert_types({"foo": attribute}, {"foo": "bar"})
    assert isinstance(result, dict), result
    assert 'foo' in result, result
    assert result['foo'] == 'bar'
def test_convert_types_casting(self):
    # A float attribute is cast from its textual form to a number.
    attribute = {"column": "foo", "datatype": "float"}
    result = convert_types({"foo": attribute}, {"foo": "5.0"})
    assert isinstance(result, dict), result
    assert 'foo' in result, result
    assert result['foo'] == 5.0
def test_convert_dates(self):
    # Each raw value is converted with the same date mapping and compared
    # against the date the conversion is expected to yield.
    mapping = {
        "foo": {"column": "foo", "datatype": "date"}
    }
    cases = [
        ("2010", (2010, 1, 1)),
        ("2010-02", (2010, 2, 1)),
        ("2010-02-03", (2010, 2, 3)),
        ("2010-02-03Z", (2010, 2, 3)),
    ]
    for raw, (year, month, day) in cases:
        out = convert_types(mapping, {"foo": raw})
        assert out['foo'] == datetime.date(year, month, day)
def test_convert_types_compound(self):
    # A dimension made of several attributes is converted into a nested
    # dict keyed by attribute name.
    attributes = {
        "name": {"column": "foo_name", "datatype": "string"},
        "label": {"column": "foo_label", "datatype": "string"},
    }
    source_row = {"foo_name": "bar", "foo_label": "qux"}
    result = convert_types({"foo": {"attributes": attributes}}, source_row)
    assert isinstance(result, dict), result
    assert 'foo' in result, result
    assert isinstance(result['foo'], dict), result
    assert result['foo']['name'] == 'bar'
    assert result['foo']['label'] == 'qux'
def process_line(self, line):
    """Convert one source row and load it unless this is a dry run.

    ``Invalid`` errors are reported child by child via
    ``self.log_invalid_data``; any other exception goes to
    ``self.log_exception``. Both are re-raised only when
    ``self.raise_errors`` is set.
    """
    at_progress_mark = self.row_number % 1000 == 0
    if at_progress_mark:
        log.info('Imported %s lines' % self.row_number)

    try:
        record = convert_types(self.dataset.mapping, line)
        if self.dry_run:
            # Conversion alone is the validation step in a dry run.
            return
        self.dataset.load(record)
    except Invalid as validation_error:
        for child in validation_error.children:
            self.log_invalid_data(child)
        if self.raise_errors:
            raise
    except Exception as exc:
        self.log_exception(exc)
        if self.raise_errors:
            raise
def load_fixture(name, manager=None):
    """ Load fixture data into the database.

    Creates a ``Dataset`` from the model fixture ``name``, commits it,
    generates its model and loads every row of the matching data fixture
    after converting it with ``convert_types``.

    :param name: fixture name passed to ``model_fixture`` and
        ``data_fixture``.
    :param manager: optional object appended to ``dataset.managers``.
    :returns: the created dataset.
    """
    model = model_fixture(name)
    dataset = Dataset(model)
    dataset.updated_at = datetime.utcnow()
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.model.generate()

    data = data_fixture(name)
    try:
        for row in csv.DictReader(data):
            entry = convert_types(model['mapping'], row)
            dataset.model.load(entry)
    finally:
        # Release the fixture handle even when a row fails to convert or
        # load; previously an exception skipped the close.
        data.close()
    return dataset
def process_line(self, line):
    """Convert one source row, attach its geometry id, then load or check it.

    After conversion the row's ``country_level0`` value is passed to
    ``self._match_country_id`` and the result, together with the row's
    ``time``, to ``self._match_time_geom``; the stringified outcome is
    stored under ``geom_time_id``.

    In a dry run nothing is loaded; instead the values of the ``self.key``
    columns are joined into a string and looked up in ``self.unique_check``
    so that repeated keys are reported through ``self.log_exception``.

    ``Invalid`` errors are reported child by child via
    ``self.log_invalid_data``; any other exception goes to
    ``self.log_exception``. Both are re-raised only when
    ``self.raise_errors`` is set.
    """
    if self.row_number % 1000 == 0:
        log.info('Imported %s lines' % self.row_number)
    try:
        # Example of a converted row:
        # {u'geom_time_id': u'0',
        #  u'country_level0': {u'countryid': u'Caribbean small states',
        #                      u'name': u'caribbean-small-states',
        #                      u'label': u'Caribbean small states'},
        #  u'amount': 27.1328588135588, u'theid': u'71',
        #  u'time': datetime.date(1977, 1, 1)}
        data = convert_types(self.dataset.mapping.get('mapping', {}), line)
        gid = self._match_country_id(data['country_level0'])
        data['geom_time_id'] = str(self._match_time_geom(gid, data['time']))
        # NOTE(review): a disabled check used to report rows whose
        # geom_time_id was empty or "0" via log_invalid_countrytime;
        # such rows are currently processed like any other -- confirm
        # that this is intended.

        if not self.dry_run:
            self.source.model.load(data)
        else:
            # Check uniqueness
            unique_value = ', '.join([unicode(data[k]) for k in self.key])
            if unique_value in self.unique_check:
                # Report the duplicate through the regular error log only;
                # a leftover debug print to stdout was removed here.
                self.log_exception(
                    ValueError("Unique key constraint not met"),
                    error="%s is not a unique key" % unique_value)
            self.unique_check[unique_value] = True
    except Invalid as invalid:
        for child in invalid.children:
            self.log_invalid_data(child)
        if self.raise_errors:
            raise
    except Exception as ex:
        self.log_exception(ex)
        if self.raise_errors:
            raise
def process_line(self, line):
    """Convert one source row, attach its geometry id, then load or check it.

    After conversion the row's ``country_level0`` value is passed to
    ``self._match_country_id`` and the result, together with the row's
    ``time``, to ``self._match_time_geom``; the stringified outcome is
    stored under ``geom_time_id``.

    In a dry run nothing is loaded; instead the values of the ``self.key``
    columns are joined into a string and looked up in ``self.unique_check``
    so that repeated keys are reported through ``self.log_exception``.

    ``Invalid`` errors are reported child by child via
    ``self.log_invalid_data``; any other exception goes to
    ``self.log_exception``. Both are re-raised only when
    ``self.raise_errors`` is set.
    """
    if self.row_number % 1000 == 0:
        log.info('Imported %s lines' % self.row_number)
    try:
        # Example of a converted row:
        # {u'geom_time_id': u'0',
        #  u'country_level0': {u'countryid': u'Caribbean small states',
        #                      u'name': u'caribbean-small-states',
        #                      u'label': u'Caribbean small states'},
        #  u'amount': 27.1328588135588, u'theid': u'71',
        #  u'time': datetime.date(1977, 1, 1)}
        data = convert_types(self.dataset.mapping.get('mapping', {}), line)
        gid = self._match_country_id(data['country_level0'])
        data['geom_time_id'] = str(self._match_time_geom(gid, data['time']))
        # NOTE(review): a disabled check used to report rows whose
        # geom_time_id was empty or "0" via log_invalid_countrytime;
        # such rows are currently processed like any other -- confirm
        # that this is intended.

        if not self.dry_run:
            self.source.model.load(data)
        else:
            # Check uniqueness
            unique_value = ', '.join([unicode(data[k]) for k in self.key])
            if unique_value in self.unique_check:
                # Report the duplicate through the regular error log only;
                # a leftover debug print to stdout was removed here.
                self.log_exception(
                    ValueError("Unique key constraint not met"),
                    error="%s is not a unique key" % unique_value)
            self.unique_check[unique_value] = True
    except Invalid as invalid:
        for child in invalid.children:
            self.log_invalid_data(child)
        if self.raise_errors:
            raise
    except Exception as ex:
        self.log_exception(ex)
        if self.raise_errors:
            raise
def load_fixture(name, manager=None):
    """ Load fixture data into the database.

    Reads the JSON model from the fixture file ``<name>.js``, creates and
    commits a ``Dataset`` from it, generates the dataset and loads every
    row of ``<name>.csv`` after converting it with ``convert_types``.

    :param name: base name of the ``.js`` / ``.csv`` fixture files.
    :param manager: optional object appended to ``dataset.managers``.
    :returns: the created dataset.
    """
    from openspending.validation.data import convert_types

    fh = fixture_file('%s.js' % name)
    try:
        data = json.load(fh)
    finally:
        # Close the model fixture even if it does not parse.
        fh.close()

    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()

    fh = fixture_file('%s.csv' % name)
    try:
        for row in csv.DictReader(fh):
            entry = convert_types(data['mapping'], row)
            dataset.load(entry)
    finally:
        # Close the data fixture even when a row fails to convert or load;
        # previously an exception skipped the close.
        fh.close()
    dataset.commit()
    return dataset
def load_fixture(name, manager=None):
    """ Load fixture data into the database.

    Reads the JSON model from ``fixture_path('<name>.js')``, creates and
    commits a ``Dataset`` from it, generates the dataset and loads every
    row of ``fixture_path('<name>.csv')`` after converting it with
    ``convert_types``.

    :param name: base name of the ``.js`` / ``.csv`` fixture files.
    :param manager: optional object appended to ``dataset.managers``.
    :returns: the created dataset.
    """
    from openspending.validation.data import convert_types

    # Context managers close both files on every exit path; previously an
    # exception while parsing or loading left the handle open.
    with open(fixture_path('%s.js' % name), 'r') as fh:
        data = json.load(fh)

    dataset = Dataset(data)
    if manager is not None:
        dataset.managers.append(manager)
    db.session.add(dataset)
    db.session.commit()
    dataset.generate()

    with open(fixture_path('%s.csv' % name), 'r') as fh:
        for row in csv.DictReader(fh):
            entry = convert_types(data['mapping'], row)
            dataset.load(entry)
    dataset.commit()
    return dataset