def import_csv(dataset, url, args):
    """ Import the csv data into the dataset """

    csv_data_url, source_url = url
    source = Source(dataset, shell_account(), csv_data_url)
    # Analyse the csv data and add it to the source
    # If we don't analyse it we'll be left with a weird message
    source.analysis = analyze_csv(csv_data_url)

    # Check to see if the dataset already has this source
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break

    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ)
    if csv_data_url != source_url:
        # If we did, then we must update the source url based on the
        # sources in the dataset model (so we need to fetch the source again
        # or else we'll add a new one)
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
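# Hypothetical usage sketch for import_csv() (not part of the original source).
# It assumes an existing dataset and locally staged CSV data; the Dataset import
# path mirrors the Source import used elsewhere in these excerpts and may differ,
# and the dataset name and URLs below are placeholders.
import argparse
from openspending.model import Dataset  # assumed import path

# Keyword arguments are forwarded verbatim to CSVImporter.run() via vars(args).
args = argparse.Namespace(dry_run=True, max_lines=1000, max_errors=25)

dataset = Dataset.by_name('test-csv')  # placeholder dataset name
# The two URLs differ when the data was first copied to the local file system;
# import_csv() then rewrites the stored source URL after the run.
import_csv(dataset, ('file:///tmp/data.csv', 'http://example.org/data.csv'), args)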
def setup(self):
    super(TestRunController, self).setup()
    self.source = csvimport_fixture('import_errors')
    self.source.dataset.managers.append(Account.by_name('test'))
    self.importer = CSVImporter(self.source)
    self.importer.run()
class TestRunController(ControllerTestCase):

    def setup(self):
        super(TestRunController, self).setup()
        self.source = csvimport_fixture('import_errors')
        self.source.dataset.managers.append(Account.by_name('test'))
        self.importer = CSVImporter(self.source)
        self.importer.run()

    def test_view_run(self):
        response = self.app.get(url(controller='run', action='view',
                                    dataset=self.source.dataset.name,
                                    source=self.source.id,
                                    id=self.importer._run.id),
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert readable_url(self.source.url).encode('utf-8') in response.body

    def test_view_run_does_not_exist(self):
        response = self.app.get(url(controller='run', action='view',
                                    dataset=self.source.dataset.name,
                                    source=self.source.id,
                                    id=47347893),
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert '404' in response.status, response.status
def test_delete_successfully_loaded_source(self):
    """
    Test source removal with a source that has been successfully loaded.
    Removing a source that has been successfully loaded should not be
    possible.
    """

    # Add and import source without errors.
    # The source is added to a dataset called 'test-csv' (but
    # we'll just use source.dataset.name in case it changes)
    source = csvimport_fixture('successful_import')
    source.dataset.managers.append(Account.by_name('test'))
    importer = CSVImporter(source)
    importer.run()

    # Make sure the source is imported
    assert db.session.query(Source).filter_by(id=source.id).count() == 1, \
        "Import of csv failed. Source not found"

    # Try to delete the source
    response = self.app.post(url(controller='source', action='delete',
                                 dataset=source.dataset.name,
                                 id=source.id),
                             extra_environ={'REMOTE_USER': '******'})

    # Check that the source has NOT been deleted
    assert db.session.query(Source).filter_by(id=source.id).count() == 1, \
        "Deleting source succeeded. The source is gone."
def test_delete_source(self):
    """
    Test source removal with a source that includes errors
    """

    # Add and import source with errors (we want to remove it)
    # The source is added to a dataset called 'test-csv' (but
    # we'll just use source.dataset.name in case it changes)
    source = csvimport_fixture('import_errors')
    source.dataset.managers.append(Account.by_name('test'))
    importer = CSVImporter(source)
    importer.run()

    # Make sure the source is imported
    assert db.session.query(Source).filter_by(id=source.id).count() == 1, \
        "Import of csv failed. Source not found"

    # Delete the source
    response = self.app.post(url(controller='source', action='delete',
                                 dataset=source.dataset.name,
                                 id=source.id),
                             extra_environ={'REMOTE_USER': '******'})

    # Check if source has been deleted
    assert db.session.query(Source).filter_by(id=source.id).count() == 0, \
        "Deleting source unsuccessful. Source still exists."
class TestRunController(ControllerTestCase):

    def setup(self):
        h.skip_if_stubbed_solr()
        super(TestRunController, self).setup()
        self.source = csvimport_fixture('import_errors')
        self.source.dataset.managers.append(Account.by_name('test'))
        self.importer = CSVImporter(self.source)
        self.importer.run()

    def test_view_run(self):
        response = self.app.get(url(controller='run', action='view',
                                    dataset=self.source.dataset.name,
                                    source=self.source.id,
                                    id=self.importer._run.id),
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert self.source.name.encode('utf-8') in response.body

    def test_view_run_does_not_exist(self):
        response = self.app.get(url(controller='run', action='view',
                                    dataset=self.source.dataset.name,
                                    source=self.source.id,
                                    id=47347893),
                                extra_environ={'REMOTE_USER': '******'},
                                expect_errors=True)
        assert '404' in response.status, response.status
def test_error_with_empty_additional_date(self):
    source = csvimport_fixture('empty_additional_date')
    importer = CSVImporter(source)
    importer.run()
    # We are currently not able to import date cells without a value. See:
    # http://trac.openspending.org/ticket/170
    h.assert_equal(importer.errors, 1)
def test_no_dimensions_for_measures(self):
    source = csvimport_fixture('simple')
    importer = CSVImporter(source)
    importer.run()
    dataset = db.session.query(Dataset).first()
    dimensions = [str(d.name) for d in dataset.dimensions]
    assert sorted(dimensions) == ['entry_id', 'from', 'time', 'to']
def test_no_dimensions_for_measures(self):
    source = csvimport_fixture('simple')
    importer = CSVImporter(source)
    importer.run()
    dataset = db.session.query(Dataset).first()
    dimensions = [str(d.name) for d in dataset.dimensions]
    h.assert_equal(sorted(dimensions), ['entry_id', 'from', 'time', 'to'])
def test_erroneous_values(self):
    source = csvimport_fixture('erroneous_values')
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    h.assert_equal(importer.errors, 2)
    records = list(importer._run.records)
    h.assert_true("time" in records[1].attribute,
                  "Should find badly formatted date")
    h.assert_equal(records[1].row, 5)
def test_import_errors(self):
    source = csvimport_fixture('import_errors')
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    h.assert_true(importer.errors > 1, "Should have errors")
    records = list(importer._run.records)
    h.assert_equal(records[0].row, 1,
                   "Should detect missing date column in line 1")
def test_empty_csv(self):
    source = csvimport_fixture('default')
    source.url = 'file:///dev/null'
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    h.assert_equal(importer.errors, 2)
    records = list(importer._run.records)
    h.assert_equal(records[0].row, 0)
    h.assert_equal(records[1].row, 0)
    h.assert_true("Didn't read any lines of data" in str(records[1].message))
def test_successful_import(self):
    source = csvimport_fixture('successful_import')
    importer = CSVImporter(source)
    importer.run()
    dataset = db.session.query(Dataset).first()
    h.assert_true(dataset is not None, "Dataset should not be None")
    h.assert_equal(dataset.name, "test-csv")
    entries = dataset.entries()
    h.assert_equal(len(list(entries)), 4)
    # TODO: provenance
    entry = list(dataset.entries(limit=1, offset=1)).pop()
    h.assert_true(entry is not None, "Entry with name could not be found")
    h.assert_equal(entry['amount'], 66097.77)
def _test_import(self, name):
    source = csvimport_fixture(name)
    data = open(source.url)
    lines = self.count_lines_in_stream(data) - 1  # -1 for header row
    importer = CSVImporter(source)
    importer.run()
    h.assert_equal(importer.errors, 0)

    # check correct number of entries
    dataset = db.session.query(Dataset).first()
    entries = list(dataset.entries())
    h.assert_equal(len(entries), lines)
def _test_import(self, name):
    source = csvimport_fixture(name)
    data = open(source.url)
    lines = self.count_lines_in_stream(data) - 1  # -1 for header row
    importer = CSVImporter(source)
    importer.run()
    assert importer.errors == 0

    # check correct number of entries
    dataset = db.session.query(Dataset).first()
    entries = list(dataset.entries())
    assert len(entries) == lines
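# count_lines_in_stream() is referenced by the helpers above but not shown in
# these excerpts. A minimal sketch of what such a helper might look like (an
# assumption, not the project's actual implementation): it just counts the
# lines, header included, of an open file-like object.
def count_lines_in_stream(self, stream):
    # Iterating over a text stream yields one item per line.
    return sum(1 for _ in stream)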
def test_erroneous_values(self):
    source = csvimport_fixture('erroneous_values')
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    # Expected failures:
    # * unique key constraint not met (2x)
    # * amount cannot be parsed
    # * time cannot be parsed
    h.assert_equal(importer.errors, 4)

    records = list(importer._run.records)
    # The fourth record should be about a badly formed date
    h.assert_true("time" in records[3].attribute,
                  "Should find badly formatted date")
    # The row number of the badly formed date should be 5
    h.assert_equal(records[3].row, 5)
def test_successful_import_with_simple_testdata(self):
    source = csvimport_fixture('simple')
    importer = CSVImporter(source)
    importer.run()
    h.assert_equal(importer.errors, 0)
    dataset = db.session.query(Dataset).first()
    h.assert_true(dataset is not None, "Dataset should not be None")

    entries = list(dataset.entries())
    h.assert_equal(len(entries), 5)

    entry = entries[0]
    h.assert_equal(entry['from']['label'], 'Test From')
    h.assert_equal(entry['to']['label'], 'Test To')
    h.assert_equal(entry['time']['name'], '2010-01-01')
    h.assert_equal(entry['amount'], 100.00)
def test_successful_import_with_simple_testdata(self):
    source = csvimport_fixture('simple')
    importer = CSVImporter(source)
    importer.run()
    assert importer.errors == 0
    dataset = db.session.query(Dataset).first()
    assert dataset is not None, "Dataset should not be None"

    entries = list(dataset.entries())
    assert len(entries) == 5

    entry = entries[0]
    assert entry['from']['label'] == 'Test From'
    assert entry['to']['label'] == 'Test To'
    assert entry['time']['name'] == '2010-01-01'
    assert entry['amount'] == 100.00
def test_erroneous_values(self):
    source = csvimport_fixture('erroneous_values')
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    # Expected failures:
    # * unique key constraint not met (2x)
    # * amount cannot be parsed
    # * time cannot be parsed
    assert importer.errors == 4

    records = list(importer._run.records)
    # The fourth record should be about a badly formed date
    assert "time" in records[3].attribute, \
        "Should find badly formatted date"
    # The row number of the badly formed date should be 5
    assert records[3].row == 5
def load_source(source_id, sample=False):
    from openspending.model.source import Source
    from openspending.importer import CSVImporter
    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        return
    if not source.loadable:
        log.error("Dataset has no mapping.")
        return
    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(dry_run=True, max_lines=1000, max_errors=1000)
    else:
        importer.run()
    index_dataset.delay(source.dataset.name)
def load_source(source_id, sample=False):
    from openspending.model import Source
    from openspending.importer import CSVImporter
    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        return
    if not source.loadable:
        log.error("Dataset has no mapping.")
        return
    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(max_lines=1000, max_errors=1000)
    else:
        importer.run()
    index_dataset.delay(source.dataset.name)
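# Hypothetical invocation sketch for load_source() (not part of the original
# source); the source id below is a placeholder. Both variants above finish by
# queueing index_dataset.delay() for the source's dataset, so the search index
# is refreshed whether a sample or a full run was requested.
load_source(42, sample=True)  # bounded run, capped at 1000 lines / 1000 errors
load_source(42)               # full import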
def test_error_with_empty_additional_date(self):
    source = csvimport_fixture('empty_additional_date')
    importer = CSVImporter(source)
    importer.run()
    assert importer.errors == 1
    dataset = Dataset.by_name(model['dataset']['name'])
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)
    source = Source(dataset, shell_account(), csv_data_url)
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0


def _csvimport(args):
    return csvimport(args.dataset_url, args)


def configure_parser(subparser):
    p = subparser.add_parser('csvimport',
                             help='Load a CSV dataset',
                             description='You must specify --model.',
                             parents=[import_parser])
    p.add_argument('--model', action="store", dest='model', default=None,
                   metavar='url',
                   help="URL of JSON format model (metadata and mapping).")
def test_quoting(self):
    source = csvimport_fixture('quoting')
    importer = CSVImporter(source)
    importer.run()
    h.assert_equal(importer.errors, 0)
def test_malformed_csv(self):
    source = csvimport_fixture('malformed')
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    assert importer.errors == 1
def test_malformed_csv(self):
    source = csvimport_fixture('malformed')
    importer = CSVImporter(source)
    importer.run(dry_run=True)
    h.assert_equal(importer.errors, 1)
def test_quoting(self):
    source = csvimport_fixture('quoting')
    importer = CSVImporter(source)
    importer.run()
    assert importer.errors == 0
    if dataset is None:
        dataset = Dataset(model)
        db.session.add(dataset)
    log.info("Dataset: %s", dataset.name)
    source = Source(dataset, shell_account(), csv_data_url)
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))
    return 0


def _csvimport(args):
    return csvimport(args.dataset_url, args)


def configure_parser(subparser):
    p = subparser.add_parser('csvimport',
                             help='Load a CSV dataset',
                             description='You must specify --model.',
                             parents=[import_parser])
    p.add_argument('--model', action="store", dest='model', default=None,
                   metavar='url',
                   help="URL of JSON format model (metadata and mapping).")
    p.add_argument('dataset_url', help="Dataset file URL")