def json_of_url(url):
    """Load and parse JSON from *url*, which may be a local path or a URL.

    Returns the parsed JSON object. Raises whatever `open`/`urllib2.urlopen`
    or `json.load` raise on failure.
    """
    # Check if it's a local file
    if _is_local_file(url):
        # Local path: use a context manager so the file is always closed
        # (the original leaked the handle returned by open()).
        with open(url, 'r') as fp:
            return json.load(fp)
    else:
        # Remote URL: fetch it and close the response once parsed.
        fp = urllib2.urlopen(url)
        try:
            return json.load(fp)
        finally:
            fp.close()
def csvimport_fixture(name):
    """Load the CSV-import fixture *name*.

    Returns a tuple ``(data_fp, model)``: an OPEN file object for the
    fixture's data.csv (caller is responsible for closing it) and the
    parsed model dict, with mapping.json merged in under 'mapping' when
    that file exists.
    """
    data_fp = csvimport_fixture_file(name, 'data.csv')
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    # Close the JSON fixture handles once parsed (previously leaked);
    # data_fp is intentionally returned open for the caller.
    model_fp.close()
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
        mapping_fp.close()
    return data_fp, model
def csvimport_fixture(name):
    """Build and commit a Source fixture for the CSV-import fixture *name*.

    Loads model.json (plus optional mapping.json), creates and generates a
    Dataset from it, then attaches a Source pointing at the fixture's
    data.csv owned by a freshly created account. Returns the committed
    Source.
    """
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    # Close fixture handles as soon as they are parsed (previously leaked).
    model_fp.close()
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
        mapping_fp.close()
    dataset = Dataset(model)
    # Create the dataset's backing tables before anything references it.
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
def csvimport_fixture(name):
    """Build and commit a Source fixture for the CSV-import fixture *name*.

    Same flow as the sibling helper: parse model.json (merging an optional
    mapping.json), generate a Dataset, and commit a Source for the
    fixture's data.csv owned by a test account (via ``h.make_account``).
    Returns the committed Source.
    """
    model_fp = csvimport_fixture_file(name, 'model.json')
    mapping_fp = csvimport_fixture_file(name, 'mapping.json')
    model = json.load(model_fp)
    # Close fixture handles as soon as they are parsed (previously leaked).
    model_fp.close()
    if mapping_fp:
        model['mapping'] = json.load(mapping_fp)
        mapping_fp.close()
    dataset = Dataset(model)
    # Create the dataset's backing tables before anything references it.
    dataset.generate()
    db.session.add(dataset)
    data_path = csvimport_fixture_path(name, 'data.csv')
    user = h.make_account()
    source = Source(dataset, user, data_path)
    db.session.add(source)
    db.session.commit()
    return source
def __init__(self, package, model_url=None, resource_uuid=None):
    """Build a CKANImporter from a CKAN *package*.

    package       -- a Package instance, or a value Package() accepts
                     (presumably a package name/id -- confirm with Package).
    model_url     -- optional explicit URL of the JSON model; when omitted
                     it is discovered from the package's CKAN tags.
    resource_uuid -- optional uuid selecting the data resource explicitly.

    Raises ImporterError if the model JSON cannot be parsed.
    """
    if not isinstance(package, Package):
        package = Package(package)
    # Choose the data resource: an explicit uuid wins, otherwise take the
    # resource tagged 'data' by the openspending CKAN convention.
    if resource_uuid:
        data = package.get_resource(resource_uuid)
    else:
        data = package.openspending_resource('data')
    if not model_url:
        # Use magic CKAN tags
        model = package.openspending_resource('model')
        model_url = model['url']
    model_fp = util.urlopen(model_url)
    try:
        model = json.load(model_fp)
    except Exception as e:
        # Any parse failure is deliberately wrapped with a helpful hint.
        raise ImporterError("Error encountered while parsing JSON model. "
                            "http://jsonlint.com might help! Error was: %s" % e)
    # Stream the CSV data and hand everything to the base importer.
    csv = util.urlopen_lines(data["url"])
    super(CKANImporter, self).__init__(csv, model, data["url"])
def csv_import(resource_url, model_url, **kwargs):
    """Run a CSV import of *resource_url* using the model at *model_url*.

    Extra keyword arguments are forwarded to ``CSVImporter.run``.
    """
    import urllib
    from contextlib import closing

    from openspending.lib import json
    from openspending.etl import util
    from openspending.etl.importer import CSVImporter

    # Close the HTTP response once the model is parsed (previously leaked).
    with closing(urllib.urlopen(model_url)) as model_fp:
        model = json.load(model_fp)
    csv = util.urlopen_lines(resource_url)
    importer = CSVImporter(csv, model, resource_url)
    importer.run(**kwargs)
def _test_mapping(self, dir):
    """Assert that MappingImporter converts the CSV mapping fixture in
    *dir* into exactly the JSON mapping fixture alongside it."""
    mapping_csv = h.fixture_file("csv_import/%s/mapping.csv" % dir)
    mapping_json = h.fixture_file("csv_import/%s/mapping.json" % dir)
    csv = mapping_csv.read()
    # Close both fixture handles once consumed (previously leaked).
    mapping_csv.close()
    expected_mapping_data = json.load(mapping_json)
    mapping_json.close()
    importer = MappingImporter()
    observed_mapping_data = importer.import_from_string(csv)
    assert observed_mapping_data == expected_mapping_data
def input_json(**args): """ inputjson JSON data """ inputfile = args.get('inputfile', None) if len(inputfile) != 1: print "You need to specific one and only one output file" return inputfile = inputfile[0] try: f = open(inputfile, 'rb') except: print "file not found" inputobj = json.load(f) f.close() modeldict = {'MetadataOrg' : [],'DataOrg' : [],'Source' : [],'SourceFile' : [],'Dataset' : []} for theobj in inputobj: if theobj['model'] not in modeldict.keys(): modeldict[theobj['model']] = [theobj] else: modeldict[theobj['model']].append(theobj) theorder = ['MetadataOrg','DataOrg','Source','SourceFile','Dataset'] ordermapping = {'metadataorg': 'MetadataOrg','dataorg':'DataOrg','source':'Source','sourcefile':'SourceFile','dataset':'Dataset'} for orderitem in theorder: modelclass = getattr(openspending.model, orderitem) print modelclass for theobj in modeldict[orderitem]: for objkey in theobj['fields'].keys(): try: if objkey.find("_id") != -1: #need to find the object assoicated ot this and repopulate modelobjstr = objkey.split('_')[0] searchpk = theobj['fields'][objkey] for foreignkeymod in modeldict[modelobjstr]: if foreignkeymod['pk'] == searchpk: foreignmodelclass = getattr(openspending.model, ordermapping[modelobjstr]) theobj[objkey] = foreignmodelclass.by_id(foreignkeymod['theid']) except: pass theid = modelclass.import_json_dump(theobj) theobj['theid'] = theid
def _test_dataset_dir(self, dir):
    """Import the CSV fixture in *dir* end-to-end and check that every
    data row (minus the header) became an Entry for the dataset."""
    data_csv = h.fixture_file("csv_import/%s/data.csv" % dir)
    mapping_json = h.fixture_file("csv_import/%s/mapping.json" % dir)
    # The fixture directory name doubles as the dataset name.
    dataset_name = unicode(dir)
    model = csv_fixture_model()
    model["mapping"] = json.load(mapping_json)
    model["dataset"]["name"] = dataset_name
    # data.csv presumably has a header row, hence the -1 -- confirm.
    lines = self.count_lines_in_stream(data_csv) - 1
    importer = CSVImporter(data_csv, model)
    importer.run()
    assert len(importer.errors) == 0, "Import should not throw errors"
    # check correct number of entries
    entries = Entry.find({"dataset.name": dataset_name})
    assert entries.count() == lines
def __init__(self, package, model_url=None, mapping_url=None, resource_uuid=None):
    """Build a CKANImporter from *package*, resolving model/mapping URLs.

    Either a full model (model_url) or just a mapping (mapping_url) can be
    given; when neither (or both) is given, the URLs are discovered from
    the package's openspending CKAN tags.
    """
    if not isinstance(package, ckan.Package):
        package = ckan.Package(package)
    # Choose the data resource: explicit uuid wins over the 'data' tag.
    if resource_uuid:
        data = package.get_resource(resource_uuid)
    else:
        data = package.openspending_resource('data')
    # True only when exactly one of model_url / mapping_url was supplied.
    # NOTE(review): when BOTH are supplied, explicit is False and the CKAN
    # tag lookup below may overwrite the caller's model_url -- confirm
    # this is intended.
    explicit = (model_url and not mapping_url) or (mapping_url and not model_url)
    if not explicit:
        # Use magic CKAN tags
        model = package.openspending_resource('model')
        mapping = package.openspending_resource('model:mapping')
        if model:
            model_url = model['url']
        elif mapping:
            mapping_url = mapping['url']
    # Model given
    if model_url and data:
        model = json.load(util.urlopen(model_url))
    # Mapping given, need to extract metadata from CKAN
    elif mapping_url and data:
        model = {}
        model['mapping'] = MappingImporter().import_from_url(mapping_url)
        model['dataset'] = package.metadata_for_resource(data)
    csv = util.urlopen_lines(data["url"])
    super(CKANImporter, self).__init__(csv, model, data["url"])
from openspending.lib import ckan
from openspending.lib import json
from openspending.etl.command import daemon
from openspending.etl.ui.test import ControllerTestCase, url, helpers as h

# Mock CKAN package registry shared by every test in this module.
MOCK_REGISTRY = json.load(h.fixture_file('mock_ckan.json'))


class TestLoadController(ControllerTestCase):
    """Tests for the 'load' controller.

    CkanClient is patched for the duration of each test so no real network
    calls are made; the mock serves packages from MOCK_REGISTRY.
    """

    def setup(self):
        super(TestLoadController, self).setup()
        # Swap the controller's CkanClient for a mock backed by the fixture
        # registry; undone in teardown.
        self.patcher = h.patch('openspending.etl.ui.controllers.load.ckan.CkanClient', spec=ckan.CkanClient)
        self.MockCkanClient = self.patcher.start()
        self.MockCkanClient.return_value = self.c = h.mock_ckan(MOCK_REGISTRY)

    def teardown(self):
        self.patcher.stop()
        super(TestLoadController, self).teardown()

    def test_packages(self):
        response = self.app.get(url(controller='load', action='packages'))
        # Show title for packages
        assert '<a href="http://ckan.net/package/baz">The Baz dataset</a>' in response
        # Show 'import' link for importable packages
        import_url = url(controller='load', action='start', package='bar')
        assert '<a href="%s">' % import_url in response
        # Show 'diagnose' link for non-importable packages
        diagnose_url = url(controller='load', action='diagnose', package='baz')
        # NOTE(review): the visible chunk ends here -- the assertion that
        # should use diagnose_url appears to be truncated from this view.
def make_mock_ckan(*args, **kwargs):
    """Build a fresh mock CKAN client from the fixture registry and stash
    it in the module-global `current_mock_ckan` before returning it."""
    global current_mock_ckan
    registry = json.load(h.fixture_file('mock_ckan.json'))
    current_mock_ckan = h.mock_ckan(registry)
    return current_mock_ckan
def _load_json(fp_or_str): try: return json.load(fp_or_str) except AttributeError: return json.loads(fp_or_str)
def csv_fixture_mapping(name=None):
    """Return the CSV-import mapping fixture for *name*.

    With no name, returns a copy of the default mapping (copied so callers
    can mutate the result without corrupting the shared default).
    """
    if name is None:
        return default_mapping.copy()
    f = h.fixture_file("csv_import/%s-model.json" % name)
    # Ensure the fixture handle is closed even if parsing fails
    # (previously leaked).
    try:
        return json.load(f)
    finally:
        f.close()
def parseDBJSON(args): if len(args['jsondata']) != 1: print "\n\nPlease specify one and only one json dump from python manage.py etldata dumpdata" sys.exit(1) try: jsonfile = open(args['jsondata'][0], 'rb') except Exception, e: print "failed to open file" print e sys.exit(1) try: databankjson = json.load(jsonfile) except Exception, e: print "\n\nYou hit an error on json loading" print e sys.exit(1) #split the objects to their approrpirate spot modelobjs = {} for jsonobj in databankjson: modelname = jsonobj['model'].split(".")[1] if modelname in modelobjs.keys(): modelobjs[modelname].append(jsonobj) else: modelobjs[modelname] = [jsonobj]
def input_json(**args): """ inputjson JSON data """ inputfile = args.get('inputfile', None) if len(inputfile) != 1: print "You need to specific one and only one output file" return inputfile = inputfile[0] try: f = open(inputfile, 'rb') except: print "file not found" inputobj = json.load(f) f.close() modeldict = { 'MetadataOrg': [], 'DataOrg': [], 'Source': [], 'SourceFile': [], 'Dataset': [] } for theobj in inputobj: if theobj['model'] not in modeldict.keys(): modeldict[theobj['model']] = [theobj] else: modeldict[theobj['model']].append(theobj) theorder = [ 'MetadataOrg', 'DataOrg', 'Source', 'SourceFile', 'Dataset' ] ordermapping = { 'metadataorg': 'MetadataOrg', 'dataorg': 'DataOrg', 'source': 'Source', 'sourcefile': 'SourceFile', 'dataset': 'Dataset' } for orderitem in theorder: modelclass = getattr(openspending.model, orderitem) print modelclass for theobj in modeldict[orderitem]: for objkey in theobj['fields'].keys(): try: if objkey.find("_id") != -1: #need to find the object assoicated ot this and repopulate modelobjstr = objkey.split('_')[0] searchpk = theobj['fields'][objkey] for foreignkeymod in modeldict[modelobjstr]: if foreignkeymod['pk'] == searchpk: foreignmodelclass = getattr( openspending.model, ordermapping[modelobjstr]) theobj[objkey] = foreignmodelclass.by_id( foreignkeymod['theid']) except: pass theid = modelclass.import_json_dump(theobj) theobj['theid'] = theid
def json_of_url(url):
    """Fetch *url* over HTTP and parse the response body as JSON."""
    fp = urllib2.urlopen(url)
    try:
        return json.load(fp)
    finally:
        # Close the HTTP response once parsed (previously leaked).
        fp.close()