def import_csv(dataset, url, args):
    """
    Load CSV data into ``dataset`` and record the source it came from.

    ``url`` is unpacked as a pair ``(csv_data_url, source_url)``: the first
    item is the location the CSV is actually read from, the second is the
    url stored on the source once the import has run (the two differ when
    the data was imported from the file system).

    ``args`` is an object whose attributes are expanded with ``vars`` and
    handed to the importer's ``run`` method as keyword arguments.
    """
    data_location, origin_location = url

    # Build a candidate source and attach the CSV analysis to it right away;
    # without the analysis we would be left with a weird message.
    candidate = Source(dataset, shell_account(), data_location)
    candidate.analysis = analyze_csv(data_location)

    # Reuse the first source the dataset already holds for this data url,
    # falling back to the freshly built candidate when there is none.
    source = next(
        (known for known in dataset.sources if known.url == data_location),
        candidate
    )
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    CSVImporter(source).run(**vars(args))

    # Identical urls mean the data was not read from the file system, so
    # there is nothing left to update.
    if data_location == origin_location:
        return

    # Fetch the source again by id before changing its url (otherwise a new
    # source would be added), then point it at the original source url.
    refreshed = Source.by_id(source.id)
    refreshed.url = origin_location
    db.session.commit()
if basesource.rawfile: basesource.rawfile.delete() basesource.rawfile = sourcefile source = basesource oroperations = source.getORInstructions() source.reload_openrefine() else: source = Source(dataset=dataset, name=data['name'], url=None, rawfile=sourcefile) db.session.add(source) #handle file elif data.get('url', None): if basesource: source = basesource source.name = data['name'] source.url = data['url'] oroperations = source.getORInstructions() source.reload_openrefine() #maybe reload the OpenRefine? #trigger reload else: source = Source(dataset=dataset, name=data['name'], url=data['url']) db.session.add(source) else: source = basesource oroperations = source.getORInstructions() source.reload_openrefine() #check if source exists if sourceapi.get('prefuncs', None):
basesource.rawfile = sourcefile source = basesource source.reload_openrefine() else: source = Source(dataset=dataset, name=data['name'], url=None, rawfile=sourcefile) db.session.add(source) #handle file elif data.get('url', None): if basesource: source = basesource source.name = data['name'] source.url = data['url'] source.reload_openrefine() #maybe reload the OpenRefine? #trigger reload else: source = Source(dataset=dataset, name=data['name'], url=data['url']) db.session.add(source) else: source = basesource source.reload_openrefine() #check if source exists if sourceapi.get('prefuncs', None): prefuncs = json.loads(sourceapi['prefuncs'])