def test_load_file():
    # test github
    ds = Dataset.load_file('files/git_urls_commits.csv', registry='github',
                           cache_dir=CACHE_DIR, debug=True,
                           github_pat=os.getenv('GITHUB_PAT'))
    ds.update(**{'name': 'test', 'version': '1.0'})
    ds.backup('../test.p')
    ds = Dataset.restore('../test.p')
    ds.export_inputset('../test.json')

    # test npm
    ds = Dataset.load_file('files/names_versions.csv', registry='npm',
                           cache_dir=CACHE_DIR, debug=True)
    ds.update(**{'name': 'test', 'version': '1.0'})
    ds.backup('../test.p')
    ds = Dataset.restore('../test.p')
    ds.export_inputset('../test.json')

    # test pypi
    ds = Dataset.load_file('files/names_versions.csv', registry='pypi',
                           cache_dir=CACHE_DIR, debug=True)
    ds.update(**{'name': 'test', 'version': '1.0'})
    ds.backup('../test.p')
    ds = Dataset.restore('../test.p')
    ds.export_inputset('../test.json')

    # test vanilla (no registry)
    ds = Dataset.load_file('files/urls.csv', cache_dir=CACHE_DIR, debug=True)
    ds.update(**{'name': 'test', 'version': '1.0'})
    ds.backup('../test.p')
    ds = Dataset.restore('../test.p')
    ds.export_inputset('../test.json')

    # cleanup files
    os.remove('../test.p')
    os.remove('../test.json')
def load(ctx, registry, from_type, name_or_path, fileargs):
    """Generates a dataset from a weblist name or file path."""
    backup_ds = None

    try:
        backup_ds = deepcopy(ctx.obj.get('dataset', None))

        if registry == 'noreg':
            registry = None

        global TEMP_SETTINGS

        if from_type == 'file':
            # read in a file (fileargs is either a header string for csv
            # or a parser handle for json)
            ds = Dataset.load_file(name_or_path, registry,
                                   fileargs=fileargs, **TEMP_SETTINGS)
        else:
            # download a weblist or organization repo list
            ds = Dataset.load_web(name_or_path, registry,
                                  from_type=from_type, **TEMP_SETTINGS)

        ctx.obj['dataset'] = ds

        # reset the temporary api/metadata dict
        TEMP_SETTINGS = dict()

    except Exception as e:
        print_error(e, DEBUG)

        # silently restore the dataset
        ctx.obj['dataset'] = backup_ds
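

# A minimal sketch (not part of the project) of driving `load` outside the
# CLI, e.g. from a quick script. The SimpleNamespace-based context below is
# an assumption standing in for the real CLI context, which supplies an
# object whose .obj dict holds the active dataset; the csv path and registry
# reuse values from test_load_file above.
if __name__ == '__main__':
    from types import SimpleNamespace

    ctx = SimpleNamespace(obj={'dataset': None})
    load(ctx, registry='pypi', from_type='file',
         name_or_path='files/names_versions.csv', fileargs=None)
    print(ctx.obj['dataset'])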