# Connect to the Dataverse API
api_token_down = os.environ["API_TOKEN_DOWN"]
api_host_down = os.environ["API_HOST_DOWN"]
api_down = Api(api_host_down, api_token=api_token_down)

# Extract the DOIs and dataverses of the datasets.
datasets_list = read_datasets_csv(ROOT_DIR + '/data/datasets.csv')
dv_list = [ds['dataverse'] for ds in datasets_list]
dv_list = set(dv_list)

# Create a directory for each dataverse and download its metadata
for dv in dv_list:
    down_dataverse_dir = down_dir + '/dv_{0}'.format(dv)
    if not os.path.isdir(down_dataverse_dir):
        os.mkdir(down_dataverse_dir)
    # get_dataverse() returns a requests.Response, so parse its JSON payload
    resp_dv = api_down.get_dataverse(dv)
    write_file(down_dataverse_dir + '/dv_' + dv + '_metadata.json',
               json.dumps(resp_dv.json()['data']))

# Loop over all datasets
for ds in datasets_list:
    # Get the metadata of the dataset
    resp_ds = api_down.get_dataset(ds['doi'])
    identifier = ds['doi'].split('/')[1]

    # Create a directory for the dataset
    down_dataset_dir = down_dir + '/dv_' + ds['dataverse'] + '/ds_' + identifier
    if not os.path.isdir(down_dataset_dir):
        os.mkdir(down_dataset_dir)
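# Note: read_datasets_csv() and write_file() are not pyDataverse functions; they are
# assumed here to be small local helpers. A minimal sketch of what they could look
# like (the CSV columns 'doi' and 'dataverse' and the file encoding are assumptions):
import csv

def read_datasets_csv(filename):
    """Read the datasets CSV into a list of dicts, one per row (e.g. 'doi', 'dataverse')."""
    with open(filename, newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f))

def write_file(filename, string):
    """Write a string to a text file."""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(string)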
# TODO: limit the amount of recursion
def check_dataset_lock(dataset_dbid):
    """Wait until all locks on the dataset with the given database id are released."""
    query_str = '/datasets/' + str(dataset_dbid) + '/locks'
    params = {}
    resp = api.get_request(query_str, params=params, auth=True)
    locks = resp.json()['data']
    if locks:
        print('Lock found for dataset id ' + str(dataset_dbid) + '... sleeping...')
        time.sleep(2)
        check_dataset_lock(dataset_dbid)

# Publish the root dataverse if it has not been released yet
resp = api.get_dataverse(':root')
buff = StringIO("")
if resp.status_code == 401:
    print('Publishing root dataverse.')
    resp = api.publish_dataverse(':root')
    print(resp)

# Derive each JSON file's type (dataverse or dataset) and its parent
# from its position in the directory tree
for path in paths:
    parts = path.split('/')
    json_file = parts[-1]
    dvtype = parts[-3]
    if 'dataverses' == dvtype:
        dvtype = 'dataverse'
    else:
        dvtype = 'dataset'
    parent = parts[-4]
    if 'data' == parent: