from pyDataverse.api import Api
import json
import dvconfig

base_url = dvconfig.base_url
api_token = dvconfig.api_token
api = Api(base_url, api_token)
print(api.status)

dataset_json = 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/open-source-at-harvard.json'
with open(dataset_json) as f:
    metadata = json.load(f)
dataverse = 'open-source-at-harvard'
resp = api.create_dataset(dataverse, json.dumps(metadata))
print(resp)
dataset_pid = resp.json()['data']['persistentId']

tabular_file = 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/files/2019-02-25.tsv'
resp = api.upload_file(dataset_pid, tabular_file)
print(resp)
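The script above reads its connection settings from a local dvconfig module (base_url and api_token). A minimal sketch of such a module, with placeholder values rather than any real configuration:

# dvconfig.py -- hypothetical example; point these at your own installation
base_url = 'http://localhost:8080'                    # Dataverse installation to talk to
api_token = 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx'    # API token of the account doing the upload

Keeping these values in a separate module keeps the token out of the upload script itself and makes it easy to point the same script at a different installation.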
if UPLOAD_DATA:
    CREATE_DV = False
    DELETE_DV = False
    CREATE_DS = False
    ADD_FILE = False
    DELETE_DS = False
    CREATE_DF = False

    api_token_up = os.environ["API_TOKEN_UP"]
    api_host_up = os.environ["API_HOST_UP"]
    api_up = Api(api_host_up, api_token=api_token_up, use_https=False)

    # create dataverse
    if CREATE_DV:
        dv_json = read_json_file('data/down/dv_AUSSDA/dv_AUSSDA_metadata.json')
        api_up.create_dataverse(dv_json['alias'], dict_to_json(dv_json))
        time.sleep(0.2)

    # create dataset
    if CREATE_DS:
        ds_json = read_json_file('data/down/dv_AUSSDA/ds_VKYZPD/ds_VKYZPD_metadata.json')
        resp = api_up.create_dataset('science', dict_to_json(ds_json))
        time.sleep(0.2)

    # add a file to an existing dataset
    if ADD_FILE:
        doi = 'doi:10.5072/FK2/PF6EMS'
        filename = 'dev/cat.jpg'
        resp = api_up.upload_file(doi, filename)
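read_json_file() and dict_to_json() are not defined in this fragment; they are presumably small helpers imported earlier in the script. A minimal sketch, assuming they simply wrap the standard json module:

import json

def read_json_file(filename):
    # Load a JSON metadata file from disk into a Python dict.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

def dict_to_json(data):
    # Serialize a dict back to a JSON string, as expected by the API calls above.
    return json.dumps(data, ensure_ascii=True, indent=2)

The fragment also relies on os, time, and pyDataverse's Api having been imported earlier.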
metadata = json.load(f)
dataverse = parent
resp = api.create_dataset(dataverse, json.dumps(metadata))
print(resp)
dataset_pid = resp.json()['data']['persistentId']
dataset_dbid = resp.json()['data']['id']

files_dir = path.replace(json_file, '') + 'files'
filemetadata_dir = path.replace(json_file, '') + '.filemetadata'
print(files_dir)

for path, subdir, files in os.walk(files_dir):
    for name in files:
        filepath = os.path.join(path, name)
        relpath = os.path.relpath(filepath, files_dir)
        # "directoryLabel" is used to populate "File Path"
        directoryLabel, filename = os.path.split(relpath)
        resp = api.upload_file(dataset_pid, "'" + filepath + "'")
        print(resp)
        file_id = resp['data']['files'][0]['dataFile']['id']
        ## This lock check and sleep is here to prevent the dataset from being permanently
        ## locked because a tabular file was uploaded first.
        check_dataset_lock(dataset_dbid)
        # TODO: Think more about where the description comes from. A "sidecar" file as
        # proposed at https://github.com/IQSS/dataverse/issues/5924#issuecomment-499605672 ?
        # L.A.: I implemented something along these lines - an (optional) directory called
        # ".filemetadata" in the dataset directory, where files containing extra json
        # filemetadata records may be placed for each of the files in the "files" directory.
        # check for optional filemetadata file:
        filemetadatapath = os.path.join(filemetadata_dir, relpath)
        if os.path.exists(filemetadatapath):
            with open(filemetadatapath) as m:
                file_metadata = json.load(m)
        else:
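check_dataset_lock() is not shown in this fragment. A minimal sketch of what such a helper could look like, assuming it polls the native /datasets/{id}/locks endpoint through pyDataverse's generic get_request() and waits until the lock list comes back empty (the endpoint path and the 2-second delay are assumptions, not taken from the original script):

import time

def check_dataset_lock(dataset_dbid):
    # Ask the installation whether the dataset is currently locked,
    # e.g. while a previously uploaded tabular file is still being ingested.
    resp = api.get_request('/datasets/' + str(dataset_dbid) + '/locks', auth=True)
    locks = resp.json().get('data', [])
    if locks:
        print('Dataset ' + str(dataset_dbid) + ' is locked, waiting...')
        time.sleep(2)
        check_dataset_lock(dataset_dbid)

Waiting for the lock to clear before the next upload keeps the ingest of a tabular file from colliding with the files that follow it in the loop.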