def create_dataset():
    print('make connection...')
    connection = Connection(DV_HOST, TOKEN, use_https=False)
    print('connection', connection)
    dataverse = connection.get_dataverse('root')
    print('base_url', connection.base_url)
    title, description, creator = get_geotweet_params()
    kwargs = dict(notes="notes go here")
    dataverse.create_dataset(title, description, creator, **kwargs)
    print('dataset created')
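For the function above to run standalone it needs a few pieces the excerpt leaves out. A minimal sketch, assuming any reachable Dataverse instance; `DV_HOST`, `TOKEN`, and `get_geotweet_params` are placeholders whose values are chosen purely for illustration:

from dataverse import Connection

DV_HOST = 'demo.dataverse.org'   # assumed: hostname of a Dataverse instance
TOKEN = 'xxxx-xxxx-xxxx-xxxx'    # assumed: an API token for that host

def get_geotweet_params():
    # Hypothetical helper returning the (title, description, creator)
    # triple that create_dataset expects
    return ('Geotweet sample', 'Tweets with geolocation', 'Jane Researcher')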
def clone(self):
    # disable_ssl=True turns off SSL certificate validation
    c = Connection(self.user, self.password, self.host, disable_ssl=True)
    if not c.connected:
        raise DataverseArchiverError('Invalid credentials or host')
    dv = c.get_dataverse(self.dataverse_name)
    if not dv:
        raise DataverseArchiverError('Invalid dataverse alias')
    study = dv.get_study_by_doi(self.study_doi)
    if not study:
        raise DataverseArchiverError('Invalid study doi')
    # Build one immutable Celery task signature (.si) per released file;
    # the chord runs them in parallel and calls clone_done once all finish
    header = [
        self.download_file.si(self, f.download_url)
        for f in study.get_released_files()
    ]
    return chord(header, self.clone_done.s(self))
def fetch_dz(token):
    import subprocess
    from dataverse import Connection

    host = "dataverse.harvard.edu"
    connection = Connection(host, token)
    dataverse = connection.get_dataverse("jakobshavn-inversions", refresh=True)
    if not dataverse:
        raise DataverseError("No dataverse found!")
    title = ("Ice surface elevation and change at Jakobshavn Isbrae, "
             "West Greenland, 2008-2014")
    dataset = dataverse.get_dataset_by_title(title, refresh=True)
    if not dataset:
        raise DataverseError("No data set found!")
    file = dataset.get_file("dZ_grid.nc", "latest")
    url = file.download_url
    subprocess.call(["wget", "--no-clobber", url, "-O", "data/dZ_grid.nc"])
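The last step shells out to wget, which assumes the tool is installed and on PATH. A dependency-free variant using only the standard library might look like this (a sketch, not the original project's code; `download_once` is a hypothetical name):

import os
import urllib.request

def download_once(url, dest="data/dZ_grid.nc"):
    # Mirror wget --no-clobber: only download if the file is not already there
    if not os.path.exists(dest):
        urllib.request.urlretrieve(url, dest)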
import os
import sys
import urllib2
import simplejson
from subprocess import Popen, PIPE, STDOUT

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../modules')))
from config import configuration
from dataverse import Connection


def loadjson(apiurl):
    # Fetch and parse a JSON document from the Dataverse native API
    req = urllib2.Request(apiurl)
    opener = urllib2.build_opener()
    f = opener.open(req)
    return simplejson.load(f)


config = configuration()
# host and token are expected to come from the surrounding script's configuration
connection = Connection(host, token)
dataverse = connection.get_dataverse('clioinfra')
for item in dataverse.get_contents():
    # Sample item fields: u'protocol': u'hdl', u'authority': u'10622', u'identifier': u'R8EJJF'
    try:
        handle = str(item['protocol']) + ':' + str(item['authority']) + "/" + str(item['identifier'])
        datasetid = item['id']
        url = "https://" + str(host) + "/api/datasets/" + str(datasetid) + "/versions/1.0?&key=" + str(token)
        print(item)
        dataframe = loadjson(url)
        for fileitem in dataframe['data']['files']:
            # Spawn import.py for every file in the dataset version
            runimport = os.path.abspath(os.path.join(os.path.dirname(__file__)))
            runimport = str(runimport) + "/import.py -d 'https://" + str(host) + "' -H '" + str(handle) + ":" + str(datasetid) + ":" + str(fileitem['datafile']['id']) + "' -k " + str(token)
            p = Popen(runimport, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    except:
        # Skip entries that lack the expected metadata fields
        continue
def _connect(host, token):
    try:
        return Connection(host, token)
    except ConnectionError:
        return None
def _connect(token, host=settings.HOST):
    try:
        return Connection(host, token)
    except ConnectionError:
        return None
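Both `_connect` variants are used the same way: the caller treats `None` as a failed connection. A usage sketch (the token value and dataverse alias are assumptions):

connection = _connect(token='xxxx-xxxx-xxxx-xxxx')
if connection is None:
    raise RuntimeError('Could not connect to the Dataverse host')
dataverse = connection.get_dataverse('root')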
# Tail of a revoke-access helper (the enclosing function definition is not part of the excerpt):
for org in list(metadata['files'][file]['org']):
    if org == org_name:
        del metadata["files"][file]["org"][org_name]
metadata = json.dumps(metadata)
update_metadata(dataset, metadata)
print('User access revoked for', org_name)
return

host = 'demo.dataverse.org'
token = 'ae1379dd-29b3-40b7-b583-c4e40cc3656e'
connection = Connection(host, token)
dataverse = connection.get_dataverse('testing_dataverse_123')
# I have created the dataset in dataverse manually: "doi:10.70122/FK2/0HH8BM"
dataset = dataverse.get_dataset_by_doi('doi:10.70122/FK2/O13BQC')
metadata_filename = "metadata.txt"

parser = argparse.ArgumentParser(description='To work with encrypted files in dataverse')
subparsers = parser.add_subparsers(help='sub-command help')
# parser for uploading a file
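The excerpt stops where the upload sub-command would be defined. Purely to illustrate the argparse pattern the final comment introduces, a hypothetical continuation could look like this (the `upload` command and `--file` flag are invented for the sketch, not taken from the original tool):

upload_parser = subparsers.add_parser('upload', help='encrypt and upload a file')
upload_parser.add_argument('--file', required=True, help='path of the file to upload')
upload_parser.set_defaults(func=lambda args: print('would upload', args.file))

args = parser.parse_args()
args.func(args)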
def datasetspace(settings=''):
    (where, query, datasets, metadata, s, permissions) = ({}, '', [], [], {}, 'yes')
    where = {'collab': '', 'iish': '', 'harvard': ''}
    pagetitle = "Public datasets"
    config = configuration()
    if config['error']:
        return config['error']
    root = config['apiroot']
    dataversename = 'global'
    if request.args.get('dv'):
        dataversename = request.args.get('dv')
    if request.args.get('q'):
        query = request.args.get('q')
    if request.args.get('permissions'):
        permissions = request.args.get('permissions')
    if request.args.get('where'):
        where[request.args.get('where')] = 'checked="checked"'

    settings = Configuration()
    sconnection = ExtrasearchAPI(settings.config['dataverseroot'], dataversename)
    if where['harvard']:
        # Extract host for the Dataverse connection
        findhost = re.search(r'(http://|https://)(.+)', settings.config['harvarddataverse'])
        if findhost:
            settings.config['dataversehostname'] = findhost.group(2)
        connection = Connection(settings.config['dataversehostname'],
                                settings.config['harvardkey'])
    else:
        try:
            connection = Connection(config['hostname'], settings.config['key'])
        except:
            return 'Error: no connection to Dataverse. Please try later...'

    handlestr = ''
    if query:
        s['q'] = query
        metadata = search_by_keyword(connection, s)
    else:
        try:
            dataverse = connection.get_dataverse(dataversename)
            item = dataverse.get_contents()
            active = 'yes'
        except:
            active = None
        if active:
            try:
                for item in dataverse.get_contents():
                    handlestr += item['identifier'] + ' '
                active = 'yes'
            except:
                active = None
        if not active:
            handlestr = sconnection.read_all_datasets()
        if handlestr:
            s['q'] = handlestr
            s['per_page'] = 100
            metadata = search_by_keyword(connection, s)

    for dataset in metadata['items']:
        active = ''
        # Private datasets
        if permissions == 'closed':
            pagetitle = "Restricted datasets"
            try:
                if sconnection.has_restricted_data(dataset['global_id']):
                    active = 'yes'
            except:
                active = ''
        # Public data
        else:
            try:
                if not sconnection.has_restricted_data(dataset['global_id']):
                    active = 'yes'
            except:
                active = ''
        if active:
            try:
                # Join author names into one comma-separated string
                dataset['author'] = ''
                for author in dataset['authors']:
                    dataset['author'] += str(author) + ', '
                dataset['author'] = dataset['author'][:-2]
            except:
                dataset['author'] = str(dataset['description'])
            datasets.append(dataset)
        if where['harvard']:
            datasets.append(dataset)

    (username, projectname) = ('', '')
    fields = {}
    resp = make_response(render_template('search.html', projectname=projectname,
                                         username=username, datasets=datasets,
                                         searchq=query, pagetitle=pagetitle,
                                         where=where, fields=fields))
    return resp
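`search_by_keyword` is a project-local helper rather than part of the client library. Against the public Dataverse Search API it could be approximated as below (a sketch assuming the `requests` package, that `connection.host` holds the hostname, and that `s` carries `q` and optionally `per_page`):

import requests

def search_by_keyword(connection, s):
    # Query the Dataverse Search API; the returned 'data' object
    # contains the 'items' list that datasetspace iterates over
    params = {'q': s.get('q', '*'), 'per_page': s.get('per_page', 10)}
    r = requests.get('https://%s/api/search' % connection.host, params=params)
    r.raise_for_status()
    return r.json()['data']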