Example #1
from dataverse import Connection

def create_dataset():
    # DV_HOST, TOKEN and get_geotweet_params() are defined elsewhere
    # in the originating module (see the sketch below).
    print('make connection...')
    connection = Connection(DV_HOST, TOKEN, use_https=False)

    print('connection', connection)
    dataverse = connection.get_dataverse('root')
    print('base_url', connection.base_url)

    title, description, creator = get_geotweet_params()

    kwargs = dict(notes="notes go here")
    dataverse.create_dataset(title, description, creator, **kwargs)
    print('dataset created')
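A minimal sketch of the module-level names the snippet assumes; the host, token, and metadata values below are placeholders, not taken from the original project:

# Hypothetical configuration assumed by create_dataset() above.
DV_HOST = "demo.dataverse.org"       # placeholder host
TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx"    # placeholder API token

def get_geotweet_params():
    # Placeholder title, description, and creator for the new dataset.
    return ("Geotweet sample", "Tweets with geolocation", "Jane Doe")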
Example #2
    def clone(self):
        # Disable SSL validation when connecting to the Dataverse host
        c = Connection(self.user, self.password, self.host, disable_ssl=True)
        if not c.connected:
            raise DataverseArchiverError('Invalid credentials or host')

        dv = c.get_dataverse(self.dataverse_name)
        if not dv:
            raise DataverseArchiverError('Invalid dataverse alias')

        study = dv.get_study_by_doi(self.study_doi)
        if not study:
            raise DataverseArchiverError('Invalid study DOI')

        # One download task per released file, run in parallel;
        # clone_done fires once every download has finished.
        header = [
            self.download_file.si(self, f.download_url)
            for f in
            study.get_released_files()
        ]

        return chord(header, self.clone_done.s(self))
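`chord` here is presumably Celery's chord primitive: run a group of tasks in parallel, then invoke a callback with their results. A standalone sketch of the same pattern, assuming a configured Celery app and broker:

from celery import Celery, chord

app = Celery('archiver', broker='redis://localhost:6379/0')  # assumed broker

@app.task
def download_file(url):
    return url  # stand-in for fetching one file

@app.task
def clone_done(results):
    return len(results)  # runs once after every download task finishes

urls = ['https://example.org/a.nc', 'https://example.org/b.nc']
chord([download_file.s(u) for u in urls], clone_done.s()).apply_async()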
Example #3
import subprocess

from dataverse import Connection
from dataverse.exceptions import DataverseError

def fetch_dz(token):
    host = "dataverse.harvard.edu"
    connection = Connection(host, token)
    dataverse = connection.get_dataverse("jakobshavn-inversions",
                                         refresh=True)

    if not dataverse:
        raise DataverseError("No dataverse found!")

    title = ("Ice surface elevation and change at Jakobshavn Isbrae, "
             "West Greenland, 2008-2014")
    dataset = dataverse.get_dataset_by_title(title, refresh=True)

    if not dataset:
        raise DataverseError("No data set found!")

    data_file = dataset.get_file("dZ_grid.nc", "latest")
    url = data_file.download_url

    # Fetch the NetCDF grid unless it is already on disk.
    subprocess.call(["wget", "--no-clobber", url, "-O", "data/dZ_grid.nc"])
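If wget is not available, the same no-clobber download can be done with the standard library; a minimal sketch (the function name is mine):

import os
import urllib.request

def download_no_clobber(url, dest):
    # Mirror wget --no-clobber: skip the download if dest already exists.
    if os.path.exists(dest):
        return
    os.makedirs(os.path.dirname(dest) or '.', exist_ok=True)
    urllib.request.urlretrieve(url, dest)

# e.g. download_no_clobber(url, "data/dZ_grid.nc")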
Example #4
import os
import sys
import json
import urllib.request
from subprocess import Popen, PIPE, STDOUT

from dataverse import Connection

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../modules')))
from config import configuration

def loadjson(apiurl):
    # Fetch the URL and parse the response body as JSON.
    with urllib.request.urlopen(apiurl) as f:
        return json.load(f)

config = configuration()

# host and token are expected to come from the configuration
# (they are not defined in the original snippet).
connection = Connection(host, token)
dataverse = connection.get_dataverse('clioinfra')

for item in dataverse.get_contents():
    # e.g. u'protocol': u'hdl', u'authority': u'10622', u'identifier': u'R8EJJF'
    try:
        handle = str(item['protocol']) + ':' + str(item['authority']) + "/" + str(item['identifier'])
        datasetid = item['id']
        url = "https://" + str(host) + "/api/datasets/" + str(datasetid) + "/versions/1.0?&key=" + str(token)
        print(item)
        dataframe = loadjson(url)
        for fileitem in dataframe['data']['files']:
            runimport = os.path.abspath(os.path.join(os.path.dirname(__file__)))
            runimport = str(runimport) + "/import.py -d 'https://" + str(host) + "' -H '" + str(handle) + ":" + str(datasetid) + ":" + str(fileitem['datafile']['id']) + "' -k " + str(token)
            # print(fileitem['datafile']['id'])
            p = Popen(runimport, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
    except Exception:
        # No except clause survives in the snippet; skip items that fail.
        continue
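Passing the command as an argument list avoids the quoting pitfalls of shell=True; a sketch of the same call under that change (host, handle, datasetid, fileitem, and token are the names from the loop above):

import os
from subprocess import Popen, PIPE, STDOUT

script = os.path.abspath(os.path.join(os.path.dirname(__file__), 'import.py'))
cmd = [
    'python', script,
    '-d', 'https://' + str(host),
    '-H', '{0}:{1}:{2}'.format(handle, datasetid, fileitem['datafile']['id']),
    '-k', str(token),
]
# No shell is involved, so the handle and token need no quoting.
p = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)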
Example #5
def _connect(host, token):
    # Return a Dataverse Connection, or None if the connection fails.
    try:
        return Connection(host, token)
    except ConnectionError:
        return None
Example #6
def _connect(token, host=settings.HOST):
    # Same helper as above, with the host defaulting to settings.HOST.
    try:
        return Connection(host, token)
    except ConnectionError:
        return None
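Both helpers return None on failure, so callers are expected to check before use; a minimal usage sketch (the token is a placeholder):

connection = _connect('xxxxxxxx-xxxx-xxxx-xxxx')  # placeholder token
if connection is None:
    raise RuntimeError('Could not reach the Dataverse host')
dataverse = connection.get_dataverse('root')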
Example #7
        # Tail of a helper that removes org_name from each file's access
        # list, then writes the updated metadata back to the dataset.
        for org in list(metadata['files'][file]['org']):
            if org == org_name:
                del metadata["files"][file]["org"][org_name]
    metadata = json.dumps(metadata)
    update_metadata(dataset, metadata)
    print('User access revoked for', org_name)
    return


host = 'demo.dataverse.org'
token = 'ae1379dd-29b3-40b7-b583-c4e40cc3656e'

connection = Connection(host, token)

dataverse = connection.get_dataverse('testing_dataverse_123')

# I have created the dataset in dataverse manually: "doi:10.70122/FK2/0HH8BM"
dataset = dataverse.get_dataset_by_doi('doi:10.70122/FK2/O13BQC')

metadata_filename = "metadata.txt"

parser = argparse.ArgumentParser(description='To work with encrypted files in dataverse')

subparsers = parser.add_subparsers(help='sub-command help')

# parser for uploading a file (the snippet breaks off here; see the sketch below)
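A sketch of how one sub-command might be wired to the subparsers object above; the 'upload' name, its argument, and do_upload are hypothetical, not from the original:

# Hypothetical 'upload' sub-command; all names here are illustrative.
upload_parser = subparsers.add_parser('upload', help='encrypt and upload a file')
upload_parser.add_argument('path', help='file to upload')
upload_parser.set_defaults(func=do_upload)

args = parser.parse_args()
args.func(args)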
Example #8
def datasetspace(settings=''):
    (where, query, datasets, metadata, s, permissions) = ({}, '', [], [], {}, 'yes')
    where = {'collab': '', 'iish': '', 'harvard': ''}
    pagetitle = "Public datasets"
    config = configuration()
    if config['error']:
        return config['error']

    root = config['apiroot']
    dataversename = 'global'
    if request.args.get('dv'):
        dataversename = request.args.get('dv')
    if request.args.get('q'):
        query = request.args.get('q')
    if request.args.get('permissions'):
        permissions = request.args.get('permissions')
    if request.args.get('where'):
        where[request.args.get('where')] = 'checked="checked"'

    settings = Configuration()  # rebinds the (unused) settings parameter
    sconnection = ExtrasearchAPI(settings.config['dataverseroot'], dataversename)
    if where['harvard']:
        # Extract the bare hostname for the Dataverse connection
        findhost = re.search(r'(http://|https://)(.+)', settings.config['harvarddataverse'])
        if findhost:
            settings.config['dataversehostname'] = findhost.group(2)
        connection = Connection(settings.config['dataversehostname'], settings.config['harvardkey'])
    else:
        try:
            connection = Connection(config['hostname'], settings.config['key'])
        except Exception:
            return 'Error: no connection to Dataverse. Please try later...'

    handlestr = ''
    if query:
        s['q'] = query
        metadata = search_by_keyword(connection, s)
    else:
        try:
            dataverse = connection.get_dataverse(dataversename)
            item = dataverse.get_contents()
            active = 'yes'
        except Exception:
            active = None
        if active:
            try:
                for item in dataverse.get_contents():
                    handlestr += item['identifier'] + ' '
                    active = 'yes'
            except Exception:
                active = None

        if not active:
            handlestr = sconnection.read_all_datasets()

        if handlestr:
            s['q'] = handlestr
            s['per_page'] = 100
            metadata = search_by_keyword(connection, s)

    #return str(metadata['items'])
    for dataset in metadata['items']:
        active = ''
        # Private datasets
        if permissions == 'closed':
            pagetitle = "Restricted datasets"
            try:
                if sconnection.has_restricted_data(dataset['global_id']):
                    active = 'yes'
            except Exception:
                active = ''
        # Public data
        else:
            try:
                if not sconnection.has_restricted_data(dataset['global_id']):
                    active = 'yes'
            except Exception:
                active = ''

        if active:
            try:
                # Join all authors into one comma-separated string
                dataset['author'] = ', '.join(str(author) for author in dataset['authors'])
            except Exception:
                dataset['author'] = str(dataset['description'])

            datasets.append(dataset)
        if where['harvard']:
            datasets.append(dataset)

    (username, projectname) = ('', '')
    fields = {}
    resp = make_response(render_template('search.html', projectname=projectname, username=username,
                                         datasets=datasets, searchq=query, pagetitle=pagetitle,
                                         where=where, fields=fields))
    return resp
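Since the function reads request.args and returns make_response(...), it presumably runs as a Flask view; a minimal sketch of the wiring (the route path and app object are assumptions):

from flask import Flask

app = Flask(__name__)

# Hypothetical route; the original module defines its own app and URLs.
@app.route('/datasets')
def datasets_view():
    return datasetspace()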