def update_user_tags(user):
    '''Function to update the user tags from their home node.'''

    if user.profile.openid() is not None:

        openid = user.profile.openid()
        url = "http://%s/share/user/?openid=%s" % (user.profile.site.domain, openid)
        print 'Updating user tags: querying URL=%s' % url
        jobj = getJson(url)

        if jobj is not None and openid in jobj['users'] and 'project_tags' in jobj['users'][openid]:

            # loop over tags found on user home node
            tags = []
            for tagName in jobj['users'][openid]['project_tags']:
                try:
                    tags.append(ProjectTag.objects.get(name__iexact=tagName))
                except ObjectDoesNotExist:
                    pass  # tag not found in local database

            # store tags in local user profile
            userProfile = UserProfile.objects.get(id=user.profile.id)
            userProfile.tags.clear()
            userProfile.tags = tags
            userProfile.save()
            transaction.commit()
            print 'User: %s updated for tags: %s' % (user, tags)

# NOTE: connecting the login signal is not needed because every time the user logs in,
# the session is refreshed and updating of projects is triggered already by the CoG session middleware
#user_logged_in.connect(update_user_projects_at_login)
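# All of the helpers in this module delegate HTTP fetching and JSON parsing to a shared
# getJson(url) utility. The definition below is a minimal sketch of the assumed contract
# (fetch the URL, parse the body as JSON, return None on any error), NOT the actual CoG
# implementation, which may differ in timeouts, logging, and error handling.
def getJson(url):
    '''Minimal sketch (assumption): return the parsed JSON object served at url, or None on error.'''
    import json
    import urllib2  # Python 2 standard library, consistent with this module's print statements
    try:
        return json.load(urllib2.urlopen(url, timeout=10))
    except Exception as e:
        print 'Error retrieving URL=%s: %s' % (url, e)
        return None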
def sync(self):
    '''Updates the list of remote projects from all peer sites.'''

    sites = {}
    totalNumberOfProjects = 0
    totalNumberOfUsers = 0

    # loop over federated peer sites + local site
    allSites = getPeerSites()
    local_site = Site.objects.get_current()
    allSites.append(local_site)

    for site in allSites:

        url = "http://%s/share/projects/" % site.domain
        numberOfUsers = 0
        numberOfProjects = 0

        jobj = getJson(url)
        if jobj is None:
            status = 'ERROR'
        else:
            status = 'OK'
            numberOfProjects = len(jobj["projects"])
            numberOfUsers = int(jobj.get("users", 0))
            if site != local_site:
                # harvest projects, tags from remote site
                self._harvest(jobj)

        sites[site.id] = {'name': site.name, 'domain': site.domain, 'url': url, 'status': status,
                          'numberOfProjects': numberOfProjects, 'numberOfUsers': numberOfUsers}
        totalNumberOfProjects += numberOfProjects
        totalNumberOfUsers += numberOfUsers

    return sites, totalNumberOfProjects, totalNumberOfUsers
def ac_list(request):
    """
    View to display all access control groups that may be used to restrict data access.
    This view is intentionally open to the public (for now).
    """

    # loop over local node + peer nodes
    groups = {}
    sites = [Site.objects.get_current()] + getPeerSites()
    for site in sites:
        url = "http://%s/share/groups/" % site.domain
        jobj = getJson(url)
        if jobj is not None:  # no error in fetching URL
            site_name = jobj["site"]["name"]
            site_domain = jobj["site"]["domain"]
            # loop over groups for this node
            for group_name, group_dict in jobj["groups"].items():
                # augment group dictionary
                group_dict["site_name"] = site_name
                group_dict["site_domain"] = site_domain
                groups[group_name] = group_dict

    # order groups by name
    _groups = OrderedDict(sorted(groups.items()))

    # remove ESGF root group
    if "wheel" in _groups:
        del _groups["wheel"]

    return render(request, "cog/access_control/list.html",
                  {"groups": _groups, "title": "ESGF Data Access Control Groups"})
def discoverSiteForUser(openid):
    '''IMPORTANT: call this function ONLY at account creation as it makes requests to all peer nodes.'''

    for site in getPeerSites():  # loop over enabled peer nodes
        url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
        jobj = getJson(url)
        if jobj is not None:
            for key, value in jobj['users'].items():
                if str(value['home_site_domain']) == site.domain:
                    return site  # node found

    # node not found
    return None
def getDataCartsForUser(openid):

    dcs = {}  # dictionary of (site_name, datacart_size) items

    #for site in Site.objects.all():  # loop over all sites (e.g. nodes) in database. Note: includes current node
    for site in getPeerSites():  # loop over nodes that are federated
        url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
        print 'Querying for datacart: url=%s' % url
        jobj = getJson(url)
        if jobj is not None:
            for key, value in jobj['users'].items():
                dcs[site] = int(value['datacart']['size'])

    return dcs
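# Hypothetical usage sketch combining the two helpers above: locate the user's home node
# once at account creation, then summarize data cart sizes across the federation.
# The openid value is illustrative only (borrowed from the JSON example further below).
def _example_datacart_summary():
    openid = 'https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin'
    home_site = discoverSiteForUser(openid)  # Site object, or None if no peer node claims this openid
    for site, size in getDataCartsForUser(openid).items():
        print 'Site=%s datacart size=%d' % (site.domain, size)
    return home_site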
def download(request):
    '''
    View that initiates the Globus download workflow by collecting and optionally sub-selecting
    the GridFTP URLs to be downloaded.
    This view can be invoked via GET (link from search page, one dataset only)
    or POST (link from data cart page, multiple datasets at once).
    Example URL: http://localhost:8000/globus/download/
                 ?dataset=obs4MIPs.NASA-JPL.AIRS.mon.v1%[email protected],obs4MIPs.NASA-JPL.MLS.mon.v1%[email protected]
                 &method=web
    '''

    # retrieve request parameters
    datasets = getQueryDict(request).get('dataset', '').split(",")

    # optional query filter
    query = getQueryDict(request).get('query', None)

    # maximum number of files to query for, if specified
    limit = request.GET.get('limit', DOWNLOAD_LIMIT)

    # map of (data_node, list of GridFTP URLs to download)
    download_map = {}

    # loop over requested datasets
    for dataset in datasets:

        # query each index_node for all files belonging to that dataset
        (dataset_id, index_node) = str(dataset).split('@')
        params = [('type', "File"), ('dataset_id', dataset_id), ('offset', '0'),
                  ('limit', limit), ('fields', 'url'), ("format", "application/solr+json")]
        if query is not None and len(query.strip()) > 0:
            params.append(('query', query))

        # optional shard
        shard = request.GET.get('shard', '')
        if shard is not None and len(shard.strip()) > 0:
            params.append(('shards', shard + "/solr"))  # '&shards=localhost:8982/solr'
        else:
            params.append(("distrib", "false"))

        url = "http://" + index_node + "/esg-search/search?" + urllib.urlencode(params)
        print 'Searching for files at URL: %s' % url
        jobj = getJson(url)

        # parse response for GridFTP URLs
        if jobj is not None:
            for doc in jobj['response']['docs']:

                access = {}
                for url in doc['url']:
                    # example URLs:
                    # 'http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|application/netcdf|HTTPServer'
                    # 'http://esg-datanode.jpl.nasa.gov/thredds/dodsC/esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc.html|application/opendap-html|OPENDAP'
                    # 'globus:8a3f3166-e9dc-11e5-97d6-22000b9da45e//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|Globus|Globus'
                    # 'gsiftp://esg-datanode.jpl.nasa.gov:2811//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|application/gridftp|GridFTP'
                    parts = url.split('|')
                    access[parts[2].lower()] = parts[0]

                if 'globus' in access:
                    m = re.match('globus:([^/]*)(.*)', access['globus'])
                    if m:
                        gendpoint_name = m.group(1)
                        path = m.group(2)
                        if gendpoint_name not in download_map:
                            download_map[gendpoint_name] = []  # insert empty list of paths
                        download_map[gendpoint_name].append(path)
                else:
                    print 'The file is not accessible through Globus'

        else:
            return HttpResponseServerError("Error querying for files URL")

    # store map in session
    request.session[GLOBUS_DOWNLOAD_MAP] = download_map
    print 'Stored Globus Download Map=%s at session scope' % download_map

    # redirect after post (to display page)
    return HttpResponseRedirect(reverse('globus_transfer'))
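# For reference, the resulting download_map groups file paths by Globus endpoint name.
# Using the 'globus:' example URL from the comments above, it would look like:
#
#   { '8a3f3166-e9dc-11e5-97d6-22000b9da45e':
#         ['//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc'] }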
def get_all_shared_user_info(user, includeCurrentSite=True):
    """Queries all nodes (including local node) for projects and groups the user belongs to.
       Returns two lists of dictionaries but does NOT update the local database.

       Example of JSON data retrieved from each node:
       {
           "users": {
               "https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin": {
                   "home_site_domain": "cog-esgf.esrl.noaa.gov",
                   "openid": "https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin",
                   "datacart": {
                       "size": 0
                   },
                   "home_site_name": "NOAA ESRL ESGF-CoG",
                   "groups": {
                       "HIWPP": {},
                       "NCPP DIP": {
                           "admin": true,
                           "publisher": true,
                           "super": true,
                           "user": true
                       },
                       "NOAA ESRL": {
                           "super": true
                       }
                   },
                   "projects": {
                       "AlaskaSummerSchool": ["admin", "user"],
                       "CF-Grids": ["admin"],
                       "CFSS": ["admin", "user"],
                       .....
    """

    # dictionary of information retrieved from each node, including current node
    userDict = {}  # node --> dictionary of user information

    try:
        if user.profile.openid() is not None:
            openid = user.profile.openid()
            print 'Retrieving projects, groups for user with openid=%s' % openid

            # loop over remote (enabled) nodes, possibly add current node
            sites = list(getPeerSites())
            if includeCurrentSite:
                sites = sites + [Site.objects.get_current()]

            for site in sites:
                url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
                print 'Retrieving user projects and groups from URL=%s' % url
                jobj = getJson(url)
                if jobj is not None and openid in jobj['users']:
                    userDict[site] = jobj['users'][openid]
                else:
                    print 'Openid=%s not found at site=%s' % (openid, site)

    except UserProfile.DoesNotExist:
        pass  # user profile not yet created

    # restructure information as list of (project object, user roles) and (group name, group roles) tuples
    projects = []
    groups = []
    for usite, udict in userDict.items():

        if udict.get('projects', None):
            for pname, proles in udict["projects"].items():
                try:
                    proj = Project.objects.get(short_name__iexact=pname)
                    projects.append((proj, proles))
                except ObjectDoesNotExist:
                    pass

        if udict.get('groups', None):
            for gname, gdict in udict["groups"].items():
                groles = []
                for grole, approved in gdict.items():
                    if approved:
                        groles.append(grole)
                groups.append((gname, groles))

    # sort by project short name
    projects.sort(key=lambda x: x[0].short_name.lower())
    return (projects, groups)
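# Hypothetical usage sketch: flattening the (object, roles) tuples returned above into
# display strings, e.g. for a user profile page (names below are illustrative, not CoG API):
def _example_memberships(user):
    projects, groups = get_all_shared_user_info(user)
    for proj, proles in projects:
        print 'Project=%s roles=%s' % (proj.short_name, ', '.join(proles))
    for gname, groles in groups:
        print 'Group=%s roles=%s' % (gname, ', '.join(groles))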