Пример #1
0
 def __init__(self, property_file_name):
     """Load the required properties and build the helpers that depend on them.

     Args:
         property_file_name: properties file handed to PropHelper; every
             key listed below is passed as a required property.
     """
     required = [
         'neo4j.server', 'neo4j.username', 'neo4j.password', 'db.host',
         'db.name', 'db.username', 'db.password', 'user.mapping',
         'client.id', 'client.secret', 'old.ingest.upload.dir',
         'new.ingest.upload.dir', 'uuid.api.url', 'user.nexus.token'
     ]
     self.props = PropHelper(property_file_name, required_props=required)
     prop = self.props.get

     # Graph handle built from the neo4j.* properties.
     neo4j_server = prop('neo4j.server')
     neo4j_credentials = (prop('neo4j.username'), prop('neo4j.password'))
     self.graph = Graph(neo4j_server, auth=neo4j_credentials)

     # UUIDWorker takes the client credentials followed by the db.* settings,
     # positionally and in exactly this order.
     worker_keys = ('client.id', 'client.secret', 'db.host', 'db.name',
                    'db.username', 'db.password')
     self.uuid_wrker = UUIDWorker(*[prop(key) for key in worker_keys])

     # Upload directories are stored with a trailing slash.
     old_dir = prop('old.ingest.upload.dir')
     self.old_ingest_upload_dir = file_helper.ensureTrailingSlash(old_dir)
     new_dir = prop('new.ingest.upload.dir')
     self.new_ingest_upload_dir = file_helper.ensureTrailingSlash(new_dir)

     # Full URL of the "hmuuid" endpoint of the uuid API.
     uuid_base = file_helper.ensureTrailingSlashURL(prop('uuid.api.url'))
     self.uuid_api_url = uuid_base + "hmuuid"
     self.user_token = prop('user.nexus.token')
Пример #2
0
 def __init__(self, property_file_name):
     """Load settings from a properties file and read the dataset info TSV.

     Args:
         property_file_name: path to the properties file.

     Raises:
         Exception: if the properties file does not exist, or if the file
             named by the 'vand.dataset.info.tsv' property is blank, does
             not exist, or does not end in ".tsv".
     """
     self.dataset_info = None
     self.dataset_info_tsv_path = None
     self.prop_file_name = property_file_name
     if not os.path.isfile(property_file_name):
         raise Exception("property file does not exist: " + str(property_file_name))

     # Open the properties file
     propMgr = Property()
     self.props = propMgr.load_property_files(property_file_name)

     self.data_root_path = file_helper.ensureTrailingSlash(self.get_prop('root.path.to.data'))
     self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("ingest.api.url"))
     self.nexus_token = self.get_prop("nexus.token").strip()
     self.entity_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("entity.api.url"))
     self.uuid_api_url = file_helper.ensureTrailingSlashURL(self.get_prop("uuid.api.url"))

     tsv_path = self.get_prop("vand.dataset.info.tsv")
     self.dataset_info_tsv_path = tsv_path
     if string_helper.isBlank(tsv_path) or not os.path.isfile(tsv_path):
         # str() keeps the intended error even when the property is missing
         # (None); plain concatenation would raise TypeError instead.
         raise Exception("dataset info file does not exist:" + str(tsv_path))
     if not tsv_path.endswith(".tsv"):
         raise Exception("dataset info file must be of type .tsv : " + tsv_path)

     # One plain dict per TSV row, keyed by the header columns.
     with open(tsv_path, newline='') as tsvfile:
         reader = csv.DictReader(tsvfile, delimiter='\t')
         self.dataset_info = [dict(row) for row in reader]

     self.collections = {}
     self.meta_info = None
Пример #3
0
class ProvConst(object):
    """String constants naming provenance types and attributes.

    The first group holds names with the ``prov:`` prefix (presumably W3C
    PROV terms - confirm); the second group holds project-specific names
    with the ``hubmap:`` prefix.
    """
    # Type names with the prov: prefix.
    PROV_ENTITY_TYPE = 'prov:Entity'
    PROV_ACTIVITY_TYPE = 'prov:Activity'
    PROV_AGENT_TYPE = 'prov:Agent'
    PROV_COLLECTION_TYPE = 'prov:Collection'
    PROV_ORGANIZATION_TYPE = 'prov:Organization'
    PROV_PERSON_TYPE = 'prov:Person'
    # Attribute names with the prov: prefix.
    PROV_LABEL_ATTRIBUTE = 'prov:label'
    PROV_TYPE_ATTRIBUTE = 'prov:type'
    PROV_GENERATED_TIME_ATTRIBUTE = 'prov:generatedAtTime'

    # Attribute names with the hubmap: prefix.
    HUBMAP_DOI_ATTRIBUTE = 'hubmap:doi'  # the DOI concept here might be a good alternative: https://sparontologies.github.io/datacite/current/datacite.html
    HUBMAP_DISPLAY_DOI_ATTRIBUTE = 'hubmap:displayDOI'
    HUBMAP_SPECIMEN_TYPE_ATTRIBUTE = 'hubmap:specimenType'
    HUBMAP_DISPLAY_IDENTIFIER_ATTRIBUTE = 'hubmap:displayIdentifier'
    HUBMAP_UUID_ATTRIBUTE = 'hubmap:uuid'
    #HUBMAP_SOURCE_UUID_ATTRIBUTE = 'hubmap:sourceUUID'
    HUBMAP_METADATA_ATTRIBUTE = 'hubmap:metadata'
    HUBMAP_MODIFIED_TIMESTAMP = 'hubmap:modifiedTimestamp'
    HUBMAP_PROV_GROUP_NAME = 'hubmap:groupName'
    HUBMAP_PROV_GROUP_UUID = 'hubmap:groupUUID'
    HUBMAP_PROV_USER_DISPLAY_NAME = 'hubmap:userDisplayName'
    HUBMAP_PROV_USER_EMAIL = 'hubmap:userEmail'
    HUBMAP_PROV_USER_UUID = 'hubmap:userUUID'
    # Path of 'hubmap-globus-groups.json' in the directory that contains
    # this module (the module path is resolved with os.path.realpath).
    groupJsonFilename = file_helper.ensureTrailingSlash(
        os.path.dirname(
            os.path.realpath(__file__))) + 'hubmap-globus-groups.json'
    def __init__(self, property_file_name):
        """Set up a dataset run: properties, log files, auth check and id list.

        The file listing dataset ids is taken from the first command line
        argument when one is given, otherwise from the 'dataset.uuid.file'
        property.  Two log files (errors and a record of actions) are
        created next to that file, the configured token is validated
        through AuthHelper, and finally the ids are read: blank lines and
        lines starting with '#' are skipped.

        Args:
            property_file_name: properties file handed to IngestProps.

        Raises:
            ErrorMessage: if no id file is configured, the id file does not
                exist, or validating the token yields a Response.
        """
        self.props = IngestProps(
            property_file_name,
            required_props=[
                'nexus.token', 'ingest.api.url', 'search.api.url',
                'uuid.api.url', 'dataset.uuid.file',
                'globus.app.client.id', 'globus.app.client.secret'
            ])
        if len(sys.argv) >= 2:
            self.id_file = sys.argv[1]
        else:
            self.id_file = self.props.get('dataset.uuid.file')
            if string_helper.isBlank(self.id_file):
                # Use the constructor argument: this class never assigns
                # self.prop_file_name, so reading it here raised
                # AttributeError instead of the intended message.
                raise ErrorMessage("ERROR: A list of dataset uuids must be specified in " + str(property_file_name) + " as as property 'dataset.uuid.file' or as the first argument on the command line")
        if not os.path.isfile(self.id_file):
            raise ErrorMessage("ERROR: Input file " + self.id_file + " does not exist.")

        base_file_name = os.path.splitext(os.path.basename(self.id_file))[0]
        dir_path = file_helper.ensureTrailingSlash(os.path.dirname(self.id_file))

        # set up log files, first for errors, second to record all actions
        cur_time = time.strftime("%d-%m-%Y-%H-%M-%S")
        error_log_filename = dir_path + base_file_name + "-errors." + cur_time + ".log"
        self.error_logger = logging.getLogger('publish.datasets.err')
        self.error_logger.setLevel(logging.INFO)
        self.error_logger.addHandler(logging.FileHandler(error_log_filename))

        recording_log_filename = dir_path + base_file_name + "-rcding." + cur_time + ".log"
        self.recording_logger = logging.getLogger('publish.datasets.rcd')
        self.recording_logger.setLevel(logging.INFO)
        self.recording_logger.addHandler(logging.FileHandler(recording_log_filename))

        # initialize variables, get required values from property file
        self.dataset_info = None
        self.dataset_info_tsv_path = None

        self.token = self.props.get('nexus.token')
        self.search_api_url = file_helper.ensureTrailingSlashURL(self.props.get('search.api.url'))
        self.ingest_api_url = file_helper.ensureTrailingSlashURL(self.props.get('ingest.api.url'))

        # initialize the auth helper and use it to get the
        # user information for the person running the script
        auth_helper = AuthHelper.create(self.props.get('globus.app.client.id'), self.props.get('globus.app.client.secret'))
        user_info = auth_helper.getUserInfo(self.token, getGroups=True)
        if isinstance(user_info, Response):
            raise ErrorMessage("error validating auth token: " + user_info.get_data(as_text=True))

        # Read the ids; the context manager closes the file even on error.
        self.ds_ids = []
        with open(self.id_file, 'r') as id_f:
            for id_line in id_f:
                if string_helper.isBlank(id_line):
                    continue
                tl = id_line.strip()
                if not tl.startswith('#'):
                    self.ds_ids.append(tl)

        self.donors_to_reindex = []
        self.set_acl_commands = []
    def remove_file(self, file_dir, file_uuid, files_info_list):
        """Delete the file registered under file_uuid and drop its record.

        The first record whose 'file_uuid' equals file_uuid is used.  Its
        file is expected at <file_dir>/<file_uuid>/<filename>; the file and
        then that per-uuid directory are removed from disk, and the record
        is removed from files_info_list in place.

        Args:
            file_dir: directory that contains the per-uuid sub-directories.
            file_uuid: value matched against each record's 'file_uuid'.
            files_info_list: list of dicts with 'file_uuid' and 'filename'.

        Returns:
            The same list object, minus the matched record (unchanged when
            nothing matches).
        """
        match = next(
            (entry for entry in files_info_list
             if entry['file_uuid'] == file_uuid),
            None)
        if match is None:
            return files_info_list

        # Remove from file system: the file first, then its directory.
        uuid_dir = file_helper.ensureTrailingSlash(file_dir) + match['file_uuid']
        os.remove(uuid_dir + os.sep + match['filename'])
        os.rmdir(uuid_dir)

        # Remove from the list
        files_info_list.remove(match)
        return files_info_list
Пример #6
0
 def resolve_to_uuid(self, identifier):
     """Resolve an identifier to its 'hmuuid' through the uuid service.

     Issues GET <uuid_url>/hmuuid/<identifier> with the bearer token.

     Args:
         identifier: id appended to the "hmuuid/" endpoint path.

     Returns:
         The 'hmuuid' value of the first record in the response, or None
         when the status is not 2xx, the body is not a non-empty list, or
         the first record has no 'hmuuid' key.
     """
     # self.uuid_url is a URL, so use the URL helper (as other code using
     # this helper module does for URLs) rather than the file-path helper.
     url = file_helper.ensureTrailingSlashURL(
         self.uuid_url) + "hmuuid/" + identifier
     headers = {'Authorization': 'Bearer ' + self.token}
     resp = requests.get(url, headers=headers)
     if not 200 <= resp.status_code < 300:
         return None

     id_info = resp.json()
     if not isinstance(id_info, list) or len(id_info) == 0:
         return None

     first_record = id_info[0]
     if 'hmuuid' in first_record:
         return first_record['hmuuid']
     return None
Пример #7
0
 def new_uuid(self, entity_type, generate_doi=False):
     """Request a new id record from the uuid service.

     Issues POST <uuid_url>/hmuuid with a JSON body carrying the entity
     type and the DOI flag.

     Args:
         entity_type: value sent as 'entityType'.
         generate_doi: when true, 'generateDOI' is sent as the string
             "true", otherwise as the string "false".

     Returns:
         The first element of the JSON response.

     Raises:
         ErrorMessage: if the response status is not 2xx.
     """
     # self.uuid_url is a URL, so use the URL helper (as other code using
     # this helper module does for URLs) rather than the file-path helper.
     url = file_helper.ensureTrailingSlashURL(self.uuid_url) + "hmuuid"
     headers = {
         'Authorization': 'Bearer ' + self.token,
         'Content-Type': 'application/json'
     }
     # The flag is deliberately sent as a string, not a JSON boolean.
     gen_doi = "true" if generate_doi else "false"
     payload = {
         'entityType': entity_type,
         'generateDOI': gen_doi
     }
     resp = requests.post(url, json=payload, headers=headers)
     if not 200 <= resp.status_code < 300:
         raise ErrorMessage("Unable to generate UUID " +
                            str(resp.status_code) + ":" + str(resp.text))
     vals = resp.json()
     return vals[0]
Пример #8
0
def upload_validate(upload_uuid):
    """Start a mock validation run for an upload and return immediately.

    The upload entity is fetched from the entity web service using the
    caller's Authorization header, its upload directory is located, and
    the ``mock_run.json`` file found there is handed to
    ``__apply_mock_run`` on a background thread.

    Example mock_run.json::

        {
          "mock_processing_time_seconds": 20,
          "new_status_message": "new message",
          "new_status": "Invalid"
        }

    Args:
        upload_uuid: uuid of the upload entity to validate.

    Returns:
        Response: 202 "Accepted" once the thread has been started; the
        entity service's own body and status when that status is >= 300;
        500 when the upload directory or the mock configuration file does
        not exist.
    """
    ingest_helper = IngestFileHelper(app.config)
    url = commons_file_helper.ensureTrailingSlashURL(
        app.config['ENTITY_WEBSERVICE_URL']) + 'entities/' + upload_uuid
    auth_headers = {
        'Authorization': request.headers["AUTHORIZATION"],
        'X-Hubmap-Application': 'ingest-api'
    }
    resp = requests.get(url, headers=auth_headers)
    if resp.status_code >= 300:
        return Response(resp.text, resp.status_code)

    upload = resp.json()
    prev_status = upload['status']
    upload_path = ingest_helper.get_upload_directory_absolute_path(
        None, upload['group_uuid'], upload_uuid)
    if not os.path.exists(upload_path):
        return Response(f"upload directory does not exist: {upload_path}", 500)

    mock_cfg_path = commons_file_helper.ensureTrailingSlash(
        upload_path) + "mock_run.json"
    if not os.path.exists(mock_cfg_path):
        # Explicit 500, matching the directory check above; without a status
        # argument the error was sent with the framework's default status.
        return Response(
            f"mock configuration json file does not exist: {mock_cfg_path}",
            500)

    # read the mock_run json file into a dict
    with open(mock_cfg_path) as json_file:
        mock_run = json.load(json_file)

    # The run is applied in the background so the request is not blocked.
    worker = threading.Thread(
        target=__apply_mock_run,
        args=[mock_run, upload_path, upload_uuid, auth_headers, prev_status])
    worker.start()

    return Response("Accepted", 202)
 def __init__(self, upload_temp_dir, upload_dir, uuid_api_url):
     """Remember the upload locations and the uuid API URL.

     Args:
         upload_temp_dir: temporary upload directory; stored after being
             passed through file_helper.ensureTrailingSlash.
         upload_dir: upload directory; stored the same way.
         uuid_api_url: stored exactly as given.
     """
     with_slash = file_helper.ensureTrailingSlash
     temp_dir = with_slash(upload_temp_dir)
     final_dir = with_slash(upload_dir)
     self.upload_temp_dir = temp_dir
     self.upload_dir = final_dir
     self.uuid_api_url = uuid_api_url
Пример #10
0
class AuthCache:
    """Class-level cache of token introspection results and group/role data.

    All state lives in class attributes and every entry point is a static
    method, so the cache is shared by the whole process.  ``userLock``
    guards the per-token cache; ``groupLock`` guards group and role loading.
    """

    # token -> record built by __authRecord ('info', 'timestamp', 'valid')
    cache = {}
    userLock = threading.RLock()
    groupLock = threading.RLock()
    groupIdByName = None
    # role name -> role record; populated by getHMRoles()
    roleIdByName = None
    # NOTE(review): nothing in this class fills groupsById; presumably
    # identifyGroups() does - confirm.
    groupsById = {}
    rolesById = {}
    groupLastRefreshed = None
    # The three JSON files below are looked up in this module's directory.
    groupJsonFilename = file_helper.ensureTrailingSlash(os.path.dirname(os.path.realpath(__file__))) + 'hubmap-globus-groups.json'
    globusGroups = None
    roleJsonFilename = file_helper.ensureTrailingSlash(os.path.dirname(os.path.realpath(__file__))) + 'hubmap-globus-roles.json'
    procSecret = None
    admin_groups = None
    processUserFilename = file_helper.ensureTrailingSlash(os.path.dirname(os.path.realpath(__file__))) + 'hubmap-process-user.json'
    processUser = None

    @staticmethod
    def setProcessSecret(secret):
        """Store the process secret; only the first call has any effect."""
        if AuthCache.procSecret is None:
            AuthCache.procSecret = secret

    @staticmethod
    def setGlobusGroups(globusJson):
        """Replace the group data with identifyGroups(globusJson)."""
        AuthCache.globusGroups = identifyGroups(globusJson)

    @staticmethod
    def getHMGroups():
        """Return the group data.

        The groups supplied through setGlobusGroups() are returned when
        present; otherwise the bundled JSON file is loaded and passed
        through identifyGroups().

        The previous version guarded this with a freshness check on
        groupLastRefreshed and implicitly returned None when the check
        failed, which broke callers that use the result.  Both outcomes of
        that check need the same value, so the check was dropped.
        """
        with AuthCache.groupLock:
            if AuthCache.globusGroups is not None:
                return AuthCache.globusGroups
            with open(AuthCache.groupJsonFilename) as jsFile:
                groups = json.load(jsFile)
            return identifyGroups(groups)

    @staticmethod
    def getHMGroupsById():
        """Return groupsById, calling getHMGroups() first when it is empty."""
        if len(AuthCache.groupsById) == 0:
            AuthCache.getHMGroups()
        return AuthCache.groupsById

    @staticmethod
    def getHMRoles():
        """Return the roles keyed by lower-cased, stripped role name.

        Roles are (re)loaded from the roles JSON file when the refresh
        window has passed or nothing has been loaded yet; otherwise the
        mapping from the previous load is returned.  Only roles with
        non-blank 'name', 'uuid' and 'displayname' are kept.  Each loaded
        role is also recorded in rolesById under its uuid as written in
        the file.
        """
        with AuthCache.groupLock:
            now = datetime.datetime.now()
            diff = None
            if AuthCache.groupLastRefreshed is not None:
                diff = now - AuthCache.groupLastRefreshed
            stale = diff is None or diff.days > 0 or diff.seconds > TOKEN_EXPIRATION
            # Keep the result on the class: the old code returned a local
            # that was unbound (UnboundLocalError) whenever no reload ran.
            if stale or AuthCache.roleIdByName is None:
                roleIdByName = {}
                with open(AuthCache.roleJsonFilename) as jsFile:
                    roles = json.load(jsFile)
                required_keys = ('name', 'uuid', 'displayname')
                for role in roles:
                    if not all(key in role for key in required_keys):
                        continue
                    if any(string_helper.isBlank(role[key]) for key in required_keys):
                        continue
                    role_name = role['name'].lower().strip()
                    role_obj = {'name': role_name,
                                'uuid': role['uuid'].lower().strip(),
                                'displayname': role['displayname']}
                    roleIdByName[role_name] = role_obj
                    AuthCache.rolesById[role['uuid']] = role_obj
                AuthCache.roleIdByName = roleIdByName
            return AuthCache.roleIdByName

    @staticmethod
    def getUserWithGroups(appKey, token):
        """Shorthand for getUserInfo(appKey, token, getGroups=True)."""
        return AuthCache.getUserInfo(appKey, token, getGroups=True)

    @staticmethod
    def getUserInfo(appKey, token, getGroups=False):
        """Return the (cached) introspection info for a token.

        A cached record is reused until it is older than TOKEN_EXPIRATION
        seconds (the original comment says 15 minutes).  When group data is
        requested but the cached info has none, the record is rebuilt.

        Args:
            appKey: value sent after 'Basic ' in the introspection request.
            token: the token to look up.
            getGroups: also attach group membership to the info.

        Returns:
            The info dict, or a Response describing the failure.
        """
        with AuthCache.userLock:
            record = AuthCache.cache.get(token)
            if record is not None:
                age = datetime.datetime.now() - record['timestamp']
                if age.days > 0 or age.seconds > TOKEN_EXPIRATION:
                    record = None
            if record is None:
                record = AuthCache.__authRecord(appKey, token, getGroups)
                AuthCache.cache[token] = record

            if isinstance(record['info'], Response):
                return record['info']

            if getGroups and "hmgroupids" not in record['info']:
                record = AuthCache.__authRecord(appKey, token, getGroups)
                AuthCache.cache[token] = record

            return record['info']

    @staticmethod
    def __authRecord(appKey, token, getGroups=False):
        """Build a cache record: the info, a timestamp and a validity flag.

        'valid' is False when the lookup produced a Response or the info
        has no non-None 'active' entry; otherwise it is info['active'].
        For valid records with a non-blank 'scope', the lower-cased,
        whitespace-split scopes are added to the info as 'hmscopes'.
        """
        rVal = {}
        now = datetime.datetime.now()
        info = AuthCache.__userInfo(appKey, token, getGroups)
        rVal['info'] = info
        rVal['timestamp'] = now

        if isinstance(info, Response):
            rVal['valid'] = False
        elif 'active' not in info or info['active'] is None:
            rVal['valid'] = False
        else:
            rVal['valid'] = info['active']

        if rVal['valid'] and 'scope' in info and not string_helper.isBlank(info['scope']):
            info['hmscopes'] = info['scope'].lower().strip().split()

        return rVal

    @staticmethod
    def __get_admin_groups():
        """Return (building once) the group list used for the process secret."""
        if AuthCache.admin_groups is None:
            # start with hubmap-read group
            admin_grps = ["5777527e-ec11-11e8-ab41-0af86edb4424", data_admin_group_uuid]
            all_groups = AuthCache.getHMGroups()
            # add all data provider groups
            for grp_name in all_groups.keys():
                grp = all_groups[grp_name]
                if 'data_provider' in grp and grp['data_provider']:
                    # NOTE(review): this appends the whole group record while
                    # the two entries above are uuid strings - confirm whether
                    # the group's uuid was intended.
                    admin_grps.append(grp)
            AuthCache.admin_groups = admin_grps
        return AuthCache.admin_groups

    # NOTE: commented-out code for a group lookup through the deprecated
    # Nexus API (with a Groups API fallback) used to live here; it has been
    # removed.  Only the Groups API lookup below remains.

    @staticmethod
    def __get_user_groups_via_groups_api(token):
        """Return the ids of the groups the token's user belongs to.

        The process secret maps to the admin group list.  Any other token
        is sent to the Globus Groups API (GET /v2/groups/my_groups).

        Returns:
            A list of group ids, or a Response (500) when the call fails
            or its body cannot be processed.
        """
        if token == AuthCache.procSecret:
            return AuthCache.__get_admin_groups()
        # GET /v2/groups/my_groups
        url = 'https://groups.api.globus.org/v2/groups/my_groups'
        headers = {'Authorization': 'Bearer ' + token}
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            return Response("Unable to get user groups\n"+response.text, 500)
        try:
            jsonResp = response.json()
            ids = []
            for grp in jsonResp:
                if 'id' in grp:
                    ids.append(grp['id'])
            return ids
        except Exception as e:
            return Response("unable to get groups from Groups api while gathering user groups\n" + str(e), 500)

    @staticmethod
    def __userInfo(applicationKey, authToken, getGroups=False):
        """Introspect a token and optionally attach group membership.

        The process secret short-circuits to the process user loaded from
        its JSON file.  Any other token is posted to the Globus Auth
        introspection endpoint.

        Returns:
            The introspection dict for an active token; with getGroups it
            also carries 'hmgroupids', 'group_membership_ids' (the same
            list object) and 'hmroleids' (always empty here).  Otherwise a
            Response: 500 when the call or its parsing fails, 401 when the
            token is not active, or the group lookup's own Response.
        """
        if authToken == AuthCache.procSecret:
            if AuthCache.processUser is None:
                with open(AuthCache.processUserFilename) as jsFile:
                    AuthCache.processUser = json.load(jsFile)
            return AuthCache.processUser

        postHeaders = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Authorization': 'Basic ' + applicationKey
            }
        tdata = 'token=' + authToken
        url = 'https://auth.globus.org/v2/oauth2/token/introspect'
        response = requests.post(url, headers=postHeaders, data=tdata)
        if response.status_code != 200:
            return Response("Unable to introspect user from token", 500)
        # Every path through the try/except returns, so the unreachable
        # "Login session not active." check that used to follow was removed.
        try:
            jsonResp = response.json()
            if 'active' not in jsonResp or not jsonResp['active']:
                return Response("Non-active login", 401)
            if getGroups:
                if len(AuthCache.groupsById) == 0:
                    AuthCache.getHMGroups()
                if len(AuthCache.rolesById) == 0:
                    AuthCache.getHMRoles()
                groups = AuthCache.__get_user_groups_via_groups_api(authToken)
                if isinstance(groups, Response):
                    return groups
                # De-duplicate while keeping the order from the API.
                grp_list = []
                role_list = []
                for group_uuid in groups:
                    if group_uuid not in grp_list:
                        grp_list.append(group_uuid)
                jsonResp['hmgroupids'] = grp_list
                jsonResp['group_membership_ids'] = grp_list
                jsonResp['hmroleids'] = role_list
            return jsonResp
        except Exception as e:
            print("ERROR!: " + str(e))
            return Response("Unable to parse json response on user introspect", 500)