def delete_work_for_orcid(orcid):
    oauth_token = TestOrcidPusherBase._oauth_token(orcid)
    client = OrcidClient(oauth_token, orcid)
    all_work = client.get_all_works_summary()
    for work in all_work.get("group", []):
        putcode = work["work-summary"][0]["put-code"]
        client.delete_work(putcode)
def test_get_putcodes_for_source_source_client_id_none(self):
    orcid = '0000-0002-4490-1930'
    client = OrcidClient('mytoken', orcid)
    response = client.get_all_works_summary()
    response.raise_for_result()
    putcodes = list(
        response.get_putcodes_for_source_iter('0000-0001-8607-8906'))
    assert len(putcodes) == 90
class OrcidPutcodeGetter(object):
    def __init__(self, orcid, oauth_token):
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config[
            'ORCID_APP_CREDENTIALS']['consumer_key']

    def get_all_inspire_putcodes(self):
        """
        Get all the Inspire putcodes for the given ORCID.
        """
        putcodes = self._get_all_putcodes()
        if not putcodes:
            return
        # Filter out putcodes that do not belong to Inspire.
        for putcode, url in self._get_urls_for_putcodes(putcodes):
            if INSPIRE_WORK_URL_REGEX.match(url):
                yield putcode, url

    def _get_all_putcodes(self):
        response = self.client.get_all_works_summary()
        utils.log_service_response(
            logger, response, 'in OrcidPutcodeGetter works summary')
        try:
            response.raise_for_result()
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return list(
            response.get_putcodes_for_source(self.source_client_id_path))

    def _get_urls_for_putcodes(self, putcodes):
        # The call get_bulk_works_details_iter() can be very expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works, 8 calls would be performed
        # with a total data transfer > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            utils.log_service_response(
                logger, response, 'in OrcidPutcodeGetter works details')
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            chained = itertools.chain(chained, response.get_putcodes_and_urls())
        return chained
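# Usage sketch (illustrative, not part of the original code): shows how the
# OrcidPutcodeGetter above could be driven. The helper name and arguments are
# hypothetical; a Flask application context (for current_app) and a valid
# orcid/oauth_token pair are assumed.
def print_inspire_putcodes_sketch(orcid, oauth_token):
    getter = OrcidPutcodeGetter(orcid, oauth_token)
    # Each item is a (putcode, url) pair pointing to an Inspire record.
    for putcode, url in getter.get_all_inspire_putcodes():
        print(putcode, url)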
def __init__(self, orcid, recid, oauth_token,
             do_fail_if_duplicated_identifier=False,
             record_db_version=None):
    self.orcid = orcid
    self.recid = recid
    self.oauth_token = oauth_token
    self.do_fail_if_duplicated_identifier = do_fail_if_duplicated_identifier
    self.record_db_version = record_db_version
    self.inspire_record = self._get_inspire_record()
    self.cache = OrcidCache(orcid, recid)
    self.lock_name = 'orcid:{}'.format(self.orcid)
    self.client = OrcidClient(self.oauth_token, self.orcid)
    self.converter = None
def __init__(self, orcid, recid, oauth_token):
    self.orcid = orcid
    self.recid = recid
    self.oauth_token = oauth_token
    try:
        self.inspire_record = get_db_record('lit', recid)
    except RecordGetterError as exc:
        raise exceptions.RecordNotFoundException(
            'recid={} not found for pid_type=lit'.format(self.recid),
            from_exc=exc)
    self.cache = OrcidCache(orcid, recid)
    self.lock_name = 'orcid:{}'.format(self.orcid)
    self.client = OrcidClient(self.oauth_token, self.orcid)
    self.xml_element = None
def __init__(
    self,
    orcid,
    recid,
    oauth_token,
    pushing_duplicated_identifier=False,
    record_db_version=None,
):
    self.orcid = orcid
    self.recid = str(recid)
    self.oauth_token = oauth_token
    self.pushing_duplicated_identifier = pushing_duplicated_identifier
    self.record_db_version = record_db_version
    self.inspire_record = self._get_inspire_record()
    self.cache = OrcidCache(orcid, recid)
    self.lock_name = "orcid:{}".format(self.orcid)
    self.client = OrcidClient(self.oauth_token, self.orcid)
    self.converter = None
    self.cached_author_putcodes = {}
def get_putcode_for_work(orcid, token, recid):
    client = OrcidClient(token, orcid)
    response = client.get_all_works_summary()
    response.raise_for_result()

    source_client_id_path = config.get('orcid-api', 'consumer_key')
    putcodes = list(
        response.get_putcodes_for_source_iter(source_client_id_path))
    if not putcodes:
        return None

    # TODO: this has to be simplified when we push recids as external
    # identifier (thus just the get_all_works_summary() call is required to
    # match recids with putcodes).
    for response in client.get_bulk_works_details_iter(putcodes):
        response.raise_for_result()
        for putcode, url in response.get_putcodes_and_urls_iter():
            if url.endswith('/{}'.format(recid)):
                return putcode
class OrcidPutcodeGetter(object):
    def __init__(self, orcid, oauth_token):
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config[
            "ORCID_APP_CREDENTIALS"]["consumer_key"]

    def get_all_inspire_putcodes_and_recids_iter(self):
        """
        Query ORCID api and get all the Inspire putcodes for the given ORCID.
        """
        summary_response = self._get_all_works_summary()
        # `putcodes_recids` is a list like: [('43326850', 20), ('43255490', None)]
        putcodes_recids = list(
            summary_response.get_putcodes_and_recids_for_source_iter(
                self.source_client_id_path))
        putcodes_with_recids = [x for x in putcodes_recids if x[1]]
        putcodes_without_recids = [x[0] for x in putcodes_recids if not x[1]]

        for putcode, recid in putcodes_with_recids:
            yield putcode, recid

        if not putcodes_without_recids:
            return

        for putcode, recid in self._get_putcodes_and_recids_iter(
                putcodes_without_recids):
            yield putcode, recid

    def _get_all_works_summary(self):
        """
        Query ORCID api and get all the putcodes with their embedded recids
        for the given ORCID.
        An embedded recid is a recid written as external-identifier.
        """
        response = self.client.get_all_works_summary()
        LOGGER.info("Get ORCID work summary", response=response, orcid=self.orcid)
        try:
            response.raise_for_result()
        except (
            orcid_client_exceptions.TokenInvalidException,
            orcid_client_exceptions.TokenMismatchException,
            orcid_client_exceptions.TokenWithWrongPermissionException,
        ):
            LOGGER.info(
                "OrcidPutcodeGetter: deleting Orcid push access",
                token=self.oauth_token,
                orcid=self.orcid,
            )
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return response

    def _get_putcodes_and_recids_iter(self, putcodes):
        for putcode, url in self._get_urls_for_putcodes_iter(putcodes):
            # Filter out putcodes that do not belong to Inspire.
            if INSPIRE_WORK_URL_REGEX.match(url):
                recid = PidStoreBase.get_pid_from_record_uri(url)[1]
                if not recid:
                    LOGGER.error(
                        "OrcidPutcodeGetter: cannot parse recid from url",
                        url=url,
                        orcid=self.orcid,
                    )
                    continue
                yield putcode, recid

    def _get_urls_for_putcodes_iter(self, putcodes):
        # The call `get_bulk_works_details_iter()` can be expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works (each of them with many
        # authors), 8 calls would be performed with a total data transfer
        # > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            # Note: this log can be large. Consider removing it when this part
            # is considered mature.
            LOGGER.info("ORCID work details", response=response, orcid=self.orcid)
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            chained = itertools.chain(chained, response.get_putcodes_and_urls_iter())
        return chained

    def get_putcodes_and_recids_by_identifiers_iter(self, identifiers):
        """
        Yield putcode and recid for each work matched by the external
        identifiers.

        Note: external identifiers of type 'other-id' are skipped.

        Args:
            identifiers (List[inspirehep.orcid.converter.ExternalIdentifier]):
                list of all external identifiers added after the xml conversion.
        """
        summary_response = self._get_all_works_summary()
        for (
            putcode,
            ids,
        ) in summary_response.get_putcodes_and_external_identifiers_iter():
            # ids is a list like:
            # [
            #     {'external-id-relationship': 'SELF',
            #      'external-id-type': 'other-id',
            #      'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
            #      'external-id-value': '20'
            #     },...
            # ]
            # Get the recid.
            recid = self._get_recid_for_work(ids, str(putcode))
            for identifier in ids:
                id_type = identifier.get("external-id-type")
                # We are interested only in doi, arxiv, isbns.
                if not id_type or id_type.lower() == "other-id":
                    continue
                id_value = identifier.get("external-id-value")
                if not id_value:
                    continue
                if ExternalIdentifier(id_type, id_value) in identifiers:
                    yield putcode, recid

    def _get_recid_for_work(self, external_identifiers, putcode):
        """
        Get the recid for a work given its external identifiers and putcode.
        The recid might be in the external identifiers or a get_work_details()
        might be called to find it.

        Args:
            external_identifiers (List[Dict]): a list like:
                [
                    {'external-id-relationship': 'SELF',
                     'external-id-type': 'other-id',
                     'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
                     'external-id-value': '20'
                    },...
                ]
            putcode: putcode of the given work.

        Returns: the Inspire recid matching the work.
        """
        for identifier in external_identifiers:
            id_type = identifier.get("external-id-type")
            if not id_type or id_type.lower() != "other-id":
                continue
            id_url = inspire_service_orcid_utils.smartget(
                identifier, "external-id-url.value", "")
            if not re.match(r".*inspire.*", id_url, re.I):
                continue
            id_value = identifier.get("external-id-value")
            if not id_value:
                continue
            # recid found.
            return id_value

        # The recid was not found in the external_identifiers.
        # Thus we call get_bulk_works_details_iter().
        putcodes_recid = list(self._get_putcodes_and_recids_iter([putcode]))
        if putcodes_recid:
            return putcodes_recid[0][1]
class OrcidPusher(object):
    def __init__(self, orcid, recid, oauth_token):
        self.orcid = orcid
        self.recid = recid
        self.oauth_token = oauth_token
        try:
            self.inspire_record = get_db_record('lit', recid)
        except RecordGetterError as exc:
            raise exceptions.RecordNotFoundException(
                'recid={} not found for pid_type=lit'.format(self.recid),
                from_exc=exc)
        self.cache = OrcidCache(orcid, recid)
        self.lock_name = 'orcid:{}'.format(self.orcid)
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.xml_element = None

    @time_execution
    def push(self):
        putcode = self.cache.read_work_putcode()
        if not self.cache.has_work_content_changed(self.inspire_record):
            logger.info(
                'OrcidPusher cache hit for recid={} and orcid={}'.format(
                    self.recid, self.orcid))
            return putcode
        logger.info('OrcidPusher cache miss for recid={} and orcid={}'.format(
            self.recid, self.orcid))

        self.xml_element = OrcidConverter(
            record=self.inspire_record,
            url_pattern=current_app.config['LEGACY_RECORD_URL_PATTERN'],
            put_code=putcode,
        ).get_xml(do_add_bibtex_citation=True)

        try:
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.WorkAlreadyExistentException:
            # We POSTed the record as new work, but it failed because the work
            # already exists (identified by the external identifiers).
            # This means we do not have the putcode, thus we cache all
            # author's putcodes and PUT the work again.
            putcode = self._cache_all_author_putcodes()
            self._post_or_put_work(putcode)

        self.cache.write_work_putcode(putcode, self.inspire_record)
        return putcode

    @time_execution
    def _post_or_put_work(self, putcode=None):
        # Note: if putcode is None, then it's a POST (it means the work is new).
        # Otherwise a PUT (it means the work already exists and it has the given
        # putcode).

        # ORCID API allows 1 POST/PUT only for the same orcid at the same time.
        # Using `distributed_lock` to achieve this.
        with distributed_lock(self.lock_name, blocking=True):
            if putcode:
                response = self.client.put_updated_work(
                    self.xml_element, putcode)
            else:
                response = self.client.post_new_work(self.xml_element)

        utils.log_service_response(
            logger, response, 'in OrcidPusher for recid={}'.format(self.recid))
        try:
            response.raise_for_result()
            putcode = response['putcode']
        except orcid_client_exceptions.WorkAlreadyExistentException:
            # Only raisable by a POST.
            raise
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return putcode

    @time_execution
    def _cache_all_author_putcodes(self):
        logger.info(
            'New OrcidPusher cache all author putcodes for orcid={}'.format(
                self.orcid))

        putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
        putcodes_urls = list(
            putcode_getter.get_all_inspire_putcodes()
        )  # Can raise exceptions.InputDataInvalidException.

        putcode = None
        for fetched_putcode, fetched_url in putcodes_urls:
            fetched_recid = get_pid_from_record_uri(fetched_url)[1]
            if not fetched_recid:
                logger.error(
                    'OrcidPusher cache all author putcodes: cannot parse recid'
                    ' from url={} for orcid={}'.format(fetched_url, self.orcid))
                continue
            if fetched_recid == str(self.recid):
                putcode = fetched_putcode
            cache = OrcidCache(self.orcid, fetched_recid)
            cache.write_work_putcode(fetched_putcode)

        if not putcode:
            raise exceptions.PutcodeNotFoundInOrcidException(
                'No putcode was found in ORCID API for orcid={} and recid={}.'
                ' And the POST has previously failed for the same recid because'
                ' the work had already existed'.format(self.orcid, self.recid))

        # Ensure the putcode is actually in cache.
        # Note: this step is not really necessary and it can be skipped, but
        # at this moment it helps isolate a potential issue.
        if not self.cache.read_work_putcode():
            raise exceptions.PutcodeNotFoundInCacheAfterCachingAllPutcodes(
                'No putcode={} found in cache for recid={} after having'
                ' cached all author putcodes for orcid={}'.format(
                    putcode, self.recid, self.orcid))

        return putcode
class OrcidPusher(object):
    def __init__(
        self,
        orcid,
        recid,
        oauth_token,
        do_fail_if_duplicated_identifier=False,
        record_db_version=None,
    ):
        self.orcid = orcid
        self.recid = recid
        self.oauth_token = oauth_token
        self.do_fail_if_duplicated_identifier = do_fail_if_duplicated_identifier
        self.record_db_version = record_db_version
        self.inspire_record = self._get_inspire_record()
        self.cache = OrcidCache(orcid, recid)
        self.lock_name = "orcid:{}".format(self.orcid)
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.converter = None

    @time_execution
    def _get_inspire_record(self):
        try:
            inspire_record = LiteratureRecord.get_record_by_pid_value(self.recid)
        except PIDDoesNotExistError as exc:
            raise exceptions.RecordNotFoundException(
                "recid={} not found for pid_type=lit".format(self.recid),
                from_exc=exc
            )

        # If the record_db_version was given, then ensure we are about to push
        # the right record version.
        # This check is related to the fact that the orcid push at this moment is
        # triggered by the signal after_record_update (which happens after an
        # InspireRecord.commit()). This is not the actual commit to the db, which
        # might happen at a later stage or not at all.
        # Note that connecting to the proper SQLAlchemy signal would also
        # have issues: https://github.com/mitsuhiko/flask-sqlalchemy/issues/645
        if (
            self.record_db_version
            and inspire_record.model.version_id < self.record_db_version
        ):
            raise exceptions.StaleRecordDBVersionException(
                "Requested push for db version={}, but actual record db"
                " version={}".format(
                    self.record_db_version, inspire_record.model.version_id
                )
            )
        return inspire_record

    @property
    def _do_force_cache_miss(self):
        """
        Hook to force a cache miss. This can be leveraged in feature tests.
        """
        for note in self.inspire_record.get("_private_notes", []):
            if note.get("value") == "orcid-push-force-cache-miss":
                LOGGER.debug(
                    "OrcidPusher force cache miss", recid=self.recid, orcid=self.orcid
                )
                return True
        return False

    @property
    def _is_record_deleted(self):
        # Hook to force a delete. This can be leveraged in feature tests.
        for note in self.inspire_record.get("_private_notes", []):
            if note.get("value") == "orcid-push-force-delete":
                LOGGER.debug(
                    "OrcidPusher force delete", recid=self.recid, orcid=self.orcid
                )
                return True
        return self.inspire_record.get("deleted", False)

    @time_execution
    def push(self):
        putcode = None
        if not self._do_force_cache_miss:
            putcode = self.cache.read_work_putcode()
            if not self._is_record_deleted and not self.cache.has_work_content_changed(
                self.inspire_record
            ):
                LOGGER.debug(
                    "OrcidPusher cache hit", recid=self.recid, orcid=self.orcid
                )
                return putcode
        LOGGER.debug("OrcidPusher cache miss", recid=self.recid, orcid=self.orcid)

        # If the record is deleted, then delete it.
        if self._is_record_deleted:
            self._delete_work(putcode)
            return None

        self.converter = OrcidConverter(
            record=self.inspire_record,
            url_pattern=current_app.config["LEGACY_RECORD_URL_PATTERN"],
            put_code=putcode,
        )

        try:
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.WorkAlreadyExistsException:
            # We POSTed the record as new work, but it failed because the work
            # already exists (identified by the external identifiers).
            # This means we do not have the putcode, thus we cache all
            # author's putcodes and PUT the work again.
            try:
                if self.do_fail_if_duplicated_identifier:
                    raise exceptions.DuplicatedExternalIdentifierPusherException
                self._push_work_with_clashing_identifier()
                putcode = self._post_or_put_work(putcode)
            except orcid_client_exceptions.WorkAlreadyExistsException:
                putcode = self._cache_all_author_putcodes()
                if not putcode:
                    msg = (
                        "No putcode was found in ORCID API for orcid={} and recid={}."
                        " And the POST has previously failed for the same recid because"
                        " the work had already existed".format(self.orcid, self.recid)
                    )
                    raise exceptions.PutcodeNotFoundInOrcidException(msg)
                putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.DuplicatedExternalIdentifierException:
            # We PUT a record changing its identifier, but there is another work
            # in ORCID with the same identifier. We need to find out the recid
            # of the clashing work in ORCID and push a fresh version of that
            # record.
            # This scenario might be triggered by a merge of 2 records in Inspire.
            if self.do_fail_if_duplicated_identifier:
                raise exceptions.DuplicatedExternalIdentifierPusherException
            self._push_work_with_clashing_identifier()
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.PutcodeNotFoundPutException:
            self.cache.delete_work_putcode()
            self.converter = OrcidConverter(
                record=self.inspire_record,
                url_pattern=current_app.config["LEGACY_RECORD_URL_PATTERN"],
                put_code=None,
            )
            putcode = self._post_or_put_work()
        except (
            orcid_client_exceptions.TokenInvalidException,
            orcid_client_exceptions.TokenMismatchException,
            orcid_client_exceptions.TokenWithWrongPermissionException,
        ):
            LOGGER.info(
                "Deleting Orcid push access", token=self.oauth_token, orcid=self.orcid
            )
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)

        self.cache.write_work_putcode(putcode, self.inspire_record)
        return putcode

    @time_execution
    def _post_or_put_work(self, putcode=None):
        # Note: if putcode is None, then it's a POST (it means the work is new).
        # Otherwise a PUT (it means the work already exists and it has the given
        # putcode).
        xml_element = self.converter.get_xml(do_add_bibtex_citation=True)
        # ORCID API allows 1 non-idempotent call only for the same orcid at
        # the same time. Using `distributed_lock` to achieve this.
        with utils.distributed_lock(self.lock_name, blocking=True):
            if putcode:
                response = self.client.put_updated_work(xml_element, putcode)
            else:
                response = self.client.post_new_work(xml_element)

        LOGGER.info("POST/PUT ORCID work", response=response, recid=self.recid)
        response.raise_for_result()
        return response["putcode"]

    @time_execution
    def _cache_all_author_putcodes(self):
        LOGGER.debug("New OrcidPusher cache all author putcodes", orcid=self.orcid)

        putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
        putcodes_recids = list(
            putcode_getter.get_all_inspire_putcodes_and_recids_iter()
        )  # Can raise exceptions.InputDataInvalidException.

        putcode = None
        for fetched_putcode, fetched_recid in putcodes_recids:
            if fetched_recid == str(self.recid):
                putcode = int(fetched_putcode)
            cache = OrcidCache(self.orcid, fetched_recid)
            cache.write_work_putcode(fetched_putcode)

        # Ensure the putcode is actually in cache.
        # Note: this step is not really necessary and it can be skipped, but
        # at this moment it helps isolate a potential issue.
        if putcode and not self.cache.read_work_putcode():
            raise exceptions.PutcodeNotFoundInCacheAfterCachingAllPutcodes(
                "No putcode={} found in cache for recid={} after having"
                " cached all author putcodes for orcid={}".format(
                    putcode, self.recid, self.orcid
                )
            )

        return putcode

    @time_execution
    def _delete_work(self, putcode=None):
        putcode = putcode or self._cache_all_author_putcodes()
        if not putcode:
            # Such a recid does not exist (anymore?) in the ORCID API.
            return
        # ORCID API allows 1 non-idempotent call only for the same orcid at
        # the same time. Using `distributed_lock` to achieve this.
        with utils.distributed_lock(self.lock_name, blocking=True):
            response = self.client.delete_work(putcode)

        try:
            response.raise_for_result()
        except orcid_client_exceptions.PutcodeNotFoundDeleteException:
            # Such a putcode does not exist (anymore?) in ORCID.
            pass
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)

        self.cache.delete_work_putcode()

    @time_execution
    def _push_work_with_clashing_identifier(self):
        putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
        ids = self.converter.added_external_identifiers
        for (
            putcode,
            recid,
        ) in putcode_getter.get_putcodes_and_recids_by_identifiers_iter(ids):
            if not putcode or not recid:
                continue
            if recid == self.recid:
                continue
            # Local import to avoid import error.
            from inspirehep.orcid import tasks

            max_retries = 3
            # Execute the orcid_push Celery task synchronously.
            backoff = lambda retry_count: [30, 2 * 60, 7 * 60][  # noqa: E731
                retry_count % max_retries
            ]
            utils.apply_celery_task_with_retry(
                tasks.orcid_push,
                kwargs={
                    "orcid": self.orcid,
                    "rec_id": recid,
                    "oauth_token": self.oauth_token,
                    # Set `do_fail_if_duplicated_identifier` to avoid an
                    # infinite chain of recursive calls.
                    "kwargs_to_pusher": dict(
                        do_fail_if_duplicated_identifier=True,
                        record_db_version=self.record_db_version,
                    ),
                },
                max_retries=max_retries,
                countdown=backoff,
                time_limit=10 * 60,
            )
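# Usage sketch (illustrative, not part of the original code): shows how the
# OrcidPusher above could be invoked. The helper name is hypothetical; a Flask
# application context, a literature record with the given recid, and a valid
# ORCID OAuth token are assumed.
def push_record_to_orcid_sketch(orcid, recid, oauth_token):
    pusher = OrcidPusher(orcid, recid, oauth_token)
    # Returns the putcode of the POSTed/PUT work, or None when the record was
    # deleted and its ORCID work was removed.
    return pusher.push()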
def delete_work(orcid, token, recid):
    putcode = get_putcode_for_work(orcid, token, recid)
    if putcode:
        client = OrcidClient(token, orcid)
        client.delete_work(putcode)
def setup(self):
    self.putcodes = [
        '43326850', '43255490', '43183518', '43857637', '43257979', '43938460', '43553536', '43846642', '43869107', '43466717',
        '43880082', '43852910', '44762573', '44762737', '44762744', '44762721', '44762617', '43257122', '43861964', '43938538',
        '43606530', '43855125', '44762615', '44762741', '43554289', '44762570', '44762735', '44762597', '43859780', '43941962',
        '43856818', '43938515', '43864453', '43875319', '43935537', '43467792', '44077351', '43554306', '44472652', '43911727',
        '43922432', '43916436', '43907796', '43924927', '43923874', '43938553', '43938542', '43878004', '43935695', '43881622',
        '43935569', '44231173', '43880802', '43938523', '43938458', '43935897', '43919253', '43918420', '43938697', '43920855',
        '43933388', '43942717', '43910178', '44515789', '43882441', '43935355', '43935418', '43935500', '43929711', '43935348',
        '43938613', '43919864', '43885354', '43935660', '43882622', '43935419', '43935519', '43942195', '43935682', '43949957',
        '43941870', '43938614', '43938644', '43941852', '43935478', '43937005', '44216033', '43948457', '43942230', '43938670',
        '43935725', '43942117', '43935577', '44227246', '43942042', '44219584', '43942229', '43942467', '43935574', '43461438',
        '43939244', '43942225', '43942110', '44218042', '44236863', '43942221', '43935690', '43938687', '43942306', '43326714',
        '43935600', '43935671', '43935595', '44229237', '43942579', '43935727', '43939389', '43935714', '44232896', '44227649',
        '43935744', '43938719', '43938710', '43942556', '44237648', '44226428', '43938991', '44236016', '43935746', '44236622',
        '43938809', '44234262', '43942562', '43939267', '43935804', '43935814', '44235446', '44238589', '43476255', '44238117',
        '43942245', '43935831', '44255508', '43935773', '43935525', '43349513', '43939364', '43942333', '44259358', '43334280',
        '43935879', '43474664', '43942483', '43868647', '43942582', '44269186', '43935857', '43939273', '44265932', '43328661',
        '43939436', '44575020', '44252784', '43473085', '43935955', '43329599', '43474084', '43942511', '43935852', '43325385',
        '43935788', '43942608', '43935829', '43942738', '43935875', '43939367', '44274797', '43328989', '43474829', '43942339',
        '43330602', '43939455', '43939372', '43943050', '43351389', '43328159', '43329373', '43935762', '43939467', '43943007',
        '43476291', '44272682', '43478322', '43343506', '43483181', '43347500', '43333264', '43858017', '43473511', '43332255',
        '43476010', '43350059', '44251364', '43475852', '43353967', '43849619', '43819343', '43339682', '43348858', '43333748',
        '44217143', '44232508', '43822751', '43939441', '43339402', '44284285', '43478099', '43356509', '43942969', '43348252',
        '43483990', '43936102', '43939877', '43935994', '44575015', '43939643', '44285709', '43352429', '43942965', '43364988',
        '44265579', '43939719', '43940213', '43368521', '43939725', '43361294', '43936167', '43293661', '43362128', '43940188',
        '43358238', '43936143', '44283137', '44284877', '43356836', '43939941', '44293857', '43363375', '43361159', '43365921',
        '43939949', '43941280', '43368183', '44291548', '43360300', '43366583', '43936275', '43370435', '43939860', '43361521',
        '43936314', '43942905', '43942981', '43292406', '43367691', '44317462'
    ]  # noqa: E501
    self.orcid = '0000-0002-6665-4934'  # ATLAS author.
    try:
        # Pick the token from settings_local.py first.
        self.oauth_token = inspire_service_orcid.conf.settings.OAUTH_TOKENS.get(
            self.orcid)
    except AttributeError:
        self.oauth_token = 'mytoken'
    self.client = OrcidClient(self.oauth_token, self.orcid)
class OrcidPusher(object):
    def __init__(self, orcid, recid, oauth_token,
                 do_fail_if_duplicated_identifier=False,
                 record_db_version=None):
        self.orcid = orcid
        self.recid = recid
        self.oauth_token = oauth_token
        self.do_fail_if_duplicated_identifier = do_fail_if_duplicated_identifier
        self.record_db_version = record_db_version
        self.inspire_record = self._get_inspire_record()
        self.cache = OrcidCache(orcid, recid)
        self.lock_name = 'orcid:{}'.format(self.orcid)
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.converter = None

    @time_execution
    def _get_inspire_record(self):
        try:
            inspire_record = get_db_record('lit', self.recid)
        except RecordGetterError as exc:
            raise exceptions.RecordNotFoundException(
                'recid={} not found for pid_type=lit'.format(self.recid),
                from_exc=exc)

        # If the record_db_version was given, then ensure we are about to push
        # the right record version.
        # This check is related to the fact that the orcid push at this moment is
        # triggered by the signal after_record_update (which happens after an
        # InspireRecord.commit()). This is not the actual commit to the db, which
        # might happen at a later stage or not at all.
        # Note that connecting to the proper SQLAlchemy signal would also
        # have issues: https://github.com/mitsuhiko/flask-sqlalchemy/issues/645
        if self.record_db_version and \
                inspire_record.model.version_id < self.record_db_version:
            raise exceptions.StaleRecordDBVersionException(
                'Requested push for db version={}, but actual record db'
                ' version={}'.format(self.record_db_version,
                                     inspire_record.model.version_id)
            )
        return inspire_record

    @property
    def _do_force_cache_miss(self):
        """
        Hook to force a cache miss. This can be leveraged in feature tests.
        """
        for note in self.inspire_record.get('_private_notes', []):
            if note.get('value') == 'orcid-push-force-cache-miss':
                logger.info(
                    'OrcidPusher force cache miss for recid={} and orcid={}'.format(
                        self.recid, self.orcid))
                return True
        return False

    @property
    def _is_record_deleted(self):
        # Hook to force a delete. This can be leveraged in feature tests.
        for note in self.inspire_record.get('_private_notes', []):
            if note.get('value') == 'orcid-push-force-delete':
                logger.info(
                    'OrcidPusher force delete for recid={} and orcid={}'.format(
                        self.recid, self.orcid))
                return True
        return self.inspire_record.get('deleted', False)

    @time_execution
    def push(self):
        putcode = None
        if not self._do_force_cache_miss:
            putcode = self.cache.read_work_putcode()
            if not self._is_record_deleted and \
                    not self.cache.has_work_content_changed(self.inspire_record):
                logger.info('OrcidPusher cache hit for recid={} and orcid={}'.format(
                    self.recid, self.orcid))
                return putcode
        logger.info('OrcidPusher cache miss for recid={} and orcid={}'.format(
            self.recid, self.orcid))

        # If the record is deleted, then delete it.
        if self._is_record_deleted:
            self._delete_work(putcode)
            return None

        self.converter = OrcidConverter(
            record=self.inspire_record,
            url_pattern=current_app.config['LEGACY_RECORD_URL_PATTERN'],
            put_code=putcode,
        )

        try:
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.WorkAlreadyExistsException:
            # We POSTed the record as new work, but it failed because the work
            # already exists (identified by the external identifiers).
            # This means we do not have the putcode, thus we cache all
            # author's putcodes and PUT the work again.
            putcode = self._cache_all_author_putcodes()
            if not putcode:
                msg = 'No putcode was found in ORCID API for orcid={} and recid={}.'\
                    ' And the POST has previously failed for the same recid because'\
                    ' the work had already existed'.format(self.orcid, self.recid)
                raise exceptions.PutcodeNotFoundInOrcidException(msg)
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.DuplicatedExternalIdentifierException:
            # We PUT a record changing its identifier, but there is another work
            # in ORCID with the same identifier. We need to find out the recid
            # of the clashing work in ORCID and push a fresh version of that
            # record.
            # This scenario might be triggered by a merge of 2 records in Inspire.
            if self.do_fail_if_duplicated_identifier:
                raise exceptions.DuplicatedExternalIdentifierPusherException
            self._push_work_with_clashing_identifier()
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.PutcodeNotFoundPutException:
            self.cache.delete_work_putcode()
            putcode = self._post_or_put_work()
        except (orcid_client_exceptions.TokenInvalidException,
                orcid_client_exceptions.TokenMismatchException,
                orcid_client_exceptions.TokenWithWrongPermissionException):
            logger.info('Deleting Orcid push access token={} for orcid={}'.format(
                self.oauth_token, self.orcid))
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)

        self.cache.write_work_putcode(putcode, self.inspire_record)
        return putcode

    @time_execution
    def _post_or_put_work(self, putcode=None):
        # Note: if putcode is None, then it's a POST (it means the work is new).
        # Otherwise a PUT (it means the work already exists and it has the given
        # putcode).
        xml_element = self.converter.get_xml(do_add_bibtex_citation=True)
        # ORCID API allows 1 non-idempotent call only for the same orcid at
        # the same time. Using `distributed_lock` to achieve this.
        with distributed_lock(self.lock_name, blocking=True):
            if putcode:
                response = self.client.put_updated_work(xml_element, putcode)
            else:
                response = self.client.post_new_work(xml_element)

        utils.log_service_response(
            logger, response, 'in OrcidPusher for recid={}'.format(self.recid))
        response.raise_for_result()
        return response['putcode']

    @time_execution
    def _cache_all_author_putcodes(self):
        logger.info('New OrcidPusher cache all author putcodes for orcid={}'.format(
            self.orcid))

        putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
        putcodes_recids = list(
            putcode_getter.get_all_inspire_putcodes_and_recids_iter()
        )  # Can raise exceptions.InputDataInvalidException.

        putcode = None
        for fetched_putcode, fetched_recid in putcodes_recids:
            if fetched_recid == str(self.recid):
                putcode = int(fetched_putcode)
            cache = OrcidCache(self.orcid, fetched_recid)
            cache.write_work_putcode(fetched_putcode)

        # Ensure the putcode is actually in cache.
        # Note: this step is not really necessary and it can be skipped, but
        # at this moment it helps isolate a potential issue.
        if putcode and not self.cache.read_work_putcode():
            raise exceptions.PutcodeNotFoundInCacheAfterCachingAllPutcodes(
                'No putcode={} found in cache for recid={} after having'
                ' cached all author putcodes for orcid={}'.format(
                    putcode, self.recid, self.orcid))

        return putcode

    @time_execution
    def _delete_work(self, putcode=None):
        putcode = putcode or self._cache_all_author_putcodes()
        if not putcode:
            # Such a recid does not exist (anymore?) in the ORCID API.
            return
        # ORCID API allows 1 non-idempotent call only for the same orcid at
        # the same time. Using `distributed_lock` to achieve this.
        with distributed_lock(self.lock_name, blocking=True):
            response = self.client.delete_work(putcode)

        try:
            response.raise_for_result()
        except orcid_client_exceptions.PutcodeNotFoundDeleteException:
            # Such a putcode does not exist (anymore?) in ORCID.
            pass
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)

        self.cache.delete_work_putcode()

    @time_execution
    def _push_work_with_clashing_identifier(self):
        putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
        ids = self.converter.added_external_identifiers
        for putcode, recid in \
                putcode_getter.get_putcodes_and_recids_by_identifiers_iter(ids):
            if not putcode or not recid:
                continue
            # Local import to avoid import error.
            from inspirehep.modules.orcid import tasks

            max_retries = 3
            # Execute the orcid_push Celery task synchronously.
            backoff = lambda retry_count: [30, 2 * 60, 7 * 60][retry_count % max_retries]  # noqa: E731
            utils.apply_celery_task_with_retry(
                tasks.orcid_push,
                kwargs={
                    'orcid': self.orcid,
                    'rec_id': recid,
                    'oauth_token': self.oauth_token,
                    # Set `do_fail_if_duplicated_identifier` to avoid an
                    # infinite chain of recursive calls.
                    'kwargs_to_pusher': dict(
                        do_fail_if_duplicated_identifier=True,
                        record_db_version=self.record_db_version)
                },
                max_retries=max_retries,
                countdown=backoff,
                time_limit=10 * 60,
            )
class OrcidPutcodeGetter(object):
    def __init__(self, orcid, oauth_token):
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config['ORCID_APP_CREDENTIALS'][
            'consumer_key']

    def get_all_inspire_putcodes_and_recids_iter(self):
        """
        Query ORCID api and get all the Inspire putcodes for the given ORCID.
        """
        summary_response = self._get_all_works_summary()
        # `putcodes_recids` is a list like: [('43326850', 20), ('43255490', None)]
        putcodes_recids = list(summary_response.get_putcodes_and_recids_for_source_iter(
            self.source_client_id_path))
        putcodes_with_recids = [x for x in putcodes_recids if x[1]]
        putcodes_without_recids = [x[0] for x in putcodes_recids if not x[1]]

        for putcode, recid in putcodes_with_recids:
            yield putcode, recid

        if not putcodes_without_recids:
            return

        for putcode, recid in self._get_putcodes_and_recids_iter(putcodes_without_recids):
            yield putcode, recid

    def _get_all_works_summary(self):
        """
        Query ORCID api and get all the putcodes with their embedded recids
        for the given ORCID.
        An embedded recid is a recid written as external-identifier.
        """
        response = self.client.get_all_works_summary()
        utils.log_service_response(
            logger, response, 'in OrcidPutcodeGetter works summary')
        try:
            response.raise_for_result()
        except (orcid_client_exceptions.TokenInvalidException,
                orcid_client_exceptions.TokenMismatchException,
                orcid_client_exceptions.TokenWithWrongPermissionException):
            logger.info(
                'OrcidPutcodeGetter: deleting Orcid push access token={} for orcid={}'.format(
                    self.oauth_token, self.orcid))
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return response

    def _get_putcodes_and_recids_iter(self, putcodes):
        for putcode, url in self._get_urls_for_putcodes_iter(putcodes):
            # Filter out putcodes that do not belong to Inspire.
            if INSPIRE_WORK_URL_REGEX.match(url):
                recid = get_pid_from_record_uri(url)[1]
                if not recid:
                    logger.error(
                        'OrcidPutcodeGetter: cannot parse recid from url={} for orcid={}'.format(
                            url, self.orcid))
                    continue
                yield putcode, recid

    def _get_urls_for_putcodes_iter(self, putcodes):
        # The call `get_bulk_works_details_iter()` can be expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works (each of them with many
        # authors), 8 calls would be performed with a total data transfer
        # > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            # Note: this log can be large. Consider removing it when this part
            # is considered mature.
            utils.log_service_response(
                logger, response, 'in OrcidPutcodeGetter works details')
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            chained = itertools.chain(chained, response.get_putcodes_and_urls_iter())
        return chained

    def get_putcodes_and_recids_by_identifiers_iter(self, identifiers):
        """
        Yield putcode and recid for each work matched by the external
        identifiers.

        Note: external identifiers of type 'other-id' are skipped.

        Args:
            identifiers (List[inspirehep.modules.orcid.converter.ExternalIdentifier]):
                list of all external identifiers added after the xml conversion.
        """
        summary_response = self._get_all_works_summary()
        for putcode, ids in summary_response.get_putcodes_and_external_identifiers_iter():
            # ids is a list like:
            # [
            #     {'external-id-relationship': 'SELF',
            #      'external-id-type': 'other-id',
            #      'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
            #      'external-id-value': '20'
            #     },...
            # ]
            # Get the recid.
            recid = self._get_recid_for_work(ids, str(putcode))
            for identifier in ids:
                id_type = identifier.get('external-id-type')
                # We are interested only in doi, arxiv, isbns.
                if not id_type or id_type.lower() == 'other-id':
                    continue
                id_value = identifier.get('external-id-value')
                if not id_value:
                    continue
                if ExternalIdentifier(id_type, id_value) in identifiers:
                    yield putcode, recid

    def _get_recid_for_work(self, external_identifiers, putcode):
        """
        Get the recid for a work given its external identifiers and putcode.
        The recid might be in the external identifiers or a get_work_details()
        might be called to find it.

        Args:
            external_identifiers (List[Dict]): a list like:
                [
                    {'external-id-relationship': 'SELF',
                     'external-id-type': 'other-id',
                     'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
                     'external-id-value': '20'
                    },...
                ]
            putcode: putcode of the given work.

        Returns: the Inspire recid matching the work.
        """
        for identifier in external_identifiers:
            id_type = identifier.get('external-id-type')
            if not id_type or id_type.lower() != 'other-id':
                continue
            id_url = inspire_service_orcid_utils.smartget(
                identifier, 'external-id-url.value', '')
            if not re.match(r'.*inspire.*', id_url, re.I):
                continue
            id_value = identifier.get('external-id-value')
            if not id_value:
                continue
            # recid found.
            return id_value

        # The recid was not found in the external_identifiers.
        # Thus we call get_bulk_works_details_iter().
        putcodes_recid = list(self._get_putcodes_and_recids_iter([putcode]))
        if putcodes_recid:
            return putcodes_recid[0][1]
def client(self):
    # Pick the token from settings_local.py first.
    self.oauth_token = getattr(
        inspire_service_orcid.conf.settings, 'OAUTH_TOKENS', {}).get(
            self.orcid, 'mytoken')
    return OrcidClient(self.oauth_token, self.orcid)
def test_invalid_token(self):
    client = OrcidClient('invalidtoken', self.orcid)
    response = client.delete_work(self.putcode)
    with pytest.raises(exceptions.TokenInvalidException):
        response.raise_for_result()
    assert not response.ok
def test_invalid_token(self):
    client = OrcidClient('invalidtoken', self.orcid)
    response = client.post_new_work(self.xml_element)
    with pytest.raises(exceptions.TokenInvalidException):
        response.raise_for_result()
    assert not response.ok
def test_invalid_token(self):
    client = OrcidClient('invalidtoken', self.orcid)
    response = client.get_all_works_summary()
    with pytest.raises(exceptions.TokenInvalidException):
        response.raise_for_result()
    assert not response.ok
def orcid_client(self):
    return OrcidClient(self.oauth_token, self.orcid)
def __init__(self, orcid, oauth_token):
    self.orcid = orcid
    self.oauth_token = oauth_token
    self.client = OrcidClient(self.oauth_token, self.orcid)
    self.source_client_id_path = current_app.config['ORCID_APP_CREDENTIALS'][
        'consumer_key']
class TestGenerateGetBulkWorksDetails(object):
    def setup(self):
        self.putcodes = [
            '43326850', '43255490', '43183518', '43857637', '43257979', '43938460', '43553536', '43846642', '43869107', '43466717',
            '43880082', '43852910', '44762573', '44762737', '44762744', '44762721', '44762617', '43257122', '43861964', '43938538',
            '43606530', '43855125', '44762615', '44762741', '43554289', '44762570', '44762735', '44762597', '43859780', '43941962',
            '43856818', '43938515', '43864453', '43875319', '43935537', '43467792', '44077351', '43554306', '44472652', '43911727',
            '43922432', '43916436', '43907796', '43924927', '43923874', '43938553', '43938542', '43878004', '43935695', '43881622',
            '43935569', '44231173', '43880802', '43938523', '43938458', '43935897', '43919253', '43918420', '43938697', '43920855',
            '43933388', '43942717', '43910178', '44515789', '43882441', '43935355', '43935418', '43935500', '43929711', '43935348',
            '43938613', '43919864', '43885354', '43935660', '43882622', '43935419', '43935519', '43942195', '43935682', '43949957',
            '43941870', '43938614', '43938644', '43941852', '43935478', '43937005', '44216033', '43948457', '43942230', '43938670',
            '43935725', '43942117', '43935577', '44227246', '43942042', '44219584', '43942229', '43942467', '43935574', '43461438',
            '43939244', '43942225', '43942110', '44218042', '44236863', '43942221', '43935690', '43938687', '43942306', '43326714',
            '43935600', '43935671', '43935595', '44229237', '43942579', '43935727', '43939389', '43935714', '44232896', '44227649',
            '43935744', '43938719', '43938710', '43942556', '44237648', '44226428', '43938991', '44236016', '43935746', '44236622',
            '43938809', '44234262', '43942562', '43939267', '43935804', '43935814', '44235446', '44238589', '43476255', '44238117',
            '43942245', '43935831', '44255508', '43935773', '43935525', '43349513', '43939364', '43942333', '44259358', '43334280',
            '43935879', '43474664', '43942483', '43868647', '43942582', '44269186', '43935857', '43939273', '44265932', '43328661',
            '43939436', '44575020', '44252784', '43473085', '43935955', '43329599', '43474084', '43942511', '43935852', '43325385',
            '43935788', '43942608', '43935829', '43942738', '43935875', '43939367', '44274797', '43328989', '43474829', '43942339',
            '43330602', '43939455', '43939372', '43943050', '43351389', '43328159', '43329373', '43935762', '43939467', '43943007',
            '43476291', '44272682', '43478322', '43343506', '43483181', '43347500', '43333264', '43858017', '43473511', '43332255',
            '43476010', '43350059', '44251364', '43475852', '43353967', '43849619', '43819343', '43339682', '43348858', '43333748',
            '44217143', '44232508', '43822751', '43939441', '43339402', '44284285', '43478099', '43356509', '43942969', '43348252',
            '43483990', '43936102', '43939877', '43935994', '44575015', '43939643', '44285709', '43352429', '43942965', '43364988',
            '44265579', '43939719', '43940213', '43368521', '43939725', '43361294', '43936167', '43293661', '43362128', '43940188',
            '43358238', '43936143', '44283137', '44284877', '43356836', '43939941', '44293857', '43363375', '43361159', '43365921',
            '43939949', '43941280', '43368183', '44291548', '43360300', '43366583', '43936275', '43370435', '43939860', '43361521',
            '43936314', '43942905', '43942981', '43292406', '43367691', '44317462'
        ]  # noqa: E501
        self.orcid = '0000-0002-6665-4934'  # ATLAS author.
        try:
            # Pick the token from settings_local.py first.
            self.oauth_token = inspire_service_orcid.conf.settings.OAUTH_TOKENS.get(
                self.orcid)
        except AttributeError:
            self.oauth_token = 'mytoken'
        self.client = OrcidClient(self.oauth_token, self.orcid)

    def test_happy_flow(self):
        for response in self.client.get_bulk_works_details_iter(self.putcodes):
            response.raise_for_result()
            assert response.ok
            assert str(response['bulk'][0]['work']['put-code']) in self.putcodes
            assert str(response['bulk'][-1]['work']['put-code']) in self.putcodes

    def test_too_many_putcodes(self):
        from inspire_service_orcid import client
        with mock.patch.object(
                client, 'MAX_PUTCODES_PER_WORKS_DETAILS_REQUEST', 101):
            for response in self.client.get_bulk_works_details_iter(
                    [str(x) for x in range(101)]):
                with pytest.raises(exceptions.ExceedMaxNumberOfPutCodesException):
                    response.raise_for_result()

    def test_get_putcodes_and_urls(self):
        for response in self.client.get_bulk_works_details_iter(self.putcodes):
            response.raise_for_result()
            assert response.ok
            putcodes_and_urls = list(response.get_putcodes_and_urls_iter())
            # Note: the recorded cassette returns the same result for each for loop.
            assert putcodes_and_urls[0] == (
                '43183518', 'http://inspirehep.net/record/1665234')
            assert putcodes_and_urls[-1] == (
                '44227246', 'http://inspirehep.net/record/1515025')

    def test_single_work_error(self):
        self.putcodes = ['51540408', '51496313']
        result = []
        for response in self.client.get_bulk_works_details_iter(self.putcodes):
            response.raise_for_result()
            assert response.ok
            result += list(response.get_putcodes_and_urls_iter())
        assert result == [('51496313', 'http://inspireheptest.cern.ch/record/20')]
def __init__(self, orcid, oauth_token):
    self.orcid = orcid
    self.oauth_token = oauth_token
    self.client = OrcidClient(self.oauth_token, self.orcid)
    self.source_client_id_path = current_app.config[
        'ORCID_APP_CREDENTIALS']['consumer_key']
class OrcidPusher(object):
    def __init__(
        self,
        orcid,
        recid,
        oauth_token,
        pushing_duplicated_identifier=False,
        record_db_version=None,
    ):
        self.orcid = orcid
        self.recid = str(recid)
        self.oauth_token = oauth_token
        self.pushing_duplicated_identifier = pushing_duplicated_identifier
        self.record_db_version = record_db_version
        self.inspire_record = self._get_inspire_record()
        self.cache = OrcidCache(orcid, recid)
        self.lock_name = "orcid:{}".format(self.orcid)
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.converter = None
        self.cached_author_putcodes = {}

    @time_execution
    def _get_inspire_record(self):
        try:
            inspire_record = LiteratureRecord.get_record_by_pid_value(self.recid)
        except PIDDoesNotExistError as exc:
            raise exceptions.RecordNotFoundException(
                "recid={} not found for pid_type=lit".format(self.recid),
                from_exc=exc)

        # If the record_db_version was given, then ensure we are about to push
        # the right record version.
        # This check is related to the fact that the orcid push at this moment is
        # triggered by the signal after_record_update (which happens after an
        # InspireRecord.commit()). This is not the actual commit to the db, which
        # might happen at a later stage or not at all.
        # Note that connecting to the proper SQLAlchemy signal would also
        # have issues: https://github.com/mitsuhiko/flask-sqlalchemy/issues/645
        if (self.record_db_version
                and inspire_record.model.version_id < self.record_db_version):
            raise exceptions.StaleRecordDBVersionException(
                "Requested push for db version={}, but actual record db"
                " version={}".format(self.record_db_version,
                                     inspire_record.model.version_id))
        return inspire_record

    @property
    def _do_force_cache_miss(self):
        """
        Hook to force a cache miss. This can be leveraged in feature tests.
        """
        for note in self.inspire_record.get("_private_notes", []):
            if note.get("value") == "orcid-push-force-cache-miss":
                LOGGER.debug("OrcidPusher force cache miss",
                             recid=self.recid, orcid=self.orcid)
                return True
        return False

    @property
    def _is_record_deleted(self):
        # Hook to force a delete. This can be leveraged in feature tests.
        for note in self.inspire_record.get("_private_notes", []):
            if note.get("value") == "orcid-push-force-delete":
                LOGGER.debug("OrcidPusher force delete",
                             recid=self.recid, orcid=self.orcid)
                return True
        return self.inspire_record.get("deleted", False)

    @time_execution  # noqa: C901
    def push(self):
        putcode = None
        if not self._do_force_cache_miss:
            putcode = self.cache.read_work_putcode()
            if not self._is_record_deleted and not self.cache.has_work_content_changed(
                    self.inspire_record):
                LOGGER.debug("OrcidPusher cache hit",
                             recid=self.recid, orcid=self.orcid)
                return putcode
        LOGGER.debug("OrcidPusher cache miss", recid=self.recid, orcid=self.orcid)

        # If the record is deleted, then delete it.
        if self._is_record_deleted:
            self._delete_work(putcode)
            return None

        self.converter = OrcidConverter(
            record=self.inspire_record,
            url_pattern=current_app.config["LEGACY_RECORD_URL_PATTERN"],
            put_code=putcode,
        )

        try:
            putcode = self._post_or_put_work(putcode)
        except orcid_client_exceptions.WorkAlreadyExistsException:
            # We POSTed the record as new work, but it failed because
            # a work with the same identifier is already in ORCID.
            # This can mean two things:
            # 1. the record itself is already in ORCID, but we don't have the putcode;
            # 2. a different record with the same external identifier is already in ORCID.
            # We first try to fix 1. by caching all author's putcodes and PUT the work again.
            # If the putcode wasn't found we are probably facing case 2.,
            # so we try to push once again works with clashing identifiers
            # to update them and resolve the potential conflict.
            if self.pushing_duplicated_identifier:
                raise exceptions.DuplicatedExternalIdentifierPusherException
            putcode = self._cache_all_author_putcodes()
            if not putcode:
                try:
                    self._push_work_with_clashing_identifier()
                    putcode = self._post_or_put_work(putcode)
                except orcid_client_exceptions.WorkAlreadyExistsException:
                    # The PUT/POST failed despite pushing works with clashing
                    # identifiers and we can't do anything about this.
                    raise exceptions.DuplicatedExternalIdentifierPusherException
            else:
                self._post_or_put_work(putcode)
        except orcid_client_exceptions.DuplicatedExternalIdentifierException:
            # We PUT a record changing its identifier, but there is another work
            # in ORCID with the same identifier. We need to find out the recid
            # of the clashing work in ORCID and push a fresh version of that
            # record.
            # This scenario might be triggered by a merge of 2 records in Inspire.
            if not self.pushing_duplicated_identifier:
                self._push_work_with_clashing_identifier()
            # The raised exception will cause a retry of the celery task.
            raise exceptions.DuplicatedExternalIdentifierPusherException
        except orcid_client_exceptions.PutcodeNotFoundPutException:
            # We tried to push the work with an invalid putcode, so we delete
            # its putcode and push it without any putcode.
            # If it turns out that the record already exists
            # in ORCID we search for the putcode by caching
            # all author's putcodes and PUT the work again.
            self.cache.delete_work_putcode()
            self.converter = OrcidConverter(
                record=self.inspire_record,
                url_pattern=current_app.config["LEGACY_RECORD_URL_PATTERN"],
                put_code=None,
            )
            putcode = self._cache_all_author_putcodes()
            self._post_or_put_work(putcode)
        except (
            orcid_client_exceptions.TokenInvalidException,
            orcid_client_exceptions.TokenMismatchException,
            orcid_client_exceptions.TokenWithWrongPermissionException,
        ):
            LOGGER.info("Deleting Orcid push access",
                        token=self.oauth_token, orcid=self.orcid)
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)

        self.cache.write_work_putcode(putcode, self.inspire_record)
        return putcode

    @time_execution
    def _post_or_put_work(self, putcode=None):
        # Note: if putcode is None, then it's a POST (it means the work is new).
        # Otherwise a PUT (it means the work already exists and it has the given
        # putcode).
        xml_element = self.converter.get_xml(do_add_bibtex_citation=True)
        # ORCID API allows 1 non-idempotent call only for the same orcid at
        # the same time. Using `distributed_lock` to achieve this.
        with utils.distributed_lock(self.lock_name, blocking=True):
            if putcode:
                response = self.client.put_updated_work(xml_element, putcode)
            else:
                response = self.client.post_new_work(xml_element)

        LOGGER.info("POST/PUT ORCID work", recid=self.recid)
        response.raise_for_result()
        return response["putcode"]

    def _delete_works_with_duplicated_putcodes(self, cached_putcodes_recids):
        unique_recids_putcodes = {}
        for fetched_putcode, fetched_recid in cached_putcodes_recids:
            if fetched_recid in unique_recids_putcodes:
                self._delete_work(fetched_putcode)
            else:
                unique_recids_putcodes[fetched_recid] = fetched_putcode
        return unique_recids_putcodes

    @time_execution
    def _cache_all_author_putcodes(self):
        LOGGER.debug("New OrcidPusher cache all author putcodes", orcid=self.orcid)

        if not self.cached_author_putcodes:
            putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
            putcodes_recids = list(
                putcode_getter.get_all_inspire_putcodes_and_recids_iter())
            self.cached_author_putcodes = self._delete_works_with_duplicated_putcodes(
                putcodes_recids)

        putcode = None
        for fetched_recid, fetched_putcode in self.cached_author_putcodes.items():
            if fetched_recid == self.recid:
                putcode = int(fetched_putcode)
            cache = OrcidCache(self.orcid, fetched_recid)
            cache.write_work_putcode(fetched_putcode)

        # Ensure the putcode is actually in cache.
        # Note: this step is not really necessary and it can be skipped, but
        # at this moment it helps isolate a potential issue.
        if putcode and not self.cache.read_work_putcode():
            raise exceptions.PutcodeNotFoundInCacheAfterCachingAllPutcodes(
                "No putcode={} found in cache for recid={} after having"
                " cached all author putcodes for orcid={}".format(
                    putcode, self.recid, self.orcid))

        return putcode

    @time_execution
    def _delete_work(self, putcode=None):
        putcode = putcode or self._cache_all_author_putcodes()
        if not putcode:
            # Such a recid does not exist (anymore?) in the ORCID API.
            return
        # ORCID API allows 1 non-idempotent call only for the same orcid at
        # the same time. Using `distributed_lock` to achieve this.
        with utils.distributed_lock(self.lock_name, blocking=True):
            response = self.client.delete_work(putcode)

        try:
            response.raise_for_result()
        except orcid_client_exceptions.PutcodeNotFoundDeleteException:
            # Such a putcode does not exist (anymore?) in ORCID.
            pass
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)

        self.cache.delete_work_putcode()

    @time_execution
    def _push_work_with_clashing_identifier(self):
        putcode_getter = OrcidPutcodeGetter(self.orcid, self.oauth_token)
        ids = self.converter.added_external_identifiers
        putcodes_recids = putcode_getter.get_putcodes_and_recids_by_identifiers_iter(
            ids)
        updated_putcodes_recid = self._delete_works_with_duplicated_putcodes(
            putcodes_recids)
        for recid, putcode in updated_putcodes_recid.items():
            if not putcode or not recid:
                continue
            if recid == self.recid:
                continue
            # Local import to avoid import error.
            from inspirehep.orcid import tasks

            tasks.orcid_push(
                self.orcid,
                recid,
                self.oauth_token,
                dict(
                    pushing_duplicated_identifier=True,
                    record_db_version=self.record_db_version,
                ),
            )