def delete_work_for_orcid(orcid):
    """Delete one work per group from the works summary of ``orcid``.

    An OAuth token is obtained through ``TestOrcidPusherBase._oauth_token``,
    the works summary is fetched, and for every entry under its ``"group"``
    key the put-code of the *first* ``"work-summary"`` item is deleted.

    Args:
        orcid: the ORCID identifier whose works are deleted.
    """
    token = TestOrcidPusherBase._oauth_token(orcid)
    orcid_client = OrcidClient(token, orcid)
    summary = orcid_client.get_all_works_summary()
    for group in summary.get("group", []):
        # Only the first summary of each group is considered.
        first_summary = group["work-summary"][0]
        orcid_client.delete_work(first_summary["put-code"])
Пример #2
0
 def test_get_putcodes_for_source_source_client_id_none(self):
     """Check the number of putcodes returned for a given source.

     The works summary is fetched, validated with ``raise_for_result()``
     and 90 putcodes are expected for source ``0000-0001-8607-8906``.
     """
     author_orcid = '0000-0002-4490-1930'
     summary = OrcidClient('mytoken', author_orcid).get_all_works_summary()
     summary.raise_for_result()
     found = [
         putcode for putcode
         in summary.get_putcodes_for_source_iter('0000-0001-8607-8906')
     ]
     assert len(found) == 90
Пример #3
0
class OrcidPutcodeGetter(object):
    """Fetch from ORCID the putcodes of an author's works.

    The instance keeps the ORCID, the OAuth token, an ``OrcidClient`` built
    from them and the ``consumer_key`` found in the ``ORCID_APP_CREDENTIALS``
    application config (stored as ``source_client_id_path``).
    """

    def __init__(self, orcid, oauth_token):
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        credentials = current_app.config['ORCID_APP_CREDENTIALS']
        self.source_client_id_path = credentials['consumer_key']

    def get_all_inspire_putcodes(self):
        """
        Yield ``(putcode, url)`` for every work of the given ORCID whose url
        matches ``INSPIRE_WORK_URL_REGEX``.

        Nothing is yielded (and no details are requested) when the works
        summary contains no putcode for the configured source.
        """
        own_putcodes = self._get_all_putcodes()
        if own_putcodes:
            for putcode, url in self._get_urls_for_putcodes(own_putcodes):
                # Keep only the putcodes that belong to Inspire.
                if INSPIRE_WORK_URL_REGEX.match(url):
                    yield putcode, url

    def _get_all_putcodes(self):
        """
        Return the list of putcodes found in the works summary for the
        configured source.

        Raises:
            exceptions.InputDataInvalidException: when the summary response
                raises a ``BaseOrcidClientJsonException``.
        """
        summary = self.client.get_all_works_summary()
        utils.log_service_response(
            logger, summary, 'in OrcidPutcodeGetter works summary')
        try:
            summary.raise_for_result()
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        source_putcodes = summary.get_putcodes_for_source(
            self.source_client_id_path)
        return list(source_putcodes)

    def _get_urls_for_putcodes(self, putcodes):
        """
        Return an iterable of the putcodes-and-urls of the given putcodes.

        Every bulk response is requested, logged and validated before this
        method returns; an empty list is returned when there is no response.

        Raises:
            exceptions.InputDataInvalidException: when a details response
                raises a ``BaseOrcidClientJsonException``.
        """
        # The call get_bulk_works_details_iter() can be very expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works, 8 calls would be performed
        # with a total data transfer > 0.5 Gb.
        per_response = []
        for details in self.client.get_bulk_works_details_iter(putcodes):
            utils.log_service_response(
                logger, details, 'in OrcidPutcodeGetter works details')
            try:
                details.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)
            per_response.append(details.get_putcodes_and_urls())

        if not per_response:
            return []
        return itertools.chain.from_iterable(per_response)
Пример #4
0
def get_putcode_for_work(orcid, token, recid):
    """Return the putcode of the work whose url ends with ``/<recid>``.

    The works summary of ``orcid`` is fetched, the putcodes for the source
    configured as ``orcid-api``/``consumer_key`` are collected and their
    details are requested in bulk until a matching url is found.

    Args:
        orcid: the ORCID identifier of the author.
        token: the OAuth token used to build the client.
        recid: the record id looked for at the end of the work url.

    Returns:
        The matching putcode, or None when there is no putcode for the
        source or no url matches.
    """
    client = OrcidClient(token, orcid)
    summary = client.get_all_works_summary()
    summary.raise_for_result()
    client_id = config.get('orcid-api', 'consumer_key')
    own_putcodes = [
        putcode for putcode in summary.get_putcodes_for_source_iter(client_id)
    ]

    if own_putcodes:
        # TODO: this has to be simplified when we push recids as external
        # identifier (thus just the get_all_works_summary() call is required
        # to match recids with putcodes).
        for details in client.get_bulk_works_details_iter(own_putcodes):
            details.raise_for_result()
            for putcode, url in details.get_putcodes_and_urls_iter():
                if url.endswith('/{}'.format(recid)):
                    return putcode
    return None
Пример #5
0
class OrcidPutcodeGetter(object):
    """Query ORCID for the putcodes (and matching recids) of an author's works.

    The instance keeps the ORCID, the OAuth token, an ``OrcidClient`` built
    from them and the ``consumer_key`` found in the ``ORCID_APP_CREDENTIALS``
    application config (stored as ``source_client_id_path`` and passed to
    the ``*_for_source_iter`` call of the summary response).
    """

    def __init__(self, orcid, oauth_token):
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config[
            "ORCID_APP_CREDENTIALS"]["consumer_key"]

    def get_all_inspire_putcodes_and_recids_iter(self):
        """
        Query ORCID api and get all the Inspire putcodes for the given ORCID.

        Yields:
            ``(putcode, recid)`` pairs: first the ones whose recid is already
            in the works summary, then the ones whose recid had to be
            resolved through the works details.
        """
        summary_response = self._get_all_works_summary()
        # `putcodes_recids` is a list like: [('43326850', 20), ('43255490', None)]
        putcodes_recids = list(
            summary_response.get_putcodes_and_recids_for_source_iter(
                self.source_client_id_path))
        putcodes_with_recids = [x for x in putcodes_recids if x[1]]
        putcodes_without_recids = [x[0] for x in putcodes_recids if not x[1]]

        for putcode, recid in putcodes_with_recids:
            yield putcode, recid

        # Avoid the works details requests when every recid is known.
        if not putcodes_without_recids:
            return

        for putcode, recid in self._get_putcodes_and_recids_iter(
                putcodes_without_recids):
            yield putcode, recid

    def _get_all_works_summary(self):
        """
        Query ORCID api and get all the putcodes with their embedded recids
        for the given ORCID.
        An embedded recid is a recid written as external-identifier.

        Returns: the validated works summary response.

        Raises:
            exceptions.TokenInvalidDeletedException: when the response raises
                a token invalid / mismatch / wrong permission exception; the
                push access token is deleted before raising.
            exceptions.InputDataInvalidException: when the response raises
                any other ``BaseOrcidClientJsonException``.
        """
        response = self.client.get_all_works_summary()
        LOGGER.info("Get ORCID work summary",
                    response=response,
                    orcid=self.orcid)
        try:
            response.raise_for_result()
        except (
                orcid_client_exceptions.TokenInvalidException,
                orcid_client_exceptions.TokenMismatchException,
                orcid_client_exceptions.TokenWithWrongPermissionException,
        ):
            # NOTE(review): the OAuth token is written to the log here;
            # confirm that this is acceptable.
            LOGGER.info(
                "OrcidPutcodeGetter: deleting Orcid push access",
                token=self.oauth_token,
                orcid=self.orcid,
            )
            push_access_tokens.delete_access_token(self.oauth_token,
                                                   self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return response

    def _get_putcodes_and_recids_iter(self, putcodes):
        """
        Yield ``(putcode, recid)`` for the given putcodes, using the url
        found in the works details. Urls not matching
        ``INSPIRE_WORK_URL_REGEX`` are skipped; urls from which no recid can
        be parsed are logged as errors and skipped.
        """
        for putcode, url in self._get_urls_for_putcodes_iter(putcodes):
            # Filter out putcodes that do not belong to Inspire.
            if INSPIRE_WORK_URL_REGEX.match(url):
                recid = PidStoreBase.get_pid_from_record_uri(url)[1]
                if not recid:
                    LOGGER.error(
                        "OrcidPutcodeGetter: cannot parse recid from url",
                        url=url,
                        orcid=self.orcid,
                    )
                    continue
                yield putcode, recid

    def _get_urls_for_putcodes_iter(self, putcodes):
        """
        Return an iterable of the putcodes-and-urls of the given putcodes.

        Every bulk response is requested, logged and validated before this
        method returns; an empty list is returned when there is no response.

        Raises:
            exceptions.InputDataInvalidException: when a details response
                raises a ``BaseOrcidClientJsonException``.
        """
        # The call `get_bulk_works_details_iter()` can be expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works (each of them with many
        # authors), 8 calls would be performed with a total data transfer > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            # Note: this log can be large. Consider removing it when this part
            # is considered mature.
            LOGGER.info("ORCID work details",
                        response=response,
                        orcid=self.orcid)
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)

            chained = itertools.chain(chained,
                                      response.get_putcodes_and_urls_iter())
        return chained

    def get_putcodes_and_recids_by_identifiers_iter(self, identifiers):
        """
        Yield putcode and recid for each work matched by the external
        identifiers (once per matching identifier of the work).
        Note: external identifiers of type 'other-id' are skipped.

        Args:
            identifiers (List[inspirehep.orcid.converter.ExternalIdentifier]):
                list of all external identifiers added after the xml conversion.
        """
        summary_response = self._get_all_works_summary()
        for (
                putcode,
                ids,
        ) in summary_response.get_putcodes_and_external_identifiers_iter():
            # ids is a list like:
            #   [
            #       {'external-id-relationship': 'SELF',
            #        'external-id-type': 'other-id',
            #        'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
            #        'external-id-value': '20'
            #       },...
            #   ]

            # The recid is resolved lazily, on the first matching identifier:
            # resolving it can require a works details request, which would
            # be wasted on the works that match no identifier.
            recid = None
            recid_resolved = False

            for identifier in ids:
                id_type = identifier.get("external-id-type")
                # We are interested only in doi, arxiv, isbns.
                if not id_type or id_type.lower() == "other-id":
                    continue
                id_value = identifier.get("external-id-value")
                if not id_value:
                    continue

                if ExternalIdentifier(id_type, id_value) not in identifiers:
                    continue

                if not recid_resolved:
                    recid = self._get_recid_for_work(ids, str(putcode))
                    recid_resolved = True
                yield putcode, recid

    def _get_recid_for_work(self, external_identifiers, putcode):
        """
        Get the recid for a work given its external identifiers and putcode.
        The recid might be in the external identifiers or the works details
        might be requested to find it.

        Args:
            external_identifiers (List[Dict]): a list like:
               [
                   {'external-id-relationship': 'SELF',
                    'external-id-type': 'other-id',
                    'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
                    'external-id-value': '20'
                   },...
               ]
            putcode: putcode of the given work.

        Returns: the Inspire recid matching the work, or None if not found.
        """
        for identifier in external_identifiers:
            id_type = identifier.get("external-id-type")
            if not id_type or id_type.lower() != "other-id":
                continue

            id_url = inspire_service_orcid_utils.smartget(
                identifier, "external-id-url.value", "")
            # A missing/null url cannot be an Inspire url (and re.match()
            # would raise TypeError on None).
            if not id_url or not re.match(r".*inspire.*", id_url, re.I):
                continue

            id_value = identifier.get("external-id-value")
            if not id_value:
                continue

            # recid found.
            return id_value

        # The recid was not found in the external_identifiers.
        # Thus we call get_bulk_works_details_iter().
        putcodes_recid = list(self._get_putcodes_and_recids_iter([putcode]))

        if putcodes_recid:
            return putcodes_recid[0][1]
        return None
Пример #6
0
class OrcidPutcodeGetter(object):
    """Query ORCID for the putcodes (and matching recids) of an author's works.

    The instance keeps the ORCID, the OAuth token, an ``OrcidClient`` built
    from them and the ``consumer_key`` found in the ``ORCID_APP_CREDENTIALS``
    application config (stored as ``source_client_id_path`` and passed to
    the ``*_for_source_iter`` call of the summary response).
    """

    def __init__(self, orcid, oauth_token):
        self.orcid = orcid
        self.oauth_token = oauth_token
        self.client = OrcidClient(self.oauth_token, self.orcid)
        self.source_client_id_path = current_app.config['ORCID_APP_CREDENTIALS'][
            'consumer_key']

    def get_all_inspire_putcodes_and_recids_iter(self):
        """
        Query ORCID api and get all the Inspire putcodes for the given ORCID.

        Yields:
            ``(putcode, recid)`` pairs: first the ones whose recid is already
            in the works summary, then the ones whose recid had to be
            resolved through the works details.
        """
        summary_response = self._get_all_works_summary()
        # `putcodes_recids` is a list like: [('43326850', 20), ('43255490', None)]
        putcodes_recids = list(summary_response.get_putcodes_and_recids_for_source_iter(
            self.source_client_id_path))
        putcodes_with_recids = [x for x in putcodes_recids if x[1]]
        putcodes_without_recids = [x[0] for x in putcodes_recids if not x[1]]

        for putcode, recid in putcodes_with_recids:
            yield putcode, recid

        # Avoid the works details requests when every recid is known.
        if not putcodes_without_recids:
            return

        for putcode, recid in self._get_putcodes_and_recids_iter(putcodes_without_recids):
            yield putcode, recid

    def _get_all_works_summary(self):
        """
        Query ORCID api and get all the putcodes with their embedded recids
        for the given ORCID.
        An embedded recid is a recid written as external-identifier.

        Returns: the validated works summary response.

        Raises:
            exceptions.TokenInvalidDeletedException: when the response raises
                a token invalid / mismatch / wrong permission exception; the
                push access token is deleted before raising.
            exceptions.InputDataInvalidException: when the response raises
                any other ``BaseOrcidClientJsonException``.
        """
        response = self.client.get_all_works_summary()
        utils.log_service_response(logger, response, 'in OrcidPutcodeGetter works summary')
        try:
            response.raise_for_result()
        except (orcid_client_exceptions.TokenInvalidException,
                orcid_client_exceptions.TokenMismatchException,
                orcid_client_exceptions.TokenWithWrongPermissionException):
            # NOTE(review): the OAuth token is written to the log here;
            # confirm that this is acceptable.
            logger.info('OrcidPutcodeGetter: deleting Orcid push access token={} for orcid={}'.format(
                self.oauth_token, self.orcid))
            push_access_tokens.delete_access_token(self.oauth_token, self.orcid)
            raise exceptions.TokenInvalidDeletedException
        except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
            raise exceptions.InputDataInvalidException(from_exc=exc)
        return response

    def _get_putcodes_and_recids_iter(self, putcodes):
        """
        Yield ``(putcode, recid)`` for the given putcodes, using the url
        found in the works details. Urls not matching
        ``INSPIRE_WORK_URL_REGEX`` are skipped; urls from which no recid can
        be parsed are logged as errors and skipped.
        """
        for putcode, url in self._get_urls_for_putcodes_iter(putcodes):
            # Filter out putcodes that do not belong to Inspire.
            if INSPIRE_WORK_URL_REGEX.match(url):
                recid = get_pid_from_record_uri(url)[1]
                if not recid:
                    logger.error('OrcidPutcodeGetter: cannot parse recid from url={} for orcid={}'.format(
                        url, self.orcid))
                    continue
                yield putcode, recid

    def _get_urls_for_putcodes_iter(self, putcodes):
        """
        Return an iterable of the putcodes-and-urls of the given putcodes.

        Every bulk response is requested, logged and validated before this
        method returns; an empty list is returned when there is no response.

        Raises:
            exceptions.InputDataInvalidException: when a details response
                raises a ``BaseOrcidClientJsonException``.
        """
        # The call `get_bulk_works_details_iter()` can be expensive for an
        # author with many works (if each work also has many *contributors*).
        # E.g. for an ATLAS author with ~750 works (each of them with many
        # authors), 8 calls would be performed with a total data transfer > 0.5 Gb.
        chained = []
        for response in self.client.get_bulk_works_details_iter(putcodes):
            # Note: this log can be large. Consider removing it when this part
            # is considered mature.
            utils.log_service_response(logger, response, 'in OrcidPutcodeGetter works details')
            try:
                response.raise_for_result()
            except orcid_client_exceptions.BaseOrcidClientJsonException as exc:
                raise exceptions.InputDataInvalidException(from_exc=exc)

            chained = itertools.chain(chained, response.get_putcodes_and_urls_iter())
        return chained

    def get_putcodes_and_recids_by_identifiers_iter(self, identifiers):
        """
        Yield putcode and recid for each work matched by the external
        identifiers (once per matching identifier of the work).
        Note: external identifiers of type 'other-id' are skipped.

        Args:
            identifiers (List[inspirehep.modules.orcid.converter.ExternalIdentifier]):
                list of all external identifiers added after the xml conversion.
        """
        summary_response = self._get_all_works_summary()
        for putcode, ids in summary_response.get_putcodes_and_external_identifiers_iter():
            # ids is a list like:
            #   [
            #       {'external-id-relationship': 'SELF',
            #        'external-id-type': 'other-id',
            #        'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
            #        'external-id-value': '20'
            #       },...
            #   ]

            # The recid is resolved lazily, on the first matching identifier:
            # resolving it can require a works details request, which would
            # be wasted on the works that match no identifier.
            recid = None
            recid_resolved = False

            for identifier in ids:
                id_type = identifier.get('external-id-type')
                # We are interested only in doi, arxiv, isbns.
                if not id_type or id_type.lower() == 'other-id':
                    continue
                id_value = identifier.get('external-id-value')
                if not id_value:
                    continue

                if ExternalIdentifier(id_type, id_value) not in identifiers:
                    continue

                if not recid_resolved:
                    recid = self._get_recid_for_work(ids, str(putcode))
                    recid_resolved = True
                yield putcode, recid

    def _get_recid_for_work(self, external_identifiers, putcode):
        """
        Get the recid for a work given its external identifiers and putcode.
        The recid might be in the external identifiers or the works details
        might be requested to find it.

        Args:
            external_identifiers (List[Dict]): a list like:
               [
                   {'external-id-relationship': 'SELF',
                    'external-id-type': 'other-id',
                    'external-id-url': {'value': 'http://inspireheptest.cern.ch/record/20'},
                    'external-id-value': '20'
                   },...
               ]
            putcode: putcode of the given work.

        Returns: the Inspire recid matching the work, or None if not found.
        """
        for identifier in external_identifiers:
            id_type = identifier.get('external-id-type')
            if not id_type or id_type.lower() != 'other-id':
                continue

            id_url = inspire_service_orcid_utils.smartget(identifier, 'external-id-url.value', '')
            # A missing/null url cannot be an Inspire url (and re.match()
            # would raise TypeError on None).
            if not id_url or not re.match(r'.*inspire.*', id_url, re.I):
                continue

            id_value = identifier.get('external-id-value')
            if not id_value:
                continue

            # recid found.
            return id_value

        # The recid was not found in the external_identifiers.
        # Thus we call get_bulk_works_details_iter().
        putcodes_recid = list(self._get_putcodes_and_recids_iter([putcode]))

        if putcodes_recid:
            return putcodes_recid[0][1]
        return None
Пример #7
0
 def test_invalid_token(self):
     """The summary fetched with an invalid token must fail validation.

     ``raise_for_result()`` is expected to raise ``TokenInvalidException``
     and the response is expected not to be ``ok``.
     """
     summary = OrcidClient('invalidtoken', self.orcid).get_all_works_summary()
     with pytest.raises(exceptions.TokenInvalidException):
         summary.raise_for_result()
     assert not summary.ok