Exemplos de Search em Python, exemplos de pybliometrics.scopus.superclasses.Search em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: serial_search.py Projeto: milensys/pybliometrics

    def __init__(self, query, refresh=False, view='ENHANCED'):
        """Search the Serial Title API for sources matching a query.

        Parameters
        ----------
        query : dict
            Search fields mapped to the values to look for.  Accepted keys
            are 'title', 'issn', 'date', 'pub', 'subj', 'subjCode',
            'content' and 'oa'.  Possible values are described at
            https://dev.elsevier.com/documentation/SerialTitleAPI.wadl#d1e22.

        refresh : bool or int (optional, default=False)
            Whether an existing cached file should be refreshed.  When an
            int is given, the cached file is refreshed once it is older
            than that many days.

        view : str (optional, default="ENHANCED")
            Which view of the file to download.  One of STANDARD,
            ENHANCED or CITESCORE, see
            https://dev.elsevier.com/guides/SerialTitleViews.htm.

        Raises
        ------
        Scopus400Error
            If a value given for a query key is invalid, or if, for
            non-subscribers, the search yields more than 5000 results.

        ValueError
            If `query` contains a key that is not allowed or if `view` is
            not one of the allowed values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/SerialSearch.html.

        Notes
        -----
        Results are cached in `{path}/{view}/{fname}`, where `path` is
        set in `~/.scopus/config.ini` and `fname` is the md5 hash of the
        `query` dict rendered as 'key=value' pairs joined by '&'.
        """
        # Validate the query fields (input order is kept for the message)
        valid_keys = ('title', 'issn', 'date', 'pub', 'subj',
                      'subjCode', 'content', 'oa')
        unknown = []
        for key in query.keys():
            if key not in valid_keys:
                unknown.append(key)
        if unknown:
            listed = ", ".join(unknown)
            raise ValueError(f'Query key(s) "{listed}" invalid')

        # Validate the requested view
        view_choices = ('STANDARD', 'ENHANCED', 'CITESCORE')
        if view not in view_choices:
            choices = ', '.join(view_choices)
            raise ValueError(f'view parameter must be one of {choices}')

        # Keep a string form of the query, then delegate to Search
        self.query = str(query)
        Search.__init__(self,
                        query=query,
                        api='SerialSearch',
                        refresh=refresh,
                        view=view)
        # Number of results; a response without 'entry' counts as zero
        response = self._json['serial-metadata-response']
        self._n = len(response.get('entry', []))

Exemplo n.º 2

0

Exibir arquivo

    def __init__(self,
                 query: Dict,
                 refresh: Union[bool, int] = False,
                 view: str = 'ENHANCED',
                 **kwds: str) -> None:
        """Search the Serial Title API for sources matching a query.

        :param query: Search fields mapped to the values to look for.
                      Accepted keys are 'title', 'issn', 'date', 'pub',
                      'subj', 'subjCode', 'content' and 'oa'.  Possible
                      values are described at
                      https://dev.elsevier.com/documentation/SerialTitleAPI.wadl#d1e22.
        :param refresh: Whether an existing cached file should be refreshed.
                        When an int is given, the cached file is refreshed
                        once it is older than that many days.
        :param view: Which view of the file to download.  One of STANDARD,
                     ENHANCED or CITESCORE, see
                     https://dev.elsevier.com/sc_serial_title_views.html.
        :param kwds: Extra keywords forwarded as query parameters.  They
                     must be fields and values from the API specification at
                     https://dev.elsevier.com/documentation/SerialTitleAPI.wadl.

        Raises
        ------
        Scopus400Error
            If a value given for a query key is invalid, or if, for
            non-subscribers, the search yields more than 5000 results.

        ValueError
            If `query` contains a key that is not allowed or if `view` is
            not one of the allowed values.

        Notes
        -----
        Results are cached in `{path}/{view}/{fname}`, where `path` is
        set in your configuration file and `fname` is the md5 hash of the
        `query` dict rendered as 'key=value' pairs joined by '&'.
        """
        # Validate the query fields (input order is kept for the message)
        valid_keys = ('title', 'issn', 'date', 'pub', 'subj',
                      'subjCode', 'content', 'oa')
        unknown = []
        for key in query.keys():
            if key not in valid_keys:
                unknown.append(key)
        if unknown:
            listed = ", ".join(unknown)
            raise ValueError(f'Query key(s) "{listed}" invalid')

        # Validate the requested view with the shared helper
        view_choices = ('STANDARD', 'ENHANCED', 'CITESCORE')
        check_parameter_value(view, view_choices, "view")

        # Set the private attributes before delegating to Search
        self._query = str(query)
        self._refresh = refresh
        self._view = view
        Search.__init__(self, query=query, api='SerialSearch', **kwds)
        # Number of results; a response without 'entry' counts as zero
        response = self._json['serial-metadata-response']
        self._n = len(response.get('entry', []))

Exemplo n.º 3

0

Exibir arquivo

    def __init__(self, query, fields=None, refresh=False):
        """Interaction with the Subject Classifications Scopus API.

        Parameters
        ----------
        query : dict
            Query parameters and corresponding fields. Allowed keys 'code',
            'abbrev', 'description', 'detail'. For more details on search
            fields please refer to
            https://dev.elsevier.com/documentation/SubjectClassificationsAPI.wadl#d1e199.
            The dict is not modified; the 'field' entry is added to a copy.

        fields : iterable (optional, default=None)
            The fields to return when calling search results. Allowed values:
            'code', 'abbrev', 'description', 'detail'.  If empty or None,
            all four fields are requested.  For details see
            https://dev.elsevier.com/documentation/SubjectClassificationsAPI.wadl#d1e199.

        refresh : bool or int (optional, default=False)
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.

        Raises
        ------
        ValueError
            If query or return fields contain invalid fields.

        TypeError
            If returned fields are not passed in an iterable container.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/SubjectClassifications.html.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{fname}`,
        where `path` is specified in `~/.scopus/config.ini` and fname is
        the md5-hashed version of `query` dict turned into string in format
        of 'key=value' delimited by '&'.
        """
        # Checks
        allowed_query_keys = ('code', 'description', 'detail', 'abbrev')
        invalid = [k for k in query if k not in allowed_query_keys]
        if invalid:
            raise ValueError(f'Query key(s) "{", ".join(invalid)}" invalid.')
        if fields:
            # Read `fields` exactly once so that one-shot iterables (e.g.
            # generators) are not exhausted by validation and then sent
            # to the API as an empty field list.
            try:
                return_fields = list(fields)
            except TypeError as exc:
                # Put the explanation in the exception, not on stdout
                raise TypeError("Fields must be iterable") from exc
            if not set(return_fields).issubset(allowed_query_keys):
                raise ValueError("Parameter 'fields' must be one of " +
                                 f"{', '.join(allowed_query_keys)}.")
            # Keep the caller's own list/tuple; anything else is replaced
            # by the materialised copy, which can be read again.
            if not isinstance(fields, (list, tuple)):
                fields = return_fields
            self.fields = fields
        else:
            self.fields = allowed_query_keys

        # Query
        # Work on a shallow copy so the caller's dict does not gain a
        # 'field' key as a side effect of constructing this object.
        params = query.copy()
        params['field'] = ','.join(self.fields)
        self.query = str(params)
        Search.__init__(self, query=params, api='SubjectClassifications',
                        refresh=refresh)
        path = ['subject-classifications', 'subject-classification']
        self._n = len(chained_get(self._json, path, []))

Exemplo n.º 4

0

Exibir arquivo

    def __init__(self,
                 query,
                 refresh=False,
                 download=True,
                 count=200,
                 integrity_fields=None,
                 integrity_action="raise",
                 verbose=False):
        """Search the Affiliation Search API with a query string.

        Parameters
        ----------
        query : str
            The query to run, e.g. "af-id(60021784)".

        refresh : bool or int (optional, default=False)
            Whether an existing cached file should be refreshed.  When an
            int is given, the cached file is refreshed once it is older
            than that many days.

        download : bool (optional, default=True)
            Whether results should be downloaded when they are not cached.

        count : int (optional, default=200)
            How many entries are displayed at once.  Smaller values mean
            more queries, each returning fewer results.

        integrity_fields : None or iterable (default=None)
            Names of fields whose completeness should be checked.  The
            action given in `integrity_action` is performed if elements
            in these fields are missing.  This guards against
            idiosyncratically missing elements that should always be
            present, such as the EID or the name.

        integrity_action : str (optional, default="raise")
            What happens when the integrity of the given fields cannot
            be verified.  Possible actions:
            - "raise": Raise an AttributeError
            - "warn": Raise a UserWarning

        verbose : bool (optional, default=False)
            Whether a download progress bar is printed to the terminal.
            Has no effect for download=False.

        Raises
        ------
        ScopusQueryError
            If the search yields more than 5000 results.

        ValueError
            If `integrity_action` is not one of the allowed values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/AffiliationSearch.html.

        Notes
        -----
        Results are cached in `{path}/STANDARD/{fname}`, where `path` is
        set in `~/.scopus/config.ini` and `fname` is the md5 hash of
        `query`.
        """
        # Validate the integrity action before doing anything else
        check_integrity_params(integrity_action)

        # Store the query and delegate to Search; the view is always STANDARD
        self.query = query
        Search.__init__(self, query=query, api="AffiliationSearch",
                        refresh=refresh, count=count, download=download,
                        verbose=verbose, view="STANDARD")

        # Integrity settings are stored after Search has been initialised
        self.integrity = integrity_fields if integrity_fields else []
        self.action = integrity_action

Exemplo n.º 5

0

Exibir arquivo

    def __init__(self, query, refresh=False, subscriber=True, view=None,
                 download=True, integrity_fields=None,
                 integrity_action="raise", verbose=False, **kwds):
        """Search the Scopus Search API with a query string.

        Parameters
        ----------
        query : str
            The query to run.

        refresh : bool or int (optional, default=False)
            Whether an existing cached file should be refreshed.  When an
            int is given, the cached file is refreshed once it is older
            than that many days.

        subscriber : bool (optional, default=True)
            Whether Scopus is accessed with a subscription.  The value is
            passed on as the `cursor` setting (unless a `cursor` keyword
            overrides it) and, together with `view`, determines the
            number of entries per query iteration: 200 for subscribers
            using the STANDARD view, 25 otherwise.

        view : str (optional, default=None)
            Which view to use for the query, see
            https://dev.elsevier.com/guides/ScopusSearchViews.htm.
            One of STANDARD or COMPLETE.  If None, COMPLETE is used for
            subscriber=True and STANDARD for subscriber=False.

        download : bool (optional, default=True)
            Whether results should be downloaded when they are not cached.

        integrity_fields : None or iterable (default=None)
            Names of fields whose completeness should be checked.  The
            action given in `integrity_action` is performed if elements
            in these fields are missing.  This guards against
            idiosyncratically missing elements that should always be
            present, such as the EID or the source ID.

        integrity_action : str (optional, default="raise")
            What happens when the integrity of the given fields cannot
            be verified.  Possible actions:
            - "raise": Raise an AttributeError
            - "warn": Raise a UserWarning

        verbose : bool (optional, default=False)
            Whether a download progress bar is printed to the terminal.
            Has no effect for download=False or when the query file is
            in the cache.

        kwds : key-value pairs, optional
            Keywords forwarded as query parameters.  They must be fields
            and values from the API specification
            (https://dev.elsevier.com/documentation/SCOPUSSearchAPI.wadl),
            such as "field" or "date".  A "cursor" keyword is consumed
            here and replaces `subscriber` as the cursor setting.

        Raises
        ------
        ScopusQueryError
            For non-subscribers, if the search yields more than 5000
            results.

        ValueError
            If `view` or `integrity_action` is not one of the allowed
            values.

        Examples
        --------
        See https://pybliometrics.readthedocs.io/en/stable/examples/ScopusSearch.html.

        Notes
        -----
        Results are cached in `{path}/{view}/{fname}`, where `path` is
        set in `~/.scopus/config.ini` and `fname` is the md5 hash of
        `query`.
        """
        # Validate: an unset view is allowed and is resolved further down
        permitted_views = ('STANDARD', 'COMPLETE')
        if view and view not in permitted_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(permitted_views))
        check_integrity_params(integrity_action)

        # Resolve the default view from the subscription status
        if not view:
            view = "COMPLETE" if subscriber else "STANDARD"
        # Page size is decided before a "cursor" keyword may override
        # `subscriber`, so it always reflects the `subscriber` argument
        count = 200 if (view == "STANDARD" and subscriber) else 25
        # An explicit "cursor" keyword wins and is not forwarded twice
        subscriber = kwds.pop("cursor", subscriber)

        # Store the query and delegate to Search
        self.query = query
        Search.__init__(self,
                        query=query,
                        api='ScopusSearch',
                        refresh=refresh,
                        count=count,
                        cursor=subscriber,
                        view=view,
                        download=download,
                        verbose=verbose,
                        **kwds)
        self.integrity = integrity_fields if integrity_fields else []
        self.action = integrity_action

Exemplo n.º 6

0

Exibir arquivo

Arquivo: subject_classifications.py Projeto: raffaem/pybliometrics

    def __init__(self,
                 query: Dict,
                 refresh: Union[bool, int] = False,
                 fields: Union[List[str], Tuple[str, ...]] = None,
                 **kwds: str) -> None:
        """Interaction with the Subject Classifications Scopus API.

        :param query: Query parameters and corresponding fields. Allowed keys
                      'code', 'abbrev', 'description', 'detail'. For more
                      details on search fields please refer to
                      https://dev.elsevier.com/documentation/SubjectClassificationsAPI.wadl#d1e199.
                      The dict is not modified; the 'field' entry is added
                      to a copy.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param fields: The fields to return when calling search results.
                       Allowed values: 'code', 'abbrev', 'description',
                       'detail'.  If empty or None, all four fields are
                       requested.  For details see
                       https://dev.elsevier.com/documentation/SubjectClassificationsAPI.wadl#d1e199.
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/SubjectClassificationsAPI.wadl.

        Raises
        ------
        TypeError
            If returned fields are not passed in an iterable container.

        ValueError
            If `query` contains keys that are not allowed or `fields`
            contains values that are not allowed.

        Notes
        -----
        The directory for cached results is `{path}/{fname}`,
        where `path` is specified in your configuration file, and `fname` is
        the md5-hashed version of `query` dict turned into string in format
        of 'key=value' delimited by '&'.
        """
        # Checks
        allowed_query_keys = ('code', 'description', 'detail', 'abbrev')
        invalid = [k for k in query if k not in allowed_query_keys]
        if invalid:
            raise ValueError(f'Query key(s) "{", ".join(invalid)}" invalid.')
        if fields:
            # Read `fields` exactly once so that one-shot iterables (e.g.
            # generators) are not exhausted by validation and then sent
            # to the API as an empty field list.
            try:
                return_fields = list(fields)
            except TypeError as exc:
                # Put the explanation in the exception, not on stdout
                raise TypeError("Fields must be iterable") from exc
            if not set(return_fields).issubset(allowed_query_keys):
                raise ValueError("Parameter 'fields' must be one of " +
                                 f"{', '.join(allowed_query_keys)}.")
            # Keep the caller's own list/tuple; anything else is replaced
            # by the materialised copy, which can be read again.
            if not isinstance(fields, (list, tuple)):
                fields = return_fields
            self.fields = fields
        else:
            self.fields = allowed_query_keys

        # Query
        # Work on a shallow copy so the caller's dict does not gain a
        # 'field' key as a side effect of constructing this object.
        params = query.copy()
        params['field'] = ','.join(self.fields)
        self._refresh = refresh
        self._query = str(params)
        self._view = None
        Search.__init__(self,
                        query=params,
                        api='SubjectClassifications',
                        **kwds)
        path = ['subject-classifications', 'subject-classification']
        self._n = len(chained_get(self._json, path, []))

Exemplo n.º 7

0

Exibir arquivo

Arquivo: author_search.py Projeto: raffaem/pybliometrics

    def __init__(self,
                 query: str,
                 refresh: Union[bool, int] = False,
                 verbose: bool = False,
                 download: bool = True,
                 integrity_fields: Union[List[str], Tuple[str, ...]] = None,
                 integrity_action: str = "raise",
                 count: int = 200,
                 **kwds: str) -> None:
        """Search the Author Search API with a query string.

        :param query: The query to run.  Allowed fields and values are
                      listed at
                      https://dev.elsevier.com/sc_author_search_tips.html.
        :param refresh: Whether an existing cached file should be refreshed.
                        When an int is given, the cached file is refreshed
                        once it is older than that many days.
        :param verbose: Whether a download progress bar is printed.
        :param download: Whether results should be downloaded when they
                         are not cached.
        :param integrity_fields: Names of fields whose completeness should
                                 be checked.  The action given in
                                 `integrity_action` is performed if
                                 elements in these fields are missing.
                                 This guards against idiosyncratically
                                 missing elements that should always be
                                 present (e.g., EID or source ID).
        :param integrity_action: What happens when the integrity of the
                                 given fields cannot be verified.
                                 Possible actions:
                                 - "raise": Raise an AttributeError
                                 - "warn": Raise a UserWarning
        :param count: (deprecated) How many entries are displayed at once.
                      Smaller values mean more queries, each returning
                      fewer results.  Any value other than 200 triggers a
                      FutureWarning.
        :param kwds: Extra keywords forwarded as query parameters.  They
                     must be fields and values from the API specification at
                     https://dev.elsevier.com/documentation/AuthorSearchAPI.wadl.

        Raises
        ------
        ScopusQueryError
            If the search yields more than 5000 results, which is the
            maximum the API returns.  The error prevents the download
            attempt and avoids making use of your API key.

        ValueError
            If `integrity_action` is not one of the allowed values.

        Notes
        -----
        Results are cached in `{path}/STANDARD/{fname}`, where `path` is
        set in your configuration file and `fname` is the md5 hash of
        `query`.
        """
        # Validate the integrity action with the shared helper
        check_parameter_value(integrity_action, ("warn", "raise"),
                              "integrity_action")
        # Only a non-default `count` counts as use of the deprecated option
        if count != 200:
            warn("Parameter `count` is deprecated and will be removed in a "
                 "future release.  There will be no substitute.",
                 FutureWarning)

        # Set the private attributes before delegating to Search
        self._action = integrity_action
        self._integrity = integrity_fields if integrity_fields else []
        self._query = query
        self._refresh = refresh
        self._view = "STANDARD"
        Search.__init__(self, query=query, api='AuthorSearch', count=count,
                        download=download, verbose=verbose, **kwds)

Exemplo n.º 8

0

Exibir arquivo

    def __init__(self,
                 query: str,
                 refresh: Union[bool, int] = False,
                 view: str = None,
                 verbose: bool = False,
                 download: bool = True,
                 integrity_fields: Union[List[str], Tuple[str, ...]] = None,
                 integrity_action: str = "raise",
                 subscriber: bool = True,
                 **kwds: str
                 ) -> None:
        """Search the Scopus Search API with a query string.

        :param query: The query as used in the Advanced Search on
                      scopus.com.  All fields except "INDEXTERMS()" and
                      "LIMIT-TO()" work.
        :param refresh: Whether an existing cached file should be refreshed.
                        When an int is given, the cached file is refreshed
                        once it is older than that many days.
        :param view: Which view to use for the query, see
                     https://dev.elsevier.com/sc_search_views.html.
                     One of STANDARD or COMPLETE.  If None, COMPLETE is
                     used for `subscriber=True` and STANDARD for
                     `subscriber=False`.
        :param verbose: Whether a download progress bar is printed.
        :param download: Whether results should be downloaded when they
                         are not cached.
        :param integrity_fields: Names of fields whose completeness should
                                 be checked.  The action given in
                                 `integrity_action` is performed if
                                 elements in these fields are missing.
                                 This guards against idiosyncratically
                                 missing elements that should always be
                                 present (e.g., EID or source ID).
        :param integrity_action: What happens when the integrity of the
                                 given fields cannot be verified.
                                 Possible actions:
                                 - "raise": Raise an AttributeError
                                 - "warn": Raise a UserWarning
        :param subscriber: Whether you access Scopus with a subscription.
                           The value is passed on as the `cursor` setting
                           (unless a `cursor` keyword overrides it) and,
                           together with `view`, determines the default
                           number of entries per query iteration: 200 for
                           subscribers using the STANDARD view, 25
                           otherwise.
        :param kwds: Extra keywords forwarded as query parameters.  They
                     must be fields and values from the API specification at
                     https://dev.elsevier.com/documentation/ScopusSearchAPI.wadl.
                     The keywords "cursor" and "count" are consumed here
                     and override the cursor setting and the number of
                     entries per query iteration, respectively.

        Raises
        ------
        ScopusQueryError
            For non-subscribers, if the search yields more than 5000
            results.

        ValueError
            If `view` or `integrity_action` is not one of the allowed
            values.

        Notes
        -----
        Results are cached in `{path}/{view}/{fname}`, where `path` is
        set in your configuration file and `fname` is the md5 hash of
        `query`.
        """
        # Validate: an unset view is allowed and is resolved further down
        if view:
            check_parameter_value(view, ('STANDARD', 'COMPLETE'), "view")
        check_parameter_value(integrity_action, ("warn", "raise"),
                              "integrity_action")

        # Resolve the default view from the subscription status
        if not view:
            view = "COMPLETE" if subscriber else "STANDARD"
        # Default page size is decided before a "cursor" keyword may
        # override `subscriber`, so it reflects the `subscriber` argument
        count = 200 if (view == "STANDARD" and subscriber) else 25
        # Explicit "cursor"/"count" keywords win and are not forwarded twice
        subscriber = kwds.pop("cursor", subscriber)
        count = kwds.pop("count", count)

        # Set the private attributes before delegating to Search
        self._action = integrity_action
        self._integrity = integrity_fields if integrity_fields else []
        self._refresh = refresh
        self._query = query
        self._view = view
        Search.__init__(self,
                        query=query,
                        api='ScopusSearch',
                        count=count,
                        cursor=subscriber,
                        download=download,
                        verbose=verbose,
                        **kwds)