Пример #1
0
    def affiliations(self):
        """A list of namedtuples storing affiliation information,
        where each namedtuple corresponds to one affiliation.
        The information in each namedtuple is (eid name variant documents city
        country parent).

        All entries are strings or None.  variant combines variants of names
        with a semicolon.

        Raises
        ------
        ValueError
            If the elements provided in integrity_fields do not match the
            actual field names (listed above).
        """
        # Initiate namedtuple with ordered list of fields
        fields = 'eid name variant documents city country parent'
        aff = namedtuple('Affiliation', fields)
        check_field_consistency(self.integrity, fields)
        # Parse elements one-by-one
        out = []
        for item in self._json:
            name = item.get('affiliation-name')
            variants = [d.get('$', "") for d in item.get('name-variant', [])
                        if d.get('$', "") != name]
            new = aff(eid=item.get('eid'), variant=";".join(variants),
                      documents=item.get('document-count', '0'), name=name,
                      city=item.get('city'), country=item.get('country'),
                      parent=item.get('parent-affiliation-id'))
            out.append(new)
        # Finalize
        check_integrity(out, self.integrity, self.action)
        return out or None
Пример #2
0
    def authors(self) -> Optional[List[NamedTuple]]:
        """A list of namedtuples storing author information,
        where each namedtuple corresponds to one author.
        The information in each namedtuple is (eid orcid surname initials givenname
        documents affiliation affiliation_id city country areas).

        All entries are strings or None.  Areas combines abbreviated subject
        areas followed by the number of documents in this subject.

        Raises
        ------
        ValueError
            If the elements provided in integrity_fields do not match the
            actual field names (listed above).
        """
        # Initiate namedtuple with ordered list of fields
        fields = 'eid orcid surname initials givenname affiliation documents '\
                 'affiliation_id city country areas'
        auth = namedtuple('Author', fields)
        check_field_consistency(self._integrity, fields)
        # Parse elements one-by-one
        out = []
        for item in self._json:
            name = item.get('preferred-name', {})
            aff = item.get('affiliation-current', {})
            fields = item.get('subject-area', [{
                '@abbrev': '',
                '@frequency': ''
            }])
            areas = [
                f"{d.get('@abbrev', '')} ({d.get('@frequency', '')})"
                for d in listify(fields)
            ]
            new = auth(eid=item.get('eid'),
                       orcid=item.get('orcid'),
                       initials=name.get('initials'),
                       surname=name.get('surname'),
                       areas="; ".join(areas),
                       givenname=name.get('given-name'),
                       documents=int(item['document-count']),
                       affiliation=aff.get('affiliation-name'),
                       affiliation_id=aff.get('affiliation-id'),
                       city=aff.get('affiliation-city'),
                       country=aff.get('affiliation-country'))
            out.append(new)
        # Finalize
        check_integrity(out, self._integrity, self._action)
        return out or None
Пример #3
0
    def results(self):
        """A list of namedtuples in the form (eid doi pii pubmed_id title
        subtype subtypeDescription creator afid affilname affiliation_city
        affiliation_country author_count author_names author_ids author_afids
        coverDate coverDisplayDate publicationName issn source_id eIssn
        aggregationType volume issueIdentifier article_number pageRange
        description authkeywords citedby_count openaccess fund_acr fund_no
        fund_sponsor).
        Field definitions correspond to
        https://dev.elsevier.com/guides/ScopusSearchViews.htm and return the
        values as-is, except for afid, affilname, affiliation_city,
        affiliation_country, author_names, author_ids and author_afids:  These
        information are joined on ";".  In case an author has multiple
        affiliations, they are joined on "-"
        (e.g. Author1Aff;Author2Aff1-Author2Aff2).

        Raises
        ------
        ValueError
            If the elements provided in integrity_fields do not match the
            actual field names (listed above).

        Notes
        -----
        The list of authors and the list of affiliations per author are
        deduplicated.
        """
        # Initiate namedtuple with ordered list of fields
        fields = 'eid doi pii pubmed_id title subtype subtypeDescription creator ' \
                 'afid affilname affiliation_city affiliation_country author_count ' \
                 'author_names author_ids author_afids coverDate '\
                 'coverDisplayDate publicationName issn source_id eIssn '\
                 'aggregationType volume issueIdentifier article_number '\
                 'pageRange description authkeywords citedby_count '\
                 'openaccess fund_acr fund_no fund_sponsor'
        doc = namedtuple('Document', fields)
        check_field_consistency(self.integrity, fields)
        # Parse elements one-by-one
        out = []
        for item in self._json:
            info = {}
            # Parse affiliations
            info["affilname"] = _join(item, 'affilname')
            info["afid"] = _join(item, 'afid')
            info["aff_city"] = _join(item, 'affiliation-city')
            info["aff_country"] = _join(item, 'affiliation-country')
            # Parse authors
            try:
                # Deduplicate list of authors
                authors = _deduplicate(item['author'])
                # Extract information
                surnames = _replace_none([d['surname'] for d in authors])
                firstnames = _replace_none([d['given-name'] for d in authors])
                info["auth_names"] = ";".join([", ".join([t[0], t[1]]) for t in
                                               zip(surnames, firstnames)])
                info["auth_ids"] = ";".join([d['authid'] for d in authors])
                affs = []
                for auth in authors:
                    aff = listify(_deduplicate(auth.get('afid', [])))
                    affs.append('-'.join([d['$'] for d in aff]))
                info["auth_afid"] = (';'.join(affs) or None)
            except KeyError:
                pass
            date = item.get('prism:coverDate')
            if isinstance(date, list):
                date = date[0].get('$')
            new = doc(article_number=item.get('article-number'),
                      title=item.get('dc:title'), fund_no=item.get('fund-no'),
                      fund_sponsor=item.get('fund-sponsor'),
                      subtype=item.get('subtype'), doi=item.get('prism:doi'),
                      subtypeDescription=item.get('subtypeDescription'),
                      issn=item.get('prism:issn'), creator=item.get('dc:creator'),
                      affilname=info.get("affilname"),
                      author_names=info.get("auth_names"),
                      coverDate=date, volume=item.get('prism:volume'),
                      coverDisplayDate=item.get('prism:coverDisplayDate'),
                      publicationName=item.get('prism:publicationName'),
                      source_id=item.get('source-id'), author_ids=info.get("auth_ids"),
                      aggregationType=item.get('prism:aggregationType'),
                      issueIdentifier=item.get('prism:issueIdentifier'),
                      pageRange=item.get('prism:pageRange'),
                      author_afids=info.get("auth_afid"),
                      affiliation_country=info.get("aff_country"),
                      citedby_count=item.get('citedby-count'),
                      openaccess=item.get('openaccess'), eIssn=item.get('prism:eIssn'),
                      author_count=item.get('author-count', {}).get('$'),
                      affiliation_city=info.get("aff_city"), afid=info.get("afid"),
                      description=item.get('dc:description'), pii=item.get('pii'),
                      authkeywords=item.get('authkeywords'), eid=item.get('eid'),
                      fund_acr=item.get('fund-acr'), pubmed_id=item.get('pubmed-id'))
            out.append(new)
        # Finalize
        check_integrity(out, self.integrity, self.action)
        return out or None