Exemplo n.º 1
0
def populate_affiliation_suggest(record):
    """Fill in the ``affiliation_suggest`` field of an Institution record.

    The suggester input gathers the record's ICN, the acronyms and names
    of its institution hierarchy, its legacy ICN, its name variants, its
    postal codes and, for every name variant starting with ``UMR``
    followed by whitespace, the variant with that prefix removed. Falsy
    values are dropped. The record is modified in place.
    """
    icn_values = record.get('ICN', [])
    acronyms = get_value(record, 'institution_hierarchy.acronym', default=[])
    hierarchy_names = get_value(record, 'institution_hierarchy.name', default=[])
    legacy_icn = record.get('legacy_ICN', '')
    variants = force_list(get_value(record, 'name_variants.value', default=[]))
    zip_codes = force_list(get_value(record, 'addresses.postal_code', default=[]))

    # XXX: the curators need this to be able to search with the number only.
    umr_numbers = []
    for variant in variants:
        prefix = re.match(r'UMR\s', variant, re.IGNORECASE)
        if prefix is None:
            continue
        umr_numbers.append(variant.replace(prefix.group(0), ''))

    candidates = (
        list(icn_values) +
        list(acronyms) +
        list(hierarchy_names) +
        [legacy_icn] +
        list(variants) +
        list(zip_codes) +
        umr_numbers
    )

    record['affiliation_suggest'] = {
        'input': [candidate for candidate in candidates if candidate],
    }
Exemplo n.º 2
0
def populate_bookautocomplete(sender, json, *args, **kwargs):
    """Fill in the ``bookautocomplete`` field of Literature records.

    Nothing happens unless ``json`` is a HEP record whose ``document_type``
    contains ``book``. Otherwise ``json`` is updated in place with the
    suggester input (imprint dates, imprint publishers, ISBNs, author full
    names and titles, without falsy values) and a payload holding the
    authors, the titles and the record's own ``$ref``.
    """
    if not is_hep(json) or 'book' not in json.get('document_type', []):
        return

    author_names = force_list(get_value(json, 'authors.full_name', default=[]))
    title_values = force_list(get_value(json, 'titles.title', default=[]))

    candidates = []
    for path in ('imprints.date', 'imprints.publisher', 'isbns.value'):
        candidates.extend(force_list(get_value(json, path, default=[])))
    candidates.extend(author_names)
    candidates.extend(title_values)

    suggestion = {
        'input': [candidate for candidate in candidates if candidate],
        'payload': {
            'authors': author_names,
            'id': get_value(json, 'self.$ref'),
            'title': title_values,
        },
    }
    json.update({'bookautocomplete': suggestion})
Exemplo n.º 3
0
def populate_affiliation_suggest(sender, json, *args, **kwargs):
    """Populate the ``affiliation_suggest`` field of Institution records.

    Records whose ``$schema`` does not mention ``institutions.json`` are
    left untouched; this includes records where ``$schema`` is missing or
    ``None``.

    Args:
        sender: unused by this function.
        json (dict): the record, updated in place.

    Returns:
        None
    """
    # ``$schema`` may be absent or ``None``: fall back to an empty string so
    # that the membership test cannot raise a ``TypeError``.
    if 'institutions.json' not in (json.get('$schema') or ''):
        return

    ICN = json.get('ICN', [])
    institution_acronyms = get_value(json, 'institution_hierarchy.acronym', default=[])
    institution_names = get_value(json, 'institution_hierarchy.name', default=[])
    legacy_ICN = json.get('legacy_ICN', '')
    name_variants = force_list(get_value(json, 'name_variants.value', default=[]))
    postal_codes = force_list(get_value(json, 'addresses.postal_code', default=[]))

    input_values = []
    input_values.extend(ICN)
    input_values.extend(institution_acronyms)
    input_values.extend(institution_names)
    input_values.append(legacy_ICN)
    input_values.extend(name_variants)
    input_values.extend(postal_codes)
    # Empty strings and other falsy values are useless as suggester input.
    input_values = [el for el in input_values if el]

    json.update({
        'affiliation_suggest': {
            'input': input_values,
            'output': legacy_ICN,
            'payload': {
                '$ref': get_value(json, 'self.$ref'),
                'ICN': ICN,
                'institution_acronyms': institution_acronyms,
                'institution_names': institution_names,
                'legacy_ICN': legacy_ICN,
            },
        },
    })
Exemplo n.º 4
0
 def get_dois(self, data):
     """Reduce ``data['dois']`` to a single ``{'value': ...}`` entry.

     When ``data`` has no ``control_number`` the first DOI itself is used
     as the value; otherwise the ``value`` of the first DOI is used.
     ``data`` is modified in place and its ``dois`` (or ``missing``) is
     returned.
     """
     if data.get('dois', None):
         # Without a control number the first DOI is taken as is.
         path = 'dois[0].value' if data.get('control_number') else 'dois[0]'
         first_doi = get_value(data, path, default=missing)
         data['dois'] = force_list({'value': first_doi})
     return data.get('dois', missing)
Exemplo n.º 5
0
 def get_arxiv_eprints(self, data):
     """Normalise the arXiv eprint of ``data`` into ``data['arxiv_eprint']``.

     A truthy ``arxiv_eprint`` wins; otherwise the ``value`` of the first
     entry of ``arxiv_eprints`` is used. The ``arxiv_eprints`` key is always
     removed from ``data``. Returns ``data['arxiv_eprint']`` or ``missing``.
     """
     single_eprint = data.pop('arxiv_eprint', None)
     if single_eprint:
         data['arxiv_eprint'] = force_list({'value': single_eprint})
     elif data.get('arxiv_eprints'):
         first_value = get_value(data, 'arxiv_eprints[0].value', default=missing)
         data['arxiv_eprint'] = force_list({'value': first_value})
     data.pop('arxiv_eprints', None)
     return data.get('arxiv_eprint', missing)
Exemplo n.º 6
0
def get_collection(marc_record):
    """Return the collection a MARC record belongs to.

    Every subfield value of the ``980__`` fields is upper-cased and
    stripped. ``'DELETED'`` takes precedence over everything else; after
    that the first value that is in ``REAL_COLLECTIONS`` is returned, and
    ``'HEP'`` is the fallback.

    Args:
        marc_record (dict): a MARC record with an optional ``980__`` key.

    Returns:
        str: the name of the collection.
    """
    # A list (not a set) keeps the order in which the values were read, so
    # that the result does not depend on set iteration order when more than
    # one real collection is present.
    names = []
    for field in force_list(marc_record.get('980__')):
        for subfield in field.values():
            for entry in force_list(subfield):
                names.append(entry.upper().strip())

    if 'DELETED' in names:
        return 'DELETED'

    for name in names:
        if name in REAL_COLLECTIONS:
            return name

    return 'HEP'
Exemplo n.º 7
0
def is_published(record):
    """Tell whether a record counts as published.

    A record is considered published when it is citeable, which means that
    its ``publication_info`` carries enough information, or when we know
    its DOI and a ``journal_title``, which means it is in press.

    Args:
        record(InspireRecord): a record.

    Returns:
        bool: whether the record is published.

    Examples:
        >>> record = {
        ...     'dois': [
        ...         {'value': '10.1016/0029-5582(61)90469-2'},
        ...     ],
        ...     'publication_info': [
        ...         {'journal_title': 'Nucl.Phys.'},
        ...     ],
        ... }
        >>> is_published(record)
        True

    """
    has_citeable_info = (
        'publication_info' in record and
        is_citeable(record['publication_info'])
    )

    in_press = False
    if 'dois' in record:
        publication_infos = force_list(record.get('publication_info'))
        in_press = any('journal_title' in info for info in publication_infos)

    return has_citeable_info or in_press
Exemplo n.º 8
0
def get_linked_records_in_field(record, field_path):
    """Fetch every record linked from a given field.

    Args:
        record (dict): the record containing the links
        field_path (string): a dotted field path specification understandable
            by ``get_value``, containing a json reference to another record.

    Returns:
        Iterator[dict]: an iterator on the linked record.

    Warning:
        Currently, the order in which the linked records are yielded is
        different from the order in which they appear in the record.

    Example:
        >>> record = {'references': [
        ...     {'record': {'$ref': 'https://labs.inspirehep.net/api/literature/1234'}},
        ...     {'record': {'$ref': 'https://labs.inspirehep.net/api/data/421'}},
        ... ]}
        >>> get_linked_records_in_field(record, 'references.record')
        [...]
    """
    ref_path = '{}.{}'.format(field_path, '$ref')
    uris = get_value(record, ref_path, [])
    pids = force_list([get_pid_from_record_uri(uri) for uri in uris])
    return get_db_records(pids)
Exemplo n.º 9
0
def map_refextract_to_schema(extracted_references, source=None):
    """Turn refextract output into schema references using the builder.

    Each extracted reference gets its own ``ReferenceBuilder``; every
    truthy value of a known field is passed to the matching builder
    method, and the resulting URLs are deduplicated. ``source`` is only
    forwarded when adding the raw reference.
    """
    converted = []

    for extracted in extracted_references:
        builder = ReferenceBuilder()

        def _add_raw_reference(raw_ref, builder=builder):
            return builder.add_raw_reference(raw_ref, source=source)

        # The order of this table is the order in which the builder is fed.
        handlers = (
            ('author', builder.add_refextract_authors_str),
            ('collaboration', builder.add_collaboration),
            ('doi', builder.add_uid),
            ('hdl', builder.add_uid),
            ('isbn', builder.add_uid),
            ('journal_reference', builder.set_pubnote),
            ('linemarker', builder.set_label),
            ('misc', builder.add_misc),
            ('publisher', builder.set_publisher),
            ('raw_ref', _add_raw_reference),
            ('reportnumber', builder.add_report_number),
            ('texkey', builder.set_texkey),
            ('title', builder.add_title),
            ('url', builder.add_url),
            ('year', builder.set_year),
        )

        for field_name, handler in handlers:
            for item in force_list(extracted.get(field_name)):
                if not item:
                    continue
                handler(item)

        if get_value(builder.obj, 'reference.urls'):
            inner = builder.obj['reference']
            inner['urls'] = dedupe_list_of_dicts(inner['urls'])

        converted.append(builder.obj)

    return converted
Exemplo n.º 10
0
 def get_resolved_references_by_control_number(self, data):
     """Map control numbers to the records linked from the given references.

     ``data`` is wrapped into a ``references`` field so that the linked
     records can be looked up through ``references.record``.
     """
     wrapper = {'references': force_list(data)}
     by_control_number = {}
     for resolved in get_linked_records_in_field(wrapper, 'references.record'):
         by_control_number[resolved['control_number']] = resolved
     return by_control_number
 def get_control_numbers_to_resolved_experiments_map(self, data):
     """Map control numbers to the records linked from the given experiments.

     ``data`` is wrapped into an ``accelerator_experiments`` field so that
     the linked records can be looked up through
     ``accelerator_experiments.record``.
     """
     wrapper = {'accelerator_experiments': force_list(data)}
     by_control_number = {}
     for resolved in get_linked_records_in_field(
             wrapper, 'accelerator_experiments.record'):
         by_control_number[resolved['control_number']] = resolved
     return by_control_number
Exemplo n.º 12
0
def populate_bookautocomplete(record):
    """Fill in the ``bookautocomplete`` field of Literature records.

    The suggester input gathers imprint dates, imprint publishers, ISBNs,
    author full names and titles, without falsy values. The record is
    modified in place.
    """
    author_names = force_list(get_value(record, 'authors.full_name', default=[]))
    title_values = force_list(get_value(record, 'titles.title', default=[]))

    candidates = []
    for path in ('imprints.date', 'imprints.publisher', 'isbns.value'):
        candidates.extend(force_list(get_value(record, path, default=[])))
    candidates.extend(author_names)
    candidates.extend(title_values)

    record['bookautocomplete'] = {
        'input': [candidate for candidate in candidates if candidate],
    }
Exemplo n.º 13
0
def get_and_format_references(record):
    """Render the references of a record.

    Returns one ``[rendered_html, citation_count]`` row per reference, in
    the order of the record's ``references``. Note that each reference is
    flattened in place: the content of its ``reference`` key is merged into
    it and ``publication_info`` is forced to a list.

    .. deprecated:: 2018-06-07
    """
    references = record.get('references')
    if not references:
        return []

    wanted_fields = [
        'authors',
        'citation_count',
        'collaboration',
        'control_number',
        'corporate_author',
        'earliest_date',
        'publication_info',
        'titles',
    ]
    recids = [str(ref['recid']) for ref in references if ref.get('recid')]
    resolved = get_es_records('lit', recids, _source=wanted_fields)

    # Index the resolved records so that the original order can be kept.
    resolved_by_recid = {}
    for hit in resolved:
        resolved_by_recid[hit['control_number']] = hit

    rows = []
    for reference in references:
        linked_record = resolved_by_recid.get(reference.get('recid'), {})

        if 'reference' in reference:
            reference.update(reference['reference'])
            del reference['reference']
        if 'publication_info' in reference:
            reference['publication_info'] = force_list(
                reference['publication_info'])

        rendered = render_template_to_string(
            'inspirehep_theme/references.html',
            record=linked_record,
            reference=reference
        )
        rows.append([rendered, linked_record.get('citation_count', '')])

    return rows
Exemplo n.º 14
0
def populate_author_suggest(record, *args, **kwargs):
    """Fill in the ``author_suggest`` field of Authors records.

    The suggester input gathers every value found under the name paths
    below; the record is modified in place.
    """
    name_paths = (
        'name.preferred_name',
        'name.previous_names',
        'name.name_variants',
        'name.native_names',
        'name.value',
    )

    per_path_values = [force_list(get_value(record, path)) for path in name_paths]

    names = []
    for values in per_path_values:
        names.extend(values)

    record['author_suggest'] = {
        'input': names
    }
Exemplo n.º 15
0
def populate_experiment_suggest(record):
    """Fill in the ``experiment_suggest`` field of experiment records.

    The suggester input gathers every truthy value found under the paths
    below; the record is modified in place.
    """
    suggest_paths = (
        'accelerator.value',
        'collaboration.value',
        'experiment.short_name',
        'experiment.value',
        'institutions.value',
        'legacy_name',
        'long_name',
        'name_variants',
    )

    per_path_values = [force_list(get_value(record, path)) for path in suggest_paths]

    candidates = []
    for values in per_path_values:
        candidates.extend(values)

    record['experiment_suggest'] = {
        'input': [candidate for candidate in candidates if candidate],
    }
Exemplo n.º 16
0
def populate_earliest_date(sender, json, *args, **kwargs):
    """Fill in the ``earliest_date`` field of Literature records.

    Only HEP records are handled. All dates found under the paths below are
    converted to strings and handed to ``earliest_date``; a truthy result
    is stored on ``json`` in place.
    """
    if not is_hep(json):
        return

    candidate_paths = (
        'preprint_date',
        'thesis_info.date',
        'thesis_info.defense_date',
        'publication_info.year',
        'legacy_creation_date',
        'imprints.date',
    )

    per_path_values = [force_list(get_value(json, path)) for path in candidate_paths]

    found_dates = []
    for values in per_path_values:
        for value in values:
            found_dates.append(str(value))

    if not found_dates:
        return

    earliest = earliest_date(found_dates)
    if earliest:
        json['earliest_date'] = earliest
Exemplo n.º 17
0
def populate_earliest_date(record):
    """Fill in the ``earliest_date`` field of Literature records.

    All dates found under the paths below are converted to strings and
    handed to ``earliest_date``; a truthy result is stored on the record
    in place.
    """
    candidate_paths = (
        'preprint_date',
        'thesis_info.date',
        'thesis_info.defense_date',
        'publication_info.year',
        'legacy_creation_date',
        'imprints.date',
    )

    per_path_values = [force_list(get_value(record, path)) for path in candidate_paths]

    found_dates = []
    for values in per_path_values:
        for value in values:
            found_dates.append(str(value))

    if not found_dates:
        return

    earliest = earliest_date(found_dates)
    if earliest:
        record['earliest_date'] = earliest
 def visit_less_equal_than_op(self, node, fieldnames):
     """Build the range queries for a "less than or equal" operator node."""
     fields = force_list(fieldnames)
     upper_bound = {'lte': node.op.value}
     return self._generate_range_queries(fields, upper_bound)
Exemplo n.º 19
0
 def no_arxiv_in_dois(obj):
     """Return whether ``obj`` has DOIs and any DOI source is not arXiv.

     The comparison with ``arxiv`` is case-insensitive.
     """
     if 'dois' not in obj:
         return False
     sources = force_list(get_value(obj, 'dois.source'))
     return any(source.lower() != 'arxiv' for source in sources)
Exemplo n.º 20
0
 def get_titles(self, data):
     """Move a truthy ``title`` of ``data`` into its ``titles`` list.

     The ``title`` key is always removed from ``data``. Returns
     ``data['titles']`` or ``missing`` when there is none.
     """
     single_title = data.pop('title', None)
     if not single_title:
         return data.get('titles', missing)
     data['titles'] = force_list(single_title)
     return data.get('titles', missing)
Exemplo n.º 21
0
def _authors(key, value):
    """Build the author entries described by a MARC author field.

    Args:
        key (str): the MARC key of the field; keys starting with ``701``
            mark the authors as supervisors.
        value (dict): the subfields of the MARC field.

    Returns:
        list(dict): one entry per full name found in the ``a`` subfield.
        When there is exactly one full name the entry carries every piece
        of information of the field; otherwise only the information that
        is not tied to a single person is attached to each entry.
    """
    def _get_affiliations(value):
        """Pair the ``u`` names with the ``z`` recids of the affiliations."""
        result = []

        u_values = force_list(value.get('u'))
        z_values = force_list(value.get('z'))

        # XXX: we zip only when they have the same length, otherwise
        #      we might match a value with the wrong recid.
        if len(u_values) == len(z_values):
            for u_value, z_value in zip(u_values, z_values):
                result.append({
                    'record': get_record_ref(z_value, 'institutions'),
                    'value': u_value,
                })
        else:
            for u_value in u_values:
                result.append({'value': u_value})

        return dedupe_list(result)

    def _get_affiliations_identifiers(value):
        """Split the ``t`` subfields into ``schema`` and ``value``."""
        # Split on the first colon only: the identifier itself may contain
        # colons, which would otherwise break the two-way unpacking below.
        t_values = (t_value.split(':', 1)
                    for t_value in dedupe_list(force_list(value.get('t'))))

        return [{
            'schema': schema.upper(),
            'value': identifier
        } for schema, identifier in t_values]

    def _get_curated_relation(value):
        """Return ``True`` when ``y`` is ``'1'``, ``None`` otherwise."""
        return value.get('y') == '1' or None

    def _get_emails(value):
        """Return the ``m`` subfields without their ``email:`` prefix."""
        return [
            el[6:] if el.startswith('email:') else el
            for el in force_list(value.get('m'))
        ]

    def _get_full_names(value):
        """Return the ``a`` subfields as a list."""
        return force_list(value.get('a'))

    def _get_ids(value):
        """Collect the identifiers in the ``i``, ``j`` and ``w`` subfields."""
        def _is_jacow(j_value):
            return j_value.upper().startswith('JACOW-')

        def _is_orcid(j_value):
            # A bare ``ORCID:`` prefix with nothing after it is not an id.
            return j_value.upper().startswith('ORCID:') and len(j_value) > 6

        def _is_naked_orcid(j_value):
            return ORCID.match(j_value)

        def _is_cern(j_value):
            return j_value.startswith('CCID-')

        result = []

        i_values = force_list(value.get('i'))
        for i_value in i_values:
            result.append({
                'schema': 'INSPIRE ID',
                'value': i_value,
            })

        j_values = force_list(value.get('j'))
        for j_value in j_values:
            if _is_jacow(j_value):
                # Normalise the capitalisation of the prefix.
                result.append({
                    'schema': 'JACOW',
                    'value': 'JACoW-' + j_value[6:],
                })
            elif _is_orcid(j_value):
                result.append({
                    'schema': 'ORCID',
                    'value': j_value[6:].replace('.', ''),
                })
            elif _is_naked_orcid(j_value):
                result.append({
                    'schema': 'ORCID',
                    'value': j_value,
                })
            elif _is_cern(j_value):
                # ``CCID-`` becomes ``CERN-``; both prefixes are 5 chars long.
                result.append({
                    'schema': 'CERN',
                    'value': 'CERN-' + j_value[5:],
                })

        w_values = force_list(value.get('w'))
        for w_value in w_values:
            result.append({
                'schema': 'INSPIRE BAI',
                'value': w_value,
            })

        return dedupe_list(result)

    def _get_inspire_roles(value):
        """Derive the roles from the ``e`` subfields and the MARC key."""
        result = []

        e_values = force_list(value.get('e'))
        if any(el.lower().startswith('ed') for el in e_values):
            result.append('editor')

        if key.startswith('701'):
            result.append('supervisor')

        return result

    def _get_raw_affiliations(value):
        """Wrap the ``v`` subfields as deduplicated raw affiliations."""
        return dedupe_list([{
            'value': el
        } for el in force_list(value.get('v'))])

    def _get_record(value):
        """Build the reference to the author record from the ``x`` subfield."""
        return get_record_ref(maybe_int(force_single_element(value.get('x'))),
                              'authors')

    full_names = _get_full_names(value)
    if len(full_names) == 1:
        return [
            {
                'affiliations': _get_affiliations(value),
                'affiliations_identifiers':
                _get_affiliations_identifiers(value),
                'alternative_names': force_list(value.get('q')),
                'curated_relation': _get_curated_relation(value),
                'emails': _get_emails(value),
                'full_name': full_names[0],
                'ids': _get_ids(value),
                'inspire_roles': _get_inspire_roles(value),
                'raw_affiliations': _get_raw_affiliations(value),
                'record': _get_record(value),
            },
        ]
    else:
        # With zero or several names the personal information (emails, ids,
        # record, ...) is not attached to any of the entries.
        return [{
            'affiliations':
            _get_affiliations(value),
            'affiliations_identifiers':
            _get_affiliations_identifiers(value),
            'full_name':
            full_name,
            'inspire_roles':
            _get_inspire_roles(value),
            'raw_affiliations':
            _get_raw_affiliations(value),
        } for full_name in full_names]
Exemplo n.º 22
0
def authors2marc(self, key, value):
    """Populate the ``100`` MARC field.

    The first author is returned as the ``100`` field. As a side effect,
    every other author is appended to ``self["701"]`` when it has the
    ``supervisor`` role and to ``self["700"]`` otherwise.
    """
    authors = force_list(value)

    def _collect_ids(author):
        inspire_ids = []
        j_values = []
        for identifier in author.get('ids') or []:
            schema = identifier.get('schema')
            if schema == 'INSPIRE ID':
                inspire_ids.append(identifier.get('value'))
            elif schema == 'ORCID':
                j_values.append('ORCID:' + identifier.get('value'))
            elif schema == 'JACOW':
                j_values.append(identifier.get('value'))
            elif schema == 'CERN':
                # ``CERN-`` becomes ``CCID-``; both prefixes are 5 chars long.
                j_values.append('CCID-' + identifier.get('value')[5:])
        return {'i': inspire_ids, 'j': j_values}

    def _values_of(author, field):
        return [entry.get('value') for entry in author.get(field, [])]

    def _affiliation_identifiers(author):
        formatted = []
        for entry in author.get('affiliations_identifiers', []):
            formatted.append(
                u'{}:{}'.format(entry.get('schema'), entry.get('value')))
        return formatted

    def _editor_markers(author):
        roles = force_list(author.get('inspire_roles'))
        return ['ed.' for role in roles if role == 'editor']

    def _build_100_700(author):
        ids = _collect_ids(author)
        return {
            'a': author.get('full_name'),
            'e': _editor_markers(author),
            'q': author.get('alternative_names'),
            'i': ids.get('i'),
            'j': ids.get('j'),
            'm': author.get('emails'),
            't': _affiliation_identifiers(author),
            'u': _values_of(author, 'affiliations'),
            'v': _values_of(author, 'raw_affiliations'),
        }

    def _build_701(author):
        ids = _collect_ids(author)
        return {
            'a': author.get('full_name'),
            'q': author.get('alternative_names'),
            'i': ids.get('i'),
            'j': ids.get('j'),
            'u': _values_of(author, 'affiliations'),
            'v': _values_of(author, 'raw_affiliations'),
        }

    # The side fields are only created when there is someone to put in them.
    if len(authors) > 1:
        self["700"] = []
        self["701"] = []

    for other in authors[1:]:
        if 'supervisor' in other.get('inspire_roles', []):
            self["701"].append(_build_701(other))
        else:
            self["700"].append(_build_100_700(other))

    return _build_100_700(authors[0])
Exemplo n.º 23
0
 def get_pid_values(self):
     """Return the set of values found at ``self.pid_value_path`` in ``self.data``."""
     found = get_value(self.data, self.pid_value_path, default=[])
     # Tuples and lists are used as they are; anything else is wrapped first.
     values = found if isinstance(found, (tuple, list)) else force_list(found)
     return set(values)
Exemplo n.º 24
0
    def authors(self):
        """Return the ``author`` entries of the record, each run through ``get_author``."""
        converted = []
        for raw_author in force_list(self.record.get("author")):
            converted.append(self.get_author(raw_author))
        return converted
Exemplo n.º 25
0
 def _get_emails(value):
     """Return the ``m`` subfields without their ``email:`` prefix."""
     prefix = 'email:'
     emails = []
     for raw in force_list(value.get('m')):
         emails.append(raw[len(prefix):] if raw.startswith(prefix) else raw)
     return emails
Exemplo n.º 26
0
    def _get_affiliations_identifiers(value):
        """Split the deduplicated ``t`` subfields into ``schema`` and ``value``."""
        identifiers = []
        for raw in dedupe_list(force_list(value.get('t'))):
            # Only the first colon separates the schema from the identifier.
            schema, identifier = raw.split(':', 1)
            identifiers.append({'schema': schema.upper(), 'value': identifier})
        return identifiers
Exemplo n.º 27
0
def ranks(self, key, value):
    """Populate the ``ranks`` key from the ``a`` subfields."""
    normalized = []
    for raw_rank in force_list(value.get('a')):
        normalized.append(normalize_rank(raw_rank))
    return normalized
Exemplo n.º 28
0
def force_single_element(obj):
    """Return the first element of ``force_list(obj)``, or ``None`` if it is empty."""
    items = force_list(obj)
    return items[0] if items else None
Exemplo n.º 29
0
    def serialize(self, pid, record, links_factory=None):
        """Return different metrics for a given author recid.

        The literature records of the author are scanned to count the
        publications, sum their citations, count the publications per
        document type and collect their categories and keywords; the
        h-index and the i10-index are computed from the citation counts.

        :param pid:
            Persistent identifier instance.

        :param record:
            Record instance.

        :param links_factory:
            Factory function for the link generation, which are added to
            the response.

        :returns:
            The statistics serialized as a JSON string.
        """
        author_pid = pid.pid_value

        fields = set()
        keywords = []

        statistics = {}
        statistics['citations'] = 0
        statistics['publications'] = 0
        statistics['types'] = {}

        statistics_citations = {}

        query = Q('match', authors__recid=author_pid)
        search = LiteratureSearch().query('nested', path='authors', query=query)\
                                   .params(_source=[
                                       'citation_count',
                                       'control_number',
                                       'facet_inspire_doc_type',
                                       'facet_inspire_categories',
                                       'keywords',
                                   ])

        for result in search.scan():
            result_source = result.to_dict()

            # Increment the count of the total number of publications.
            statistics['publications'] += 1

            # Increment the count of citations.
            citation_count = result_source.get('citation_count', 0)

            statistics['citations'] += citation_count
            statistics_citations[result_source['control_number']] = \
                citation_count

            # Count how many times certain type of publication was published.
            # The type is recomputed for every hit: a hit without any
            # document type must neither reuse the type of the previous hit
            # nor fail because the name was never bound.
            doc_types = result_source.get('facet_inspire_doc_type', [])
            publication_type = doc_types[0] if doc_types else None

            if publication_type:
                statistics['types'][publication_type] = \
                    statistics['types'].get(publication_type, 0) + 1

            # Get fields.
            for field in result_source.get('facet_inspire_categories', []):
                fields.add(field)

            # Get keywords.
            keywords.extend([
                k for k in force_list(
                    get_value(result_source, 'keywords.value'))
                if k != '* Automatic Keywords *'])

        # Calculate h-index together with i10-index.
        statistics['hindex'] = calculate_h_index(statistics_citations)
        statistics['i10index'] = calculate_i10_index(statistics_citations)

        if fields:
            statistics['fields'] = list(fields)

        # Return the top 25 keywords.
        if keywords:
            counter = Counter(keywords)
            statistics['keywords'] = [{
                'count': count,
                'keyword': keyword
            } for keyword, count in counter.most_common(25)]

        return json.dumps(statistics)
Exemplo n.º 30
0
 def _is_not_for_hal(value):
     """Return whether a ``c`` subfield says ``NOT HAL``, ignoring case."""
     upper_c_values = {c_value.upper() for c_value in force_list(value.get('c'))}
     return 'NOT HAL' in upper_c_values
Exemplo n.º 31
0
 def _is_for_cds(value):
     """Return whether a ``c`` subfield says ``CDS``, ignoring case."""
     upper_c_values = {c_value.upper() for c_value in force_list(value.get('c'))}
     return 'CDS' in upper_c_values
Exemplo n.º 32
0
def historical_data(self, key, value):
    """Return the ``a`` subfields of the field as a list."""
    a_values = value.get('a')
    return force_list(a_values)
Exemplo n.º 33
0
def formdata_to_model(obj, formdata):
    """Manipulate form data to match literature data model.

    Args:
        obj: the workflow object of the submission. Its ``id`` and
            ``id_user`` are recorded in the acquisition source, and its
            ``extra_data`` receives the submitted non-arXiv URL.
        formdata (dict): the submitted form; it is deep-copied and never
            modified.

    Returns:
        the record assembled by the ``LiteratureBuilder``, after
        ``validate_record`` has been called on it.
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(
            builder.make_author(author['full_name'],
                                affiliations=force_list(author['affiliation'])
                                if author['affiliation'] else None,
                                roles=['author']))

    for supervisor in form_fields.get('supervisors', []):
        # The affiliation to check is the supervisor's own one, not the one
        # of the last author seen in the loop above.
        builder.add_author(
            builder.make_author(
                supervisor['full_name'],
                affiliations=force_list(supervisor['affiliation'])
                if supervisor['affiliation'] else None,
                roles=['supervisor']))

    builder.add_title(title=form_fields.get('title'))

    # A conference name takes precedence over the declared document type.
    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])
    if document_type == 'chapter':
        document_type = 'book chapter'

    builder.add_document_type(document_type=document_type)

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None)

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split())

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'), source='user')

    for key in ('extra_comments', 'nonpublic_note', 'hidden_notes',
                'conf_name', 'references'):
        builder.add_private_note(private_notes=form_fields.get(key))

    # A missing or non-numeric year is passed on as ``None``.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created'))

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(defense_date=form_fields.get('defense_date'),
                           degree_type=form_fields.get('degree_type'),
                           institution=form_fields.get('institution'),
                           date=form_fields.get('thesis_date'))

    if form_fields.get('type_of_doc') == 'chapter':
        if not form_fields.get('journal_title'):
            builder.add_book_series(title=form_fields.get('series_title'))

    if form_fields.get('type_of_doc') == 'book':
        if form_fields.get('journal_title'):
            # The series volume is reported as the journal volume below.
            form_fields['volume'] = form_fields.get('series_volume')
        else:
            builder.add_book_series(title=form_fields.get('series_title'),
                                    volume=form_fields.get('series_volume'))
        builder.add_book(publisher=form_fields.get('publisher_name'),
                         place=form_fields.get('publication_place'),
                         date=form_fields.get('publication_date'))

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('start_page'),
        page_end=form_fields.get('end_page'),
        artid=form_fields.get('artid'),
        parent_record=form_fields.get('parent_book'))

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment'))

    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    builder.add_title_translation(title=form_fields.get('title_translation'))

    builder.add_title(title=form_fields.get('title_arXiv'), source='arXiv')

    builder.add_title(title=form_fields.get('title_crossref'),
                      source='crossref')

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))

    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef')

    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        # The main URL is only added when no additional URL was given.
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number'))

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter')
    builder.validate_record()

    return builder.record
Exemplo n.º 34
0
def public_notes_500(self, key, value):
    """Populate the ``public_notes`` key.

    One note is built per ``a`` subfield; all of them share the source
    found in the ``9`` subfield.
    """
    notes = []
    for note_text in force_list(value.get('a')):
        notes.append({
            'source': value.get('9'),
            'value': note_text,
        })
    return notes
Exemplo n.º 35
0
    def license(self):
        """Return the ``license`` entries of the record, each run through ``get_license``."""
        converted = []
        for raw_license in force_list(self.record.get("license")):
            converted.append(self.get_license(raw_license))
        return converted
Exemplo n.º 36
0
def _private_notes_595(self, key, value):
    """Populate the ``_private_notes`` key.

    Builds one note per entry of the ``a`` subfield; each note carries the
    ``9`` subfield as its ``source``.
    """
    texts = force_list(value.get('a'))
    source = value.get('9')

    notes = []
    for text in texts:
        notes.append({'source': source, 'value': text})
    return notes
Exemplo n.º 37
0
 def _get_raw_affiliations(value):
     """Wrap every ``v`` subfield entry as ``{'value': ...}`` and dedupe the result."""
     wrapped = []
     for raw_affiliation in force_list(value.get('v')):
         wrapped.append({'value': raw_affiliation})
     return dedupe_list(wrapped)
Exemplo n.º 38
0
def formdata_to_model(obj, formdata):
    """Manipulate form data to match literature data model.

    The function works on a deep copy of ``formdata``, feeds the submitted
    fields one by one into a ``LiteratureBuilder`` and returns the record
    held by that builder.

    Args:
        obj: the object being processed. ``obj.id`` and ``obj.id_user`` are
            read for the acquisition source, and
            ``obj.extra_data['submission_pdf']`` is set when a non-arXiv
            ``url`` was submitted.
        formdata: mapping with the submitted form fields.

    Returns:
        The ``record`` attribute of the builder after all fields were added.
    """
    def _is_arxiv_url(url):
        return 'arxiv.org' in url

    def _affiliations_of(person):
        # Shared by authors and supervisors so that each entry is always
        # judged by its *own* affiliation.
        affiliation = person['affiliation']
        return force_list(affiliation) if affiliation else None

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(builder.make_author(
            author['full_name'],
            affiliations=_affiliations_of(author),
            roles=['author']
        ))

    for supervisor in form_fields.get('supervisors', []):
        # The condition used to look at ``author['affiliation']``: it reused
        # the last author's value, or raised NameError when the submission
        # had no authors at all.
        builder.add_author(builder.make_author(
            supervisor['full_name'],
            affiliations=_affiliations_of(supervisor),
            roles=['supervisor']
        ))

    builder.add_title(title=form_fields.get('title'))

    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])
    if document_type == 'chapter':
        document_type = 'book chapter'

    builder.add_document_type(
        document_type=document_type
    )

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None
    )

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split()
        )

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user'
    )

    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name', 'references'):
        builder.add_private_note(
            private_notes=form_fields.get(key)
        )

    # A missing or non-numeric year is passed on as ``None``.
    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created')
    )

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(
            defense_date=form_fields.get('defense_date'),
            degree_type=form_fields.get('degree_type'),
            institution=form_fields.get('institution'),
            date=form_fields.get('thesis_date')
        )

    if form_fields.get('type_of_doc') == 'chapter':
        if not form_fields.get('journal_title'):
            builder.add_book_series(title=form_fields.get('series_title'))

    if form_fields.get('type_of_doc') == 'book':
        if form_fields.get('journal_title'):
            # With a journal title the series volume is reported as the
            # journal volume of the publication info added below.
            form_fields['volume'] = form_fields.get('series_volume')
        else:
            builder.add_book_series(
                title=form_fields.get('series_title'),
                volume=form_fields.get('series_volume')
            )
        builder.add_book(
            publisher=form_fields.get('publisher_name'),
            place=form_fields.get('publication_place'),
            date=form_fields.get('publication_date')
        )

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('start_page'),
        page_end=form_fields.get('end_page'),
        artid=form_fields.get('artid'),
        parent_record=form_fields.get('parent_book')
    )

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment')
    )

    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    if form_fields.get('title_translation'):
        builder.add_title_translation(
            title=form_fields['title_translation'],
            language='en',
        )

    builder.add_title(
        title=form_fields.get('title_arXiv'),
        source='arXiv'
    )

    builder.add_title(
        title=form_fields.get('title_crossref'),
        source='crossref'
    )

    builder.add_license(url=form_fields.get('license_url'))

    builder.add_public_note(public_note=form_fields.get('public_notes'))

    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef'
    )

    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        # The main URL is only added to the record when no additional URL
        # was given.
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number')
        )

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter'
    )

    return builder.record
Exemplo n.º 39
0
 def _get_inspire_roles(value):
     """Return one ``'ed.'`` for every ``inspire_roles`` entry equal to ``'editor'``."""
     editor_marks = []
     for role in force_list(value.get('inspire_roles')):
         if role == 'editor':
             editor_marks.append('ed.')
     return editor_marks
 def visit_greater_than_op(self, node, fieldnames):
     """Generate range queries with a ``gt`` bound taken from ``node.op.value``."""
     target_fields = force_list(fieldnames)
     bounds = {'gt': node.op.value}
     return self._generate_range_queries(target_fields, bounds)
Exemplo n.º 41
0
 def get_titles(self, data):
     """Move a truthy ``title`` entry of ``data`` into ``titles`` as a list.

     ``title`` is always removed from ``data``. Returns ``data['titles']``,
     or ``missing`` when there is no such key.
     """
     single_title = data.pop('title', None)
     if not single_title:
         return data.get('titles', missing)

     data['titles'] = force_list(single_title)
     return data.get('titles', missing)
Exemplo n.º 42
0
def dois(self, key, value):
    """Populate the ``dois`` key.

    Also populates the ``persistent_identifiers`` key through side effects.

    Every entry of ``value`` whose ``a`` subfield passes ``is_doi`` (and whose
    ``2`` subfield is empty or ``DOI``) becomes a DOI; all other entries
    become persistent identifiers, tagged ``HDL`` when they pass
    ``is_handle``.
    """
    material_by_q = {
        'addendum': 'addendum',
        'ebook': 'publication',
        'erratum': 'erratum',
        'preprint': 'preprint',
        'publication': 'publication',
        'reprint': 'reprint',
        'translation': 'translation',
    }

    def _schema_allows(type_, expected):
        # An empty schema is compatible with any identifier kind.
        return not type_ or type_.upper() == expected

    collected_dois = self.get('dois', [])
    collected_pids = self.get('persistent_identifiers', [])

    for field in force_list(value):
        id_ = force_single_element(field.get('a', ''))

        q_value = force_single_element(field.get('q', ''))
        material = material_by_q.get(q_value.lower())

        schema = force_single_element(field.get('2', ''))

        # The first source that is not the curator marker wins.
        non_curator_sources = [
            el for el in force_list(field.get('9')) if el.upper() != 'CURATOR'
        ]
        source = force_single_element(non_curator_sources)

        if _schema_allows(schema, 'DOI') and is_doi(id_):
            collected_dois.append({
                'material': material,
                'source': source,
                'value': normalize_doi(id_),
            })
            continue

        if _schema_allows(schema, 'HDL') and is_handle(id_):
            schema = 'HDL'
        collected_pids.append({
            'material': material,
            'schema': schema,
            'source': source,
            'value': id_,
        })

    self['persistent_identifiers'] = collected_pids
    return collected_dois
Exemplo n.º 43
0
 def _is_hidden(value):
     """Return ``True`` for a ``HIDDEN`` flag in ``o`` or an ``arxiv`` source, else ``None``."""
     flags = [flag.upper() for flag in force_list(value.get('o'))]
     if 'HIDDEN' in flags:
         return True
     return _get_source(value) == 'arxiv' or None
Exemplo n.º 44
0
 def _has_curator_flag(value):
     """Tell whether any ``9`` subfield entry equals ``CURATOR``, ignoring case."""
     flagged = False
     # Every entry is upper-cased, even after a match was found.
     for entry in force_list(value.get('9')):
         if entry.upper() == 'CURATOR':
             flagged = True
     return flagged
    def visit_value(self, node, fieldnames=None):
        """Generate the query for a plain value node.

        The kind of query depends on whether the value contains a wildcard,
        on whether ``fieldnames`` is a list or a single name, and on which
        entry of ``KEYWORD_TO_ES_FIELDNAME`` it equals.

        :param node:
            Value node; ``node.value`` and ``node.contains_wildcard`` are read.

        :param fieldnames:
            Field name or list of field names to query. A falsy value means
            the ``_all`` field.
        """
        if not fieldnames:
            fieldnames = '_all'

        if node.contains_wildcard:
            # Wildcard dates have a dedicated query generator.
            if ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['date'] == fieldnames:
                return self._generate_date_with_wildcard_query(node.value)

            bai_fieldnames = self._generate_fieldnames_if_bai_query(
                node.value,
                bai_field_variation=FieldVariations.search,
                query_bai_field_if_dots_in_name=True
            )

            # BAI field names, when there are any, take precedence over the given ones.
            query = self._generate_query_string_query(
                node.value,
                fieldnames=bai_fieldnames or fieldnames,
                analyze_wildcard=True
            )

            # Author queries are wrapped in a nested query on the authors path.
            if ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['author'] == fieldnames:
                return generate_nested_query(ElasticSearchVisitor.AUTHORS_NESTED_QUERY_PATH, query)
            return query
        else:
            if isinstance(fieldnames, list):
                if ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['date'] == fieldnames:
                    # Date queries with simple values are transformed into range queries, among the given and the exact
                    # next date, according to the granularity of the given date.
                    return self._generate_range_queries(force_list(fieldnames), {ES_RANGE_EQ_OPERATOR: node.value})

                if ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['journal'] == fieldnames:
                    return self._generate_journal_nested_queries(node.value)

                # Any other list of field names: match the value against all of them.
                return {
                    'multi_match': {
                        'fields': fieldnames,
                        'query': node.value,
                    }
                }
            else:
                if ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['author'] == fieldnames:
                    bai_fieldnames = self._generate_fieldnames_if_bai_query(
                        node.value,
                        bai_field_variation=FieldVariations.search,
                        query_bai_field_if_dots_in_name=True
                    )
                    if bai_fieldnames:
                        if len(bai_fieldnames) == 1:
                            query = {"match": {bai_fieldnames[0]: node.value}}
                            return generate_nested_query(ElasticSearchVisitor.AUTHORS_NESTED_QUERY_PATH, query)
                        else:
                            # Not an exact BAI pattern match, but node's value looks like BAI (no spaces and dots),
                            # e.g. `S.Mele`. In this case generate a partial match query.
                            return self.visit_partial_match_value(node, bai_fieldnames)

                    return self._generate_author_query(node.value)

                elif ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['exact-author'] == fieldnames:
                    return self._generate_exact_author_query(node.value)

                elif ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['irn'] == fieldnames:
                    # The term that is looked up is the value prefixed with ``SPIRES-``.
                    return {'term': {fieldnames: ''.join(('SPIRES-', node.value))}}

                elif ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['title'] == fieldnames:
                    return self._generate_title_queries(node.value)

                elif ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['type-code'] == fieldnames:
                    return self._generate_type_code_query(node.value)

                elif fieldnames not in ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME.values():
                    # Field name that no keyword maps to: besides querying that field, also look for the
                    # literal `fieldname:value` string as a texkey and in `_all`.
                    colon_value = ':'.join([fieldnames, node.value])
                    given_field_query = generate_match_query(fieldnames, node.value, with_operator_and=True)
                    texkey_query = self._generate_term_query('texkeys.raw', colon_value, boost=2.0)
                    _all_field_query = generate_match_query('_all', colon_value, with_operator_and=True)
                    query = wrap_queries_in_bool_clauses_if_more_than_one([given_field_query, texkey_query, _all_field_query], use_must_clause=False)
                    return wrap_query_in_nested_if_field_is_nested(query, fieldnames, ElasticSearchVisitor.NESTED_FIELDS)

                # Remaining known field names get a plain match query.
                return generate_match_query(fieldnames, node.value, with_operator_and=True)
Exemplo n.º 46
0
def references2marc(self, key, value):
    """Populate the ``999C5`` MARC field.

    Builds the subfields of one ``999C5`` field from ``value`` (one entry of
    the record's references). ``value`` itself is not modified: the function
    works on a shallow copy of ``value['reference']`` and on copies of the
    lists it appends to.
    """
    # Shallow copy: ``publication_info`` is rebound below, and that must not
    # leak into the caller's record.
    reference = dict(value.get('reference', {}))

    pids = force_list(reference.get('persistent_identifiers'))
    a_values = ['doi:' + el for el in force_list(reference.get('dois'))]
    a_values.extend(
        ['hdl:' + el['value'] for el in pids if el.get('schema') == 'HDL'])
    a_values.extend(
        ['urn:' + el['value'] for el in pids if el.get('schema') == 'URN'])

    external_ids = force_list(reference.get('external_system_identifiers'))
    # NOTE(review): ``force_list`` appears to hand back list inputs as-is, so
    # lists that get extended/appended to are copied first -- confirm.
    u_values = list(force_list(get_value(reference, 'urls.value')))
    u_values.extend(
        CDS_RECORD_FORMAT.format(el['value']) for el in external_ids
        if el.get('schema') == 'CDS')
    u_values.extend(
        ADS_RECORD_FORMAT.format(el['value']) for el in external_ids
        if el.get('schema') == 'ADS')

    authors = force_list(reference.get('authors'))
    e_values = [
        el['full_name'] for el in authors if el.get('inspire_role') == 'editor'
    ]
    h_values = [
        el['full_name'] for el in authors if el.get('inspire_role') != 'editor'
    ]

    # Copied so that appending the eprint leaves the record's own
    # ``report_numbers`` alone.
    r_values = list(force_list(reference.get('report_numbers')))
    if reference.get('arxiv_eprint'):
        arxiv_eprint = reference['arxiv_eprint']
        # Only identifiers for which ``is_arxiv_post_2007`` holds get the
        # ``arXiv:`` prefix.
        r_values.append(
            ('arXiv:' + arxiv_eprint)
            if is_arxiv_post_2007(arxiv_eprint) else arxiv_eprint)

    if reference.get('publication_info'):
        reference['publication_info'] = convert_new_publication_info_to_old(
            [reference['publication_info']])[0]
    journal_title = get_value(reference, 'publication_info.journal_title')
    journal_volume = get_value(reference, 'publication_info.journal_volume')
    page_start = get_value(reference, 'publication_info.page_start')
    page_end = get_value(reference, 'publication_info.page_end')
    artid = get_value(reference, 'publication_info.artid')
    s_value = build_pubnote(journal_title, journal_volume, page_start,
                            page_end, artid)

    m_value = ' / '.join(force_list(reference.get('misc')))

    return {
        '0': get_recid_from_ref(value.get('record')),
        '9': 'CURATOR' if value.get('legacy_curated') else None,
        'a': a_values,
        'b': get_value(reference, 'publication_info.cnum'),
        'c': reference.get('collaborations'),
        'e': e_values,
        'h': h_values,
        'i': reference.get('isbn'),
        'k': reference.get('texkey'),
        'm': m_value,
        'o': reference.get('label'),
        'p': get_value(reference, 'imprint.publisher'),
        'q': get_value(reference, 'publication_info.parent_title'),
        'r': r_values,
        's': s_value,
        't': get_value(reference, 'title.title'),
        'u': u_values,
        'x': get_value(value, 'raw_refs.value'),
        'y': get_value(reference, 'publication_info.year'),
        'z': 1 if value.get('curated_relation') else 0,
    }
Exemplo n.º 47
0
def write(data, _id=None, xpath=None, link_text=None):
    """Send ``data`` as keys to the element located through ``wait_for``.

    ``data`` may be a single value or a list; each entry is passed to
    ``send_keys`` as a separate argument.
    """
    keys = force_list(data)
    target = wait_for(_id=_id, xpath=xpath, link_text=link_text)
    return target.send_keys(*keys)
Exemplo n.º 48
0
def _get_collections(marcjson):
    """Return all ``980__.a`` values of ``marcjson``, flattened and lower-cased."""
    groups = [force_list(el) for el in force_list(get_value(marcjson, '980__.a'))]

    normalized_collections = []
    for group in groups:
        for collection in group:
            normalized_collections.append(collection.lower())

    return normalized_collections
Exemplo n.º 49
0
 def _get_full_names(value):
     """Return the ``a`` subfield of ``value`` coerced to a list."""
     full_names = value.get('a')
     return force_list(full_names)
Exemplo n.º 50
0
    def serialize(self, pid, record, links_factory=None):
        """Return different metrics for a given author recid.

        Scans the literature records that have the author among their
        ``authors`` and aggregates publication and citation counts, document
        types, categories, keywords, h-index and i10-index.

        :param pid:
            Persistent identifier instance; its ``pid_value`` is the author
            recid that is searched for.

        :param record:
            Record instance (not used by this serializer).

        :param links_factory:
            Factory function for the link generation, which are added to
            the response (not used by this serializer).

        :return:
            The statistics as a JSON string.
        """
        author_pid = pid.pid_value

        fields = set()
        keywords = []
        statistics_citations = {}
        statistics = {
            'citations': 0,
            'publications': 0,
            'types': {},
        }

        query = Q('match', authors__recid=author_pid)
        search = LiteratureSearch().query('nested', path='authors', query=query)\
                                   .params(_source=[
                                       'citation_count',
                                       'control_number',
                                       'facet_inspire_doc_type',
                                       'facet_inspire_categories',
                                       'keywords',
                                   ])

        for result in search.scan():
            result_source = result.to_dict()

            # Increment the count of the total number of publications.
            statistics['publications'] += 1

            # Increment the count of citations.
            citation_count = result_source.get('citation_count', 0)

            statistics['citations'] += citation_count
            statistics_citations[result_source['control_number']] = \
                citation_count

            # Count how many times certain type of publication was published.
            # The type is looked up afresh for every hit: a hit without a
            # document type must neither reuse the type of the previous hit
            # nor fail on an unbound name.
            doc_types = result_source.get('facet_inspire_doc_type', [])
            publication_type = doc_types[0] if doc_types else None

            if publication_type:
                types = statistics['types']
                types[publication_type] = types.get(publication_type, 0) + 1

            # Get fields.
            fields.update(result_source.get('facet_inspire_categories', []))

            # Get keywords.
            keywords.extend(
                k
                for k in force_list(get_value(result_source, 'keywords.value'))
                if k != '* Automatic Keywords *'
            )

        # Calculate h-index together with i10-index.
        statistics['hindex'] = calculate_h_index(statistics_citations)
        statistics['i10index'] = calculate_i10_index(statistics_citations)

        if fields:
            statistics['fields'] = list(fields)

        # Return the top 25 keywords.
        if keywords:
            counter = Counter(keywords)
            statistics['keywords'] = [{
                'count': count,
                'keyword': keyword
            } for keyword, count in counter.most_common(25)]

        return json.dumps(statistics)