Exemplo n.º 1
0
def publication_info(self, key, value):
    """Populate the ``publication_info`` key.

    Args:
        key: the field tag; a tag starting with ``7731`` marks the entry as
            hidden.
        value: mapping from subfield code to subfield content.

    Returns:
        dict: one ``publication_info`` entry assembled from the subfields.
    """
    def _subfield(code, default=None):
        # Reduce the content of subfield ``code`` to a single element.
        return force_single_element(value.get(code, default))

    def _linked_record(code, endpoint):
        # Build a record reference from the id stored in subfield ``code``.
        return get_record_ref(maybe_int(_subfield(code)), endpoint)

    def _cnum():
        # Slashes become dashes and the result is upper-cased.
        return _subfield('w', '').replace('/', '-').upper()

    def _material():
        # Only values listed in the material schema enum are kept.
        allowed = load_schema('elements/material')['enum']
        candidate = _subfield('m', '').lower()
        return candidate if candidate in allowed else None

    def _parent_isbn():
        isbn = _subfield('z', '')
        return normalize_isbn(isbn) if isbn else None

    def _freetext():
        # Freetext starting with ``#DONE`` is dropped.
        text = _subfield('x', '')
        return None if text.startswith('#DONE') else text

    page_start, page_end, artid = split_page_artid(value.get('c'))

    parent_record = _linked_record('0', 'literature')
    journal_record = _linked_record('1', 'journals')
    conference_record = _linked_record('2', 'conferences')

    return {
        'artid': artid,
        'cnum': _cnum(),
        'conf_acronym': _subfield('q'),
        'conference_record': conference_record,
        'hidden': key.startswith('7731') or None,
        'journal_issue': _subfield('n'),
        'journal_record': journal_record,
        'journal_title': _subfield('p'),
        'journal_volume': _subfield('v'),
        'material': _material(),
        'page_end': page_end,
        'page_start': page_start,
        'parent_isbn': _parent_isbn(),
        'parent_record': parent_record,
        'parent_report_number': _subfield('r'),
        'pubinfo_freetext': _freetext(),
        'year': maybe_int(_subfield('y')),
    }
Exemplo n.º 2
0
def positions(self, key, value):
    """Build one position entry from a field's subfields.

    Each ``z`` element is either the literal ``current`` (case-insensitive),
    which flags the position as current, or is treated as an institution
    record id.

    Args:
        key: the field tag (unused).
        value: mapping from subfield code to subfield content.

    Returns:
        dict: the position, with ``institution`` set to ``None`` when the
        ``a`` subfield is empty.
    """
    is_current = False
    institution_ref = None

    for marker in force_list(value.get('z')):
        if marker.lower() != 'current':
            institution_ref = get_record_ref(maybe_int(marker), 'institutions')
        else:
            is_current = True

    institution_name = value.get('a')
    institution = None
    if institution_name:
        institution = {
            'name': institution_name,
            'record': institution_ref,
            'curated_relation': institution_ref is not None,
        }

    emails = list(force_list(value.get('m')))
    old_emails = list(force_list(value.get('o')))

    raw_rank = value.get('r')
    rank = normalize_rank(raw_rank)

    return {
        'institution': institution,
        'emails': emails,
        'old_emails': old_emails,
        '_rank': raw_rank,
        'rank': rank,
        'start_date': normalize_date(value.get('s')),
        'end_date': normalize_date(value.get('t')),
        'current': is_current,
    }
Exemplo n.º 3
0
def related_records(self, key, value):
    """Build a related-institution entry from a field's subfields.

    Subfield ``0`` holds the id of the related record and subfield ``w`` a
    one-letter relation code.

    Returns:
        dict or None: the entry, or ``None`` when there is no record
        reference or the relation code is not recognised.
    """
    relation_by_code = {
        'a': 'predecessor',
        'r': 'other',
        't': 'parent',
    }

    record = get_record_ref(maybe_int(value.get('0')), 'institutions')
    relation = relation_by_code.get(value.get('w'))

    if not (record and relation):
        return None

    related = {
        'curated_relation': record is not None,
        'record': record,
    }
    # An "other" relation is stored under the freetext key instead.
    if relation == 'other':
        related['relation_freetext'] = relation
    else:
        related['relation'] = relation

    return related
Exemplo n.º 4
0
def related_records(self, key, value):
    """Build a related-journal entry from a field's subfields.

    Subfield ``0`` holds the id of the related record, subfield ``w`` a
    one-letter relation code and subfield ``i`` a freetext description.

    Returns:
        dict or None: the entry, or ``None`` when there is no record
        reference or the relation code is not recognised.
    """
    relation_by_code = {
        'a': 'predecessor',
        'b': 'other',
        'r': 'other',
    }

    record = get_record_ref(maybe_int(value.get('0')), 'journals')
    relation = relation_by_code.get(value.get('w'))

    if not (record and relation):
        return None

    related = {
        'curated_relation': record is not None,
        'record': record,
    }
    # "other" relations carry the freetext from ``i`` rather than the code.
    if relation == 'other':
        related['relation_freetext'] = value.get('i')
    else:
        related['relation'] = relation

    return related
def get_recid_from_ref(ref_obj):
    """Retrieve recid from jsonref reference object.

    Args:
        ref_obj: a jsonref reference, expected to be a dict carrying the
            target URL under the ``$ref`` key.

    Returns:
        The result of ``maybe_int`` applied to the last ``/``-separated
        segment of the URL, or ``None`` when ``ref_obj`` is not a dict or its
        ``$ref`` is missing, ``None`` or empty.
    """
    if not isinstance(ref_obj, dict):
        return None

    url = ref_obj.get("$ref")
    if not url:
        # ``$ref`` may be present but set to ``None``; there is nothing to
        # parse, so return early instead of calling ``.split`` on it.
        return None

    return maybe_int(url.rsplit("/", 1)[-1])
Exemplo n.º 6
0
def collaboration(self, key, value):
    """Build a collaboration entry from a field's subfields.

    Subfield ``0`` holds the id of the linked experiment record and subfield
    ``g`` the collaboration value.
    """
    experiment_recid = maybe_int(value.get('0'))
    experiment_ref = get_record_ref(experiment_recid, 'experiments')
    collaboration_value = force_single_element(value.get('g'))

    return {
        'curated_relation': experiment_ref is not None,
        'record': experiment_ref,
        'value': collaboration_value,
    }
Exemplo n.º 7
0
    def _external_system_identifiers(self, key, value):
        """Return the SPIRES identifiers listed in subfield ``a``.

        As a side effect, when subfield ``d`` holds a usable record id,
        ``self['new_record']`` is set to a reference to that record.
        """
        replacement_recid = maybe_int(value.get('d'))
        if replacement_recid:
            self['new_record'] = get_record_ref(replacement_recid, endpoint)

        identifiers = []
        for spires_id in force_list(value.get('a')):
            identifiers.append({'schema': 'SPIRES', 'value': spires_id})

        return identifiers
Exemplo n.º 8
0
def related_records_78708(self, key, value):
    """Populate the ``related_records`` key.

    Returns ``None`` when subfield ``w`` does not yield a record reference;
    otherwise the entry carries the freetext relation from subfield ``i``.
    """
    literature_ref = get_record_ref(maybe_int(value.get('w')), 'literature')
    if not literature_ref:
        return None

    return {
        'curated_relation': literature_ref is not None,
        'record': literature_ref,
        'relation_freetext': value.get('i'),
    }
Exemplo n.º 9
0
def related_records_78502(self, key, value):
    """Populate the ``related_records`` key.

    Returns ``None`` when subfield ``w`` does not yield a record reference;
    otherwise the referenced record is reported as a ``successor``.
    """
    literature_ref = get_record_ref(maybe_int(value.get('w')), 'literature')
    if not literature_ref:
        return None

    return {
        'curated_relation': literature_ref is not None,
        'record': literature_ref,
        'relation': 'successor',
    }
Exemplo n.º 10
0
    def number_of_pages(self):
        """Return the page count found in the comments, or ``None``.

        All ``comments`` text nodes are joined with ``'; '`` and searched
        with ``RE_PAGES``; the first captured group is passed to
        ``maybe_int``.
        """
        comment_texts = self.root.xpath('.//comments/text()').extract()
        match = RE_PAGES.search('; '.join(comment_texts))

        if not match:
            return None

        return maybe_int(match.group(1))
Exemplo n.º 11
0
def extract_journal_info(obj, eng):
    """Extract the journal information from ``pubinfo_freetext``.

    For every entry of ``publication_info``, ``extract_journal_reference`` is
    run on its ``pubinfo_freetext`` and every non-empty extracted value is
    copied onto the entry. Entries without ``pubinfo_freetext`` are left
    unchanged. Finally the whole list is passed through
    ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    if not obj.data.get('publication_info'):
        return

    # Extracted keys that are copied verbatim, with their target keys.
    direct_keys = (
        ('title', 'journal_title'),
        ('volume', 'journal_volume'),
    )

    for entry in obj.data['publication_info']:
        try:
            with local_refextract_kbs_path() as kbs_path:
                extracted = extract_journal_reference(
                    entry['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if not extracted:
                continue

            for source_key, target_key in direct_keys:
                if extracted.get(source_key):
                    entry[target_key] = extracted[source_key]

            if extracted.get('page'):
                page_start, page_end, artid = split_page_artid(
                    extracted['page'])
                page_parts = (
                    ('page_start', page_start),
                    ('page_end', page_end),
                    ('artid', artid),
                )
                for target_key, part in page_parts:
                    if part:
                        entry[target_key] = part

            if extracted.get('year'):
                year = maybe_int(extracted['year'])
                if year:
                    entry['year'] = year
        except KeyError:
            # Raised, among others, by entries lacking ``pubinfo_freetext``.
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(
        obj.data['publication_info'])
Exemplo n.º 12
0
    def _get_json_experiments(marc_dict):
        """Yield one experiment entry per name in subfield ``e``.

        Names are paired positionally with the record ids in subfield ``0``;
        when there are no ids at all, every name is paired with ``None``.
        The years (``s``/``d``) and the ``current`` flag (``z``) are shared
        by all yielded entries.
        """
        start_year = maybe_int(marc_dict.get('s'))
        end_year = maybe_int(marc_dict.get('d'))

        names = force_list(marc_dict.get('e'))
        recids = force_list(marc_dict.get('0'))
        if not recids:
            recids = [None] * len(names)

        for experiment_name, experiment_recid in zip(names, recids):
            experiment_ref = get_record_ref(experiment_recid, 'experiments')
            is_current = marc_dict.get('z', '').lower() == 'current'
            yield {
                'curated_relation': experiment_ref is not None,
                'current': is_current,
                'end_year': end_year,
                'name': experiment_name,
                'record': experiment_ref,
                'start_year': start_year,
            }
Exemplo n.º 13
0
def extract_journal_info(obj, eng):
    """Extract the journal information from ``pubinfo_freetext``.

    Runs ``extract_journal_reference`` on the ``pubinfo_freetext`` key of each
    ``publication_info`` and copies every non-empty extracted value onto that
    entry. The list is then replaced by the output of
    ``convert_old_publication_info_to_new``.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    def _merge(entry, extracted):
        # Copy each non-empty extracted value onto ``entry``.
        if extracted.get('title'):
            entry['journal_title'] = extracted['title']

        if extracted.get('volume'):
            entry['journal_volume'] = extracted['volume']

        if extracted.get('page'):
            page_start, page_end, artid = split_page_artid(extracted['page'])
            if page_start:
                entry['page_start'] = page_start
            if page_end:
                entry['page_end'] = page_end
            if artid:
                entry['artid'] = artid

        if extracted.get('year'):
            year = maybe_int(extracted['year'])
            if year:
                entry['year'] = year

    entries = obj.data.get('publication_info')
    if not entries:
        return

    for entry in entries:
        try:
            with local_refextract_kbs_path() as kbs_path:
                extracted = extract_journal_reference(
                    entry['pubinfo_freetext'],
                    override_kbs_files=kbs_path,
                )

            if extracted:
                _merge(entry, extracted)
        except KeyError:
            # Raised, among others, by entries lacking ``pubinfo_freetext``.
            pass

    obj.data['publication_info'] = convert_old_publication_info_to_new(entries)
Exemplo n.º 14
0
def related_records_78002(self, key, values):
    """Append ``predecessor`` entries to the existing ``related_records``.

    One entry is added for every value whose subfield ``w`` yields a record
    reference; the extended list is returned.
    """
    related = self.get('related_records', [])

    for value in force_list(values):
        literature_ref = get_record_ref(maybe_int(value.get('w')), 'literature')
        if not literature_ref:
            continue

        related.append({
            'curated_relation': literature_ref is not None,
            'record': literature_ref,
            'relation': 'predecessor',
        })

    return related
Exemplo n.º 15
0
def related_records_78708(self, key, values):
    """Append freetext-relation entries to the existing ``related_records``.

    One entry is added for every value whose subfield ``w`` yields a record
    reference, with the relation text taken from subfield ``i``; the extended
    list is returned.
    """
    related = self.get('related_records', [])

    for value in force_list(values):
        literature_ref = get_record_ref(maybe_int(value.get('w')), 'literature')
        if not literature_ref:
            continue

        related.append({
            'curated_relation': literature_ref is not None,
            'record': literature_ref,
            'relation_freetext': value.get('i'),
        })

    return related
Exemplo n.º 16
0
def collaborations(self, key, value):
    """Populate the ``collaborations`` key.

    The record reference built from subfield ``0`` is attached only when
    subfield ``g`` normalizes to exactly one collaboration.
    """
    names = normalize_collaboration(value.get('g'))

    if len(names) != 1:
        return [{'value': name} for name in names]

    experiment_ref = get_record_ref(maybe_int(value.get('0')), 'experiments')
    return [{'record': experiment_ref, 'value': names[0]}]
Exemplo n.º 17
0
def copyright(self, key, value):
    """Build a copyright entry from a field's subfields.

    The material label is read from subfield ``e``, falling back to ``3``,
    and mapped to a material value; unknown labels map to ``None``.
    """
    material_by_label = {
        'Article': 'publication',
        'Published thesis as a book': 'publication',
    }

    label = value.get('e')
    if not label:
        label = value.get('3')

    entry = {}
    entry['holder'] = value.get('d')
    entry['material'] = material_by_label.get(label)
    entry['statement'] = value.get('f')
    entry['url'] = value.get('u')
    entry['year'] = maybe_int(value.get('g'))
    return entry
Exemplo n.º 18
0
def related_records(self, key, value):
    """Build a related-experiment entry from a field's subfields.

    Returns ``None`` unless subfield ``0`` yields a record reference and
    subfield ``w`` holds a recognised relation code.
    """
    relation_by_code = {'a': 'predecessor'}

    record = get_record_ref(maybe_int(value.get('0')), 'experiments')
    relation = relation_by_code.get(value.get('w'))

    if not (record and relation):
        return None

    return {
        'curated_relation': record is not None,
        'record': record,
        'relation': relation,
    }
Exemplo n.º 19
0
def collaborations(self, key, value):
    """Populate the ``collaborations`` key.

    Every ``g`` element is normalized separately. The record reference built
    from subfield ``0`` is attached only to elements that normalize to
    exactly one collaboration.
    """
    entries = []

    for g_value in force_list(value.get('g')):
        names = normalize_collaboration(g_value)
        if len(names) != 1:
            entries.extend({'value': name} for name in names)
            continue

        experiment_ref = get_record_ref(maybe_int(value.get('0')), 'experiments')
        entries.append({'record': experiment_ref, 'value': names[0]})

    return entries
Exemplo n.º 20
0
def extract_journal_info(obj, eng):
    """Extract journal, volume etc. from any freetext publication info.

    Each publication note with a ``pubinfo_freetext`` is run through
    ``extract_journal_reference`` (list/tuple freetext is joined with
    ``". "`` first) and the extracted values are copied onto the note. Notes
    that are empty or hold only falsy values are dropped from the result.

    Args:
        obj: a workflow object.
        eng: a workflow engine.
    """
    pubnotes = get_value(obj.data, "publication_info")
    if not pubnotes:
        return

    # Extracted keys copied as-is whenever present, with their target keys.
    direct_keys = (
        ("volume", "journal_volume"),
        ("title", "journal_title"),
    )

    kept = []
    for pubnote in pubnotes:
        if not pubnote:
            continue

        freetext = pubnote.get("pubinfo_freetext")
        extracted = None
        if freetext:
            if isinstance(freetext, (list, tuple)):
                freetext = ". ".join(freetext)
            # NOTE: no knowledge-base override is passed to the extractor.
            extracted = extract_journal_reference(freetext)

        if extracted:
            for source_key, target_key in direct_keys:
                if source_key in extracted:
                    pubnote[target_key] = extracted.get(source_key)

            if "year" in extracted:
                year = maybe_int(extracted.get('year'))
                if year is not None:
                    pubnote['year'] = year

            if "page" in extracted:
                page_start, page_end, artid = split_page_artid(
                    extracted.get("page"))
                if page_start:
                    pubnote["page_start"] = page_start
                if page_end:
                    pubnote["page_end"] = page_end
                if artid:
                    pubnote["artid"] = artid

        if any(pubnote.values()):
            kept.append(pubnote)

    obj.data["publication_info"] = kept
Exemplo n.º 21
0
def positions(self, key, value):
    """Populate the positions field.

    Also populates the email_addresses field by side effect: addresses from
    subfield ``m`` are recorded as current and those from ``o`` as not
    current, even when no position is returned.

    Returns:
        dict or None: the position, or ``None`` when subfield ``a`` is
        absent.
    """
    email_addresses = self.get("email_addresses", [])
    current = None
    institution_ref = None

    for marker in force_list(value.get('z')):
        if marker.lower() != 'current':
            institution_ref = get_record_ref(maybe_int(marker), 'institutions')
        else:
            # The flag is only set when an institution is present.
            current = bool(value.get('a')) or None

    rank = normalize_rank(value.get('r'))

    address_groups = (
        (True, force_list(value.get('m'))),
        (False, force_list(value.get('o'))),
    )
    for is_current, addresses in address_groups:
        for address in addresses:
            email_addresses.append({
                'value': address,
                'current': is_current,
            })

    self['email_addresses'] = email_addresses

    if 'a' not in value:
        return None

    curated = None
    if institution_ref is not None:
        curated = True

    return {
        'institution': value['a'],
        'record': institution_ref,
        'curated_relation': curated,
        'rank': rank,
        'start_date': normalize_date(value.get('s')),
        'end_date': normalize_date(value.get('t')),
        'current': current,
    }
Exemplo n.º 22
0
def _fft(self, key, value):
    def _get_creation_datetime(value):
        if value.get('s'):
            dt = datetime.strptime(value['s'], '%Y-%m-%d %H:%M:%S')
            return dt.isoformat()

    is_context = value.get('f', '').endswith('context')
    if is_context:
        return

    return {
        'creation_datetime': _get_creation_datetime(value),
        'description': value.get('d'),
        'filename': value.get('n'),
        'flags': force_list(value.get('o')),
        'format': value.get('f'),
        'path': value.get('a'),
        'status': value.get('z'),
        'type': value.get('t'),
        'version': maybe_int(value.get('v')),
    }
Exemplo n.º 23
0
def experiment(self, key, values):
    """Populate the ``experiment`` key.

    Also populates the ``legacy_name``, the ``accelerator``, and the
    ``institutions`` keys through side effects. One institution entry is
    appended per value, whatever subfields that value carries.
    """
    experiment_info = self.get('experiment', {})
    legacy_name = self.get('legacy_name', '')
    accelerator = self.get('accelerator', {})
    institutions = self.get('institutions', [])

    # Subfield codes copied onto the experiment, with their target keys.
    experiment_fields = (('c', 'value'), ('d', 'short_name'))

    for value in force_list(values):
        for code, field in experiment_fields:
            if value.get(code):
                experiment_info[field] = value.get(code)

        legacy_name = value.get('a') or legacy_name

        if value.get('b'):
            accelerator['value'] = value.get('b')

        institution = {}
        if value.get('u'):
            institution['value'] = value.get('u')
        if value.get('z'):
            institution_ref = get_record_ref(
                maybe_int(value.get('z')), 'institutions')
            if institution_ref:
                institution['curated_relation'] = True
                institution['record'] = institution_ref
        institutions.append(institution)

    self['legacy_name'] = legacy_name
    self['accelerator'] = accelerator
    self['institutions'] = institutions
    return experiment_info
Exemplo n.º 24
0
 def _get_number(value):
     """Return subfield ``n`` reduced to one element and run through ``maybe_int``."""
     raw_number = force_single_element(value.get('n'))
     return maybe_int(raw_number)
Exemplo n.º 25
0
    def number_of_pages(self):
        """Return the first ``page-count/@count`` value passed through ``maybe_int``."""
        count_nodes = self.root.xpath('./front/article-meta//page-count/@count')
        return maybe_int(count_nodes.extract_first())
Exemplo n.º 26
0
    def copyright_year(self):
        """Return the first ``copyright-year`` text passed through ``maybe_int``."""
        year_nodes = self.root.xpath('./front//copyright-year/text()')
        return maybe_int(year_nodes.extract_first())
Exemplo n.º 27
0
 def _set_record(el):
     """Set the builder's record to the literature reference built from ``el``."""
     literature_ref = get_record_ref(maybe_int(el), 'literature')
     rb.set_record(literature_ref)
Exemplo n.º 28
0
    def copyright_year(self):
        """Return the first ``copyright-year`` text passed through ``maybe_int``."""
        first_year_text = self.root.xpath(
            './front//copyright-year/text()'
        ).extract_first()
        return maybe_int(first_year_text)
Exemplo n.º 29
0
    def number_of_pages(self):
        """Return the first ``page-count/@count`` value passed through ``maybe_int``."""
        first_count = self.root.xpath(
            './front/article-meta//page-count/@count'
        ).extract_first()
        return maybe_int(first_count)
Exemplo n.º 30
0
def test_maybe_int_returns_none_otherwise():
    """Check that ``maybe_int`` gives ``None`` for the string ``'216+337'``."""
    result = maybe_int('216+337')

    assert result is None
Exemplo n.º 31
0
    def copyright_year(self):
        """Return the first typed copyright's ``year`` attribute via ``maybe_int``."""
        year_nodes = self.root.xpath("./*/item-info/copyright[@type]/@year")
        return maybe_int(year_nodes.extract_first())
Exemplo n.º 32
0
 def _get_record(value):
     """Return the ``authors`` record reference built from subfield ``x``."""
     author_recid = maybe_int(force_single_element(value.get('x')))
     return get_record_ref(author_recid, 'authors')
Exemplo n.º 33
0
 def _deleted_records(self, key, value):
     """Return a reference to the record whose id is in subfield ``a``.

     Returns ``None`` when subfield ``a`` does not yield a usable id.
     """
     deleted_recid = maybe_int(value.get('a'))
     if not deleted_recid:
         return None

     return get_record_ref(deleted_recid, endpoint)