def test_replace_refs_correct_sources(get_db_rec, get_es_rec):
    """Check that ``replace_refs`` reads from the source it is asked for.

    The two record getters (presumably mocks injected by decorators outside
    this view) are given distinguishable return values, so the record that
    comes back reveals which of them was consulted.
    """
    expected_from_db = {'DB': 'DB'}
    expected_from_es = {'ES': 'ES'}
    get_db_rec.return_value = expected_from_db
    get_es_rec.return_value = expected_from_es

    from_db = replace_refs({'$ref': _build_url()}, 'db')
    from_es = replace_refs({'$ref': _build_url()}, 'es')

    assert from_db == expected_from_db
    assert from_es == expected_from_es
def test_replace_refs_correct_sources(get_db_rec, get_es_rec, app):
    """Check that ``replace_refs`` reads from the source it is asked for.

    The two record getters (presumably mocks injected by decorators outside
    this view) are given distinguishable return values, so the record that
    comes back reveals which of them was consulted. Everything runs inside
    the application context of ``app``.
    """
    expected_from_db = {'DB': 'DB'}
    expected_from_es = {'ES': 'ES'}
    get_db_rec.return_value = expected_from_db
    get_es_rec.return_value = expected_from_es

    with app.app_context():
        from_db = replace_refs({'$ref': _build_url(app)}, 'db')
        from_es = replace_refs({'$ref': _build_url(app)}, 'es')

        # The results are lazy objects, so the comparisons must also happen
        # while the application context is still active.
        assert from_db == expected_from_db
        assert from_es == expected_from_es
def populate_journal_coverage(obj, eng):
    """Populate ``journal_coverage`` from the Journals DB.

    Resolves the ``journal_record`` references found in the
    ``publication_info`` of the workflow object using the ``'db'`` source.
    When at least one resolved journal has a ``_harvesting_info.coverage``
    of ``'full'``, the ``journal_coverage`` key in ``extra_data`` is set to
    ``'full'``, otherwise to ``'partial'``. Nothing is written when no
    journal could be resolved.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journal_refs = get_value(obj.data, 'publication_info.journal_record')
    journals = replace_refs(journal_refs, 'db')
    if not journals:
        return

    fully_harvested = any(
        get_value(journal, '_harvesting_info.coverage') == 'full'
        for journal in journals
    )
    obj.extra_data['journal_coverage'] = (
        'full' if fully_harvested else 'partial'
    )
def get_conference_record(record, default=None):
    """Return the first Conference record associated with a record.

    Takes the first ``conference_record`` reference found in the
    ``publication_info`` of the record and resolves it with ``replace_refs``
    using the ``'db'`` source.

    Args:
        record(InspireRecord): a record.
        default: value returned when no conference record is present or the
            resolved record is falsy.

    Returns:
        InspireRecord: the first Conference record associated with the
        record, or ``default``.

    Examples:
        >>> record = {
        ...     'publication_info': [
        ...         {
        ...             'conference_record': {
        ...                 '$ref': '/api/conferences/972464',
        ...             },
        ...         },
        ...     ],
        ... }
        >>> conference_record = get_conference_record(record)
        >>> conference_record['control_number']
        972464

    """
    conference_ref = get_value(
        record, 'publication_info.conference_record[0]')
    conference_record = replace_refs(conference_ref, 'db')
    return conference_record or default
def get_conference_record(record):
    """Return the first Conference record associated with a record.

    Takes the first ``conference_record`` reference found in the
    ``publication_info`` of the record (``None`` when there is none) and
    hands it to ``replace_refs`` with the ``'db'`` source.

    Args:
        record(InspireRecord): a record.

    Returns:
        InspireRecord: the first Conference record associated with the
        record, as returned by ``replace_refs``.

    Examples:
        >>> record = {
        ...     'publication_info': [
        ...         {
        ...             'conference_record': {
        ...                 '$ref': '/api/conferences/972464',
        ...             },
        ...         },
        ...     ],
        ... }
        >>> conference_record = get_conference_record(record)
        >>> conference_record['control_number']
        972464

    """
    conference_ref = get_value(
        record, 'publication_info.conference_record[0]', default=None)
    return replace_refs(conference_ref, 'db')
def conference_information(self):
    """Conference information.

    Returns a list with one dictionary per ``publication_info`` entry of the
    record. Each dictionary holds the control number and title of the
    referenced conference and parent records (resolved with ``replace_refs``
    using the ``'es'`` source) plus the page and article-id fields of the
    entry.
    """
    def _resolve(pub_info, key):
        """Return ``(record, control_number)`` for the reference at ``key``.

        Falls back to ``({}, None)`` when the key is absent or the resolved
        record has no usable ``control_number``.
        """
        if key not in pub_info:
            return {}, None
        resolved = replace_refs(pub_info[key], 'es')
        if resolved and resolved.get('control_number'):
            return resolved, resolved['control_number']
        return {}, None

    entries = []
    for pub_info in self['publication_info']:
        conference_rec, conference_recid = _resolve(
            pub_info, 'conference_record')
        parent_rec, parent_recid = _resolve(pub_info, 'parent_record')

        conference_title = LiteratureReader(conference_rec).title
        # Only the first "Proceedings, " occurrence is dropped from the title.
        parent_title = LiteratureReader(parent_rec).title.replace(
            "Proceedings, ", "", 1)

        entries.append({
            "conference_recid": conference_recid,
            "conference_title": conference_title,
            "parent_recid": parent_recid,
            "parent_title": parent_title,
            "page_start": pub_info.get('page_start'),
            "page_end": pub_info.get('page_end'),
            "artid": pub_info.get('artid'),
        })

    return entries
def set_refereed_and_fix_document_type(obj, eng):
    """Set the ``refereed`` field using the Journals DB.

    Resolves the journals referenced in ``publication_info`` and derives the
    ``refereed`` key in ``data`` from them:

    * ``True`` if any journal is refereed and does not publish proceedings;
    * ``True`` if the record is not a ``conference paper`` and any journal
      is refereed but also publishes proceedings;
    * ``False`` if every journal is explicitly marked as not refereed;
    * left untouched otherwise.

    Also replaces the ``article`` document type with ``conference paper``
    if the paper was only published in non refereed proceedings.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journal_refs = get_value(obj.data, 'publication_info.journal_record')
    journals = replace_refs(journal_refs, 'db')
    if not journals:
        return

    refereed_without_proceedings = any(
        journal.get('refereed') and not journal.get('proceedings')
        for journal in journals
    )
    refereed_with_proceedings = any(
        journal.get('refereed') and journal.get('proceedings')
        for journal in journals
    )
    not_a_conference_paper = (
        'conference paper' not in obj.data['document_type']
    )
    # A journal with no ``refereed`` key counts as "unknown", not as
    # "non refereed", hence the ``True`` default here.
    exclusively_non_refereed = all(
        not journal.get('refereed', True) for journal in journals
    )

    if refereed_without_proceedings or (
            not_a_conference_paper and refereed_with_proceedings):
        obj.data['refereed'] = True
    elif exclusively_non_refereed:
        obj.data['refereed'] = False

    only_in_proceedings = all(
        journal.get('proceedings') for journal in journals
    )
    only_in_non_refereed = all(
        not journal.get('refereed') for journal in journals
    )
    if not (only_in_proceedings and only_in_non_refereed):
        return

    document_types = obj.data['document_type']
    try:
        document_types.remove('article')
    except ValueError:
        # No ``article`` entry to replace: leave the document types alone.
        return
    document_types.append('conference paper')
def get_journal_coverage(obj, eng):
    """Store the harvesting coverage of the article's journals on ``obj``.

    Resolves the ``journal_record`` references found in the
    ``publication_info`` of the workflow object using the ``'db'`` source.
    If at least one resolved journal has a ``_harvesting_info.coverage`` of
    ``'full'``, ``extra_data['journal_coverage']`` is set to ``'full'``,
    otherwise to ``'partial'``. Nothing is written when no journal could be
    resolved.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journals = replace_refs(
        get_value(obj.data, 'publication_info.journal_record'), 'db')
    if not journals:
        return

    # Look the coverage up with ``get_value`` instead of indexing
    # ``journal['_harvesting_info']`` directly, so that a journal record
    # without harvesting information counts as "not fully harvested" rather
    # than raising ``KeyError``.
    fully_harvested = any(
        get_value(journal, '_harvesting_info.coverage') == 'full'
        for journal in journals
    )
    obj.extra_data['journal_coverage'] = (
        'full' if fully_harvested else 'partial'
    )
def conference_information(self):
    """Conference information.

    Returns a list with one dictionary per ``publication_info`` entry of the
    record. Each dictionary holds the control number and title of the
    referenced conference and parent records (resolved with ``replace_refs``
    using the ``'es'`` source) plus the page and article-id fields of the
    entry.
    """
    def _usable_record(pub_info, key):
        """Resolve ``pub_info[key]``, or ``{}`` without a control number."""
        if key in pub_info:
            candidate = replace_refs(pub_info[key], 'es')
            if candidate and candidate.get('control_number'):
                return candidate
        return {}

    conf_info = []
    for pub_info in self['publication_info']:
        conference_rec = _usable_record(pub_info, 'conference_record')
        parent_rec = _usable_record(pub_info, 'parent_record')

        # An empty placeholder record yields ``None`` as its control number.
        entry = {
            "conference_recid": conference_rec.get('control_number'),
            "conference_title": get_title(conference_rec),
            "parent_recid": parent_rec.get('control_number'),
            "parent_title": get_title(parent_rec).replace(
                "Proceedings, ", "", 1),
            "page_start": pub_info.get('page_start'),
            "page_end": pub_info.get('page_end'),
            "artid": pub_info.get('artid'),
        }
        conf_info.append(entry)

    return conf_info
def set_refereed_and_fix_document_type(obj, eng):
    """Set the ``refereed`` field using the Journals DB.

    Looks at the journals referenced by the ``publication_info`` of the
    article. ``refereed`` in ``data`` becomes ``True`` when one of them is
    refereed and publishes no proceedings, or when one of them is refereed,
    also publishes proceedings, and the article is not already typed as a
    ``conference paper``. It becomes ``False`` when every journal is
    explicitly flagged as not refereed. In any other case the key is not
    touched.

    Also replaces the ``article`` document type with ``conference paper``
    if the paper was only published in non refereed proceedings.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journals = replace_refs(
        get_value(obj.data, 'publication_info.journal_record'), 'db')
    if not journals:
        return

    in_refereed_non_proceedings = any(
        j.get('refereed') and not j.get('proceedings') for j in journals)
    in_refereed_proceedings = any(
        j.get('refereed') and j.get('proceedings') for j in journals)
    is_conference_paper = 'conference paper' in obj.data['document_type']
    # Missing ``refereed`` information defaults to ``True`` so that unknown
    # journals never make the article count as "exclusively non refereed".
    all_explicitly_non_refereed = all(
        not j.get('refereed', True) for j in journals)

    if in_refereed_non_proceedings:
        obj.data['refereed'] = True
    elif in_refereed_proceedings and not is_conference_paper:
        obj.data['refereed'] = True
    elif all_explicitly_non_refereed:
        obj.data['refereed'] = False

    all_proceedings = all(j.get('proceedings') for j in journals)
    none_refereed = all(not j.get('refereed') for j in journals)

    if all_proceedings and none_refereed:
        document_types = obj.data['document_type']
        try:
            document_types.remove('article')
        except ValueError:
            # There was no ``article`` type, so there is nothing to swap.
            pass
        else:
            document_types.append('conference paper')
def _conference_data(conf):
    """Describe the conference behind a conference reference.

    Resolves ``conf`` with ``replace_refs`` using the ``'db'`` source and
    returns a dictionary with the keys ``type``, ``name``, ``acronym``,
    ``opening_date`` and ``closing_date``. Every field other than ``type``
    is an empty string when the reference does not resolve, and falls back
    to an empty string when missing from the resolved record.
    """
    # FIXME: Add conference city, country, and country code fields
    ref = replace_refs(conf, 'db')

    if not ref:
        return {
            'type': "conference",
            'name': "",
            'acronym': "",
            'opening_date': "",
            'closing_date': "",
        }

    return {
        'type': "conference",
        'name': get_value(ref, "titles[0].title", ""),
        'acronym': get_value(ref, "acronym[0]", ""),
        'opening_date': get_value(ref, "opening_date", ""),
        'closing_date': get_value(ref, "closing_date", ""),
    }
def _conference_data(conf):
    """Describe the conference behind a conference reference.

    Resolves ``conf`` with ``replace_refs`` using the ``'db'`` source and
    returns a dictionary with the keys ``type``, ``name``, ``acronym``,
    ``opening_date`` and ``closing_date``. Every field other than ``type``
    is an empty string when the reference does not resolve, and falls back
    to an empty string when missing from the resolved record.
    """
    ref = replace_refs(conf, 'db')
    resolved = bool(ref)

    # FIXME: Add conference city, country, and country code fields
    field_paths = (
        ('name', "titles[0].title"),
        ('acronym', "acronym[0]"),
        ('opening_date', "opening_date"),
        ('closing_date', "closing_date"),
    )

    conference = {'type': "conference"}
    for field, path in field_paths:
        conference[field] = get_value(ref, path, "") if resolved else ""
    return conference
def _conference_data(conf_record):
    """Describe the conference behind a conference reference.

    Resolves ``conf_record`` with ``replace_refs`` using the ``'db'`` source
    and returns a dictionary with the keys ``type``, ``name``, ``acronym``,
    ``opening_date``, ``closing_date``, ``month``, ``year``, ``city``,
    ``country`` and ``country_code``.

    When the reference does not resolve, every field other than ``type`` is
    an empty string. A resolved record is expected to carry all the fields
    read below; a missing one still raises ``KeyError``/``IndexError``.
    """
    ref = replace_refs(conf_record, 'db')
    if not ref:
        return {
            'type': "conference",
            'name': "",
            'acronym': "",
            'opening_date': "",
            'closing_date': "",
            'month': "",
            'year': "",
            'city': "",
            'country': "",
            'country_code': "",
        }

    address = ref['address'][0]

    # The original address is split on commas rather than on spaces, so
    # that multi-word names such as "New York, USA" are kept whole. The
    # first component is taken as the city and the last one as the country
    # (assumes a "<city>, ..., <country>" layout -- TODO confirm).
    address_parts = [
        part.strip() for part in address['original_address'].split(",")
    ]
    city = address_parts[0]
    country = address_parts[-1] if len(address_parts) > 1 else ""

    # NOTE(review): assumes ``date`` looks like "<days> <month> <year>",
    # e.g. "5-9 Jun 2015" -- confirm against the conference records.
    date_parts = ref['date'].split(" ")
    month = date_parts[1]
    year = date_parts[2]

    return {
        'type': "conference",
        'name': ref['titles'][0]['title'],
        'acronym': ref['acronym'][0],
        'opening_date': ref['opening_date'],
        'closing_date': ref['closing_date'],
        'month': month,
        'year': year,
        'city': city,
        'country': country,
        'country_code': address['country_code'],
    }
def populate_journal_coverage(obj, eng):
    """Populate ``journal_coverage`` from the Journals DB.

    Looks up the journals referenced by the ``publication_info`` of the
    article (resolved with the ``'db'`` source). The ``journal_coverage``
    key in ``extra_data`` is set to ``'full'`` as soon as one of them has a
    ``_harvesting_info.coverage`` of ``'full'``, and to ``'partial'`` when
    none has. It is left unset when no journal could be resolved.

    Args:
        obj: a workflow object.
        eng: a workflow engine.

    Returns:
        None

    """
    journals = replace_refs(
        get_value(obj.data, 'publication_info.journal_record'), 'db')
    if not journals:
        return

    coverage = 'partial'
    for journal in journals:
        if get_value(journal, '_harvesting_info.coverage') == 'full':
            # One fully harvested journal is enough; stop looking.
            coverage = 'full'
            break

    obj.extra_data['journal_coverage'] = coverage
def _journal_pages(pub_info):
    """Return the page information of a journal ``publication_info`` entry.

    ``page_artid`` wins when present; otherwise the result is
    ``"<start>-<end>"`` when both page fields are set, whichever single one
    is set, or an empty string when neither is.
    """
    if 'page_artid' in pub_info:
        return pub_info['page_artid']

    page_start = pub_info.get('page_start')
    page_end = pub_info.get('page_end')
    if page_start and page_end:
        return page_start + "-" + page_end
    return page_start or page_end or ""


def tei_response(record):
    """Render ``record`` with the TEI template and print the result.

    The template named by ``TEMPLATE`` is loaded from the ``templates``
    directory of the ``inspirehep.modules.converttohal`` package and
    rendered with the titles, first DOI, authors, first publication and
    affiliation structures of the record.

    Side effects: every author with a non-empty ``full_name`` gets a
    ``parsed_name`` entry (a ``HumanName``) added in place, and the rendered
    document is written to standard output.

    Args:
        record: the record to convert (accessed like a dictionary).

    Returns:
        None

    """
    data = record
    env = Environment(
        loader=PackageLoader('inspirehep.modules.converttohal', 'templates'),
        trim_blocks=True,
        lstrip_blocks=True)
    template = env.get_template(TEMPLATE)

    # Records without authors are rendered with an empty author list, in
    # line with the affiliation loop below.
    authors = data.get('authors') or []
    for author in authors:
        if author.get('full_name'):
            # Split the full name into its first/last name components.
            author['parsed_name'] = HumanName(author['full_name'])

    titles = data.get('titles', [])
    # TODO: update the following line
    doi = data['dois'][0]['value'] if 'dois' in data else ""

    # Only the first ``publication_info`` entry is exported.
    publication = None
    if 'publication_info' in data:
        pub_info = data['publication_info'][0]
        if 'journal_title' in pub_info:
            publication = {
                'type': "journal",
                'name': pub_info['journal_title'],
                'year': pub_info.get('year', ""),
                'volume': pub_info.get('journal_volume', ""),
                'issue': pub_info.get('journal_issue', ""),
                'pp': _journal_pages(pub_info),
            }
        elif 'conference_record' in pub_info:
            publication = _conference_data(pub_info['conference_record'])

    # Collect each affiliation once, keyed on its ``recid``.
    my_affiliations = []
    recids = []
    for author in authors:
        for affiliation in (author.get('affiliations') or []):
            if 'recid' in affiliation and affiliation['recid'] not in recids:
                my_affiliations.append(affiliation)
                recids.append(affiliation['recid'])

    structures = []
    for affiliation in my_affiliations:
        ref = replace_refs(affiliation, 'db')
        if 'record' in ref and 'collections' in ref['record']:
            institution = ref['record']
            collections = institution['collections']
            structures.append({
                # The structure type is read from the second collection.
                'type': (collections[1]['primary'].lower()
                         if len(collections) >= 2 else ""),
                'name': institution['institution'][0],
                'address': institution['address'][0]['original_address'],
                'country': institution['address'][0]['country_code'],
                'recid': institution['oai_pmh'][0]['id'].split(":")[-1],
            })

    # Single-argument call form: prints the same on Python 2 and Python 3.
    print(template.render(titles=titles, doi=doi, authors=authors,
                          publication=publication, structures=structures))
def _format_journal_line(pub_info):
    """Return the inline journal citation of one ``publication_info`` entry.

    The entry must contain ``journal_title``; volume, year, issue and pages
    (or article id) are appended only when the entry itself provides them.
    """
    parts = ['<i>' + pub_info['journal_title'] + '</i>']
    if 'journal_volume' in pub_info:
        parts.append(' ' + pub_info['journal_volume'])
    if 'year' in pub_info:
        parts.append(' (' + str(pub_info['year']) + ')')
    if 'journal_issue' in pub_info:
        parts.append(' ' + pub_info['journal_issue'] + ', ')
    if 'page_start' in pub_info and 'page_end' in pub_info:
        parts.append(' ' + '{page_start}-{page_end}'.format(**pub_info))
    elif 'page_start' in pub_info:
        parts.append(' ' + '{page_start}'.format(**pub_info))
    elif 'artid' in pub_info:
        parts.append(' ' + '{artid}'.format(**pub_info))
    return ''.join(parts)


def publication_info(record):
    """Displays inline publication and conference information.

    Args:
        record: a record (accessed like a dictionary).

    Returns:
        dict: empty when the record has no ``publication_info``. Otherwise
        it may contain:

        * ``pub_info``: a list with one formatted line per entry that has a
          ``journal_title`` or, when there is none, a single-item list with
          the first ``pubinfo_freetext`` found;
        * ``conf_info``: the rendered conference macro for the entries that
          reference a conference and/or a parent record.

    """
    result = {}
    if 'publication_info' not in record:
        return result
    pub_infos = record['publication_info']

    # Each line is built only from its own entry, so the volume, year,
    # issue or pages of one journal never leak into the line of the next.
    out = [
        _format_journal_line(pub_info)
        for pub_info in pub_infos
        if 'journal_title' in pub_info
    ]
    if out:
        result['pub_info'] = out
    else:
        # No journal information at all: fall back to the first free text.
        for field in pub_infos:
            if 'pubinfo_freetext' in field:
                out.append(field['pubinfo_freetext'])
                result['pub_info'] = out
                break

    # Conference info line
    macros_template = \
        "inspirehep_theme/format/record/Conference_info_macros.tpl"
    for pub_info in pub_infos:
        conference_recid = None
        parent_recid = None
        if 'conference_record' in pub_info:
            conference_rec = replace_refs(pub_info['conference_record'], 'es')
            if conference_rec and conference_rec.get('control_number'):
                conference_recid = conference_rec['control_number']
        if 'parent_record' in pub_info:
            parent_rec = replace_refs(pub_info['parent_record'], 'es')
            if parent_rec and parent_rec.get('control_number'):
                parent_recid = parent_rec['control_number']

        # NOTE(review): ``TypeError`` is deliberately swallowed below so
        # that an entry that cannot be rendered is skipped -- presumably it
        # comes from records with unusable titles; confirm.
        if conference_recid and parent_recid:
            try:
                ctx = {
                    "parent_recid": parent_recid,
                    "conference_recid": conference_recid,
                    "conference_title": get_title(conference_rec)
                }
                if result:
                    macro_name = "conf_with_pub_info"
                else:
                    ctx.update(dict(
                        page_start=pub_info.get('page_start'),
                        page_end=pub_info.get('page_end'),
                        artid=pub_info.get('artid')
                    ))
                    macro_name = "conf_without_pub_info"
                result['conf_info'] = render_macro_from_template(
                    name=macro_name,
                    template=macros_template,
                    ctx=ctx)
                # A conference together with its proceedings is the most
                # complete information available: stop at the first one.
                break
            except TypeError:
                pass
        elif conference_recid and not parent_recid:
            try:
                ctx = {
                    "conference_recid": conference_recid,
                    "conference_title": get_title(conference_rec),
                    "pub_info": bool(result.get('pub_info', ''))
                }
                result['conf_info'] = render_macro_from_template(
                    name="conference_only",
                    template=macros_template,
                    ctx=ctx)
            except TypeError:
                pass
        elif parent_recid and not conference_recid:
            try:
                ctx = {
                    "parent_recid": parent_recid,
                    "parent_title": parent_rec['titles'][0]['title'].replace(
                        "Proceedings, ", "", 1),
                    "pub_info": bool(result.get('pub_info', ''))
                }
                result['conf_info'] = render_macro_from_template(
                    name="proceedings_only",
                    template=macros_template,
                    ctx=ctx)
            except TypeError:
                pass

    return result