def test_set_refereed_and_fix_document_type_replaces_article_with_conference_paper_if_needed(mock_replace_refs):
    """An 'article' becomes a 'conference paper' when the mocked journals are proceedings."""
    proceedings_schema = load_schema('journals')['properties']['proceedings']

    linked_journals = [{'proceedings': True}]
    assert validate(linked_journals[0]['proceedings'], proceedings_schema) is None

    # The task resolves journal references through the patched helper.
    mock_replace_refs.return_value = linked_journals

    document_type_schema = load_schema('hep')['properties']['document_type']

    obj = MockObj({'document_type': ['article']}, {})
    eng = MockEng()

    assert set_refereed_and_fix_document_type(obj, eng) is None

    document_type = obj.data['document_type']
    assert validate(document_type, document_type_schema) is None
    assert ['conference paper'] == document_type
def test_populate_journal_coverage_writes_partial_if_all_coverages_are_partial(mock_replace_refs):
    """'partial' is stored in extra_data when the only mocked journal has partial coverage."""
    harvesting_info_schema = load_schema('journals')['properties']['_harvesting_info']

    journal = {'_harvesting_info': {'coverage': 'partial'}}
    assert validate(journal['_harvesting_info'], harvesting_info_schema) is None
    mock_replace_refs.return_value = [journal]

    publication_info_schema = load_schema('hep')['properties']['publication_info']

    publication_info = [
        {'journal_record': {'$ref': 'http://localhost:/api/journals/1212337'}},
    ]
    assert validate(publication_info, publication_info_schema) is None

    obj = MockObj({'publication_info': publication_info}, {})
    eng = MockEng()

    assert populate_journal_coverage(obj, eng) is None

    coverage = obj.extra_data['journal_coverage']
    assert 'partial' == coverage
def test_set_refereed_and_fix_document_type_handles_journals_that_publish_mixed_content(mock_replace_refs):
    """A journal flagged both as proceedings and as refereed yields refereed == True."""
    journal_properties = load_schema('journals')['properties']

    mixed_journal = {'proceedings': True, 'refereed': True}
    assert validate(mixed_journal['proceedings'], journal_properties['proceedings']) is None
    assert validate(mixed_journal['refereed'], journal_properties['refereed']) is None

    mock_replace_refs.return_value = [mixed_journal]

    hep_refereed_schema = load_schema('hep')['properties']['refereed']

    obj = MockObj({'document_type': ['article']}, {})
    eng = MockEng()

    assert set_refereed_and_fix_document_type(obj, eng) is None

    refereed = obj.data['refereed']
    assert validate(refereed, hep_refereed_schema) is None
    assert True == refereed
def test_set_refereed_and_fix_document_type_sets_refereed_to_false_if_all_journals_are_not_refereed(mock_replace_refs):
    """refereed is set to False when the only mocked journal is not refereed."""
    journal_refereed_schema = load_schema('journals')['properties']['refereed']

    unrefereed_journal = {'refereed': False}
    assert validate(unrefereed_journal['refereed'], journal_refereed_schema) is None
    mock_replace_refs.return_value = [unrefereed_journal]

    hep_refereed_schema = load_schema('hep')['properties']['refereed']

    obj = MockObj({'document_type': ['article']}, {})
    eng = MockEng()

    assert set_refereed_and_fix_document_type(obj, eng) is None

    refereed = obj.data['refereed']
    assert validate(refereed, hep_refereed_schema) is None
    assert False == refereed
def __init__(self, *args, **kwargs):
    """Build the form, then fill the subject and degree type choices from the schemas."""
    super(LiteratureForm, self).__init__(*args, **kwargs)

    # Subjects: value and label are both the schema term.
    field_schema = load_schema('elements/inspire_field.json')
    subject_choices = []
    for term in field_schema['properties']['term']['enum']:
        subject_choices.append((term, term))
    self.subject.choices = subject_choices

    # Degree types: the label is the capitalized value, except for 'phd'.
    degree_schema = load_schema('elements/degree_type.json')
    labelled_degrees = []
    for degree in degree_schema['enum']:
        if degree == 'phd':
            labelled_degrees.append(('phd', 'PhD'))
        else:
            labelled_degrees.append((degree, degree.capitalize()))

    # Present the choices ordered by their display label.
    self.degree_type.choices = sorted(labelled_degrees, key=lambda choice: choice[1])
def test_arxiv_plot_extract_logs_when_tarball_is_invalid(mock_process_tarball):
    """An InvalidTarball raised by the patched extractor is reported on the info log."""
    mock_process_tarball.side_effect = InvalidTarball

    arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    # synthetic data
    arxiv_eprints = [
        {
            'categories': [
                'physics.ins-det',
            ],
            'value': '1612.00626',
        },
    ]
    tarball = AttrDict({
        'file': AttrDict({
            'uri': 'http://export.arxiv.org/e-print/1612.00626',
        })
    })
    files = MockFiles({'1612.00626.tar.gz': tarball})
    assert validate(arxiv_eprints, arxiv_eprints_schema) is None

    obj = MockObj({'arxiv_eprints': arxiv_eprints}, {}, files=files)
    eng = MockEng()

    assert arxiv_plot_extract(obj, eng) is None

    logged = obj.log._info.getvalue()
    assert 'Invalid tarball http://export.arxiv.org/e-print/1612.00626 for arxiv_id 1612.00626' == logged
def test_arxiv_derive_inspire_categories():
    """The arXiv category 'nucl-th' is translated to the 'Theory-Nucl' INSPIRE category."""
    hep_properties = load_schema('hep')['properties']

    # literature/1458300
    arxiv_eprints = [
        {
            'categories': [
                'nucl-th',
            ],
            'value': '1605.03898',
        },
    ]
    assert validate(arxiv_eprints, hep_properties['arxiv_eprints']) is None

    obj = MockObj({'arxiv_eprints': arxiv_eprints}, {})
    eng = MockEng()

    assert arxiv_derive_inspire_categories(obj, eng) is None

    derived = obj.data['inspire_categories']
    assert validate(derived, hep_properties['inspire_categories']) is None
    assert [{'source': 'arxiv', 'term': 'Theory-Nucl'}] == derived
def test_match_references_finds_match_when_repeated_record_with_different_scores(
    mocked_inspire_matcher_match, isolated_app
):
    """A single reference is linked to literature/1 and still validates afterwards."""
    publication_info = {
        'artid': '045',
        'journal_title': 'JHEP',
        'journal_volume': '06',
        'page_start': '045',
        'year': 2007
    }
    unmatched = [{'reference': {'publication_info': publication_info}}]

    references_schema = load_schema('hep')['properties']['references']
    assert validate(unmatched, references_schema) is None

    matched = match_references(unmatched)

    assert len(matched) == 1
    assert matched[0]['record']['$ref'] == 'http://localhost:5000/api/literature/1'
    assert validate(matched, references_schema) is None
def test_populate_arxiv_document_logs_on_pdf_not_existing():
    """When the PDF URL serves the HTML fixture, the missing PDF is reported on the info log."""
    with requests_mock.Mocker() as requests_mocker:
        fixture_path = os.path.join('fixtures', '1707.02785.html')
        requests_mocker.register_uri(
            'GET',
            'http://export.arxiv.org/pdf/1707.02785',
            content=pkg_resources.resource_string(__name__, fixture_path),
        )

        arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        # literature/1458302
        arxiv_eprints = [
            {
                'categories': [
                    'cs.CV',
                ],
                'value': '1707.02785',
            },
        ]
        files = MockFiles({})
        assert validate(arxiv_eprints, arxiv_eprints_schema) is None

        obj = MockObj({'arxiv_eprints': arxiv_eprints}, {}, files=files)
        eng = MockEng()

        assert populate_arxiv_document(obj, eng) is None

        logged = obj.log._info.getvalue()
        assert 'No PDF is available for 1707.02785' == logged
def test_get_conference_record(replace_refs):
    """get_conference_record returns the record supplied by the patched replace_refs."""
    hep_properties = load_schema('hep')['properties']

    conference = {'control_number': 972464}
    assert validate(conference['control_number'], hep_properties['control_number']) is None

    publication_info = [
        {
            'conference_record': {
                '$ref': 'http://localhost:5000/api/conferences/972464',
            },
        },
    ]
    assert validate(publication_info, hep_properties['publication_info']) is None

    replace_refs.return_value = conference

    found = get_conference_record({'publication_info': publication_info})

    assert 972464 == found['control_number']
def test_match_reference_ignores_hidden_collections(isolated_app):
    """A reference whose DOI belongs to a 'HAL Hidden' record is left without a 'record' link."""
    doi = '10.1371/journal.pone.0188398'

    hidden_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['HAL Hidden'],
        'control_number': 1,
        'document_type': ['article'],
        'dois': [{'value': doi}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=hidden_record, index_name='records-hep')

    unmatched = {'reference': {'dois': [doi]}}

    references_schema = load_schema('hep')['properties']['references']
    assert validate([unmatched], references_schema) is None

    matched = match_reference(unmatched)

    assert 'record' not in matched
def test_match_reference_doesnt_touch_curated(isolated_app):
    """A curated reference keeps its existing link even though its DOI matches another record."""
    doi = '10.1371/journal.pone.0188398'

    matching_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'dois': [{'value': doi}],
    }
    TestRecordMetadata.create_from_kwargs(
        json=matching_record, index_name='records-hep')

    curated = {
        'curated_relation': True,
        'record': {
            '$ref': 'http://localhost:5000/api/literature/42',
        },
        'reference': {
            'dois': [doi],
        }
    }

    references_schema = load_schema('hep')['properties']['references']
    assert validate([curated], references_schema) is None

    after = match_reference(curated)

    assert after['record']['$ref'] == 'http://localhost:5000/api/literature/42'
def test_populate_affiliation_suggest_to_ref():
    """The record's own '$ref' is copied into the suggester payload."""
    self_schema = load_schema('institutions')['properties']['self']

    institution = {
        '$schema': 'http://localhost:5000/schemas/records/institutions.json',
        'legacy_ICN': 'CERN',
        'self': {
            '$ref': 'http://localhost:5000/api/institutions/902725',
        },
    }
    assert validate(institution['self'], self_schema) is None

    populate_affiliation_suggest(None, institution)

    expected_payload = {
        '$ref': 'http://localhost:5000/api/institutions/902725',
        'ICN': [],
        'institution_acronyms': [],
        'institution_names': [],
        'legacy_ICN': 'CERN',
    }
    expected = {
        'input': ['CERN'],
        'output': 'CERN',
        'payload': expected_payload,
    }

    assert expected == institution['affiliation_suggest']
def test_match_reference_on_texkey(isolated_app):
    """A reference carrying only a texkey is linked to the record that owns that texkey."""
    texkey = 'Giudice:2007fh'

    cited = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'texkeys': [texkey],
        'titles': [
            {'title': 'The Strongly-Interacting Light Higgs'}
        ],
    }
    TestRecordMetadata.create_from_kwargs(
        json=cited, index_name='records-hep')

    unmatched = {'reference': {'texkey': texkey}}

    references_schema = load_schema('hep')['properties']['references']
    assert validate([unmatched], references_schema) is None

    matched = match_reference(unmatched)

    assert matched['record']['$ref'] == 'http://localhost:5000/api/literature/1'
    assert validate([matched], references_schema) is None
def test_populate_affiliation_suggest_from_name_variants():
    """Name variants are added to the suggester input after the legacy ICN."""
    name_variants_schema = load_schema('institutions')['properties']['name_variants']

    institution = {
        '$schema': 'http://localhost:5000/schemas/records/institutions.json',
        'legacy_ICN': 'CERN',
        'name_variants': [
            {'value': u'Centre Européen de Recherches Nucléaires'},
        ],
    }
    assert validate(institution['name_variants'], name_variants_schema) is None

    populate_affiliation_suggest(None, institution)

    expected_payload = {
        '$ref': None,
        'ICN': [],
        'institution_acronyms': [],
        'institution_names': [],
        'legacy_ICN': 'CERN',
    }
    expected = {
        'input': [
            'CERN',
            u'Centre Européen de Recherches Nucléaires',
        ],
        'output': 'CERN',
        'payload': expected_payload,
    }

    assert expected == institution['affiliation_suggest']
def test_populate_affiliation_suggest_from_postal_code():
    """Postal codes of the addresses are added to the suggester input after the legacy ICN."""
    addresses_schema = load_schema('institutions')['properties']['addresses']

    institution = {
        '$schema': 'http://localhost:5000/schemas/records/institutions.json',
        'addresses': [
            {'postal_code': '1211'},
        ],
        'legacy_ICN': 'CERN',
    }
    assert validate(institution['addresses'], addresses_schema) is None

    populate_affiliation_suggest(None, institution)

    expected_payload = {
        '$ref': None,
        'ICN': [],
        'institution_acronyms': [],
        'institution_names': [],
        'legacy_ICN': 'CERN',
    }
    expected = {
        'input': ['CERN', '1211'],
        'output': 'CERN',
        'payload': expected_payload,
    }

    assert expected == institution['affiliation_suggest']
def test_populate_title_suggest_with_all_inputs():
    """Full title, short title and title variants all end up in the suggester input."""
    journal_properties = load_schema('journals')['properties']
    # Resolve the subschemas up front, before the record is built.
    checked_fields = ('journal_title', 'short_title', 'title_variants')
    subschemas = [journal_properties[field] for field in checked_fields]

    journal = {
        '$schema': 'http://localhost:5000/schemas/records/journals.json',
        'journal_title': {'title': 'The Journal of High Energy Physics (JHEP)'},
        'short_title': 'JHEP',
        'title_variants': ['JOURNAL OF HIGH ENERGY PHYSICS'],
    }
    for field, subschema in zip(checked_fields, subschemas):
        assert validate(journal[field], subschema) is None

    populate_title_suggest(None, journal)

    expected = {
        'input': [
            'The Journal of High Energy Physics (JHEP)',
            'JHEP',
            'JOURNAL OF HIGH ENERGY PHYSICS'
        ],
        'output': 'JHEP',
        'payload': {
            'full_title': 'The Journal of High Energy Physics (JHEP)'
        }
    }

    assert expected == journal['title_suggest']
def test_populate_affiliation_suggest_from_institution_hierarchy_name():
    """Hierarchy names feed both the suggester input and the payload's institution_names."""
    legacy_icn_schema = load_schema('institutions')['properties']['legacy_ICN']

    hierarchy_name = 'European Organization for Nuclear Research'
    institution = {
        '$schema': 'http://localhost:5000/schemas/records/institutions.json',
        'institution_hierarchy': [
            {'name': hierarchy_name},
        ],
        'legacy_ICN': 'CERN',
    }
    assert validate(institution['legacy_ICN'], legacy_icn_schema) is None

    populate_affiliation_suggest(None, institution)

    expected_payload = {
        '$ref': None,
        'ICN': [],
        'institution_acronyms': [],
        'institution_names': [hierarchy_name],
        'legacy_ICN': 'CERN',
    }
    expected = {
        'input': [hierarchy_name, 'CERN'],
        'output': 'CERN',
        'payload': expected_payload,
    }

    assert expected == institution['affiliation_suggest']
def test_populate_abstract_source_suggest():
    """Each abstract of a hep record gains a suggester entry built from its source."""
    abstracts_schema = load_schema('hep')['properties']['abstracts']

    literature = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'abstracts': [
            {'source': 'foo', 'value': 'bar'},
        ],
    }
    assert validate(literature['abstracts'], abstracts_schema) is None

    populate_abstract_source_suggest(None, literature)

    expected = [
        {
            'abstract_source_suggest': {
                'input': 'foo',
                'output': 'foo',
            },
            'source': 'foo',
            'value': 'bar',
        },
    ]

    assert expected == literature['abstracts']
def test_populate_abstract_source_suggest_does_nothing_if_record_is_not_literature():
    """Abstracts are left untouched when the record's $schema is not hep.json."""
    abstracts_schema = load_schema('hep')['properties']['abstracts']

    other_record = {
        '$schema': 'http://localhost:5000/schemas/records/other.json',
        'abstracts': [
            {'source': 'foo', 'value': 'bar'},
        ],
    }
    assert validate(other_record['abstracts'], abstracts_schema) is None

    populate_abstract_source_suggest(None, other_record)

    unchanged = [
        {'source': 'foo', 'value': 'bar'},
    ]

    assert unchanged == other_record['abstracts']
def test_assign_uuid_does_not_touch_existing_uuids(mock_uuid4):
    """An author that already has a uuid keeps it, whatever the patched uuid4 would return."""
    mock_uuid4.return_value = UUID('727238f3-8ed6-40b6-97d2-dc3cd1429131')

    authors_schema = load_schema('hep')['properties']['authors']

    existing_uuid = 'e14955b0-7e57-41a0-90a8-f4c64eb8f4e9'
    literature = {
        '$schema': 'http://localhost:5000/records/schemas/hep.json',
        'authors': [
            {
                'full_name': 'Ellis, John Richard',
                'uuid': existing_uuid,
            },
        ],
    }
    assert validate(literature['authors'], authors_schema) is None

    assign_uuid(None, literature)

    authors = literature['authors']
    expected = [
        {
            'full_name': 'Ellis, John Richard',
            'uuid': existing_uuid,
        },
    ]

    assert validate(authors, authors_schema) is None
    assert expected == authors
def test_populate_authors_full_name_unicode_normalized():
    """Every author gains a lower-cased 'full_name_unicode_normalized' field."""
    authors_schema = load_schema('hep')['properties']['authors']

    literature = {
        '$schema': 'http://localhost:5000/records/schemas/hep.json',
        'authors': [
            {'full_name': u'Müller, J.'},
            {'full_name': u'Muller, J.'},
        ],
    }
    assert validate(literature['authors'], authors_schema) is None

    populate_authors_full_name_unicode_normalized(None, literature)

    expected = [
        {
            'full_name': u'Müller, J.',
            'full_name_unicode_normalized': u'müller, j.',
        },
        {
            'full_name': u'Muller, J.',
            'full_name_unicode_normalized': u'muller, j.',
        },
    ]

    assert expected == literature['authors']
def test_load_author_project_membership():
    """Loading adds '_collections' and 'curated_relation': False to the project membership."""
    membership = {
        'name': 'pariatur',
        'start_date': '1997-05-01',
        'end_date': '2001-12-31',
        'record': {
            '$ref': 'http://180'
        },
        'current': True
    }

    loaded = Author().load({'project_membership': [membership]}).data

    membership_schema = load_schema('authors')['properties']['project_membership']

    expected = {
        '_collections': ['Authors'],
        'project_membership': [
            {
                'curated_relation': False,
                'current': True,
                'end_date': '2001-12-31',
                'name': 'pariatur',
                'record': {'$ref': 'http://180'},
                'start_date': '1997-05-01',
            },
        ],
    }

    assert validate(expected['project_membership'], membership_schema) is None
    assert expected == loaded
def test_populate_author_count():
    """Of three authors, one of them a supervisor, the stored author_count is 2."""
    authors_schema = load_schema('hep')['properties']['authors']

    plain_author = {
        'full_name': 'Smith, John',
        'inspire_roles': [
            'author',
        ],
    }
    editor = {
        'full_name': 'Rafelski, Johann',
        'inspire_roles': [
            'author',
            'editor',
        ],
    }
    supervisor = {
        'full_name': 'Rohan, George',
        'inspire_roles': [
            'author',
            'supervisor',
        ],
    }
    literature = {
        '$schema': 'http://localhost:5000/records/schemas/hep.json',
        'authors': [plain_author, editor, supervisor],
    }
    assert validate(literature['authors'], authors_schema) is None

    populate_author_count(None, literature)

    assert literature['author_count'] == 2
def test_load_author_positions():
    """Loading adds '_collections' and 'curated_relation': False to the position."""
    position = {
        'institution': 'Colgate University',
        'start_date': '1994-02-01',
        'end_date': '1995-01-31',
        'rank': 'PHD',
        'current': False,
    }

    loaded = Author().load({'positions': [position]}).data

    positions_schema = load_schema('authors')['properties']['positions']

    expected = {
        '_collections': ['Authors'],
        'positions': [
            {
                'current': False,
                'curated_relation': False,
                'end_date': '1995-01-31',
                'institution': 'Colgate University',
                'rank': 'PHD',
                'start_date': '1994-02-01',
            },
        ],
    }

    assert validate(expected['positions'], positions_schema) is None
    assert expected == loaded
def test_dump_author_project_membership():
    """Dumping a 'project_membership' payload returns it unchanged."""
    payload = {
        'project_membership': [
            {
                'institution': 'Colgate University',
                'start_date': '1994-02-01',
                'end_date': '1995-01-31',
                'rank': 'PHD',
                'current': False,
            },
        ],
    }

    dumped = Author().dump(payload).data

    # NOTE(review): the entries live under 'project_membership' but are
    # validated against the 'positions' subschema - confirm this is intended.
    positions_schema = load_schema('authors')['properties']['positions']

    expected = {
        'project_membership': [
            {
                'institution': 'Colgate University',
                'start_date': '1994-02-01',
                'end_date': '1995-01-31',
                'rank': 'PHD',
                'current': False,
            },
        ],
    }

    assert validate(payload['project_membership'], positions_schema) is None
    assert expected == dumped
def test_populate_bookautocomplete_does_nothing_if_record_is_not_a_book():
    """No 'bookautocomplete' key is added to a record whose document type is 'article'."""
    hep_properties = load_schema('hep')['properties']
    # Resolve the subschemas up front, before the record is built.
    checked_fields = ('authors', 'document_type', 'self')
    subschemas = [hep_properties[field] for field in checked_fields]

    article = {
        '$schema': 'http://localhost:5000/records/schemas/hep.json',
        'authors': [
            {'full_name': 'Mohayai, Tanaz Angelina'},
        ],
        'document_type': [
            'article',
        ],
        'self': {
            '$ref': 'http://localhost:5000/api/literature/1520027',
        }
    }
    for field, subschema in zip(checked_fields, subschemas):
        assert validate(article[field], subschema) is None

    populate_bookautocomplete(None, article)

    assert 'bookautocomplete' not in article
def test_populate_inspire_document_type_from_publication_type():
    """The facet lists the document type followed by the publication type."""
    hep_properties = load_schema('hep')['properties']
    # Resolve the subschemas up front, before the record is built.
    checked_fields = ('document_type', 'publication_type')
    subschemas = [hep_properties[field] for field in checked_fields]

    literature = {
        '$schema': 'http://localhost:5000/records/schemas/hep.json',
        'document_type': [
            'article',
        ],
        'publication_type': [
            'introductory',
        ],
    }
    for field, subschema in zip(checked_fields, subschemas):
        assert validate(literature[field], subschema) is None

    populate_inspire_document_type(None, literature)

    facet = literature['facet_inspire_doc_type']
    assert ['article', 'introductory'] == facet
def test_arxiv_plot_extract_logs_when_images_are_invalid(mock_process_tarball):
    """A DelegateError raised by the patched extractor is reported on the error log."""
    mock_process_tarball.side_effect = DelegateError

    arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    # synthetic data
    arxiv_eprints = [
        {
            'categories': [
                'physics.ins-det',
            ],
            'value': '1612.00624',
        },
    ]
    tarball = AttrDict({
        'file': AttrDict({
            'uri': 'http://export.arxiv.org/e-print/1612.00624',
        })
    })
    files = MockFiles({'1612.00624.tar.gz': tarball})
    assert validate(arxiv_eprints, arxiv_eprints_schema) is None

    obj = MockObj({'arxiv_eprints': arxiv_eprints}, {}, files=files)
    eng = MockEng()

    assert arxiv_plot_extract(obj, eng) is None

    logged = obj.log._error.getvalue()
    assert 'Error extracting plots for 1612.00624. Report and skip.' == logged
def test_arxiv_package_download_logs_on_error():
    """An HTTP 500 from the mocked e-print URL is reported on the error log."""
    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            'GET',
            'http://export.arxiv.org/e-print/1605.03951',
            status_code=500,
        )

        arxiv_eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

        # literature/1458254
        arxiv_eprints = [
            {
                'categories': [
                    'astro-ph.HE',
                ],
                'value': '1605.03951',
            },
        ]
        files = MockFiles({})
        assert validate(arxiv_eprints, arxiv_eprints_schema) is None

        obj = MockObj({'arxiv_eprints': arxiv_eprints}, {}, files=files)
        eng = MockEng()

        assert arxiv_package_download(obj, eng) is None

        logged = obj.log._error.getvalue()
        assert 'Cannot retrieve tarball from arXiv for 1605.03951' == logged
def test_addresses_from_371__triple_a_b_d_e_g_and_371__triple_a_b_d_e_g_x():
    """Two 371 datafields are converted into two addresses of the institution."""
    addresses_schema = load_schema('institutions')['properties']['addresses']

    # record/902696
    marcxml = (
        '<record>'
        ' <datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Université Libre de Bruxelles (ULB)</subfield>'
        ' <subfield code="a">Boulevard du Triomphe, 2</subfield>'
        ' <subfield code="a">B-1050 Bruxelles</subfield>'
        ' <subfield code="b">Brussels</subfield>'
        ' <subfield code="d">Belgium</subfield>'
        ' <subfield code="e">1050</subfield>'
        ' <subfield code="g">BE</subfield>'
        ' </datafield>'
        ' <datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Vrije Universiteit VUB</subfield>'
        ' <subfield code="a">Pleinlaan 2</subfield>'
        ' <subfield code="a">B-1050 Brussel</subfield>'
        ' <subfield code="b">Brussels</subfield>'
        ' <subfield code="d">Belgium</subfield>'
        ' <subfield code="e">1050</subfield>'
        ' <subfield code="g">BE</subfield>'
        ' <subfield code="x">secondary</subfield>'
        ' </datafield>'
        '</record>'
    )

    ulb_address = {
        'cities': [
            'Brussels',
        ],
        'country_code': 'BE',
        'postal_address': [
            u'Université Libre de Bruxelles (ULB)',
            'Boulevard du Triomphe, 2',
            'B-1050 Bruxelles',
        ],
        'postal_code': '1050',
    }
    vub_address = {
        'cities': [
            'Brussels',
        ],
        'country_code': 'BE',
        'postal_address': [
            'Vrije Universiteit VUB',
            'Pleinlaan 2',
            'B-1050 Brussel',
        ],
        'postal_code': '1050',
    }

    converted = institutions.do(create_record(marcxml))

    assert validate(converted['addresses'], addresses_schema) is None
    assert [ulb_address, vub_address] == converted['addresses']
def test_populate_experiment_suggest():
    """The suggester input gathers names from every name-bearing field of the experiment."""
    experiment_properties = load_schema('experiments')['properties']
    # Fields are checked in this order; subschemas are resolved before the
    # record is built.
    checked_fields = (
        'legacy_name',
        'long_name',
        'name_variants',
        'collaboration',
        'accelerator',
        'institutions',
        'experiment',
    )
    subschemas = [experiment_properties[field] for field in checked_fields]

    raw_record = {
        '$schema': 'http://foo/experiments.json',
        'self': {
            '$ref': 'https://localhost:5000/api/experiments/bar'
        },
        'legacy_name': 'foo',
        'long_name': 'foobarbaz',
        'name_variants': [
            'bar',
            'baz',
        ],
        'collaboration': {
            'value': 'D0',
        },
        'accelerator': {
            'value': 'LHC',
        },
        'experiment': {
            'short_name': 'SHINE',
            'value': 'NA61',
        },
        'institutions': [
            {
                'value': 'ICN',
            },
        ],
    }
    record = InspireRecord(raw_record, model=RecordMetadata)

    for field, subschema in zip(checked_fields, subschemas):
        assert validate(record[field], subschema) is None

    populate_experiment_suggest(record)

    expected = {
        'input': [
            'LHC',
            'D0',
            'SHINE',
            'NA61',
            'ICN',
            'foo',
            'foobarbaz',
            'bar',
            'baz',
        ]
    }

    assert expected == record['experiment_suggest']
def test_reference_from_999C5o_h_c_t_s_r_y_0():
    """A 999C5 datafield converts to a reference and back to MARC."""
    references_schema = load_schema('hep')['properties']['references']

    # record/1591975
    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="o">36</subfield>'
        ' <subfield code="h">S. Chatrchyan et al.</subfield>'
        ' <subfield code="c">CMS Collaboration</subfield>'
        ' <subfield code="t">Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-</subfield>'
        ' <subfield code="s">Phys.Lett.,B727,77</subfield>'
        ' <subfield code="r">arXiv:1308.3409 [hep-ex]</subfield>'
        ' <subfield code="y">2013</subfield>'
        ' <subfield code="0">1247976</subfield>'
        '</datafield>'
    )

    # MARC -> hep
    expected_reference = {
        'arxiv_eprint': '1308.3409',
        'authors': [
            {'full_name': u'Chatrchyan, S.'}
        ],
        'collaborations': [
            'CMS Collaboration'
        ],
        'label': '36',
        'publication_info': {
            'artid': '77',
            'journal_title': 'Phys.Lett.B',
            'journal_volume': '727',
            'page_start': '77',
            'year': 2013,
        },
        'title': {'title': u'Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-'},
    }
    expected_hep = [
        {
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/1247976'
            },
            'reference': expected_reference,
        }
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_hep == hep_record['references']

    # hep -> MARC
    expected_marc = [
        {
            '0': 1247976,
            'c': [
                'CMS Collaboration',
            ],
            'h': [
                'Chatrchyan, S.',
            ],
            'o': '36',
            'r': [
                'arXiv:1308.3409',
            ],
            's': 'Phys.Lett.,B727,77',
            't': u'Angular analysis and branching fraction measurement of the decay B0 → K∗0 µ+ µ-',
            'y': 2013,
            'z': 0,
        }
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_marc == marc_record['999C5']
def test_authors_from_100__a_u_and_multiple_700__a_u_e():
    """CDS 100/700 authors go through cds2hep_marc (700 with 'dir.' -> 701) and then hep."""
    authors_schema = load_schema('hep')['properties']['authors']

    # record/2295265
    marcxml = (
        '<record>'
        ' <datafield tag="100" ind1=" " ind2=" ">'
        ' <subfield code="a">Aichinger, Ida</subfield>'
        ' <subfield code="u">Linz U.</subfield>'
        ' </datafield>'
        ' <datafield tag="700" ind1=" " ind2=" ">'
        ' <subfield code="a">Larcher, Gerhard</subfield>'
        ' <subfield code="u">Linz U.</subfield>'
        ' <subfield code="e">dir.</subfield>'
        ' </datafield>'
        ' <datafield tag="700" ind1=" " ind2=" ">'
        ' <subfield code="a">Kersevan, Roberto</subfield>'
        ' <subfield code="u">Linz U.</subfield>'
        ' <subfield code="e">dir.</subfield>'
        ' </datafield>'
        '</record>'
    )

    # CDS MARC -> INSPIRE MARC
    expected_100 = [
        {
            'a': 'Aichinger, Ida',
            'u': 'Linz U.',
        },
    ]
    expected_701 = [
        {
            'a': 'Larcher, Gerhard',
            'e': 'dir.',
            'u': 'Linz U.',
        },
        {
            'a': 'Kersevan, Roberto',
            'e': 'dir.',
            'u': 'Linz U.',
        },
    ]
    marc_record = cds2hep_marc.do(create_record(marcxml))

    assert expected_100 == marc_record['100__']
    assert expected_701 == marc_record['701__']

    # INSPIRE MARC -> hep
    linz = [{'value': 'Linz U.'}]
    expected_authors = [
        {
            'full_name': 'Aichinger, Ida',
            'affiliations': linz,
        },
        {
            'full_name': 'Larcher, Gerhard',
            'inspire_roles': ['supervisor'],
            'affiliations': linz,
        },
        {
            'full_name': 'Kersevan, Roberto',
            'inspire_roles': ['supervisor'],
            'affiliations': linz,
        },
    ]
    hep_record = hep.do(create_record_from_dict(marc_record))

    assert validate(hep_record['authors'], authors_schema) is None
    assert expected_authors == hep_record['authors']
def test_authors_from_100__a_0_u_m_and_700__a_0_u_m():
    """CDS authors with ids, affiliation and e-mail go through cds2hep_marc and then hep."""
    authors_schema = load_schema('hep')['properties']['authors']

    # record/2295263
    # NOTE(review): the e-mail values look masked in this copy of the
    # fixture - confirm against the upstream record.
    marcxml = (
        '<record>'
        ' <datafield tag="100" ind1=" " ind2=" ">'
        ' <subfield code="a">Joram, Christian</subfield>'
        ' <subfield code="0">AUTHOR|(INSPIRE)INSPIRE-00093928</subfield>'
        ' <subfield code="0">AUTHOR|(SzGeCERN)403463</subfield>'
        ' <subfield code="0">AUTHOR|(CDS)2068232</subfield>'
        ' <subfield code="u">CERN</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' </datafield>'
        ' <datafield tag="700" ind1=" " ind2=" ">'
        ' <subfield code="a">Pons, Xavier</subfield>'
        ' <subfield code="0">AUTHOR|(CDS)2067681</subfield>'
        ' <subfield code="0">AUTHOR|(SzGeCERN)531402</subfield>'
        ' <subfield code="u">CERN</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' </datafield>'
        '</record>'
    )

    # CDS MARC -> INSPIRE MARC
    expected_100 = [
        {
            'a': 'Joram, Christian',
            'i': ['INSPIRE-00093928'],
            'j': ['CCID-403463'],
            'u': 'CERN',
            'm': '*****@*****.**',
        },
    ]
    expected_700 = [
        {
            'a': 'Pons, Xavier',
            'j': ['CCID-531402'],
            'u': 'CERN',
            'm': '*****@*****.**',
        },
    ]
    marc_record = cds2hep_marc.do(create_record(marcxml))

    assert expected_100 == marc_record['100__']
    assert expected_700 == marc_record['700__']

    # INSPIRE MARC -> hep
    expected_authors = [
        {
            'full_name': 'Joram, Christian',
            'ids': [
                {
                    'schema': 'INSPIRE ID',
                    'value': 'INSPIRE-00093928',
                },
                {
                    'schema': 'CERN',
                    'value': 'CERN-403463',
                },
            ],
            'affiliations': [{'value': 'CERN'}],
            'emails': ['*****@*****.**'],
        },
        {
            'full_name': 'Pons, Xavier',
            'ids': [
                {
                    'schema': 'CERN',
                    'value': 'CERN-531402',
                },
            ],
            'affiliations': [{'value': 'CERN'}],
            'emails': ['*****@*****.**'],
        },
    ]
    hep_record = hep.do(create_record_from_dict(marc_record))

    assert validate(hep_record['authors'], authors_schema) is None
    assert expected_authors == hep_record['authors']
def test_report_numbers_and_document_type_and_publicate_notes_from_037__a():
    """A CMS-PAS report number yields a report number, a public note and a 'note' document type."""
    hep_properties = load_schema('hep')['properties']
    report_numbers_schema = hep_properties['report_numbers']
    document_type_schema = hep_properties['document_type']
    public_notes_schema = hep_properties['public_notes']

    # cds.cern.ch/record/2202807
    # NOTE(review): the test name mentions 037__a while the snippet uses
    # tag 088 - confirm which one is meant.
    marcxml = (
        '<datafield tag="088" ind1=" " ind2=" ">'
        ' <subfield code="a">CMS-PAS-SMP-15-001</subfield>'
        '</datafield>'
    )

    # CDS MARC -> INSPIRE MARC
    expected_marc = {
        '037__': [
            {
                '9': 'CDS',
                'a': 'CMS-PAS-SMP-15-001',
            },
        ],
        '500__': [
            {
                '9': 'CDS',
                'a': 'Preliminary results',
            },
        ],
        '980__': [
            {'a': 'NOTE'},
            {'a': 'HEP'},
            {'a': 'CORE'},
        ],
    }
    marc_record = cds2hep_marc.do(create_record(marcxml))

    assert expected_marc['037__'] == marc_record['037__']
    assert expected_marc['500__'] == marc_record['500__']
    assert expected_marc['980__'] == marc_record['980__']

    # INSPIRE MARC -> hep
    expected_hep = {
        'document_type': [
            'note',
        ],
        'public_notes': [
            {
                'source': 'CDS',
                'value': 'Preliminary results',
            },
        ],
        'report_numbers': [
            {
                'source': 'CDS',
                'value': 'CMS-PAS-SMP-15-001',
            },
        ],
    }
    hep_record = hep.do(create_record_from_dict(marc_record))

    assert validate(hep_record['report_numbers'], report_numbers_schema) is None
    assert validate(hep_record['public_notes'], public_notes_schema) is None
    assert validate(hep_record['document_type'], document_type_schema) is None
    assert expected_hep['report_numbers'] == hep_record['report_numbers']
    assert expected_hep['public_notes'] == hep_record['public_notes']
    assert expected_hep['document_type'] == hep_record['document_type']
def test_report_numbers_and_document_type_from_multiple_088__a():
    """Two 088 report numbers are both kept and the document type becomes 'note'.

    The snippet is converted in two steps: CDS MARC -> INSPIRE MARC through
    ``cds2hep_marc`` and then INSPIRE MARC -> hep through ``hep``.
    """
    schema = load_schema('hep')
    subschema_report_numbers = schema['properties']['report_numbers']
    subschema_document_type = schema['properties']['document_type']

    # cds.cern.ch/record/2275456
    # The snippet is closed with '</record>'; it used to end with a second
    # opening '<record>' tag, which made the XML malformed.
    snippet = (
        '<record>'
        ' <datafield tag="088" ind1=" " ind2=" ">'
        ' <subfield code="a">ATL-PHYS-CONF-2008-015</subfield>'
        ' </datafield>'
        ' <datafield tag="088" ind1=" " ind2=" ">'
        ' <subfield code="a">ATL-COM-PHYS-2008-052</subfield>'
        ' </datafield>'
        '</record>'
    )

    expected = {
        '037__': [
            {
                '9': 'CDS',
                'a': 'ATL-PHYS-CONF-2008-015',
            },
            {
                '9': 'CDS',
                'a': 'ATL-COM-PHYS-2008-052',
            },
        ],
        '980__': [
            {
                'a': 'NOTE',
            },
            {
                'a': 'HEP',
            },
            {
                'a': 'CORE',
            },
        ],
    }
    result = cds2hep_marc.do(create_record(snippet))

    assert expected['037__'] == result['037__']
    assert expected['980__'] == result['980__']

    # Only the fields asserted below are listed; the former 'public_notes'
    # entry was never checked by this test.
    expected = {
        'document_type': [
            'note',
        ],
        'report_numbers': [
            {
                'source': 'CDS',
                'value': 'ATL-PHYS-CONF-2008-015',
            },
            {
                'source': 'CDS',
                'value': 'ATL-COM-PHYS-2008-052',
            },
        ],
    }
    result = hep.do(create_record_from_dict(result))

    assert validate(result['report_numbers'], subschema_report_numbers) is None
    assert validate(result['document_type'], subschema_document_type) is None
    assert expected['report_numbers'] == result['report_numbers']
    assert expected['document_type'] == result['document_type']
def test_collaborations_from_multiple_710__g_0_and_710__g():
    """710 datafields become collaborations (linked when $0 is present) and convert back to 710 $g."""
    collaborations_schema = load_schema('hep')['properties']['collaborations']

    # record/1422032/export/xme
    marcxml = (
        '<record>'
        ' <datafield tag="710" ind1=" " ind2=" ">'
        ' <subfield code="g">ANTARES</subfield>'
        ' <subfield code="0">1110619</subfield>'
        ' </datafield>'
        ' <datafield tag="710" ind1=" " ind2=" ">'
        ' <subfield code="g">IceCube</subfield>'
        ' <subfield code="0">1108514</subfield>'
        ' </datafield>'
        ' <datafield tag="710" ind1=" " ind2=" ">'
        ' <subfield code="g">LIGO Scientific</subfield>'
        ' </datafield>'
        ' <datafield tag="710" ind1=" " ind2=" ">'
        ' <subfield code="g">Virgo</subfield>'
        ' <subfield code="0">1110601</subfield>'
        ' </datafield>'
        '</record>'
    )

    # MARC -> hep
    expected_hep = [
        {
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1110619',
            },
            'value': 'ANTARES',
        },
        {
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1108514',
            },
            'value': 'IceCube',
        },
        {
            'value': 'LIGO Scientific',
        },
        {
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1110601',
            },
            'value': 'Virgo',
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['collaborations'], collaborations_schema) is None
    assert expected_hep == hep_record['collaborations']

    # hep -> MARC
    expected_marc = [
        {'g': 'ANTARES'},
        {'g': 'IceCube'},
        {'g': 'LIGO Scientific'},
        {'g': 'Virgo'},
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_marc == marc_record['710']
def xtest_populate_facet_author_name(mocked_get_linked_records_in_field):
    """Check ``facet_author_name`` built from two linked authors and one unlinked.

    The name starts with ``xtest``, so it does not match the usual ``test_*``
    naming of the other tests in this file.
    """
    silk_author = {
        '$schema': 'http://localhost:5000/records/schemas/authors.json',
        'name': {'value': 'Silk, James Brian'},
        '_collections': ['Authors'],
        'ids': [{'schema': 'INSPIRE BAI', 'value': 'James.Brian.1'}],
        'control_number': 111,
    }
    doe_author = {
        '$schema': 'http://localhost:5000/records/schemas/authors.json',
        'name': {'value': 'Doe, John', 'preferred_name': 'J Doe'},
        '_collections': ['Authors'],
        'ids': [{'schema': 'INSPIRE BAI', 'value': 'John.Doe.1'}],
        'control_number': 222,
    }
    mocked_get_linked_records_in_field.return_value = iter(
        [silk_author, doe_author])

    authors_schema = load_schema('hep')['properties']['authors']

    literature_json = {
        '$schema': 'http://localhost:5000/records/schemas/hep.json',
        'authors': [
            {
                'full_name': 'Silk, James Brian',
                'record': {
                    '$ref': 'https://labs.inspirehep.net/api/literature/111',
                },
            },
            {
                'full_name': 'Doe, John',
                'record': {
                    '$ref': 'https://labs.inspirehep.net/api/literature/222',
                },
            },
            {
                'full_name': 'Rohan, George',
            },
        ],
    }
    record = InspireRecord(literature_json, model=RecordMetadata)

    expected_result = [
        u'James.Brian.1_James Brian Silk',
        u'John.Doe.1_J Doe',
        u'BAI_George Rohan',
    ]

    assert validate(record['authors'], authors_schema) is None

    populate_facet_author_name(record)

    assert record['facet_author_name'] == expected_result
def test_fuzzy_match_returns_true_if_something_matched_with_4_authors(
        mock_match, enable_fuzzy_matcher):
    """``fuzzy_match`` is truthy and stores the match under ``matches.fuzzy``.

    The mocked hit has four authors; the stored match is expected to list
    only the first three while keeping ``authors_count`` at 4.
    """
    hep_properties = load_schema('hep')['properties']
    authors_schema = hep_properties['authors']
    titles_schema = hep_properties['titles']

    matched_record = {
        'control_number': 4328,
        'titles': [
            {'title': 'title'},
        ],
        'authors': [
            {'full_name': 'Author 1'},
            {'full_name': 'Author, 2'},
            {'full_name': 'Author, 3'},
            {'full_name': 'Author, 4'},
        ],
        'authors_count': 4,
    }

    assert validate(matched_record['titles'], titles_schema) is None
    assert validate(matched_record['authors'], authors_schema) is None

    mock_match.return_value = iter([{'_source': matched_record}])

    workflow_obj = MockObj({}, {})
    engine = MockEng()

    assert fuzzy_match(workflow_obj, engine)
    assert 'matches' in workflow_obj.extra_data

    expected = [
        {
            'control_number': 4328,
            'title': 'title',
            'authors': [
                {'full_name': 'Author 1'},
                {'full_name': 'Author, 2'},
                {'full_name': 'Author, 3'},
            ],
            'authors_count': 4,
        },
    ]
    stored_matches = get_value(workflow_obj.extra_data, 'matches.fuzzy')

    assert expected == stored_matches
def test_ids_from_035__a_9_with_cern_malformed():
    """Five malformed CERN ids in ``035__a`` come out in ``CERN-<digits>`` form.

    The normalised ids are then expected to be written back to ``035``
    unchanged by ``hepnames2marc``.
    """
    ids_schema = load_schema('authors')['properties']['ids']

    marcxml = (
        '<record>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERN</subfield>'
        ' <subfield code="a">CERN-CERN-645257</subfield>'
        ' </datafield>'  # record/1030771
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERN</subfield>'
        ' <subfield code="a">cern-783683</subfield>'
        ' </datafield>'  # record/1408145
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERN</subfield>'
        ' <subfield code="a">CERM-724319</subfield>'
        ' </datafield>'  # record/1244430
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERN</subfield>'
        ' <subfield code="a">CNER-727986</subfield>'
        ' </datafield>'  # record/1068077
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERN</subfield>'
        ' <subfield code="a">CVERN-765559</subfield>'
        ' </datafield>'  # record/1340631
        '</record>'
    )

    # MARC -> authors record
    expected_ids = [
        {'schema': 'CERN', 'value': 'CERN-645257'},
        {'schema': 'CERN', 'value': 'CERN-783683'},
        {'schema': 'CERN', 'value': 'CERN-724319'},
        {'schema': 'CERN', 'value': 'CERN-727986'},
        {'schema': 'CERN', 'value': 'CERN-765559'},
    ]
    author_record = hepnames.do(create_record(marcxml))

    assert validate(author_record['ids'], ids_schema) is None
    assert expected_ids == author_record['ids']

    # authors record -> MARC
    expected_035 = [
        {'9': 'CERN', 'a': 'CERN-645257'},
        {'9': 'CERN', 'a': 'CERN-783683'},
        {'9': 'CERN', 'a': 'CERN-724319'},
        {'9': 'CERN', 'a': 'CERN-727986'},
        {'9': 'CERN', 'a': 'CERN-765559'},
    ]
    marc_record = hepnames2marc.do(author_record)

    assert expected_035 == marc_record['035']
def test_match_reference_on_texkey_has_lower_priority_than_pub_info(
        inspire_app):
    """A reference matching one record by texkey and another by pub info.

    ``match_reference`` is expected to link the record matched on publication
    info (control number 2), while ``match_reference_control_numbers`` is
    expected to return both control numbers.
    """
    shared_titles = [{"title": "The Strongly-Interacting Light Higgs"}]
    pub_info = {
        "artid": "100",
        "journal_title": "JHEP",
        "journal_volume": "100",
        "page_start": "100",
        "year": 2020,
    }

    texkey_record_json = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 1,
        "document_type": ["article"],
        "texkeys": ["MyTexKey:2008fh"],
        "titles": list(shared_titles),
    }
    create_record("lit", texkey_record_json)

    pub_info_record_json = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 2,
        "document_type": ["article"],
        "publication_info": [dict(pub_info)],
        "titles": list(shared_titles),
    }
    create_record("lit", pub_info_record_json)

    reference = {
        "reference": {
            "texkey": "MyTexKey:2008fh",
            "publication_info": dict(pub_info),
        }
    }

    references_schema = load_schema("hep")["properties"]["references"]

    assert validate([reference], references_schema) is None

    reference = match_reference(reference)
    linked_ref = reference["record"]["$ref"]

    assert linked_ref == "http://localhost:5000/api/literature/2"
    assert validate([reference], references_schema) is None

    expected_control_numbers = [2, 1]
    matched_control_numbers = match_reference_control_numbers(reference)

    # Same members, and no duplicates on the result side.
    assert set(expected_control_numbers) == set(matched_control_numbers)
    assert len(expected_control_numbers) == len(matched_control_numbers)
def test_match_references_no_match_when_multiple_match_different_from_previous(
        inspire_app):
    """Test reference matcher for when inspire-matcher returns multiple matches
    where the matched record id is not the same as the previous matched record id"""
    erratum_pub_info = {
        "artid": "074",
        "journal_title": "JHEP",
        "journal_volume": "05",
        "material": "erratum",
        "page_start": "074",
        "year": 2017,
    }

    # The original paper, which also lists its erratum.
    original_cited_record_json = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 1,
        "document_type": ["article"],
        "publication_info": [
            {
                "artid": "159",
                "journal_title": "JHEP",
                "journal_volume": "03",
                "page_start": "159",
                "year": 2016,
            },
            dict(erratum_pub_info),
        ],
    }
    # A separate record holding only the erratum.
    errata_cited_record_json = {
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "_collections": ["Literature"],
        "control_number": 2,
        "document_type": ["article"],
        "publication_info": [dict(erratum_pub_info)],
    }

    for record_json in (original_cited_record_json, errata_cited_record_json):
        create_record("lit", data=record_json)

    references = [
        {
            "reference": {
                "publication_info": {
                    "artid": "074",
                    "journal_title": "JHEP",
                    "journal_volume": "05",
                    "page_start": "074",
                    "year": 2017,
                }
            }
        }
    ]

    references_schema = load_schema("hep")["properties"]["references"]

    assert validate(references, references_schema) is None

    match_result = match_references(references)
    references = match_result["matched_references"]

    assert get_value(references[0], "record") is None
    assert validate(references, references_schema) is None

    assert not match_result["any_link_modified"]
    assert match_result["added_recids"] == []
    assert match_result["removed_recids"] == []
class AuthorUpdateForm(INSPIREForm):
    """Author update form.

    Declares the fields shown when an author profile is created or updated,
    and the ``groups`` used to lay them out. The constructor reads two
    optional keyword flags, ``is_update`` and ``is_review``, that change the
    widgets of the ``orcid``, ``bai`` and ``inspireid`` fields.
    """

    bai = fields.StringField(
        label='Bai',
        description=u'e.g. M.Santos.1',
        widget=HiddenInput(),
        widget_classes="form-control",
        validators=[
            validators.Optional(),
            RegexpStopValidator(
                "(\\w+\\.)+\\d+",
                message="A valid Bai is in the form of 'M.Santos.1'.",
            )
        ])

    # The example shows eight digits so that it satisfies the validator
    # below, which requires exactly ``\d{8}``.
    inspireid = fields.StringField(
        label='Inspireid',
        description=u'e.g. INSPIRE-00000000',
        widget=HiddenInput(),
        widget_classes="form-control",
        validators=[
            validators.Optional(),
            RegexpStopValidator(
                "INSPIRE-\\d{8}",
                message=
                "A valid Inspireid is in the form of 'INSPIRE-00000000'.",
            )
        ])

    # Hidden field to hold record id information
    control_number = fields.IntegerField(
        widget=HiddenInput(),
        validators=[validators.Optional()],
    )

    given_names = fields.StringField(label='Given Names',
                                     description=u'e.g. Diego',
                                     validators=[validators.DataRequired()],
                                     widget_classes="form-control")

    family_name = fields.StringField(label='Family Name',
                                     description=u'e.g. Martínez Santos',
                                     widget_classes="form-control")

    display_name = fields.StringField(
        label='Display Name',
        description=
        u'How should the author be addressed throughout the site? e.g. Diego Martínez',
        validators=[validators.DataRequired()],
        widget_classes="form-control")

    native_name = fields.StringField(
        label='Native Name',
        description=u'For non-Latin names e.g. 麦迪娜 or Эдгар Бугаев',
        widget_classes="form-control")

    public_emails = fields.DynamicFieldList(
        fields.FormField(EmailInlineForm,
                         widget=ExtendedListWidget(
                             item_widget=ItemWidget(),
                             html_tag='div',
                         ),
                         widget_classes="col-xs-10"),
        description=
        u"This emails will be displayed online in the INSPIRE Author Profile.",
        label='Public emails',
        add_label='Add another email',
        min_entries=1,
        widget=DynamicUnsortedNonRemoveWidget(),
        widget_classes="ui-disable-sort")

    orcid = fields.StringField(
        label=
        'ORCID <img src="/static/images/orcid_icon_24.png" style="height:20px">',
        widget_classes="form-control",
        description=
        u"""ORCID provides a persistent digital identifier that distinguishes you from other researchers. Learn more at <a href="http://orcid.org" tabIndex="-1" target="_blank">orcid.org</a>""",
        widget=WrappedInput(wrapper=""" <div class="input-group"> <span class="input-group-addon" id="sizing-addon2">orcid.org/</span> %(field)s </div> """),
        placeholder="0000-0000-0000-0000",
        validators=[
            validators.Optional(),
            RegexpStopValidator(
                # Raw string: same pattern as before, without relying on
                # Python passing unknown escapes such as ``\d`` through.
                r"\d{4}-\d{4}-\d{4}-\d{3}[\dX]",
                message=
                "A valid ORCID iD consists of 16 digits separated by dashes.",
            ),
            ORCIDValidator,
            duplicated_orcid_validator
        ])

    status_options = [("active", "Active"), ("retired", "Retired"),
                      ("departed", "Departed"), ("deceased", "Deceased")]

    status = fields.SelectField(
        label='Status',
        choices=status_options,
        default="active",
        validators=[validators.DataRequired()],
        widget_classes='form-control',
    )

    blog_url = fields.StringField(
        label='Blog',
        placeholder='http://www.example.com',
        icon="fa fa-wordpress",
        widget_classes="form-control",
    )

    twitter_url = fields.StringField(
        label='Twitter',
        placeholder='https://twitter.com/inspirehep',
        icon="fa fa-twitter",
        widget_classes="form-control",
    )

    linkedin_url = fields.StringField(
        label='Linkedin',
        placeholder=
        'https://www.linkedin.com/pub/john-francis-lampen/16/750/778',
        icon="fa fa-linkedin-square",
        widget_classes="form-control",
    )

    websites = fields.DynamicFieldList(fields.FormField(
        WebpageInlineForm,
        widget=ExtendedListWidget(
            item_widget=ItemWidget(),
            html_tag='div',
        ),
    ),
        add_label='Add another website',
        min_entries=1,
        widget_classes='ui-disable-sort',
        icon="fa fa-globe",
        widget=DynamicUnsortedWidget())

    arxiv_categories_schema = load_schema('elements/arxiv_categories.json')

    # Keep categories without a dot, plus three dotted physics categories
    # that are listed explicitly.
    research_field_options = [
        (val, val) for val in arxiv_categories_schema['enum']
        if '.' not in val or val in ('physics.ins-det', 'physics.acc-ph',
                                     'physics.data-an')
    ]

    research_field = fields.SelectMultipleField(
        label='Field of Research',
        choices=research_field_options,
        widget_classes="form-control",
        filters=[clean_empty_list],
        validators=[validators.DataRequired()])

    institution_history = fields.DynamicFieldList(
        fields.FormField(InstitutionInlineForm,
                         widget=ExtendedListWidget(
                             item_widget=ItemWidget(),
                             html_tag='div',
                         ),
                         widget_classes="col-xs-10"),
        label='Institution History',
        add_label='Add another institution',
        min_entries=1,
        widget=DynamicUnsortedWidget(),
        widget_classes="ui-disable-sort")

    advisors = fields.DynamicFieldList(fields.FormField(
        AdvisorsInlineForm,
        widget=ExtendedListWidget(
            item_widget=ItemWidget(),
            html_tag='div',
        ),
    ),
        label='Advisors',
        add_label='Add another advisor',
        min_entries=1,
        widget=DynamicUnsortedWidget(),
        widget_classes="ui-disable-sort")

    experiments = fields.DynamicFieldList(fields.FormField(
        ExperimentsInlineForm,
        widget=ExtendedListWidget(
            item_widget=ItemWidget(),
            html_tag='div',
        ),
        widget_classes="col-xs-10"),
        label='Experiment History',
        add_label='Add another experiment',
        min_entries=1,
        widget=DynamicUnsortedWidget(),
        widget_classes="ui-disable-sort")

    extra_comments = fields.TextAreaField(
        label='Comments',
        description=
        u'Send us any comments you might have. They will not be visible.',
        widget_classes="form-control")

    #
    # Form Configuration
    #
    _title = "Update author details"

    # Group fields in categories
    # NOTE(review): 'email' and "twitter_hidden" are named here but no field
    # with those names is declared in this class; confirm they are handled
    # elsewhere.
    groups = [
        ('Personal Information', [
            'given_names', 'family_name', 'display_name', 'native_name',
            'email', 'public_emails', 'status', 'orcid', 'bai', 'inspireid'
        ], {
            "icon": "fa fa-user"
        }),
        ('Personal Websites', [
            'websites', 'linkedin_url', 'blog_url', 'twitter_url',
            "twitter_hidden"
        ], {
            "icon": "fa fa-globe"
        }),
        ('Career Information',
         ['research_field', 'institution_history', 'experiments', 'advisors'],
         {
             "icon": "fa fa-university"
         }),
        ('Comments', ['extra_comments'], {
            "icon": "fa fa-comments"
        })
    ]

    def __init__(self, *args, **kwargs):
        """Constructor.

        Accepts two optional keyword flags, both defaulting to ``False``:

        * ``is_update``: hide the ORCID field and drop its validators.
        * ``is_review``: show ``bai`` and ``inspireid`` as text inputs and
          reset their flags.

        All arguments, including these flags, are forwarded to the parent
        constructor before the flags are read.
        """
        super(AuthorUpdateForm, self).__init__(*args, **kwargs)
        is_update = kwargs.pop('is_update', False)
        is_review = kwargs.pop('is_review', False)
        if is_update:
            self.orcid.widget = HiddenInput()
            self.orcid.validators = []
        if is_review:
            self.bai.widget = TextInput()
            self.bai.flags = Flags()
            self.inspireid.widget = TextInput()
            self.inspireid.flags = Flags()
def test_arxiv_author_list_handles_multiple_author_xml_files():
    """Run the default ``arxiv_author_list`` task on a fixture tarball.

    The fixture is named ``1703.09986.multiple_author_lists.tar.gz``; the
    workflow object is expected to end up with exactly two authors.
    """
    hep_properties = load_schema('hep')['properties']
    eprints_subschema = hep_properties['arxiv_eprints']

    fixture_path = os.path.join(
        'fixtures', '1703.09986.multiple_author_lists.tar.gz')
    filename = pkg_resources.resource_filename(__name__, fixture_path)

    data = {
        '$schema': 'http://localhost:5000/hep.json',
        'arxiv_eprints': [
            {
                'categories': ['hep-ex'],
                'value': '1703.09986',
            },
        ],
    }  # record/1519995
    validate(data['arxiv_eprints'], eprints_subschema)

    tarball = AttrDict({'file': AttrDict({'uri': filename})})
    files = MockFiles({'1703.09986.tar.gz': tarball})

    obj = MockObj(data, {}, files=files)
    eng = MockEng()

    task = arxiv_author_list()
    task(obj, eng)

    authors_subschema = hep_properties['authors']
    expected_authors = [
        {
            'affiliations': [{'value': 'Yerevan Phys. Inst.'}],
            'ids': [
                {'value': 'INSPIRE-00312131', 'schema': 'INSPIRE ID'},
                {'value': 'CERN-432142', 'schema': 'CERN'},
            ],
            'full_name': 'Sirunyan, Albert M',
        },
        {
            'affiliations': [{'value': 'Yerevan Phys. Inst.'}],
            'ids': [
                {'value': 'INSPIRE-00312132', 'schema': 'INSPIRE ID'},
                {'value': 'CERN-432143', 'schema': 'CERN'},
            ],
            'full_name': 'Weary, Jake',
        },
    ]
    validate(expected_authors, authors_subschema)

    assert obj.data.get('authors') == expected_authors
def test_references_from_999C5h_m_o_r_s_y_0():
    """Round-trip a ``999C5`` field with ``h``, ``m``, ``o``, ``r``, ``s``, ``y`` and ``0``."""
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="0">857215</subfield>'
        ' <subfield code="h">R. C. Myers and A. Sinha</subfield>'
        ' <subfield code="m">Seeing a c-theorem with holography ; [hep-th]</subfield>'
        ' <subfield code="o">10</subfield>'
        ' <subfield code="r">arXiv:1006.1263</subfield>'
        ' <subfield code="s">Phys.Rev.,D82,046006</subfield>'
        ' <subfield code="y">2010</subfield>'
        '</datafield>'
    )  # record/1498589

    # MARC -> HEP
    expected_references = [
        {
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/857215',
            },
            'reference': {
                'arxiv_eprint': '1006.1263',
                'authors': [
                    {'full_name': u'Myers, R.C.'},
                    {'full_name': u'Sinha, A.'},
                ],
                'label': '10',
                'misc': [
                    'Seeing a c-theorem with holography ; [hep-th]',
                ],
                'publication_info': {
                    'artid': '046006',
                    'journal_title': 'Phys.Rev.D',
                    'journal_volume': '82',
                    'year': 2010,
                },
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            '0': 857215,
            'h': [
                'Myers, R.C.',
                'Sinha, A.',
            ],
            'm': 'Seeing a c-theorem with holography ; [hep-th]',
            'o': '10',
            'r': [
                'arXiv:1006.1263',
            ],
            's': 'Phys.Rev.,D82,046006',
            'y': 2010,
            'z': 0,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']
def test_references_from_999C5h_k_double_m_o_s_y_0():
    """Round-trip a ``999C5`` field that has a texkey and two ``m`` subfields.

    The two ``m`` values are expected as separate ``misc`` entries in HEP and
    as one ``' / '``-joined ``m`` value on the way back to MARC.
    """
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="h">W, Schoutens.</subfield>'
        ' <subfield code="k">Bouwknegt:1992wg</subfield>'
        ' <subfield code="m">Peter Bouwknegt and Kareljan</subfield>'
        ' <subfield code="m">symmetry in conformal field theory</subfield>'
        ' <subfield code="o">12</subfield>'
        ' <subfield code="s">Phys.Rept.,223,183-276</subfield>'
        ' <subfield code="y">1993</subfield>'
        ' <subfield code="0">338634</subfield>'
        '</datafield>'
    )  # record/1613562

    # MARC -> HEP
    expected_references = [
        {
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/338634',
            },
            'reference': {
                'authors': [
                    {'full_name': 'Schoutens.'},  # XXX: wrong, but the best we can do.
                ],
                'label': '12',
                'misc': [
                    'Peter Bouwknegt and Kareljan',
                    'symmetry in conformal field theory',
                ],
                'publication_info': {
                    'journal_title': 'Phys.Rept.',
                    'journal_volume': '223',
                    'page_start': '183',
                    'page_end': '276',
                    'year': 1993,
                },
                'texkey': 'Bouwknegt:1992wg',
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            '0': 338634,
            'h': [
                'Schoutens.',  # XXX: wrong, but the best we can do.
            ],
            'k': 'Bouwknegt:1992wg',
            'm': 'Peter Bouwknegt and Kareljan / symmetry in conformal field theory',
            'o': '12',
            's': 'Phys.Rept.,223,183-276',
            'y': 1993,
            'z': 0,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']
def test_references_from_999C50_h_m_o_r_y():
    """Round-trip a ``999C5`` field whose ``r`` value becomes a report number."""
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="0">701721</subfield>'
        ' <subfield code="h">A. Ferrari, P.R. Sala, A. Fasso, and J. Ranft</subfield>'
        ' <subfield code="m">FLUKA: a multi-particle transport code, CERN-10 , INFN/TC_05/11</subfield>'
        ' <subfield code="o">13</subfield>'
        ' <subfield code="r">SLAC-R-773</subfield>'
        ' <subfield code="y">2005</subfield>'
        '</datafield>'
    )  # record/1478478

    # MARC -> HEP
    expected_references = [
        {
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/701721',
            },
            'reference': {
                'authors': [
                    {'full_name': 'Ferrari, A.'},
                    {'full_name': 'Sala, P.R.'},
                    {'full_name': 'Fasso, A.'},
                    {'full_name': 'Ranft, J.'},
                ],
                'label': '13',
                'misc': [
                    'FLUKA: a multi-particle transport code, CERN-10 , INFN/TC_05/11',
                ],
                'publication_info': {'year': 2005},
                'report_numbers': [
                    'SLAC-R-773',
                ],
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            '0': 701721,
            'h': [
                'Ferrari, A.',
                'Sala, P.R.',
                'Fasso, A.',
                'Ranft, J.',
            ],
            'm': 'FLUKA: a multi-particle transport code, CERN-10 , INFN/TC_05/11',
            'r': [
                'SLAC-R-773',
            ],
            'o': '13',
            'y': 2005,
            'z': 0,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']
def test_references_from_999C5a_h_o_s_x_y_0():
    """Round-trip a ``999C5`` field that has a DOI (``a``) and a raw ref (``x``)."""
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="a">doi:10.1142/S0217751X0804055X</subfield>'
        ' <subfield code="h">G.K. Leontaris</subfield>'
        ' <subfield code="o">15</subfield>'
        ' <subfield code="s">Int.J.Mod.Phys.,A23,2055</subfield>'
        ' <subfield code="x">Int. J. Mod. Phys. A 23 (doi:10.1142/S0217751X0804055X)</subfield>'
        ' <subfield code="y">2008</subfield>'
        ' <subfield code="0">780399</subfield>'
        '</datafield>'
    )  # record/1478478

    # MARC -> HEP
    expected_references = [
        {
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/780399',
            },
            'raw_refs': [
                {
                    'value': 'Int. J. Mod. Phys. A 23 (doi:10.1142/S0217751X0804055X)',
                    'schema': 'text',
                },
            ],
            'reference': {
                'dois': ['10.1142/S0217751X0804055X'],
                'authors': [
                    {'full_name': u'Leontaris, G.K.'},
                ],
                'label': '15',
                'publication_info': {
                    "artid": '2055',
                    'journal_title': 'Int.J.Mod.Phys.A',
                    'journal_volume': '23',
                    'page_start': '2055',
                    'year': 2008,
                },
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            'a': [
                'doi:10.1142/S0217751X0804055X',
            ],
            'h': [
                'Leontaris, G.K.',
            ],
            'o': '15',
            's': 'Int.J.Mod.Phys.,A23,2055',
            'x': [
                'Int. J. Mod. Phys. A 23 (doi:10.1142/S0217751X0804055X)',
            ],
            'y': 2008,
            'z': 0,
            '0': 780399,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']
def test_add_institution_sorts_by_rank():
    """Positions added in arbitrary rank order come back in the expected order.

    Ten positions at the same institution are added, one of them without a
    rank; that one is expected last, after ``OTHER``.
    """
    positions_schema = load_schema('authors')['properties']['positions']

    builder = AuthorBuilder()
    # ``None`` marks the single call made without a ``rank`` argument.
    insertion_order = [
        'MASTER', 'PHD', 'VISITOR', 'STAFF', 'SENIOR', 'OTHER',
        'UNDERGRADUATE', None, 'POSTDOC', 'JUNIOR',
    ]
    for rank in insertion_order:
        if rank is None:
            builder.add_institution(institution='Colgate University')
        else:
            builder.add_institution(
                institution='Colgate University', rank=rank)

    ranks_in_expected_order = [
        'STAFF', 'SENIOR', 'JUNIOR', 'VISITOR', 'POSTDOC', 'PHD', 'MASTER',
        'UNDERGRADUATE', 'OTHER',
    ]
    expected = [
        {
            "institution": 'Colgate University',
            "rank": rank,
            "curated_relation": False,
            "current": False,
        }
        for rank in ranks_in_expected_order
    ]
    expected.append({
        "institution": 'Colgate University',
        "curated_relation": False,
        "current": False,
    })

    positions = builder.obj['positions']

    assert validate(positions, positions_schema) is None
    assert expected == positions
def _is_inspire(category):
    """Return whether ``category`` is in the ``term`` enum of ``elements/inspire_field``."""
    term_property = load_schema('elements/inspire_field')['properties']['term']
    return category in term_property['enum']
def test_match_references_no_match_when_multiple_match_different_from_previous(isolated_app):
    """Test reference matcher for when inspire-matcher returns multiple matches
    where the matched record id is not the same as the previous matched record id"""
    erratum_pub_info = {
        'artid': '074',
        'journal_title': 'JHEP',
        'journal_volume': '05',
        'material': 'erratum',
        'page_start': '074',
        'year': 2017,
    }

    # The original paper, which also lists its erratum.
    original_cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 1,
        'document_type': ['article'],
        'publication_info': [
            {
                'artid': '159',
                'journal_title': 'JHEP',
                'journal_volume': '03',
                'page_start': '159',
                'year': 2016,
            },
            dict(erratum_pub_info),
        ],
    }
    # A separate record holding only the erratum.
    errata_cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 2,
        'document_type': ['article'],
        'publication_info': [dict(erratum_pub_info)],
    }

    for record_json in (original_cited_record_json, errata_cited_record_json):
        TestRecordMetadata.create_from_kwargs(
            json=record_json, index_name='records-hep')

    references = [
        {
            'reference': {
                'publication_info': {
                    'artid': '074',
                    'journal_title': 'JHEP',
                    'journal_volume': '05',
                    'page_start': '074',
                    'year': 2017,
                }
            }
        }
    ]

    references_schema = load_schema('hep')['properties']['references']

    assert validate(references, references_schema) is None

    references = match_references(references)

    assert get_value(references[0], 'record') is None
    assert validate(references, references_schema) is None
def test_references_from_999C5d_multiple_h_o_r_0_9():
    """Round-trip a curator-marked ``999C5`` field with five ``h`` subfields.

    The versioned eprint ``hep-ph/0112168v2`` is expected without its version
    suffix both in HEP and on the way back to MARC.
    """
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="0">568216</subfield>'
        ' <subfield code="9">CURATOR</subfield>'
        ' <subfield code="d">eprint</subfield>'
        ' <subfield code="h">Y. Yan</subfield>'
        ' <subfield code="h">R. Tegen</subfield>'
        ' <subfield code="h">T. Gutsche</subfield>'
        ' <subfield code="h">V. E. Lyubovitskij</subfield>'
        ' <subfield code="h">A. Faessler</subfield>'
        ' <subfield code="o">20</subfield>'
        ' <subfield code="r">hep-ph/0112168v2</subfield>'
        '</datafield>'
    )  # record/1410105

    # MARC -> HEP
    expected_references = [
        {
            'legacy_curated': True,
            'curated_relation': False,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/568216',
            },
            'reference': {
                'arxiv_eprint': 'hep-ph/0112168',
                'authors': [
                    {'full_name': 'Yan, Y.'},
                    {'full_name': 'Tegen, R.'},
                    {'full_name': 'Gutsche, T.'},
                    {'full_name': 'Lyubovitskij, V.E.'},
                    {'full_name': 'Faessler, A.'},
                ],
                'label': '20',
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            '0': 568216,
            '9': 'CURATOR',
            'h': [
                'Yan, Y.',
                'Tegen, R.',
                'Gutsche, T.',
                'Lyubovitskij, V.E.',
                'Faessler, A.',
            ],
            'o': '20',
            'r': [
                'hep-ph/0112168',
            ],
            'z': 0,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']
def get_all_literature_collections():
    """Return the enum declared for ``_collections`` items in the ``hep`` schema."""
    hep_schema = load_schema("hep")
    collections_property = hep_schema["properties"]["_collections"]
    return collections_property["items"]["enum"]
def test_references_from_999C50_9_r_u_h_m_o():
    """Round-trip a curator-marked ``999C5`` field with a URN in ``r`` and a URL in ``u``."""
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="0">1511470</subfield>'
        ' <subfield code="9">CURATOR</subfield>'
        ' <subfield code="r">urn:nbn:de:hebis:77-diss-1000009520</subfield>'
        ' <subfield code="u">http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316</subfield>'
        ' <subfield code="h">K. Wiebe</subfield>'
        ' <subfield code="m">Ph.D. thesis, University of Mainz, in preparation</subfield>'
        ' <subfield code="o">51</subfield>'
        '</datafield>'
    )  # record/1504897

    # MARC -> HEP
    expected_references = [
        {
            'curated_relation': False,
            'legacy_curated': True,
            'record': {
                '$ref': 'http://localhost:5000/api/literature/1511470',
            },
            'reference': {
                'authors': [
                    {'full_name': 'Wiebe, K.'},
                ],
                'label': '51',
                'misc': [
                    'Ph.D. thesis, University of Mainz, in preparation',
                ],
                'report_numbers': [
                    'urn:nbn:de:hebis:77-diss-1000009520',
                ],
                'urls': [
                    {'value': 'http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316'},
                ],
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            '0': 1511470,
            '9': 'CURATOR',
            'h': [
                'Wiebe, K.',
            ],
            'r': [
                'urn:nbn:de:hebis:77-diss-1000009520',
            ],
            'm': 'Ph.D. thesis, University of Mainz, in preparation',
            'o': '51',
            'u': [
                'http://www.diss.fu-berlin.de/diss/receive/FUDISS_thesis_000000094316',
            ],
            'z': 0,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']
def test_figures_order_from_FFT():
    """Figures from ``FFT`` fields come out ordered FIG10, FIG11, FIG12.

    The snippet lists the fields as FIG11, FIG10, FIG12, with descriptions
    prefixed 00010, 00009 and 00011 respectively; the expected captions carry
    no such prefix. No ``documents`` key is expected in the result.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['figures']

    # The LaTeX backslash is written as an explicit ``\\`` so the literals do
    # not depend on Python passing the unknown escape ``\m`` through; the
    # runtime strings are unchanged.
    snippet = (
        '<record>'
        ' <datafield tag="FFT" ind1=" " ind2=" ">'
        ' <subfield code="a">/opt/cds-invenio/var/data/files/g151/3037400/content.png;1</subfield>'
        ' <subfield code="d">00010 Co-simulation results, at $50~\\mathrm{ms}$...</subfield>'
        ' <subfield code="f">.png</subfield>'
        ' <subfield code="n">FIG11</subfield>'
        ' <subfield code="r"/>'
        ' <subfield code="s">2017-10-04 07:54:54</subfield>'
        ' <subfield code="t">Main</subfield>'
        ' <subfield code="v">1</subfield>'
        ' <subfield code="z"/>'
        ' </datafield>'
        ' <datafield tag="FFT" ind1=" " ind2=" ">'
        ' <subfield code="a">/opt/cds-invenio/var/data/files/g151/3037399/content.png;1</subfield>'
        ' <subfield code="d">00009 Co-simulation results, at $50~\\mathrm{ms}$...</subfield>'
        ' <subfield code="f">.png</subfield>'
        ' <subfield code="n">FIG10</subfield>'
        ' <subfield code="r"/>'
        ' <subfield code="s">2017-10-04 07:54:54</subfield>'
        ' <subfield code="t">Main</subfield>'
        ' <subfield code="v">1</subfield>'
        ' <subfield code="z"/>'
        ' </datafield>'
        ' <datafield tag="FFT" ind1=" " ind2=" ">'
        ' <subfield code="a">/opt/cds-invenio/var/data/files/g151/3037401/content.png;1</subfield>'
        ' <subfield code="d">00011 Co-simulation results, at $50~\\mathrm{ms}$...</subfield>'
        ' <subfield code="f">.png</subfield>'
        ' <subfield code="n">FIG12</subfield>'
        ' <subfield code="r"/>'
        ' <subfield code="s">2017-10-04 07:54:54</subfield>'
        ' <subfield code="t">Main</subfield>'
        ' <subfield code="v">1</subfield>'
        ' <subfield code="z"/>'
        ' </datafield>'
        '</record>'
    )  # record/1628455

    expected = [
        {
            'key': 'FIG10.png',
            'caption': 'Co-simulation results, at $50~\\mathrm{ms}$...',
            'url': 'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037399/content.png%3B1',
        },
        {
            'key': 'FIG11.png',
            'caption': 'Co-simulation results, at $50~\\mathrm{ms}$...',
            'url': 'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037400/content.png%3B1',
        },
        {
            'key': 'FIG12.png',
            'caption': 'Co-simulation results, at $50~\\mathrm{ms}$...',
            'url': 'file:///afs/cern.ch/project/inspire/PROD/var/data/files/g151/3037401/content.png%3B1',
        }
    ]
    result = hep.do(create_record(snippet))

    assert validate(result['figures'], subschema) is None
    assert expected == result['figures']
    assert 'documents' not in result
def test_references_from_999C5a_h_i_m_o_p_y_9():
    """Round-trip a refextract ``999C5`` field with a DOI, an ISBN and a publisher.

    The 10-digit ISBN in the snippet is expected as a 13-digit ISBN in HEP
    and on the way back to MARC.
    """
    references_schema = load_schema('hep')['properties']['references']

    marcxml = (
        '<datafield tag="999" ind1="C" ind2="5">'
        ' <subfield code="o">16</subfield>'
        ' <subfield code="h">A. Del Guerra</subfield>'
        ' <subfield code="m">Ionizing Radiation Detectors for Medical Imaging Crossref:</subfield>'
        ' <subfield code="p">World Scientific</subfield>'
        ' <subfield code="i">9812562621</subfield>'
        ' <subfield code="a">doi:10.1142/5408</subfield>'
        ' <subfield code="y">2004</subfield>'
        ' <subfield code="9">refextract</subfield>'
        '</datafield>'
    )  # record/1593684

    # MARC -> HEP
    expected_references = [
        {
            'reference': {
                'authors': [
                    {'full_name': 'Guerra, A.Del'},  # XXX: wrong.
                ],
                'dois': [
                    '10.1142/5408',
                ],
                'imprint': {'publisher': 'World Scientific'},
                'isbn': '9789812562623',
                'label': '16',
                'misc': [
                    'Ionizing Radiation Detectors for Medical Imaging Crossref:',
                ],
                'publication_info': {'year': 2004},
            },
        },
    ]
    hep_record = hep.do(create_record(marcxml))

    assert validate(hep_record['references'], references_schema) is None
    assert expected_references == hep_record['references']

    # HEP -> MARC
    expected_999c5 = [
        {
            'a': [
                'doi:10.1142/5408',
            ],
            'h': [
                'Guerra, A.Del',  # XXX: wrong
            ],
            'i': '9789812562623',
            'm': [
                'Ionizing Radiation Detectors for Medical Imaging Crossref:',
            ],
            'o': '16',
            'p': 'World Scientific',
            'y': 2004,
        },
    ]
    marc_record = hep2marc.do(hep_record)

    assert expected_999c5 == marc_record['999C5']