def test_positions_from_371__a_double_m_z():
    """Convert a 371 field with two ``m`` subfields to JSON and back to MARC.

    The JSON ``positions`` are also validated against the ``positions``
    subschema of the ``authors`` schema, in line with the other conversion
    tests in this file.
    """
    schema = load_schema('authors')
    subschema = schema['properties']['positions']

    snippet = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Argonne</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="z">current</subfield>'
        '</datafield>'
    )  # record/1408378

    expected = [
        {
            'current': True,
            'emails': [
                '*****@*****.**',
                '*****@*****.**',
            ],
            'institution': {
                'curated_relation': False,
                'name': 'Argonne',
            },
        },
    ]
    result = hepnames.do(create_record(snippet))

    # Validate before comparing, so a schema violation is reported as such.
    assert validate(result['positions'], subschema) is None
    assert expected == result['positions']

    expected = [
        {
            'a': 'Argonne',
            'm': ['*****@*****.**', '*****@*****.**'],
            'z': 'Current',
        },
    ]
    result = hepnames2marc.do(result)

    assert expected == result['371']
def test_arxiv_categories_from_65017a_2():
    """Convert a 65017 field to ``arxiv_categories`` and back to MARC."""
    subschema = load_schema('authors')['properties']['arxiv_categories']

    # record/1010819
    marcxml = (
        '<datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">INSPIRE</subfield>'
        ' <subfield code="a">HEP-TH</subfield>'
        '</datafield>'
    )
    expected_json = ['hep-th']
    expected_marc = [{'2': 'arXiv', 'a': 'hep-th'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['arxiv_categories'], subschema) is None
    assert expected_json == json_record['arxiv_categories']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['65017']
def test_ids_from_035__a_9_with_wikipedia():
    """Convert a 035 field with source ``Wikipedia`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/985898/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">Wikipedia</subfield>'
        ' <subfield code="a">Guido_Tonelli</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'WIKIPEDIA', 'value': 'Guido_Tonelli'}]
    expected_marc = [{'9': 'WIKIPEDIA', 'a': 'Guido_Tonelli'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_private_notes_from_595__a_9():
    """Convert a 595 field to ``_private_notes`` and back to MARC."""
    subschema = load_schema('authors')['properties']['_private_notes']

    # record/1050484
    marcxml = (
        '<datafield tag="595" ind1=" " ind2=" ">'
        ' <subfield code="a">Author prefers Alexandrov, A.S.</subfield>'
        ' <subfield code="9">SPIRES-HIDDEN</subfield>'
        '</datafield>'
    )
    expected_json = [
        {'source': 'SPIRES-HIDDEN', 'value': 'Author prefers Alexandrov, A.S.'},
    ]
    expected_marc = [
        {'9': 'SPIRES-HIDDEN', 'a': 'Author prefers Alexandrov, A.S.'},
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['_private_notes'], subschema) is None
    assert expected_json == json_record['_private_notes']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['595']
def test_inspire_categories_from_65017a_2_E():
    """Convert a 65017 field with value ``E`` to ``inspire_categories`` and back."""
    subschema = load_schema('authors')['properties']['inspire_categories']

    # record/1019112
    marcxml = (
        '<datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">INSPIRE</subfield>'
        ' <subfield code="a">E</subfield>'
        '</datafield>'
    )
    expected_json = [{'term': 'Experiment-HEP'}]
    expected_marc = [{'2': 'INSPIRE', 'a': 'Experiment-HEP'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['inspire_categories'], subschema) is None
    assert expected_json == json_record['inspire_categories']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['65017']
def test_ids_from_035__a_9_with_viaf():
    """Convert a 035 field with source ``VIAF`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1008109/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">VIAF</subfield>'
        ' <subfield code="a">34517183</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'VIAF', 'value': '34517183'}]
    expected_marc = [{'9': 'VIAF', 'a': '34517183'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_ids_from_035__a_9_with_researcherid():
    """Convert a 035 field with source ``RESEARCHERID`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1051026/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">RESEARCHERID</subfield>'
        ' <subfield code="a">B-4717-2008</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'RESEARCHERID', 'value': 'B-4717-2008'}]
    expected_marc = [{'9': 'RESEARCHERID', 'a': 'B-4717-2008'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_ids_from_035__a_9_with_cern():
    """Convert a 035 field with source ``CERN`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1064570/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">CERN</subfield>'
        ' <subfield code="a">CERN-622961</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'CERN', 'value': 'CERN-622961'}]
    expected_marc = [{'9': 'CERN', 'a': 'CERN-622961'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_ids_from_035__a_9_with_googlescholar():
    """Convert a 035 field with source ``GoogleScholar`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1467553/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">GoogleScholar</subfield>'
        ' <subfield code="a">Tnl-9KoAAAAJ</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'GOOGLESCHOLAR', 'value': 'Tnl-9KoAAAAJ'}]
    expected_marc = [{'9': 'GOOGLESCHOLAR', 'a': 'Tnl-9KoAAAAJ'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_ids_from_035__a_9_with_scopus():
    """Convert a 035 field with source ``SCOPUS`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1017182/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">SCOPUS</subfield>'
        ' <subfield code="a">7103280792</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'SCOPUS', 'value': '7103280792'}]
    expected_marc = [{'9': 'SCOPUS', 'a': '7103280792'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_status_from_100__a_g_q():
    """Convert a 100 field with ``a``/``q``/``g`` to ``status`` and back."""
    subschema = load_schema('authors')['properties']['status']

    # record/1019100
    marcxml = (
        '<datafield tag="100" ind1=" " ind2=" ">'
        ' <subfield code="a">Abarbanel, Henry D.I.</subfield>'
        ' <subfield code="q">Henry D.I. Abarbanel</subfield>'
        ' <subfield code="g">ACTIVE</subfield>'
        '</datafield>'
    )
    expected_json = 'active'
    expected_marc = {
        'a': 'Abarbanel, Henry D.I.',
        'q': 'Henry D.I. Abarbanel',
        'g': 'active',
    }

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['status'], subschema) is None
    assert expected_json == json_record['status']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['100']
def test_ids_from_035__a_with_bai():
    """Convert a 035 field that has only an ``a`` subfield to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1464894/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="a">Jian.Long.Han.1</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'INSPIRE BAI', 'value': 'Jian.Long.Han.1'}]
    expected_marc = [{'9': 'BAI', 'a': 'Jian.Long.Han.1'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_other_names_from_400__triple_a():
    """Convert a 400 field with three ``a`` subfields to ``other_names`` and back."""
    subschema = load_schema('authors')['properties']['other_names']

    # record/1292399/export/xme
    marcxml = (
        '<datafield tag="400" ind1=" " ind2=" ">'
        ' <subfield code="a">Yosef Cohen, Hadar</subfield>'
        ' <subfield code="a">Josef Cohen, Hadar</subfield>'
        ' <subfield code="a">Cohen, Hadar Josef</subfield>'
        '</datafield>'
    )
    expected_json = [
        'Yosef Cohen, Hadar',
        'Josef Cohen, Hadar',
        'Cohen, Hadar Josef',
    ]
    expected_marc = [
        {'a': 'Yosef Cohen, Hadar'},
        {'a': 'Josef Cohen, Hadar'},
        {'a': 'Cohen, Hadar Josef'},
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['other_names'], subschema) is None
    assert expected_json == json_record['other_names']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['400']
def test_ids_from_035__a_9_with_desy():
    """Convert a 035 field with source ``DESY`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/993224/export/xme
    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="a">DESY-1001805</subfield>'
        ' <subfield code="9">DESY</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'DESY', 'value': 'DESY-1001805'}]
    expected_marc = [{'9': 'DESY', 'a': 'DESY-1001805'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_positions_from_371__a():
    """Convert a 371 field with only an ``a`` subfield to ``positions`` and back."""
    subschema = load_schema('authors')['properties']['positions']

    # record/997958
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Aachen, Tech. Hochsch.</subfield>'
        '</datafield>'
    )
    expected_json = [
        {
            'current': False,
            'institution': {
                'curated_relation': False,
                'name': 'Aachen, Tech. Hochsch.',
            },
        },
    ]
    expected_marc = [{'a': 'Aachen, Tech. Hochsch.'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['positions'], subschema) is None
    assert expected_json == json_record['positions']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['371']
def test_ids_from_035__a_9_with_orcid():
    """Convert a 035 field with source ``ORCID`` to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">ORCID</subfield>'
        ' <subfield code="a">0000-0001-6771-2174</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'ORCID', 'value': '0000-0001-6771-2174'}]
    expected_marc = [{'9': 'ORCID', 'a': '0000-0001-6771-2174'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_ids_from_970__a():
    """Convert a 970 field with an ``a`` subfield to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1498151
    marcxml = (
        '<datafield tag="970" ind1=" " ind2=" ">'
        ' <subfield code="a">HEPNAMES-646482</subfield>'
        '</datafield>'
    )
    expected_json = [{'schema': 'SPIRES', 'value': 'HEPNAMES-646482'}]
    expected_marc = [{'a': 'HEPNAMES-646482'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['970']
def _create_marcxml_record(obj, eng):
    """Store the legacy MARC export of ``obj.data`` in ``obj.extra_data``.

    ``obj.data`` is converted with ``hepnames2marc``, exported through
    ``legacy_export_as_marc``, saved under the ``"marcxml"`` key of
    ``obj.extra_data`` and then written to ``obj.log`` at info level.
    ``eng`` is not used.
    """
    from inspirehep.dojson.hepnames import hepnames2marc
    from inspirehep.dojson.utils import legacy_export_as_marc

    marc_record = hepnames2marc.do(obj.data)
    obj.extra_data["marcxml"] = legacy_export_as_marc(marc_record)

    message = "Produced MarcXML: \n {}".format(obj.extra_data["marcxml"])
    obj.log.info(message)
def setUp(self):
    """Load the hepnames fixture and keep its JSON and MARC conversions."""
    fixture_path = os.path.join('fixtures', 'test_hepnames_record.xml')
    self.marcxml = pkg_resources.resource_string('tests', fixture_path)

    # MARCXML -> JSON, then JSON -> MARC, each kept on the instance.
    self.marcxml_to_json = hepnames.do(create_record(self.marcxml))
    self.json_to_marc = hepnames2marc.do(self.marcxml_to_json)
def marcxml_filter(record):
    """Export ``record`` as legacy MARC according to its primary collections.

    A record whose primary collections include ``"HEP"`` is converted with
    ``hep2marc``; otherwise one that includes ``"HEPNAMES"`` is converted with
    ``hepnames2marc``. Any other record yields ``None``.
    """
    from inspirehep.dojson.hep import hep2marc
    from inspirehep.dojson.hepnames import hepnames2marc
    from inspirehep.dojson.utils import legacy_export_as_marc

    primaries = []
    for entry in record["collections"]:
        primaries.append(entry['primary'])

    # "HEP" takes precedence when both collections are present.
    if "HEP" in primaries:
        return legacy_export_as_marc(hep2marc.do(record))
    if "HEPNAMES" in primaries:
        return legacy_export_as_marc(hepnames2marc.do(record))
    return None
def test_old_single_email_from_371__a():
    """Convert a 371 field with an ``o`` subfield to ``positions`` and back."""
    subschema = load_schema('authors')['properties']['positions']

    # record/1060782
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">IMSc, Chennai</subfield>'
        ' <subfield code="o">[email protected]</subfield>'
        ' <subfield code="r">PD</subfield>'
        ' <subfield code="s">2012</subfield>'
        ' <subfield code="t">2013</subfield>'
        '</datafield>'
    )
    expected_json = [
        {
            "current": False,
            "old_emails": ["*****@*****.**"],
            "end_date": "2013",
            "rank": "POSTDOC",
            "institution": {
                "name": "IMSc, Chennai",
                "curated_relation": False,
            },
            "_rank": "PD",
            "start_date": "2012",
        },
    ]
    expected_marc = [
        {
            "a": "IMSc, Chennai",
            "o": ["*****@*****.**"],
            "s": "2012",
            "r": "PD",
            "t": "2013",
        },
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['positions'], subschema) is None
    assert expected_json == json_record['positions']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['371']
def test_experiments(mock_get_record_ref, mock_get_recid_from_ref, test_name,
                     xml_snippet, expected_json, expected_marc):
    """Check the ``experiments`` / ``693`` conversion for one supplied case.

    The two mock arguments get side effects that build and parse
    ``mocked_recid_<x>`` values. ``xml_snippet`` is wrapped in ``<record>``
    tags unless it already starts with one. ``test_name`` is not used in the
    body.
    """
    def fake_record_ref(x, *_):
        # Falsy input is passed through untouched.
        return x and 'mocked_recid_%s' % x

    def fake_recid_from_ref(x, *_):
        # Take the part after the last underscore as an integer.
        return x and int(x.rsplit('_')[-1])

    mock_get_record_ref.side_effect = fake_record_ref
    mock_get_recid_from_ref.side_effect = fake_recid_from_ref

    already_wrapped = xml_snippet.strip().startswith('<record>')
    if not already_wrapped:
        xml_snippet = '<record>%s</record>' % xml_snippet

    json_data = hepnames.do(create_record(xml_snippet))
    json_experiments = json_data['experiments']
    marc_data = hepnames2marc.do(json_data)
    marc_experiments = marc_data['693']

    assert marc_experiments == expected_marc
    assert json_experiments == expected_json
def test_ids_from_double_035__a_9_with_kaken():
    """Convert two 035 fields (``BAI`` and ``KAKEN``) to ``ids`` and back."""
    subschema = load_schema('authors')['properties']['ids']

    # record/1474271/export/xme
    marcxml = (
        '<record>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">BAI</subfield>'
        ' <subfield code="a">Toshio.Suzuki.2</subfield>'
        ' </datafield>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">KAKEN</subfield>'
        ' <subfield code="a">70139070</subfield>'
        ' </datafield>'
        '</record>'
    )
    expected_json = [
        {'schema': 'INSPIRE BAI', 'value': 'Toshio.Suzuki.2'},
        {'schema': 'KAKEN', 'value': 'KAKEN-70139070'},
    ]
    expected_marc = [
        {'9': 'BAI', 'a': 'Toshio.Suzuki.2'},
        {'9': 'KAKEN', 'a': 'KAKEN-70139070'},
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['ids'], subschema) is None
    assert expected_json == json_record['ids']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['035']
def test_ids_from_double_035__a_9():
    """Convert two 035 fields (``INSPIRE`` and ``BAI``) to ``ids`` and back.

    The MARC comparison ignores the order of the ``035`` entries.
    """
    schema = load_schema('authors')
    subschema = schema['properties']['ids']

    snippet = (
        '<record>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="a">INSPIRE-00134135</subfield>'
        ' <subfield code="9">INSPIRE</subfield>'
        ' </datafield>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="a">H.Vogel.1</subfield>'
        ' <subfield code="9">BAI</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/984519

    expected = [
        {
            'schema': 'INSPIRE ID',
            'value': 'INSPIRE-00134135',
        },
        {
            'schema': 'INSPIRE BAI',
            'value': 'H.Vogel.1',
        },
    ]
    result = hepnames.do(create_record(snippet))

    assert validate(result['ids'], subschema) is None
    assert expected == result['ids']

    expected = [
        {
            'a': 'INSPIRE-00134135',
            '9': 'INSPIRE',
        },
        {
            'a': 'H.Vogel.1',
            '9': 'BAI',
        },
    ]
    result = hepnames2marc.do(result)

    # Dicts cannot be ordered on Python 3, so a bare sorted() on a list of
    # dicts raises TypeError. Sort on the 'a' subfield, which every expected
    # entry carries, to keep the comparison order-insensitive.
    def by_value(field):
        return field['a']

    assert sorted(expected, key=by_value) == sorted(result['035'], key=by_value)
def test_advisors_from_701__a_g_i():
    """Convert a 701 field to ``advisors`` and back to MARC."""
    subschema = load_schema('authors')['properties']['advisors']

    # record/1474091/export/xme
    marcxml = (
        '<datafield tag="701" ind1=" " ind2=" ">'
        ' <subfield code="a">Rivelles, Victor O.</subfield>'
        ' <subfield code="g">PhD</subfield>'
        ' <subfield code="i">INSPIRE-00120420</subfield>'
        ' <subfield code="x">991627</subfield>'
        ' <subfield code="y">1</subfield>'
        '</datafield>'
    )
    expected_json = [
        {
            'name': 'Rivelles, Victor O.',
            'degree_type': 'PhD',
            'ids': [
                {'schema': 'INSPIRE ID', 'value': 'INSPIRE-00120420'},
            ],
            'record': {
                '$ref': 'http://localhost:5000/api/authors/991627',
            },
            'curated_relation': True,
        },
    ]
    # The expected MARC carries only the a, g and i subfields.
    expected_marc = [
        {
            'a': 'Rivelles, Victor O.',
            'g': 'PhD',
            'i': 'INSPIRE-00120420',
        },
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['advisors'], subschema) is None
    assert expected_json == json_record['advisors']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['701']
def test_acquisition_source_field_marcxml():
    """Convert a hand-built ``acquisition_source`` record and check field 541."""
    acquisition_source = {
        'source': "inspire:uid:50000",
        'email': "*****@*****.**",
        'method': "submission",
        'date': "2015-12-10",
        'submission_number': "339830",
    }
    json_record = {"acquisition_source": acquisition_source}

    expected_marc = {
        'a': 'inspire:uid:50000',
        'c': 'submission',
        'b': '*****@*****.**',
        'e': '339830',
        'd': '2015-12-10',
    }

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['541']
def test_positions_from_371__a_m_r_z():
    """Convert a 371 field with ``a``/``m``/``r``/``z`` to ``positions`` and back."""
    subschema = load_schema('authors')['properties']['positions']

    # record/997958
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Antwerp U.</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="r">SENIOR</subfield>'
        ' <subfield code="z">Current</subfield>'
        '</datafield>'
    )
    expected_json = [
        {
            'current': True,
            'emails': ['*****@*****.**'],
            'institution': {
                'curated_relation': False,
                'name': 'Antwerp U.',
            },
            'rank': 'SENIOR',
            '_rank': 'SENIOR',
        },
    ]
    expected_marc = [
        {
            'a': 'Antwerp U.',
            'm': ['*****@*****.**'],
            'r': 'SENIOR',
            'z': 'Current',
        },
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['positions'], subschema) is None
    assert expected_json == json_record['positions']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['371']
def test_positions_from_371__a_r_t_z():
    """Convert a 371 field with ``a``/``r``/``t``/``z`` to ``positions`` and back."""
    subschema = load_schema('authors')['properties']['positions']

    # record/1037568
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">San Luis Potosi U.</subfield>'
        ' <subfield code="r">Master</subfield>'
        ' <subfield code="t">2007</subfield>'
        ' <subfield code="z">903830</subfield>'
        '</datafield>'
    )
    expected_json = [
        {
            '_rank': 'Master',
            'current': False,
            'end_date': '2007',
            'institution': {
                'curated_relation': True,
                'name': 'San Luis Potosi U.',
                'record': {
                    '$ref': 'http://localhost:5000/api/institutions/903830',
                },
            },
            'rank': 'MASTER',
        },
    ]
    # The expected MARC has 'MAS' for the rank and no z subfield.
    expected_marc = [
        {
            'a': 'San Luis Potosi U.',
            'r': 'MAS',
            't': '2007',
        },
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['positions'], subschema) is None
    assert expected_json == json_record['positions']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['371']
def test_positions_from_371__a_double_m_z():
    """Convert a 371 field with two ``m`` subfields to ``positions`` and back."""
    subschema = load_schema('authors')['properties']['positions']

    # record/1408378
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Argonne</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="z">current</subfield>'
        '</datafield>'
    )
    expected_json = [
        {
            'current': True,
            'emails': ['*****@*****.**', '*****@*****.**'],
            'institution': {
                'curated_relation': False,
                'name': 'Argonne',
            },
        },
    ]
    expected_marc = [
        {
            'a': 'Argonne',
            'm': ['*****@*****.**', '*****@*****.**'],
            'z': 'Current',
        },
    ]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['positions'], subschema) is None
    assert expected_json == json_record['positions']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['371']
def test_new_record_from_970__d():
    """Convert a 970 field with a ``d`` subfield to ``new_record`` and back."""
    subschema = load_schema('authors')['properties']['new_record']

    # record/1271254
    marcxml = (
        '<datafield tag="970" ind1=" " ind2=" ">'
        ' <subfield code="d">1039458</subfield>'
        '</datafield>'
    )
    expected_json = {'$ref': 'http://localhost:5000/api/authors/1039458'}
    # The expected MARC value is an integer, not a string.
    expected_marc = {'d': 1039458}

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['new_record'], subschema) is None
    assert expected_json == json_record['new_record']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['970']
def test_stub_from_980__a_not_useful():
    """Convert a 980 ``HEPNAMES`` field to ``stub`` and back to MARC."""
    subschema = load_schema('authors')['properties']['stub']

    # record/1019103
    marcxml = (
        '<datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">HEPNAMES</subfield>'
        '</datafield>'
    )
    expected_json = True
    expected_marc = [{'a': 'HEPNAMES'}]

    json_record = hepnames.do(create_record(marcxml))

    assert validate(json_record['stub'], subschema) is None
    assert expected_json == json_record['stub']

    marc_record = hepnames2marc.do(json_record)

    assert expected_marc == marc_record['980']
def test_old_single_email_from_371__a():
    """Convert a 371 field with an ``o`` subfield to JSON and back to MARC.

    The JSON ``positions`` are also validated against the ``positions``
    subschema of the ``authors`` schema, in line with the other conversion
    tests in this file.
    """
    schema = load_schema('authors')
    subschema = schema['properties']['positions']

    snippet = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">IMSc, Chennai</subfield>'
        ' <subfield code="o">[email protected]</subfield>'
        ' <subfield code="r">PD</subfield>'
        ' <subfield code="s">2012</subfield>'
        ' <subfield code="t">2013</subfield>'
        '</datafield>'
    )  # record/1060782

    expected = [
        {
            "current": False,
            "old_emails": ["*****@*****.**"],
            "end_date": "2013",
            "rank": "POSTDOC",
            "institution": {
                "name": "IMSc, Chennai",
                "curated_relation": False,
            },
            "_rank": "PD",
            "start_date": "2012",
        },
    ]
    result = hepnames.do(create_record(snippet))

    # Validate before comparing, so a schema violation is reported as such.
    assert validate(result['positions'], subschema) is None
    assert expected == result['positions']

    expected = [
        {
            "a": "IMSc, Chennai",
            "o": ["*****@*****.**"],
            "s": "2012",
            "r": "PD",
            "t": "2013",
        },
    ]
    marc = hepnames2marc.do(result)

    assert expected == marc['371']
def json_to_marc(marcxml_to_json):
    """Return the ``hepnames2marc`` conversion of ``marcxml_to_json``."""
    marc_record = hepnames2marc.do(marcxml_to_json)
    return marc_record