def test_institutions_from_110__double_a_z():
    """A 110 field with two ``a`` and two ``z`` subfields gives two curated institutions."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Indiana U.</subfield>'
        ' <subfield code="a">NIST, Wash., D.C.</subfield>'
        ' <subfield code="z">902874</subfield>'
        ' <subfield code="z">903056</subfield>'
        '</datafield>'
    )  # record/1328021/export/xme

    indiana = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
    }
    nist = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
    }
    expected = [indiana, nist]

    record = create_record(marcxml)
    result = clean_record(jobs.do(record))

    assert expected == result['institutions']
def test_address_from_marcxml_371__a_b_c_d_e_double_g():
    """A 371 field with a repeated ``g`` subfield still yields a single country code."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Philosophenweg 16</subfield>'
        ' <subfield code="b">Heidelberg</subfield>'
        ' <subfield code="c">Baden-Wuerttemberg</subfield>'
        ' <subfield code="d">Germany</subfield>'
        ' <subfield code="e">69120</subfield>'
        ' <subfield code="g">DE</subfield>'
        ' <subfield code="g">DE</subfield>'
        '</datafield>'
    )

    address = {
        'city': 'Heidelberg',
        'country': 'Germany',
        'country_code': 'DE',
        'state': 'Baden-Wuerttemberg',
        'original_address': ['Philosophenweg 16'],
        'postal_code': '69120',
    }
    expected = [address]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['address']
def test_field_from_marcxml_650_with_two_2():
    """Handle a 650 datafield that carries two ``2`` subfields.

    Only the first ``2`` value (``arXiv``) is expected to appear as
    ``_scheme`` in the converted category.
    """
    marcxml = (
        '<record>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">arXiv</subfield>'
        ' <subfield code="2">INSPIRE</subfield>'
        ' <subfield code="a">hep-ex</subfield>'
        ' </datafield>'
        '</record>'
    )

    category = {
        '_scheme': 'arXiv',
        'scheme': 'INSPIRE',
        '_term': 'hep-ex',
        'term': 'Experiment-HEP',
    }
    expected = [category]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['field_categories']
def test_positions_from_371__a_m_r_z():
    """A 371 field with ``a``, ``m``, ``r`` and ``z`` converts to one current position."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Antwerp U.</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="r">SENIOR</subfield>'
        ' <subfield code="z">Current</subfield>'
        '</datafield>'
    )  # record/997958

    position = {
        'current': True,
        'emails': ['*****@*****.**'],
        'institution': {
            'curated_relation': False,
            'name': 'Antwerp U.',
        },
        'rank': 'SENIOR',
        '_rank': 'SENIOR',
    }
    expected = [position]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['positions']
def test_advisors_from_701__a_g_i():
    """A 701 field converts to one advisor with degree type and a curated record link."""
    marcxml = (
        '<datafield tag="701" ind1=" " ind2=" ">'
        ' <subfield code="a">Rivelles, Victor O.</subfield>'
        ' <subfield code="g">PhD</subfield>'
        ' <subfield code="i">INSPIRE-00120420</subfield>'
        ' <subfield code="x">991627</subfield>'
        ' <subfield code="y">1</subfield>'
        '</datafield>'
    )  # record/1474091

    advisor = {
        'name': 'Rivelles, Victor O.',
        'degree_type': 'PhD',
        '_degree_type': 'PhD',
        'record': {'$ref': 'http://localhost:5000/api/authors/991627'},
        'curated_relation': True,
    }
    expected = [advisor]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['advisors']
def test_positions_from_371__a_double_m_z():
    """A 371 field with two ``m`` subfields keeps both e-mail entries on one position."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Argonne</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="z">current</subfield>'
        '</datafield>'
    )  # record/1408378

    position = {
        'current': True,
        'emails': ['*****@*****.**', '*****@*****.**'],
        'institution': {
            'curated_relation': False,
            'name': 'Argonne',
        },
    }
    expected = [position]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['positions']
def test_field_from_marcxml_650_with_two_a():
    """Handle a 650 datafield that carries two ``a`` subfields.

    The first is an arXiv field code and the second an INSPIRE category;
    each one is expected to produce its own entry.
    """
    marcxml = (
        '<record>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">INSPIRE</subfield>'
        ' <subfield code="a">hep-ex</subfield>'
        ' <subfield code="a">Gravitation and Cosmology</subfield>'
        ' </datafield>'
        '</record>'
    )

    from_arxiv_code = {
        '_scheme': 'INSPIRE',
        'scheme': 'INSPIRE',
        '_term': 'hep-ex',
        'term': 'Experiment-HEP',
    }
    from_inspire_term = {
        '_scheme': 'INSPIRE',
        'scheme': 'INSPIRE',
        '_term': 'Gravitation and Cosmology',
        'term': 'Gravitation and Cosmology',
    }
    expected = [from_arxiv_code, from_inspire_term]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['field_categories']
def test_ids_from_double_035__a_9_with_kaken():
    """Two 035 fields (BAI and KAKEN) convert to two ids; the KAKEN value gets a prefix."""
    marcxml = (
        '<record>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">BAI</subfield>'
        ' <subfield code="a">Toshio.Suzuki.2</subfield>'
        ' </datafield>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="9">KAKEN</subfield>'
        ' <subfield code="a">70139070</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1474271/export/xme

    bai = {'type': 'INSPIRE BAI', 'value': 'Toshio.Suzuki.2'}
    kaken = {'type': 'KAKEN', 'value': 'KAKEN-70139070'}
    expected = [bai, kaken]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['ids']
def test_dois_from_2472_a_2_and_247_a_2_9():
    """Two 024 DOI fields, the second with a ``9`` source, convert to two DOI entries."""
    marcxml = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
        ' </datafield>'
        '</record>'
    )

    without_source = {'value': '10.1103/PhysRevD.89.072002'}
    with_source = {'source': 'bibmatch', 'value': '10.1103/PhysRevD.91.019903'}
    expected = [without_source, with_source]

    record = create_record(marcxml)
    result = clean_record(hep.do(record))

    assert expected == result['dois']
def test_thesis_supervisors_from_701__a_double_u():
    """A 701 field with two ``u`` subfields gives one supervisor with two affiliations."""
    marcxml = (
        '<datafield tag="701" ind1=" " ind2=" ">'
        ' <subfield code="a">Mnich, Joachim</subfield>'
        ' <subfield code="u">DESY</subfield>'
        ' <subfield code="u">U. Hamburg (main)</subfield>'
        '</datafield>'
    )  # record/1462486

    affiliations = [
        {'curated_relation': False, 'value': 'DESY'},
        {'curated_relation': False, 'value': 'U. Hamburg (main)'},
    ]
    expected = [
        {
            'affiliations': affiliations,
            'full_name': 'Mnich, Joachim',
        },
    ]

    record = create_record(marcxml)
    result = clean_record(hep.do(record))

    assert expected == result['thesis_supervisors']
def test_institutions_from_110__double_a_z():
    """Two ``a``/``z`` pairs in one 110 field convert to two curated institution links."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Indiana U.</subfield>'
        ' <subfield code="a">NIST, Wash., D.C.</subfield>'
        ' <subfield code="z">902874</subfield>'
        ' <subfield code="z">903056</subfield>'
        '</datafield>'
    )  # record/1328021/export/xme

    first = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
    }
    second = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
    }
    expected = [first, second]

    converted = jobs.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['institutions']
def test_superseded_institutions_from_110__x_z():
    """``x``/``z`` pairs of a 110 field convert to related institutes marked 'superseded'."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">University of Pittsburgh</subfield>'
        ' <subfield code="t">U. Pittsburgh</subfield>'
        ' <subfield code="u">U. Pittsburgh (main)</subfield>'
        ' <subfield code="x">Pittsburgh U., Dept. Phil.</subfield>'
        ' <subfield code="x">Pittsburgh U., Med. School</subfield>'
        ' <subfield code="z">908047</subfield>'
        ' <subfield code="z">905042</subfield>'
        '</datafield>'
    )  # record/1272953

    dept_phil = {
        'curated_relation': True,
        'name': 'Pittsburgh U., Dept. Phil.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/908047'},
        'relation_type': 'superseded',
    }
    med_school = {
        'curated_relation': True,
        'name': 'Pittsburgh U., Med. School',
        'record': {'$ref': 'http://localhost:5000/api/institutions/905042'},
        'relation_type': 'superseded',
    }
    expected = [dept_phil, med_school]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['related_institutes']
def test_dois_from_0247_a_2_and_0247_a_2_9():
    """The same DOI given with and without a ``9`` source is kept as two entries."""
    marcxml = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        ' </datafield>'
        '</record>'
    )

    with_source = {'source': 'bibmatch', 'value': '10.1088/1475-7516/2015/03/044'}
    without_source = {'value': '10.1088/1475-7516/2015/03/044'}
    expected = [with_source, without_source]

    record = create_record(marcxml)
    result = clean_record(hep.do(record))

    assert expected == result['dois']
def test_dois_from_2472_a_2_and_247_a_2_9():
    """Two distinct DOIs, only the second carrying a source, convert in input order."""
    marcxml = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.89.072002</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1103/PhysRevD.91.019903</subfield>'
        ' </datafield>'
        '</record>'
    )

    plain_doi = {'value': '10.1103/PhysRevD.89.072002'}
    sourced_doi = {'source': 'bibmatch', 'value': '10.1103/PhysRevD.91.019903'}
    expected = [plain_doi, sourced_doi]

    converted = hep.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['dois']
def test_related_experiments_from_double_510__a_w_0():
    """Two 510 fields with ``w`` = ``a`` convert to two curated 'predecessor' experiments."""
    marcxml = (
        '<record>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">1108293</subfield>'
        ' <subfield code="a">XENON</subfield>'
        ' <subfield code="w">a</subfield>'
        ' </datafield>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">1386527</subfield>'
        ' <subfield code="a">XENON100</subfield>'
        ' <subfield code="w">a</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1386519

    xenon = {
        'name': 'XENON',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1108293'},
        'relation': 'predecessor',
        'curated_relation': True,
    }
    xenon100 = {
        'name': 'XENON100',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1386527'},
        'relation': 'predecessor',
        'curated_relation': True,
    }
    expected = [xenon, xenon100]

    record = create_record(marcxml)
    result = clean_record(experiments.do(record))

    assert expected == result['related_experiments']
def test_description_from_multiple_520__a():
    """Four 520 ``a`` subfields convert to a four-item description list, in input order."""
    snippet = (
        '<record>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002</subfield>'
        ' </datafield>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)</subfield>'
        ' </datafield>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003</subfield>'
        ' </datafield>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1110568

    # The first entry must stay on one physical line: a single-quoted literal
    # cannot contain a raw line break, and the text has to match the first
    # 520 subfield above ("(LNGS). The main ...").
    expected = [
        'DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002',
        'ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)',
        'iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003',
        'v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.',
    ]
    result = clean_record(experiments.do(create_record(snippet)))

    assert expected == result['description']
def test_experiment_names_and_affiliations_from_marcxml_multiple_119():
    """Several 119 fields split into experiment names (``a``) and affiliations (``u``/``z``)."""
    marcxml = (
        '<record>'
        ' <datafield tag="119" ind1=" " ind2=" ">'
        ' <subfield code="a">LATTICE-UKQCD</subfield>'
        ' </datafield>'
        ' <datafield tag="119" ind1=" " ind2=" ">'
        ' <subfield code="u">Cambridge U.</subfield>'
        ' </datafield>'
        ' <datafield tag="119" ind1=" " ind2=" ">'
        ' <subfield code="u">Edinburgh U.</subfield>'
        ' <subfield code="z">902787</subfield>'
        ' </datafield>'
        ' <datafield tag="119" ind1=" " ind2=" ">'
        ' <subfield code="u">Swansea U.</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1228417

    expected_names = [{'title': 'LATTICE-UKQCD'}]
    expected_affiliations = [
        {
            'curated_relation': False,
            'name': 'Cambridge U.',
        },
        {
            'curated_relation': True,
            'name': 'Edinburgh U.',
            'record': {'$ref': 'http://localhost:5000/api/institutions/902787'},
        },
        {
            'curated_relation': False,
            'name': 'Swansea U.',
        },
    ]

    record = create_record(marcxml)
    result = clean_record(experiments.do(record))

    assert result['experiment_names'] == expected_names
    assert result['affiliations'] == expected_affiliations
def test_ids_from_double_035__a_9():
    """Two 035 fields (INSPIRE and BAI sources) convert to two typed ids."""
    marcxml = (
        '<record>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="a">INSPIRE-00134135</subfield>'
        ' <subfield code="9">INSPIRE</subfield>'
        ' </datafield>'
        ' <datafield tag="035" ind1=" " ind2=" ">'
        ' <subfield code="a">H.Vogel.1</subfield>'
        ' <subfield code="9">BAI</subfield>'
        ' </datafield>'
        '</record>'
    )

    inspire_id = {'type': 'INSPIRE ID', 'value': 'INSPIRE-00134135'}
    bai = {'type': 'INSPIRE BAI', 'value': 'H.Vogel.1'}
    expected = [inspire_id, bai]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['ids']
def test_address_from_111__a_c_e_g_x_y_and_270__b():
    """A 111 ``c`` subfield and a 270 ``b`` subfield each contribute one address entry."""
    marcxml = (
        '<record>'
        ' <datafield tag="111" ind1=" " ind2=" ">'
        ' <subfield code="a">2017 International Workshop on Baryon and Lepton Number Violation: From the Cosmos to the LHC</subfield>'
        ' <subfield code="c">Cleveland, Ohio, USA</subfield>'
        ' <subfield code="e">BLV 2017</subfield>'
        ' <subfield code="g">C17-05-15</subfield>'
        ' <subfield code="x">2017-05-15</subfield>'
        ' <subfield code="y">2017-05-18</subfield>'
        ' </datafield>'
        ' <datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="b">Case Western Reserve University</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1353313

    from_111 = {
        'original_address': 'Cleveland, Ohio, USA',
        'country_code': 'US',
        'state': 'US-OH',
    }
    from_270 = {'original_address': 'Case Western Reserve University'}
    expected = [from_111, from_270]

    record = create_record(marcxml)
    result = clean_record(conferences.do(record))

    assert expected == result['address']
def test_dois_from_0247_a_2_and_0247_a_2_9():
    """One DOI listed twice (once with a source) is expected to yield two entries."""
    marcxml = (
        '<record>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="9">bibmatch</subfield>'
        ' <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        ' </datafield>'
        ' <datafield tag="024" ind1="7" ind2=" ">'
        ' <subfield code="2">DOI</subfield>'
        ' <subfield code="a">10.1088/1475-7516/2015/03/044</subfield>'
        ' </datafield>'
        '</record>'
    )

    sourced_doi = {'source': 'bibmatch', 'value': '10.1088/1475-7516/2015/03/044'}
    plain_doi = {'value': '10.1088/1475-7516/2015/03/044'}
    expected = [sourced_doi, plain_doi]

    converted = hep.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['dois']
def test_multiple_issn_from_marcxml_022():
    """Two 022 fields convert to two ISSN entries with lower-cased media."""
    marcxml = (
        '<record>'
        ' <datafield tag="022" ind1=" " ind2=" ">'
        ' <subfield code="a">2349-2716</subfield>'
        ' <subfield code="b">Online</subfield>'
        ' </datafield>'
        ' <datafield tag="022" ind1=" " ind2=" ">'
        ' <subfield code="a">2349-6088</subfield>'
        ' <subfield code="b">Print</subfield>'
        ' </datafield>'
        '</record>'
    )

    online = {'medium': 'online', 'value': '2349-2716'}
    printed = {'medium': 'print', 'value': '2349-6088'}
    expected = [online, printed]

    record = create_record(marcxml)
    result = clean_record(journals.do(record))

    assert expected == result['issn']
def test_hidden_notes_from_595__a_9_and_595__double_a_9():
    """Every ``a`` subfield of the 595 fields becomes its own hidden note with the source."""
    marcxml = (
        '<record>'
        ' <datafield tag="595" ind1=" " ind2=" ">'
        ' <subfield code="9">SPIRES-HIDDEN</subfield>'
        ' <subfield code="a">Title changed from ALLCAPS</subfield>'
        ' </datafield>'
        ' <datafield tag="595" ind1=" " ind2=" ">'
        ' <subfield code="9">SPIRES-HIDDEN</subfield>'
        ' <subfield code="a">TeXtitle from script</subfield>'
        ' <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/109310

    expected = [
        {'source': 'SPIRES-HIDDEN', 'value': 'Title changed from ALLCAPS'},
        {'source': 'SPIRES-HIDDEN', 'value': 'TeXtitle from script'},
        {'source': 'SPIRES-HIDDEN', 'value': 'no affiliation (not clear pn the fulltext)'},
    ]

    record = create_record(marcxml)
    result = clean_record(hep.do(record))

    assert expected == result['hidden_notes']
def test_institution_from_double_110__a():
    """Two 110 fields with only ``a`` convert to two non-curated institution entries."""
    marcxml = (
        '<record>'
        ' <datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Coll. William and Mary</subfield>'
        ' </datafield>'
        ' <datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Jefferson Lab</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1427342

    expected = [
        {'curated_relation': False, 'name': 'Coll. William and Mary'},
        {'curated_relation': False, 'name': 'Jefferson Lab'},
    ]

    record = create_record(marcxml)
    result = clean_record(jobs.do(record))

    assert expected == result['institution']
def test_address_from_marcxml_371__a_double_b_c_d_e_g():
    """Two ``b`` subfields of a 371 field are joined into one comma-separated city."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Philosophenweg 16</subfield>'
        ' <subfield code="b">Altstadt</subfield>'
        ' <subfield code="b">Heidelberg</subfield>'
        ' <subfield code="c">Baden-Wuerttemberg</subfield>'
        ' <subfield code="d">Germany</subfield>'
        ' <subfield code="e">69120</subfield>'
        ' <subfield code="g">DE</subfield>'
        '</datafield>'
    )

    address = {
        'city': 'Altstadt, Heidelberg',
        'country': 'Germany',
        'country_code': 'DE',
        'state': 'Baden-Wuerttemberg',
        'original_address': ['Philosophenweg 16'],
        'postal_code': '69120',
    }
    expected = [address]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['address']
def test_regions_from_043__a_corrects_misspellings():
    """A 043 ``a`` value of 'United States' is expected to convert to 'North America'."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        ' <subfield code="a">United States</subfield>'
        '</datafield>'
    )

    expected = ['North America']

    record = create_record(marcxml)
    result = clean_record(jobs.do(record))

    assert expected == result['regions']
def test_urls_from_marcxml_multiple_8564():
    """Two 856 fields convert to two url entries with description and value."""
    marcxml = (
        '<record>'
        ' <datafield tag="856" ind1="4" ind2="">'
        ' <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        ' <subfield code="y">Conference web page</subfield>'
        ' </datafield>'
        ' <datafield tag="856" ind1="4" ind2="">'
        ' <subfield code="u">http://www.cern.ch/</subfield>'
        ' <subfield code="y">CERN web page</subfield>'
        ' </datafield>'
        '</record>'
    )

    conference_page = {
        'description': 'Conference web page',
        'value': 'http://www.physics.unlv.edu/labastro/',
    }
    cern_page = {
        'description': 'CERN web page',
        'value': 'http://www.cern.ch/',
    }
    expected = [conference_page, cern_page]

    record = create_record(marcxml)
    result = clean_record(hep.do(record))

    assert expected == result['urls']
def test_extra_words_from_410__decuple_g():
    """Ten ``g`` subfields of a 410 field convert to ten extra words, order preserved."""
    marcxml = (
        '<datafield tag="410" ind1=" " ind2=" ">'
        ' <subfield code="g">Institut Theoretische Physik,</subfield>'
        ' <subfield code="g">RWTH, Inst.</subfield>'
        ' <subfield code="g">institute A</subfield>'
        ' <subfield code="g">III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West</subfield>'
        ' <subfield code="g">physics</subfield>'
        ' <subfield code="g">52056</subfield>'
        ' <subfield code="g">D-52056</subfield>'
        ' <subfield code="g">DE-52056</subfield>'
        ' <subfield code="g">phys</subfield>'
        ' <subfield code="g">I. Physikalisches Institut</subfield>'
        '</datafield>'
    )  # record/902624

    expected = [
        'Institut Theoretische Physik,',
        'RWTH, Inst.',
        'institute A',
        'III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West',
        'physics',
        '52056',
        'D-52056',
        'DE-52056',
        'phys',
        'I. Physikalisches Institut',
    ]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['extra_words']
def test_regions_from_043__a():
    """A 043 ``a`` subfield converts to a one-element regions list."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        ' <subfield code="a">Asia</subfield>'
        '</datafield>'
    )

    expected = ['Asia']

    record = create_record(marcxml)
    result = clean_record(jobs.do(record))

    assert expected == result['regions']
def test_positions_from_371__a_m_r_z():
    """A 371 field converts to one position with email, institution, rank and status."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Antwerp U.</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="r">SENIOR</subfield>'
        ' <subfield code="z">Current</subfield>'
        '</datafield>'
    )  # record/997958

    position = {
        'curated_relation': False,
        'email': '*****@*****.**',
        'institution': {'name': 'Antwerp U.'},
        'rank': 'SENIOR',
        '_rank': 'SENIOR',
        'status': 'Current',
    }
    expected = [position]

    converted = hepnames.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['positions']
def test_contact_details_from_multiple_marcxml_270():
    """Two 270 fields convert to two contacts; the email appears only where ``m`` is given."""
    marcxml = (
        '<record> '
        ' <datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="p">Manfred Lindner</subfield>'
        ' </datafield>'
        ' <datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="p">Wynton Marsalis</subfield>'
        ' </datafield>'
        '</record>'
    )

    with_email = {'name': 'Manfred Lindner', 'email': '*****@*****.**'}
    name_only = {'name': 'Wynton Marsalis'}
    expected = [with_email, name_only]

    record = create_record(marcxml)
    result = clean_record(conferences.do(record))

    assert expected == result['contact_details']
def test_spokespersons_from_702__a_i_z():
    """A 702 field converts to one current, non-curated spokesperson with an INSPIRE id."""
    marcxml = (
        '<datafield tag="702" ind1=" " ind2=" ">'
        ' <subfield code="a">Hogan, Craig J.</subfield>'
        ' <subfield code="i">INSPIRE-00090662</subfield>'
        ' <subfield code="z">Current</subfield>'
        '</datafield>'
    )  # record/1108189

    spokesperson = {
        'ids': [
            {'type': 'INSPIRE ID', 'value': 'INSPIRE-00090662'},
        ],
        'name': 'Hogan, Craig J.',
        'current': True,
        'curated_relation': False,
    }
    expected = [spokesperson]

    record = create_record(marcxml)
    result = clean_record(experiments.do(record))

    assert expected == result['spokespersons']
def test_related_experiments_from_double_510__a_w_0():
    """Each 510 field with ``0``, ``a`` and ``w`` becomes a curated predecessor experiment."""
    marcxml = (
        '<record>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">1108293</subfield>'
        ' <subfield code="a">XENON</subfield>'
        ' <subfield code="w">a</subfield>'
        ' </datafield>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">1386527</subfield>'
        ' <subfield code="a">XENON100</subfield>'
        ' <subfield code="w">a</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1386519

    first = {
        'name': 'XENON',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1108293',
        },
        'relation': 'predecessor',
        'curated_relation': True,
    }
    second = {
        'name': 'XENON100',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1386527',
        },
        'relation': 'predecessor',
        'curated_relation': True,
    }
    expected = [first, second]

    converted = experiments.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['related_experiments']
def test_extra_words_from_410__decuple_g():
    """All ten ``g`` values of the 410 field are expected back as ``extra_words``."""
    marcxml = (
        '<datafield tag="410" ind1=" " ind2=" ">'
        ' <subfield code="g">Institut Theoretische Physik,</subfield>'
        ' <subfield code="g">RWTH, Inst.</subfield>'
        ' <subfield code="g">institute A</subfield>'
        ' <subfield code="g">III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West</subfield>'
        ' <subfield code="g">physics</subfield>'
        ' <subfield code="g">52056</subfield>'
        ' <subfield code="g">D-52056</subfield>'
        ' <subfield code="g">DE-52056</subfield>'
        ' <subfield code="g">phys</subfield>'
        ' <subfield code="g">I. Physikalisches Institut</subfield>'
        '</datafield>'
    )  # record/902624

    expected = [
        'Institut Theoretische Physik,',
        'RWTH, Inst.',
        'institute A',
        'III. Physikalisches Institut, Technische Hochschule Aachen, Aachen, West',
        'physics',
        '52056',
        'D-52056',
        'DE-52056',
        'phys',
        'I. Physikalisches Institut',
    ]

    converted = institutions.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['extra_words']
def test_address_from_multiple_marcxml__111_c():
    """Two 111 ``c`` subfields convert to two addresses with derived country codes."""
    marcxml = (
        '<record>'
        ' <datafield tag="111" ind1=" " ind2=" ">'
        ' <subfield code="c">Austin, Tex.</subfield>'
        ' </datafield>'
        ' <datafield tag="111" ind1=" " ind2=" ">'
        ' <subfield code="c">Den Haag, Nederlands</subfield>'
        ' </datafield>'
        '</record>'
    )

    austin = {
        'country_code': 'US',
        'state': 'US-TX',
        'original_address': 'Austin, Tex.',
    }
    den_haag = {
        'country_code': 'NL',
        'original_address': 'Den Haag, Nederlands',
    }
    expected = [austin, den_haag]

    record = create_record(marcxml)
    result = clean_record(conferences.do(record))

    assert expected == result['address']
def test_address_from_marcxml_371__a_b_c_d_double_e_g():
    """Two ``e`` subfields of a 371 field are joined into one comma-separated postal code."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Philosophenweg 16</subfield>'
        ' <subfield code="b">Heidelberg</subfield>'
        ' <subfield code="c">Baden-Wuerttemberg</subfield>'
        ' <subfield code="d">Germany</subfield>'
        ' <subfield code="e">69120</subfield>'
        ' <subfield code="e">DE-119</subfield>'
        ' <subfield code="g">DE</subfield>'
        '</datafield>'
    )

    address = {
        'city': 'Heidelberg',
        'country': 'Germany',
        'country_code': 'DE',
        'state': 'Baden-Wuerttemberg',
        'original_address': ['Philosophenweg 16'],
        'postal_code': '69120, DE-119',
    }
    expected = [address]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['address']
def test_address_from_marcxml_371__a_b_c_d_e_double_g():
    """A duplicated ``g`` subfield in a 371 field results in one ``country_code`` value."""
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        ' <subfield code="a">Philosophenweg 16</subfield>'
        ' <subfield code="b">Heidelberg</subfield>'
        ' <subfield code="c">Baden-Wuerttemberg</subfield>'
        ' <subfield code="d">Germany</subfield>'
        ' <subfield code="e">69120</subfield>'
        ' <subfield code="g">DE</subfield>'
        ' <subfield code="g">DE</subfield>'
        '</datafield>'
    )

    expected = [
        {
            'city': 'Heidelberg',
            'country': 'Germany',
            'country_code': 'DE',
            'state': 'Baden-Wuerttemberg',
            'original_address': ['Philosophenweg 16'],
            'postal_code': '69120',
        },
    ]

    converted = institutions.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['address']
def test_field_from_marcxml_650_with_single_a_and_9():
    """Convert a single 650 datafield carrying ``2``, ``a`` and ``9``.

    The ``a`` field code is expected to map to an INSPIRE category term,
    and the free-text ``9`` value to the source ``INSPIRE``.
    """
    marcxml = (
        '<record>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">INSPIRE</subfield>'
        ' <subfield code="a">HEP-PH</subfield>'
        ' <subfield code="9">automatically added based on DCC, PPF, DK </subfield>'
        ' </datafield>'
        '</record>'
    )

    category = {
        'source': 'INSPIRE',
        '_scheme': 'INSPIRE',
        'scheme': 'INSPIRE',
        '_term': 'HEP-PH',
        'term': 'Phenomenology-HEP',
    }
    expected = [category]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['field_categories']
def test_field_from_multiple_marcxml_650():
    """Convert two 650 datafields into two field categories.

    Both ``a`` values are arXiv field codes, although the second datafield
    labels its scheme as INSPIRE.
    """
    marcxml = (
        '<record>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">arXiv</subfield>'
        ' <subfield code="a">HEP-PH</subfield>'
        ' </datafield>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="2">INSPIRE</subfield>'
        ' <subfield code="a">astro-ph.IM</subfield>'
        ' </datafield>'
        '</record>'
    )

    labelled_arxiv = {
        '_scheme': 'arXiv',
        'scheme': 'INSPIRE',
        '_term': 'HEP-PH',
        'term': 'Phenomenology-HEP',
    }
    labelled_inspire = {
        '_scheme': 'INSPIRE',
        'scheme': 'INSPIRE',
        '_term': 'astro-ph.IM',
        'term': 'Instrumentation',
    }
    expected = [labelled_arxiv, labelled_inspire]

    record = create_record(marcxml)
    result = clean_record(hepnames.do(record))

    assert expected == result['field_categories']
def test_related_institutes_from__double_510_a_w_0_predecessor():
    """Two 510 fields with ``w`` = ``a`` convert to curated 'predecessor' institutes."""
    marcxml = (
        '<record>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">903276</subfield>'
        ' <subfield code="a">INS, Tokyo</subfield>'
        ' <subfield code="w">a</subfield>'
        ' </datafield>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">905439</subfield>'
        ' <subfield code="a">U. Tokyo, Meson Sci. Lab.</subfield>'
        ' <subfield code="w">a</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/902916

    ins_tokyo = {
        'curated_relation': True,
        'name': 'INS, Tokyo',
        'relation_type': 'predecessor',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903276'},
    }
    meson_lab = {
        'curated_relation': True,
        'name': 'U. Tokyo, Meson Sci. Lab.',
        'relation_type': 'predecessor',
        'record': {'$ref': 'http://localhost:5000/api/institutions/905439'},
    }
    expected = [ins_tokyo, meson_lab]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['related_institutes']
def test_related_institutes_from__double_510_a_w_0():
    """Two 510 fields with ``w`` = ``t`` convert to curated 'parent' institutes."""
    marcxml = (
        '<record>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">1385404</subfield>'
        ' <subfield code="a">U. Caen (main)</subfield>'
        ' <subfield code="w">t</subfield>'
        ' </datafield>'
        ' <datafield tag="510" ind1=" " ind2=" ">'
        ' <subfield code="0">926589</subfield>'
        ' <subfield code="a">CNRS, France</subfield>'
        ' <subfield code="w">t</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1430106

    caen = {
        'curated_relation': True,
        'name': 'U. Caen (main)',
        'relation_type': 'parent',
        'record': {'$ref': 'http://localhost:5000/api/institutions/1385404'},
    }
    cnrs = {
        'curated_relation': True,
        'name': 'CNRS, France',
        'relation_type': 'parent',
        'record': {'$ref': 'http://localhost:5000/api/institutions/926589'},
    }
    expected = [caen, cnrs]

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['related_institutes']
def test_address_from_111__a_c_e_g_x_y_and_270__b():
    """The address list combines the 111 ``c`` place with the 270 ``b`` venue."""
    marcxml = (
        '<record>'
        ' <datafield tag="111" ind1=" " ind2=" ">'
        ' <subfield code="a">2017 International Workshop on Baryon and Lepton Number Violation: From the Cosmos to the LHC</subfield>'
        ' <subfield code="c">Cleveland, Ohio, USA</subfield>'
        ' <subfield code="e">BLV 2017</subfield>'
        ' <subfield code="g">C17-05-15</subfield>'
        ' <subfield code="x">2017-05-15</subfield>'
        ' <subfield code="y">2017-05-18</subfield>'
        ' </datafield>'
        ' <datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="b">Case Western Reserve University</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1353313

    expected = [
        {
            'original_address': 'Cleveland, Ohio, USA',
            'country_code': 'US',
            'state': 'US-OH',
        },
        {'original_address': 'Case Western Reserve University'},
    ]

    converted = conferences.do(create_record(marcxml))
    result = clean_record(converted)

    assert expected == result['address']
def test_description_from_multiple_520__a():
    """Each of the four 520 ``a`` subfields is expected as one description entry."""
    snippet = (
        '<record>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002</subfield>'
        ' </datafield>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)</subfield>'
        ' </datafield>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003</subfield>'
        ' </datafield>'
        ' <datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1110568

    # Keep the first entry on a single physical line: a raw line break inside
    # a single-quoted literal is a syntax error, and the text must equal the
    # content of the first 520 subfield above ("(LNGS). The main ...").
    expected = [
        'DAMA is an observatory for rare processes which develops and uses several low-background set-ups at the Gran Sasso National Laboratory of the I.N.F.N. (LNGS). The main experimental set-ups are: i) DAMA/NaI (about 100 kg of highly radiopure NaI(Tl)), which completed its data taking on July 2002',
        'ii) DAMA/LXe (about 6.5 kg liquid Kr-free Xenon enriched either in 129Xe or in 136Xe)',
        'iii) DAMA/R&D, devoted to tests on prototypes and to small scale experiments, mainly on the investigations of double beta decay modes in various isotopes. iv) the second generation DAMA/LIBRA set-up (about 250 kg highly radiopure NaI(Tl)) in operation since March 2003',
        'v) the low background DAMA/Ge detector mainly devoted to sample measurements: in some measurements on rare processes the low-background Germanium detectors of the LNGS facility are also used. Moreover, a third generation R&D is in progress towards a possible 1 ton set-up, DAMA proposed in 1996. In particular, the DAMA/NaI and the DAMA/LIBRA set-ups have investigated the presence of Dark Matter particles in the galactic halo by exploiting the Dark Matter annual modulation signature.',
    ]
    result = clean_record(experiments.do(create_record(snippet)))

    assert expected == result['description']
def test_no_location_from_034__f():
    """A 034 field carrying only an ``f`` subfield must not produce a ``location`` key."""
    marcxml = (
        '<datafield tag="034" ind1=" " ind2=" ">'
        ' <subfield code="f">50.7736</subfield>'
        '</datafield>'
    )  # synthetic data

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert 'location' not in result
def test_core_from_690c_a_noncore():
    """A 690C field whose ``a`` subfield is 'NONCORE' converts to a falsy ``core`` value."""
    # The subfield attribute must be well-formed (code="a"); the previous
    # fixture read code=a" and was therefore not valid XML.
    snippet = (
        '<datafield tag="690" ind1="C" ind2=" ">'
        ' <subfield code="a">NONCORE</subfield>'
        '</datafield>'
    )  # record/916025

    result = clean_record(institutions.do(create_record(snippet)))

    assert not result['core']
def test_date_closed_from_046__l():
    """The ``l`` subfield of a 046 field is expected unchanged as ``date_closed``."""
    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="l">2008-02-11</subfield>'
        '</datafield>'
    )  # record/934304

    expected = '2008-02-11'

    record = create_record(marcxml)
    result = clean_record(jobs.do(record))

    assert expected == result['date_closed']
def test_timezone_from_043__t():
    """The ``t`` subfield of a 043 field converts to a one-element timezone list."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        ' <subfield code="t">+05</subfield>'
        '</datafield>'
    )  # record/902635

    expected = ['+05']

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['timezone']
def test_no_location_from_034__double_d():
    """A 034 field with two ``d`` subfields must not produce a ``location`` key."""
    marcxml = (
        '<datafield tag="034" ind1=" " ind2=" ">'
        ' <subfield code="d">32.540776</subfield>'
        ' <subfield code="d">15.561010</subfield>'
        '</datafield>'
    )  # record/1442294

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert 'location' not in result
def test_non_public_notes_from_667__a():
    """The ``a`` subfield of a 667 field converts to a one-element non-public notes list."""
    marcxml = (
        '<datafield tag="667" ind1=" " ind2=" ">'
        ' <subfield code="a">Former ICN = Negev U.</subfield>'
        '</datafield>'
    )  # record/902663

    expected = ['Former ICN = Negev U.']

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['non_public_notes']
def test_field_activity_from_372__a():
    """A 372 ``a`` value of 'Research center' is expected back as 'Research Center'."""
    marcxml = (
        '<datafield tag="372" ind1=" " ind2=" ">'
        ' <subfield code="a">Research center</subfield>'
        '</datafield>'
    )

    expected = ['Research Center']

    record = create_record(marcxml)
    result = clean_record(institutions.do(record))

    assert expected == result['field_activity']
def test_position_from_245__a():
    """The ``a`` subfield of a 245 field is expected unchanged as ``position``."""
    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">Neutrino Physics</subfield>'
        '</datafield>'
    )  # record/1467312

    expected = 'Neutrino Physics'

    record = create_record(marcxml)
    result = clean_record(jobs.do(record))

    assert expected == result['position']