Пример #1
0
 def test_legacy_export_marcxml(self):
     """Record - legacy export MARCXML.

     Creates a ``Record`` from a MARCXML blob and checks that the record
     structure parsed from ``legacy_export_as_marc()`` is identical to the
     one parsed from the original blob, ignoring subfield order.
     """
     # FIXME: use a better way to compare
     from invenio_record.models import Record
     from invenio.legacy.bibrecord import create_record, records_identical
     marcxml = '''
         <record>
           <controlfield tag="001">8</controlfield>
           <datafield tag="100" ind1=" " ind2=" ">
             <subfield code="a">Efstathiou, G P</subfield>
             <subfield code="u">Cambridge University</subfield>
           </datafield>
           <datafield tag="245" ind1=" " ind2=" ">
             <subfield code="a">Title</subfield>
             <subfield code="b">SubTitle</subfield>
           </datafield>
           <datafield tag="700" ind1=" " ind2=" ">
            <subfield code="a">Lasenby, A N</subfield>
           </datafield>
           <datafield tag="980" ind1=" " ind2=" ">
             <subfield code="a">Articles</subfield>
           </datafield>
         </record>
     '''
     record = Record.create(marcxml, master_format='marc',
                            namespace='testsuite')
     # Reference structure, parsed straight from the input blob.
     expected_struct, _, _ = create_record(marcxml)
     # Structure obtained after going through the Record and back to MARC.
     exported_struct, _, _ = create_record(record.legacy_export_as_marc())
     same = records_identical(exported_struct, expected_struct,
                              ignore_subfield_order=True)
     self.assertTrue(same)
Пример #2
0
    def setUp(self):
        """Load the HEP MARCXML fixtures and prepare the expected CV values.

        Reads ``fixtures/test_hep_formats.xml`` and
        ``fixtures/test_hep_publi_info.xml`` from the ``tests`` package,
        converts both through ``create_record`` and ``hep.do`` and stores the
        resulting records together with the expected CV-LaTeX values on the
        test instance.
        """
        self.marcxml = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_formats.xml'))

        self.marcxml_publi_info = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_publi_info.xml'))

        record = create_record(self.marcxml)

        record_publi_info = create_record(self.marcxml_publi_info)

        self.hep_record = hep.do(record)

        self.hep_record_publi_info = hep.do(record_publi_info)

        # Backslashes are escaped explicitly: sequences such as "\s" and
        # "\ " are invalid escapes and trigger a warning in non-raw literals.
        # The resulting string values are unchanged.
        self.sample_cv_latex = {
            'author': 'G.~Aad',
            'title': "{\\bf ``\nSearch for supersymmetry in events containing a same-flavour opposite-sign dilepton pair, jets, and large missing transverse momentum in $\\sqrt{s}=8$ TeV $pp$ collisions with the ATLAS detector\n''}",
            'publi_info': [
                'Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 7, 318 (2015)',
                '[Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 10, 463 (2015)]',
            ],
            'url': cfg['CFG_SITE_URL'] + '/record/1351762',
            'date': 'Mar 11, 2015'
        }

        self.sample_cv_latex_publi_info = {
            'publi_info': ['Class.\\ Quant.\\ Grav.\\  {\\bf 15}, 2153 (1998)']
        }
Пример #3
0
    def test_image(self):
        """Test the image model from XML into JSON.

        Checks that ``query_matcher`` selects the image model for the
        ``CDS_IMAGE`` fixture, then translates the fixture and spot-checks
        a few fields.
        """
        from dojson.contrib.marc21.utils import create_record
        from cds_dojson.marc21.models.image import model as marc21

        matched_model = query_matcher(create_record(CDS_IMAGE))
        assert isinstance(matched_model, marc21.__class__)

        marc_blob = create_record(CDS_IMAGE)
        record = marc21.do(marc_blob)

        # Control number (doJSON)
        control_number = record.get('control_number')
        assert control_number == '1782445'

        # Parent album (CDSImage)
        first_album = record['album_parent'][0]
        assert first_album['album_id'] == '2054964'

        # Imprint (CDSMarc21)
        first_imprint = record['imprint'][0]
        assert first_imprint['place_of_publication'] == 'Geneva'

        # No field of the blob may be left without a rule in the model
        unmapped = marc21.missing(marc_blob)
        assert unmapped == []
Пример #4
0
def test_identity_check():
    """Check that MARC21 -> JSON -> MARC21 gives back the original blob.

    Runs the round trip for the video project and the video clip fixtures.
    """
    for fixture in (CDS_VIDEO_PROJECT, CDS_VIDEO_CLIP):
        original = create_record(fixture)
        as_json = marc21.do(original)
        round_tripped = to_marc21.do(as_json)
        assert original == round_tripped
Пример #5
0
    def setUp(self):
        """Load the HEP MARCXML fixtures and prepare the expected LaTeX values.

        Reads ``fixtures/test_hep_formats.xml`` and
        ``fixtures/test_hep_publi_info.xml`` from the ``tests`` package,
        converts both through ``create_record`` and ``hep.do``, builds the
        ``Latex`` formatters (EU and US flavours) and stores the expected
        field values on the test instance.
        """
        self.marcxml = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_formats.xml'))

        self.marcxml_publi_info = pkg_resources.resource_string(
            'tests', os.path.join('fixtures', 'test_hep_publi_info.xml'))

        record = create_record(self.marcxml)

        record_publi_info = create_record(self.marcxml_publi_info)

        self.hep_record = hep.do(record)

        self.hep_record_publi_info = hep.do(record_publi_info)

        self.latex_eu = Latex(self.hep_record, 'latex_eu')

        self.latex_us = Latex(self.hep_record, 'latex_us')

        self.latex_eu_publi_info = Latex(
            self.hep_record_publi_info, 'latex_eu')

        # Backslashes are escaped explicitly: sequences such as "\s" and
        # "\ " are invalid escapes and trigger a warning in non-raw literals.
        # The resulting string values are unchanged.
        expected_title = '\nSearch for supersymmetry in events containing a same-flavour opposite-sign dilepton pair, jets, and large missing transverse momentum in $\\sqrt{s}=8$ TeV pp collisions with the ATLAS detector\n'

        self.sample_latex_eu = {
            'citation_key': 'Aad:2015wqa',
            'author': 'G.~Aad',
            'title': expected_title,
            'publi_info': [
                'Eur.\\ Phys.\\ J.\\ C {\\bf 75} (2015) 7,  318',
                '[Eur.\\ Phys.\\ J.\\ C {\\bf 75} (2015) 10,  463]',
            ],
            'arxiv': 'arXiv:1503.03290 [hep-ex]',
            'report_number': '',
            'SLACcitation': '%%CITATION = ARXIV:1503.03290;%%',
        }

        self.sample_latex_publi_info = {
            'publi_info': ['Class.\\ Quant.\\ Grav.\\  {\\bf 15} (1998) 2153']
        }

        self.sample_latex_us = {
            'citation_key': 'Aad:2015wqa',
            'author': 'G.~Aad',
            'title': expected_title,
            'publi_info': [
                'Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 7, 318 (2015)',
                '[Eur.\\ Phys.\\ J.\\ C {\\bf 75}, no. 10, 463 (2015)]',
            ],
            'arxiv': 'arXiv:1503.03290 [hep-ex]',
            'report_number': '',
            'SLACcitation': '%%CITATION = ARXIV:1503.03290;%%',
        }
Пример #6
0
def test_marc21_856_indicators():
    """Test MARC21 856 field special indicator values.

    With ``ind1="4"`` the expected access method is ``'HTTP'``; with
    ``ind1="7"`` it is the content of subfield ``2``. Both records must
    also translate back to the blob they were created from.
    """
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record
    from dojson.contrib.to_marc21 import to_marc21

    http_field = '''
    <datafield tag="856" ind1="4" ind2=" ">
        <subfield code="s">272681</subfield>
        <subfield code="u">https://zenodo.org/record/17575/files/...</subfield>
        <subfield code="z">0</subfield>
    </datafield>
    '''
    custom_method_field = '''
    <datafield tag="856" ind1="7" ind2=" ">
        <subfield code="s">272681</subfield>
        <subfield code="u">https://zenodo.org/record/17575/files/...</subfield>
        <subfield code="z">0</subfield>
        <subfield code="2">Awesome access method</subfield>
    </datafield>
    '''

    def expected_for(access_method):
        # The two expectations differ only in the access method.
        return {
            'electronic_location_and_access': [
                {'public_note': ('0',),
                 'access_method': access_method,
                 'uniform_resource_identifier': (
                     'https://zenodo.org/record/17575/files/...',),
                 'file_size': ('272681',)}
            ]
        }

    cases = (
        (http_field, expected_for('HTTP')),
        (custom_method_field, expected_for('Awesome access method')),
    )
    for marcxml, expected in cases:
        blob = create_record(marcxml)
        data = marc21.do(blob)
        assert expected == data
        back_blob = to_marc21.do(data)
        assert blob == back_blob
def test_urls_from_marcxml_multiple_8564():
    """Two 856 datafields must produce two ``urls`` entries, in order."""
    marcxml = (
        '<record>'
        '  <datafield tag="856" ind1="4" ind2="">'
        '    <subfield code="u">http://www.physics.unlv.edu/labastro/</subfield>'
        '    <subfield code="y">Conference web page</subfield>'
        '  </datafield>'
        '  <datafield tag="856" ind1="4" ind2="">'
        '    <subfield code="u">http://www.cern.ch/</subfield>'
        '    <subfield code="y">CERN web page</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_urls = [
        {'description': 'Conference web page',
         'value': 'http://www.physics.unlv.edu/labastro/'},
        {'description': 'CERN web page',
         'value': 'http://www.cern.ch/'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(hep.do(marc_record))

    assert expected_urls == cleaned['urls']
def test_field_from_marcxml_650_with_two_2():
    """Two '2' subfields in one datafield.

    The first one ('arXiv') is expected to be kept as the original scheme.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="650" ind1="1" ind2="7">'
        '    <subfield code="2">arXiv</subfield>'
        '    <subfield code="2">INSPIRE</subfield>'
        '    <subfield code="a">hep-ex</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_categories = [
        {'_scheme': 'arXiv',
         'scheme': 'INSPIRE',
         '_term': 'hep-ex',
         'term': 'Experiment-HEP'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(hepnames.do(marc_record))

    assert expected_categories == cleaned['field_categories']
Пример #9
0
def records():
    """Load the demo MARC21 bibliographic records.

    Reads ``data/marc21/bibliographic.xml`` shipped with ``invenio_records``
    and, for every record in it: translates the MARCXML to JSON, mints a
    persistent identifier with the ``recid_minter``, creates the record
    under a fresh UUID and sends it to the indexer. All records are created
    inside one nested transaction, which is committed at the end.
    """
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # Locate the demo data inside the installed package.
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            # The position of a record in the file is not needed, so iterate
            # over the blobs directly instead of enumerating them.
            for data in split_blob(source.read()):
                # The same UUID is used for the PID and for the record.
                rec_uuid = uuid.uuid4()
                # Translate MARCXML -> JSON.
                record = marc21.do(create_record(data))
                # Mint the PID.
                current_pidstore.minters['recid_minter'](
                    rec_uuid, record
                )
                # Create the record and index it.
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
def test_field_from_multiple_marcxml_650():
    """Two 650 datafields.

    Both carry arXiv field codes, but the second one is labeled as INSPIRE.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="650" ind1="1" ind2="7">'
        '    <subfield code="2">arXiv</subfield>'
        '    <subfield code="a">HEP-PH</subfield>'
        '  </datafield>'
        '  <datafield tag="650" ind1="1" ind2="7">'
        '    <subfield code="2">INSPIRE</subfield>'
        '    <subfield code="a">astro-ph.IM</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_categories = [
        {'_scheme': 'arXiv',
         'scheme': 'INSPIRE',
         '_term': 'HEP-PH',
         'term': 'Phenomenology-HEP'},
        {'_scheme': 'INSPIRE',
         'scheme': 'INSPIRE',
         '_term': 'astro-ph.IM',
         'term': 'Instrumentation'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(hepnames.do(marc_record))

    assert expected_categories == cleaned['field_categories']
def test_address_from_111__a_c_e_g_x_y_and_270__b():
    """Addresses are collected from both 111 $c and 270 $b."""
    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="a">2017 International Workshop on Baryon and Lepton Number Violation: From the Cosmos to the LHC</subfield>'
        '    <subfield code="c">Cleveland, Ohio, USA</subfield>'
        '    <subfield code="e">BLV 2017</subfield>'
        '    <subfield code="g">C17-05-15</subfield>'
        '    <subfield code="x">2017-05-15</subfield>'
        '    <subfield code="y">2017-05-18</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="b">Case Western Reserve University</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1353313
    expected_addresses = [
        {'original_address': 'Cleveland, Ohio, USA',
         'country_code': 'US',
         'state': 'US-OH'},
        {'original_address': 'Case Western Reserve University'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(conferences.do(marc_record))

    assert expected_addresses == cleaned['address']
def test_field_from_marcxml_650_with_single_a_and_9():
    """Simple case: one 'a' subfield plus a '9' (source) subfield.

    The arXiv field code is expected to be mapped to an INSPIRE category,
    and the free-text source to the standard term 'INSPIRE'.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="650" ind1="1" ind2="7">'
        '    <subfield code="2">INSPIRE</subfield>'
        '    <subfield code="a">HEP-PH</subfield>'
        '    <subfield code="9">automatically added based on DCC, PPF, DK </subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_categories = [
        {'source': 'INSPIRE',
         '_scheme': 'INSPIRE',
         'scheme': 'INSPIRE',
         '_term': 'HEP-PH',
         'term': 'Phenomenology-HEP'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(hepnames.do(marc_record))

    assert expected_categories == cleaned['field_categories']
def test_address_from_multiple_marcxml__111_c():
    """Each 111 $c of a record must produce its own address entry."""
    marcxml = (
        '<record>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Austin, Tex.</subfield>'
        '  </datafield>'
        '  <datafield tag="111" ind1=" " ind2=" ">'
        '    <subfield code="c">Den Haag, Nederlands</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_addresses = [
        {'country_code': 'US',
         'state': 'US-TX',
         'original_address': 'Austin, Tex.'},
        {'country_code': 'NL',
         'original_address': 'Den Haag, Nederlands'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(conferences.do(marc_record))

    assert expected_addresses == cleaned['address']
Пример #14
0
def record_not_yet_deleted(app):
    """Provide a HEP record with control number 333, then remove it.

    The record is upserted and, if the upsert returned something, indexed;
    after the ``yield`` it is deleted again as ``('literature', 333)``.
    NOTE(review): presumably used as a pytest fixture - the decorator is
    not visible here.
    """
    marcxml = (
        '<record>'
        '  <controlfield tag="001">333</controlfield>'
        '  <controlfield tag="005">20160913214552.0</controlfield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record = hep.do(create_record(marcxml))
        json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            upserted = record_upsert(json_record)
            if upserted:
                RecordIndexer().index(upserted)

        db.session.commit()

    yield

    # Teardown: drop the record created above.
    with app.app_context():
        _delete_record_from_everywhere('literature', 333)
def test_contact_details_from_multiple_marcxml_270():
    """Each 270 datafield must produce its own ``contact_details`` entry."""
    # NOTE(review): the e-mail literals below look like they were obfuscated
    # by whatever tool extracted this snippet - confirm against the upstream
    # test before relying on them.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="p">Wynton Marsalis</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_contacts = [
        {'name': 'Manfred Lindner',
         'email': '*****@*****.**'},
        {'name': 'Wynton Marsalis'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(conferences.do(marc_record))

    assert expected_contacts == cleaned['contact_details']
Пример #16
0
def test_languages_from_041__a_handles_multiple_languages_in_one_a():
    """A single 041 $a holding 'Russian / English' yields two languages."""
    languages_schema = load_schema('hep')['properties']['languages']

    marcxml = (
        '<datafield tag="041" ind1=" " ind2=" ">'
        '  <subfield code="a">Russian / English</subfield>'
        '</datafield>'
    )  # record/116959

    # MARC -> JSON
    expected_json = ['ru', 'en']
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['languages'], languages_schema) is None
    assert expected_json == json_record['languages']

    # JSON -> MARC
    expected_marc = [{'a': 'russian'}, {'a': 'english'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['041']
Пример #17
0
def test_languages_from_double_041__a():
    """Two 041 datafields yield two languages, in order."""
    languages_schema = load_schema('hep')['properties']['languages']

    marcxml = (
        '<record>'
        '  <datafield tag="041" ind1=" " ind2=" ">'
        '    <subfield code="a">French</subfield>'
        '  </datafield>'
        '  <datafield tag="041" ind1=" " ind2=" ">'
        '    <subfield code="a">German</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1231408

    # MARC -> JSON
    expected_json = ['fr', 'de']
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['languages'], languages_schema) is None
    assert expected_json == json_record['languages']

    # JSON -> MARC
    expected_marc = [{'a': 'french'}, {'a': 'german'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['041']
Пример #18
0
def test_report_numbers_from_037__z_9():
    """A 037 with $9 and $a round-trips as a sourced report number.

    NOTE(review): the function name mentions $z, but the snippet only
    carries $9 and $a - confirm which one was intended.
    """
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        '  <subfield code="9">SLAC</subfield>'
        '  <subfield code="a">SLAC-PUB-16140</subfield>'
        '</datafield>'
    )  # record/1326454

    # MARC -> JSON
    expected_json = [
        {'source': 'SLAC', 'value': 'SLAC-PUB-16140'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['report_numbers'],
                    report_numbers_schema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC
    expected_marc = [
        {'9': 'SLAC', 'a': 'SLAC-PUB-16140'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
Пример #19
0
def test_isbns_from_020__a_b_normalizes_online():
    """020 $b 'Online' is expected as medium 'online'; hyphens are dropped."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">978-94-024-0999-4</subfield>'
        '  <subfield code="b">Online</subfield>'
        '</datafield>'
    )  # record/1504286

    # MARC -> JSON
    expected_json = [
        {'value': '9789402409994', 'medium': 'online'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [
        {'a': '9789402409994', 'b': 'online'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
Пример #20
0
def test_report_numbers_from_two_037__a():
    """Two 037 datafields yield two report numbers, in order."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<record>'
        '  <datafield tag="037" ind1=" " ind2=" ">'
        '    <subfield code="a">UTPT-89-27</subfield>'
        '  </datafield>'
        '  <datafield tag="037" ind1=" " ind2=" ">'
        '    <subfield code="a">CALT-68-1585</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/26564

    # MARC -> JSON
    expected_json = [
        {'value': 'UTPT-89-27'},
        {'value': 'CALT-68-1585'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['report_numbers'],
                    report_numbers_schema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC
    expected_marc = [
        {'a': 'UTPT-89-27'},
        {'a': 'CALT-68-1585'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
Пример #21
0
def test_report_numbers_hidden_from_037__z():
    """A report number in 037 $z is expected to be flagged as hidden."""
    report_numbers_schema = load_schema('hep')['properties']['report_numbers']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        '  <subfield code="z">FERMILAB-PUB-17-011-CMS</subfield>'
        '</datafield>'
    )  # record/1508174

    # MARC -> JSON
    expected_json = [
        {'hidden': True, 'value': 'FERMILAB-PUB-17-011-CMS'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['report_numbers'],
                    report_numbers_schema) is None
    assert expected_json == json_record['report_numbers']

    # JSON -> MARC
    expected_marc = [{'z': 'FERMILAB-PUB-17-011-CMS'}]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
Пример #22
0
def test_external_system_numbers_from_035__a_d_h_m_9():
    """Only $9 and $a of the 035 are expected in the JSON and back in MARC."""
    identifiers_schema = (
        load_schema('hep')['properties']['external_system_identifiers'])

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        '  <subfield code="9">http://cds.cern.ch/oai2d</subfield>'
        '  <subfield code="a">oai:cds.cern.ch:325030</subfield>'
        '  <subfield code="d">2015-06-05T13:24:42Z</subfield>'
        '  <subfield code="h">2015-11-09T16:22:48Z</subfield>'
        '  <subfield code="m">marcxml</subfield>'
        '</datafield>'
    )  # record/1403324

    # MARC -> JSON
    expected_json = [
        {'value': 'oai:cds.cern.ch:325030',
         'schema': 'http://cds.cern.ch/oai2d'}
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['external_system_identifiers'],
                    identifiers_schema) is None
    assert expected_json == json_record['external_system_identifiers']

    # JSON -> MARC
    expected_marc = [
        {'9': 'http://cds.cern.ch/oai2d',
         'a': 'oai:cds.cern.ch:325030'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['035']
Пример #23
0
def test_texkeys_from_035__a_9():
    """A 035 with $9 'INSPIRETeX' is expected to populate ``texkeys``."""
    texkeys_schema = load_schema('hep')['properties']['texkeys']

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        '  <subfield code="9">INSPIRETeX</subfield>'
        '  <subfield code="a">Hagedorn:1963hdh</subfield>'
        '</datafield>'
    )  # record/1403324

    # MARC -> JSON
    expected_json = ['Hagedorn:1963hdh']
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['texkeys'], texkeys_schema) is None
    assert expected_json == json_record['texkeys']

    # JSON -> MARC
    expected_marc = [
        {'9': 'INSPIRETeX', 'a': 'Hagedorn:1963hdh'}
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['035']
Пример #24
0
def test_dois_from_0247_a_2_double_9_ignores_curator_source():
    """With two $9 subfields, 'CURATOR' is not expected as the DOI source."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<datafield tag="024" ind1="7" ind2=" ">'
        '  <subfield code="2">DOI</subfield>'
        '  <subfield code="9">bibcheck</subfield>'
        '  <subfield code="9">CURATOR</subfield>'
        '  <subfield code="a">10.1590/S1806-11172008005000006</subfield>'
        '</datafield>'
    )  # record/1117362

    # MARC -> JSON
    expected_json = [
        {'source': 'bibcheck',
         'value': '10.1590/S1806-11172008005000006'},
    ]
    json_record = hep.do(create_record(marcxml))  # no roundtrip

    assert validate(json_record['dois'], dois_schema) is None
    assert expected_json == json_record['dois']

    # JSON -> MARC (the CURATOR $9 does not come back)
    expected_marc = [
        {'a': '10.1590/S1806-11172008005000006',
         '9': 'bibcheck',
         '2': 'DOI'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['0247']
Пример #25
0
def test_dois_from_0247_a_2():
    """A 0247 with $2 'DOI' and $a round-trips through ``dois``."""
    dois_schema = load_schema('hep')['properties']['dois']

    marcxml = (
        '<datafield tag="024" ind1="7" ind2=" ">'
        '  <subfield code="2">DOI</subfield>'
        '  <subfield code="a">10.1088/0264-9381/31/24/245004</subfield>'
        '</datafield>'
    )  # record/1302395

    # MARC -> JSON
    expected_json = [
        {'value': '10.1088/0264-9381/31/24/245004'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['dois'], dois_schema) is None
    assert expected_json == json_record['dois']

    # JSON -> MARC
    expected_marc = [
        {'a': '10.1088/0264-9381/31/24/245004', '2': 'DOI'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['0247']
def test_field_from_marcxml_650_with_two_a():
    """Two 'a' subfields in one datafield.

    The first is an arXiv field code and the second an INSPIRE category;
    each is expected to produce its own entry.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="650" ind1="1" ind2="7">'
        '    <subfield code="2">INSPIRE</subfield>'
        '    <subfield code="a">hep-ex</subfield>'
        '    <subfield code="a">Gravitation and Cosmology</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_categories = [
        {'_scheme': 'INSPIRE',
         'scheme': 'INSPIRE',
         '_term': 'hep-ex',
         'term': 'Experiment-HEP'},
        {'_scheme': 'INSPIRE',
         'scheme': 'INSPIRE',
         '_term': 'Gravitation and Cosmology',
         'term': 'Gravitation and Cosmology'},
    ]

    marc_record = create_record(marcxml)
    cleaned = clean_record(hepnames.do(marc_record))

    assert expected_categories == cleaned['field_categories']
Пример #27
0
def test_isbns_from_020__a_b_normalizes_hardcover():
    """020 $b 'hardcover' is expected as medium 'hardcover'."""
    isbns_schema = load_schema('hep')['properties']['isbns']

    marcxml = (
        '<datafield tag="020" ind1=" " ind2=" ">'
        '  <subfield code="a">978-981-4571-66-1</subfield>'
        '  <subfield code="b">hardcover</subfield>'
        '</datafield>'
    )  # record/1351311

    # MARC -> JSON
    expected_json = [
        {'value': '9789814571661', 'medium': 'hardcover'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['isbns'], isbns_schema) is None
    assert expected_json == json_record['isbns']

    # JSON -> MARC
    expected_marc = [
        {'a': '9789814571661', 'b': 'hardcover'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['020']
Пример #28
0
def test_arxiv_eprints_from_037__a_c_9():
    """A 037 with $9 'arXiv' is expected to populate ``arxiv_eprints``.

    The JSON value is expected without the 'arXiv:' prefix, which is
    expected again in $a after converting back to MARC.
    """
    eprints_schema = load_schema('hep')['properties']['arxiv_eprints']

    marcxml = (
        '<datafield tag="037" ind1=" " ind2=" ">'
        '  <subfield code="9">arXiv</subfield>'
        '  <subfield code="a">arXiv:1505.01843</subfield>'
        '  <subfield code="c">hep-ph</subfield>'
        '</datafield>'
    )  # record/1368891

    # MARC -> JSON
    expected_json = [
        {'categories': ['hep-ph'], 'value': '1505.01843'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['arxiv_eprints'], eprints_schema) is None
    assert expected_json == json_record['arxiv_eprints']

    # JSON -> MARC
    expected_marc = [
        {'9': 'arXiv', 'a': 'arXiv:1505.01843', 'c': 'hep-ph'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['037']
Пример #29
0
def test_external_system_identifiers_from_035__z_9_handles_cernkey():
    """A CERNKEY given in 035 $z round-trips and stays in $z."""
    identifiers_schema = (
        load_schema('hep')['properties']['external_system_identifiers'])

    marcxml = (
        '<datafield tag="035" ind1=" " ind2=" ">'
        '  <subfield code="9">CERNKEY</subfield>'
        '  <subfield code="z">0263439</subfield>'
        '</datafield>'
    )  # record/451647

    # MARC -> JSON
    expected_json = [
        {'schema': 'CERNKEY', 'value': '0263439'},
    ]
    json_record = hep.do(create_record(marcxml))

    assert validate(json_record['external_system_identifiers'],
                    identifiers_schema) is None
    assert expected_json == json_record['external_system_identifiers']

    # JSON -> MARC
    expected_marc = [
        {'9': 'CERNKEY', 'z': '0263439'},
    ]
    marc_record = hep2marc.do(json_record)

    assert expected_marc == marc_record['035']
Пример #30
0
def test_harvesting_arxiv_workflow_accepted(mocked, db_only_app,
                                            record_oai_arxiv_plots):
    """Test a full harvesting workflow.

    The harvested OAI arXiv record is converted to HEP JSON and sent
    through the 'article' workflow. The workflow is expected to halt; the
    object is then marked as approved and resumed, after which it must be
    completed.
    """
    from invenio_workflows import (
        start, WorkflowEngine, ObjectStatus, workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # OAI arXiv XML -> MARCXML -> record dict -> HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    hep_json = hep.do(create_record(marcxml))

    with db_only_app.app_context():
        workflow_uuid = start('article', [hep_json])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        halted = engine.processed_objects[0]

        assert halted.status == ObjectStatus.HALTED
        assert halted.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        for attachment in ("1407.7587.pdf", "1407.7587.tar.gz"):
            assert halted.files[attachment]

        # A publication note should have been extracted
        pub_info = halted.data.get('publication_info')
        assert pub_info
        first_note = pub_info[0]
        assert first_note
        assert first_note.get('year') == "2014"
        assert first_note.get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in halted.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        halted.remove_action()
        halted.extra_data["approved"] = True
        halted.extra_data["core"] = True
        halted.save()

        db.session.commit()

    with db_only_app.app_context():
        engine = WorkflowEngine.from_uuid(workflow_uuid)
        resumed = engine.processed_objects[0]
        resumed_id = resumed.id
        resumed.continue_workflow()

        # Reload the object to see its final state: it was accepted
        final = workflow_object_class.get(resumed_id)
        assert final.status == ObjectStatus.COMPLETED
Пример #31
0
def test_issn_from_marcxml_022_with_a_and_b_and_comment():
    """Test ISSN with medium normalization.

    'ebook' in $b is expected as medium 'online', with the original $b
    value kept in 'comment'.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="a">2213-1337</subfield>'
        '    <subfield code="b">ebook</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_issn = [
        {'medium': 'online',
         'value': '2213-1337',
         'comment': 'ebook'},
    ]

    journal = journals.do(create_record(marcxml))

    assert expected_issn == journal['issn']
Пример #32
0
def deleted_record(app):
    """Provide a HEP record 111 marked as DELETED, then remove it.

    The record is stored with ``record_insert_or_replace`` inside a nested
    transaction; after the ``yield`` it is deleted as ``('lit', 111)``.
    NOTE(review): presumably used as a pytest fixture - the decorator is
    not visible here.
    """
    marcxml = (
        '<record>'
        '  <controlfield tag="001">111</controlfield>'
        '  <datafield tag="245" ind1=" " ind2=" ">'
        '    <subfield code="a">deleted</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '    <subfield code="c">DELETED</subfield>'
        '  </datafield>'
        '</record>'
    )

    json_record = hep.do(create_record(marcxml))
    json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    with db.session.begin_nested():
        record_insert_or_replace(json_record)
    db.session.commit()

    yield

    # Teardown: drop the record created above.
    _delete_record('lit', 111)
Пример #33
0
def test_deleted_records_from_981__a():
    """Round-trip a 981 $a through ``hep`` and back through ``hep2marc``."""
    subschema = load_schema('hep')['properties']['deleted_records']

    marcxml = (
        '<datafield tag="981" ind1=" " ind2=" ">'
        '  <subfield code="a">1508668</subfield>'
        '</datafield>'
    )  # record/1508886

    expected_json = [
        {'$ref': 'http://localhost:5000/api/literature/1508668'},
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['deleted_records'], subschema) is None
    assert expected_json == record['deleted_records']

    expected_marc = [{'a': 1508668}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['981']
Пример #34
0
def test_thesis_info_defense_date_from_500__a_incomplete_human_date():
    """Round-trip a 500 $a holding a month-precision, human-written date.

    'Presented on Dec 1992' is expected to become the defense date
    '1992-12', and to be written back as 'Presented on 1992-12'.
    """
    schema = load_schema('hep')
    subschema = schema['properties']['thesis_info']

    snippet = ('<datafield tag="500" ind1=" " ind2=" ">'
               '  <subfield code="a">Presented on Dec 1992</subfield>'
               '</datafield>')  # record/887715

    expected = {'defense_date': '1992-12'}
    result = hep.do(create_record(snippet))

    assert validate(result['thesis_info'], subschema) is None
    # This comparison used to be a bare expression without ``assert``, so the
    # forward conversion was never actually verified.
    assert expected == result['thesis_info']

    expected = [
        {
            'a': 'Presented on 1992-12'
        },
    ]
    result = hep2marc.do(result)

    assert expected == result['500']
Пример #35
0
def test_thesis_info_defense_date_from_500__a():
    """Round-trip a 500 $a holding a full ISO defense date."""
    subschema = load_schema('hep')['properties']['thesis_info']

    marcxml = (
        '<datafield tag="500" ind1=" " ind2=" ">'
        '  <subfield code="a">Presented on 2016-09-30</subfield>'
        '</datafield>'
    )  # record/1517362

    expected_json = {'defense_date': '2016-09-30'}
    record = hep.do(create_record(marcxml))

    assert validate(record['thesis_info'], subschema) is None
    assert expected_json == record['thesis_info']

    expected_marc = [{'a': 'Presented on 2016-09-30'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['500']
Пример #36
0
def test_curated_from_500__a():
    """Check that a '* Brief entry *' note yields ``curated == False``.

    The note is also expected to be written back verbatim to 500 $a.
    """
    subschema = load_schema('hep')['properties']['curated']

    marcxml = (
        '<datafield tag="500" ind1=" " ind2=" ">'
        '  <subfield code="a">* Brief entry *</subfield>'
        '</datafield>'
    )  # record/1184775

    expected_json = False
    record = hep.do(create_record(marcxml))

    assert validate(record['curated'], subschema) is None
    assert expected_json == record['curated']

    expected_marc = [{'a': '* Brief entry *'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['500']
Пример #37
0
def test_export_to_from_595__c_hal():
    """Round-trip a 595 $c 'HAL' flag through ``_export_to``."""
    subschema = load_schema('hep')['properties']['_export_to']

    marcxml = (
        '<datafield tag="595" ind1=" " ind2=" ">'
        '  <subfield code="c">HAL</subfield>'
        '</datafield>'
    )  # record/1623281

    expected_json = {'HAL': True}
    record = hep.do(create_record(marcxml))

    assert validate(record['_export_to'], subschema) is None
    assert expected_json == record['_export_to']

    expected_marc = [{'c': 'HAL'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['595']
Пример #38
0
def transform_harvested_records(sender=None, records=None, **kwargs):
    """Transform harvested records and send them to the import queue.

    This function is called when the oaiharvester command is finished.

    The ``xml`` attribute of every harvested record is parsed with
    ``create_record`` and converted with ``marc21tojson.do``; the results
    are then handed to the ``import_records`` task in chunks of
    ``CHUNK_SIZE``.

    :param sender: Sender of the signal.
    :param list records: List of records to harvest. ``None`` is treated
        as an empty list.
    :param kwargs: May contain ``name`` (set name, only used for the
        progress message) and ``max`` (maximum number of records to
        process).
    """
    start_time = time.time()

    max_records = kwargs.get('max')
    set_name = kwargs.get('name')

    if set_name:
        print('Harvesting records from "{set}"'.format(set=set_name))

    # ``records`` defaults to None; calling list() on it would raise a
    # TypeError, so fall back to an empty harvest instead.
    harvested_records = list(records) if records is not None else []

    # Reduce array to max records
    if max_records:
        harvested_records = harvested_records[:int(max_records)]

    # Convert each record from MARCXML to JSON and transform it
    records = [
        marc21tojson.do(create_record(harvested_record.xml))
        for harvested_record in harvested_records
    ]

    # Chunk the record list and send one celery task per chunk; iterate the
    # chunks directly instead of materializing them in a temporary list.
    for chunk in chunks(records, CHUNK_SIZE):
        import_records.delay(chunk)

    print('{count} records harvested in {time} seconds'.format(
        count=len(records), time=time.time() - start_time))
Пример #39
0
def test_experiments_from_693__e__0():
    """Check conversion of a 693 field with $e and $0 into ``experiments``."""
    subschema = load_schema('jobs')['properties']['experiments']

    marcxml = (
        '<datafield tag="693" ind1=" " ind2=" ">'
        '  <subfield code="e">CERN-LHC-ATLAS</subfield>'
        '  <subfield code="0">1108541</subfield>'
        '</datafield>'
    )  # record/1332138

    experiment_ref = {'$ref': 'http://localhost:5000/api/experiments/1108541'}
    expected_experiments = [
        {
            'curated_relation': True,
            'name': 'CERN-LHC-ATLAS',
            'record': experiment_ref,
        },
    ]
    job = jobs.do(create_record(marcxml))

    assert validate(job['experiments'], subschema) is None
    assert expected_experiments == job['experiments']
def test_marc21_to_provision_activity_ebooks_from_field_264_2():
    """Check place, agent and date statements built from field 264 _2."""
    marc21xml = """
    <record>
      <datafield tag="264" ind1=" " ind2="2">
        <subfield code="a">Lausanne :</subfield>
        <subfield code="b">Payot,</subfield>
        <subfield code="c">[2006-2010]</subfield>
      </datafield>
    </record>
    """
    # One statement entry per subfield: a (place), b (agent) and c (date).
    place = {'label': [{'value': 'Lausanne'}], 'type': 'bf:Place'}
    agent = {'label': [{'value': 'Payot'}], 'type': 'bf:Agent'}
    date = {'label': [{'value': '[2006-2010]'}], 'type': 'Date'}
    expected_activity = [
        {'type': 'bf:Distribution', 'statement': [place, agent, date]},
    ]

    converted = marc21.do(create_record(marc21xml))

    assert converted.get('provisionActivity') == expected_activity
Пример #41
0
def test_stub_from_980__a_not_useful():
    """Check that a lone 980 $a 'HEPNAMES' leaves ``stub`` set to True.

    The collection value is also expected to be written back to 980 $a.
    """
    subschema = load_schema('authors')['properties']['stub']

    marcxml = (
        '<datafield tag="980" ind1=" " ind2=" ">'
        '  <subfield code="a">HEPNAMES</subfield>'
        '</datafield>'
    )  # record/1019103

    expected_json = True
    author = hepnames.do(create_record(marcxml))

    assert validate(author['stub'], subschema) is None
    assert expected_json == author['stub']

    expected_marc = [{'a': 'HEPNAMES'}]
    marc = hepnames2marc.do(author)

    assert expected_marc == marc['980']
def test_marc21subjects():
    """Check ``subjects`` built from fields 600 and 616 by ``unimarctojson``."""
    unimarcxml = """
    <record>
      <datafield tag="600" ind1=" " ind2=" ">
        <subfield code="a">subjects 600</subfield>
      </datafield>
      <datafield tag="616" ind1=" " ind2=" ">
        <subfield code="a">Capet</subfield>
        <subfield code="b">Louis</subfield>
        <subfield code="c">Jr.</subfield>
        <subfield code="d">III</subfield>
        <subfield code="f">1700-1780</subfield>
      </datafield>
    </record>
    """
    expected_subjects = [
        'subjects 600',
        'Capet, Louis III, Jr., 1700-1780',
    ]

    converted = unimarctojson.do(create_record(unimarcxml))

    assert converted.get('subjects') == expected_subjects
Пример #43
0
def test_positions_from_371__a_r_t():
    """Round-trip a 371 field with institution, rank and end date.

    The rank 'UNDERGRADUATE' is expected to be written back as 'UG'.
    """
    subschema = load_schema('authors')['properties']['positions']

    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        '  <subfield code="a">Case Western Reserve U.</subfield>'
        '  <subfield code="r">UNDERGRADUATE</subfield>'
        '  <subfield code="t">2011</subfield>'
        '</datafield>'
    )  # record/1590188

    institution = {
        'curated_relation': False,
        'name': 'Case Western Reserve U.',
    }
    expected_json = [
        {
            '_rank': 'UNDERGRADUATE',
            'current': False,
            'end_date': '2011',
            'institution': institution,
            'rank': 'UNDERGRADUATE',
        },
    ]
    author = hepnames.do(create_record(marcxml))

    assert validate(author['positions'], subschema) is None
    assert expected_json == author['positions']

    expected_marc = [
        {'a': 'Case Western Reserve U.', 'r': 'UG', 't': '2011'},
    ]
    marc = hepnames2marc.do(author)

    assert expected_marc == marc['371']
Пример #44
0
def test_urls_from_8564_u_and_8564_g_u_y():
    """Round-trip two 8564 fields, one with only $u and one with $g/$u/$y.

    The $g subfield is expected to be absent from both conversions.
    """
    subschema = load_schema('authors')['properties']['urls']

    marcxml = (
        '<record>'
        '  <datafield tag="856" ind1="4" ind2=" ">'
        '    <subfield code="u">http://www.haydenplanetarium.org/tyson/</subfield>'
        '  </datafield>'
        '  <datafield tag="856" ind1="4" ind2=" ">'
        '    <subfield code="g">active</subfield>'
        '    <subfield code="u">https://twitter.com/neiltyson</subfield>'
        '    <subfield code="y">TWITTER</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1073331

    expected_json = [
        {'value': 'http://www.haydenplanetarium.org/tyson/'},
        {'description': 'TWITTER', 'value': 'https://twitter.com/neiltyson'},
    ]
    author = hepnames.do(create_record(marcxml))

    assert validate(author['urls'], subschema) is None
    assert expected_json == author['urls']

    expected_marc = [
        {'u': 'http://www.haydenplanetarium.org/tyson/'},
        {'u': 'https://twitter.com/neiltyson', 'y': 'TWITTER'},
    ]
    marc = hepnames2marc.do(author)

    assert expected_marc == marc['8564']
Пример #45
0
def test_deleted_from_980__c():
    """Round-trip a 980 $c 'DELETED' marker through ``deleted``."""
    subschema = load_schema('hep')['properties']['deleted']

    marcxml = (
        '<datafield tag="980" ind1=" " ind2=" ">'
        '  <subfield code="c">DELETED</subfield>'
        '</datafield>'
    )  # record/1508668/export/xme

    expected_json = True
    record = hep.do(create_record(marcxml))

    assert validate(record['deleted'], subschema) is None
    assert expected_json == record['deleted']

    expected_marc = [{'c': 'DELETED'}]
    marc = hep2marc.do(record)

    assert expected_marc == marc['980']
def test_marc21_to_edition_statement_one_field_250():
    """Check the edition statement built from a single field 250.

    Subfield 'a' supplies the edition designation and subfield 'b' the
    responsibility.
    """
    marc21xml = """
    <record>
      <datafield tag="250" ind1=" " ind2=" ">
        <subfield code="a">2e ed.</subfield>
        <subfield code="b">avec un avant-propos par Jean Faret</subfield>
      </datafield>
    </record>
    """
    expected_statement = [
        {
            'editionDesignation': [{'value': '2e ed.'}],
            'responsibility': [
                {'value': 'avec un avant-propos par Jean Faret'},
            ],
        },
    ]

    converted = marc21.do(create_record(marc21xml))

    assert converted.get('editionStatement') == expected_statement
def test_old_single_email_from_371__a():
    """Round-trip a 371 field whose $o carries a single old email."""
    subschema = load_schema('authors')['properties']['positions']

    # NOTE(review): the email in the snippet and the one in the expected
    # values look redacted and do not match each other -- confirm against
    # the original fixture before relying on this test.
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        '   <subfield code="a">IMSc, Chennai</subfield>'
        '   <subfield code="o">[email protected]</subfield>'
        '   <subfield code="r">PD</subfield>'
        '   <subfield code="s">2012</subfield>'
        '   <subfield code="t">2013</subfield>'
        '</datafield>'
    )  # record/1060782

    expected_json = [
        {
            "current": False,
            "old_emails": ["*****@*****.**"],
            "end_date": "2013",
            "rank": "POSTDOC",
            "institution": {
                "name": "IMSc, Chennai",
                "curated_relation": False,
            },
            "_rank": "PD",
            "start_date": "2012",
        },
    ]
    author = hepnames.do(create_record(marcxml))

    assert validate(author['positions'], subschema) is None
    assert expected_json == author['positions']

    expected_marc = [
        {
            "a": "IMSc, Chennai",
            "o": ["*****@*****.**"],
            "s": "2012",
            "r": "PD",
            "t": "2013",
        },
    ]
    marc = hepnames2marc.do(author)

    assert expected_marc == marc['371']
def test_positions_from_371__a_m_r_z():
    """Round-trip a current position from a 371 field with $a, $m, $r, $z."""
    subschema = load_schema('authors')['properties']['positions']

    # NOTE(review): the email in the snippet and the one in the expected
    # values look redacted and do not match each other -- confirm against
    # the original fixture before relying on this test.
    marcxml = (
        '<datafield tag="371" ind1=" " ind2=" ">'
        '  <subfield code="a">Antwerp U.</subfield>'
        '  <subfield code="m">[email protected]</subfield>'
        '  <subfield code="r">SENIOR</subfield>'
        '  <subfield code="z">Current</subfield>'
        '</datafield>'
    )  # record/997958

    institution = {'curated_relation': False, 'name': 'Antwerp U.'}
    expected_json = [
        {
            'current': True,
            'emails': ['*****@*****.**'],
            'institution': institution,
            'rank': 'SENIOR',
            '_rank': 'SENIOR',
        },
    ]
    author = hepnames.do(create_record(marcxml))

    assert validate(author['positions'], subschema) is None
    assert expected_json == author['positions']

    expected_marc = [
        {
            'a': 'Antwerp U.',
            'm': ['*****@*****.**'],
            'r': 'SENIOR',
            'z': 'Current',
        },
    ]
    marc = hepnames2marc.do(author)

    assert expected_marc == marc['371']
Пример #49
0
def test_historical_data_from_6781_multiple_a():
    """Check that every $a of a 6781 field becomes a historical_data entry."""
    subschema = load_schema('institutions')['properties']['historical_data']

    marcxml = (
        '<datafield tag="678" ind1="1" ind2=" ">'
        '  <subfield code="a">Conseil européen pour la Recherche Nucléaire (1952-1954)</subfield>'
        '  <subfield code="a">Organisation européenne pour la Recherche nucléaire (1954-now)</subfield>'
        '  <subfield code="a">Sub title: Laboratoire européen pour la Physique des Particules (1984-now)</subfield>'
        '  <subfield code="a">Sub title: European Laboratory for Particle Physics (1984-now)</subfield>'
        '</datafield>'
    )  # record/902725

    # One entry per repeated $a, in document order.
    expected_history = [
        u'Conseil européen pour la Recherche Nucléaire (1952-1954)',
        u'Organisation européenne pour la Recherche nucléaire (1954-now)',
        u'Sub title: Laboratoire européen pour la Physique des Particules (1984-now)',
        u'Sub title: European Laboratory for Particle Physics (1984-now)',
    ]
    institution = institutions.do(create_record(marcxml))

    assert validate(institution['historical_data'], subschema) is None
    assert expected_history == institution['historical_data']
Пример #50
0
def test_publication_info_from_7731_c_p_v_y():
    """Round-trip a 7731 field into a hidden ``publication_info`` entry.

    The page range in $c is expected to be split into start and end pages
    and written back to MARC as a single-item list.
    """
    subschema = load_schema('hep')['properties']['publication_info']

    marcxml = (
        '<datafield tag="773" ind1="1" ind2=" ">'
        '  <subfield code="c">948-979</subfield>'
        '  <subfield code="p">Adv.Theor.Math.Phys.</subfield>'
        '  <subfield code="v">12</subfield>'
        '  <subfield code="y">2008</subfield>'
        '</datafield>'
    )  # record/697133

    expected_json = [
        {
            'hidden': True,
            'journal_title': 'Adv.Theor.Math.Phys.',
            'journal_volume': '12',
            'page_end': '979',
            'page_start': '948',
            'year': 2008,
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['publication_info'], subschema) is None
    assert expected_json == record['publication_info']

    expected_marc = [
        {
            'c': ['948-979'],
            'p': 'Adv.Theor.Math.Phys.',
            'v': '12',
            'y': 2008,
        },
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['7731']
def test_collaboration_from_710__g_0():
    """Check conversion of a 710 field with $g and $0 into ``collaboration``."""
    subschema = load_schema('experiments')['properties']['collaboration']

    marcxml = (
        '<datafield tag="710" ind1=" " ind2=" ">'
        '  <subfield code="g">DarkSide</subfield>'
        '  <subfield code="0">1108199</subfield>'
        '</datafield>'
    )  # record/1108199

    experiment_ref = {'$ref': 'http://localhost:5000/api/experiments/1108199'}
    expected_collaboration = {
        'curated_relation': True,
        'record': experiment_ref,
        'value': 'DarkSide',
    }
    experiment = experiments.do(create_record(marcxml))

    assert validate(experiment['collaboration'], subschema) is None
    assert expected_collaboration == experiment['collaboration']
def test_related_records_from_double_510__a_w_0_accepts_predecessors():
    """Check that two 510 fields with $w 'a' both become predecessors."""
    subschema = load_schema('experiments')['properties']['related_records']

    marcxml = (
        '<record>'
        '  <datafield tag="510" ind1=" " ind2=" ">'
        '    <subfield code="0">1108293</subfield>'
        '    <subfield code="a">XENON</subfield>'
        '    <subfield code="w">a</subfield>'
        '  </datafield>'
        '  <datafield tag="510" ind1=" " ind2=" ">'
        '    <subfield code="0">1386527</subfield>'
        '    <subfield code="a">XENON100</subfield>'
        '    <subfield code="w">a</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1386519

    first_ref = {'$ref': 'http://localhost:5000/api/experiments/1108293'}
    second_ref = {'$ref': 'http://localhost:5000/api/experiments/1386527'}
    expected_related = [
        {
            'curated_relation': True,
            'record': first_ref,
            'relation': 'predecessor',
        },
        {
            'curated_relation': True,
            'record': second_ref,
            'relation': 'predecessor',
        },
    ]
    experiment = experiments.do(create_record(marcxml))

    assert validate(experiment['related_records'], subschema) is None
    assert expected_related == experiment['related_records']
Пример #53
0
def test_address_from_111__a_double_c_d_e_g_x_y():
    """Check ``address`` built from a 111 field that repeats subfield $c.

    Each $c is expected to produce its own address entry, including the
    first one, which holds an acronym rather than a place.
    """
    subschema = load_schema('conferences')['properties']['address']

    marcxml = (
        '<datafield tag="111" ind1=" " ind2=" ">'
        '  <subfield code="a">16th High-Energy Physics International Conference in Quantum Chromodynamics</subfield>'
        '  <subfield code="c">QCD 12</subfield>'
        '  <subfield code="c">Montpellier, France</subfield>'
        '  <subfield code="d">2-7 Jul 2012</subfield>'
        '  <subfield code="e">QCD 12</subfield>'
        '  <subfield code="g">C12-07-02</subfield>'
        '  <subfield code="x">2012-07-02</subfield>'
        '  <subfield code="y">2012-07-07</subfield>'
        '</datafield>'
    )  # record/1085463

    # XXX: Wrong, but the best we can do.
    acronym_address = {
        'cities': ['QCD 12'],
        'postal_address': ['QCD 12'],
    }
    city_address = {
        'cities': ['Montpellier'],
        'country_code': 'FR',
        'postal_address': ['Montpellier, France'],
    }
    expected_address = [acronym_address, city_address]

    conference = conferences.do(create_record(marcxml))

    assert validate(conference['address'], subschema) is None
    assert expected_address == conference['address']
Пример #54
0
def test_texkeys_from_035__z_9_and_035__a_9():
    """Round-trip texkeys taken from 035 $z and 035 $a.

    The key from the $a field is expected first, and both keys are
    expected back with source 'INSPIRETeX'.
    """
    subschema = load_schema('hep')['properties']['texkeys']

    marcxml = (
        '<record>'
        '  <datafield tag="035" ind1=" " ind2=" ">'
        '    <subfield code="9">SPIRESTeX</subfield>'
        '    <subfield code="z">N.Cartiglia:2015cn</subfield>'
        '  </datafield>'
        '  <datafield tag="035" ind1=" " ind2=" ">'
        '    <subfield code="9">INSPIRETeX</subfield>'
        '    <subfield code="a">Akiba:2016ofq</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1498308

    # XXX: the first one is the one coming from the "a" field.
    expected_json = ['Akiba:2016ofq', 'N.Cartiglia:2015cn']
    record = hep.do(create_record(marcxml))

    assert validate(record['texkeys'], subschema) is None
    assert expected_json == record['texkeys']

    expected_marc = [
        {'9': 'INSPIRETeX', 'a': 'Akiba:2016ofq'},
        {'9': 'INSPIRETeX', 'z': 'N.Cartiglia:2015cn'},
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['035']
Пример #55
0
def test_alternative_titles_from_711__a_b():
    """Check that 711 $a and $b each become an alternative title."""
    subschema = load_schema('conferences')['properties']['alternative_titles']

    marcxml = (
        '<datafield tag="711" ind1=" " ind2=" ">'
        '  <subfield code="a">XX Riunione Nazionale di Elettromagnetismo</subfield>'
        '  <subfield code="b">Padova</subfield>'
        '</datafield>'
    )  # record/1403856

    expected_titles = [
        {'title': 'XX Riunione Nazionale di Elettromagnetismo'},
        {'title': 'Padova'},
    ]
    conference = conferences.do(create_record(marcxml))

    assert validate(conference['alternative_titles'], subschema) is None
    assert expected_titles == conference['alternative_titles']
def test_unimarc_edition():
    """Check the edition statement built from a single UNIMARC field 205.

    Subfield 'a' supplies the edition designation and subfield 'f' the
    responsibility.
    """
    unimarcxml = """
    <record>
      <datafield tag="205" ind1=" " ind2=" ">
        <subfield code="a">2e ed.</subfield>
        <subfield code="f">avec un avant-propos par Jean Faret</subfield>
      </datafield>
    </record>
    """
    expected_statement = [
        {
            'editionDesignation': [{'value': '2e ed.'}],
            'responsibility': [
                {'value': 'avec un avant-propos par Jean Faret'},
            ],
        },
    ]

    converted = unimarc.do(create_record(unimarcxml))

    assert converted.get('editionStatement') == expected_statement
Пример #57
0
def test_private_notes_from_595__double_a_9():
    """Round-trip a 595 field with one $9 and two $a subfields.

    Each $a is expected to become its own private note sharing the source,
    and each note is expected back as a separate 595 entry.
    """
    subschema = load_schema('hep')['properties']['_private_notes']

    marcxml = (
        '<datafield tag="595" ind1=" " ind2=" ">'
        '  <subfield code="9">SPIRES-HIDDEN</subfield>'
        '  <subfield code="a">TeXtitle from script</subfield>'
        '  <subfield code="a">no affiliation (not clear pn the fulltext)</subfield>'
        '</datafield>'
    )  # record/109310

    expected_json = [
        {'source': 'SPIRES-HIDDEN', 'value': 'TeXtitle from script'},
        {
            'source': 'SPIRES-HIDDEN',
            'value': 'no affiliation (not clear pn the fulltext)',
        },
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['_private_notes'], subschema) is None
    assert expected_json == record['_private_notes']

    expected_marc = [
        {'9': 'SPIRES-HIDDEN', 'a': 'TeXtitle from script'},
        {
            '9': 'SPIRES-HIDDEN',
            'a': 'no affiliation (not clear pn the fulltext)',
        },
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['595']
Пример #58
0
def test_multiple_title_variants_from_marcxml_730():
    """Check that two 730 fields give two ``title_variants`` entries."""
    marcxml = (
        '<record>'
        '  <datafield tag="730" ind1=" " ind2=" ">'
        '    <subfield code="a">PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS</subfield>'
        '  </datafield>'
        '  <datafield tag="730" ind1=" " ind2=" ">'
        '    <subfield code="a">PHYSICS REVIEW ST ACCEL BEAMS</subfield>'
        '  </datafield>'
        '</record>'
    )
    expected_variants = [
        {'title': 'PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS'},
        {'title': 'PHYSICS REVIEW ST ACCEL BEAMS'},
    ]

    journal = journals.do(create_record(marcxml))

    assert expected_variants == journal['title_variants']
Пример #59
0
def test_public_notes_from_500__double_a_9():
    """Round-trip a 500 field with one $9 and two $a subfields.

    Each $a is expected to become its own public note sharing the source,
    and each note is expected back as a separate 500 entry.
    """
    subschema = load_schema('hep')['properties']['public_notes']

    marcxml = (
        '<datafield tag="500" ind1=" " ind2=" ">'
        '  <subfield code="9">arXiv</subfield>'
        '  <subfield code="a">11 pages, 8 figures. Submitted to MNRAS</subfield>'
        '  <subfield code="a">preliminary entry</subfield>'
        '</datafield>'
    )  # record/1380257

    expected_json = [
        {
            'source': 'arXiv',
            'value': '11 pages, 8 figures. Submitted to MNRAS',
        },
        {'source': 'arXiv', 'value': 'preliminary entry'},
    ]
    record = hep.do(create_record(marcxml))

    assert validate(record['public_notes'], subschema) is None
    assert expected_json == record['public_notes']

    expected_marc = [
        {'9': 'arXiv', 'a': '11 pages, 8 figures. Submitted to MNRAS'},
        {'9': 'arXiv', 'a': 'preliminary entry'},
    ]
    marc = hep2marc.do(record)

    assert expected_marc == marc['500']
Пример #60
0
def test_name_variants_from_410__double_a():
    """Check that each $a of a 410 field becomes a name variant."""
    subschema = load_schema('institutions')['properties']['name_variants']

    marcxml = (
        '<datafield tag="410" ind1=" " ind2=" ">'
        '  <subfield code="a">Theoretische Teilchenphysik und Kosmologie</subfield>'
        '  <subfield code="a">Elementarteilchenphysik</subfield>'
        '</datafield>'
    )  # record/902624

    expected_variants = [
        {'value': 'Theoretische Teilchenphysik und Kosmologie'},
        {'value': 'Elementarteilchenphysik'},
    ]
    institution = institutions.do(create_record(marcxml))

    assert validate(institution['name_variants'], subschema) is None
    assert expected_variants == institution['name_variants']