示例#1
0
def convert_marcxml(source):
    """Lazily convert the MARC XML read from ``source`` into JSON records.

    ``source.read()`` is consumed in one go, handed to ``split_blob`` and
    every resulting chunk is turned into a MARC record.  The record is then
    converted with the dojson model of the first collection it matches
    (``hep`` when it matches none) and passed through
    ``strip_empty_values`` before being yielded.

    :param source: object exposing a ``read()`` method returning MARC XML.
    :returns: generator with one converted record per chunk.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Probed top to bottom; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        model = hep  # fallback when no collection matches
        for names, candidate in models_by_collection:
            if any(_collection_in_record(marc, name) for name in names):
                model = candidate
                break
        yield strip_empty_values(model.do(marc))
示例#2
0
def convert_marcxml(source):
    """Lazily convert the MARC XML read from ``source`` into JSON records.

    The full content of ``source.read()`` goes through ``split_blob``;
    each chunk becomes a MARC record that is converted by the dojson model
    of the first collection found in it, falling back to ``hep``.  Every
    result is passed through ``strip_empty_values`` before being yielded.

    :param source: object exposing a ``read()`` method returning MARC XML.
    :returns: generator with one converted record per chunk.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Order matters: the first entry with a matching collection is used.
    dispatch = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for chunk in split_blob(source.read()):
        marc_record = create_record(chunk)
        chosen = hep  # default model
        for collection_names, model in dispatch:
            if any(_collection_in_record(marc_record, name)
                   for name in collection_names):
                chosen = model
                break
        yield strip_empty_values(chosen.do(marc_record))
def test_multiple_issn_from_marcxml_022():
    """Two 022 datafields give two ``issn`` entries, in input order."""
    marcxml = ('<record>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="a">2349-2716</subfield>'
               '    <subfield code="b">Online</subfield>'
               '  </datafield>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="a">2349-6088</subfield>'
               '    <subfield code="b">Print</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [
        {'medium': 'online', 'value': '2349-2716'},
        {'medium': 'print', 'value': '2349-6088'},
    ]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['issn']
def test_coden_from_double_030__a_2():
    """Two 030 CODEN datafields give a two-item, schema-valid ``coden``."""
    journals_schema = load_schema('journals')
    coden_schema = journals_schema['properties']['coden']

    marcxml = ('<record>'
               '  <datafield tag="030" ind1=" " ind2=" ">'
               '    <subfield code="2">CODEN</subfield>'
               '    <subfield code="a">00686</subfield>'
               '  </datafield>'
               '  <datafield tag="030" ind1=" " ind2=" ">'
               '    <subfield code="2">CODEN</subfield>'
               '    <subfield code="a">VLUFB</subfield>'
               '  </datafield>'
               '</record>')  # record/1213834
    wanted = ['00686', 'VLUFB']

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['coden'], coden_schema) is None
    assert wanted == converted['coden']
def test_issn_from_double_022__a_b():
    """Two 022 $a/$b datafields give two schema-valid ``issn`` entries."""
    journals_schema = load_schema('journals')
    issn_schema = journals_schema['properties']['issn']

    marcxml = ('<record>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="a">1812-9471</subfield>'
               '    <subfield code="b">Print</subfield>'
               '  </datafield>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="a">1817-5805</subfield>'
               '    <subfield code="b">Online</subfield>'
               '  </datafield>'
               '</record>')  # record/1513418
    wanted = [
        {'medium': 'print', 'value': '1812-9471'},
        {'medium': 'online', 'value': '1817-5805'},
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['issn'], issn_schema) is None
    assert wanted == converted['issn']
def test_publisher_from_643__b():
    """A single 643 $b subfield becomes a one-item ``publisher`` list."""
    marcxml = (
        '<datafield tag="643" ind1=" " ind2=" ">'
        '  <subfield code="b">ANITA PUBLICATIONS, INDIA</subfield>'
        '</datafield>'
    )  # record/1211888
    wanted = ['ANITA PUBLICATIONS, INDIA']

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['publisher']
def test_issn_from_marcxml_022_with_b_no_a():
    """A 022 datafield carrying only $b must not produce an ``issn`` key."""
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="b">9780486632827</subfield>'
        '  </datafield> '
        '</record>'
    )

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert 'issn' not in converted
示例#8
0
def create_record(data, force=False, dry_run=False):
    """Convert one MARC XML record to JSON and optionally store it.

    The MARC record built from ``data`` is converted with the dojson model
    of the first collection it matches (``hep`` when none matches) and
    passed through ``strip_empty_values``.

    Unless ``dry_run`` is set, records carrying a truthy ``001`` id get an
    ``InspireProdRecords`` row holding the raw ``data``; its ``successful``
    flag is set according to how the import went.

    :param data: MARC XML of a single record.
    :param force: when true and the converted JSON has a ``control_number``
        or ``recid`` key, create or overwrite the corresponding ``Record``.
    :param dry_run: when true, only convert; nothing is stored.
    :returns: ``(recid, json)`` on a dry run, ``(control_number, dict)``
        when the record was stored, ``None`` otherwise.
    :raises Exception: any error from conversion or storage is re-raised
        after being recorded.
    """
    record = marc_create_record(data)
    recid = None
    if '001' in record:
        recid = int(record['001'][0])

    # Bound unconditionally: the error handler below runs for dry runs too,
    # where no production row exists.  Leaving the name unbound used to turn
    # any dry-run failure into a NameError that hid the real exception.
    prod_record = None
    if not dry_run and recid:
        prod_record = InspireProdRecords(recid=recid)
        prod_record.marcxml = data
    try:
        if _collection_in_record(record, 'institution'):
            json = strip_empty_values(institutions.do(record))
        elif _collection_in_record(record, 'experiment'):
            json = strip_empty_values(experiments.do(record))
        elif _collection_in_record(record, 'journals'):
            json = strip_empty_values(journals.do(record))
        elif _collection_in_record(record, 'hepnames'):
            json = strip_empty_values(hepnames.do(record))
        elif _collection_in_record(record, 'job') or \
                _collection_in_record(record, 'jobhidden'):
            json = strip_empty_values(jobs.do(record))
        elif _collection_in_record(record, 'conferences'):
            json = strip_empty_values(conferences.do(record))
        else:
            json = strip_empty_values(hep.do(record))
        if dry_run:
            return recid, json

        if force and any(key in json for key in ('control_number', 'recid')):
            try:
                control_number = json['control_number']
            except KeyError:
                control_number = json['recid']
            control_number = int(control_number)
            # Searches if record already exists.
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
            if prod_record is not None:
                prod_record.successful = True
                db.session.merge(prod_record)
            logger.info("Elaborated record {}".format(control_number))
            return control_number, dict(record)
    except Exception:
        if prod_record is not None:
            prod_record.successful = False
            db.session.merge(prod_record)
        if recid:
            logger.exception("Error in elaborating record ID {}".format(recid))
        raise
def test_coden_from_030__a_2():
    """A single 030 CODEN datafield becomes a one-item ``coden`` list."""
    marcxml = (
        '<datafield tag="030" ind1=" " ind2=" ">'
        '  <subfield code="2">CODEN</subfield>'
        '  <subfield code="a">HERAS</subfield>'
        '</datafield>'
    )  # record/1211568
    wanted = ['HERAS']

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['coden']
def test_publisher_from_643__b():
    """A single 643 $b subfield survives cleaning as ``publisher``."""
    marcxml = ('<datafield tag="643" ind1=" " ind2=" ">'
               '  <subfield code="b">ANITA PUBLICATIONS, INDIA</subfield>'
               '</datafield>')  # record/1211888
    wanted = ['ANITA PUBLICATIONS, INDIA']

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['publisher']
def test_issn_from_marcxml_022_with_b_no_a():
    """A 022 datafield with only $b leaves no ``issn`` key after cleaning."""
    marcxml = ('<record>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="b">9780486632827</subfield>'
               '  </datafield> '
               '</record>')

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert 'issn' not in cleaned
def test_coden_from_030__a_2():
    """A single 030 CODEN datafield survives cleaning as ``coden``."""
    marcxml = ('<datafield tag="030" ind1=" " ind2=" ">'
               '  <subfield code="2">CODEN</subfield>'
               '  <subfield code="a">HERAS</subfield>'
               '</datafield>')  # record/1211568
    wanted = ['HERAS']

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['coden']
示例#13
0
def create_record(recid, record, force=False, dry_run=False, validation=False):
    """Convert a MARC record to JSON and optionally store it.

    The record is converted with the dojson model of the first collection
    it matches (``hep`` when none matches) and passed through
    ``strip_empty_values``.

    :param recid: identifier used in the validation message and returned
        unchanged on a dry run.
    :param record: MARC record to convert.
    :param force: when true and the JSON has a ``control_number`` or
        ``recid`` key, create or overwrite the corresponding ``Record``.
    :param dry_run: when true, return right after conversion/validation.
    :param validation: when true, call ``validate`` on the JSON; a
        ``ValidationError`` is logged as a warning, not raised.
    :returns: ``(errors, recid, json)`` on a dry run,
        ``(errors, control_number, dict)`` when stored, ``None`` otherwise.
    """
    errors = ""

    # Probed top to bottom; the first entry with a matching collection wins.
    dispatch = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )
    model = hep  # fallback when no collection matches
    for names, candidate in dispatch:
        if any(_collection_in_record(record, name) for name in names):
            model = candidate
            break
    converted = strip_empty_values(model.do(record))

    if validation:
        try:
            validate(converted)
        except ValidationError as err:
            errors = "ValidationError: Record {0}: {1}".format(recid, err)
            current_app.logger.warning(errors)

    if dry_run:
        return errors, recid, converted

    has_number = force and any(
        key in converted for key in ('control_number', 'recid'))
    if not has_number:
        return None

    try:
        raw_number = converted['control_number']
    except KeyError:
        raw_number = converted['recid']
    control_number = int(raw_number)

    with db.session.begin_nested():
        # Look for an already stored record with this number.
        existing = Record.get_record(control_number)
        if existing is None:
            # Add the record model to the db session before creating it.
            db.session.merge(RecordModel(id=control_number))
            stored = Record.create(converted)
        else:
            stored = Record(converted, model=existing.model)
            stored.commit()
    logger.info("Elaborated record {}".format(control_number))
    return errors, control_number, dict(stored)
def test_publisher_from_643__b():
    """A single 643 $b subfield gives a schema-valid ``publisher`` list."""
    journals_schema = load_schema('journals')
    publisher_schema = journals_schema['properties']['publisher']

    marcxml = ('<datafield tag="643" ind1=" " ind2=" ">'
               '  <subfield code="b">ANITA PUBLICATIONS, INDIA</subfield>'
               '</datafield>')  # record/1211888
    wanted = [
        'ANITA PUBLICATIONS, INDIA',
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['publisher'], publisher_schema) is None
    assert wanted == converted['publisher']
def test_short_titles_from_marcxml_711():
    """A 711 $a subfield becomes a one-item ``short_titles`` list."""
    marcxml = (
        '<record>'
        '  <datafield tag="711" ind1=" " ind2=" ">'
        '    <subfield code="a">Phys.Rev.ST Accel.Beams</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [{'title': 'Phys.Rev.ST Accel.Beams'}]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['short_titles']
def test_issn_from_022__a_b_electronic():
    """A 022 $b of "electronic" maps to medium "online", kept as comment."""
    marcxml = (
        '<datafield tag="022" ind1=" " ind2=" ">'
        '  <subfield code="a">2469-9888</subfield>'
        '  <subfield code="b">electronic</subfield>'
        '</datafield>'
    )  # record/1415879
    wanted = [
        {'comment': 'electronic', 'medium': 'online', 'value': '2469-9888'},
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['issn']
def test_title_variants_from_marcxml_730():
    """A 730 $a subfield becomes a one-item ``title_variants`` list."""
    marcxml = ('<record>'
               '  <datafield tag="730" ind1=" " ind2=" ">'
               '    <subfield code="a">PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [{'title': 'PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS'}]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['title_variants']
def test_titles_from_marcxml_130_with_single_a():
    """A 130 datafield with only $a becomes a one-item ``titles`` list."""
    marcxml = ('<record>'
               '  <datafield tag="130" ind1=" " ind2=" ">'
               '    <subfield code="a">Physical Review Special Topics - Accelerators and Beams</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [{'title': 'Physical Review Special Topics - Accelerators and Beams'}]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['titles']
def test_issn_from_marcxml_022_with_a():
    """A 022 datafield with only $a gives an ``issn`` entry with no medium."""
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="a">2213-1337</subfield>'
        '  </datafield> '
        '</record>'
    )
    wanted = [{'value': '2213-1337'}]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['issn']
def test_coden_from_030__a_2():
    """A single 030 CODEN datafield gives a schema-valid ``coden`` list."""
    journals_schema = load_schema('journals')
    coden_schema = journals_schema['properties']['coden']

    marcxml = ('<datafield tag="030" ind1=" " ind2=" ">'
               '  <subfield code="2">CODEN</subfield>'
               '  <subfield code="a">HERAS</subfield>'
               '</datafield>')  # record/1211568
    wanted = [
        'HERAS',
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['coden'], coden_schema) is None
    assert wanted == converted['coden']
def test_publisher_from_double_643__b():
    """Two 643 $b datafields give two ``publisher`` items, in input order."""
    marcxml = (
        '<record>'
        '  <datafield tag="643" ind1=" " ind2=" ">'
        '    <subfield code="b">Elsevier</subfield>'
        '  </datafield>'
        '  <datafield tag="643" ind1=" " ind2=" ">'
        '    <subfield code="b">Science Press</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1212635
    wanted = ['Elsevier', 'Science Press']

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['publisher']
def test_title_variants_from_730__a():
    """A 730 $a subfield gives a schema-valid ``title_variants`` list."""
    journals_schema = load_schema('journals')
    variants_schema = journals_schema['properties']['title_variants']

    marcxml = ('<datafield tag="730" ind1=" " ind2=" ">'
               '  <subfield code="a">PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS</subfield>'
               '</datafield>')  # record/1212820
    wanted = [
        {
            'title': 'PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS',
        },
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['title_variants'], variants_schema) is None
    assert wanted == converted['title_variants']
示例#23
0
def overdo_marc_dict(record):
    """Convert a MARC record into cleaned JSON.

    The dojson model of the first collection matching ``record`` does the
    conversion, with ``hep`` as the fallback; the result is passed through
    ``clean_record`` before being returned.
    """
    # Probed top to bottom; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for names, model in models_by_collection:
        if any(_collection_in_record(record, name) for name in names):
            return clean_record(model.do(record))

    return clean_record(hep.do(record))
def test_titles_from_marcxml_130_with_single_a():
    """A 130 datafield with only $a survives cleaning as ``titles``."""
    marcxml = ('<record>'
               '  <datafield tag="130" ind1=" " ind2=" ">'
               '    <subfield code="a">Physical Review Special Topics - Accelerators and Beams</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [{'title': 'Physical Review Special Topics - Accelerators and Beams'}]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['titles']
def test_short_titles_from_711__a():
    """A 711 $a subfield gives a schema-valid ``short_titles`` list."""
    journals_schema = load_schema('journals')
    short_titles_schema = journals_schema['properties']['short_titles']

    marcxml = ('<datafield tag="711" ind1=" " ind2=" ">'
               '  <subfield code="a">Phys.Rev.ST Accel.Beams</subfield>'
               '</datafield>')  # record/1212820
    wanted = [
        {
            'title': 'Phys.Rev.ST Accel.Beams',
        },
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['short_titles'], short_titles_schema) is None
    assert wanted == converted['short_titles']
def test_title_variants_from_marcxml_730():
    """A 730 $a subfield survives cleaning as ``title_variants``."""
    marcxml = ('<record>'
               '  <datafield tag="730" ind1=" " ind2=" ">'
               '    <subfield code="a">PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [{'title': 'PHYSICAL REVIEW SPECIAL TOPICS ACCELERATORS AND BEAMS'}]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['title_variants']
示例#27
0
def overdo_marc_dict(record):
    """Convert a MARC record into JSON.

    The dojson model of the first collection matching ``record`` does the
    conversion, with ``hep`` as the fallback; the model output is returned
    as is.
    """
    # Probed top to bottom; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for names, model in models_by_collection:
        if any(_collection_in_record(record, name) for name in names):
            return model.do(record)

    return hep.do(record)
def test_issn_from_022__a():
    """A 022 datafield with only $a gives a schema-valid ``issn`` entry."""
    journals_schema = load_schema('journals')
    issn_schema = journals_schema['properties']['issn']

    marcxml = ('<datafield tag="022" ind1=" " ind2=" ">'
               '  <subfield code="a">2213-1337</subfield>'
               '</datafield> ')  # record/1445059
    wanted = [
        {
            'value': '2213-1337',
        },
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['issn'], issn_schema) is None
    assert wanted == converted['issn']
def test_journal_titles_from_130__a():
    """A 130 $a subfield gives a schema-valid ``journal_titles`` list."""
    journals_schema = load_schema('journals')
    titles_schema = journals_schema['properties']['journal_titles']

    marcxml = ('<datafield tag="130" ind1=" " ind2=" ">'
               '  <subfield code="a">Physical Review Special Topics - Accelerators and Beams</subfield>'
               '</datafield>')
    wanted = [
        {
            'title': 'Physical Review Special Topics - Accelerators and Beams',
        },
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['journal_titles'], titles_schema) is None
    assert wanted == converted['journal_titles']
def test_short_titles_from_marcxml_711():
    """A 711 $a subfield survives cleaning as ``short_titles``."""
    marcxml = ('<record>'
               '  <datafield tag="711" ind1=" " ind2=" ">'
               '    <subfield code="a">Phys.Rev.ST Accel.Beams</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [{'title': 'Phys.Rev.ST Accel.Beams'}]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['short_titles']
def test_issn_from_marcxml_022_with_a():
    """A 022 datafield with only $a keeps a medium-less ``issn`` entry."""
    marcxml = ('<record>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="a">2213-1337</subfield>'
               '  </datafield> '
               '</record>')
    wanted = [{'value': '2213-1337'}]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['issn']
def test_issn_from_022__a_b_electronic():
    """A 022 $b of "electronic" maps to medium "online", kept as comment."""
    marcxml = ('<datafield tag="022" ind1=" " ind2=" ">'
               '  <subfield code="a">2469-9888</subfield>'
               '  <subfield code="b">electronic</subfield>'
               '</datafield>')  # record/1415879
    wanted = [
        {'comment': 'electronic', 'medium': 'online', 'value': '2469-9888'},
    ]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['issn']
def test_issn_from_marcxml_022_with_a_and_b():
    """A 022 $b of "Print" is stored as the lower-case medium "print"."""
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="a">2213-1337</subfield>'
        '    <subfield code="b">Print</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [{'medium': 'print', 'value': '2213-1337'}]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['issn']
def test_titles_from_marcxml_130_with_a_and_b():
    """A 130 datafield maps $a to ``title`` and $b to ``subtitle``."""
    marcxml = ('<record>'
               '  <datafield tag="130" ind1=" " ind2=" ">'
               '    <subfield code="a">Humana Mente</subfield>'
               '    <subfield code="b">Journal of Philosophical Studies</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [
        {'title': 'Humana Mente', 'subtitle': 'Journal of Philosophical Studies'},
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['titles']
def test_publisher_from_double_643__b():
    """Two 643 $b datafields survive cleaning as two ``publisher`` items."""
    marcxml = ('<record>'
               '  <datafield tag="643" ind1=" " ind2=" ">'
               '    <subfield code="b">Elsevier</subfield>'
               '  </datafield>'
               '  <datafield tag="643" ind1=" " ind2=" ">'
               '    <subfield code="b">Science Press</subfield>'
               '  </datafield>'
               '</record>')  # record/1212635
    wanted = ['Elsevier', 'Science Press']

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['publisher']
def test_coden_from_double_030__a_2():
    """Two 030 CODEN datafields give two ``coden`` items, in input order."""
    marcxml = (
        '<record>'
        '  <datafield tag="030" ind1=" " ind2=" ">'
        '    <subfield code="2">CODEN</subfield>'
        '    <subfield code="a">00686</subfield>'
        '  </datafield>'
        '  <datafield tag="030" ind1=" " ind2=" ">'
        '    <subfield code="2">CODEN</subfield>'
        '    <subfield code="a">VLUFB</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = ['00686', 'VLUFB']

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['coden']
def test_titles_from_marcxml_130_with_a_and_b():
    """A 130 $a/$b pair survives cleaning as ``title`` and ``subtitle``."""
    marcxml = ('<record>'
               '  <datafield tag="130" ind1=" " ind2=" ">'
               '    <subfield code="a">Humana Mente</subfield>'
               '    <subfield code="b">Journal of Philosophical Studies</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [
        {'title': 'Humana Mente', 'subtitle': 'Journal of Philosophical Studies'},
    ]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['titles']
def test_issn_from_marcxml_022_with_a_and_b():
    """A 022 $b of "Print" survives cleaning as the medium "print"."""
    marcxml = ('<record>'
               '  <datafield tag="022" ind1=" " ind2=" ">'
               '    <subfield code="a">2213-1337</subfield>'
               '    <subfield code="b">Print</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [{'medium': 'print', 'value': '2213-1337'}]

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['issn']
def test_journal_titles_from_130__a_b():
    """A 130 $a/$b pair gives a schema-valid title/subtitle entry."""
    journals_schema = load_schema('journals')
    titles_schema = journals_schema['properties']['journal_titles']

    marcxml = ('<datafield tag="130" ind1=" " ind2=" ">'
               '  <subfield code="a">Humana Mente</subfield>'
               '  <subfield code="b">Journal of Philosophical Studies</subfield>'
               '</datafield>')
    wanted = [
        {'title': 'Humana Mente', 'subtitle': 'Journal of Philosophical Studies'},
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['journal_titles'], titles_schema) is None
    assert wanted == converted['journal_titles']
def test_coden_from_double_030__a_2():
    """Two 030 CODEN datafields survive cleaning as two ``coden`` items."""
    marcxml = ('<record>'
               '  <datafield tag="030" ind1=" " ind2=" ">'
               '    <subfield code="2">CODEN</subfield>'
               '    <subfield code="a">00686</subfield>'
               '  </datafield>'
               '  <datafield tag="030" ind1=" " ind2=" ">'
               '    <subfield code="2">CODEN</subfield>'
               '    <subfield code="a">VLUFB</subfield>'
               '  </datafield>'
               '</record>')
    wanted = ['00686', 'VLUFB']

    converted = journals.do(create_record(marcxml))
    cleaned = clean_record(converted)

    assert wanted == cleaned['coden']
def test_issn_from_022__a_b_handles_electronic():
    """A 022 $b of "electronic" gives a schema-valid "online" entry."""
    journals_schema = load_schema('journals')
    issn_schema = journals_schema['properties']['issn']

    marcxml = ('<datafield tag="022" ind1=" " ind2=" ">'
               '  <subfield code="a">2469-9888</subfield>'
               '  <subfield code="b">electronic</subfield>'
               '</datafield>')  # record/1415879
    wanted = [
        {'comment': 'electronic', 'medium': 'online', 'value': '2469-9888'},
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert validate(converted['issn'], issn_schema) is None
    assert wanted == converted['issn']
def test_issn_from_marcxml_022_with_a_and_b_and_comment():
    """A 022 $b of "ebook" maps to medium "online".

    The original $b text is expected under ``comment``.
    """
    marcxml = (
        '<record>'
        '  <datafield tag="022" ind1=" " ind2=" ">'
        '    <subfield code="a">2213-1337</subfield>'
        '    <subfield code="b">ebook</subfield>'
        '  </datafield>'
        '</record>'
    )
    wanted = [
        {'medium': 'online', 'value': '2213-1337', 'comment': 'ebook'},
    ]

    marc = create_record(marcxml)
    converted = journals.do(marc)

    assert wanted == converted['issn']
示例#43
0
def test_multiple_title_variants_from_marcxml_730():
    """Two 730 datafields give two ``title_variants``, in input order."""
    marcxml = ('<record>'
               '  <datafield tag="730" ind1=" " ind2=" ">'
               '    <subfield code="a">PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS</subfield>'
               '  </datafield>'
               '  <datafield tag="730" ind1=" " ind2=" ">'
               '    <subfield code="a">PHYSICS REVIEW ST ACCEL BEAMS</subfield>'
               '  </datafield>'
               '</record>')
    wanted = [
        {'title': 'PHYS REV SPECIAL TOPICS ACCELERATORS BEAMS'},
        {'title': 'PHYSICS REVIEW ST ACCEL BEAMS'},
    ]

    converted = journals.do(create_record(marcxml))
    stripped = strip_empty_values(converted)

    assert wanted == stripped['title_variants']
示例#44
0
def create_record(record, force=True, dry_run=False):
    """Convert a MARC record to JSON with the matching dojson model.

    The model of the first collection matching ``record`` does the
    conversion (``hep`` when none matches) and the result is passed
    through ``strip_empty_values``.

    :param record: MARC record to convert.
    :param force: accepted but not used by this function.
    :param dry_run: when true, return ``(errors, json)`` where ``errors``
        is always the empty string; otherwise return the JSON alone.
    """
    # Probed top to bottom; the first entry with a matching collection wins.
    dispatch = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )
    model = hep  # fallback when no collection matches
    for names, candidate in dispatch:
        if any(_collection_in_record(record, name) for name in names):
            model = candidate
            break
    converted = strip_empty_values(model.do(record))

    return ("", converted) if dry_run else converted