Пример #1
0
def test_date_closed_from_046__i_and_046__l_an_email():
    """Check ``deadline_date`` and ``reference_email`` for two 046 fields.

    One 046 field carries an e-mail in subfield ``l`` and the other carries
    ``8888`` in subfield ``i``. Both converted values are also validated
    against their ``jobs`` subschemas.
    """
    properties = load_schema('jobs')['properties']
    deadline_schema = properties['deadline_date']
    email_schema = properties['reference_email']

    # NOTE(review): the e-mail literals look redacted (the input and the
    # expected value are different strings) -- confirm against the original
    # fixture.
    marcxml = ('<record>'
               '  <datafield tag="046" ind1=" " ind2=" ">'
               '    <subfield code="l">[email protected]</subfield>'
               '  </datafield>'
               '  <datafield tag="046" ind1=" " ind2=" ">'
               '    <subfield code="i">8888</subfield>'
               '  </datafield>'
               '</record>')  # record/1089529

    wanted_deadline = '8888'
    wanted_emails = ['*****@*****.**']
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert validate(converted['deadline_date'], deadline_schema) is None
    assert wanted_deadline == converted['deadline_date']

    assert validate(converted['reference_email'], email_schema) is None
    assert wanted_emails == converted['reference_email']
Пример #2
0
def test_date_closed_from_046__i_and_046__l_an_url():
    """Check ``deadline_date`` and ``urls`` for two 046 fields.

    One 046 field carries a date in subfield ``i`` and the other carries a
    URL in subfield ``l``. Both converted values are also validated against
    their ``jobs`` subschemas.
    """
    properties = load_schema('jobs')['properties']
    deadline_schema = properties['deadline_date']
    urls_schema = properties['urls']

    marcxml = ('<record>'
               '  <datafield tag="046" ind1=" " ind2=" ">'
               '    <subfield code="i">2012-06-01</subfield>'
               '  </datafield>'
               '  <datafield tag="046" ind1=" " ind2=" ">'
               '    <subfield code="l">http://www.pma.caltech.edu/physics-search</subfield>'
               '  </datafield>'
               '</record>')  # record/963314

    wanted_deadline = '2012-06-01'
    wanted_urls = [{'value': 'http://www.pma.caltech.edu/physics-search'}]
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert validate(converted['deadline_date'], deadline_schema) is None
    assert wanted_deadline == converted['deadline_date']

    assert validate(converted['urls'], urls_schema) is None
    assert wanted_urls == converted['urls']
Пример #3
0
def convert_marcxml(source):
    """Yield the JSON form of every MARC XML record read from ``source``.

    ``source`` must provide ``read()``. Its content is split with
    ``split_blob`` and each piece is parsed with ``create_record``. The
    record is then converted by the first model whose collection check
    (``_collection_in_record``) succeeds, with ``hep`` as the fallback.
    Each result is passed through ``strip_empty_values`` before being
    yielded.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # Checked top to bottom; the first entry with a matching collection wins.
    models_by_collection = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        chosen = hep
        for collection_names, model in models_by_collection:
            if any(_collection_in_record(marc, name)
                   for name in collection_names):
                chosen = model
                break
        yield strip_empty_values(chosen.do(marc))
Пример #4
0
def convert_marcxml(source):
    """Convert each MARC XML record in ``source`` to JSON, lazily.

    The content of ``source.read()`` is split with ``split_blob``. Every
    piece is parsed, converted by the model selected through the
    ``_collection_in_record`` checks below (``hep`` when none match),
    stripped of empty values and yielded.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    for chunk in split_blob(source.read()):
        marc_record = create_record(chunk)

        # Pick the model first, then convert once at the bottom.
        if _collection_in_record(marc_record, 'institution'):
            model = institutions
        elif _collection_in_record(marc_record, 'experiment'):
            model = experiments
        elif _collection_in_record(marc_record, 'journals'):
            model = journals
        elif _collection_in_record(marc_record, 'hepnames'):
            model = hepnames
        elif (_collection_in_record(marc_record, 'job') or
                _collection_in_record(marc_record, 'jobhidden')):
            model = jobs
        elif _collection_in_record(marc_record, 'conferences'):
            model = conferences
        else:
            model = hep

        yield strip_empty_values(model.do(marc_record))
Пример #5
0
def test_institutions_from_110__double_a_z():
    """One 110 field with two ``a`` and two ``z`` subfields gives two curated institutions."""
    marcxml = ('<datafield tag="110" ind1=" " ind2=" ">'
               '  <subfield code="a">Indiana U.</subfield>'
               '  <subfield code="a">NIST, Wash., D.C.</subfield>'
               '  <subfield code="z">902874</subfield>'
               '  <subfield code="z">903056</subfield>'
               '</datafield>')  # record/1328021/export/xme

    indiana = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
    }
    nist = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
    }
    wanted = [indiana, nist]

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['institutions']
Пример #6
0
def test_institutions_from_double_110__a():
    """Two 110 fields with only an ``a`` subfield give two non-curated institutions."""
    marcxml = ('<record>'
               '  <datafield tag="110" ind1=" " ind2=" ">'
               '    <subfield code="a">Coll. William and Mary</subfield>'
               '  </datafield>'
               '  <datafield tag="110" ind1=" " ind2=" ">'
               '    <subfield code="a">Jefferson Lab</subfield>'
               '  </datafield>'
               '</record>')  # record/1427342

    wanted = [
        {'curated_relation': False, 'name': 'Coll. William and Mary'},
        {'curated_relation': False, 'name': 'Jefferson Lab'},
    ]

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['institutions']
Пример #7
0
def test_institutions_from_110__double_a_z():
    """One 110 field with two ``a``/``z`` pairs gives two curated institutions after cleaning."""
    marcxml = ('<datafield tag="110" ind1=" " ind2=" ">'
               '  <subfield code="a">Indiana U.</subfield>'
               '  <subfield code="a">NIST, Wash., D.C.</subfield>'
               '  <subfield code="z">902874</subfield>'
               '  <subfield code="z">903056</subfield>'
               '</datafield>')  # record/1328021/export/xme

    wanted = [
        {
            'curated_relation': True,
            'name': 'Indiana U.',
            'record': {
                '$ref': 'http://localhost:5000/api/institutions/902874',
            },
        },
        {
            'curated_relation': True,
            'name': 'NIST, Wash., D.C.',
            'record': {
                '$ref': 'http://localhost:5000/api/institutions/903056',
            },
        },
    ]

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['institutions']
Пример #8
0
def test_experiments_from_693__e__0_and_e():
    """A 693 field with ``e`` and ``0`` is curated; one with only ``e`` is not."""
    marcxml = ('<record>'
               '  <datafield tag="693" ind1=" " ind2=" ">'
               '    <subfield code="e">CERN-LHC-ATLAS</subfield>'
               '    <subfield code="0">1108541</subfield>'
               '  </datafield>'
               '  <datafield tag="693" ind1=" " ind2=" ">'
               '    <subfield code="e">IHEP-CEPC</subfield>'
               '  </datafield>'
               '</record>')  # record/1393583 /export/xme

    atlas = {
        'curated_relation': True,
        'name': 'CERN-LHC-ATLAS',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1108541'},
    }
    cepc = {'curated_relation': False, 'name': 'IHEP-CEPC'}
    wanted = [atlas, cepc]

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['experiments']
Пример #9
0
def test_contact_details_from_multiple_marcxml_270():
    """Two 270 fields give two contacts; the second one has a name only."""
    # NOTE(review): the e-mail literals look redacted (the input and the
    # expected value are different strings) -- confirm against the original
    # fixture.
    marcxml = ('<record> '
               '  <datafield tag="270" ind1=" " ind2=" ">'
               '    <subfield code="m">[email protected]</subfield>'
               '    <subfield code="p">Manfred Lindner</subfield>'
               '  </datafield>'
               '  <datafield tag="270" ind1=" " ind2=" ">'
               '    <subfield code="p">Wynton Marsalis</subfield>'
               '  </datafield>'
               '</record>')

    wanted = [
        {'name': 'Manfred Lindner', 'email': '*****@*****.**'},
        {'name': 'Wynton Marsalis'},
    ]

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['contact_details']
Пример #10
0
def test_regions_from_043__a_corrects_misspellings():
    """043 ``a`` holding ``United States`` is expected as the ``North America`` region."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        '  <subfield code="a">United States</subfield>'
        '</datafield>'
    )

    wanted = ['North America']
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['regions']
Пример #11
0
def test_institutions_from_110__double_a_z():
    """One 110 field with two ``a``/``z`` pairs gives two curated institutions after cleaning."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        '  <subfield code="a">Indiana U.</subfield>'
        '  <subfield code="a">NIST, Wash., D.C.</subfield>'
        '  <subfield code="z">902874</subfield>'
        '  <subfield code="z">903056</subfield>'
        '</datafield>'
    )  # record/1328021/export/xme

    indiana = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/902874'},
    }
    nist = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {'$ref': 'http://localhost:5000/api/institutions/903056'},
    }
    wanted = [indiana, nist]

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['institutions']
Пример #12
0
def test_regions_from_043__a():
    """043 ``a`` holding ``Asia`` is expected as the single region ``Asia``."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        '  <subfield code="a">Asia</subfield>'
        '</datafield>'
    )

    wanted = ['Asia']
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['regions']
Пример #13
0
def test_regions_from_043__a():
    """043 ``a`` holding ``Asia`` is expected as the single region ``Asia``."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        '  <subfield code="a">Asia</subfield>'
        '</datafield>'
    )

    wanted = ['Asia']
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['regions']
Пример #14
0
def test_position_from_245__a():
    """245 ``a`` is expected under ``position`` after cleaning."""
    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        '  <subfield code="a">Neutrino Physics</subfield>'
        '</datafield>'
    )  # record/1467312

    wanted = 'Neutrino Physics'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['position']
Пример #15
0
def test_date_closed_from_046__l():
    """046 ``l`` holding a date is expected under ``date_closed``."""
    marcxml = ('<datafield tag="046" ind1=" " ind2=" ">'
               '  <subfield code="l">2008-02-11</subfield>'
               '</datafield>')  # record/934304

    wanted = '2008-02-11'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['date_closed']
Пример #16
0
def test_position_from_245__a():
    """245 ``a`` is expected under ``position`` after cleaning."""
    marcxml = ('<datafield tag="245" ind1=" " ind2=" ">'
               '  <subfield code="a">Neutrino Physics</subfield>'
               '</datafield>')  # record/1467312

    wanted = 'Neutrino Physics'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['position']
Пример #17
0
def test_date_closed_from_046__i():
    """046 ``i`` holding a date is expected under ``deadline_date``."""
    marcxml = ('<datafield tag="046" ind1=" " ind2=" ">'
               '  <subfield code="i">2015-12-15</subfield>'
               '</datafield>')  # record/1310294

    wanted = '2015-12-15'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['deadline_date']
Пример #18
0
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 ``a`` value is expected as two regions."""
    marcxml = ('<datafield tag="043" ind1=" " ind2=" ">'
               '  <subfield code="a">Asia, North America</subfield>'
               '</datafield>')

    wanted = ['Asia', 'North America']
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['regions']
Пример #19
0
def test_date_closed_from_046__l():
    """046 ``l`` holding a date is expected under ``date_closed``."""
    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        '  <subfield code="l">2008-02-11</subfield>'
        '</datafield>'
    )  # record/934304

    wanted = '2008-02-11'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['date_closed']
Пример #20
0
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 ``a`` value is expected as two regions."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        '  <subfield code="a">Asia, North America</subfield>'
        '</datafield>'
    )

    wanted = ['Asia', 'North America']
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['regions']
Пример #21
0
def test_regions_from_043__a_corrects_misspellings():
    """043 ``a`` holding ``United States`` is expected as the ``North America`` region."""
    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        '  <subfield code="a">United States</subfield>'
        '</datafield>'
    )

    wanted = ['North America']
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['regions']
Пример #22
0
def test_date_closed_from_046__i():
    """046 ``i`` holding a date is expected under ``deadline_date``."""
    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        '  <subfield code="i">2015-12-15</subfield>'
        '</datafield>'
    )  # record/1310294

    wanted = '2015-12-15'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['deadline_date']
Пример #23
0
def test_description_from_520__a():
    """520 ``a`` is expected under ``description`` with XML entities unescaped once."""
    marcxml = ('<datafield tag="520" ind1=" " ind2=" ">'
               '  <subfield code="a">(1) Conduct independent research in string theory related theoretical sciences;&lt;br /> &lt;br /> (2) Advising graduate students in their research;&lt;br /> &lt;br /> (3) A very small amount of teaching of undergraduate courses.&amp;nbsp;</subfield>'
               '</datafield>')  # record/1239755

    wanted = '(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses.&nbsp;'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['description']
Пример #24
0
def test_institutions_from_110__a():
    """A 110 field with only an ``a`` subfield gives one non-curated institution."""
    marcxml = ('<datafield tag="110" ind1=" " ind2=" ">'
               '  <subfield code="a">Coll. William and Mary</subfield>'
               '</datafield>')  # record/1427342

    wanted = [
        {
            'curated_relation': False,
            'name': 'Coll. William and Mary',
        },
    ]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['institutions']
Пример #25
0
def test_description_from_520__a():
    """520 ``a`` is expected under ``description`` with XML entities unescaped once."""
    marcxml = ('<datafield tag="520" ind1=" " ind2=" ">'
               '  <subfield code="a">(1) Conduct independent research in string theory related theoretical sciences;&lt;br /> &lt;br /> (2) Advising graduate students in their research;&lt;br /> &lt;br /> (3) A very small amount of teaching of undergraduate courses.&amp;nbsp;</subfield>'
               '</datafield>')  # record/1239755

    wanted = '(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses.&nbsp;'
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['description']
Пример #26
0
def test_date_closed_from_046__l():
    """046 ``l`` holding a date is expected under ``date_closed``."""
    marcxml = ('<datafield tag="046" ind1=" " ind2=" ">'
               '  <subfield code="l">2008-02-11</subfield>'
               '</datafield>')  # record/934304

    wanted = '2008-02-11'
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['date_closed']
Пример #27
0
def test_position_from_245__a():
    """245 ``a`` is expected under ``position``."""
    marcxml = ('<datafield tag="245" ind1=" " ind2=" ">'
               '  <subfield code="a">Neutrino Physics</subfield>'
               '</datafield>')  # record/1467312

    wanted = 'Neutrino Physics'
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['position']
Пример #28
0
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 ``a`` value is expected as two regions."""
    marcxml = ('<datafield tag="043" ind1=" " ind2=" ">'
               '  <subfield code="a">Asia, North America</subfield>'
               '</datafield>')

    wanted = ['Asia', 'North America']
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['regions']
Пример #29
0
def test_regions_from_043__a():
    """043 ``a`` holding ``Asia`` is expected as the single region ``Asia``."""
    marcxml = ('<datafield tag="043" ind1=" " ind2=" ">'
               '  <subfield code="a">Asia</subfield>'
               '</datafield>')

    wanted = ['Asia']
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['regions']
Пример #30
0
def create_record(data, force=False, dry_run=False):
    """Convert a MARC XML record to JSON and optionally store it.

    The record is parsed with ``marc_create_record`` and converted by the
    first model whose ``_collection_in_record`` check succeeds, falling back
    to ``hep``. Empty values are stripped from the result.

    :param data: MARC XML of a single record.
    :param force: when true, and the converted JSON has a ``control_number``
        or ``recid`` key, create or update the stored record.
    :param dry_run: when true, only convert. Nothing is written and no
        ``InspireProdRecords`` entry is tracked.
    :returns: ``(recid, json)`` on a dry run; ``(control_number,
        dict(record))`` after storing; ``None`` when not a dry run and the
        record was not stored (``force`` false or no identifier in the JSON).
    :raises Exception: any error from conversion or storage is re-raised
        after the production record, if tracked, is marked unsuccessful.
    """
    record = marc_create_record(data)
    recid = None
    if '001' in record:
        recid = int(record['001'][0])

    # Only tracked for real runs on records with an id. It stays ``None``
    # otherwise, so the error handler below can never hit an unbound name
    # (a failing dry run used to raise UnboundLocalError there and hide the
    # original exception).
    prod_record = None
    if not dry_run and recid:
        prod_record = InspireProdRecords(recid=recid)
        prod_record.marcxml = data
    try:
        if _collection_in_record(record, 'institution'):
            json = strip_empty_values(institutions.do(record))
        elif _collection_in_record(record, 'experiment'):
            json = strip_empty_values(experiments.do(record))
        elif _collection_in_record(record, 'journals'):
            json = strip_empty_values(journals.do(record))
        elif _collection_in_record(record, 'hepnames'):
            json = strip_empty_values(hepnames.do(record))
        elif _collection_in_record(record, 'job') or \
                _collection_in_record(record, 'jobhidden'):
            json = strip_empty_values(jobs.do(record))
        elif _collection_in_record(record, 'conferences'):
            json = strip_empty_values(conferences.do(record))
        else:
            json = strip_empty_values(hep.do(record))
        if dry_run:
            return recid, json

        if force and any(key in json for key in ('control_number', 'recid')):
            try:
                control_number = json['control_number']
            except KeyError:
                control_number = json['recid']
            control_number = int(control_number)
            # Searches if record already exists.
            record = Record.get_record(control_number)
            if record is None:
                # Adds the record to the db session.
                rec = RecordModel(id=control_number)
                db.session.merge(rec)
                record = Record.create(json)
            else:
                record = Record(json, model=record.model)
                record.commit()
            if prod_record is not None:
                prod_record.successful = True
                db.session.merge(prod_record)
            logger.info("Elaborated record {}".format(control_number))
            return control_number, dict(record)
    except Exception:
        if prod_record is not None:
            prod_record.successful = False
            db.session.merge(prod_record)
        if recid:
            logger.exception("Error in elaborating record ID {}".format(recid))
        raise
Пример #31
0
def test_regions_from_043__a_corrects_misspellings():
    """043 ``a`` holding ``United States`` is expected as the ``North America`` region."""
    marcxml = ('<datafield tag="043" ind1=" " ind2=" ">'
               '  <subfield code="a">United States</subfield>'
               '</datafield>')

    wanted = ['North America']
    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['regions']
Пример #32
0
def test_continent_from_043__a():
    """043 ``a`` is expected under ``continent`` after cleaning."""
    marcxml = ('<datafield tag="043" ind1=" " ind2=" ">'
               '  <subfield code="a">Asia</subfield>'
               '</datafield>')

    wanted = 'Asia'
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['continent']
Пример #33
0
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 ``a`` gives one raw rank in ``_ranks`` and one upper-case rank in ``ranks``."""
    marcxml = (
        '<record>'
        '  <datafield tag="656" ind1=" " ind2=" ">'
        '    <subfield code="a">Senior</subfield>'
        '  </datafield>'
        '</record>'
    )

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert converted['_ranks'] == ['Senior']
    assert converted['ranks'] == ['SENIOR']
Пример #34
0
def test_experiments_from_693__e():
    """693 ``e`` is expected as a plain experiment name after cleaning."""
    marcxml = ('<datafield tag="693" ind1=" " ind2=" ">'
               '  <subfield code="e">CERN-LHC-ATLAS</subfield>'
               '</datafield>')  # record/1471772

    wanted = ['CERN-LHC-ATLAS']
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['experiments']
Пример #35
0
def test_experiments_from_693__e():
    """A 693 field with only an ``e`` subfield gives one non-curated experiment."""
    marcxml = ('<record>'
               '  <datafield tag="693" ind1=" " ind2=" ">'
               '    <subfield code="e">ALIGO</subfield>'
               '  </datafield>'
               '</record>')  # record/1375852

    wanted = [
        {
            'curated_relation': False,
            'name': 'ALIGO',
        },
    ]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['experiments']
Пример #36
0
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 ``a`` gives one raw rank in ``_ranks`` and one upper-case rank in ``ranks``."""
    marcxml = ('<record>'
               '  <datafield tag="656" ind1=" " ind2=" ">'
               '    <subfield code="a">Senior</subfield>'
               '  </datafield>'
               '</record>')

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert converted['_ranks'] == ['Senior']
    assert converted['ranks'] == ['SENIOR']
Пример #37
0
def test_contact_details_from_marcxml_270_single_p_single_m():
    """One 270 field with one ``m`` and one ``p`` gives one contact with name and e-mail."""
    # NOTE(review): the e-mail literals look redacted (the input and the
    # expected value are different strings) -- confirm against the original
    # fixture.
    marcxml = ('<record> '
               '  <datafield tag="270" ind1=" " ind2=" ">'
               '    <subfield code="m">[email protected]</subfield>'
               '    <subfield code="p">Manfred Lindner</subfield>'
               '  </datafield>'
               '</record>')

    wanted = [
        {
            'name': 'Manfred Lindner',
            'email': '*****@*****.**',
        },
    ]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['contact_details']
Пример #38
0
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 ``a`` gives one raw rank in ``_ranks`` and one upper-case rank in ``ranks``."""
    marcxml = ('<record>'
               '  <datafield tag="656" ind1=" " ind2=" ">'
               '    <subfield code="a">Senior</subfield>'
               '  </datafield>'
               '</record>')

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert converted['_ranks'] == ['Senior']
    assert converted['ranks'] == ['SENIOR']
Пример #39
0
def test_institutions_from_110__a():
    """A 110 field with only an ``a`` subfield gives one non-curated institution."""
    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        '  <subfield code="a">Coll. William and Mary</subfield>'
        '</datafield>'
    )  # record/1427342

    wanted = [{'curated_relation': False, 'name': 'Coll. William and Mary'}]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['institutions']
Пример #40
0
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 ``a`` gives one schema-valid, upper-case entry in ``ranks``."""
    ranks_schema = load_schema('jobs')['properties']['ranks']

    marcxml = ('<datafield tag="656" ind1=" " ind2=" ">'
               '  <subfield code="a">Senior</subfield>'
               '</datafield>')

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert validate(converted['ranks'], ranks_schema) is None
    assert converted['ranks'] == ['SENIOR']
Пример #41
0
def create_record(recid, record, force=False, dry_run=False, validation=False):
    """Convert a parsed MARC record to JSON and optionally store it.

    The model is chosen by the ``_collection_in_record`` checks below
    (``hep`` when none match) and empty values are stripped from its output.

    :param recid: identifier used in validation messages and returned on a
        dry run.
    :param record: parsed MARC record to convert.
    :param force: when true, and the JSON has a ``control_number`` or
        ``recid`` key, create or update the stored record.
    :param dry_run: when true, return right after conversion (and optional
        validation) without storing anything.
    :param validation: when true, run ``validate`` on the JSON; a
        ``ValidationError`` is logged as a warning and reported in the
        returned error string instead of being raised.
    :returns: ``(errors, recid, json)`` on a dry run; ``(errors,
        control_number, dict(record))`` after storing; ``None`` otherwise.
    """
    # Pick the model first, then convert once.
    if _collection_in_record(record, 'institution'):
        model = institutions
    elif _collection_in_record(record, 'experiment'):
        model = experiments
    elif _collection_in_record(record, 'journals'):
        model = journals
    elif _collection_in_record(record, 'hepnames'):
        model = hepnames
    elif (_collection_in_record(record, 'job') or
            _collection_in_record(record, 'jobhidden')):
        model = jobs
    elif _collection_in_record(record, 'conferences'):
        model = conferences
    else:
        model = hep
    converted = strip_empty_values(model.do(record))

    errors = ""
    if validation:
        try:
            validate(converted)
        except ValidationError as err:
            errors = "ValidationError: Record {0}: {1}".format(recid, err)
            current_app.logger.warning(errors)

    if dry_run:
        return errors, recid, converted

    # Without ``force`` or an identifier there is nothing to store.
    if not force:
        return None
    if not ('control_number' in converted or 'recid' in converted):
        return None

    try:
        control_number = converted['control_number']
    except KeyError:
        control_number = converted['recid']
    control_number = int(control_number)

    with db.session.begin_nested():
        existing = Record.get_record(control_number)
        if existing is None:
            # New record: register its model row before creating it.
            db.session.merge(RecordModel(id=control_number))
            stored = Record.create(converted)
        else:
            stored = Record(converted, model=existing.model)
            stored.commit()
    logger.info("Elaborated record {}".format(control_number))
    return errors, control_number, dict(stored)
Пример #42
0
def test_ranks_from_marcxml_double_656():
    """Two 656 fields give two raw ranks in ``_ranks`` and two upper-case ranks in ``ranks``."""
    marcxml = (
        '<record>'
        '  <datafield tag="656" ind1=" " ind2=" ">'
        '    <subfield code="a">Senior</subfield>'
        '  </datafield>'
        '  <datafield tag="656" ind1=" " ind2=" ">'
        '    <subfield code="a">Junior</subfield>'
        '  </datafield>'
        '</record>'
    )

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert converted['_ranks'] == ['Senior', 'Junior']
    assert converted['ranks'] == ['SENIOR', 'JUNIOR']
Пример #43
0
def test_date_closed_from_046_i_l_an_email():
    """046 ``l`` with an e-mail goes to ``reference_email``; 046 ``i`` goes to ``deadline_date``."""
    # NOTE(review): the e-mail literals look redacted (the input and the
    # expected value are different strings) -- confirm against the original
    # fixture.
    marcxml = (
        '<record>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="l">[email protected]</subfield>'
        '  </datafield>'
        '  <datafield tag="046" ind1=" " ind2=" ">'
        '    <subfield code="i">8888</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1089529

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert converted['deadline_date'] == '8888'
    assert converted['reference_email'] == ['*****@*****.**']
Пример #44
0
def test_experiments_from_693__e():
    """A 693 field with only an ``e`` subfield gives one non-curated experiment."""
    marcxml = (
        '<record>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">ALIGO</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1375852

    wanted = [{'curated_relation': False, 'name': 'ALIGO'}]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['experiments']
Пример #45
0
def test_contact_details_from_marcxml_270_single_p_single_m():
    """One 270 field with one ``m`` and one ``p`` gives one contact with name and e-mail."""
    # NOTE(review): the e-mail literals look redacted (the input and the
    # expected value are different strings) -- confirm against the original
    # fixture.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '</record>'
    )

    wanted = [{'name': 'Manfred Lindner', 'email': '*****@*****.**'}]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['contact_details']
Пример #46
0
def overdo_marc_dict(record):
    """Convert a MARC record to JSON with the model matching its collection.

    The ``_collection_in_record`` checks run top to bottom and the first
    match decides the model. ``hep`` handles everything else.
    """
    if _collection_in_record(record, 'institution'):
        return institutions.do(record)
    if _collection_in_record(record, 'experiment'):
        return experiments.do(record)
    if _collection_in_record(record, 'journals'):
        return journals.do(record)
    if _collection_in_record(record, 'hepnames'):
        return hepnames.do(record)

    # Both the public and the hidden job collections use the jobs model.
    is_job = (_collection_in_record(record, 'job') or
              _collection_in_record(record, 'jobhidden'))
    if is_job:
        return jobs.do(record)
    if _collection_in_record(record, 'conferences'):
        return conferences.do(record)
    return hep.do(record)
Пример #47
0
def test_contact_details_from_marcxml_270_double_p_single_m():
    """Two names sharing one e-mail are unsupported: only the e-mail is expected."""
    # NOTE(review): the e-mail literals look redacted (the input and the
    # expected value are different strings) -- confirm against the original
    # fixture.
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '    <subfield code="p">Boogeyman</subfield>'
        '  </datafield>'
        '</record>'
    )

    wanted = [{'email': '*****@*****.**'}]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['contact_details']
Пример #48
0
def test_contact_details_from_marcxml_270_single_p_double_m():
    """One name with two e-mails is unsupported: only the name is expected."""
    marcxml = (
        '<record> '
        '  <datafield tag="270" ind1=" " ind2=" ">'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="m">[email protected]</subfield>'
        '    <subfield code="p">Manfred Lindner</subfield>'
        '  </datafield>'
        '</record>'
    )

    wanted = [{'name': 'Manfred Lindner'}]
    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['contact_details']
Пример #49
0
def test_date_closed_from_046__i_l_an_url():
    """046 ``i`` goes to ``deadline_date``; 046 ``l`` with a URL goes to ``urls``."""
    marcxml = ('<record>'
               '  <datafield tag="046" ind1=" " ind2=" ">'
               '    <subfield code="i">2012-06-01</subfield>'
               '  </datafield>'
               '  <datafield tag="046" ind1=" " ind2=" ">'
               '    <subfield code="l">http://www.pma.caltech.edu/physics-search</subfield>'
               '  </datafield>'
               '</record>')  # record/963314

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert converted['deadline_date'] == '2012-06-01'
    assert converted['urls'] == [
        {'value': 'http://www.pma.caltech.edu/physics-search'},
    ]
Пример #50
0
def test_experiments_from_693__e__0():
    """A 693 field with ``e`` and ``0`` gives one curated experiment with a record reference."""
    marcxml = (
        '<record>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">CERN-LHC-ATLAS</subfield>'
        '    <subfield code="0">1108541</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1332138

    atlas = {
        'curated_relation': True,
        'name': 'CERN-LHC-ATLAS',
        'record': {'$ref': 'http://localhost:5000/api/experiments/1108541'},
    }
    wanted = [atlas]

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['experiments']
Пример #51
0
def test_experiments_from_triple_693__e__0():
    """Three 693 fields with ``e`` and ``0`` give three curated experiments, in order."""
    marcxml = (
        '<record>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">CERN-NA-049</subfield>'
        '    <subfield code="0">1110308</subfield>'
        '  </datafield>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">CERN-NA-061</subfield>'
        '    <subfield code="0">1108234</subfield>'
        '  </datafield>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">CERN-LHC-ALICE</subfield>'
        '    <subfield code="0">1110642</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1469159

    wanted = [
        {
            'curated_relation': True,
            'name': 'CERN-NA-049',
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1110308',
            },
        },
        {
            'curated_relation': True,
            'name': 'CERN-NA-061',
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1108234',
            },
        },
        {
            'curated_relation': True,
            'name': 'CERN-LHC-ALICE',
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1110642',
            },
        },
    ]

    parsed = create_record(marcxml)
    converted = clean_record(jobs.do(parsed))

    assert wanted == converted['experiments']
Пример #52
0
def test_experiments_from_693__e__0_and_e():
    """A 693 field with ``e`` and ``0`` is curated; one with only ``e`` is not."""
    marcxml = (
        '<record>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">CERN-LHC-ATLAS</subfield>'
        '    <subfield code="0">1108541</subfield>'
        '  </datafield>'
        '  <datafield tag="693" ind1=" " ind2=" ">'
        '    <subfield code="e">IHEP-CEPC</subfield>'
        '  </datafield>'
        '</record>'
    )  # record/1393583 /export/xme

    wanted = [
        {
            'curated_relation': True,
            'name': 'CERN-LHC-ATLAS',
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1108541',
            },
        },
        {
            'curated_relation': False,
            'name': 'IHEP-CEPC',
        },
    ]

    parsed = create_record(marcxml)
    converted = jobs.do(parsed)

    assert wanted == converted['experiments']
Пример #53
0
def create_record(record, force=True, dry_run=False):
    """Convert a parsed MARC record to JSON with the model matching its collection.

    :param record: parsed MARC record to convert.
    :param force: accepted but not used by this function.
    :param dry_run: when true, return ``(errors, json)`` instead of the bare
        JSON; ``errors`` is always the empty string here.
    :returns: the converted JSON with empty values stripped, or the tuple
        described above on a dry run.
    """
    # Pick the model first, then convert once.
    if _collection_in_record(record, 'institution'):
        model = institutions
    elif _collection_in_record(record, 'experiment'):
        model = experiments
    elif _collection_in_record(record, 'journals'):
        model = journals
    elif _collection_in_record(record, 'hepnames'):
        model = hepnames
    elif (_collection_in_record(record, 'job') or
            _collection_in_record(record, 'jobhidden')):
        model = jobs
    elif _collection_in_record(record, 'conferences'):
        model = conferences
    else:
        model = hep
    converted = strip_empty_values(model.do(record))

    errors = ""
    return (errors, converted) if dry_run else converted