Exemplo n.º 1
0
def convert_marcxml(source):
    """Yield one converted JSON record per MARC XML record in ``source``.

    Each record is converted with the dojson model of the first collection
    it matches, checked in a fixed order; a record matching none of them is
    converted with the ``hep`` model. Every result is passed through
    ``strip_empty_values`` before being yielded.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator of converted records.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from scoap3.dojson.conferences import conferences
    from scoap3.dojson.experiments import experiments
    from scoap3.dojson.hep import hep
    from scoap3.dojson.hepnames import hepnames
    from scoap3.dojson.institutions import institutions
    from scoap3.dojson.jobs import jobs
    from scoap3.dojson.journals import journals
    from scoap3.dojson.utils import strip_empty_values

    # Ordered routing table: the first entry with a matching collection wins.
    routes = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        model = hep  # fallback when no collection matches
        for names, candidate in routes:
            if any(_collection_in_record(marc, name) for name in names):
                model = candidate
                break
        yield strip_empty_values(model.do(marc))
Exemplo n.º 2
0
def convert_marcxml(source):
    """Translate every MARC XML record in ``source`` to JSON, lazily.

    The collection of each record selects the dojson model used for the
    translation. Collections are tested in a fixed order and the first
    match is used; ``hep`` is the model for records matching no collection.
    Results go through ``strip_empty_values`` before being yielded.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator of converted records.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values
    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    # (collection names, model) pairs in the order they must be tested.
    dispatch = [
        (['institution'], institutions),
        (['experiment'], experiments),
        (['journals'], journals),
        (['hepnames'], hepnames),
        (['job', 'jobhidden'], jobs),
        (['conferences'], conferences),
    ]

    for chunk in split_blob(source.read()):
        parsed = create_record(chunk)
        # Lazily pick the first matching model, defaulting to ``hep``.
        chosen = next(
            (model for collections, model in dispatch
             if any(_collection_in_record(parsed, c) for c in collections)),
            hep,
        )
        yield strip_empty_values(chosen.do(parsed))
Exemplo n.º 3
0
def convert_marcxml(source):
    """Yield the ``marc21`` translation of each MARC XML record.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator of records produced by ``marc21.do``.
    """
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob

    for blob in split_blob(source.read()):
        parsed = create_record(blob)
        yield marc21.do(parsed)
Exemplo n.º 4
0
def convert_marcxml(source):
    """Lazily convert the MARC XML read from ``source`` with ``marc21``.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator with one ``marc21.do`` result per record.
    """
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob

    xml = source.read()
    for chunk in split_blob(xml):
        yield marc21.do(create_record(chunk))
Exemplo n.º 5
0
def records():
    """Create and index a deposit for every demo MARC21 record.

    Reads ``data/marc21/bibliographic.xml`` from the ``invenio_records``
    package, translates each record with ``marc21`` and creates a
    ``Deposit`` from it while one of the existing users is logged in.
    Users are cycled through by record position.

    NOTE: at least one user must exist; with no users the modulo used to
    pick one raises ``ZeroDivisionError``.
    """
    import pkg_resources
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from flask_login import login_user, logout_user
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    accounts = User.query.all()

    # Locate the demo data shipped inside the package.
    demo_xml = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')

    with open(demo_xml) as source, current_app.test_request_context():
        indexer = RecordIndexer()
        with db.session.begin_nested():
            blobs = split_blob(source.read())
            for position, blob in enumerate(blobs, start=1):
                owner = accounts[position % len(accounts)]
                login_user(owner)
                # Translate the MARC record, then store and index it.
                translated = marc21.do(create_record(blob))
                deposit = Deposit.create(translated)
                indexer.index(deposit)
                logout_user()
        db.session.commit()
Exemplo n.º 6
0
 def test_records_created(self):
     """Record - the demo file must split into exactly 142 records."""
     demo_file = os.path.join('data', 'demo_record_marc_data.xml')
     xmltext = pkg_resources.resource_string('invenio.testsuite', demo_file)
     chunks = list(split_blob(xmltext))
     assert len(chunks) == 142
Exemplo n.º 7
0
def records():
    """Load the demo MARC21 bibliographic records as indexed deposits.

    Reads ``data/marc21/bibliographic.xml`` from the ``invenio_records``
    package, translates every record with the ``marc21`` dojson model,
    creates a ``Deposit`` for it while a user is logged in, and indexes the
    result. Users come from ``User.query.all()`` and are cycled through by
    record position. Everything runs inside a test request context and a
    nested database transaction that is committed at the end.

    NOTE: at least one user must exist; with no users the modulo used to
    pick one raises ``ZeroDivisionError``.
    """
    import pkg_resources
    from flask_login import login_user, logout_user
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    users = User.query.all()

    # pkg resources the demodata
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        with current_app.test_request_context():
            indexer = RecordIndexer()
            with db.session.begin_nested():
                for index, data in enumerate(split_blob(source.read()),
                                             start=1):
                    # Deposit creation happens with a user logged in.
                    login_user(users[index % len(users)])
                    # do translate
                    record = marc21.do(create_record(data))
                    # create record
                    indexer.index(Deposit.create(record))
                    logout_user()
            db.session.commit()
Exemplo n.º 8
0
def cds(temp, source):
    """Load CDS demo records from MARC XML, mint PIDs and index them.

    :param temp: Forwarded to ``_handle_source`` together with ``source``.
    :param source: Location of the MARC XML data. When falsy, the
        ``data/records.xml`` fixture of ``cds.modules.fixtures`` is used.

    Every file returned by ``_handle_source`` is processed in its own
    nested transaction; a single commit is issued after all files.
    """
    click.echo('Loading data it may take several minutes.')
    # pkg resources the demodata
    if not source:
        source = pkg_resources.resource_filename('cds.modules.fixtures',
                                                 'data/records.xml')
    files = _handle_source(source, temp)
    # Record indexer
    indexer = RecordIndexer()
    for path in files:
        # Use a dedicated name for the handle so the ``source`` argument is
        # not shadowed inside the loop.
        with open(path) as xml_file:
            # FIXME: Add some progress
            # with click.progressbar(data) as records:
            with db.session.begin_nested():
                for data in split_blob(xml_file.read()):
                    # create uuid
                    rec_uuid = uuid.uuid4()
                    # do translate
                    record = marc21.do(create_record(data))
                    # create PID
                    current_pidstore.minters['recid'](rec_uuid, record)
                    # create record
                    indexer.index(Record.create(record, id_=rec_uuid))
    db.session.commit()
    click.echo('DONE :)')
Exemplo n.º 9
0
def records():
    """Load the demo MARC21 bibliographic records, mint PIDs and index.

    Reads ``data/marc21/bibliographic.xml`` from the ``invenio_records``
    package. For each record a fresh UUID is generated, the record is
    translated with ``marc21``, a persistent identifier is minted with the
    ``recid_minter`` minter, and the created ``Record`` is indexed. All
    records are created in one nested transaction that is then committed.
    """
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # pkg resources the demodata
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            # The record position is not needed, so iterate blobs directly.
            for data in split_blob(source.read()):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid_minter'](
                    rec_uuid, record
                )
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
Exemplo n.º 10
0
 def test_records_created(self):
     """Record - splitting the demo MARC XML yields 142 records."""
     relative_path = os.path.join('data', 'demo_record_marc_data.xml')
     xmltext = pkg_resources.resource_string(
         'invenio.testsuite', relative_path)
     record_count = len(list(split_blob(xmltext)))
     assert record_count == 142
Exemplo n.º 11
0
def records():
    """Load the demo MARC21 bibliographic records, mint PIDs and index.

    Reads ``data/marc21/bibliographic.xml`` from the ``invenio_records``
    package. For each record a fresh UUID is generated, the record is
    translated with ``marc21``, a persistent identifier is minted with the
    ``recid`` minter, and the created ``Record`` is indexed. All records
    are created in one nested transaction that is then committed.
    """
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # pkg resources the demodata
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            # The record position is not needed, so iterate blobs directly.
            for data in split_blob(source.read()):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid'](rec_uuid, record)
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
Exemplo n.º 12
0
 def test_records_created(self):
     """Record - the demo file next to this test holds 142 records."""
     here = os.path.dirname(__file__)
     xmlpath = os.path.join(here, 'data', 'demo_record_marc_data.xml')
     with open(xmlpath, 'r') as xmltext:
         chunks = list(split_blob(xmltext.read()))
         assert len(chunks) == 142
Exemplo n.º 13
0
def load(source):
    """Yield the ``create_record`` result for each MARC XML record.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator with one record per chunk from ``split_blob``.
    """
    for blob in split_blob(source.read()):
        # NOTE: splitting of album records (``999__`` subfield ``a`` equal
        # to 'ALBUM') through ``split_album`` is currently disabled, so
        # every record is yielded unsplit.
        yield create_record(blob)
Exemplo n.º 14
0
def convert_marcxml(source):
    """Yield the ``book`` model translation of each MARC XML record.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator with one ``book.do`` result per record.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from .book import book

    # NOTE: dispatching on the record's collection (with a ``default``
    # model as fallback) is disabled; every record goes through ``book``.
    for blob in split_blob(source.read()):
        parsed = create_record(blob)
        yield book.do(parsed)
Exemplo n.º 15
0
def records():
    """Create a ``Record`` for every demo MARC21 bibliographic record.

    Reads ``data/marc21/bibliographic.xml`` from the ``invenio_records``
    package and creates the records inside a nested database transaction.
    No commit is issued here.
    """
    import pkg_resources
    from invenio_records.api import Record
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob

    # Locate the demo data shipped inside the package.
    demo_xml = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')

    with open(demo_xml) as source, db.session.begin_nested():
        for blob in split_blob(source.read()):
            translated = marc21.do(create_record(blob))
            Record.create(translated)
Exemplo n.º 16
0
    def test_error_catching(self):
        """Record - a blob with an unclosed datafield still gives a record.

        The ``datafield`` element below is never closed; the test expects
        exactly one record containing the ``FFT__`` key.
        """
        malformed_xml = """<?xml version="1.0" encoding="UTF-8"?>
        <collection>
        <record>
          <datafield tag="FFT" ind1=" " ind2=" ">
            <subfield code="a">/path/to</subfield>
            <subfield code="t">Test</subfield>
            </record>
        </collection>
        """

        parsed = []
        for chunk in split_blob(malformed_xml):
            parsed.append(create_record(chunk))

        assert len(parsed) == 1
        only_record = parsed[0]
        assert 'FFT__' in only_record
Exemplo n.º 17
0
def convert_marcxml(source):
    """Convert MARC XML to JSON, yielding exactly one result per record.

    A record in the ``book`` collection is converted with the ``book``
    model and one in the ``audio`` collection with the ``audio`` model.
    Records in neither collection fall back to the ``book`` model, since no
    ``default`` model is wired in yet.

    The branches are mutually exclusive. Previously two independent ``if``
    statements were followed by an unconditional ``yield``, so book records
    were emitted twice and audio records were also emitted as books.

    :param source: Object whose ``read()`` returns the MARC XML blob.
    :returns: Generator of converted records.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from .book import book
    from .audio import audio

    for data in split_blob(source.read()):
        record = create_record(data)
        if _collection_in_record(record, 'book'):
            yield book.do(record)
        elif _collection_in_record(record, 'audio'):
            yield audio.do(record)
        else:
            # Fallback until a ``default`` model exists.
            yield book.do(record)
Exemplo n.º 18
0
def demo_records(app):
    """Create and index the CDS demo records.

    Reads ``data/records.xml`` from ``cds.modules.fixtures``. For each
    record a fresh UUID is generated, the record is translated with
    ``marc21``, a persistent identifier is minted with the ``recid``
    minter, and the created ``Record`` is indexed. All records are created
    in one nested transaction that is then committed.

    :param app: Not used in the body. Presumably an application fixture
        that must be set up before this runs; confirm against the caller.
    :returns: Path of the XML file the records were loaded from.
    """
    data_path = pkg_resources.resource_filename('cds.modules.fixtures',
                                                'data/records.xml')

    with open(data_path) as source:
        indexer = RecordIndexer()
        with _db.session.begin_nested():
            # The record position is not needed, so iterate blobs directly.
            for data in split_blob(source.read()):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid'](rec_uuid, record)
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        _db.session.commit()
    return data_path
Exemplo n.º 19
0
def split_marcxml(source):
    """Return the chunks ``split_blob`` produces for ``source`` as a list.

    :param source: Object whose ``read()`` returns the MARCXML blob.
    :returns: List with one item per chunk yielded by ``split_blob``.
    """
    from dojson.contrib.marc21.utils import split_blob
    return list(split_blob(source.read()))
Exemplo n.º 20
0
def split_marcxml(source):
    """Split the MARCXML read from ``source`` with dojson's MARC21 utils.

    :param source: Object whose ``read()`` returns the MARCXML blob.
    :returns: List of the chunks yielded by ``split_blob``.
    """
    from dojson.contrib.marc21.utils import split_blob
    blob = source.read()
    chunks = list(split_blob(blob))
    return chunks