Example #1
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from invenio_indexer.api import RecordIndexer

    from ..pidstore.minters import inspire_recid_minter

    indexer = RecordIndexer()

    index_queue = []
    for raw_record in chunk:
        record = marc_create_record(raw_record, keep_singletons=False)
        json_record = create_record(record)
        if '$schema' in json_record:
            json_record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(json_record['$schema'])
            )
        rec_uuid = str(Record.create(json_record, id_=None).id)

        # Create persistent identifier.
        pid = inspire_recid_minter(rec_uuid, json_record)

        index_queue.append(pid.object_uuid)

        db.session.commit()

    # Request record indexing
    for i in index_queue:
        indexer.index_by_id(i)

    # The returned UUID is used by a follow-up task to migrate files.
    return rec_uuid
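
The create-mint-commit-index pattern above recurs throughout these examples. A minimal self-contained sketch, assuming an application context and the 'recid_minter' registered by the running app (as in Example #2); `inspire_recid_minter` in the snippet above is project-specific:

import uuid

from invenio_db import db
from invenio_indexer.api import RecordIndexer
from invenio_pidstore import current_pidstore
from invenio_records.api import Record


def create_and_index(data):
    """Create a record, mint a PID, commit, then index it."""
    rec_uuid = uuid.uuid4()
    current_pidstore.minters['recid_minter'](rec_uuid, data)
    record = Record.create(data, id_=rec_uuid)
    # index_by_id() re-reads the record from the database, so the
    # transaction must be committed before indexing.
    db.session.commit()
    RecordIndexer().index_by_id(record.id)
    return record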
Example #2
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_indexer.api import RecordIndexer
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # Locate the demo data shipped with invenio_records via pkg_resources
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for index, data in enumerate(split_blob(source.read()), start=1):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid_minter'](
                    rec_uuid, record
                )
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
Example #3
def test_basic_search(app, db, es):
    """Test basic search functionality."""
    # The index should be empty
    assert len(ItemSearch().execute()) == 0

    # Create item1, search for everything
    item1 = Item.create({})
    item1.commit()

    record_indexer = RecordIndexer()
    record_indexer.index(item1)

    current_search.flush_and_refresh('_all')

    assert len(ItemSearch().execute()) == 1

    # Create item2, search for everything again
    item2 = Item.create({'foo': 'bar'})
    item2.commit()
    record_indexer.index(item2)

    current_search.flush_and_refresh('_all')

    assert len(ItemSearch().execute()) == 2

    # Search for item2
    assert len(ItemSearch().query('match', foo='bar').execute()) == 1

    # Search for nonsense
    assert len(ItemSearch().query('match', foo='banana').execute()) == 0
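
The repeated `flush_and_refresh` calls above are not decoration: Elasticsearch is near-real-time, so a freshly indexed document only becomes searchable after the index refreshes. A small helper sketch, assuming the invenio-search extension is initialized:

from invenio_indexer.api import RecordIndexer
from invenio_search import current_search


def index_and_wait(record, index='_all'):
    """Index a record and block until it is visible to search."""
    RecordIndexer().index(record)
    # Force a refresh so the next query sees the new document.
    current_search.flush_and_refresh(index)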
Example #4
def update_expired_embargoes():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '', '', ''
    ))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client,
            index='records'
        ).query(
            'query_string',
            query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                datetime.now(timezone.utc).isoformat()
            ),
            allow_leading_wildcard=False
        ).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()

        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
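
`bulk_index` and `process_bulk_queue` split the work in two: the first only pushes record UUIDs onto a message queue, the second consumes the queue and ships the documents to Elasticsearch in one bulk request. A minimal sketch of the same pipeline; in production the second step usually runs in a separate worker task rather than inline:

from invenio_indexer.api import RecordIndexer


def reindex_in_bulk(record_uuids):
    """Queue record UUIDs, then drain the queue into Elasticsearch."""
    indexer = RecordIndexer()
    indexer.bulk_index(record_uuids)    # enqueue only; returns quickly
    indexer.process_bulk_queue()        # consume queue, bulk-send to ES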
Example #5
def records():
    """Load test data fixture."""
    import uuid
    from invenio_records.api import Record
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus

    create_test_user()

    indexer = RecordIndexer()

    # Record 1 - Live record
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid1 = PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        Record.create({
            'title': 'Registered',
            'description': 'This is an awesome description',
            'control_number': '1',
            'access_right': 'restricted',
            'access_conditions': 'fuu',
            'owners': [1, 2],
            'recid': 1
        }, id_=rec_uuid)
        indexer.index_by_id(pid1.object_uuid)

    db.session.commit()

    sleep(3)
Example #6
def store_record(obj, *args, **kwargs):
    """Create and index new record in main record space."""
    assert "$schema" in obj.data, "No $schema attribute found!"

    # Create record
    # FIXME: Do some preprocessing of obj.data before creating a record so that
    # we're sure that the schema will be validated without touching the full
    # holdingpen stack.
    record = Record.create(obj.data, id_=None)

    # Create persistent identifier.
    pid = inspire_recid_minter(str(record.id), record)

    # Commit any changes to record
    record.commit()

    # Dump any changes to record
    obj.data = record.dumps()

    # Commit to DB before indexing
    db.session.commit()

    # Index record
    indexer = RecordIndexer()
    indexer.index_by_id(pid.object_uuid)
Example #7
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from flask_login import login_user, logout_user
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    users = User.query.all()

    # Locate the demo data shipped with invenio_records via pkg_resources
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        with current_app.test_request_context():
            indexer = RecordIndexer()
            with db.session.begin_nested():
                for index, data in enumerate(split_blob(source.read()),
                                             start=1):
                    login_user(users[index % len(users)])
                    # do translate
                    record = marc21.do(create_record(data))
                    # create record
                    indexer.index(Deposit.create(record))
                    logout_user()
            db.session.commit()
Example #8
def test_reindex(app, script_info):
    """Test reindex."""
    # load records
    with app.test_request_context():
        runner = CliRunner()
        rec_uuid = uuid.uuid4()
        data = {'title': 'Test0'}
        record = Record.create(data, id_=rec_uuid)
        db.session.commit()

        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'],
                            obj=script_info)
        assert 0 == res.exit_code

        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code

        sleep(5)
        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(record)
        res = current_search_client.get(index=index, doc_type=doc_type,
                                        id=rec_uuid)
        assert res['found']

        # Destroy queue
        res = runner.invoke(cli.queue, ['delete'],
                            obj=script_info)
        assert 0 == res.exit_code
Example #9
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #10
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
Example #11
def record_not_yet_deleted(app):
    snippet = (
        '<record>'
        '  <controlfield tag="001">333</controlfield>'
        '  <controlfield tag="005">20160913214552.0</controlfield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '</record>'
    )

    with app.app_context():
        json_record = hep.do(create_record(snippet))
        json_record['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

        with db.session.begin_nested():
            record = record_upsert(json_record)
            if record:
                ri = RecordIndexer()
                ri.index(record)

        db.session.commit()

    yield

    with app.app_context():
        _delete_record_from_everywhere('literature', 333)
Example #12
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy."""
    indexer = RecordIndexer()
    redis_url = current_app.config.get('CACHE_REDIS_URL')
    r = StrictRedis.from_url(redis_url)

    try:
        while r.llen('legacy_records'):
            raw_record = r.lpop('legacy_records')
            if raw_record:
                # FIXME use migrate_and_insert_record(raw_record)
                # The record might be None, in case a parallel
                # continuous_migration task has already consumed the queue.
                raw_record = zlib.decompress(raw_record)
                record = marc_create_record(raw_record, keep_singletons=False)
                recid = int(record['001'][0])
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
                json_record = create_record(record)
                with db.session.begin_nested():
                    try:
                        record = record_upsert(json_record)
                    except ValidationError as e:
                        # Invalid record, will not get indexed
                        errors = "ValidationError: Record {0}: {1}".format(
                            recid, e
                        )
                        prod_record.valid = False
                        prod_record.errors = errors
                        db.session.merge(prod_record)
                        continue
                indexer.index_by_id(record.id)
    finally:
        db.session.commit()
        db.session.close()
Example #13
def test_indexer_bulk_index(app, queue):
    """Test delay indexing."""
    with app.app_context():
        with establish_connection() as c:
            indexer = RecordIndexer()
            id1 = uuid.uuid4()
            id2 = uuid.uuid4()
            indexer.bulk_index([id1, id2])
            indexer.bulk_delete([id1, id2])

            consumer = Consumer(
                connection=c,
                queue=indexer.mq_queue.name,
                exchange=indexer.mq_exchange.name,
                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            [m.ack() for m in messages]

            assert len(messages) == 4
            data0 = messages[0].decode()
            assert data0['id'] == str(id1)
            assert data0['op'] == 'index'
            data2 = messages[2].decode()
            assert data2['id'] == str(id1)
            assert data2['op'] == 'delete'
Example #14
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
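
The example above combines invenio-files-rest and invenio-records-files with indexing. A condensed sketch of that bucket/object pattern; the `record.model` link and the `ObjectVersion.create` call are taken from the examples in this listing:

from io import BytesIO

from invenio_db import db
from invenio_files_rest.models import Bucket, ObjectVersion
from invenio_indexer.api import RecordIndexer
from invenio_records_files.models import RecordsBuckets


def attach_file_and_index(record, key, content):
    """Link a bucket to a record, store one file, then index."""
    bucket = Bucket.create()
    RecordsBuckets.create(record=record.model, bucket=bucket)
    ObjectVersion.create(bucket, key, stream=BytesIO(content))
    db.session.commit()
    RecordIndexer().index(record)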
Example #15
def receive_after_model_commit(sender, changes):
    """Perform actions after models committed to database."""
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update'):
                indexer.index(InspireRecord(model_instance.json, model_instance))
            else:
                indexer.delete(InspireRecord(model_instance.json, model_instance))
Example #16
def update_authors_recid(record_id, uuid, profile_recid):
    """Update author profile for a given signature.

    The method receives UUIDs representing record and signature
    respectively together with an author profile recid.
    The new recid will be placed in the signature with the given
    UUID.

    :param record_id:
        A string representing UUID of a given record.

        Example:
            record_id = "a5afb151-8f75-4e91-8dc1-05e7e8e8c0b8"

    :param uuid:
        A string representing UUID of a given signature.

        Example:
            uuid = "c2f432bd-2f52-4c16-ac66-096f168c762f"

    :param profile_recid:
        A string representing author profile recid, that
        updated signature should point to.

        Example:
            profile_recid = "1"
    """
    try:
        record = Record.get_record(record_id)
        update_flag = False

        for author in record['authors']:
            if author['uuid'] == uuid:
                author['recid'] = str(profile_recid)
                update_flag = True

        if update_flag:
            # Disconnect the signal on insert of a new record.
            before_record_index.disconnect(append_updated_record_to_queue)

            # Update the record in the database.
            record.commit()
            db.session.commit()

            # Update the record in Elasticsearch.
            indexer = RecordIndexer()
            indexer.index_by_id(record.id)
    except StaleDataError as exc:
        raise update_authors_recid.retry(exc=exc)
    finally:
        # Reconnect the disconnected signal.
        before_record_index.connect(append_updated_record_to_queue)

    # Report.
    logger.info("Updated signature %s with profile %s",
                uuid, profile_recid)
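
The disconnect/reconnect dance around `before_record_index` above is easy to get wrong once exceptions enter the picture. A small context-manager sketch of the same idea; the helper name is illustrative, not part of any Invenio API:

from contextlib import contextmanager

from invenio_indexer.signals import before_record_index


@contextmanager
def muted_receiver(receiver):
    """Temporarily disconnect a before_record_index receiver."""
    before_record_index.disconnect(receiver)
    try:
        yield
    finally:
        # Reconnect even if indexing raised, mirroring the finally
        # block in the task above.
        before_record_index.connect(receiver)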
Example #17
def closed_access_record(db, es, record_with_files_creation):
    """Creation of a full record with closed access right."""
    _, record, record_url = record_with_files_creation
    record['access_right'] = AccessRight.CLOSED
    record.commit()
    db.session.commit()
    indexer = RecordIndexer()
    indexer.index(record)
    current_search.flush_and_refresh(index='records')
    return record
Example #18
def index_record(obj, eng):
    """
    Index the record.

    The record should only be indexed when every other step has finished successfully.
    """

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    indexer = RecordIndexer()
    indexer.index_by_id(pid.object_uuid)
Example #19
def create_record(app, item_dict, mint_oaiid=True):
    """Create test record."""
    indexer = RecordIndexer()
    with app.test_request_context():
        record_id = uuid.uuid4()
        recid_minter(record_id, item_dict)
        if mint_oaiid:
            oaiid_minter(record_id, item_dict)
        record = Record.create(item_dict, id_=record_id)
        indexer.index(record)
        return record
Example #20
def update_expired_embargos():
    """Release expired embargoes every midnight."""
    record_ids = AccessRight.get_expired_embargos()
    for record in Record.get_records(record_ids):
        record['access_right'] = AccessRight.OPEN
        record.commit()
    db.session.commit()

    indexer = RecordIndexer()
    indexer.bulk_index(record_ids)
    indexer.process_bulk_queue()
Example #21
def indexed_loans(es, test_loans):
    """Get a function to wait for records to be flushed to index."""
    indexer = RecordIndexer()
    for pid, loan in test_loans:
        indexer.index(loan)
    current_search.flush_and_refresh(index="loans")

    yield test_loans

    for pid, loan in test_loans:
        indexer.delete_by_id(loan.id)
    current_search.flush_and_refresh(index="loans")
Example #22
def remove_records(app, record_ids):
    """Remove all records."""
    with app.app_context():
        indexer = RecordIndexer()
        for r_id in record_ids:
            record = RecordMetadata.query.get(r_id)
            indexer.delete_by_id(r_id)
            pids = PersistentIdentifier.query.filter_by(
                object_uuid=r_id).all()
            for pid in pids:
                db.session.delete(pid)
            db.session.delete(record)
        db.session.commit()
Example #23
def _create_and_index_record(record):
    record = Record.create(record)
    inspire_recid_minter(record.id, record)
    # invenio-collections will populate _collections field in record upon
    # commit
    db.session.commit()

    # Record needs to be indexed since views fetch records from ES
    r = RecordIndexer()
    r.index(record)
    es.indices.refresh('records-hep')

    return record
Example #24
def oaiserver(sets, records):
    """Initialize OAI-PMH server."""
    from invenio_db import db
    from invenio_oaiserver.models import OAISet
    from invenio_records.api import Record

    # create OAI sets
    with db.session.begin_nested():
        for i in range(sets):
            db.session.add(OAISet(
                spec='test{0}'.format(i),
                name='Test{0}'.format(i),
                description='test desc {0}'.format(i),
                search_pattern='title_statement.title:Test{0}'.format(i),
            ))

    # create a record
    schema = {
        'type': 'object',
        'properties': {
            'title_statement': {
                'type': 'object',
                'properties': {
                    'title': {
                        'type': 'string',
                    },
                },
            },
            'field': {'type': 'boolean'},
        },
    }

    search.client.indices.delete_alias('_all', '_all', ignore=[400, 404])
    search.client.indices.delete('*')

    with app.app_context():
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for i in range(records):
                record_id = uuid.uuid4()
                data = {
                    'title_statement': {'title': 'Test{0}'.format(i)},
                    '$schema': schema,
                }
                recid_minter(record_id, data)
                oaiid_minter(record_id, data)
                record = Record.create(data, id_=record_id)
                indexer.index(record)

        db.session.commit()
Example #25
def test_record_can_be_deleted(app, record_not_yet_deleted):
    with app.test_client() as client:
        assert client.get('/api/literature/333').status_code == 200

    record = get_db_record('literature', 333)
    record['deleted'] = True
    record.commit()
    if record:
        ri = RecordIndexer()
        ri.index(record)
    db.session.commit()

    with app.test_client() as client:
        assert client.get('/api/literature/333').status_code == 410
Example #26
def _delete_record_from_everywhere(pid_type, record_control_number):
    record = get_db_record(pid_type, record_control_number)

    ri = RecordIndexer()
    ri.delete(record)
    record.delete(force=True)

    pid = PersistentIdentifier.get(pid_type, record_control_number)
    PersistentIdentifier.delete(pid)

    object_uuid = pid.object_uuid
    PersistentIdentifier.query.filter(
        object_uuid == PersistentIdentifier.object_uuid).delete()

    db.session.commit()
Example #27
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.
    """
    indexer = RecordIndexer()

    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update'):
                indexer.index(Record(model_instance.json, model_instance))
            else:
                indexer.delete(Record(model_instance.json, model_instance))
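
The `(sender, changes)` signature above matches Flask-SQLAlchemy's `models_committed` signal, which fires only after a transaction has really been committed; wiring the receiver up is one line (assuming the app uses Flask-SQLAlchemy's signalling support):

from flask_sqlalchemy import models_committed

# Register the receiver so every committed RecordMetadata change is
# mirrored to Elasticsearch.
models_committed.connect(index_after_commit)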
Example #28
def store_record(obj, *args, **kwargs):
    """Create and index new record in main record space."""
    if '$schema' in obj.data:
        obj.data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(obj.data['$schema'])
        )
    # Create record
    rec_uuid = str(Record.create(obj.data, id_=None).id)

    # Create persistent identifier.
    pid = inspire_recid_minter(rec_uuid, obj.data)
    db.session.commit()

    # Index record
    indexer = RecordIndexer()
    indexer.index_by_id(pid.object_uuid)
Example #29
def test_records_can_be_merged(app, records_not_merged_in_marcxml):
    with app.test_client() as client:
        assert client.get('/api/literature/111').status_code == 200
        assert client.get('/api/literature/222').status_code == 200

    record = get_db_record('literature', 222)
    record['deleted'] = True
    record['new_record'] = {'$ref': 'http://localhost:5000/api/record/111'}
    record.commit()
    if record:
        ri = RecordIndexer()
        ri.index(record)
    db.session.commit()

    with app.test_client() as client:
        assert client.get('/api/literature/111').status_code == 200
        assert client.get('/api/literature/222').status_code == 301
Example #30
def test_before_record_index_dynamic_connect(app):
    """Test before_record_index.dynamic_connect."""
    with app.app_context():
        with patch('invenio_records.api.Record.validate'):
            auth_record = Record.create({
                '$schema': '/records/authorities/authority-v1.0.0.json',
                'title': 'Test'
            })
            bib_record = Record.create({
                '$schema': '/records/bibliographic/bibliographic-v1.0.0.json',
                'title': 'Test'
            })
            db.session.commit()

        def _simple(sender, json=None, **kwargs):
            json['simple'] = 'simple'

        def _custom(sender, json=None, **kwargs):
            json['custom'] = 'custom'

        def _cond(sender, connect_kwargs, index=None, **kwargs):
            return 'bibliographic' in index

        _receiver1 = before_record_index.dynamic_connect(
            _simple, index='records-authorities-authority-v1.0.0')
        _receiver2 = before_record_index.dynamic_connect(_custom,
                                                         condition_func=_cond)

        action = RecordIndexer()._index_action(
            dict(id=str(auth_record.id), op='index'))
        assert 'title' in action['_source']
        assert action['_source']['simple'] == 'simple'

        action = RecordIndexer()._index_action(
            dict(id=str(bib_record.id), index='foo', op='index'))
        assert 'title' in action['_source']
        assert action['_source']['custom'] == 'custom'

        before_record_index.disconnect(_receiver1)
        before_record_index.disconnect(_receiver2)
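
Outside of tests, `dynamic_connect` is typically used to enrich the Elasticsearch payload for a single index. A hedged sketch of that pattern; `extract_fulltext` is an assumed project helper and the index name is illustrative:

from invenio_indexer.signals import before_record_index


def add_fulltext(sender, json=None, record=None, **kwargs):
    """Enrich the ES document in place before it is indexed."""
    json['fulltext'] = extract_fulltext(record)  # assumed project helper


receiver = before_record_index.dynamic_connect(
    add_fulltext, index='records-bibliographic-bibliographic-v1.0.0')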
Example #31
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.
    """
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if isinstance(model_instance, RecordMetadata):
            if change in ('insert', 'update') and not model_instance.json.get("deleted"):
                if hasattr(model_instance, '_enhanced_record'):
                    record = model_instance._enhanced_record
                else:
                    record = model_instance.json
                indexer.index(InspireRecord(record, model_instance))
            else:
                try:
                    indexer.delete(InspireRecord(
                        model_instance.json, model_instance))
                except NotFoundError:
                    # Record not found in ES, nothing to delete.
                    LOGGER.debug('Record %s not found in ES',
                                 model_instance.json.get("id"))

            pid_type = get_pid_type_from_schema(model_instance.json['$schema'])
            pid_value = model_instance.json['control_number']
            db_version = model_instance.version_id

            index_modified_citations_from_record.delay(pid_type, pid_value, db_version)
Example #32
def articles():
    """Load demo article records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.artid import \
        cernopendata_articleid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/article-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/articles')

    articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):

                # Replace body with responding content
                assert data["body"]["content"]
                content_filename = os.path.join(*([
                    "/",
                ] + filename.split('/')[:-1] + [
                    data["body"]["content"],
                ]))

                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()
                if "collections" not in data and \
                   not isinstance(data.get("collections", None), basestring):
                    data["collections"] = []
                id = uuid.uuid4()
                cernopendata_articleid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #33
def _create_records(path, verbose):
    """Create demo records."""
    indexer = RecordIndexer(
        record_to_index=lambda record: ('records', 'record')
    )
    if verbose > 0:
        click.secho('Creating records', fg='yellow', bold=True)
    with db.session.begin_nested():
        records_dir = os.path.join(path, 'records')
        nb_records = 0
        for root, dirs, files in os.walk(records_dir):
            for filename in files:
                split_filename = os.path.splitext(filename)
                if split_filename[1] == '.json':
                    rec_uuid = UUID(split_filename[0])
                    with open(os.path.join(records_dir, root,
                                           filename)) as record_file:
                        record_str = record_file.read()
                    record_str = resolve_community_id(record_str)
                    record_str = resolve_block_schema_id(record_str)
                    deposit = Deposit.create(json.loads(record_str),
                                             id_=rec_uuid)
                    ObjectVersion.create(deposit.files.bucket, 'myfile',
                                         stream=BytesIO(b'mycontent'))
                    deposit.publish()
                    pid, record = deposit.fetch_published()
                    # index the record
                    indexer.index(record)
                    if verbose > 1:
                        click.secho('CREATED RECORD {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(record, indent=4)))
                        click.secho('CREATED DEPOSIT {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(deposit, indent=4)))
                    nb_records += 1
    if verbose > 0:
        click.secho('Created {} records!'.format(nb_records), fg='green')
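
The `record_to_index` callable above pins every record to one fixed index. It must return an `(index, doc_type)` pair, so routing can also be derived per record; a sketch with an illustrative rule:

from invenio_indexer.api import RecordIndexer


def route_record(record):
    """Return (index, doc_type) for a record; the rule is illustrative."""
    if record.get('$schema', '').endswith('deposit-v1.0.0.json'):
        return 'deposits', 'deposit'
    return 'records', 'record'


indexer = RecordIndexer(record_to_index=route_record)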
Example #35
def update_expired_embargoes():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit(
        (current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
         current_app.config['JSONSCHEMAS_HOST'],
         current_app.config.get('APPLICATION_ROOT') or '', '', ''))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client, index='records').query(
                'query_string',
                query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                    datetime.now(timezone.utc).isoformat()),
                allow_leading_wildcard=False).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()

        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
Example #37
def demo_init():
    """Initialize demo site."""
    from flask import current_app
    records = []
    # Import bibliographic records
    click.secho('Importing bibliographic records', fg='green')
    records += import_records(
        marc21,
        current_app.extensions['invenio-jsonschemas'].path_to_url(
            'marc21/bibliographic/bd-v1.0.2.json'),
        pkg_resources.resource_filename('invenio_records',
                                        'data/marc21/bibliographic.xml'),
    )
    # FIXME add support for authority records.
    # Import authority records
    # click.secho('Importing authority records', fg='green')
    # records += import_records(
    #     marc21_authority,
    #     current_app.extensions['invenio-jsonschemas'].path_to_url(
    #         'marc21/authority/ad-v1.0.2.json'),
    #     pkg_resources.resource_filename(
    #         'invenio_records', 'data/marc21/authority.xml'),
    # )
    db.session.commit()
    # Index all records
    click.secho('Indexing records', fg='green')
    indexer = RecordIndexer()
    indexer.bulk_index(records)
    indexer.process_bulk_queue()
Example #38
def test_open_access_permissions(client, json_headers, testdata, users):
    """Test GET open/close access documents."""
    # set the documents to have read access only by patron2. `_access` should
    # be totally ignored.
    indexer = RecordIndexer()
    doc1 = Document.get_record_by_pid("docid-open-access")
    doc2 = Document.get_record_by_pid("docid-closed-access")
    for doc in [doc1, doc2]:
        doc.update(dict(_access=dict(read=["patron2"])))
        doc.commit()
        db.session.commit()
        indexer.index(doc)
    current_search.flush_and_refresh(index="documents")

    test_data = [
        ("anonymous", "docid-open-access", 200, 1),
        ("patron1", "docid-open-access", 200, 1),
        ("patron2", "docid-open-access", 200, 1),
        ("librarian", "docid-open-access", 200, 1),
        ("admin", "docid-open-access", 200, 1),
        ("anonymous", "docid-closed-access", 401, 0),
        ("patron1", "docid-closed-access", 403, 0),
        ("patron2", "docid-closed-access", 403, 0),
        ("librarian", "docid-closed-access", 200, 1),
        ("admin", "docid-closed-access", 200, 1),
    ]
    for user, pid, status_code, n_hits in test_data:
        # item endpoint
        user_login(client, user, users)
        url = url_for("invenio_records_rest.docid_item", pid_value=pid)
        res = client.get(url, headers=json_headers)
        assert res.status_code == status_code

        # list endpoint
        user_login(client, user, users)
        url = url_for("invenio_records_rest.docid_list",
                      q="pid:{}".format(pid))
        res = client.get(url, headers=json_headers)
        hits = json.loads(res.data.decode("utf-8"))
        assert hits["hits"]["total"] == n_hits
Example #39
    def prepare_data():
        """Prepare data."""
        days = current_app.config[
            "ILS_CIRCULATION_MAIL_OVERDUE_REMINDER_INTERVAL"
        ]
        loans = testdata["loans"]

        recs = []
        now = arrow.utcnow()

        def new_end_date(loan, date):
            loan["end_date"] = date.date().isoformat()
            loan["state"] = "ITEM_ON_LOAN"
            loan.commit()
            recs.append(loan)

        # overdue loans
        date = now - timedelta(days=days)
        new_end_date(loans[0], date)

        date = now - timedelta(days=days * 2)
        new_end_date(loans[1], date)

        # not overdue
        date = now - timedelta(days=-1)
        new_end_date(loans[2], date)

        # not overdue or overdue but not to be notified
        remaining_not_overdue = loans[3:]
        for loan in remaining_not_overdue:
            days = random.choice([-1, 0, 1])
            date = now - timedelta(days=days)
            new_end_date(loan, date)
        db.session.commit()

        indexer = RecordIndexer()
        for rec in recs:
            indexer.index(rec)

        current_search.flush_and_refresh(index="*")
Example #40
def testdata(app, db, es_clear, patron1):
    """Create, index and return test data."""
    data = load_json_from_datadir("locations.json")
    locations = _create_records(db, data, Location, LOCATION_PID_TYPE)

    data = load_json_from_datadir("internal_locations.json")
    int_locs = _create_records(db, data, InternalLocation,
                               INTERNAL_LOCATION_PID_TYPE)

    data = load_json_from_datadir("documents.json")
    documents = _create_records(db, data, Document, DOCUMENT_PID_TYPE)

    data = load_json_from_datadir("series.json")
    series = _create_records(db, data, Series, SERIES_PID_TYPE)

    data = load_json_from_datadir("items.json")
    items = _create_records(db, data, Item, ITEM_PID_TYPE)

    data = load_json_from_datadir("eitems.json")
    eitems = _create_records(db, data, EItem, EITEM_PID_TYPE)

    data = load_json_from_datadir("loans.json")
    loans = _create_records(db, data, Loan, CIRCULATION_LOAN_PID_TYPE)

    # index
    ri = RecordIndexer()
    for rec in (locations + int_locs + series + documents + items + eitems +
                loans):
        ri.index(rec)

    current_search.flush_and_refresh(index="*")
    return {
        "documents": documents,
        "eitems": eitems,
        "internal_locations": int_locs,
        "items": items,
        "loans": loans,
        "locations": locations,
        "series": series,
    }
Example #41
def curate(community):
    """Index page with uploader and list of existing depositions.

    :param community: The community whose records are curated.
    """
    if request.method == 'POST':
        action = request.json.get('action')
        recid = request.json.get('recid')

        # 'recid' is mandatory
        if not recid:
            abort(400)
        if action not in ['accept', 'reject', 'remove']:
            abort(400)

        # Resolve recid to a Record
        resolver = Resolver(pid_type='recid',
                            object_type='rec',
                            getter=Record.get_record)
        pid, record = resolver.resolve(recid)

        # Perform actions
        if action == "accept":
            community.accept_record(record)
        elif action == "reject":
            community.reject_record(record)
        elif action == "remove":
            community.remove_record(record)

        record.commit()
        db.session.commit()
        RecordIndexer().index_by_id(record.id)
        return jsonify({'status': 'success'})

    ctx = {'community': community}
    community_id = community.id
    community_flg = "0"

    # Get index style
    style = IndexStyle.get(
        current_app.config['WEKO_INDEX_TREE_STYLE_OPTIONS']['id'])
    width = style.width if style else '3'
    height = style.height if style else None

    sort_options, display_number = SearchSetting.get_results_setting()

    return render_template(current_app.config['COMMUNITIES_CURATE_TEMPLATE'],
                           community_id=community_id,
                           sort_option=sort_options,
                           width=width,
                           height=height,
                           **ctx)
Example #42
def test_get_record_no_acls_authenticated(app, db, es, es_acl_prepare,
                                          test_users):
    pid, record = create_record({}, clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)

    # make sure it is flushed
    current_search_client.indices.flush()

    # try to get it ...
    with app.test_client() as client:
        login(client, test_users.u1)
        res = client.get(record_url(pid))
        assert res.status_code == 403  # Forbidden
Example #43
def unindex_record_trigger(sender, *args, **kwargs):
    """Unindex the given record if it is a publication."""

    record = kwargs['record']

    if is_publication(record.model):
        # The indexer requires that the record still exists in the database
        # when it is removed from the search index. Thus we have to unindex it
        # synchronously.
        try:
            RecordIndexer().delete(record)
        except NotFoundError:
            pass
Example #44
def indexed_records(es, records):
    """Fixture for the records, which are already indexed."""
    # es.indices.flush('*')
    # # delete all elasticsearch indices and recreate them
    # for deleted in current_search.delete(ignore=[404]):
    #     pass
    # for created in current_search.create(None):
    #     pass
    # flush the indices so that indexed records are searchable
    for pid_name, record in records.items():
        RecordIndexer().index(record)
    es.indices.flush('*')
    return records
Example #45
def remove_oaiset_spec(record_uuid, spec):
    """Remove the OAI spec from the record and commit."""
    rec = Record.get_record(record_uuid)
    rec['_oai']['sets'] = sorted(
        [s for s in rec['_oai'].get('sets', []) if s != spec])
    rec['_oai']['updated'] = datetime_to_datestamp(datetime.utcnow())
    if not rec['_oai']['sets']:
        del rec['_oai']['sets']
    rec.commit()
    db.session.commit()
    RecordIndexer().bulk_index([
        str(rec.id),
    ])
Example #46
def test_custom_search(es, api, json_headers, record_with_bucket,
                       custom_metadata, query, result):
    """Test custom metadata search."""
    pid, record = record_with_bucket
    record['custom'] = custom_metadata
    RecordIndexer().index(record)
    current_search.flush_and_refresh(index='records')
    with api.test_request_context():
        with api.test_client() as client:
            res = client.get(url_for('invenio_records_rest.recid_list',
                                     custom=query),
                             headers=json_headers)
            assert len(res.json) == result
Example #47
def reindex_pid(pid_type, RecordClass):
    index_name = None
    indexer = RecordIndexer()
    for pid in tqdm.tqdm(PersistentIdentifier.query.filter_by(
            pid_type=pid_type, object_type='rec',
            status=PIDStatus.REGISTERED.value)):
        record = RecordClass.get_record(pid.object_uuid)
        # 'only' and 'raise_on_error' come from the enclosing CLI command.
        if only and str(record.id) != only:
            continue
        try:
            index_name, doc_type = indexer.record_to_index(record)
            index_name = build_alias_name(index_name)
            # print('Indexing', record.get('id'), 'into', index_name)
            indexer.index(record)
        except Exception:
            with open('/tmp/indexing-error.json', 'a') as f:
                print(json.dumps(record.dumps(), indent=4,
                                 ensure_ascii=False), file=f)
                traceback.print_exc(file=f)
            if raise_on_error:
                raise
    if index_name:
        current_search_client.indices.refresh(index_name)
        current_search_client.indices.flush(index_name)
Example #48
def test_delete(app):
    """Test record indexing."""
    with app.app_context():
        recid = uuid.uuid4()
        record = Record.create({'title': 'Test'}, id_=recid)
        db.session.commit()

        client_mock = MagicMock()
        RecordIndexer(search_client=client_mock).delete(record)

        doc_type = app.config['INDEXER_DEFAULT_DOC_TYPE'] if lt_es7 else '_doc'
        client_mock.delete.assert_called_with(
            id=str(recid),
            index=app.config['INDEXER_DEFAULT_INDEX'],
            doc_type=doc_type,
            version=record.revision_id,
            version_type='external_gte',
        )

        with patch('invenio_indexer.api.RecordIndexer.delete') as fun:
            RecordIndexer(search_client=client_mock).delete_by_id(recid)
            assert fun.called
Example #49
def test_process_bulk_queue(app, queue):
    """Test process indexing."""
    with app.app_context():
        # Create a test record
        r = Record.create({'title': 'test'})
        db.session.commit()
        invalid_id2 = uuid.uuid4()

        RecordIndexer().bulk_index([r.id, invalid_id2])
        RecordIndexer().bulk_delete([r.id, invalid_id2])

        ret = {}

        def _mock_bulk(client, actions_iterator, **kwargs):
            ret['actions'] = list(actions_iterator)
            return len(ret['actions'])

        with patch('invenio_indexer.api.bulk', _mock_bulk):
            # Invalid actions are rejected
            assert RecordIndexer().process_bulk_queue() == 2
            assert [x['_op_type'] for x in ret['actions']] == \
                ['index', 'delete']
Example #50
def request(community_id, record_id, accept):
    """Request a record acceptance to a community."""
    c = Community.get(community_id)
    assert c is not None
    record = Record.get_record(record_id)
    if accept:
        c.add_record(record)
        record.commit()
    else:
        InclusionRequest.create(community=c, record=record,
                                notify=False)
    db.session.commit()
    RecordIndexer().index_by_id(record.id)
Example #51
def test_oai_set_result_count(mocker, audit_records, db, es, communities,
                              oai_sources, issues):
    db_records, es_records, oai2d_records = oai_sources

    for recid in db_records:
        _, record = record_resolver.resolve(recid)
        record['_oai']['sets'] = ['user-c1']
        record.commit()
    db.session.commit()

    indexer = RecordIndexer()
    for recid in es_records:
        _, record = record_resolver.resolve(recid)
        record['_oai']['sets'] = ['user-c1']
        indexer.index(record)
    current_search.flush_and_refresh(index='records')

    # '/oai2d' needs straightforward cheating... There's no way to be sure
    # why the endpoint sometimes fails to report the correct results. It could
    # be a resumption-token issue, or even an indexing issue on Elasticsearch.
    # Either way, we have to be able to replicate this behavior when running
    # on production and report it as an issue.
    oai2d_ids_mock = MagicMock()
    oai2d_ids_mock.return_value = set(oai2d_records)
    oai2d_ids_mock = mocker.patch(
        'zenodo.modules.auditor.oai.OAISetResultCheck'
        '._oai2d_endpoint_identifiers',
        new=oai2d_ids_mock)

    audit = OAIAudit('testAudit', logging.getLogger('auditorTesting'), [])
    check = OAISetResultCheck(audit, Community.get('c1'))
    check.perform()
    audit.clear_db_oai_set_cache()

    result_issues = check.issues.get('missing_ids', {})
    db_issues, es_issues, api_issues = issues
    assert set(result_issues.get('db', [])) == set(db_issues)
    assert set(result_issues.get('es', [])) == set(es_issues)
    assert set(result_issues.get('oai2d', [])) == set(api_issues)
Example #52
def create_record(data):
    """Create a record.

    :param dict data: The record data.
    """
    indexer = RecordIndexer()
    with db.session.begin_nested():
        # create uuid
        rec_uuid = uuid.uuid4()
        # add the schema
        host = current_app.config.get('JSONSCHEMAS_HOST')
        data["$schema"] = \
            current_app.extensions['invenio-jsonschemas'].path_to_url(
            'custom_record/custom-record-v1.0.0.json')
        # create PID
        current_pidstore.minters['custid'](
            rec_uuid, data, pid_value='custom_pid_{}'.format(rec_uuid))
        # create record
        created_record = Record.create(data, id_=rec_uuid)
        # index the record
        indexer.index(created_record)
    db.session.commit()
Example #53
def delete(cls, data, vendor=None, delindex=True, force=False):
    """Delete an IrokoRecord record."""
    assert data.get(cls.pid_uuid_field)
    pid = data.get(cls.pid_uuid_field)
    record = cls.get_record_by_pid(pid, with_deleted=False)
    pid.delete()
    result = record.delete(force=force)
    if delindex:
        # Remove the document from the index; ignore if it was never indexed.
        try:
            RecordIndexer().delete(record)
        except NotFoundError:
            pass
    return result
Example #54
def test_records_serializers_dc(app, test_records_data):
    with app.app_context():
        pid, record = make_record(test_records_data)
        rec = {
            '_source':
            RecordIndexer._prepare_record(record, 'records', 'record').copy(),
            '_version':
            record.revision_id
        }
        dcxml = oaipmh_oai_dc(pid=pid, record=rec)

        namespaces = {'dc': 'http://purl.org/dc/elements/1.1/'}
        identifiers = dcxml.xpath('//dc:identifier', namespaces=namespaces)
        titles = dcxml.xpath('//dc:title', namespaces=namespaces)
        creators = dcxml.xpath('//dc:creator', namespaces=namespaces)
        descriptions = dcxml.xpath('//dc:description', namespaces=namespaces)
        subjects = dcxml.xpath('//dc:subject', namespaces=namespaces)
        contributors = dcxml.xpath('//dc:contributor', namespaces=namespaces)
        rights = dcxml.xpath('//dc:rights', namespaces=namespaces)
        publishers = dcxml.xpath('//dc:publisher', namespaces=namespaces)
        languages = dcxml.xpath('//dc:language', namespaces=namespaces)
        types = dcxml.xpath('//dc:type', namespaces=namespaces)

        assert identifiers
        for x in identifiers:
            assert x.text.endswith(pid.pid_value)

        assert [x.text
                for x in titles] == [r['title'] for r in record['titles']]
        assert [x.text for x in creators
                ] == [r['creator_name'] for r in record['creators']]
        assert [x.text for x in descriptions
                ] == [r['description'] for r in record['descriptions']]
        assert [x.text for x in types] == [
            r['resource_type_general'] for r in record['resource_types']
        ]
        assert [x.text for x in contributors
                ] == [r['contributor_name'] for r in record['contributors']]
        assert [x.text for x in publishers] == [record['publisher']]
        assert [x.text for x in languages] == [record['language']]

        assert [x.text for x in subjects] == record.get('keywords')

        rights = [x.text for x in rights]
        access = 'info:eu-repo/semantics/closedAccess'
        if record['open_access']:
            access = 'info:eu-repo/semantics/openAccess'
        assert access in rights
        license = record.get('license', {}).get('license')
        if license:
            assert license in rights
Example #55
    def delete(self, **kwargs):
        """Delete a record."""
        from b2share.modules.deposit.api import Deposit
        from b2share.modules.deposit.providers import DepositUUIDProvider

        pid = self.pid
        # Fetch deposit id from record and resolve deposit record and pid.
        depid = PersistentIdentifier.get(DepositUUIDProvider.pid_type,
                                         pid.pid_value)
        if depid.status == PIDStatus.REGISTERED:
            depid, deposit = Resolver(
                pid_type=depid.pid_type,
                object_type='rec',
                # Retrieve the deposit with the Record class on purpose
                # as the current Deposit api prevents the deletion of
                # published deposits.
                getter=Deposit.get_record,
            ).resolve(depid.pid_value)
            deposit.delete()

        # Mark all record's PIDs as DELETED
        all_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.object_type == pid.object_type,
            PersistentIdentifier.object_uuid == pid.object_uuid,
        ).all()
        for rec_pid in all_pids:
            if not rec_pid.is_deleted():
                rec_pid.delete()

        # Mark the bucket as deleted
        # delete all buckets linked to the deposit
        res = Bucket.query.join(RecordsBuckets).\
            filter(RecordsBuckets.bucket_id == Bucket.id,
                   RecordsBuckets.record_id == self.id).all()
        for bucket in res:
            bucket.deleted = True

        # Mark the record and deposit as deleted. The record is unindexed
        # via the trigger on record deletion.
        super(B2ShareRecord, self).delete()

        version_master = PIDNodeVersioning(pid=pid)
        # If the parent has no other children and no draft child
        # mark it as deleted
        if not version_master.children.all():
            if not version_master.draft_child:
                version_master.parent.delete()
        else:
            # Reindex the "new" last published version in order to have
            # its "is_last_version" up to date.
            RecordIndexer().index_by_id(version_master.last_child.object_uuid)
Example #56
def import_v1_data(verbose, download, token, download_directory, limit):
    click.secho("Importing data to the current instance")
    logger = logging.getLogger("sqlalchemy.engine")
    logger.setLevel(logging.ERROR)

    logfile = open(current_app.config.get('MIGRATION_LOGFILE'), 'a')
    logfile.write("\n\n\n~~~ Starting import task download={} limit={}"
                  .format(download, limit))
    if os.path.isdir(download_directory):
        os.chdir(download_directory)
    else:
        raise click.ClickException("%s does not exist or is not a directory. If you want to import "
                                   "records specify an empty, existing directory."
                                   % download_directory)
    if limit and not download:
        raise click.ClickException("Limit can only be set with download")

    if download:
        filelist = os.listdir('.')
        if len(filelist) > 0:
            click.secho("!!! Downloading data into existing directory, "
                        "overwriting previous data", fg='red')
        click.secho("----------")
        click.secho("Downloading data into directory %s" % download_directory)
        if limit is not None:
            limit = int(limit)
            click.secho("Limiting to %d records for debug purposes" % limit)
        download_v1_data(token, download_directory, logfile, limit)
    
    indexer = RecordIndexer(record_to_index=record_to_index)
    dirlist = os.listdir('.')

    click.secho("-----------")
    click.secho("Processing %d downloaded records" % (len(dirlist)))
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        # current_app.config['SERVER_NAME'],
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '', '', ''
    ))
    for d in dirlist:
        try:
            process_v1_record(d, indexer, base_url, logfile)
        except Exception:
            logfile.write("\n********************")
            logfile.write("\nERROR: exception while processing record /{}/___record.json___\n"
                          .format(d))
            logfile.write(traceback.format_exc())
            logfile.write("\n********************")

    logfile.close()
Example #57
def bulk_index_records(records):
    """Bulk index a list of records."""
    indexer = RecordIndexer()

    click.echo("Bulk indexing {} records...".format(len(records)))
    indexer.bulk_index([str(r.id) for r in records])
    indexer.process_bulk_queue()
    click.echo("Indexing completed!")
Example #58
    def prepare_data():
        """Prepare data."""
        loans = testdata["loans"]

        recs = []
        now = arrow.utcnow()

        def new_expiration_date(loan, date):
            loan["request_expire_date"] = date.date().isoformat()
            loan["state"] = "PENDING"
            loan.commit()
            recs.append(loan)

        # expired loans
        date = now - timedelta(days=1)
        new_expiration_date(loans[0], date)
        new_expiration_date(loans[1], date)
        date = now - timedelta(days=2)
        new_expiration_date(loans[2], date)
        expired_pids = [loans[0]["pid"], loans[1]["pid"], loans[2]["pid"]]

        # not expired loans
        not_expired_pids = []
        remaining_not_expired = loans[3:]
        n_days = 0  # today
        for loan in remaining_not_expired:
            date = now + timedelta(days=n_days)
            new_expiration_date(loan, date)
            not_expired_pids.append(loan["pid"])
            n_days += 1
        db.session.commit()

        indexer = RecordIndexer()
        for rec in recs:
            indexer.index(rec)

        current_search.flush_and_refresh(index="*")
        return expired_pids, not_expired_pids
Example #59
def test_delete_action(app):
    """Test delete action."""
    with app.app_context():
        testid = str(uuid.uuid4())
        action = RecordIndexer()._delete_action(
            dict(id=testid, op='delete', index='idx', doc_type='doc'))
        assert action['_op_type'] == 'delete'
        assert action['_index'] == 'idx'
        assert action['_type'] == 'doc'
        assert action['_id'] == testid

        # Skip JSONSchema validation
        with patch('invenio_records.api.Record.validate'):
            record = Record.create({
                '$schema': {
                    '$ref': '/records/authorities/authority-v1.0.0.json'
                },
                'title': 'Test',
            })
            db.session.commit()
        action = RecordIndexer()._delete_action(
            dict(id=str(record.id), op='delete', index=None, doc_type=None))
        assert action['_op_type'] == 'delete'
        assert action['_index'] == 'records-authorities-authority-v1.0.0'
        assert action['_type'] == ('authority-v1.0.0' if lt_es7 else '_doc')
        assert action['_id'] == str(record.id)

        record.delete()
        db.session.commit()
        action = RecordIndexer()._delete_action(
            dict(id=str(record.id), op='delete', index=None, doc_type=None))
        assert action['_op_type'] == 'delete'
        # Deleted record doesn't have '$schema', so index and doc type cannot
        # be determined, resulting to the defaults from config
        assert action['_index'] == app.config['INDEXER_DEFAULT_INDEX']
        assert action['_type'] == \
            (app.config['INDEXER_DEFAULT_DOC_TYPE'] if lt_es7 else '_doc')
        assert action['_id'] == str(record.id)
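
As this test demonstrates, once a record loses its `$schema` the delete action falls back to `INDEXER_DEFAULT_INDEX` and `INDEXER_DEFAULT_DOC_TYPE`. Deleting the Elasticsearch document while the record row still exists avoids relying on that fallback; a minimal sketch of the ordering used in Example #26:

from invenio_db import db
from invenio_indexer.api import RecordIndexer


def purge_record(record):
    """Remove a record from the index first, then from the database."""
    # Index and doc type are still derivable from the record's $schema.
    RecordIndexer().delete(record)
    record.delete(force=True)
    db.session.commit()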