def records(): """Load test data fixture.""" import uuid from invenio_records.api import Record from invenio_pidstore.models import PersistentIdentifier, PIDStatus create_test_user() indexer = RecordIndexer() # Record 1 - Live record with db.session.begin_nested(): rec_uuid = uuid.uuid4() pid1 = PersistentIdentifier.create('recid', '1', object_type='rec', object_uuid=rec_uuid, status=PIDStatus.REGISTERED) Record.create( { 'title': 'Registered', 'description': 'This is an awesome description', 'control_number': '1', 'access_right': 'restricted', 'access_conditions': 'fuu', 'owners': [1, 2], 'recid': 1 }, id_=rec_uuid) indexer.index_by_id(pid1.object_uuid) db.session.commit() sleep(3)
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
def test_indexer_bulk_index(app, queue):
    """Test delay indexing."""
    with app.app_context():
        with establish_connection() as c:
            indexer = RecordIndexer()
            id1 = uuid.uuid4()
            id2 = uuid.uuid4()
            indexer.bulk_index([id1, id2])
            indexer.bulk_delete([id1, id2])

            consumer = Consumer(connection=c,
                                queue=indexer.mq_queue.name,
                                exchange=indexer.mq_exchange.name,
                                routing_key=indexer.mq_routing_key)

            messages = list(consumer.iterqueue())
            [m.ack() for m in messages]

            assert len(messages) == 4
            data0 = messages[0].decode()
            assert data0['id'] == str(id1)
            assert data0['op'] == 'index'
            data2 = messages[2].decode()
            assert data2['id'] == str(id1)
            assert data2['op'] == 'delete'
def load(source, verbose, cache, files, skip, max=None):
    """Load records, attach files and index them."""
    data = json.load(source)
    if isinstance(data, dict):
        data = [data]

    # to upload remote fulltext files
    upload_dir = os.path.join(current_app.instance_path, 'uploads')
    try:
        os.makedirs(upload_dir)
    except FileExistsError:
        pass

    # initialize file location if needed
    if not Location.get_default():
        data_dir = os.path.join(current_app.instance_path, 'files')
        db.session.add(
            Location(name='default', uri='file://' + data_dir, default=True))
        db.session.commit()

    # create records and index them
    click.secho('Creating records...', fg='green')
    rec_uuids = load_records_with_files(data, upload_dir, max, verbose, files,
                                        cache, skip)
    click.secho('Put %d records for indexing...' % len(rec_uuids), fg='green')
    RecordIndexer().bulk_index(rec_uuids)
    click.secho('Execute "run" command to process the queue!', fg='yellow')
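# Hedged sketch, not part of the original module: the load() command above
# ends by telling the operator to execute a "run" command, which is not shown
# in this file. Assuming that command only needs to drain the bulk-indexing
# queue, a minimal version built on invenio-indexer's process_bulk_queue()
# could look like the following. The command name, decorator choice and
# messages are illustrative assumptions, and an application context is
# assumed to be available.
import click
from invenio_indexer.api import RecordIndexer


@click.command('run')
def run():
    """Process the bulk indexing queue (illustrative sketch)."""
    click.secho('Processing indexing queue...', fg='green')
    RecordIndexer().process_bulk_queue()
    click.secho('Indexing queue processed.', fg='green')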
def migrate_chunk(chunk, broken_output=None, dry_run=False):
    from invenio_indexer.api import RecordIndexer
    from ..pidstore.minters import inspire_recid_minter

    indexer = RecordIndexer()

    index_queue = []
    for raw_record in chunk:
        record = marc_create_record(raw_record, keep_singletons=False)
        json_record = create_record(record)
        if '$schema' in json_record:
            json_record['$schema'] = url_for(
                'invenio_jsonschemas.get_schema',
                schema_path="records/{0}".format(json_record['$schema'])
            )
        rec_uuid = str(Record.create(json_record, id_=None).id)

        # Create persistent identifier.
        pid = inspire_recid_minter(rec_uuid, json_record)

        index_queue.append(pid.object_uuid)

    db.session.commit()

    # Request record indexing
    for i in index_queue:
        indexer.index_by_id(i)

    # Send task to migrate files.
    return rec_uuid
def test_index_action(app):
    """Test index action."""
    with app.app_context():
        record = Record.create({'title': 'Test'})
        db.session.commit()

        def receiver(sender, json=None, record=None, arguments=None,
                     **kwargs):
            json['extra'] = 'extra'
            arguments['pipeline'] = 'foobar'

        with before_record_index.connected_to(receiver):
            action = RecordIndexer()._index_action(dict(
                id=str(record.id),
                op='index',
            ))
            assert action['_op_type'] == 'index'
            assert action['_index'] == app.config['INDEXER_DEFAULT_INDEX']
            assert action['_id'] == str(record.id)
            if lt_es7:
                assert action['_type'] == \
                    app.config['INDEXER_DEFAULT_DOC_TYPE']
                assert action['_version'] == record.revision_id
                assert action['_version_type'] == 'external_gte'
            else:
                assert action['_type'] == '_doc'
                assert action['version'] == record.revision_id
                assert action['version_type'] == 'external_gte'
            assert action['pipeline'] == 'foobar'
            assert 'title' in action['_source']
            assert 'extra' in action['_source']
def remove(community_id, record_id):
    """Remove a record from community."""
    c = Community.get(community_id)
    assert c is not None
    c.remove_record(record_id)
    db.session.commit()
    RecordIndexer().index_by_id(record_id)
def add_oai_information(obj, eng):
    """Add OAI information, such as the identifier."""
    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] \
            or not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()

    indexer = RecordIndexer()
    indexer.index_by_id(pid.object_uuid)
def _create_records(path, verbose):
    """Create demo records."""
    indexer = RecordIndexer(
        record_to_index=lambda record: ('records', 'record'))
    if verbose > 0:
        click.secho('Creating records', fg='yellow', bold=True)
    with db.session.begin_nested():
        records_dir = os.path.join(path, 'records')
        nb_records = 0
        for root, dirs, files in os.walk(records_dir):
            for filename in files:
                split_filename = os.path.splitext(filename)
                if split_filename[1] == '.json':
                    rec_uuid = UUID(split_filename[0])
                    path = os.path.join(records_dir, root, filename)
                    record, deposit = _create_record_from_filepath(
                        path, rec_uuid, indexer, nb_records, verbose)
                    if verbose > 1:
                        click.secho('CREATED RECORD {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(record, indent=4)))
                        click.secho('CREATED DEPOSIT {0}:\n {1}'.format(
                            str(rec_uuid), json.dumps(deposit, indent=4)))
                    nb_records += 1
    if verbose > 0:
        click.secho('Created {} records!'.format(nb_records), fg='green')
def keywords_harvesting(self, max_retries=5, countdown=5):
    """Harvest all keywords."""
    try:
        # load from remote API the up-to-date list of keywords
        keywords_api = _get_keywords_from_api(
            url=current_app.config['CDS_KEYWORDS_HARVESTER_URL'])
        # load the list of keywords in the database
        keywords_db = query_to_objects(
            query=KeywordSearch().params(version=True), cls=Keyword)

        # index lists
        indexer = RecordIndexer()

        _update_existing_keywords(
            indexer=indexer, keywords_api=keywords_api,
            keywords_db=keywords_db)
        _delete_not_existing_keywords(
            indexer=indexer, keywords_api=keywords_api,
            keywords_db=keywords_db)

        db.session.commit()
    except RequestException as exc:
        raise self.retry(max_retries=max_retries, countdown=countdown,
                         exc=exc)
def store_record(obj, eng):
    """Store the record in the database."""
    set_springer_source_if_needed(obj)

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)
    except ValidationError as err:
        __halt_and_notify("Validation error: %s. Skipping..." % (err, ),
                          obj, eng)
    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                        not isinstance(data.get("collections", None),
                                       basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def update_item_sort_custom_es(cls, index_path, sort_json=[]):
    """Set custom sort.

    :param index_path: selected index path
    :param sort_json: custom item sort settings
    """
    try:
        upd_item_sort_q = {"query": {"match": {"path.tree": "@index"}}}
        es_index = current_app.config['SEARCH_UI_SEARCH_INDEX']
        es_doc_type = current_app.config['INDEXER_DEFAULT_DOCTYPE']
        query_q = json.dumps(upd_item_sort_q).replace("@index", index_path)
        query_q = json.loads(query_q)
        indexer = RecordIndexer()
        res = indexer.client.search(index=es_index, body=query_q)
        for d in sort_json:
            for h in res.get("hits").get("hits"):
                if int(h.get('_source').get('control_number')) == int(
                        d.get("id")):
                    body = {
                        'doc': {
                            'custom_sort': d.get('custom_sort'),
                        }
                    }
                    indexer.client.update(index=es_index,
                                          doc_type=es_doc_type,
                                          id=h.get("_id"),
                                          body=body)
                    break
    except Exception as ex:
        current_app.logger.debug(ex)
    return
def test_cli_full_reindex(app, db, es, capsys, es_acl_prepare, test_users):
    pid, record = create_record(
        {
            '$schema': RECORD_SCHEMA,
            'keywords': ['blah']
        },
        clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)
    current_search_client.indices.flush()

    with db.session.begin_nested():
        acl = ElasticsearchACL(name='test', schemas=[RECORD_SCHEMA],
                               priority=0, operation='get',
                               originator=test_users.u1,
                               record_selector={'term': {
                                   'keywords': 'blah'
                               }})
        db.session.add(acl)
        u = UserActor(name='test', acl=acl, originator=test_users.u1,
                      users=[test_users.u1])
        db.session.add(u)

    # now the record is not indexed and ACL is not in the helper index,
    # check it ...
    retrieved = RecordsSearch(
        index=schema_to_index(RECORD_SCHEMA)[0]).get_record(
            record.id).execute().hits[0].to_dict()
    assert '_invenio_explicit_acls' not in retrieved

    # just a precaution test
    assert current_explicit_acls.enabled_schemas == {RECORD_SCHEMA}

    # and run the reindex - should reindex one record
    from invenio_explicit_acls.cli import full_reindex_impl

    full_reindex_impl(verbose=True, records=True, in_bulk=False)

    captured = capsys.readouterr()
    assert captured.out.strip() == """
Reindexing ACLs
Updating ACL representation for "test" (%s) on schemas ['records/record-v1.0.0.json']
Getting records for schema records/record-v1.0.0.json
... collected 1 records
Adding 1 records to indexing queue""".strip() % (acl.id)

    current_search_client.indices.flush()

    retrieved = RecordsSearch(
        index=schema_to_index(RECORD_SCHEMA)[0]).get_record(
            record.id).execute().hits[0].to_dict()

    assert clear_timestamp(retrieved['_invenio_explicit_acls']) == [{
        'id': str(acl.id),
        'operation': 'get',
        'timestamp': 'cleared',
        'user': [1]
    }]
def publish(self):
    """Publish GitHub release as record."""
    id_ = uuid.uuid4()
    deposit = None
    try:
        db.session.begin_nested()
        deposit = self.deposit_class.create(self.metadata, id_=id_)

        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]

        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}".format(
                        url=url))

            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=res.raw,
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
        self.model.recordmetadata = deposit.model
        db.session.commit()

        # Send Datacite DOI registration task
        recid_pid, record = deposit.fetch_published()
        datacite_register.delay(recid_pid.pid_value, str(record.id))
    except Exception:
        db.session.rollback()
        # Remove deposit from index since it was not committed.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommitted deposit from index.")
        raise
def prepare_data():
    """Prepare data."""
    days = current_app.config[
        "ILS_CIRCULATION_MAIL_OVERDUE_REMINDER_INTERVAL"]
    loans = testdata["loans"]
    recs = []
    now = arrow.utcnow()

    def new_end_date(loan, date):
        loan["end_date"] = date.date().isoformat()
        loan["state"] = "ITEM_ON_LOAN"
        loan.commit()
        recs.append(loan)

    # overdue loans
    date = now - timedelta(days=days)
    new_end_date(loans[0], date)
    date = now - timedelta(days=days * 2)
    new_end_date(loans[1], date)

    # not overdue or overdue but not to be notified
    remaining_not_overdue = loans[2:]
    for loan in remaining_not_overdue:
        days = random.choice([-1, 0, 1])
        date = now - timedelta(days=days)
        new_end_date(loan, date)

    db.session.commit()

    indexer = RecordIndexer()
    for rec in recs:
        indexer.index(rec)
    current_search.flush_and_refresh(index="*")
def oaiset_update_records(minimal_record, db, es):
    """Fixture with records for query-based OAISet updating tests."""
    rec_ok = {
        'title': 'extra',
        '_oai': {
            'id': '12345',
            'sets': ['extra', 'user-foobar'],
            'updated': datetime(1970, 1, 1).isoformat(),
        }
    }
    # Record which needs removal of 'extra' from oai sets
    rec_remove = deepcopy(rec_ok)
    rec_remove['title'] = 'other'

    # Record which needs addition of 'extra' to oai sets
    rec_add = deepcopy(rec_ok)
    rec_add['_oai']['sets'] = ['user-foobar', ]

    records = [rec_ok, rec_remove, rec_add, ]
    rec_uuids = []
    for record_meta in records:
        rec = RecordMetadata()
        rec.json = deepcopy(record_meta)
        db.session.add(rec)
        db.session.commit()
        RecordIndexer().index_by_id(rec.id)
        rec_uuids.append(rec.id)
    current_search.flush_and_refresh('records')
    return rec_uuids
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy."""
    indexer = RecordIndexer()
    redis_url = current_app.config.get('CACHE_REDIS_URL')
    r = StrictRedis.from_url(redis_url)
    try:
        while r.llen('legacy_records'):
            raw_record = r.lpop('legacy_records')
            if raw_record:
                # FIXME use migrate_and_insert_record(raw_record)
                # The record might be None, in case a parallel
                # continuous_migration task has already consumed the queue.
                raw_record = zlib.decompress(raw_record)
                record = marc_create_record(raw_record,
                                            keep_singletons=False)
                recid = int(record['001'][0])
                prod_record = InspireProdRecords(recid=recid)
                prod_record.marcxml = raw_record
                json_record = create_record(record)
                with db.session.begin_nested():
                    try:
                        record = record_upsert(json_record)
                    except ValidationError as e:
                        # Invalid record, will not get indexed
                        errors = "ValidationError: Record {0}: {1}".format(
                            recid, e)
                        prod_record.valid = False
                        prod_record.errors = errors
                        db.session.merge(prod_record)
                        continue
                indexer.index_by_id(record.id)
    finally:
        db.session.commit()
        db.session.close()
def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    records_bulk = []
    start = timeit.default_timer()

    for _ in range(bulk_size):
        # Create fake record metadata
        record_data = {
            "contributors": [{"name": fake.name()}],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }

        # Create record in DB
        rec_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](rec_uuid, record_data)
        Record.create(record_data, id_=rec_uuid)

        # Add record for bulk indexing
        records_bulk.append(rec_uuid)

    # Flush to index and database
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")

    # Bulk index records
    ri = RecordIndexer()
    ri.bulk_index(records_bulk)
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")

    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.",
                fg="green")
def get_record_sets(record):
    """Find matching sets."""
    # get the sets whose search_pattern is None but which are already listed
    # in the record's own set list
    record_sets = set(record.get('_oai', {}).get('sets', []))
    for spec in _build_cache():
        if spec in record_sets:
            yield spec

    # get list of sets that match using percolator
    index, doc_type = RecordIndexer().record_to_index(record)
    document = record.dumps()
    percolator_doc_type = _get_percolator_doc_type(index)
    _create_percolator_mapping(index, percolator_doc_type)
    results = _percolate_query(index, doc_type, percolator_doc_type,
                               document)
    prefix = 'oaiset-'
    prefix_len = len(prefix)
    for match in results:
        set_name = match['_id']
        if set_name.startswith(prefix):
            name = set_name[prefix_len:]
            yield name

    # A bare return ends the generator; raising StopIteration inside a
    # generator is an error under PEP 479 (Python 3.7+).
    return
def delete_record(self, fileinstance_id, record_uuid):
    """Delete a record.

    :param fileinstance_id: The file instance id.
    :param record_uuid: The record's uuid.
    """
    # get the FileInstance object
    file_instance = FileInstance.get(fileinstance_id)
    # get the uri of the file, used below to locate its folder
    uri = file_instance.uri

    # build the path to delete by storing the index of the folder "data"
    i = uri.find('data')

    # remove the record indexing, the record and the file instance
    recind = RecordIndexer()
    recind.delete_by_id(record_uuid=record_uuid)
    self.delete_bucket()
    FileInstance.query.filter_by(id=fileinstance_id).delete()
    PersistentIdentifier.query.filter_by(object_uuid=record_uuid).delete()
    db.session.commit()

    # remove the file on disk and the folder containing it.
    # the full path is /home/<user>/.local/share/virtualenvs/
    # fare-platform-<code>/var/instance/data/<f1>/<f2>/<bucketid>/<filename>
    # having stored the index of the folder "data" (which holds all the
    # records), the path passed to the call below is trimmed at "<f1>/",
    # a folder whose name is 2 characters long: 8 is added to the index "i"
    # because that is the number of characters needed to complete the path
    # up to and including "<f1>/"
    shutil.rmtree(uri[:i + 8])

    current_app.logger.info("Deleted file= " + self['title'] +
                            ", by user= " + current_user.email)
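# Illustrative sketch, not part of the original class: a worked example of
# the uri[:i + 8] slice used in delete_record() above. The path, folder
# names and filename below are made-up assumptions.
uri = '/var/instance/data/a1/b2/0f8e2c1d-bucket/report.pdf'
i = uri.find('data')
# 'data' (4 chars) + '/' (1) + the two-character <f1> folder (2) + '/' (1) = 8
assert uri[:i + 8] == '/var/instance/data/a1/'
# shutil.rmtree(uri[:i + 8]) would then remove the whole '<f1>' subtree.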
def test_get_record_no_acls_anonymous(app, db, es, es_acl_prepare,
                                      test_users):
    with db.session.begin_nested():
        # create an empty ACL in order to get the _invenio_explicit_acls
        # filled
        acl = DefaultACL(name='test', schemas=[RECORD_SCHEMA],
                         priority=0, operation='get',
                         originator=test_users.u1)
        db.session.add(acl)
        actor = UserActor(name='test', acl=acl, users=[],
                          originator=test_users.u1)
        db.session.add(actor)

    pid, record = create_record({}, clz=SchemaEnforcingRecord)
    RecordIndexer().index(record)

    # make sure it is flushed
    current_search_client.indices.refresh()
    current_search_client.indices.flush()

    # try to get it ...
    with app.test_client() as client:
        res = client.get(record_url(pid))
        assert res.status_code == 401  # unauthorized

    # get it directly from ES
    res = get_from_es(pid)['_source']
    assert res['control_number'] == pid.pid_value
    assert res['$schema'] == 'https://localhost/schemas/' + RECORD_SCHEMA
    assert '_invenio_explicit_acls' in res
def run(self, event):
    """Process the circulation event.

    This method builds the frame, fetching the item and calling *_run*
    in a nested transaction.
    """
    resolver = Resolver(pid_type='crcitm', object_type='rec',
                        getter=Item.get_record)
    _, item = resolver.resolve(event.payload['item_id'])

    self.circulation_event_schema.context['item'] = item
    data, errors = self.circulation_event_schema.load(event.payload)

    if errors:
        event.response_code = 400
        event.response = {'message': errors}
        return

    if data.get('dry_run'):
        event.response_code = 204
        return

    with db.session.begin_nested():
        data, _ = self.circulation_event_schema.dump(data)
        self._run(item, data)
        item.commit()

    RecordIndexer().index(item)
def test_citation_formatter_citeproc_get(api, api_client, es, db, full_record,
                                         users):
    """Test records REST citeproc get."""
    r = Record.create(full_record)
    pid = PersistentIdentifier.create('recid', '12345', object_type='rec',
                                      object_uuid=r.id,
                                      status=PIDStatus.REGISTERED)
    db.session.commit()
    db.session.refresh(pid)
    RecordIndexer().index_by_id(r.id)
    current_search.flush_and_refresh(index='records')
    login_user_via_session(api_client, email=users[2]['email'])

    with api.test_request_context():
        records_url = url_for('invenio_records_rest.recid_item',
                              pid_value=pid.pid_value)
        res = api_client.get(records_url, query_string={'style': 'apa'},
                             headers={'Accept': 'text/x-bibliography'})
        assert res.status_code == 200
        assert 'Doe, J.' in res.get_data(as_text=True)
        assert 'Test title (Version 1.2.5).' in res.get_data(as_text=True)
        assert '(2014).' in res.get_data(as_text=True)
def prepare_data():
    """Prepare data."""
    days = current_app.config["ILS_CIRCULATION_LOAN_WILL_EXPIRE_DAYS"]
    loans = testdata["loans"]
    recs = []
    now = arrow.utcnow()

    def new_end_date(loan, date):
        loan["end_date"] = date.date().isoformat()
        loan["state"] = "ITEM_ON_LOAN"
        loan.commit()
        recs.append(loan)

    # expiring loans
    date = now + timedelta(days=days)
    new_end_date(loans[0], date)
    new_end_date(loans[1], date)
    new_end_date(loans[2], date)

    # not expiring
    remaining_not_overdue = loans[3:]
    for loan in remaining_not_overdue:
        days = random.choice([-2, -1, 0, 1, 2])
        date = now + timedelta(days=days)
        new_end_date(loan, date)

    db.session.commit()

    indexer = RecordIndexer()
    for rec in recs:
        indexer.index(rec)
    current_search.flush_and_refresh(index="*")
def cleanup_indexed_deposits():
    """Delete indexed deposits that do not exist in the database.

    .. note::

        This task exists because of deposit REST API calls sometimes failing
        after the deposit has already been sent for indexing to ES, leaving
        an inconsistent state of a deposit existing in ES and not in the
        database. It should be removed once a proper signal mechanism has
        been implemented in the ``invenio-records-rest`` and
        ``invenio-deposit`` modules.
    """
    search = RecordsSearch(index='deposits')
    q = (search.query('term', **{'_deposit.status': 'draft'})
         .fields(['_deposit.id']))
    res = q.scan()
    es_depids_info = [(d.to_dict().get('_deposit.id', [None])[0], d.meta.id)
                      for d in res]
    es_depids = {p for p, _ in es_depids_info}

    db_depids_query = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.pid_value.in_(es_depids))
    db_depids = {d.pid_value for d in db_depids_query}
    missing_db_depids = filter(lambda d: d[0] not in db_depids,
                               es_depids_info)

    indexer = RecordIndexer()
    deposit_index = 'deposits-records-record-v1.0.0'
    deposit_doc_type = 'deposit-record-v1.0.0'
    for _, deposit_id in missing_db_depids:
        indexer.client.delete(id=str(deposit_id), index=deposit_index,
                              doc_type=deposit_doc_type)
def records():
    """Load records."""
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # pkg resources the demodata
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')
    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for index, data in enumerate(split_blob(source.read()), start=1):
                # create uuid
                rec_uuid = uuid.uuid4()
                # do translate
                record = marc21.do(create_record(data))
                # create PID
                current_pidstore.minters['recid'](rec_uuid, record)
                # create record
                indexer.index(Record.create(record, id_=rec_uuid))
        db.session.commit()
def test_basic_search(app, db, es):
    """Test basic search functionality."""
    # The index should be empty
    assert len(ItemSearch().execute()) == 0

    # Create item1, search for everything
    item1 = Item.create({})
    item1.commit()
    record_indexer = RecordIndexer()
    record_indexer.index(item1)
    current_search.flush_and_refresh('_all')
    assert len(ItemSearch().execute()) == 1

    # Create item2, search for everything again
    item2 = Item.create({'foo': 'bar'})
    item2.commit()
    record_indexer.index(item2)
    current_search.flush_and_refresh('_all')
    assert len(ItemSearch().execute()) == 2

    # Search for item2
    assert len(ItemSearch().query('match', foo='bar').execute()) == 1

    # Search for nonsense
    assert len(ItemSearch().query('match', foo='banana').execute()) == 0
def demo_init():
    """Initialize demo site."""
    from flask import current_app
    records = []

    # Import bibliographic records
    click.secho('Importing bibliographic records', fg='green')
    records += import_records(
        marc21,
        current_app.extensions['invenio-jsonschemas'].path_to_url(
            'marc21/bibliographic/bd-v1.0.2.json'),
        pkg_resources.resource_filename('invenio_records',
                                        'data/marc21/bibliographic.xml'),
    )

    # FIXME add support for authority records.
    # Import authority records
    # click.secho('Importing authority records', fg='green')
    # records += import_records(
    #     marc21_authority,
    #     current_app.extensions['invenio-jsonschemas'].path_to_url(
    #         'marc21/authority/ad-v1.0.2.json'),
    #     pkg_resources.resource_filename(
    #         'invenio_records', 'data/marc21/authority.xml'),
    # )

    db.session.commit()

    # Index all records
    click.secho('Indexing records', fg='green')
    indexer = RecordIndexer()
    indexer.bulk_index(records)
    indexer.process_bulk_queue()
def test_reindex(app, script_info):
    """Test reindex."""
    # load records
    with app.test_request_context():
        runner = CliRunner()

        rec_uuid = uuid.uuid4()
        data = {'title': 'Test0'}
        record = Record.create(data, id_=rec_uuid)
        db.session.commit()

        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code

        res = runner.invoke(cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        sleep(5)

        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(record)

        res = current_search_client.get(index=index, doc_type=doc_type,
                                        id=rec_uuid)
        assert res['found']

        # Destroy queue
        res = runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert 0 == res.exit_code