def es(app):
    """Elasticsearch fixture.

    Removes every index the client can see (twice, to catch leftovers),
    creates the registered indices, yields ``current_search_client`` and,
    once the consumer is done, deletes the registered indices again.

    :param app: not used directly; presumably requested so that the
        application is set up first (TODO confirm against the fixture
        declaration).
    """
    def _wipe_all_indices(message):
        """Best-effort removal of every index matching ``*``."""
        for idx in current_search_client.indices.get('*'):
            try:
                print(message, idx)
                current_search_client.indices.delete(idx)
            except Exception:
                # Best effort: report the failure and keep going.
                # ``Exception`` (not a bare ``except``) lets
                # KeyboardInterrupt/SystemExit propagate.
                traceback.print_exc()

    # remove all indices and data to get to a well-defined state
    current_search_client.indices.refresh()
    current_search_client.indices.flush()
    _wipe_all_indices("Removing index")

    current_search_client.indices.refresh()
    current_search_client.indices.flush()
    # just to make sure no index is left untouched
    _wipe_all_indices("Warning: leftover index")

    try:
        list(current_search.create())
    except RequestError:
        # Creation failed: drop whatever exists and retry, tolerating 400.
        list(current_search.delete(ignore=[404]))
        list(current_search.create(ignore=[400]))
    current_search_client.indices.refresh()

    yield current_search_client

    list(current_search.delete(ignore=[404]))
def es(app):
    """Create the search indices, yield ``current_search``, then drop them.

    If the first ``create()`` raises ``RequestError`` the indices are
    deleted (404 ignored) and creation is attempted a second time.
    """
    try:
        for _ in current_search.create():
            pass
    except RequestError:
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create():
            pass

    yield current_search

    # Teardown: remove the indices, tolerating ones that are already gone.
    for _ in current_search.delete(ignore=[404]):
        pass
def es(app):
    """Set up the search indices and hand out ``current_search``.

    A failed creation (``IndexAlreadyExistsError`` or ``RequestError``)
    triggers a delete-and-recreate pass; the indices are deleted again
    after the consumer has finished.
    """
    def _consume(results):
        # The calls return iterables that must be exhausted.
        for _ in results:
            pass

    try:
        _consume(current_search.create())
    except (IndexAlreadyExistsError, RequestError):
        _consume(current_search.delete(ignore=[404]))
        _consume(current_search.create(ignore=[400]))

    yield current_search

    _consume(current_search.delete(ignore=[404]))
def es(app):
    """Yield ``current_search`` with its indices created.

    On ``RequestError`` during creation the indices are deleted (ignoring
    404) and created again. After the ``yield`` the indices are deleted.
    """
    search = current_search  # same proxy object, shorter local name
    try:
        list(search.create())
    except RequestError:
        list(search.delete(ignore=[404]))
        list(search.create())

    yield search

    list(search.delete(ignore=[404]))
def es(app):
    """Elasticsearch fixture yielding ``current_search_client``.

    Creates the registered indices (falling back to delete + create with
    400 ignored when ``RequestError`` is raised), refreshes them, yields
    the client and finally deletes the indices.
    """
    try:
        for _ in current_search.create():
            pass
    except RequestError:
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(ignore=[400]):
            pass
    current_search_client.indices.refresh()

    yield current_search_client

    for _ in current_search.delete(ignore=[404]):
        pass
def es(app):
    """Elasticsearch fixture yielding ``current_search_client``.

    Index creation is retried after a delete when it fails with
    ``RequestError`` or ``IndexAlreadyExistsError``. The indices are
    refreshed before the ``yield`` and deleted after it.
    """
    def _run(steps):
        # Exhaust the iterable returned by create()/delete().
        for _ in steps:
            pass

    try:
        _run(current_search.create())
    except (RequestError, IndexAlreadyExistsError):
        _run(current_search.delete(ignore=[404]))
        _run(current_search.create(ignore=[400]))
    current_search_client.indices.refresh()

    yield current_search_client

    _run(current_search.delete(ignore=[404]))
def es(app):
    """Create the indices, refresh them and yield the search client.

    When creation raises ``RequestError`` the indices are deleted with
    400/404 ignored and created again. Teardown deletes them (404 ignored).
    """
    client = current_search_client  # same proxy object, local alias
    try:
        list(current_search.create())
    except RequestError:
        list(current_search.delete(ignore=[400, 404]))
        list(current_search.create())
    client.indices.refresh()

    yield client

    list(current_search.delete(ignore=[404]))
def es(app):
    """Yield ``current_search_client`` with freshly created indices.

    A ``RequestError`` on creation leads to a delete (400 and 404 ignored)
    followed by a second create. The indices are refreshed before the
    ``yield`` and deleted afterwards.
    """
    try:
        for _ in current_search.create():
            pass
    except RequestError:
        for _ in current_search.delete(ignore=[400, 404]):
            pass
        for _ in current_search.create():
            pass
    current_search_client.indices.refresh()

    yield current_search_client

    for _ in current_search.delete(ignore=[404]):
        pass
def es(app_deposit):
    """Elasticsearch fixture yielding ``current_search_client``.

    Creates the indices, retrying through delete + create (400 ignored)
    on ``RequestError``, refreshes them, and deletes them after the
    ``yield``.
    """
    def _exhaust(results):
        for _ in results:
            pass

    try:
        _exhaust(current_search.create())
    except RequestError:
        _exhaust(current_search.delete(ignore=[404]))
        _exhaust(current_search.create(ignore=[400]))
    current_search_client.indices.refresh()

    yield current_search_client

    _exhaust(current_search.delete(ignore=[404]))
def prefixed_es(app):
    """Elasticsearch fixture running with ``SEARCH_INDEX_PREFIX = 'test-'``.

    Sets the prefix, creates the indices (delete + create on
    ``RequestError``/``IndexAlreadyExistsError``), refreshes them, yields
    ``current_search_client`` and deletes the indices afterwards.

    The previous prefix is restored in a ``finally`` clause so that a
    failure during setup or teardown cannot leak the ``'test-'`` prefix
    into later users of ``app``. If no prefix was configured before, it
    is reset to ``''``.

    :param app: application whose ``config`` is temporarily modified.
    """
    previous_prefix = app.config.get('SEARCH_INDEX_PREFIX', '')
    app.config['SEARCH_INDEX_PREFIX'] = 'test-'
    try:
        try:
            list(current_search.create())
        except (RequestError, IndexAlreadyExistsError):
            list(current_search.delete(ignore=[404]))
            list(current_search.create(ignore=[400]))
        current_search_client.indices.refresh()

        yield current_search_client

        # Must run while the prefix is still set: the same prefixed
        # configuration that created the indices is used to delete them.
        list(current_search.delete(ignore=[404]))
    finally:
        app.config['SEARCH_INDEX_PREFIX'] = previous_prefix
def es(app):
    """Initialise InvenioSearch on ``app`` and yield ``current_search``.

    ``SEARCH_AUTOINDEX`` is set to an empty list before the extension is
    initialised. Inside an application context the indices are created
    (delete + create on ``RequestError``), ``current_search`` is yielded
    and the indices are deleted afterwards.
    """
    app.config.update(SEARCH_AUTOINDEX=[])
    InvenioSearch(app)

    with app.app_context():
        try:
            for _ in current_search.create():
                pass
        except RequestError:
            for _ in current_search.delete(ignore=[404]):
                pass
            for _ in current_search.create():
                pass

        yield current_search

        for _ in current_search.delete(ignore=[404]):
            pass
def es(app):
    """Yield ``current_search`` from within an application context.

    Configures ``SEARCH_AUTOINDEX=[]``, initialises ``InvenioSearch`` on
    the application, creates the indices (retrying after a delete when
    ``RequestError`` is raised) and deletes them after the ``yield``.
    """
    def _drain(results):
        for _ in results:
            pass

    app.config.update({'SEARCH_AUTOINDEX': []})
    InvenioSearch(app)

    with app.app_context():
        try:
            _drain(current_search.create())
        except RequestError:
            _drain(current_search.delete(ignore=[404]))
            _drain(current_search.create())

        yield current_search

        _drain(current_search.delete(ignore=[404]))
def es(app):
    """Prepare the search indices and the indexer queue, yield the client.

    Initialises ``InvenioSearch`` on ``app``, creates the indices (delete
    + create with 400 ignored on ``RequestError``), refreshes them and
    declares the queue configured under ``INDEXER_MQ_QUEUE``. Yields
    ``current_search_client`` and deletes the indices afterwards.
    """
    InvenioSearch(app)

    try:
        for _ in current_search.create():
            pass
    except RequestError:
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(ignore=[400]):
            pass
    current_search_client.indices.refresh()

    # Look the queue up first so a missing setting fails before connecting.
    indexer_queue = app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as connection:
        indexer_queue(connection).declare()

    yield current_search_client

    for _ in current_search.delete(ignore=[404]):
        pass
def clean_app(request, base_app):
    """Application with database and elasticsearch cleaned.

    Setup (inside an application context): removes the session, drops the
    database (a ``ProgrammingError`` is ignored), creates the database,
    deletes the search indices, then deletes and re-declares the queues.
    ``base_app`` is then yielded.

    NOTE(review): ``finalize`` is only registered with
    ``request.addfinalizer`` *after* the ``yield``, i.e. when the generator
    is resumed. Whether it then still runs depends on how the test runner
    drains finalizers -- confirm. The trailing ``return base_app`` has no
    consumer visible in this block.
    """
    with base_app.app_context():
        try:
            db.session.remove()
            drop_database(db.engine.url)
        except ProgrammingError:
            # Tolerated; presumably raised when there is nothing to drop.
            pass
        create_database(db.engine.url)
        # reset elasticsearch
        for deleted in current_search.delete(ignore=[404]):
            pass
        # reset queues
        current_queues.delete()
        current_queues.declare()

    yield base_app

    def finalize():
        # Drop the database and the queues created for this application.
        with base_app.app_context():
            db.session.remove()
            drop_database(db.engine.url)
            # Dispose the engine in order to close all connections. This is
            # needed for sqlite in memory databases.
            db.engine.dispose()
            current_queues.delete()
    request.addfinalizer(finalize)

    return base_app
def remap_indexes(ctx, yes_i_know, indexes, ignore_checks):
    """Delete and recreate the selected search indexes.

    :param ctx: context object; ``ctx.exit(1)`` is called on every
        validation failure.
    :param yes_i_know: when falsy, the user is asked for confirmation
        (declining aborts via ``click.confirm(..., abort=True)``).
    :param indexes: names of the indexes to remap; must not be empty.
    :param ignore_checks: when truthy, skip both the "index is known" check
        and the "all requested indexes were deleted" check.
    """
    if not yes_i_know:
        click.confirm(
            "This operation will irreversibly remove data from selected indexes in ES, do you want to continue?",
            abort=True,
        )

    known_indexes = set(current_search.mappings.keys())
    available = ", ".join(current_search.mappings.keys())

    if not indexes:
        click.echo("You should specify indexes which you want to remap")
        click.echo(f"Available indexes are: {available}")
        ctx.exit(1)

    # Sorted so the error message is deterministic.
    wrong_indexes = sorted(set(indexes) - known_indexes)
    if not ignore_checks and wrong_indexes:
        click.echo(f"Indexes {', '.join(wrong_indexes)} not recognized.")
        click.echo(f"Available indexes are: {available}")
        ctx.exit(1)

    click.echo(f"Deleting indexes: {', '.join(indexes)}")
    deleted_indexes = list(current_search.delete(index_list=indexes))
    if not ignore_checks and len(deleted_indexes) != len(indexes):
        # The original message had unbalanced parentheses.
        click.echo(
            f"Number of deleted indexes ({len(deleted_indexes)}) is different than requested ones ({len(indexes)})",
            err=True,
        )
        click.echo(f"deleted indexes {[i[0] for i in deleted_indexes]}")
        ctx.exit(1)

    created_indexes = list(
        current_search.create(ignore_existing=True, index_list=indexes)
    )
    click.echo(f"remapped indexes {[i[0] for i in created_indexes]}")
def clean_app(request, base_app):
    """Application with database and elasticsearch cleaned.

    Within an application context the session is removed, the database is
    dropped (``ProgrammingError`` ignored) and created again, the search
    indices are deleted and the queues are deleted and declared. The
    function then yields ``base_app``.

    NOTE(review): the ``finalize`` callback is registered through
    ``request.addfinalizer`` only once execution continues past the
    ``yield``; verify that the test runner still executes finalizers added
    at that point. The final ``return base_app`` has no consumer visible
    in this block.
    """
    with base_app.app_context():
        try:
            db.session.remove()
            drop_database(db.engine.url)
        except ProgrammingError:
            # Ignored on purpose; presumably the database did not exist yet.
            pass
        create_database(db.engine.url)
        # reset elasticsearch
        for deleted in current_search.delete(ignore=[404]):
            pass
        # reset queues
        current_queues.delete()
        current_queues.declare()

    yield base_app

    def finalize():
        # Tear down the database and the queues inside an app context.
        with base_app.app_context():
            db.session.remove()
            drop_database(db.engine.url)
            # Dispose the engine in order to close all connections. This is
            # needed for sqlite in memory databases.
            db.engine.dispose()
            current_queues.delete()
    request.addfinalizer(finalize)

    return base_app
def es(appctx):
    """Set up and tear down all registered Elasticsearch indices.

    Scope: module

    Puts the registered templates and creates the registered indexes,
    yields ``current_search_client`` and removes every index and template
    once done. Fixtures that change index content (e.g. index or remove
    documents) should use the function-scoped :py:data:`es_clear` fixture
    to leave the indexes clean for the following tests.
    """
    from invenio_search import current_search, current_search_client
    from invenio_search.errors import IndexAlreadyExistsError

    def _exhaust(steps):
        for _ in steps:
            pass

    try:
        _exhaust(current_search.put_templates())
    except IndexAlreadyExistsError:
        # Drop all templates and put them again.
        current_search_client.indices.delete_template('*')
        _exhaust(current_search.put_templates())

    try:
        _exhaust(current_search.create())
    except IndexAlreadyExistsError:
        _exhaust(current_search.delete(ignore=[404]))
        _exhaust(current_search.create())
    current_search_client.indices.refresh()

    try:
        yield current_search_client
    finally:
        current_search_client.indices.delete(index='*')
        current_search_client.indices.delete_template('*')
def simple_record(app):
    """Yield a minimal HEP record payload, then reset the search indices.

    The yielded dict holds the record under ``data`` and an independent
    copy of the same record under ``extra_data.source_data.data``. After
    the ``yield`` the ``holdingpen-hep`` index is deleted and the indices
    are created again.
    """
    def _hep_record():
        # Return a fresh dict on every call so the two copies in the
        # payload never share nested objects.
        return {
            '$schema': 'http://localhost:5000/schemas/records/hep.json',
            '_collections': ['Literature'],
            'document_type': ['article'],
            'titles': [{'title': 'Superconductivity'}],
            'acquisition_source': {'source': 'arXiv'},
            'dois': [{'value': '10.3847/2041-8213/aa9110'}],
        }

    payload = {
        'data': _hep_record(),
        'extra_data': {
            'source_data': {
                'data': _hep_record(),
                'extra_data': {},
            },
        },
    }
    yield payload

    for _ in current_search.delete(index_list='holdingpen-hep'):
        pass
    for _ in current_search.create(ignore=[400], ignore_existing=True):
        pass
def app():
    """Flask application.

    Builds an application with a simple testing configuration, pushes an
    application context, recreates the database tables and the search
    indices, creates the files location and yields the application so
    that all tests have access to it.

    See: http://flask.pocoo.org/docs/0.12/appcontext/.
    """
    settings = dict(
        # DEBUG: tests may fail when turned on because of a Flask bug ("A
        # setup function was called after the first request was handled."
        # when initializing - when Alembic initialization).
        DEBUG=False,
        WTF_CSRF_ENABLED=False,
        CELERY_TASK_ALWAYS_EAGER=True,
        CELERY_RESULT_BACKEND='cache',
        CELERY_CACHE_BACKEND='memory',
        CELERY_TASK_EAGER_PROPAGATES=True,
        TESTING=True,
        PRODUCTION_MODE=True,
        SQLALCHEMY_DATABASE_URI="postgresql+psycopg2://scoap3:dbpass123@localhost:5432/scoap3",
    )
    app = create_app(**settings)

    with app.app_context():
        # Start from an empty schema.
        db.session.close()
        db.drop_all()
        db.create_all()

        # Recreate the search indices.
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(ignore=[400]):
            pass

        _create_files_location()
        current_search.flush_and_refresh('*')

        yield app
def es(app):
    """Elasticsearch fixture.

    Removes every index visible to the client (best effort), creates the
    registered indices, refreshes them, yields ``current_search_client``
    and deletes the registered indices afterwards.

    :param app: not used directly; presumably requested so that the
        application is set up first (TODO confirm).
    """
    # remove all indices and data to get to a well-defined state
    for idx in current_search_client.indices.get('*'):
        try:
            current_search_client.indices.delete(idx)
        except Exception:
            # Deliberately best-effort: a failed delete must not abort the
            # setup. ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt/SystemExit propagate.
            continue

    try:
        list(current_search.create())
    except RequestError:
        list(current_search.delete(ignore=[404]))
        list(current_search.create(ignore=[400]))
    current_search_client.indices.refresh()

    yield current_search_client

    list(current_search.delete(ignore=[404]))
def reindex_all_relationships(destroy: bool = False):
    """Reindex all relationship documents.

    :param destroy: when true, delete and recreate the search indices
        (400/404 ignored) before indexing.

    All ``GroupRelationship`` rows are read in batches of 1000, turned
    into documents with ``build_doc`` and indexed in bulk.
    """
    batch_size = 1000

    if destroy:
        for _ in current_search.delete(ignore=[400, 404]):
            pass
        for _ in current_search.create(ignore=[400, 404]):
            pass

    relationships = GroupRelationship.query.yield_per(batch_size)
    for batch in chunks(relationships, batch_size):
        documents = map(build_doc, batch)
        index_documents(documents, bulk=True)
def _es_create_indexes():
    """Create all registered Elasticsearch indexes and refresh them.

    If creation raises ``RequestError`` the indexes are deleted and
    created again.
    """
    try:
        for _ in current_search.create():
            pass
    except RequestError:
        for _ in current_search.delete():
            pass
        for _ in current_search.create():
            pass
    current_search_client.indices.refresh()
def app():
    """Flask application with demosite data and no database isolation.

    Deprecated: do not use this fixture for new tests, unless for very
    specific use cases. Use `isolated_app` instead.

    Any db transaction performed during the tests is persisted into the
    db. The application is created with a simple testing configuration;
    inside an application context the database and the indices are
    recreated and the demo records are migrated. It then yields, so that
    all tests that explicitly use the ``app`` fixture have access to an
    application context.

    See: http://flask.pocoo.org/docs/0.12/appcontext/.
    """
    settings = dict(
        # DEBUG: tests may fail when turned on because of a Flask bug ("A
        # setup function was called after the first request was handled."
        # when initializing - when Alembic initialization).
        DEBUG=False,
        WTF_CSRF_ENABLED=False,
        CELERY_TASK_ALWAYS_EAGER=True,
        CELERY_RESULT_BACKEND='cache',
        CELERY_CACHE_BACKEND='memory',
        CELERY_TASK_EAGER_PROPAGATES=True,
        SECRET_KEY='secret!',
        RECORD_EDITOR_FILE_UPLOAD_FOLDER='tests/integration/editor/temp',
        TESTING=True,
    )
    app = create_app(**settings)
    app.extensions['invenio-search'].register_mappings(
        'records', 'inspirehep.modules.records.mappings')

    with app.app_context():
        with mock.patch(
            'inspirehep.modules.records.receivers.index_modified_citations_from_record.delay'
        ):
            # Celery task imports must be local, otherwise their
            # configuration would use the default pickle serializer.
            from inspirehep.modules.migrator.tasks import migrate_from_file

            # Rebuild the database schema through Alembic.
            db.session.close()
            db.drop_all()
            drop_alembic_version_table()
            Alembic(app=current_app).upgrade()

            # Recreate the search indices.
            for _ in current_search.delete(ignore=[404]):
                pass
            for _ in current_search.create(ignore=[400]):
                pass

            init_all_storage_paths()
            init_users_and_permissions()
            init_authentication_token()

            migrate_from_file(
                './inspirehep/demosite/data/demo-records.xml.gz',
                wait_for_results=True,
            )
            # Makes sure that all HEP records were migrated.
            current_search.flush_and_refresh('records-hep')

            yield app
def clean_records():
    """Truncate all the records from various tables.

    Collects a fixed list of record tables plus every table that has a
    foreign key to ``records_metadata``, asks for confirmation, truncates
    them, then destroys and recreates the search indexes whose alias
    starts with ``records``.

    Any exception is logged and the session is rolled back. Declining the
    confirmation also rolls back, so the transaction opened here is not
    left dangling.
    """
    from sqlalchemy.engine import reflection
    from invenio_search import current_search

    click.secho('>>> Truncating all records.')
    tables_to_truncate = [
        "records_metadata",
        "records_metadata_version",
        "inspire_prod_records",
        "inspire_orcid_records",
        "pidstore_pid",
    ]
    db.session.begin(subtransactions=True)
    try:
        # Grab any table with foreign keys to records_metadata for truncating
        inspector = reflection.Inspector.from_engine(db.engine)
        for table_name in inspector.get_table_names():
            for foreign_key in inspector.get_foreign_keys(table_name):
                if foreign_key["referred_table"] == "records_metadata":
                    tables_to_truncate.append(table_name)

        if not click.confirm("Going to truncate:\n{0}".format(
                "\n".join(tables_to_truncate))):
            # Nothing was changed; close the transaction opened above
            # instead of returning with it still open.
            db.session.rollback()
            return

        click.secho('Truncating tables...', fg='red', bold=True, err=True)
        with click.progressbar(tables_to_truncate) as tables:
            for table in tables:
                db.engine.execute(
                    "TRUNCATE TABLE {0} RESTART IDENTITY CASCADE".format(
                        table))
                click.secho("\tTruncated {0}".format(table))
        db.session.commit()

        # Restrict the index operations below to the records aliases.
        # ``items()`` works on both Python 2 and 3 (``iteritems()`` is
        # Python 2 only).
        current_search.aliases = {
            k: v
            for k, v in current_search.aliases.items()
            if k.startswith('records')
        }

        click.secho('Destroying indexes...', fg='red', bold=True, err=True)
        with click.progressbar(
                current_search.delete(ignore=[400, 404])) as bar:
            for name, response in bar:
                click.secho(name)

        click.secho('Creating indexes...', fg='green', bold=True, err=True)
        with click.progressbar(current_search.create(ignore=[400])) as bar:
            for name, response in bar:
                click.secho(name)
    except Exception as err:  # noqa
        db.session.rollback()
        current_app.logger.exception(err)
def es(app):
    """Yield ``current_search_client`` on a freshly created set of indices.

    Setup deletes the registered indices (400/404 ignored), deletes every
    index matching ``*``, creates the registered indices and refreshes
    them. All indices matching ``*`` are deleted again in a ``finally``
    clause after the ``yield``.
    """
    for _ in current_search.delete(ignore=[400, 404]):
        pass
    current_search_client.indices.delete(index='*')
    for _ in current_search.create():
        pass
    current_search_client.indices.refresh()

    try:
        yield current_search_client
    finally:
        current_search_client.indices.delete(index='*')
def es(base_app):
    """Provide ``current_search_client`` with clean, newly created indices.

    Before yielding: registered indices are deleted (400/404 ignored),
    every ``*`` index is deleted, the registered indices are created and
    refreshed. After yielding, every ``*`` index is deleted, even if the
    consumer raised.
    """
    client = current_search_client  # same proxy object, local alias

    list(current_search.delete(ignore=[400, 404]))
    client.indices.delete(index='*')
    list(current_search.create())
    client.indices.refresh()

    try:
        yield client
    finally:
        client.indices.delete(index='*')
def elasticsearch_index_destroy(alembic, verbose):
    """Destroy the elasticsearch indices and indexing queue.

    Deletes the registered indices (400/404 ignored) and then deletes the
    queue configured under ``INDEXER_MQ_QUEUE``; a queue that does not
    exist (``amqp.exceptions.NotFound``) is ignored.
    """
    list(current_search.delete(ignore=[400, 404]))

    indexer_queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as connection:
        bound_queue = indexer_queue(connection)
        try:
            bound_queue.delete()
        except amqp.exceptions.NotFound:
            # Nothing to delete.
            pass
def app(request, tmpdir):
    """Flask application fixture.

    Creates an instance directory under ``tmpdir``, exports
    ``B2SHARE_INSTANCE_PATH`` (taken from ``INSTANCE_PATH`` when that is
    set), builds the API application with a testing configuration and
    applies any ``config`` supplied through ``request.param``. The
    database and the search indices are then recreated, and a finalizer
    dropping the database is registered.
    """
    from b2share.factory import create_api

    instance_path = tmpdir.mkdir('instance_dir').strpath
    os.environ['B2SHARE_INSTANCE_PATH'] = os.environ.get(
        'INSTANCE_PATH', instance_path)

    settings = dict(
        TESTING=True,
        SERVER_NAME='localhost:5000',
        JSONSCHEMAS_HOST='localhost:5000',
        DEBUG_TB_ENABLED=False,
        SQLALCHEMY_DATABASE_URI=os.environ.get(
            'SQLALCHEMY_DATABASE_URI', 'sqlite://'),
        LOGIN_DISABLED=False,
        WTF_CSRF_ENABLED=False,
        SECRET_KEY="CHANGE_ME",
        SECURITY_PASSWORD_SALT='CHANGEME',
        CELERY_ALWAYS_EAGER=True,
        CELERY_RESULT_BACKEND="cache",
        CELERY_CACHE_BACKEND="memory",
        CELERY_EAGER_PROPAGATES_EXCEPTIONS=True,
        SUPPORT_EMAIL='*****@*****.**',
    )
    app = create_api(**settings)

    # update the application with the configuration provided by the test
    if hasattr(request, 'param') and 'config' in request.param:
        app.config.update(**request.param['config'])

    def _uses_external_database():
        # 'sqlite://' is skipped for create/drop; only other URIs are
        # created and dropped explicitly.
        return app.config['SQLALCHEMY_DATABASE_URI'] != 'sqlite://'

    with app.app_context():
        if _uses_external_database():
            try:
                drop_database(db.engine.url)
            except ProgrammingError:
                pass
            create_database(db.engine.url)
        db.create_all()
        list(current_search.delete(ignore=[404]))
        list(current_search.create(None))

    def finalize():
        with app.app_context():
            db.drop_all()
            if _uses_external_database():
                drop_database(db.engine.url)

    request.addfinalizer(finalize)
    return app
def indexed_records(records):
    """Fixture for the records, which are already indexed.

    Flushes all indices, deletes and recreates the registered indices,
    indexes every value of ``records`` with a ``RecordIndexer`` and
    flushes again so that the indexed records are searchable.

    :param records: mapping whose values are the records to index.
    :returns: the same ``records`` mapping.
    """
    current_search_client.indices.flush('*')

    # delete all elasticsearch indices and recreate them
    list(current_search.delete(ignore=[404]))
    list(current_search.create(None))

    for record in records.values():
        RecordIndexer().index(record)

    # flush the indices so that indexed records are searchable
    current_search_client.indices.flush('*')
    return records
def clear_environment(app):
    """Reset the database, the search indices and the initial fixtures.

    Inside an application context: drops all tables and the Alembic
    version table, runs the Alembic upgrade, deletes and recreates the
    search indices, flushes and refreshes ``records-hep`` and finally
    initialises the storage paths, users and permissions.
    """
    with app.app_context():
        # Database
        db.session.close()
        db.drop_all()
        drop_alembic_version_table()
        Alembic(app=app).upgrade()

        # Search indices
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(ignore=[400]):
            pass
        current_search.flush_and_refresh('records-hep')

        # Initial data
        init_all_storage_paths()
        init_users_and_permissions()
def elasticsearch_index_destroy(alembic, verbose):
    """Destroy the elasticsearch indices and indexing queue.

    Deletes a plain ``records`` index if present, deletes the registered
    indices (400/404 ignored) and deletes the queue configured under
    ``INDEXER_MQ_QUEUE``, ignoring ``amqp.exceptions.NotFound``.
    """
    # Delete "records" index as it might have been created during the
    # upgrade. This happens when the indices have not been initialized yet
    # and are indexed. Normally "records" should be an alias, not an index.
    current_search_client.indices.delete(index='records', ignore=[404])

    list(current_search.delete(ignore=[400, 404]))

    indexer_queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as connection:
        bound_queue = indexer_queue(connection)
        try:
            bound_queue.delete()
        except amqp.exceptions.NotFound:
            pass
def elasticsearch_index_destroy(alembic, verbose):
    """Destroy the elasticsearch indices and indexing queue.

    Steps: delete the ``records`` index (404 ignored), delete the
    registered indices (400/404 ignored), then delete the
    ``INDEXER_MQ_QUEUE`` queue; a missing queue is not an error.
    """
    def _delete_queue(connection):
        # Bind the configured queue to the connection and delete it.
        try:
            queue(connection).delete()
        except amqp.exceptions.NotFound:
            pass

    # "records" might have been created as a real index during the upgrade
    # (when the indices have not been initialized yet and are indexed).
    # Normally "records" should be an alias, not an index.
    current_search_client.indices.delete(index='records', ignore=[404])

    for _ in current_search.delete(ignore=[400, 404]):
        pass

    queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as c:
        _delete_queue(c)
def test_remap_index_which_is_missing_in_es(inspire_app, cli, override_config):
    """Remapping an index that is missing in ES exits with 1 and adds nothing.

    Runs under a random ``SEARCH_INDEX_PREFIX`` and compares the set of
    indices before and after the CLI call. Cleanup (deleting the indexes
    and resetting ``current_search._current_suffix``) runs in ``finally``
    so a failing assertion cannot leak state into other tests.
    """
    config = {"SEARCH_INDEX_PREFIX": f"{random.getrandbits(64)}-"}
    with override_config(**config):
        try:
            indexes_before = set(current_search.client.indices.get("*").keys())

            result = cli.invoke(
                ["index", "remap", "--index", "records-authors", "--yes-i-know"])
            current_search.flush_and_refresh("*")

            assert result.exit_code == 1

            indexes_after = set(current_search.client.indices.get("*").keys())
            difference = sorted(indexes_after - indexes_before)
            assert len(difference) == 0
        finally:
            try:
                list(current_search.delete("*"))
            finally:
                current_search._current_suffix = None
def test_remap_index_when_there_are_more_than_one_indexes_with_same_name_but_different_postfix(
        inspire_app, cli):
    """Remapping exits with 1 when an index exists under another suffix.

    ``records-data`` is created under one random suffix, the suffix is
    then switched to a second random value and the remap command is
    invoked; it must exit with 1 and must not add any index.

    Cleanup runs in ``finally`` so that a failing assertion cannot leave
    the random ``current_search._current_suffix`` (global state) or the
    created indexes behind for other tests.
    """
    current_search._current_suffix = f"-{random.getrandbits(64)}"
    try:
        list(current_search.create(ignore_existing=True,
                                   index_list="records-data"))
        current_search._current_suffix = f"-{random.getrandbits(64)}"

        indexes_before = set(current_search.client.indices.get("*").keys())

        result = cli.invoke(
            ["index", "remap", "--index", "records-data", "--yes-i-know"])
        current_search.flush_and_refresh("*")

        assert result.exit_code == 1

        indexes_after = set(current_search.client.indices.get("*").keys())
        difference = sorted(indexes_after - indexes_before)
        assert len(difference) == 0
    finally:
        try:
            list(current_search.delete("*"))
        finally:
            current_search._current_suffix = None
def purge(yes_i_know):
    """Remove every entry from DB and ES related to workflows.

    :param yes_i_know: when falsy, ask for confirmation first (declining
        aborts via ``click.confirm(..., abort=True)``).

    Truncates the tables listed in ``TABLES`` in a single statement, then
    deletes and recreates the indices listed in ``ES_INDICES``.
    """
    if not yes_i_know:
        click.confirm(
            'This operation will irreversibly remove data from DB and ES, do you want to continue?',
            abort=True)

    table_bullets = '\n* '.join(TABLES)
    click.secho('Truncating tables from DB:\n* {}'.format(table_bullets))

    query = "TRUNCATE {tables} RESTART IDENTITY"
    statement = query.format(tables=', '.join(TABLES))
    db.session.execute(statement)
    db.session.commit()

    click.secho('Removing workflows indices from ES...')
    for _ in current_search.delete(index_list=ES_INDICES):
        pass

    click.secho('Recreating indices...')
    for _ in current_search.create(ignore_existing=True, index_list=ES_INDICES):
        pass

    click.secho('Purge completed')
def elasticsearch_index_destroy(alembic, verbose):
    """Destroy the elasticsearch indices and indexing queue.

    Deletes every index returned for the ``records`` alias, deletes the
    registered indices (400/404 ignored) and deletes the queue configured
    under ``INDEXER_MQ_QUEUE``, ignoring ``amqp.exceptions.NotFound``.
    """
    # Delete "records" index as it might have been created during the
    # upgrade. This happens when the indices have not been initialized yet
    # and are indexed. Normally "records" should be an alias, not an index.
    # Adjusted: in ES 7 we need to specify the real index for deletion, not
    # the alias. The real index name is first collected with get_alias.
    # NOTE(review): with ``ignore=[404]`` the lookup may return an error
    # body rather than an alias mapping; its keys would then be used as
    # index names below -- confirm this is harmless.
    alias_lookup = current_search_client.indices.get_alias(
        index='records', ignore=[404])
    for index_name in alias_lookup.keys():
        current_search_client.indices.delete(index=index_name, ignore=[404])

    list(current_search.delete(ignore=[400, 404]))

    indexer_queue = current_app.config['INDEXER_MQ_QUEUE']
    with establish_connection() as connection:
        bound_queue = indexer_queue(connection)
        try:
            bound_queue.delete()
        except amqp.exceptions.NotFound:
            pass
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing and reindexing.

    Three phases, each followed by a search check: the records are
    searchable after a flush, nothing is found after the indices have
    been recreated, and the records are found again after the CLI
    reindex has been processed.
    """
    creator = test_users['deposits_creator']

    def _flush_all():
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')

    # Phase 1: records and deposits should be indexed
    with app.app_context():
        _flush_all()
    subtest_record_search(app, creator, test_records, test_records,
                          login_user)

    # Phase 2: delete all elasticsearch indices and recreate them
    with app.app_context():
        _flush_all()
        list(current_search.delete(ignore=[404]))
        list(current_search.create(None))
        _flush_all()
    # all records should have been deleted
    subtest_record_search(app, creator, [], [], login_user)

    # Phase 3: reindex through the CLI
    with app.app_context():
        runner = CliRunner()
        # Initialize queue
        init_result = runner.invoke(cli.queue, ['init', 'purge'],
                                    obj=script_info)
        assert 0 == init_result.exit_code
        # schedule a reindex task
        reindex_result = runner.invoke(cli.reindex, ['--yes-i-know'],
                                       obj=script_info)
        assert 0 == reindex_result.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        _flush_all()
    # records and deposits should be indexed again
    subtest_record_search(app, creator, test_records, test_records,
                          login_user)
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing and reindexing.

    Checks that the test records are searchable, that recreating the
    indices empties the search results, and that a CLI-triggered reindex
    makes the records searchable again.
    """
    creator = test_users['deposits_creator']
    flush = current_search_client.indices.flush

    with app.app_context():
        # flush the indices so that indexed records are searchable
        flush('*')
    # records and deposits should be indexed
    subtest_record_search(
        app, creator, test_records, test_records, login_user)

    with app.app_context():
        flush('*')
        # delete all elasticsearch indices and recreate them
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(None):
            pass
        # flush the indices so that indexed records are searchable
        flush('*')
    # all records should have been deleted
    subtest_record_search(app, creator, [], [], login_user)

    with app.app_context():
        runner = CliRunner()
        # Initialize the queue, then schedule a reindex task.
        commands = (
            (cli.queue, ['init', 'purge']),
            (cli.reindex, ['--yes-i-know']),
        )
        for command, arguments in commands:
            res = runner.invoke(command, arguments, obj=script_info)
            assert 0 == res.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        flush('*')
    # records and deposits should be indexed again
    subtest_record_search(
        app, creator, test_records, test_records, login_user)
def teardown():
    """Drop the database and delete the search indices.

    Runs inside an application context of ``app``, which is taken from
    the enclosing scope.
    """
    with app.app_context():
        database_url = str(db.engine.url)
        drop_database(database_url)
        for _ in current_search.delete(ignore=[404]):
            pass
def clean_records():
    """Truncate all the records from various tables.

    Builds the list of tables to truncate (a fixed set plus every table
    with a foreign key to ``records_metadata``), asks for confirmation,
    truncates them, and then destroys and recreates the search indexes
    whose alias starts with ``records``.

    Errors are logged and the session is rolled back. The session is also
    rolled back when the confirmation is declined, so the transaction
    started here is always closed.
    """
    from sqlalchemy.engine import reflection
    from invenio_search import current_search

    click.secho('>>> Truncating all records.')
    tables_to_truncate = [
        "records_metadata",
        "records_metadata_version",
        "inspire_prod_records",
        "inspire_orcid_records",
        "pidstore_pid",
    ]
    db.session.begin(subtransactions=True)
    try:
        # Grab any table with foreign keys to records_metadata for truncating
        inspector = reflection.Inspector.from_engine(db.engine)
        for table_name in inspector.get_table_names():
            for foreign_key in inspector.get_foreign_keys(table_name):
                if foreign_key["referred_table"] == "records_metadata":
                    tables_to_truncate.append(table_name)

        confirmed = click.confirm(
            "Going to truncate:\n{0}".format("\n".join(tables_to_truncate)))
        if not confirmed:
            # Close the transaction opened above rather than leaving it
            # open on return; nothing has been modified yet.
            db.session.rollback()
            return

        click.secho('Truncating tables...', fg='red', bold=True, err=True)
        with click.progressbar(tables_to_truncate) as tables:
            for table in tables:
                db.engine.execute(
                    "TRUNCATE TABLE {0} RESTART IDENTITY CASCADE".format(table))
                click.secho("\tTruncated {0}".format(table))
        db.session.commit()

        # Keep only the records aliases for the index operations below.
        # ``items()`` is used because ``iteritems()`` exists only on
        # Python 2.
        current_search.aliases = {
            k: v
            for k, v in current_search.aliases.items()
            if k.startswith('records')
        }

        click.secho('Destroying indexes...', fg='red', bold=True, err=True)
        with click.progressbar(
                current_search.delete(ignore=[400, 404])) as bar:
            for name, response in bar:
                click.secho(name)

        click.secho('Creating indexes...', fg='green', bold=True, err=True)
        with click.progressbar(
                current_search.create(ignore=[400])) as bar:
            for name, response in bar:
                click.secho(name)
    except Exception as err:  # noqa
        db.session.rollback()
        current_app.logger.exception(err)
def teardown():
    """Unregister the collection signals and delete the search indices.

    Runs inside an application context of ``app``, which is taken from
    the enclosing scope.
    """
    with app.app_context():
        current_collections.unregister_signals()
        for _ in current_search.delete():
            pass
def es(app):
    """Create the search indices, yield ``current_search``, delete them.

    Unlike a retrying setup, a failure of ``create()`` is not handled
    here and propagates to the caller.
    """
    for _ in current_search.create():
        pass

    yield current_search

    for _ in current_search.delete(ignore=[404]):
        pass
def teardown():
    """Delete the search indices inside an application context.

    ``app`` is taken from the enclosing scope.
    """
    with app.app_context():
        for _ in current_search.delete():
            pass