def reindex(config, **kwargs):
    """
    Recreate the Search Index.
    """
    client = config.registry["elasticsearch.client"]
    db = Session(bind=config.registry["sqlalchemy.engine"])

    # We use a randomly named index so that we can do a zero-downtime reindex.
    # Essentially we'll write to the randomly named index until all of the
    # data has been reindexed, at which point we'll point an alias at it and
    # delete the old index.

    # Create the new index and associate all of our doc types with it.
    index_base = config.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = config.registry.get("search.doc_types", set())
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=config.registry.get("elasticsearch.shards", 1),
        replicas=config.registry.get("elasticsearch.replicas", 1),
    )
    new_index.create()

    # From this point on, if any error occurs, we want to be able to delete
    # our in-progress index.
    try:
        db.execute(
            """ BEGIN TRANSACTION
                    ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE
            """
        )
        db.execute("SET statement_timeout = '600s'")

        bulk(client, _project_docs(db))
    except:
        new_index.delete()
        raise
    finally:
        db.rollback()
        db.close()

    # Now that we've finished indexing all of our data, we'll point the alias
    # at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)
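The get_index() helper is called here but not shown in these snippets. The following is only a minimal sketch of what such a helper might look like, assuming the elasticsearch_dsl Index API; the signature and behavior are inferred from the call sites above and are not confirmed by the source.

# Hypothetical sketch of a get_index()-style helper; the real Warehouse
# implementation lives elsewhere and may differ.
from elasticsearch_dsl import Index


def get_index(name, doc_types, *, using, shards, replicas, interval="1s"):
    # Build an Index bound to the given client, register each doc type on
    # it, and record the shard/replica/refresh settings so they are applied
    # when the index is created.
    index = Index(name, using=using)
    for doc_type in doc_types:
        index.doc_type(doc_type)  # older elasticsearch_dsl API (pre-7.x)
    index.settings(
        number_of_shards=shards,
        number_of_replicas=replicas,
        refresh_interval=interval,
    )
    return index

The interval="-1" value passed by the later versions below would disable index refreshes entirely during bulk indexing, which is why they restore refresh_interval afterwards.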
def populate_db(config):
    """
    Sync the Warehouse database with the initial sponsors list.

    Once this command has been executed, you shouldn't need to run it again.
    """
    # Imported here because we don't want to trigger an import from anything
    # but warehouse.cli at the module scope.
    from warehouse.db import Session

    session = Session(bind=config.registry["sqlalchemy.engine"])

    for data in SPONSORS_DICTS:
        name = data["name"]
        sponsor = session.query(Sponsor).filter_by(name=name).one_or_none()
        if sponsor:
            print(f"Skipping {name} sponsor because it already exists.")
            continue

        params = data.copy()
        img = params.pop("image")

        params["is_active"] = True
        params["link_url"] = params.pop("url")
        params["activity_markdown"] = "\n\n".join(params.pop("activity", [])).strip()
        params["color_logo_url"] = BLACK_BASE_URL + img
        if params["footer"] or params["infra_sponsor"]:
            params["white_logo_url"] = WHITE_BASE_URL + img

        sponsor = Sponsor(**params)

        try:
            session.add(sponsor)
            session.commit()
            print(f"{name} sponsor created successfully.")
        except Exception as e:
            session.rollback()
            print(f"Error while creating {name} sponsor:")
            print(f"\t{e}")
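SPONSORS_DICTS itself isn't shown above. The entry below is a hypothetical example of the shape the loop expects, limited to the keys it actually reads or pops; every value is made up for illustration.

# Illustrative only: one example entry in the shape populate_db() consumes.
EXAMPLE_SPONSOR = {
    "name": "Example Corp",          # used to look up an existing Sponsor row
    "image": "examplecorp.png",      # appended to BLACK_BASE_URL / WHITE_BASE_URL
    "url": "https://example.com/",   # popped and stored as link_url
    "activity": [                    # joined with blank lines into activity_markdown
        "Hosts part of the mirror network.",
        "Sponsors two sprints a year.",
    ],
    "footer": True,                  # footer or infra sponsors also get a white logo
    "infra_sponsor": False,
}
# Any keys left in the dict after the pops are passed straight through to
# Sponsor(**params), so they must match columns on the Sponsor model.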
def shell(config, type_):
    """
    Open up a Python shell with Warehouse preconfigured in it.
    """
    # Imported here because we don't want to trigger an import from anything
    # but warehouse.cli at the module scope.
    from warehouse.db import Session

    if type_ is None:
        type_ = autodetect()

    runner = {"bpython": bpython, "ipython": ipython, "plain": plain}[type_]

    session = Session(bind=config.registry["sqlalchemy.engine"])

    try:
        runner(config=config, db=session)
    except ImportError:
        raise click.ClickException(
            "The {!r} shell is not available.".format(type_)
        ) from None
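The autodetect() helper and the bpython/ipython/plain runners are referenced but not defined here. The sketch below shows one way they might be implemented; only the names and the runner(config=..., db=...) calling convention come from the snippet, the bodies are assumptions.

# Hypothetical sketches of the shell runners. Each raises ImportError when
# its interpreter isn't installed, which shell() converts to a ClickException.
def ipython(**kwargs):
    from IPython import start_ipython

    start_ipython(argv=[], user_ns=kwargs)


def bpython(**kwargs):
    import bpython as bpython_

    bpython_.embed(kwargs)


def plain(**kwargs):
    import code

    code.interact(local=kwargs)


def autodetect():
    # Prefer the richer shells when importable, fall back to the plain REPL.
    for type_, module in (("bpython", "bpython"), ("ipython", "IPython")):
        try:
            __import__(module)
            return type_
        except ImportError:
            continue
    return "plain"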
def reindex(config, **kwargs):
    """
    Recreate the Search Index.
    """
    client = config.registry["elasticsearch.client"]
    db = Session(bind=config.registry["sqlalchemy.engine"])
    number_of_replicas = config.registry.get("elasticsearch.replicas", 0)
    refresh_interval = config.registry.get("elasticsearch.interval", "1s")

    # We use a randomly named index so that we can do a zero-downtime reindex.
    # Essentially we'll write to the randomly named index until all of the
    # data has been reindexed, at which point we'll point an alias at it and
    # delete the old index.

    # Create the new index and associate all of our doc types with it.
    index_base = config.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = config.registry.get("search.doc_types", set())

    # Create the new index with zero replicas and index refreshes disabled
    # while we are bulk indexing.
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=config.registry.get("elasticsearch.shards", 1),
        replicas=0,
        interval="-1",
    )

    # From this point on, if any error occurs, we want to be able to delete
    # our in-progress index.
    try:
        db.execute("SET statement_timeout = '600s'")

        for _ in parallel_bulk(client, _project_docs(db)):
            pass
    except:
        new_index.delete()
        raise
    finally:
        db.rollback()
        db.close()

    # Now that we've finished indexing all of our data we can optimize it and
    # update the replicas and refresh intervals.
    client.indices.forcemerge(index=new_index_name)
    client.indices.put_settings(
        index=new_index_name,
        body={
            "index": {
                "number_of_replicas": number_of_replicas,
                "refresh_interval": refresh_interval,
            }
        },
    )

    # Point the alias at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)
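_project_docs(db) is used by every version of reindex() but never defined in these snippets. Below is a minimal sketch of the kind of generator bulk()/parallel_bulk() expects; ProjectDocType, Project, and from_db() are hypothetical names, not the actual Warehouse implementation.

# Hypothetical sketch: stream one serialized search document per project so
# the bulk helpers never have to hold the whole corpus in memory.
def _project_docs(db):
    # "Project" stands in for whatever SQLAlchemy model the real code queries,
    # and "ProjectDocType" for the elasticsearch_dsl document class.
    for project in db.query(Project).order_by(Project.name):
        doc = ProjectDocType.from_db(project)  # assumed doc-type factory
        # include_meta=True makes to_dict() emit the _index/_id metadata the
        # bulk helpers use to route each action.
        yield doc.to_dict(include_meta=True)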
def reindex(config, **kwargs):
    """
    Recreate the Search Index.
    """
    client = config.registry["elasticsearch.client"]
    db = Session(bind=config.registry["sqlalchemy.engine"])
    number_of_replicas = config.registry.get("elasticsearch.replicas", 0)
    refresh_interval = config.registry.get("elasticsearch.interval", "1s")

    # We use a randomly named index so that we can do a zero-downtime reindex.
    # Essentially we'll write to the randomly named index until all of the
    # data has been reindexed, at which point we'll point an alias at it and
    # delete the old index.

    # Create the new index and associate all of our doc types with it.
    index_base = config.registry["elasticsearch.index"]
    random_token = binascii.hexlify(os.urandom(5)).decode("ascii")
    new_index_name = "{}-{}".format(index_base, random_token)
    doc_types = config.registry.get("search.doc_types", set())
    shards = config.registry.get("elasticsearch.shards", 1)

    # Create the new index with zero replicas and index refreshes disabled
    # while we are bulk indexing.
    new_index = get_index(
        new_index_name,
        doc_types,
        using=client,
        shards=shards,
        replicas=0,
        interval="-1",
    )
    new_index.create(wait_for_active_shards=shards)

    # From this point on, if any error occurs, we want to be able to delete
    # our in-progress index.
    try:
        db.execute("SET statement_timeout = '600s'")

        for _ in parallel_bulk(client, _project_docs(db)):
            pass
    except:  # noqa
        new_index.delete()
        raise
    finally:
        db.rollback()
        db.close()

    # Now that we've finished indexing all of our data we can optimize it and
    # update the replicas and refresh intervals.
    client.indices.forcemerge(index=new_index_name)
    client.indices.put_settings(
        index=new_index_name,
        body={
            "index": {
                "number_of_replicas": number_of_replicas,
                "refresh_interval": refresh_interval,
            }
        },
    )

    # Point the alias at our new randomly named index and delete the old index.
    if client.indices.exists_alias(name=index_base):
        to_delete = set()
        actions = []
        for name in client.indices.get_alias(name=index_base):
            to_delete.add(name)
            actions.append({"remove": {"index": name, "alias": index_base}})
        actions.append({"add": {"index": new_index_name, "alias": index_base}})
        client.indices.update_aliases({"actions": actions})
        client.indices.delete(",".join(to_delete))
    else:
        client.indices.put_alias(name=index_base, index=new_index_name)