def watcher_thread(conn_string, run_id_dict, handlers_dict, dict_lock, watcher_thread_exit):
    try:
        for notif in await_pg_notifications(
            conn_string,
            channels=[CHANNEL_NAME],
            timeout=POLLING_CADENCE,
            yield_on_timeout=True,
            exit_event=watcher_thread_exit,
        ):
            if notif is None:
                if watcher_thread_exit.is_set():
                    break
            else:
                # The notification payload is '<run_id>_<event log row id>'
                run_id, index_str = notif.payload.split('_')
                if run_id not in run_id_dict:
                    continue

                index = int(index_str)
                with dict_lock:
                    handlers = handlers_dict.get(run_id, [])

                # Fetch the event row on a throwaway engine so the watcher holds
                # no long-lived connections between notifications
                engine = create_engine(
                    conn_string, isolation_level='AUTOCOMMIT', poolclass=db.pool.NullPool
                )
                try:
                    res = engine.execute(
                        db.select([SqlEventLogStorageTable.c.event]).where(
                            SqlEventLogStorageTable.c.id == index
                        ),
                    )
                    dagster_event = deserialize_json_to_dagster_namedtuple(res.fetchone()[0])
                finally:
                    engine.dispose()

                for (cursor, callback) in handlers:
                    if index >= cursor:
                        callback(dagster_event)
    except psycopg2.OperationalError:
        pass

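# A minimal sketch of launching the watcher above as a daemon thread, assuming
# the shared run_id_dict / handlers_dict / dict_lock / exit-event protocol it
# expects; start_watcher and its signature are illustrative, not part of the
# original code.
def start_watcher(conn_string, run_id_dict, handlers_dict, dict_lock):
    watcher_thread_exit = threading.Event()
    thread = threading.Thread(
        target=watcher_thread,
        args=(conn_string, run_id_dict, handlers_dict, dict_lock, watcher_thread_exit),
        daemon=True,  # the loop polls forever; don't block interpreter shutdown
    )
    thread.start()
    # Set watcher_thread_exit and join the thread to shut the watcher down.
    return thread, watcher_thread_exit
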
def __init__(self, postgres_url, inst_data=None):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.postgres_url = check.str_param(postgres_url, "postgres_url")
    self._disposed = False

    self._event_watcher = PostgresEventWatcher(self.postgres_url)

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
    )
    self._secondary_index_cache = {}

    table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())
    if "event_logs" not in table_names:
        with self.connect() as conn:
            alembic_config = get_alembic_config(__file__)
            retry_pg_creation_fn(lambda: SqlEventLogStorageMetadata.create_all(conn))

            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)

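# A sketch of what a retry helper in the spirit of retry_pg_connection_fn might
# look like, assuming bounded retries with exponential backoff on transient
# OperationalErrors; this is an illustration, not dagster's actual implementation.
import time

import sqlalchemy.exc


def retry_connection_fn_sketch(fn, retry_limit=5, retry_wait=0.2):
    while True:
        try:
            return fn()
        except sqlalchemy.exc.OperationalError:
            if retry_limit <= 0:
                raise
            retry_limit -= 1
            time.sleep(retry_wait)
            retry_wait *= 2  # back off between attempts
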
def __init__(self, postgres_url, inst_data=None):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.postgres_url = postgres_url

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool
    )

    table_names = retry_pg_connection_fn(lambda: db.inspect(self._engine).get_table_names())

    missing_main_table = "schedules" not in table_names and "jobs" not in table_names
    if missing_main_table:
        with self.connect() as conn:
            alembic_config = get_alembic_config(__file__)
            retry_pg_creation_fn(lambda: ScheduleStorageSqlMetadata.create_all(conn))

            # This revision may be shared by any other dagster storage classes using the same DB
            stamp_alembic_rev(alembic_config, conn)

def from_local(base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "runs")
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)

    should_mark_indexes = False
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            RunStorageSqlMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
            should_mark_indexes = True

    run_storage = SqliteRunStorage(conn_string, inst_data)

    if should_mark_indexes:
        # mark all secondary indexes
        run_storage.build_missing_indexes()

    return run_storage

def from_local(cls, base_dir, inst_data=None):
    check.str_param(base_dir, "base_dir")
    mkdir_p(base_dir)
    conn_string = create_db_conn_string(base_dir, "schedules")
    engine = create_engine(conn_string, poolclass=NullPool)
    alembic_config = get_alembic_config(__file__)

    should_migrate_data = False
    with engine.connect() as connection:
        db_revision, head_revision = check_alembic_revision(alembic_config, connection)
        if not (db_revision and head_revision):
            ScheduleStorageSqlMetadata.create_all(engine)
            engine.execute("PRAGMA journal_mode=WAL;")
            stamp_alembic_rev(alembic_config, connection)
            should_migrate_data = True

    schedule_storage = cls(conn_string, inst_data)

    if should_migrate_data:
        schedule_storage.migrate()
        schedule_storage.optimize()

    return schedule_storage

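# Hypothetical usage of the from_local() constructor above, assuming the class
# it belongs to is a SQLite-backed schedule storage (e.g. SqliteScheduleStorage);
# the base_dir value is a placeholder.
schedule_storage = SqliteScheduleStorage.from_local("/var/dagster-home/schedules")
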
def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLScheduleStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = mysql_url

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.mysql_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )

    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)

    if "jobs" not in table_names:
        with self.connect() as conn:
            alembic_config = mysql_alembic_config(__file__)
            retry_mysql_creation_fn(lambda: ScheduleStorageSqlMetadata.create_all(conn))
            stamp_alembic_rev(alembic_config, conn)

    super().__init__()

@contextmanager
def connect(self, run_id=None):
    with self._db_lock:
        check.str_param(run_id, "run_id")
        conn_string = self.conn_string_for_run_id(run_id)
        engine = create_engine(conn_string, poolclass=NullPool)
        # Lazily create and initialize the per-run database on first connect
        if run_id not in self._initialized_dbs:
            self._initdb(engine)
            self._initialized_dbs.add(run_id)

        conn = engine.connect()
        try:
            with handle_schema_errors(
                conn,
                get_alembic_config(__file__),
                msg="SqliteEventLogStorage for run {run_id}".format(run_id=run_id),
            ):
                yield conn
        finally:
            conn.close()
            engine.dispose()

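# Hypothetical usage of the connect() context manager above, assuming an
# already-constructed SqliteEventLogStorage instance named `storage`; the
# run_id value and the query are illustrative only.
with storage.connect(run_id="some-run-id") as conn:
    rows = conn.execute(db.select([SqlEventLogStorageTable.c.event])).fetchall()
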
def __init__(self, mysql_url, inst_data=None):
    experimental_class_warning("MySQLRunStorage")
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.mysql_url = mysql_url

    # Default to not holding any connections open to prevent accumulating connections
    # per DagsterInstance
    self._engine = create_engine(
        self.mysql_url,
        isolation_level="AUTOCOMMIT",
        poolclass=db.pool.NullPool,
    )

    self._index_migration_cache = {}
    table_names = retry_mysql_connection_fn(db.inspect(self._engine).get_table_names)

    # Stamp and create tables if the main table does not exist (we can't check alembic
    # revision because alembic config may be shared with other storage classes)
    if "runs" not in table_names:
        retry_mysql_creation_fn(self._init_db)
        self.build_missing_indexes()

    super().__init__()

def __init__(self, base_dir, inst_data=None):
    """Note that idempotent initialization of the SQLite database is done on a per-run_id
    basis in the body of connect, since each run is stored in a separate database."""
    self._base_dir = os.path.abspath(check.str_param(base_dir, "base_dir"))
    mkdir_p(self._base_dir)

    self._watchers = defaultdict(dict)
    self._obs = Observer()
    self._obs.start()
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)

    # Used to ensure that each run ID attempts to initialize its DB the first time it connects,
    # ensuring that the database will be created if it doesn't exist
    self._initialized_dbs = set()

    # Ensure that multiple threads (like the event log watcher) interact safely with each other
    self._db_lock = threading.Lock()

    if not os.path.exists(self.path_for_shard(INDEX_SHARD_NAME)):
        conn_string = self.conn_string_for_shard(INDEX_SHARD_NAME)
        engine = create_engine(conn_string, poolclass=NullPool)
        self._initdb(engine)
        self.reindex()

def optimize_for_dagit(self, statement_timeout):
    # When running in dagit, hold 1 open connection
    # https://github.com/dagster-io/dagster/issues/3719
    self._engine = create_engine(self.mysql_url, isolation_level="AUTOCOMMIT", pool_size=1)

def wipe_storage(mysql_url):
    engine = create_engine(mysql_url, isolation_level="AUTOCOMMIT", poolclass=db.pool.NullPool)
    try:
        ScheduleStorageSqlMetadata.drop_all(engine)
    finally:
        engine.dispose()
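
# Illustrative teardown usage for wipe_storage above, e.g. between test runs;
# the connection URL is a placeholder. drop_all removes every table registered
# on ScheduleStorageSqlMetadata, and dispose() releases the engine's pool even
# if the drop raises.
wipe_storage("mysql+mysqldb://test_user:test_password@localhost:3306/test_db")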