# Imports assumed by this excerpt; the original module header is not shown.
# Adjust the dbhelper import to the actual package layout.
import logging
import pickle

import ibm_db
import pandas as pd
import psycopg2

import dbhelper

logger = logging.getLogger(__name__)


def _create_cache_table(self):
    if not self.is_postgre_sql:
        # DB2: parquet files go into a BLOB column; uniqueness of
        # (entity type, parquet name) is enforced via a named constraint.
        sql_statement = "CREATE TABLE %s.%s ( " \
                        "ENTITY_TYPE_ID BIGINT NOT NULL, " \
                        "PARQUET_NAME VARCHAR(2048) NOT NULL, " \
                        "PARQUET_FILE BLOB(2G), " \
                        "UPDATED_TS TIMESTAMP NOT NULL DEFAULT CURRENT TIMESTAMP, " \
                        "CONSTRAINT %s UNIQUE(ENTITY_TYPE_ID, PARQUET_NAME) ENFORCED ) " \
                        "ORGANIZE BY ROW" % (self.quoted_schema, self.quoted_cache_tablename,
                                             dbhelper.quotingTableName('uc_%s' % self.cache_tablename,
                                                                       self.is_postgre_sql))
        try:
            stmt = ibm_db.exec_immediate(self.db_connection, sql_statement)
            ibm_db.free_result(stmt)
        except Exception as ex:
            raise Exception('Execution of sql statement "%s" failed.' % sql_statement) from ex
    else:
        # PostgreSQL: BYTEA instead of BLOB, and CURRENT_TIMESTAMP instead of
        # the DB2 special register CURRENT TIMESTAMP.
        sql_statement = "CREATE TABLE %s.%s ( " \
                        "entity_type_id BIGINT NOT NULL, " \
                        "parquet_name VARCHAR(2048) NOT NULL, " \
                        "parquet_file BYTEA, " \
                        "updated_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, " \
                        "CONSTRAINT %s UNIQUE(entity_type_id, parquet_name))" % (
                            self.quoted_schema, self.quoted_cache_tablename,
                            dbhelper.quotingTableName('uc_%s' % self.cache_tablename, self.is_postgre_sql))
        try:
            dbhelper.execute_postgre_sql_query(self.db_connection, sql_statement)
        except Exception as ex:
            raise Exception('Execution of sql statement "%s" failed.' % sql_statement) from ex

    logger.info('Table %s.%s has been created.' % (self.quoted_schema, self.quoted_cache_tablename))
def __init__(self, tenant_id, entity_type_id, schema, db_connection, db_type):
    self.tenant_id = tenant_id
    self.entity_type_id = entity_type_id
    self.schema = schema
    self.db_connection = db_connection
    self.db_type = db_type

    # DB2 folds unquoted identifiers to upper case, PostgreSQL to lower case;
    # normalize schema and table names accordingly.
    if self.db_type == 'db2':
        self.is_postgre_sql = False
        self.schema = schema.upper()
        self.store_tablename = DBModelStore.STORE_TABLENAME.upper()
    elif self.db_type == 'postgresql':
        self.is_postgre_sql = True
        self.schema = schema.lower()
        self.store_tablename = DBModelStore.STORE_TABLENAME.lower()
    else:
        raise Exception('Initialization of %s failed because the database type %s is unknown.' % (
            self.__class__.__name__, self.db_type))

    self.quoted_schema = dbhelper.quotingSchemaName(self.schema, self.is_postgre_sql)
    self.quoted_store_tablename = dbhelper.quotingTableName(self.store_tablename, self.is_postgre_sql)

    self._handle_store_table()
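
# A minimal construction sketch, assuming this __init__ belongs to
# DBModelStore (it references DBModelStore.STORE_TABLENAME) and that a
# psycopg2 connection is at hand; all argument values are hypothetical,
# and only 'db2' and 'postgresql' are accepted as db_type:
#
#     conn = psycopg2.connect("dbname=analytics user=scott")  # hypothetical DSN
#     store = DBModelStore(tenant_id='tenant-1', entity_type_id=42,
#                          schema='Analytics', db_connection=conn,
#                          db_type='postgresql')
#     # schema is folded to 'analytics'; _handle_store_table() presumably
#     # ensures the store table exists before the store is used.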
def _push_cache(self, cache_filename, cache_pathname):
    if not self.is_postgre_sql:
        # DB2: upsert via MERGE on (entity type, parquet name); the file is
        # streamed into the BLOB column with PARAM_FILE, so it is never read
        # into memory as a whole.
        sql_statement = "MERGE INTO %s.%s AS TARGET " \
                        "USING (VALUES (?, ?, ?, CURRENT_TIMESTAMP)) " \
                        "AS SOURCE (ENTITY_TYPE_ID, PARQUET_NAME, PARQUET_FILE, UPDATED_TS) " \
                        "ON TARGET.ENTITY_TYPE_ID = SOURCE.ENTITY_TYPE_ID " \
                        "AND TARGET.PARQUET_NAME = SOURCE.PARQUET_NAME " \
                        "WHEN MATCHED THEN " \
                        "UPDATE SET TARGET.PARQUET_FILE = SOURCE.PARQUET_FILE, " \
                        "TARGET.UPDATED_TS = SOURCE.UPDATED_TS " \
                        "WHEN NOT MATCHED THEN " \
                        "INSERT (ENTITY_TYPE_ID, PARQUET_NAME, PARQUET_FILE, UPDATED_TS) " \
                        "VALUES (SOURCE.ENTITY_TYPE_ID, SOURCE.PARQUET_NAME, SOURCE.PARQUET_FILE, " \
                        "SOURCE.UPDATED_TS)" % (self.quoted_schema, self.quoted_cache_tablename)
        try:
            stmt = ibm_db.prepare(self.db_connection, sql_statement)
            try:
                ibm_db.bind_param(stmt, 1, self.entity_type_id)
                ibm_db.bind_param(stmt, 2, cache_filename)
                ibm_db.bind_param(stmt, 3, cache_pathname, ibm_db.PARAM_FILE, ibm_db.SQL_BLOB)
                ibm_db.execute(stmt)
            finally:
                ibm_db.free_result(stmt)
        except Exception as ex:
            raise Exception('Storing cache file %s under name %s failed with sql statement "%s".' % (
                cache_pathname, cache_filename, sql_statement)) from ex
    else:
        # PostgreSQL: read the file into memory and upsert via
        # INSERT ... ON CONFLICT on the unique constraint.
        try:
            with open(cache_pathname, 'rb') as f:
                blob = f.read()
        except Exception as ex:
            raise Exception('The cache file %s could not be read from disk.' % cache_pathname) from ex

        statement1 = "INSERT INTO %s.%s (entity_type_id, parquet_name, parquet_file, updated_ts) " % (
            self.quoted_schema, self.quoted_cache_tablename)
        statement3 = "ON CONFLICT ON CONSTRAINT %s DO UPDATE SET entity_type_id = EXCLUDED.entity_type_id, " \
                     "parquet_name = EXCLUDED.parquet_name, parquet_file = EXCLUDED.parquet_file, " \
                     "updated_ts = EXCLUDED.updated_ts" % dbhelper.quotingTableName(
                         'uc_%s' % self.cache_tablename, self.is_postgre_sql)
        sql_statement = statement1 + " VALUES (%s, %s, %s, current_timestamp) " + statement3
        try:
            dbhelper.execute_postgre_sql_query(self.db_connection, sql_statement,
                                               (self.entity_type_id, cache_filename, psycopg2.Binary(blob)))
        except Exception as ex:
            raise Exception('Storing cache under name %s failed with sql statement "%s".' % (
                cache_filename, sql_statement)) from ex

    logger.info('Cache has been stored under name %s in table %s.%s' % (
        cache_filename, self.quoted_schema, self.quoted_cache_tablename))
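
# A minimal sketch of how _push_cache might be invoked (paths and names are
# hypothetical; the caller is assumed to have written a DataFrame to a local
# parquet file first):
#
#     local_path = '/tmp/cache/device_type_42.parquet'   # hypothetical
#     df.to_parquet(local_path)
#     store._push_cache(cache_filename='device_type_42.parquet',
#                       cache_pathname=local_path)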
def execute(self, df, start_ts, end_ts, entities=None):
    key_id = 'key_id_'
    key_timestamp = 'key_timestamp_'

    # Select the requested columns plus the id and timestamp columns under
    # reserved aliases so they can serve as index keys shared with the event
    # dataframe.
    sql = 'SELECT %s, %s AS "%s", %s AS "%s" FROM %s' % (
        ', '.join([dbhelper.quotingColumnName(col, self.dms.is_postgre_sql) for col in self.columns]),
        dbhelper.quotingColumnName(self.id_col, self.dms.is_postgre_sql), key_id,
        dbhelper.quotingColumnName(self.timestamp_col, self.dms.is_postgre_sql), key_timestamp,
        dbhelper.quotingTableName(self.table, self.dms.is_postgre_sql))

    condition_applied = False
    if self.where_clause is not None:
        sql += ' WHERE %s' % self.where_clause
        condition_applied = True

    if start_ts is not None and end_ts is not None:
        # TODO start_ts and end_ts are expected to be not None
        if not condition_applied:
            sql += ' WHERE '
        else:
            sql += ' AND '
        # Restrict to the half-open interval (start_ts, end_ts].
        sql += "%s < %s AND %s <= %s" % (dbhelper.quotingSqlString(str(start_ts)),
                                         dbhelper.quotingColumnName(self.timestamp_col, self.dms.is_postgre_sql),
                                         dbhelper.quotingColumnName(self.timestamp_col, self.dms.is_postgre_sql),
                                         dbhelper.quotingSqlString(str(end_ts)))
        condition_applied = True

    if entities is not None:
        if not condition_applied:
            sql += ' WHERE '
        else:
            sql += ' AND '
        sql += "%s IN (%s)" % (dbhelper.quotingColumnName(self.id_col, self.dms.is_postgre_sql),
                               ', '.join([dbhelper.quotingSqlString(ent) for ent in entities]))

    self.parse_dates.add(key_timestamp)
    requested_col_names = self.names + [key_id, key_timestamp]
    df_sql = self._get_dms().db.read_sql_query(sql, parse_dates=self.parse_dates,
                                               requested_col_names=requested_col_names)
    df_sql = df_sql.astype(dtype={key_id: str}, errors='ignore')
    self.logger.debug('loaded_df=\n%s' % df_sql.head())

    # Reset and rename the event dataframe's index to the same reserved
    # column names so both frames align on (id, timestamp).
    original_event_index_names = df.index.names
    df = df.rename_axis([key_id, key_timestamp])
    df = df.reset_index()

    # Concatenate ignoring the index (simple row-wise concatenation), then
    # restore the index under its original names.
    df = pd.concat([df, df_sql], ignore_index=True, sort=False)
    df = df.set_index(keys=[key_id, key_timestamp])
    df = df.rename_axis(original_event_index_names)
    self.logger.debug('concatenated_df=\n%s' % df.head())

    return df
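
# For illustration: with columns=['temp'], id_col='deviceid',
# timestamp_col='rcv_timestamp_utc' and table='iot_device' (all hypothetical),
# and assuming the dbhelper quoting functions produce double-quoted
# identifiers and single-quoted literals on PostgreSQL, the query built above
# would look roughly like:
#
#     SELECT "temp", "deviceid" AS "key_id_", "rcv_timestamp_utc" AS "key_timestamp_"
#     FROM "iot_device"
#     WHERE '2024-01-01 00:00:00' < "rcv_timestamp_utc"
#       AND "rcv_timestamp_utc" <= '2024-01-02 00:00:00'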
def store_model(self, model_name, model, user_name=None, serialize=True):
    if serialize:
        try:
            model = pickle.dumps(model)
        except Exception as ex:
            raise Exception('Serialization of model %s that is supposed to be stored in ModelStore failed.' %
                            model_name) from ex

    if not self.is_postgre_sql:
        # DB2: upsert via MERGE on (entity type, model name); on a match the
        # model, timestamp and last-updated-by columns are all refreshed,
        # matching the PostgreSQL branch below.
        sql_statement = "MERGE INTO %s.%s AS TARGET " \
                        "USING (VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?)) " \
                        "AS SOURCE (ENTITY_TYPE_ID, MODEL_NAME, MODEL, UPDATED_TS, LAST_UPDATED_BY) " \
                        "ON TARGET.ENTITY_TYPE_ID = SOURCE.ENTITY_TYPE_ID " \
                        "AND TARGET.MODEL_NAME = SOURCE.MODEL_NAME " \
                        "WHEN MATCHED THEN " \
                        "UPDATE SET TARGET.MODEL = SOURCE.MODEL, " \
                        "TARGET.UPDATED_TS = SOURCE.UPDATED_TS, " \
                        "TARGET.LAST_UPDATED_BY = SOURCE.LAST_UPDATED_BY " \
                        "WHEN NOT MATCHED THEN " \
                        "INSERT (ENTITY_TYPE_ID, MODEL_NAME, MODEL, UPDATED_TS, LAST_UPDATED_BY) " \
                        "VALUES (SOURCE.ENTITY_TYPE_ID, SOURCE.MODEL_NAME, SOURCE.MODEL, " \
                        "SOURCE.UPDATED_TS, SOURCE.LAST_UPDATED_BY)" % (
                            self.quoted_schema, self.quoted_store_tablename)
        try:
            stmt = ibm_db.prepare(self.db_connection, sql_statement)
            try:
                ibm_db.bind_param(stmt, 1, self.entity_type_id)
                ibm_db.bind_param(stmt, 2, model_name)
                ibm_db.bind_param(stmt, 3, model)
                ibm_db.bind_param(stmt, 4, user_name)
                ibm_db.execute(stmt)
            finally:
                ibm_db.free_result(stmt)
        except Exception as ex:
            raise Exception('Storing model %s failed with sql statement "%s".' % (model_name, sql_statement)) from ex
    else:
        # PostgreSQL: upsert via INSERT ... ON CONFLICT on the unique constraint.
        statement1 = "INSERT INTO %s.%s (entity_type_id, model_name, model, updated_ts, last_updated_by) " % (
            self.quoted_schema, self.quoted_store_tablename)
        statement3 = "ON CONFLICT ON CONSTRAINT %s DO UPDATE SET entity_type_id = EXCLUDED.entity_type_id, " \
                     "model_name = EXCLUDED.model_name, model = EXCLUDED.model, " \
                     "updated_ts = EXCLUDED.updated_ts, last_updated_by = EXCLUDED.last_updated_by" % \
                     dbhelper.quotingTableName('uc_%s' % self.store_tablename, self.is_postgre_sql)
        sql_statement = statement1 + " VALUES (%s, %s, %s, current_timestamp, %s) " + statement3
        try:
            dbhelper.execute_postgre_sql_query(self.db_connection, sql_statement,
                                               (self.entity_type_id, model_name, psycopg2.Binary(model), user_name))
        except Exception as ex:
            raise Exception('Storing model %s failed with sql statement "%s".' % (model_name, sql_statement)) from ex

    logger.info('Model %s of size %d bytes has been stored in table %s.%s.' % (
        model_name, len(model) if model is not None else 0, self.quoted_schema, self.quoted_store_tablename))
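
# A minimal usage sketch: with serialize=True the model can be any picklable
# object; the names below are hypothetical.
#
#     model = {'kind': 'baseline', 'coefficients': [0.4, 1.7]}
#     store.store_model('anomaly_model', model, user_name='data_scientist')
#
#     # To store bytes that are already serialized, pass serialize=False:
#     store.store_model('anomaly_model', pickle.dumps(model),
#                       user_name='data_scientist', serialize=False)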