def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date):
    """
    Method to queue jobs to calculate the IMEIs that are met by this condition.

    Arguments:
        executor: instance of the Python executor class, to submit back the results
        app_config: current DIRBS app configuration, to extract various configs required for the job
        run_id: run id of the current classification job
        curr_date: current date of the system
    """
    with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor:
        cursor.execute(sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} (
                                      imei_norm TEXT NOT NULL,
                                      virt_imei_shard SMALLINT NOT NULL
                                  ) PARTITION BY RANGE (virt_imei_shard)""")
                       .format(intermediate_tbl=self.intermediate_tbl_id(run_id)))
        partition_utils.create_imei_shard_partitions(conn, tbl_name=self.intermediate_tbl_name(run_id),
                                                     unlogged=True)
        parallel_shards = partition_utils.num_physical_imei_shards(conn)

    # Done with connection -- temp tables should now be committed
    virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
    for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
        yield executor.submit(self._calc_imeis_job, app_config, run_id, curr_date,
                              virt_imei_range_start, virt_imei_range_end)
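# For context: virt_imei_shard_bounds() carves a fixed virtual shard space into one
# (start, end) range per physical shard. A minimal sketch of the idea, assuming 100
# virtual shards and half-open ranges -- the real helper lives in
# dirbs.partition_utils and may differ in detail:

NUM_VIRT_IMEI_SHARDS = 100  # assumption for illustration


def virt_imei_shard_bounds_sketch(num_physical_shards):
    """Split [0, NUM_VIRT_IMEI_SHARDS) into num_physical_shards contiguous ranges."""
    base, remainder = divmod(NUM_VIRT_IMEI_SHARDS, num_physical_shards)
    bounds, start = [], 0
    for i in range(num_physical_shards):
        end = start + base + (1 if i < remainder else 0)
        bounds.append((start, end))
        start = end
    return bounds

# virt_imei_shard_bounds_sketch(4) -> [(0, 25), (25, 50), (50, 75), (75, 100)]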
def upgrade(self, db_conn):
    """Overrides AbstractMigrator upgrade method."""
    with db_conn.cursor() as cursor:
        logger = logging.getLogger('dirbs.db')
        logger.info('Re-partitioning stolen_list table...')
        cursor.execute('ALTER TABLE historic_stolen_list ADD COLUMN virt_imei_shard SMALLINT')
        cursor.execute('UPDATE historic_stolen_list SET virt_imei_shard = calc_virt_imei_shard(imei_norm)')
        cursor.execute('ALTER TABLE historic_stolen_list ALTER COLUMN virt_imei_shard SET NOT NULL')
        num_shards = partition_utils.num_physical_imei_shards(db_conn)
        partition_utils.repartition_stolen_list(db_conn, num_physical_shards=num_shards)
        logger.info('Re-partitioned stolen_list table')
        # Now that we can create tables during classification, we need to allow dirbs_core_classify
        # to create tables
        cursor.execute('GRANT CREATE ON SCHEMA core TO dirbs_core_classify')
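# The backfill above relies on the calc_virt_imei_shard SQL function to assign each
# normalized IMEI to a virtual shard. A rough Python analogue of that mapping,
# assuming 100 virtual shards, a digit-based bucket, and a hash fallback for
# non-numeric IMEIs; the SQL function is authoritative, so treat this purely as
# illustration:
import hashlib

NUM_VIRT_IMEI_SHARDS = 100  # assumption for illustration


def calc_virt_imei_shard_sketch(imei_norm):
    """Map a normalized IMEI string to a virtual shard in [0, NUM_VIRT_IMEI_SHARDS)."""
    tail = imei_norm[-2:]
    if tail.isdigit():
        return int(tail) % NUM_VIRT_IMEI_SHARDS
    # Non-numeric IMEIs: fall back to a stable hash bucket
    digest = hashlib.md5(imei_norm.encode('utf-8')).hexdigest()
    return int(digest, 16) % NUM_VIRT_IMEI_SHARDS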
def queue_update_classification_state_jobs(self, executor, app_config, run_id, curr_date):
    """
    Method to queue jobs to update the classification_state table after the IMEIs have been calculated.

    Arguments:
        executor: job executor instance to submit back the results to the queue
        app_config: current DIRBS app config object to use configuration from
        run_id: run_id of the current running classification job
        curr_date: current date of the system
    """
    with create_db_connection(app_config.db_config) as conn:
        parallel_shards = partition_utils.num_physical_imei_shards(conn)

    virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards)
    for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges:
        yield executor.submit(self._update_classification_state_job, app_config, run_id, curr_date,
                              virt_imei_range_start, virt_imei_range_end)
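# Both queue_* generators yield concurrent.futures.Future objects, one per physical
# shard range. A minimal sketch of how a driver might drain them in two phases --
# the executor choice and error handling are assumptions, not the actual DIRBS
# classification driver:
import concurrent.futures


def drain_condition_jobs(condition, app_config, run_id, curr_date, max_workers=4):
    """Run per-shard jobs for one condition and surface any worker failure."""
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        calc_futures = list(condition.queue_calc_imeis_jobs(executor, app_config, run_id, curr_date))
        for future in concurrent.futures.as_completed(calc_futures):
            future.result()  # re-raises any exception from the worker
        # Only update classification_state once every IMEI calculation shard has finished
        update_futures = list(condition.queue_update_classification_state_jobs(executor, app_config,
                                                                               run_id, curr_date))
        for future in concurrent.futures.as_completed(update_futures):
            future.result()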
def upgrade(self, conn):
    """Overrides AbstractMigrator upgrade method."""
    logger = logging.getLogger('dirbs.db')
    logger.info('Creating historic_whitelist table...')
    with conn.cursor() as cur:
        # Create historic table for whitelist
        cur.execute(sql.SQL("""CREATE TABLE historic_whitelist (
                                   imei_norm TEXT NOT NULL,
                                   associated BOOLEAN DEFAULT FALSE,
                                   eir_id TEXT DEFAULT NULL,
                                   start_date TIMESTAMP NOT NULL,
                                   end_date TIMESTAMP DEFAULT NULL,
                                   virt_imei_shard SMALLINT NOT NULL
                               ) PARTITION BY RANGE (virt_imei_shard)"""))
        num_shards = part_utils.num_physical_imei_shards(conn)
        logger.debug('Creating whitelist child partitions...')
        part_utils.create_imei_shard_partitions(conn, tbl_name='historic_whitelist',
                                                num_physical_shards=num_shards, fillfactor=80)

        # Add indices to each partition
        idx_metadata = [part_utils.IndexMetadatum(idx_cols=['imei_norm'],
                                                  is_unique=True,
                                                  partial_sql='WHERE end_date IS NULL')]
        part_utils.add_indices(conn, tbl_name='historic_whitelist', idx_metadata=idx_metadata)

        # Create view onto historic_whitelist
        cur.execute("""CREATE VIEW whitelist AS
                           SELECT imei_norm, associated, eir_id, virt_imei_shard
                             FROM historic_whitelist
                            WHERE end_date IS NULL WITH CHECK OPTION""")

        # Create view for IMEIs that are not associated yet
        cur.execute("""CREATE VIEW available_whitelist AS
                           SELECT imei_norm, virt_imei_shard
                             FROM historic_whitelist
                            WHERE associated IS FALSE
                              AND end_date IS NULL WITH CHECK OPTION""")

        # Create insert & update triggers on historic_registration_list to update whitelist
        # on insert and update
        cur.execute("""CREATE OR REPLACE FUNCTION insert_whitelist() RETURNS TRIGGER AS
                       $BODY$
                       BEGIN
                           IF new.status = 'whitelist' OR new.status IS NULL THEN
                               INSERT INTO historic_whitelist (imei_norm, start_date, end_date, virt_imei_shard)
                               VALUES (new.imei_norm, new.start_date, new.end_date, new.virt_imei_shard);
                           END IF;
                           RETURN new;
                       END;
                       $BODY$
                       LANGUAGE plpgsql;

                       -- update function
                       CREATE OR REPLACE FUNCTION update_whitelist() RETURNS TRIGGER AS
                       $BODY$
                       BEGIN
                           UPDATE historic_whitelist
                              SET end_date = new.end_date
                            WHERE imei_norm = new.imei_norm
                              AND new.end_date IS NOT NULL;
                           RETURN new;
                       END;
                       $BODY$
                       LANGUAGE plpgsql;

                       -- triggers
                       CREATE TRIGGER wl_insert_trigger
                           AFTER INSERT ON historic_registration_list
                           FOR EACH ROW EXECUTE PROCEDURE insert_whitelist();

                       CREATE TRIGGER wl_update_trigger
                           AFTER UPDATE ON historic_registration_list
                           FOR EACH ROW EXECUTE PROCEDURE update_whitelist();

                       ALTER TYPE job_command_type RENAME TO job_command_type_old;

                       --
                       -- Create type for command
                       --
                       CREATE TYPE job_command_type AS ENUM (
                           'dirbs-catalog',
                           'dirbs-classify',
                           'dirbs-db',
                           'dirbs-import',
                           'dirbs-listgen',
                           'dirbs-prune',
                           'dirbs-report',
                           'dirbs-whitelist'
                       );

                       ALTER TABLE job_metadata
                           ALTER COLUMN command TYPE job_command_type
                           USING command::TEXT::job_command_type;

                       DROP TYPE job_command_type_old;

                       --
                       -- Whitelist notification triggers
                       --
                       CREATE FUNCTION notify_insert_distributor() RETURNS TRIGGER AS
                       $BODY$
                       BEGIN
                           IF new.associated IS FALSE AND new.eir_id IS NULL THEN
                               PERFORM pg_notify('distributor_updates', row_to_json(NEW)::text);
                           END IF;
                           RETURN new;
                       END;
                       $BODY$
                       LANGUAGE plpgsql VOLATILE COST 100;

                       CREATE FUNCTION notify_remove_distributor() RETURNS TRIGGER AS
                       $BODY$
                       BEGIN
                           IF new.end_date IS NOT NULL THEN
                               PERFORM pg_notify('distributor_updates', row_to_json(NEW)::text);
                           END IF;
                           RETURN new;
                       END;
                       $BODY$
                       LANGUAGE plpgsql VOLATILE COST 100;

                       CREATE TRIGGER notify_insert_trigger
                           AFTER INSERT ON historic_whitelist
                           FOR EACH ROW EXECUTE PROCEDURE notify_insert_distributor();

                       CREATE TRIGGER notify_remove_trigger
                           AFTER UPDATE ON historic_whitelist
                           FOR EACH ROW EXECUTE PROCEDURE notify_remove_distributor();

                       GRANT SELECT, INSERT, UPDATE ON historic_whitelist TO dirbs_core_import_registration_list;
                       GRANT SELECT, INSERT, UPDATE, DELETE ON historic_whitelist TO dirbs_core_white_list;
                    """)  # noqa: Q440, Q449, Q441, Q447
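# The notify_insert_distributor/notify_remove_distributor triggers above publish the
# changed row as JSON on the 'distributor_updates' channel. A minimal psycopg2
# listener sketch (the DSN and the payload handling are placeholders, not DIRBS
# configuration):
import json
import select

import psycopg2
import psycopg2.extensions

listen_conn = psycopg2.connect('dbname=dirbs')  # placeholder DSN
listen_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
with listen_conn.cursor() as cur:
    cur.execute('LISTEN distributor_updates')

while True:
    # Wait up to 5s for the socket to become readable, then drain queued notifications
    if select.select([listen_conn], [], [], 5) == ([], [], []):
        continue
    listen_conn.poll()
    while listen_conn.notifies:
        notify = listen_conn.notifies.pop(0)
        row = json.loads(notify.payload)
        print(row['imei_norm'], row.get('end_date'))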
def lists(ctx, config, statsd, logger, run_id, conn, metadata_conn, command, metrics_root, metrics_run_root):
    """Prune obsolete lists data."""
    curr_date = ctx.obj['CURR_DATE']

    # Store metadata
    metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                       retention_months=config.retention_config.months_retention)

    logger.info('Pruning lists tables to remove any obsolete data with end_time outside the retention window...')
    retention_months = config.retention_config.months_retention
    if curr_date is None:
        curr_date = datetime.date.today()
    first_month_to_drop = datetime.date(curr_date.year, curr_date.month, 1) - \
        relativedelta.relativedelta(months=retention_months)
    logger.info('Lists data with end_time earlier than {0} will be pruned'.format(first_month_to_drop))

    with utils.db_role_setter(conn, role_name='dirbs_core_power_user'), conn.cursor() as cursor:
        logger.debug('Calculating original number of rows in lists tables...')
        # Each subquery alias is selected as a whole-row composite, so the values come
        # back as strings like '(123)' -- hence the strip('()') calls below
        row_count_sql = sql.SQL("""SELECT blacklist_row_count, noft_lists_row_count, excp_lists_row_count
                                     FROM (SELECT COUNT(*) FROM blacklist) AS blacklist_row_count,
                                          (SELECT COUNT(*) FROM notifications_lists) AS noft_lists_row_count,
                                          (SELECT COUNT(*) FROM exceptions_lists) AS excp_lists_row_count""")
        cursor.execute(row_count_sql)
        rows_before = cursor.fetchone()
        blacklist_rows_before = int(rows_before.blacklist_row_count.strip('()'))
        notflist_rows_before = int(rows_before.noft_lists_row_count.strip('()'))
        excplist_rows_before = int(rows_before.excp_lists_row_count.strip('()'))
        rows_before = blacklist_rows_before + notflist_rows_before + excplist_rows_before
        logger.debug('Calculated original number of rows in lists tables')
        statsd.gauge('{0}blacklist_rows_before'.format(metrics_run_root), blacklist_rows_before)
        statsd.gauge('{0}notifications_lists_rows_before'.format(metrics_run_root), notflist_rows_before)
        statsd.gauge('{0}exceptions_lists_rows_before'.format(metrics_run_root), excplist_rows_before)
        metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                           blacklist_rows_before=blacklist_rows_before,
                                           notifications_lists_rows_before=notflist_rows_before,
                                           exceptions_lists_rows_before=excplist_rows_before)

        # Calculate number of rows in the lists tables outside the retention window
        job_metadata_filter_sql = """SELECT run_id
                                       FROM job_metadata
                                      WHERE command = 'dirbs-listgen'
                                        AND end_time < '{0}'""".format(first_month_to_drop)

        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM blacklist
                                   WHERE start_run_id IN ({0})""".format(job_metadata_filter_sql)))
        total_bl_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of blacklist table outside the retention window to prune'
                    .format(total_bl_rows_out_window_to_prune))

        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM notifications_lists
                                   WHERE start_run_id IN ({0})""".format(job_metadata_filter_sql)))
        total_nl_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of notifications lists table outside the retention window to prune'
                    .format(total_nl_rows_out_window_to_prune))

        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM exceptions_lists
                                   WHERE start_run_id IN ({0})""".format(job_metadata_filter_sql)))
        total_el_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of exceptions lists table outside the retention window to prune'
                    .format(total_el_rows_out_window_to_prune))

        # We re-partition the tables to re-create them, passing a filter condition SQL
        logger.debug('Re-creating blacklist table...')
        num_phys_imei_shards = partition_utils.num_physical_imei_shards(conn)
        src_filter_sql = cursor.mogrify("""WHERE start_run_id NOT IN ({0})""".format(job_metadata_filter_sql))
        partition_utils.repartition_blacklist(conn, num_physical_shards=num_phys_imei_shards,
                                              src_filter_sql=str(src_filter_sql, encoding=conn.encoding))
        logger.debug('Re-created blacklist table')

        logger.debug('Re-creating notifications lists table...')
        partition_utils.repartition_notifications_lists(conn, num_physical_shards=num_phys_imei_shards,
                                                        src_filter_sql=str(src_filter_sql,
                                                                           encoding=conn.encoding))
        logger.debug('Re-created notifications lists table')

        logger.debug('Re-creating exceptions lists table...')
        partition_utils.repartition_exceptions_lists(conn, num_physical_shards=num_phys_imei_shards,
                                                     src_filter_sql=str(src_filter_sql,
                                                                        encoding=conn.encoding))
        logger.debug('Re-created exceptions lists table')

        logger.debug('Calculating new number of rows in lists tables...')
        cursor.execute(row_count_sql)
        rows_after = cursor.fetchone()
        blacklist_rows_after = int(rows_after.blacklist_row_count.strip('()'))
        notflist_rows_after = int(rows_after.noft_lists_row_count.strip('()'))
        excplist_rows_after = int(rows_after.excp_lists_row_count.strip('()'))
        rows_after = blacklist_rows_after + notflist_rows_after + excplist_rows_after
        logger.debug('Calculated new number of rows in lists tables')
        statsd.gauge('{0}blacklist_rows_after'.format(metrics_run_root), blacklist_rows_after)
        statsd.gauge('{0}notifications_lists_rows_after'.format(metrics_run_root), notflist_rows_after)
        statsd.gauge('{0}exceptions_lists_rows_after'.format(metrics_run_root), excplist_rows_after)
        metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                           blacklist_rows_after=blacklist_rows_after,
                                           notifications_lists_rows_after=notflist_rows_after,
                                           exceptions_lists_rows_after=excplist_rows_after)

    logger.info('Pruned {0:d} rows from lists tables'.format(rows_before - rows_after))
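# Worked example of the retention cut-off arithmetic used above: the cut-off is the
# first day of the current month shifted back by retention_months (values below are
# assumed, not taken from any real config):
import datetime
from dateutil import relativedelta

curr_date = datetime.date(2019, 7, 15)  # assumed current date
retention_months = 6                    # assumed retention window
first_month_to_drop = datetime.date(curr_date.year, curr_date.month, 1) - \
    relativedelta.relativedelta(months=retention_months)
assert first_month_to_drop == datetime.date(2019, 1, 1)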
def classification_state(ctx, config, statsd, logger, run_id, conn, metadata_conn, command,
                         metrics_root, metrics_run_root):
    """Prune obsolete classification_state data."""
    curr_date = ctx.obj['CURR_DATE']

    # Store metadata
    metadata.add_optional_job_metadata(metadata_conn, command, run_id,
                                       curr_date=curr_date.isoformat() if curr_date is not None else None,
                                       retention_months=config.retention_config.months_retention)

    logger.info('Pruning classification_state table to remove any classification state data related to '
                'obsolete conditions and data with end_date outside the retention window...')
    cond_config_list = [c.label for c in config.conditions]
    retention_months = config.retention_config.months_retention
    if curr_date is None:
        curr_date = datetime.date.today()
    first_month_to_drop = datetime.date(curr_date.year, curr_date.month, 1) - \
        relativedelta.relativedelta(months=retention_months)
    logger.info('Classification state data with end_date earlier than {0} will be pruned'
                .format(first_month_to_drop))

    with utils.db_role_setter(conn, role_name='dirbs_core_power_user'), conn.cursor() as cursor:
        logger.debug('Calculating original number of rows in classification_state table...')
        cursor.execute('SELECT COUNT(*) FROM classification_state')
        rows_before = cursor.fetchone()[0]
        logger.debug('Calculated original number of rows in classification_state table')
        statsd.gauge('{0}rows_before'.format(metrics_run_root), rows_before)
        metadata.add_optional_job_metadata(metadata_conn, command, run_id, rows_before=rows_before)

        # Calculate number of rows in the classification_state table outside the retention window
        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM classification_state
                                   WHERE end_date < %s"""), [first_month_to_drop])
        total_rows_out_window_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of classification_state table '
                    'with end_date outside the retention window to prune.'
                    .format(total_rows_out_window_to_prune))

        # Calculate number of rows belonging to conditions that no longer exist
        cursor.execute(sql.SQL("""SELECT COUNT(*)
                                    FROM classification_state
                                   WHERE NOT starts_with_prefix(cond_name, %s)"""), [cond_config_list])
        total_rows_no_cond_to_prune = cursor.fetchone()[0]
        logger.info('Found {0:d} rows of classification_state table with conditions '
                    'no longer existing to prune.'.format(total_rows_no_cond_to_prune))

        logger.debug('Re-creating classification_state table...')
        # Basically, we just re-partition the classification_state table to re-create it,
        # passing a src_filter_sql parameter
        num_phys_imei_shards = partition_utils.num_physical_imei_shards(conn)
        src_filter_sql = cursor.mogrify("""WHERE (end_date > %s OR end_date IS NULL)
                                                 AND cond_name LIKE ANY(%s)""",
                                        [first_month_to_drop, cond_config_list])
        partition_utils.repartition_classification_state(conn, num_physical_shards=num_phys_imei_shards,
                                                         src_filter_sql=str(src_filter_sql,
                                                                            encoding=conn.encoding))
        logger.debug('Re-created classification_state table')

        logger.debug('Calculating new number of rows in classification_state table...')
        cursor.execute('SELECT COUNT(*) FROM classification_state')
        rows_after = cursor.fetchone()[0]
        logger.debug('Calculated new number of rows in classification_state table')
        statsd.gauge('{0}rows_after'.format(metrics_run_root), rows_after)
        metadata.add_optional_job_metadata(metadata_conn, command, run_id, rows_after=rows_after)

    logger.info('Pruned {0:d} rows from classification_state table'.format(rows_before - rows_after))
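# The repartition_* helpers above receive src_filter_sql as a plain string, so the
# code binds parameters client-side with cursor.mogrify() and decodes the resulting
# bytes. A small self-contained sketch of that pattern (the DSN and condition labels
# are illustrative):
import datetime

import psycopg2

with psycopg2.connect('dbname=dirbs') as conn:  # placeholder DSN
    with conn.cursor() as cursor:
        src_filter_sql = cursor.mogrify(
            'WHERE (end_date > %s OR end_date IS NULL) AND cond_name LIKE ANY(%s)',
            [datetime.date(2019, 1, 1), ['gsma_not_found', 'local_stolen']])
        # mogrify returns bytes; decode with the connection's encoding before use
        print(str(src_filter_sql, encoding=conn.encoding))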
def _migrate_device_association_list(self, logger, conn):
    """Method to migrate the device association list."""
    with conn.cursor() as cursor:
        cursor.execute(sql.SQL("""CREATE TABLE historic_device_association_list (
                                      imei_norm TEXT NOT NULL,
                                      uid TEXT NOT NULL,
                                      start_date TIMESTAMP NOT NULL,
                                      end_date TIMESTAMP,
                                      virt_imei_shard SMALLINT NOT NULL
                                  ) PARTITION BY RANGE (virt_imei_shard);"""))
        num_shards = part_utils.num_physical_imei_shards(conn)
        logger.debug('Granting permissions to device_association_list partitions...')
        part_utils._grant_perms_association_list(conn, part_name='historic_device_association_list')
        logger.debug('Creating device_association_list child partitions...')
        part_utils.create_imei_shard_partitions(conn, tbl_name='historic_device_association_list',
                                                num_physical_shards=num_shards,
                                                perms_func=part_utils._grant_perms_association_list,
                                                fillfactor=80)

        # Add indexes to each partition
        idx_metadata = [part_utils.IndexMetadatum(idx_cols=['uid', 'imei_norm'],
                                                  is_unique=True,
                                                  partial_sql='WHERE end_date IS NULL')]
        part_utils.add_indices(conn, tbl_name='historic_device_association_list', idx_metadata=idx_metadata)

        # Create view onto historic_device_association_list
        cursor.execute("""CREATE OR REPLACE VIEW device_association_list AS
                              SELECT uid, imei_norm, virt_imei_shard
                                FROM historic_device_association_list
                               WHERE end_date IS NULL WITH CHECK OPTION""")  # noqa: Q440
        cursor.execute("""GRANT SELECT ON device_association_list
                          TO dirbs_core_classify, dirbs_core_api, dirbs_core_import_device_association_list""")

        # Create insert trigger function to clean and normalize staging data
        cursor.execute("""CREATE FUNCTION device_association_list_staging_data_insert_trigger_fn()
                              RETURNS TRIGGER
                              LANGUAGE plpgsql
                              AS $$
                          BEGIN
                              NEW.uid = NULLIF(TRIM(NEW.uid), '');
                              NEW.imei_norm = normalize_imei(NULLIF(TRIM(NEW.imei), ''));
                              RETURN NEW;
                          END
                          $$;

                          ALTER FUNCTION device_association_list_staging_data_insert_trigger_fn()
                              OWNER TO dirbs_core_power_user;""")

        logger.debug('Granting create permission to dirbs_core_import_device_association_list...')
        cursor.execute('GRANT CREATE ON SCHEMA core TO dirbs_core_import_device_association_list')
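# A Python analogue of what the staging-data trigger above does to each row: blank
# uids become NULL and the raw IMEI is normalized. normalize_imei's exact SQL
# definition is not shown here, so the 14-digit truncation rule below is an
# assumption for illustration only:
import re


def nullif_trim(value):
    """Mirror NULLIF(TRIM(value), ''): None for blank or absent input."""
    if value is None:
        return None
    return value.strip() or None


def normalize_imei_sketch(imei):
    """Assumed rule: keep the first 14 characters when the IMEI starts with 14 digits."""
    if imei is None:
        return None
    imei = imei.upper()
    return imei[:14] if re.match(r'^\d{14}', imei) else imei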