def test_tac_api(per_test_flask_app, per_test_postgres, logger, mocked_statsd, tmpdir, request, mocked_config, api_version): """Test TAC API call works with the security role created based on abstract role.""" dsn = per_test_postgres.dsn() dsn['user'] = '******' db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, create_db_connection( db_config, autocommit=True) as metadata_conn: with get_importer( GSMADataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, GSMADataParams( filename='sample_gsma_import_list_anonymized.txt')) as imp: imp.import_data() current_user = request.node.callspec.params['per_test_flask_app'] if current_user in ['dirbs_poweruser_login', 'dirbs_api_user']: rv = per_test_flask_app.get( url_for('{0}.tac_api'.format(api_version), tac='01234404')) assert rv.status_code == 200 results = json.loads(rv.data.decode('utf-8')) assert results['gsma'] is not None else: with pytest.raises(DatabaseRoleCheckException): per_test_flask_app.get( url_for('{0}.tac_api'.format(api_version), tac='01234404'))
def test_prune(per_test_postgres, tmpdir, logger, mocked_statsd, db_user, mocked_config, monkeypatch): """Test prune works with the poweruser security role.""" dsn = per_test_postgres.dsn() db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, create_db_connection( db_config, autocommit=True) as metadata_conn: with get_importer( OperatorDataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, OperatorDataParams( filename= 'testData1-operator-operator4-anonymized_20161101_20161130.csv', operator='1', extract=False, perform_leading_zero_check=False, mcc_mnc_pairs=[{ 'mcc': '111', 'mnc': '04' }], perform_unclean_checks=False, perform_file_daterange_check=False)) as imp: imp.import_data() conn.commit() runner = CliRunner() monkeypatch.setattr(mocked_config.db_config, 'user', db_user) result = runner.invoke(dirbs_prune_cli, ['triplets'], obj={'APP_CONFIG': mocked_config}) if db_user in ['dirbs_poweruser_login']: assert result.exit_code == 0 else: assert result.exit_code != 0
def test_report(per_test_postgres, tmpdir, db_user, logger, mocked_statsd, mocked_config, monkeypatch):
    """Test report generation works with the security role created based on abstract role."""
    dsn = per_test_postgres.dsn()
    db_config = DBConfig(ignore_env=True, **dsn)
    with create_db_connection(db_config) as conn, create_db_connection(
            db_config, autocommit=True) as metadata_conn:
        with get_importer(
                OperatorDataImporter, conn, metadata_conn, db_config, tmpdir,
                logger, mocked_statsd,
                OperatorDataParams(
                    filename='testData1-operator-operator1-anonymized_20161101_20161130.csv',
                    operator='operator1',
                    perform_unclean_checks=False,
                    extract=False)) as imp:
            imp.import_data()
    runner = CliRunner()
    output_dir = str(tmpdir)
    monkeypatch.setattr(mocked_config.db_config, 'user', db_user)
    result = runner.invoke(dirbs_report_cli, [
        'standard', '--disable-retention-check', '--disable-data-check', '11',
        '2016', output_dir
    ], obj={'APP_CONFIG': mocked_config})
    if db_user in ['dirbs_poweruser_login', 'dirbs_report_user']:
        assert result.exit_code == 0
    else:
        assert result.exit_code != 0
def db_conn(postgres, mocked_config, request):
    """Fixture to inject a DB connection into a test. Cleans up to make sure the DB is clean after each test."""
    # Create db connection
    current_db_user = mocked_config.db_config.user
    mocked_config.db_config.user = request.param
    conn = create_db_connection(mocked_config.db_config)
    yield conn
    # Close the connection and create a new one, as the db role might have changed for security tests
    conn.close()
    mocked_config.db_config.user = current_db_user
    conn = create_db_connection(mocked_config.db_config)
    with conn.cursor() as table_cursor, conn.cursor() as truncate_cursor:
        table_cursor.execute('SELECT tablename FROM pg_tables WHERE schemaname = current_schema() '
                             'AND tablename != \'schema_metadata\' AND tablename != \'radio_access_technology_map\'')
        for tblname in table_cursor:
            truncate_cursor.execute(sql.SQL('TRUNCATE {0} CASCADE').format(sql.Identifier(tblname[0])))
        table_cursor.execute('SELECT sequence_name FROM information_schema.sequences '
                             'WHERE sequence_schema = current_schema()')
        for seqname in table_cursor:
            truncate_cursor.execute(sql.SQL('ALTER SEQUENCE {0} RESTART WITH 1').format(sql.Identifier(seqname[0])))
        table_cursor.execute('SELECT matviewname FROM pg_matviews WHERE schemaname = current_schema()')
        for matviewname in table_cursor:
            truncate_cursor.execute(sql.SQL('REFRESH MATERIALIZED VIEW CONCURRENTLY {0}')
                                    .format(sql.Identifier(matviewname[0])))
    # Commit truncations
    conn.commit()
    conn.close()
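# Hedged usage sketch (not part of the original suite): tests select the DB role for the db_conn
# fixture above via pytest's indirect parametrization; the role name used here is one of those
# created in _postgres_impl, and the query is illustrative only (schema_metadata is known from
# this module to expose a phys_shards column).
@pytest.mark.parametrize('db_conn', ['dirbs_poweruser_login'], indirect=True)
def test_db_conn_usage_sketch(db_conn):
    """Illustrative test showing how the db_conn fixture is consumed."""
    with db_conn.cursor() as cursor:
        cursor.execute('SELECT phys_shards FROM schema_metadata')
        assert cursor.fetchone() is not None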
def test_imei_api_registration_list(per_test_flask_app, per_test_postgres, logger, mocked_statsd, tmpdir, request, mocked_config, api_version): """Test IMEI API call after registration list import.""" dsn = per_test_postgres.dsn() db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, \ create_db_connection(db_config, autocommit=True) as metadata_conn: with get_importer( GSMADataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, GSMADataParams( filename='gsma_dump_small_july_2016.txt')) as imp: imp.import_data() with get_importer( RegistrationListImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, RegistrationListParams( content= 'APPROVED_IMEI,make,model,status,model_number,brand_name,' 'device_type,radio_interface,device_id\n' '21260934000003,,,,,,,,1')) as imp: imp.import_data() if api_version == 'v1': rv = per_test_flask_app.get( url_for('{0}.imei_api'.format(api_version), imei='21260934000003')) assert rv.status_code == 200 else: # api version 2.0 rv = per_test_flask_app.get( url_for('{0}.imei_get_api'.format(api_version), imei='21260934000003')) assert rv.status_code == 200
def test_imei_api_pairing_list(per_test_flask_app, per_test_postgres, logger, mocked_statsd, tmpdir, request, mocked_config, api_version): """Test IMEI API call after pairing list import.""" dsn = per_test_postgres.dsn() db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, \ create_db_connection(db_config, autocommit=True) as metadata_conn: with get_importer( GSMADataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, GSMADataParams( filename='gsma_dump_small_july_2016.txt')) as imp: imp.import_data() with get_importer( PairingListImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, PairListParams(content='imei,imsi\n' '811111013136460,111018001111111\n' '359000000000000,111015113222222\n' '357756065985824,111015113333333')) as imp: imp.import_data() if api_version == 'v1': rv = per_test_flask_app.get( url_for('{0}.imei_api'.format(api_version), imei='21260934000003')) assert rv.status_code == 200 else: # api version 2.0 rv = per_test_flask_app.get( url_for('{0}.imei_get_pairings_api'.format(api_version), imei='21260934000003')) assert rv.status_code == 200
def test_imei_api(per_test_flask_app, per_test_postgres, logger, mocked_statsd, tmpdir, request, mocked_config, api_version): """Test IMEI API call works with the security role created based on abstract role.""" dsn = per_test_postgres.dsn() db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, \ create_db_connection(db_config, autocommit=True) as metadata_conn: with get_importer(OperatorDataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, OperatorDataParams( filename='testData1-operator-operator1-anonymized_20161101_20161130.csv', operator='operator1', perform_unclean_checks=False, extract=False)) as imp: imp.import_data() current_user = request.node.callspec.params['per_test_flask_app'] if api_version == 'v1': if current_user in ['dirbs_poweruser_login', 'dirbs_api_user']: rv = per_test_flask_app.get(url_for('{0}.imei_api'.format(api_version), imei='388260336982806', include_seen_with=1)) assert rv.status_code == 200 assert json.loads(rv.data.decode('utf-8'))['seen_with'] == \ [{'imsi': '11101400135251', 'msisdn': '22300825684694'}, {'imsi': '11101400135252', 'msisdn': '22300825684692'}] assert json.loads(rv.data.decode('utf-8'))['realtime_checks']['ever_observed_on_network'] is True else: with pytest.raises(DatabaseRoleCheckException): per_test_flask_app.get(url_for('{0}.imei_api'.format(api_version), imei='388260336982806', include_seen_with=1)) else: # api version 2.0 if current_user in ['dirbs_poweruser_login', 'dirbs_api_user']: rv = per_test_flask_app.get(url_for('{0}.imei_get_subscribers_api'.format(api_version), imei='388260336982806')) assert rv.status_code == 200 data = json.loads(rv.data.decode('utf-8')) assert len(data['subscribers']) != 0 assert data['subscribers'] == [ { 'imsi': '11101400135251', 'last_seen': '2016-11-01', 'msisdn': '22300825684694' }, { 'imsi': '11101400135252', 'last_seen': '2016-11-02', 'msisdn': '22300825684692' }] else: with pytest.raises(DatabaseRoleCheckException): per_test_flask_app.get(url_for('{0}.imei_get_subscribers_api'.format(api_version), imei='388260336982806'))
def test_classify(per_test_postgres, db_user, tmpdir, logger, mocked_statsd, monkeypatch, mocked_config): """Test classify works with the security role created based on abstract role.""" dsn = per_test_postgres.dsn() db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, create_db_connection(db_config, autocommit=True) as metadata_conn: with get_importer(OperatorDataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, OperatorDataParams( content='date,imei,imsi,msisdn\n' '20110101,8888#888622222,123456789012345,123456789012345\n' '20110101,88888888622222,123456789012345,123456789012345\n' '20110101,8888888862222209,123456789012345,123456789012345\n' '20110101,88888862222209**,123456789012345,123456789012345', extract=False, perform_unclean_checks=False, perform_region_checks=False, perform_home_network_check=False, operator='operator1')) as imp: imp.import_data() with get_importer(GSMADataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, GSMADataParams(filename='gsma_not_found_anonymized.txt')) as imp: imp.import_data() with get_importer(RegistrationListImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, RegistrationListParams(filename='sample_registration_list.csv')) as imp: imp.import_data() # Run dirbs-classify using db args from the temp postgres instance runner = CliRunner() monkeypatch.setattr(mocked_config.db_config, 'user', db_user) result = runner.invoke(dirbs_classify_cli, ['--no-safety-check'], obj={'APP_CONFIG': mocked_config}) if db_user in ['dirbs_poweruser_login', 'dirbs_classify_user']: assert result.exit_code == 0 else: assert result.exit_code != 0
def _update_catalog(uncataloged_files, config): """Write the new and modified files to the data catalog.""" with create_db_connection( config.db_config) as conn, conn.cursor() as cursor: for f in uncataloged_files: cursor.execute( sql.SQL( """INSERT INTO data_catalog AS dc(filename, file_type, modified_time, compressed_size_bytes, is_valid_zip, is_valid_format, md5, extra_attributes, first_seen, last_seen, uncompressed_size_bytes, num_records) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now(), now(), %s, %s) ON CONFLICT (filename, file_type) DO UPDATE SET modified_time = %s, compressed_size_bytes = %s, is_valid_zip = %s, is_valid_format = %s, md5 = %s, extra_attributes = %s, last_seen = now(), uncompressed_size_bytes = %s, num_records = %s""" ), # noqa: Q441, Q449 [ f.filename, f.file_type, f.modified_time, f.compressed_size_bytes, f.is_valid_zip, f.is_valid_format, f.md5, json.dumps(f.extra_attributes), f.uncompressed_size_bytes, f.num_records, f.modified_time, f.compressed_size_bytes, f.is_valid_zip, f.is_valid_format, f.md5, json.dumps(f.extra_attributes), f.uncompressed_size_bytes, f.num_records ])
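# Illustrative caller sketch (assumption, not from the source): _update_catalog only reads the
# attributes listed below from each entry, so any object exposing them works; SimpleNamespace is
# used here purely to show the expected shape of an uncataloged file record.
from types import SimpleNamespace
import datetime

_example_uncataloged_file = SimpleNamespace(
    filename='operator1_dump.zip',  # hypothetical filename
    file_type='operator',
    modified_time=datetime.datetime(2016, 12, 1, 10, 0, 0),
    compressed_size_bytes=1024,
    is_valid_zip=True,
    is_valid_format=True,
    md5='d41d8cd98f00b204e9800998ecf8427e',
    extra_attributes={},
    uncompressed_size_bytes=4096,
    num_records=100)
# _update_catalog([_example_uncataloged_file], config)  # 'config' is the DIRBS config object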
def check(ctx):
    """
    Checks whether the DB schema matches the software DB version.

    :param ctx: current cli context obj
    """
    db_config = common.ensure_config(ctx).db_config
    logger = logging.getLogger('dirbs.db')
    logger.info('Querying DB schema version for DB %s on host %s', db_config.database, db_config.host)
    with utils.create_db_connection(db_config) as conn:
        version = utils.query_db_schema_version(conn)
        logger.info('Code schema version: %d', code_db_schema_version)
        if version is None:
            logger.error('DB has not been clean installed. Maybe this DB pre-dates the version checking?')
            logger.error('DB schema version unknown.')
            # The exit code is used to determine whether the schema has (exit code 0) or has not
            # (exit code 1) been installed. A non-zero exit code triggers installation of the schema
            # at the entrypoint of the processing container.
            sys.exit(1)
        else:
            logger.info('DB schema version: %s', str(version))
            if version < code_db_schema_version:
                logger.error('DB schema older than code.')
            elif version > code_db_schema_version:
                logger.error('DB schema newer than code.')
            else:
                logger.info('Schema versions match between code and DB.')
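# Hedged helper sketch (assumption: the Click group above exposes this command as the 'check'
# subcommand and accepts the APP_CONFIG context object the same way the tests in this repo do).
from click.testing import CliRunner

def _schema_is_installed(db_cli_group, app_config):
    """Return True when the check subcommand exits 0, i.e. a schema version could be read."""
    result = CliRunner().invoke(db_cli_group, ['check'], obj={'APP_CONFIG': app_config})
    return result.exit_code == 0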
def cli(ctx):
    """
    DIRBS script to initialize, configure and upgrade the PostgreSQL schema.

    :param ctx: current cli context obj
    """
    config = common.ensure_config(ctx)
    db_config = config.db_config
    logger = logging.getLogger('dirbs.db')
    subcommand = ctx.invoked_subcommand
    dirbs.logging.setup_file_logging(
        config.log_config,
        'dirbs-db_{0}_{1}'.format(subcommand, datetime.datetime.now().strftime('%Y%m%d')))
    # The check subcommand should be allowed to run (and fail) regardless of these checks.
    # The install_roles subcommand installs these roles, so these checks can't be run for it.
    if subcommand not in ['install_roles', 'check']:
        with utils.create_db_connection(db_config) as conn:
            try:
                utils.warn_if_db_superuser(conn)
                utils.verify_db_roles_installed(conn)
                utils.verify_db_role_for_job(conn, 'dirbs_core_power_user')
                utils.verify_db_ownership(conn)
                utils.verify_hll_schema(conn)
                if subcommand != 'install':
                    # The install subcommand creates the schema, so it can't be checked here
                    utils.verify_core_schema(conn)
                    utils.verify_db_search_path(conn)
            except (utils.DatabaseRoleCheckException, utils.DatabaseSchemaException) as ex:
                logger.error(str(ex))
                sys.exit(1)
def get_db_connection() -> g:
    """Opens a new DB connection if there is not one yet for the current app context."""
    if not hasattr(g, 'db_conn'):
        db_config = current_app.config['DIRBS_CONFIG'].db_config
        g.db_conn = create_db_connection(db_config, readonly=True, autocommit=True)
    return g.db_conn
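# Hedged companion sketch (not taken from the source): a Flask app would normally register a
# teardown hook so the read-only connection opened by get_db_connection() is closed when the app
# context ends. The hook name and its registration are assumptions.
def close_db_connection(exception=None):
    """Close the per-app-context DB connection, if one was opened."""
    conn = g.pop('db_conn', None)
    if conn is not None:
        conn.close()
# Registration would normally happen at app-creation time, e.g.:
# app.teardown_appcontext(close_db_connection)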
def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date): """ Method to queue jobs to calculate the IMEIs that are met by this condition. Arguments: executor: instance of the python executor class, to submit back the results app_config: dirbs app current configuration, to extract various configs required for the job run_id: run id of the current classification job curr_date: current date of the system """ with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor: cursor.execute(sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} ( imei_norm TEXT NOT NULL, virt_imei_shard SMALLINT NOT NULL ) PARTITION BY RANGE (virt_imei_shard)""") .format(intermediate_tbl=self.intermediate_tbl_id(run_id))) partition_utils.create_imei_shard_partitions(conn, tbl_name=self.intermediate_tbl_name(run_id), unlogged=True) parallel_shards = partition_utils.num_physical_imei_shards(conn) # Done with connection -- temp tables should now be committed virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards) for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges: yield executor.submit(self._calc_imeis_job, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end)
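# Illustrative consumer sketch (assumed, not from the source): queue_calc_imeis_jobs above is a
# generator of futures, so a caller typically drains it with as_completed and aggregates the
# (matched_count, duration) tuples returned by each _calc_imeis_job. The helper name and the
# choice of executor here are assumptions.
import concurrent.futures

def _run_calc_imeis_sketch(condition, app_config, run_id, curr_date, max_workers=4):
    """Drain the futures yielded by queue_calc_imeis_jobs and return the total matched IMEIs."""
    total_matched = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = list(condition.queue_calc_imeis_jobs(executor, app_config, run_id, curr_date))
        for future in concurrent.futures.as_completed(futures):
            matched_count, _duration = future.result()
            total_matched += matched_count
    return total_matched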
def test_listgen(per_test_postgres, tmpdir, logger, mocked_statsd, db_user, mocked_config, monkeypatch, classification_data): """Test that the dirbs-listgen instance runs without an error.""" dsn = per_test_postgres.dsn() db_config = DBConfig(ignore_env=True, **dsn) with create_db_connection(db_config) as conn, create_db_connection(db_config, autocommit=True) as metadata_conn: with get_importer(OperatorDataImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, OperatorDataParams( content='date,imei,imsi,msisdn\n' '20160203,811111013136460,111018001111111,223338000000\n' '20160203,359000000000000,111015113222222,223355000000\n' '20160203,357756065985824,111015113333333,223355111111', cc=['22', '74'], mcc_mnc_pairs=[{'mcc': '111', 'mnc': '01'}], operator='operator1', extract=False)) as imp: imp.import_data() with get_importer(PairingListImporter, conn, metadata_conn, db_config, tmpdir, logger, mocked_statsd, PairListParams( content='imei,imsi,msisdn\n' '811111013136460,111018001111111,234555555555550\n' '359000000000000,111015113222222,234555555555551\n' '357756065985824,111015113333333,234555555555552')) as imp: imp.import_data() # Now run listgen as requested user runner = CliRunner() monkeypatch.setattr(mocked_config.db_config, 'user', db_user) output_dir = str(tmpdir) result = runner.invoke(dirbs_listgen_cli, [output_dir], obj={'APP_CONFIG': mocked_config}) if db_user in ['dirbs_poweruser_login', 'dirbs_listgen_user']: assert result.exit_code == 0 else: assert result.exit_code != 0
def _postgres_impl(mocked_config):
    """Implementation of fixture to initialise a temporary PostgreSQL instance with a clean DB schema."""
    # The locale needs to be set to C (rather than en_US.UTF8) so that, in collation order,
    # uppercase comes before lowercase.
    postgresql = testing.postgresql.Postgresql(initdb_args='-U postgres -A trust --lc-collate=C.UTF-8 '
                                                           '--lc-ctype=C.UTF-8')
    dsn = postgresql.dsn()
    # Monkey-patch Postgres config to use temp postgres instance
    for setting in ['database', 'host', 'port', 'user', 'password']:
        setattr(mocked_config.db_config, setting, dsn.get(setting, None))
    # Run dirbs-db install_roles using db args from the temp postgres instance
    runner = CliRunner()
    result = runner.invoke(dirbs_db_cli, ['install_roles'], obj={'APP_CONFIG': mocked_config})
    assert result.exit_code == 0
    with create_db_connection(mocked_config.db_config) as conn, conn.cursor() as cursor:
        cursor.execute('CREATE SCHEMA hll;')
        cursor.execute('GRANT USAGE ON SCHEMA hll TO dirbs_core_base;')
        cursor.execute('CREATE EXTENSION hll SCHEMA hll;')
        cursor.execute(sql.SQL('ALTER DATABASE {0} OWNER TO dirbs_core_power_user')
                       .format(sql.Identifier(dsn.get('database'))))
    # Run dirbs-db install using db args from the temp postgres instance
    result = runner.invoke(dirbs_db_cli, ['install'], catch_exceptions=False, obj={'APP_CONFIG': mocked_config})
    assert result.exit_code == 0
    # Create the necessary roles for security tests
    with create_db_connection(mocked_config.db_config) as conn, conn.cursor() as cursor:
        cursor.execute('CREATE ROLE dirbs_import_operator_user IN ROLE dirbs_core_import_operator LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_gsma_user IN ROLE dirbs_core_import_gsma LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_pairing_list_user IN ROLE dirbs_core_import_pairing_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_stolen_list_user IN ROLE dirbs_core_import_stolen_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_registration_list_user '
                       'IN ROLE dirbs_core_import_registration_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_import_golden_list_user IN ROLE dirbs_core_import_golden_list LOGIN')
        cursor.execute('CREATE ROLE dirbs_classify_user IN ROLE dirbs_core_classify LOGIN')
        cursor.execute('CREATE ROLE dirbs_listgen_user IN ROLE dirbs_core_listgen LOGIN')
        cursor.execute('CREATE ROLE dirbs_report_user IN ROLE dirbs_core_report LOGIN')
        cursor.execute('CREATE ROLE dirbs_api_user IN ROLE dirbs_core_api LOGIN')
        cursor.execute('CREATE ROLE dirbs_catalog_user IN ROLE dirbs_core_catalog LOGIN')
        cursor.execute('CREATE ROLE dirbs_poweruser_login IN ROLE dirbs_core_power_user LOGIN')
        cursor.execute('CREATE ROLE unknown_user LOGIN')
    yield postgresql
    postgresql.stop()
def repartition(ctx, num_physical_shards): """Repartition DIRBS Core tables into a new number of physical IMEI shards.""" logger = logging.getLogger('dirbs.db') config = common.ensure_config(ctx) with utils.create_db_connection( config.db_config) as conn, conn.cursor() as cursor: logger.info( 'Repartitioning DB schema in DB %s on host %s into %d physical shards...', config.db_config.database, config.db_config.host, num_physical_shards) logger.info('Re-partitioning classification_state table...') partition_utils.repartition_classification_state( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned classification_state table') logger.info('Re-partitioning registration_list table...') partition_utils.repartition_registration_list( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned registration_list table') logger.info('Re-partitioning stolen_list table...') partition_utils.repartition_stolen_list( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned stolen_list table') logger.info('Re-partitioning pairing_list table...') partition_utils.repartition_pairing_list( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned pairing_list table') logger.info('Re-partitioning blacklist table...') partition_utils.repartition_blacklist( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned blacklist table') logger.info('Re-partitioning notifications_lists table...') partition_utils.repartition_notifications_lists( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned notifications_lists table') logger.info('Re-partitioning exceptions_lists table...') partition_utils.repartition_exceptions_lists( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned exceptions_lists table') logger.info('Re-partitioning network_imeis table...') partition_utils.repartition_network_imeis( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned network_imeis table') logger.info('Re-partitioning monthly_network_triplets tables...') partition_utils.repartition_monthly_network_triplets( conn, num_physical_shards=num_physical_shards) logger.info('Re-partitioned monthly_network_triplets tables') # Update schema metadata table cursor.execute('UPDATE schema_metadata SET phys_shards = %s', [num_physical_shards])
def metadata_db_conn(postgres, mocked_config):
    """Fixture to inject a metadata DB connection. Only cleans up the job_metadata table."""
    # Create db connection
    conn = create_db_connection(mocked_config.db_config, autocommit=True)
    yield conn
    with conn.cursor() as cursor:
        cursor.execute('TRUNCATE job_metadata')
        cursor.execute('ALTER SEQUENCE job_metadata_run_id_seq RESTART WITH 1')
    conn.close()
def _upload_file_to_staging_table(self, input_filename): """Method to upload a single batch to the staging table.""" conn = getattr(self._thread_local_storage, 'conn', None) if conn is None: conn = self._thread_local_storage.conn = create_db_connection( self._db_config) with open(input_filename, 'r') as f, conn, conn.cursor() as cursor: cursor.copy_expert(sql=self._upload_batch_to_staging_table_query(), file=f) return cursor.rowcount
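# Assumed usage sketch (the helper name is hypothetical): the thread-local connection above lets a
# thread pool reuse one DB connection per worker thread when uploading many batch files to the
# staging table.
from concurrent.futures import ThreadPoolExecutor

def _upload_files_parallel_sketch(importer, input_filenames, max_workers=4):
    """Upload each batch file on a worker thread and return the total number of rows copied."""
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        row_counts = list(executor.map(importer._upload_file_to_staging_table, input_filenames))
    return sum(row_counts)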
def queue_update_classification_state_jobs(self, executor, app_config, run_id, curr_date): """Method to queue jobs to update the classification_state table after the IMEIs have been calculated.""" with create_db_connection(app_config.db_config) as conn: parallel_shards = partition_utils.num_physical_imei_shards(conn) virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds( parallel_shards) for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges: yield executor.submit(self._update_classification_state_job, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end)
def install(ctx): """ Installs latest schema on clean DB instance. :param ctx: current cli context obj :return: status """ logger = logging.getLogger('dirbs.db') config = common.ensure_config(ctx) db_config = config.db_config with utils.create_db_connection(db_config) as conn, conn.cursor() as cur: logger.info('Creating initial base DB schema in DB %s on host %s', db_config.database, db_config.host) # Check if there is stuff already in there cur.execute("""SELECT COUNT(*) FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = current_schema()""") is_clean = (cur.fetchone()[0] == 0) if not is_clean: logger.error('Can\'t install latest schema into a non-clean DB') logger.error( 'Instead, use dirbs-db upgrade to upgrade the schema to the latest version' ) sys.exit(1) # Set our role here so that new objects get created with dirbs_core_power_user as owner by default with utils.db_role_setter(conn, role_name='dirbs_core_power_user'): # First we setup the schema, search path etc. sql = pkgutil.get_data('dirbs', 'sql/base/on_db_creation.sql') cur.execute(sql) # Install the base schema for v19 and set current version to 19 base_schema = 'sql/base/v19_schema.sql' logger.info('Restoring base v19 schema from SQL file: %s', base_schema) sql = pkgutil.get_data('dirbs', base_schema) cur.execute(sql) utils.set_db_schema_version(conn, min_schema_version) logger.info( 'Successfully created base v{0:d} schema. Scheduling dirbs-db upgrade...' .format(min_schema_version)) # Then we call upgrade to complete the process rv = 0 if code_db_schema_version > min_schema_version: rv = ctx.invoke(upgrade) else: # Can't do anything until we know the schema is the right version _store_job_metadata(config, 'install') return rv
def _calc_imeis_job(self, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end): """Function to calculate the IMEIs that are met by this condition (single job).""" with create_db_connection(app_config.db_config) as conn, conn.cursor( ) as cursor, CodeProfiler() as cp: dims_sql = [ d.sql(conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=curr_date) for d in self.dimensions ] # Calculate the SQL for the intersection of all dimensions condition_sql = sql.SQL(' INTERSECT ').join(dims_sql) # If sticky, we need to UNION the sql with the currently selected IMEIs if self.config.sticky: condition_sql = sql.SQL("""SELECT imei_norm FROM classification_state WHERE cond_name = {cond_name} AND virt_imei_shard >= {virt_imei_range_start} AND virt_imei_shard < {virt_imei_range_end} AND end_date IS NULL UNION ALL {cond_results_sql} """).format( cond_name=sql.Literal(self.label), virt_imei_range_start=sql.Literal(virt_imei_range_start), virt_imei_range_end=sql.Literal(virt_imei_range_end), cond_results_sql=condition_sql) # Make sure we only get distinct IMEIs condition_sql = sql.SQL( """SELECT imei_norm, calc_virt_imei_shard(imei_norm) AS virt_imei_shard FROM ({0}) non_distinct GROUP BY imei_norm""").format( condition_sql) # Copy results to the temp table tbl_name = partition_utils.imei_shard_name( base_name=self.intermediate_tbl_name(run_id), virt_imei_range_start=virt_imei_range_start, virt_imei_range_end=virt_imei_range_end) cursor.execute( sql.SQL( """INSERT INTO {intermediate_tbl}(imei_norm, virt_imei_shard) {condition_sql}""" ).format(intermediate_tbl=sql.Identifier(tbl_name), condition_sql=condition_sql)) matching_imeis_count = cursor.rowcount return matching_imeis_count, cp.duration
def _fetch_catalog_files(config): """Fetch all the cataloged files from the database.""" with create_db_connection( config.db_config) as conn, conn.cursor() as cursor: cursor.execute( 'SELECT filename, file_type, modified_time, compressed_size_bytes FROM data_catalog' ) cataloged_files = [] for res in cursor: file_properties = CatalogAttributes(res.filename, res.file_type, res.modified_time, res.compressed_size_bytes) cataloged_files.append(file_properties) return cataloged_files
def _store_job_metadata(config, subcommand): """ Utility method to store metadata about a dirbs-db invocation in the database. :param config: dirbs config obj :param subcommand: sub-command name """ logger = logging.getLogger('dirbs.db') with utils.create_db_connection(config.db_config, autocommit=True) as conn: # We can only really store successful database installs/upgrades as we can't store # anything in an unknown schema version. Therefore, we can store at the end of the job # and mark it as successfully complete immediately run_id = metadata.store_job_metadata(conn, 'dirbs-db', logger, job_subcommand=subcommand) metadata.log_job_success(conn, 'dirbs-db', run_id)
def install_roles(ctx): """Creates DIRBS Core PostgreSQL base roles if they don't exist.""" logger = logging.getLogger('dirbs.db') config = common.ensure_config(ctx) db_config = copy.copy(config.db_config) # Allow install_roles to work even if database doesn't exist by using the postgres DB db_config.database = 'postgres' with utils.create_db_connection(db_config) as conn, conn.cursor() as cur: if not utils.can_db_user_create_roles(conn): logger.error( 'Current PostgreSQL user does not have the CREATEROLE privilege. Please run this command ' 'as a normal user with the CREATEROLE privilege granted (preferred) or as a superuser' ) sys.exit(1) logger.info('Creating DIRBS Core PostgreSQL roles...') sql = pkgutil.get_data('dirbs', 'sql/base/roles.sql') cur.execute(sql) logger.info('Created DIRBS Core PostgreSQL roles')
def queue_update_classification_state_jobs(self, executor, app_config, run_id, curr_date): """ Method to queue jobs to update the classification_state table after the IMEIs have been calculated. Arguments: executor: job executor instance to submit back the results to the queue app_config: current dirbs app config object to use configuration from run_id: run_id of the current running classification job curr_date: current date of the system """ with create_db_connection(app_config.db_config) as conn: parallel_shards = partition_utils.num_physical_imei_shards(conn) virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds(parallel_shards) for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges: yield executor.submit(self._update_classification_state_job, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end)
def queue_calc_imeis_jobs(self, executor, app_config, run_id, curr_date): """Method to queue jobs to calculate the IMEIs that are met by this condition.""" with create_db_connection( app_config.db_config) as conn, conn.cursor() as cursor: cursor.execute( sql.SQL("""CREATE UNLOGGED TABLE {intermediate_tbl} ( imei_norm TEXT NOT NULL, virt_imei_shard SMALLINT NOT NULL ) PARTITION BY RANGE (virt_imei_shard)"""). format(intermediate_tbl=self.intermediate_tbl_id(run_id))) partition_utils.create_imei_shard_partitions( conn, tbl_name=self.intermediate_tbl_name(run_id), unlogged=True) parallel_shards = partition_utils.num_physical_imei_shards(conn) # Done with connection -- temp tables should now be committed virt_imei_shard_ranges = partition_utils.virt_imei_shard_bounds( parallel_shards) for virt_imei_range_start, virt_imei_range_end in virt_imei_shard_ranges: yield executor.submit(self._calc_imeis_job, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end)
def _update_classification_state_job(self, app_config, run_id, curr_date, virt_imei_range_start,
                                     virt_imei_range_end):
    """Function to update the classification_state table with IMEIs that are met by this condition (single job)."""
    with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor, CodeProfiler() as cp:
        src_shard_name = partition_utils.imei_shard_name(
            base_name=self.intermediate_tbl_name(run_id),
            virt_imei_range_start=virt_imei_range_start,
            virt_imei_range_end=virt_imei_range_end)
        # Add index on imei_norm
        indices = [partition_utils.IndexMetadatum(idx_cols=['imei_norm'], is_unique=True)]
        partition_utils.add_indices(conn, tbl_name=src_shard_name, idx_metadata=indices)
        # Analyze table for better stats/plans
        cursor.execute(sql.SQL('ANALYZE {0}').format(sql.Identifier(src_shard_name)))
        # Calculate block date
        if curr_date is None:
            curr_date = datetime.date.today()
        in_amnesty_eval_period, in_amnesty_period = compute_amnesty_flags(app_config, curr_date)
        # If the condition is blocking and is not eligible for amnesty, then compute the block_date.
        # The block_date is set to NULL for an amnesty_eligible condition within the eval period.
        amnesty_eligible = self.config.amnesty_eligible
        sticky_block_date = curr_date + datetime.timedelta(days=self.config.grace_period) \
            if self.config.blocking and not (amnesty_eligible and in_amnesty_eval_period) else None
        # If the condition's amnesty_eligible flag changed while in the eval period, then make sure we update
        # the amnesty_granted column in the classification_state table for existing IMEIs meeting that condition.
        # These rows will be selected by the existing WHERE clause filters, as the block_date would change
        # from being NULL to not-NULL or vice versa.
        set_amnesty_granted_column = sql.SQL(', amnesty_granted = {0}').format(sql.Literal(amnesty_eligible)) \
            if in_amnesty_eval_period else sql.SQL('')
        # If in the amnesty period, update the block_date for IMEIs that were previously classified
        # as amnesty eligible. This filter selects amnesty_granted IMEIs with a not-NULL block date,
        # to make sure that if the amnesty_end_date was updated, we update the block_date too.
        amnesty_block_date_filter = sql.SQL('OR cs.amnesty_granted = TRUE') if in_amnesty_period else sql.SQL('')
        dest_shard_name = partition_utils.imei_shard_name(
            base_name='classification_state',
            virt_imei_range_start=virt_imei_range_start,
            virt_imei_range_end=virt_imei_range_end)
        # If a condition is blocking, insert new records into the state table with a non-null block_date, or
        # set a non-null block_date for existing records having a null block_date.
        # Vice versa, if a condition is not blocking, insert new records into the state table with a NULL
        # block_date, or set a NULL block_date for existing records having a non-null block_date.
        # Set the amnesty_granted column equal to the condition's amnesty_eligible flag when in the amnesty
        # eval period, otherwise always set it to False for new IMEIs meeting the condition.
cursor.execute( sql.SQL("""INSERT INTO {dest_shard} AS cs(imei_norm, cond_name, run_id, start_date, end_date, block_date, amnesty_granted, virt_imei_shard) SELECT imei_norm, %s, %s, %s, NULL, %s, %s, calc_virt_imei_shard(imei_norm) FROM {src_shard} ON CONFLICT (imei_norm, cond_name) WHERE end_date IS NULL DO UPDATE SET block_date = CASE WHEN cs.amnesty_granted = TRUE AND NOT {in_eval_period} THEN {amnesty_end_date} ELSE {sticky_block_date} END {set_amnesty_granted_column} WHERE (cs.block_date IS NULL AND excluded.block_date IS NOT NULL) OR (cs.block_date IS NOT NULL AND excluded.block_date IS NULL) {amnesty_block_date_filter}""" ) # noqa Q441 .format(src_shard=sql.Identifier(src_shard_name), dest_shard=sql.Identifier(dest_shard_name), in_eval_period=sql.Literal(in_amnesty_eval_period), set_amnesty_granted_column=set_amnesty_granted_column, amnesty_block_date_filter=amnesty_block_date_filter, amnesty_end_date=sql.Literal( app_config.amnesty_config.amnesty_period_end_date), sticky_block_date=sql.Literal(sticky_block_date)), [ self.label, run_id, curr_date, sticky_block_date, (amnesty_eligible and in_amnesty_eval_period) ]) # Get rid of records that no longer exist in the matched IMEIs list cursor.execute( sql.SQL("""UPDATE {dest_shard} dst SET end_date = %s WHERE cond_name = %s AND end_date IS NULL AND NOT EXISTS (SELECT imei_norm FROM {src_shard} WHERE imei_norm = dst.imei_norm)""" ).format(src_shard=sql.Identifier(src_shard_name), dest_shard=sql.Identifier(dest_shard_name)), [curr_date, self.label]) return cp.duration
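# Hedged illustration (not from the source) of the block_date decision made in
# _update_classification_state_job above, expressed as a plain function over the same inputs:
# a blocking condition gets curr_date + grace_period unless amnesty applies during the eval period.
import datetime

def _example_block_date(blocking, amnesty_eligible, in_amnesty_eval_period, curr_date, grace_period_days):
    """Return the block_date a newly-matched IMEI would receive for this condition."""
    if blocking and not (amnesty_eligible and in_amnesty_eval_period):
        return curr_date + datetime.timedelta(days=grace_period_days)
    return None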
def decorated(ctx, *args, **kwargs):
    _command = command or os.path.basename(sys.argv[0])
    _logger_name = logger_name or _command.replace('-', '.')
    if callable(metrics_root):
        _metrics_root = metrics_root(ctx, args, **kwargs)
    else:
        _metrics_root = metrics_root
    if _metrics_root is None:
        _metrics_root = _logger_name + '.'
    if subcommand is not None:
        _metrics_root = _metrics_root + subcommand + '.'
    config = ensure_config(ctx)
    statsd = ensure_statsd(ctx)
    logger = logging.getLogger(_logger_name)
    metrics_run_root = None
    run_id = -1
    metadata_conn = None
    inited_file_logging = False
    try:
        # Store the start time so that we can track metrics for the total command run time
        st = time.time()
        # Get metadata connection in autocommit mode
        metadata_conn = utils.create_db_connection(config.db_config, autocommit=True)
        try:
            # Verify DB schema
            utils.verify_db_schema(metadata_conn, required_role)
        except (utils.DatabaseSchemaException, utils.DatabaseRoleCheckException) as ex:
            logger.error(str(ex))
            sys.exit(1)
        # Store metadata and get run_id
        run_id = metadata.store_job_metadata(metadata_conn, _command, logger, job_subcommand=subcommand)
        # Now that we have a run_id, we can set up logging
        if subcommand is not None:
            log_filename = '{0}_{1}_run_id_{2:d}'.format(command, subcommand, run_id)
        else:
            log_filename = '{0}_run_id_{1:d}'.format(command, run_id)
        inited_file_logging = dirbs.logging.setup_file_logging(config.log_config, log_filename)
        # Get metrics run root based on run_id
        metrics_run_root = '{0}runs.{1:d}.'.format(_metrics_root, run_id)
        # Validate that any exempted device types occur in the imported GSMA TAC DB
        utils.validate_exempted_device_types(metadata_conn, config)
        # Run the actual decorated function with injected args for config, conn, statsd, logger,
        # run_id and metadata_conn
        with utils.create_db_connection(config.db_config) as conn:
            # Call CLI function with injected args
            f(ctx, config, statsd, logger, run_id, conn, metadata_conn, _command,
              _metrics_root, metrics_run_root, *args, **kwargs)
        # Update the last success timestamp
        statsd.gauge('{0}last_success'.format(_metrics_root), int(time.time()))
        metadata.log_job_success(metadata_conn, _command, run_id)
    except:  # noqa: E722
        # Make sure we track the last failure timestamp for any exception and re-raise
        statsd.gauge('{0}last_failure'.format(_metrics_root), int(time.time()))
        # Log metadata in job_metadata table
        if run_id != -1:
            metadata.log_job_failure(metadata_conn, _command, run_id, logger)
        raise
    finally:
        # As a last resort, make sure we init file logging (without a run_id in the filename) so
        # that we flush our buffered log output
        if not inited_file_logging:
            if subcommand is not None:
                log_filename = '{0}_{1}_run_id_unknown'.format(command, subcommand)
            else:
                log_filename = '{0}_run_id_unknown'.format(command)
            dirbs.logging.setup_file_logging(config.log_config, log_filename)
        # Only track StatsD metrics for run time if we at least retrieved a run id, as this
        # forms part of the key
        dt = int((time.time() - st) * 1000)
        if metrics_run_root is not None:
            statsd.gauge('{0}runtime.total'.format(metrics_run_root), dt)
        # If a duration_callback was set, call it here with the calculated dt
        if duration_callback is not None:
            duration_callback(dt)
        # Clean up the metadata connection (not managed by a with statement)
        if metadata_conn is not None:
            try:
                metadata_conn.close()
            except (psycopg2.InterfaceError, psycopg2.OperationalError) as e:
                logger.error(str(e))
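# Signature sketch inferred from the f(...) call above: a CLI function wrapped by this decorator
# receives the injected arguments in this order. The parameter names and the body below are
# illustrative guesses, not the real DIRBS command implementations.
def _example_wrapped_cli_function(ctx, config, statsd, logger, run_id, conn, metadata_conn,
                                  command, metrics_root, metrics_run_root, *args, **kwargs):
    """Hedged sketch of a function that this decorator would wrap."""
    logger.info('Running %s (run_id %d)', command, run_id)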
def upgrade(ctx): """ Upgrades the current DB schema to the version supported by this code using migration scripts. :param ctx: current cli context obj """ logger = logging.getLogger('dirbs.db') config = common.ensure_config(ctx) db_config = config.db_config needs_analyze = False with utils.create_db_connection(db_config) as conn: logger.info('Querying DB schema version for DB %s on host %s', db_config.database, db_config.host) with conn.cursor() as cur: try: version = utils.query_db_schema_version(conn) except ProgrammingError: logger.warn( 'Could not determine current schema version. Assuming no version' ) version = None if version is None: logger.error( 'DB currently not installed or version number could not be determined. Can\'t upgrade' ) sys.exit(1) if version < min_schema_version: logger.error( 'Current DB schema is older than DIRBS 4.0.0. Can\'t upgrade' ) sys.exit(1) if version > code_db_schema_version: logger.error('DB schema newer than code. Can\'t upgrade') sys.exit(1) if version != code_db_schema_version: logger.info('Upgrading DB schema from version %d to %d', version, code_db_schema_version) # If we're upgrading, make sure we schedule a full ANALYZE outside the transaction later needs_analyze = True # Set our role here so that new objects get created with dirbs_core_power_user as owner by default with utils.db_role_setter(conn, role_name='dirbs_core_power_user'): for old_version in range(version, code_db_schema_version): new_version = old_version + 1 # Check if there is a special migration class, otherwise use standard SQL file try: module_name = 'dirbs.schema_migrators.v{0}_upgrade'.format( new_version) module = importlib.import_module(module_name) logger.info('Running Python migration script: %s', module_name) migrator = module.migrator() migrator.upgrade(conn) except ImportError as ex: script_name = 'sql/migration_scripts/v{0:d}_upgrade.sql'.format( new_version) logger.info('Running SQL migration script: %s', script_name) sql = pkgutil.get_data('dirbs', script_name) cur.execute(sql) # We commit after every version upgrade utils.set_db_schema_version(conn, new_version) conn.commit() logger.info( 'Successfully updated schema - DB schema version is now %d', code_db_schema_version) # Can't do anything until we know the schema is the right version _store_job_metadata(config, 'upgrade') else: logger.info('DB schema is already latest version') # Schedule a full ANALYZE at the end of an upgrade if needs_analyze: logger.info( 'Running ANALYZE of entire database after upgrade...') cur.execute('ANALYZE') logger.info( 'Finished running ANALYZE of entire database after upgrade' )
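# Interface sketch inferred from the upgrade loop above: a Python migration module must expose a
# module-level name 'migrator' whose instances have an upgrade(conn) method. The class below is a
# hypothetical example, not one of the real dirbs.schema_migrators modules.
class _ExampleSchemaMigrator:
    """Hypothetical migrator, as would live in dirbs/schema_migrators/vNN_upgrade.py."""

    def upgrade(self, db_conn):
        with db_conn.cursor() as cursor:
            # Illustrative, idempotent DDL only
            cursor.execute('ALTER TABLE job_metadata ADD COLUMN IF NOT EXISTS example_notes TEXT')

migrator = _ExampleSchemaMigrator  # module attribute looked up and instantiated by the loop above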
def _add_indices_parallel_single_job(db_config, *, tbl_name, idx_metadatum, if_not_exists=False): """Job function called by add_indices_parallel.""" with utils.create_db_connection(db_config) as conn: _add_index_to_single_shard(conn, part_name=tbl_name, idx_metadatum=idx_metadatum, if_not_exists=if_not_exists)
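# Assumed companion sketch (the real add_indices_parallel in partition_utils may differ): each
# shard/index pair is submitted as a separate job so that index builds run in parallel, with the
# job function above opening its own DB connection per worker.
import concurrent.futures

def _add_indices_parallel_sketch(executor, db_config, *, part_names, idx_metadata, if_not_exists=False):
    """Submit one _add_indices_parallel_single_job per (shard, index) pair and wait for all of them."""
    futures = [executor.submit(_add_indices_parallel_single_job, db_config, tbl_name=part_name,
                               idx_metadatum=idx_metadatum, if_not_exists=if_not_exists)
               for part_name in part_names
               for idx_metadatum in idx_metadata]
    for future in concurrent.futures.as_completed(futures):
        future.result()  # propagate any index-creation error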