def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects the IMEIs of the network_imeis shard that have an open entry (end_date IS NULL)
    in the historic_monitoring_list shard. When self._monitored_days is greater than zero,
    the entry must additionally be at least that many days old, measured in the database
    as NOW() - start_date.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read here; the age check uses NOW())
    :return: SQL
    """
    network_imeis_shard = part_utils.imei_shard_name(base_name='network_imeis',
                                                     virt_imei_range_start=virt_imei_range_start,
                                                     virt_imei_range_end=virt_imei_range_end)
    monitoring_list_shard = part_utils.imei_shard_name(base_name='historic_monitoring_list',
                                                       virt_imei_range_start=virt_imei_range_start,
                                                       virt_imei_range_end=virt_imei_range_end)

    # Both query variants reference the same two tables, so build the identifiers once.
    # The former interval_sql fragment was never referenced by a placeholder and is gone.
    identifiers = {
        'network_imeis_shard': sql.Identifier(network_imeis_shard),
        'mon_list_shard': sql.Identifier(monitoring_list_shard),
    }

    if self._monitored_days > 0:
        query = sql.SQL(  # noqa: Q449
            'SELECT imei_norm FROM {network_imeis_shard} '
            'WHERE EXISTS(SELECT imei_norm FROM {mon_list_shard} '
            'WHERE imei_norm = {network_imeis_shard}.imei_norm '
            'AND EXTRACT(DAY FROM NOW() - start_date)::INT >= {mon_days} '
            'AND end_date IS NULL)'
        ).format(mon_days=sql.Literal(self._monitored_days), **identifiers)
    else:
        query = sql.SQL(  # noqa: Q449
            'SELECT imei_norm FROM {network_imeis_shard} '
            'WHERE EXISTS(SELECT imei_norm FROM {mon_list_shard} '
            'WHERE imei_norm = {network_imeis_shard}.imei_norm '
            'AND end_date IS NULL)'
        ).format(**identifiers)

    return query.as_string(conn)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects the IMEIs of the network_imeis shard for which no open row (end_date IS NULL)
    in the historic_registration_list shard satisfies the filter returned by
    registration_list_status_filter_sql(). If the region config lists exempted device
    types, the query is passed through filter_imei_list_sql_by_device_type.

    :param conn: database connection
    :param app_config: dirbs config obj
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    shard_range = {'virt_imei_range_start': virt_imei_range_start,
                   'virt_imei_range_end': virt_imei_range_end}
    network_tbl = partition_utils.imei_shard_name(base_name='network_imeis', **shard_range)
    registration_tbl = partition_utils.imei_shard_name(base_name='historic_registration_list', **shard_range)

    template = sql.SQL(
        'SELECT imei_norm FROM {network_imeis_shard} '
        'WHERE NOT EXISTS(SELECT imei_norm FROM {reg_list_shard} '
        'WHERE imei_norm = {network_imeis_shard}.imei_norm '
        'AND end_date IS NULL '
        'AND {wl_status_filter})'
    )
    query_text = template.format(network_imeis_shard=sql.Identifier(network_tbl),
                                 reg_list_shard=sql.Identifier(registration_tbl),
                                 wl_status_filter=registration_list_status_filter_sql()).as_string(conn)

    # No exemptions configured: the plain query is the final result.
    if len(app_config.region_config.exempted_device_types) == 0:
        return query_text

    return filter_imei_list_sql_by_device_type(conn,
                                               app_config.region_config.exempted_device_types,
                                               query_text)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects the IMEIs of the network_imeis shard that have no open row (end_date IS NULL)
    in the historic_device_association_list shard. If the region config lists exempted
    device types, the query is passed through filter_imei_list_sql_by_device_type.

    :param conn: database connection
    :param app_config: dirbs config obj
    :param virt_imei_range_start: start of imei shard range
    :param virt_imei_range_end: end of imei shard range
    :param curr_date: user defined current date
    :return: SQL
    """
    shard_range = {'virt_imei_range_start': virt_imei_range_start,
                   'virt_imei_range_end': virt_imei_range_end}
    network_tbl = partition_utils.imei_shard_name(base_name='network_imeis', **shard_range)
    association_tbl = partition_utils.imei_shard_name(base_name='historic_device_association_list',
                                                      **shard_range)

    template = sql.SQL(  # noqa: Q449
        'SELECT imei_norm FROM {network_imeis_shard} '
        'WHERE NOT EXISTS(SELECT imei_norm FROM {association_shard} '
        'WHERE imei_norm = {network_imeis_shard}.imei_norm '
        'AND end_date IS NULL)'
    )
    query_text = template.format(network_imeis_shard=sql.Identifier(network_tbl),
                                 association_shard=sql.Identifier(association_tbl)).as_string(conn)

    # Without exempted device types the unfiltered query is returned as is.
    if len(app_config.region_config.exempted_device_types) == 0:
        return query_text

    return filter_imei_list_sql_by_device_type(conn,
                                               app_config.region_config.exempted_device_types,
                                               query_text)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects the IMEIs of the historic_stolen_list shard whose row is still open
    (end_date IS NULL) and whose status is either NULL or 'blacklist'.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    stolen_tbl = partition_utils.imei_shard_name(base_name='historic_stolen_list',
                                                 virt_imei_range_start=virt_imei_range_start,
                                                 virt_imei_range_end=virt_imei_range_end)
    template = sql.SQL(
        'SELECT imei_norm '
        'FROM {stolen_list_shard} '
        "WHERE (status IS NULL OR status = 'blacklist') "
        'AND end_date IS NULL '
    )
    composed = template.format(stolen_list_shard=sql.Identifier(stolen_tbl))
    return composed.as_string(conn)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects every IMEI of the network_imeis shard whose imei_norm does not consist of
    exactly 14 decimal digits.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    network_imeis_shard = partition_utils.imei_shard_name(base_name='network_imeis',
                                                          virt_imei_range_start=virt_imei_range_start,
                                                          virt_imei_range_end=virt_imei_range_end)
    # The regex part is a raw literal: backslash-d is not a valid escape in a normal Python
    # string (W605, SyntaxWarning on Python 3.12+). The text sent to the database is unchanged.
    # The doubled braces are format escapes and render as {14}.
    query = sql.SQL(
        'SELECT imei_norm FROM {network_imeis_shard} '
        r"WHERE imei_norm !~ '^\d{{14}}$'"
    )
    return query.format(network_imeis_shard=sql.Identifier(network_imeis_shard)).as_string(conn)
def sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Interface for a dimension to return the SQL fragment associated with it.

    Delegates to self._matching_imeis_sql and wraps the resulting text in a sql.SQL object.
    When self.invert is set, the fragment is instead embedded in a query selecting the
    IMEIs of the network_imeis shard that the base fragment does NOT return.

    :param conn: database connection (its encoding is used to decode a bytes fragment)
    :param app_config: dirbs config obj
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date
    :return: composable SQL object for this dimension
    """
    base_sql = self._matching_imeis_sql(conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date)
    if isinstance(base_sql, bytes):
        base_sql = str(base_sql, conn.encoding)

    # Dimensions should convert their query fragments to strings before returning
    assert isinstance(base_sql, str), \
        '_matching_imeis_sql must return str or bytes, got {0}'.format(type(base_sql).__name__)

    if not self.invert:
        return sql.SQL(base_sql)

    network_imeis_shard = partition_utils.imei_shard_name(base_name='network_imeis',
                                                          virt_imei_range_start=virt_imei_range_start,
                                                          virt_imei_range_end=virt_imei_range_end)
    # Inverted dimension: everything on the network shard that the base query does not match.
    return sql.SQL(
        'SELECT imei_norm '
        'FROM {network_imeis_shard} '
        'WHERE NOT EXISTS(SELECT imei_norm '
        'FROM ({base_dim_sql}) base '
        'WHERE imei_norm = {network_imeis_shard}.imei_norm) '
    ).format(network_imeis_shard=sql.Identifier(network_imeis_shard),
             base_dim_sql=sql.SQL(base_sql))
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Builds a query returning the IMEIs of the network_imeis shard for which the
    historic_registration_list shard holds no open row (end_date IS NULL) matching the
    filter produced by registration_list_status_filter_sql(). When exempted device types
    are configured, the query text is post-processed by filter_imei_list_sql_by_device_type.

    :param conn: database connection
    :param app_config: dirbs config obj
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date
    :return: SQL
    """
    def shard_for(base_name):
        # Resolve the shard table name of base_name for this job's virtual IMEI range.
        return partition_utils.imei_shard_name(base_name=base_name,
                                               virt_imei_range_start=virt_imei_range_start,
                                               virt_imei_range_end=virt_imei_range_end)

    network_tbl = shard_for('network_imeis')
    registration_tbl = shard_for('historic_registration_list')

    not_registered = sql.SQL(  # noqa: Q449
        'SELECT imei_norm FROM {network_imeis_shard} '
        'WHERE NOT EXISTS(SELECT imei_norm FROM {reg_list_shard} '
        'WHERE imei_norm = {network_imeis_shard}.imei_norm '
        'AND end_date IS NULL '
        'AND {wl_status_filter})'
    ).format(network_imeis_shard=sql.Identifier(network_tbl),
             reg_list_shard=sql.Identifier(registration_tbl),
             wl_status_filter=registration_list_status_filter_sql())
    query_text = not_registered.as_string(conn)

    if len(app_config.region_config.exempted_device_types) > 0:
        return filter_imei_list_sql_by_device_type(conn,
                                                   app_config.region_config.exempted_device_types,
                                                   query_text)
    return query_text
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects network IMEIs whose TAC (first 8 characters) is absent from gsma_data and whose
    first_seen date plus the delay configured for their RBI (first 2 characters) is still
    earlier than the analysis end date.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date for analysis
    :return: SQL
    """
    analysis_end_date = compute_analysis_end_date(conn, curr_date)

    # Two parallel lists (RBI -> delay) that are handed to UNNEST in the query below.
    rbi_delays = self.final_rbi_delays
    rbi_list = list(rbi_delays.keys())
    delay_list = [rbi_delays[rbi] for rbi in rbi_list]

    # HACK: This is used by _write_country_gsma_not_found_report in cli/report.py which instantiates
    # a dimension without understanding about parallel queries. Therefore, 1 and 100 are passed in
    # to cover the entire range of IMEIs and the query is expected to read from the network_imeis
    # table itself rather than from a shard.
    covers_whole_range = virt_imei_range_start == 1 and virt_imei_range_end == 100
    if covers_whole_range:
        network_imeis_shard = 'network_imeis'
    else:
        network_imeis_shard = partition_utils.imei_shard_name(base_name='network_imeis',
                                                              virt_imei_range_start=virt_imei_range_start,
                                                              virt_imei_range_end=virt_imei_range_end)

    # The first CTE 'not_in_gsma' exposes, for every IMEI whose TAC is not present in the GSMA
    # database, its first_seen date (as min_first_seen) and its RBI.
    # The second CTE 'rbi_delays' is the list of RBIs and corresponding delays that were configured.
    # The results of the two CTEs are joined using RBI as the key; an RBI without a configured
    # delay gets a delay of 0 through COALESCE.
    # Finally, those IMEIs whose min_first_seen + RBI delay is less than the analysis end date are
    # classified as gsma_not_found and the rest are excluded.
    # Note 1: The less than check implies that even after adding delay, these IMEIs would still not
    #         have been allocated by the classification date and hence are not valid IMEIs.
    # Note 2: The delay is added on a per-IMEI basis rather than per-TAC due to potential for someone
    #         squatting on an unallocated TAC in the past.
    template = sql.SQL(  # noqa: Q447, Q449
        'SELECT imei_norm '
        'FROM (WITH not_in_gsma AS (SELECT imei_norm, first_seen AS min_first_seen, '
        'LEFT(imei_norm, 2) AS rbi '
        'FROM {network_imeis_shard} '
        'WHERE NOT EXISTS (SELECT 1 FROM gsma_data WHERE tac = LEFT(imei_norm, 8))), '
        'rbi_delays AS (SELECT rbi, delay '
        'FROM UNNEST({rbi_list}::TEXT[], {delay_list}::INT[]) AS tbl(rbi, delay)) '
        'SELECT imei_norm '
        'FROM not_in_gsma '
        'LEFT JOIN rbi_delays USING (rbi) '
        'WHERE min_first_seen + COALESCE(delay, 0) < {analysis_end_date}) invalid_imeis '
    )
    composed = template.format(network_imeis_shard=sql.Identifier(network_imeis_shard),
                               rbi_list=sql.Literal(rbi_list),
                               delay_list=sql.Literal(delay_list),
                               analysis_end_date=sql.Literal(analysis_end_date))
    return composed.as_string(conn)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Compute the RAT bitmask on a per model level by OR'ing all the TAC bitmasks of gsma_data
    rows sharing the same model name and manufacturer. Then check individually that a device
    was seen on a 2G/3G/4G RAT and whether its model is capable of that particular RAT.

    The first condition in the where clause ANDs the device_rat_bitmask with 48
    (bits 4, 5 set) to get the operator_rank bits corresponding to 2G RATs. If this value is
    greater than 0, then the device was observed on 2G RATs. The model_rat_bitmask is ANDed
    with 64 (bit 6 set) to get the gsma_rank corresponding to 2G RAT. If this value is zero,
    then the device does not have 2G capability.

    The second condition ANDs the device_rat_bitmask with 960 (bits 6, 7, 8, 9 set) to get the
    operator_rank bits corresponding to 3G RATs. If this value is greater than 0, then the
    device was observed on 3G RATs. The model_rat_bitmask is ANDed with 512 (bit 9 set) to get
    the gsma_rank corresponding to 3G RAT. If this value is zero, then the device does not
    have 3G capability.

    The third condition ANDs the device_rat_bitmask with 7168 (bits 10, 11, 12 set) to get the
    operator_rank bits corresponding to 4G RATs. If this value is greater than 0, then the
    device was observed on 4G RATs. The model_rat_bitmask is ANDed with 4096 (bit 12 set) to
    get the gsma_rank corresponding to 4G RAT. If this value is zero, then the device does not
    have 4G capability.

    If a device was seen on a RAT that its model is not capable of, it is flagged for having an
    inconsistent RAT. IMEIs with a NULL seen_rat_bitmask, and IMEIs associated with TACs having
    a NULL manufacturer or model name, are excluded from classification.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    network_tbl = partition_utils.imei_shard_name(base_name='network_imeis',
                                                  virt_imei_range_start=virt_imei_range_start,
                                                  virt_imei_range_end=virt_imei_range_end)
    template = sql.SQL(  # noqa: Q447
        'SELECT imei_norm '
        'FROM (SELECT imei_norm, SUBSTRING(imei_norm FROM 1 FOR 8) AS tac, '
        'seen_rat_bitmask AS device_rat_bitmask '
        'FROM {network_imeis_shard} '
        'WHERE seen_rat_bitmask IS NOT NULL) imei_rat '
        'JOIN (SELECT gsma_tacs.tac, gsma_per_model_rat_bitmask.model_rat_bitmask '
        'FROM (SELECT model_name, manufacturer, bit_or(rat_bitmask) AS model_rat_bitmask '
        'FROM gsma_data '
        'WHERE model_name IS NOT NULL AND manufacturer IS NOT NULL '
        'GROUP BY model_name, manufacturer) gsma_per_model_rat_bitmask '
        'JOIN gsma_data gsma_tacs USING (model_name, manufacturer)) gsma_per_tac_bitmask '
        'ON imei_rat.tac = gsma_per_tac_bitmask.tac '
        'WHERE ((device_rat_bitmask & 48) > 0 AND (model_rat_bitmask & 64) = 0) '
        'OR ((device_rat_bitmask & 960) > 0 AND (model_rat_bitmask & 512) = 0) '
        'OR ((device_rat_bitmask & 7168) > 0 AND (model_rat_bitmask & 4096) = 0)'
    )
    composed = template.format(network_imeis_shard=sql.Identifier(network_tbl))
    return composed.as_string(conn)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Returns a query for all IMEIs of the network_imeis shard whose imei_norm is anything
    other than exactly 14 decimal digits.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    network_imeis_shard = partition_utils.imei_shard_name(base_name='network_imeis',
                                                          virt_imei_range_start=virt_imei_range_start,
                                                          virt_imei_range_end=virt_imei_range_end)
    # A raw literal keeps the regex backslash intact without relying on Python passing an
    # invalid escape sequence through (deprecated; SyntaxWarning on Python 3.12+).
    # The doubled braces are format escapes that render as {14}.
    malformed_pattern = r"'^\d{{14}}$'"
    template = sql.SQL('SELECT imei_norm FROM {network_imeis_shard} WHERE imei_norm !~ ' + malformed_pattern)
    return template.format(network_imeis_shard=sql.Identifier(network_imeis_shard)).as_string(conn)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Returns a query for the IMEIs of the historic_stolen_list shard that are currently
    listed (end_date IS NULL) with a status of NULL or 'blacklist'.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    shard_name = partition_utils.imei_shard_name(base_name='historic_stolen_list',
                                                 virt_imei_range_start=virt_imei_range_start,
                                                 virt_imei_range_end=virt_imei_range_end)
    shard_identifier = sql.Identifier(shard_name)
    stolen_query = sql.SQL(
        'SELECT imei_norm '
        'FROM {stolen_list_shard} '
        "WHERE (status IS NULL OR status = 'blacklist') "
        'AND end_date IS NULL '
    ).format(stolen_list_shard=shard_identifier)
    return stolen_query.as_string(conn)
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects the network IMEIs that look like test IMEIs. An IMEI matches when:
        - its first 2 digits are '00';
        - its first 8 characters are all digits (IMEIs with other characters are excluded);
        - its third and fourth digits are either '10', followed by a fifth and sixth digit
          that form a number between 1 and 17, or one of '44', '86' or '91'.

    e.g. first six digits of a test IMEI:
        001 001 - 001 017
        00 44
        00 86
        00 91

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    network_tbl = partition_utils.imei_shard_name(base_name='network_imeis',
                                                  virt_imei_range_start=virt_imei_range_start,
                                                  virt_imei_range_end=virt_imei_range_end)
    # The doubled braces in the regex are format escapes and render as {8}.
    template = sql.SQL(  # noqa: Q447
        'SELECT imei_norm '
        'FROM {network_imeis_shard} '
        "WHERE SUBSTRING(imei_norm, 1, 2) = '00' "
        "AND imei_norm ~ '^[0-9]{{8}}' "
        'AND ( ( '
        "SUBSTRING(imei_norm, 3, 2) = '10' "
        'AND SUBSTRING(imei_norm, 5, 2)::INT BETWEEN 1 AND 17 '
        ') '
        "OR SUBSTRING(imei_norm, 3, 2) IN ('44', '86', '91') "
        ') '
    )
    composed = template.format(network_imeis_shard=sql.Identifier(network_tbl))
    return composed.as_string(conn)
def _calc_imeis_job(self, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end):
    """
    Calculate the IMEIs that are met by this condition (single job).

    Intersects the SQL of all dimensions, optionally unions it with the IMEIs currently
    recorded for a sticky condition, de-duplicates the result and inserts it into this
    run's intermediate table shard.

    :param app_config: dirbs config obj
    :param run_id: id of the run, used to derive the intermediate table name
    :param curr_date: user defined current date, forwarded to the dimensions
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :return: tuple of (number of inserted rows, duration reported by the CodeProfiler)
    """
    with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor, CodeProfiler() as cp:
        per_dimension_sql = [dim.sql(conn, app_config, virt_imei_range_start, virt_imei_range_end,
                                     curr_date=curr_date)
                             for dim in self.dimensions]

        # An IMEI meets the condition only if every dimension matches it
        matched_sql = sql.SQL(' INTERSECT ').join(per_dimension_sql)

        # A sticky condition keeps the IMEIs that are currently selected, so UNION them in
        if self.config.sticky:
            sticky_template = sql.SQL(
                'SELECT imei_norm '
                'FROM classification_state '
                'WHERE cond_name = {cond_name} '
                'AND virt_imei_shard >= {virt_imei_range_start} '
                'AND virt_imei_shard < {virt_imei_range_end} '
                'AND end_date IS NULL '
                'UNION ALL {cond_results_sql} '
            )
            matched_sql = sticky_template.format(cond_name=sql.Literal(self.label),
                                                 virt_imei_range_start=sql.Literal(virt_imei_range_start),
                                                 virt_imei_range_end=sql.Literal(virt_imei_range_end),
                                                 cond_results_sql=matched_sql)

        # Collapse duplicates so every IMEI is inserted once
        distinct_sql = sql.SQL(
            'SELECT imei_norm, calc_virt_imei_shard(imei_norm) AS virt_imei_shard '
            'FROM ({0}) non_distinct '
            'GROUP BY imei_norm'
        ).format(matched_sql)

        # Copy the results into this job's shard of the intermediate table
        intermediate_shard = partition_utils.imei_shard_name(base_name=self.intermediate_tbl_name(run_id),
                                                             virt_imei_range_start=virt_imei_range_start,
                                                             virt_imei_range_end=virt_imei_range_end)
        insert_sql = sql.SQL(
            'INSERT INTO {intermediate_tbl}(imei_norm, virt_imei_shard) '
            '{condition_sql}'
        ).format(intermediate_tbl=sql.Identifier(intermediate_shard),
                 condition_sql=distinct_sql)
        cursor.execute(insert_sql)
        matching_imeis_count = cursor.rowcount

    # NOTE(review): duration is read after the profiler context has exited - presumably that is
    # when CodeProfiler finalises it; confirm against CodeProfiler.
    return matching_imeis_count, cp.duration
def _matching_imeis_sql(self, conn, app_config, virt_imei_range_start, virt_imei_range_end, curr_date=None):
    """
    Overrides Dimension._matching_imeis_sql.

    Selects the IMEIs of the network_imeis shard whose TAC (first 8 characters of
    imei_norm) is present in barred_tac_list.

    :param conn: database connection
    :param app_config: dirbs config obj (not read by this dimension)
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :param curr_date: user defined current date (not read by this dimension)
    :return: SQL
    """
    network_tbl = part_utils.imei_shard_name(base_name='network_imeis',
                                             virt_imei_range_start=virt_imei_range_start,
                                             virt_imei_range_end=virt_imei_range_end)
    template = sql.SQL(
        'SELECT imei_norm '
        'FROM {network_imeis_shard} '
        'WHERE EXISTS (SELECT 1 FROM barred_tac_list WHERE tac = LEFT(imei_norm, 8))'
    )
    composed = template.format(network_imeis_shard=sql.Identifier(network_tbl))
    return composed.as_string(conn)
def _update_classification_state_job(self, app_config, run_id, curr_date, virt_imei_range_start, virt_imei_range_end):
    """
    Update the classification_state table with the IMEIs that are met by this condition (single job).

    Indexes and analyzes this run's intermediate table shard, upserts its IMEIs into the
    matching classification_state shard (adjusting block_date and, during the amnesty
    evaluation period, amnesty_granted) and finally end-dates the open rows of this
    condition whose IMEI is no longer in the intermediate shard.

    :param app_config: dirbs config obj
    :param run_id: id of the run, used to derive the intermediate table name
    :param curr_date: user defined current date; defaults to today when None
    :param virt_imei_range_start: virtual imei shard range start
    :param virt_imei_range_end: virtual imei shard range end
    :return: duration reported by the CodeProfiler
    """
    with create_db_connection(app_config.db_config) as conn, conn.cursor() as cursor, CodeProfiler() as cp:
        src_shard_name = partition_utils.imei_shard_name(base_name=self.intermediate_tbl_name(run_id),
                                                         virt_imei_range_start=virt_imei_range_start,
                                                         virt_imei_range_end=virt_imei_range_end)

        # Add a unique index on imei_norm
        unique_imei_index = partition_utils.IndexMetadatum(idx_cols=['imei_norm'], is_unique=True)
        partition_utils.add_indices(conn, tbl_name=src_shard_name, idx_metadata=[unique_imei_index])

        # Analyze table for better stats/plans
        analyze_sql = sql.SQL('ANALYZE {0}').format(sql.Identifier(src_shard_name))
        cursor.execute(analyze_sql)

        # Calculate block date
        if curr_date is None:
            curr_date = datetime.date.today()

        in_amnesty_eval_period, in_amnesty_period = compute_amnesty_flags(app_config, curr_date)

        # If condition is blocking and is not eligible for amnesty, then compute block_date.
        # The block_date is set to NULL for amnesty_eligible condition within the eval period.
        amnesty_eligible = self.config.amnesty_eligible
        amnesty_applies = amnesty_eligible and in_amnesty_eval_period
        if self.config.blocking and not amnesty_applies:
            sticky_block_date = curr_date + datetime.timedelta(days=self.config.grace_period)
        else:
            sticky_block_date = None

        # If the condition's amnesty_eligible flag changed while in eval period, then make sure we update
        # the amnesty_granted column in the classification_state table for existing IMEIs meeting that
        # condition. These rows will be selected by the existing WHERE clause filters as the block_date
        # would change from being NULL to not-NULL or vice-versa.
        if in_amnesty_eval_period:
            set_amnesty_granted_column = sql.SQL(', amnesty_granted = {0}').format(sql.Literal(amnesty_eligible))
        else:
            set_amnesty_granted_column = sql.SQL('')

        # If in amnesty period, update the block_date for IMEIs that were previously classified
        # as amnesty eligible. This filter is to select amnesty_granted IMEIs with not-NULL block date.
        # This is to make sure if the amnesty_end_date was updated, we update the block_date too.
        if in_amnesty_period:
            amnesty_block_date_filter = sql.SQL('OR cs.amnesty_granted = TRUE')
        else:
            amnesty_block_date_filter = sql.SQL('')

        dest_shard_name = partition_utils.imei_shard_name(base_name='classification_state',
                                                          virt_imei_range_start=virt_imei_range_start,
                                                          virt_imei_range_end=virt_imei_range_end)

        # If a condition is blocking, insert new records into state table with not null blocking date or
        # set a not null blocking date for the existing ones having a null block_date.
        # Vice versa, if a condition is not blocking, insert new records into state table with Null
        # block_date or set a Null block_date for the existing ones having a not-null block_date.
        # Set the amnesty_granted column equal to condition's amnesty_eligible flag when in amnesty eval
        # period, otherwise always set it to False for new IMEIs meeting the condition.
        upsert_sql = sql.SQL(  # noqa Q441
            'INSERT INTO {dest_shard} AS cs(imei_norm, cond_name, run_id, start_date, end_date, '
            'block_date, amnesty_granted, virt_imei_shard) '
            'SELECT imei_norm, %s, %s, %s, NULL, %s, %s, calc_virt_imei_shard(imei_norm) '
            'FROM {src_shard} '
            'ON CONFLICT (imei_norm, cond_name) WHERE end_date IS NULL '
            'DO UPDATE SET block_date = CASE WHEN cs.amnesty_granted = TRUE AND NOT {in_eval_period} '
            'THEN {amnesty_end_date} '
            'ELSE {sticky_block_date} '
            'END '
            '{set_amnesty_granted_column} '
            'WHERE (cs.block_date IS NULL AND excluded.block_date IS NOT NULL) '
            'OR (cs.block_date IS NOT NULL AND excluded.block_date IS NULL) '
            '{amnesty_block_date_filter}'
        ).format(src_shard=sql.Identifier(src_shard_name),
                 dest_shard=sql.Identifier(dest_shard_name),
                 in_eval_period=sql.Literal(in_amnesty_eval_period),
                 set_amnesty_granted_column=set_amnesty_granted_column,
                 amnesty_block_date_filter=amnesty_block_date_filter,
                 amnesty_end_date=sql.Literal(app_config.amnesty_config.amnesty_period_end_date),
                 sticky_block_date=sql.Literal(sticky_block_date))
        # Values for the five %s placeholders of the SELECT list, in order.
        upsert_params = [self.label, run_id, curr_date, sticky_block_date, amnesty_applies]
        cursor.execute(upsert_sql, upsert_params)

        # Get rid of records that no longer exist in the matched IMEIs list
        expire_sql = sql.SQL(
            'UPDATE {dest_shard} dst '
            'SET end_date = %s '
            'WHERE cond_name = %s '
            'AND end_date IS NULL '
            'AND NOT EXISTS (SELECT imei_norm '
            'FROM {src_shard} '
            'WHERE imei_norm = dst.imei_norm)'
        ).format(src_shard=sql.Identifier(src_shard_name),
                 dest_shard=sql.Identifier(dest_shard_name))
        cursor.execute(expire_sql, [curr_date, self.label])

    # NOTE(review): duration is read after the profiler context has exited - presumably that is
    # when CodeProfiler finalises it; confirm against CodeProfiler.
    return cp.duration