Example #1
    def partition_registration_list(self, conn, *, num_physical_shards):
        """Method to repartition registration_list for v47 upgrade."""
        with conn.cursor() as cursor, utils.db_role_setter(
                conn, role_name='dirbs_core_power_user'):
            # Create parent partition
            cursor.execute("""CREATE TABLE historic_registration_list_new (
                                   LIKE historic_registration_list INCLUDING DEFAULTS
                                                                   INCLUDING IDENTITY
                                                                   INCLUDING CONSTRAINTS
                                                                   INCLUDING STORAGE
                                                                   INCLUDING COMMENTS
                               )
                               PARTITION BY RANGE (virt_imei_shard)
                            """)
            part_utils._grant_perms_registration_list(
                conn, part_name='historic_registration_list_new')
            # Create child partitions
            part_utils.create_imei_shard_partitions(
                conn,
                tbl_name='historic_registration_list_new',
                num_physical_shards=num_physical_shards,
                perms_func=part_utils._grant_perms_registration_list,
                fillfactor=80)
            # Insert data from original partition
            cursor.execute("""INSERT INTO historic_registration_list_new
                                   SELECT *
                                     FROM historic_registration_list""")

            # Add indexes to each partition
            idx_metadata = [
                part_utils.IndexMetadatum(idx_cols=['imei_norm'],
                                          is_unique=True,
                                          partial_sql='WHERE end_date IS NULL')
            ]
            part_utils.add_indices(conn,
                                   tbl_name='historic_registration_list_new',
                                   idx_metadata=idx_metadata)

            # Drop old view + table, rename tables, indexes and constraints
            cursor.execute('DROP VIEW registration_list')
            cursor.execute('DROP TABLE historic_registration_list CASCADE')
            part_utils.rename_table_and_indices(
                conn,
                old_tbl_name='historic_registration_list_new',
                new_tbl_name='historic_registration_list',
                idx_metadata=idx_metadata)
            cursor.execute("""CREATE OR REPLACE VIEW registration_list AS
                                   SELECT imei_norm, make, model, status, virt_imei_shard
                                     FROM historic_registration_list
                                    WHERE end_date IS NULL WITH CHECK OPTION"""
                           )
            cursor.execute("""GRANT SELECT ON registration_list
                                      TO dirbs_core_classify, dirbs_core_api, dirbs_core_import_registration_list"""
                           )
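The `part_utils.create_imei_shard_partitions` helper above is what turns the new parent table into physical child partitions keyed on `virt_imei_shard` (0-99). A minimal sketch of how those ranges might be derived, assuming the helper splits the 100 virtual shards as evenly as possible across `num_physical_shards`; the hypothetical `virt_imei_shard_ranges` below is illustrative only, the real logic lives in DIRBS's partition utilities:

    def virt_imei_shard_ranges(num_physical_shards):
        """Sketch: split virtual IMEI shards 0-99 into contiguous [start, end)
        ranges, one per physical partition (assumed behaviour)."""
        base, remainder = divmod(100, num_physical_shards)
        ranges, start = [], 0
        for i in range(num_physical_shards):
            end = start + base + (1 if i < remainder else 0)
            ranges.append((start, end))
            start = end
        return ranges

    # virt_imei_shard_ranges(4) -> [(0, 25), (25, 50), (50, 75), (75, 100)], i.e. four
    # child tables, each created as PARTITION OF ... FOR VALUES FROM (start) TO (end).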
Example #2
    def _repartition_pairing_list(self, conn, *, num_physical_shards):
        """Repartition pairing list to implement change in structure."""
        with conn.cursor() as cursor, utils.db_role_setter(
                conn, role_name='dirbs_core_power_user'):
            cursor.execute("""CREATE TABLE historic_pairing_list_new (
                       LIKE historic_pairing_list INCLUDING DEFAULTS
                                                  INCLUDING IDENTITY
                                                  INCLUDING CONSTRAINTS
                                                  INCLUDING STORAGE
                                                  INCLUDING COMMENTS
                   )
                   PARTITION BY RANGE (virt_imei_shard)
                """)
            cursor.execute(
                """ALTER TABLE historic_pairing_list_new ADD COLUMN msisdn TEXT NOT NULL"""
            )

            # grant permissions
            part_utils._grant_perms_pairing_list(
                conn, part_name='historic_pairing_list_new')

            # create child partitions
            part_utils.create_imei_shard_partitions(
                conn,
                tbl_name='historic_pairing_list_new',
                num_physical_shards=num_physical_shards,
                perms_func=part_utils._grant_perms_pairing_list,
                fillfactor=80)

            # copy data from original partition
            cursor.execute("""INSERT INTO historic_pairing_list_new
                                   SELECT p.imei_norm, p.imsi, p.start_date, p.end_date, p.virt_imei_shard, m.msisdn
                                     FROM historic_pairing_list p
                               INNER JOIN monthly_network_triplets_country m ON p.imsi = m.imsi"""
                           )

            # add indexes
            idx_metadata = [
                part_utils.IndexMetadatum(
                    idx_cols=['imei_norm', 'imsi', 'msisdn'],
                    is_unique=True,
                    partial_sql='WHERE end_date IS NULL')
            ]
            part_utils.add_indices(conn,
                                   tbl_name='historic_pairing_list_new',
                                   idx_metadata=idx_metadata)

            # drop old views, tables, indexes and constraints
            cursor.execute('DROP VIEW pairing_list')
            cursor.execute('DROP TABLE historic_pairing_list CASCADE')
            part_utils.rename_table_and_indices(
                conn,
                old_tbl_name='historic_pairing_list_new',
                new_tbl_name='historic_pairing_list',
                idx_metadata=idx_metadata)

            # create new view and grant permissions
            cursor.execute("""CREATE VIEW pairing_list AS
                                   SELECT imei_norm, imsi, msisdn, virt_imei_shard
                                     FROM historic_pairing_list
                                    WHERE end_date IS NULL WITH CHECK OPTION"""
                           )
            cursor.execute("""GRANT SELECT ON pairing_list
                              TO dirbs_core_listgen, dirbs_core_report, dirbs_core_api, dirbs_core_import_pairing_list
                           """)

            # drop and recreate staging data insert trigger
            cursor.execute("""
                            DROP FUNCTION pairing_list_staging_data_insert_trigger_fn() CASCADE;

                            CREATE FUNCTION pairing_list_staging_data_insert_trigger_fn() RETURNS trigger
                                LANGUAGE plpgsql
                                AS $$
                            BEGIN
                                -- Clean/normalize data before inserting
                                NEW.imei_norm = normalize_imei(NULLIF(TRIM(NEW.imei), ''));
                                NEW.imsi = NULLIF(TRIM(NEW.imsi), '');
                                NEW.msisdn = NULLIF(TRIM(NEW.msisdn), '');
                                RETURN NEW;
                            END
                            $$;
            """)
Example #3
    def upgrade(self, db_conn):  # noqa: C901
        """Overrides AbstractMigrator upgrade method."""
        logger = logging.getLogger('dirbs.db')
        with db_conn.cursor() as cursor:
            cursor.execute(
                """CREATE FUNCTION calc_virt_imei_shard(imei TEXT) RETURNS SMALLINT
                              LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE
                              AS $$
                              BEGIN
                                  RETURN SUBSTRING(COALESCE(imei, ''), 13, 2)::SMALLINT;
                              EXCEPTION WHEN OTHERS THEN
                                  RETURN 0;
                              END;
                              $$""")

            # By default, create 4 shards
            num_initial_shards = 4

            logger.info('Re-partitioning classification_state table...')
            cursor.execute(
                'ALTER TABLE classification_state ADD COLUMN virt_imei_shard SMALLINT'
            )
            cursor.execute(
                'UPDATE classification_state SET virt_imei_shard = calc_virt_imei_shard(imei_norm)'
            )
            cursor.execute(
                'ALTER TABLE classification_state ALTER COLUMN virt_imei_shard SET NOT NULL'
            )
            part_utils.repartition_classification_state(
                db_conn, num_physical_shards=num_initial_shards)
            logger.info('Re-partitioned classification_state table')

            logger.info('Re-partitioning registration_list table...')
            cursor.execute(
                'ALTER TABLE historic_registration_list ADD COLUMN virt_imei_shard SMALLINT'
            )
            cursor.execute(
                'UPDATE historic_registration_list SET virt_imei_shard = calc_virt_imei_shard(imei_norm)'
            )
            cursor.execute(
                'ALTER TABLE historic_registration_list ALTER COLUMN virt_imei_shard SET NOT NULL'
            )
            self.partition_registration_list(
                db_conn, num_physical_shards=num_initial_shards)
            logger.info('Re-partitioned registration_list table')

            logger.info('Re-partitioning pairing_list table...')
            cursor.execute(
                'ALTER TABLE historic_pairing_list ADD COLUMN virt_imei_shard SMALLINT'
            )
            cursor.execute(
                'UPDATE historic_pairing_list SET virt_imei_shard = calc_virt_imei_shard(imei_norm)'
            )
            cursor.execute(
                'ALTER TABLE historic_pairing_list ALTER COLUMN virt_imei_shard SET NOT NULL'
            )
            part_utils.repartition_pairing_list(
                db_conn, num_physical_shards=num_initial_shards)
            logger.info('Re-partitioned pairing_list table')

            logger.info('Re-partitioning blacklist table...')
            cursor.execute(
                'ALTER TABLE blacklist ADD COLUMN virt_imei_shard SMALLINT')
            cursor.execute(
                'UPDATE blacklist SET virt_imei_shard = calc_virt_imei_shard(imei_norm)'
            )
            cursor.execute(
                'ALTER TABLE blacklist ALTER COLUMN virt_imei_shard SET NOT NULL'
            )
            part_utils.repartition_blacklist(
                db_conn, num_physical_shards=num_initial_shards)
            logger.info('Re-partitioned blacklist table')

            # Need to make sure owner of list tables is dirbs_core_listgen
            logger.info('Re-partitioning notifications_lists table...')
            # The original notifications_lists were not created with a single sequence for the IDs, so create one now
            with utils.db_role_setter(db_conn, role_name='dirbs_core_listgen'):
                cursor.execute(
                    """CREATE UNLOGGED TABLE notifications_lists_new (
                           row_id BIGSERIAL NOT NULL,
                           operator_id TEXT NOT NULL,
                           imei_norm TEXT NOT NULL,
                           imsi TEXT NOT NULL,
                           msisdn TEXT NOT NULL,
                           block_date DATE NOT NULL,
                           reasons TEXT[] NOT NULL,
                           amnesty_granted BOOLEAN DEFAULT FALSE NOT NULL,
                           start_run_id BIGINT NOT NULL,
                           end_run_id BIGINT,
                           delta_reason TEXT NOT NULL CHECK (delta_reason IN ('new', 'resolved', 'blacklisted',
                                                                              'no_longer_seen', 'changed')),
                           virt_imei_shard SMALLINT NOT NULL
                       ) PARTITION BY LIST (operator_id)
                    """)

            # Work out who the operators are
            partitions = utils.child_table_names(db_conn,
                                                 'notifications_lists')
            # Make sure that they are owned by dirbs_core_listgen (they can be owned by dirbs_core_power_user
            # due to bad previous migration scripts)
            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_power_user'):
                for p in partitions:
                    cursor.execute(
                        sql.SQL('ALTER TABLE {0} OWNER TO dirbs_core_listgen').
                        format(sql.Identifier(p)))

            operators = [
                x.operator_id for x in utils.table_invariants_list(
                    db_conn, partitions, ['operator_id'])
            ]

            # Create operator child partitions
            for op_id in operators:
                tbl_name = part_utils.per_mno_lists_partition(
                    operator_id=op_id,
                    suffix='_new',
                    list_type='notifications')
                part_utils.create_per_mno_lists_partition(
                    db_conn,
                    operator_id=op_id,
                    parent_tbl_name='notifications_lists_new',
                    tbl_name=tbl_name,
                    num_physical_shards=1,
                    unlogged=True,
                    fillfactor=100)

            cursor.execute(
                """INSERT INTO notifications_lists_new(operator_id, imei_norm, imsi, msisdn, block_date,
                                                       reasons, start_run_id, end_run_id, delta_reason,
                                                       virt_imei_shard)
                        SELECT operator_id, imei_norm, imsi, msisdn, block_date,
                               reasons, start_run_id, end_run_id, delta_reason, calc_virt_imei_shard(imei_norm)
                          FROM notifications_lists
                """)
            # Drop old table, rename tables, indexes and constraints
            cursor.execute("""ALTER TABLE notifications_lists_new
                              RENAME CONSTRAINT notifications_lists_new_delta_reason_check
                              TO notifications_lists_delta_reason_check""")
            cursor.execute('DROP TABLE notifications_lists CASCADE')
            cursor.execute("""ALTER SEQUENCE notifications_lists_new_row_id_seq
                              RENAME TO notifications_lists_row_id_seq""")
            part_utils.rename_table_and_indices(
                db_conn,
                old_tbl_name='notifications_lists_new',
                new_tbl_name='notifications_lists')
            part_utils.repartition_notifications_lists(
                db_conn, num_physical_shards=num_initial_shards)
            logger.info('Re-partitioned notifications_lists table')

            logger.info('Re-partitioning exceptions_lists table...')
            # The original exceptions_lists were not created with a single sequence for the IDs, so create one now
            with utils.db_role_setter(db_conn, role_name='dirbs_core_listgen'):
                cursor.execute("""CREATE UNLOGGED TABLE exceptions_lists_new (
                           row_id BIGSERIAL NOT NULL,
                           operator_id TEXT NOT NULL,
                           imei_norm TEXT NOT NULL,
                           imsi TEXT NOT NULL,
                           start_run_id BIGINT NOT NULL,
                           end_run_id BIGINT,
                           delta_reason TEXT NOT NULL CHECK (delta_reason IN ('added', 'removed')),
                           virt_imei_shard SMALLINT NOT NULL
                       ) PARTITION BY LIST (operator_id)
                    """)
            # Work out who the operators are
            partitions = utils.child_table_names(db_conn, 'exceptions_lists')
            # Make sure that they are owned by dirbs_core_listgen (they can be owned by dirbs_core_power_user
            # due to bad previous migration scripts)
            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_power_user'):
                for p in partitions:
                    cursor.execute(
                        sql.SQL('ALTER TABLE {0} OWNER TO dirbs_core_listgen').
                        format(sql.Identifier(p)))
            operators = [
                x.operator_id for x in utils.table_invariants_list(
                    db_conn, partitions, ['operator_id'])
            ]

            # Create operator child partitions
            for op_id in operators:
                tbl_name = part_utils.per_mno_lists_partition(
                    operator_id=op_id, suffix='_new', list_type='exceptions')
                part_utils.create_per_mno_lists_partition(
                    db_conn,
                    operator_id=op_id,
                    parent_tbl_name='exceptions_lists_new',
                    tbl_name=tbl_name,
                    num_physical_shards=1,
                    unlogged=True,
                    fillfactor=100)

            cursor.execute(
                """INSERT INTO exceptions_lists_new(operator_id, imei_norm, imsi, start_run_id,
                                                    end_run_id, delta_reason, virt_imei_shard)
                        SELECT operator_id, imei_norm, imsi, start_run_id, end_run_id, delta_reason,
                               calc_virt_imei_shard(imei_norm)
                          FROM exceptions_lists
                """)
            # Drop old table, rename tables, indexes and constraints
            cursor.execute("""ALTER TABLE exceptions_lists_new
                              RENAME CONSTRAINT exceptions_lists_new_delta_reason_check
                              TO exceptions_lists_delta_reason_check""")
            cursor.execute('DROP TABLE exceptions_lists CASCADE')
            cursor.execute(
                'ALTER SEQUENCE exceptions_lists_new_row_id_seq RENAME TO exceptions_lists_row_id_seq'
            )
            part_utils.rename_table_and_indices(
                db_conn,
                old_tbl_name='exceptions_lists_new',
                new_tbl_name='exceptions_lists')
            part_utils.repartition_exceptions_lists(
                db_conn, num_physical_shards=num_initial_shards)
            logger.info('Re-partitioned exceptions_lists table')

            logger.info('Re-partitioning seen_imeis (network_imeis) table')
            # First, just put everything in a temporary table so that we can call part_utils
            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_import_operator'):
                cursor.execute("""CREATE UNLOGGED TABLE network_imeis (
                           first_seen DATE NOT NULL,
                           last_seen DATE NOT NULL,
                           seen_rat_bitmask INTEGER,
                           imei_norm TEXT NOT NULL,
                           virt_imei_shard SMALLINT NOT NULL
                       )
                    """)
            #
            # We disable index scans here as doing a merge append with index scans is much slower and involves
            # a lot of seeks which kills performance on non-SSD drives. Better to use an append plan and sort
            # the results by imei_norm
            #
            cursor.execute('SET enable_indexscan = false')
            cursor.execute("""INSERT INTO network_imeis
                        SELECT MIN(first_seen),
                               MAX(last_seen),
                               bit_or(seen_rat_bitmask),
                               imei_norm,
                               calc_virt_imei_shard(imei_norm)
                          FROM seen_imeis
                      GROUP BY imei_norm
                """)
            cursor.execute('SET enable_indexscan = true')
            part_utils.repartition_network_imeis(
                db_conn, num_physical_shards=num_initial_shards)
            cursor.execute('DROP TABLE seen_imeis CASCADE')
            logger.info('Re-partitioned seen_imeis (network_imeis) table')

            # First, just put all country-level triplets in a temporary table so that we can call partition_utils
            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_import_operator'):
                cursor.execute(
                    """CREATE UNLOGGED TABLE monthly_network_triplets_country (
                           triplet_year SMALLINT NOT NULL,
                           triplet_month SMALLINT NOT NULL,
                           first_seen DATE NOT NULL,
                           last_seen DATE NOT NULL,
                           date_bitmask INTEGER NOT NULL,
                           triplet_hash UUID NOT NULL,
                           imei_norm TEXT,
                           imsi TEXT,
                           msisdn TEXT,
                           virt_imei_shard SMALLINT NOT NULL,
                           CHECK (last_seen >= first_seen),
                           CHECK (EXTRACT(month FROM last_seen) = triplet_month AND
                                  EXTRACT(year FROM last_seen) = triplet_year),
                           CHECK (EXTRACT(month FROM first_seen) = triplet_month AND
                                  EXTRACT(year FROM first_seen) = triplet_year)
                       ) PARTITION BY RANGE (triplet_year, triplet_month)
                    """)

            # Work out what partitions to create and create them
            partitions = utils.child_table_names(db_conn, 'seen_triplets')
            # Make sure that they are owned by dirbs_core_import_operator (they can be owned by dirbs_core_power_user
            # due to bad previous migration scripts)
            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_power_user'):
                for p in partitions:
                    cursor.execute(
                        sql.SQL(
                            'ALTER TABLE {0} OWNER TO dirbs_core_import_operator'
                        ).format(sql.Identifier(p)))

            year_month_tuples = {
                (x.triplet_year, x.triplet_month)
                for x in utils.table_invariants_list(
                    db_conn, partitions, ['triplet_year', 'triplet_month'])
            }
            for year, month in year_month_tuples:
                part_utils.create_monthly_network_triplets_country_partition(
                    db_conn, month=month, year=year, num_physical_shards=1)

            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_import_operator'):
                cursor.execute(
                    """CREATE UNLOGGED TABLE monthly_network_triplets_per_mno (
                            LIKE monthly_network_triplets_country INCLUDING ALL,
                            operator_id TEXT NOT NULL
                       ) PARTITION BY LIST (operator_id)
                    """)

            # Work out what partitions to create and create them
            op_year_month_tuples = {
                (x.operator_id, x.triplet_year, x.triplet_month)
                for x in utils.table_invariants_list(
                    db_conn, partitions,
                    ['operator_id', 'triplet_year', 'triplet_month'])
            }
            # Create child partitions at per-MNO level
            for op, year, month in op_year_month_tuples:
                part_utils.create_monthly_network_triplets_per_mno_partition(
                    db_conn,
                    operator_id=op,
                    month=month,
                    year=year,
                    num_physical_shards=1)

            # Populate the temporary monthly_network_triplets_per_mno table
            for year, month in year_month_tuples:
                logger.info(
                    'Generating temporary monthly_network_triplets_per_mno entries for {0:02d}/{1:d}...'
                    .format(month, year))
                cursor.execute(
                    """INSERT INTO monthly_network_triplets_per_mno
                            SELECT %(year)s,
                                   %(month)s,
                                   first_seen,
                                   last_seen,
                                   date_bitmask,
                                   triplet_hash,
                                   imei_norm,
                                   imsi,
                                   msisdn,
                                   calc_virt_imei_shard(imei_norm),
                                   operator_id
                              FROM seen_triplets
                             WHERE triplet_year = %(year)s
                               AND triplet_month = %(month)s
                    """, {
                        'year': year,
                        'month': month
                    })
                logger.info(
                    'Generated temporary monthly_network_triplets_per_mno entries for {0:02d}/{1:d}'
                    .format(month, year))

            # Populate the temporary monthly_network_triplets_country table. We need to do this month by month
            # as we aggregate by triplet on a monthly basis
            #
            # We disable index scans here as doing a merge append with index scans is much slower and involves
            # a lot of seeks which kills performance on non-SSD drives. Better to use an append plan and sort
            # the results by imei_norm
            #
            cursor.execute('SET enable_indexscan = false')
            for year, month in year_month_tuples:
                logger.info(
                    'Generating temporary monthly_network_triplets_country entries for {0:02d}/{1:d}...'
                    .format(month, year))
                cursor.execute(
                    """INSERT INTO monthly_network_triplets_country
                            SELECT %(year)s,
                                   %(month)s,
                                   MIN(first_seen),
                                   MAX(last_seen),
                                   bit_or(date_bitmask),
                                   triplet_hash,
                                   FIRST(imei_norm),
                                   FIRST(imsi),
                                   FIRST(msisdn),
                                   calc_virt_imei_shard(FIRST(imei_norm))
                              FROM seen_triplets
                             WHERE triplet_year = %(year)s
                               AND triplet_month = %(month)s
                          GROUP BY triplet_hash
                    """, {
                        'year': year,
                        'month': month
                    })
                logger.info(
                    'Generated temporary monthly_network_triplets_country entries for {0:02d}/{1:d}'
                    .format(month, year))
            cursor.execute('SET enable_indexscan = true')

            logger.info(
                'Re-partitioning temporary monthly_network_triplets tables...')
            # Previously, the operator_data view was owned by dirbs_core_power_user, but it is now owned by
            # dirbs_core_import_operator since it must be re-created
            with utils.db_role_setter(db_conn,
                                      role_name='dirbs_core_power_user'):
                cursor.execute(
                    'ALTER VIEW operator_data OWNER TO dirbs_core_import_operator'
                )
            part_utils.repartition_monthly_network_triplets(
                db_conn, num_physical_shards=num_initial_shards)
            cursor.execute('DROP TABLE seen_triplets CASCADE')
            logger.info(
                'Re-partitioned temporary monthly_network_triplets tables')

            # Replace list generation function to include virt_imei_shard
            cursor.execute("""
                DROP FUNCTION gen_blacklist(run_id BIGINT);
                DROP FUNCTION gen_notifications_list(op_id TEXT, run_id BIGINT);
                DROP FUNCTION gen_exceptions_list(op_id TEXT, run_id BIGINT);

                --
                -- Create function to generate a full blacklist for a given run_id. A value of -1 means get the latest
                -- list.
                --
                CREATE FUNCTION gen_blacklist(run_id BIGINT = -1)
                    RETURNS TABLE (
                        imei_norm       TEXT,
                        virt_imei_shard SMALLINT,
                        block_date      DATE,
                        reasons         TEXT[]
                    )
                    LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                    AS $$
                DECLARE
                    query_run_id    BIGINT;
                BEGIN
                    --
                    -- If we don't specify a run_id, just set to the maximum run_id which will always return all rows
                    -- where end_run_id is NULL
                    --
                    IF run_id = -1 THEN
                        run_id := max_bigint();
                    END IF;

                    RETURN QUERY SELECT bl.imei_norm,
                                        bl.virt_imei_shard,
                                        bl.block_date,
                                        bl.reasons
                                   FROM blacklist bl
                                  WHERE bl.delta_reason != 'unblocked'
                                    AND run_id >= bl.start_run_id
                                    AND (run_id < bl.end_run_id OR bl.end_run_id IS NULL);
                END
                $$;

                --
                -- Create function to generate a full notifications_list for a given run_id and operator ID. A value
                -- of -1 means get the latest list.
                --
                CREATE FUNCTION gen_notifications_list(op_id TEXT, run_id BIGINT = -1)
                    RETURNS TABLE (
                        imei_norm       TEXT,
                        virt_imei_shard SMALLINT,
                        imsi            TEXT,
                        msisdn          TEXT,
                        block_date      DATE,
                        reasons         TEXT[],
                        amnesty_granted BOOLEAN
                    )
                    LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                    AS $$
                BEGIN
                    --
                    -- If we don't specify a run_id, just set to the maximum run_id which will always return all rows
                    -- where end_run_id is NULL
                    --
                    IF run_id = -1 THEN
                        run_id := max_bigint();
                    END IF;

                    RETURN QUERY SELECT nl.imei_norm,
                                        nl.virt_imei_shard,
                                        nl.imsi,
                                        nl.msisdn,
                                        nl.block_date,
                                        nl.reasons,
                                        nl.amnesty_granted
                                   FROM notifications_lists nl
                                  WHERE nl.operator_id = op_id
                                    AND nl.delta_reason NOT IN ('resolved', 'blacklisted')
                                    AND run_id >= nl.start_run_id
                                    AND (run_id < nl.end_run_id OR nl.end_run_id IS NULL);
                END
                $$;

                --
                -- Create function to generate a full exceptions_list for a given run_id and operator ID. A value
                -- of -1 means get the latest list.
                --
                CREATE FUNCTION gen_exceptions_list(op_id TEXT, run_id BIGINT = -1)
                    RETURNS TABLE (
                        imei_norm       TEXT,
                        virt_imei_shard SMALLINT,
                        imsi            TEXT
                    )
                    LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                    AS $$
                BEGIN
                    --
                    -- If we don't specify a run_id, just set to the maximum run_id which will always return all
                    -- rows where end_run_id is NULL
                    --
                    IF run_id = -1 THEN
                        run_id := max_bigint();
                    END IF;

                    RETURN QUERY SELECT el.imei_norm,
                                        el.virt_imei_shard,
                                        el.imsi
                                   FROM exceptions_lists el
                                  WHERE el.operator_id = op_id
                                    AND el.delta_reason != 'removed'
                                    AND run_id >= el.start_run_id
                                    AND (run_id < el.end_run_id OR el.end_run_id IS NULL);
                END
                $$;
            """)  # noqa: Q440, Q441

            # Update schema metadata table
            cursor.execute(
                """ALTER TABLE schema_metadata ADD COLUMN phys_shards SMALLINT NOT NULL
                              DEFAULT %s CHECK (phys_shards > 0 AND phys_shards <= 100)""",
                [num_initial_shards])
            cursor.execute(
                'ALTER TABLE schema_metadata ALTER COLUMN phys_shards DROP DEFAULT'
            )

            # Drop obsolete columns
            cursor.execute(
                'ALTER TABLE schema_metadata DROP COLUMN potential_whitespace_imsis_msisdns'
            )
            cursor.execute(
                'ALTER TABLE report_monthly_stats DROP COLUMN num_whitespace_imsi_records'
            )
            cursor.execute(
                'ALTER TABLE report_monthly_stats DROP COLUMN num_whitespace_msisdn_records'
            )
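Everything in this upgrade keys off `calc_virt_imei_shard`, which maps a normalized IMEI to one of 100 virtual shards using digits 13-14 and falls back to shard 0 on any error. A Python analogue of the SQL function defined at the top of this example, shown purely as an illustrative sketch:

    def calc_virt_imei_shard(imei):
        """Sketch: Python equivalent of the SQL calc_virt_imei_shard() above.
        Returns an integer in 0-99 taken from characters 13-14 of the IMEI,
        or 0 for NULL/short/non-numeric input (mirroring the EXCEPTION branch)."""
        try:
            return int((imei or '')[12:14])
        except ValueError:
            return 0

    # calc_virt_imei_shard('3569380334279343') == 93; calc_virt_imei_shard(None) == 0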
Example #4
    def _repartition_exceptions_lists(self, conn, *, num_physical_shards):
        """Repartition the exceptions lists to support msisdn."""
        with conn.cursor() as cursor, utils.db_role_setter(
                conn, role_name='dirbs_core_listgen'):
            cursor.execute("""CREATE TABLE exceptions_lists_new (
                       LIKE exceptions_lists INCLUDING DEFAULTS
                                             INCLUDING IDENTITY
                                             INCLUDING CONSTRAINTS
                                             INCLUDING STORAGE
                                             INCLUDING COMMENTS
                   )
                   PARTITION BY LIST (operator_id);

                   ALTER TABLE exceptions_lists_new ADD COLUMN msisdn TEXT NOT NULL;
                """)

            part_utils._grant_perms_list(
                conn,
                part_name='exceptions_lists_new')  # grant relevant permissions
            imei_shard_names = utils.child_table_names(
                conn, 'exceptions_lists')  # determine the child table names
            operators = [
                o.operator_id for o in utils.table_invariants_list(
                    conn, imei_shard_names, ['operator_id'])
            ]  # work out who the operators are

            # create child partitions for new list (operator at top level, then IMEI sharded)
            for op_id in operators:
                tbl_name = part_utils.per_mno_lists_partition(
                    operator_id=op_id, suffix='_new', list_type='exceptions')
                part_utils.create_per_mno_lists_partition(
                    conn,
                    parent_tbl_name='exceptions_lists_new',
                    tbl_name=tbl_name,
                    operator_id=op_id,
                    num_physical_shards=num_physical_shards)

            # insert data into the new parent partition
            cursor.execute("""INSERT INTO exceptions_lists_new
                                   SELECT e.row_id, e.operator_id, e.imei_norm, e.imsi, e.start_run_id, e.end_run_id,
                                          e.delta_reason, e.virt_imei_shard, p.msisdn
                                     FROM exceptions_lists e
                               INNER JOIN historic_pairing_list p ON e.imsi = p.imsi"""
                           )

            # add indexes to each partition
            part_utils.add_indices(
                conn,
                tbl_name='exceptions_lists_new',
                idx_metadata=part_utils.exceptions_lists_indices())

            # drop old table, after assigning sequence to new table
            cursor.execute(
                'ALTER SEQUENCE exceptions_lists_row_id_seq OWNED BY exceptions_lists_new.row_id'
            )
            cursor.execute('DROP TABLE exceptions_lists CASCADE')

            # rename table, indexes and constraints
            part_utils.rename_table_and_indices(
                conn,
                old_tbl_name='exceptions_lists_new',
                new_tbl_name='exceptions_lists',
                idx_metadata=part_utils.exceptions_lists_indices())

            # recreate the gen_exceptions_list function
            with utils.db_role_setter(conn, role_name='dirbs_core_power_user'):
                cursor.execute("""
                                DROP FUNCTION gen_exceptions_list(op_id TEXT, run_id BIGINT);

                                --
                                -- Recreate function to generate a full exceptions_list for a given
                                -- run_id and operator.
                                -- A value of -1 means get the latest list.
                                --
                                CREATE FUNCTION gen_exceptions_list(op_id TEXT, run_id BIGINT = -1)
                                    RETURNS TABLE (
                                        imei_norm       TEXT,
                                        virt_imei_shard SMALLINT,
                                        imsi            TEXT,
                                        msisdn          TEXT
                                    )
                                    LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                                    AS $$
                                BEGIN
                                    --
                                    -- If we don't specify a run_id, just set to the maximum run_id which will always
                                    -- return all rows where end_run_id is NULL
                                    --
                                    IF run_id = -1 THEN
                                        run_id := max_bigint();
                                    END IF;

                                    RETURN QUERY SELECT el.imei_norm,
                                                        el.virt_imei_shard,
                                                        el.imsi,
                                                        el.msisdn
                                                   FROM exceptions_lists el
                                                  WHERE el.operator_id = op_id
                                                    AND el.delta_reason != 'removed'
                                                    AND run_id >= el.start_run_id
                                                    AND (run_id < el.end_run_id OR el.end_run_id IS NULL);
                                END
                                $$;

                                DROP FUNCTION gen_delta_exceptions_list(op_id TEXT, base_run_id BIGINT, run_id BIGINT);

                                --
                                -- Create function to generate a per-MNO delta exceptions list for a run_id, operator
                                -- id and optional base_run_id.
                                --
                                -- If no base_run_id is supplied, this function will use the maximum run_id found in
                                -- the DB that is less than the supplied run_id
                                --
                                CREATE FUNCTION gen_delta_exceptions_list(op_id TEXT,
                                                                          base_run_id BIGINT,
                                                                          run_id BIGINT = -1)
                                    RETURNS TABLE (
                                        imei_norm       TEXT,
                                        imsi            TEXT,
                                        msisdn          TEXT,
                                        delta_reason    TEXT
                                    )
                                    LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
                                    AS $$
                                BEGIN
                                    --
                                    -- If we don't specify a run_id, just set to the maximum run_id
                                    --
                                    IF run_id = -1 THEN
                                        run_id := max_bigint();
                                    END IF;

                                    IF run_id < base_run_id THEN
                                      RAISE EXCEPTION 'Parameter base_run_id % greater than run_id %',
                                                      base_run_id, run_id;
                                    END IF;

                                    RETURN QUERY SELECT *
                                                   FROM (SELECT el.imei_norm,
                                                                el.imsi,
                                                                el.msisdn,
                                                                overall_delta_reason(el.delta_reason
                                                                        ORDER BY start_run_id DESC) AS delta_reason
                                                           FROM exceptions_lists el
                                                          WHERE operator_id = op_id
                                                            AND start_run_id > base_run_id
                                                            AND start_run_id <= run_id
                                                       GROUP BY el.imei_norm, el.imsi, el.msisdn) x
                                                  WHERE x.delta_reason IS NOT NULL;
                                END
                                $$;
                                """)  # noqa: Q440, Q441