Example #1
0
 def test_migrate_getting_natural_key(self):
     """Natural-key column lookup returns the expected names for each migrated prod table."""
     with EdMigrateDestConnection(tenant=get_unittest_prod_tenant_name()) as prod_conn:
         # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual
         self.assertEqual(get_natural_key_columns(prod_conn.get_table('dim_student')), ['student_id'])
         self.assertEqual(get_natural_key_columns(prod_conn.get_table('dim_asmt')), ['asmt_guid'])
         self.assertEqual(get_natural_key_columns(prod_conn.get_table('fact_asmt_outcome_vw')),
                          ['asmt_guid', 'student_id', 'date_taken'])
         self.assertEqual(get_natural_key_columns(prod_conn.get_table('fact_asmt_outcome')),
                          ['asmt_guid', 'student_id', 'date_taken'])
         self.assertEqual(get_natural_key_columns(prod_conn.get_table('dim_inst_hier')),
                          ['state_code', 'district_id', 'school_id'])
def update_rec_id_for_records_to_delete(conf, target_conn, table_name, prod_records_matched):
    """Update pre-prod records primary key and rec_status based on matching records from prod


    For all the matching records the natural_key will be set to the natural key of the record from prod
    and the rec_status will be updated to 'D' from 'W'

    :param conf: udl configuration object
    :param target_conn: connection object to pre-prod database
    :param table_name: name of the table being updated
    :param prod_records_matched: batch of records from prod that matches with pre-prod 'W' records based on natural keys

    :raises UDLDataIntegrityError: wraps any IntegrityError raised by the UPDATE
        (e.g. attempting to delete the same record more than once)
    """
    table = target_conn.get_table(table_name)
    for record in prod_records_matched:
        # Copy every primary-key value from the matched prod record onto the pre-prod row.
        values = {table.c[pk_column]: record[pk_column] for pk_column in table.primary_key.columns.keys()}
        values[table.c[Constants.REC_STATUS]] = Constants.STATUS_DELETE
        # Match the pre-prod row by natural key, restricted to this batch's 'W' (waiting) rows.
        criteria = [table.c[nk_column] == record[nk_column] for nk_column in get_natural_key_columns(table)]
        criteria.append(table.c.batch_guid == conf[mk.GUID_BATCH])
        criteria.append(table.c.rec_status == Constants.STATUS_WAITING)
        query = update(table).values(values).where(and_(*criteria))
        try:
            target_conn.execute(query)
        except IntegrityError as ie:
            # Wrap the DB error with batch/schema context before surfacing it.
            e = UDLDataIntegrityError(conf[mk.GUID_BATCH], ie,
                                      "{schema}.{table}".format(schema=conf[mk.PROD_DB_SCHEMA], table=table_name),
                                      ErrorSource.DELETE_FACT_ASMT_OUTCOME_RECORD_MORE_THAN_ONCE,
                                      conf[mk.UDL_PHASE_STEP],
                                      conf[mk.TARGET_DB_SCHEMA])
            failure_time = datetime.datetime.now()
            # Record the failure in the error list table before aborting.
            e.insert_err_list(failure_time)
            # raise an exception and stop the pipeline
            raise e
def yield_records_to_be_deleted(prod_conn, table_name, records_marked_for_deletion, batch_size=100):
    """Stream prod records whose natural keys match pre-prod rows marked for deletion.

    Selects every 'C' (current) record from the prod table whose natural key
    appears in *records_marked_for_deletion* and yields the result set in
    chunks of *batch_size*.

    :param prod_conn: connection object to prod database
    :param table_name: name of the table as string
    :param records_marked_for_deletion: records from pre-prod marked for deletion (rec_status: 'W')
    :param batch_size: batch size to yield results

    @return: Yields records from prod in size of batch_size
    """
    table = prod_conn.get_table(table_name)
    nk_names = get_natural_key_columns(table)
    selected_columns = [table.c[name] for name in get_columns_names_to_pick_for_delete(table)]
    nk_columns = [table.columns[name] for name in nk_names]
    nk_values = [[rec[name] for name in nk_names] for rec in records_marked_for_deletion]
    # Row-value membership test: (k1, k2, ...) IN ((v1, v2, ...), ...)
    stmt = select(selected_columns, from_obj=table).where(
        and_(table.c.rec_status == Constants.STATUS_CURRENT,
             tuple_(*nk_columns).in_(nk_values)))
    cursor = prod_conn.execute(stmt, stream_results=True)
    while True:
        chunk = cursor.fetchmany(batch_size)
        if not chunk:
            break
        yield chunk
# NOTE(review): this redefines yield_records_to_be_deleted declared earlier in
# this file with identical behavior; the duplicate definition should be removed.
def yield_records_to_be_deleted(prod_conn,
                                table_name,
                                records_marked_for_deletion,
                                batch_size=100):
    """Yield prod records whose natural keys match pre-prod rows marked for deletion.

    Selects every 'C' (current) record from the prod table whose natural key
    appears in *records_marked_for_deletion* and yields the rows in chunks.

    :param prod_conn: connection object to prod database
    :param table_name: name of the table as string
    :param records_marked_for_deletion: records from pre-prod marked for deletion (rec_status: 'W')
    :param batch_size: batch size to yield results

    @return: Yields records from prod in size of batch_size
    """
    table = prod_conn.get_table(table_name)
    natural_keys = get_natural_key_columns(table)
    # Columns to return for each matched prod row.
    columns_to_select = [
        table.c[column_name]
        for column_name in get_columns_names_to_pick_for_delete(table)
    ]
    key_columns = [table.columns[key] for key in natural_keys]
    key_values = [[row[key] for key in natural_keys]
                  for row in records_marked_for_deletion]
    # Row-value membership test: (k1, k2, ...) IN ((v1, v2, ...), ...)
    query = select(columns_to_select, from_obj=table).where(
        and_(table.c.rec_status == Constants.STATUS_CURRENT,
             tuple_(*key_columns).in_(key_values)))
    # stream_results avoids materializing the whole result set client-side
    result = prod_conn.execute(query, stream_results=True)
    rows = result.fetchmany(batch_size)
    while len(rows) > 0:
        yield rows
        rows = result.fetchmany(batch_size)
Example #5
0
 def test_get_natural_key_columns(self):
     """get_natural_key_columns returns the configured natural-key column names."""
     actual = get_natural_key_columns(self.__test_dim_table)
     expected = [c.name for c in self.__nkcol]
     # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
     # Compare as sets: column order is not part of the contract being tested.
     self.assertEqual(set(actual), set(expected))
Example #6
0
 def test_get_natural_key_columns(self):
     """Natural-key lookup on the test dim table yields the expected names."""
     cols_found = get_natural_key_columns(self.__test_dim_table)
     cols_expected = [c.name for c in self.__nkcol]
     # assertEquals is deprecated (removed in Python 3.12); assertEqual is the
     # supported spelling.  Sets ignore ordering, which is not under test.
     self.assertEqual(set(cols_found), set(cols_expected))
 def __init__(self, connector, batch_guid, table_name):
     """Cache table metadata and matching helpers for one batch.

     :param connector: DB connector exposing get_table()
     :param batch_guid: GUID identifying the batch this instance operates on
     :param table_name: name of the table to reflect via the connector
     """
     self._conn = connector
     self._table = connector.get_table(table_name)
     # Natural-key column names used to match records for this table.
     self._natural_key_column_names = get_natural_key_columns(self._table)
     self._matcher = _Matcher(get_matcher_key_column_names(self._table))
     # Reusable SQLAlchemy clause restricting queries to this batch.
     self._batch_clause = (
         self._table.c[Constants.BATCH_GUID] == batch_guid)
Example #8
0
 def test_get_natural_key_columns_when_none_defined(self):
     """A table with no natural-key metadata yields an empty column list."""
     test_table = Table('test_table', self.__metadata,
                        Column('student_rec_id', BigInteger, primary_key=True),
                        Column('batch_guid', String(50), nullable=True))
     # assertEqual reports the actual length on failure, unlike assertTrue
     self.assertEqual(len(get_natural_key_columns(test_table)), 0)
Example #9
0
 def test_get_natural_key_columns_when_none_defined(self):
     """Tables lacking natural-key metadata produce an empty column list."""
     test_table = Table(
         'test_table', self.__metadata,
         Column('student_rec_id', BigInteger, primary_key=True),
         Column('batch_guid', String(50), nullable=True))
     # assertEqual gives a clearer failure message than assertTrue(len(...) == 0)
     self.assertEqual(len(get_natural_key_columns(test_table)), 0)
Example #10
0
 def test_migrate_getting_natural_key(self):
     """Natural-key columns of the migrated prod tables match expectations."""
     with EdMigrateDestConnection(
             tenant=get_unittest_prod_tenant_name()) as prod_conn:
         # assertEquals is a deprecated alias (removed in Python 3.12);
         # assertEqual is the supported spelling.
         self.assertEqual(
             get_natural_key_columns(prod_conn.get_table('dim_student')),
             ['student_id'])
         self.assertEqual(
             get_natural_key_columns(prod_conn.get_table('dim_asmt')),
             ['asmt_guid'])
         self.assertEqual(
             get_natural_key_columns(
                 prod_conn.get_table('fact_asmt_outcome_vw')),
             ['asmt_guid', 'student_id', 'date_taken'])
         self.assertEqual(
             get_natural_key_columns(
                 prod_conn.get_table('fact_asmt_outcome')),
             ['asmt_guid', 'student_id', 'date_taken'])
         self.assertEqual(
             get_natural_key_columns(prod_conn.get_table('dim_inst_hier')),
             ['state_code', 'district_id', 'school_id'])
def update_rec_id_for_records_to_delete(conf, target_conn, table_name,
                                        prod_records_matched):
    """Flip matched pre-prod 'W' records to 'D' and adopt the prod primary keys.

    Every record in *prod_records_matched* identifies, by natural key, a
    pre-prod row belonging to the current batch that is still in 'W'
    (waiting) status.  That row's primary-key columns are overwritten with
    the prod values and its rec_status becomes 'D'.

    :param conf: udl configuration object
    :param target_conn: connection object to pre-prod database
    :param table_name: name of the table being updated
    :param prod_records_matched: batch of records from prod that matches with pre-prod 'W' records based on natural keys

    :raises UDLDataIntegrityError: if the UPDATE hits an integrity constraint
    """
    table = target_conn.get_table(table_name)
    for prod_record in prod_records_matched:
        # New column values: prod primary key plus the 'D' status flag.
        new_values = {table.c[name]: prod_record[name]
                      for name in table.primary_key.columns.keys()}
        new_values[table.c[Constants.REC_STATUS]] = Constants.STATUS_DELETE
        # Locate the pre-prod row: same natural key, same batch, still waiting.
        where_clauses = [table.c[name] == prod_record[name]
                         for name in get_natural_key_columns(table)]
        where_clauses += [table.c.batch_guid == conf[mk.GUID_BATCH],
                          table.c.rec_status == Constants.STATUS_WAITING]
        stmt = update(table).values(new_values).where(and_(*where_clauses))
        try:
            target_conn.execute(stmt)
        except IntegrityError as ie:
            # Attach batch/schema context, log to the error list, then abort.
            error = UDLDataIntegrityError(
                conf[mk.GUID_BATCH], ie,
                "{schema}.{table}".format(schema=conf[mk.PROD_DB_SCHEMA],
                                          table=table_name),
                ErrorSource.DELETE_FACT_ASMT_OUTCOME_RECORD_MORE_THAN_ONCE,
                conf[mk.UDL_PHASE_STEP], conf[mk.TARGET_DB_SCHEMA])
            failure_time = datetime.datetime.now()
            error.insert_err_list(failure_time)
            # stop the pipeline on data-integrity failure
            raise error
def preprod_to_prod_insert_records(source_connector, dest_connector, table_name,
                                   primary_key_field_name, batch, deactivate):
    '''Process inserts for the batch

    :param source_connector: Source connection
    :param dest_connector: Destination connection
    :param table_name: name of the table to be migrated
    :param primary_key_field_name: primary key for the table_name
    :param batch: batch of records to be inserted
    :param deactivate: when True (and natural keys exist), deactivate old
        prod records sharing a natural key with the batch before inserting

    :returns: number of records inserted (the batch size)

    :raises EdMigrateRecordInsertionException: if the number of rows
        reported inserted differs from the batch size
    '''
    dest_table = dest_connector.get_table(table_name)
    natural_keys = get_natural_key_columns(dest_table)
    # the deactivate flag is needed to avoid the record deactivation query path in unit tests
    # this part is tested as part of function tests
    if deactivate and natural_keys is not None:
        deactivate_old_records(dest_connector, dest_table, natural_keys, batch)
    # insert the new records to prod with rec_status as current
    insert_query = dest_table.insert()
    records_inserted = dest_connector.execute(insert_query, batch).rowcount
    batch_size = len(batch)
    if records_inserted != batch_size:
        # raise an explicit instance rather than the bare class for clarity
        raise EdMigrateRecordInsertionException()
    return batch_size