def test_migrate_getting_natural_key(self):
    with EdMigrateDestConnection(tenant=get_unittest_prod_tenant_name()) as prod_conn:
        self.assertEquals(get_natural_key_columns(prod_conn.get_table('dim_student')),
                          ['student_id'])
        self.assertEquals(get_natural_key_columns(prod_conn.get_table('dim_asmt')),
                          ['asmt_guid'])
        self.assertEquals(get_natural_key_columns(prod_conn.get_table('fact_asmt_outcome_vw')),
                          ['asmt_guid', 'student_id', 'date_taken'])
        self.assertEquals(get_natural_key_columns(prod_conn.get_table('fact_asmt_outcome')),
                          ['asmt_guid', 'student_id', 'date_taken'])
        self.assertEquals(get_natural_key_columns(prod_conn.get_table('dim_inst_hier')),
                          ['state_code', 'district_id', 'school_id'])
def update_rec_id_for_records_to_delete(conf, target_conn, table_name, prod_records_matched):
    """Update pre-prod records' primary key and rec_status based on matching records from prod.

    For each matching record, the pre-prod record's primary key is set to the primary key of the
    corresponding prod record, and its rec_status is updated from 'W' to 'D'.

    :param conf: udl configuration object
    :param target_conn: connection object to pre-prod database
    :param table_name: name of the table being updated
    :param prod_records_matched: batch of records from prod that match pre-prod 'W' records
                                 based on natural keys
    """
    table = target_conn.get_table(table_name)
    for record in prod_records_matched:
        # Copy the prod record's primary key onto the pre-prod record and mark it for deletion
        values = {table.c[pk_column]: record[pk_column]
                  for pk_column in table.primary_key.columns.keys()}
        values[table.c[Constants.REC_STATUS]] = Constants.STATUS_DELETE
        # Match on the natural key, restricted to this batch's waiting ('W') records
        criteria = [table.c[nk_column] == record[nk_column]
                    for nk_column in get_natural_key_columns(table)]
        criteria.append(table.c.batch_guid == conf[mk.GUID_BATCH])
        criteria.append(table.c.rec_status == Constants.STATUS_WAITING)
        query = update(table).values(values).where(and_(*criteria))
        try:
            target_conn.execute(query)
        except IntegrityError as ie:
            e = UDLDataIntegrityError(conf[mk.GUID_BATCH], ie,
                                      "{schema}.{table}".format(schema=conf[mk.PROD_DB_SCHEMA],
                                                                table=table_name),
                                      ErrorSource.DELETE_FACT_ASMT_OUTCOME_RECORD_MORE_THAN_ONCE,
                                      conf[mk.UDL_PHASE_STEP], conf[mk.TARGET_DB_SCHEMA])
            failure_time = datetime.datetime.now()
            e.insert_err_list(failure_time)
            # raise an exception and stop the pipeline
            raise e
def yield_records_to_be_deleted(prod_conn, table_name, records_marked_for_deletion, batch_size=100):
    """Yield prod records that match pre-prod records marked for deletion.

    The method yields current ('C') records from the prod table whose natural keys match
    pre-prod records marked for deletion (rec_status: 'W'), in batches of batch_size.

    :param prod_conn: connection object to prod database
    :param table_name: name of the table as string
    :param records_marked_for_deletion: records from pre-prod marked for deletion (rec_status: 'W')
    :param batch_size: batch size to yield results
    @return: Yields records from prod in batches of batch_size
    """
    table = prod_conn.get_table(table_name)
    natural_keys = get_natural_key_columns(table)
    columns_to_select = [table.c[column_name]
                         for column_name in get_columns_names_to_pick_for_delete(table)]
    key_columns = [table.columns[key] for key in natural_keys]
    key_values = [[row[key] for key in natural_keys] for row in records_marked_for_deletion]
    # Select current prod records whose natural-key tuple matches any pre-prod 'W' record
    query = select(columns_to_select, from_obj=table).where(
        and_(table.c.rec_status == Constants.STATUS_CURRENT,
             tuple_(*key_columns).in_(key_values)))
    result = prod_conn.execute(query, stream_results=True)
    rows = result.fetchmany(batch_size)
    while len(rows) > 0:
        yield rows
        rows = result.fetchmany(batch_size)
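# Illustrative only: a minimal sketch of how the two helpers above might be driven together.
# The function name, the conf dict, the connection objects, and records_marked_for_deletion are
# assumed here for illustration; this is not the project's actual pipeline code.
def _process_deletes_sketch(conf, prod_conn, target_conn, table_name, records_marked_for_deletion):
    # Stream matching current ('C') records from prod in batches and, for each batch, rewrite the
    # corresponding pre-prod 'W' records with the prod primary keys and rec_status 'D'.
    for prod_batch in yield_records_to_be_deleted(prod_conn, table_name,
                                                  records_marked_for_deletion, batch_size=100):
        update_rec_id_for_records_to_delete(conf, target_conn, table_name, prod_batch)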
def test_get_natural_key_columns(self):
    ''' test get columns '''
    cols1 = get_natural_key_columns(self.__test_dim_table)
    cols2 = [c.name for c in self.__nkcol]
    self.assertEquals(set(cols1), set(cols2))
def __init__(self, connector, batch_guid, table_name):
    self._conn = connector
    self._table = connector.get_table(table_name)
    # Natural-key and matcher-key column names drive record matching for this table
    self._natural_key_column_names = get_natural_key_columns(self._table)
    self._matcher = _Matcher(get_matcher_key_column_names(self._table))
    # Restrict all operations to records belonging to this batch
    self._batch_clause = (self._table.c[Constants.BATCH_GUID] == batch_guid)
def test_get_natural_key_columns_when_none_defined(self):
    ''' test getting natural key columns if not defined '''
    test_table = Table('test_table', self.__metadata,
                       Column('student_rec_id', BigInteger, primary_key=True),
                       Column('batch_guid', String(50), nullable=True))
    self.assertTrue(len(get_natural_key_columns(test_table)) == 0)
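# Illustrative only: the tests above exercise get_natural_key_columns without showing it.
# A minimal sketch of one possible implementation, assuming natural-key columns are flagged
# via each Column's info dict, e.g. Column('student_id', ..., info={'natural_key': True}).
# The real project may declare natural keys differently.
def get_natural_key_columns_sketch(table):
    # Return the names of columns flagged as part of the table's natural key,
    # or an empty list when the table declares none (as in the test above).
    return [column.name for column in table.columns
            if column.info.get('natural_key', False)]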
def preprod_to_prod_insert_records(source_connector, dest_connector, table_name,
                                   primary_key_field_name, batch, deactivate):
    '''Process inserts for the batch

    :param source_connector: Source connection
    :param dest_connector: Destination connection
    :param table_name: name of the table to be migrated
    :param primary_key_field_name: primary key for the table_name
    :param batch: batch of records to be inserted
    :param deactivate: when True, deactivate matching old prod records before inserting
    :returns: number of records inserted
    '''
    dest_table = dest_connector.get_table(table_name)
    natural_keys = get_natural_key_columns(dest_table)
    # the deactivate flag is needed to avoid the record deactivation query path in unit tests;
    # this part is tested as part of functional tests
    if deactivate and natural_keys is not None:
        deactivate_old_records(dest_connector, dest_table, natural_keys, batch)
    # insert the new records into prod with rec_status as current
    insert_query = dest_table.insert()
    records_inserted = dest_connector.execute(insert_query, batch).rowcount
    batch_size = len(batch)
    if records_inserted != batch_size:
        raise EdMigrateRecordInsertionException
    return batch_size
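# Illustrative only: deactivate_old_records is referenced above but not defined in this section.
# A minimal sketch under the assumption that it flips prod records sharing a natural key with the
# incoming batch from current to an inactive status; Constants.STATUS_INACTIVE and the exact
# matching logic are assumptions for illustration, not the project's actual implementation.
def deactivate_old_records_sketch(dest_connector, dest_table, natural_keys, batch):
    key_columns = [dest_table.c[key] for key in natural_keys]
    key_values = [[row[key] for key in natural_keys] for row in batch]
    # Mark existing current prod records that collide on the natural key as inactive,
    # so the batch can be inserted as the new current records.
    query = update(dest_table).values(
        {dest_table.c[Constants.REC_STATUS]: Constants.STATUS_INACTIVE}).where(
        and_(dest_table.c[Constants.REC_STATUS] == Constants.STATUS_CURRENT,
             tuple_(*key_columns).in_(key_values)))
    dest_connector.execute(query)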