示例#1
0
 def test_start_to_end_no_duplication_between_batches(self):
     """Check that 'start_to_end' tables have no duplicated primary keys.

     For every table whose search type is 'start_to_end', the batches
     returned by dynamic_data_fetch_loop (restricted to the table's
     primary key columns) are concatenated, and the combined frame must
     not contain any duplicated row.
     """
     time_format = '%Y/%m/%d %H:%M:%S'
     for table_name, search_type in processing_info_maps.search_type.items():
         if search_type != 'start_to_end':
             continue
         print('Validating start_to_end type for table {}'.format(
             table_name))
         # A fixed window is used here instead of
         # defaults.nem_data_model_start_time.
         if table_name == 'FCAS_4_SECOND':
             # FCAS_4_SECOND is checked over a single day in 2015.
             start_test_window = '2015/01/01 00:00:00'
             end_test_window = '2015/01/02 00:00:00'
         else:
             start_test_window = '2018/01/01 00:00:00'
             end_test_window = '2018/05/01 00:00:00'
         # Parse the bounds only after the window has been chosen, so
         # that start_time always matches start_search and can never
         # fall after end_time.
         start_time = datetime.strptime(start_test_window, time_format)
         start_search = start_time
         end_time = datetime.strptime(end_test_window, time_format)
         data_tables = data_fetch_methods.dynamic_data_fetch_loop(
             start_search=start_search,
             start_time=start_time,
             end_time=end_time,
             table_name=table_name,
             raw_data_location='E:/raw_aemo_data',
             select_columns=defaults.table_primary_keys[table_name],
             date_filter=None,
             search_type='start_to_end')
         all_data = pd.concat(data_tables, sort=False)
         contains_duplicates = all_data.duplicated().any()
         self.assertEqual(False, contains_duplicates,
                          'table {}'.format(table_name))
         print('Type valid, no duplicates found.')
示例#2
0
 def test_last_contains_data_from_first(self):
     """Compare an early batch with the final batch for 'end' tables.

     For each table whose search type is 'end', all batches between
     defaults.nem_data_model_start_time and 2018/01/01 are fetched. The
     primary keys of the batch at index 35 are left-joined onto the final
     batch, and the rows whose first non-primary-key column is null after
     the join are collected.
     """
     # NOTE(review): the assertion below requires the collected frame to
     # be non-empty, i.e. that some keys of batch 35 have a null value
     # column after the join. The test name suggests the opposite
     # expectation -- confirm which one is intended.
     time_format = '%Y/%m/%d %H:%M:%S'
     for table_name, search_type in processing_info_maps.search_type.items():
         if search_type != 'end':
             continue
         # To shorten the run while debugging, a later start such as
         # '2018/01/01 00:00:00' can be substituted here.
         window_start = datetime.strptime(
             defaults.nem_data_model_start_time, time_format)
         window_end = datetime.strptime('2018/01/01 00:00:00', time_format)
         batches = data_fetch_methods.dynamic_data_fetch_loop(
             start_search=window_start,
             start_time=window_start,
             end_time=window_end,
             table_name=table_name,
             raw_data_location='E:/raw_aemo_data',
             select_columns=None,
             date_filter=None,
             search_type='end')
         key_columns = defaults.table_primary_keys[table_name]
         early_keys = batches[35].loc[:, key_columns]
         final_batch = batches[-1]
         joined = pd.merge(early_keys, final_batch, how='left',
                           on=key_columns)
         # Any column outside the primary key serves as the probe: it is
         # null wherever the left join found no partner row (or where the
         # final batch itself holds a null in that column).
         value_columns = [
             name for name in defaults.table_columns[table_name]
             if name not in key_columns
         ]
         probe_column = value_columns[0]
         unmatched_rows = joined[joined[probe_column].isnull()]
         self.assertEqual(False, unmatched_rows.empty)
示例#3
0
 def test_all_no_duplication_between_batches_with_finalise_step(self):
     """Check that the finalise step removes duplicates for 'all' tables.

     For every table whose search type is 'all', the batches returned by
     dynamic_data_fetch_loop (restricted to the table's primary key
     columns) are concatenated and passed through
     query_wrapers.drop_duplicates_by_primary_key. The resulting frame
     must not contain any duplicated row.
     """
     time_format = '%Y/%m/%d %H:%M:%S'
     for table_name, search_type in processing_info_maps.search_type.items():
         if search_type != 'all':
             continue
         print('Testing duplicate removal for table {}'.format(
             table_name))
         # To shorten the run while debugging, a later start such as
         # '2018/01/01 00:00:00' can be substituted here.
         start_test_window = defaults.nem_data_model_start_time
         start_time = datetime.strptime(start_test_window, time_format)
         start_search = start_time
         end_time = datetime.strptime('2018/01/01 00:00:00', time_format)
         data_tables = data_fetch_methods.dynamic_data_fetch_loop(
             start_search=start_search,
             start_time=start_time,
             end_time=end_time,
             table_name=table_name,
             raw_data_location='E:/raw_aemo_data',
             select_columns=defaults.table_primary_keys[table_name],
             date_filter=None,
             search_type='all')
         all_data = pd.concat(data_tables, sort=False)
         all_data = query_wrapers.drop_duplicates_by_primary_key(
             all_data, start_time, table_name)
         contains_duplicates = all_data.duplicated().any()
         # Name the table in the failure message, since one test method
         # loops over many tables.
         self.assertEqual(False, contains_duplicates,
                          'table {}'.format(table_name))
         print('Type valid, no duplicates found.')
示例#4
0
 def test_all_no_duplication_between_batches(self):
     """Check that 'all' tables have no duplicated primary keys.

     For every table whose search type is 'all', the batches returned by
     dynamic_data_fetch_loop (restricted to the table's primary key
     columns) are concatenated, and the combined frame must not contain
     any duplicated row. Tables listed as known to contain duplicates
     are skipped with a printed notice.
     """
     time_format = '%Y/%m/%d %H:%M:%S'
     tables_with_known_duplicates = (
         'GENCONDATA', 'SPDCONNECTIONPOINTCONSTRAINT',
         'SPDINTERCONNECTORCONSTRAINT', 'DUDETAILSUMMARY',
         'LOSSMODEL', 'LOSSFACTORMODEL', 'MNSP_DAYOFFER',
         'MNSP_PEROFFER', 'MNSP_INTERCONNECTOR',
         'INTERCONNECTOR', 'INTERCONNECTORCONSTRAINT',
         'DUDETAIL', 'MARKET_PRICE_THRESHOLDS',
     )
     for table_name, search_type in processing_info_maps.search_type.items():
         if search_type != 'all':
             continue
         print('Validating all type for table {}'.format(table_name))
         if table_name in tables_with_known_duplicates:
             exemption_notice = (
                 '{} is known to contain duplicate entries and is exempted from this test, a finalise '
                 'data processing step is included in dynamic data fetch to clean up these duplicates.'
             )
             print(exemption_notice.format(table_name))
             continue
         # To shorten the run while debugging, a later start such as
         # '2018/01/01 00:00:00' can be substituted here.
         window_start = datetime.strptime(
             defaults.nem_data_model_start_time, time_format)
         window_end = datetime.strptime('2018/01/01 00:00:00', time_format)
         batches = data_fetch_methods.dynamic_data_fetch_loop(
             start_search=window_start,
             start_time=window_start,
             end_time=window_end,
             table_name=table_name,
             raw_data_location='E:/raw_aemo_data',
             select_columns=defaults.table_primary_keys[table_name],
             date_filter=None,
             search_type='all')
         combined = pd.concat(batches, sort=False)
         has_duplicates = combined.duplicated().any()
         self.assertEqual(False, has_duplicates,
                          'table {}'.format(table_name))
         print('Type valid, no duplicates found.')