def complementary_merge(run_context, bookmap_location_file_name, bookmap_sheet, mergespec_file_name, input_table_names, output_table_names): db_context = run_context.getDBContext() temp_bookmap_table_name = dbutilities.get_temp_table(db_context).table_name BookMapReader(inputfile=bookmap_location_file_name, inputsheet=bookmap_sheet, read_to_db=True, db_context=db_context, outputTable=temp_bookmap_table_name) temp_prep_table_name = dbutilities.get_temp_table(db_context).table_name _comp_prepare1(run_context=run_context, bookmaps_table_name=temp_bookmap_table_name, input_table_names=input_table_names, output_table_name=temp_prep_table_name) prepped_input_table_names = {} for key in input_table_names: if key == FLAT_TABLE_KEY_NAME: prepped_input_table_names[key] = temp_prep_table_name else: prepped_input_table_names[key] = input_table_names[key] cmpmrg = ComplementaryMerge(run_context=run_context) cmpmrg.identity_insert = False cmpmrg.complementary_merge(mergespec_file=mergespec_file_name, input_table_names=prepped_input_table_names, output_table_names=output_table_names)
def read_oat_agg_sheet(self): print "Reading aggregation sheet" table = dbutilities.get_temp_table(self.db_context) data_file = os.path.join(self.data_dir, _AGG_FILE_OAT) reader = SafeExcelReader(self.run_context, data_file, _XLS_SHEET, table, self.db_context, scan_all=True) reader.createTable() table.populate_from_connection() return table
def do_17_18(self, tweak_agg_sheet_query): with self.read_g3() as g3, \ self.read_oat_agg_sheet() as temp_agg_sheet, \ dbutilities.get_temp_table(self.db_context) as agg_sheet: ## Keep first row of aggregation definition self.db_context.executeNoResults( "SELECT TOP(1) * INTO {agg_sheet} FROM {temp_table} WHERE [subject]='R' ORDER BY [import_order]" .format(agg_sheet=agg_sheet, temp_table=temp_agg_sheet)) print "Running first ttest" ttester1 = TTest(g3, self.db_context, agg_sheet, None, False) ttester1.readAggData() ttester1.execute() results1 = [] for level in ttester1.target_levels: results1.append(dbutilities.dump(level.output_table, level.id)) print "Updating data sheet" self.db_context.executeNoResults( "DELETE FROM {g3} WHERE [inclusionflagr] IS NULL OR [inclusionflagr] != 1" .format(g3=g3)) self.db_context.executeNoResults( tweak_agg_sheet_query.format(agg_sheet=agg_sheet)) print "Running second ttest" ttester2 = TTest(g3, self.db_context, agg_sheet, None, False) ttester2.readAggData() ttester2.execute() results2 = [] for level in ttester2.target_levels: results2.append(dbutilities.dump(level.output_table, level.id)) print "Comparing ttest outputs" assert len(results1) >= 1 assert len(results1) == len(results2) for i in range(len(results1)): res1 = results1[i] res2 = results2[i] keys = res1[0].keys() assert len(res1) == len(res2) for j in range(len(res2)): row1 = res1[j] row2 = res2[j] for k in keys: assert row1[k] == row2[k]
def init_temp_tables( db_context ): temp_tables = { } if not db_context: raise ValueError( 'db_context is null or empty.' ) temp_tables[ 'counts' ] = dbutilities.get_temp_table( db_context ).table_name temp_tables[ 'result' ] = dbutilities.get_temp_table( db_context ).table_name temp_tables[ 'resultc' ] = dbutilities.get_temp_table( db_context ).table_name temp_tables[ 'total' ] = dbutilities.get_temp_table( db_context ).table_name temp_tables[ 'totalc' ] = dbutilities.get_temp_table( db_context ).table_name temp_tables[ 'allfive' ] = dbutilities.get_temp_table( db_context ).table_name return temp_tables
def read_g3(self): print "Reading data" table = dbutilities.get_temp_table(self.db_context) data_file = os.path.join(self.data_dir, _XLS_FILE_G3) reader = SafeExcelReader(self.run_context, data_file, _XLS_SHEET, table, self.db_context, scan_all=False) reader.createTable() print "Tweaking data" self.db_context.executeNoResults( "UPDATE {g3} SET [state_inc_flag] = CASE WHEN ( [schtype] IN ('N','D','H') ) THEN 0 ELSE 1 END" .format(g3=table)) self.db_context.executeNoResults( "UPDATE {g3} SET [Rclass_missing_flag] = CASE WHEN ( [Rclass_id] IS NULL OR [Rclass_id]='' ) THEN 1 ELSE 0 END" .format(g3=table)) table.populate_from_connection() return table
def test_21(self): answer_dir = os.path.join(self.answer_dir, 'test_21') if not os.path.exists(answer_dir): os.makedirs(answer_dir) specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_21') result = True with self.read_g3() as g3, \ self.read_oat_agg_sheet() as agg_sheet, \ dbutilities.get_temp_table( self.db_context ) as tmp, \ dbutilities.get_temp_table( self.db_context ) as tmp_agg : # As near as I can tell, the SAS test only runs for the tenth row of the agg sheet. # self.db_context.executeNoResults("DELETE FROM {agg_sheet} WHERE [import_order] != 10".format( agg_sheet=agg_sheet )) # We are just using this TTest instance to read the aggregation sheet. The actual ttest will use # another instance based on a truncated aggregation sheet. agg_sheet_reader = TTest(g3, self.db_context, agg_sheet, None, False) agg_sheet_reader.readAggData() assert dbutilities.table_exists(g3) targetParentRow = [] for target_level in agg_sheet_reader.target_levels: for parent_level in target_level.contents: for row in parent_level.contents: targetParentRow.append( (target_level, parent_level, row)) targetParentRow.sort(key=lambda (row): row[2].import_order) for target_level, parent_level, row in targetParentRow: where_t = target_level.get_where_expression() target_id = target_level.id where_p = parent_level.get_where_expression() parent_id = parent_level.id i = row.import_order # Reduce the data to the desired sample dbutilities.drop_table_if_exists(tmp) query = """ SELECT {vars}, COUNT( {input_var} ) OVER( PARTITION BY {parent_id}, {target_id} ) AS n_target, 0 AS n_parent INTO {tmp} FROM {g3} WHERE {where_t} """.format(parent_id=parent_id, target_id=target_id, input_var=row.inputvar, where_t=where_t, tmp=tmp, g3=g3, vars=Joiner(g3)) self.db_context.executeNoResults(query) query = """ UPDATE {tmp} SET n_parent = A.B FROM ( SELECT n_parent, COUNT( {input_var} ) OVER( PARTITION BY {parent_id} ) AS B FROM {tmp} WHERE {where_p} ) AS A """.format(parent_id=parent_id, input_var=row.inputvar, where_p=where_p, tmp=tmp) print query self.db_context.executeNoResults(query) query = "DELETE FROM {tmp} WHERE ( n_parent != 2 ) OR ( n_target != 1 )".format( tmp=tmp) self.db_context.executeNoResults(query) n_obs = dbutilities.n_obs(tmp) if n_obs > 0: # Reduce the aggregation sheet to the current row query = "SELECT * INTO {tmp_agg} FROM {agg_sheet} WHERE [import_order]={i}".format( tmp_agg=tmp_agg, agg_sheet=agg_sheet, i=i) self.db_context.executeNoResults(query) # Do the ttest ttester = TTest(tmp, self.db_context, tmp_agg, None, False) ttester.readAggData() ttester.execute() # Check the answer answer_file = os.path.join( answer_dir, 'row_{0}_comparison.log'.format(i)) specimen_file = os.path.join( specimen_dir, 'test_21_ttest_{0}.xls'.format(i)) result_i = self.compare_output(specimen_file, target_level, answer_file) result = result and result_i print "{1} ttest test_21 for {0}".format( i, 'PASSED' if result_i else 'FAILED') self.assertTrue(result, "TTest Test 21 FAILED") return