def setUp(self): self.run_context = SuiteContext( "unittest" ) self.db_context = self.run_context.getDBContext( "unittest" ) self.static_context = self.run_context.getDBContext( "static" ) self.answer_dir = os.path.join( self.run_context.logs_dir, 'ttest_level_tests' ) if not os.path.exists( self.answer_dir ): os.makedirs( self.answer_dir ) self.data_dir = os.path.join( self.run_context.tests_safe_dir, 'ttest', 'input_data' ) self.specimen_dir = os.path.join( self.run_context.tests_safe_dir, 'ttest', 'sas_outputs' ) # libname ttest "H:\share\CSSC Folder\Score Report Group\Test Data\lib_TTestLevel"; # %let agg_file = &cvsroot.\ScoreReportMacros\UnitTested\lib_TTestLevel\test\HI Spring 2008 Aggregations_Melissa.xls; # %let sheet=ttestlevel; # %SafeExcelRead(filename=&agg_file., sheetname =&sheet., DS_out =aggds); self.run_context.debug( "Reading data for ttest_level tests" ) agg_file = os.path.join( self.data_dir, _AGG_FILE) reader = SafeExcelReader( self.run_context, agg_file, "ttestlevel", scan_all = True ) self.agg_ds = [ row for row in reader.getRows() ] # Import the input datasets reader.db_context = self.static_context for ( grade, filename, table_name, sheet_name ) in _GRADE_FILES: if not table_exists( table_name, self.static_context ): self.run_context.debug( "Reading data for grade {}".format( grade ) ) reader.filename = os.path.join( self.data_dir, filename ) reader.outputTable = table_name reader.sheetName = sheet_name reader.createTable()
def test1(self):
    run_context = SuiteContext('unittest')
    db_context = run_context.getDBContext('remote')
    self.LOGGER = run_context.get_logger()

    # Set up a test table definition
    with get_temp_table(db_context) as table:
        for i in xrange(100):
            table.add(FieldSpec("col_{}".format(i), 'NVARCHAR', 8))
        fts = FastTableStream(table, use_names=False, raw=True)
        data = [[unicode(100 * j + i) for i in xrange(100)]
                for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(5):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(run_context.logs_dir,
                                'fast_table_write_remote_unicode_raw_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(table))
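# The enable/runcall/disable plus pstats-dump pattern above recurs in the
# profiling tests throughout this section. A minimal sketch of a helper that
# factors it out (hypothetical: profile_to_file is not part of the codebase).
# Note that Profile.runcall itself enables and disables the profiler, so the
# explicit enable()/disable() calls around it are not strictly needed.
def profile_to_file(func, filename):
    """Run func under cProfile and dump its stats to filename."""
    pr = cProfile.Profile()
    result = pr.runcall(func)
    with open(filename, 'w') as f:
        pstats.Stats(pr, stream=f).print_stats()
    return result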
class AbstractMergeTestCase(unittest.TestCase):
    def setUp(self):
        self.run_context = SuiteContext('unittest')
        self.out_dir = os.path.join(self.run_context.tests_dir, 'merge')
        if os.path.exists(self.out_dir):
            shutil.rmtree(self.out_dir)
        os.makedirs(self.out_dir)
        self.db_context = self.run_context.getDBContext('unittest')
        self.setUpData()
        self.setUpMerge()

    def setUpData(self):
        clear_all(self.db_context)
        reader = SafeExcelReader(run_context=self.run_context,
                                 db_context=self.db_context,
                                 filename=os.path.join(self.run_context.tests_safe_dir, XLS_FILE),
                                 scan_all=True)
        reader.outputTable = reader.sheetName = "Data1"
        reader.createTable()
        reader.outputTable = reader.sheetName = "Data2"
        reader.createTable()
        self.data1 = self.db_context.getTableSpec("Data1")
        self.data2 = self.db_context.getTableSpec("Data2")

    def setUpMerge(self):
        self.merge_def = MergeDef(self.db_context)
        self.merge_def.table_name = 'merge_output'
        self.merge_def.left_input_table = self.data1
        self.merge_def.right_input_table = self.data2
        self.merge_def \
            .add(MergeFieldSpec(self.data1['barcode_num'], self.data2['barcode_num'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['barcode_char'], self.data2['barcode_char'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['studentid'], self.data2['studentid'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['gender'], self.data2['gender'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['studentlnm'], self.data2['studentlnm'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['studentfnm'], self.data2['studentfnm'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['Num_1'], self.data2['Num_1'], PRIORITY_LEFT_NONMISSING)) \
            .add(MergeFieldSpec(self.data1['Num_2'], self.data2['Num_2'], PRIORITY_RIGHT)) \
            .add(MergeFieldSpec(self.data1['Char_1'], self.data2['Char_1'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['Char_2'], self.data2['Char_2'], PRIORITY_RIGHT)) \
            .add(MergeFieldSpec(self.data1['N1'], self.data2['N1'], PRIORITY_LEFT)) \
            .add(MergeFieldSpec(self.data1['N2'], self.data2['N2'], PRIORITY_RIGHT))
        self.merge_def.required_merge_keys = [self.merge_def['barcode_num']]
        self.merge_def.optional_merge_keys = [self.merge_def['studentid'],
                                              self.merge_def['gender']]
        self.merge_def.fuzzy_merge_keys = [(self.merge_def['studentlnm'],
                                            self.merge_def['studentfnm'])]
        self.merge_def.fuzzy_report_table = 'fuzzy_report'
        self.merge_def.left_remain_table = 'left_remainder'
        self.merge_def.right_remain_table = 'right_remainder'

    def tearDown(self):
        self.run_context.close()
def main(): """ This runs a test of the complementary_merge wrapper/glue code. """ run_context = SuiteContext('OGT_test{}'.format(RUN_CONTEXT_NUMBER)) log = run_context.get_logger('ComplementaryMerge') db_context = run_context.getDBContext() intermediate_path = 'OGT Fall 2012' if ( SUMMER_OR_FALL == 'F') else 'OGT Spring 2012' # Summer has SAS variables. pathname = os.path.join(CVSROOT, 'CSSC Score Reporting', intermediate_path, 'Code/Development/Intake') bookmap_location_file_name = os.path.join(pathname, BOOKMAP_LOCATION_FILE_NAME) log.debug("main - bookmap_location_file_name[%s]" % bookmap_location_file_name) print("bookmap_location_file_name[%s]" % bookmap_location_file_name) mergespec_file_name = os.path.join(run_context.tests_safe_dir, MERGESPEC_FILE_NAME) input_table_names = { FLAT_TABLE_KEY_NAME: 'rc2FINAL', 'C': 'mc_table_C', 'M': 'mc_table_M', 'R': 'mc_table_R', 'S': 'mc_table_S', 'W': 'mc_table_W' } output_table_names = { FLAT_TABLE_KEY_NAME: 'rc2FINAL_cmrg', 'C': 'mc_table_C_cmrg', 'M': 'mc_table_M_cmrg', 'R': 'mc_table_R_cmrg', 'S': 'mc_table_S_cmrg', 'W': 'mc_table_W_cmrg' } for key in output_table_names: dbutilities.drop_table_if_exists(db_context=db_context, table=output_table_names[key]) try: complementary_merge( run_context=run_context, bookmap_location_file_name=bookmap_location_file_name, bookmap_sheet=BOOKMAP_SHEET, mergespec_file_name=mergespec_file_name, input_table_names=input_table_names, output_table_names=output_table_names) #create_mergespec_file( run_context=run_context, input_table_names=input_table_names, # new_mergespec_file='C:/new_mergespec_file.csv' ) except Exception, error_msg: log.exception('\n\n') raise
def main( ): """ This runs a test of the district_performance_summary_report wrapper/glue code. """ run_context = SuiteContext( 'OGT_test{}'.format( RUN_CONTEXT_NUMBER ) ) log = run_context.get_logger( 'DistrictPerformanceSummaryReport' ) specfile = os.path.join( run_context.tests_safe_dir, SPECFILE ) log.debug( "main - specfile[{}]".format( specfile ) ) # sch_type = H = 12 records # sch_type = P = 10495 records dpsr.district_performance_summary_report(run_context=run_context, specfile=specfile, input_table_name='student' )
class TestPreQC(unittest.TestCase):
    def setUp(self):
        self.run_context = SuiteContext('unittest')
        self.db_context = self.run_context.getDBContext('unittest')
        self.LOGGER = self.run_context.get_logger()

    def tearDown(self):
        pass

    def runTest(self):
        pass
def setUpClass(cls):
    cls.runContext = SuiteContext('unittest')
    cls.db_context = cls.runContext.getDBContext(tag='unittest')
    clear_all(cls.db_context)
    cls.testDataDir = os.path.join(cls.runContext.tests_safe_dir, "means_test")

    # Set up the tables we will need in the database
    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _XLS_FILE),
                             sheet_name=_MEANS_SHEET,
                             output_table=_MEANS_SHEET)
    reader.createTable()

    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _XLS_FILE),
                             sheet_name=_PERCENTS_SHEET,
                             output_table=_PERCENTS_SHEET)
    reader.createTable()

    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _DATA_FILE),
                             scan_all=True,
                             delimiter='|',
                             buffer_size=100,
                             output_table='studentg3_n')
    reader.createTable()
def setUpClass(cls):
    cls.runContext = SuiteContext('unittest')
    cls.db_context = cls.runContext.getDBContext(tag='unittest')
    clear_all(cls.db_context)
    cls.testDataDir = os.path.join(cls.runContext.tests_safe_dir, "erasure_test")
    cls.bm_testDataDir = os.path.join(cls.runContext.tests_safe_dir, "bookmapreader_test")

    # Set up the tables we will need in the database
    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             buffer_size=100,
                             scan_all=True,
                             filename=os.path.join(cls.testDataDir, _DATAFILE_1),
                             output_table='AIR1',
                             range_=(0, 0, 500, 1024))
    reader.createTable()

    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             buffer_size=100,
                             scan_all=True,
                             filename=os.path.join(cls.testDataDir, _DATAFILE_2),
                             output_table='AIR2',
                             range_=(0, 0, 500, 1024))
    reader.createTable()
def setUpClass(cls):
    cls.runContext = SuiteContext('unittest')
    cls.db_context = cls.runContext.getDBContext(tag='unittest')
    clear_all(cls.db_context)
    cls.testDataDir = os.path.join(cls.runContext.tests_safe_dir, "peercompare_test")

    # Set up the tables we will need in the database
    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _SAS_FILE),
                             output_table=_SIMILAR_TABLE,
                             delimiter='|')
    reader.createTable()

    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _DATA_FILE),
                             output_table=_DATA_TABLE)
    reader.createTable()

    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _AGG_FILE),
                             sheet_name='peerCompare',
                             output_table=_AGG_TABLE)
    reader.createTable()
def setUpClass(cls): """ Set up class variable 'context' for testing environment. Note that the export_test.ini file is in the same directory as this test script so they can be coordinated within the project. Future: SuiteContext() could accept absolute pathname of ini file and calculate and expose realpath of ini file For now, can manually copy the current directory's export_test.ini file to a directory where it will be sought by RunContext, eg ~/air_python/export_test.ini, but it's a step we could otherwise skip. """ # Consider to add realpath and dirname to SuiteContext cls.dirname = os.path.dirname(os.path.realpath(__file__)) cls.context = SuiteContext("export_test")
def setUp(self): self.run_context = SuiteContext("unittest") self.db_context = self.run_context.getDBContext("unittest") self.static_context = self.run_context.getDBContext("static") self.source_data_dir = self.run_context.getConfigFile( "TESTS", "id_generator_test_source_data_dir", "%(tests_safe_dir)s/id_generator_test/source_data") if not table_exists('student_g3', self.static_context): source_file = os.path.join(self.source_data_dir, _XLS_FILE) reader = SafeExcelReader(self.run_context, source_file, "Sheet1", 'student_g3', self.static_context, scan_all=True) reader.createTable() self.answer_dir = os.path.join(self.run_context.logs_dir, 'id_generator_test') if not os.path.exists(self.answer_dir): os.makedirs(self.answer_dir) self.specimen_dir = os.path.join(self.run_context.tests_safe_dir, 'id_generator_test', 'sas_outputs')
def setUpClass(cls):
    cls.runContext = SuiteContext('unittest')
    cls.db_context = cls.runContext.getDBContext(tag='unittest')
    clear_all(cls.db_context)
    cls.testDataDir = os.path.join(cls.runContext.tests_safe_dir, "converttopdf_test")

    # Remove the files we will output if they already exist
    if os.path.exists(os.path.join(cls.testDataDir, _SQL_PDF_OUTPUT)):
        os.remove(os.path.join(cls.testDataDir, _SQL_PDF_OUTPUT))
    if os.path.exists(os.path.join(cls.testDataDir, _XLS_PDF_OUTPUT_NOSTRETCH)):
        os.remove(os.path.join(cls.testDataDir, _XLS_PDF_OUTPUT_NOSTRETCH))
    if os.path.exists(os.path.join(cls.testDataDir, _XLS_PDF_OUTPUT_STRETCH)):
        os.remove(os.path.join(cls.testDataDir, _XLS_PDF_OUTPUT_STRETCH))
def setUpClass(cls):
    cls.runContext = SuiteContext('unittest')
    cls.db_context = cls.runContext.getDBContext(tag='unittest')
    clear_all(cls.db_context)
    cls.testDataDir = os.path.join(cls.runContext.tests_safe_dir, "bookmapreader_test")

    # Set up the tables we will need in the database
    reader = SafeExcelReader(run_context=cls.runContext,
                             db_context=cls.db_context,
                             filename=os.path.join(cls.testDataDir, _XLS_FILE),
                             sheet_name=_SHEET_NAME,
                             output_table='bookmaplocations')
    reader.createTable()

    cnt = 0
    for row in reader.getRows():
        tablename = row['Subject'] + '_' + row['Form_Values']
        filename_ = row["location"]
        bm_reader = SafeExcelReader(run_context=cls.runContext,
                                    db_context=cls.db_context,
                                    filename=filename_,
                                    sheet_name=_SHEET_NAME,
                                    output_table=tablename)
        bm_reader.createTable()
        cnt += 1
        # Set the 'location' column to the name of the table we created
        cls.db_context.executeNoResults("""
            UPDATE bookmaplocations
            SET location='{0}'
            WHERE subject='{1}' AND form_values='{2}'
        """.format(tablename, row['subject'], row['form_values']))

    # Get a list of table names to check
    cls.tables = []
    for row in cls.db_context.executeBuffered(
            "SELECT [subject] + '_' + [form_values] FROM bookmaplocations"):
        cls.tables.append(row[0])
class IDGeneratorTest(unittest.TestCase):
    def setUp(self):
        self.run_context = SuiteContext("unittest")
        self.db_context = self.run_context.getDBContext("unittest")
        self.static_context = self.run_context.getDBContext("static")
        self.source_data_dir = self.run_context.getConfigFile(
            "TESTS", "id_generator_test_source_data_dir",
            "%(tests_safe_dir)s/id_generator_test/source_data")
        if not table_exists('student_g3', self.static_context):
            source_file = os.path.join(self.source_data_dir, _XLS_FILE)
            reader = SafeExcelReader(self.run_context, source_file, "Sheet1",
                                     'student_g3', self.static_context,
                                     scan_all=True)
            reader.createTable()
        self.answer_dir = os.path.join(self.run_context.logs_dir, 'id_generator_test')
        if not os.path.exists(self.answer_dir):
            os.makedirs(self.answer_dir)
        self.specimen_dir = os.path.join(self.run_context.tests_safe_dir,
                                         'id_generator_test', 'sas_outputs')

    def test_1(self):
        # Replicates the SAS test 1
        g3 = self.static_context.getTableSpec('student_g3')
        with get_temp_table(self.db_context) as in_ds:
            query = "SELECT TOP 100 * INTO {in_ds:qualified} FROM {g3:qualified}".format(
                in_ds=in_ds, g3=g3)
            self.db_context.executeNoResults(query)
            # Tweak one record before generating IDs
            query = ("UPDATE {in_ds:qualified} SET [ucrx_teachername]='---CLARK PATRISE---' "
                     "WHERE [import_order]=98").format(in_ds=in_ds)
            self.db_context.executeNoResults(query)
            out_ds = self.db_context.getTableSpec('g3_ready')
            id_generator = IDGenerator(ds_in=in_ds,
                                       ds_out=out_ds,
                                       db_context=self.db_context,
                                       grade_var='grade',
                                       district_var='dcrxid_attend',
                                       school_var='bcrxid_attend',
                                       subject_char_lst=SUBJECT_LST,
                                       teacher_var_lst=TEACHER_LST,
                                       teacher_label_lst=TEACHER_LABEL_LST,
                                       teacher_id_lst=TEACHER_ID_LST,
                                       class_var_lst=CLASS_LST,
                                       section_var_lst=SECTION_LST,
                                       class_label_lst=CLASS_LABEL_LST,
                                       class_id_lst=CLASS_ID_LST,
                                       test_date='0509',
                                       err_var_name='errvar')
            id_generator.execute()
            key_function = lambda row: (row.studentid if row.studentid is not None else 0,
                                        int(row.serial_number),
                                        row.ssid if row.ssid is not None else '')
            answer_dir = os.path.join(self.answer_dir, 'test_10')
            if not os.path.exists(answer_dir):
                os.makedirs(answer_dir)
            answer_file = os.path.join(answer_dir, 'comparison.log')
            result = compare_tables(answer_file,
                                    table="g3_ready",
                                    specimen_name=os.path.join(self.specimen_dir, 'G3_READY.XLS'),
                                    columns=COLUMNS,
                                    table_key_function=key_function,
                                    specimen_key_function=key_function,
                                    db_context=self.db_context)
            self.assertTrue(result,
                            "Table comparison failed. See log in {}".format(answer_file))
create_index = "Create Clustered Index {indexname} on {tablename} ({fieldname})".format( indexname='preqc_ix', tablename=self.flat_tablename, fieldname='id') self.db.executeNoResults(create_index) fields = 'flat_table_id, id' for each in self.mc_items_table_names: ixname = each + '_ix' create_index = "CREATE CLUSTERED INDEX {indexname} on {tablename} ({fieldname})".format( indexname=ixname.lower(), tablename=each, fieldname=fields) self.db.executeNoResults(create_index) if __name__ == '__main__': from airassessmentreporting.testutility import SuiteContext RC = SuiteContext('unittest') # RC = RunContext('unittest') dbcontext = RC.getDBContext() print 'dbcontext=', dbcontext x = PreQC( runcontext=RC, dbcontext=dbcontext, # layoutfile='C:\CVS Projects\CSSC Score Reporting\OGT Fall 2012\Intake Layout\OGT_FA12_Op_DataLayout_IntakeLayout.xls', layoutfile= 'C:\CVS Projects\CSSC Score Reporting\OGT Fall 2012\Intake Layout\OGT_FA12_Op_DataLayout_IntakeLayout.xls', # inputfile='C:\SAS\OGT\Input\original-record1.txt', inputfile= 'H:\\share\\Ohio Graduation Tests\\Technical\\2012 October\\ScoreReports\\TextFileFromDRC\\536215_2012OhioOGTFall_Regular.txt', # inputfile='H:/share/Ohio Graduation Tests/Technical/2012 October/ScoreReports/TextFileFromDRC/536215_2012OhioOGTFall_Regular.txt', # inputfile='H:\\share\\Ohio Graduation Tests\\Technical\\2012 July\\ScoreReports\\TextFileFromDRC\\536214_2012OhioOGTSummer_Regular.txt', # inputfile='C:\SAS\OGT\Input\input-1.txt',
            if result[0][0] > 0:
                qry = _SQL_REPORT.format(
                    upwxraw=cols[0],
                    upwx_finalraw_item=cols[1],
                    upwx_oe_final=cols[2],
                    ufwx_attempt=cols[3],
                    ucwx_form=cols[4],
                    flat_table=flat_table[0][0].encode('ascii'),
                    mc_table=each[1].encode('ascii'),
                    subject_value=each[0].encode('ascii'),
                    attempt_check_errors=cols[5],
                    attempt_check_error_intr=cols[5] + '_intr')
                result = self.dbcontext.executeNoResults(qry)
                print 'Check the following table for errors - {0}'.format(cols[5])
            print 'ATTEMPT_CHECK MODULE ENDED'
        except Exception as error:
            print 'Error=', error


if __name__ == '__main__':
    runcontext = SuiteContext('unittest')
    dbcontext = runcontext.getDBContext()
    # bookmaplocations2 is the bookmap file for OGT Fall 2012
    ac = AttemptCheck(dbcontext=dbcontext,
                      runcontext=runcontext,
                      bookmapfile="""C:\SAS\OGT\Input\Bookmaplocations1.xls""")
    ac.process()
class Test(unittest.TestCase):
    def setUp(self):
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()

    def test_n_obs(self):
        n = n_obs('data1', self.db_context)
        self.assertEqual(300, n,
                         "Got wrong number of observations: {} instead of 300".format(n))

    def test_get_tablespec_1(self):
        '''Does a tablespec get returned as is?'''
        t = self.db_context.getTableSpec("Data1")
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_tablespec_2(self):
        '''Does it work to specify table name and db_context?'''
        t = get_table_spec('some_table', self.db_context)
        self.assertTrue(isinstance(t, TableSpec), "Did not return a TableSpec object")
        self.assertEquals(t.table_name, '[some_table]', "Did not return the right name")
        self.assertEquals(t.table_schema, self.db_context.schema,
                          "Did not return the correct schema")

    def test_get_tablespec_3(self):
        '''Can we override the schema?'''
        t = get_table_spec('some_table', self.db_context, 'another_schema')
        self.assertTrue(isinstance(t, TableSpec), "Did not return a TableSpec object")
        self.assertEquals(t.table_name, '[some_table]', "Did not return the right name")
        self.assertEquals(t.table_schema, '[another_schema]',
                          "Did not return the correct schema")

    def test_get_tablespec_4(self):
        '''Throws an error if db_context conflicts'''
        another_context = self.runContext.getDBContext()
        t1 = self.db_context.getTableSpec("Data1")
        try:
            t2 = get_table_spec(t1, another_context)
        except ValueError as e:
            return
        self.fail("Did not throw expected error on conflicting db_context")

    def test_get_tablespec_5(self):
        '''Throws an error if schema conflicts'''
        t1 = self.db_context.getTableSpec("Data1")
        try:
            t2 = get_table_spec(t1, self.db_context, 'another_schema')
        except ValueError as e:
            return
        self.fail("Did not throw expected error on conflicting schema")

    def test_get_tablespec_6(self):
        '''Correctly normalizes case'''
        t = get_table_spec('some_TABLE', self.db_context, 'another_SCHEMA')
        self.assertTrue(isinstance(t, TableSpec), "Did not return a TableSpec object")
        self.assertEquals(t.table_name, '[some_table]', "Did not return the right name")
        self.assertEquals(t.table_schema, '[another_schema]',
                          "Did not return the correct schema")

    def test_get_tablespec_7(self):
        '''Throws an error if no db_context is specified'''
        try:
            t = get_table_spec('some_table')
        except ValueError as e:
            return
        self.fail("Did not throw expected error on missing db_context")

    def test_get_tablespec_8(self):
        '''No error as long as db context matches'''
        t = self.db_context.getTableSpec("Data1")
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_tablespec_9(self):
        '''No error as long as schema matches'''
        t = self.db_context.getTableSpec("Data1", table_schema=self.db_context.schema)
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_column_names(self):
        cols = get_column_names('data1', self.db_context)
        self.assertListEqual(cols,
                             ['[barcode_num]', '[barcode_char]', '[studentid]',
                              '[gender]', '[ethnicity]', '[studentlnm]',
                              '[studentfnm]', '[num_1]', '[num_2]', '[char_1]',
                              '[char_2]', '[n1]', '[n2]', '[import_order]'],
                             "Did not return correct column names")

    def test_get_table_names1(self):
        tables = get_table_names(self.db_context)
        self.assertListEqual(tables, ['[data1]'], "Did not return correct table names")

    def test_get_table_names2(self):
        '''Should not return any tables from another schema'''
        tables = get_table_names(self.db_context, table_schema="another_schema")
        self.assertListEqual(tables, [], "Did not return correct table names")

    def test_get_table_names3(self):
        '''Should work with a quoted schema name'''
        tables = get_table_names(self.db_context, table_schema="[dbo]")
        self.assertListEqual(tables, ['[data1]'], "Did not return correct table names")

    def test_get_table_names4(self):
        '''Should work with an unquoted schema name'''
        tables = get_table_names(self.db_context, table_schema="dbo")
        self.assertListEqual(tables, ['[data1]'], "Did not return correct table names")

    def test_clear_all(self):
        # Create a table with a foreign key constraint
        query = ("CREATE TABLE my_table( key1 bigint, var1 VARCHAR(17), "
                 "FOREIGN KEY( key1 ) REFERENCES data1( import_order ) )")
        self.db_context.executeNoResults(query)
        tables = get_table_names(self.db_context)
        self.assertTrue('[data1]' in tables, "Did not return correct table names")
        self.assertTrue('[my_table]' in tables, "Did not return correct table names")
        self.assertEqual(len(tables), 2, "Did not return correct table names")
        clear_all(self.db_context)
        tables = get_table_names(self.db_context)
        self.assertListEqual(tables, [], "Did not return correct table names")

    def test_table_exists(self):
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that exists")
        self.assertFalse(table_exists('zaxxiz', self.db_context),
                         "Found table that does not exist")

    def test_drop_table_if_exists1(self):
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")
        t = self.db_context.getTableSpec('data1')
        drop_table_if_exists(t)
        self.assertFalse(table_exists(t),
                         "Table was supposed to be dropped, but it's still there")

    def test_drop_table_if_exists2(self):
        '''Different schema'''
        t = self.db_context.getTableSpec('data1')
        t.table_schema = 'not_my_schema'
        drop_table_if_exists(t)
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")

    def test_drop_table_if_exists3(self):
        '''Different name'''
        t = self.db_context.getTableSpec('data2')
        drop_table_if_exists(t)
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")

    def test_assembly_exists(self):
        '''Assumes that the assembly has been created by running the prep_sqlserver script'''
        self.assertTrue(assembly_exists('ToProperCase', self.db_context))
        self.assertFalse(assembly_exists('asdfasdf', self.db_context))
class TestTTestSasCompare(unittest.TestCase):
    def setUp(self):
        self.run_context = SuiteContext("unittest")
        self.db_context = self.run_context.getDBContext("unittest")
        self.static_context = self.run_context.getDBContext("static")
        self.answer_dir = os.path.join(self.run_context.logs_dir, 'ttest_level_tests')
        if not os.path.exists(self.answer_dir):
            os.makedirs(self.answer_dir)
        self.data_dir = os.path.join(self.run_context.tests_safe_dir, 'ttest', 'input_data')
        self.specimen_dir = os.path.join(self.run_context.tests_safe_dir, 'ttest', 'sas_outputs')

        # libname ttest "H:\share\CSSC Folder\Score Report Group\Test Data\lib_TTestLevel";
        # %let agg_file = &cvsroot.\ScoreReportMacros\UnitTested\lib_TTestLevel\test\HI Spring 2008 Aggregations_Melissa.xls;
        # %let sheet=ttestlevel;
        # %SafeExcelRead(filename=&agg_file., sheetname=&sheet., DS_out=aggds);
        self.run_context.debug("Reading data for ttest_level tests")
        agg_file = os.path.join(self.data_dir, _AGG_FILE)
        reader = SafeExcelReader(self.run_context, agg_file, "ttestlevel", scan_all=True)
        self.agg_ds = [row for row in reader.getRows()]

        # Import the input datasets
        reader.db_context = self.static_context
        for (grade, filename, table_name, sheet_name) in _GRADE_FILES:
            if not table_exists(table_name, self.static_context):
                self.run_context.debug("Reading data for grade {}".format(grade))
                reader.filename = os.path.join(self.data_dir, filename)
                reader.outputTable = table_name
                reader.sheetName = sheet_name
                reader.createTable()

    def test_13(self):
        # data locds;
        #     set ttest.g03;
        #     where courtesyTestedFlag=0 and TransferFlag=0 and AttemptFlagCRTmath=1;
        # run;
        with get_temp_table(self.db_context) as loc_ds:
            g03 = self.static_context.getTableSpec('g03')
            query = """
                SELECT * INTO {loc_ds}
                FROM {table:qualified}
                WHERE courtesyTestedFlag=0 AND TransferFlag=0 AND AttemptFlagCRTmath=1;
            """.format(loc_ds=loc_ds, table=g03)
            self.db_context.executeNoResults(query)
            ttest = TTestLevel(ds_in=loc_ds,
                               ds_out="test13_testresult",
                               db_context=self.db_context,
                               input_col_name='upmxscor',
                               output_col_name='outvar',
                               target_group_cols=['schoolcode'],
                               parent_group_cols=['areacode'],
                               critical_value=1.96,
                               round_value=1)
            ttest.execute()
            test_columns = (('outvar', 'outvar', integer_compare),
                            ('schoolcode', 'schoolcode', mixed_compare),
                            ('areacode', 'areacode', mixed_compare))
            table_key_function = lambda row: (row.areacode, row.schoolcode)
            specimen_key_function = lambda row: (row['areacode'], row['schoolcode'])
            answer_dir = os.path.join(self.answer_dir, 'test_13')
            if not os.path.exists(answer_dir):
                os.makedirs(answer_dir)
            answer_file = os.path.join(answer_dir, 'comparison.log')
            result = compare_tables(answer_file,
                                    table="test13_testresult",
                                    specimen_name=os.path.join(self.specimen_dir,
                                                               'test_13/testresult.XLS'),
                                    columns=test_columns,
                                    table_key_function=table_key_function,
                                    specimen_key_function=specimen_key_function,
                                    db_context=self.db_context)
            self.assertTrue(result, "Test 13 FAILED")

    def test_20(self):
        input_assembly_function = lambda params, table: \
            self.assemble_input_data_n_in_group(1, params, table)
        self._run_tests(20, input_assembly_function)

    def test_21(self):
        input_assembly_function = lambda params, table: \
            self.assemble_input_data_n_in_group(2, params, table)
        self._run_tests(21, input_assembly_function)

    def test_22(self):
        def input_function(params, table):
            # Pick all targets that have more than one person
            with get_temp_table(self.db_context) as temp_table_1, \
                    get_temp_table(self.db_context) as temp_table_2:
                query = """
                    SELECT DISTINCT {parentgroups}, {targetgroups}
                    INTO {temp_table_1:qualified}
                    FROM {table:qualified}
                    WHERE ({where}) AND ({wheret})
                        AND {parentgroups} IS NOT NULL
                        AND {targetgroups} IS NOT NULL
                    GROUP BY {parentgroups}, {targetgroups}
                    HAVING COUNT(1) > 1
                """.format(table=table, temp_table_1=temp_table_1, **params)
                self.db_context.executeNoResults(query)

                # From those, pick the first target in each parent
                query = """
                    SELECT {parentgroups}, {targetgroups},
                        ROW_NUMBER() OVER( PARTITION BY {parentgroups} ORDER BY {targetgroups} ) AS r1
                    INTO {temp_table_2:qualified}
                    FROM {temp_table_1:qualified}
                """.format(temp_table_1=temp_table_1, temp_table_2=temp_table_2, **params)
                self.db_context.executeNoResults(query)

                # For each selected target, pick the first two observations
                in_ds = get_temp_table(self.db_context)
                query = """
                    SELECT {columns:itemfmt='C.{{}}'}
                    INTO {in_ds}
                    FROM (
                        SELECT {columns:itemfmt='A.{{}}'},
                            ROW_NUMBER() OVER( PARTITION BY A.{parentgroups}, A.{targetgroups}
                                               ORDER BY A.{targetgroups} ) AS r2
                        FROM (
                            SELECT {columns}
                            FROM {table:qualified}
                            WHERE ({where}) AND ({wheret})
                                AND {parentgroups} IS NOT NULL
                                AND {targetgroups} IS NOT NULL
                        ) AS A
                        INNER JOIN (
                            SELECT {parentgroups}, {targetgroups}
                            FROM {temp_table_2:qualified}
                            WHERE r1=1
                        ) AS B
                        ON A.{parentgroups}=B.{parentgroups}
                            AND A.{targetgroups}=B.{targetgroups}
                    ) AS C
                    WHERE C.r2<=2
                """.format(in_ds=in_ds, temp_table_2=temp_table_2, table=table,
                           columns=Joiner(table), **params)
                self.db_context.executeNoResults(query)
                in_ds.populate_from_connection()
                return in_ds

        self._run_tests(22, input_function)

    def assemble_input_data_n_in_group(self, n_in_group, params, table):
        in_ds = get_temp_table(self.db_context)
        query = """
            SELECT {columns:itemfmt='A.{{}}'}
            INTO {in_ds}
            FROM (
                SELECT {columns},
                    DENSE_RANK() OVER( PARTITION BY {parentgroups} ORDER BY {targetgroups} ) AS r1,
                    ROW_NUMBER() OVER( PARTITION BY {parentgroups}, {targetgroups}
                                       ORDER BY {targetgroups} ) AS r2
                FROM {table_name:qualified}
                WHERE ({where}) AND ({wheret})
                    AND {parentgroups} IS NOT NULL
                    AND {targetgroups} IS NOT NULL
            ) AS A
            WHERE A.r1<={n_in_group} AND A.r2=1
        """.format(table_name=table, columns=Joiner(table), n_in_group=n_in_group,
                   in_ds=in_ds, **params)
        self.db_context.executeNoResults(query)
        in_ds.populate_from_connection()
        return in_ds

    def _run_tests(self, test_nbr, input_data_assembly_function):
        # %do grade=3 %to 8;
        #     data inds;
        #         set ttest.g0&grade.;
        #     run;
        #
        #     sasfile inds load;
        succeed = True
        answer_dir = os.path.join(self.answer_dir, 'test_{}'.format(test_nbr))
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        answer_file = os.path.join(answer_dir, 'log_')
        for (grade, filename, table_name, sheet_name) in _GRADE_FILES:
            self.run_context.info("Testing ttest_level on grade {} data".format(grade))
            j = 1
            for params in self.agg_ds:
                # data inds_&j.(keep=&&invar_&j. &&parentgroups_&j. &&targetgroups_&j.);
                #     set inds;
                #     &&whereT_&j. and %substr(&&where_&j.,6);
                # run;
                #
                # proc sort data=inds_&j.;
                #     by &&parentgroups_&j. &&targetgroups_&j.;
                # run;
                #
                # data inds_&j.;
                #     set inds_&j.;
                #     by &&parentgroups_&j. &&targetgroups_&j.;
                #     if first.&&parentgroups_&j. and missing(&&parentgroups_&j.)=0
                #         and missing(&&targetgroups_&j.)=0 then output;
                # run;
                params['where'] = clean_where(params['where'])
                params['wheret'] = clean_where(params['wheret'])
                table = self.static_context.getTableSpec(table_name)
                in_ds = input_data_assembly_function(params, table)

                # %Lib_ttestlevel(indata=inds_&j., outdata=&&outdata_&j., invar=&&invar_&j.,
                #     outlev=&&outlev_&j., targetgroups=&&targetgroups_&j.,
                #     parentgroups=&&parentgroups_&j., critval=&&critval_&j., rdValue=&&rdvalue_&j.);
                ds_out = params['outdata'].replace('&grade', str(grade))
                parentgroups = params['parentgroups']
                targetgroups = params['targetgroups']
                ttest = TTestLevel(ds_in=in_ds,
                                   ds_out="test{}_{}".format(test_nbr, ds_out),
                                   db_context=self.db_context,
                                   input_col_name=params['invar'],
                                   output_col_name=params['outlev'],
                                   target_group_cols=[targetgroups],
                                   parent_group_cols=[parentgroups],
                                   target_where_expression=params['wheret'],
                                   parent_where_expression=params['where'],
                                   critical_value=params['critval'],
                                   round_value=params['rdvalue'])
                ttest.execute()
                test_columns = ((params['outlev'], params['outlev'], integer_compare),
                                (targetgroups, targetgroups, mixed_compare),
                                (parentgroups, parentgroups, mixed_compare))
                table_key_function = lambda row: (getattr(row, parentgroups),
                                                  getattr(row, targetgroups))
                specimen_key_function = lambda row: (row[parentgroups], row[targetgroups])
                result = compare_tables(answer_file + ds_out + ".log",
                                        table="test{}_{}".format(test_nbr, ds_out),
                                        specimen_name=os.path.join(
                                            self.specimen_dir,
                                            'test_{}'.format(test_nbr),
                                            ds_out + ".XLS"),
                                        columns=test_columns,
                                        table_key_function=table_key_function,
                                        specimen_key_function=specimen_key_function,
                                        db_context=self.db_context)
                succeed = succeed and result
                self.run_context.info("Test_{} for ttest_level scenario {} {}".format(
                    test_nbr, ds_out, "PASSED" if result else "FAILED"))
                j += 1
        self.assertTrue(succeed, "Failures on ttest test {}".format(test_nbr))
class TTestTest(unittest.TestCase):
    def setUp(self):
        self.run_context = SuiteContext('unittest')
        self.db_context = self.run_context.getDBContext('unittest')
        self.data_dir = os.path.join(self.run_context.tests_safe_dir, 'ttest', 'input_data')
        self.answer_dir = os.path.join(self.run_context.logs_dir, 'ttest_tests')
        self.specimen_dir = os.path.join(self.run_context.tests_safe_dir, 'ttest', 'sas_outputs')

    def test_10(self):
        data_file = os.path.join(self.data_dir, _XLS_FILE)
        reader = SafeExcelReader(self.run_context, data_file, _XLS_SHEET,
                                 'class_info', self.db_context, scan_all=True)
        reader.createTable()
        ttester = TTest('class_info', self.db_context,
                        os.path.join(self.data_dir, _AGG_FILE), 0, True)
        ttester.readAggData()
        ttester.execute()
        answer_dir = os.path.join(self.answer_dir, 'test_10')
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        answer_file = os.path.join(answer_dir, 'comparison.log')
        specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_10')
        compare_function = lambda row: (int(row.tcrxid), int(row.comp))
        result = compare_tables(answer_file,
                                table="ttest_class",
                                specimen_name=os.path.join(specimen_dir, 'testresult.XLS'),
                                columns=COLUMNS,
                                table_key_function=compare_function,
                                specimen_key_function=compare_function,
                                db_context=self.db_context)
        self.assertTrue(result, "TTest Test 10 FAILED")

    def test_17(self):
        self.do_17_18("UPDATE {agg_sheet} SET [WhereT]='Rclass_missing_flag', [WhereT_value]='0'")

    def test_18(self):
        self.do_17_18("UPDATE {agg_sheet} SET [WhereP]='dummy_record_flag', [WhereP_value]='0'")

    def do_17_18(self, tweak_agg_sheet_query):
        with self.read_g3() as g3, \
                self.read_oat_agg_sheet() as temp_agg_sheet, \
                dbutilities.get_temp_table(self.db_context) as agg_sheet:
            # Keep first row of aggregation definition
            self.db_context.executeNoResults(
                "SELECT TOP(1) * INTO {agg_sheet} FROM {temp_table} WHERE [subject]='R' ORDER BY [import_order]"
                .format(agg_sheet=agg_sheet, temp_table=temp_agg_sheet))
            print "Running first ttest"
            ttester1 = TTest(g3, self.db_context, agg_sheet, None, False)
            ttester1.readAggData()
            ttester1.execute()
            results1 = []
            for level in ttester1.target_levels:
                results1.append(dbutilities.dump(level.output_table, level.id))
            print "Updating data sheet"
            self.db_context.executeNoResults(
                "DELETE FROM {g3} WHERE [inclusionflagr] IS NULL OR [inclusionflagr] != 1"
                .format(g3=g3))
            self.db_context.executeNoResults(tweak_agg_sheet_query.format(agg_sheet=agg_sheet))
            print "Running second ttest"
            ttester2 = TTest(g3, self.db_context, agg_sheet, None, False)
            ttester2.readAggData()
            ttester2.execute()
            results2 = []
            for level in ttester2.target_levels:
                results2.append(dbutilities.dump(level.output_table, level.id))
            print "Comparing ttest outputs"
            assert len(results1) >= 1
            assert len(results1) == len(results2)
            for i in range(len(results1)):
                res1 = results1[i]
                res2 = results2[i]
                keys = res1[0].keys()
                assert len(res1) == len(res2)
                for j in range(len(res2)):
                    row1 = res1[j]
                    row2 = res2[j]
                    for k in keys:
                        assert row1[k] == row2[k]

    def test_20(self):
        with self.read_g3() as g3, \
                self.read_oat_agg_sheet() as agg_sheet:
            self.db_context.executeNoResults(
                "DELETE FROM {agg_sheet} WHERE [subject] != 'R'".format(agg_sheet=agg_sheet))
            self.db_context.executeNoResults(
                "DELETE FROM {g3} WHERE import_order != 8".format(g3=g3))
            ttester = TTest(g3, self.db_context, agg_sheet, None, False)
            ttester.readAggData()
            ttester.execute()
            answer_dir = os.path.join(self.answer_dir, 'test_20')
            if not os.path.exists(answer_dir):
                os.makedirs(answer_dir)
            specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_20')
            result = True
            for target_level in ttester.target_levels:
                answer_file = os.path.join(answer_dir,
                                           target_level.level + '_comparison.log')
                specimen_file = os.path.join(specimen_dir,
                                             'test20_ttest_{0}.xls'.format(target_level.level))
                result_i = self.compare_output(specimen_file, target_level, answer_file)
                result = result and result_i
                if result_i:
                    print "PASSED ttest test_20 for " + target_level.level
                else:
                    print "FAILED ttest test_20 for " + target_level.level
            self.assertTrue(result, "TTest Test 20 FAILED")

    def test_21(self):
        answer_dir = os.path.join(self.answer_dir, 'test_21')
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_21')
        result = True
        with self.read_g3() as g3, \
                self.read_oat_agg_sheet() as agg_sheet, \
                dbutilities.get_temp_table(self.db_context) as tmp, \
                dbutilities.get_temp_table(self.db_context) as tmp_agg:
            # As near as I can tell, the SAS test only runs for the tenth row of the agg sheet.
            # self.db_context.executeNoResults("DELETE FROM {agg_sheet} WHERE [import_order] != 10".format( agg_sheet=agg_sheet ))

            # We are just using this TTest instance to read the aggregation sheet. The actual
            # ttest will use another instance based on a truncated aggregation sheet.
            agg_sheet_reader = TTest(g3, self.db_context, agg_sheet, None, False)
            agg_sheet_reader.readAggData()
            assert dbutilities.table_exists(g3)
            targetParentRow = []
            for target_level in agg_sheet_reader.target_levels:
                for parent_level in target_level.contents:
                    for row in parent_level.contents:
                        targetParentRow.append((target_level, parent_level, row))
            targetParentRow.sort(key=lambda (row): row[2].import_order)
            for target_level, parent_level, row in targetParentRow:
                where_t = target_level.get_where_expression()
                target_id = target_level.id
                where_p = parent_level.get_where_expression()
                parent_id = parent_level.id
                i = row.import_order

                # Reduce the data to the desired sample
                dbutilities.drop_table_if_exists(tmp)
                query = """
                    SELECT {vars},
                        COUNT( {input_var} ) OVER( PARTITION BY {parent_id}, {target_id} ) AS n_target,
                        0 AS n_parent
                    INTO {tmp}
                    FROM {g3}
                    WHERE {where_t}
                """.format(parent_id=parent_id, target_id=target_id,
                           input_var=row.inputvar, where_t=where_t,
                           tmp=tmp, g3=g3, vars=Joiner(g3))
                self.db_context.executeNoResults(query)
                query = """
                    UPDATE {tmp} SET n_parent = A.B
                    FROM (
                        SELECT n_parent,
                            COUNT( {input_var} ) OVER( PARTITION BY {parent_id} ) AS B
                        FROM {tmp}
                        WHERE {where_p}
                    ) AS A
                """.format(parent_id=parent_id, input_var=row.inputvar,
                           where_p=where_p, tmp=tmp)
                print query
                self.db_context.executeNoResults(query)
                query = "DELETE FROM {tmp} WHERE ( n_parent != 2 ) OR ( n_target != 1 )".format(tmp=tmp)
                self.db_context.executeNoResults(query)
                n_obs = dbutilities.n_obs(tmp)
                if n_obs > 0:
                    # Reduce the aggregation sheet to the current row
                    query = "SELECT * INTO {tmp_agg} FROM {agg_sheet} WHERE [import_order]={i}".format(
                        tmp_agg=tmp_agg, agg_sheet=agg_sheet, i=i)
                    self.db_context.executeNoResults(query)

                    # Do the ttest
                    ttester = TTest(tmp, self.db_context, tmp_agg, None, False)
                    ttester.readAggData()
                    ttester.execute()

                    # Check the answer
                    answer_file = os.path.join(answer_dir, 'row_{0}_comparison.log'.format(i))
                    specimen_file = os.path.join(specimen_dir, 'test_21_ttest_{0}.xls'.format(i))
                    result_i = self.compare_output(specimen_file, target_level, answer_file)
                    result = result and result_i
                    print "{1} ttest test_21 for {0}".format(
                        i, 'PASSED' if result_i else 'FAILED')
        self.assertTrue(result, "TTest Test 21 FAILED")
        return

    def read_g3(self):
        print "Reading data"
        table = dbutilities.get_temp_table(self.db_context)
        data_file = os.path.join(self.data_dir, _XLS_FILE_G3)
        reader = SafeExcelReader(self.run_context, data_file, _XLS_SHEET, table,
                                 self.db_context, scan_all=False)
        reader.createTable()
        print "Tweaking data"
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [state_inc_flag] = CASE WHEN ( [schtype] IN ('N','D','H') ) THEN 0 ELSE 1 END"
            .format(g3=table))
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [Rclass_missing_flag] = CASE WHEN ( [Rclass_id] IS NULL OR [Rclass_id]='' ) THEN 1 ELSE 0 END"
            .format(g3=table))
        table.populate_from_connection()
        return table

    def read_oat_agg_sheet(self):
        print "Reading aggregation sheet"
        table = dbutilities.get_temp_table(self.db_context)
        data_file = os.path.join(self.data_dir, _AGG_FILE_OAT)
        reader = SafeExcelReader(self.run_context, data_file, _XLS_SHEET, table,
                                 self.db_context, scan_all=True)
        reader.createTable()
        table.populate_from_connection()
        return table

    def compare_output(self, specimen_file, target_level, answer_file):
        compare_function = lambda row: row[target_level.id]
        output_vars = set([dbutilities.db_identifier_unquote(row.outputvar)
                           for parent_level in target_level.contents
                           for row in parent_level.contents])
        assert len(output_vars) >= 1
        columns = [(target_level.id, target_level.id, mixed_compare)]
        for var in output_vars:
            columns.append((var, var, integer_compare))
        print target_level.level, output_vars
        result_i = compare_tables(log_name=answer_file,
                                  table=target_level.output_table,
                                  specimen_name=specimen_file,
                                  columns=columns,
                                  table_key_function=compare_function,
                                  specimen_key_function=compare_function,
                                  db_context=self.db_context)
        return result_i
def setUpClass(cls):
    cls.runContext = SuiteContext('unittest')
    cls.db_context = cls.runContext.getDBContext(tag='unittest')
    clear_all(cls.db_context)
    cls.testDataDir = os.path.join(cls.runContext.tests_safe_dir, "intake_test")
class TestFastTableStream(unittest.TestCase):
    def setUp(self):
        self.run_context = SuiteContext('unittest')
        self.db_context = self.run_context.getDBContext('unittest')
        self.LOGGER = self.run_context.get_logger()
        # Set up a test table definition
        self.table = get_temp_table(self.db_context)
        (self.table
             .add(FieldSpec("col1", "NVARCHAR", 8))
             .add(FieldSpec("col2", "FLOAT"))
             .add(FieldSpec("col3", "TINYINT"))
             .add(FieldSpec("col4", "INT"))
             .add(FieldSpec("col5", "BIGINT")))

    def tearDown(self):
        self.table.drop()

    def runTest(self):
        pass

    def testWriteUnicodeCharacters(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'NVARCHAR', 8))
        fts = FastTableStream(self.table, use_names=False)
        with fts:
            for j in xrange(10000):
                fts.write([100 * j + i for i in xrange(100)])
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteUnicodeMany(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'NVARCHAR', 8))
        fts = FastTableStream(self.table, use_names=False)
        data = [[100 * j + i for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_unicode_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteUnicodeRaw(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'NVARCHAR', 8))
        fts = FastTableStream(self.table, use_names=False, raw=True)
        data = [[unicode(100 * j + i) for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_unicode_raw_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteUnicodeNoNull(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'NVARCHAR', 8, nullable=False))
        fts = FastTableStream(self.table, use_names=False, raw=True)
        data = [[unicode(100 * j + i) for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_unicode_nonull_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteCharacters(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'VARCHAR', 8))
        fts = FastTableStream(self.table, use_names=False)
        with fts:
            for j in xrange(10000):
                fts.write([str(100 * j + i)[:8] for i in xrange(100)])
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteMany(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'VARCHAR', 8))
        fts = FastTableStream(self.table, use_names=False)
        data = [[100 * j + i for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_char_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteRaw(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'VARCHAR', 8))
        fts = FastTableStream(self.table, use_names=False, raw=True)
        data = [[str(100 * j + i) for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_char_raw_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteIntegerMany(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'INT'))
        fts = FastTableStream(self.table, use_names=False)
        data = [[100 * j + i for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_int_with_checks_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteIntegerRaw(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'INT'))
        fts = FastTableStream(self.table, use_names=False, raw=True)
        data = [[100 * j + i for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_int_raw_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteNoNull(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'VARCHAR', 8, nullable=False))
        fts = FastTableStream(self.table, use_names=False, raw=True)
        data = [[str(100 * j + i) for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_char_nonull_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteFloatNoNull(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'FLOAT', nullable=False))
        fts = FastTableStream(self.table, use_names=False, raw=True,
                              dumpfile="C:\\Scratch\\float_no_null.dat")
        data = [[100.0 * j + i for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_float_nonull_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteFloatMany(self):
        del self.table[:]
        for i in xrange(100):
            self.table.add(FieldSpec("col_{}".format(i), 'FLOAT'))
        fts = FastTableStream(self.table, use_names=False,
                              dumpfile="C:\\Scratch\\float.dat")
        data = [[100.0 * j + i for i in xrange(100)] for j in xrange(1000)]

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_float_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testWriteMixed(self):
        del self.table[:]
        for i in xrange(33):
            self.table.add(FieldSpec("float_{}".format(i), 'FLOAT'))
            self.table.add(FieldSpec("int_{}".format(i), 'INT'))
            self.table.add(FieldSpec("str_{}".format(i), 'VARCHAR', 6))
        fts = FastTableStream(self.table, use_names=False)
        data = []
        for j in xrange(1000):
            row = []
            for i in xrange(33):
                k = 100 * j + i
                row.extend((int(k), float(k), str(k)))
            data.append(row)

        def do_write():
            with fts:
                for j in xrange(20):
                    fts.write_many(data)

        pr = cProfile.Profile()
        pr.enable()
        pr.runcall(do_write)
        pr.disable()
        filename = os.path.join(self.run_context.logs_dir,
                                'fast_table_write_mixed_profile.txt')
        with open(filename, 'w') as f:
            stats = pstats.Stats(pr, stream=f)
            stats.print_stats()
        self.LOGGER.info("Table name {}".format(self.table))

    def testValidateNewTable(self):
        drop_table_if_exists(self.table)
        table_stream = FastTableStream(self.table)
        table_stream.validate_write_inputs()

    def testValidateFailNewEmptyTable(self):
        drop_table_if_exists(self.table)
        del self.table[:]
        table_stream = FastTableStream(self.table)
        try:
            table_stream.validate_write_inputs()
        except ValueError:
            # Expected error
            return
        self.fail("Expected a ValueError if called on to create a new table with no fields")

    def testValidateExistingTable(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        table_stream = FastTableStream(self.table)
        table_stream.validate_write_inputs()

    def testValidateExistingTableAgainstEmptySpec(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        del self.table[:]
        table_stream = FastTableStream(self.table)
        table_stream.validate_write_inputs()

    def testValidateFailWrongColumnCount(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        self.table.add(FieldSpec('col7', 'NVARCHAR', 15))
        table_stream = FastTableStream(self.table)
        try:
            table_stream.validate_write_inputs()
        except ValueError:
            # Expected error
            return
        self.fail("Expected a ValueError if TableSpec has different column count from db")

    def testValidateFailWrongColumnName(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        col1 = self.table.pop('col1')
        col1.field_name = 'col_new'
        self.table.add(col1)
        table_stream = FastTableStream(self.table)
        try:
            table_stream.validate_write_inputs()
        except ValueError:
            # Expected error
            return
        self.fail("Expected a ValueError if column names do not match")

    def testValidateFailWrongColumnType(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        self.table['col1'].basic_type = 'VARCHAR'
        table_stream = FastTableStream(self.table)
        try:
            table_stream.validate_write_inputs()
        except ValueError:
            # Expected error
            return
        self.fail("Expected a ValueError if db column type is different from TableSpec column type")

    def testValidateFailColumnTooShort(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        self.table['col1'].data_length = 100
        table_stream = FastTableStream(self.table)
        try:
            table_stream.validate_write_inputs()
        except ValueError:
            # Expected error
            return
        self.fail("Expected a ValueError if db column is shorter than TableSpec column")

    def testValidateColumnLonger(self):
        drop_table_if_exists(self.table)
        self.db_context.executeNoResults(self.table.definition)
        self.table['col1'].data_length = 1
        table_stream = FastTableStream(self.table)
        table_stream.validate_write_inputs()
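# The try/except/return/self.fail pattern used by the validation tests above
# can be expressed more compactly with assertRaises, which works as a context
# manager in Python 2.7 and later. A sketch of the same check written that
# way, as a method of TestFastTableStream (an alternative formulation, not
# code from the suite):
def testValidateFailNewEmptyTable_alt(self):
    drop_table_if_exists(self.table)
    del self.table[:]
    table_stream = FastTableStream(self.table)
    with self.assertRaises(ValueError):
        table_stream.validate_write_inputs()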
    def _process_keep_columns(self):
        print '_process_keep_columns'
        if len(self.keep_columns) != 0:
            dbcontext6 = self.rc.getDBContext(cached=False)
            all_cols_qry = """SELECT name FROM sys.columns
                WHERE object_id = OBJECT_ID('{tablename}')""".format(tablename=self.output_table)
            all_cols = dbcontext6.execute(all_cols_qry)
            all_cols = [each[0].encode('ascii').upper() for each in all_cols]
            drop_cols = [each for each in all_cols if each not in self.keep_columns]
            for each in drop_cols:
                drop_query = "ALTER TABLE {tablename} DROP COLUMN {columnname}".format(
                    tablename=self.output_table, columnname=each)
                print drop_query
                dbcontext6.executeNoResults(drop_query)
            dbcontext6.close()


if __name__ == '__main__':
    runcontext = SuiteContext('unittest')
    dbcontext = runcontext.getDBContext(cached=False)
    sm = SasMerge(dbcontext=dbcontext,
                  runcontext=runcontext,
                  merge_table_1="Employee_merge_1",
                  merge_table_2="Employee_merge_2",
                  mergeids=["Emp_Num"],
                  drop_columns=[],
                  keep_columns=['EMP_AGE'],
                  output_table="Merge_Output_table")
    sm.process()
    dbcontext.executeNoResults("sp_RENAME 'SCHOOL_ASCII.GRADE_RECODED','GRADE','COLUMN'")
    dbcontext.executeNoResults("sp_RENAME 'SCHOOL_ASCII.GRADE_R_RECODED','RGRADE','COLUMN'")


@time_taken
def populate_school_intervention(dbcontext):
    print " Create SCHOOL_INTERVENTION_ASCII"
    drop_table_if_exists('SCHOOL_INTERVENTION_ASCII', dbcontext)
    dbcontext.executeNoResults(sqls['SCHOOL_INTERVENTION'])
    dbcontext.executeNoResults("ALTER TABLE SCHOOL_INTERVENTION_ASCII DROP COLUMN DCRXID")
    dbcontext.executeNoResults(
        "sp_RENAME 'SCHOOL_INTERVENTION_ASCII.DCRXID_RECODED','DCRXID','COLUMN'")


if __name__ == '__main__':
    runcontext = SuiteContext('sharedtest')
    dbcontext = runcontext.getDBContext()
    filename_additional_label = 'C:\\Projects\\OGT_S12\\OGT Spring 2012 Addresses & Labels 4_24_2012.xlsx'
    filename_size_selection = 'C:\\Projects\\OGT_S12\\OGT Spring 2012 Addresses & Labels 4_24_2012.xlsx'
    starttime = time.time()
    # ac = AsciiWriter(dbcontext=dbcontext, runcontext=runcontext,
    #                  filename_additional_label=filename_additional_label,
    #                  filename_size_selection=filename_size_selection)
    ac = AsciiWriter(dbcontext=dbcontext, runcontext=runcontext)
    ac.process()
    endtime = time.time()
    print 'TOTAL TIME TAKEN = ', endtime - starttime
class Test(unittest.TestCase):
    def setUp(self):
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir

    def testXLS(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('Data1')
        primary_key = table_spec.primary_key
        self.assertEquals(len(primary_key), 1)
        self.assertEquals(primary_key[0].field_name, '[import_order]')

    def testConstructor(self):
        reader = SafeExcelReader(self.runContext,
                                 filename=os.path.join(self.testDataDir, _XLS_FILE),
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()
        for name in get_table_names(self.db_context):
            self.assertEqual('[temp1]', name,
                             "Found name '{name}' instead of '[temp1]'".format(name=name))

    def testIntoPython(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        rows = [row for row in self.reader.getRows()]
        self.assertEqual(300, len(rows), 'Expected 300 rows, found %d' % len(rows))

    def testCSVIntoDB(self):
        self.reader.filename = os.path.join(self.testDataDir, _CSV_FILE)
        self.reader.outputTable = "CSV1"
        self.reader.scan_all = True
        self.reader.getNames = True
        self.reader.delimiter = "|"
        self.reader.skip = 0
        self.reader.range = (0, 0, 100, 1024)
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('CSV1')
        for col_name in _CSV_COLUMNS:
            self.assertTrue(col_name in table_spec, "Missing column {}".format(col_name))
        self.assertEquals(n_obs(table_spec), 100, "Wrong number of rows in imported data")

    def test_xlcol(self):
        letters = map(lambda x: self.reader._xlcol(x), [0, 3, 25])
        self.assertTrue(letters == ['A', 'D', 'Z'],
                        'xlcol - translating numbers to letters - failed')

    def test_xlcolnumber(self):
        numbers = map(lambda x: self.reader._xlcolnumber(x), ['A', 'D', 'Z'])
        self.assertTrue(numbers == [0, 3, 25],
                        'xlcolnumber - translating letters to numbers - failed')
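# The two tests above exercise SafeExcelReader's conversion between 0-based
# column indexes and Excel column letters. A minimal sketch of that base-26
# conversion (hypothetical; the reader's actual implementation may differ):
def xlcol(n):
    """Translate a 0-based column index to an Excel column letter."""
    letters = ''
    while True:
        n, rem = divmod(n, 26)
        letters = chr(ord('A') + rem) + letters
        if n == 0:
            return letters  # e.g. 0 -> 'A', 25 -> 'Z', 26 -> 'AA'
        n -= 1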