def test_insertFile_dateParsing(self):
    """Insert rows whose date column arrives as strings in several formats
    and verify they come back as parsed datetime values.
    """
    # Build the schema first; assumed reliable per test_runDBScript
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    tableName = "TestTypes"
    columnNames = ["MyInteger", "MyText", "MyDateTime"]

    # Each date string goes into a text column (stored verbatim) and into
    # a datetime column (expected to be parsed).
    rawLines = [
        '-1\t"12/11/2010"\t"12/11/2010"\n',
        '-2\t"2013-04-15 13:45:21"\t"2013-04-15 13:45:21"\n',
        '-3\t"2003-04-15 10:45:21"\t"2003-04-15 10:45:21"\n',
        '-4\t"4/11/12 6:20"\t"4/11/12 6:20"\n',
    ]
    dataFile = StringIO("".join(rawLines))

    # Deliberately change capitalization to ensure robustness
    dateColFormats = {"myDateTime": None}
    DBUtil.insertFile(dataFile, tableName, columnNames, dateColFormats=dateColFormats)

    verifyQuery = \
        """select MyInteger, MyText, MyDateTime from TestTypes where MyInteger < 0 order by MyInteger desc """

    expectedData = [
        [-1, "12/11/2010", datetime(2010, 12, 11)],
        [-2, "2013-04-15 13:45:21", datetime(2013, 4, 15, 13, 45, 21)],
        [-3, "2003-04-15 10:45:21", datetime(2003, 4, 15, 10, 45, 21)],
        [-4, "4/11/12 6:20", datetime(2012, 4, 11, 6, 20)],
    ]

    # Verify rows inserted with properly parsed dates
    results = DBUtil.execute(verifyQuery)
    self.assertEqual(expectedData, results)
def test_insertFile_skipErrors(self):
    """Feed deliberately malformed rows through the command-line insert
    interface and verify that an error is raised when the skip-errors
    option is not used.

    NOTE(review): a fuller version of this test (also exercising the -e
    option) exists later in this file under the same name; in Python the
    later definition shadows this one -- consider removing one of them.
    """
    # Test run will show error / warning messages from the app, but these are expected
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"]

    # Try with bogus data that should generate errors
    dataFile = StringIO()
    dataFile.write("ABCD\tPositive\tBadTest\t100.123\n")
    dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n")
    dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n")
    dataFile = StringIO(dataFile.getvalue())
    idFile = StringIO()

    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = dataFile
    sys.stdout = idFile
    argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"]
    argv.extend(columnNames)

    expectErr = True
    actualErr = False
    try:
        DBUtil.main(argv)
    except Exception as err:  # BUGFIX: was Py2-only "except Exception, err" syntax
        actualErr = True
    finally:
        # BUGFIX: original never restored the redirected streams, leaking
        # the redirection into any subsequently run tests
        sys.stdout = origStdout
        sys.stdin = origStdin

    # BUGFIX: original set up expectErr/actualErr but never compared them
    self.assertEqual(expectErr, actualErr)
def test_insertFile_escapeStrings(self):
    """Insert rows with the escapeStrings option enabled and verify that
    non-ASCII bytes are stored in escaped form.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = ["MyInteger", "MyText"]

    rows = [
        '-1\t"A"\n',
        '-2\t"B\xaeb"\n',  # contains a non-ASCII byte to exercise escaping
        '-3\t"C"\n',
        '-4\tD\n',
    ]
    dataFile = StringIO("".join(rows))

    DBUtil.insertFile(dataFile, tableName, columnNames, escapeStrings=True)

    verifyQuery = \
        """select MyInteger, MyText from TestTypes where MyInteger < 0 order by MyInteger desc """

    expectedData = [
        [-1, "A"],
        [-2, u"B\\xaeb"],  # non-ASCII byte expected back as escaped text
        [-3, "C"],
        [-4, "D"],
    ]

    # Verify rows inserted with properly escaped strings
    results = DBUtil.execute(verifyQuery)
    self.assertEqual(expectedData, results)
def build_clinical_item_psql_schemata():
    """Run the schema-creation SQL script for every clinical-item table."""
    schemata_dir = ClinicalItemDataLoader.fetch_psql_schemata_dir()
    for table in ClinicalItemDataLoader.SQL_TABLES:
        # One <table>.schema.sql file per table in the schemata directory
        path = os.path.join(schemata_dir, '%s.schema.sql' % table)
        with open(path, 'r') as script:
            DBUtil.runDBScript(script)
def test_nullCheck(self):
    """Verify that NULL and non-NULL column values can be inserted and
    selected back with is null / is not null predicates.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    conn = DBUtil.connection()
    try:
        DBUtil.execute("insert into TestTypes (MyText,MyInteger) values ('Test With Null', 255)", conn=conn)
        DBUtil.execute("insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime) values ('Test With Not Null', 255, 1.23, '2005-03-06')", conn=conn)

        # Each (predicate, expected text) pair probes null vs not-null lookup.
        # Would not work with MySQL if used TIMESTAMP data type. Should be DATETIME.
        # (TIMESTAMP tries to auto-fill values, so no nulls allowed?)
        checks = [
            ("MyReal is null", 'Test With Null'),
            ("MyReal is not null", 'Test With Not Null'),
            ("MyDateTime is null", 'Test With Null'),
            ("MyDateTime is not null", 'Test With Not Null'),
        ]
        for predicate, expectedText in checks:
            rows = DBUtil.execute(
                "select MyText from TestTypes where MyInteger = 255 and " + predicate,
                conn=conn)
            self.assertEqual(expectedText, rows[0][0])
    finally:
        conn.close()
def test_deleteRows(self):
    """Exercise DBUtil.deleteRows both by the default id column and by an
    explicitly named column, verifying row counts before and after.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    query = "select count(*) from TestTypes;"

    # Insert some test data to delete
    tableName = "TestTypes"
    columnNames = self.DATA_COLS.split()
    idFile = StringIO()
    DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile)
    idValues = idFile.getvalue().split()

    # Count up rows before and after delete
    initialCount = DBUtil.execute(query)[0][0]
    DBUtil.deleteRows("TestTypes", idValues)
    afterCount = DBUtil.execute(query)[0][0]
    self.assertEqual(initialCount - len(idValues), afterCount)

    # Reinsert the test data to try deleting them by a non-default Id column
    idFile = StringIO()
    DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile)

    nonDefaultIds = [100, 200]
    initialCount = DBUtil.execute(query)[0][0]
    DBUtil.deleteRows("TestTypes", nonDefaultIds, "MyInteger")
    afterCount = DBUtil.execute(query)[0][0]
    # BUGFIX: original computed afterCount here but never asserted anything,
    # so the non-default-column scenario verified nothing.
    # Assumes each MyInteger value matches exactly one freshly inserted row -- TODO confirm
    self.assertEqual(initialCount - len(nonDefaultIds), afterCount)
def build_clinical_item_indices():
    """Run the index-creation SQL script for each clinical-item table that has one."""
    indices_dir = ClinicalItemDataLoader.fetch_psql_indices_dir()
    for table in ClinicalItemDataLoader.SQL_TABLES:
        path = os.path.join(indices_dir, '%s.indices.sql' % table)
        # Index scripts are optional; skip tables without one
        if not os.path.exists(path):
            continue
        with open(path, 'r') as script:
            DBUtil.runDBScript(script)
def test_findOrInsertItem(self):
    """Walk DBUtil.findOrInsertItem through its scenarios: fresh insert,
    lookup of an existing row, optional insert data, retrieveCol, and
    forceUpdate.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    searchDict = {}
    insertDict = {}

    searchDict["TestTypes_id"] = +123
    log.debug("Insert a new item using default params")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict)
    self.assertEqual(+123, data)
    self.assertEqual(True, isNew)

    log.debug("Find the existing item")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict)
    self.assertEqual(+123, data)
    self.assertEqual(False, isNew)

    # Insert data should be ignored when the search already matches a row
    insertDict["TestTypes_id"] = +456
    log.debug("Find existing item, with optional insert data")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict)
    self.assertEqual(+123, data)
    self.assertEqual(False, isNew)

    searchDict["TestTypes_id"] = +789
    insertDict["TestTypes_id"] = +789
    insertDict["MyInteger"] = 123
    log.debug("Insert a new item with actual data")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict)
    self.assertEqual(+789, data)
    self.assertEqual(True, isNew)

    searchDict["TestTypes_id"] = +234
    insertDict["TestTypes_id"] = +234
    log.debug("Retrieve a different column")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText")
    self.assertEqual(None, data)
    self.assertEqual(True, isNew)

    searchDict["TestTypes_id"] = +345
    insertDict["TestTypes_id"] = +345
    insertDict["MyText"] = "testText"
    log.debug("Insert and retrieve a different column")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText")
    self.assertEqual("testText", data)
    self.assertEqual(True, isNew)

    insertDict["MyText"] = "newText"
    log.debug("Try inserting a different value under an existing row. Should NOT work")
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText")
    self.assertEqual("testText", data)
    self.assertEqual(False, isNew)

    log.debug("Try inserting a different value under an existing row, but force the update")
    insertDict["MyText"] = "newText"
    (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText", forceUpdate=True)
    self.assertEqual("newText", data)
    self.assertEqual(False, isNew)
def test_updateFromFile_commandline(self):
    """Like test_updateFromFile, but driving the update through the
    command-line main() interface, including a multi-key-column variant.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    # Insert some blank data first to update
    for idValue in self.ID_DATA:
        DBUtil.execute("insert into TestTypes (" + self.ID_COL + ") values (%s)", (idValue,))

    # Negative test case
    results = DBUtil.execute(self.DATA_QUERY)
    self.assertNotEqual(self.DATA_ROWS, results)

    # Now do the actual update from the file, but build in column names to data file
    dataFileWithCols = StringIO()
    dataFileWithCols.write(self.DATA_COLS)
    dataFileWithCols.write(self.DATA_FILE.getvalue())
    dataFileWithCols = StringIO(dataFileWithCols.getvalue())

    # BUGFIX: original redirected sys.stdin and never restored it, leaking
    # the redirection into subsequent tests; restore in a finally block.
    origStdin = sys.stdin
    try:
        sys.stdin = dataFileWithCols
        argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-d\\t"]
        DBUtil.main(argv)
    finally:
        sys.stdin = origStdin

    # Verify positive results
    results = DBUtil.execute(self.DATA_QUERY)
    self.assertEqual(self.DATA_ROWS, results)

    ########################################################
    # Repeat test but data file will use more than one key column (adding MyText)
    # Further note that MyText is used as both a key column to look up the row to update
    # and as a value column to modify
    dataFileWithCols = StringIO()
    dataFileWithCols.write("MyInteger\tMyText\tMyText\tMyReal\tMyYesNo\n")
    dataFileWithCols.write("100\tATest\tAAA\tNone\t" + TRUE_STR + "\t\n")
    dataFileWithCols.write("200\tNone\tBBB\t222.2\tNone\t\n")
    dataFileWithCols.write("300\tCTest\tNone\t333.3\t" + TRUE_STR + "\t\n")
    dataFileWithCols = StringIO(dataFileWithCols.getvalue())

    # Expected results after this update
    self.DATA_ROWS = []
    self.DATA_ROWS.append([100, None, True, "AAA", ])
    # This row is unchanged, because one of the key values cannot be found as null
    self.DATA_ROWS.append([200, 200.2, False, None, ])
    self.DATA_ROWS.append([300, 333.3, True, None, ])

    # Negative test case
    results = DBUtil.execute(self.DATA_QUERY)
    self.assertNotEqual(self.DATA_ROWS, results)

    # Now do the actual update from the file, but with an extra parameter
    # specifying 2 key columns
    origStdin = sys.stdin
    try:
        sys.stdin = dataFileWithCols
        argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-n2"]
        DBUtil.main(argv)
    finally:
        sys.stdin = origStdin

    # Verify positive results
    results = DBUtil.execute(self.DATA_QUERY)
    self.assertEqual(self.DATA_ROWS, results)
def build_stride_psql_schemata():
    """Run the schema-creation SQL script for each STRIDE table."""
    schemata_dir = StrideLoader.fetch_psql_schemata_dir()
    for params in list(STRIDE_LOADER_PARAMS.values()):
        psql_table = params['psql_table'] % TABLE_PREFIX
        log.debug('loading %s schema...' % psql_table)
        schema_file_name = '.'.join([psql_table, 'schema.sql'])
        schema_file_path = os.path.join(schemata_dir, schema_file_name)
        # IDIOM: with-statement guarantees the file handle is closed even if
        # runDBScript raises (original used manual open/close and would leak
        # the handle on error).
        with open(schema_file_path, 'r') as schema_file:
            DBUtil.runDBScript(schema_file)
def build_stride_psql_indices():
    """Run the index-creation SQL script for each STRIDE table that has one."""
    indices_dir = StrideLoader.fetch_psql_indices_dir()
    for params in list(STRIDE_LOADER_PARAMS.values()):
        psql_table = params['psql_table'] % TABLE_PREFIX
        indices_file_name = '.'.join([psql_table, 'indices.sql'])
        indices_file_path = os.path.join(indices_dir, indices_file_name)
        # Index scripts are optional per table
        if os.path.exists(indices_file_path):
            log.debug('loading %s indices...' % psql_table)
            # IDIOM: with-statement guarantees the file handle is closed even
            # if runDBScript raises (original used manual open/close).
            with open(indices_file_path, 'r') as indices_file:
                DBUtil.runDBScript(indices_file)
def test_insertFile(self):
    """Insert a multi-line data file (column names taken from its first row)
    and verify the row count and contents.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    idFile = StringIO()
    # Column names extracted from first row of data file
    DBUtil.insertFile(self.MULTI_LINE_DATA_FILE, tableName, None, "\t", idFile)

    # One generated id line per inserted row
    expectedRowCount = len(self.MULTI_LINE_DATA_ROWS)
    self.assertEqual(expectedRowCount, idFile.getvalue().count("\n"))

    self.assertEqual(self.MULTI_LINE_DATA_ROWS, DBUtil.execute(self.DATA_QUERY))
def _getClinicalItems(self):
    """Query clinical_item id/name pairs and dump them to clinical_items.csv."""
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('name')
    # FROM
    query.addFrom('clinical_item')

    # CONSISTENCY: use print() calls like the sibling query helpers in this
    # file (originally Py2-style print statements)
    print(query)
    print(query.getParams())

    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    results = DBUtil.execute(str(query), query.getParams())
    pd.DataFrame(results, columns=query.select).to_csv('clinical_items.csv', index=False)
def xtest_incColNamesAndTypeCodes(self): # Skip this test for now. Different DB connector modules # handle type codes differently (numbers, collections of numbers, objects containing collections) # Not using that feature for any function currently anyway DBUtil.runDBScript( self.SCRIPT_FILE, False ) cols = ["testtypes_id", "mytext", "myinteger", "myreal", "mydatetime", "myyesno"]; types= [ROWID, STRING, NUMBER, NUMBER, DATETIME, BOOLEAN]; query = "select %s from TestTypes;" % str.join(",",cols); results = DBUtil.execute( query, includeColumnNames=True, incTypeCodes=True ); self.assertEqual( cols, results[0] ); for verifyCode, typeCode in zip(types, results[1]): print verifyCode, typeCode
def test_runDBScript(self):
    """Run the setup DB script and sanity-check the data it creates."""
    # Just run a DB Script and make sure no ProgrammingErrors are raised.
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    # Run some other commands to see if scripts produced expected results
    rows = DBUtil.execute("select * from TestTypes where MyInteger > %s", (200,))
    self.assertEqual(2, len(rows))

    # Test extra "includeColumnNames" parameter
    rows = DBUtil.execute(
        "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger < %s",
        (100,), True)
    expectedCols = ["TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime", "MyYesNo"]
    # Compare column names case-insensitively
    self.assertEqual(
        [name.lower() for name in expectedCols],
        [name.lower() for name in rows[0]])
    # Header row only; no data rows expected below the threshold
    self.assertEqual(0, len(rows) - 1)
def _getNonNullBaseNames(self):
    """Query order_result_stat rows with a non-null max_result_flag and dump
    them to base_names.csv.
    """
    query = SQLQuery()
    # SELECT
    query.addSelect(BASE_NAME)
    query.addSelect('max_result_flag')
    # FROM
    query.addFrom('order_result_stat')
    # WHERE
    query.addWhere('max_result_flag is not null')

    # CONSISTENCY: use print() calls like the sibling query helpers in this
    # file (originally Py2-style print statements)
    print(query)
    print(query.getParams())

    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    results = DBUtil.execute(str(query), query.getParams())
    pd.DataFrame(results, columns=query.select).to_csv('base_names.csv', index=False)
def test_updateFromFile(self):
    """Update existing rows from a tab-delimited data file and verify the result."""
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    # Insert some blank data first to update
    for idValue in self.ID_DATA:
        DBUtil.execute("insert into TestTypes (" + self.ID_COL + ") values (%s)", (idValue,))

    # Negative test case: rows exist but do not yet match the expected data
    self.assertNotEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))

    # Now do the actual update from the file
    DBUtil.updateFromFile(self.DATA_FILE, self.DATA_TABLE, self.COL_NAMES, delim="\t")

    self.assertEqual(self.DATA_ROWS, DBUtil.execute(self.DATA_QUERY))
def _getClinicalItemCounts(self):
    """Count patient_item rows per clinical item and write the totals to
    data_summary_stats/item_counts.csv, most frequent first.
    """
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('COUNT(' + CLINICAL_ITEM_ID + ') as total')
    # FROM
    query.addFrom('patient_item')
    # GROUP / ORDER
    query.addGroupBy(CLINICAL_ITEM_ID)
    query.addOrderBy('total', dir='desc')

    print(query)
    print(query.getParams())

    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    rows = DBUtil.execute(str(query), query.getParams())
    frame = pd.DataFrame(rows, columns=[CLINICAL_ITEM_ID, 'count'])
    frame.to_csv('data_summary_stats/item_counts.csv', index=False)
def test_identityQuery(self):
    """Verify DBUtil.identityQuery returns the id of the most recently
    inserted row.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    # Capture the current max id before inserting
    lastSeq = DBUtil.execute("select max(TestTypes_id) from TestTypes")[0][0]

    conn = DBUtil.connection()
    try:
        cur = conn.cursor()
        cur.execute("insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)")

        # The identity query should report the newly assigned sequence value
        cur.execute(DBUtil.identityQuery("TestTypes"))
        self.assertEqual(lastSeq + 1, cur.fetchone()[0])

        # Cross-check by looking the inserted row up directly
        cur.execute("select TestTypes_id from TestTypes where MyText = 'Another' and MyInteger = 255")
        self.assertEqual(lastSeq + 1, cur.fetchone()[0])
    finally:
        conn.close()
def _getLabs(self):
    """List lab clinical items (names starting with 'LAB') and write them to
    data_summary_stats/labs.csv.
    """
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('name')
    query.addSelect('description')
    # FROM
    query.addFrom('clinical_item')
    # WHERE
    query.addWhereLike('name', 'LAB%')
    # ORDER
    query.addOrderBy(CLINICAL_ITEM_ID, dir='asc')

    print(query)
    print(query.getParams())

    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    rows = DBUtil.execute(str(query), query.getParams())
    frame = pd.DataFrame(rows, columns=query.select)
    frame.to_csv('data_summary_stats/labs.csv', index=False)
def test_execute_commandline(self):
    """Run update and select queries through the command-line main()
    interface, capturing stdout to verify the printed results.
    """
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    # Select via command line, capturing stdout
    origStdout = sys.stdout
    sys.stdout = StringIO()
    DBUtil.main(["DBUtil.py", "select count(*) from TestTypes where MyInteger > 200", "-"])
    self.assertEqual(2, int(sys.stdout.getvalue()))
    sys.stdout = origStdout

    # Insert a new row via command line
    origStdout = sys.stdout
    sys.stdout = StringIO()
    DBUtil.main(["DBUtil.py", "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)", "-"])
    #self.assertEqual( 1, int(sys.stdout.getvalue()) )
    sys.stdout = origStdout

    # The count should now reflect the inserted row
    origStdout = sys.stdout
    sys.stdout = StringIO()
    DBUtil.main(["DBUtil.py", "select count(*) from TestTypes where MyInteger > 200", "-"])
    self.assertEqual(3, int(sys.stdout.getvalue()))
    sys.stdout = origStdout

    # Different test, includeColumnNames (-c option)
    origStdout = sys.stdout
    sys.stdout = StringIO()
    DBUtil.main([
        "DBUtil.py", "-c",
        "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger > 200 and MyYesNo = True",
        "-"])
    sampleLines = sys.stdout.getvalue().split("\n")

    expected = ["TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime", "MyYesNo"]
    sampleColumns = sampleLines[0].split()
    # Case-insensitive column name comparison
    for iCol, col in enumerate(sampleColumns):
        self.assertEqual(expected[iCol].lower(), col.lower())

    # 2 data lines + 1 column name line + 1 newline at end of output
    self.assertEqual(2 + 1 + 1, len(sampleLines))
    sys.stdout = origStdout
def test_insertFile_commandline(self):
    """Insert the multi-line data file through the command-line interface,
    explicitly specifying tab as the delimiter.
    """
    # Similar to test_insertFile, but from higher-level command-line interface
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = self.DATA_COLS.split()
    idFile = StringIO()

    # Slightly different test, specify tab as delimiter, not just any whitespace
    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = self.MULTI_LINE_DATA_FILE
    sys.stdout = idFile
    DBUtil.main(["DBUtil.py", "-i-", "-d\\t", "-t" + tableName, "-o-"])
    sys.stdout = origStdout
    sys.stdin = origStdin

    # One generated id line per inserted row
    self.assertEqual(3, idFile.getvalue().count("\n"))
    self.assertEqual(self.MULTI_LINE_DATA_ROWS, DBUtil.execute(self.DATA_QUERY))
def test_insertFile_skipErrors(self):
    """Verify the -e (skip errors) command-line option: without it, bad input
    rows cause the whole insert to fail; with it, valid rows still get through.
    """
    # Test run will show error / warning messages from the app, but these are expected
    DBUtil.runDBScript(self.SCRIPT_FILE, False)  # Assume this works based on test_runDBScript method

    tableName = "TestTypes"
    columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"]

    # Bogus data that should generate errors (only the middle row is valid)
    badData = (
        "ABCD\tPositive\tBadTest\t100.123\n"
        "700.7\t" + FALSE_STR + "\tXTest\t777\n"
        "1,099\tNegative\tMoBadTest\tfoo\n"
    )

    # --- Pass 1: no skip-errors option; expect main() to raise ---
    dataFile = StringIO(badData)
    idFile = StringIO()
    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = dataFile
    sys.stdout = idFile
    argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"]
    argv.extend(columnNames)
    expectErr = True
    actualErr = False
    try:
        DBUtil.main(argv)
    except Exception as err:
        actualErr = True
    self.assertEqual(expectErr, actualErr)
    sys.stdout = origStdout
    sys.stdin = origStdin

    # Expect no rows successfully inserted since errors in input
    self.assertEqual(0, idFile.getvalue().count("\n"))
    results = DBUtil.execute("select count(*) from TestTypes where MyText like %s", ("%Test", ))
    self.assertEqual(0, results[0][0])

    # --- Pass 2: same bogus data, but -e option skips errors ---
    dataFile = StringIO(badData)
    idFile = StringIO()
    origStdin = sys.stdin
    origStdout = sys.stdout
    sys.stdin = dataFile
    sys.stdout = idFile
    argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-", "-e"]
    argv.extend(columnNames)
    DBUtil.main(argv)
    sys.stdout = origStdout
    sys.stdin = origStdin

    # Still expect 1 row to get through successfully, despite other invalid input
    self.assertEqual(1, idFile.getvalue().count("\n"))
    results = DBUtil.execute("select count(*) from TestTypes where MyText like %s", ("%Test", ))
    self.assertEqual(1, results[0][0])
# Join DATA_QUERY.addFrom("patient_item") DATA_QUERY.addJoin( "clinical_item", "patient_item.clinical_item_id = clinical_item.clinical_item_id", joinType="INNER") DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Med Univ (Primary)") # Everyone #DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Pamf Med (Primary)") # Expert DATA_QUERY.addOrderBy("patient_id", dir="ASC") print(DATA_QUERY) # Write out data to CSV DBUtil.runDBScript(SCRIPT_FILE, False) results = DBUtil.execute(DATA_QUERY) unique_patient_ids = {} # output = open("/Users/jwang/Desktop/expert.csv", "w") # outlist = open("/Users/jwang/Desktop/expert_list.csv", "w") output = open("/Users/jwang/Desktop/everyone.csv", "w") #includes experts + trainees who are providing patient care outlist = open("/Users/jwang/Desktop/everyone_list.csv", "w") output.write("patient_id,clinical_item_category_id,name,description\n") #count = 0 for line in results: if (line[0] not in list(