def run():
    """Run the diagnosis-year analysis and append its results to the output files.

    Prompts for the database username and password, loads every record via
    ``RecordsManager.getAllRecords``, passes them to
    ``identifyDiagnosisYear.run`` and appends two tab-delimited reports whose
    rows are terminated by a carriage return:

    * ``diagnosisYears.txt`` -- RUID and diagnosis year of each final record.
    * ``drugRanges.txt`` -- RUID, drug name, start date and end date for every
      entry in a final record's ``drugs`` mapping.

    Both files are opened in append mode, so each call adds a fresh header row
    followed by that run's rows.
    """
    ### Run all of our analysis ###
    # get the data we're going to run the analysis on
    username = input("Username to DB? ")
    # getpass keeps the password from being echoed to the terminal.
    password = getpass.getpass("Password:")
    rm = RecordsManager("connection", username, password)
    rm.getAllRecords()
    records = rm.records

    # run the diagnosis year analysis (this object pertains to CalledRecordDiagnoseYr class)
    finalRecords = identifyDiagnosisYear.run(records)

    # run the drugs analysis
    # records = rm.getDrugRecords()
    # print("got records")
    # finalRecords = identifyDrugs.run(rm, records)

    # run the symptoms analysis
    # identifySymptoms.run(records, finalRecords)

    ### output all of our info from all the analysis into a tab-delimited text file ###
    # Rows are collected in a list and joined once instead of growing a string
    # with repeated "+=".
    diagnosisRows = ["RUID\tDiagnosis Year\r"]
    for record in finalRecords:
        diagnosisRows.append(
            str(record.ruid) + "\t" + str(record.diagnosisYr) + "\r"
        )
    with open("/home/suttons/MSDataAnalysis/output/diagnosisYears.txt", "a") as txtFile:
        txtFile.write("".join(diagnosisRows))

    drugRows = ["RUID\tDrug\tStart Date\tEnd Date\r"]
    for record in finalRecords:
        for key in record.drugs:
            drug = record.drugs[key]
            drugRows.append(
                str(record.ruid)
                + "\t"
                + str(drug.name)
                + "\t"
                + str(drug.startDate)
                + "\t"
                + str(drug.endDate)
                + "\r"
            )
    with open("/home/suttons/MSDataAnalysis/output/drugRanges.txt", "a") as txtFile:
        txtFile.write("".join(drugRows))
def run():
    """Collect DB credentials, run the diagnosis-year analysis, append reports.

    The records loaded by ``RecordsManager`` are handed to
    ``identifyDiagnosisYear.run``; the returned records are then written as
    two tab-delimited, carriage-return-terminated reports (diagnosis years
    and drug date ranges). Both output files are opened in append mode.
    """
    ### Run all of our analysis ###
    # Credentials for the records database.
    dbUser = input("Username to DB? ")
    dbPassword = getpass.getpass('Password:')

    manager = RecordsManager("connection", dbUser, dbPassword)
    manager.getAllRecords()
    loaded = manager.records

    # Diagnosis-year analysis (this object pertains to the
    # CalledRecordDiagnoseYr class).
    analysed = identifyDiagnosisYear.run(loaded)

    # Drugs analysis (currently disabled):
    #   records = rm.getDrugRecords()
    #   print("got records")
    #   finalRecords = identifyDrugs.run(rm, records)

    # Symptoms analysis (currently disabled):
    #   identifySymptoms.run(records, finalRecords)

    ### output all of our info from all the analysis into a tab-delimited text file ###
    yearReport = "RUID\tDiagnosis Year\r" + "".join(
        "\t".join([str(entry.ruid), str(entry.diagnosisYr)]) + "\r"
        for entry in analysed
    )
    with open("/home/suttons/MSDataAnalysis/output/diagnosisYears.txt",
              "a") as yearFile:
        yearFile.write(yearReport)

    drugReport = "RUID\tDrug\tStart Date\tEnd Date\r" + "".join(
        "\t".join([
            str(entry.ruid),
            str(entry.drugs[drugKey].name),
            str(entry.drugs[drugKey].startDate),
            str(entry.drugs[drugKey].endDate),
        ]) + "\r"
        for entry in analysed
        for drugKey in entry.drugs
    )
    with open("/home/suttons/MSDataAnalysis/output/drugRanges.txt",
              "a") as drugFile:
        drugFile.write(drugReport)
def _loadPhraseCategories(fileName):
    """Read a phrase-list file and return its phrases grouped by category.

    A stripped line that both starts and ends with ``+++`` opens a new
    category; every other stripped line is added to the category currently
    being collected. The category still open at end of file is included.
    """
    phraseList = []
    category = []
    seenHeader = False
    with open(fileName) as inputFile:
        for line in inputFile:
            line = line.strip()
            if line.startswith("+++") and line.endswith("+++"):
                # The first header only opens a category; later ones also
                # close the previous category.
                if seenHeader:
                    phraseList.append(category)
                    category = []
                seenHeader = True
            else:
                category.append(line)
    # Flush the final category, which has no closing header after it.
    if seenHeader or category:
        phraseList.append(category)
    return phraseList


def _formatRate(numerator, denominator):
    """Return numerator/denominator as a two-decimal percentage string, or "n/a" for a zero denominator."""
    if denominator == 0:
        return "n/a"
    return str(round((numerator / denominator) * 100, 2)) + "%"


def run():
    """Evaluate the frequency-distribution rule on the validation set.

    Steps:

    1. Feed the positive records of ``TrainingSet.txt`` to
       ``FreqDistRule.prep`` and call ``prepStandardPairs``.
    2. Prompt for the matched-words and matched-score limits.
    3. Classify every record of ``ValidationSet.txt`` with
       ``FreqDistRule.run`` and tally a confusion matrix against
       ``record.isPositive``. For true positives, the diagnosis year is
       extracted with ``yearExtractionWorker().work``.
    4. Print progress, accuracy, misclassification rate, true/false positive
       rates, specificity and the raw counts.
    5. Write the ``RUID ---> year`` entries to ``yearOutput.txt``.

    All rates are computed against the validation set; a rate whose
    denominator is zero is printed as ``n/a``.
    """
    # NOTE(review): the three rules below and phraseList are never used in
    # this function. They are kept in case their constructors / the file read
    # are relied upon -- confirm and delete.
    wordBasedRule = SimpleWordBasedRule("SimpleWordBasedRule")
    phraseListSearchRule = PhraseListSearchRule("PhraseListSearchRule")
    contextRule = ContextRule("ContextRule")
    freqDistRule = FreqDistRule("FreqDistRule")
    rm = RecordsManager("connection")
    # The instance must not reuse the name "yearExtractionWorker": assigning
    # to that name inside the function makes it local, so the call on the
    # right-hand side would raise UnboundLocalError.
    yearWorker = yearExtractionWorker()

    phraseList = _loadPhraseCategories("phraseListSymptoms.txt")

    trainingSetRecords = rm.getTrainingSetRecords("TrainingSet.txt")
    validationSetRecords = rm.getTrainingSetRecords("ValidationSet.txt")

    for record in trainingSetRecords:
        if record.isPositive:
            freqDistRule.prep(record.content, record.isPositive)
    freqDistRule.prepStandardPairs()

    # By this point we have a standard most common 100 words
    matchedWordsLimit = int(input("Please enter matched words limit: "))
    matchedScoreLimit = int(input("Please enter matched score limit: "))

    # The tallies below come from the validation set, so it (not the training
    # set) is the denominator for progress and for the overall rates.
    total = len(validationSetRecords)
    truePositives = 0
    trueNegatives = 0
    falsePositives = 0
    falseNegatives = 0
    yearLines = []

    for i, record in enumerate(validationSetRecords):
        # assumes both flags are boolean-like -- TODO confirm
        predictedPositive = bool(
            freqDistRule.run(record.content, matchedWordsLimit, matchedScoreLimit)
        )
        actuallyPositive = bool(record.isPositive)

        if predictedPositive and actuallyPositive:
            # true positive
            truePositives += 1
            # if isPositive, pull out the diagnosis year
            recordYr = yearWorker.work(record.content, record.diagnosisYr)
            yearLines.append(str(record.ruid) + " ---> " + str(recordYr) + "\r")
        elif predictedPositive:
            # false positive
            falsePositives += 1
        elif actuallyPositive:
            # false negative
            falseNegatives += 1
        else:
            # true negative
            trueNegatives += 1

        progress = round((i / total) * 100, 2)
        print("Progress: " + str(progress) + "%")

    print(" ")
    print(" ")

    actualPositives = truePositives + falseNegatives
    actualNegatives = trueNegatives + falsePositives

    # accuracy: (TP + TN)/total
    print("Accuracy (TP + TN)/total: "
          + _formatRate(truePositives + trueNegatives, total))
    # misclassification rate: (FP + FN)/total
    print("Misclassification Rate (FP + FN)/total: "
          + _formatRate(falsePositives + falseNegatives, total))
    # true positive rate: TP/actualPositive
    print("True Positive Rate (TP/actual Positives): "
          + _formatRate(truePositives, actualPositives))
    # false positive rate: FP/actualNegative
    print("False Positive Rate (FP/actual Negatives): "
          + _formatRate(falsePositives, actualNegatives))
    # specificity: TN/actualNegative
    print("Specificity (TN/actual Negatives): "
          + _formatRate(trueNegatives, actualNegatives))

    print(" ")
    print("True Positives: " + str(truePositives))
    print("True Negatives: " + str(trueNegatives))
    print("False Positives: " + str(falsePositives))
    print("False Negatives: " + str(falseNegatives))

    # The context manager closes (and flushes) the output file.
    with open("yearOutput.txt", "w") as outputFile:
        print("".join(yearLines), file=outputFile)
def run():
    """Score ``FreqDistRule`` against the validation set and report metrics.

    The rule is prepared from the positive records of ``TrainingSet.txt``
    (``prep`` per record, then ``prepStandardPairs``). After prompting for the
    matched-words and matched-score limits, every record of
    ``ValidationSet.txt`` is classified with ``FreqDistRule.run`` and compared
    with ``record.isPositive`` to build a confusion matrix. For each true
    positive the diagnosis year is extracted and a ``RUID ---> year`` entry is
    collected; the entries are written to ``yearOutput.txt`` at the end.

    Printed metrics (accuracy, misclassification rate, true/false positive
    rate, specificity) use the validation-set size as the total and show
    ``n/a`` when their denominator is zero.
    """

    def asPercent(part, whole):
        # Two-decimal percentage string; "n/a" avoids ZeroDivisionError when
        # the validation set lacks that class (or is empty).
        if not whole:
            return "n/a"
        return str(round((part / whole) * 100, 2)) + "%"

    # NOTE(review): wordBasedRule, phraseListSearchRule, contextRule and
    # phraseList are not used further down; retained in case construction or
    # the file read matters -- confirm and remove.
    wordBasedRule = SimpleWordBasedRule("SimpleWordBasedRule")
    phraseListSearchRule = PhraseListSearchRule("PhraseListSearchRule")
    contextRule = ContextRule("ContextRule")
    freqDistRule = FreqDistRule("FreqDistRule")
    rm = RecordsManager("connection")
    # Use a distinct local name: rebinding "yearExtractionWorker" here would
    # make it a local variable and the call would raise UnboundLocalError.
    extractor = yearExtractionWorker()

    fileName = "phraseListSymptoms.txt"
    phraseList = []
    category = []
    categories = 0
    with open(fileName) as inputfile:
        for line in inputfile:
            line = line.strip()
            # if line is new category
            if line[:3] == '+++' and line[-3:] == '+++':
                categories += 1
                if categories != 1:
                    phraseList.append(category)
                    category = []
            else:
                category.append(line)
    # The last category has no header after it, so append it explicitly.
    if categories or category:
        phraseList.append(category)

    trainingSetRecords = rm.getTrainingSetRecords("TrainingSet.txt")
    validationSetRecords = rm.getTrainingSetRecords("ValidationSet.txt")

    for record in trainingSetRecords:
        if record.isPositive:
            freqDistRule.prep(record.content, record.isPositive)
    freqDistRule.prepStandardPairs()

    # By this point we have a standard most common 100 words
    matchedWordsLimit = int(input("Please enter matched words limit: "))
    matchedScoreLimit = int(input("Please enter matched score limit: "))

    # Counts are tallied over the validation set, so its size is the total.
    validationCount = len(validationSetRecords)
    truePositives = trueNegatives = 0
    falsePositives = falseNegatives = 0
    yearEntries = []

    for position, record in enumerate(validationSetRecords):
        # assumes the rule result and record.isPositive are boolean-like --
        # TODO confirm
        flagged = bool(freqDistRule.run(record.content, matchedWordsLimit,
                                        matchedScoreLimit))
        expected = bool(record.isPositive)

        if flagged:
            if expected:
                # true positive
                truePositives += 1
                # if isPositive, pull out the diagnosis year
                recordYr = extractor.work(record.content, record.diagnosisYr)
                yearEntries.append(
                    str(record.ruid) + " ---> " + str(recordYr) + "\r")
            else:
                # false positive
                falsePositives += 1
        elif expected:
            # false negative
            falseNegatives += 1
        else:
            # true negative
            trueNegatives += 1

        print("Progress: "
              + str(round((position / validationCount) * 100, 2)) + "%")

    print(" ")
    print(" ")

    actualPositives = truePositives + falseNegatives
    actualNegatives = trueNegatives + falsePositives

    report = [
        # accuracy: (TP + TN)/total
        ("Accuracy (TP + TN)/total: ",
         truePositives + trueNegatives, validationCount),
        # misclassification rate: (FP + FN)/total
        ("Misclassification Rate (FP + FN)/total: ",
         falsePositives + falseNegatives, validationCount),
        # true positive rate: TP/actualPositive
        ("True Positive Rate (TP/actual Positives): ",
         truePositives, actualPositives),
        # false positive rate: FP/actualNegative
        ("False Positive Rate (FP/actual Negatives): ",
         falsePositives, actualNegatives),
        # specificity: TN/actualNegative
        ("Specificity (TN/actual Negatives): ",
         trueNegatives, actualNegatives),
    ]
    for label, part, whole in report:
        print(label + asPercent(part, whole))

    print(" ")
    print("True Positives: " + str(truePositives))
    print("True Negatives: " + str(trueNegatives))
    print("False Positives: " + str(falsePositives))
    print("False Negatives: " + str(falseNegatives))

    # "with" guarantees the file is flushed and closed.
    with open("yearOutput.txt", 'w') as f:
        print("".join(yearEntries), file=f)