# Assumed imports for this section; the project-local modules (GSC, GSPU,
# TFC -- presumably an alias of TrainingFileClass -- BulkUtilities,
# CrossValidationUtilities, readDataThread) belong to the surrounding project
# and may already be imported elsewhere in the file.
import json

import gspread
from oauth2client.client import SignedJwtAssertionCredentials


def MultiFileBulkDetect(filesDict, noiseDict, featDict, preprocDict, enhanceDict, classDict,
                        sampleStart, sampleEnd, selectedData, sameFile, updateGSpread=False):
    detectFileRowIndexDict = {}
    detectFileAccListDict = {}
    if updateGSpread:
        ## Connect to the Google spreadsheet via a service-account JSON key
        json_key = json.load(open('/tmp/mozilla_root0/BCIWheelChair2018-eef792bad9a5.json'))
        scope = ['https://spreadsheets.google.com/feeds']
        credentials = SignedJwtAssertionCredentials(json_key['client_email'],
                                                    json_key['private_key'].encode(), scope)
        gc = gspread.authorize(credentials)
        sh = gc.open(GSC.title)
        worksheet = sh.worksheet(GSC.sheet_title)

    ## Given a training file and the method dicts, loop over its detection
    ## files and run every processing path against each one.
    i = 0
    for tfItem, tfValue in filesDict.items():
        print "Training File: " + tfItem.split("/")[-1]
        print "************"
        # reset to the first column
        i = 0
        tfDesc = TFC.getDescription(tfItem)
        tfName = TFC.getName(tfItem)
        if updateGSpread:
            rowIndex = GSPU.getEmptyRowIndex(range(1, 10))
            j = 0
            worksheet.update_cell(rowIndex, 3, tfDesc)
            worksheet.update_cell(rowIndex, 2, tfName)

        print "Creating Detection Paths:"
        print "-----------------------"
        for noiseItem, noiseValue in noiseDict.items():
            # TODO: remove this hack once noise removal is a single unified
            # flag (enum or struct) throughout the code -- see the NoiseMode
            # sketch below this function
            wrappingNoiseValue = False
            if noiseItem == "Remove" and noiseValue:
                wrappingNoiseValue = True
            elif noiseItem == "Raw" and noiseValue:
                wrappingNoiseValue = False
            for preprocItem, preprocValue in preprocDict.items():
                for featItem, featValue in featDict.items():
                    for enhanceItem, enhanceValue in enhanceDict.items():
                        for classItem, classValue in classDict.items():
                            # TODO: once training is separated from detection, train here
                            # once and detect per file; that would speed things up a lot
                            # when several detection sessions share one training session
                            if not BulkUtilities.CheckTrainingFile(tfItem, tfValue, False):
                                continue
                            # detection files are stored as lists
                            for dfItem in tfValue:
                                print "Detection File: " + dfItem.split("/")[-1]
                                print "************"
                                if updateGSpread:
                                    if dfItem + tfItem in detectFileRowIndexDict:
                                        # a recurrent detection file: reuse its row
                                        rowIndex = detectFileRowIndexDict[dfItem + tfItem]
                                    else:
                                        # a new detection file: allocate a row and label it
                                        rowIndex = GSPU.getEmptyRowIndex(range(1, 10))
                                        detectFileRowIndexDict[dfItem + tfItem] = rowIndex
                                        dfDesc = TFC.getDescription(dfItem)
                                        dfName = TFC.getName(dfItem)
                                        worksheet.update_cell(rowIndex, 8, dfDesc)
                                        worksheet.update_cell(rowIndex, 7, dfName)
                                Path = "Path " + str(i) + ": " + noiseItem + ", " + featItem + ", " \
                                       + preprocItem + ", " + enhanceItem + ", " + classItem
                                print Path
                                # Note: the program behaves strangely when threads aren't closed;
                                # when the second argument changes, readDataThread won't always
                                # receive the right dfItem but a cached copy of it, hence the
                                # extra wait() around getAcc()
                                thread = readDataThread(tfItem, dfItem, wrappingNoiseValue,
                                                        sampleStart, sampleEnd,
                                                        featValue, preprocValue, enhanceValue, classValue,
                                                        False, selectedData, sameFile, True)
                                thread.start()
                                thread.wait()
                                if updateGSpread:
                                    Acc = thread.getAcc()
                                    thread.wait()
                                    detectFileAccListDict.setdefault(dfItem + tfItem, []).append(Acc)
                                    # Write the path description and its accuracy
                                    worksheet.update_cell(rowIndex + 1, 9 + j, Path)
                                    worksheet.update_cell(rowIndex + 1, 10 + j, Acc)
                                thread.exit()
                            if updateGSpread:
                                # move to the next two columns once all detection files are done
                                j += 2
                            # move to the next path column
                            i += 1

    # for each (training file, detection file) pair
    if updateGSpread and i > 0:
        # Write the min, max, and average accuracy
        for tfItem, tfValue in filesDict.items():
            for dfItem in tfValue:
                accumelatedAcc = detectFileAccListDict[dfItem + tfItem]
                rowIndex = detectFileRowIndexDict[dfItem + tfItem]
                mySorted = sorted(accumelatedAcc)
                worksheet.update_cell(rowIndex, 4, mySorted[0])
                worksheet.update_cell(rowIndex, 5, mySorted[-1])
                avrg = sum(accumelatedAcc) / len(accumelatedAcc)
                worksheet.update_cell(rowIndex, 6, avrg)
    print "-----------------------"
    print "Finished bulk detection"
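
# The TODO above asks for one unified noise flag instead of the parallel
# "Remove"/"Raw" dict keys. The sketch below only illustrates that idea;
# NoiseMode and noise_mode_from_dict are hypothetical names, not part of the
# existing codebase.
class NoiseMode(object):
    """Enum-style constants for noise handling (Python 2 has no stdlib enum)."""
    RAW = 0     # keep the signal as recorded
    REMOVE = 1  # strip noise before feature extraction


def noise_mode_from_dict(noiseDict):
    """Map the legacy {"Remove": bool, "Raw": bool} dict onto a NoiseMode."""
    return NoiseMode.REMOVE if noiseDict.get("Remove") else NoiseMode.RAW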
def BulkEightyTweenty(filesDict, noiseDict, featDict, preprocDict, enhanceDict, classDict,
                      sampleStart, sampleEnd, selectedData, updateGSpread=False):
    # TODO: change to enums
    offsetDict = {
        "All": False,
        "off0": False,
        "off1": False,
        "off2": False,
        "off3": False,
        "off4": False,
    }
    if updateGSpread:
        ## Connect to the Google spreadsheet
        gc = gspread.login(GSC.email, GSC.password)
        sh = gc.open(GSC.title)
        worksheet = sh.worksheet(GSC.sheet_title)

    for offsetItem, offsetValue in offsetDict.items():
        print "Detection Offset: " + str(offsetItem)
        print "************"
        i = 0
        if updateGSpread:
            rowIndex = GSPU.getEmptyRowIndex(range(1, 10))
        ## given a training file and the method dicts, loop over each and run the paths
        for tfItem, tfValue in filesDict.items():
            print "Training File: " + tfItem
            print "************"
            desc = str(TrainingFileClass.getDescription(tfItem)) + " " + str(offsetItem)
            name = TrainingFileClass.getName(tfItem)
            if updateGSpread:
                j = 0
                worksheet.update_cell(rowIndex, 3, desc)
                worksheet.update_cell(rowIndex, 2, name)
            accumelatedAcc = []
            print "Creating Detection Paths:"
            print "-----------------------"
            for noiseItem, noiseValue in noiseDict.items():
                # TODO: remove this hack once noise removal is a single unified
                # flag (enum or struct) throughout the code
                wrappingNoiseValue = False
                if noiseItem == "Remove" and noiseValue:
                    wrappingNoiseValue = True
                elif noiseItem == "Raw" and noiseValue:
                    wrappingNoiseValue = False
                for preprocItem, preprocValue in preprocDict.items():
                    for featItem, featValue in featDict.items():
                        for enhanceItem, enhanceValue in enhanceDict.items():
                            for classItem, classValue in classDict.items():
                                # if training could be separated from detection, train here
                                # once and detect per file; that would speed things up a lot
                                # when several detection sessions share one training session
                                offsetDict = CrossValidationUtilities.SelectSingleDataOffset(offsetDict, offsetItem)
                                Path = "Path " + str(i) + ": " + noiseItem + ", " + featItem + ", " \
                                       + preprocItem + ", " + enhanceItem + ", " + classItem \
                                       + ", " + str(offsetItem)
                                print Path
                                thread = readDataThread(tfItem, tfItem, wrappingNoiseValue,
                                                        sampleStart, sampleEnd,
                                                        featValue, preprocValue, enhanceValue, classValue,
                                                        False, offsetDict, True, True)
                                thread.start()
                                thread.wait()
                                if updateGSpread:
                                    Acc = thread.getAcc()
                                    thread.wait()
                                    # Write the path description and its accuracy
                                    worksheet.update_cell(rowIndex, 7 + j, Path)
                                    worksheet.update_cell(rowIndex, 8 + j, Acc)
                                    # accumulate the accuracies of all paths
                                    accumelatedAcc.append(Acc)
                                    j += 2
                                thread.exit()
                                i += 1
            if updateGSpread and i > 0:
                # Write the min, max, and average accuracy
                mySorted = sorted(accumelatedAcc)
                worksheet.update_cell(rowIndex, 4, mySorted[0])
                worksheet.update_cell(rowIndex, 5, mySorted[-1])
                avrg = sum(accumelatedAcc) / len(accumelatedAcc)
                worksheet.update_cell(rowIndex, 6, avrg)
    print "-----------------------"
    print "Finished bulk detection"
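
# BulkEightyTweenty activates one data offset at a time through
# CrossValidationUtilities.SelectSingleDataOffset, whose implementation is not
# shown in this file. A plausible minimal sketch, assuming it simply turns the
# chosen key on and every other key off (an assumption, not the project's
# actual code):
def select_single_data_offset(offsetDict, selected):
    """Return a copy of offsetDict with only `selected` set to True."""
    return dict((key, key == selected) for key in offsetDict)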
def SingleFileBulkDetect(self):
    # TODO: add assertions -> there must be at least a single selected checkbox for each column
    # TODO: add time estimation; even a static estimate would help for now (calc a trial path)
    noiseDict = self.DictOfNoiseCB()
    featDict = self.DictOfFeaturesCB()
    preprocDict = self.DictOfPreprocessingCB()
    enhanceDict = self.DictOfEnhancementCB()
    classDict = self.DictOfClassifiersCB()
    updateGDocs = self.CBGetter(self.updateGDocsCB)

    print "Training File: " + self.trainPath.split("/")[-1]
    print "************"
    print "Detection File: " + self.detectPath.split("/")[-1]
    print "************"
    # compute the description and name before writing them to the sheet
    desc = TrainingFileClass.getDescription(self.trainPath)
    name = TrainingFileClass.getName(self.trainPath)

    if updateGDocs:
        ## Connect to the Google spreadsheet via a service-account JSON key
        json_key = json.load(open('/tmp/mozilla_root0/BCIWheelChair2018-eef792bad9a5.json'))
        scope = ['https://spreadsheets.google.com/feeds']
        credentials = SignedJwtAssertionCredentials(json_key['client_email'],
                                                    json_key['private_key'].encode(), scope)
        gc = gspread.authorize(credentials)
        sh = gc.open(GSC.title)
        worksheet = sh.worksheet(GSC.sheet_title)
        self.rowIndex = GSPU.getEmptyRowIndex(range(1, 10))
        worksheet.update_cell(self.rowIndex, 3, desc)
        worksheet.update_cell(self.rowIndex, 2, name)

    self.threadList = []
    i = 0
    j = 0
    self.accumelatedAcc = []
    print "Creating Detection Paths:"
    print "-------------------------"
    for noiseItem, noiseValue in noiseDict.items():
        # TODO: remove this hack once noise removal is a single unified flag
        # (see the NoiseMode sketch near the top of the file)
        wrappingNoiseValue = False
        if noiseItem == "Remove" and noiseValue:
            wrappingNoiseValue = True
        elif noiseItem == "Raw" and noiseValue:
            wrappingNoiseValue = False
        for preprocItem, preprocValue in preprocDict.items():
            for featItem, featValue in featDict.items():
                for enhanceItem, enhanceValue in enhanceDict.items():
                    for classItem, classValue in classDict.items():
                        Path = "Path " + str(i) + ": " + noiseItem + ", " + featItem + ", " \
                               + preprocItem + ", " + enhanceItem + ", " + classItem
                        print Path
                        thread = readDataThread(self.trainPath, self.detectPath, wrappingNoiseValue,
                                                self.sampleStart, self.sampleEnd,
                                                featValue, preprocValue, enhanceValue, classValue,
                                                False, self.selectedData, self.sameFile, True)
                        self.threadList.append(thread)
                        self.threadList[i].start()
                        self.threadList[i].wait()
                        Acc = self.threadList[i].getAcc()
                        # Write the path description and its accuracy
                        if updateGDocs:
                            worksheet.update_cell(self.rowIndex, 7 + j, Path)
                            worksheet.update_cell(self.rowIndex, 8 + j, Acc)
                        # accumulate the accuracies of all paths
                        self.accumelatedAcc.append(Acc)
                        i += 1
                        j += 2

    # Get the min, max, and average accuracy, then write them
    self.sorted = sorted(self.accumelatedAcc)
    if updateGDocs:
        worksheet.update_cell(self.rowIndex, 4, self.sorted[0])
        worksheet.update_cell(self.rowIndex, 5, self.sorted[-1])
    avrg = sum(self.accumelatedAcc) / len(self.accumelatedAcc)
    if updateGDocs:
        worksheet.update_cell(self.rowIndex, 6, avrg)
    print "-----------------------"
    print "Finished bulk detection"
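
# The min/max/average bookkeeping above is duplicated across all the bulk
# functions. A small shared helper would remove the repetition; this is a
# suggested refactor sketch, and summarize_accuracies is a hypothetical name:
def summarize_accuracies(accuracies):
    """Return (min, max, average) for a non-empty list of accuracy values."""
    return (min(accuracies), max(accuracies),
            sum(accuracies) / float(len(accuracies)))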