def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports at most targetCount jobs.

    Returns immediately, without polling, when targetCount is -1, and after
    a single poll when the job count is already at or below the limit.
    Otherwise the job count is polled again after every pollIntervalSeconds
    until it has dropped to the limit.

    @param targetCount: highest acceptable job count; -1 disables waiting
    @param pollIntervalSeconds: seconds to wait between two job-count polls
    @param verbose: when True, a progress line is kept up to date on sys.stderr
    """
    if targetCount == -1:
        return
    numJobs = self.getNumJobs()
    if numJobs <= targetCount:
        return
    accountName = self.account
    if accountName is None:
        accountName = "local"
    waitTimer = Timer()
    while numJobs > targetCount:
        sleepTimer = Timer()
        # The job count only changes between polls, so this part of the
        # progress line is fixed for the whole sleep period.
        waitMessage = "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")"
        if verbose:
            sleepString = " [ ] "
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
        while True:
            elapsed = sleepTimer.getElapsedTime()
            if elapsed >= pollIntervalSeconds:
                break
            if verbose:
                steps = int(10 * elapsed / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            # Never sleep past the end of the poll interval; a fixed 5 s nap
            # would overshoot intervals that are short or not a multiple of 5.
            time.sleep(min(5, max(0, pollIntervalSeconds - elapsed)))
        numJobs = self.getNumJobs()
    if verbose:
        # The leading newline terminates the carriage-return progress line.
        print >> sys.stderr, "\nAll jobs done"
def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
    """
    Poll the status of every job until none is queued or running, or until
    the timeout has passed.

    The status of each job is read with self.getJobStatus(job), which must
    return one of "FINISHED", "QUEUED", "FAILED" or "RUNNING".

    @param jobs: iterable of job handles accepted by self.getJobStatus
    @param pollIntervalSeconds: seconds to wait between two status polls
    @param timeout: give up after this many seconds of waiting; None waits forever
    @param verbose: when True, progress and the final outcome are printed to sys.stderr
    @return: dict with the number of jobs in each state at the last poll
    """
    print >> sys.stderr, "Waiting for results"
    waitTimer = Timer()
    while True:
        jobStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for job in jobs:
            jobStatus[self.getJobStatus(job)] += 1
        jobStatusString = str(jobStatus["QUEUED"]) + " queued, " + str(jobStatus["RUNNING"]) + " running, " + str(jobStatus["FINISHED"]) + " finished, " + str(jobStatus["FAILED"]) + " failed"
        if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
            if verbose:
                print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
            break
        # The timeout is measured with waitTimer, the only timer that spans
        # the whole wait (the names used here before were never defined).
        if timeout is not None and waitTimer.getElapsedTime() >= timeout:
            if verbose:
                print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString()
            break
        sleepTimer = Timer()
        accountName = self.account
        if accountName is None:
            accountName = "local"
        waitMessage = "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),"
        if verbose:
            sleepString = " [ ] "
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
        while True:
            elapsed = sleepTimer.getElapsedTime()
            if elapsed >= pollIntervalSeconds:
                break
            if verbose:
                steps = int(10 * elapsed / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            # Do not sleep past the end of the poll interval.
            time.sleep(min(5, max(0, pollIntervalSeconds - elapsed)))
    return jobStatus
def waitForJobs(self, scriptNames, timeout=None):
    """
    Poll the status of every script until none is queued or running, or
    until the timeout has passed.

    The status of each script is read with self.getLouhiStatus(scriptName),
    which must return one of "FINISHED", "QUEUED", "FAILED" or "RUNNING".
    Statuses are re-read once every 60 seconds.

    @param scriptNames: sequence of script names accepted by self.getLouhiStatus
    @param timeout: give up after this many seconds of waiting; None waits forever
    @return: True when no script is queued or running any more, False on timeout
    """
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for scriptName in scriptNames:
            processStatus[self.getLouhiStatus(scriptName)] += 1
        processStatusString = str(processStatus["QUEUED"]) + " queued, " + str(processStatus["RUNNING"]) + " running, " + str(processStatus["FINISHED"]) + " finished, " + str(processStatus["FAILED"]) + " failed"
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All jobs done (" + processStatusString + ")"
            return True
        if timeout is not None and louhiTimer.getElapsedTime() >= timeout:
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            return False
        # NOTE(review): cscConnection is neither a parameter nor a local of
        # this method, so it has to exist at module level for this line to
        # work -- confirm, or read the machine name from self instead.
        waitMessage = "\rWaiting for " + str(len(scriptNames)) + " on " + cscConnection.machineName + "(" + processStatusString + "),"
        sleepString = " [ ] "
        print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
        sleepTimer = Timer()
        while sleepTimer.getElapsedTime() < 60:
            steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
            sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
            print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
def waitForJobs(self, scriptNames, timeout=None):
    """
    Poll the status of every script until none is queued or running, or
    until the timeout has passed.

    The status of each script is read with self.getLouhiStatus(scriptName),
    which must return one of "FINISHED", "QUEUED", "FAILED" or "RUNNING".
    Statuses are re-read once every 60 seconds.

    @param scriptNames: sequence of script names accepted by self.getLouhiStatus
    @param timeout: give up after this many seconds of waiting; None waits forever
    @return: True when no script is queued or running any more, False on timeout
    """
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for scriptName in scriptNames:
            processStatus[self.getLouhiStatus(scriptName)] += 1
        processStatusString = str(processStatus["QUEUED"]) + " queued, " + str(processStatus["RUNNING"]) + " running, " + str(processStatus["FINISHED"]) + " finished, " + str(processStatus["FAILED"]) + " failed"
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All jobs done (" + processStatusString + ")"
            return True
        if timeout is not None and louhiTimer.getElapsedTime() >= timeout:
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            return False
        # NOTE(review): cscConnection is neither a parameter nor a local of
        # this method, so it has to exist at module level for this line to
        # work -- confirm, or read the machine name from self instead.
        waitMessage = "\rWaiting for " + str(len(scriptNames)) + " on " + cscConnection.machineName + "(" + processStatusString + "),"
        sleepString = " [ ] "
        print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
        sleepTimer = Timer()
        while sleepTimer.getElapsedTime() < 60:
            steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
            sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
            print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
def optimize(self, trainSets, classifySets, parameters=defaultOptimizationParameters, evaluationClass=None, evaluationArgs=None, combinationsThatTimedOut=None):
    """
    Train and evaluate every combination of the given parameter values and
    return the combination with the best pooled evaluation.

    Each entry of classifySets is classified once per combination with a
    model trained on trainSets; when trainSets[0] is a string it is used as
    the training data as such, otherwise all train sets that do not compare
    equal to the current classify set are concatenated. self.train() must
    return 0 on success; any other value is reported as a time-out and the
    combination is appended to combinationsThatTimedOut.

    When self has a tempDir attribute, the work is done in per-combination,
    per-fold subdirectories of it with their own debug.txt, and result CSV
    files are written there. self.tempDir and self.debugFile are restored
    before returning, also when an exception interrupts the search.

    @param trainSets: list of example lists, or a list whose first item is a string
    @param classifySets: list of example sets to classify, one fold per item
    @param parameters: dict mapping parameter name to a list of values to try;
                       a "predefined" key short-circuits the search
    @param evaluationClass: class called as evaluationClass(predictions, **evaluationArgs),
                            also providing average() and pool()
    @param evaluationArgs: extra keyword arguments for evaluationClass (default: none)
    @param combinationsThatTimedOut: list of combinations to skip; extended in place
    @return: {"predefined": ...} for a predefined model, otherwise the tuple
             (None, pooledEvaluation, combination) of the best combination,
             or None when no combination produced a result
    """
    if "predefined" in parameters:
        print >> sys.stderr, "Predefined model, skipping parameter estimation"
        return {"predefined": parameters["predefined"]}
    # A dict literal as a default would be shared between all calls.
    if evaluationArgs is None:
        evaluationArgs = {}
    if combinationsThatTimedOut is None:
        combinationsThatTimedOut = []

    print >> sys.stderr, "Optimizing parameters"
    # One list of (name, value) pairs per parameter, in sorted name order.
    parameterValues = []
    for parameterName in sorted(parameters.keys()):
        parameterValues.append([(parameterName, value) for value in parameters[parameterName]])
    combinations = []
    for combinationList in combine.combine(*parameterValues):
        combinations.append(dict((pair[0], pair[1]) for pair in combinationList))

    hasTempDir = hasattr(self, "tempDir")
    if hasTempDir:
        mainTempDir = self.tempDir
        mainDebugFile = self.debugFile
    bestResult = None
    combinationCount = 1
    try:
        for combination in combinations:
            print >> sys.stderr, " Parameters " + str(combinationCount) + "/" + str(len(combinations)) + ":", str(combination),
            skip = False
            for discarded in combinationsThatTimedOut:
                if self._dictIsIdentical(combination, discarded):
                    print >> sys.stderr
                    print >> sys.stderr, " Discarded before, skipping"
                    skip = True
                    break
            if skip:
                # NOTE(review): combinationCount is not advanced for skipped
                # combinations, so the printed index and the directory
                # numbering only count combinations that were actually run.
                continue

            fold = 1
            foldResults = []
            for classifyExamples in classifySets:
                if type(trainSets[0]) == types.StringType:
                    trainExamples = trainSets[0]
                else:
                    trainExamples = []
                    for trainSet in trainSets:
                        if trainSet != classifyExamples:
                            trainExamples.extend(trainSet)
                if hasTempDir:
                    self.tempDir = mainTempDir + "/parameters" + str(combinationCount) + "/optimization" + str(fold)
                    if not os.path.exists(self.tempDir):
                        os.makedirs(self.tempDir)
                    # Close the previous fold's debug file before replacing
                    # it, but never the one the caller handed to us.
                    if self.debugFile is not mainDebugFile:
                        self.debugFile.close()
                    self.debugFile = open(self.tempDir + "/debug.txt", "wt")

                timer = Timer()
                # Examples are passed on as such (copying them is disabled).
                trainRV = self.train(trainExamples, combination)
                print >> sys.stderr, " Time spent:", timer.elapsedTimeToString()
                if trainRV == 0:
                    predictions = self.classify(classifyExamples)
                    evaluation = evaluationClass(predictions, **evaluationArgs)
                    if len(classifySets) == 1:
                        print >> sys.stderr, evaluation.toStringConcise(" ")
                    else:
                        print >> sys.stderr, evaluation.toStringConcise(indent=" ", title="Fold " + str(fold))
                    foldResults.append(evaluation)
                    if hasTempDir:
                        evaluation.saveCSV(self.tempDir + "/results.csv")
                else:
                    combinationsThatTimedOut.append(combination)
                    print >> sys.stderr, " Timed out"
                fold += 1

            if foldResults:
                averageResult = evaluationClass.average(foldResults)
                poolResult = evaluationClass.pool(foldResults)
                if hasTempDir:
                    combinationDir = mainTempDir + "/parameters" + str(combinationCount)
                    TableUtils.writeCSV(combination, combinationDir + ".csv")
                    averageResult.saveCSV(combinationDir + "/resultsAverage.csv")
                    poolResult.saveCSV(combinationDir + "/resultsPooled.csv")
                if len(classifySets) > 1:
                    print >> sys.stderr, averageResult.toStringConcise(" Avg: ")
                    print >> sys.stderr, poolResult.toStringConcise(" Pool: ")
                if bestResult is None or poolResult.compare(bestResult[1]) > 0:
                    bestResult = (None, poolResult, combination)
                    # Make sure memory is released: only the scores of the
                    # best result are needed from here on.
                    bestResult[1].classifications = None
                    bestResult[1].predictions = None
            combinationCount += 1
            if hasTempDir and self.debugFile is not mainDebugFile:
                self.debugFile.close()
    finally:
        if hasTempDir:
            if self.debugFile is not mainDebugFile:
                self.debugFile.close()
            self.tempDir = mainTempDir
            self.debugFile = mainDebugFile
    return bestResult
def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
    """
    Poll the status of every job until none is queued or running, or until
    the timeout has passed.

    The status of each job is read with self.getJobStatus(job), which must
    return one of "FINISHED", "QUEUED", "FAILED" or "RUNNING".

    @param jobs: iterable of job handles accepted by self.getJobStatus
    @param pollIntervalSeconds: seconds to wait between two status polls
    @param timeout: give up after this many seconds of waiting; None waits forever
    @param verbose: when True, progress and the final outcome are printed to sys.stderr
    @return: dict with the number of jobs in each state at the last poll
    """
    print >> sys.stderr, "Waiting for results"
    waitTimer = Timer()
    while True:
        jobStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for job in jobs:
            jobStatus[self.getJobStatus(job)] += 1
        jobStatusString = str(jobStatus["QUEUED"]) + " queued, " + str(jobStatus["RUNNING"]) + " running, " + str(jobStatus["FINISHED"]) + " finished, " + str(jobStatus["FAILED"]) + " failed"
        if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
            if verbose:
                print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
            break
        # The timeout is measured with waitTimer, the only timer that spans
        # the whole wait (the names used here before were never defined).
        if timeout is not None and waitTimer.getElapsedTime() >= timeout:
            if verbose:
                print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString()
            break
        sleepTimer = Timer()
        accountName = self.account
        if accountName is None:
            accountName = "local"
        waitMessage = "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),"
        if verbose:
            sleepString = " [ ] "
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
        while True:
            elapsed = sleepTimer.getElapsedTime()
            if elapsed >= pollIntervalSeconds:
                break
            if verbose:
                steps = int(10 * elapsed / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            # Do not sleep past the end of the poll interval.
            time.sleep(min(5, max(0, pollIntervalSeconds - elapsed)))
    return jobStatus
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports at most targetCount jobs.

    Returns immediately, without polling, when targetCount is -1, and after
    a single poll when the job count is already at or below the limit.
    Otherwise the job count is polled again after every pollIntervalSeconds
    until it has dropped to the limit.

    @param targetCount: highest acceptable job count; -1 disables waiting
    @param pollIntervalSeconds: seconds to wait between two job-count polls
    @param verbose: when True, a progress line is kept up to date on sys.stderr
    """
    if targetCount == -1:
        return
    numJobs = self.getNumJobs()
    if numJobs <= targetCount:
        return
    accountName = self.account
    if accountName is None:
        accountName = "local"
    waitTimer = Timer()
    while numJobs > targetCount:
        sleepTimer = Timer()
        # The job count only changes between polls, so this part of the
        # progress line is fixed for the whole sleep period.
        waitMessage = "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")"
        if verbose:
            sleepString = " [ ] "
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
        while True:
            elapsed = sleepTimer.getElapsedTime()
            if elapsed >= pollIntervalSeconds:
                break
            if verbose:
                steps = int(10 * elapsed / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            # Never sleep past the end of the poll interval; a fixed 5 s nap
            # would overshoot intervals that are short or not a multiple of 5.
            time.sleep(min(5, max(0, pollIntervalSeconds - elapsed)))
        numJobs = self.getNumJobs()
    if verbose:
        # The leading newline terminates the carriage-return progress line.
        print >> sys.stderr, "\nAll jobs done"
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports at most targetCount jobs.

    Returns immediately, without polling, when targetCount is -1, and after
    a single poll when the job count is already at or below the limit.
    Otherwise the job count is polled again after every pollIntervalSeconds
    until it has dropped to the limit.

    @param targetCount: highest acceptable job count; -1 disables waiting
    @param pollIntervalSeconds: seconds to wait between two job-count polls
    @param verbose: when True, a progress line is kept up to date on sys.stderr
    """
    if targetCount == -1:
        return
    numJobs = self.getNumJobs()
    if numJobs <= targetCount:
        return
    # The account name shown in the progress line; "local" stands in for a
    # connection without an account.
    accountName = self.account
    if accountName is None:
        accountName = "local"
    waitTimer = Timer()
    while numJobs > targetCount:
        sleepTimer = Timer()
        waitMessage = "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")"
        if verbose:
            sleepString = " [ ] "
            print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
        # Wait out the poll interval instead of re-polling in a tight loop.
        while True:
            elapsed = sleepTimer.getElapsedTime()
            if elapsed >= pollIntervalSeconds:
                break
            if verbose:
                steps = int(10 * elapsed / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10 - steps) * " " + "] "
                print >> sys.stderr, waitMessage, waitTimer.elapsedTimeToString() + sleepString,
            time.sleep(min(5, max(0, pollIntervalSeconds - elapsed)))
        numJobs = self.getNumJobs()
    if verbose:
        # The leading newline terminates the carriage-return progress line.
        print >> sys.stderr, "\nAll jobs done"
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False):
    """
    Submit one training/classification run per parameter combination, wait
    for the runs, evaluate their predictions and download the best model.

    @param Classifier: classifier class providing initTrainAndTestOnLouhi,
                       getLouhiStatus, getLouhiPredictions and downloadModel
                       (and makeClassFiles when it is named "MultiLabelClassifier")
    @param Evaluator: evaluator class providing evaluate(); must be named
                      "MultiLabelEvaluator" for a "MultiLabelClassifier"
    @param trainExamples: training examples, passed on to Classifier
    @param testExamples: test examples, passed on to Classifier and Evaluator
    @param classIds: class id set, or a string from which an IdSet is loaded
    @param combinations: list of parameter dicts, one run per dict
    @param workDir: optional directory for evaluation and model files
    @param timeout: stop waiting for the runs after this many seconds; None waits forever
    @param cscConnection: connection object passed on to Classifier
    @param downloadAllModels: when True, the model of every evaluated run is downloaded and gzipped
    @param steps: "SUBMIT" only submits the runs, "RESULTS" only collects
                  results of runs submitted earlier, "BOTH" does both
    @param threshold: when True, evaluator.determineThreshold is called for every run
    @return: None for steps == "SUBMIT" or when no run produced predictions.
             Otherwise [evaluator, modelFileName, predictions, evaluationOutput,
             combination] of the best run, or for a "MultiLabelClassifier"
             [bestByClass, modelFileName], where bestByClass maps class name
             to [fscore, runId, classId, threshold]
    """
    assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps
    bestResult = None
    combinationIds = []

    if type(classIds) == types.StringType:
        classIds = IdSet(filename=classIds)
    isMultiLabel = Classifier.__name__ == "MultiLabelClassifier"
    if isMultiLabel:
        negClass1 = True
        if "classifier" in combinations[0] and combinations[0]["classifier"] == "svmperf":
            negClass1 = False
        # Diagnostics go to stderr like every other message of this function.
        print >> sys.stderr, "negclass1", negClass1
        Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1)

    if steps in ["BOTH", "SUBMIT"]:
        print >> sys.stderr, "Initializing runs"
        for index, combination in enumerate(combinations):
            Stream.setIndent(" ")
            print >> sys.stderr, "Parameters " + str(index + 1) + "/" + str(len(combinations)) + ":", str(combination)
            combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds))
    else:
        # Nothing is submitted, so the run ids are rebuilt from the
        # parameters: "-key_value" for every key in sorted order.
        for combination in combinations:
            combinationIds.append("".join("-" + str(key) + "_" + str(combination[key]) for key in sorted(combination.keys())))
    Stream.setIndent()

    if steps not in ["BOTH", "RESULTS"]:
        return None

    Stream.setIndent(" ")
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for combinationId in combinationIds:
            # getLouhiStatus updates the counts in processStatus in place.
            Classifier.getLouhiStatus(combinationId, cscConnection, processStatus, classIds)
        processStatusString = str(processStatus["QUEUED"]) + " queued, " + str(processStatus["RUNNING"]) + " running, " + str(processStatus["FINISHED"]) + " finished, " + str(processStatus["FAILED"]) + " failed"
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All runs done (" + processStatusString + ")"
            break
        if timeout is not None and louhiTimer.getElapsedTime() >= timeout:
            # Whatever has finished by now is still evaluated below.
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            break
        waitMessage = "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),"
        sleepString = " [ ] "
        print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
        sleepTimer = Timer()
        while sleepTimer.getElapsedTime() < 60:
            # A separate name keeps the steps parameter intact.
            barSteps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
            sleepString = " [" + barSteps * "." + (10 - barSteps) * " " + "] "
            print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)

    print >> sys.stderr, "Evaluating results"
    bestCombinationId = None
    for index, combinationId in enumerate(combinationIds):
        Stream.setIndent(" ")
        predictions = Classifier.getLouhiPredictions(combinationId, cscConnection, workDir, classIds)
        if predictions is None:
            print >> sys.stderr, "No results for combination" + combinationId
            continue
        if downloadAllModels:
            modelFileName = Classifier.downloadModel(combinationId, cscConnection, workDir)
            if workDir is not None:
                modelFileName = os.path.join(workDir, modelFileName)
            subprocess.call("gzip -fv " + modelFileName, shell=True)
        print >> sys.stderr, "Evaluating results for combination" + combinationId
        evaluationOutput = "evaluation" + combinationId + ".csv"
        if workDir is not None:
            evaluationOutput = os.path.join(workDir, evaluationOutput)
        evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput)
        if threshold:
            print >> sys.stderr, "Thresholding"
            evaluator.determineThreshold(testExamples, predictions)
        if not isMultiLabel:
            if bestResult is None or evaluator.compare(bestResult[0]) > 0:
                bestResult = [evaluator, None, predictions, evaluationOutput, combinations[index]]
                bestCombinationId = combinationId
        else:
            assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__
            classNames = [name for name in classIds.Ids if name != "neg" and "---" not in name]
            if bestResult is None:
                bestResult = [{}, None]
                for className in classNames:
                    bestResult[0][className] = [-1, None, classIds.getId(className), None]
            bestByClass = bestResult[0]
            for className in classNames:
                fscore = evaluator.dataByClass[classIds.getId(className)].fscore
                if fscore > bestByClass[className][0]:
                    # The best run is tracked separately for every class.
                    entry = [fscore, combinationId, bestByClass[className][2]]
                    if threshold:
                        classId = classIds.getId(className, False)
                        if classId in evaluator.thresholds:
                            entry.append(evaluator.thresholds[classId])
                        else:
                            entry.append(0.0)
                    else:
                        entry.append(None)
                    bestByClass[className] = entry
            # For multi-label runs the whole per-class table is handed to
            # downloadModel instead of a single run id.
            bestCombinationId = bestResult
        os.remove(predictions)  # remove predictions to save space
    Stream.setIndent()

    if bestResult is None:
        # No run produced predictions, so there is nothing to select.
        print >> sys.stderr, "No results for any combination, no model selected"
        return None
    print >> sys.stderr, "Selected parameters", bestResult[-1]

    # Download best model and predictions
    modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir)
    if workDir is not None:
        modelFileName = os.path.join(workDir, modelFileName)
    # NOTE(review): the file name is interpolated into a shell command as
    # such; kept unchanged because the exact form of downloadModel's return
    # value is not visible here.
    subprocess.call("gzip -fv " + modelFileName, shell=True)
    modelFileName = modelFileName + ".gz"
    bestResult[1] = modelFileName
    return bestResult
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False):
    """
    Submit one training/classification run per parameter combination, wait
    for the runs, evaluate their predictions and download the best model.

    @param Classifier: classifier class providing initTrainAndTestOnLouhi,
                       getLouhiStatus, getLouhiPredictions and downloadModel
                       (and makeClassFiles when it is named "MultiLabelClassifier")
    @param Evaluator: evaluator class providing evaluate(); must be named
                      "MultiLabelEvaluator" for a "MultiLabelClassifier"
    @param trainExamples: training examples, passed on to Classifier
    @param testExamples: test examples, passed on to Classifier and Evaluator
    @param classIds: class id set, or a string from which an IdSet is loaded
    @param combinations: list of parameter dicts, one run per dict
    @param workDir: optional directory for evaluation and model files
    @param timeout: stop waiting for the runs after this many seconds; None waits forever
    @param cscConnection: connection object passed on to Classifier
    @param downloadAllModels: when True, the model of every evaluated run is downloaded and gzipped
    @param steps: "SUBMIT" only submits the runs, "RESULTS" only collects
                  results of runs submitted earlier, "BOTH" does both
    @param threshold: when True, evaluator.determineThreshold is called for every run
    @return: None for steps == "SUBMIT" or when no run produced predictions.
             Otherwise [evaluator, modelFileName, predictions, evaluationOutput,
             combination] of the best run, or for a "MultiLabelClassifier"
             [bestByClass, modelFileName], where bestByClass maps class name
             to [fscore, runId, classId, threshold]
    """
    assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps
    bestResult = None
    combinationIds = []

    if type(classIds) == types.StringType:
        classIds = IdSet(filename=classIds)
    isMultiLabel = Classifier.__name__ == "MultiLabelClassifier"
    if isMultiLabel:
        negClass1 = True
        if "classifier" in combinations[0] and combinations[0]["classifier"] == "svmperf":
            negClass1 = False
        # Diagnostics go to stderr like every other message of this function.
        print >> sys.stderr, "negclass1", negClass1
        Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1)

    if steps in ["BOTH", "SUBMIT"]:
        print >> sys.stderr, "Initializing runs"
        for index, combination in enumerate(combinations):
            Stream.setIndent(" ")
            print >> sys.stderr, "Parameters " + str(index + 1) + "/" + str(len(combinations)) + ":", str(combination)
            combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds))
    else:
        # Nothing is submitted, so the run ids are rebuilt from the
        # parameters: "-key_value" for every key in sorted order.
        for combination in combinations:
            combinationIds.append("".join("-" + str(key) + "_" + str(combination[key]) for key in sorted(combination.keys())))
    Stream.setIndent()

    if steps not in ["BOTH", "RESULTS"]:
        return None

    Stream.setIndent(" ")
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for combinationId in combinationIds:
            # getLouhiStatus updates the counts in processStatus in place.
            Classifier.getLouhiStatus(combinationId, cscConnection, processStatus, classIds)
        processStatusString = str(processStatus["QUEUED"]) + " queued, " + str(processStatus["RUNNING"]) + " running, " + str(processStatus["FINISHED"]) + " finished, " + str(processStatus["FAILED"]) + " failed"
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All runs done (" + processStatusString + ")"
            break
        if timeout is not None and louhiTimer.getElapsedTime() >= timeout:
            # Whatever has finished by now is still evaluated below.
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            break
        waitMessage = "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),"
        sleepString = " [ ] "
        print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
        sleepTimer = Timer()
        while sleepTimer.getElapsedTime() < 60:
            # A separate name keeps the steps parameter intact.
            barSteps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
            sleepString = " [" + barSteps * "." + (10 - barSteps) * " " + "] "
            print >> sys.stderr, waitMessage, louhiTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)

    print >> sys.stderr, "Evaluating results"
    bestCombinationId = None
    for index, combinationId in enumerate(combinationIds):
        Stream.setIndent(" ")
        predictions = Classifier.getLouhiPredictions(combinationId, cscConnection, workDir, classIds)
        if predictions is None:
            print >> sys.stderr, "No results for combination" + combinationId
            continue
        if downloadAllModels:
            modelFileName = Classifier.downloadModel(combinationId, cscConnection, workDir)
            if workDir is not None:
                modelFileName = os.path.join(workDir, modelFileName)
            subprocess.call("gzip -fv " + modelFileName, shell=True)
        print >> sys.stderr, "Evaluating results for combination" + combinationId
        evaluationOutput = "evaluation" + combinationId + ".csv"
        if workDir is not None:
            evaluationOutput = os.path.join(workDir, evaluationOutput)
        evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput)
        if threshold:
            print >> sys.stderr, "Thresholding"
            evaluator.determineThreshold(testExamples, predictions)
        if not isMultiLabel:
            if bestResult is None or evaluator.compare(bestResult[0]) > 0:
                bestResult = [evaluator, None, predictions, evaluationOutput, combinations[index]]
                bestCombinationId = combinationId
        else:
            assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__
            classNames = [name for name in classIds.Ids if name != "neg" and "---" not in name]
            if bestResult is None:
                bestResult = [{}, None]
                for className in classNames:
                    bestResult[0][className] = [-1, None, classIds.getId(className), None]
            bestByClass = bestResult[0]
            for className in classNames:
                fscore = evaluator.dataByClass[classIds.getId(className)].fscore
                if fscore > bestByClass[className][0]:
                    # The best run is tracked separately for every class.
                    entry = [fscore, combinationId, bestByClass[className][2]]
                    if threshold:
                        classId = classIds.getId(className, False)
                        if classId in evaluator.thresholds:
                            entry.append(evaluator.thresholds[classId])
                        else:
                            entry.append(0.0)
                    else:
                        entry.append(None)
                    bestByClass[className] = entry
            # For multi-label runs the whole per-class table is handed to
            # downloadModel instead of a single run id.
            bestCombinationId = bestResult
        os.remove(predictions)  # remove predictions to save space
    Stream.setIndent()

    if bestResult is None:
        # No run produced predictions, so there is nothing to select.
        print >> sys.stderr, "No results for any combination, no model selected"
        return None
    print >> sys.stderr, "Selected parameters", bestResult[-1]

    # Download best model and predictions
    modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir)
    if workDir is not None:
        modelFileName = os.path.join(workDir, modelFileName)
    # NOTE(review): the file name is interpolated into a shell command as
    # such; kept unchanged because the exact form of downloadModel's return
    # value is not visible here.
    subprocess.call("gzip -fv " + modelFileName, shell=True)
    modelFileName = modelFileName + ".gz"
    bestResult[1] = modelFileName
    return bestResult