gridPointDir = "grid/gridpoint-" + pId assert gridCSC.exists(gridPointDir) if gridCSC.exists(gridPointDir + "/results.csv"): print >> sys.stderr, "Downloading results" gridCSC.download(gridPointDir + "/results.csv", "results" + pId + ".csv") else: print >> sys.stderr, "Run not yet finished" finished = False time.sleep(60) if options.mode in ["ALL", "GRID_EVALUATE"]: bestResult = (-1, None, None) for filename in os.listdir(WORKDIR): if filename[-4:] == ".csv" and os.path.getsize(filename) != 0: gridRows = TableUtils.readCSV(filename) fscore = None for row in gridRows: if row["eval"] == "approximate" and row[ "event_class"] == "ALL-TOTAL": fscore = row["fscore"] break assert fscore != None, row if fscore > bestResult[0]: bestResult = (fscore, gridRows, filename) print bestResult #if options.mode in ["] # print >> sys.stderr, "Grid search complete" # print >> sys.stderr, "Tested", count - options.startFrom, "out of", count, "combinations" # print >> sys.stderr, "Best parameter combination:", bestResults[0]
# Script body: parse the command line, load the requested evaluator class,
# clear any stale output file, read the prediction CSV and evaluate it.

# Psyco is optional: use it when it can be imported, otherwise carry on.
try:
    import psyco
    psyco.full()
    print >> sys.stderr, "Found Psyco, using"
except ImportError:
    print >> sys.stderr, "Psyco not installed"

# The parent directory is added to the import path before the project
# imports below (presumably that is where Utils, Core and Evaluators live).
sys.path.append("..")
from Utils.ProgressCounter import ProgressCounter
from Utils.Parameters import splitParameters
from optparse import OptionParser
import Core.ExampleUtils as ExampleUtils
from Core.IdSet import IdSet
import Utils.TableUtils as TableUtils

optparser = OptionParser(usage="%prog [options]\nCalculate f-score and other statistics.")
optparser.add_option("-i", "--input", default=None, dest="input", help="Input file in csv-format", metavar="FILE")
optparser.add_option("-o", "--output", default=None, dest="output", help="Output file for the statistics")
optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
(options, args) = optparser.parse_args()

print >> sys.stderr, "Importing modules"
# Equivalent to "from Evaluators.<name> import <name> as EvaluatorClass",
# but resolved through the import machinery instead of exec, so the option
# value is only ever treated as a module/attribute name, never as code.
# A non-empty fromlist makes __import__ return the submodule itself.
evaluatorModule = __import__("Evaluators." + options.evaluator, globals(), locals(), [options.evaluator])
EvaluatorClass = getattr(evaluatorModule, options.evaluator)

# Remove a stale output file; the message is only printed when a file is
# actually there to be removed.
if options.output is not None and os.path.exists(options.output):
    print >> sys.stderr, "Outputfile exists, removing", options.output
    os.remove(options.output)

# Read input data
fieldnames = ["class", "prediction", "id", "fold"]
rows = TableUtils.readCSV(options.input, fieldnames)
evaluateCSV(rows, options, EvaluatorClass)
# Script body: parse the command line, load the requested evaluator class,
# clear any stale output file, read the prediction CSV and evaluate it.

# Psyco is optional: use it when it can be imported, otherwise carry on.
try:
    import psyco
    psyco.full()
    print >> sys.stderr, "Found Psyco, using"
except ImportError:
    print >> sys.stderr, "Psyco not installed"

# The parent directory is added to the import path before the project
# imports below (presumably that is where Utils, Core and Evaluators live).
sys.path.append("..")
from Utils.ProgressCounter import ProgressCounter
from Utils.Parameters import splitParameters
from optparse import OptionParser
import Core.ExampleUtils as ExampleUtils
from Core.IdSet import IdSet
import Utils.TableUtils as TableUtils

optparser = OptionParser(usage="%prog [options]\nCalculate f-score and other statistics.")
optparser.add_option("-i", "--input", default=None, dest="input", help="Input file in csv-format", metavar="FILE")
optparser.add_option("-o", "--output", default=None, dest="output", help="Output file for the statistics")
optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
(options, args) = optparser.parse_args()

print >> sys.stderr, "Importing modules"
# Equivalent to "from Evaluators.<name> import <name> as EvaluatorClass",
# but resolved through the import machinery instead of exec, so the option
# value is only ever treated as a module/attribute name, never as code.
# A non-empty fromlist makes __import__ return the submodule itself.
evaluatorModule = __import__("Evaluators." + options.evaluator, globals(), locals(), [options.evaluator])
EvaluatorClass = getattr(evaluatorModule, options.evaluator)

# Remove a stale output file; the message is only printed when a file is
# actually there to be removed.
if options.output is not None and os.path.exists(options.output):
    print >> sys.stderr, "Outputfile exists, removing", options.output
    os.remove(options.output)

# Read input data
fieldnames = ["class", "prediction", "id", "fold"]
rows = TableUtils.readCSV(options.input, fieldnames)
evaluateCSV(rows, options, EvaluatorClass)
classNameDict[classId] = className classNameFile.close() #classSet = IdSet(idDict=classNameDict, locked=True) if options.output != None: print >> sys.stderr, "Outputfile exists, removing", options.output if os.path.exists(options.output): os.remove(options.output) print >> sys.stderr, "Importing modules" exec "from Evaluators." + options.evaluator + " import " + options.evaluator + " as EvaluatorClass" fieldnames = ["class","prediction","id","fold","c"] # Find best c-parameter from parameter estimation data print >> sys.stderr, "Finding optimal c-parameters from", options.parameters rows = TableUtils.readCSV(options.parameters, fieldnames) folds = sorted(list(TableUtils.getValueSet(rows, "fold"))) cParameterByFold = {} for fold in folds: print >> sys.stderr, " Processing fold", fold foldRows = TableUtils.selectRowsCSV(rows, {"fold":fold}) cParameters = sorted(list(TableUtils.getValueSet(foldRows, "c"))) evaluators = [] cParameterByEvaluator = {} for cParameter in cParameters: print >> sys.stderr, " Processing c-parameter", cParameter, paramRows = TableUtils.selectRowsCSV(foldRows, {"c":cParameter}) evaluator = Evaluator.calculateFromCSV(paramRows, EvaluatorClass) #print evaluator.toStringConcise() cParameterByEvaluator[evaluator] = cParameter evaluators.append(evaluator)
classNameDict[classId] = className classNameFile.close() #classSet = IdSet(idDict=classNameDict, locked=True) if options.output != None: print >> sys.stderr, "Outputfile exists, removing", options.output if os.path.exists(options.output): os.remove(options.output) print >> sys.stderr, "Importing modules" exec "from Evaluators." + options.evaluator + " import " + options.evaluator + " as EvaluatorClass" fieldnames = ["class", "prediction", "id", "fold", "c"] # Find best c-parameter from parameter estimation data print >> sys.stderr, "Finding optimal c-parameters from", options.parameters rows = TableUtils.readCSV(options.parameters, fieldnames) folds = sorted(list(TableUtils.getValueSet(rows, "fold"))) cParameterByFold = {} for fold in folds: print >> sys.stderr, " Processing fold", fold foldRows = TableUtils.selectRowsCSV(rows, {"fold": fold}) cParameters = sorted(list(TableUtils.getValueSet(foldRows, "c"))) evaluators = [] cParameterByEvaluator = {} for cParameter in cParameters: print >> sys.stderr, " Processing c-parameter", cParameter, paramRows = TableUtils.selectRowsCSV(foldRows, {"c": cParameter}) evaluator = Evaluator.calculateFromCSV(paramRows, EvaluatorClass) #print evaluator.toStringConcise() cParameterByEvaluator[evaluator] = cParameter evaluators.append(evaluator)
pId = getCombinationString(params) #"-boost_"+str(param)[0:3] # param id gridPointDir = "grid/gridpoint-"+pId assert gridCSC.exists(gridPointDir) if gridCSC.exists(gridPointDir + "/results.csv"): print >> sys.stderr, "Downloading results" gridCSC.download(gridPointDir + "/results.csv", "results"+pId+".csv") else: print >> sys.stderr, "Run not yet finished" finished = False time.sleep(60) if options.mode in ["ALL", "GRID_EVALUATE"]: bestResult = (-1, None, None) for filename in os.listdir(WORKDIR): if filename[-4:] == ".csv" and os.path.getsize(filename) != 0: gridRows = TableUtils.readCSV(filename) fscore = None for row in gridRows: if row["eval"] == "approximate" and row["event_class"] == "ALL-TOTAL": fscore = row["fscore"] break assert fscore != None, row if fscore > bestResult[0]: bestResult = (fscore, gridRows, filename) print bestResult #if options.mode in ["] # print >> sys.stderr, "Grid search complete" # print >> sys.stderr, "Tested", count - options.startFrom, "out of", count, "combinations" # print >> sys.stderr, "Best parameter combination:", bestResults[0]