def train(self):
    """Run megam on the buffered training data and load the resulting model.

    Flushes ``self.trainData``, runs ``megam -fvals <mode> <file>`` through
    ``bettersystem`` with stdout and stderr captured in memory, closes and
    drops the training-data handle, and finally hands megam's captured
    stdout to ``self.readModel``.  The captured stderr is not inspected.
    """
    # Make sure everything written so far is on disk before megam reads it.
    self.trainData.flush()

    captured_out = StringIO()
    captured_err = StringIO()
    command = "%s -fvals %s %s" % (megam, self.mode, self.trainData.name)
    bettersystem(command, stdout=captured_out, stderr=captured_err)

    # Release the training-data handle before parsing the model.
    self.trainData.close()
    self.trainData = None

    self.readModel(captured_out.getvalue())
def runtask(models, datadir): mfile = datadir/"matrix" bin = myBin(datadir) solver = bin/"chainedSolvers" mfile = datadir/"matrix" outfile = datadir/("output-%s" % "-".join(models[0].split())) output = StringIO() cmd = "%s %s %s" % (solver, " ".join(models), mfile) print "Running", cmd bettersystem(cmd, stdout=output) print "Done, postprocessing output..." outfh = file(outfile, 'w') for line in output.getvalue().split("\n"): if not line.strip().isdigit(): print line else: print >>outfh, line outfh.close() return ResultsFile(outfile)
def run(self, hogwash_job): on64 = (get_cpu_bitness() == 64) if on64: par = self.solverCmd.parent self.solverCmd = par/"../bin64"/self.solverCmd.basename() outfile = make_job_output_filename(hogwash_job, "output") cmd = "%s %s %s > %s" % (self.solverCmd, " ".join(self.solvers), self.mfile, outfile) print cmd status = bettersystem(cmd) if status != 0: raise BadExitCode(status) return ResultsFile(outfile)
devlog = file(modeldir / "devkeys", 'w') devfeats = file(modeldir / "devfeats", 'w') conditionalFeatures.timeSpanFeats(dev, logfile=devlog, blocksize=blocksize, output=devfeats) devlog.close() devfeats.close() trainErr = StringIO() bettersystem(megam + " -fvals binary " + modeldir / "feats" + " > " + modeldir / "model", stderr=trainErr) testErr = StringIO() bettersystem(megam + " -predict " + modeldir / "model" " -fvals binary " + modeldir / "devfeats" + "> " + modeldir / "predictions", stderr=testErr) trainErrStr = lastLine(trainErr) testErrStr = lastLine(testErr) print "Blocksize", blocksize, \ "train err", trainErrStr, "test err", testErrStr print "Majority baseline error", \
false1 += 1 elif truth == 1: missed1 += 1 lines += 1 prec = 0 if true1 + false1 > 0: prec = true1 / (true1 + false1) rec = true1 / (true1 + missed1) f = 0 if prec + rec > 0: f = (2*prec*rec)/(prec+rec) err = (lines-right)/lines acc = 1 - err print "Acc", acc, "P", prec, "R", rec, "F", f if __name__ == "__main__": blockdir = path(argv[1]) model = blockdir/"model" feats = blockdir/"devfeats" predictions = StringIO() bettersystem(megam + " -predict " + model + " -fvals binary " + feats, stdout=predictions, stderr=StringIO()) score(predictions, feats)
print "True clustering has", \ len(set([pt.label for pt in test])), "clusters" print "Objective value of truth: %.3g" % objective(test, clMat) print for algorithm in ["first", "best", "vote", "pivot"]: evals = [] print "================", algorithm, "=================" print for run in range(10): cmd = "%s -a %s %s" % (tester, algorithm, classFileName) output = StringIO() status = bettersystem(cmd, stdout=output, stderr=StringIO()) assert(status == 0) prop = readDataFile(output.getvalue().split("\n")) score = Eval(test, prop, clMat, filename=("run%d" % run)) evals.append(score) print "Run", run print score print algorithm print "Best Objective:" print min(evals, key=lambda x: x.stats["objective"]) print "Average:" print reduce(lambda x,y: x+y, evals).normalized(len(evals))
for power in range(1,4): for layer in range(9): dsize = (layer + 1) * 10**power print "Data: size", dsize if dsize >= 5000: break for run in range(3): print "Run:", run ddir = workdir/("data%d-%d" % (dsize, run)) ddir.mkdir() output = StringIO() cmd = "python script/genGaussians.py -n %d -t 10 -f 3 -a 1" bettersystem(cmd % dsize, stdout=output) # bettersystem("python script/genFeatureless.py -n %d" % # dsize, stdout=output) dfileName = ddir/"data" dfile = file(dfileName, 'w') dfile.write(output.getvalue()), dfile.close() output = StringIO() bettersystem("python script/classify.py -c max-ent %s" % dfileName, stdout=output) # bettersystem("python script/classify.py %s" % # dfileName, stdout=output)