示例#1
0
    def train(self):
        """Run megam on the accumulated training data and load the model.

        Flushes ``self.trainData`` so everything written so far is on disk,
        invokes the megam binary in ``self.mode`` on that file while
        capturing its stdout and stderr, then closes and drops the training
        file and passes the captured stdout to ``self.readModel``.
        """
        data = self.trainData
        data.flush()

        captured_out = StringIO()
        # stderr is captured as well, but it is never read in this method.
        captured_err = StringIO()
        bettersystem("%s -fvals %s %s" % (megam, self.mode, data.name),
                     stdout=captured_out, stderr=captured_err)

        # The training file is no longer needed once megam has run.
        data.close()
        self.trainData = None

        self.readModel(captured_out.getvalue())
示例#2
0
    def train(self):
        """Run megam on the accumulated training data and load the model.

        Flushes ``self.trainData`` so everything written so far is on disk,
        invokes the megam binary in ``self.mode`` on that file while
        capturing its stdout and stderr, then closes and drops the training
        file and passes the captured stdout to ``self.readModel``.
        """
        data = self.trainData
        data.flush()

        captured_out = StringIO()
        # stderr is captured as well, but it is never read in this method.
        captured_err = StringIO()
        bettersystem("%s -fvals %s %s" % (megam, self.mode, data.name),
                     stdout=captured_out, stderr=captured_err)

        # The training file is no longer needed once megam has run.
        data.close()
        self.trainData = None

        self.readModel(captured_out.getvalue())
示例#3
0
def runtask(models, datadir):
    mfile = datadir/"matrix"
    bin = myBin(datadir)
    solver = bin/"chainedSolvers"
    mfile = datadir/"matrix"
    outfile = datadir/("output-%s" % "-".join(models[0].split()))

    output = StringIO()
    cmd = "%s %s %s" % (solver, " ".join(models), mfile)
    print "Running", cmd
    bettersystem(cmd, stdout=output)
    print "Done, postprocessing output..."
    outfh = file(outfile, 'w')
    for line in output.getvalue().split("\n"):
        if not line.strip().isdigit():
            print line
        else:
            print  >>outfh, line
    outfh.close()
    
    return ResultsFile(outfile)
示例#4
0
    def run(self, hogwash_job):
        on64 = (get_cpu_bitness() == 64)
        if on64:
            par = self.solverCmd.parent
            self.solverCmd = par/"../bin64"/self.solverCmd.basename()

        outfile = make_job_output_filename(hogwash_job, "output")
        cmd = "%s %s %s > %s" % (self.solverCmd, " ".join(self.solvers),
                                 self.mfile, outfile)

        print cmd

        status = bettersystem(cmd)
        if status != 0:
            raise BadExitCode(status)

        return ResultsFile(outfile)
示例#5
0
        devlog = file(modeldir / "devkeys", 'w')
        devfeats = file(modeldir / "devfeats", 'w')

        conditionalFeatures.timeSpanFeats(dev,
                                          logfile=devlog,
                                          blocksize=blocksize,
                                          output=devfeats)

        devlog.close()
        devfeats.close()

        trainErr = StringIO()

        bettersystem(megam + " -fvals binary " + modeldir / "feats" + " > " +
                     modeldir / "model",
                     stderr=trainErr)

        testErr = StringIO()

        bettersystem(megam + " -predict " + modeldir / "model"
                     " -fvals binary " + modeldir / "devfeats" + "> " +
                     modeldir / "predictions",
                     stderr=testErr)

        trainErrStr = lastLine(trainErr)
        testErrStr = lastLine(testErr)

        print "Blocksize", blocksize, \
              "train err", trainErrStr, "test err", testErrStr
        print "Majority baseline error", \
示例#6
0
            false1 += 1
        elif truth == 1:
            missed1 += 1
        lines += 1

    prec = 0
    if true1 + false1 > 0:
        prec = true1 / (true1 + false1)
    rec = true1 / (true1 + missed1)
    f = 0
    if prec + rec > 0:
        f = (2*prec*rec)/(prec+rec)

    err = (lines-right)/lines
    acc = 1 - err
    print "Acc", acc, "P", prec, "R", rec, "F", f

if __name__ == "__main__":
    # Usage: <script> BLOCKDIR
    # BLOCKDIR is expected to contain the "model" and "devfeats" files.
    blockdir = path(argv[1])
    model, feats = blockdir/"model", blockdir/"devfeats"

    # Run megam in prediction mode over the dev features.  Its stdout is
    # kept in memory for scoring; its stderr is captured and never read.
    predictions = StringIO()
    cmd = megam + " -predict " + model + " -fvals binary " + feats
    bettersystem(cmd, stdout=predictions, stderr=StringIO())

    score(predictions, feats)
示例#7
0
    print "True clustering has", \
          len(set([pt.label for pt in test])), "clusters"
    print "Objective value of truth: %.3g" % objective(test, clMat)
    print

    for algorithm in ["first", "best", "vote", "pivot"]:
        evals = []

        print "================", algorithm, "================="
        print

        for run in range(10):
            cmd = "%s -a %s %s" % (tester, algorithm, classFileName)
            output = StringIO()
            status = bettersystem(cmd, stdout=output, stderr=StringIO())
            assert(status == 0)

            prop = readDataFile(output.getvalue().split("\n"))

            score = Eval(test, prop, clMat, filename=("run%d" % run))
            evals.append(score)
            print "Run", run
            print score

        print algorithm
        print "Best Objective:"
        print min(evals, key=lambda x: x.stats["objective"])

        print "Average:"
        print reduce(lambda x,y: x+y, evals).normalized(len(evals))
示例#8
0
        for power in range(1,4):
            for layer in range(9):
                dsize = (layer + 1) * 10**power
                print "Data: size", dsize
                if dsize >= 5000:
                    break

                for run in range(3):
                    print "Run:", run

                    ddir = workdir/("data%d-%d" % (dsize, run))
                    ddir.mkdir()

                    output = StringIO()
                    cmd = "python script/genGaussians.py -n %d -t 10 -f 3 -a 1"
                    bettersystem(cmd % dsize, stdout=output)

#                    bettersystem("python script/genFeatureless.py -n %d" %
#                                 dsize, stdout=output)

                    dfileName = ddir/"data"
                    dfile = file(dfileName, 'w')
                    dfile.write(output.getvalue()),
                    dfile.close()

                    output = StringIO()
                    bettersystem("python script/classify.py -c max-ent %s" %
                                 dfileName, stdout=output)

#                    bettersystem("python script/classify.py %s" %
#                                 dfileName, stdout=output)