def scenario1(D, sFileStub):
    """Run repeated EM restarts on D and log the best run of each outer trial.

    For each of ``numOuterTrials`` outer trials, ``numInnerTrials`` fresh
    ``EM.cEM`` models are fitted with ``bPPC`` switched off, and the one with
    the highest ``dEMLikelihood`` is kept.  One "likelihood,NMI" row per
    outer trial is written to ``<sFileStub>.scen1.results``.

    Args:
        D: data object; must expose ``classlist`` and be accepted by
            ``EM.cEM`` and ``utils.evaluateEM_NMI``.
        sFileStub: path prefix of the results file; ".scen1.results" is
            appended to it.

    Returns:
        A list with the best EM object of every outer trial, in trial order.
    """
    EMResults = []
    # The context manager closes the results file even when a fit or the
    # NMI evaluation raises part-way through.
    with open(sFileStub + ".scen1.results", "w") as f:
        for iOutertrial in range(numOuterTrials):
            f.write("outertrial: %d\n" % iOutertrial)
            f.write("likelihood,NMI\n")
            bestEM = None
            bestLikelihood = 0
            for iRestart in range(numInnerTrials):
                EMAlg = EM.cEM(D)
                EMAlg.bPPC = False
                # presumably the argument is the number of clusters to fit
                EMAlg.EM(len(D.classlist))
                # The first restart always becomes the incumbent, so the
                # initial bestLikelihood of 0 never takes part in a comparison.
                if bestEM is None or EMAlg.dEMLikelihood > bestLikelihood:
                    bestLikelihood = EMAlg.dEMLikelihood
                    bestEM = EMAlg
            EMResults.append(bestEM)
            # Report the NMI of the best model so that both columns of the
            # row describe the same run (not whichever restart ran last).
            f.write("%f,%f\n" % (bestLikelihood,
                                 utils.evaluateEM_NMI(D, bestEM)))
            # Flush per trial so partial results survive an interrupted run.
            f.flush()
    return EMResults
# random pairwise constraints and watch to see our
# NMI increase
import pickle
import sys

import EM
import cData

if len(sys.argv) < 2:
    # Single-argument print(...) parses the same way on Python 2 and 3.
    print("provide filename and optionally a filename for the pickle of centers")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module and is missing e.g. under "python -S".
    sys.exit(1)

# use the same code for getting initial points as baturay did
D = cData.cData(sys.argv[1])
D.setType("2", "random")
EmAlg = EM.cEM(D)
EmAlg.EM(len(D.classlist))
# bPPC is enabled only after the first EM pass above has run.
EmAlg.bPPC = True
# Creates clusters depending on what EM guessed.
D.createClusters(EmAlg)
# Finds the outerpoints and the midpoints and assigns them in emclusters.
D.repPoints(EmAlg)
# This makes the algorithm start with good initial points.
EmAlg = D.goodInitial(EmAlg)

# No newline is emitted here, so subsequent output continues on this line.
sys.stdout.write("pickling starting position to: ")
# Default target is pickles/<basename of the data file>.pickle; an explicit
# second command-line argument overrides it.
picklefname = "pickles/" + sys.argv[1].split('/')[-1] + ".pickle"
if len(sys.argv) > 2:
    picklefname = sys.argv[2]
# NOTE(review): text mode "w" is only safe for pickle protocol 0; confirm the
# protocol used by the dump call before switching this to "wb".
f = open(picklefname, "w")
l = EmAlg.lCenters
def run():
    """Run one EM pass over the wine data set.

    Loads "data/winenorm3_pyre.csv" into a ``cData.cData`` object, wraps it
    in an ``EM.cEM`` instance and calls its ``EM`` method with 3 (presumably
    the number of clusters).  Nothing is returned.
    """
    wine_data = cData.cData("data/winenorm3_pyre.csv")
    em_model = EM.cEM(wine_data)
    em_model.EM(3)
import numpy

import EM
import cData
import utils

# run EM several times and get the likelihood
for iRestart in range(20):
    # The data set is reloaded for every restart, so each run starts from a
    # fresh cData object.
    D = cData.cData("data/winenorm3_pyre.csv")
    # Alternative data set:
    # D = cData.cData("data/normvert.csv")
    M = EM.cEM(D)
    M.bPPC = False
    M.EM(3)
    # One formatted print replaces three chained Python 2 print statements.
    # The doubled spaces reproduce the separators those statements emitted,
    # and the single-argument form parses on Python 2 and Python 3 alike.
    print("%s  nmi:  %s" % (M.dEMLikelihood, utils.evaluateEM_NMI(D, M)))