ss = [0]*K i = 0 for row in reader: i += 1 if (random.random() < float(options.sampleRate)): data = map(float, row) if (len(data) != K): print "Error: there are " + str(K) + " categories, but line has " + str(len(data)) + " counts." print "line " + str(i) + ": " + str(data) for k in range(0, K): ss[k] += math.log(data[k]) if (i % 1000000) == 0: print "Loading Data", i for k in range(0, K): ss[k] /= i dataLoadTime = time.time() logging.debug("all data loaded into memory") logging.debug("time to load memory: ", dataLoadTime - startTime) priors = DE.findDirichletPriors(ss, priors) print "Final priors: ", priors logging.debug("Final average loss:", DE.getTotalLoss(priors, ss)) logging.debug("best loss: ", DE.getTotalLoss([1,2], ss)) totalTime = time.time() - dataLoadTime logging.debug("Time to calculate: " + str(totalTime))
i = 0 for row in reader: i += 1 if (random.random() < float(options.sampleRate)): data = map(float, row) if (len(data) != K): print "Error: there are " + str( K) + " categories, but line has " + str(len(data)) + " counts." print "line " + str(i) + ": " + str(data) for k in range(0, K): ss[k] += math.log(data[k]) if (i % 1000000) == 0: print "Loading Data", i for k in range(0, K): ss[k] /= i dataLoadTime = time.time() logging.debug("all data loaded into memory") logging.debug("time to load memory: ", dataLoadTime - startTime) priors = DE.findDirichletPriors(ss, priors) print "Final priors: ", priors logging.debug("Final average loss:", DE.getTotalLoss(priors, ss)) logging.debug("best loss: ", DE.getTotalLoss([1, 2], ss)) totalTime = time.time() - dataLoadTime logging.debug("Time to calculate: " + str(totalTime))
# NOTE(review): this fragment appears to begin inside loops whose headers are
# not visible here (`errors`, `M`, `K` and `N` are used without being set).
# Finite-M part: draw a dataset with Sample.generateRandomDataset(M, N, alphas),
# fit priors with DME.findDirichletPriors starting from the uniform guess
# [1.0 / K] * K, and append getError(alphas, MLEPriors) to `errors`. After
# sorting, print a tab-separated row: N, M and the sorted errors at indices
# 300, 500, 700 and 900 (so `errors` must hold at least 901 values).
# "M = infinity" part: repeat 1000 times using
# Sample.generateRandomDirichletsSS(N, alphas) with DE.findDirichletPriors, and
# print the same row with "Inf" in the M column.
uMatrix = Sample.generateRandomDataset(M, N, alphas) vVector = [N] * M init = [1.0 / K] * K MLEPriors = DME.findDirichletPriors(uMatrix, vVector, init, False) errors.append(getError(alphas, MLEPriors)) errors.sort() print "\t".join( map(str, [N, M, errors[300], errors[500], errors[700], errors[900] ])) # Test the M = infinity case errors = [] for i in range(0, 1000): ss = Sample.generateRandomDirichletsSS(N, alphas) init = [1.0 / K] * K MLEPriors = DE.findDirichletPriors(ss, init, False) error = getError(alphas, MLEPriors) errors.append(error) errors.sort() print "\t".join( map(str, [N, "Inf", errors[300], errors[500], errors[700], errors[900] ]))
print K = len(alphas) for M in [5]: errors = [] for i in range(0, 1000): uMatrix = Sample.generateRandomDataset(M, N, alphas) vVector = [N]*M init = [1.0 / K]*K MLEPriors = DME.findDirichletPriors(uMatrix, vVector, init, False) errors.append(getError(alphas, MLEPriors)) errors.sort() print "\t".join(map(str, [N, M, errors[300], errors[500], errors[700], errors[900]])) # Test the M = infinity case errors = [] for i in range(0, 1000): ss = Sample.generateRandomDirichletsSS(N, alphas) init = [1.0 / K]*K MLEPriors = DE.findDirichletPriors(ss, init, False) error = getError(alphas, MLEPriors) errors.append(error) errors.sort() print "\t".join(map(str, [N, "Inf", errors[300], errors[500], errors[700], errors[900]]))