Exemplo n.º 1
0
def buildMixtureTreeMaxKL(data, K, iterations, maxKL, branchesPerNode=2, multinomial=None):
    """Recursively build a mixture tree, splitting while the fit is too poor.

    A mixture model with ``branchesPerNode`` components is fitted to ``data``,
    every row is assigned to one component, and the procedure recurses on
    each component's rows, passing that component's multinomial down so the
    child can decide whether it needs to split any further.

    Args:
        data: Sequence of count rows; each row is handed to
            ``MME.assignComponentToCounts``.
        K: Second argument of ``MME.MultinomialMixtureModelHyperparams``;
            also the length of the uniform ``[1.0 / K] * K`` list passed as
            its fourth argument.
        iterations: Forwarded to ``MME.computeDirichletMixture``.
        maxKL: Threshold; a branch whose worst KL value (as reported by
            ``MME.worstFitForSingleMultinomial``) is below it is not split.
        branchesPerNode: Number of mixture components fitted at each node.
        multinomial: The multinomial the parent fitted for this branch, or
            ``None`` at the root (no fit check is done in that case).

    Returns:
        A ``MultinomialMixtureTree`` whose ``mixtureNodes[c]`` holds the
        subtree for component ``c`` (or ``None`` where recursion stopped),
        or ``None`` when this branch should not be split at all.
    """
    logging.info("Dataset Size: %s", len(data))

    # Compare against None explicitly: a truthiness test would skip the
    # check for an empty sequence and raises for multi-element numpy arrays.
    if multinomial is not None:
        worstKL, _ = MME.worstFitForSingleMultinomial(data, multinomial)
        logging.info("Worst KL found in branch: %s", worstKL)
        if worstKL < maxKL:
            # The parent's multinomial already fits this branch well enough.
            return None

    # Too few rows to spread over the requested number of branches.
    if len(data) < branchesPerNode:
        return None

    # Hyperparameters are fixed here: uniform lists for both arguments.
    hyperP = MME.MultinomialMixtureModelHyperparams(
        branchesPerNode,
        K,
        [1.0 / branchesPerNode] * branchesPerNode,
        [1.0 / K] * K,
    )

    mixtureModel = MME.computeDirichletMixture(data, hyperP, iterations)

    # Partition the rows by the component each one is assigned to.
    smallerDatasets = [[] for _ in range(hyperP.C)]
    for counts in data:
        component = MME.assignComponentToCounts(counts, mixtureModel)
        smallerDatasets[component].append(counts)

    treeModel = MultinomialMixtureTree(mixtureModel)

    # Recurse per component; a None child marks a leaf.
    for c, smallerDataset in enumerate(smallerDatasets):
        treeModel.mixtureNodes[c] = buildMixtureTreeMaxKL(
            smallerDataset,
            K,
            iterations,
            maxKL,
            branchesPerNode,
            mixtureModel.multinomials[c],
        )

    return treeModel
Exemplo n.º 2
0
def buildSimpleMixtureTree(data, K, iterations, height, branchesPerNode=2):
    """Recursively build a mixture tree with a fixed number of levels.

    A mixture model with ``branchesPerNode`` components is fitted to ``data``,
    every row is assigned to one component, and the procedure recurses on
    each component's rows with ``height - 1`` until the height is used up.

    Args:
        data: Sequence of count rows; each row is handed to
            ``MME.assignComponentToCounts``.
        K: Second argument of ``MME.MultinomialMixtureModelHyperparams``;
            also the length of the uniform ``[1.0 / K] * K`` list passed as
            its fourth argument.
        iterations: Forwarded to ``MME.computeDirichletMixture``.
        height: Number of node levels still to build. Zero or any negative
            value yields ``None``.
        branchesPerNode: Number of mixture components fitted at each node.

    Returns:
        A ``MultinomialMixtureTree`` whose ``mixtureNodes[c]`` holds the
        subtree for component ``c`` (``None`` at the last level), or ``None``
        when ``height`` is not positive.
    """
    # ``<=`` rather than ``==``: a negative (or fractional) height would
    # otherwise step past zero and recurse without ever terminating.
    if height <= 0:
        return None

    # Hyperparameters are fixed here: uniform lists for both arguments.
    hyperP = MME.MultinomialMixtureModelHyperparams(
        branchesPerNode,
        K,
        [1.0 / branchesPerNode] * branchesPerNode,
        [1.0 / K] * K,
    )

    mixtureModel = MME.computeDirichletMixture(data, hyperP, iterations)

    # Partition the rows by the component each one is assigned to.
    smallerDatasets = [[] for _ in range(hyperP.C)]
    for counts in data:
        component = MME.assignComponentToCounts(counts, mixtureModel)
        smallerDatasets[component].append(counts)

    treeModel = MultinomialMixtureTree(mixtureModel)

    # Build one subtree per component, one level shallower.
    for c, smallerDataset in enumerate(smallerDatasets):
        treeModel.mixtureNodes[c] = buildSimpleMixtureTree(
            smallerDataset, K, iterations, height - 1, branchesPerNode
        )

    return treeModel
Exemplo n.º 3
0
#!/usr/bin/python

import multinomialMixtureEstimation as MME
import logging
logging.basicConfig(level=logging.DEBUG)

# Load the model that the synthetic rows are drawn from.
model = MME.importFile("sampleModel.txt")

# Draw 500 rows. The 8 is passed straight to sampleRow; presumably the
# number of draws per row -- confirm against MME.
dataset = [model.sampleRow(8) for i in range(500)]

# Hyperparameters for the model being fitted: the leading 2 and 3 match the
# lengths of the two all-ones lists that follow them.
hyperP = MME.MultinomialMixtureModelHyperparams(2, 3, [1] * 2, [1] * 3)

# Fit the mixture to the sampled rows; the trailing 10 is the third
# (iterations) argument of computeDirichletMixture.
finalModel = MME.computeDirichletMixture(dataset, hyperP, 10)

# Report the fitted parameters on stdout.
print("Final Model:")
print(finalModel.mixture)
print(finalModel.multinomials)
Exemplo n.º 4
0
    raise ValueError('Invalid log level: %s' % loglevel)
logging.basicConfig(level=numeric_level)

# Number of mixture components and number of fitting iterations, both taken
# from the parsed command-line options.
C = int(options.C)
iterations = int(options.I)

# Read the dataset from stdin: one row per line, tab-separated integers.
print("init dataset")
dataset = []
for N, row in enumerate(sys.stdin):
    # Progress marker every 100000 rows (the first row included).
    if N % 100000 == 0:
        print("processed " + str(N) + " rows.")
    # Build a concrete list per row so the data can be iterated repeatedly
    # (a bare map() is a one-shot iterator on Python 3).
    dataset.append([int(field) for field in row.split("\t")])
# Keep N as the total row count, as before (0 when stdin was empty).
N = len(dataset)
print("finished dataset")

# The hard-coded 168 is the second hyperparameter argument and also the
# length of the second all-ones list.
hyperP = MME.MultinomialMixtureModelHyperparams(C, 168, [1] * C, [1] * 168)

finalModel = MME.computeDirichletMixture(dataset, hyperP, iterations)

logging.debug("Final Model:")
if options.outputModel:
    # Close the file deterministically so the model is flushed to disk.
    with open(options.outputModel, 'w') as outputModel:
        finalModel.outputToFile(outputModel)
else:
    # The previous fallback target was sys.stdin, which is not writable;
    # skip the model file instead of failing before the TSV is emitted.
    logging.warning("No output model path given; skipping model file output.")

finalModel.outputToTSV(sys.stdout)

# Report the worst fit found by MME.worstFit over the dataset.
(worseLogProb, worstN, worstC) = MME.worstFit(dataset, finalModel)
print("worstLogProb " + str(worseLogProb))
print("worst N " + str(worstN))
print("worst C " + str(worstC))
Exemplo n.º 5
0
    raise ValueError('Invalid log level: %s' % loglevel)
logging.basicConfig(level=numeric_level)

# Component count and iteration count come from the parsed options.
C = int(options.C)
iterations = int(options.I)

# Load the dataset from stdin; every line is one row of tab-separated ints.
print("init dataset")
dataset = []
for N, row in enumerate(sys.stdin):
    # Emit a progress line every 100000 rows, starting with row 0.
    if N % 100000 == 0:
        print("processed " + str(N) + " rows.")
    # A list comprehension yields a real list on both Python 2 and 3,
    # whereas map() returns a single-use iterator on Python 3.
    dataset.append([int(field) for field in row.split("\t")])
# N ends up as the number of rows read, matching the original counter.
N = len(dataset)
print("finished dataset")

# 168 is hard-coded both as the second argument and as the length of the
# second all-ones list.
hyperP = MME.MultinomialMixtureModelHyperparams(C, 168, [1] * C, [1] * 168)

finalModel = MME.computeDirichletMixture(dataset, hyperP, iterations)

logging.debug("Final Model:")
if options.outputModel:
    # The with-block guarantees the model file is closed after writing.
    with open(options.outputModel, 'w') as outputModel:
        finalModel.outputToFile(outputModel)
else:
    # Writing to sys.stdin (the old default) cannot succeed because it is
    # read-only, so the model file is skipped when no path is supplied.
    logging.warning("No output model path given; skipping model file output.")

finalModel.outputToTSV(sys.stdout)

# Summarise the worst fit reported by MME.worstFit.
(worseLogProb, worstN, worstC) = MME.worstFit(dataset, finalModel)
print("worstLogProb " + str(worseLogProb))
print("worst N " + str(worstN))
print("worst C " + str(worstC))
Exemplo n.º 6
0
#!/usr/bin/python

import multinomialMixtureEstimation as MME
import logging

logging.basicConfig(level=logging.DEBUG)

# Source model used to generate the synthetic dataset.
model = MME.importFile("sampleModel.txt")

# 500 sampled rows; the argument 8 is forwarded unchanged to sampleRow
# (presumably the per-row sample size -- verify against MME).
dataset = [model.sampleRow(8) for i in range(0, 500)]

# Hyperparameters: 2 and 3 equal the lengths of the two all-ones lists.
hyperP = MME.MultinomialMixtureModelHyperparams(2, 3, [1] * 2, [1] * 3)

# Fit a mixture to the sampled data, passing 10 as the iterations argument.
finalModel = MME.computeDirichletMixture(dataset, hyperP, 10)

# Print the fitted mixture and multinomials.
print("Final Model:")
print(finalModel.mixture)
print(finalModel.multinomials)