예제 #1
def doTest(name, gen):
    # Sets up a density-estimation forest with a 2-feature Gaussian goal,
    # prints its statistics, then evaluates it one image row at a time,
    # writing the result into the global `img` and saving it as
    # 'test_de_circle_<name>.png'.
    #
    # NOTE(review): `gen` is not referenced anywhere in the lines visible
    # here, and two statements below are visibly incomplete (a lone `)` and
    # an argument list with no call in front of it). Lines appear to have
    # been lost from this listing - restore them from the original file
    # before running.

    # Train the model...
    df = DF()
    df.setGoal(DensityGaussian(2))  # 2 = # of features
    )  # Playing around shows that this is probably the most important number to get right when doing density estimation - the information gain heuristic just doesn't know when to stop.

    global es
    pb = ProgBar()
    # NOTE(review): the call that should receive these arguments is missing.
        32, es, callback=pb.callback, mp=doMP
    )  # 32 = number of trees to learn - you need a lot to get a good answer.
    del pb

    # Drop some stats...
    print '%i trees containing %i nodes.\nAverage error is %.3f.' % (
        df.size(), df.nodes(), df.error())

    # Visualise the density estimate...
    global img
    # Query buffer for a single image row: one (x, y) coordinate per pixel.
    # It is refilled and re-evaluated for every row.
    testSet = numpy.empty((pixel_width, 2), dtype=numpy.float32)
    pb = ProgBar()

    for y in xrange(pixel_width):
        pb.callback(y, pixel_width)
        # Convert pixel indices to coordinates: pixel_half_width maps to 0
        # and the result is scaled by axis_half_width.
        i = 0
        for x in xrange(pixel_width):
            testSet[i, 0] = axis_half_width * float(
                x - pixel_half_width) / pixel_half_width
            testSet[i, 1] = axis_half_width * float(
                y - pixel_half_width) / pixel_half_width
            i += 1

        # Evaluate the whole row in one call.
        test = MatrixES(testSet)
        res = df.evaluate(test, mp=doMP)

        # Copy each result into every channel of the matching pixel.
        i = 0
        for x in xrange(pixel_width):
            img[y, x, :] = res[i]
            i += 1

    del pb

    # Normalise in place so the largest value becomes 1, then scale by 255
    # when handing the image over for saving.
    print 'Maximum probability = %.2f' % img.max()
    img /= img.max()
    cv.SaveImage('test_de_circle_%s.png' % name, array2cv(img * 255))
예제 #4
                   lambda: ms.set_scale(numpy.array([5.0, 5.0]))),
                  ('Silverman', ms.scale_silverman), ('Scott', ms.scale_scott),
                  ('loo_nll', scale_loo_nll)]:
    # NOTE(review): the start of this loop header (the `for ... in [` line and
    # the opening of the first tuple) is not visible in this listing, and
    # nothing visible in the body invokes the callable paired with each name -
    # presumably a call that sets the scale was lost; confirm against the
    # original file.

    # Calculate and print out the scales...
    print '<', name, '>'
    print 'Scale:', ms.get_scale()
    print 'loo nll for this scale =', ms.loo_nll()
    mean, sd = ms.stats()
    print 'mean = (%f, %f); sd = (%f, %f)' % (mean[0], mean[1], sd[0], sd[1])

    # Render out a normalised probability map...
    image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

    p = ProgBar()
    for row in xrange(dim):
        p.callback(row, dim)
        # Build the sample points for this row: a column of `dim` values
        # evenly spaced over [-size, size], paired with a constant column
        # that runs from -size (row 0) to +size (last row).
        # NOTE(review): the numpy.append(...) call below is never closed in
        # this listing - its final argument and closing bracket are missing.
        sam = numpy.append(numpy.linspace(-size, size, dim).reshape(
            (-1, 1)), ((row / (dim - 1.0) - 0.5) * 2.0 * size) *
                           numpy.ones(dim).reshape((-1, 1)),
        # The probabilities are reshaped to a single column so the assignment
        # broadcasts the same value across all 3 channels of each pixel.
        image[row, :, :] = ms.probs(sam).reshape((-1, 1))
    del p

    # Rescale in place so the largest sampled probability maps to 255.
    print 'Largest sampled probability =', image.max()
    image *= 255.0 / image.max()

    image = array2cv(image)
    cv.SaveImage('bandwidth_%s.png' % name, image)
def doRun(tdc):
    # Create a corpus...
    c = lda.Corpus(4)
    c.setWordCount(identCount() * 4)

    for i in xrange(tdc):
        dic, abn = genDoc()

        nDic = dict()
        for key, item in dic.iteritems():
            nDic[key[0] * 4 + key[1]] = item

        doc = lda.Document(nDic)
        doc.abn = abn

    # Fit a model...
    params = lda.Params()

    print 'Fitting model...'
    p = ProgBar()
    c.fit(params, p.callback)
    del p

    tw = c.topicsWords()

    # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
    ab_gt = []
    print 'Testing...'
    p = ProgBar()
    for i in xrange(testDocCount):
        p.callback(i, testDocCount)
        dic, abn = genDoc()

        nDic = dict()
        for key, item in dic.iteritems():
            nDic[key[0] * 4 + key[1]] = item

        doc = lda.Document(nDic)
        ab_gt.append((doc.negLogLikelihood(tw), abn))
    del p


    # Use the pairs to construct a roc...
    posCount = len(filter(lambda p: p[1] == True, ab_gt))
    negCount = len(ab_gt) - posCount
    print 'positive samples = ', posCount
    print 'negative samples = ', negCount

    truePos = 0
    falsePos = 0
    trueNeg = negCount
    falseNeg = posCount

    roc = []

    for p in ab_gt:
        if p[1]:
            truePos += 1
            falseNeg -= 1
            falsePos += 1
            trueNeg -= 1

        pnt = (float(falsePos) / float(falsePos + trueNeg),
               float(truePos) / float(truePos + falseNeg))

    # Save the roc to disk...
    if not sweep:
        f = open('junction_roc.txt', 'w')
        f.write('0.0 0.0\n')
        for pnt in roc:
            f.write('%f %f\n' % pnt)

    # Calculate and print out the area under the roc...
    area = 0.0
    for i in xrange(1, len(roc)):
        area += 0.5 * (roc[i - 1][1] + roc[i][1]) * (roc[i][0] - roc[i - 1][0])
    print 'area under roc =', area, '(above', (1.0 - area), ')'

    return area
예제 #14
파일: test_iris.py 프로젝트: zoginni/helit
# Feed every sample to the PrecisionLOO search and keep the precision it
# reports as best.
# NOTE(review): `p` is created here but its callback is never invoked inside
# the loop below - a progress line may be missing from this listing.
p = ProgBar()
loo = PrecisionLOO()
for i in xrange(data.getVectors().shape[0]):
    # Each sample is reshaped to a 1x1 array, so every vector must hold
    # exactly one value.
    loo.addSample(numpy.reshape(data.getVectors()[i], (1, 1)))
precision = loo.getBest()
del p

# Convert the single precision entry into a standard deviation for display.
print 'Optimal standard deviation = %s' % str(math.sqrt(1.0 / precision[0, 0]))

# Create and fill the pool...
print 'Filling the pool...'
pool = Pool()
p = ProgBar()
for i in xrange(data.getVectors().shape[0]):
    p.callback(i, data.getVectors().shape[0])
    # NOTE(review): this call is never closed in this listing - the remaining
    # argument(s) and the closing bracket are missing.
    pool.store(numpy.reshape(data.getVectors()[i], (1, )),
del p

# Create the classifier...
classifier = ClassifyKDE(precision)

# Calculate the dimensions for the visualisations: the data range padded by
# 20% of its width at each end. The margin is computed once, before either
# bound moves, so both ends get the same padding (widening `low` first and
# then re-reading it would stretch the upper margin to 24%).
low = data.getVectors().min()
high = data.getVectors().max()
margin = 0.2 * (high - low)
low -= margin
high += margin

# Quickly visualise the dataset as lines in an image...
weight = 1.0
def doRun(tdc):
  # Builds rlda documents from genDoc(), renders per-document and per-topic
  # images when the module-level `sweep` flag is false, then constructs a ROC
  # from test documents and returns the area under it.
  #
  # NOTE(review): this listing has visibly lost lines. In the code as shown
  # the documents are never handed to the corpus, no fit call is made, no
  # image is saved, and nothing is appended to `ab_gt` or `roc` - so the
  # function would report zero samples and return 0.0. Restore the missing
  # statements from the original file before relying on it.

  # Create directory to put images into...
  if not sweep:
    # NOTE(review): the body of this `if` is missing from the listing.

  # Create a corpus...
  c = rlda.Corpus(10,4)

  for i in xrange(tdc):
    dic, abn = genDoc(False)
    doc = rlda.Document(dic)
    doc.abn = abn
    # NOTE(review): `doc` is not used again after this point.

    if not sweep:
      # Scatter the document's values into a 6x6 grid with 4 entries per cell.
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for key,item in dic.iteritems():
        x,y = identToCoord(key[0])
        prob[x,y,key[1]] = item

      # Scale so the largest entry maps to 255.
      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          # Each cell is a 25x25 square split into 4 triangles, one per d,
          # each formed by two adjacent corners and the cell centre.
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            # NOTE(review): there is no `else:` between the two assignments
            # below, so the second one overwrites the first for even d and is
            # not indented validly on its own for odd d.
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)
      # NOTE(review): `img` is not saved or used after being drawn.


  # Fit a model...
  params = rlda.Params()

  print 'Fitting model...'
  # NOTE(review): the progress bar is created and deleted with no fit call in
  # between, and `params` is never used.
  p = ProgBar()
  del p

  ir = c.getIR()
  wrt = c.getWRT()

  # Visualise the regions...
  if not sweep:
    # Scale so the largest entry of ir maps to 255.
    mult = 255.0/ir.max()
    for r in xrange(ir.shape[1]):
      # One 6x6 map per column of ir, blown up 25x along both axes.
      rend = numpy.zeros((6,6),dtype=numpy.float_)
      for i in xrange(ir.shape[0]): rend[identToCoord(i)] = ir[i,r] * mult
      rend = numpy.repeat(numpy.repeat(rend,25,axis=0),25,axis=1)
      # NOTE(review): `rend` is not saved or used after this point.

  # Visualise the topics...
  if not sweep:
    for t in xrange(wrt.shape[2]):
      # For each cell and each w, sum ir[i,r] * wrt[w,r,t] over r.
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for i in xrange(ir.shape[0]):
        x,y = identToCoord(i)
        for r in xrange(wrt.shape[1]):
          for w in xrange(wrt.shape[0]):
            prob[x,y,w] += ir[i,r] * wrt[w,r,t]

      # Same triangle rendering as for the training documents above.
      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            # NOTE(review): same missing `else:` as in the earlier rendering.
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)
      # NOTE(review): `img` is not saved or used after being drawn.

  # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    dic, abn = genDoc()
    doc = rlda.Document(dic)
    # NOTE(review): nothing is appended to ab_gt and the progress callback is
    # never invoked, so ab_gt stays empty.
  del p


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  # Start with everything classified as normal.
  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    # NOTE(review): as written all four counters change for a positive sample
    # and none change for a negative one; an `else:` before the falsePos line
    # looks to be missing. ab_gt is also not sorted by score before this sweep.
    if p[1]:
      truePos += 1
      falseNeg -= 1
      falsePos +=1
      trueNeg -= 1

    # (false positive rate, true positive rate) after this sample.
    # NOTE(review): pnt is never appended to roc.
    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))

  # Save the roc to disk...
  if not sweep:
    # NOTE(review): the file is never closed explicitly.
    f = open('junction_roc.txt','w')
    f.write('0.0 0.0\n')
    for pnt in roc: f.write('%f %f\n'%pnt)

  # Calculate and print out the area under the roc...
  # Trapezium rule over consecutive roc points.
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
