Exemplo n.º 1
0
def doTest(name, gen):
    """Train a density forest with the given generator and save its estimate.

    name -- inserted into the output filename 'test_de_circle_<name>.png'.
    gen -- passed straight to DF.setGen().

    Learns 32 trees from the module-level exemplar set es, prints some
    statistics, evaluates the forest one image row at a time into the
    module-level array img, prints the maximum value, normalises img in place
    and saves it scaled by 255.
    """
    # Train the model...
    df = DF()
    df.setGoal(DensityGaussian(2))  # 2 = # of features
    df.setGen(gen)
    # Playing around shows that this is probably the most important number to
    # get right when doing density estimation - the information gain heuristic
    # just doesn't know when to stop.
    df.getPruner().setMinTrain(48)

    pb = ProgBar()
    # 32 = number of trees to learn - you need a lot to get a good answer.
    df.learn(32, es, callback=pb.callback, mp=doMP)
    del pb

    # Drop some stats...
    print '%i trees containing %i nodes.\nAverage error is %.3f.' % (
        df.size(), df.nodes(), df.error())

    # Visualise the density estimate...
    global img  # Needed - the in-place divide below assigns to img.
    testSet = numpy.empty((pixel_width, 2), dtype=numpy.float32)

    # The x coordinates are identical for every row, so fill that column once...
    offsets = (numpy.arange(pixel_width) - pixel_half_width).astype(
        numpy.float64)
    testSet[:, 0] = axis_half_width * offsets / pixel_half_width

    pb = ProgBar()
    for y in xrange(pixel_width):
        pb.callback(y, pixel_width)

        # Only the y coordinate changes from one row to the next...
        testSet[:, 1] = axis_half_width * float(
            y - pixel_half_width) / pixel_half_width

        res = df.evaluate(MatrixES(testSet), mp=doMP)

        for x in xrange(pixel_width):
            img[y, x, :] = res[x]

    del pb

    peak = img.max()
    print 'Maximum probability = %.2f' % peak
    # An all-zero estimate cannot be normalised - leave it as is rather than
    # filling the image with NaN...
    if peak > 0.0:
        img /= peak
    cv.SaveImage('test_de_circle_%s.png' % name, array2cv(img * 255))
Exemplo n.º 2
0
def doTest(name, gen):
  """Train a density forest with the given generator and save its estimate.

  name -- inserted into the output filename 'test_de_circle_<name>.png'.
  gen -- passed straight to DF.setGen().

  Learns 32 trees from the module-level exemplar set es, prints some
  statistics, evaluates the forest one image row at a time into the
  module-level array img, prints the maximum value, normalises img in place
  and saves it scaled by 255.
  """
  # Train the model...
  df = DF()
  df.setGoal(DensityGaussian(2)) # 2 = # of features
  df.setGen(gen)
  df.getPruner().setMinTrain(48) # Playing around shows that this is probably the most important number to get right when doing density estimation - the information gain heuristic just doesn't know when to stop.

  pb = ProgBar()
  df.learn(32, es, callback = pb.callback, mp=doMP) # 32 = number of trees to learn - you need a lot to get a good answer.
  del pb

  # Drop some stats...
  print '%i trees containing %i nodes.\nAverage error is %.3f.'%(df.size(), df.nodes(), df.error())

  # Visualise the density estimate...
  global img # Needed - the in-place divide below assigns to img.
  testSet = numpy.empty((pixel_width,2), dtype=numpy.float32)

  # The x coordinates are identical for every row, so fill that column once...
  offsets = (numpy.arange(pixel_width) - pixel_half_width).astype(numpy.float64)
  testSet[:,0] = axis_half_width * offsets / pixel_half_width

  pb = ProgBar()
  for y in xrange(pixel_width):
    pb.callback(y,pixel_width)

    # Only the y coordinate changes from one row to the next...
    testSet[:,1] = axis_half_width * float(y - pixel_half_width) / pixel_half_width

    res = df.evaluate(MatrixES(testSet), mp=doMP)

    for x in xrange(pixel_width):
      img[y,x,:] = res[x]

  del pb

  peak = img.max()
  print 'Maximum probability = %.2f'%peak
  # An all-zero estimate cannot be normalised - leave it as is rather than filling the image with NaN...
  if peak > 0.0:
    img /= peak
  cv.SaveImage('test_de_circle_%s.png'%name,array2cv(img*255))
Exemplo n.º 3
0
def doRun(tdc):
  # Create a corpus...
  c = lda.Corpus(4)
  c.setWordCount(identCount()*4)

  for i in xrange(tdc):
    dic, abn = genDoc()
    
    nDic = dict()
    for key,item in dic.iteritems(): nDic[key[0]*4+key[1]] = item
    
    doc = lda.Document(nDic)
    doc.abn = abn
    c.add(doc)


  # Fit a model...
  params = lda.Params()
  params.setRuns(16)

  print 'Fitting model...'
  p = ProgBar()
  c.fit(params,p.callback)
  del p

  tw = c.topicsWords()


  # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i,testDocCount)
    dic, abn = genDoc()

    nDic = dict()
    for key,item in dic.iteritems(): nDic[key[0]*4+key[1]] = item
    
    doc = lda.Document(nDic)
    doc.fit(tw)
    ab_gt.append((doc.negLogLikelihood(tw),abn))
  del p

  ab_gt.sort(reverse=True)


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos +=1
      trueNeg -= 1

    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))
    roc.append(pnt)


  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.txt','w')
    f.write('0.0 0.0\n')
    for pnt in roc: f.write('%f %f\n'%pnt)
    f.close()


  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
Exemplo n.º 4
0
                   lambda: ms.set_scale(numpy.array([5.0, 5.0]))),
                  ('Silverman', ms.scale_silverman), ('Scott', ms.scale_scott),
                  ('loo_nll', scale_loo_nll)]:
    # Calculate and print out the scales...
    print '<', name, '>'
    alg()
    print 'Scale:', ms.get_scale()
    print 'loo nll for this scale =', ms.loo_nll()
    mean, sd = ms.stats()
    print 'mean = (%f, %f); sd = (%f, %f)' % (mean[0], mean[1], sd[0], sd[1])

    # Render out a normalised probability map...
    image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

    p = ProgBar()
    for row in xrange(dim):
        p.callback(row, dim)
        sam = numpy.append(numpy.linspace(-size, size, dim).reshape(
            (-1, 1)), ((row / (dim - 1.0) - 0.5) * 2.0 * size) *
                           numpy.ones(dim).reshape((-1, 1)),
                           axis=1)
        image[row, :, :] = ms.probs(sam).reshape((-1, 1))
    del p

    print 'Largest sampled probability =', image.max()
    image *= 255.0 / image.max()

    image = array2cv(image)
    cv.SaveImage('bandwidth_%s.png' % name, image)
    print
Exemplo n.º 5
0

# Now create some documents and test them...
# Below indexed by [actual, result], with 0 to mean normal and 1 for abnormal...
print 'Testing model on new documents...'
result = numpy.zeros((6,6),dtype=numpy.int32)

testCounts = [normal_doc_test, square_doc_test, hor_star_doc_test, vert_star_doc_test, hor_in_vert_doc_test, vert_in_hor_doc_test]

p = ProgBar()
stepsDone = 0
stepsTotal = sum(testCounts)
for ab in ([None] + [x for x in sam.getAbnormDict().iterkeys()]):
  c = behClasses.index(ab)
  for _ in xrange(testCounts[c]):
    p.callback(stepsDone,stepsTotal)
    stepsDone += 1

    doc = ddhdp.Document(sampleDocument(ab))
    abnormList = model.mlDocAbnorm(doc, lone = True, cap = 0) ######## Cap is super low for testing.

    truth = c
    guess = 0
    if len(abnormList)!=0:
      guess = behClasses.index(abnormList[0]) # Doesn't handle it thinking that multiple abnormalities are present.

    result[truth,guess] += 1
del p


print 'Confusion matrix:'
Exemplo n.º 6
0
  print 'Converting... (%i pixels at a time)'%step

slices = map(lambda x: slice(x*step, (x+1)*step), xrange(data.shape[0]//step + 1))

if slices[-1].stop<data.shape[0]:
  slices.append(slice(slices[-1].stop, data.shape[0]))

## Calculate each channel in turn...
out = data.copy()

for cc in xrange(3):
  if not args.quiet:
    print 'Converting - %s...'%(['red', 'green', 'blue'][cc])
  p = ProgBar()
  for i,s in enumerate(slices):
    p.callback(i, len(slices))
    out[s,cc] = model[cc](data[s,:].astype(numpy.float32))
  del p

## Expand the data matrix back up to the order and length of the image, by expanding duplicates...
source = numpy.cumsum(keep) - 1
out = out[source,:]
out = out[numpy.argsort(index),:]
  
## Convert back from data matrix to image...
out = out.reshape(image.shape)



# Clamp unreasonable values, record where clamping occurs...
if not args.quiet:
Exemplo n.º 7
0
# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback = p.callback)
del p



# Plot the pdf, for reference...
image = numpy.zeros((pixels, pixels, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(pixels):
  p.callback(row, pixels)
  sam = numpy.append(numpy.linspace(0.0, 1.0, pixels).reshape((-1,1)), (row / float(pixels-1)) * numpy.ones(pixels).reshape((-1,1)), axis=1)
  image[row, :, :] = ms.probs(sam).reshape((-1,1))
del p

image *= 255.0 / image.max()
image = array2cv(image)
cv.SaveImage('draw_weighted_density.png', image)



# Draw a bunch of points from it and plot them...
samples = numpy.random.randint(16, 512)
draw = ms.draws(1024)

image = numpy.zeros((pixels, pixels, 3), dtype=numpy.float32)
Exemplo n.º 8
0
def doRun(tdc):
    # Create a corpus...
    c = lda.Corpus(4)
    c.setWordCount(identCount() * 4)

    for i in xrange(tdc):
        dic, abn = genDoc()

        nDic = dict()
        for key, item in dic.iteritems():
            nDic[key[0] * 4 + key[1]] = item

        doc = lda.Document(nDic)
        doc.abn = abn
        c.add(doc)

    # Fit a model...
    params = lda.Params()
    params.setRuns(16)

    print 'Fitting model...'
    p = ProgBar()
    c.fit(params, p.callback)
    del p

    tw = c.topicsWords()

    # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
    ab_gt = []
    print 'Testing...'
    p = ProgBar()
    for i in xrange(testDocCount):
        p.callback(i, testDocCount)
        dic, abn = genDoc()

        nDic = dict()
        for key, item in dic.iteritems():
            nDic[key[0] * 4 + key[1]] = item

        doc = lda.Document(nDic)
        doc.fit(tw)
        ab_gt.append((doc.negLogLikelihood(tw), abn))
    del p

    ab_gt.sort(reverse=True)

    # Use the pairs to construct a roc...
    posCount = len(filter(lambda p: p[1] == True, ab_gt))
    negCount = len(ab_gt) - posCount
    print 'positive samples = ', posCount
    print 'negative samples = ', negCount

    truePos = 0
    falsePos = 0
    trueNeg = negCount
    falseNeg = posCount

    roc = []

    for p in ab_gt:
        if p[1]:
            truePos += 1
            falseNeg -= 1
        else:
            falsePos += 1
            trueNeg -= 1

        pnt = (float(falsePos) / float(falsePos + trueNeg),
               float(truePos) / float(truePos + falseNeg))
        roc.append(pnt)

    # Save the roc to disk...
    if not sweep:
        f = open('junction_roc.txt', 'w')
        f.write('0.0 0.0\n')
        for pnt in roc:
            f.write('%f %f\n' % pnt)
        f.close()

    # Calculate and print out the area under the roc...
    area = 0.0
    for i in xrange(1, len(roc)):
        area += 0.5 * (roc[i - 1][1] + roc[i][1]) * (roc[i][0] - roc[i - 1][0])
    print 'area under roc =', area, '(above', (1.0 - area), ')'

    return area
Exemplo n.º 9
0
    ms = map(to_ms, samples)

    # Infer a good loo value for the first one, then set them all to the same...
    p = ProgBar()
    ms[0].scale_loo_nll(callback=p.callback)
    del p

    for i in xrange(1, 4):
        ms[i].copy_scale(ms[0])

    # Visualise the distributions using KDE...
    imgs = []
    p = ProgBar()
    for i in xrange(4):
        p.callback(i, 4)
        img = numpy.zeros((draw_scale * size[0], draw_scale * size[1]),
                          dtype=numpy.float32)

        sweep0 = numpy.linspace(0, size[0], img.shape[0])
        sweep1 = numpy.linspace(0, size[1], img.shape[1])

        for ij, j in enumerate(sweep0):
            points = numpy.append(j * numpy.ones(sweep1.shape[0]).reshape(
                (-1, 1)),
                                  sweep1.reshape((-1, 1)),
                                  axis=1)
            img[ij, :] = ms[i].probs(points)

        img *= 255.0 / img.max()
        imgs.append(img)
Exemplo n.º 10
0
  loo.addSample(numpy.reshape(data.getVectors()[i], (1,1)))
loo.solve(p.callback)
precision = loo.getBest()
del p


print 'Optimal standard deviation = %s'%str(math.sqrt(1.0/precision[0,0]))



# Create and fill the pool...
print 'Filling the pool...'
pool = Pool()
p = ProgBar()
for i in xrange(data.getVectors().shape[0]):
  p.callback(i, data.getVectors().shape[0])
  pool.store(numpy.reshape(data.getVectors()[i], (1,)), data.getClasses()[i])
del p

# Create the classifier...
classifier = ClassifyKDE(precision)




# Calculate the dimensions for the visualisations...
low  = data.getVectors().min()
high = data.getVectors().max()
low  -= 0.2*(high-low)
high += 0.2*(high-low)
Exemplo n.º 11
0
      if img[t_y,t_x,0] < t:
        img[t_y,t_x,:] = t
    except:
      pass

img = array2cv(255.0 * img)
cv.SaveImage('composite_draw.png', img)



# Visualise the probability - both spatial and rotational in a single image, with one colour channel each for 3 directions...
img = numpy.zeros((size, size, 3), dtype=numpy.float32)
p = ProgBar()

for y in xrange(size):
  p.callback(y, size)

  # Each channel queries the same row of positions with a different orientation vector...
  for index, orient_x, orient_y in [(0,1.0,0.0), (1,0.0,1.0), (2,-1.0,0.0)]:
    # One query per pixel of the row: (row position, column position, orient_x, orient_y)...
    block = numpy.empty((size, 4))
    block[:,0] = scale * y / float(size-1)
    block[:,1] = numpy.linspace(0.0, scale, size)
    block[:,2] = orient_x
    block[:,3] = orient_y

    vals = ms.probs(block)
    img[y,:,index] = vals

del p

# Scale so that the largest probability maps to 255...
img *= 255 / img.max()
img = array2cv(img)
cv.SaveImage('composite_prob.png', img)


Exemplo n.º 12
0
slices = map(lambda x: slice(x * step, (x + 1) * step),
             xrange(data.shape[0] // step + 1))

if slices[-1].stop < data.shape[0]:
    slices.append(slice(slices[-1].stop, data.shape[0]))

## Calculate each channel in turn...
out = data.copy()

for cc in xrange(3):
    if not args.quiet:
        print 'Converting - %s...' % (['red', 'green', 'blue'][cc])
    p = ProgBar()
    for i, s in enumerate(slices):
        p.callback(i, len(slices))
        out[s, cc] = model[cc](data[s, :].astype(numpy.float32))
    del p

## Expand the data matrix back up to the order and length of the image, by expanding duplicates...
source = numpy.cumsum(keep) - 1
out = out[source, :]
out = out[numpy.argsort(index), :]

## Convert back from data matrix to image...
out = out.reshape(image.shape)

# Clamp unreasonable values, record where clamping occurs...
if not args.quiet:
    print 'Clamping...'
mask = numpy.zeros((out.shape[0], out.shape[1]), dtype=numpy.bool)
Exemplo n.º 13
0
        t_y = int(t * s_y + (1 - t) * e_y)
        try:
            if img[t_y, t_x, 0] < t:
                img[t_y, t_x, :] = t
        except:
            pass

img = array2cv(255.0 * img)
cv.SaveImage('composite_draw.png', img)

# Visualise the probability - both spatial and rotational in a single image, with one colour channel each for 3 directions...
img = numpy.zeros((size, size, 3), dtype=numpy.float32)
p = ProgBar()

for y in xrange(size):
    p.callback(y, size)

    # Each channel queries the same row of positions with a different
    # orientation vector...
    for index, orient_x, orient_y in [(0, 1.0, 0.0), (1, 0.0, 1.0),
                                      (2, -1.0, 0.0)]:
        # One query per pixel of the row:
        # (row position, column position, orient_x, orient_y)...
        block = numpy.empty((size, 4))
        block[:, 0] = scale * y / float(size - 1)
        block[:, 1] = numpy.linspace(0.0, scale, size)
        block[:, 2] = orient_x
        block[:, 3] = orient_y

        vals = ms.probs(block)
        img[y, :, index] = vals

del p
Exemplo n.º 14
0
p = ProgBar()
loo = PrecisionLOO()
for i in xrange(data.getVectors().shape[0]):
    loo.addSample(numpy.reshape(data.getVectors()[i], (1, 1)))
loo.solve(p.callback)
precision = loo.getBest()
del p

print 'Optimal standard deviation = %s' % str(math.sqrt(1.0 / precision[0, 0]))

# Create and fill the pool...
print 'Filling the pool...'
pool = Pool()
p = ProgBar()
for i in xrange(data.getVectors().shape[0]):
    p.callback(i, data.getVectors().shape[0])
    pool.store(numpy.reshape(data.getVectors()[i], (1, )),
               data.getClasses()[i])
del p

# Create the classifier...
classifier = ClassifyKDE(precision)

# Calculate the dimensions for the visualisations...
low = data.getVectors().min()
high = data.getVectors().max()
low -= 0.2 * (high - low)
high += 0.2 * (high - low)

# Quickly visualise the dataset as lines in an image...
weight = 1.0
Exemplo n.º 15
0
def doRun(tdc):
  # Create a corpus...
  vlda = lda.VLDA(4, identCount()*4)

  abnDict = dict()
  for i in xrange(tdc):
    dic, abn = genDoc()
    
    nDic = dict()
    for key,item in dic.iteritems(): nDic[key[0]*4+key[1]] = item
    
    doc = vlda.add(nDic)
    abnDict[doc] = abn


  # Fit a model...
  print 'Fitting model...'
  p = ProgBar()
  vlda.solve()
  del p


  # Visualise the topics...
  if not sweep:
    for t in xrange(vlda.numTopics()):
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      beta = vlda.getBeta(t)
      for i in xrange(beta.shape[0]):
        x,y = identToCoord(i//4)
        w = i%4
        prob[x,y,w] += beta[i]

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)

      cv.SaveImage('junction_topic_%i.png'%t,img)


  # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i,testDocCount)
    dic, abn = genDoc()

    nDic = dict()
    for key,item in dic.iteritems(): nDic[key[0]*4+key[1]] = item
    
    nll = vlda.getNewNLL(nDic)
    ab_gt.append((nll,abn))
  del p

  ab_gt.sort(reverse=True)


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos +=1
      trueNeg -= 1

    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))
    roc.append(pnt)


  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.csv','w')
    f.write('0.0, 0.0\n')
    for pnt in roc: f.write('%f, %f\n'%pnt)
    f.close()


  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
Exemplo n.º 16
0
# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback = p.callback)
del p



# Render out a normalised probability map...
image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(dim):
  p.callback(row, dim)
  sam = numpy.append(numpy.linspace(-size, size, dim).reshape((-1,1)), ((row / (dim-1.0) - 0.5) * 2.0 * size) * numpy.ones(dim).reshape((-1,1)), axis=1)
  image[row, :, :] = ms.probs(sam).reshape((-1,1))
del p

image *= 255.0 / image.max()
image = array2cv(image)
cv.SaveImage('draw_density.png', image)



# Draw a new set of samples from the KDE approximation of the distribution, and visualise...
draw = ms.draws(data.shape[0])

image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)
Exemplo n.º 17
0
def doRun(tdc):
  # Create directory to put images into...
  if not sweep:
    try:
      os.makedirs('junction')
    except:
      pass

    
  # Create a corpus...    
  c = rlda.Corpus(10,4)
  c.setIdentWordCounts(identCount(),4)

  for i in xrange(tdc):
    dic, abn = genDoc(False)
    doc = rlda.Document(dic)
    doc.abn = abn
    c.add(doc)

    if not sweep:
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for key,item in dic.iteritems():
        x,y = identToCoord(key[0])
        prob[x,y,key[1]] = item

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)

      cv.SaveImage('junction/xdoc_%i_%s.png'%(i,str(abn)),img)


  # Fit a model...
  params = rlda.Params()
  params.setRuns(16)

  print 'Fitting model...'
  p = ProgBar()
  c.fit(params,p.callback)
  del p

  ir = c.getIR()
  wrt = c.getWRT()


  # Visualise the regions...
  if not sweep:
    mult = 255.0/ir.max()
    for r in xrange(ir.shape[1]):
      rend = numpy.zeros((6,6),dtype=numpy.float_)
      for i in xrange(ir.shape[0]): rend[identToCoord(i)] = ir[i,r] * mult
      rend = numpy.repeat(numpy.repeat(rend,25,axis=0),25,axis=1)
      cv.SaveImage('junction/region_%i.png'%r,array2cv(rend))
        


  # Visualise the topics...
  if not sweep:
    for t in xrange(wrt.shape[2]):
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for i in xrange(ir.shape[0]):
        x,y = identToCoord(i)
        for r in xrange(wrt.shape[1]):
          for w in xrange(wrt.shape[0]):
            prob[x,y,w] += ir[i,r] * wrt[w,r,t]

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)
      
      cv.SaveImage('junction/topic_%i.png'%t,img)


  # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i,testDocCount)
    dic, abn = genDoc()
    doc = rlda.Document(dic)
    doc.fit(ir,wrt)
    ab_gt.append((doc.negLogLikeRegionVec().max(),abn))
  del p

  ab_gt.sort(reverse=True)


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos +=1
      trueNeg -= 1

    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))
    roc.append(pnt)


  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.txt','w')
    f.write('0.0 0.0\n')
    for pnt in roc: f.write('%f %f\n'%pnt)
    f.close()


  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
Exemplo n.º 18
0
ms.set_kernel('triangular')
ms.set_spatial('kd_tree')

# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback=p.callback)
del p

# Plot the pdf, for reference...
image = numpy.zeros((pixels, pixels, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(pixels):
    p.callback(row, pixels)
    sam = numpy.append(numpy.linspace(0.0, 1.0, pixels).reshape((-1, 1)),
                       (row / float(pixels - 1)) * numpy.ones(pixels).reshape(
                           (-1, 1)),
                       axis=1)
    image[row, :, :] = ms.probs(sam).reshape((-1, 1))
del p

image *= 255.0 / image.max()
image = array2cv(image)
cv.SaveImage('draw_weighted_density.png', image)

# Draw a bunch of points from it and plot them...
samples = numpy.random.randint(16, 512)
draw = ms.draws(1024)
Exemplo n.º 19
0
def doRun(tdc):
  # Create directory to put images into...
  if not sweep:
    try:
      os.makedirs('junction')
    except:
      pass

    
  # Create a corpus...    
  c = rlda.Corpus(10,4)
  c.setIdentWordCounts(identCount(),4)

  for i in xrange(tdc):
    dic, abn = genDoc(False)
    doc = rlda.Document(dic)
    doc.abn = abn
    c.add(doc)

    if not sweep:
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for key,item in dic.iteritems():
        x,y = identToCoord(key[0])
        prob[x,y,key[1]] = item

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)

      cv.SaveImage('junction/xdoc_%i_%s.png'%(i,str(abn)),img)


  # Fit a model...
  params = rlda.Params()
  params.setRuns(16)

  print 'Fitting model...'
  p = ProgBar()
  c.fit(params,p.callback)
  del p

  ir = c.getIR()
  wrt = c.getWRT()


  # Visualise the regions...
  if not sweep:
    mult = 255.0/ir.max()
    for r in xrange(ir.shape[1]):
      rend = numpy.zeros((6,6),dtype=numpy.float_)
      for i in xrange(ir.shape[0]): rend[identToCoord(i)] = ir[i,r] * mult
      rend = numpy.repeat(numpy.repeat(rend,25,axis=0),25,axis=1)
      cv.SaveImage('junction/region_%i.png'%r,array2cv(rend))
        


  # Visualise the topics...
  if not sweep:
    for t in xrange(wrt.shape[2]):
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for i in xrange(ir.shape[0]):
        x,y = identToCoord(i)
        for r in xrange(wrt.shape[1]):
          for w in xrange(wrt.shape[0]):
            prob[x,y,w] += ir[i,r] * wrt[w,r,t]

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)
      
      cv.SaveImage('junction/topic_%i.png'%t,img)


  # Test on a bunch of documents, creating a list of abnormality score/actually an abnormality pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i,testDocCount)
    dic, abn = genDoc()
    doc = rlda.Document(dic)
    doc.fit(ir,wrt)
    ab_gt.append((doc.negLogLikeRegionVec().max(),abn))
  del p

  ab_gt.sort(reverse=True)


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos +=1
      trueNeg -= 1

    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))
    roc.append(pnt)


  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.txt','w')
    f.write('0.0 0.0\n')
    for pnt in roc: f.write('%f %f\n'%pnt)
    f.close()


  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
Exemplo n.º 20
0

# Now create some documents and test them...
# Below indexed by [actual, result], with 0 to mean normal and 1 for abnormal...
print 'Testing model on new documents...'
result = numpy.zeros((6,6),dtype=numpy.int32)

testCounts = [normal_doc_test, square_doc_test, hor_star_doc_test, vert_star_doc_test, hor_in_vert_doc_test, vert_in_hor_doc_test]

p = ProgBar()
stepsDone = 0
stepsTotal = sum(testCounts)
for ab in ([None] + [x for x in sam.getAbnormDict().iterkeys()]):
  c = behClasses.index(ab)
  for _ in xrange(testCounts[c]):
    p.callback(stepsDone,stepsTotal)
    stepsDone += 1

    doc = ddhdp.Document(sampleDocument(ab))
    abnormList = model.mlDocAbnorm(doc, lone = True, cap = 0) ######## Cap is super low for testing.

    truth = c
    guess = 0
    if len(abnormList)!=0:
      guess = behClasses.index(abnormList[0]) # Doesn't handle it thinking that multiple abnormalities are present.

    result[truth,guess] += 1
del p


print 'Confusion matrix:'