def getSegmentationAccuracy(pathArray):
  """Build and pickle a labelled data set from correctly segmented symbols.

  Every directory entry whose name ends in "inkml" is parsed with
  gettrace.parseINKMLFile and segmented with segment.segmentSymbols. For
  each ground-truth (label, stroke-index group) pair whose group also
  appears in the segmenter output, one sample is collected: the flattened
  output of itp.inkml_to_pixels followed by the number of strokes. The
  collected (dataX, dataY) tuple is pickled to "segmented_data_18" in the
  current working directory.

  Args:
    pathArray: iterable of directory paths to scan for INKML files.

  Returns:
    None. The result is written to disk and progress is printed.
  """
  dataX = []
  dataY = []
  j = 0  # counts every directory entry seen, whether or not it is INKML
  for path in pathArray:
    for f in os.listdir(path):
      j += 1
      if not f.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator;
      # plain concatenation silently built a wrong name when it did not.
      traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, f))
      segmentIndices = segment.segmentSymbols(traceList)
      for label, elem in symbolsList:
        # Keep only symbols whose stroke group the segmenter reproduced.
        if elem not in segmentIndices:
          continue
        strokes = [traceList[i] for i in elem]
        pixels = itp.inkml_to_pixels(strokes)
        chain = list(itertools.chain(*pixels))
        chain.append(len(strokes))  # stroke count is the last feature
        dataX.append(chain)
        dataY.append(label)
      print(j)
  # Pickle data must go to a binary-mode file, and the `with` block makes
  # sure the handle is flushed and closed (the original never closed it).
  with open("segmented_data_18", "wb") as out:
    pickle.dump((dataX, dataY), out)
示例#2
0
def recognize():
  """Recognize the strokes posted in the request's 'info' form field.

  The field is decoded as JSON, segmented with segment.segmentSymbols, and
  each resulting stroke group is turned into a feature list (flattened
  inkml_to_pixels output plus the stroke count) that is passed to the
  module-level `svm`. The first element of each prediction is appended to
  the returned string, in segmenter order.
  """
  global svm
  print(request.form['info'])
  stroke_data = json.loads(request.form['info'])

  def featurize(index_group):
    # Feature list for one symbol: flattened pixels, then stroke count.
    group = [stroke_data[idx] for idx in index_group]
    features = list(itertools.chain(*inkml_to_pixels.inkml_to_pixels(group)))
    features.append(len(group))
    return features

  recognized = ""
  for index_group in segment.segmentSymbols(stroke_data):
    recognized += svm.predict(featurize(index_group))[0]
  return recognized
def getSegmentationAccuracy(pathArray):
  """Print how many files the pickled SVM recognizes (almost) completely.

  The classifier is unpickled from "svm18px" in the current working
  directory. For every directory entry whose name ends in "inkml", each
  ground-truth symbol counts as correct when its stroke group appears in
  the segmenter output and the SVM predicts its label. Three per-file
  tallies are kept: files with every symbol correct, files with at most
  one symbol wrong, and files with at most two symbols wrong. The raw
  tallies are printed, followed by each tally divided by 836.

  Args:
    pathArray: iterable of directory paths to scan for INKML files.

  Returns:
    None. All results are printed.
  """
  # NOTE(review): 836 is hard-coded in the original; presumably the size of
  # the evaluation set -- confirm before running on a different data set.
  file_count_denominator = 836

  # Pickles are binary data: read in "rb" mode and close the handle. The
  # original left it open and later rebound the same name in the loop.
  with open('svm18px', "rb") as model_file:
    svm = pickle.load(model_file)

  j = 0  # counts every directory entry seen, whether or not it is INKML
  total_correct = 0
  off_by_one = 0
  off_by_two = 0

  for path in pathArray:
    for f in os.listdir(path):
      j += 1
      if not f.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, f))
      segmentIndices = segment.segmentSymbols(traceList)

      correct = 0
      for label, elem in symbolsList:
        # A symbol that was not segmented as in the ground truth can
        # never be counted as correct.
        if elem not in segmentIndices:
          continue
        strokes = [traceList[i] for i in elem]
        pixels = itp.inkml_to_pixels(strokes)
        chain = list(itertools.chain(*pixels))
        chain.append(len(strokes))  # stroke count is the last feature
        if svm.predict(chain)[0] == label:
          correct += 1

      # The tallies are cumulative: a fully correct file also counts as
      # "off by one" and "off by two".
      symbol_count = len(symbolsList)
      if correct == symbol_count:
        total_correct += 1
      if correct + 1 >= symbol_count:
        off_by_one += 1
      if correct + 2 >= symbol_count:
        off_by_two += 1

      print(j)

  print(total_correct)
  print(off_by_one)
  print(off_by_two)

  print(float(total_correct) / file_count_denominator)
  print(float(off_by_one) / file_count_denominator)
  print(float(off_by_two) / file_count_denominator)
示例#4
0
def recognize_hmm():
  """Recognize the posted strokes as one sequence via `hmm_instance`.

  The request's 'info' form field is decoded as JSON and segmented with
  segment.segmentSymbols. Each stroke group becomes a one-element tuple
  holding its feature list (flattened inkml_to_pixels output plus the
  stroke count). The whole list is passed to
  hmm_instance.compute_best_sequence, and the symbols it yields are
  concatenated into the returned string.
  """
  global hmm_instance
  stroke_data = json.loads(request.form['info'])

  def featurize(index_group):
    # Feature list for one symbol: flattened pixels, then stroke count.
    group = [stroke_data[idx] for idx in index_group]
    features = list(itertools.chain(*inkml_to_pixels.inkml_to_pixels(group)))
    features.append(len(group))
    return features

  observations = [
    (featurize(index_group),)
    for index_group in segment.segmentSymbols(stroke_data)
  ]

  recognized = ""
  for symbol in hmm_instance.compute_best_sequence(observations):
    recognized += symbol
  return recognized
def getSegmentationAccuracy(pathArray):
  """Print segmentation accuracy statistics for a set of INKML directories.

  For every directory entry whose name ends in "inkml", each ground-truth
  symbol counts as correctly segmented when its stroke group appears in
  the output of segment.segmentSymbols. The following are printed, in
  order:

    1. elapsed wall-clock time in seconds,
    2. fraction of symbols segmented correctly,
    3. fraction of files in which every symbol was segmented correctly,
    4. a dict mapping each label to how often it was missed,
    5. the total number of missed symbols.

  Both fractions are reported as 0.0 when no symbols or no files were
  found, instead of raising ZeroDivisionError.

  Args:
    pathArray: iterable of directory paths to scan for INKML files.

  Returns:
    None. All results are printed.
  """
  from time import time
  start = time()
  completely_correct = 0
  total_files = 0
  correct = 0
  total = 0
  mistakes = {}
  for path in pathArray:
    # `name` rather than `file`, to avoid shadowing the Python 2 builtin.
    for name in os.listdir(path):
      if not name.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(
          os.path.join(path, name))
      segmentIndices = segment.segmentSymbols(traceList)
      correct_file = 0
      total_file = 0
      for label, elem in symbolsList:
        total_file += 1
        if elem in segmentIndices:
          correct_file += 1
        else:
          mistakes[label] = mistakes.get(label, 0) + 1

      if correct_file == total_file:
        completely_correct += 1

      correct += correct_file
      total += total_file
      total_files += 1

  print(time() - start)
  # Guard the ratios: an empty pathArray, or directories without INKML
  # files, would otherwise divide by zero.
  print(float(correct) / total if total else 0.0)
  print(float(completely_correct) / total_files if total_files else 0.0)
  print(mistakes)
  print(sum(mistakes.values()))
示例#6
0
def parseINKMLFile(filename):
  """Parse an INKML file and return its traces and symbols.

  The whole file is read and fed to a fresh MyINKMLParser.

  Args:
    filename: path to the INKML file.

  Returns:
    A (traceList, symbolsList) tuple taken from the parser. Each element
    of traceList corresponds to a trace, and each trace is represented as
    a list of (x, y) tuples.
  """
  # open() is the portable spelling; the file() builtin used originally
  # exists only in Python 2.
  with open(filename) as f:
    s = f.read()
  parser = MyINKMLParser()
  parser.init()
  parser.feed(s)
  return parser.traceList, parser.symbolsList

def parseSymbolOrder(filename):
  """Given the path to an INKML file, return its symbols in order.

  The whole file is read and fed to a fresh MyINKMLParser.

  Args:
    filename: path to the INKML file.

  Returns:
    A list of the values of the parser's symbolOrder mapping, arranged by
    ascending key.
  """
  # open() is the portable spelling; the file() builtin used originally
  # exists only in Python 2.
  with open(filename) as f:
    s = f.read()
  parser = MyINKMLParser()
  parser.init()
  parser.feed(s)
  return [parser.symbolOrder[key] for key in sorted(parser.symbolOrder)]

# Manual smoke run: parse one sample INKML file and segment its traces.
if __name__ == "__main__":
  # Alternative sample files; uncomment exactly one assignment to `n`.
  # n = 'samples/test_sample_recognized.inkml'
  # n = 'ICFHR_package/CROHME2012_data/testDataGT/001-equation001.inkml'
  n = 'ICFHR_package/CROHME2012_data/trainData/trainData/algb09.inkml'
  # n = 'samples/train_sample_with_GT.inkml'
  # symbolsList is not used in the lines visible here.
  traceList, symbolsList = parseINKMLFile(n)

  # Run the segmenter on the parsed traces.
  segmentIndices = segment.segmentSymbols(traceList)