示例#1
0
class Scg(object):
    def __init__(self, scg_id, scg_content, truth, request_at, response_at):
        self.id = scg_id
        self.content = scg_content
        self.response = truth
        self.request_at = request_at
        self.response_at = response_at
        self.truth_obj = Payload(truth)
        self.dummySample = Sample('data/inkml/65_alfonso.inkml')
        self.w_h_ratio = 1.0  # initialize here, updated in replace_traces()
        self.replace_traces()
        self.dummySample.re_calculate_IMG_MINMAX()

    def get_latex(self):
        return self.truth_obj.latex

    def replace_traces(self):
        '''
        replace the traces in dummySample with the one generated from scg_content
        :return:
        '''
        strokes = scginkparser.parse_scg_ink_file(self.content, self.id)

        #for st in strokes:
        #print st

        traces = {}

        trace_id_int = 0
        for st in strokes:
            coords = np.zeros((2, len(st)))
            idx = 0
            for x_y in st:
                coords[:, idx] = [float(x_y[0]), float(x_y[1])]
                idx += 1
            traces[trace_id_int] = Stroke(trace_id_int, coords)
            trace_id_int += 1

        # //Compute bounding box of the input expression
        x_min, y_min, x_max, y_max = get_bounding_box(
            traces)  # bounding box for the whole math expression

        # Just in case there is only one point or a sequence of	points perfectly aligned with the x or y axis
        if x_max == x_min: x_max = x_min + 1
        if y_max == y_min: y_max = y_min + 1

        self.w_h_ratio = float(x_max - x_min) / (y_max - y_min)
        # Renormalize to height [0,10000] keeping the aspect ratio
        H = 10000.0
        W = H * (x_max - x_min) / (y_max - y_min)
        for trace_key, trace_v in traces.iteritems():
            trace_v.calc_coords_h10000(H, W, x_min, y_min, x_max, y_max)

        self.dummySample.traces = traces

    def save_image(self, path):
        img, W, H = self.dummySample.render()
        print 'save image to: ', path
        misc.imsave(path, img)
示例#2
0
 def __init__(self, scg_id, scg_content, truth):
     """Store the record fields and load the record's strokes.

     :param scg_id: stored as ``id``
     :param scg_content: stored as ``content``
     :param truth: wrapped in a Payload and stored as ``truth_obj``
     """
     self.id, self.content = scg_id, scg_content
     self.truth_obj = Payload(truth)
     self.dummySample = Sample('data/inkml/65_alfonso.inkml')
     # Placeholder value; replace_traces() is expected to overwrite it.
     self.w_h_ratio = 1.0
     self.replace_traces()
     self.dummySample.re_calculate_IMG_MINMAX()
示例#3
0
    def process_list(self, pair_file_name, latex_out):
        '''

        :param pair_file_name, set name: for example, 'train', one train.flist, the other train.idlist
        :param outfile: for example, data/batch/train.lst
        :param latex_out: 'data/batch/latex_list.txt'
        :return:
        '''
        print "No dir specified, using default dir"
        base_dir = 'data/batch/'  # self.list_out_dir
        flist_base_dir = self.list_out_dir
        prefix = 'im2latex_'

        infile = flist_base_dir + pair_file_name + '.flist'     # data/batch/pickle/train.flist
        outfile = base_dir + prefix + pair_file_name + '.lst'   # data/batch/im2latex_train.lst
        scg_id_list_file = flist_base_dir + pair_file_name + '.idlist'

        print 'input: ', infile
        print 'output: ', outfile

        with open(infile, 'rb') as fin:
            flist = pickle.load(fin)

        with open(scg_id_list_file, 'rb') as fin:
            idlist = pickle.load(fin)

        with codecs.open(outfile, 'w', 'utf-8') as f_out:
            for file_path in flist:
                print 'file: ', file_path
                # process one file
                sample = Sample(file_path)
                latex = sample.latex
                #output latex
                latex_out.write(latex[1:-1] + '\n')
                #get filename without extension
                png_name = self.get_filename_noext(file_path)
                line = str(self.latex_index) + ' ' + png_name + ' ' + 'basic\n'
                f_out.write(line)
                #output image
                img, W, H = sample.render()
                misc.imsave(png_dir + png_name + '.png', img)
                self.latex_index += 1
            for id in idlist:
                print 'id: ', id
                logging.info('processing %d scg record', id)
                scg = self.scgs[id]
                latex = scg.get_latex()
                # output latex
                latex_out.write(latex + '\n')
                # get filename without extension
                png_name = str(id)
                line = str(self.latex_index) + ' ' + png_name + ' ' + 'basic\n'
                f_out.write(line)
                # output image (this task is done when dividing the list into 3 groups
                #scg.save_image(png_dir + png_name + '.png')
                self.latex_index += 1
示例#4
0
    def inkml2png(self):
        '''
        for all inkml files in data/batch/inkml folder, generate its png image and save it
        to data/batch/formula_images/ folder

        It is used if different resolution images need to be generated for mhr performance evaluation.
        :return:
        '''
        files, files_nolatex = self.all_inkml_files()
        for file_path in files:
            print 'file: ', file_path
            # process one file
            sample = Sample(file_path)
            # get filename without extension
            png_name = self.get_filename_noext(file_path)
            # output image
            img, W, H = sample.render()
            misc.imsave(png_dir + png_name + '.png', img)
示例#5
0
    def all_inkml_files(self):
        '''

        :return: files, files_nolatex
        '''
        files = []
        files_nolatex = []
        for root, dirnames, filenames in os.walk(self.inkml_file_path, followlinks=True):
                for filename in fnmatch.filter(filenames, '*.inkml'):
                    tmp_path = os.path.join(root, filename)
                    print tmp_path
                    sample = Sample(tmp_path)
                    if hasattr(sample, 'latex') and self.check_latex_length(sample):  #latex has been stripped
                        #print 'latex: ', tmp_path

                        files.append(tmp_path)
                    else:
                        files_nolatex.append(tmp_path)
        return files, files_nolatex
示例#6
0
        coords[:, idx] = [float(x_y[0]), float(x_y[1])]
        idx += 1
    traces[trace_id_int] = Stroke(trace_id_int, coords)
    trace_id_int += 1


# //Compute bounding box of the input expression
x_min, y_min, x_max, y_max = get_bounding_box(traces)  # bounding box for the whole math expression

# Just in case there is only one point or a sequence of	points perfectly aligned with the x or y axis
if x_max == x_min: x_max = x_min + 1;
if y_max == y_min: y_max = y_min + 1;

# Renormalize to height [0,10000] keeping the aspect ratio
H = 10000.0
W = H * (x_max - x_min) / (y_max - y_min)
for trace_key, trace_v in traces.iteritems():
    trace_v.calc_coords_h10000(H, W, x_min, y_min, x_max, y_max)


for trace_key, trace_v in traces.iteritems():
    print trace_key, trace_v
    rx, ry, rs, rt = trace_v.get_bounding_box_h10000()
    print rx, ry, rs, rt

dummy_sample =  Sample('data/inkml/65_alfonso.inkml')
dummy_sample.traces = traces
img, W, H = dummy_sample.render()
print 'save image to temp/all.png: '
misc.imsave('temp/all.png', img)
示例#7
0
 def setUp(self):
     """Load the reference inkml sample and keep traces 0, 1, 2 and 9."""
     traces = Sample('data/inkml/65_alfonso.inkml').traces
     self.stroke0 = traces[0]
     self.stroke1 = traces[1]
     self.stroke2 = traces[2]
     self.stroke9 = traces[9]
示例#8
0
 def test_loading_inkml(self):
     iml = Sample('data/inkml/D_357_HMA095045.inkml')
     print 'latex: ', iml.latex
示例#9
0
 def setUp(self):
     """Load the reference inkml sample and keep its first three traces.

     Sets ``self.idd`` (the Sample) and ``self.stroke0`` .. ``self.stroke2``.
     """
     self.idd = Sample('data/inkml/65_alfonso.inkml')
     self.stroke0 = self.idd.traces[0]
     self.stroke1 = self.idd.traces[1]
     self.stroke2 = self.idd.traces[2]
示例#10
0
class TestSample(TestCase):
    def setUp(self):
        self.idd = Sample('data/inkml/65_alfonso.inkml')
        self.stroke0 = self.idd.traces[0]
        self.stroke1 = self.idd.traces[1]
        self.stroke2 = self.idd.traces[2]
        prev = self.idd.traces[1]

    def test_inkml_loading(self):
        print 'loading inkml'
        print 'latex: ', self.idd.latex

    def test_loading_inkml(self):
        iml = Sample('data/inkml/D_357_HMA095045.inkml')
        print 'latex: ', iml.latex

    def test_get_bounding_box_h1000(self):
        self.assertEqual((0, 0, 69622, 10000),
                         (self.idd.ox, self.idd.oy, self.idd.os, self.idd.ot))

    def test_getAVGstroke_size(self):
        avgW, avgH = self.idd.getAVGstroke_size()
        targetW = 6075.5
        targetH = 4320.8999
        self.assertTrue(abs(avgW - targetW) < 1.0 / 2)
        self.assertTrue(abs(avgH - targetH) < 1.0 / 2)
        print avgW, avgH

    def test_nStrokes(self):
        ns = self.idd.nStrokes()
        self.assertEquals(10, ns)

    def test_detRefSymbol(self):
        '''
        expect (5764, 5063)
        :return:
        '''
        RX, RY = self.idd.detRefSymbol()
        print RX, RY
        self.assertEquals((5764, 5063), (RX, RY))

    def test_stroke_aspect_area(self):
        # expect 9623, 6794, 1.41639686, 65378662
        i = 0
        ancho, alto, aspectratio, area = self.idd.stroke_aspect_area(i)
        print ancho, alto, aspectratio, area
        self.assertEquals(9623, ancho)
        self.assertEquals(6794, alto)
        self.assertEquals(65378662, area)
        self.assertAlmostEqual(1.41639686, aspectratio)

    def test_median_vmedx(self):
        vmedx = [9623, 6039, 7360, 7360, 6982, 4152, 3586, 3586, 6793, 5284]
        print vmedx
        vmedx.sort()
        print vmedx
        l = len(vmedx)
        print vmedx[l / 2]
        self.assertEquals(6793, vmedx[l / 2])
        import numpy
        medx = numpy.median(vmedx)  # average of 6039 and 6793
        self.assertEquals(6416.0, medx)

    def test_stroke_distance1(self):
        self.idd.render()
        # distance between stroke 0 and stroke 2. the distance is before the normalization
        dmin = self.idd.stroke_distance(0, 2)
        self.assertEqual(7902.4310183639063, dmin)
        print dmin

    def test_stroke_distance2(self):
        img, W, H = self.idd.render()
        dmin = self.idd.stroke_distance(4, 7)
        print dmin

    def test_find_closest_pair(self):
        dmin, p1, p2 = self.idd.find_closest_pair(0, 2)
        print dmin
        self.assertAlmostEqual(7902.43115, dmin, 3)
        self.assertEqual((9433, 9056), p1)
        self.assertEqual((16037, 4716), p2)

    def test_render(self):
        '''
        testing rendering image from inkml file
        save image at temp/all.png
        :return:
        '''
        img, W, H = self.idd.render()
        print 'save image to temp/all.png: '
        misc.imsave('temp/all.png', img)
        print img[15, 9]
        print W, H
        #self.assertEqual(255, img[15, 9])
        #self.assertEqual((1792, 266), (W, H))

    def test_linea(self):
        W, H = 1792, 266
        img = np.ones((H, W), dtype=int) * 255
        self.idd.pix_stk = np.ones((H, W), dtype=int) * (-1)
        pa = (19.4867649, 149.881516)
        pb = (5, 145.043594)

        self.idd.linea(img, pa, pb, 0)
        print np.where(img == 0)
        print img

        self.assertEqual(0, img[145, 5])
        self.assertEqual(0, img[150, 20])

    def test_compute_strokes_distances(self):
        RX = 5764
        RY = 5063
        img, W, H = self.idd.render()
        stk_dis = self.idd.compute_strokes_distances(RX, RY)

        print '(x, y): (1169, 119): ', img[119][1169], self.idd.pix_stk[119][
            1169]
        self.assertEqual(0, img[119][1169])
        self.assertEqual(5, self.idd.pix_stk[119][1169])
        print stk_dis

    def test_get_close_strokes(self):
        self.idd.detRefSymbol()
        self.idd.render()
        self.idd.compute_strokes_distances(self.idd.RX, self.idd.RY)
        L = []
        self.idd.get_close_strokes(7, L, 0.69474973)
        self.assertEqual((6, 5), (L[0], L[1]))
        print L

        L = []
        self.idd.get_close_strokes(6, L, 0.69474973)
        self.assertEqual(5, L[0])

        L = []
        self.idd.get_close_strokes(8, L, 0.69474973)
        self.assertEqual((7, 6), (L[0], L[1]))