Example #1
        # validation
        valid_stop = False
        if np.mod(uidx, sampleFreq) == 0:
            WAP_model.eval()
            with torch.no_grad():
                fpp_sample = open(valid_output[0], 'w')
                valid_count_idx = 0
                for x, y in valid:
                    for xx in x:
                        xx_pad = xx.astype(np.float32) / 255.
                        xx_pad = torch.from_numpy(
                            xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                        sample, score = gen_sample(WAP_model,
                                                   xx_pad,
                                                   params,
                                                   multi_gpu_flag,
                                                   k=10,
                                                   maxlen=1000)
                        if len(score) == 0:
                            print('valid decode error happens')
                            valid_stop = True
                            break
                        score = score / np.array([len(s) for s in sample])
                        ss = sample[score.argmin()]
                        # write decoding results
                        fpp_sample.write(valid_uid_list[valid_count_idx])
                        valid_count_idx = valid_count_idx + 1
                        # symbols (without <eos>)
                        for vv in ss:
                            if vv == 0:  # <eos>
                                break
                            fpp_sample.write(' ' + worddicts_r[vv])
                        fpp_sample.write('\n')

def main(model_path, dictionary_target, fea, latex, saveto, output, beam_k=5):
    # model architecture
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 111
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 1

    # load model
    model = Encoder_Decoder(params)
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
    model.cuda()

    # load dictionary
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    # load data
    test, test_uid_list = dataIterator(fea,
                                       latex,
                                       worddicts,
                                       batch_size=8,
                                       batch_Imagesize=500000,
                                       maxlen=20000,
                                       maxImagesize=500000)

    # testing
    model.eval()
    with torch.no_grad():
        fpp_sample = open(saveto, 'w')
        test_count_idx = 0
        print('Decoding ... ')
        for x, y in test:
            for xx in x:
                print('%d : %s' %
                      (test_count_idx + 1, test_uid_list[test_count_idx]))
                xx_pad = xx.astype(np.float32) / 255.
                xx_pad = torch.from_numpy(
                    xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                sample, score = gen_sample(model,
                                           xx_pad,
                                           params,
                                           False,
                                           k=beam_k,
                                           maxlen=1000)
                score = score / np.array([len(s) for s in sample])
                ss = sample[score.argmin()]
                # write decoding results
                fpp_sample.write(test_uid_list[test_count_idx])
                test_count_idx = test_count_idx + 1
                # symbols (without <eos>)
                for vv in ss:
                    if vv == 0:  # <eos>
                        break
                    fpp_sample.write(' ' + worddicts_r[vv])
                fpp_sample.write('\n')
    fpp_sample.close()
    print('test set decode done')
    os.system('python compute-wer.py ' + saveto + ' ' + latex + ' ' + output)
    fpp = open(output)
    stuff = fpp.readlines()
    fpp.close()
    m = re.search('WER (.*)\n', stuff[0])
    test_per = 100. * float(m.group(1))
    m = re.search('ExpRate (.*)\n', stuff[1])
    test_sacc = 100. * float(m.group(1))
    print('Test WER: %.2f%%, ExpRate: %.2f%%' % (test_per, test_sacc))
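
A minimal invocation sketch for the entry point above; every path below is a hypothetical placeholder, not taken from the original example:

if __name__ == '__main__':
    # Hypothetical paths; adjust to the actual model/data layout.
    main(model_path='models/WAP_params.pkl',
         dictionary_target='data/dictionary.txt',
         fea='data/offline-test.pkl',
         latex='data/test_caption.txt',
         saveto='result/test_decode_result.txt',
         output='result/test.wer',
         beam_k=10)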
Example #3
    regr = Lambda(reshape3, output_shape=(None, 2), name='rpn_regress_reshape')(regr)

    return cls, regr


inp, nn = nn_base((None, None, 3), trainable=True)
cls, regr = rpn(nn)
basemodel = Model(inp, [cls, regr])
basemodel.summary()

import utils

xmlpath = 'VOCdevkit/Annotations'
imgpath = 'VOCdevkit/JPEGImages'
gen1 = utils.gen_sample(xmlpath, imgpath, 1)
gen2 = utils.gen_sample(xmlpath, imgpath, 1)


class losslog():
    def __init__(self, path, txt):
        with open(path, 'a+') as f:
            f.writelines(txt)


class losshistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
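
A hedged usage sketch for the two helpers above; the model and data names are assumptions, not part of the original snippet:

# Hypothetical wiring: record per-batch losses during training, then persist them.
history = losshistory()
model.fit(train_X, train_Y, epochs=10, callbacks=[history])  # model/data assumed
losslog('loss_log.txt', '\n'.join(str(l) for l in history.losses))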
    },
                  optimizer=Adam(lr),
                  metrics={"policy": acc})
    model.summary()

    return model


if __name__ == "__main__":
    file_path = "auto.h5"

    model = get_model()

    model.load_weights(file_path)

    sample_X, sample_Y, cubes = gen_sample(10)
    cube = cubes[0]
    cube.score = 0

    list_sequences = [[cube]]

    existing_cubes = set()

    for j in range(1000):

        X = [flatten_1d_b(x[-1]) for x in list_sequences]

        value, policy = model.predict(np.array(X), batch_size=1024)

        new_list_sequences = []
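
The expansion step is truncated here. In this style of learned solver, each sequence is typically extended by one move per legal action, deduplicated, and pruned to a beam; a rough sketch under those assumptions (list_of_moves, perform_motion, and the scoring rule are hypothetical names, not from the snippet):

        for seq, p in zip(list_sequences, policy):
            last = seq[-1]
            for move_idx, move in enumerate(list_of_moves):    # hypothetical move set
                child = perform_motion(last, move)             # hypothetical move application
                key = tuple(flatten_1d_b(child))               # hashable state for dedup
                if key in existing_cubes:
                    continue
                existing_cubes.add(key)
                child.score = last.score + float(p[move_idx])  # assumed scoring rule
                new_list_sequences.append(seq + [child])
        # assumed beam pruning: keep only the highest-scoring sequences
        new_list_sequences.sort(key=lambda s: s[-1].score, reverse=True)
        list_sequences = new_list_sequences[:100]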
Example #5
def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
    # load net
    net = CRAFT()  # initialize

    print('Loading text detection model from checkpoint {}'.format(
        text_detection_modelpara))
    if args.cuda:
        net.load_state_dict(copyStateDict(
            torch.load(text_detection_modelpara)))
    else:
        net.load_state_dict(
            copyStateDict(
                torch.load(text_detection_modelpara, map_location='cpu')))

    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 5748
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 3
    params['cuda'] = args.cuda

    # load model
    OCR = Encoder_Decoder(params)
    if args.cuda:
        OCR.load_state_dict(copyStateDict(torch.load(ocr_modelpara)))
    else:
        OCR.load_state_dict(
            copyStateDict(torch.load(ocr_modelpara, map_location='cpu')))
    if args.cuda:
        #OCR = OCR.cuda()
        OCR = torch.nn.DataParallel(OCR)
        cudnn.benchmark = False

    OCR.eval()
    net.eval()

    # load dictionary
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk
    t = time.time()

    fontPIL = '/usr/share/fonts/truetype/fonts-japanese-gothic.ttf'  # japanese font
    size = 40
    colorBGR = (0, 0, 255)

    paper = ET.Element('paper')
    paper.set('xmlns', "http://codh.rois.ac.jp/modern-magazine/")
    # load data
    for k, image_path in enumerate(image_list[:]):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path),
              end='\r')
        res_img_file = result_folder + "res_" + os.path.basename(image_path)

        #print (res_img_file, os.path.basename(image_path), os.path.exists(res_img_file))
        #if os.path.exists(res_img_file): continue
        #image = imgproc.loadImage(image_path)
        '''image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        ret2,image = cv2.threshold(image,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        height = image.shape[0]
        width = image.shape[1]
        scale = 1000.0/height
        H = int(image.shape[0] * scale)
        W = int(image.shape[1] * scale)
        image = cv2.resize(image , (W, H))
        print(image.shape, image_path)
        cv2.imwrite(image_path, image) 
        continue'''
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h, w = image.shape[0], image.shape[1]
        print(image_path)
        page = ET.SubElement(paper, "page")
        page.set('file', os.path.basename(image_path).replace('.jpg', ''))
        page.set('height', str(h))
        page.set('width', str(w))
        page.set('dpi', str(100))
        page.set('number', str(1))

        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold,
                                             args.low_text, args.cuda,
                                             args.poly)
        text = []
        locations = []
        for i, box in enumerate(bboxes):
            poly = np.array(box).astype(np.int32)
            min_x = np.min(poly[:, 0])
            max_x = np.max(poly[:, 0])
            min_y = np.min(poly[:, 1])
            max_y = np.max(poly[:, 1])
            if min_x < 0:
                min_x = 0
            if min_y < 0:
                min_y = 0

            #image = cv2.rectangle(image,(min_x,min_y),(max_x,max_y),(0,255,0),3)
            input_img = image[min_y:max_y, min_x:max_x]

            w = max_x - min_x + 1
            h = max_y - min_y + 1
            line = ET.SubElement(page, "line")
            line.set("x", str(min_x))
            line.set("y", str(min_y))
            line.set("height", str(h))
            line.set("width", str(w))
            if w < h:
                rate = 20.0 / w
                w = int(round(w * rate))
                h = int(round(h * rate / 20.0) * 20)
            else:
                rate = 20.0 / h
                w = int(round(w * rate / 20.0) * 20)
                h = int(round(h * rate))
            #print (w, h, rate)
            input_img = cv2.resize(input_img, (w, h))

            mat = np.zeros([1, h, w], dtype='uint8')
            # ITU-R BT.601 luma conversion: RGB -> grayscale
            mat[0, :, :] = (0.299 * input_img[:, :, 0] +
                            0.587 * input_img[:, :, 1] +
                            0.114 * input_img[:, :, 2])

            xx_pad = mat.astype(np.float32) / 255.
            xx_pad = torch.from_numpy(xx_pad[None, :, :, :])  # (1,1,H,W)
            if args.cuda:
                xx_pad = xx_pad.cuda()  # .cuda() is not in-place; rebind the tensor
            with torch.no_grad():
                sample, score, alpha_past_list = gen_sample(OCR,
                                                            xx_pad,
                                                            params,
                                                            args.cuda,
                                                            k=10,
                                                            maxlen=600)
            score = score / np.array([len(s) for s in sample])
            ss = sample[score.argmin()]
            alpha_past = alpha_past_list[score.argmin()]
            result = ''
            i = 0
            location = []
            for vv in ss:

                if vv == 0:  # <eol>
                    break
                alpha = alpha_past[i]
                if i != 0: alpha = alpha_past[i] - alpha_past[i - 1]
                (y, x) = np.unravel_index(np.argmax(alpha, axis=None),
                                          alpha.shape)
                #print (int(16* x /rate), int(16* y/rate) , chr(int(worddicts_r[vv],16)))
                location.append(
                    [int(16 * x / rate) + min_x,
                     int(16 * y / rate) + min_y])
                #image = cv2.circle(image,(int(16* x/rate) -  8 + min_x, int(16* y/rate) + 8 + min_y),25, (0,0,255), -1)

                result += chr(int(worddicts_r[vv], 16))
                '''char = ET.SubElement(line, "char") 
                char.set('num_cand', '1') 
                char.set('x', str(int(16* x/rate) -  8 + min_x)) 
                char.set('y', str(int(16* y/rate) + 8 + min_y)) 
                res = ET.SubElement(char, "result") 
                res.set('CC', str(100))
                res.text = chr(int(worddicts_r[vv],16))
                cand = ET.SubElement(char, "cand") 
                cand.set('CC', str(100))
                cand.text = chr(int(worddicts_r[vv],16))'''
                i += 1
            line.text = result
            text.append(result)
            locations.append(location)
            image = cv2_putText_1(img=image,
                                  text=result,
                                  org=(min_x, max_x, min_y, max_y),
                                  fontFace=fontPIL,
                                  fontScale=size,
                                  color=colorBGR)

        print('save image')
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        #cv2.imwrite(mask_file, score_text)
        file_utils.saveResult(image_path, image, polys, dirname=result_folder)

    xml_string = ET.tostring(paper, 'Shift_JIS')

    fout = codecs.open('./data/result.xml', 'w', 'shift_jis')
    fout.write(xml_string.decode('shift_jis'))
    fout.close()

    print("elapsed time : {}s".format(time.time() - t))
Example #6
    model = get_model()

    model.load_weights(file_path)

    x_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    y_values = []

    for random_steps in x_values:

        success_time = 0

        for times in range(100):

            #generate 1 sample
            sample_X, sample_Y, cubes = gen_sample(random_steps)

            cube = cubes[0]
            cube.score = 0

            list_sequences = [[cube]]

            existing_cubes = set()

            #print(list_sequences)

            preview_cube = cube

            #show cube before solving
            #print([preview_cube])
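
The snippet is truncated before y_values is filled; given the setup, each entry is presumably the success rate over the 100 trials at that scramble depth. A hedged sketch of how such a loop might finish and plot, assuming matplotlib (the append rule is an assumption):

            # ... attempt to solve; increment success_time on success (assumed) ...

        y_values.append(success_time / 100.0)  # assumed: success rate at this depth

    import matplotlib.pyplot as plt  # assumed plotting dependency
    plt.plot(x_values, y_values, marker='o')
    plt.xlabel('random scramble steps')
    plt.ylabel('solve success rate')
    plt.show()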
Example #7
def main(model_path, dictionary_target, fea, latex, saveto, output, beam_k=5):
    # model architecture
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 5748
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 3

    # load model
    model = Encoder_Decoder(params)
    model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
    model.cuda()

    # load dictionary
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    start_time = time.time()
    channels = 1
    folder = './kokumin/'
    out = './kokuminOut/'
    index = 0

    # testing
    model.eval()
    with torch.no_grad():
        for img_file in os.listdir(folder):
            if '.jpg' in img_file:
                label_file = folder + 'res_' + img_file.replace('jpg', 'txt')
                if not os.path.isfile(label_file):
                    continue
                out_file = out + img_file
                out_txtfile = out + img_file.replace('jpg', 'txt')
                img_file = folder + img_file
                #print img_file, label_file
                im = imread(img_file)
                arr = Image.fromarray(im).convert('RGB')
                draw = ImageDraw.Draw(arr)
   
                #print im.shape
                with open(label_file) as f:
                    BBs = f.readlines()
                BBs = [x.strip().split(',') for x in BBs]
                f = open(out_txtfile, 'w')
                for BB in BBs:
                    x1 = min(int(BB[0]), int(BB[2]), int(BB[4]), int(BB[6]))
                    y1 = min(int(BB[1]), int(BB[3]), int(BB[5]), int(BB[7]))
                    x2 = max(int(BB[0]), int(BB[2]), int(BB[4]), int(BB[6]))
                    y2 = max(int(BB[1]), int(BB[3]), int(BB[5]), int(BB[7]))
                    if x1 < 0: x1 = 0
                    if y1 < 0: y1 = 0

                    draw.rectangle((x1, y1, x2, y2), fill=None, outline=(255, 0, 0))

                    f.write(str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2) + ',')
                    input_img = im[y1:y2, x1:x2]
                    w = x2 - x1 + 1
                    h = y2 - y1 + 1
                    #print x1, y1, x2, y2
                    #print w, h
                    if w < h:
                        rate = 20.0 / w
                        w = int(round(w * rate))
                        h = int(round(h * rate / 20.0) * 20)
                    else:
                        rate = 20.0 / h
                        w = int(round(w * rate / 20.0) * 20)
                        h = int(round(h * rate))
                    #print w, h
                    input_img = imresize(input_img, (h, w))

                    mat = np.zeros([channels, h, w], dtype='uint8')
                    mat[0, :, :] = input_img
                    #mat[0,:,:] =  0.299* input_img[:, :, 0] + 0.587 * input_img[:, :, 1] + 0.114 * input_img[:, :, 2]

                    xx_pad = mat.astype(np.float32) / 255.
                    xx_pad = torch.from_numpy(xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                    sample, score, alpha_past_list = gen_sample(model, xx_pad, params, False, k=beam_k, maxlen=600)
                    score = score / np.array([len(s) for s in sample])
                    ss = sample[score.argmin()]
                    result = ''
                    for vv in ss:
                        if vv == 0: # <eol>
                            break
                        result += worddicts_r[vv] + ' '
                    print('result:', index, result)
                    f.write(result + '\n')
                f.close()
                arr.save(out_file,"JPEG")
Example #8
                os.mkdir(valid_malpha_path)
            rec_mat = {}
            label_mat = {}
            rec_re_mat = {}
            label_re_mat = {}
            rec_ridx_mat = {}
            label_ridx_mat = {}
            with torch.no_grad():
                valid_count_idx = 0
                for x, ly, ry, re, ma, lp, rp in valid:
                    for xx, lyy, ree, rpp in zip(x, ly, re, rp):
                        xx_pad = xx.astype(np.float32) / 255.
                        xx_pad = torch.from_numpy(
                            xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                        score, sample, malpha_list, relation_sample = \
                            gen_sample(WAP_model, xx_pad, params, multi_gpu_flag, k=3, maxlen=maxlen, rpos_beam=3)

                        key = valid_uid_list[valid_count_idx]
                        rec_mat[key] = []
                        label_mat[key] = lyy
                        rec_re_mat[key] = []
                        label_re_mat[key] = ree
                        rec_ridx_mat[key] = []
                        label_ridx_mat[key] = rpp
                        if len(score) == 0:
                            rec_mat[key].append(0)
                            rec_re_mat[key].append(0)  # End
                            rec_ridx_mat[key].append(0)
                        else:
                            score = score / np.array([len(s) for s in sample])
                            min_score_index = score.argmin()