# validation valid_stop = False if np.mod(uidx, sampleFreq) == 0: WAP_model.eval() with torch.no_grad(): fpp_sample = open(valid_output[0], 'w') valid_count_idx = 0 for x, y in valid: for xx in x: xx_pad = xx.astype(np.float32) / 255. xx_pad = torch.from_numpy( xx_pad[None, :, :, :]).cuda() # (1,1,H,W) sample, score = gen_sample(WAP_model, xx_pad, params, multi_gpu_flag, k=10, maxlen=1000) if len(score) == 0: print('valid decode error happens') valid_stop = True break score = score / np.array([len(s) for s in sample]) ss = sample[score.argmin()] # write decoding results fpp_sample.write(valid_uid_list[valid_count_idx]) valid_count_idx = valid_count_idx + 1 # symbols (without <eos>) for vv in ss: if vv == 0: # <eos> break
def main(model_path, dictionary_target, fea, latex, saveto, output, beam_k=5):
    """Beam-search decode a test set and report WER / ExpRate.

    Loads a trained Encoder_Decoder, decodes every image in ``fea`` with
    beam search, writes the decoded symbol strings to ``saveto``, then
    shells out to compute-wer.py and parses the metrics it writes.

    Args:
        model_path: path to the saved model state dict.
        dictionary_target: symbol dictionary file (token -> index).
        fea: image/feature file for the test set.
        latex: ground-truth label file (also consumed by compute-wer.py).
        saveto: output path for the decoded symbol strings.
        output: path where compute-wer.py writes its WER/ExpRate lines.
        beam_k: beam width used by gen_sample.
    """
    # model architecture (must match the trained checkpoint)
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 111
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 1

    # load model: weights are mapped to CPU first, then moved to GPU
    model = Encoder_Decoder(params)
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
    model.cuda()

    # load dictionary and build the reverse index -> symbol table
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    # load data
    test, test_uid_list = dataIterator(fea, latex, worddicts, batch_size=8,
                                       batch_Imagesize=500000, maxlen=20000,
                                       maxImagesize=500000)

    # testing
    model.eval()
    with torch.no_grad():
        # FIX: context manager so the result file is closed even when
        # decoding raises (the original leaked the handle on error)
        with open(saveto, 'w') as fpp_sample:
            test_count_idx = 0
            print('Decoding ... ')
            for x, y in test:
                for xx in x:
                    print('%d : %s' % (test_count_idx + 1,
                                       test_uid_list[test_count_idx]))
                    xx_pad = xx.astype(np.float32) / 255.
                    xx_pad = torch.from_numpy(
                        xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                    sample, score = gen_sample(model, xx_pad, params, False,
                                               k=beam_k, maxlen=1000)
                    # length-normalize beam scores; lowest score is best
                    score = score / np.array([len(s) for s in sample])
                    ss = sample[score.argmin()]
                    # write decoding results
                    fpp_sample.write(test_uid_list[test_count_idx])
                    test_count_idx = test_count_idx + 1
                    # symbols (without <eos>)
                    for vv in ss:
                        if vv == 0:  # <eos>
                            break
                        fpp_sample.write(' ' + worddicts_r[vv])
                    fpp_sample.write('\n')
    print('test set decode done')

    # score the decode with the external tool and parse its report
    os.system('python compute-wer.py ' + saveto + ' ' + latex + ' ' + output)
    with open(output) as fpp:
        stuff = fpp.readlines()
    m = re.search('WER (.*)\n', stuff[0])
    test_per = 100. * float(m.group(1))
    m = re.search('ExpRate (.*)\n', stuff[1])
    test_sacc = 100. * float(m.group(1))
    print('Valid WER: %.2f%%, ExpRate: %.2f%%' % (test_per, test_sacc))
# NOTE(review): fragment — the next two lines are the tail of an RPN
# head-builder function whose def/header lies above this chunk; the 4-space
# indent is inferred, confirm against the full file.
    regr = Lambda(reshape3, output_shape=(None, 2),
                  name='rpn_regress_reshape')(regr)
    return cls, regr


# build the shared base network plus the RPN class / regression heads
inp, nn = nn_base((None, None, 3), trainable=True)
cls, regr = rpn(nn)
basemodel = Model(inp, [cls, regr])
basemodel.summary()

import utils

# Pascal-VOC style annotation / image folders
xmlpath = 'VOCdevkit/Annotations'
imgpath = 'VOCdevkit/JPEGImages'
# two independent generators over the same data — presumably one for
# training and one for validation; verify against utils.gen_sample
gen1 = utils.gen_sample(xmlpath, imgpath, 1)
gen2 = utils.gen_sample(xmlpath, imgpath, 1)


class losslog():
    # Appends the given text to a log file at construction time.
    def __init__(self, path, txt):
        with open(path, 'a+') as f:
            f.writelines(txt)


class losshistroy(Callback):
    # Keras callback that records the loss of every training batch.
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
# NOTE(review): fragment — the first lines are the tail of get_model()'s
# model.compile(...) call; the loss dict is opened above this chunk.
    }, optimizer=Adam(lr), metrics={"policy": acc})
    model.summary()
    return model


if __name__ == "__main__":
    file_path = "auto.h5"
    model = get_model()
    model.load_weights(file_path)
    # scramble a cube 10 moves deep, then search with the value/policy net
    sample_X, sample_Y, cubes = gen_sample(10)
    cube = cubes[0]
    cube.score = 0
    list_sequences = [[cube]]
    existing_cubes = set()
    for j in range(1000):
        # batch-evaluate the latest cube of every candidate sequence
        X = [flatten_1d_b(x[-1]) for x in list_sequences]
        value, policy = model.predict(np.array(X), batch_size=1024)
        new_list_sequences = []
        # NOTE(review): chunk truncated here — the expansion of
        # new_list_sequences continues beyond this view.
def test(text_detection_modelpara, ocr_modelpara, dictionary_target):
    """Run CRAFT text detection + attention OCR over ``image_list``.

    For each page image: detect text-line boxes with CRAFT, crop and
    grayscale each box, beam-search decode it with the Encoder_Decoder OCR
    model, draw the recognized text on the image, and accumulate the pages,
    lines and strings into a Shift_JIS XML document at ./data/result.xml.

    Args:
        text_detection_modelpara: checkpoint path for the CRAFT detector.
        ocr_modelpara: checkpoint path for the OCR Encoder_Decoder.
        dictionary_target: symbol dictionary (tokens are hex codepoints).
    """
    # load detection net
    net = CRAFT()  # initialize
    print('Loading text detection model from checkpoint {}'.format(
        text_detection_modelpara))
    if args.cuda:
        net.load_state_dict(copyStateDict(
            torch.load(text_detection_modelpara)))
    else:
        net.load_state_dict(
            copyStateDict(
                torch.load(text_detection_modelpara, map_location='cpu')))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = False

    # OCR model hyper-parameters (must match the trained checkpoint)
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 5748
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 3
    params['cuda'] = args.cuda

    # load OCR model
    OCR = Encoder_Decoder(params)
    if args.cuda:
        OCR.load_state_dict(copyStateDict(torch.load(ocr_modelpara)))
    else:
        OCR.load_state_dict(
            copyStateDict(torch.load(ocr_modelpara, map_location='cpu')))
    if args.cuda:
        #OCR = OCR.cuda()
        OCR = torch.nn.DataParallel(OCR)
        cudnn.benchmark = False
    OCR.eval()
    net.eval()

    # load dictionary and build the reverse index -> token table
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    t = time.time()
    fontPIL = '/usr/share/fonts/truetype/fonts-japanese-gothic.ttf'  # japanese font
    size = 40
    colorBGR = (0, 0, 255)
    paper = ET.Element('paper')
    paper.set('xmlns', "http://codh.rois.ac.jp/modern-magazine/")

    # process every page image
    for k, image_path in enumerate(image_list[:]):
        print("Test image {:d}/{:d}: {:s}".format(k + 1, len(image_list),
                                                  image_path), end='\r')
        res_img_file = result_folder + "res_" + os.path.basename(image_path)
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h, w = image.shape[0], image.shape[1]
        print(image_path)
        page = ET.SubElement(paper, "page")
        page.set('file', os.path.basename(image_path).replace('.jpg', ''))
        page.set('height', str(h))
        page.set('width', str(w))
        page.set('dpi', str(100))
        page.set('number', str(1))

        bboxes, polys, score_text = test_net(net, image, args.text_threshold,
                                             args.link_threshold,
                                             args.low_text, args.cuda,
                                             args.poly)
        text = []
        localtions = []
        for i, box in enumerate(bboxes):
            # clamp the detected quad to the image and crop its bounding box
            poly = np.array(box).astype(np.int32)
            min_x = np.min(poly[:, 0])
            max_x = np.max(poly[:, 0])
            min_y = np.min(poly[:, 1])
            max_y = np.max(poly[:, 1])
            if min_x < 0:
                min_x = 0
            if min_y < 0:
                min_y = 0
            input_img = image[min_y:max_y, min_x:max_x]
            w = max_x - min_x + 1
            h = max_y - min_y + 1
            line = ET.SubElement(page, "line")
            line.set("x", str(min_x))
            line.set("y", str(min_y))
            line.set("height", str(h))
            line.set("width", str(w))
            # resize so the short side becomes 20px and the long side a
            # multiple of 20 (the OCR model's expected input grid)
            if w < h:
                rate = 20.0 / w
                w = int(round(w * rate))
                h = int(round(h * rate / 20.0) * 20)
            else:
                rate = 20.0 / h
                w = int(round(w * rate / 20.0) * 20)
                h = int(round(h * rate))
            input_img = cv2.resize(input_img, (w, h))
            # BT.601 luma conversion into a single-channel uint8 plane
            mat = np.zeros([1, h, w], dtype='uint8')
            mat[0, :, :] = 0.299 * input_img[:, :, 0] + \
                0.587 * input_img[:, :, 1] + 0.114 * input_img[:, :, 2]
            xx_pad = mat.astype(np.float32) / 255.
            xx_pad = torch.from_numpy(xx_pad[None, :, :, :])  # (1,1,H,W)
            if args.cuda:
                # FIX: Tensor.cuda() is not in-place — the original discarded
                # the returned GPU tensor and kept decoding on the CPU copy.
                xx_pad = xx_pad.cuda()
            with torch.no_grad():
                sample, score, alpha_past_list = gen_sample(OCR, xx_pad,
                                                            params,
                                                            args.cuda,
                                                            k=10, maxlen=600)
            # length-normalize beam scores; lowest score is best
            score = score / np.array([len(s) for s in sample])
            ss = sample[score.argmin()]
            alpha_past = alpha_past_list[score.argmin()]
            result = ''
            i = 0
            location = []
            for vv in ss:
                if vv == 0:  # <eol>
                    break
                # per-step attention = difference of cumulative maps
                alpha = alpha_past[i]
                if i != 0:
                    alpha = alpha_past[i] - alpha_past[i - 1]
                (y, x) = np.unravel_index(np.argmax(alpha, axis=None),
                                          alpha.shape)
                # map the 16x-downsampled attention peak back to page coords
                location.append(
                    [int(16 * x / rate) + min_x, int(16 * y / rate) + min_y])
                # dictionary tokens are hex codepoints, e.g. '3042' -> 'あ'
                result += chr(int(worddicts_r[vv], 16))
                i += 1
            line.text = result
            text.append(result)
            localtions.append(location)
            image = cv2_putText_1(img=image, text=result,
                                  org=(min_x, max_x, min_y, max_y),
                                  fontFace=fontPIL, fontScale=size,
                                  color=colorBGR)
        print('save image')
        # save score text
        filename, file_ext = os.path.splitext(os.path.basename(image_path))
        mask_file = result_folder + "/res_" + filename + '_mask.jpg'
        file_utils.saveResult(image_path, image, polys, dirname=result_folder)

    # serialize the accumulated page tree as Shift_JIS XML
    xml_string = ET.tostring(paper, 'Shift_JIS')
    fout = codecs.open('./data/result.xml', 'w', 'shift_jis')
    fout.write(xml_string.decode('shift_jis'))
    fout.close()
    print("elapsed time : {}s".format(time.time() - t))
# evaluation: for each scramble depth 1..10, attempt to solve 100 scrambled
# cubes and record the success rate.
# NOTE(review): fragment — get_model, gen_sample and file_path come from
# earlier in the file, and the chunk is truncated before the solving loop.
model = get_model()
model.load_weights(file_path)
x_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y_values = []
for random_steps in x_values:
    success_time = 0
    for times in range(100):
        # generate 1 sample
        sample_X, sample_Y, cubes = gen_sample(random_steps)
        cube = cubes[0]
        cube.score = 0
        list_sequences = [[cube]]
        existing_cubes = set()
        #print(list_sequences)
        preview_cube = cube
        # show cube before solving
        #print([preview_cube])
def main(model_path, dictionary_target, fea, latex, saveto, output, beam_k=5):
    """Decode detected text-line crops under ./kokumin/ with the OCR model.

    For every ``*.jpg`` with a companion ``res_*.txt`` detection file,
    crops each bounding box, resizes it to the model's 20px input grid,
    beam-search decodes it, draws the boxes on the page image, and writes
    the boxes plus decoded strings into ./kokuminOut/.

    Args:
        model_path: path to the saved Encoder_Decoder state dict.
        dictionary_target: symbol dictionary file (token -> index).
        fea, latex, saveto, output: accepted for signature compatibility
            but unused by this variant (it reads the hard-coded folders).
        beam_k: beam width used by gen_sample.
    """
    # model architecture (must match the trained checkpoint)
    params = {}
    params['n'] = 256
    params['m'] = 256
    params['dim_attention'] = 512
    params['D'] = 684
    params['K'] = 5748
    params['growthRate'] = 24
    params['reduction'] = 0.5
    params['bottleneck'] = True
    params['use_dropout'] = True
    params['input_channels'] = 3

    # load model: weights mapped to CPU first, then moved to GPU
    model = Encoder_Decoder(params)
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
    model.cuda()

    # load dictionary and build the reverse index -> token table
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    start_time = time.time()
    channels = 1
    folder = './kokumin/'
    out = './kokuminOut/'
    # NOTE(review): index is never incremented, so every log line prints 0;
    # left as-is because the intended counting unit (page vs box) is unclear.
    index = 0

    # testing
    model.eval()
    with torch.no_grad():
        for img_file in os.listdir(folder):
            if '.jpg' in img_file:
                label_file = folder + 'res_' + img_file.replace('jpg', 'txt')
                if not os.path.isfile(label_file):
                    continue
                out_file = out + img_file
                out_txtfile = out + img_file.replace('jpg', 'txt')
                img_file = folder + img_file
                im = imread(img_file)
                arr = Image.fromarray(im).convert('RGB')
                draw = ImageDraw.Draw(arr)
                with open(label_file) as f:
                    BBs = f.readlines()
                BBs = [x.strip().split(',') for x in BBs]
                # FIX: context manager — the original rebound `f` to the
                # output file and leaked the handle if decoding raised
                with open(out_txtfile, 'w') as fout:
                    for BB in BBs:
                        # detection gives a quad; take its axis-aligned box
                        x1 = min(int(BB[0]), int(BB[2]),
                                 int(BB[4]), int(BB[6]))
                        y1 = min(int(BB[1]), int(BB[3]),
                                 int(BB[5]), int(BB[7]))
                        x2 = max(int(BB[0]), int(BB[2]),
                                 int(BB[4]), int(BB[6]))
                        y2 = max(int(BB[1]), int(BB[3]),
                                 int(BB[5]), int(BB[7]))
                        if x1 < 0:
                            x1 = 0
                        if y1 < 0:
                            y1 = 0
                        draw.rectangle((x1, y1, x2, y2), fill=None,
                                       outline=(255, 0, 0))
                        fout.write(str(x1) + ',' + str(y1) + ',' +
                                   str(x2) + ',' + str(y2) + ',')
                        input_img = im[y1:y2, x1:x2]
                        w = x2 - x1 + 1
                        h = y2 - y1 + 1
                        # short side -> 20px, long side -> multiple of 20
                        if w < h:
                            rate = 20.0 / w
                            w = int(round(w * rate))
                            h = int(round(h * rate / 20.0) * 20)
                        else:
                            rate = 20.0 / h
                            w = int(round(w * rate / 20.0) * 20)
                            h = int(round(h * rate))
                        input_img = imresize(input_img, (h, w))
                        # assumes `im` is a single-channel (grayscale)
                        # array here — TODO confirm against imread's output
                        mat = np.zeros([channels, h, w], dtype='uint8')
                        mat[0, :, :] = input_img
                        xx_pad = mat.astype(np.float32) / 255.
                        xx_pad = torch.from_numpy(
                            xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
                        sample, score, alpha_past_list = gen_sample(
                            model, xx_pad, params, False,
                            k=beam_k, maxlen=600)
                        # length-normalize beam scores; lowest is best
                        score = score / np.array([len(s) for s in sample])
                        ss = sample[score.argmin()]
                        result = ''
                        for vv in ss:
                            if vv == 0:  # <eol>
                                break
                            result += worddicts_r[vv] + ' '
                        # FIX: 'resutl' typo in the log message
                        print('result:', index, result)
                        fout.write(result + '\n')
                arr.save(out_file, "JPEG")
# validation decode with relation/position beams: collect recognized symbol
# sequences, relations and right-position indices keyed by sample uid, next
# to their ground-truth counterparts for later scoring.
# NOTE(review): fragment — valid, valid_uid_list, valid_malpha_path,
# WAP_model, params, multi_gpu_flag and maxlen come from earlier in the
# file, and the chunk is truncated inside the else-branch.
os.mkdir(valid_malpha_path)
rec_mat = {}
label_mat = {}
rec_re_mat = {}
label_re_mat = {}
rec_ridx_mat = {}
label_ridx_mat = {}
with torch.no_grad():
    valid_count_idx = 0
    for x, ly, ry, re, ma, lp, rp in valid:
        for xx, lyy, ree, rpp in zip(x, ly, re, rp):
            # scale pixels to [0,1] and add a batch dimension
            xx_pad = xx.astype(np.float32) / 255.
            xx_pad = torch.from_numpy(
                xx_pad[None, :, :, :]).cuda()  # (1,1,H,W)
            score, sample, malpha_list, relation_sample = \
                gen_sample(WAP_model, xx_pad, params, multi_gpu_flag,
                           k=3, maxlen=maxlen, rpos_beam=3)
            key = valid_uid_list[valid_count_idx]
            # predictions start empty; labels are stored alongside
            rec_mat[key] = []
            label_mat[key] = lyy
            rec_re_mat[key] = []
            label_re_mat[key] = ree
            rec_ridx_mat[key] = []
            label_ridx_mat[key] = rpp
            if len(score) == 0:
                # no finished hypothesis: record sentinel 0 entries
                rec_mat[key].append(0)
                rec_re_mat[key].append(0)  # End
                rec_ridx_mat[key].append(0)
            else:
                # length-normalize beam scores; lowest score is best
                score = score / np.array([len(s) for s in sample])
                min_score_index = score.argmin()