def main(): # prepare output directory # global epoch print('EAST <==> TEST <==> Create Res_file and Img_with_box <==> Begin') result_root = os.path.abspath(cfg.res_img_path) if not os.path.exists(result_root): os.mkdir(result_root) print('EAST <==> Prepare <==> Network <==> Begin') model = East() model = torch.nn.DataParallel(model, device_ids=cfg.gpu_ids) model.cuda() # 载入模型 if os.path.isfile(cfg.checkpoint): print( "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format( cfg.checkpoint)) checkpoint = torch.load(cfg.checkpoint) epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) print( "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format( cfg.checkpoint)) else: print('Can not find checkpoint !!!') exit(1) predict(model, epoch)
def main(): # Prepare for dataset print('EAST <==> Prepare <==> DataLoader <==> Begin') trainset = custom_dset(transform=transforms.ToTensor()) train_loader = DataLoader(trainset, batch_size=cfg.train_batch_size_per_gpu * cfg.gpu, shuffle=True, num_workers=cfg.num_workers) print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format(cfg.train_batch_size_per_gpu * cfg.gpu)) print('EAST <==> Prepare <==> DataLoader <==> Done') # test datalodaer # import numpy as np # import matplotlib.pyplot as plt # for batch_idx, (img, img_path, score_map, geo_map, training_mask) in enumerate(train_loader): # print("batch index:", batch_idx, ",img batch shape", np.shape(geo_map.numpy())) # h1 = img.numpy()[0].transpose(1, 2, 0).astype(np.int64) # h2 = score_map.numpy()[0].transpose(1, 2, 0).astype(np.float32)[:, :, 0] # plt.figure() # plt.subplot(1, 2, 1) # plt.imshow(h1) # plt.subplot(1, 2, 2) # plt.imshow(h2, cmap='gray') # plt.show() # Model print('EAST <==> Prepare <==> Network <==> Begin') model = East() model = torch.nn.DataParallel(model, device_ids=cfg.gpu_ids) criterion = loss.LossFunc().cuda() weight_loss = utils.Regularization(model, cfg.l2_weight_decay, p=2).cuda() pre_params = list(map(id, model.module.mobilenet.parameters())) post_params = filter(lambda p: id(p) not in pre_params, model.module.parameters()) optimizer = torch.optim.Adam([{'params': model.module.mobilenet.parameters(), 'lr': cfg.pre_lr}, {'params': post_params, 'lr': cfg.lr}]) # 计算方式 decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.decay_steps, gamma=cfg.decay_rate) model.cuda() # init or resume,恢复模型 if cfg.resume and os.path.isfile(cfg.checkpoint): start_epoch = utils.Loading_checkpoint(model, optimizer, scheduler) else: start_epoch = 0 print('EAST <==> Prepare <==> Network <==> Done') tensorboard_writer = init_tensorboard_writer('tensorboards/{}'.format(str(int(time.time())))) # train Model for epoch in range(start_epoch, cfg.max_epochs): scheduler.step() fit(train_loader, model, criterion, optimizer, epoch, weight_loss,tensorboard_writer) # 保存模型 if epoch % cfg.save_eval_iteration == 0: utils.save_checkpoint(epoch, model, optimizer, scheduler)
def main(): root_path = '/home/mathu/Documents/express_recognition/data/telephone_txt/result/' train_img = root_path + 'print_pic' train_txt = root_path + 'print_txt' # root_path = '/home/mathu/Documents/express_recognition/data/icdar2015/' # train_img = root_path + 'train2015' # train_txt = root_path + 'train_label' trainset = custom_dset(train_img, train_txt) trainloader = DataLoader(trainset, batch_size=16, shuffle=True, collate_fn=collate_fn, num_workers=4) model = East() model = model.cuda() model.load_state_dict(torch.load('./checkpoints_total/model_1440.pth')) crit = LossFunc() weight_decay = 0 optimizer = torch.optim.Adam(model.parameters(), lr=1e-4) # weight_decay=1) scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94) train(epochs=1500, model=model, trainloader=trainloader, crit=crit, optimizer=optimizer, scheduler=scheduler, save_step=20, weight_decay=weight_decay) write.close()
def main(): root_path = './dataset/' train_img = root_path + 'train2015/' train_txt = root_path + 'train_label/' trainset = custom_dset(train_img, train_txt) print(trainset) trainloader = DataLoader(trainset, batch_size=16, shuffle=True, collate_fn=collate_fn, num_workers=4) model = East() model = model.cuda() crit = LossFunc() weight_decay = 0 optimizer = torch.optim.Adam(model.parameters(), lr=1e-4) # weight_decay=1) scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94) train(epochs=1500, model=model, trainloader=trainloader, crit=crit, optimizer=optimizer, scheduler=scheduler, save_step=20, weight_decay=weight_decay) write.close()
def model_init(config): train_root_path = os.path.abspath(os.path.join(config["dataroot"], 'train')) train_img = os.path.join(train_root_path, 'img') train_gt = os.path.join(train_root_path, 'gt') trainset = custom_dset(train_img, train_gt) train_loader = DataLoader(trainset, batch_size=config["train_batch_size_per_gpu"] * config["gpu"], shuffle=True, collate_fn=collate_fn, num_workers=config["num_workers"]) logging.debug('Data loader created: Batch_size:{}, GPU {}:({})'.format( config["train_batch_size_per_gpu"] * config["gpu"], config["gpu"], config["gpu_ids"])) # Model model = East() model = nn.DataParallel(model, device_ids=config["gpu_ids"]) model = model.cuda() init_weights(model, init_type=config["init_type"]) logging.debug("Model initiated, init type: {}".format(config["init_type"])) cudnn.benchmark = True criterion = LossFunc() optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"]) scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94) # init or resume if config["resume"] and os.path.isfile(config["checkpoint"]): start_epoch = load_checkpoint(config, model, optimizer) else: start_epoch = 0 logging.debug("Model is running...") return model, criterion, optimizer, scheduler, train_loader, start_epoch
def main(): hmean = .0 is_best = False warnings.simplefilter('ignore', np.RankWarning) # Prepare for dataset print('EAST <==> Prepare <==> DataLoader <==> Begin') # train_root_path = os.path.abspath(os.path.join('./dataset/', 'train')) train_root_path = cfg.dataroot train_img = os.path.join(train_root_path, 'img') train_gt = os.path.join(train_root_path, 'gt') trainset = custom_dset(train_img, train_gt) train_loader = DataLoader(trainset, batch_size=cfg.train_batch_size_per_gpu * cfg.gpu, shuffle=True, collate_fn=collate_fn, num_workers=cfg.num_workers) print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format( cfg.train_batch_size_per_gpu * cfg.gpu)) print('EAST <==> Prepare <==> DataLoader <==> Done') # test datalodaer """ for i in range(100000): for j, (a,b,c,d) in enumerate(train_loader): print(i, j,'/',len(train_loader)) """ # Model print('EAST <==> Prepare <==> Network <==> Begin') model = East() model = nn.DataParallel(model, device_ids=cfg.gpu_ids) model = model.cuda() init_weights(model, init_type=cfg.init_type) cudnn.benchmark = True criterion = LossFunc() optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr) scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94) # init or resume if cfg.resume and os.path.isfile(cfg.checkpoint): weightpath = os.path.abspath(cfg.checkpoint) print( "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format( weightpath)) checkpoint = torch.load(weightpath) start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print( "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format( weightpath)) else: start_epoch = 0 print('EAST <==> Prepare <==> Network <==> Done') for epoch in range(start_epoch, cfg.max_epochs): train(train_loader, model, criterion, scheduler, optimizer, epoch) if epoch % cfg.eval_iteration == 0: # create res_file and img_with_box output_txt_dir_path = predict(model, criterion, epoch) # Zip file submit_path = MyZip(output_txt_dir_path, epoch) # submit and compute Hmean hmean_ = compute_hmean(submit_path) if hmean_ > hmean: is_best = True state = { 'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'is_best': is_best, } save_checkpoint(state, epoch)
class EASTPredictor(object): def __init__(self, lr, weight_path, output_path): self.output_path = output_path self.model = East() self.model = nn.DataParallel(self.model, device_ids=[0]) self.model = self.model.cuda() init_weights(self.model, init_type='xavier') cudnn.benchmark = True self.criterion = LossFunc() self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr) self.scheduler = lr_scheduler.StepLR(self.optimizer, step_size=10000, gamma=0.94) self.weightpath = os.path.abspath(weight_path) logging.debug("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(self.weightpath)) checkpoint = torch.load(self.weightpath) self.start_epoch = checkpoint['epoch'] self.model.load_state_dict(checkpoint['state_dict']) self.optimizer.load_state_dict(checkpoint['optimizer']) logging.debug("EAST <==> Prepare <==> Loading checkpoint '{}', epoch={} <==> Done".format(self.weightpath, self.start_epoch)) self.model.eval() def resize_image(self, im, max_side_len=2400): ''' resize image to a size multiple of 32 which is required by the network :param im: the resized image :param max_side_len: limit of max image size to avoid out of memory in gpu :return: the resized image and the resize ratio ''' h, w, _ = im.shape resize_w = w resize_h = h resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32 resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32 #resize_h, resize_w = 512, 512 im = cv2.resize(im, (int(resize_w), int(resize_h))) ratio_h = resize_h / float(h) ratio_w = resize_w / float(w) return im, (ratio_h, ratio_w) def detect(self, score_map, geo_map, score_map_thresh=1e-5, box_thresh=1e-8, nms_thres=0.1): ''' restore text boxes from score map and geo map :param score_map: :param geo_map: :param score_map_thresh: threshhold for score map :param box_thresh: threshhold for boxes :param nms_thres: threshold for nms :return: ''' if len(score_map.shape) == 4: score_map = score_map[0, :, :, 0] geo_map = geo_map[0, :, :, ] # filter the score map xy_text = np.argwhere(score_map > score_map_thresh) # sort the text boxes via the y axis xy_text = xy_text[np.argsort(xy_text[:, 0])] # restore start = time.time() text_box_restored = restore_rectangle(xy_text[:, ::-1]*4, geo_map[xy_text[:, 0], xy_text[:, 1], :]) # N*4*2 logging.debug('{} text boxes before nms'.format(text_box_restored.shape[0])) boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32) boxes[:, :8] = text_box_restored.reshape((-1, 8)) boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]] # nms part start = time.time() # boxes = nms_locality.nms_locality(boxes.astype(np.float64), nms_thres) logging.debug('{} boxes before merging'.format(boxes.shape[0])) boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thres) if boxes.shape[0] == 0: return None logging.debug('{} boxes before checking scores'.format(boxes.shape[0])) # here we filter some low score boxes by the average score map, this is different from the orginal paper for i, box in enumerate(boxes): mask = np.zeros_like(score_map, dtype=np.uint8) cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1) boxes[i, 8] = cv2.mean(score_map, mask)[0] boxes = boxes[boxes[:, 8] > box_thresh] return boxes def sort_poly(self, p): min_axis = np.argmin(np.sum(p, axis=1)) p = p[[min_axis, (min_axis+1)%4, (min_axis+2)%4, (min_axis+3)%4]] if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]): return p else: return p[[0, 3, 2, 1]] def predict_one_file(self, img_file): im = cv2.imread(img_file)[:, :, ::-1] return predict_one_image(im. img_file) def predict_one_image(self, im, img_file): im_resized, (ratio_h, ratio_w) = self.resize_image(im) im_resized = im_resized.astype(np.float32) im_resized = im_resized.transpose(2, 0, 1) im_resized = torch.from_numpy(im_resized) im_resized = im_resized.cuda() im_resized = im_resized.unsqueeze(0) score, geometry = self.model(im_resized) score = score.permute(0, 2, 3, 1) geometry = geometry.permute(0, 2, 3, 1) score = score.data.cpu().numpy() geometry = geometry.data.cpu().numpy() boxes = self.detect(score_map=score, geo_map=geometry) letters = None if boxes is not None: boxes = boxes[:, :8].reshape((-1, 4, 2)) boxes[:, :, 0] /= ratio_w boxes[:, :, 1] /= ratio_h logging.debug("found {} boxes".format(len(boxes))) fstem = pathlib.Path(img_file).stem letters = self.save_boxes(os.path.join(self.output_path, fstem + "_boxes.txt"), boxes) cv2.imwrite(os.path.join(self.output_path, fstem + "_with_box.jpg"), im[:, :, ::-1]) else: logging.debug("Did not find boxes") return letters def save_boxes(self, filename, boxes): letters = [] with open(filename, 'w+') as f: for box in boxes: box = self.sort_poly(box.astype(np.int32)) if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: logging.debug('wrong direction') continue #if box[0, 0] < 0 or box[0, 1] < 0 or box[1,0] < 0 or box[1,1] < 0 or box[2,0]<0 or box[2,1]<0 or box[3,0] < 0 or box[3,1]<0: # logging.debug("wrong box, {}".format(box)) # continue for x in range(4): for y in [0, 1]: if (box[x, y] < 0): box[x, y] = 0 poly = np.array([[box[0, 0], box[0, 1]], [box[1, 0], box[1, 1]], [box[2, 0], box[2, 1]], [box[3, 0], box[3, 1]]]) p_area = polygon_area(poly) if p_area > 0: poly = poly[(0, 3, 2, 1), :] f.write('{},{},{},{},{},{},{},{}\r\n' .format(poly[0, 0], poly[0, 1], poly[1, 0], poly[1, 1], poly[2, 0], poly[2, 1], poly[3, 0], poly[3, 1],)) # cv2.polylines(im[:, :, ::-1], [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 0, 0), thickness=1) letters.append(('', poly[0, 0], poly[0, 1], poly[2, 0], poly[2, 1])) return letters
def main(): warnings.simplefilter('ignore', np.RankWarning) #Model video_root_path = os.path.abspath('./dataset/train/') video_name_list = sorted( [p for p in os.listdir(video_root_path) if p.split('_')[0] == 'Video']) #print('video_name_list', video_name_list) print('EAST <==> Prepare <==> Network <==> Begin') model = East() AGD_model = AGD() model = nn.DataParallel(model, device_ids=cfg.gpu_ids) #AGD_model = nn.DataParallel(AGD_model, device_ids=cfg.gpu_ids) model = model.cuda() AGD_model = AGD_model.cuda() init_weights(model, init_type=cfg.init_type) cudnn.benchmark = True criterion1 = LossFunc() # criterion2 = Ass_loss() optimizer1 = torch.optim.Adam(model.parameters(), lr=cfg.lr) optimizer2 = torch.optim.Adam(AGD_model.parameters(), lr=cfg.lr) scheduler = lr_scheduler.StepLR(optimizer1, step_size=10000, gamma=0.94) # init or resume if cfg.resume and os.path.isfile(cfg.checkpoint): weightpath = os.path.abspath(cfg.checkpoint) print( "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format( weightpath)) checkpoint = torch.load(weightpath) start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) #AGD_model.load_state_dict(checkpoint['model2.state_dict']) optimizer1.load_state_dict(checkpoint['optimizer']) #optimizer2.load_state_dict(checkpoint['optimizer2']) print( "EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format( weightpath)) else: start_epoch = 0 print('EAST <==> Prepare <==> Network <==> Done') for epoch in range(start_epoch + 1, cfg.max_epochs): for video_name in video_name_list: print( 'EAST <==> epoch:{} <==> Prepare <==> DataLoader <==>{} Begin'. format(epoch, video_name)) trainset = custom_dset(os.path.join(video_root_path, video_name)) #sampler = sampler_for_video_clip(len(trainset)) train_loader = DataLoader(trainset, batch_size=cfg.train_batch_size_per_gpu * cfg.gpu, shuffle=False, collate_fn=collate_fn, num_workers=cfg.num_workers, drop_last=True) print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format( cfg.train_batch_size_per_gpu * cfg.gpu)) print( 'EAST <==> epoch:{} <==> Prepare <==> DataLoader <==>{} Done'. format(epoch, video_name)) train(train_loader, model, AGD_model, criterion1, criterion2, scheduler, optimizer1, optimizer2, epoch) ''' for i, (img, score_map, geo_map, training_mask, coord_ids) in enumerate(train_loader): print('i{} img.shape:{} geo_map.shape{} training_mask.shape{} coord_ids.len{}'.format(i, score_map.shape, geo_map.shape, training_mask.shape, len(coord_ids))) ''' if epoch % cfg.eval_iteration == 0: state = { 'epoch': epoch, 'model1.state_dict': model.state_dict(), 'model2.state_dict': AGD_model.state_dict(), 'optimizer1': optimizer1.state_dict(), 'optimizer2': optimizer2.state_dict() } save_checkpoint(state, epoch)