def main():
    """Entry point for training the sleep classifier on the TextSleep dataset.

    Builds train/val/test tensors, the model, optimizer and LR scheduler,
    then runs the epoch loop; after each epoch the accumulated accuracy
    histories (module-level accuracy_* lists) are dumped to ./result/<R>.txt.
    """
    global lr  # rebound below so other module functions see the configured LR
    all_data = TextSleep('.', is_training=True)
    # Column 0 of each split tensor is assumed to hold the class label —
    # TODO confirm against TextSleep (train() below slices data[:, 0] as target).
    test_data = torch.from_numpy(all_data.test_data).float()
    test_data = to_device(test_data)
    train_data = torch.from_numpy(all_data.train_data).float()
    train_data = to_device(train_data)
    val_data = torch.from_numpy(all_data.val_data).float()
    val_data = to_device(val_data)
    R = all_data.R  # used only to name the result file
    train_loader = data.DataLoader(all_data, batch_size=cfg.batch_size, shuffle=True,
                                   num_workers=cfg.num_workers, pin_memory=True)
    # Model
    model = SleepModel(5, is_training=True)
    model = model.to(cfg.device)
    if cfg.cuda:
        cudnn.benchmark = True
    if cfg.resume:
        load_model(model, cfg.resume)
    lr = cfg.lr
    moment = cfg.momentum
    if cfg.optim == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=moment)
    # LR decays by 10% every 100 scheduler steps (stepped once per epoch below)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.90)
    print('Start training sleep model.')
    for epoch in range(cfg.start_epoch, cfg.start_epoch + cfg.max_epoch+1):
        # NOTE(review): scheduler.step() before train() means the first epoch
        # already advances the schedule; modern PyTorch expects step() after
        # the epoch's optimizer steps — confirm intended.
        scheduler.step()
        train(model, train_loader, train_data, test_data, val_data, scheduler,
              optimizer, epoch)
        # Rewrite the full accuracy history every epoch so the file stays
        # current even if training is interrupted.
        with open("./result/{}.txt".format(R), "w") as f:
            str_train = ','.join([str(i) for i in accuracy_trains])
            str_test = ','.join([str(i) for i in accuracy_tests])
            str_val = ','.join([str(i) for i in accuracy_vals])
            f.write("{}\n{}\n{}".format(str_train, str_test, str_val))
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def main():
    """Entry point for training the FC sleep classifier on DataLoad() data.

    After training, selects the 10 epochs with the highest validation
    accuracy and writes their test-set predictions (plus the ground-truth
    row) to FC_result.txt as CSV lines.
    """
    global lr  # rebound below so other module functions see the configured LR
    all_data = DataLoad()
    test_data = torch.from_numpy(all_data.test_data).float()
    test_data = to_device(test_data)
    val_data = torch.from_numpy(all_data.val_data).float()
    val_data = to_device(val_data)
    train_loader = data.DataLoader(all_data, batch_size=cfg.batch_size, shuffle=True,
                                   num_workers=cfg.num_workers, pin_memory=True)
    # Model
    model = SleepModel(5, is_training=True)
    model = model.to(cfg.device)
    if cfg.cuda:
        cudnn.benchmark = True
    if cfg.resume:
        load_model(model, cfg.resume)
    lr = cfg.lr
    moment = cfg.momentum
    if cfg.optim == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=moment)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.90)
    print('Start training sleep model.')
    for epoch in range(cfg.start_epoch, cfg.start_epoch + cfg.max_epoch+1):
        scheduler.step()
        train(model, train_loader, test_data, val_data, scheduler, optimizer, epoch)
    # Indices of the 10 best epochs by validation accuracy (descending).
    index = np.argsort(np.array(accuracy_vals))[::-1][:10]
    # pred_np is assumed to be a module-level list of per-epoch prediction
    # arrays — TODO confirm its shape; [:, :, 0] takes the first column.
    pred_slect = np.array(pred_np)[:,:,0][index]
    with open("FC_result.txt", "w") as fn:
        # First row: ground-truth labels (column 0 of the test split).
        fn.write("gt,"+",".join([str(int(i)) for i in test_data.cpu().numpy()[:,0]])+"\n")
        for pn in pred_slect:
            fn.write("fc," + ",".join([str(i) for i in pn]) + "\n")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def inference(model, detector, test_loader):
    """Run the TextSnake-style model over `test_loader` and visualize each
    detection.

    The 7-channel network output is split into text-region (tr),
    text-center-line (tcl), sin/cos orientation and radius maps, which the
    detector turns into disk-chain results.

    NOTE(review): no torch.no_grad() here, so autograd graphs are built
    during inference — consider wrapping; confirm before changing.
    """
    model.eval()
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(test_loader):
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        # inference
        output = model(img)
        for idx in range(img.size(0)):
            print('detect {} images: {}.'.format(idx, meta['image_id'][idx]))
            # Channels 0-1: text-region logits; 2-3: center-line logits;
            # 4: sin map; 5: cos map; 6: radius map.
            tr_pred = output[idx, 0:2].softmax(dim=0).data.cpu().numpy()
            tcl_pred = output[idx, 2:4].softmax(dim=0).data.cpu().numpy()
            sin_pred = output[idx, 4].data.cpu().numpy()
            cos_pred = output[idx, 5].data.cpu().numpy()
            radii_pred = output[idx, 6].data.cpu().numpy()
            batch_result = detector.detect(tr_pred, tcl_pred, sin_pred, cos_pred,
                                           radii_pred)  # (n_tcl, 3)
            # visualization — undo per-channel normalization back to uint8 RGB
            img_show = img[idx].permute(1, 2, 0).cpu().numpy()
            img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(
                np.uint8)
            # NOTE(review): batch_result was produced for this single image,
            # yet is indexed by idx here — looks like a per-image/per-batch
            # mix-up; verify detector.detect's return shape.
            visualize_detection(img_show, tr_pred[1], tcl_pred[1],
                                batch_result[idx],
                                '{}_{}'.format(i, meta['image_id'][idx]))
def validation(model, valid_loader, criterion):
    """Evaluate the regression/dice model on the validation set.

    Runs under torch.no_grad(); accumulates total, regression, center-dice
    and region-dice losses in AverageMeters and prints per-batch and
    averaged values. Returns nothing.
    """
    with torch.no_grad():
        model.eval()
        losses = AverageMeter()
        reg_losses = AverageMeter()
        center_loss = AverageMeter()
        region_loss = AverageMeter()
        for i, (img, reg_mask, meta) in enumerate(valid_loader):
            img, reg_mask = to_device(img, reg_mask)
            output = model(img)
            loss_reg, loss_dice_center, loss_dice_region = criterion(
                output, reg_mask)
            loss = loss_reg + loss_dice_center + loss_dice_region
            losses.update(loss.item())
            reg_losses.update(loss_reg.item())
            center_loss.update(loss_dice_center.item())
            region_loss.update(loss_dice_region.item())
            if cfg.visualization and i % cfg.visualization_frequency == 0:
                visualize_network_output(img, output, reg_mask, mode='val')
            # Per-batch losses (printed every batch, not at display frequency).
            print(
                'Validation: - Loss: {:.4f} - Reg_Loss: {:.4f} - Center_Dice_Loss: {:.4f} - Region_Dice_Loss: {:.4f}'
                .format(loss.item(), loss_reg.item(), loss_dice_center.item(),
                        loss_dice_region.item()))
        print('Validation Loss: {}'.format(losses.avg))
        print('Regression Loss: {}'.format(reg_losses.avg))
        print('Center Dice Loss: {}'.format(center_loss.avg))
        print('Region Dice Loss: {}'.format(region_loss.avg))
def validation(model, valid_loader, criterion):
    """Evaluate the TextSnake model (tr/tcl/sin/cos/radii losses) on the
    validation set and print per-batch and averaged totals.

    NOTE(review): unlike the sibling validation(), this one does not use
    torch.no_grad(), so gradients are tracked during evaluation — confirm
    before changing.
    """
    model.eval()
    losses = AverageMeter()
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(valid_loader):
        print(meta['image_id'])
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map,
                      train_mask)
        # Total loss is the unweighted sum of the five task losses.
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
        losses.update(loss.item())
        # Only the first cfg.vis_num batches are visualized.
        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output, tr_mask, tcl_mask,
                                     prefix='val_{}'.format(i))
        if i % cfg.display_freq == 0:
            print(
                'Validation: - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(loss.item(), tr_loss.item(), tcl_loss.item(),
                        sin_loss.item(), cos_loss.item(), radii_loss.item()))
    print('Validation Loss: {}'.format(losses.avg))
def train(model, train_loader, train_data, test_data, val_data, scheduler,
          optimizer, epoch):
    """Train the sleep classifier for one epoch; every cfg.save_freq epochs
    also record full-split accuracies for train/val/test.

    Column 0 of each split tensor holds the integer class label; the
    remaining columns are the features fed to the model. Rounded accuracies
    are appended to the module-level accuracy_* lists.
    """
    global train_step
    global accuracy_tests
    global accuracy_trains
    global accuracy_vals
    losses = AverageMeter(max=100)
    model.train()
    # scheduler.step()
    print('Epoch: {} : LR = {}'.format(epoch, scheduler.get_lr()))
    for i, data in enumerate(train_loader):
        train_step += 1
        data = to_device(data)
        # Skip ragged final batches so every step sees a fixed batch size.
        if data.shape[0] != cfg.batch_size:
            continue
        output = model(data[:, 1:])       # features only
        target = data[:, 0].long()        # labels in column 0
        loss = F.nll_loss(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.update(loss.item())
        gc.collect()
        if i % cfg.display_freq == 0:
            print("({:d} / {:d}), loss: {:.3f}".format(i, len(train_loader),
                                                       loss.item()))
    if epoch % cfg.save_freq == 0:
        # Fix: the accuracy evaluation previously ran with autograd enabled,
        # building throwaway graphs over the entire splits. no_grad() keeps
        # the numbers identical while avoiding that memory/compute cost.
        with torch.no_grad():
            accuracy_test = _split_accuracy(model, test_data)
            accuracy_tests.append(round(accuracy_test.item(), 3))
            accuracy_train = _split_accuracy(model, train_data)
            accuracy_trains.append(round(accuracy_train.item(), 3))
            accuracy_val = _split_accuracy(model, val_data)
            accuracy_vals.append(round(accuracy_val.item(), 3))
        print("accuracy_train: {}; accuracy_val: {}; accuracy_test: {}"
              .format(accuracy_train, accuracy_val, accuracy_test))
    # if epoch % cfg.save_freq == 0:
    #     save_model(model, epoch, scheduler.get_lr(), optimizer)
    print('Training Loss: {}'.format(losses.avg))


def _split_accuracy(model, dataset):
    """Return classification accuracy (%) of `model` over a whole split.

    `dataset` is a 2-D tensor whose column 0 is the label and whose
    remaining columns are features. Returns a 0-dim tensor (callers use
    .item()).
    """
    labels = dataset[:, 0].long()
    output = model(dataset[:, 1:])
    pred = output.data.max(1, keepdim=True)[1]
    correct = pred.eq(labels.data.view_as(pred)).cpu().sum()
    return correct * 100.0 / labels.shape[0]
def inference(detector, test_loader, output_dir):
    """Detect text in every test image, save per-image visualizations, and
    write results under `output_dir` in the format required by the current
    benchmark (cfg.exp_name: Icdar2015 / TD500 / other).

    Assumes the test loader uses batch_size == 1 (only index 0 is read).
    """
    total_time = 0.  # NOTE(review): accumulated nowhere in this variant — dead.
    # MLT2017 keeps any existing output dir; other benchmarks recreate it.
    if cfg.exp_name != "MLT2017":
        osmkdir(output_dir)
    else:
        if not os.path.exists(output_dir):
            mkdirs(output_dir)
    for i, (image, meta) in enumerate(test_loader):
        image = to_device(image)
        torch.cuda.synchronize()
        idx = 0  # test mode can only run with batch_size == 1
        # visualization — undo normalization, convert to RGB for the detector
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        img_show = cv2.cvtColor(img_show, cv2.COLOR_BGR2RGB)
        # get detection result
        contours, output = detector.detect(image, img_show)
        tr_pred, tcl_pred = output['tr'], output['tcl']
        torch.cuda.synchronize()
        print('detect {} / {} images: {}.'.format(i + 1, len(test_loader),
                                                  meta['image_id'][idx]))
        pred_vis = visualize_detection(img_show, contours, tr_pred[1], tcl_pred[1])
        path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name),
                            meta['image_id'][idx])
        cv2.imwrite(path, pred_vis)
        # Rescale contours back to the original image resolution.
        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)
        # write to file (benchmark-specific naming / format)
        if cfg.exp_name == "Icdar2015":
            fname = "res_" + meta['image_id'][idx].replace('jpg', 'txt')
            contours = data_transfer_ICDAR(contours)
            write_to_file(contours, os.path.join(output_dir, fname))
        elif cfg.exp_name == "TD500":
            fname = "res_" + meta['image_id'][idx].replace('JPG', 'txt')
            im_show = data_transfer_TD500(contours,
                                          os.path.join(output_dir, fname),
                                          img_show)
            id_img = meta['image_id'][idx].replace("img_", "").replace("JPG", "jpg")
            path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), id_img)
            cv2.imwrite(path, im_show)
        else:
            fname = meta['image_id'][idx].replace('jpg', 'txt')
            write_to_file(contours, os.path.join(output_dir, fname))
def validation(self, model, valid_loader, criterion, epoch, logger):
    """Evaluate the TextSnake model on the validation set and log averaged
    losses to `logger` under the 'val' tag at `n_iter=epoch`.

    Runs under torch.no_grad(); tracks total plus the five task losses in
    AverageMeters. `self` is not used by this method.
    """
    with torch.no_grad():
        model.eval()
        losses = AverageMeter()
        tr_losses = AverageMeter()
        tcl_losses = AverageMeter()
        sin_losses = AverageMeter()
        cos_losses = AverageMeter()
        radii_losses = AverageMeter()
        for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                cos_map, meta) in enumerate(valid_loader):
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
                img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
            output = model(img)
            tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
                criterion(output, tr_mask, tcl_mask, sin_map, cos_map,
                          radius_map, train_mask)
            # Total loss is the unweighted sum of the five task losses.
            loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
            # update losses
            losses.update(loss.item())
            tr_losses.update(tr_loss.item())
            tcl_losses.update(tcl_loss.item())
            sin_losses.update(sin_loss.item())
            cos_losses.update(cos_loss.item())
            radii_losses.update(radii_loss.item())
            if cfg.viz and i % cfg.viz_freq == 0:
                visualize_network_output(output, tr_mask, tcl_mask, mode='val')
            if i % cfg.display_freq == 0:
                print(
                    'Validation: - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                    .format(loss.item(), tr_loss.item(), tcl_loss.item(),
                            sin_loss.item(), cos_loss.item(), radii_loss.item()))
        # One scalar bundle per epoch of validation averages.
        logger.write_scalars(
            {
                'loss': losses.avg,
                'tr_loss': tr_losses.avg,
                'tcl_loss': tcl_losses.avg,
                'sin_loss': sin_losses.avg,
                'cos_loss': cos_losses.avg,
                'radii_loss': radii_losses.avg
            },
            tag='val',
            n_iter=epoch)
        print('Validation Loss: {}'.format(losses.avg))
def inference(detector, test_loader, output_dir):
    """Detect text on the test set, save prediction-vs-ground-truth
    visualizations stacked vertically, and write contours to per-image text
    files under `output_dir`.

    Assumes batch_size == 1 (only index 0 is read). Reports a running fps
    that covers detector.detect() only.
    """
    total_time = 0.
    for i, (image, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
            cos_map, meta) in enumerate(test_loader):
        image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        torch.cuda.synchronize()  # accurate GPU timing
        start = time.time()
        idx = 0  # test mode can only run with batch_size == 1
        # get detection result
        contours, output = detector.detect(image)
        torch.cuda.synchronize()
        end = time.time()
        total_time += end - start
        fps = (i + 1) / total_time
        print('detect {} / {} images: {}. ({:.2f} fps)'.format(
            i + 1, len(test_loader), meta['image_id'][idx], fps))
        # visualization — undo normalization back to uint8
        tr_pred, tcl_pred = output['tr'], output['tcl']
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        pred_vis = visualize_detection(img_show, contours, tr_pred[1], tcl_pred[1])
        # Collect valid ground-truth polygons (n_annotation gives the number
        # of real points in each padded annotation row).
        gt_contour = []
        for annot, n_annot in zip(meta['annotation'][idx],
                                  meta['n_annotation'][idx]):
            if n_annot.item() > 0:
                gt_contour.append(annot[:n_annot].int().cpu().numpy())
        gt_vis = visualize_detection(img_show, gt_contour,
                                     tr_mask[idx].cpu().numpy(),
                                     tcl_mask[idx].cpu().numpy())
        # Stack prediction (top) and ground truth (bottom) into one image.
        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
        path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name),
                            meta['image_id'][idx])
        cv2.imwrite(path, im_vis)
        # Rescale contours back to the original image resolution.
        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)
        # write to file
        mkdirs(output_dir)
        write_to_file(
            contours,
            os.path.join(output_dir,
                         meta['image_id'][idx].replace('jpg', 'txt')))
def inference(model, detector, test_loader):
    """Run the 7-channel TextSnake model over the test set, build polygon
    contours from the disk-chain results, save stacked pred/gt
    visualizations, and write rescaled contours to cfg.output_dir.

    NOTE(review): no torch.no_grad() here, so autograd graphs are built
    during inference — consider wrapping; confirm before changing.
    """
    model.eval()
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(test_loader):
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        # inference
        output = model(img)
        for idx in range(img.size(0)):
            print('detect {} / {} images: {}.'.format(i, len(test_loader),
                                                      meta['image_id'][idx]))
            # Channels 0-1: text-region logits; 2-3: center-line logits;
            # 4: sin map; 5: cos map; 6: radius map.
            tr_pred = output[idx, 0:2].softmax(dim=0).data.cpu().numpy()
            tcl_pred = output[idx, 2:4].softmax(dim=0).data.cpu().numpy()
            sin_pred = output[idx, 4].data.cpu().numpy()
            cos_pred = output[idx, 5].data.cpu().numpy()
            radii_pred = output[idx, 6].data.cpu().numpy()
            batch_result = detector.detect(tr_pred, tcl_pred, sin_pred,
                                           cos_pred, radii_pred)  # (n_tcl, 3)
            # visualization — undo normalization back to uint8
            img_show = img[idx].permute(1, 2, 0).cpu().numpy()
            img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(
                np.uint8)
            # Convert disk-chain results into polygon contours.
            contours = result2polygon(img_show, batch_result)
            pred_vis = visualize_detection(img_show, tr_pred[1], tcl_pred[1],
                                           contours)
            # Collect valid ground-truth polygons (padded rows trimmed by
            # n_annotation).
            gt_contour = []
            for annot, n_annot in zip(meta['annotation'][idx],
                                      meta['n_annotation'][idx]):
                if n_annot.item() > 0:
                    gt_contour.append(annot[:n_annot].int().cpu().numpy())
            gt_vis = visualize_detection(img_show, tr_mask[idx].cpu().numpy(),
                                         tcl_mask[idx].cpu().numpy(),
                                         gt_contour)
            # Stack prediction (top) and ground truth (bottom).
            im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
            path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name),
                                meta['image_id'][idx])
            cv2.imwrite(path, im_vis)
            # Rescale contours back to the original resolution before writing.
            H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
            img_show, contours = rescale_result(img_show, contours, H, W)
            write_to_file(
                contours,
                os.path.join(cfg.output_dir,
                             meta['image_id'][idx].replace('jpg', 'txt')))
def train(model, train_loader, criterion, scheduler, optimizer, epoch):
    """Train the TextSnake model for one epoch; checkpoint every
    cfg.save_freq epochs (epoch > 0).

    The LR scheduler is stepped once per BATCH, not per epoch.
    """
    start = time.time()
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(train_loader):
        data_time.update(time.time() - end)
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map,
                      train_mask)
        # Total loss is the unweighted sum of the five task losses.
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
        # backward
        # NOTE(review): scheduler.step() before optimizer.step() triggers the
        # PyTorch ordering warning; moving it would shift the LR schedule by
        # one step, so it is left as-is here.
        scheduler.step()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Only the first cfg.vis_num batches are visualized.
        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output, tr_mask, tcl_mask,
                                     prefix='train_{}'.format(i))
        if i % cfg.display_freq == 0:
            print(
                'Epoch: [ {} ][ {:03d} / {:03d} ] - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(epoch, i, len(train_loader), loss.item(),
                        tr_loss.item(), tcl_loss.item(), sin_loss.item(),
                        cos_loss.item(), radii_loss.item()))
    if epoch % cfg.save_freq == 0 and epoch > 0:
        save_model(model, epoch, scheduler.get_lr())
    print('Training Loss: {}'.format(losses.avg))
def inference(detector, test_loader, output_dir):
    """Detect (and optionally recognize, when cfg.spotter) text on the test
    set, save stacked pred/gt visualizations, and write per-image result
    files under `output_dir`.

    Assumes batch_size == 1 (only index 0 is read).
    """
    total_time = 0.0
    for i, (image, reg_mask, meta) in enumerate(test_loader):
        image, reg_mask = to_device(image, reg_mask)
        torch.cuda.synchronize()  # accurate GPU timing
        start = time.time()
        index = 0  # test mode can only run with batch_size == 1
        contours, aster_text, output = detector.detect(image)
        torch.cuda.synchronize()
        end = time.time()
        total_time += end - start
        fps = (i + 1) / total_time
        print('detect {} | {} images: {}. ({:.2f} fps)'.format(
            i + 1, len(test_loader), meta['image_id'][index], fps))
        # visualization — undo normalization back to uint8
        pred_mask = output['reg']
        img_show = image[index].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        # Spotter mode overlays recognized text alongside the boxes.
        if (cfg.spotter):
            pred_vis = visualize_detection_end_to_end(img_show, contours,
                                                      aster_text, pred_mask)
        else:
            pred_vis = visualize_detection(img_show, contours, pred_mask)
        # Collect valid ground-truth polygons (padded rows trimmed by
        # n_annotation).
        gt_contour = []
        for annot, n_annot in zip(meta['annotation'][index],
                                  meta['n_annotation'][index]):
            if n_annot.item() > 0:
                gt_contour.append(annot[:n_annot].int().cpu().numpy())
        gt_vis = visualize_detection(img_show, gt_contour,
                                     reg_mask[index].cpu().numpy())
        # Stack prediction (top) and ground truth (bottom).
        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
        path = os.path.join(cfg.visualization_directory,
                            '{0}_{1}_test'.format(cfg.dataset_name, cfg.backbone),
                            meta['image_id'][index])
        # .gif cannot be written by cv2.imwrite, hence the .png substitution.
        cv2.imwrite(path.replace('.gif', '.png'), im_vis)
        # Rescale contours back to the original image resolution.
        H, W = meta['Height'][index].item(), meta['Width'][index].item()
        img_show, contours = rescale_result(img_show, contours, H, W)
        mkdirs(output_dir)
        write_to_file(contours, aster_text,
                      os.path.join(output_dir,
                                   meta['image_id'][index].replace('ts_', '')
                                   .replace('.jpg', '.txt').replace('.JPG', '.txt')
                                   .replace('.png', '.txt').replace('.gif', '.txt')))
def train(model, train_loader, criterion, scheduler, optimizer, epoch):
    """Train the regression/dice model for one epoch; checkpoint every
    cfg.save_frequency epochs.

    The LR scheduler is stepped once per BATCH, not per epoch.
    """
    losses = AverageMeter()
    reg_losses = AverageMeter()
    center_loss = AverageMeter()
    region_loss = AverageMeter()
    model.train()
    print('Epoch: {} : LR = {}'.format(epoch, optimizer.param_groups[0]['lr']))
    for i, (img, reg_mask, meta) in enumerate(train_loader):
        # NOTE(review): stepping the scheduler before optimizer.step()
        # triggers the PyTorch ordering warning; left as-is to preserve the
        # existing LR schedule.
        scheduler.step()
        # Collate may yield None on a bad sample; skip the batch.
        if img is None:
            print("Exception loading data! Preparing loading next batch data!")
            continue
        img, reg_mask = to_device(img, reg_mask)
        output = model(img)
        loss_reg, loss_dice_center, loss_dice_region = criterion(
            output, reg_mask)
        loss = loss_reg + loss_dice_center + loss_dice_region
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.update(loss.item())
        reg_losses.update(loss_reg.item())
        center_loss.update(loss_dice_center.item())
        region_loss.update(loss_dice_region.item())
        if cfg.visualization and i % cfg.visualization_frequency == 0:
            visualize_network_output(img, output, reg_mask, mode='train')
        # Printed every batch (no display-frequency guard in this variant).
        print(
            '[{:d} | {:d}] - Loss: {:.4f} - Reg_Loss: {:.4f} - Center_Dice_Loss: {:.4f} - Region_Dice_Loss: {:.4f} - LR: {:e}'
            .format(i, len(train_loader), loss.item(), loss_reg.item(),
                    loss_dice_center.item(), loss_dice_region.item(),
                    optimizer.param_groups[0]['lr']))
    if epoch % cfg.save_frequency == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)
def inference(detector, test_loader, output_dir):
    """Deploy-mode inference: run the detector over `test_loader`
    (batch_size must be 1), save a visualization per image under
    cfg.vis_dir/<exp_name>_deploy, and write the rescaled contours to text
    files under `output_dir`.
    """
    total_time = 0.
    for i, (image, meta) in enumerate(test_loader):
        # print (image)
        image = to_device(image)
        torch.cuda.synchronize()  # accurate GPU timing
        start = time.time()
        idx = 0  # test mode can only run with batch_size == 1
        # get detection result
        contours, output = detector.detect(image)
        torch.cuda.synchronize()
        end = time.time()
        total_time += end - start
        fps = (i + 1) / total_time
        # Fix: progress counter now matches the 1-based count used for fps
        # (previously printed `i`, so the first image showed "detect 0 / N",
        # inconsistent with the sibling inference() variants).
        print('detect {} / {} images: {}. ({:.2f} fps)'.format(
            i + 1, len(test_loader), meta['image_id'][idx], fps))
        # visualization — undo normalization back to uint8
        # (unused tr_pred/tcl_pred locals removed; deploy mode draws
        # contours only)
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        # print (meta)
        # Rescale contours back to the original image resolution.
        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)
        # print (contours)
        pred_vis = visualize_detection(img_show, contours)
        path = os.path.join(cfg.vis_dir, '{}_deploy'.format(cfg.exp_name),
                            meta['image_id'][idx])
        cv2.imwrite(path, pred_vis)
        # write to file
        mkdirs(output_dir)
        write_to_file(
            contours,
            os.path.join(output_dir,
                         meta['image_id'][idx].replace('jpg', 'txt')))
def inference(model, detector, test_loader):
    """Run the 7-channel TextSnake-style model over the test set (with an
    optional half-scale pass when cfg.multi_scale), post-process to polygons
    with confidences, optionally visualize against ground truth, and dump
    all results to cfg.output_dir/result.json keyed by 'res_<id>'.

    Ground-truth polygons are loaded from the dataset's train_labels.json.
    NOTE(review): no torch.no_grad() here, so autograd graphs are built
    during inference — consider wrapping; confirm before changing.
    """
    gt_json_path = os.path.join('/home/shf/fudan_ocr_system/datasets/',
                                cfg.dataset, 'train_labels.json')
    #gt_json_path = '/workspace/mnt/group/ocr/wangxunyan/maskscoring_rcnn/crop_train/crop_result_js.json'
    with open(gt_json_path, 'r') as f:
        gt_dict = json.load(f)
    model.eval()
    result = dict()
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(test_loader):
        timer = {'model': 0, 'detect': 0, 'viz': 0, 'restore': 0}
        start = time.time()
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        # inference
        output = model(img)
        if cfg.multi_scale:
            # Second pass at half resolution, upsampled back for fusion.
            size_h, size_w = img.shape[2:4]
            img_rescale = func.interpolate(img, scale_factor=0.5, mode='nearest')
            output_rescale = model(img_rescale)
            output_rescale = func.interpolate(output_rescale,
                                              size=(size_h, size_w),
                                              mode='nearest')
        timer['model'] = time.time() - start
        for idx in range(img.size(0)):
            start = time.time()
            print('detect {} / {} images: {}.'.format(i, len(test_loader),
                                                      meta['image_id'][idx]))
            # Channels 0-1: text-region logits; 2-3: center-line logits;
            # 4: sin map; 5: cos map; 6: radius map.
            tr_pred = output[idx, 0:2].softmax(dim=0).data.cpu().numpy()
            tcl_pred = output[idx, 2:4].softmax(dim=0).data.cpu().numpy()
            sin_pred = output[idx, 4].data.cpu().numpy()
            cos_pred = output[idx, 5].data.cpu().numpy()
            radii_pred = output[idx, 6].data.cpu().numpy()
            # tr_pred_mask = 1 / (1 + np.exp(-12*tr_pred[1]+3))
            # Soft mask: confident pixels clamp to 1, others keep their prob.
            tr_pred_mask = np.where(tr_pred[1] > detector.tr_conf_thresh, 1,
                                    tr_pred[1])
            # tr_pred_mask = fill_hole(tr_pred_mask)
            tcl_pred_mask = (tcl_pred * tr_pred_mask)[1] > detector.tcl_conf_thresh
            if cfg.multi_scale:
                # NOTE(review): the rescaled tr branch uses sigmoid while the
                # base branch uses softmax — verify this asymmetry is intended.
                tr_pred_rescale = output_rescale[
                    idx, 0:2].sigmoid().data.cpu().numpy()
                tcl_pred_rescale = output_rescale[idx, 2:4].softmax(
                    dim=0).data.cpu().numpy()
                tr_pred_scale_mask = np.where(
                    tr_pred_rescale[1] + tr_pred[1] > 1, 1,
                    tr_pred_rescale[1] + tr_pred[1])
                tr_pred_mask = tr_pred_scale_mask
                # weighted adding
                origin_ratio = 0.5
                rescale_ratio = 0.5
                tcl_pred = (tcl_pred * origin_ratio +
                            tcl_pred_rescale * rescale_ratio).astype(np.float32)
                tcl_pred_mask = (tcl_pred * tr_pred_mask)[1] > detector.tcl_conf_thresh
            batch_result = detector.complete_detect(
                tr_pred_mask, tcl_pred_mask, sin_pred, cos_pred,
                radii_pred)  # (n_tcl, 3)
            timer['detect'] = time.time() - start
            start = time.time()
            # visualization — undo normalization back to uint8
            img_show = img[idx].permute(1, 2, 0).cpu().numpy()
            img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(
                np.uint8)
            H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
            # get pred_contours
            contours = result2polygon(img_show, batch_result)
            if cfg.viz:
                # Round original dims down to multiples of 32 (network
                # input granularity), then rescale to the shown image.
                resize_H = H if H % 32 == 0 else (H // 32) * 32
                resize_W = W if W % 32 == 0 else (W // 32) * 32
                ratio = float(img_show.shape[0]
                              ) / resize_H if resize_H > resize_W else float(
                                  img_show.shape[1]) / resize_W
                resize_H = int(resize_H * ratio)
                resize_W = int(resize_W * ratio)
                gt_info = gt_dict[int(meta['image_id'][idx].lstrip(
                    'gt_').rstrip('.jpg').split('_')[1])]
                gt_contours = []
                # for gt in gt_info:
                #     if not gt['illegibility']:
                #         gt_cont = np.array(gt['points'])
                #         gt_cont[:, 0] = (gt_cont[:, 0] * float(resize_W) / W).astype(np.int32)
                #         gt_cont[:, 1] = (gt_cont[:, 1] * float(resize_H) / H).astype(np.int32)
                #         gt_contours.append(gt_cont)
                gt_cont = np.array(gt_info['points'])
                gt_cont[:, 0] = gt_cont[:, 0] * float(resize_W) / float(W)
                gt_cont[:, 1] = gt_cont[:, 1] * float(resize_H) / float(H)
                gt_contours.append(gt_cont.astype(np.int32))
                illegal_contours = mask2conts(
                    meta['illegal_mask'][idx].cpu().numpy())
                predict_vis = visualize_detection(
                    img_show, tr_pred_mask, tcl_pred_mask.astype(np.uint8),
                    contours.copy())
                gt_vis = visualize_detection(img_show,
                                             tr_mask[idx].cpu().numpy(),
                                             tcl_mask[idx].cpu().numpy(),
                                             gt_contours, illegal_contours)
                im_vis = np.concatenate([predict_vis, gt_vis], axis=0)
                path = os.path.join(cfg.vis_dir, meta['image_id'][idx])
                cv2.imwrite(path, im_vis)
            timer['viz'] = time.time() - start
            start = time.time()
            polygons = calc_confidence(contours, tr_pred)
            img_show, polygons = rescale_padding_result(
                img_show, polygons, H, W)
            # filter too small polygon
            # Fix: the filter loop previously reused `i` as its index,
            # shadowing the outer dataloader counter and corrupting the
            # "detect i / N" progress print for later images in the batch.
            for poly_idx, poly in enumerate(polygons):
                if cv2.contourArea(poly['points']) < 100:
                    polygons[poly_idx] = []
            polygons = [item for item in polygons if item != []]
            # convert np.array to list
            for polygon in polygons:
                polygon['points'] = polygon['points'].tolist()
            result[meta['image_id'][idx].replace('.jpg', '').replace(
                'gt', 'res')] = polygons
            timer['restore'] = time.time() - start
            print(
                'Cost time {:.2f}s: model {:.2f}s, detect {:.2f}s, viz {:.2f}s, restore {:.2f}s'
                .format(
                    timer['model'] + timer['detect'] + timer['viz'] +
                    timer['restore'], timer['model'], timer['detect'],
                    timer['viz'], timer['restore']))
    # write to json file
    with open(os.path.join(cfg.output_dir, 'result.json'), 'w') as f:
        json.dump(result, f)
    print("Output json file in {}.".format(cfg.output_dir))
def inference(detector, test_loader, output_dir):
    """Benchmark-aware inference with detailed stage timing (backbone, IM,
    post-processing, device transfer), stacked pred/gt visualization, and
    per-benchmark result files under `output_dir` (cfg.exp_name selects the
    Icdar2015 / MLT2017 / TD500 / default format).

    Assumes batch_size == 1 (only index 0 is read). fps is computed from
    the detector-reported net+post times, not wall-clock.
    """
    total_time = 0.
    post_all_time = 0.
    net_all_time = 0.
    backbone_all_time = 0.
    IM_all_time = 0.
    detach_all_time = 0.
    # MLT2017 keeps any existing output dir; other benchmarks recreate it.
    if cfg.exp_name != "MLT2017":
        osmkdir(output_dir)
    else:
        if not os.path.exists(output_dir):
            mkdirs(output_dir)
    for i, (image, train_mask, tr_mask, meta) in enumerate(test_loader):
        image, train_mask, tr_mask = to_device(image, train_mask, tr_mask)
        torch.cuda.synchronize()
        idx = 0  # test mode can only run with batch_size == 1
        # visualization — undo normalization back to uint8
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        # compute time
        start = time.time()
        # get detection result
        contours, output, net_time, post_time = detector.detect(image, img_show)
        end = time.time()
        #total_time += end - start
        # Timing uses the detector's own measurements, not end - start.
        total_time += (net_time + post_time)
        post_all_time += post_time
        net_all_time += net_time
        backbone_all_time += output["backbone_time"]
        IM_all_time += output["IM_time"]
        detach_all_time += output["detach_time"]
        fps = (i + 1) / total_time
        # Per-stage averages are reported in milliseconds per image.
        print('detect {} / {} images: {}. ({:.2f} fps); backbone-time:{:.2f}, IM-time:{:.2f}, post-time:{:0.2f}, Transfer-time:{:.2f}'.format(
            i + 1, len(test_loader), meta['image_id'][idx], fps,
            backbone_all_time*1000/(i+1), IM_all_time*1000/(i+1),
            post_all_time*1000/(i+1), detach_all_time*1000/(i+1)))
        # Box-based benchmarks draw output['bbox']; others draw raw contours.
        if cfg.exp_name == "Icdar2015" or cfg.exp_name == "MLT2017" or cfg.exp_name == "TD500":
            pred_vis = visualize_detection(img_show, output['bbox'], output['tr'])
        else:
            pred_vis = visualize_detection(img_show, contours, output['tr'])
        # Collect valid ground-truth polygons (padded rows trimmed by
        # n_annotation).
        gt_contour = []
        for annot, n_annot in zip(meta['annotation'][idx],
                                  meta['n_annotation'][idx]):
            if n_annot.item() > 0:
                gt_contour.append(annot[:n_annot].int().cpu().numpy())
        gt_vis = visualize_gt(img_show, gt_contour, tr_mask[idx].cpu().numpy())
        # Stack prediction (top) and ground truth (bottom).
        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
        path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name),
                            meta['image_id'][idx].split(".")[0]+".jpg")
        cv2.imwrite(path, im_vis)
        # Rescale contours back to the original image resolution.
        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)
        # write to file (benchmark-specific naming / format)
        if cfg.exp_name == "Icdar2015":
            fname = "res_" + meta['image_id'][idx].replace('jpg', 'txt')
            contours = data_transfer_ICDAR(contours)
            write_to_file(contours, os.path.join(output_dir, fname))
        elif cfg.exp_name == "MLT2017":
            # MLT2017 results are grouped per checkpoint epoch.
            out_dir = os.path.join(output_dir, str(cfg.checkepoch))
            if not os.path.exists(out_dir):
                mkdirs(out_dir)
            fname = meta['image_id'][idx].split("/")[-1].replace('ts', 'res')
            fname = fname.split(".")[0] + ".txt"
            data_transfer_MLT2017(contours, os.path.join(out_dir, fname))
        elif cfg.exp_name == "TD500":
            fname = "res_" + meta['image_id'][idx].split(".")[0]+".txt"
            data_transfer_TD500(contours, os.path.join(output_dir, fname))
        else:
            fname = meta['image_id'][idx].replace('jpg', 'txt')
            write_to_file(contours, os.path.join(output_dir, fname))
def train(model, train_loader, criterion, scheduler, optimizer, epoch, logger):
    """Train the TextSnake model for one epoch, logging per-step scalars to
    `logger` every cfg.log_freq batches and checkpointing every
    cfg.save_freq epochs.

    The scheduler is stepped once per EPOCH (at the top of this function).
    """
    global train_step  # monotonically increasing step counter across epochs
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    scheduler.step()
    # NOTE(review): prints the module-level `lr`, which is not updated by the
    # scheduler — the displayed LR may be stale; confirm intended.
    print('Epoch: {} : LR = {}'.format(epoch, lr))
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(train_loader):
        data_time.update(time.time() - end)
        train_step += 1
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        # forward pass
        output = model(img)
        # loss computation
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map,
                      train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
        # backward
        # clear gradients from the previous iteration
        optimizer.zero_grad()
        # backpropagate
        loss.backward()
        # apply the gradient update
        optimizer.step()
        # update running loss
        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask, mode='train')
        if i % cfg.display_freq == 0:
            print(
                '({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item()))
        if i % cfg.log_freq == 0:
            logger.write_scalars(
                {
                    'loss': loss.item(),
                    'tr_loss': tr_loss.item(),
                    'tcl_loss': tcl_loss.item(),
                    'sin_loss': sin_loss.item(),
                    'cos_loss': cos_loss.item(),
                    'radii_loss': radii_loss.item()
                },
                tag='train',
                n_iter=train_step)
    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)
    print('Training Loss: {}'.format(losses.avg))
def Predict(self, image_path, output_img_path="output.jpg",
            output_txt_path="output.txt", tr_thresh=0.4, tcl_thresh=0.4):
    """Run single-image text detection and save the visualization and the
    contour text file.

    Args:
        image_path: path to the input image.
        output_img_path: where the annotated image is written.
        output_txt_path: where the detected contours are written.
        tr_thresh: text-region confidence threshold for the detector.
        tcl_thresh: text-center-line confidence threshold.

    Uses the cfg and model previously stored in self.system_dict["local"].
    """
    cfg = self.system_dict["local"]["cfg"]
    model = self.system_dict["local"]["model"]
    start = time.time()
    image = pil_load_img(image_path)
    transform = BaseTransform(size=cfg.input_size, mean=cfg.means, std=cfg.stds)
    H, W, _ = image.shape
    image, polygons = transform(image)
    # to pytorch channel sequence (HWC -> CHW)
    image = image.transpose(2, 0, 1)
    meta = {
        'image_id': 0,
        'image_path': image_path,
        'Height': H,
        'Width': W
    }
    # Add a batch dimension of 1.
    image = torch.from_numpy(np.expand_dims(image, axis=0))
    image = to_device(image)
    if (self.system_dict["local"]["cfg"].cuda):
        torch.cuda.synchronize()
    end = time.time()
    print("Image loading time: {}".format(end - start))
    start = time.time()
    detector = TextDetector(model, tr_thresh=tr_thresh, tcl_thresh=tcl_thresh)
    # get detection result
    contours, output = detector.detect(image)
    torch.cuda.synchronize()
    end = time.time()
    print("Inference time - {}".format(end - start))
    start = time.time()
    tr_pred, tcl_pred = output['tr'], output['tcl']
    # Undo normalization back to a uint8 image for drawing.
    img_show = image[0].permute(1, 2, 0).cpu().numpy()
    img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
    # Rescale contours back to the original image resolution.
    img_show, contours = rescale_result(img_show, contours, H, W)
    pred_vis = visualize_detection(img_show, contours)
    cv2.imwrite(output_img_path, pred_vis)
    # write to file
    self.write_to_file(contours, output_txt_path)
    end = time.time()
    print("Writing output time - {}".format(end - start))
def train(model, train_loader, criterion, scheduler, optimizer, epoch, summary_writer):
    """Run one training epoch, writing TensorBoard summaries.

    Iterates the loader, computes the five branch losses (the criterion also
    receives the current ``total_iter``, presumably for loss scheduling —
    TODO confirm), backpropagates their sum, and every ``cfg.summary_freq``
    iterations writes images and scalars to ``summary_writer``.

    Side effects: increments the module-global ``total_iter`` once per batch
    (AFTER the summary check, so iteration 0 is summarized).
    """
    start = time.time()
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    global total_iter
    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta) in enumerate(train_loader):
        data_time.update(time.time() - end)
        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        output = model(img)
        # Criterion is given total_iter, unlike the sibling train() variants.
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask, total_iter)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
        # backward
        # scheduler.step()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Only the first cfg.vis_num batches of the epoch are visualized.
        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output, tr_mask, tcl_mask, prefix='train_{}'.format(i))
        if i % cfg.display_freq == 0:
            print(
                'Epoch: [ {} ][ {:03d} / {:03d} ] - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f} - {:.2f}s/step'
                .format(epoch, i, len(train_loader), loss.item(),
                        tr_loss.item(), tcl_loss.item(), sin_loss.item(),
                        cos_loss.item(), radii_loss.item(), batch_time.avg))
        # write summary
        if total_iter % cfg.summary_freq == 0:
            print('Summary in {}'.format(
                os.path.join(cfg.summary_dir, cfg.exp_name)))
            # Channels 0:2 / 2:4 of the output are the tr / tcl logit pairs;
            # take the foreground probability of each.
            tr_pred = output[:, 0:2].softmax(dim=1)[:, 1:2]
            tcl_pred = output[:, 2:4].softmax(dim=1)[:, 1:2]
            summary_writer.add_image('input_image',
                                     vutils.make_grid(img, normalize=True),
                                     total_iter)
            summary_writer.add_image(
                'tr/tr_pred',
                vutils.make_grid(tr_pred * 255, normalize=True), total_iter)
            summary_writer.add_image(
                'tr/tr_mask',
                vutils.make_grid(
                    torch.unsqueeze(tr_mask * train_mask, 1) * 255),
                total_iter)
            summary_writer.add_image(
                'tcl/tcl_pred',
                vutils.make_grid(tcl_pred * 255, normalize=True), total_iter)
            summary_writer.add_image(
                'tcl/tcl_mask',
                vutils.make_grid(
                    torch.unsqueeze(tcl_mask * train_mask, 1) * 255),
                total_iter)
            summary_writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'],
                                      total_iter)
            summary_writer.add_scalar('model/tr_loss', tr_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/tcl_loss', tcl_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/sin_loss', sin_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/cos_loss', cos_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/radii_loss', radii_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/loss', loss.item(), total_iter)
        total_iter += 1
    print('Speed: {}s /step, {}s /epoch'.format(batch_time.avg,
                                                time.time() - start))
    if epoch % cfg.save_freq == 0:
        save_model(model, optimizer, scheduler, epoch)
    print('Training Loss: {}'.format(losses.avg))
def train(model, train_loader, criterion, scheduler, optimizer, epoch, logger):
    """Run one training epoch for the TextSnake + GCN model variant.

    The model additionally consumes ground-truth ROIs and produces GCN data,
    yielding a sixth loss term (``gcn_loss``). A failed backward pass skips
    the batch (best-effort) instead of aborting the whole run.

    Side effects: increments the module-global ``train_step`` per batch.
    """
    global train_step

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    # scheduler.step()
    print('Epoch: {} : LR = {}'.format(epoch, scheduler.get_lr()))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            gt_roi) in enumerate(train_loader):
        data_time.update(time.time() - end)
        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map \
            = to_device(img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # Forward pass; the GCN head needs the gt ROIs and the device helper.
        output, gcn_data = model(img, gt_roi, to_device)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss, gcn_loss \
            = criterion(output, gcn_data, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss + gcn_loss

        # Backward — deliberately best-effort: a batch whose backward fails
        # (e.g. a CUDA error on a degenerate sample) is skipped, not fatal.
        # FIX: narrowed the original bare `except:` (which also swallowed
        # KeyboardInterrupt / SystemExit) to RuntimeError.
        try:
            optimizer.zero_grad()
            loss.backward()
        except RuntimeError:
            print("loss gg")
            continue
        optimizer.step()

        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        gc.collect()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask[:, :, :, 0], mode='train')

        if i % cfg.display_freq == 0:
            print(
                '({:d} / {:d}) Loss: {:.4f} tr_loss: {:.4f} tcl_loss: {:.4f} '
                'sin_loss: {:.4f} cos_loss: {:.4f} radii_loss: {:.4f} gcn_loss: {:.4f}'
                .format(i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item(), gcn_loss.item()))

        if i % cfg.log_freq == 0:
            logger.write_scalars(
                {
                    'loss': loss.item(),
                    'tr_loss': tr_loss.item(),
                    'tcl_loss': tcl_loss.item(),
                    'sin_loss': sin_loss.item(),
                    'cos_loss': cos_loss.item(),
                    'radii_loss': radii_loss.item(),
                    # FIX: scalar tag was 'gcn_loss:' (stray trailing colon),
                    # which produced a malformed/inconsistent log key.
                    'gcn_loss': gcn_loss.item()
                },
                tag='train',
                n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))
def train(self, model, train_loader, criterion, scheduler, optimizer, epoch, logger, train_step):
    """Run one training epoch and return the updated global step counter.

    Unlike the free-function variants, the step counter is passed in and
    returned rather than kept as a module global. The print/log fallbacks
    exist because some loss terms may not support ``.item()`` — presumably
    when a branch loss is a plain number; TODO confirm against the criterion.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()

    scheduler.step()
    lr = scheduler.get_lr()[0]
    print('Epoch: {} : LR = {}'.format(epoch, lr))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(train_loader):
        data_time.update(time.time() - end)
        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # Forward pass and per-branch losses.
        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask, mode='train')

        if i % cfg.display_freq == 0:
            # FIX: removed a block of commented-out debug prints and narrowed
            # the bare `except:` (which also swallowed KeyboardInterrupt /
            # SystemExit) to `except Exception`.
            try:
                print(
                    '({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                    .format(i, len(train_loader), loss.item(), tr_loss.item(),
                            tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                            radii_loss.item()))
            except Exception:
                # Fallback: only the total and tr losses are printable.
                print('({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f}'.
                      format(i, len(train_loader), loss.item(), tr_loss.item()))

        if i % cfg.log_freq == 0:
            # FIX: same narrowing — bare `except:` -> `except Exception`.
            try:
                logger.write_scalars(
                    {
                        'loss': loss.item(),
                        'tr_loss': tr_loss.item(),
                        'tcl_loss': tcl_loss.item(),
                        'sin_loss': sin_loss.item(),
                        'cos_loss': cos_loss.item(),
                        'radii_loss': radii_loss.item()
                    },
                    tag='train',
                    n_iter=train_step)
            except Exception:
                logger.write_scalars(
                    {
                        'loss': loss.item(),
                        'tr_loss': tr_loss.item()
                    },
                    tag='train',
                    n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        self.save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))
    return train_step
def inference(detector, test_loader, output_dir):
    """Run detection over the test set and write per-image result files.

    For each image: runs the detector, prints a running FPS, builds a
    prediction-vs-ground-truth visualization, rescales contours back to the
    original resolution, and writes results in the format required by the
    current benchmark (``cfg.exp_name``: Icdar2015 / MLT2017 / TD500 / other).
    """
    total_time = 0.
    # MLT2017 output is accumulated across checkpoints, so the directory is
    # only created if missing; other benchmarks get a fresh directory.
    if cfg.exp_name != "MLT2017":
        osmkdir(output_dir)
    else:
        if not os.path.exists(output_dir):
            mkdirs(output_dir)
    for i, (image, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
            cos_map, meta) in enumerate(test_loader):
        image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        # Synchronize around the detection call so timing covers GPU work.
        torch.cuda.synchronize()
        start = time.time()
        idx = 0  # test mode can only run with batch_size == 1
        # visualization: undo normalization to a displayable uint8 image.
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        # get detection result
        contours, output = detector.detect(image, img_show)
        tr_pred, tcl_pred = output['tr'], output['tcl']
        torch.cuda.synchronize()
        end = time.time()
        total_time += end - start
        fps = (i + 1) / total_time
        print('detect {} / {} images: {}. ({:.2f} fps)'.format(
            i + 1, len(test_loader), meta['image_id'][idx], fps))
        pred_vis = visualize_detection(img_show, contours, tr_pred[1],
                                       tcl_pred[1])
        # Collect the ground-truth polygons that actually have points.
        gt_contour = []
        for annot, n_annot in zip(meta['annotation'][idx],
                                  meta['n_annotation'][idx]):
            if n_annot.item() > 0:
                gt_contour.append(annot[:n_annot].int().cpu().numpy())
        gt_vis = visualize_gt(img_show, gt_contour,
                              tr_mask[idx].cpu().numpy(),
                              tcl_mask[idx, :, :, 0].cpu().numpy())
        # Stack prediction over ground truth for side-by-side comparison.
        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
        # path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), meta['image_id'][idx])
        # cv2.imwrite(path, im_vis)
        # Rescale image and contours back to the original resolution.
        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)
        # write to file, in the benchmark-specific naming/format convention
        if cfg.exp_name == "Icdar2015":
            fname = "res_" + meta['image_id'][idx].replace('jpg', 'txt')
            contours = data_transfer_ICDAR(contours)
            write_to_file(contours, os.path.join(output_dir, fname))
        elif cfg.exp_name == "MLT2017":
            # MLT2017 additionally saves the visualization and nests results
            # under a per-checkpoint subdirectory.
            path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name),
                                meta['image_id'][idx].split("/")[-1])
            cv2.imwrite(path, im_vis)
            out_dir = os.path.join(output_dir, str(cfg.checkepoch))
            if not os.path.exists(out_dir):
                mkdirs(out_dir)
            fname = meta['image_id'][idx].split("/")[-1].replace('ts', 'res')
            fname = fname.split(".")[0] + ".txt"
            data_transfer_MLT2017(contours, os.path.join(out_dir, fname))
        elif cfg.exp_name == "TD500":
            fname = "res_img_" + meta['image_id'][idx].replace('jpg', 'txt')
            data_transfer_TD500(contours, os.path.join(output_dir, fname))
        else:
            fname = meta['image_id'][idx].replace('jpg', 'txt')
            write_to_file(contours, os.path.join(output_dir, fname))