class VisualDLWriter(Callback):
    """
    Use VisualDL to log scalars or images.
    """

    def __init__(self, model):
        super(VisualDLWriter, self).__init__(model)

        assert six.PY3, "VisualDL requires Python >= 3.5"
        try:
            from visualdl import LogWriter
        except Exception as e:
            logger.error('visualdl not found, please install visualdl. '
                         'for example: `pip install visualdl`.')
            raise e
        self.vdl_writer = LogWriter(
            model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
        self.vdl_loss_step = 0
        self.vdl_mAP_step = 0
        self.vdl_image_step = 0
        self.vdl_image_frame = 0

    def on_step_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                training_staus = status['training_staus']
                for loss_name, loss_value in training_staus.get().items():
                    self.vdl_writer.add_scalar(loss_name, loss_value,
                                               self.vdl_loss_step)
                self.vdl_loss_step += 1
            elif mode == 'test':
                ori_image = status['original_image']
                result_image = status['result_image']
                self.vdl_writer.add_image(
                    "original/frame_{}".format(self.vdl_image_frame),
                    ori_image, self.vdl_image_step)
                self.vdl_writer.add_image(
                    "result/frame_{}".format(self.vdl_image_frame),
                    result_image, self.vdl_image_step)
                self.vdl_image_step += 1
                # each frame can display ten pictures at most.
                if self.vdl_image_step % 10 == 0:
                    self.vdl_image_step = 0
                    self.vdl_image_frame += 1

    def on_epoch_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'eval':
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        self.vdl_writer.add_scalar("{}-mAP".format(key),
                                                   map_value[0],
                                                   self.vdl_mAP_step)
                self.vdl_mAP_step += 1
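# --- Aside: a minimal, self-contained sketch of the LogWriter calls used by
# the callback above (assumes `pip install visualdl`; the tags, values and log
# directory are illustrative). The resulting logs can be browsed with the
# VisualDL board, e.g. `visualdl --logdir vdl_log_dir/scalar`.
import numpy as np
from visualdl import LogWriter

demo_writer = LogWriter(logdir='vdl_log_dir/scalar')
for demo_step in range(10):
    # scalar curves: one tag per curve, step is the x-axis
    demo_writer.add_scalar('loss', 1.0 / (demo_step + 1), demo_step)
# images: HWC uint8 arrays, grouped under a common tag prefix
demo_writer.add_image('original/frame_0',
                      np.zeros((64, 64, 3), dtype=np.uint8), 0)
demo_writer.close()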
def main():
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['iterable'] = True
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader, devices_num=1)
    loader.set_sample_list_generator(reader, place)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
        "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, segm2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, lmk2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # use VisualDL to log image
    if FLAGS.use_vdl:
        assert six.PY3, "VisualDL requires Python >= 3.5"
        from visualdl import LogWriter
        vdl_writer = LogWriter(FLAGS.vdl_log_dir)
        vdl_image_step = 0
        vdl_image_frame = 0  # each frame can display ten pictures at most.
    imid2path = dataset.get_imid2path()
    resultBBox = []
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))
        if 'TTFNet' in cfg.architecture:
            res['bbox'][1].append([len(res['bbox'][0])])
        if 'CornerNet' in cfg.architecture:
            from ppdet.utils.post_process import corner_post_process
            post_config = getattr(cfg, 'PostProcess', None)
            corner_post_process(res, post_config, cfg.num_classes)

        bbox_results = None
        mask_results = None
        segm_results = None
        lmk_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)
        if 'segm' in res:
            segm_results = segm2out([res], clsid2catid)
        if 'landmark' in res:
            lmk_results = lmk2out([res], is_bbox_normalized)

        # a bbox holds four values: top-left corner coordinates, width and height, e.g.
        # {'image_id': 0, 'category_id': 0, 'bbox': [695.04443359375, 723.8153686523438, 128.288818359375, 61.5987548828125], 'score': 0.9990022778511047}
        im_ids = res['im_id'][0]
        image_path = imid2path[int(im_ids[0])]
        prefix = image_path.split('/')[-1]
        imageName = prefix.split('.')[0]
        for i, result in enumerate(bbox_results):
            score = result["score"]
            bbox = result["bbox"]
            x1 = str(int(bbox[0]))
            y1 = str(int(bbox[1]))
            x2 = str(int(bbox[2] + bbox[0]))
            y2 = str(int(bbox[3] + bbox[1]))
            if score > 0.01:
                resStr = (imageName + ' ' + str(round(score, 3)) + ' ' + x1 +
                          ' ' + y1 + ' ' + x2 + ' ' + y2 + '\n')
                resultBBox.append(resStr)

        # visualize result
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            image = ImageOps.exif_transpose(image)

            # use VisualDL to log original image
            if FLAGS.use_vdl:
                original_image_np = np.array(image)
                vdl_writer.add_image(
                    "original/frame_{}".format(vdl_image_frame),
                    original_image_np, vdl_image_step)

            image = visualize_results(image, int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results, segm_results, lmk_results)

            # use VisualDL to log image with bbox
            if FLAGS.use_vdl:
                infer_image_np = np.array(image)
                vdl_writer.add_image("bbox/frame_{}".format(vdl_image_frame),
                                     infer_image_np, vdl_image_step)
                vdl_image_step += 1
                if vdl_image_step % 10 == 0:
                    vdl_image_step = 0
                    vdl_image_frame += 1

            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)

    resulttxtPath = "/home/aistudio/work/PaddleDetection-release-2.0-beta/output/test_result.txt"
    with open(resulttxtPath, 'w+', encoding='utf-8') as f:
        for p in resultBBox:
            f.write(p)
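# --- Aside: a hypothetical helper (not part of the original script) showing
# how to read back the test_result.txt lines written above. Each line is
# "<image_name> <score> <x1> <y1> <x2> <y2>", where the corners are derived
# from the COCO-style [x, y, w, h] boxes.
def parse_result_line(line):
    name, score, x1, y1, x2, y2 = line.split()
    return name, float(score), int(x1), int(y1), int(x2), int(y2)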
def main():
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['iterable'] = True
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader, devices_num=1)
    loader.set_sample_list_generator(reader, place)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
        "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, lmk2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # use VisualDL to log image
    if FLAGS.use_vdl:
        assert six.PY3, "VisualDL requires Python >= 3.5"
        from visualdl import LogWriter
        vdl_writer = LogWriter(FLAGS.vdl_log_dir)
        vdl_image_step = 0
        vdl_image_frame = 0  # each frame can display ten pictures at most.
    imid2path = dataset.get_imid2path()
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))
        if 'TTFNet' in cfg.architecture:
            res['bbox'][1].append([len(res['bbox'][0])])

        bbox_results = None
        mask_results = None
        lmk_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)
        if 'landmark' in res:
            lmk_results = lmk2out([res], is_bbox_normalized)

        # visualize result
        im_ids = res['im_id'][0]
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')

            # use VisualDL to log original image
            if FLAGS.use_vdl:
                original_image_np = np.array(image)
                vdl_writer.add_image(
                    "original/frame_{}".format(vdl_image_frame),
                    original_image_np, vdl_image_step)

            image = visualize_results(image, int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results, lmk_results)

            # use VisualDL to log image with bbox
            if FLAGS.use_vdl:
                infer_image_np = np.array(image)
                vdl_writer.add_image("bbox/frame_{}".format(vdl_image_frame),
                                     infer_image_np, vdl_image_step)
                vdl_image_step += 1
                if vdl_image_step % 10 == 0:
                    vdl_image_step = 0
                    vdl_image_frame += 1

            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
def train(self):
    if not self.is_parallel:
        writer = LogWriter(logdir=self.result_dir + "/log/")
    self.genA2B.train(), self.genB2A.train()
    self.disGA.train(), self.disGB.train()
    self.disLA.train(), self.disLB.train()

    start_iter = 1
    if self.resume:
        print(self.result_dir, self.dataset,
              os.path.join(self.result_dir, self.dataset, 'model', '*.pt'))
        model_list = glob(
            os.path.join(self.result_dir, self.dataset, 'model', '*.pt'))
        print("resuming, model_list", model_list)
        if not len(model_list) == 0:
            model_list.sort()
            start_iter = int(model_list[-1].split('_')[-1].split('.')[0])
            print("resuming, start_iter", start_iter)
            self.load(
                os.path.join(self.result_dir, self.dataset, 'model'),
                start_iter)
            print(" [*] Load SUCCESS")
            if self.decay_flag and start_iter > (self.iteration // 2):
                self.G_optim._learning_rate -= (
                    self.lr / (self.iteration // 2)) * (
                        start_iter - self.iteration // 2)
                self.D_optim._learning_rate -= (
                    self.lr / (self.iteration // 2)) * (
                        start_iter - self.iteration // 2)

    # training loop
    print('training start !')
    start_time = time.time()
    for step in range(start_iter, self.iteration + 1):
        if self.decay_flag and step > (self.iteration // 2):
            self.G_optim._learning_rate -= (self.lr / (self.iteration // 2))
            self.D_optim._learning_rate -= (self.lr / (self.iteration // 2))

        try:
            real_A, _ = trainA_iter.next()
        except:
            trainA_iter = iter(self.trainA_loader)
            real_A, _ = trainA_iter.next()

        try:
            real_B, _ = trainB_iter.next()
        except:
            trainB_iter = iter(self.trainB_loader)
            real_B, _ = trainB_iter.next()

        # some handling needed when using the paddle dataloader
        real_A = real_A[0]
        real_B = real_B[0]

        # Update D
        if hasattr(self.D_optim, "_optimizer"):  # support meta optimizer
            self.D_optim._optimizer.clear_gradients()
        else:
            self.D_optim.clear_gradients()

        fake_A2B, _, _ = self.genA2B(real_A)
        fake_B2A, _, _ = self.genB2A(real_B)

        real_GA_logit, real_GA_cam_logit, _ = self.disGA(real_A)
        real_LA_logit, real_LA_cam_logit, _ = self.disLA(real_A)
        real_GB_logit, real_GB_cam_logit, _ = self.disGB(real_B)
        real_LB_logit, real_LB_cam_logit, _ = self.disLB(real_B)

        fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
        fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
        fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
        fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

        D_ad_loss_GA = self.MSE_loss(
            real_GA_logit,
            torch.ones_like(real_GA_logit).to(self.device)) + self.MSE_loss(
                fake_GA_logit,
                torch.zeros_like(fake_GA_logit).to(self.device))
        D_ad_cam_loss_GA = self.MSE_loss(
            real_GA_cam_logit,
            torch.ones_like(real_GA_cam_logit).to(
                self.device)) + self.MSE_loss(
                    fake_GA_cam_logit,
                    torch.zeros_like(fake_GA_cam_logit).to(self.device))
        D_ad_loss_LA = self.MSE_loss(
            real_LA_logit,
            torch.ones_like(real_LA_logit).to(self.device)) + self.MSE_loss(
                fake_LA_logit,
                torch.zeros_like(fake_LA_logit).to(self.device))
        D_ad_cam_loss_LA = self.MSE_loss(
            real_LA_cam_logit,
            torch.ones_like(real_LA_cam_logit).to(
                self.device)) + self.MSE_loss(
                    fake_LA_cam_logit,
                    torch.zeros_like(fake_LA_cam_logit).to(self.device))
        D_ad_loss_GB = self.MSE_loss(
            real_GB_logit,
            torch.ones_like(real_GB_logit).to(self.device)) + self.MSE_loss(
                fake_GB_logit,
                torch.zeros_like(fake_GB_logit).to(self.device))
        D_ad_cam_loss_GB = self.MSE_loss(
            real_GB_cam_logit,
            torch.ones_like(real_GB_cam_logit).to(
                self.device)) + self.MSE_loss(
                    fake_GB_cam_logit,
                    torch.zeros_like(fake_GB_cam_logit).to(self.device))
        D_ad_loss_LB = self.MSE_loss(
            real_LB_logit,
            torch.ones_like(real_LB_logit).to(self.device)) + self.MSE_loss(
                fake_LB_logit,
                torch.zeros_like(fake_LB_logit).to(self.device))
        D_ad_cam_loss_LB = self.MSE_loss(
            real_LB_cam_logit,
            torch.ones_like(real_LB_cam_logit).to(
                self.device)) + self.MSE_loss(
                    fake_LB_cam_logit,
                    torch.zeros_like(fake_LB_cam_logit).to(self.device))

        D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
                                      D_ad_loss_LA +
                                      D_ad_cam_loss_LA) / self.n_gpu
        D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
                                      D_ad_loss_LB +
                                      D_ad_cam_loss_LB) / self.n_gpu

        Discriminator_loss = D_loss_A + D_loss_B
        Discriminator_loss.backward()
        if self.is_parallel:
            self.disGA.apply_collective_grads()
            self.disGB.apply_collective_grads()
            self.disLA.apply_collective_grads()
            self.disLB.apply_collective_grads()
            self.genA2B.apply_collective_grads()
            self.genB2A.apply_collective_grads()

        self.D_optim.minimize(Discriminator_loss)

        # Update G
        if hasattr(self.G_optim, "_optimizer"):  # support meta optimizer
            self.G_optim._optimizer.clear_gradients()
        else:
            self.G_optim.clear_gradients()

        fake_A2B, fake_A2B_cam_logit, _ = self.genA2B(real_A)
        fake_B2A, fake_B2A_cam_logit, _ = self.genB2A(real_B)

        fake_A2B2A, _, _ = self.genB2A(fake_A2B)
        fake_B2A2B, _, _ = self.genA2B(fake_B2A)

        fake_A2A, fake_A2A_cam_logit, _ = self.genB2A(real_A)
        fake_B2B, fake_B2B_cam_logit, _ = self.genA2B(real_B)

        fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
        fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
        fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
        fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

        G_ad_loss_GA = self.MSE_loss(
            fake_GA_logit, torch.ones_like(fake_GA_logit).to(self.device))
        G_ad_cam_loss_GA = self.MSE_loss(
            fake_GA_cam_logit,
            torch.ones_like(fake_GA_cam_logit).to(self.device))
        G_ad_loss_LA = self.MSE_loss(
            fake_LA_logit, torch.ones_like(fake_LA_logit).to(self.device))
        G_ad_cam_loss_LA = self.MSE_loss(
            fake_LA_cam_logit,
            torch.ones_like(fake_LA_cam_logit).to(self.device))
        G_ad_loss_GB = self.MSE_loss(
            fake_GB_logit, torch.ones_like(fake_GB_logit).to(self.device))
        G_ad_cam_loss_GB = self.MSE_loss(
            fake_GB_cam_logit,
            torch.ones_like(fake_GB_cam_logit).to(self.device))
        G_ad_loss_LB = self.MSE_loss(
            fake_LB_logit, torch.ones_like(fake_LB_logit).to(self.device))
        G_ad_cam_loss_LB = self.MSE_loss(
            fake_LB_cam_logit,
            torch.ones_like(fake_LB_cam_logit).to(self.device))

        G_recon_loss_A = self.L1_loss(fake_A2B2A, real_A)
        G_recon_loss_B = self.L1_loss(fake_B2A2B, real_B)

        G_identity_loss_A = self.L1_loss(fake_A2A, real_A)
        G_identity_loss_B = self.L1_loss(fake_B2B, real_B)

        G_cam_loss_A = self.BCE_loss(
            fake_B2A_cam_logit,
            torch.ones_like(fake_B2A_cam_logit).to(
                self.device)) + self.BCE_loss(
                    fake_A2A_cam_logit,
                    torch.zeros_like(fake_A2A_cam_logit).to(self.device))
        G_cam_loss_B = self.BCE_loss(
            fake_A2B_cam_logit,
            torch.ones_like(fake_A2B_cam_logit).to(
                self.device)) + self.BCE_loss(
                    fake_B2B_cam_logit,
                    torch.zeros_like(fake_B2B_cam_logit).to(self.device))

        G_loss_A = (self.adv_weight *
                    (G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA +
                     G_ad_cam_loss_LA) + self.cycle_weight * G_recon_loss_A +
                    self.identity_weight * G_identity_loss_A +
                    self.cam_weight * G_cam_loss_A) / self.n_gpu
        G_loss_B = (self.adv_weight *
                    (G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB +
                     G_ad_cam_loss_LB) + self.cycle_weight * G_recon_loss_B +
                    self.identity_weight * G_identity_loss_B +
                    self.cam_weight * G_cam_loss_B) / self.n_gpu

        Generator_loss = G_loss_A + G_loss_B
        Generator_loss.backward()
        if self.is_parallel:
            self.disGA.apply_collective_grads()
            self.disGB.apply_collective_grads()
            self.disLA.apply_collective_grads()
            self.disLB.apply_collective_grads()
            self.genA2B.apply_collective_grads()
            self.genB2A.apply_collective_grads()
        self.G_optim.minimize(Generator_loss)

        # clip parameter of AdaILN and ILN, applied after optimizer step
        self.Rho_clipper(self.genA2B)
        self.Rho_clipper(self.genB2A)
        if not self.is_parallel:
            writer.add_scalar(tag="G/G_loss_A", step=step,
                              value=G_loss_A.numpy())
            writer.add_scalar(tag="G/G_loss_B", step=step,
                              value=G_loss_B.numpy())
            writer.add_scalar(tag="D/D_loss_A", step=step,
                              value=D_loss_A.numpy())
            writer.add_scalar(tag="D/D_loss_B", step=step,
                              value=D_loss_B.numpy())
            writer.add_scalar(tag="D/Discriminator_loss", step=step,
                              value=Discriminator_loss.numpy())
            writer.add_scalar(tag="D/Generator_loss", step=step,
                              value=Generator_loss.numpy())
            if step % 10 == 9:
                writer.add_image(
                    "fake_A2B",
                    (porch.Tensor(fake_A2B[0] * 255)).clamp_(0, 255).numpy()
                    .transpose([1, 2, 0]).astype(np.uint8), step)
                writer.add_image(
                    "fake_B2A",
                    (porch.Tensor(fake_B2A[0] * 255)).clamp_(0, 255).numpy()
                    .transpose([1, 2, 0]).astype(np.uint8), step)
                # log the cycle reconstructions (the original logged fake_A2B
                # and fake_B2A again under these tags, which looks like a
                # copy-paste slip)
                writer.add_image(
                    "fake_A2B2A",
                    (porch.Tensor(fake_A2B2A[0] * 255)).clamp_(0, 255).numpy()
                    .transpose([1, 2, 0]).astype(np.uint8), step)
                writer.add_image(
                    "fake_B2A2B",
                    (porch.Tensor(fake_B2A2B[0] * 255)).clamp_(0, 255).numpy()
                    .transpose([1, 2, 0]).astype(np.uint8), step)

        print("[%5d/%5d] time: %4.4f d_loss: %.8f, g_loss: %.8f" %
              (step, self.iteration, time.time() - start_time,
               Discriminator_loss, Generator_loss))

        if step % self.print_freq == 0:
            train_sample_num = 5
            test_sample_num = 5
            A2B = np.zeros((self.img_size * 7, 0, 3))
            B2A = np.zeros((self.img_size * 7, 0, 3))

            self.genA2B.eval(), self.genB2A.eval()
            self.disGA.eval(), self.disGB.eval()
            self.disLA.eval(), self.disLB.eval()
            for _ in range(train_sample_num):
                try:
                    real_A, _ = trainA_iter.next()
                except:
                    trainA_iter = iter(self.trainA_loader)
                    real_A, _ = trainA_iter.next()

                try:
                    real_B, _ = trainB_iter.next()
                except:
                    trainB_iter = iter(self.trainB_loader)
                    real_B, _ = trainB_iter.next()

                real_A, real_B = real_A[0], real_B[0]

                fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
                fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)

                fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
                fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)

                fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
                fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)

                A2B = np.concatenate(
                    (A2B,
                     np.concatenate(
                         (RGB2BGR(tensor2numpy(denorm(real_A[0]))),
                          cam(tensor2numpy(fake_A2A_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
                          cam(tensor2numpy(fake_A2B_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
                          cam(tensor2numpy(fake_A2B2A_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))),
                         0)), 1)

                B2A = np.concatenate(
                    (B2A,
                     np.concatenate(
                         (RGB2BGR(tensor2numpy(denorm(real_B[0]))),
                          cam(tensor2numpy(fake_B2B_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
                          cam(tensor2numpy(fake_B2A_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
                          cam(tensor2numpy(fake_B2A2B_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))),
                         0)), 1)

            for _ in range(test_sample_num):
                try:
                    real_A, _ = testA_iter.next()
                except:
                    testA_iter = iter(self.testA_loader)
                    real_A, _ = testA_iter.next()

                try:
                    real_B, _ = testB_iter.next()
                except:
                    testB_iter = iter(self.testB_loader)
                    real_B, _ = testB_iter.next()

                real_A, real_B = real_A[0], real_B[0]

                fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
                fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)

                fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
                fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)

                fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
                fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)

                A2B = np.concatenate(
                    (A2B,
                     np.concatenate(
                         (RGB2BGR(tensor2numpy(denorm(real_A[0]))),
                          cam(tensor2numpy(fake_A2A_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
                          cam(tensor2numpy(fake_A2B_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
                          cam(tensor2numpy(fake_A2B2A_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))),
                         0)), 1)

                B2A = np.concatenate(
                    (B2A,
                     np.concatenate(
                         (RGB2BGR(tensor2numpy(denorm(real_B[0]))),
                          cam(tensor2numpy(fake_B2B_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
                          cam(tensor2numpy(fake_B2A_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
                          cam(tensor2numpy(fake_B2A2B_heatmap[0]),
                              self.img_size),
                          RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))),
                         0)), 1)

            if (not self.is_parallel
                ) or fluid.dygraph.parallel.Env().local_rank == 0:
                cv2.imwrite(
                    os.path.join(self.result_dir, self.dataset, 'img',
                                 'A2B_%07d.png' % step), A2B * 255.0)
                cv2.imwrite(
                    os.path.join(self.result_dir, self.dataset, 'img',
                                 'B2A_%07d.png' % step), B2A * 255.0)
            self.genA2B.train(), self.genB2A.train()
            self.disGA.train(), self.disGB.train()
            self.disLA.train(), self.disLB.train()

        if step % self.save_freq == 0:
            if (not self.is_parallel
                ) or fluid.dygraph.parallel.Env().local_rank == 0:
                self.save(
                    os.path.join(self.result_dir, self.dataset, 'model'),
                    step)

        if step % 1000 == 0:
            params = {}
            params['genA2B'] = self.genA2B.state_dict()
            params['genB2A'] = self.genB2A.state_dict()
            params['disGA'] = self.disGA.state_dict()
            params['disGB'] = self.disGB.state_dict()
            params['disLA'] = self.disLA.state_dict()
            params['disLB'] = self.disLB.state_dict()
            if (not self.is_parallel
                ) or fluid.dygraph.parallel.Env().local_rank == 0:
                torch.save(
                    params,
                    os.path.join(self.result_dir,
                                 self.dataset + '_params_latest.pt'))
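# --- Aside: the decay logic above keeps the learning rate constant for the
# first half of training, then subtracts lr / (iteration // 2) once per step.
# An illustrative closed form (a sketch, not part of the original code):
def ugatit_lr(step, iteration, lr):
    half = iteration // 2
    return lr if step <= half else lr - (lr / half) * (step - half)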
def main():
    # Step 0: preparation
    writer = LogWriter(logdir="./log/scalar")
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        image_folder = ""
        image_list_file = "dummy_data/fabric_list.txt"
        transform = Transform()  # Normalize2() maps [0, 255] --> [0, 1]
        x_data = DataLoader(image_folder, image_list_file,
                            transform=transform)
        x_dataloader = fluid.io.DataLoader.from_generator(capacity=2,
                                                          return_list=True)
        x_dataloader.set_sample_generator(x_data, args.batch_size)
        total_batch = len(x_data) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            D = Discriminator()
            G = Generator()
            E = Invertor()
        else:
            raise NotImplementedError(
                f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_Loss
        D_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=G.parameters())
        E_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=E.parameters())

        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()
        E_loss_meter = AverageMeter()

        D.train()
        G.train()
        E.train()

        # Step 4: Slight training (warm up D until it separates real images)
        iteration = -1
        is_slight_Train = True
        for epoch in range(1, args.epoch_num + 1):
            # optim Discriminator
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                if is_slight_Train:
                    iteration += 1
                    x = fluid.layers.cast(x, dtype="float32")
                    x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                    preds_x = D(x)
                    preds_x_array = preds_x.numpy()
                    writer.add_scalar(tag="D(x)=1", step=iteration,
                                      value=np.mean(preds_x_array))
                    if np.mean(preds_x_array) >= 0.98:
                        is_slight_Train = False

                    z = np.random.rand(n, 64)
                    zeros = np.zeros((n, 1))
                    z = to_variable(z)
                    zeros = to_variable(zeros)
                    z = fluid.layers.cast(z, dtype="float32")
                    zeros = fluid.layers.cast(zeros, dtype="int64")
                    preds_fx = D(G(z))
                    preds_fx_array = preds_fx.numpy()
                    writer.add_scalar(tag="D(G(z))=0", step=iteration,
                                      value=np.mean(preds_fx_array))

                    D_loss = criterion(preds_x, x_labels) + criterion(
                        preds_fx, zeros)
                    D_loss.backward()
                    D_optim.minimize(D_loss)
                    D.clear_gradients()
                    D_loss_meter.update(D_loss.numpy()[0], n)
                    writer.add_scalar(tag="D_loss", step=iteration,
                                      value=D_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average D Loss: {D_loss_meter.avg:4f}, ")

                    z = np.random.rand(n, 64)
                    ones = np.ones((n, 1))
                    z = to_variable(z)
                    ones = to_variable(ones)
                    z = fluid.layers.cast(z, dtype="float32")
                    ones = fluid.layers.cast(ones, dtype="int64")
                    preds = D(G(z))
                    preds_array = preds.numpy()
                    writer.add_scalar(tag="D(G(z))=1", step=iteration,
                                      value=np.mean(preds_array))
                    G_loss = criterion(preds, ones)
                    G_loss.backward()
                    G_optim.minimize(G_loss)
                    G.clear_gradients()
                    G_loss_meter.update(G_loss.numpy()[0], n)
                    writer.add_scalar(tag="G_loss", step=iteration,
                                      value=G_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average G Loss: {G_loss_meter.avg:4f}")

            if (epoch % args.save_freq == 0 or epoch == args.epoch_num
                    or not is_slight_Train):
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")
                # save model and optimizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)
                print(f'----- Save model: {D_model_path}.pdparams, '
                      f'{G_model_path}.pdparams')
            if not is_slight_Train:
                break

        # Step 5: full training for Generator and Discriminator
        D_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=G.parameters())
        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()
        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                preds1 = D(x)
                preds_array = preds1.numpy()
                writer.add_scalar(tag="D(x)=1", step=iteration,
                                  value=np.mean(preds_array))

                z = np.random.rand(n, 64)
                zeros = np.zeros((n, 1))
                z = to_variable(z)
                zeros = to_variable(zeros)
                z = fluid.layers.cast(z, dtype="float32")
                zeros = fluid.layers.cast(zeros, dtype="int64")
                preds2 = D(G(z))
                preds_array = preds2.numpy()
                writer.add_scalar(tag="D(G(z))=0", step=iteration,
                                  value=np.mean(preds_array))

                D_loss = criterion(preds1, x_labels) + criterion(preds2,
                                                                 zeros)
                D_loss.backward()
                D_optim.minimize(D_loss)
                D.clear_gradients()
                D_loss_meter.update(D_loss.numpy()[0], n)
                writer.add_scalar(tag="D_loss", step=iteration,
                                  value=D_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average D Loss: {D_loss_meter.avg:4f} ")

                z = np.random.rand(n, 64)
                ones = np.ones((n, 1))
                z = to_variable(z)
                ones = to_variable(ones)
                z = fluid.layers.cast(z, dtype="float32")
                ones = fluid.layers.cast(ones, dtype="int64")
                preds = D(G(z))
                preds_array = preds.numpy()
                writer.add_scalar(tag="D(G(z))=1", step=iteration,
                                  value=np.mean(preds_array))
                G_loss = criterion(preds, ones)
                G_loss.backward()
                G_optim.minimize(G_loss)
                G.clear_gradients()
                G_loss_meter.update(G_loss.numpy()[0], n)
                writer.add_scalar(tag="G_loss", step=iteration,
                                  value=G_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average G Loss: {G_loss_meter.avg:4f}")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")
                # save model and optimizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)
                print(f'----- Save model: {D_model_path}.pdparams, '
                      f'{G_model_path}.pdparams')

        # Step 6: full training for Inverter
        E_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=E.parameters())
        E_loss_meter = AverageMeter()
        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                image = x.numpy()[0] * 255
                writer.add_image(tag="x", step=iteration, img=image)
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                invert_x = G(E(x))
                invert_image = fluid.layers.transpose(invert_x,
                                                      perm=[0, 2, 3, 1])
                invert_image = invert_image.numpy()[0] * 255
                writer.add_image(tag="invert_x", step=iteration,
                                 img=invert_image)
                print(np.max(invert_image), np.min(invert_image))
                E_loss = fluid.layers.mse_loss(invert_x, x)
                print("E_loss shape:", E_loss.numpy().shape)
                E_loss.backward()
                E_optim.minimize(E_loss)
                E.clear_gradients()
                E_loss_meter.update(E_loss.numpy()[0], n)
                writer.add_scalar(tag="E_loss", step=iteration,
                                  value=E_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average E Loss: {E_loss_meter.avg:4f}, ")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                E_model_path = os.path.join(args.checkpoint_folder,
                                            f"E_{args.net}-Epoch-{epoch}")
                # save model and optimizer states
                model_dict = E.state_dict()
                fluid.save_dygraph(model_dict, E_model_path)
                optim_dict = E_optim.state_dict()
                fluid.save_dygraph(optim_dict, E_model_path)
                print(f'----- Save model: {E_model_path}.pdparams')
def train(self, loaders):
    args = self.args
    nets = self.nets
    nets_ema = self.nets_ema
    optims = self.optims
    writer = LogWriter(logdir=self.args.checkpoint_dir + "/log/")

    # fetch random validation images for debugging
    fetcher = InputFetcher(loaders.src, loaders.ref, args.latent_dim, 'train')
    fetcher_val = InputFetcher(loaders.val, None, args.latent_dim, 'val')
    inputs_val = next(fetcher_val)

    # resume training if necessary
    if args.resume_iter > 0:
        self._load_checkpoint(args.resume_iter)

    # remember the initial value of ds weight
    initial_lambda_ds = args.lambda_ds

    print('Start training...')
    import tqdm
    start_time = time.time()
    tqdm_descriptor = tqdm.trange(args.resume_iter, args.total_iters)
    for i in tqdm_descriptor:
        # fetch images and labels
        inputs = next(fetcher)
        x_real, y_org = inputs.x_src, inputs.y_src
        x_ref, x_ref2, y_trg = inputs.x_ref, inputs.x_ref2, inputs.y_ref
        z_trg, z_trg2 = inputs.z_trg, inputs.z_trg2

        masks = nets.fan.get_heatmap(x_real) if args.w_hpf > 0 else None

        # train the discriminator
        d_loss, d_losses_latent = compute_d_loss(nets,
                                                 args,
                                                 x_real,
                                                 y_org,
                                                 y_trg,
                                                 z_trg=z_trg,
                                                 masks=masks)
        self._reset_grad()
        d_loss.backward()
        optims.discriminator.minimize(d_loss)

        d_loss, d_losses_ref = compute_d_loss(nets,
                                              args,
                                              x_real,
                                              y_org,
                                              y_trg,
                                              x_ref=x_ref,
                                              masks=masks)
        self._reset_grad()
        d_loss.backward()
        optims.discriminator.minimize(d_loss)

        # train the generator
        if i - args.resume_iter > 100:  # train the discriminator first
            g_loss, g_losses_latent, sample_1 = compute_g_loss(
                nets, args, x_real, y_org, y_trg, z_trgs=[z_trg, z_trg2],
                masks=masks)
            self._reset_grad()
            g_loss.backward()
            optims.generator.minimize(g_loss)
            optims.mapping_network.minimize(g_loss)
            optims.style_encoder.minimize(g_loss)

            g_loss, g_losses_ref, sample_2 = compute_g_loss(
                nets, args, x_real, y_org, y_trg, x_refs=[x_ref, x_ref2],
                masks=masks)
            self._reset_grad()
            g_loss.backward()
            optims.generator.minimize(g_loss)

            # compute moving average of network parameters
            moving_average(nets.generator, nets_ema.generator, beta=0.999)
            moving_average(nets.mapping_network, nets_ema.mapping_network,
                           beta=0.999)
            moving_average(nets.style_encoder, nets_ema.style_encoder,
                           beta=0.999)

            # decay weight for diversity sensitive loss
            if args.lambda_ds > 0:
                args.lambda_ds -= (initial_lambda_ds / args.ds_iter)

            # print out log info
            if (i + 1) % args.print_every == 0:
                elapsed = time.time() - start_time
                elapsed = str(datetime.timedelta(seconds=elapsed))[:-7]
                log = "Elapsed time [%s], Iteration [%i/%i], " % (
                    elapsed, i + 1, args.total_iters)
                all_losses = dict()
                for loss, prefix in zip([
                        d_losses_latent, d_losses_ref, g_losses_latent,
                        g_losses_ref
                ], ['D/latent_', 'D/ref_', 'G/latent_', 'G/ref_']):
                    for key, value in loss.items():
                        all_losses[prefix + key] = value
                        writer.add_scalar(tag=prefix + key,
                                          step=i + 1,
                                          value=value)
                all_losses['G/lambda_ds'] = args.lambda_ds
                log += ' '.join([
                    '%s: [%.4f]' % (key, value)
                    for key, value in all_losses.items()
                ])
                tqdm_descriptor.set_description(log)
                writer.add_image(
                    "x_fake",
                    (utils.denormalize(sample_1) * 255).numpy().transpose(
                        [1, 2, 0]).astype(np.uint8), i + 1)

            # generate images for debugging
            if (i + 1) % args.sample_every == 0:
                os.makedirs(args.sample_dir, exist_ok=True)
                utils.debug_image(nets_ema, args, inputs=inputs_val,
                                  step=i + 1)

            # save model checkpoints
            if (i + 1) % args.save_every == 0:
                self._save_checkpoint(step=i + 1)

            # compute FID and LPIPS if necessary
            if (i + 1) % args.eval_every == 0:
                calculate_metrics(nets_ema, args, i + 1, mode='latent')
                calculate_metrics(nets_ema, args, i + 1, mode='reference')
        else:
            if (i + 1) % args.print_every == 0:
                elapsed = time.time() - start_time
                elapsed = str(datetime.timedelta(seconds=elapsed))[:-7]
                log = "Elapsed time [%s], Iteration [%i/%i], " % (
                    elapsed, i + 1, args.total_iters)
                all_losses = dict()
                for loss, prefix in zip([d_losses_latent, d_losses_ref],
                                        ['D/latent_', 'D/ref_']):
                    for key, value in loss.items():
                        all_losses[prefix + key] = value
                        writer.add_scalar(tag=prefix + key,
                                          step=i + 1,
                                          value=value)
                log += ' '.join([
                    '%s: [%.4f]' % (key, value)
                    for key, value in all_losses.items()
                ])
                tqdm_descriptor.set_description(log)
    writer.close()
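# --- Aside: a minimal sketch of the parameter EMA implied by
# moving_average(nets.generator, nets_ema.generator, beta=0.999). The actual
# helper lives elsewhere in the repo, so treat this as an assumption:
def moving_average_sketch(model, model_ema, beta=0.999):
    for p, p_ema in zip(model.parameters(), model_ema.parameters()):
        # keep an exponential moving average of each weight
        p_ema.set_value(beta * p_ema.numpy() + (1.0 - beta) * p.numpy())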
def synthesis(text_input, args):
    local_rank = dg.parallel.Env().local_rank
    place = (fluid.CUDAPlace(local_rank)
             if args.use_gpu else fluid.CPUPlace())

    with open(args.config) as f:
        cfg = yaml.load(f, Loader=yaml.Loader)

    # VisualDL log writer
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    writer = LogWriter(os.path.join(args.output, 'log'))

    fluid.enable_dygraph(place)
    with fluid.unique_name.guard():
        network_cfg = cfg['network']
        model = TransformerTTS(
            network_cfg['embedding_size'], network_cfg['hidden_size'],
            network_cfg['encoder_num_head'], network_cfg['encoder_n_layers'],
            cfg['audio']['num_mels'], network_cfg['outputs_per_step'],
            network_cfg['decoder_num_head'], network_cfg['decoder_n_layers'])

        # Load parameters.
        global_step = io.load_parameters(
            model=model, checkpoint_path=args.checkpoint_transformer)
        model.eval()

        # init input
        text = np.asarray(text_to_sequence(text_input))
        text = fluid.layers.unsqueeze(
            dg.to_variable(text).astype(np.int64), [0])
        mel_input = dg.to_variable(np.zeros([1, 1, 80])).astype(np.float32)
        pos_text = np.arange(1, text.shape[1] + 1)
        pos_text = fluid.layers.unsqueeze(
            dg.to_variable(pos_text).astype(np.int64), [0])

        for i in range(args.max_len):
            pos_mel = np.arange(1, mel_input.shape[1] + 1)
            pos_mel = fluid.layers.unsqueeze(
                dg.to_variable(pos_mel).astype(np.int64), [0])
            mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
                text, mel_input, pos_text, pos_mel)
            if stop_preds.numpy()[0, -1] > args.stop_threshold:
                break
            mel_input = fluid.layers.concat(
                [mel_input, postnet_pred[:, -1:, :]], axis=1)

        global_step = 0
        for i, prob in enumerate(attn_probs):
            for j in range(4):
                x = np.uint8(cm.viridis(prob.numpy()[j]) * 255)
                writer.add_image('Attention_%d_0' % global_step, x,
                                 i * 4 + j)

        if args.vocoder == 'griffin-lim':
            # synthesis with griffin-lim
            wav = synthesis_with_griffinlim(postnet_pred, cfg['audio'])
        elif args.vocoder == 'waveflow':
            # synthesis with waveflow
            wav = synthesis_with_waveflow(postnet_pred, args,
                                          args.checkpoint_vocoder, place)
        else:
            print('vocoder error, we only support griffin-lim and waveflow, '
                  'but received %s.' % args.vocoder)

        writer.add_audio(text_input + '(' + args.vocoder + ')', wav, 0,
                         cfg['audio']['sr'])
        if not os.path.exists(os.path.join(args.output, 'samples')):
            os.mkdir(os.path.join(args.output, 'samples'))
        write(
            os.path.join(os.path.join(args.output, 'samples'),
                         args.vocoder + '.wav'), cfg['audio']['sr'], wav)
        print("Synthesis completed!")
        writer.close()
def main(args):
    local_rank = dg.parallel.Env().local_rank
    nranks = dg.parallel.Env().nranks
    parallel = nranks > 1

    with open(args.config) as f:
        cfg = yaml.load(f, Loader=yaml.Loader)

    global_step = 0
    place = fluid.CUDAPlace(local_rank) if args.use_gpu else fluid.CPUPlace()

    if not os.path.exists(args.output):
        os.mkdir(args.output)

    writer = LogWriter(os.path.join(args.output,
                                    'log')) if local_rank == 0 else None

    fluid.enable_dygraph(place)
    network_cfg = cfg['network']
    model = TransformerTTS(
        network_cfg['embedding_size'], network_cfg['hidden_size'],
        network_cfg['encoder_num_head'], network_cfg['encoder_n_layers'],
        cfg['audio']['num_mels'], network_cfg['outputs_per_step'],
        network_cfg['decoder_num_head'], network_cfg['decoder_n_layers'])

    model.train()
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=dg.NoamDecay(
            1 / (cfg['train']['warm_up_step'] *
                 (cfg['train']['learning_rate']**2)),
            cfg['train']['warm_up_step']),
        parameter_list=model.parameters(),
        grad_clip=fluid.clip.GradientClipByGlobalNorm(
            cfg['train']['grad_clip_thresh']))

    # Load parameters.
    global_step = io.load_parameters(
        model=model,
        optimizer=optimizer,
        checkpoint_dir=os.path.join(args.output, 'checkpoints'),
        iteration=args.iteration,
        checkpoint_path=args.checkpoint)
    print("Rank {}: checkpoint loaded.".format(local_rank))

    if parallel:
        strategy = dg.parallel.prepare_context()
        model = fluid.dygraph.parallel.DataParallel(model, strategy)

    reader = LJSpeechLoader(
        cfg['audio'],
        place,
        args.data,
        cfg['train']['batch_size'],
        nranks,
        local_rank,
        shuffle=True).reader

    iterator = iter(tqdm(reader))

    global_step += 1

    while global_step <= cfg['train']['max_iteration']:
        try:
            batch = next(iterator)
        except StopIteration as e:
            iterator = iter(tqdm(reader))
            batch = next(iterator)

        character, mel, mel_input, pos_text, pos_mel, stop_tokens = batch

        mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
            character, mel_input, pos_text, pos_mel)

        mel_loss = layers.mean(
            layers.abs(layers.elementwise_sub(mel_pred, mel)))
        post_mel_loss = layers.mean(
            layers.abs(layers.elementwise_sub(postnet_pred, mel)))
        loss = mel_loss + post_mel_loss

        stop_loss = cross_entropy(
            stop_preds, stop_tokens,
            weight=cfg['network']['stop_loss_weight'])
        loss = loss + stop_loss

        if local_rank == 0:
            writer.add_scalar('training_loss/mel_loss', mel_loss.numpy(),
                              global_step)
            writer.add_scalar('training_loss/post_mel_loss',
                              post_mel_loss.numpy(), global_step)
            writer.add_scalar('stop_loss', stop_loss.numpy(), global_step)

            if parallel:
                writer.add_scalar('alphas/encoder_alpha',
                                  model._layers.encoder.alpha.numpy(),
                                  global_step)
                writer.add_scalar('alphas/decoder_alpha',
                                  model._layers.decoder.alpha.numpy(),
                                  global_step)
            else:
                writer.add_scalar('alphas/encoder_alpha',
                                  model.encoder.alpha.numpy(), global_step)
                writer.add_scalar('alphas/decoder_alpha',
                                  model.decoder.alpha.numpy(), global_step)

            writer.add_scalar('learning_rate',
                              optimizer._learning_rate.step().numpy(),
                              global_step)

            if global_step % cfg['train']['image_interval'] == 1:
                for i, prob in enumerate(attn_probs):
                    for j in range(cfg['network']['decoder_num_head']):
                        x = np.uint8(
                            cm.viridis(prob.numpy()[
                                j * cfg['train']['batch_size'] // nranks]) *
                            255)
                        writer.add_image('Attention_%d_0' % global_step, x,
                                         i * 4 + j)

                for i, prob in enumerate(attn_enc):
                    for j in range(cfg['network']['encoder_num_head']):
                        x = np.uint8(
                            cm.viridis(prob.numpy()[
                                j * cfg['train']['batch_size'] // nranks]) *
                            255)
                        writer.add_image('Attention_enc_%d_0' % global_step,
                                         x, i * 4 + j)

                for i, prob in enumerate(attn_dec):
                    for j in range(cfg['network']['decoder_num_head']):
                        x = np.uint8(
                            cm.viridis(prob.numpy()[
                                j * cfg['train']['batch_size'] // nranks]) *
                            255)
                        writer.add_image('Attention_dec_%d_0' % global_step,
                                         x, i * 4 + j)

        if parallel:
            loss = model.scale_loss(loss)
            loss.backward()
            model.apply_collective_grads()
        else:
            loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()

        # save checkpoint
        if local_rank == 0 and global_step % cfg['train'][
                'checkpoint_interval'] == 0:
            io.save_parameters(
                os.path.join(args.output, 'checkpoints'), global_step, model,
                optimizer)
        global_step += 1

    if local_rank == 0:
        writer.close()
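# --- Aside: the NoamDecay arguments above encode a common trick. Under the
# usual Noam schedule lr(step) = d_model**-0.5 * min(step**-0.5,
# step * warmup**-1.5), choosing d_model = 1 / (warm_up_step * lr**2) makes
# the rate rise linearly to exactly `lr` at step == warm_up_step and then
# decay as step**-0.5. An illustrative sketch (assumed semantics):
def noam_lr(step, warmup, lr):
    d_model = 1.0 / (warmup * lr ** 2)
    return d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)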
def main(args):
    config = Config(args.config)
    cfg = config(vars(args), mode=['infer', 'init'])
    scale = cfg['infer']['scale']
    mdname = cfg['infer']['model']
    imgname = ''.join(mdname)  # + '/' + str(scale)
    sz = cfg['infer']['sz']
    infer_size = cfg['infer']['infer_size']
    save_path = create_path(args.save_dir, cfg['init']['result'])
    save_path = create_path(save_path, imgname)
    save_path = create_path(save_path, str(scale))
    tif_path = create_path(save_path, cfg['infer']['lab'])
    color_path = create_path(save_path, cfg['infer']['color'])
    gray_path = create_path(save_path, cfg['infer']['gray'])
    vdl_dir = os.path.join(args.save_dir, cfg['init']['vdl_dir'])
    palette = cfg['infer']['palette']
    palette = np.array(palette, dtype=np.uint8)
    num_class = cfg['init']['num_classes']
    batchsz = cfg['infer']['batchsz']
    infer_path = os.path.join(cfg['infer']['root_path'],
                              cfg['infer']['path'])
    tagname = imgname + '/' + str(scale)

    vdl_dir = os.path.join(vdl_dir, 'infer')
    writer = LogWriter(logdir=vdl_dir)

    infer_ds = TeDataset(path=cfg['infer']['root_path'],
                         fl=cfg['infer']['path'],
                         sz=sz)
    total = len(infer_ds)

    # run each selected model and accumulate the scaled predictions
    addresult = np.zeros((total, num_class, sz, sz))
    for mnet in mdname:
        net = modelset(mode=mnet, num_classes=cfg['init']['num_classes'])
        # load model
        input = InputSpec([None, 3, 64, 64], 'float32', 'x')
        label = InputSpec([None, 1, 64, 64], 'int64', 'label')
        model = paddle.Model(net, input, label)
        model.load(path=os.path.join(args.save_dir, mnet) + '/' + mnet)
        model.prepare()
        result = model.predict(
            infer_ds,
            batch_size=batchsz,
            num_workers=cfg['infer']['num_workers'],
            stack_outputs=True)  # e.g. [160, 2, 64, 64]
        addresult = result[0] + scale * addresult

    pred = construct(addresult, infer_size, sz=sz)
    # erosion / dilation could be applied to pred here

    # write results and log them to VisualDL
    file_list = os.listdir(infer_path)
    file_list.sort(key=lambda x: int(x[-5:-4]))
    step = 0
    for i, fl in enumerate(file_list):
        name, _ = fl.split(".")
        # save pred
        lab_img = Image.fromarray(pred[i].astype(np.uint8)).convert("L")
        saveimg(lab_img, tif_path, name=name, type='.tif')
        # gray label
        label = colorize(pred[i], palette)
        writer.add_image(tag=tagname,
                         img=saveimg(label,
                                     gray_path,
                                     name=name,
                                     type='.png',
                                     re_out=True),
                         step=step,
                         dataformats='HW')
        step += 1
        # color label blended over the source image
        file = os.path.join(infer_path, fl)
        out = blend_image(file, label, alpha=0.25)
        writer.add_image(tag=tagname,
                         img=saveimg(out,
                                     color_path,
                                     name=name,
                                     type='.png',
                                     re_out=True),
                         step=step,
                         dataformats='HWC')
        step += 1
    writer.close()
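# --- Aside: blend_image() above is a project-local helper; a hypothetical
# stand-in using PIL's Image.blend is sketched below (the helper name,
# resize-to-base behavior and RGBA conversion are assumptions):
from PIL import Image

def blend_image_sketch(file, label, alpha=0.25):
    base = Image.open(file).convert("RGBA")
    over = Image.fromarray(label).convert("RGBA").resize(base.size)
    # alpha-blend the color label over the source image
    return Image.blend(base, over, alpha)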