def update(self, key=None):
    """Advance the entity one tick, optionally turning first.

    Args:
        key: pressed key or None. Only keys present in ``self.key_dir``
            change direction; a direct 180-degree reversal is ignored.
    """
    if key in self.key_dir:  # membership test needs no .keys()
        new_dir = self.key_dir[key]
        # Reject reversing onto itself: opposite unit vectors sum to (0, 0).
        if tuple(c + n for c, n in zip(self.cur_dir, new_dir)) != (0, 0):
            self.cur_dir = new_dir  # reuse the looked-up value
    x_dir, y_dir = self.cur_dir
    # cycle() is a project helper — presumably wraps a value into
    # [low, high); confirm against its definition.
    self.x = cycle(self.x + x_dir * self.speed, self.scr_board,
                   self.scr_w - self.scr_board)
    self.y = cycle(self.y + y_dir * self.speed, self.scr_board,
                   self.scr_h - self.scr_board)
def train_megadepth(args):
    """Train a CAPS model on MegaDepth with tensorboard logging."""
    # Persist the run configuration next to the outputs for reproducibility.
    run_dir = os.path.join(args.outdir, args.exp_name)
    os.makedirs(run_dir, exist_ok=True)
    args_path = os.path.join(run_dir, 'args.txt')
    with open(args_path, 'w') as fh:
        for name in vars(args):
            fh.write('{} = {}\n'.format(name, getattr(args, name)))

    # Tensorboard summary writer.
    log_dir = os.path.join(args.logdir, args.exp_name)
    print('tensorboard log files are stored in {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Endless batch stream over the MegaDepth training set.
    batches = iter(cycle(MegaDepthLoader(args).load_data()))

    model = CAPSModel(args)
    start = model.start_step

    # One batch per step; checkpoint every save_interval steps.
    for step in range(start + 1, start + args.n_iters + 1):
        model.set_input(next(batches))
        model.optimize_parameters()
        model.write_summary(writer, step)
        if step > 0 and step % args.save_interval == 0:
            model.save_model(step)
def data_loader2(args):
    """Build the training loader selected by ``args.crop_size``.

    crop_size 28 -> omniglot loader, 64 -> vgg loader (vgg/animal/cub),
    128 -> animal loader.

    Returns:
        (endless batch iterator, dataset length, number of classes)

    Raises:
        Exception: if ``args.crop_size`` matches no known dataset family.
    """
    # crop_size -> (loader factory, log message).  Replaces three branches
    # that each rebuilt the identical data_root path.
    loaders = {
        28: (omniglot_data_loader, "omniglot data_loader"),
        64: (vgg_data_loader, "vgg data_loader, animal, cub"),
        128: (animal_data_loader, "animal data_loader"),
    }
    if args.crop_size not in loaders:
        raise Exception("Enter omniglot or vgg or animal")
    loader_fn, message = loaders[args.crop_size]

    # Dataset layout: <root>/<dataset>_<n_shot>/<n_shot>
    data_root = os.path.join(args.dataset_root,
                             args.dataset + "_%s" % args.n_shot, args.n_shot)
    if args.crop_size != 128:  # original code did not log the path for 128
        print(data_root)

    train_loader, s_dlen, num_classes = loader_fn(
        root=data_root,
        batch_size=args.batch_size,
        resize_size=args.resize_size,
        crop_size=args.crop_size)
    print(message)

    # Wrap in an endless iterator so training can draw batches forever.
    train_loader = iter(utils.cycle(train_loader))
    return train_loader, s_dlen, num_classes
def update(self, delta_time, scene): super().update(delta_time) # Find the nearest ball nearest_ball = None for ball in scene.balls: if nearest_ball is None: nearest_ball = ball elif ball.y < nearest_ball.y: nearest_ball = ball # Check if a ball was found if nearest_ball is not None: # Calculate distance to the ball distance_x = nearest_ball.x - self.x distance_y = nearest_ball.y - self.y angle = -math.degrees(math.atan2( distance_y, distance_x)) + self.angle_offset - 13.5 # Calculate target angle speed = 0.025 * delta_time diff = cycle(angle - self.arc_angle, -180, 180) target_angle = diff if abs( diff) < speed else diff / abs(diff) * speed # Move the shooter self.arc_angle = clamp(self.arc_angle + target_angle, self.min_angle, self.max_angle) # Move the gear if self.min_angle < self.arc_angle < self.max_angle: self.polygon_smooth += target_angle * 0.1 * delta_time # Reduce shoot delay self.shoot_delay = approach(self.shoot_delay, 0, 0.06 * delta_time) # Shoot if self.shoot_delay == 0 and not scene.score and self.cooldown == 0: # Reset cooldown self.cooldown = self.max_cooldown # Calculate new shoot delay self.shoot_delay = random.randint(self.min_shoot_delay, self.max_shoot_delay) # Calculate new angle offset self.angle_offset = random.randint(-5, 5) # Instantiate the projectile scene.projectiles.add( Projectile(self.x, self.y, self.color, -(self.arc_angle + 13.5), 20)) # Play a shooting sound random.choice(self.snd_shoot).play()
def main():
    """Restore an agent from CLI options and evaluate it on the test split."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--continue', dest='cont', action='store_true',
                     help="continue training from checkpoint")
    cli.add_argument('--ckpt', type=str, default='latest', required=False,
                     help="desired checkpoint to restore")
    cli.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                     help="specify gpu ids")
    cli.add_argument('--vis', action='store_true', default=False,
                     help="visualize output in training")
    opts = cli.parse_args()

    # Experiment configuration derived from the CLI options.
    cfg = get_config(opts)
    print(cfg)

    # Network and its training agent.
    agent = get_agent(cfg)
    print(agent.net)

    # Optionally restore weights before evaluating.
    if opts.cont:
        agent.load_ckpt(opts.ckpt)

    eval_loader = get_dataloader(
        PHASE_TESTING, batch_size=cfg.batch_size, num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    # NOTE(review): this second loader is built and cycled but never
    # consumed below — kept to preserve the original behavior exactly.
    step_loader = get_dataloader(
        PHASE_TESTING, batch_size=cfg.batch_size, num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    step_loader = cycle(step_loader)

    accuracy = agent.evaluate(eval_loader)
    print(accuracy)
def main():
    """Run the full training schedule: epochs of train steps with periodic
    validation, visualisation, and checkpointing."""
    config = get_config('train')
    agent = get_agent(config)

    # Resume from a checkpoint when requested.
    if config.cont:
        agent.load_ckpt(config.ckpt)

    train_loader = get_dataloader('train', config)
    # Endless validation stream so a single `next()` fetches a batch.
    val_stream = cycle(get_dataloader('validation', config))

    clock = agent.clock
    for epoch in range(clock.epoch, config.n_epochs):
        progress = tqdm(train_loader)
        for batch_idx, batch in enumerate(progress):
            # One optimisation step.
            outputs, losses = agent.train_func(batch)

            # Visualisation cadence (clock.step is constant within this
            # iteration, so the condition is shared with validation below).
            should_visualize = (config.vis_frequency is not None
                                and clock.step % config.vis_frequency == 0)
            if should_visualize:
                agent.visualize_batch(batch, "train", outputs=outputs)

            progress.set_description("EPOCH[{}][{}]".format(epoch, batch_idx))
            progress.set_postfix(
                OrderedDict({k: v.item() for k, v in losses.items()}))

            # Periodic validation step.
            if clock.step % config.val_frequency == 0:
                batch = next(val_stream)
                outputs, losses = agent.val_func(batch)
                if should_visualize:
                    agent.visualize_batch(batch, "validation",
                                          outputs=outputs)

            clock.tick()

        agent.update_learning_rate()
        clock.tock()

        # Periodic numbered checkpoint plus a rolling 'latest' one.
        if clock.epoch % config.save_frequency == 0:
            agent.save_ckpt()
        agent.save_ckpt('latest')
def train_megadepth(args):
    """Train a CAPS model on MegaDepth, validating periodically and
    checkpointing only when the validation loss improves.

    Args:
        args: parsed experiment options (outdir, exp_name, logdir,
            n_iters, log_scalar_interval, save_interval, ...).
    """
    # save a copy for the current args in out_folder
    out_folder = os.path.join(args.outdir, args.exp_name)
    os.makedirs(out_folder, exist_ok=True)
    f = os.path.join(out_folder, 'args.txt')
    with open(f, 'w') as file:
        for arg in vars(args):
            attr = getattr(args, arg)
            file.write('{} = {}\n'.format(arg, attr))
    # tensorboard writer
    tb_log_dir = os.path.join(args.logdir, args.exp_name)
    print('tensorboard log files are stored in {}'.format(tb_log_dir))
    writer = SummaryWriter(tb_log_dir)
    # megadepth data loader: endless train stream plus a finite test split
    train_loader = MegaDepthLoader(args).load_data()
    print(len(train_loader))
    test_loader = MegaDepthLoader(args, "test").load_data()
    train_loader_iterator = iter(cycle(train_loader))
    # define model
    model = CAPSModel(args)
    start_step = model.start_step
    # training loop; val_total_loss tracks the best validation loss so far
    val_total_loss = 1e6
    # NOTE(review): start_time is set but never read below.
    start_time = time.time()
    for step in range(start_step + 1, start_step + args.n_iters + 1):
        data = next(train_loader_iterator)
        model.set_input(data, 'train')
        model.optimize_parameters()
        if step % args.log_scalar_interval == 0:
            model.write_summary(writer, step)
        # Validation + best-model checkpointing every save_interval steps.
        if step % args.save_interval == 0 and step > 0:
            # Average validation loss over the whole test split.
            val_loss = 0.
            for test_sample in tqdm(test_loader):
                model.set_input(test_sample, 'test')
                val_loss += model.validate()
            val_loss /= len(test_loader)
            # Save only when validation improves on the best seen so far.
            if val_loss < val_total_loss:
                model.save_model(step)
                val_total_loss = val_loss
            print("%s | Step: %d, Loss: %2.5f" %
                  ("val_caps", step, val_total_loss))
            writer.add_scalar('val:total_loss', val_loss, step)
def data_loader2(args):
    """Build the endless training loader for ``args.dataset``.

    Supported datasets: omniglot, vgg, animal, cub.  animal and cub reuse
    the vgg loader with their own resize sizes.

    Returns:
        (endless batch iterator, dataset length, number of classes)

    Raises:
        Exception: for an unrecognized ``args.dataset``.
    """
    # dataset -> (loader factory, resize_size, crop_size).  Replaces four
    # branches that each rebuilt the identical data_root path.
    table = {
        "omniglot": (omniglot_data_loader, 32, 28),
        "vgg": (vgg_data_loader, 84, 64),
        "animal": (vgg_data_loader, 84, 64),
        "cub": (vgg_data_loader, 72, 64),
    }
    if args.dataset not in table:
        # Message now lists every accepted dataset (cub was missing).
        raise Exception("Enter omniglot or vgg or animal or cub")
    loader_fn, resize_size, crop_size = table[args.dataset]

    # Dataset layout: <root>/<dataset>_<n_shot>/<n_shot>
    data_root = os.path.join(args.dataset_root,
                             args.dataset + "_%s" % args.n_shot, args.n_shot)
    print(data_root)

    train_loader, s_dlen, num_classes = loader_fn(root=data_root,
                                                  batch_size=64,
                                                  resize_size=resize_size,
                                                  crop_size=crop_size)
    print("%s data_loader" % args.dataset)

    # Wrap in an endless iterator so training can draw batches forever.
    train_loader = iter(utils.cycle(train_loader))
    return train_loader, s_dlen, num_classes
# ---- Data loaders ---------------------------------------------------------
print('Preparing the dataloaders...')
# DATASET_PARAMETERS['collate_fn'] is a factory; specialise it with the
# allowed frame-count range before handing it to the voice loader.
collate_fn = DATASET_PARAMETERS['collate_fn'](
    DATASET_PARAMETERS['nframe_range'])
voice_loader = DataLoader(voice_dataset,
                          shuffle=True,
                          drop_last=True,
                          batch_size=DATASET_PARAMETERS['batch_size'],
                          num_workers=DATASET_PARAMETERS['workers_num'],
                          collate_fn=collate_fn)
face_loader = DataLoader(face_dataset,
                         shuffle=True,
                         drop_last=True,
                         batch_size=DATASET_PARAMETERS['batch_size'],
                         num_workers=DATASET_PARAMETERS['workers_num'])
# Endless iterators so the training loop can draw voice/face batches
# indefinitely without epoch bookkeeping.
voice_iterator = iter(cycle(voice_loader))
face_iterator = iter(cycle(face_loader))

# networks, Fe, Fg, Fd (f+d), Fc (f+c)
print('Initializing networks...')
# 'e' is created with train=False — presumably a frozen embedder; the
# generator and the f/d/c heads are trainable.
e_net, e_optimizer = get_network('e', NETWORKS_PARAMETERS, train=False)
g_net, g_optimizer = get_network('g', NETWORKS_PARAMETERS, train=True)
f_net, f_optimizer = get_network('f', NETWORKS_PARAMETERS, train=True)
d_net, d_optimizer = get_network('d', NETWORKS_PARAMETERS, train=True)
c_net, c_optimizer = get_network('c', NETWORKS_PARAMETERS, train=True)

# label for real/fake faces: one column per sample in a fixed-size batch
# (drop_last=True above guarantees every batch has this exact size).
real_label = torch.full((DATASET_PARAMETERS['batch_size'], 1), 1)
fake_label = torch.full((DATASET_PARAMETERS['batch_size'], 1), 0)
# Meters for recording the training status
def worker(gpu, P):
    """Per-GPU distributed training worker for the GAN.

    Sets up gin config, the NCCL process group, the sharded data loader,
    generator/discriminator (optionally restored or fine-tuned), their
    optimizers and logging, then hands control to ``train``.

    Args:
        gpu: local GPU index on this node.
        P: mutable options namespace; several fields (rank, comment,
            starting_step, logdir, eval_seed, augment_fn) are written here.
    """
    torch.cuda.set_device(gpu)
    print("Use GPU: {} for training".format(gpu))
    # Defaults first, then the experiment-specific gin file on top.
    gin.parse_config_files_and_bindings([
        'configs/defaults/gan.gin',
        'configs/defaults/augment.gin',
        P.gin_config
    ], [])
    options = get_options_dict()
    # Global rank = node rank * GPUs-per-node + local GPU index.
    P.rank = P.rank * P.n_gpus_per_node + gpu
    dist.init_process_group(backend='nccl',
                            init_method=f'tcp://127.0.0.1:{P.port}',
                            world_size=P.world_size,
                            rank=P.rank)
    train_set, _, image_size = get_dataset(dataset=options['dataset'])
    # Sampler shards the dataset across ranks; the loader itself must not
    # shuffle (the sampler already does).
    train_sampler = DistributedSampler(train_set)
    # Split the global batch size across the GPUs of this node.
    options['batch_size'] = options['batch_size'] // P.n_gpus_per_node
    # MoCo-style architectures need fixed-size batches (queue bookkeeping
    # — assumed; confirm against the moco discriminator implementation).
    drop_last = 'moco' in P.architecture
    train_loader = DataLoader(train_set, shuffle=False, pin_memory=True,
                              num_workers=P.workers,
                              batch_size=options['batch_size'],
                              drop_last=drop_last,
                              sampler=train_sampler)
    # Endless, epoch-aware stream (project cycle() handles set_epoch when
    # distributed=True — assumed, confirm).
    train_loader = cycle(train_loader, distributed=True)
    generator, discriminator = get_architecture(P.architecture, image_size,
                                                P=P)
    # Resume both networks from a previous run of the same experiment.
    if P.resume:
        print(f"=> Loading checkpoint from '{P.resume}'")
        state_G = torch.load(f"{P.resume}/gen.pt")
        state_D = torch.load(f"{P.resume}/dis.pt")
        generator.load_state_dict(state_G)
        discriminator.load_state_dict(state_D)
    # Fine-tuning: load only the discriminator (non-strict) and re-init
    # its final linear head.
    if P.finetune:
        print(f"=> Loading checkpoint for fine-tuning: '{P.finetune}'")
        state_D = torch.load(f"{P.finetune}/dis.pt")
        discriminator.load_state_dict(state_D, strict=False)
        discriminator.reset_parameters(discriminator.linear)
        P.comment += 'ft'
    # BatchNorm must be synchronised across ranks before wrapping in DDP.
    generator = nn.SyncBatchNorm.convert_sync_batchnorm(generator)
    discriminator = nn.SyncBatchNorm.convert_sync_batchnorm(discriminator)
    generator = generator.cuda()
    discriminator = discriminator.cuda()
    G_optimizer = optim.Adam(generator.parameters(),
                             lr=options["lr"], betas=options["beta"])
    D_optimizer = optim.Adam(discriminator.parameters(),
                             lr=options["lr_d"], betas=options["beta"])
    # Only rank 0 writes logs; other ranks get a no-op logger.
    if P.rank == 0:
        if P.resume:
            logger = Logger(None, resume=P.resume)
        else:
            logger = Logger(f'{P.filename}{P.comment}',
                            subdir=f'gan/{P.gin_stem}/{P.architecture}')
        shutil.copy2(P.gin_config, f"{logger.logdir}/config.gin")
        P.logdir = logger.logdir
        P.eval_seed = np.random.randint(10000)
    else:
        class DummyLogger(object):
            # No-op stand-in so non-zero ranks can call logger methods.
            def log(self, string):
                pass

            def log_dirname(self, string):
                pass

        logger = DummyLogger()
    # Restore optimizer state and the step counter when resuming;
    # otherwise log the model summary and start from step 1.
    if P.resume:
        opt = torch.load(f"{P.resume}/optim.pt")
        G_optimizer.load_state_dict(opt['optim_G'])
        D_optimizer.load_state_dict(opt['optim_D'])
        logger.log(f"Checkpoint loaded from '{P.resume}'")
        P.starting_step = opt['epoch'] + 1
    else:
        logger.log(generator)
        logger.log(discriminator)
        logger.log(
            f"# Params - G: {count_parameters(generator)}, D: {count_parameters(discriminator)}"
        )
        logger.log(options)
        P.starting_step = 1
        if P.finetune:
            logger.log(f"Checkpoint loaded from '{P.finetune}'")
    # Make sure every rank finished setup before training starts.
    dist.barrier()
    P.augment_fn = get_augment(mode=P.aug).cuda()
    generator = DistributedDataParallel(generator, device_ids=[gpu],
                                        broadcast_buffers=False)
    # Re-expose the module-level sampler on the DDP wrapper for callers.
    generator.sample_latent = generator.module.sample_latent
    discriminator = DistributedDataParallel(discriminator, device_ids=[gpu],
                                            broadcast_buffers=False)
    train(P, options, P.train_fn,
          models=(generator, discriminator),
          optimizers=(G_optimizer, D_optimizer),
          train_loader=train_loader, logger=logger)
def train(logdir, model_name, iterations, checkpoint_interval, batch_size,
          temperature, hidden_size, n_layers, rnn_cell, learning_rate,
          learning_rate_decay_steps, learning_rate_decay_rate):
    """Train a character/note-level RNN on the jazz dataset.

    NOTE(review): this is an exercise template — the `TODO` placeholders
    below (init_hidden, the model forward call, and the loss accumulation)
    are intentionally unimplemented; as written, `loss += None` will raise
    at runtime until they are filled in.

    Args:
        logdir: directory for checkpoints (created if missing).
        model_name: class name looked up on the `rnn` module.
        iterations: number of training steps.
        checkpoint_interval: save a checkpoint every this many steps.
        batch_size, temperature, hidden_size, n_layers, rnn_cell,
        learning_rate, learning_rate_decay_steps,
        learning_rate_decay_rate: model/optimizer hyperparameters
            (temperature and batch_size are unused in this function body).
    """
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    os.makedirs(logdir, exist_ok=True)
    dataset = dataloader.JazzDataset()
    loader = DataLoader(dataset, batch_size, shuffle=True)
    # Resolve the model class dynamically from the `rnn` module.
    model_class = getattr(rnn, model_name)
    model = model_class(hidden_size, rnn_cell, n_layers)
    optimizer = torch.optim.Adam(model.parameters(), learning_rate)
    criterion = nn.NLLLoss()
    scheduler = StepLR(optimizer,
                       step_size=learning_rate_decay_steps,
                       gamma=learning_rate_decay_rate)
    model = model.to(device)
    summary(model)
    loop = tqdm(range(0, iterations + 1), desc='Training', unit='Steps')
    # cycle(loader) restarts the finite DataLoader so zip() runs for the
    # full `iterations + 1` steps.
    for i, batch in zip(loop, cycle(loader)):
        # NOTE(review): recent PyTorch expects scheduler.step() AFTER
        # optimizer.step(); calling it first emits a warning — confirm the
        # intended schedule.
        scheduler.step()
        optimizer.zero_grad()
        batch = batch.to(device)  # shape of (batch_size, n_steps)
        c_0, h_0 = model.init_hidden(batch.shape[0])
        c_0 = c_0.to(device)
        h_0 = h_0.to(device)
        # TODO: Fill in below
        # (expected: pack c_0/h_0 into whatever state format the model's
        # forward accepts — depends on rnn_cell; confirm.)
        init_hidden = None
        hidden = init_hidden
        loss = 0.0
        # Teacher-forced unroll over the sequence, one step at a time.
        for step in range(batch.shape[1] - 1):  # n_steps - 1
            # TODO: Fill in below
            # Forward model.
            # x=semgent of batch, corresponds to step,
            # hidden=state of hidden nodes of last/or initial step
            pred, hidden = model(x=None, hidden=None)
            # TODO: Fill in below
            # Hint: use criterion. See torch.nn.NLLLoss() function
            loss += None
        loss.backward()
        optimizer.step()
        # print loss
        loop.set_postfix_str("loss: {:.3f}".format(loss))
        # save model
        if i % checkpoint_interval == 0:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'model_name': model_name,
                    'hparams': dict(hidden_size=hidden_size,
                                    n_layers=n_layers,
                                    rnn_cell=rnn_cell)
                }, os.path.join(logdir, 'model-{:d}.pt'.format(i)))
def main():
    """Train the video-captioning decoder (optionally with a reconstructor)
    on MSVD, with periodic train/validation/test logging and checkpoints.

    The decoder/reconstructor here are dict-like bundles exposing 'model',
    'optimizer' and 'lambda_reg' entries (built by the project helpers).
    """
    a = argparse.ArgumentParser()
    a.add_argument("--debug", "-D", action="store_true")
    a.add_argument("--loss_only", "-L", action="store_true")
    args = a.parse_args()

    print("MODEL ID: {}".format(C.id))
    print("DEBUG MODE: {}".format(['OFF', 'ON'][args.debug]))

    # Tensorboard only outside debug mode.
    if not args.debug:
        summary_writer = SummaryWriter(C.log_dpath)

    """ Load DataLoader """
    dataset = MSVD(C)
    vocab = dataset.vocab
    # Endless training stream; the loop below stops at C.n_iterations.
    train_data_loader = iter(cycle(dataset.train_data_loader))
    print(
        '#vocabs: {} ({}), #words: {} ({}). Trim words which appear less than {} times.'
        .format(vocab.n_vocabs, vocab.n_vocabs_untrimmed, vocab.n_words,
                vocab.n_words_untrimmed, C.min_count))

    """ Build Models """
    decoder = build_decoder(vocab.n_vocabs)
    if C.use_recon:
        reconstructor = build_reconstructor()
        # Trainable scalar weighting the reconstruction loss.
        lambda_recon = torch.autograd.Variable(torch.tensor(1.),
                                               requires_grad=True)
        lambda_recon = lambda_recon.to(C.device)

    """ Train """
    train_loss = 0
    if C.use_recon:
        train_dec_loss = 0
        train_rec_loss = 0
    # Select the reconstruction forward pass once, up front.
    if C.reconstructor_type == "global":
        forward_reconstructor = forward_global_reconstructor
    elif C.reconstructor_type == "local":
        forward_reconstructor = forward_local_reconstructor
    else:
        raise NotImplementedError("Unknown reconstructor type '{}'".format(
            C.reconstructor_type))
    for iteration, batch in enumerate(train_data_loader, 1):
        _, encoder_outputs, targets = batch
        encoder_outputs = encoder_outputs.to(C.device)
        targets = targets.to(C.device)
        targets = targets.long()
        # Mask out padding positions when computing losses.
        target_masks = targets > C.init_word2idx['<PAD>']

        # Decoder
        decoder['model'].train()
        decoder_loss, decoder_hiddens, decoder_output_indices = forward_decoder(
            decoder, encoder_outputs, targets, target_masks,
            C.decoder_teacher_forcing_ratio)

        # Reconstructor
        if C.use_recon:
            reconstructor['model'].train()
            recon_loss = forward_reconstructor(decoder_hiddens,
                                               encoder_outputs,
                                               reconstructor)

        # Loss
        if C.use_recon:
            loss = decoder_loss + lambda_recon * recon_loss
        else:
            loss = decoder_loss

        # Backprop
        decoder['optimizer'].zero_grad()
        if C.use_recon:
            reconstructor['optimizer'].zero_grad()
        loss.backward()
        if C.use_gradient_clip:
            torch.nn.utils.clip_grad_norm_(decoder['model'].parameters(),
                                           C.gradient_clip)
        decoder['optimizer'].step()
        if C.use_recon:
            reconstructor['optimizer'].step()
            train_dec_loss += decoder_loss.item()
            train_rec_loss += recon_loss.item()
        train_loss += loss.item()

        """ Log Train Progress """
        if args.debug or iteration % C.log_every == 0:
            # Average accumulated losses per sample over the log window.
            n_trains = C.log_every * C.batch_size
            train_loss /= n_trains
            if C.use_recon:
                train_dec_loss /= n_trains
                train_rec_loss /= n_trains
            if not args.debug:
                summary_writer.add_scalar(C.tx_train_loss, train_loss,
                                          iteration)
                summary_writer.add_scalar(C.tx_lambda_decoder,
                                          decoder['lambda_reg'].item(),
                                          iteration)
                if C.use_recon:
                    summary_writer.add_scalar(C.tx_train_loss_decoder,
                                              train_dec_loss, iteration)
                    summary_writer.add_scalar(C.tx_train_loss_reconstructor,
                                              train_rec_loss, iteration)
                    summary_writer.add_scalar(
                        C.tx_lambda_reconstructor,
                        reconstructor['lambda_reg'].item(), iteration)
                    summary_writer.add_scalar(C.tx_lambda,
                                              lambda_recon.item(), iteration)
            msg = "Iter {} / {} ({:.1f}%): loss {:.5f}".format(
                iteration, C.n_iterations,
                iteration / C.n_iterations * 100, train_loss)
            if C.use_recon:
                msg += " (dec {:.5f} + rec {:.5f})".format(
                    train_dec_loss, train_rec_loss)
            print(msg)
            # Reset accumulators for the next log window.
            train_loss = 0
            if C.use_recon:
                train_dec_loss = 0
                train_rec_loss = 0

        """ Log Validation Progress """
        if args.debug or iteration % C.validate_every == 0:
            val_loss = 0
            if C.use_recon:
                val_dec_loss = 0
                val_rec_loss = 0
            gt_captions = []
            pd_captions = []
            # Fresh pass over the whole validation split.
            val_data_loader = iter(dataset.val_data_loader)
            for batch in val_data_loader:
                _, encoder_outputs, targets = batch
                encoder_outputs = encoder_outputs.to(C.device)
                targets = targets.to(C.device)
                targets = targets.long()
                target_masks = targets > C.init_word2idx['<PAD>']

                # Decoder (no teacher forcing during validation).
                decoder['model'].eval()
                decoder_loss, decoder_hiddens, decoder_output_indices = forward_decoder(
                    decoder, encoder_outputs, targets, target_masks)

                # Reconstructor
                if C.use_recon:
                    reconstructor['model'].eval()
                    recon_loss = forward_reconstructor(
                        decoder_hiddens, encoder_outputs, reconstructor)

                # Loss
                if C.use_recon:
                    loss = decoder_loss + lambda_recon * recon_loss
                else:
                    loss = decoder_loss
                if C.use_recon:
                    val_dec_loss += decoder_loss.item() * C.batch_size
                    val_rec_loss += recon_loss.item() * C.batch_size
                val_loss += loss.item() * C.batch_size

                # Decode ground-truth and predicted index sequences back
                # into sentences for the tensorboard text log.
                _, _, targets = batch
                gt_idxs = targets.cpu().numpy()
                pd_idxs = decoder_output_indices.cpu().numpy()
                gt_captions += convert_idxs_to_sentences(
                    gt_idxs, vocab.idx2word, vocab.word2idx['<EOS>'])
                pd_captions += convert_idxs_to_sentences(
                    pd_idxs, vocab.idx2word, vocab.word2idx['<EOS>'])
            n_vals = len(val_data_loader) * C.batch_size
            val_loss /= n_vals
            if C.use_recon:
                val_dec_loss /= n_vals
                val_rec_loss /= n_vals
            msg = "[Validation] Iter {} / {} ({:.1f}%): loss {:.5f}".format(
                iteration, C.n_iterations,
                iteration / C.n_iterations * 100, val_loss)
            if C.use_recon:
                msg += " (dec {:.5f} + rec {:5f})".format(
                    val_dec_loss, val_rec_loss)
            print(msg)
            if not args.debug:
                summary_writer.add_scalar(C.tx_val_loss, val_loss, iteration)
                if C.use_recon:
                    summary_writer.add_scalar(C.tx_val_loss_decoder,
                                              val_dec_loss, iteration)
                    summary_writer.add_scalar(C.tx_val_loss_reconstructor,
                                              val_rec_loss, iteration)
                caption_pairs = [(gt, pred) for gt, pred in zip(
                    gt_captions, pd_captions)]
                caption_log = "\n\n".join([
                    "[GT] {} \n[PD] {}".format(gt, pd)
                    for gt, pd in caption_pairs
                ])
                summary_writer.add_text(C.tx_predicted_captions, caption_log,
                                        iteration)

        """ Log Test Progress """
        if not args.loss_only and (args.debug
                                   or iteration % C.test_every == 0):
            pd_vid_caption_pairs = []
            score_data_loader = dataset.score_data_loader
            print("[Test] Iter {} / {} ({:.1f}%)".format(
                iteration, C.n_iterations,
                iteration / C.n_iterations * 100))
            # Score the model under every configured search method
            # (plain string, or tuple of method name plus parameters).
            for search_method in C.search_methods:
                if isinstance(search_method, str):
                    method = search_method
                    search_method_id = search_method
                if isinstance(search_method, tuple):
                    method = search_method[0]
                    search_method_id = "-".join(
                        (str(s) for s in search_method))
                scores = evaluate(C, dataset, score_data_loader,
                                  decoder['model'], search_method)
                score_summary = " ".join([
                    "{}: {:.3f}".format(score, scores[score])
                    for score in C.scores
                ])
                print("\t{}: {}".format(search_method_id, score_summary))
                if not args.debug:
                    for score in C.scores:
                        summary_writer.add_scalar(
                            C.tx_score[search_method_id][score],
                            scores[score], iteration)

        """ Save checkpoint """
        if iteration % C.save_every == 0:
            if not os.path.exists(C.save_dpath):
                os.makedirs(C.save_dpath)
            ckpt_fpath = os.path.join(
                C.save_dpath, "{}_checkpoint.tar".format(iteration))
            if C.use_recon:
                torch.save(
                    {
                        'iteration': iteration,
                        'dec': decoder['model'].state_dict(),
                        'rec': reconstructor['model'].state_dict(),
                        'dec_opt': decoder['optimizer'].state_dict(),
                        'rec_opt': reconstructor['optimizer'].state_dict(),
                        'loss': loss,
                        'config': C,
                    }, ckpt_fpath)
            else:
                torch.save(
                    {
                        'iteration': iteration,
                        'dec': decoder['model'].state_dict(),
                        'dec_opt': decoder['optimizer'].state_dict(),
                        'loss': loss,
                        'config': C,
                    }, ckpt_fpath)

        # The cycled loader never ends; stop at the configured step count.
        if iteration == C.n_iterations:
            break
def main():
    """Train the MSVD captioning decoder (optionally with a local/global
    reconstructor), logging train/validation/test metrics and saving
    periodic checkpoints.

    Unlike the dict-based variant elsewhere in this file, the decoder and
    reconstructor here are plain nn.Module objects with their own loss
    functions and optimizers, and the per-step work is delegated to the
    project helpers ``dec_step`` / ``dec_rec_step``.
    """
    a = argparse.ArgumentParser()
    a.add_argument("--debug", "-D", action="store_true")
    a.add_argument("--loss_only", "-L", action="store_true")
    args = a.parse_args()

    print("MODEL ID: {}".format(C.id))
    print("DEBUG MODE: {}".format(['OFF', 'ON'][args.debug]))

    # Tensorboard only outside debug mode.
    if not args.debug:
        summary_writer = SummaryWriter(C.log_dpath)

    """ Load DataLoader """
    MSVD = _MSVD(C)
    vocab = MSVD.vocab
    # Endless streams; training stops at C.train_n_iteration, validation
    # breaks out after C.n_val samples.
    train_data_loader = iter(cycle(MSVD.train_data_loader))
    val_data_loader = iter(cycle(MSVD.val_data_loader))
    print('n_vocabs: {} ({}), n_words: {} ({}). MIN_COUNT: {}'.format(
        vocab.n_vocabs, vocab.n_vocabs_untrimmed, vocab.n_words,
        vocab.n_words_untrimmed, C.min_count))

    """ Build Decoder """
    decoder = Decoder(model_name=C.decoder_model,
                      n_layers=C.decoder_n_layers,
                      encoder_size=C.encoder_output_size,
                      embedding_size=C.embedding_size,
                      embedding_scale=C.embedding_scale,
                      hidden_size=C.decoder_hidden_size,
                      attn_size=C.decoder_attn_size,
                      output_size=vocab.n_vocabs,
                      embedding_dropout=C.embedding_dropout,
                      dropout=C.decoder_dropout,
                      out_dropout=C.decoder_out_dropout)
    decoder = decoder.to(C.device)
    decoder_loss_func = nn.CrossEntropyLoss()
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=C.decoder_learning_rate,
                                   weight_decay=C.decoder_weight_decay,
                                   amsgrad=C.decoder_use_amsgrad)
    # Trainable regularisation scalar for the decoder.
    decoder_lambda = torch.autograd.Variable(torch.tensor(0.001),
                                             requires_grad=True)
    decoder_lambda = decoder_lambda.to(C.device)

    """ Build Reconstructor """
    if C.use_recon:
        if C.reconstructor_type == "local":
            reconstructor = LocalReconstructor(
                model_name=C.reconstructor_model,
                n_layers=C.reconstructor_n_layers,
                decoder_hidden_size=C.decoder_hidden_size,
                hidden_size=C.reconstructor_hidden_size,
                dropout=C.reconstructor_dropout,
                decoder_dropout=C.reconstructor_decoder_dropout,
                attn_size=C.reconstructor_attn_size)
        elif C.reconstructor_type == "global":
            reconstructor = GlobalReconstructor(
                model_name=C.reconstructor_model,
                n_layers=C.reconstructor_n_layers,
                decoder_hidden_size=C.decoder_hidden_size,
                hidden_size=C.reconstructor_hidden_size,
                dropout=C.reconstructor_dropout,
                decoder_dropout=C.reconstructor_decoder_dropout,
                caption_max_len=C.caption_max_len)
        else:
            raise NotImplementedError("Unknown reconstructor: {}".format(
                C.reconstructor_type))
        reconstructor = reconstructor.to(C.device)
        reconstructor_loss_func = nn.MSELoss()
        reconstructor_optimizer = optim.Adam(
            reconstructor.parameters(),
            lr=C.reconstructor_learning_rate,
            weight_decay=C.reconstructor_weight_decay,
            amsgrad=C.reconstructor_use_amsgrad)
        # Trainable regularisation scalar for the reconstructor, and a
        # trainable scalar weighting the combined loss.
        reconstructor_lambda = torch.autograd.Variable(torch.tensor(0.01),
                                                       requires_grad=True)
        reconstructor_lambda = reconstructor_lambda.to(C.device)
        loss_lambda = torch.autograd.Variable(torch.tensor(1.),
                                              requires_grad=True)
        loss_lambda = loss_lambda.to(C.device)

    """ Train """
    train_loss = 0
    if C.use_recon:
        train_dec_loss = 0
        train_rec_loss = 0
    for iteration, batch in enumerate(train_data_loader, 1):
        # One optimisation step (with or without the reconstructor path).
        if C.use_recon:
            loss, decoder_loss, _, recon_loss = dec_rec_step(
                batch, decoder, decoder_loss_func, decoder_lambda,
                decoder_optimizer, reconstructor, reconstructor_loss_func,
                reconstructor_lambda, reconstructor_optimizer, loss_lambda,
                is_train=True)
            train_dec_loss += decoder_loss
            train_rec_loss += recon_loss
        else:
            loss, _ = dec_step(batch, decoder, decoder_loss_func,
                               decoder_lambda, decoder_optimizer,
                               is_train=True)
        train_loss += loss

        """ Log Train Progress """
        if args.debug or iteration % C.log_every == 0:
            # Average the accumulated losses over the log window.
            train_loss /= C.log_every
            if C.use_recon:
                train_dec_loss /= C.log_every
                train_rec_loss /= C.log_every
            if not args.debug:
                summary_writer.add_scalar(C.tx_train_loss, train_loss,
                                          iteration)
                summary_writer.add_scalar(C.tx_lambda_decoder,
                                          decoder_lambda.item(), iteration)
                if C.use_recon:
                    summary_writer.add_scalar(C.tx_train_loss_decoder,
                                              train_dec_loss, iteration)
                    summary_writer.add_scalar(C.tx_train_loss_reconstructor,
                                              train_rec_loss, iteration)
                    summary_writer.add_scalar(C.tx_lambda_reconstructor,
                                              reconstructor_lambda.item(),
                                              iteration)
                    summary_writer.add_scalar(C.tx_lambda,
                                              loss_lambda.item(), iteration)
            if C.use_recon:
                print(
                    "Iter {} / {} ({:.1f}%): loss {:.5f} (dec {:.5f} + rec {:.5f})"
                    .format(iteration, C.train_n_iteration,
                            iteration / C.train_n_iteration * 100,
                            train_loss, train_dec_loss, train_rec_loss))
            else:
                print("Iter {} / {} ({:.1f}%): loss {:.5f}".format(
                    iteration, C.train_n_iteration,
                    iteration / C.train_n_iteration * 100, train_loss))
            # Reset accumulators for the next window.
            train_loss = 0
            if C.use_recon:
                train_dec_loss = 0
                train_rec_loss = 0

        """ Log Validation Progress """
        if args.debug or iteration % C.validate_every == 0:
            val_loss = 0
            val_dec_loss = 0
            val_rec_loss = 0
            gt_captions = []
            pd_captions = []
            for batch in val_data_loader:
                if C.use_recon:
                    loss, decoder_loss, decoder_output_indices, recon_loss = dec_rec_step(
                        batch, decoder, decoder_loss_func, decoder_lambda,
                        decoder_optimizer, reconstructor,
                        reconstructor_loss_func, reconstructor_lambda,
                        reconstructor_optimizer, loss_lambda,
                        is_train=False)
                    val_dec_loss += decoder_loss * C.batch_size
                    val_rec_loss += recon_loss * C.batch_size
                else:
                    loss, decoder_output_indices = dec_step(
                        batch, decoder, decoder_loss_func, decoder_lambda,
                        decoder_optimizer, is_train=False)
                val_loss += loss * C.batch_size

                # Decode ground-truth and predicted index sequences into
                # sentences for the text log.
                _, _, targets = batch
                gt_idxs = targets.cpu().numpy()
                pd_idxs = decoder_output_indices.cpu().numpy()
                gt_captions += convert_idxs_to_sentences(
                    gt_idxs, vocab.idx2word, vocab.word2idx['<EOS>'])
                pd_captions += convert_idxs_to_sentences(
                    pd_idxs, vocab.idx2word, vocab.word2idx['<EOS>'])
                # The validation loader is cycled (endless): stop after
                # C.n_val samples and trim to exactly that many.
                if len(pd_captions) >= C.n_val:
                    assert len(gt_captions) == len(pd_captions)
                    gt_captions = gt_captions[:C.n_val]
                    pd_captions = pd_captions[:C.n_val]
                    break
            val_loss /= C.n_val
            val_dec_loss /= C.n_val
            val_rec_loss /= C.n_val
            if C.use_recon:
                print(
                    "[Validation] Iter {} / {} ({:.1f}%): loss {:.5f} (dec {:.5f} + rec {:5f})"
                    .format(iteration, C.train_n_iteration,
                            iteration / C.train_n_iteration * 100,
                            val_loss, val_dec_loss, val_rec_loss))
            else:
                print("[Validation] Iter {} / {} ({:.1f}%): loss {:.5f}".format(
                    iteration, C.train_n_iteration,
                    iteration / C.train_n_iteration * 100, val_loss))
            # Sample a few GT/prediction pairs for tensorboard text.
            caption_pairs = [(gt, pred) for gt, pred in zip(
                gt_captions, pd_captions)]
            caption_pairs = sample_n(caption_pairs,
                                     min(C.n_val_logs, C.batch_size))
            caption_log = "\n\n".join([
                "[GT] {} \n[PD] {}".format(gt, pd)
                for gt, pd in caption_pairs
            ])
            if not args.debug:
                summary_writer.add_scalar(C.tx_val_loss, val_loss, iteration)
                if C.use_recon:
                    summary_writer.add_scalar(C.tx_val_loss_decoder,
                                              val_dec_loss, iteration)
                    summary_writer.add_scalar(C.tx_val_loss_reconstructor,
                                              val_rec_loss, iteration)
                summary_writer.add_text(C.tx_predicted_captions, caption_log,
                                        iteration)

        """ Log Test Progress """
        if not args.loss_only and (args.debug
                                   or iteration % C.test_every == 0):
            pd_vid_caption_pairs = []
            score_data_loader = MSVD.score_data_loader
            print("[Test] Iter {} / {} ({:.1f}%)".format(
                iteration, C.train_n_iteration,
                iteration / C.train_n_iteration * 100))
            # Score under every configured search method (plain string, or
            # tuple of method name plus parameters).
            for search_method in C.search_methods:
                if isinstance(search_method, str):
                    method = search_method
                    search_method_id = search_method
                if isinstance(search_method, tuple):
                    method = search_method[0]
                    search_method_id = "-".join(
                        (str(s) for s in search_method))
                scores = evaluate(C, MSVD, score_data_loader, decoder,
                                  search_method)
                score_summary = " ".join([
                    "{}: {:.3f}".format(score, scores[score])
                    for score in C.scores
                ])
                print("\t{}: {}".format(search_method_id, score_summary))
                if not args.debug:
                    for score in C.scores:
                        summary_writer.add_scalar(
                            C.tx_score[search_method_id][score],
                            scores[score], iteration)

        """ Save checkpoint """
        if iteration % C.save_every == 0:
            if not os.path.exists(C.save_dpath):
                os.makedirs(C.save_dpath)
            fpath = os.path.join(C.save_dpath,
                                 "{}_checkpoint.tar".format(iteration))
            if C.use_recon:
                torch.save(
                    {
                        'iteration': iteration,
                        'dec': decoder.state_dict(),
                        'rec': reconstructor.state_dict(),
                        'dec_opt': decoder_optimizer.state_dict(),
                        'rec_opt': reconstructor_optimizer.state_dict(),
                        'loss': loss,
                        'config': C,
                    }, fpath)
            else:
                torch.save(
                    {
                        'iteration': iteration,
                        'dec': decoder.state_dict(),
                        'dec_opt': decoder_optimizer.state_dict(),
                        'loss': loss,
                        'config': C,
                    }, fpath)

        # The cycled loader never ends; stop at the configured step count.
        if iteration == C.train_n_iteration:
            break
def main():
    """Train the CycleGAN motion model: parse CLI args, build data loaders and
    tensorboard writers, then run the epoch/batch training loop with periodic
    validation, visualization, and checkpointing."""
    parser = argparse.ArgumentParser()
    # 'continue' is a Python keyword, so the parsed value lands in args.continue_path.
    # NOTE(review): continue_path is parsed but never consumed in this function — confirm
    # whether resuming is handled elsewhere or simply unimplemented.
    parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False)
    args = parser.parse_args()
    # Restrict visible GPUs before any CUDA initialization; "cuda:0" then maps
    # to the selected physical device.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_ids)
    config.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    config.isTrain = True
    # Convenience symlink so logs are reachable under ./train_log.
    if not os.path.exists('train_log'):
        os.symlink(config.exp_dir, 'train_log')
    # get dataset; val_cycle is an endless iterator so a validation batch can be
    # drawn at any training step without manual epoch bookkeeping.
    train_loader = get_dataloader("train", batch_size=config.batch_size)
    val_loader = get_dataloader("test", batch_size=config.batch_size)
    val_cycle = cycle(val_loader)
    dataset_size = len(train_loader)
    print('The number of training motions = %d' %
          (dataset_size * config.batch_size))
    # create tensorboard writers (separate event files for train and val)
    train_tb = SummaryWriter(os.path.join(config.log_dir, 'train.events'))
    val_tb = SummaryWriter(os.path.join(config.log_dir, 'val.events'))
    # get model
    net = CycleGANModel(config)
    net.print_networks(True)
    # start training
    clock = TrainClock()
    net.train()
    for e in range(config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # re-enter train mode each batch because the validation branch
            # below switches the net to eval mode.
            net.train()
            net.set_input(
                data)  # unpack data from dataset and apply preprocessing
            net.optimize_parameters(
            )  # calculate loss functions, get gradients, update network weights
            # get loss
            losses_values = net.get_current_losses()
            # update tensorboard
            train_tb.add_scalars('train_loss',
                                 losses_values,
                                 global_step=clock.step)
            # visualize: invert preprocessing and plot one motion per output key;
            # trailing 'A' in the key selects the 'h' phase, otherwise 'nh'.
            if clock.step % config.visualize_frequency == 0:
                motion_dict = net.infer()
                for k, v in motion_dict.items():
                    phase = 'h' if k[-1] == 'A' else 'nh'
                    motion3d = train_loader.dataset.preprocess_inv(
                        v.detach().cpu().numpy()[0], phase)
                    img = plot_motion(motion3d, phase)
                    train_tb.add_image(k, img, global_step=clock.step)
            pbar.set_description("EPOCH[{}][{}/{}]".format(
                e, b, len(train_loader)))
            pbar.set_postfix(OrderedDict(losses_values))
            # validation: single forward pass on the next cycled val batch
            if clock.step % config.val_frequency == 0:
                net.eval()
                data = next(val_cycle)
                net.set_input(data)
                net.forward()
                losses_values = net.get_current_losses()
                val_tb.add_scalars('val_loss',
                                   losses_values,
                                   global_step=clock.step)
                # visualize
                if clock.step % config.visualize_frequency == 0:
                    motion_dict = net.infer()
                    for k, v in motion_dict.items():
                        phase = 'h' if k[-1] == 'A' else 'nh'
                        motion3d = val_loader.dataset.preprocess_inv(
                            v.detach().cpu().numpy()[0], phase)
                        img = plot_motion(motion3d, phase)
                        val_tb.add_image(k, img, global_step=clock.step)
            clock.tick()
            # log the current learning rate to tensorboard
            lr = net.optimizers[0].param_groups[0]['lr']
            train_tb.add_scalar("learning_rate", lr, global_step=clock.step)
        if clock.epoch % config.save_frequency == 0:
            net.save_networks(epoch=e)
        clock.tock()
        net.update_learning_rate(
        )  # update learning rates at the end of every epoch.
    def train(self):
        """StyleGAN2-style training loop: non-saturating logistic GAN loss with
        lazy R1 regularization on the discriminator and lazy path-length
        regularization on the generator, plus EMA of generator weights and
        periodic logging/checkpointing."""
        # build model
        self.build_model()
        loader = data_loader(self.data_root, self.batch_size, img_size=512)
        # endless iterator so next() never raises StopIteration
        loader = iter(cycle(loader))
        mean_path_length = torch.tensor(0.0).to(dev)
        average_path_length = torch.tensor(0.0).to(dev)
        for iters in tqdm(range(self.max_iter + 1)):
            real_img = next(loader)
            real_img = real_img.to(dev)
            # ===============================================================#
            #                   1. Train the discriminator                   #
            # ===============================================================#
            self.set_phase(mode="train")
            self.reset_grad()
            # Compute loss with real images (softplus(-x) = non-saturating
            # logistic loss for real samples).
            d_real_out = self.D(real_img)
            d_loss_real = F.softplus(-d_real_out).mean()
            # Compute loss with fake images. 2*batch_size latents are sampled;
            # make_latents presumably mixes them into per-layer dlatents
            # (style mixing) — confirm against its implementation.
            z = torch.randn(2 * self.batch_size, self.z_dim).to(dev)
            w = self.M(z)
            dlatents_in = make_latents(w, self.batch_size,
                                       len(self.channel_list))
            fake_img, _ = self.G(dlatents_in)
            d_fake_out = self.D(fake_img.detach())
            d_loss_fake = F.softplus(d_fake_out).mean()
            d_loss = d_loss_real + d_loss_fake
            # Lazy R1: apply the gradient penalty only every r1_iter steps,
            # scaled by r1_iter to keep the effective strength unchanged.
            if iters % self.r1_iter == 0:
                real_img.requires_grad = True
                d_real_out = self.D(real_img)
                r1_loss = self.r1_regularization(d_real_out, real_img)
                r1_loss = self.r1_lambda / 2 * r1_loss * self.r1_iter
                d_loss = d_loss + r1_loss
            d_loss.backward()
            self.d_optimizer.step()
            # ===============================================================#
            #                     2. Train the Generator                     #
            # ===============================================================#
            if (iters + 1) % self.n_critic == 0:
                self.reset_grad()
                # Compute loss with fake images.
                z = torch.randn(2 * self.batch_size, self.z_dim).to(dev)
                w = self.M(z)
                dlatents_in = make_latents(w, self.batch_size,
                                           len(self.channel_list))
                fake_img, _ = self.G(dlatents_in)
                d_fake_out = self.D(fake_img)
                g_loss = F.softplus(-d_fake_out).mean()
                # Lazy path-length regularization, likewise rescaled by its
                # application interval.
                if iters % self.ppl_iter == 0:
                    path_loss, mean_path_length, path_length = self.path_length_regularization(
                        fake_img, dlatents_in, mean_path_length)
                    path_loss = path_loss * self.ppl_iter * self.ppl_lambda
                    g_loss = g_loss + path_loss
                    mean_path_length = mean_path_length.mean()
                    average_path_length += mean_path_length.mean()
                # Backward and optimize.
                g_loss.backward()
                self.g_optimizer.step()
            # ===============================================================#
            #                 3. Save parameters and images                  #
            # ===============================================================#
            # self.lr_update()
            torch.cuda.synchronize()
            self.set_phase(mode="test")
            self.exponential_moving_average()
            # Print total loss.
            # NOTE(review): g_loss / r1_loss / path_loss are only (re)assigned on
            # their own intervals — if print_loss_iter is not aligned with
            # n_critic/r1_iter/ppl_iter this prints stale values (or raises
            # NameError at iters==0 when n_critic > 1). Confirm the configured
            # intervals make this safe.
            if iters % self.print_loss_iter == 0:
                print(
                    "Iter : [%d/%d], D_loss : [%.3f, %.3f, %.3f.], G_loss : %.3f, R1_reg : %.3f, "
                    "PPL_reg : %.3f, Path_length : %.3f" %
                    (iters, self.max_iter, d_loss.item(), d_loss_real.item(),
                     d_loss_fake.item(), g_loss.item(), r1_loss.item(),
                     path_loss.item(), mean_path_length.item()))
            # Save generated images from a fixed latent for visual comparison.
            if iters % self.save_image_iter == 0:
                fixed_w = self.M(self.fixed_z)
                self.save_img(iters, fixed_w)
            # Save the G and D parameters.
            if iters % self.save_parameter_iter == 0:
                self.save_model(iters)
            # Save the logs on the tensorboard.
            if iters % self.save_log_iter == 0:
                self.writer.add_scalar('g_loss/g_loss', g_loss.item(), iters)
                self.writer.add_scalar('d_loss/d_loss_total', d_loss.item(),
                                       iters)
                self.writer.add_scalar('d_loss/d_loss_real',
                                       d_loss_real.item(), iters)
                self.writer.add_scalar('d_loss/d_loss_fake',
                                       d_loss_fake.item(), iters)
                self.writer.add_scalar('reg/r1_regularization',
                                       r1_loss.item(), iters)
                self.writer.add_scalar('reg/ppl_regularization',
                                       path_loss.item(), iters)
                self.writer.add_scalar('length/path_length',
                                       mean_path_length.item(), iters)
                # running average of path length over the number of PPL updates
                self.writer.add_scalar(
                    'length/avg_path_length',
                    average_path_length.item() / (iters // self.ppl_iter + 1),
                    iters)
train_x, val_x, test_x = data.normalize([train_x, val_x, test_x]) train_xx = th.split(train_x, [len(x) for x in train_xx]) train_datasets = [D.TensorDataset(x) for x in train_xx] train_loader = D.DataLoader(D.TensorDataset(train_x, train_y), args.bsi) val_loader = D.DataLoader(D.TensorDataset(val_x, val_y), args.bsi) test_loader = D.DataLoader(D.TensorDataset(test_x, test_y), args.bsi) pclass_list = [len(y) / len(train_y) for y in train_yy] n_classes = len(train_yy) if len(args.bst) == n_classes: bs_list = args.bst elif len(args.bst) == 1: bs_list = [args.bst[0]] * n_classes else: raise RuntimeError() train_loaders = [utils.cycle(D.DataLoader(ds, bs, shuffle=True)) \ for ds, bs in zip(train_datasets, bs_list)] if args.model == 'linear': model = th.nn.Linear(train_x.size(1), n_classes) elif args.model == 'mlp': model = mlp.MLP([train_x.size(1), 64, 64, 64, n_classes], th.relu, bn=True) elif args.model == 'resnet': model = resnet.ResNet(18, n_classes)[args.model] else: raise RuntimeError() dev = th.device('cpu') if args.gpu < 0 else th.device('cuda:%d' % args.gpu) model = model.to(dev) params = list(model.parameters()) kwargs = {'params' : params, 'lr' : args.lr, 'weight_decay' : args.wd} opt = {'sgd' : optim.SGD(**kwargs),
    def __init__(self, cfg, local_cfg):
        """Set up a distributed (DDP) GAN trainer for LiDAR data: builds G/D and
        an EMA copy of G, augmentation, data loaders, losses, optimizers, AMP
        scaler, and optionally restores state from a checkpoint.

        Args:
            cfg: global experiment config (model/dataset/solver sections).
            local_cfg: per-process config (gpu id, batch size, workers).
        """
        self.cfg = cfg
        self.local_cfg = local_cfg
        self.device = torch.device(self.local_cfg.gpu)

        # setup models; G_ema starts as an exact copy of G (decay 0.0 copies
        # weights in-place) and is kept in eval mode.
        self.cfg.model.gen.shape = self.cfg.dataset.shape
        self.cfg.model.dis.shape = self.cfg.dataset.shape
        self.G = define_G(self.cfg)
        self.D = define_D(self.cfg)
        self.G_ema = define_G(self.cfg)
        self.G_ema.eval()
        ema_inplace(self.G_ema, self.G, 0.0)
        self.A = DiffAugment(policy=self.cfg.solver.augment)
        # differentiable LiDAR projection model driven by the dataset geometry
        self.lidar = LiDAR(
            num_ring=cfg.dataset.shape[0],
            num_points=cfg.dataset.shape[1],
            min_depth=cfg.dataset.min_depth,
            max_depth=cfg.dataset.max_depth,
            angle_file=osp.join(cfg.dataset.root, "angles.pt"),
        )
        self.lidar.eval()
        self.G.to(self.device)
        self.D.to(self.device)
        self.G_ema.to(self.device)
        self.A.to(self.device)
        self.lidar.to(self.device)
        # only G and D are wrapped in DDP; G_ema is updated locally per rank
        self.G = DDP(self.G,
                     device_ids=[self.local_cfg.gpu],
                     broadcast_buffers=False)
        self.D = DDP(self.D,
                     device_ids=[self.local_cfg.gpu],
                     broadcast_buffers=False)
        if dist.get_rank() == 0:
            print("minibatch size per gpu:", self.local_cfg.batch_size)
            print("number of gradient accumulation:",
                  self.cfg.solver.num_accumulation)
        # EMA half-life expressed in kimg, converted to a per-step decay
        self.ema_decay = 0.5**(self.cfg.solver.batch_size /
                               (self.cfg.solver.smoothing_kimg * 1000))

        # training dataset: DistributedSampler shards data across ranks, so
        # shuffle=False here; cycle() makes the loader endless.
        self.dataset = define_dataset(self.cfg.dataset, phase="train")
        self.loader = torch.utils.data.DataLoader(
            self.dataset,
            batch_size=self.local_cfg.batch_size,
            shuffle=False,
            num_workers=self.local_cfg.num_workers,
            pin_memory=self.cfg.pin_memory,
            sampler=torch.utils.data.distributed.DistributedSampler(
                self.dataset),
            drop_last=True,
        )
        self.loader = cycle(self.loader)

        # validation dataset (not sharded)
        self.val_dataset = define_dataset(self.cfg.dataset, phase="val")
        self.val_loader = torch.utils.data.DataLoader(
            self.val_dataset,
            batch_size=self.local_cfg.batch_size,
            shuffle=True,
            num_workers=self.local_cfg.num_workers,
            pin_memory=self.cfg.pin_memory,
            drop_last=False,
        )

        # loss criterion: "gp"/"pl" entries are enabled as flags when their
        # configured weight is positive.
        # NOTE(review): criterion["pl"] is set to the bool True here, yet the
        # resume branch below assigns `self.criterion["pl"].pl_ema = ...`,
        # which would raise AttributeError on a bool — confirm whether "pl"
        # should hold a regularizer object instead.
        self.loss_weight = dict(self.cfg.solver.loss)
        self.criterion = {}
        self.criterion["gan"] = GANLoss(self.cfg.solver.gan_mode).to(
            self.device)
        if "gp" in self.loss_weight and self.loss_weight["gp"] > 0.0:
            self.criterion["gp"] = True
        if "pl" in self.loss_weight and self.loss_weight["pl"] > 0.0:
            self.criterion["pl"] = True
            self.pl_ema = torch.tensor(0.0).to(self.device)
        if dist.get_rank() == 0:
            print("loss: {}".format(tuple(self.criterion.keys())))

        # optimizer (separate Adam instances with per-network learning rates)
        self.optim_G = optim.Adam(
            params=self.G.parameters(),
            lr=self.cfg.solver.lr.alpha.gen,
            betas=(self.cfg.solver.lr.beta1, self.cfg.solver.lr.beta2),
        )
        self.optim_D = optim.Adam(
            params=self.D.parameters(),
            lr=self.cfg.solver.lr.alpha.dis,
            betas=(self.cfg.solver.lr.beta1, self.cfg.solver.lr.beta2),
        )

        # automatic mixed precision
        self.enable_amp = cfg.enable_amp
        self.scaler = torch.cuda.amp.GradScaler(enabled=self.enable_amp)
        if dist.get_rank() == 0 and self.enable_amp:
            print("amp enabled")

        # resume from checkpoints; checkpoint "step" counts samples, hence the
        # division by the global batch size to recover the iteration index.
        self.start_iteration = 0
        if self.cfg.resume is not None:
            state_dict = torch.load(self.cfg.resume, map_location="cpu")
            self.start_iteration = state_dict[
                "step"] // self.cfg.solver.batch_size
            self.G.module.load_state_dict(state_dict["G"])
            self.D.module.load_state_dict(state_dict["D"])
            self.G_ema.load_state_dict(state_dict["G_ema"])
            self.optim_G.load_state_dict(state_dict["optim_G"])
            self.optim_D.load_state_dict(state_dict["optim_D"])
            if "pl" in self.criterion:
                self.criterion["pl"].pl_ema = state_dict["pl_ema"].to(
                    self.device)

        # fixed latent batch for visual validation across training
        self.fixed_noise = torch.randn(self.local_cfg.batch_size,
                                       cfg.model.gen.in_ch,
                                       device=self.device)
    def iter_train_epoch(self):
        """Run one training epoch over paired batches from the acoustic-only
        loader and the (cycled) target loader, with gradient accumulation.

        Each iteration backpropagates two losses: a purely acoustic loss
        (CTC + quantity + phone CE) and a target loss that additionally has a
        token-level CE term. Optimizer stepping happens every
        `accumulate_grad_batch` iterations (and at epoch end).

        Returns:
            float: accumulated loss normalized by the total number of
            non-padding target tokens.
        """
        loader_acoustic = self.batchiter_acoustic
        # cycle() lets the smaller target set repeat; zip() below stops at the
        # end of loader_acoustic, which therefore defines the epoch length.
        loader = iter(cycle(self.batchiter_train))
        self.model.train()
        timer = utils.Timer()
        timer.tic()
        tot_loss = 0.
        tot_phone = 0
        tot_token = 0
        tot_sequence = 0
        tot_phone_acoustic = 0
        tot_sequence_acoustic = 0
        n_accu_batch = self.accumulate_grad_batch
        tot_iter_num = len(loader_acoustic)
        for niter, ((utts_acoustic, data_acoustic),
                    (utts, data)) in enumerate(zip(loader_acoustic, loader)):
            niter += 1  # 1-based iteration counter
            # zero grads only at the start of an accumulation window
            if n_accu_batch == self.accumulate_grad_batch:
                self.optimizer.zero_grad()
            feats_acoustic, len_feat_acoustic, phones_acoustic, len_phone_acoustic = \
                (i.to(self.device) for i in data_acoustic)
            feats, len_feat, phones, len_phone, target_in, targets, paddings = \
                (i.to(self.device) for i in data)
            # one-time shape/content dump for sanity checking the data pipeline
            if niter == 1 and self.epoch == 1:
                print(
                    'feats_acoustic:\t{}\nlen_feat_acoustic:\t{}\nphones_acoustic:\t{}\nlen_phone_acoustic:\t{}'
                    .format(feats_acoustic.size(), len_feat_acoustic.size(),
                            phones_acoustic.size(),
                            len_phone_acoustic.size()))
                print(
                    'feats_acoustic:\n{}\nlen_feat_acoustic:\t{}\nphones_acoustic:\t{}\nlen_phone_acoustic:\t{}'
                    .format(feats_acoustic[0], len_feat_acoustic[0],
                            phones_acoustic[0], len_phone_acoustic[0]))
                print(
                    'feats:\t{}\nlen_feat:\t{}\nphones:\t{}\nlen_phone:\t{}\ntargets:\t{}\npaddings:\t{}'
                    .format(feats.size(), len_feat.size(), phones.size(),
                            len_phone.size(), target_in.size(),
                            targets.size(), paddings.size()))
                print(
                    'feats:\n{}\nlen_feat:\t{}\nphones:\t{}\nlen_phone:\t{}\ntarget_in:\t{}\ntargets:\t{}\npaddings:\t{}'
                    .format(feats[0], len_feat[0], phones[0], len_phone[0],
                            target_in[0], targets[0], paddings[0]))
            timer.tic()
            # ---- general acoustic loss (phone-level only) ----
            n_phone_acoustic = len_phone_acoustic.sum()
            tot_phone_acoustic += n_phone_acoustic
            n_sequence_acoustic = len(utts_acoustic)
            tot_sequence_acoustic += n_sequence_acoustic
            loss_ctc_acoustic, loss_qua_acoustic, loss_ce_phone_acoustic = \
                self.model(feats_acoustic, len_feat_acoustic, phones_acoustic, len_phone_acoustic,
                           label_smooth=self.label_smooth)
            print(timer.toc())  # NOTE(review): debug timing print — remove for production?
            # normalize: CE by phone count, CTC/quantity by sequence count
            loss_ce_phone_acoustic = loss_ce_phone_acoustic.sum(
            ) / n_phone_acoustic
            loss_ctc_acoustic = loss_ctc_acoustic.sum() / n_sequence_acoustic
            loss_qua_acoustic = loss_qua_acoustic.sum() / n_sequence_acoustic
            loss_acoustic = loss_ce_phone_acoustic + \
                self.lambda_qua * loss_qua_acoustic + \
                self.lambda_ctc * loss_ctc_acoustic
            # backward immediately so both graphs need not coexist in memory
            loss_acoustic.backward()
            # ---- target loss (phone + token level) ----
            loss_ctc, loss_qua, loss_ce_phone, loss_ce_target = \
                self.model(feats, len_feat, phones, len_phone, target_in, targets, paddings,
                           label_smooth=self.label_smooth)
            print(timer.toc())  # NOTE(review): debug timing print
            n_phone = len_phone.sum()
            n_token = torch.sum(1 - paddings).float()
            tot_phone += n_phone
            tot_token += n_token
            n_sequence = len(utts)
            tot_sequence += n_sequence
            loss_ce_phone = loss_ce_phone.sum() / n_phone
            loss_ce_target = loss_ce_target.sum() / n_token
            loss_ctc = loss_ctc.sum() / n_sequence
            loss_qua = loss_qua.sum() / n_sequence
            loss = loss_ce_phone + loss_ce_target + \
                self.lambda_qua * loss_qua + \
                self.lambda_ctc * loss_ctc
            loss.backward()
            # NOTE(review): accumulating the live tensor keeps autograd history
            # across iterations; `tot_loss += loss.detach()` (or .item()) would
            # avoid unnecessary memory growth — confirm and fix.
            tot_loss += loss
            n_accu_batch -= 1
            # step the optimizer at the end of an accumulation window or epoch
            if n_accu_batch == 0 or niter == tot_iter_num:
                self.step += 1  # to be consistent with metric
                clip_grad_norm_(self.model.parameters(), self.grad_max_norm)
                self.lr_scheduler.step()  # then, update learning rate
                self.lr_scheduler.set_lr(self.optimizer, self.init_lr)
                self.optimizer.step()
                n_accu_batch = self.accumulate_grad_batch
            else:
                continue  # only log on optimizer steps
            if niter % self.print_inteval == 0:
                print(
                    '''Epoch {} | Step {} | acoustic {}/{} {} | target {} | lr: {:.3e} | sent/sec: {:.1f}
                    acoustic cur_all_loss: {:.3f} loss_ce_phone: {:.3f} loss_ctc: {:.3f} loss_qua: {:.3f}
                    target cur_all_loss: {:.3f} loss_ce_phone: {:.3f} loss_ctc: {:.3f} loss_qua: {:.3f} loss_ce_char: {:.3f}
                    '''.format(
                        self.epoch, self.step, niter, tot_iter_num,
                        list(feats_acoustic.size()), list(feats.size()),
                        list(self.optimizer.param_groups)[0]["lr"],
                        tot_sequence_acoustic / timer.toc(), loss_acoustic,
                        loss_ce_phone_acoustic, loss_ctc_acoustic,
                        loss_qua_acoustic, loss, loss_ce_phone, loss_ctc,
                        loss_qua, loss_ce_target,
                    ),
                    flush=True)
        torch.cuda.empty_cache()
        time.sleep(2)
        return (tot_loss / tot_token).item()
def main():
    """Fine-tune per-fold audio classifiers with noisy data and sharpened
    pseudo-labels (semi-supervised), logging metrics and saving checkpoints.

    Reads the curated/noisy/test CSVs, one-hot encodes labels, builds
    sharpened pseudo-labels for the noisy set, then for each selected fold
    resumes a pretrained model and trains with three loaders: curated,
    noisy (cycled), and a weighted semi-supervised mix.
    """
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")
    labels = df_test.columns[1:].tolist()

    # multi-hot encode the comma-joined label strings
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)
    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    # BUG FIX: test paths were previously built from df_train['fname'],
    # pairing test rows with training file names (and mis-sized data).
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # calc sampling weight: down-weight noisy rows so curated and noisy data
    # contribute equally in expectation
    df_train['weight'] = 1
    df_noisy['weight'] = len(df_train) / len(df_noisy)

    # generate pseudo labels with temperature sharpening, renormalized per row
    tmp = np.load("../input/pseudo_label/preds_noisy.npy").mean(axis=(0, 1))
    tmp = tmp**TEMPERATURE
    tmp = tmp / tmp.sum(axis=1)[:, np.newaxis]
    df_noisy_pseudo = df_noisy.copy()
    df_noisy_pseudo[labels] = tmp

    # fold splitting (same seed for both frames so folds are reproducible)
    folds = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_train))))
    folds_noisy = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_noisy))))

    # Training
    log_columns = [
        'epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy', 'semi_mse',
        'val_bce', 'val_lwlrap', 'time'
    ]
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST:
            continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model, resuming from the pretrained fold checkpoint
        model = ResNet(NUM_CLASS).cuda()
        model.load_state_dict(
            torch.load("{}/weight_fold_{}_epoch_512.pth".format(
                LOAD_DIR, fold + 1)))

        # prepare data loaders: curated training split with augmentation
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(
            df_train_fold['path'],
            df_train_fold[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        train_loader = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )

        # validation split (no augmentation, batch of 1 for full-length clips)
        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(
            df_valid['path'],
            df_valid[labels].values,
        )
        valid_loader = DataLoader(
            dataset_valid,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
        )

        # noisy loader, cycled so it can be drawn alongside the curated loader
        dataset_noisy = MelDataset(
            df_noisy['path'],
            df_noisy[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        noisy_loader = DataLoader(
            dataset_noisy,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )
        noisy_itr = cycle(noisy_loader)

        # semi-supervised mix: curated split + pseudo-labeled noisy training
        # fold, sampled according to the per-row weights computed above
        df_semi = pd.concat([
            df_train.iloc[ids_train_split],
            df_noisy_pseudo.iloc[folds_noisy[fold][0]]
        ]).reset_index(drop=True)
        semi_sampler = torch.utils.data.sampler.WeightedRandomSampler(
            df_semi['weight'].values, len(df_semi))
        dataset_semi = MelDataset(
            df_semi['path'],
            df_semi[labels].values,
            crop=CROP_LENGTH,
            crop_mode='additional',
            crop_rate=CROP_RATE,
            mixup=True,
            freqmask=True,
            gain=True,
        )
        semi_loader = DataLoader(
            dataset_semi,
            batch_size=BATCH_SIZE,
            shuffle=False,  # sampler and shuffle are mutually exclusive
            num_workers=1,
            pin_memory=True,
            sampler=semi_sampler,
        )
        semi_itr = cycle(semi_loader)

        # set optimizer and cosine-annealing schedule (restart every NUM_CYCLE epochs)
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=LR[0])
        scheduler = CosineLR(optimizer,
                             step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE,
                             tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi = train(
                (train_loader, noisy_itr, semi_itr), model, optimizer,
                scheduler, epoch)
            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Semi MSE: {:.4f} ".format(mse_semi) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights (weights only at cycle boundaries)
            train_log_epoch = pd.DataFrame([[
                epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, mse_semi,
                val_bce, val_lwlrap, endtime
            ]],
                                           columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(
                OUTPUT_DIR, fold + 1),
                             index=False)
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(
                    model.state_dict(),
                    "{}/weight_fold_{}_epoch_{}.pth".format(
                        OUTPUT_DIR, fold + 1, epoch + 1))
    def update(self, delta_time, scene):
        """Advance the projectile one frame: move it, spawn trail particles,
        resolve collisions with other projectiles and balls, and shrink it
        until it expires.

        Args:
            delta_time: elapsed time since last frame (scales movement/decay).
            scene: game scene holding projectiles, balls, and particle layers.
        """
        # Destroy projectile once its radius has decayed to zero
        if self.radius == 0:
            self.destroy()
            return
        # Calculate the new position; speed scales with remaining size so the
        # projectile slows as it shrinks. X is clamped to the 0..250 playfield
        # (accounting for the radius), Y to 0..350.
        percentage = self.radius / self.max_radius
        speed = 0.47 * percentage * delta_time
        self.x = clamp(self.x + speed * math.cos(math.radians(self.angle)),
                       self.radius, 250 - self.radius)
        self.y = clamp(self.y + speed * math.sin(math.radians(self.angle)), 0,
                       350)
        # Reverse horizontal direction on wall hit; cycle() wraps the mirrored
        # angle back into the [-180, 180) range.
        if self.x == self.radius or self.x == 250 - self.radius:
            self.angle = cycle(180 - self.angle, -180, 180)
        # Create the trail particles at a jittered position near the center.
        # NOTE(review): the y jitter divides by 27 where x divides by 2 —
        # possibly a typo for `/ 2`; confirm the intended offset.
        x = self.x + random.randint(0, round(self.radius)) - self.radius / 2
        y = self.y + random.randint(0, round(self.radius)) - self.radius / 27
        image_speed = random.uniform(0.005, 0.01)
        scene.bottom_particles.add(
            Dust(x, y, Colors.LIGHT_GREY, 0, 0, image_speed))
        # Detect collisions between projectiles
        for projectile in scene.projectiles:
            # Skip projectiles fired by the same entity (same color)
            if self.color == projectile.color:
                continue
            # Circle-vs-circle test on center distance
            distance_x = projectile.x - self.x
            distance_y = projectile.y - self.y
            distance = math.hypot(distance_x, distance_y)
            if distance <= projectile.radius + self.radius:
                # The smaller projectile is destroyed; equal sizes destroy both
                if self.radius < projectile.radius:
                    self.collide = True
                elif projectile.radius < self.radius:
                    projectile.collide = True
                else:
                    self.collide = True
                    projectile.collide = True
        # Detect collisions with balls
        for ball in scene.balls:
            distance_x = ball.x - self.x
            distance_y = ball.y - self.y
            distance = math.hypot(distance_x, distance_y)
            if distance <= ball.radius + self.radius:
                # A projectile that hits a ball is always destroyed
                self.collide = True
                # Knock the ball away along the collision direction; the /3
                # damps its previous velocity before adding the impulse
                collision_angle = math.atan2(distance_y, distance_x)
                ball.hspeed = (ball.hspeed /
                               3) + 1.5 * percentage * math.cos(collision_angle)
                ball.vspeed = (ball.vspeed /
                               3) + 1.5 * percentage * math.sin(collision_angle)
                # Slash particles fanning out from the impact point
                x, y = polygon_coordinate(self.x, self.y,
                                          math.degrees(collision_angle),
                                          self.radius)
                for _ in range(random.randint(2, 4)):
                    angle = math.degrees(collision_angle) + random.randint(
                        -60, 60)
                    duration = percentage * random.randint(50, 100)
                    scene.top_particles.add(
                        Slash(x, y, Colors.LIGHT_WHITE, angle, duration))
                # Slash particles on the mirrored side
                for _ in range(random.randint(1, 2)):
                    angle = math.degrees(-collision_angle) + random.randint(
                        -60, 60)
                    duration = percentage * random.randint(30, 60)
                    scene.top_particles.add(
                        Slash(x, y, Colors.LIGHT_WHITE, angle, duration))
                # Play a random collision sound
                random.choice(self.snd_collide).play()
        # Resolve a collision flagged above: shake the screen, burst dust
        # particles roughly along the travel direction, then destroy.
        if self.collide:
            scene.shake = True
            radius = round(self.radius / 2)
            for _ in range(radius):
                x = self.x + random.randint(-radius, radius)
                y = self.y + random.randint(-radius, radius)
                hspeed = 1.5 * math.cos(math.radians(
                    self.angle)) + random.randint(-2, 2)
                vspeed = 1.5 * math.sin(math.radians(
                    self.angle)) + random.randint(-2, 2)
                image_speed = random.uniform(0.005, 0.01)
                scene.top_particles.add(
                    Dust(x, y, self.color, hspeed, vspeed, image_speed))
            # Destroy projectile by collision
            self.destroy()
            return
        # Otherwise shrink the projectile toward zero over time
        self.radius = approach(self.radius, 0, 0.015 * delta_time)
def train(logdir, model_name, iterations, checkpoint_interval, batch_size,
          temperature, hidden_size, n_layers, rnn_cell, learning_rate,
          learning_rate_decay_steps, learning_rate_decay_rate):
    """Train an RNN next-step predictor on the jazz dataset.

    Runs `iterations` optimization steps over an endlessly cycled DataLoader,
    unrolling the model across each sequence and summing per-step NLL losses,
    with periodic checkpointing to `logdir`.

    Args:
        logdir: directory for checkpoints (created if missing).
        model_name: class name looked up on the `rnn` module.
        iterations: number of training steps.
        checkpoint_interval: save a checkpoint every this many steps.
        batch_size: DataLoader batch size.
        temperature: sampling temperature (unused during training; kept for
            interface compatibility with the sampling entry point).
        hidden_size, n_layers, rnn_cell: model hyperparameters.
        learning_rate, learning_rate_decay_steps, learning_rate_decay_rate:
            Adam LR and StepLR decay schedule.
    """
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    os.makedirs(logdir, exist_ok=True)

    dataset = dataloader.JazzDataset()
    loader = DataLoader(dataset, batch_size, shuffle=True)

    model_class = getattr(rnn, model_name)
    model = model_class(hidden_size, rnn_cell, n_layers)
    optimizer = torch.optim.Adam(model.parameters(), learning_rate)
    criterion = nn.NLLLoss()
    scheduler = StepLR(optimizer,
                       step_size=learning_rate_decay_steps,
                       gamma=learning_rate_decay_rate)
    model = model.to(device)
    summary(model)

    loop = tqdm(range(0, iterations + 1), desc='Training', unit='Steps')
    for i, batch in zip(loop, cycle(loader)):
        optimizer.zero_grad()
        batch = batch.to(device)  # shape of (batch_size, sequence_length)

        c_0, h_0 = model.init_hidden(batch.shape[0])
        hidden = (c_0.to(device), h_0.to(device))

        # Teacher forcing: at each step feed column `step` and score the
        # prediction against column `step + 1`, accumulating the loss.
        loss = 0.0
        for step in range(batch.shape[1] - 1):  # sequence_length - 1
            # Slice the whole batch at once instead of rebuilding a Python
            # list per step (the old list-of-scalars round-tripped through
            # CPU via torch.tensor()).
            x = batch[:, step].long()
            pred, hidden = model(x=x, hidden=hidden)
            target = batch[:, step + 1].long()
            loss += criterion(pred, target)

        loss.backward()
        optimizer.step()
        # FIX: StepLR must be stepped after optimizer.step() (PyTorch >= 1.1);
        # the original stepped it first, skipping the initial LR value.
        scheduler.step()

        # print loss (item() avoids formatting a live tensor)
        loop.set_postfix_str("loss: {:.3f}".format(loss.item()))

        # save model
        if i % checkpoint_interval == 0:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'model_name': model_name,
                    'hparams': dict(hidden_size=hidden_size,
                                    n_layers=n_layers,
                                    rnn_cell=rnn_cell)
                }, os.path.join(logdir, 'model-{:d}.pt'.format(i)))
# ---- data pipeline setup ----
# Visualization samples get a plain resize + tensor transform (no training
# augmentation).
visual_transform = transforms.Compose(
    [transforms.Resize((256, 256)),
     transforms.ToTensor()])
train_set = SingleDataset(train_path, train_transform)
visual_set = SingleDataset(visual_path, visual_transform)
normal_set = SingleDataset(normal_path, transform=train_transform)
train_loader = DataLoader(train_set,
                          num_workers=0,
                          batch_size=8,
                          shuffle=True)
visual_loader = DataLoader(visual_set,
                           num_workers=0,
                           batch_size=8,
                           shuffle=True)
normal_loader = DataLoader(normal_set,
                           num_workers=0,
                           batch_size=8,
                           shuffle=True)
# Endless iterator over normal samples so a batch can be drawn per training
# step regardless of dataset length.
iter_normal_loader = iter(cycle(normal_loader))

# %%
# config network, loss function and optimizer
# Single-channel (grayscale) generator/discriminator pair; 'patch' selects a
# PatchGAN discriminator.
input_nc = 1
output_nc = 1
ngf = 96  # generator base feature width
ndf = 96  # discriminator base feature width
netG = define_G(input_nc, output_nc, ngf, model_name,
                norm='instance').cuda()
netD = define_D(input_nc, ndf, 'patch', norm='instance').cuda()
# NOTE(review): netF_path is defined here but not loaded in this span —
# presumably the checkpoint is loaded into netF further below; confirm.
netF_path = r'D:\model\covid-19\validating, epoch%3A40, loss%3A0.0503, acc%3A0.9819.pt'
# ImageNet-pretrained ResNet-18 with its classifier head replaced by a
# 2-class linear layer (no bias).
netF = models.resnet18(pretrained=True)
num_features = netF.fc.in_features
netF.fc = nn.Linear(num_features, 2, bias=False)
def main():
    """Train per-fold audio classifiers on curated data with an auxiliary
    (cycled) noisy-data loader, logging metrics and saving cyclic checkpoints.

    Reads the curated/noisy/test CSVs, multi-hot encodes labels, splits the
    curated set with KFold, and trains each selected fold from scratch.
    """
    # load table data
    df_train = pd.read_csv("../input/train_curated.csv")
    df_noisy = pd.read_csv("../input/train_noisy.csv")
    df_test = pd.read_csv("../input/sample_submission.csv")
    labels = df_test.columns[1:].tolist()

    # multi-hot encode the comma-joined label strings
    for label in labels:
        df_train[label] = df_train['labels'].apply(lambda x: label in x)
        df_noisy[label] = df_noisy['labels'].apply(lambda x: label in x)
    df_train['path'] = "../input/mel128/train/" + df_train['fname']
    # BUG FIX: test paths were previously built from df_train['fname'],
    # pairing test rows with training file names (and mis-sized data).
    df_test['path'] = "../input/mel128/test/" + df_test['fname']
    df_noisy['path'] = "../input/mel128/noisy/" + df_noisy['fname']

    # fold splitting (seeded for reproducibility)
    folds = list(
        KFold(n_splits=NUM_FOLD, shuffle=True,
              random_state=SEED).split(np.arange(len(df_train))))

    # Training
    log_columns = [
        'epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy', 'val_bce',
        'val_lwlrap', 'time'
    ]
    for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
        if fold + 1 not in FOLD_LIST:
            continue
        print("fold: {}".format(fold + 1))
        train_log = pd.DataFrame(columns=log_columns)

        # build model
        model = ResNet(NUM_CLASS).cuda()

        # prepare data loaders: curated training split with augmentation
        df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
        dataset_train = MelDataset(
            df_train_fold['path'],
            df_train_fold[labels].values,
            crop=CROP_LENGTH,
            crop_mode='random',
            mixup=True,
            freqmask=True,
            gain=True,
        )
        train_loader = DataLoader(
            dataset_train,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )

        # validation split (no augmentation, batch of 1 for full-length clips)
        df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
        dataset_valid = MelDataset(
            df_valid['path'],
            df_valid[labels].values,
        )
        valid_loader = DataLoader(
            dataset_valid,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
        )

        # noisy loader, cycled so a noisy batch can be drawn per training step
        dataset_noisy = MelDataset(
            df_noisy['path'],
            df_noisy[labels].values,
            crop=CROP_LENGTH,
            crop_mode='random',
            mixup=True,
            freqmask=True,
            gain=True,
        )
        noisy_loader = DataLoader(
            dataset_noisy,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=1,
            pin_memory=True,
        )
        noisy_itr = cycle(noisy_loader)

        # set optimizer and cosine-annealing schedule (restart every NUM_CYCLE epochs)
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=LR[0])
        scheduler = CosineLR(optimizer,
                             step_size_min=LR[1],
                             t0=len(train_loader) * NUM_CYCLE,
                             tmult=1)

        # training
        for epoch in range(NUM_EPOCH):
            # train for one epoch
            bce, lwlrap, bce_noisy, lwlrap_noisy = train(
                (train_loader, noisy_itr), model, optimizer, scheduler, epoch)
            # evaluate on validation set
            val_bce, val_lwlrap = validate(valid_loader, model)

            # print log
            endtime = time.time() - starttime
            print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH) +
                  "CE: {:.4f} ".format(bce) +
                  "LwLRAP: {:.4f} ".format(lwlrap) +
                  "Noisy CE: {:.4f} ".format(bce_noisy) +
                  "Noisy LWLRAP: {:.4f} ".format(lwlrap_noisy) +
                  "Valid CE: {:.4f} ".format(val_bce) +
                  "Valid LWLRAP: {:.4f} ".format(val_lwlrap) +
                  "sec: {:.1f}".format(endtime))

            # save log and weights (weights only at cycle boundaries)
            train_log_epoch = pd.DataFrame([[
                epoch + 1, bce, lwlrap, bce_noisy, lwlrap_noisy, val_bce,
                val_lwlrap, endtime
            ]],
                                           columns=log_columns)
            train_log = pd.concat([train_log, train_log_epoch])
            train_log.to_csv("{}/train_log_fold{}.csv".format(
                OUTPUT_DIR, fold + 1),
                             index=False)
            if (epoch + 1) % NUM_CYCLE == 0:
                torch.save(
                    model.state_dict(),
                    "{}/weight_fold_{}_epoch_{}.pth".format(
                        OUTPUT_DIR, fold + 1, epoch + 1))
    def get_epoch_iterator(self, **kwargs):
        """Return an iterator over merged (labeled, unlabeled) example dicts.

        The labeled stream is cycled so it repeats for as long as the
        unlabeled stream (which defines the epoch length); each pair of
        dicts is merged via self.mergedicts.
        """
        # NOTE(review): the labeled side passes the *uncalled* bound method
        # plus kwargs to `cycle`, while the unlabeled side calls the method —
        # this only works if `cycle` is a custom helper that re-invokes the
        # callable each pass; if it is itertools.cycle this raises TypeError.
        # Confirm which `cycle` is imported here.
        labeled = cycle(self.ds_labeled.get_epoch_iterator, **kwargs)
        unlabeled = self.ds_unlabeled.get_epoch_iterator(**kwargs)
        # imap stops at the shorter stream, i.e. one unlabeled epoch
        return imap(self.mergedicts, labeled, unlabeled)
    def train(self):
        """Progressive-growing GAN training (WGAN-GP): trains each resolution
        step in turn, fading the new layers in via `alpha`, with per-step
        iteration budgets, EMA of generator weights, and periodic
        logging/checkpointing."""
        # build model
        self.build_model()
        for step in range(len(self.channel_list)):
            # halve the batch at high resolutions to fit in memory
            if step > 4:
                self.batch_size = self.batch_size // 2
            # resolution doubles per step: 4, 8, 16, ... (2 * 2**(step+1))
            loader = data_loader(self.data_root,
                                 self.batch_size,
                                 img_size=2 * (2**(step + 1)))
            loader = iter(cycle(loader))
            # iteration budget grows with resolution
            if step == 0 or step == 1 or step == 2:
                self.max_iter = 20000
            elif step == 3 or step == 4 or step == 5:
                self.max_iter = 50000
            else:
                self.max_iter = 100000
            # fade-in coefficient for the newly added layers at this step
            alpha = 0.0
            for iters in range(self.max_iter + 1):
                real_img = next(loader)
                real_img = real_img.to(dev)
                # ===============================================================#
                #                   1. Train the discriminator                   #
                # ===============================================================#
                self.set_phase(mode="train")
                self.reset_grad()
                # Compute loss with real images (WGAN critic: maximize real score).
                d_real_out = self.D(real_img, step, alpha)
                d_loss_real = -d_real_out.mean()
                # Compute loss with fake images.
                z = torch.rand(self.batch_size, self.z_dim, 1, 1).to(dev)
                fake_img = self.G(z, step, alpha)
                d_fake_out = self.D(fake_img.detach(), step, alpha)
                d_loss_fake = d_fake_out.mean()
                # Compute loss for gradient penalty on random real/fake
                # interpolates (WGAN-GP).
                beta = torch.rand(self.batch_size, 1, 1, 1).to(dev)
                x_hat = (beta * real_img.data +
                         (1 - beta) * fake_img.data).requires_grad_(True)
                d_x_hat_out = self.D(x_hat, step, alpha)
                d_loss_gp = self.gradient_penalty(d_x_hat_out, x_hat)
                # Backward and optimize.
                d_loss = d_loss_real + d_loss_fake + self.lambda_gp * d_loss_gp
                d_loss.backward()
                self.d_optimizer.step()
                # ===============================================================#
                #                     2. Train the Generator                     #
                # ===============================================================#
                if (iters + 1) % self.n_critic == 0:
                    self.reset_grad()
                    # Compute loss with fake images (reusing z from above).
                    fake_img = self.G(z, step, alpha)
                    d_fake_out = self.D(fake_img, step, alpha)
                    g_loss = -d_fake_out.mean()
                    # Backward and optimize.
                    g_loss.backward()
                    self.g_optimizer.step()
                # ===============================================================#
                #                 3. Save parameters and images                  #
                # ===============================================================#
                # self.lr_update()
                torch.cuda.synchronize()
                # fade-in completes halfway through this step's iterations.
                # NOTE(review): alpha keeps growing past 1.0 afterwards —
                # presumably G/D clamp it internally; confirm.
                alpha += 1 / (self.max_iter // 2)
                self.set_phase(mode="test")
                self.exponential_moving_average()
                # Print total loss
                if iters % self.print_loss_iter == 0:
                    print(
                        "Step : [%d/%d], Iter : [%d/%d], D_loss : [%.3f, %.3f, %.3f., %.3f], G_loss : %.3f"
                        % (step, len(self.channel_list) - 1, iters,
                           self.max_iter, d_loss.item(), d_loss_real.item(),
                           d_loss_fake.item(), d_loss_gp.item(),
                           g_loss.item()))
                # Save generated images from the fixed latent.
                if iters % self.save_image_iter == 0:
                    self.save_img(iters, self.fixed_z, step)
                # Save the G and D parameters.
                if iters % self.save_parameter_iter == 0:
                    self.save_model(iters, step)
                # Save the logs on the tensorboard.
                if iters % self.save_log_iter == 0:
                    self.writer.add_scalar('g_loss/g_loss', g_loss.item(),
                                           iters)
                    self.writer.add_scalar('d_loss/d_loss_total',
                                           d_loss.item(), iters)
                    self.writer.add_scalar('d_loss/d_loss_real',
                                           d_loss_real.item(), iters)
                    self.writer.add_scalar('d_loss/d_loss_fake',
                                           d_loss_fake.item(), iters)
                    self.writer.add_scalar('d_loss/d_loss_gp',
                                           d_loss_gp.item(), iters)
def main():
    """CLI entry point: build config and agent, then run the epoch-based
    train/validate loop with periodic visualization and checkpointing."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue', dest='cont', action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt', type=str, default='latest', required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis', action='store_true', default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # Experiment configuration derived from the CLI arguments.
    config = get_config(args)
    print(config)

    # Network and its training agent.
    agent = get_agent(config)
    print(agent.net)

    # Optionally resume from a saved checkpoint.
    if args.cont:
        agent.load_ckpt(args.ckpt)

    # One loader for training, one for end-of-epoch evaluation, and a
    # separate endlessly-cycled test loader for per-step validation so the
    # evaluation loader is never partially consumed.
    train_loader = get_dataloader(PHASE_TRAINING, batch_size=config.batch_size,
                                  num_workers=config.num_workers)
    val_loader = get_dataloader(PHASE_TESTING, batch_size=config.batch_size,
                                num_workers=config.num_workers)
    val_loader_step = cycle(get_dataloader(PHASE_TESTING,
                                           batch_size=config.batch_size,
                                           num_workers=config.num_workers))

    clock = agent.clock
    for epoch in range(clock.epoch, config.nr_epochs):
        progress = tqdm(train_loader)
        for batch_idx, batch in enumerate(progress):
            # One optimization step on this batch.
            outputs, losses = agent.train_func(batch)

            if args.vis and clock.step % config.visualize_frequency == 0:
                agent.visualize_batch(batch, PHASE_TRAINING, outputs)

            progress.set_description("EPOCH[{}][{}]".format(epoch, batch_idx))
            progress.set_postfix(
                OrderedDict({k: v.item() for k, v in losses.items()}))

            # Periodic single-batch validation on the cycled test loader.
            if clock.step % config.val_frequency == 0:
                batch = next(val_loader_step)
                outputs, losses = agent.val_func(batch)
                if args.vis and clock.step % config.visualize_frequency == 0:
                    agent.visualize_batch(batch, PHASE_TESTING, outputs)

            clock.tick()

        # End-of-epoch bookkeeping.
        agent.evaluate(val_loader)
        agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            agent.save_ckpt()

    agent.save_ckpt('latest')
poptorch_model = trainingModel(model, opts, optimizer=optimizer) # Compile model logger("---------- Compilation/Loading from Cache Started ---------") start_compile = time.perf_counter() datum = get_generated_datum(config) poptorch_model.compile(*datum) duration_compilation = time.perf_counter() - start_compile logger(f"Compiled/Loaded model in {duration_compilation} secs") logger("-----------------------------------------------------------") # Training loop logger("--------------------- Training Started --------------------") factor = config.gradient_accumulation * config.batches_per_step start_train = time.perf_counter() loader = cycle(loader) train_iterator = tqdm(range(steps_finished, config.training_steps), desc="Training", disable=config.disable_progress_bar) for step in train_iterator: start_step = time.perf_counter() outputs = poptorch_model(*next(loader)) scheduler.step() poptorch_model.setOptimizer(optimizer) step_length = time.perf_counter() - start_step step_throughput = config.samples_per_step / step_length train_iterator.set_description( f"Step: {step} / {config.training_steps-1} - " f"LR: {scheduler.get_last_lr()[0]:.2e} - " f"Loss: {outputs[0].div(factor).mean().item():3.3f} - "
    # NOTE(review): fragment — these are the trailing arguments of a dataset
    # constructor whose opening call sits above this chunk.
    df_noisy['path'],
    df_noisy[labels].values,
    crop=CROP_LENGTH,
    crop_mode='random',
    mixup=True,
    freqmask=True,
    gain=True,
)
noisy_loader = DataLoader(
    dataset_noisy,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)
# Endless iterator over the noisy set, consumed alongside the clean loader.
noisy_itr = cycle(noisy_loader)

# set optimizer and loss
# Only parameters with requires_grad participate (frozen layers excluded).
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=LR[0])
# Cosine schedule: anneal from LR[0] down to LR[1] over NUM_CYCLE epochs.
scheduler = CosineLR(optimizer, step_size_min=LR[1],
                     t0=len(train_loader) * NUM_CYCLE, tmult=1)

# training
min_val_lwlrap = 0
trigger = 0
for epoch in range(NUM_EPOCH):
    # train for one epoch
    # NOTE(review): truncated here — the epoch body continues past this chunk.
def worker(P):
    """Set up and launch GAN training for one experiment.

    Parses gin configs, builds loaders/models/optimizers, optionally
    restores from ``P.resume`` or partially loads ``P.finetune``, creates
    the run logger, and hands everything to ``train``. ``P`` is mutated
    in place (d_reg_every, ema_start_k, accum, augment_fn, comment,
    logdir, eval_seed, starting_step).
    """
    gin.parse_config_files_and_bindings([
        'configs/defaults/gan.gin', 'configs/defaults/augment.gin',
        P.gin_config
    ], [])
    options = get_options_dict()
    train_set, _, image_size = get_dataset(dataset=options['dataset'])
    train_loader = DataLoader(train_set, shuffle=True, pin_memory=True,
                              num_workers=P.workers,
                              batch_size=options['batch_size'],
                              drop_last=True)
    # Endless iterator: the step-based train loop draws batches forever.
    train_loader = cycle(train_loader)

    # Disabling lazy regularization means regularizing every D step.
    if P.no_lazy:
        P.d_reg_every = 1
    if P.ema_start_k is None:
        P.ema_start_k = P.halflife_k
    # EMA decay chosen so the weight half-life is halflife_k * 1000 samples.
    P.accum = 0.5**(options['batch_size'] / (P.halflife_k * 1000))

    generator, discriminator = get_architecture(P.architecture, image_size, P=P)
    # Second generator instance holds the exponential moving average.
    g_ema, _ = get_architecture(P.architecture, image_size, P=P)

    # Full resume: restore G, D, and the EMA generator.
    if P.resume:
        print(f"=> Loading checkpoint from '{P.resume}'")
        state_G = torch.load(f"{P.resume}/gen.pt")
        state_D = torch.load(f"{P.resume}/dis.pt")
        state_Ge = torch.load(f"{P.resume}/gen_ema.pt")
        generator.load_state_dict(state_G)
        discriminator.load_state_dict(state_D)
        g_ema.load_state_dict(state_Ge)

    # Fine-tuning: load only D (non-strict) and re-init its final linear.
    if P.finetune:
        print(f"=> Loading checkpoint for fine-tuning: '{P.finetune}'")
        state_D = torch.load(f"{P.finetune}/dis.pt")
        discriminator.load_state_dict(state_D, strict=False)
        discriminator.reset_parameters(discriminator.linear)
        P.comment += 'ft'

    generator = generator.cuda()
    discriminator = discriminator.cuda()
    P.augment_fn = get_augment(mode=P.aug).cuda()
    # Combined G+D module (with augmentation) for joint forward passes.
    GD = G_D(generator, discriminator, P.augment_fn).cuda()
    g_ema = g_ema.cuda()
    g_ema.eval()

    G_optimizer = optim.Adam(generator.parameters(),
                             lr=options["lr"], betas=options["beta"])
    D_optimizer = optim.Adam(discriminator.parameters(),
                             lr=options["lr_d"], betas=options["beta"])

    # Logger: reuse the resumed run's directory, or derive a descriptive
    # run name from the hyper-parameters.
    if P.resume:
        logger = Logger(None, resume=P.resume)
    else:
        _desc = f"R{P.lbd_r1}_mix{P.style_mix}_H{P.halflife_k}"
        if P.halflife_lr > 0:
            _desc += f"_lr{P.halflife_lr / 1000000:.1f}M"
        _desc += f"_NoLazy" if P.no_lazy else "_Lazy"
        logger = Logger(f'{P.filename}_{_desc}{P.comment}',
                        subdir=f'gan_dp/st_{P.gin_stem}/{P.architecture}')
        # Keep a copy of the exact config used for this run.
        shutil.copy2(P.gin_config, f"{logger.logdir}/config.gin")
    P.logdir = logger.logdir
    P.eval_seed = np.random.randint(10000)

    if P.resume:
        # Restore optimizer state and continue from the saved step.
        opt = torch.load(f"{P.resume}/optim.pt")
        G_optimizer.load_state_dict(opt['optim_G'])
        D_optimizer.load_state_dict(opt['optim_D'])
        logger.log(f"Checkpoint loaded from '{P.resume}'")
        P.starting_step = opt['epoch'] + 1
    else:
        logger.log(generator)
        logger.log(discriminator)
        logger.log(
            f"# Params - G: {count_parameters(generator)}, D: {count_parameters(discriminator)}"
        )
        logger.log(options)
        P.starting_step = 1

    logger.log(f"Use G moving average: {P.accum}")
    if P.finetune:
        logger.log(f"Checkpoint loaded from '{P.finetune}'")

    GD = nn.DataParallel(GD)
    train(P, options,
          models=(generator, discriminator, GD, g_ema),
          optimizers=(G_optimizer, D_optimizer),
          train_loader=train_loader,
          logger=logger)
    # NOTE(review): fragment — the dict-dispatch dataset selector opens above
    # this chunk (its result presumably binds x, y), and the elif chain at the
    # bottom is cut off.
    'mnist': data.load_binary_mnist
}[args.ds]()
x, y = data.shuffle(x, y)
# Partition into three splits (a/b/c), each with negative/positive halves.
[[[ax_neg, ax_pos], [ay_neg, ay_pos]],
 [[bx_neg, bx_pos], [by_neg, by_pos]],
 [[cx_neg, cx_pos], [cy_neg, cy_pos]]] = data.partition(x, y, args.ptt)
# Re-concatenate each split positive-first, then normalize features.
ax, bx, cx = th.cat([ax_pos, ax_neg]), th.cat([bx_pos, bx_neg]), th.cat([cx_pos, cx_neg])
ax, bx, cx = data.normalize([ax, bx, cx])
ay, by, cy = th.cat([ay_pos, ay_neg]), th.cat([by_pos, by_neg]), th.cat([cy_pos, cy_neg])
# Recover the normalized positive/negative halves of split a by position
# (positives were concatenated first).
ax_pos, ax_neg = ax[:len(ax_pos)], ax[len(ax_pos):]
pos, neg = D.TensorDataset(ax_pos), D.TensorDataset(ax_neg)
# Endless class-balanced loaders over split a's positives and negatives.
pos_loader = utils.cycle(D.DataLoader(pos, args.bs_pos, shuffle=True))
neg_loader = utils.cycle(D.DataLoader(neg, args.bs_neg, shuffle=True))
# Plain (finite) loaders for inference over each full split.
a_loader = D.DataLoader(D.TensorDataset(ax, ay), args.bsi)
b_loader = D.DataLoader(D.TensorDataset(bx, by), args.bsi)
c_loader = D.DataLoader(D.TensorDataset(cx, cy), args.bsi)
# Empirical class priors on split a.
p0 = len(ax_neg) / len(ax)
p1 = 1 - p0
# Binary classifier head (2 outputs) selected by CLI flag.
if args.model == 'linear':
    model = th.nn.Linear(ax_pos.size(1), 2)
elif args.model == 'mlp':
    model = mlp.MLP([ax_pos.size(1), 64, 64, 64, 2], th.relu, bn=True)
elif args.model == 'resnet':
    model = resnet.ResNet(18, 2)
    # NOTE(review): truncated here — any further elif/else branches are
    # outside this chunk.