def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir('logs_eval', args.exp_name)
    logger = create_logger(args.path_helper['log_path'], phase='test')

    gen_net = Generator(args=args).cuda()

    # load checkpoint
    print(f'=> resuming from {args.load_path}')
    checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint_best1.pth')
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    gen_net.load_state_dict(checkpoint['gen_state_dict'])
    logger.info(args)

    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': 0,
    }

    # sample one batch of images and dump them to a buffer directory
    fid_buffer_dir = os.path.join(args.path_helper['log_path'], 'fid_buffer')
    os.makedirs(fid_buffer_dir, exist_ok=True)
    z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (args.eval_batch_size, args.latent_dim)))
    gen_imgs = gen_net(z).mul_(127.5).add_(127.5).clamp_(0.0, 255.0).permute(
        0, 2, 3, 1).to('cpu', torch.uint8).numpy()
    for img_idx, img in enumerate(gen_imgs):
        file_name = os.path.join(fid_buffer_dir, f'b{img_idx}.png')
        imsave(file_name, img)
    print('Images saved at: ' + fid_buffer_dir)
def main():
    args = cfg.parse_args()
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.benchmark = True

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()

    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(np.random.normal(0, 1, (16, args.latent_dim)))

    # load checkpoint
    print(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        print(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        print(f'=> loaded checkpoint {checkpoint_file}')

    print(args)
    imgs = validate_cp(fixed_z, gen_net, n_row=4)
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    imsave(os.path.join(args.save_path, 'test_result.png'), imgs)
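# A minimal sketch (a toy nn.Linear stands in for the real Generator; file
# names are illustrative) of the two checkpoint layouts the loaders in these
# scripts accept: a full training checkpoint dict keyed by
# 'avg_gen_state_dict'/'epoch', or a bare state_dict saved on its own.
import torch
import torch.nn as nn

toy_gen = nn.Linear(8, 8)  # hypothetical stand-in for Generator
torch.save({'avg_gen_state_dict': toy_gen.state_dict(), 'epoch': 7},
           'ckpt_full.pth')                        # layout 1: training checkpoint
torch.save(toy_gen.state_dict(), 'ckpt_bare.pth')  # layout 2: bare state_dict

for path in ('ckpt_full.pth', 'ckpt_bare.pth'):
    ckpt = torch.load(path)
    if 'avg_gen_state_dict' in ckpt:  # same branch as above
        toy_gen.load_state_dict(ckpt['avg_gen_state_dict'])
    else:
        toy_gen.load_state_dict(ckpt)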
def main():
    args = cfg.parse_args()
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert args.load_path.endswith('.pth')
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir('logs_eval', args.exp_name)
    logger = create_logger(args.path_helper['log_path'], phase='test')

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    if args.percent < 0.9:
        pruning_generate(gen_net, (1 - args.percent))
    see_remain_rate(gen_net)

    # load checkpoint
    logger.info(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    epoch = 0
    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        logger.info(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        logger.info(f'=> loaded checkpoint {checkpoint_file}')

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': 0,
    }

    inception_score, fid_score = validate(args, fixed_z, fid_stat, gen_net,
                                          writer_dict, epoch)
    logger.info(f'Inception score: {inception_score}, FID score: {fid_score}.')
    writer_dict['writer'].close()
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert args.load_path.endswith(".pth")
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir("logs_eval", args.exp_name)
    logger = create_logger(args.path_helper["log_path"], phase="test")

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval("models." + args.gen_model + ".Generator")(args=args).cuda()

    # fid stat
    if args.dataset.lower() == "cifar10":
        fid_stat = "fid_stat/fid_stats_cifar10_train.npz"
    elif args.dataset.lower() == "stl10":
        fid_stat = "fid_stat/stl10_train_unlabeled_fid_stats_48.npz"
    else:
        raise NotImplementedError(f"no fid stat for {args.dataset.lower()}")
    assert os.path.exists(fid_stat)

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    # set writer
    logger.info(f"=> resuming from {args.load_path}")
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    if "avg_gen_state_dict" in checkpoint:
        gen_net.load_state_dict(checkpoint["avg_gen_state_dict"])
        epoch = checkpoint["epoch"]
        logger.info(f"=> loaded checkpoint {checkpoint_file} (epoch {epoch})")
    else:
        gen_net.load_state_dict(checkpoint)
        logger.info(f"=> loaded checkpoint {checkpoint_file}")

    logger.info(args)
    writer_dict = {
        "writer": SummaryWriter(args.path_helper["log_path"]),
        "valid_global_steps": 0,
    }

    inception_score, fid_score = validate(args, fixed_z, fid_stat, gen_net,
                                          writer_dict, clean_dir=False)
    logger.info(f"Inception score: {inception_score}, FID score: {fid_score}.")
def main():
    args = cfg.parse_args()

    # write into tensorboard
    log_path = os.path.join(args.demo_path, args.demo_name + '/log')
    vid_path = os.path.join(args.demo_path, args.demo_name + '/vids')
    os.makedirs(log_path, exist_ok=True)
    os.makedirs(vid_path, exist_ok=True)
    writer = SummaryWriter(log_path)

    device = torch.device("cuda:0")
    G = Generator().to(device)
    G = nn.DataParallel(G)
    G.load_state_dict(torch.load(args.model_path))

    with torch.no_grad():
        G.eval()
        za = torch.randn(args.n_za_test, args.d_za, 1, 1, 1).to(device)
        zm = torch.randn(args.n_zm_test, args.d_zm, 1, 1, 1).to(device)
        n_za = za.size(0)
        n_zm = zm.size(0)
        # pair every appearance code with every motion code
        za = za.unsqueeze(1).repeat(1, n_zm, 1, 1, 1, 1).contiguous().view(
            n_za * n_zm, -1, 1, 1, 1)
        zm = zm.repeat(n_za, 1, 1, 1, 1)
        vid_fake = G(za, zm)
        vid_fake = vid_fake.transpose(2, 1)  # bs x 16 x 3 x 64 x 64
        vid_fake = ((vid_fake - vid_fake.min()) /
                    (vid_fake.max() - vid_fake.min())).data
        writer.add_video(tag='generated_videos', global_step=1,
                         vid_tensor=vid_fake)
        writer.flush()

        # save into videos
        print('==> saving videos...')
        save_videos(vid_path, vid_fake, n_za, n_zm)

    return
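# A minimal sketch (shapes only; no GPU or model required) of the latent-grid
# construction above: every appearance code za is paired with every motion
# code zm, yielding n_za * n_zm generator inputs in row-major order.
import torch

n_za, n_zm, d_za, d_zm = 3, 4, 16, 8
za = torch.randn(n_za, d_za, 1, 1, 1)
zm = torch.randn(n_zm, d_zm, 1, 1, 1)

za_grid = za.unsqueeze(1).repeat(1, n_zm, 1, 1, 1, 1).contiguous().view(
    n_za * n_zm, -1, 1, 1, 1)          # appearance index varies slowest
zm_grid = zm.repeat(n_za, 1, 1, 1, 1)  # motion index cycles fastest

assert za_grid.shape == (n_za * n_zm, d_za, 1, 1, 1)
assert zm_grid.shape == (n_za * n_zm, d_zm, 1, 1, 1)
# rows 0..n_zm-1 all share appearance code 0
assert torch.equal(za_grid[0], za_grid[n_zm - 1])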
def main():
    global g_vel_cmd, g_swa_cmd, g_acc_cmd, g_sar_cmd, g_cfg_type_cmd, \
        g_enable_cmd, g_clear_cmd, g_vel_pre, g_vel_err_pre_int, \
        g_vel_err_pre, g_throttle_pre, g_published
    g_vel_cmd = 0.0
    g_swa_cmd = 0.0
    g_acc_cmd = 0.0
    g_sar_cmd = 0.0
    g_cfg_type_cmd = 0
    g_enable_cmd = 0
    g_clear_cmd = 0
    g_vel_pre = 0.0
    g_vel_err_pre = 0.0
    g_vel_err_pre_int = 0.0
    g_throttle_pre = 0.0
    g_published = False

    vehicle_ns = parse_args()
    ns_obj = VehicleCfg(vehicle_ns)
    (node_name, brake_topic_name, throttle_topic_name, steering_topic_name,
     gear_topic_name, turnsignal_topic_name, _) = ns_obj.get_llc_properties()
    control_topic_name = ns_obj.get_control_properties()

    brake_obj = BrakeVehicle()
    throttle_obj = ThrottleVehicle()
    steering_obj = SteeringVehicle()
    gear_obj = GearVehicle()
    turnsignal_obj = TurnSignalVehicle()

    rospy.init_node(node_name, anonymous=True)
    brake_obj.set_pub(topic_name=brake_topic_name)
    throttle_obj.set_pub(topic_name=throttle_topic_name)
    steering_obj.set_pub(topic_name=steering_topic_name)
    gear_obj.set_pub(topic_name=gear_topic_name)
    turnsignal_obj.set_pub(topic_name=turnsignal_topic_name)

    # subscribe once, outside the loop; the callback updates the globals
    rospy.Subscriber(control_topic_name, PlatMsgVehicleCmd, control_callback)

    rate = rospy.Rate(10)
    while not rospy.is_shutdown():
        if g_published:
            cfg_type_cmd_cur = g_cfg_type_cmd
            vel_cmd_cur, swa_cmd_cur, acc_cmd_cur, sar_cmd_cur = \
                set_control_cfg(cfg_type_cmd_cur)
            brake_req_cur, throttle_req_cur, steering_req_cur = \
                exec_low_level_control(cfg_type_cmd_cur, vel_cmd_cur,
                                       swa_cmd_cur, acc_cmd_cur, sar_cmd_cur)
            fill_pub_msgs(cfg_type_cmd_cur, brake_obj, throttle_obj,
                          steering_obj, brake_req_cur, throttle_req_cur,
                          steering_req_cur)
            g_published = False
        rate.sleep()
def main():
    args = cfg.parse_args()
    torch.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.benchmark = True

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()

    # initial
    np.random.seed(args.random_seed)

    # load checkpoint
    print(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    pruning_generate(gen_net, checkpoint['avg_gen_state_dict'])  # create a buffer for the mask
    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        print(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        print(f'=> loaded checkpoint {checkpoint_file}')

    print(args)
    count = 0
    for _ in range(1000):
        fixed_z = torch.cuda.FloatTensor(
            np.random.normal(0, 1, (60, args.latent_dim)))
        gen_imgs = gen_net(fixed_z)
        gen_imgs = np.moveaxis(gen_imgs.detach().cpu().numpy(), 1, -1)
        for i in range(gen_imgs.shape[0]):
            img = gen_imgs[i]
            img = (img + 1) / 2  # map from [-1, 1] to [0, 1]
            imsave(
                os.path.join(args.save_path,
                             'test_result_{}.png'.format(count)), img)
            count = count + 1
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    assert args.exp_name
    assert args.load_path.endswith('.pth')
    assert os.path.exists(args.load_path)
    args.path_helper = set_log_dir('logs_eval', args.exp_name)
    logger = create_logger(args.path_helper['log_path'], phase='test')

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))

    # set writer
    logger.info(f'=> resuming from {args.load_path}')
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    if 'avg_gen_state_dict' in checkpoint:
        gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        epoch = checkpoint['epoch']
        logger.info(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    else:
        gen_net.load_state_dict(checkpoint)
        logger.info(f'=> loaded checkpoint {checkpoint_file}')

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': 0,
    }

    inception_score, fid_score = validate(args, fixed_z, gen_net, writer_dict)
    logger.info(f'Inception score: {inception_score}, FID score: {fid_score}.')
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True
    os.environ['PYTHONHASHSEED'] = str(args.random_seed)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args)
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args)

    # weight init
    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError(
                    '{} unknown initial type'.format(args.init_type))
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    # snapshot the freshly initialized weights for later rewinding
    initial_dis_net_weight = deepcopy(dis_net.state_dict())
    initial_gen_net_weight = deepcopy(gen_net.state_dict())
    torch.save(initial_dis_net_weight,
               os.path.join(args.save_path, 'initial_dis_net.pth'))
    torch.save(initial_gen_net_weight,
               os.path.join(args.save_path, 'initial_gen_net.pth'))
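# A minimal sketch (toy convnet; init_type fixed to 'normal') of the
# weights_init pattern used throughout these scripts: Module.apply visits
# every submodule, and type checks pick out the layers to re-initialize.
import torch.nn as nn

def demo_weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0.0)

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
net.apply(demo_weights_init)  # recurses over Conv2d, BatchNorm2d, ReLU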
def main():
    vehicle_ns = cfg.parse_args()
    ns_obj = VehicleCfg(vehicle_ns)
    node_name, topic_name, model_name, frame_id = ns_obj.get_odom_properties()

    rospy.init_node(node_name, anonymous=True)
    odom_pub = rospy.Publisher(topic_name, Odometry, queue_size=100)
    rospy.wait_for_service('/gazebo/get_model_state')
    get_model_srv = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState)

    header = Header()
    odom = Odometry()
    header.frame_id = frame_id
    model = GetModelStateRequest()
    model.model_name = model_name

    rate = rospy.Rate(100)
    while not rospy.is_shutdown():
        result = get_model_srv(model)
        odom.pose.pose = result.pose
        odom.twist.twist = result.twist
        header.stamp = rospy.Time.now()
        odom.header = header
        odom_pub.publish(odom)
        rate.sleep()
def test_autogan_cifar10_a_Generator(args1, myargs):
    import cfg, os, torch
    import numpy as np

    myargs.config = getattr(myargs.config, 'train_autogan_cifar10_a')
    myargs.args = args1
    args = cfg.parse_args()
    for k, v in myargs.config.items():
        setattr(args, k, v)
    args.tf_inception_model_dir = os.path.expanduser(
        args.tf_inception_model_dir)
    args.fid_stat = os.path.expanduser(args.fid_stat)
    args.data_path = os.path.expanduser(args.data_path)

    gen_net = Generator(args=args).cuda()
    z = torch.cuda.FloatTensor(np.random.normal(0, 1, (16, args.latent_dim)))
    x = gen_net(z)

    import torchviz
    g = torchviz.make_dot(x)
    g.view()
def main():
    args = cfg.parse_args()

    # write into tensorboard
    log_path = os.path.join(args.demo_path, args.demo_name + '/log')
    vid_path = os.path.join(args.demo_path, args.demo_name + '/vids')
    os.makedirs(log_path, exist_ok=True)
    os.makedirs(vid_path, exist_ok=True)
    writer = SummaryWriter(log_path)

    device = torch.device("cuda:0")
    G = Generator().to(device)
    G = nn.DataParallel(G)
    G.load_state_dict(torch.load(args.model_path))

    with torch.no_grad():
        G.eval()
        za = torch.randn(args.n_za_test, args.d_za, 1, 1, 1).to(device)  # appearance
        # generating frames from [16, 20, 24, 28, 32, 36, 40, 44, 48]
        for i in range(9):
            zm = torch.randn(args.n_zm_test, args.d_zm, (i + 1), 1, 1).to(device)  # 16+i*4
            vid_fake = G(za, zm)
            vid_fake = vid_fake.transpose(2, 1)
            vid_fake = ((vid_fake - vid_fake.min()) /
                        (vid_fake.max() - vid_fake.min())).data
            writer.add_video(tag='generated_videos_%dframes' % (16 + i * 4),
                             global_step=1, vid_tensor=vid_fake)
            writer.flush()
            print('saving videos')
            save_videos(vid_path, vid_fake, args.n_za_test, (16 + i * 4))

    return
        args.n_classes = 143
        fid_stat = None
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    if fid_stat:
        assert os.path.exists(fid_stat), f"{fid_stat} not found"

    # get network
    gen_net, _ = get_network_func(args)
    gen_net.cuda()

    # load checkpoint
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file), \
        f"checkpoint file {checkpoint_file} not found."
    logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
    checkpoint = torch.load(checkpoint_file,
                            map_location=lambda storage, loc: storage)
    gen_net.load_state_dict(checkpoint)
    logger.info(f"=> loaded checkpoint '{checkpoint_file}'")

    # evaluation
    torch.cuda.empty_cache()
    inception_score, fid_score = validate(args, fid_stat, gen_net, None)
    logger.info(f'Inception score: {inception_score}, FID score: {fid_score}')


if __name__ == '__main__':
    config = parse_args()
    main(config)
                   , default=argparse.SUPPRESS )
parser.add_argument( 'align'
                   , help='alignments between target and source words'
                   , nargs='?'
                   , default=argparse.SUPPRESS )
parser.add_argument( '-s','--subcorpora'
                   , help='YAML description of subcorpora lines (space separated file list)'
                   , default=argparse.SUPPRESS
                   , action=store_training )

if __name__ == '__main__':
    os.putenv('LANG','C')
    os.putenv('LC_ALL','C')
    d = cfg.parse_args(parser, write='$outdir/rules.config', modeldir=True)
    cfgf = open(os.path.join(d.outdir, 'rules.config'), 'a')
    print >> cfgf, '\nrules:', d.outdir
    cfgf.close()
    dir = os.path.abspath(os.path.dirname(__file__))
    finp = os.path.join(dir, 'ghkm', 'filterbadinput')
    names = []
    triplefiles = [d.config['target'], d.config['source'], d.config['align']]
    steps = cfg.steps(d)
    hp = d.hadoop
    training = os.path.join(d.tmpdir, 'training')
    trainingtmp = os.path.join(d.tmpdir, 'training.tmp')
    trainingnew = trainingtmp + '.new'
    if logfile:
        print >> decodescript, ' 2> $LOG \\'
        #print >> decodescript, ' 2> >(gzip > $LOG.gz) \\'
    if stage == 'forest':
        print >> decodescript, "| %s/join_forests" % d.scriptdir
    else:
        print >> decodescript, "| %s/join_nbests %s" % \
            (d.scriptdir, d.config['decoder']['options']['nbests'])
    print >> decodescript, '\n\n'
    if logfile:
        print >> decodescript, 'gzip $LOG\n\n'
    if include_instruction_pipe:
        print >> decodescript, 'gzip $INSLOG\n\n'
    decodescript.close()
    os.chmod(decodefile, stat.S_IRWXU | os.stat(decodefile)[stat.ST_MODE])
    return decodefile


if __name__ == '__main__':
    import argparse
    arp = argparse.ArgumentParser()
    arp.add_argument( 'decodepipe')
    arp.add_argument( 'tunedir'
                    , nargs='?'
                    , help='output directory of ruleset pipeline'
                    , action=cfg.store_abspath
                    , default=argparse.SUPPRESS )
    d = cfg.parse_args(arp, default='$tunedir/tune.config', modeldir=True)
    write_script( d, 'nbest'
                , weightstring=os.path.join(d.config['tunedir'], 'weights.final')
                , logfile=False
                , include_instruction_pipe=True
                , decodefile=d.config['decodepipe'])
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    print(args)

    # create logging folder
    log_path = os.path.join(args.save_path, args.exp_name + '/log')
    model_path = os.path.join(args.save_path, args.exp_name + '/models')
    os.makedirs(log_path, exist_ok=True)
    os.makedirs(model_path, exist_ok=True)
    writer = SummaryWriter(log_path)  # tensorboard

    # load model
    print('==> loading models')
    device = torch.device("cuda:0")
    G = Generator(args.dim_z, args.dim_a, args.nclasses, args.ch).to(device)
    VD = VideoDiscriminator(args.nclasses, args.ch).to(device)
    ID = ImageDiscriminator(args.ch).to(device)
    G = nn.DataParallel(G)
    VD = nn.DataParallel(VD)
    ID = nn.DataParallel(ID)

    # optimizer
    optimizer_G = torch.optim.Adam(G.parameters(), args.g_lr, (0.5, 0.999))
    optimizer_VD = torch.optim.Adam(VD.parameters(), args.d_lr, (0.5, 0.999))
    optimizer_ID = torch.optim.Adam(ID.parameters(), args.d_lr, (0.5, 0.999))

    # loss
    criterion_gan = nn.BCEWithLogitsLoss().to(device)
    criterion_l1 = nn.L1Loss().to(device)

    # prepare dataset
    print('==> preparing dataset')
    transform = torchvision.transforms.Compose([
        transforms_vid.ClipResize((args.img_size, args.img_size)),
        transforms_vid.ClipToTensor(),
        transforms_vid.ClipNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    transform_test = torchvision.transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    if args.dataset == 'mug':
        dataset_train = MUG('train', args.data_path, transform=transform)
        dataset_val = MUG('val', args.data_path, transform=transform)
        dataset_test = MUG_test(args.data_path, transform=transform_test)
    else:
        raise NotImplementedError

    dataloader_train = torch.utils.data.DataLoader(
        dataset=dataset_train, batch_size=args.batch_size,
        num_workers=args.num_workers, shuffle=True, pin_memory=True,
        drop_last=True)
    dataloader_val = torch.utils.data.DataLoader(
        dataset=dataset_val, batch_size=args.batch_size,
        num_workers=args.num_workers, shuffle=False, pin_memory=True)
    dataloader_test = torch.utils.data.DataLoader(
        dataset=dataset_test, batch_size=args.batch_size_test,
        num_workers=args.num_workers, shuffle=False, pin_memory=True)

    print('==> start training')
    for epoch in range(args.max_epoch):
        train(args, epoch, G, VD, ID, optimizer_G, optimizer_VD, optimizer_ID,
              criterion_gan, criterion_l1, dataloader_train, writer, device)
        if epoch % args.val_freq == 0:
            val(args, epoch, G, criterion_l1, dataloader_val, device, writer)
            test(args, epoch, G, dataloader_test, device, writer)
        if epoch % args.save_freq == 0:
            torch.save(G.state_dict(),
                       os.path.join(model_path, 'G_%d.pth' % (epoch)))
            torch.save(VD.state_dict(),
                       os.path.join(model_path, 'VD_%d.pth' % (epoch)))
            torch.save(ID.state_dict(),
                       os.path.join(model_path, 'ID_%d.pth' % (epoch)))

    return
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError(
                    '{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()
    gen_net.apply(weights_init)
    dis_net.apply(weights_init)
    avg_gen_net = deepcopy(gen_net)

    initial_gen_net_weight = torch.load(
        os.path.join(args.init_path, 'initial_gen_net.pth'), map_location="cpu")
    initial_dis_net_weight = torch.load(
        os.path.join(args.init_path, 'initial_dis_net.pth'), map_location="cpu")
    assert id(initial_dis_net_weight) != id(dis_net.state_dict())
    assert id(initial_gen_net_weight) != id(gen_net.state_dict())

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/fid_stats_stl10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))
    start_epoch = 0
    best_fid = 1e4

    print('=> resuming from %s' % args.load_path)
    assert os.path.exists(args.load_path)
    checkpoint_file = args.load_path
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    pruning_generate(gen_net, checkpoint['gen_state_dict'])
    dis_net.load_state_dict(checkpoint['dis_state_dict'])

    # count total and non-zero conv weights in the discriminator
    total = 0
    total_nonzero = 0
    for m in dis_net.modules():
        if isinstance(m, nn.Conv2d):
            total += m.weight_orig.data.numel()
            mask = m.weight_orig.data.abs().clone().gt(0).float().cuda()
            total_nonzero += torch.sum(mask)

    conv_weights = torch.zeros(total)
    index = 0
    for m in dis_net.modules():
        if isinstance(m, nn.Conv2d):
            size = m.weight_orig.data.numel()
            conv_weights[index:(index + size)] = \
                m.weight_orig.data.view(-1).abs().clone()
            index += size

    y, i = torch.sort(conv_weights)
    # thre_index = int(total * args.percent)
    # only care about the non-zero weights,
    # e.g. total = 100, total_nonzero = 80, percent = 0.2, thre_index = 36,
    # which means keep 64
    thre_index = total - total_nonzero
    thre = y[int(thre_index)]
    pruned = 0
    print('Pruning threshold: {}'.format(thre))
    zero_flag = False
    masks = OrderedDict()
    for k, m in enumerate(dis_net.modules()):
        if isinstance(m, nn.Conv2d):
            weight_copy = m.weight_orig.data.abs().clone()
            mask = weight_copy.gt(thre).float()
            masks[k] = mask
            pruned = pruned + mask.numel() - torch.sum(mask)
            m.weight_orig.data.mul_(mask)
            if int(torch.sum(mask)) == 0:
                zero_flag = True
            print('layer index: {:d} \t total params: {:d} \t remaining params: {:d}'
                  .format(k, mask.numel(), int(torch.sum(mask))))
    print('Total conv params: {}, Pruned conv params: {}, Pruned ratio: {}'
          .format(total, pruned, pruned / total))

    pruning_generate(avg_gen_net, checkpoint['gen_state_dict'])
    see_remain_rate(gen_net)

    if not args.finetune_G:
        gen_weight = gen_net.state_dict()
        gen_orig_weight = rewind_weight(initial_gen_net_weight,
                                        gen_weight.keys())
        gen_weight.update(gen_orig_weight)
        gen_net.load_state_dict(gen_weight)
    gen_avg_param = copy_params(gen_net)

    if args.finetune_D:
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
    else:
        dis_net.load_state_dict(initial_dis_net_weight)
        for k, m in enumerate(dis_net.modules()):
            if isinstance(m, nn.Conv2d):
                m.weight_orig.data.mul_(masks[k])

    orig_dis_net = eval('models.' + args.model +
                        '.Discriminator')(args=args).cuda()
    orig_dis_net.load_state_dict(checkpoint['dis_state_dict'])
    orig_dis_net.eval()

    args.path_helper = set_log_dir('logs',
                                   args.exp_name + "_{}".format(args.percent))
    logger = create_logger(args.path_helper['log_path'])
    # logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch))
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        see_remain_rate(gen_net)
        see_remain_rate_orig(dis_net)
        if not args.use_kd_D:
            train_with_mask(args, gen_net, dis_net, gen_optimizer,
                            dis_optimizer, gen_avg_param, train_loader, epoch,
                            writer_dict, masks, lr_schedulers)
        else:
            train_with_mask_kd(args, gen_net, dis_net, orig_dis_net,
                               gen_optimizer, dis_optimizer, gen_avg_param,
                               train_loader, epoch, writer_dict, masks,
                               lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict, epoch)
            logger.info('Inception score: %.4f, FID score: %.4f || @ epoch %d.'
                        % (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net.load_state_dict(gen_net.state_dict())
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
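# A minimal sketch (toy discriminator; pruning fraction chosen arbitrarily)
# of the global magnitude-pruning step above: gather all conv weights, sort
# their magnitudes, pick one global threshold, and zero everything below it.
import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
percent = 0.2  # fraction of conv weights to prune

all_w = torch.cat([m.weight.data.abs().view(-1)
                   for m in net.modules() if isinstance(m, nn.Conv2d)])
thre = all_w.sort().values[int(all_w.numel() * percent)]

demo_masks = {}
for k, m in enumerate(net.modules()):
    if isinstance(m, nn.Conv2d):
        demo_masks[k] = m.weight.data.abs().gt(thre).float()
        m.weight.data.mul_(demo_masks[k])  # keep masks to re-apply after updates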
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True
    os.environ['PYTHONHASHSEED'] = str(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args)
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args)
    initial_gen_net_weight = torch.load(
        os.path.join(args.init_path, 'initial_gen_net.pth'), map_location="cpu")
    initial_dis_net_weight = torch.load(
        os.path.join(args.init_path, 'initial_dis_net.pth'), map_location="cpu")
    gen_net = gen_net.cuda()
    dis_net = dis_net.cuda()
    gen_net.load_state_dict(initial_gen_net_weight)
    dis_net.load_state_dict(initial_dis_net_weight)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/fid_stats_stl10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print('=> resuming from %s' % args.load_path)
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net
        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info('=> loaded checkpoint %s (epoch %d)' %
                    (checkpoint_file, start_epoch))
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    switch = False
    for epoch in range(int(start_epoch), int(args.max_epoch)):
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict, epoch)
            logger.info('Inception score: %.4f, FID score: %.4f || @ epoch %d.'
                        % (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'gen_state_dict': gen_net.state_dict(),
            'dis_state_dict': dis_net.state_dict(),
            'avg_gen_state_dict': avg_gen_net.state_dict(),
            'gen_optimizer': gen_optimizer.state_dict(),
            'dis_optimizer': dis_optimizer.state_dict(),
            'best_fid': best_fid,
            'path_helper': args.path_helper,
            'seed': args.random_seed
        }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
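# A minimal sketch (decay 0.999 assumed; the repo's copy_params/load_params
# helpers are not shown here) of the averaged generator maintained in the
# loop above: a shadow copy of the weights is blended toward the live weights
# after each update and swapped in for evaluation and checkpointing.
import torch

def ema_update(avg_params, live_params, decay=0.999):
    with torch.no_grad():
        for avg_p, p in zip(avg_params, live_params):
            avg_p.mul_(decay).add_(p, alpha=1 - decay)

live = [torch.randn(4, 4, requires_grad=True)]
shadow = [p.detach().clone() for p in live]  # rough copy_params equivalent
ema_update(shadow, live)                     # call after each optimizer step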
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # Setting up the resnet model
    resnet = torchvision.models.resnet50(pretrained=args.pretrained,
                                         progress=True)
    num_features = resnet.fc.in_features
    resnet.fc = nn.Linear(num_features, args.num_classes)
    resnet = resnet.cuda()

    # Setting up the optimizer
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(resnet.parameters(), lr=args.lr,
                              weight_decay=1e-4)
    elif args.optimizer == 'sgd_momentum':
        optimizer = optim.SGD(resnet.parameters(), lr=args.lr, momentum=0.9,
                              weight_decay=1e-4)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, resnet.parameters()), args.g_lr,
            (args.beta1, args.beta2))
    else:
        optimizer = None
    assert optimizer is not None

    criterion = nn.CrossEntropyLoss()

    if args.percentage == 1.0:
        train_data, val_data, test_data = get_train_validation_test_data(
            args.train_csv_path, args.train_img_path, args.val_csv_path,
            args.val_img_path, args.test_csv_path, args.test_img_path)
    else:
        train_data = get_label_unlabel_dataset(args.train_csv_path,
                                               args.train_img_path,
                                               args.percentage)
        _, val_data, test_data = get_train_validation_test_data(
            args.train_csv_path, args.train_img_path, args.val_csv_path,
            args.val_img_path, args.test_csv_path, args.test_img_path)

    train_loader = DataLoader(train_data, batch_size=args.train_batch_size,
                              shuffle=True, drop_last=True,
                              num_workers=args.num_workers)
    val_loader = DataLoader(val_data, batch_size=args.eval_batch_size,
                            shuffle=True, drop_last=True,
                            num_workers=args.num_workers)
    test_loader = DataLoader(test_data, batch_size=args.eval_batch_size,
                             shuffle=True, drop_last=True,
                             num_workers=args.num_workers)
    print('Training Datasize:', len(train_data))

    start_epoch = 0
    best_acc1 = 0
    best_acc2 = 0
    best_acc3 = 0

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model',
                                       'checkpoint_last.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        resnet.load_state_dict(checkpoint['resnet_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    start = time.time()
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        best_curr_acc1, best_curr_acc2, best_curr_acc3 = train(
            args, resnet, optimizer, criterion, train_loader, val_loader,
            epoch, writer_dict, best_acc1, best_acc2, best_acc3)
        best_acc1, best_acc2, best_acc3 = \
            best_curr_acc1, best_curr_acc2, best_curr_acc3
        if epoch and epoch % args.val_freq == 0 or epoch == int(args.max_epoch) - 1:
            val_acc = get_val_acc(val_loader, resnet)
            logger.info(f'Validation Accuracy {val_acc} || @ epoch {epoch}.')
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'resnet_state_dict': resnet.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'path_helper': args.path_helper
            }, False, False, False,
            args.path_helper['ckpt_path'], filename='checkpoint_last.pth')
    end = time.time()

    final_val_acc = get_val_acc(val_loader, resnet)
    final_test_acc = get_val_acc(test_loader, resnet)
    time_elapsed = end - start
    print('\n Final Validation Accuracy:', final_val_acc.data,
          '\n Final Test Accuracy:', final_test_acc.data,
          '\n Time Elapsed:', time_elapsed, 'seconds.')
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set visible GPU ids
    if len(args.gpu_ids) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_ids

    # set TensorFlow environment for evaluation (calculate IS and FID)
    _init_inception()
    inception_path = check_or_download_inception('./tmp/imagenet/')
    create_inception_graph(inception_path)

    # the first GPU in visible GPUs is dedicated for evaluation (running Inception model)
    str_ids = args.gpu_ids.split(',')
    args.gpu_ids = []
    for id in range(len(str_ids)):
        if id >= 0:
            args.gpu_ids.append(id)
    if len(args.gpu_ids) > 1:
        args.gpu_ids = args.gpu_ids[1:]

    # genotype G
    genotypes_root = os.path.join('exps', args.genotypes_exp, 'Genotypes')
    genotype_G = np.load(os.path.join(genotypes_root, 'latest_G.npy'))

    # import network from genotype
    basemodel_gen = eval('archs.' + args.arch + '.Generator')(args, genotype_G)
    gen_net = torch.nn.DataParallel(
        basemodel_gen, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])
    basemodel_dis = eval('archs.' + args.arch + '.Discriminator')(args)
    dis_net = torch.nn.DataParallel(
        basemodel_dis, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])
    # basemodel_gen = eval('archs.' + args.arch + '.Generator')(args=args)
    # gen_net = torch.nn.DataParallel(basemodel_gen, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])
    # basemodel_dis = eval('archs.' + args.arch + '.Discriminator')(args=args)
    # dis_net = torch.nn.DataParallel(basemodel_dis, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError(
                    '{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # epoch number for dis_net
    args.max_epoch_D = args.max_epoch_G * args.n_critic
    if args.max_iter_G:
        args.max_epoch_D = np.ceil(args.max_iter_G * args.n_critic /
                                   len(train_loader))
    max_iter_D = args.max_epoch_D * len(train_loader)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0, max_iter_D)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0, max_iter_D)

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # initial
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.checkpoint:
        # resuming
        print(f'=> resuming from {args.checkpoint}')
        assert os.path.exists(os.path.join('exps', args.checkpoint))
        checkpoint_file = os.path.join('exps', args.checkpoint, 'Model',
                                       'checkpoint_best.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net
        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('exps', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # model size
    logger.info('Param size of G = %fMB', count_parameters_in_MB(gen_net))
    logger.info('Param size of D = %fMB', count_parameters_in_MB(dis_net))
    print_FLOPs(basemodel_gen, (1, args.latent_dim), logger)
    print_FLOPs(basemodel_dis, (1, 3, args.img_size, args.img_size), logger)

    # for visualization
    if args.draw_arch:
        from utils.genotype import draw_graph_G
        draw_graph_G(genotype_G, save=True,
                     file_path=os.path.join(
                         args.path_helper['graph_vis_path'], 'latest_G'))
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (100, args.latent_dim)))

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch_D)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch % args.val_freq == 0 or epoch == int(args.max_epoch_D) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, std, fid_score = validate(args, fixed_z, fid_stat,
                                                       gen_net, writer_dict)
            logger.info(
                f'Inception score mean: {inception_score}, Inception score std: {std}, '
                f'FID score: {fid_score} || @ epoch {epoch}.')
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        # save model
        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.arch,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set visible GPU ids
    if len(args.gpu_ids) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_ids

    # set TensorFlow environment for evaluation (calculate IS and FID)
    _init_inception()
    inception_path = check_or_download_inception('./tmp/imagenet/')
    create_inception_graph(inception_path)

    # the first GPU in visible GPUs is dedicated for evaluation (running Inception model)
    str_ids = args.gpu_ids.split(',')
    args.gpu_ids = []
    for id in range(len(str_ids)):
        if id >= 0:
            args.gpu_ids.append(id)
    if len(args.gpu_ids) > 1:
        args.gpu_ids = args.gpu_ids[1:]

    # genotype G
    genotypes_root = os.path.join('exps', args.genotypes_exp, 'Genotypes')
    genotype_G = np.load(os.path.join(genotypes_root, 'latest_G.npy'))

    # import network from genotype
    basemodel_gen = eval('archs.' + args.arch + '.Generator')(args, genotype_G)
    gen_net = torch.nn.DataParallel(
        basemodel_gen, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])
    basemodel_dis = eval('archs.' + args.arch + '.Discriminator')(args)
    dis_net = torch.nn.DataParallel(
        basemodel_dis, device_ids=args.gpu_ids).cuda(args.gpu_ids[0])

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # set writer
    print(f'=> resuming from {args.checkpoint}')
    assert os.path.exists(os.path.join('exps', args.checkpoint))
    checkpoint_file = os.path.join('exps', args.checkpoint, 'Model',
                                   'checkpoint_best.pth')
    assert os.path.exists(checkpoint_file)
    checkpoint = torch.load(checkpoint_file)
    epoch = checkpoint['epoch'] - 1
    gen_net.load_state_dict(checkpoint['gen_state_dict'])
    dis_net.load_state_dict(checkpoint['dis_state_dict'])
    avg_gen_net = deepcopy(gen_net)
    avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
    gen_avg_param = copy_params(avg_gen_net)
    del avg_gen_net

    assert args.exp_name
    args.path_helper = set_log_dir('exps', args.exp_name)
    logger = create_logger(args.path_helper['log_path'])
    logger.info(f'=> loaded checkpoint {checkpoint_file} (epoch {epoch})')
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'valid_global_steps': epoch // args.val_freq,
    }

    # model size
    logger.info('Param size of G = %fMB', count_parameters_in_MB(gen_net))
    logger.info('Param size of D = %fMB', count_parameters_in_MB(dis_net))
    print_FLOPs(basemodel_gen, (1, args.latent_dim), logger)
    print_FLOPs(basemodel_dis, (1, 3, args.img_size, args.img_size), logger)

    # for visualization
    if args.draw_arch:
        from utils.genotype import draw_graph_G
        draw_graph_G(genotype_G, save=True,
                     file_path=os.path.join(
                         args.path_helper['graph_vis_path'], 'latest_G'))
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (100, args.latent_dim)))

    # test
    load_params(gen_net, gen_avg_param)
    inception_score, std, fid_score = validate(args, fixed_z, fid_stat,
                                               gen_net, writer_dict)
    logger.info(
        f'Inception score mean: {inception_score}, Inception score std: {std}, '
        f'FID score: {fid_score} || @ epoch {epoch}.')
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    print(args)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError(
                    '{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
        args, weights_init)

    # initial
    start_search_iter = 0

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        cur_stage = checkpoint['cur_stage']
        start_search_iter = checkpoint['search_iter']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        prev_archs = checkpoint['prev_archs']
        prev_hiddens = checkpoint['prev_hiddens']
        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (search iteration {start_search_iter})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])
        prev_archs = None
        prev_hiddens = None
        # set controller && its optimizer
        cur_stage = 0

    # set up data_loader
    dataset = datasets.ImageDataset(args, 2**(cur_stage + 3))
    train_loader = dataset.train
    print(args.rl_num_eval_img, "##############################")
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'controller_steps': start_search_iter * args.ctrl_step
    }
    g_loss_history = RunningStats(args.dynamic_reset_window)
    d_loss_history = RunningStats(args.dynamic_reset_window)

    # train loop
    Agent = SAC(131)
    print(Agent.alpha)
    memory = ReplayMemory(2560000)
    updates = 0
    outinfo = {
        'rewards': [],
        'a_loss': [],
        'critic_error': [],
    }
    Best = False
    Z_NUMPY = None
    WARMUP = True
    update_time = 1
    for search_iter in tqdm(range(int(start_search_iter), 100),
                            desc='search progress'):
        logger.info(f"<start search iteration {search_iter}>")
        if search_iter >= 1:
            WARMUP = False

        ### Define number of layers, currently only support 1->3
        total_layer_num = 3
        ### Different image size for different layers
        ds = [
            datasets.ImageDataset(args, 2**(k + 3))
            for k in range(total_layer_num)
        ]
        train_loaders = [d.train for d in ds]
        last_R = 0.  # initial reward
        last_fid = 10000  # initial FID
        last_arch = []

        # Set exploration
        if search_iter > 69:
            update_time = 10
            Best = True
        else:
            Best = False

        gen_net.set_stage(-1)
        last_R, last_fid, last_state = get_is(args, gen_net,
                                              args.rl_num_eval_img,
                                              get_is_score=True)
        for layer in range(total_layer_num):
            # cur_stage defines which layer to use as output: e.g. if
            # cur_stage == 0, the output is the first layer's output; set it
            # to 2 for the output of the last layer.
            cur_stage = layer
            action = Agent.select_action(
                [layer, last_R, 0.01 * last_fid] + last_state, Best)
            arch = [
                action[0][0], action[0][1], action[1][0], action[1][1],
                action[1][2], action[2][0], action[2][1], action[2][2],
                action[3][0], action[3][1], action[4][0], action[4][1],
                action[5][0], action[5][1]
            ]
            # argmax to get int description of arch
            cur_arch = [np.argmax(k) for k in action]
            # Pad the skip option 0=False (only for layer 1 and layer 2, not
            # layer 0; see builing_blocks.py for why)
            if layer == 0:
                cur_arch = cur_arch[0:4]
            elif layer == 1:
                cur_arch = cur_arch[0:5]
            elif layer == 2:
                if cur_arch[4] + cur_arch[5] == 2:
                    cur_arch = cur_arch[0:4] + [3]
                elif cur_arch[4] + cur_arch[5] == 0:
                    cur_arch = cur_arch[0:4] + [0]
                elif cur_arch[4] == 1 and cur_arch[5] == 0:
                    cur_arch = cur_arch[0:4] + [1]
                else:
                    cur_arch = cur_arch[0:4] + [2]

            # Get the network arch with the new architecture attached.
            last_arch += cur_arch
            gen_net.set_arch(last_arch, layer)  # set the network, given cur_stage

            # Train network
            dynamic_reset = train_qin(args, gen_net, dis_net, g_loss_history,
                                      d_loss_history, gen_optimizer,
                                      dis_optimizer, train_loaders[layer],
                                      cur_stage, smooth=False, WARMUP=WARMUP)

            # Get reward: use the jth layer output for generation (layers 0:j),
            # i.e. the proposed progressive state.
            R, fid, state = get_is(args, gen_net, args.rl_num_eval_img,
                                   z_numpy=Z_NUMPY)
            # Print exploitation mark, for better readability of the log.
            if Best:
                print("arch:", cur_arch, "Exploitation:", Best)
            else:
                print("arch:", cur_arch, "Exploring...")
            # Proxy reward of the up-to-now (0:j) architecture.
            print("update times:", updates, "step:", layer + 1, "IS:", R,
                  "FID:", fid)
            mask = 0 if layer == total_layer_num - 1 else 1
            if search_iter >= 0:  # warm up
                # Append transition to memory
                memory.push(
                    [layer, last_R, 0.01 * last_fid] + last_state, arch,
                    R - last_R + 0.01 * (last_fid - fid),
                    [layer + 1, R, 0.01 * fid] + state, mask)
                if len(memory) >= 64:
                    # Number of updates per step in environment
                    for i in range(update_time):
                        # Update parameters of all the networks
                        critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = \
                            Agent.update_parameters(memory,
                                                    min(len(memory), 256),
                                                    updates)
                        updates += 1
                        outinfo['critic_error'] = min(critic_1_loss,
                                                      critic_2_loss)
                        outinfo['entropy'] = ent_loss
                        outinfo['a_loss'] = policy_loss
                    print("full batch", outinfo, alpha)
            last_R = R  # next step
            last_fid = fid
            last_state = state

        outinfo['rewards'] = R
        critic_1_loss, critic_2_loss, policy_loss, ent_loss, alpha = \
            Agent.update_parameters(memory, len(memory), updates)
        updates += 1
        outinfo['critic_error'] = min(critic_1_loss, critic_2_loss)
        outinfo['entropy'] = ent_loss
        outinfo['a_loss'] = policy_loss
        print("full batch", outinfo, alpha)

        # Clean up and start a new trajectory from scratch
        del gen_net, dis_net, gen_optimizer, dis_optimizer
        gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
            args, weights_init)
        print(outinfo, len(memory))
        Agent.save_model("test")
        WARMUP = False
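# A minimal sketch (pure arithmetic; the 0.01 FID scale comes from the loop
# above) of the proxy reward pushed to the replay memory: the change in
# Inception Score plus the scaled drop in FID, so improving either is rewarded.
def proxy_reward(R, last_R, fid, last_fid, fid_scale=0.01):
    return (R - last_R) + fid_scale * (last_fid - fid)

# e.g. IS rises 7.1 -> 7.4 while FID falls 30.0 -> 25.0:
assert abs(proxy_reward(7.4, 7.1, 25.0, 30.0) - 0.35) < 1e-9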
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError(
                    '{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net
        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (epoch {start_epoch})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict)
            logger.info(
                f'Inception score: {inception_score}, FID score: {fid_score} || @ epoch {epoch}.')
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
def main(): args = cfg.parse_args() random.seed(args.random_seed) torch.manual_seed(args.random_seed) torch.cuda.manual_seed(args.random_seed) # set tf env _init_inception() inception_path = check_or_download_inception(None) create_inception_graph(inception_path) # weight init gen_net = eval('models.' + args.model + '.Generator')(args=args) dis_net = eval('models.' + args.model + '.Discriminator')(args=args) # weight init def weights_init(m): if isinstance(m, nn.Conv2d): if args.init_type == 'normal': nn.init.normal_(m.weight.data, 0.0, 0.02) elif args.init_type == 'orth': nn.init.orthogonal_(m.weight.data) elif args.init_type == 'xavier_uniform': nn.init.xavier_uniform(m.weight.data, 1.) else: raise NotImplementedError('{} unknown inital type'.format( args.init_type)) elif isinstance(m, nn.BatchNorm2d): nn.init.normal_(m.weight.data, 1.0, 0.02) nn.init.constant_(m.bias.data, 0.0) gen_net.apply(weights_init) dis_net.apply(weights_init) gen_net = gen_net.cuda() dis_net = dis_net.cuda() avg_gen_net = deepcopy(gen_net) initial_gen_net_weight = deepcopy(gen_net.state_dict()) initial_dis_net_weight = deepcopy(dis_net.state_dict()) assert id(initial_dis_net_weight) != id(dis_net.state_dict()) assert id(initial_gen_net_weight) != id(gen_net.state_dict()) # set optimizer gen_optimizer = torch.optim.Adam( filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr, (args.beta1, args.beta2)) dis_optimizer = torch.optim.Adam( filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr, (args.beta1, args.beta2)) gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0, args.max_iter * args.n_critic) dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0, args.max_iter * args.n_critic) # set up data_loader dataset = datasets.ImageDataset(args) train_loader = dataset.train # fid stat if args.dataset.lower() == 'cifar10': fid_stat = 'fid_stat/fid_stats_cifar10_train.npz' elif args.dataset.lower() == 'stl10': fid_stat = 'fid_stat/fid_stats_stl10_train.npz' else: raise NotImplementedError('no fid stat for %s' % args.dataset.lower()) assert os.path.exists(fid_stat) # epoch number for dis_net args.max_epoch = args.max_epoch * args.n_critic if args.max_iter: args.max_epoch = np.ceil(args.max_iter * args.n_critic / len(train_loader)) # initial np.random.seed(args.random_seed) fixed_z = torch.cuda.FloatTensor( np.random.normal(0, 1, (25, args.latent_dim))) start_epoch = 0 best_fid = 1e4 args.path_helper = set_log_dir('logs', args.exp_name + "_{}".format(args.percent)) logger = create_logger(args.path_helper['log_path']) # logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch)) logger.info(args) writer_dict = { 'writer': SummaryWriter(args.path_helper['log_path']), 'train_global_steps': start_epoch * len(train_loader), 'valid_global_steps': start_epoch // args.val_freq, } print('=> resuming from %s' % args.load_path) assert os.path.exists(args.load_path) checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth') assert os.path.exists(checkpoint_file) checkpoint = torch.load(checkpoint_file) gen_net.load_state_dict(checkpoint['gen_state_dict']) torch.manual_seed(args.random_seed) pruning_generate(gen_net, (1 - args.percent), args.pruning_method) torch.manual_seed(args.random_seed) pruning_generate(avg_gen_net, (1 - args.percent), args.pruning_method) see_remain_rate(gen_net) if args.second_seed: dis_net.apply(weights_init) if args.finetune_D: dis_net.load_state_dict(checkpoint['dis_state_dict']) else: 
        dis_net.load_state_dict(initial_dis_net_weight)

    # rewind the surviving generator weights to their initial values
    gen_weight = gen_net.state_dict()
    gen_orig_weight = rewind_weight(initial_gen_net_weight, gen_weight.keys())
    assert id(gen_weight) != id(gen_orig_weight)
    gen_weight.update(gen_orig_weight)
    gen_net.load_state_dict(gen_weight)
    gen_avg_param = copy_params(gen_net)

    if args.use_kd_D:
        orig_dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()
        orig_dis_net.load_state_dict(checkpoint['dis_state_dict'])
        orig_dis_net.eval()

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        see_remain_rate(gen_net)
        if not args.use_kd_D:
            train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
                  gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)
        else:
            train_kd(args, gen_net, dis_net, orig_dis_net, gen_optimizer,
                     dis_optimizer, gen_avg_param, train_loader, epoch,
                     writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict, epoch)
            logger.info('Inception score: %.4f, FID score: %.4f || @ epoch %d.' %
                        (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net.load_state_dict(gen_net.state_dict())
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
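# A minimal sketch, under assumption, of what `rewind_weight` does above: from
# the saved initialization, keep only the entries the pruned model still
# expects. torch.nn.utils.prune renames 'weight' to 'weight_orig' and adds a
# 'weight_mask' buffer, so the surviving connections get rewound to their
# initial values while the masks are left untouched.
def rewind_weight_sketch(init_state_dict, pruned_keys):
    rewound = {}
    for key in pruned_keys:
        if key.endswith('weight_orig'):
            # map the pruned parameter back to its original tensor
            rewound[key] = init_state_dict[key.replace('weight_orig', 'weight')]
        elif key in init_state_dict:
            rewound[key] = init_state_dict[key]
    return rewound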
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception(MODEL_DIR)
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
        args, weights_init)

    # set grow controller
    grow_ctrler = GrowCtrler(args.grow_step1, args.grow_step2)

    # initial
    start_search_iter = 0

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path, 'Model', 'checkpoint.pth')
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file, map_location={'cuda:0': 'cpu'})

        # set controller && its optimizer
        cur_stage = checkpoint['cur_stage']
        controller, ctrl_optimizer = create_ctrler(args, cur_stage, weights_init)

        start_search_iter = checkpoint['search_iter']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        controller.load_state_dict(checkpoint['ctrl_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        ctrl_optimizer.load_state_dict(checkpoint['ctrl_optimizer'])
        prev_archs = checkpoint['prev_archs']
        prev_hiddens = checkpoint['prev_hiddens']
        args.path_helper = checkpoint['path_helper']
        logger = create_logger(args.path_helper['log_path'])
        logger.info(
            f'=> loaded checkpoint {checkpoint_file} (search iteration {start_search_iter})')
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)
        logger = create_logger(args.path_helper['log_path'])
        prev_archs = None
        prev_hiddens = None

        # set controller && its optimizer
        cur_stage = 0
        controller, ctrl_optimizer = create_ctrler(args, cur_stage, weights_init)

    # set up data_loader
    dataset = datasets.ImageDataset(args, 2**(cur_stage + 3),
                                    args.dis_batch_size, args.num_workers)
    train_loader = dataset.train

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'controller_steps': start_search_iter * args.ctrl_step
    }

    g_loss_history = RunningStats(args.dynamic_reset_window)
    d_loss_history = RunningStats(args.dynamic_reset_window)

    # train loop
    for search_iter in tqdm(range(int(start_search_iter), int(args.max_search_iter)),
                            desc='search progress'):
        logger.info(f"<start search iteration {search_iter}>")
        if search_iter == args.grow_step1 or search_iter == args.grow_step2:
            # save the top-k architectures before growing to the next stage
            cur_stage = grow_ctrler.cur_stage(search_iter)
            logger.info(f'=> grow to stage {cur_stage}')
            prev_archs, prev_hiddens = get_topk_arch_hidden(
                args, controller, gen_net, prev_archs, prev_hiddens)

            # grow section
            del controller
            del ctrl_optimizer
            controller, ctrl_optimizer = create_ctrler(args, cur_stage, weights_init)
            dataset = datasets.ImageDataset(args, 2**(cur_stage + 3),
                                            args.dis_batch_size, args.num_workers)
            train_loader = dataset.train

        dynamic_reset = train_shared(args, gen_net, dis_net, g_loss_history,
                                     d_loss_history,
                                     controller, gen_optimizer, dis_optimizer,
                                     train_loader, prev_hiddens=prev_hiddens,
                                     prev_archs=prev_archs)
        train_controller(args, controller, ctrl_optimizer, gen_net,
                         prev_hiddens, prev_archs, writer_dict)

        if dynamic_reset:
            logger.info('re-initialize shared GAN')
            del gen_net, dis_net, gen_optimizer, dis_optimizer
            gen_net, dis_net, gen_optimizer, dis_optimizer = create_shared_gan(
                args, weights_init)

        save_checkpoint(
            {
                'cur_stage': cur_stage,
                'search_iter': search_iter + 1,
                'gen_model': args.gen_model,
                'dis_model': args.dis_model,
                'controller': args.controller,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'ctrl_state_dict': controller.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'ctrl_optimizer': ctrl_optimizer.state_dict(),
                'prev_archs': prev_archs,
                'prev_hiddens': prev_hiddens,
                'path_helper': args.path_helper
            }, False, args.path_helper['ckpt_path'])

    final_archs, _ = get_topk_arch_hidden(args, controller, gen_net,
                                          prev_archs, prev_hiddens)
    logger.info(f"discovered archs: {final_archs}")
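# A minimal sketch (an assumption, not the project's code) of the GrowCtrler
# used above: it maps a search iteration to a resolution stage, so the shared
# GAN grows once at grow_step1 and again at grow_step2, with the data loader
# rebuilt for images of size 2**(stage + 3).
class GrowCtrlerSketch:
    def __init__(self, grow_step1, grow_step2):
        self.grow_step1 = grow_step1
        self.grow_step2 = grow_step2

    def cur_stage(self, search_iter):
        if search_iter < self.grow_step1:
            return 0  # 8x8 images
        if search_iter < self.grow_step2:
            return 1  # 16x16 images
        return 2      # 32x32 images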
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.rand_seed)

    if args.dataset == 'cifar':
        sample_x = torch.zeros((args.batch_size, 3, 32, 32))
        netE = Res18_Quadratic(3, args.n_chan, 32, normalize=False, AF=nn.ELU())
    elif args.dataset == 'mnist':
        sample_x = torch.zeros((args.batch_size, 1, 32, 32))
        netE = Res12_Quadratic(1, args.n_chan, 32, normalize=False, AF=nn.ELU())
    elif args.dataset == 'fmnist':
        sample_x = torch.zeros((args.batch_size, 1, 32, 32))
        netE = Res12_Quadratic(1, args.n_chan, 32, normalize=False, AF=nn.ELU())
    else:
        raise NotImplementedError('{} unknown dataset'.format(args.dataset))

    # setup gpu
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    netE = netE.to(device)
    if args.n_gpus > 1:
        netE = nn.DataParallel(netE)

    root = 'logs/' + args.log + '_' + args.time

    # 'single' sampling mode saves one file with a custom number of images;
    # 'all' sampling mode saves a 256-image batch for every stored checkpoint

    # set annealing schedule
    if args.annealing_schedule == 'exp':
        Nsampling = 2000
        # exponential schedule with flat regions at the beginning and end
        Tmax, Tmin = 100, 1
        T = Tmax * np.exp(-np.linspace(0, Nsampling - 1, Nsampling) *
                          (np.log(Tmax / Tmin) / Nsampling))
        T = np.concatenate((Tmax * np.ones((500, )), T), axis=0)
        T = np.concatenate((T, Tmin * np.linspace(1, 0, 200)), axis=0)
    elif args.annealing_schedule == 'lin':
        Nsampling = 2000
        # linear schedule with flat regions at the beginning and end
        Tmax, Tmin = 100, 1
        T = np.linspace(Tmax, Tmin, Nsampling)
        T = np.concatenate((Tmax * np.ones((500, )), T), axis=0)
        T = np.concatenate((T, Tmin * np.linspace(1, 0, 200)), axis=0)

    # sample
    if args.sample_mode == 'single':
        filename = args.file_name + str(args.net_indx) + '.pt'
        netE.load_state_dict(torch.load(root + '/models/' + filename))
        n_batches = int(np.ceil(args.n_samples_save / args.batch_size))
        denoise_samples = []
        print('sampling starts')
        for i in range(n_batches):
            initial_x = 0.5 + torch.randn_like(sample_x).to(device)
            x_list, E_trace = Annealed_Langevin_E(netE, initial_x,
                                                  args.sample_step_size, T, 100)
            x_denoise = SS_denoise(x_list[-1][:].to(device), netE, 0.1)
            denoise_samples.append(x_denoise)
            print('batch {}/{} finished'.format(i + 1, n_batches))
        denoise_samples = torch.cat(denoise_samples, 0)
        torch.save(
            denoise_samples, root + '/samples/' + args.dataset + '_' +
            str(args.n_samples_save) + 'samples.pt')
    elif args.sample_mode == 'all':
        n_batches = int(np.ceil(256 / args.batch_size))
        i = args.net_indx
        while True:
            filename = args.file_name + str(i) + '.pt'
            i += args.save_every
            try:
                netE.load_state_dict(torch.load(root + '/models/' + filename))
            except FileNotFoundError:
                print(root + '/models/' + filename)
                print('file not found or reached last file')
                break
            print('generating samples for ' + filename)
            denoise_samples = []
            # use a separate loop variable so the checkpoint counter `i` is not
            # clobbered (the original reused `i` here, a bug)
            for j in range(n_batches):
                initial_x = 0.5 + torch.randn_like(sample_x).to(device)
                x_list, E_trace = Annealed_Langevin_E(netE, initial_x,
                                                      args.sample_step_size, T, 100)
                print(str(len(x_list)))
                x_denoise = SS_denoise(x_list[-1].to(device), netE, 0.1)
                denoise_samples.append(x_denoise)
                print('batch {}/{} finished'.format(j + 1, n_batches))
            denoise_samples = torch.cat(denoise_samples, 0)
            save_sample_pdf(
                denoise_samples[0:256], (16, 16),
                root + '/samples/' + args.dataset + '_256samples_' + str(i) +
                'knet_denoise.pdf')
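# A minimal sketch, under assumption, of the annealed Langevin update that
# Annealed_Langevin_E performs above: gradient descent on the energy E with
# Gaussian noise scaled by the temperature schedule T. The trailing argument
# (100 in the calls above) is treated here as a recording interval, which is
# an assumption about the project's signature.
import numpy as np
import torch

def annealed_langevin_sketch(netE, x, step_size, T, record_every=100):
    x_list = []
    for t in T:
        x = x.requires_grad_()
        grad = torch.autograd.grad(netE(x).sum(), x)[0]
        # Langevin step: drift down the energy, noise scaled by temperature t
        x = (x - step_size * grad +
             np.sqrt(2 * step_size * t) * torch.randn_like(x)).detach()
        x_list.append(x)
    return x_list[::record_every], None  # second slot stands in for E_trace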
                    nargs='?',
                    default=argparse.SUPPRESS)
parser.add_argument('align',
                    help='alignments between target and source words',
                    nargs='?',
                    default=argparse.SUPPRESS)
parser.add_argument(
    '-s',
    '--subcorpora',
    help='YAML description of subcorpora lines (space separated file list)',
    default=argparse.SUPPRESS,
    action=store_training)

if __name__ == '__main__':
    os.putenv('LANG', 'C')
    os.putenv('LC_ALL', 'C')
    d = cfg.parse_args(parser, write='$outdir/rules.config', modeldir=True)
    cfgf = open(os.path.join(d.outdir, 'rules.config'), 'a')
    # Python 3 form of the original Python 2 `print >> cfgf` statement
    print('\nrules:', d.outdir, file=cfgf)
    cfgf.close()
    dir = os.path.abspath(os.path.dirname(__file__))
    finp = os.path.join(dir, 'ghkm', 'filterbadinput')
    names = []
    triplefiles = [d.config['target'], d.config['source'], d.config['align']]
    steps = cfg.steps(d)
    hp = d.hadoop
    training = os.path.join(d.tmpdir, 'training')
    trainingtmp = os.path.join(d.tmpdir, 'training.tmp')
    trainingnew = trainingtmp + '.new'
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = True

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # epoch number for dis_net
    dataset = datasets.ImageDataset(args, cur_img_size=8)
    train_loader = dataset.train
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter / len(train_loader))
    else:
        args.max_iter = args.max_epoch * len(train_loader)
    args.max_epoch = args.max_epoch * args.n_critic

    # import network
    gen_net = eval('models.' + args.gen_model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.dis_model + '.Discriminator')(args=args).cuda()
    gen_net.set_arch(args.arch, cur_stage=2)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    gpu_ids = [i for i in range(int(torch.cuda.device_count()))]
    gen_net = torch.nn.DataParallel(gen_net.to("cuda:0"), device_ids=gpu_ids)
    dis_net = torch.nn.DataParallel(dis_net.to("cuda:0"), device_ids=gpu_ids)
    gen_net.module.cur_stage = 0
    dis_net.module.cur_stage = 0
    gen_net.module.alpha = 1.
    dis_net.module.alpha = 1.

    # set optimizer
    if args.optimizer == "adam":
        gen_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
            (args.beta1, args.beta2))
        dis_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
            (args.beta1, args.beta2))
    elif args.optimizer == "adamw":
        gen_optimizer = AdamW(filter(lambda p: p.requires_grad, gen_net.parameters()),
                              args.g_lr,
                              weight_decay=args.wd)
        # the discriminator should use its own learning rate; the original
        # passed args.g_lr here, which looks like a copy-paste slip
        dis_optimizer = AdamW(filter(lambda p: p.requires_grad, dis_net.parameters()),
                              args.d_lr,
                              weight_decay=args.wd)
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    elif args.fid_stat is not None:
        fid_stat = args.fid_stat
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (64, args.latent_dim)))
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        assert os.path.exists(args.load_path)
        checkpoint_file = os.path.join(args.load_path)
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])

        # avg_gen_net = deepcopy(gen_net)
        # avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = checkpoint['gen_avg_param']
        # del avg_gen_net

        cur_stage = cur_stages(start_epoch, args)
        gen_net.module.cur_stage = cur_stage
        dis_net.module.cur_stage = cur_stage
        gen_net.module.alpha = 1.
        dis_net.module.alpha = 1.
        args.path_helper = checkpoint['path_helper']
    else:
        # create new log dir
        assert args.exp_name
        args.path_helper = set_log_dir('logs', args.exp_name)

    logger = create_logger(args.path_helper['log_path'])
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    def return_states():
        states = {}
        states['epoch'] = epoch
        states['best_fid'] = best_fid
        states['gen_state_dict'] = gen_net.state_dict()
        states['dis_state_dict'] = dis_net.state_dict()
        states['gen_optimizer'] = gen_optimizer.state_dict()
        states['dis_optimizer'] = dis_optimizer.state_dict()
        states['gen_avg_param'] = gen_avg_param
        states['path_helper'] = args.path_helper
        return states

    # train loop
    for epoch in range(start_epoch + 1, int(args.max_epoch)):
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, fixed_z)

        backup_param = copy_params(gen_net)
        load_params(gen_net, gen_avg_param)
        fid_score = validate(args, fixed_z, fid_stat, epoch, gen_net, writer_dict)
        logger.info(f'FID score: {fid_score} || @ epoch {epoch}.')
        load_params(gen_net, backup_param)

        is_best = False
        # the original compared against an uninitialized `best_fid_score`;
        # track the best FID in `best_fid`, which resuming already restores
        if epoch == 1 or fid_score < best_fid:
            best_fid = fid_score
            is_best = True

        # a checkpoint is written every epoch (the `epoch % 1 == 0` test in
        # the original is always true)
        if is_best or epoch % 1 == 0:
            states = return_states()
            save_checkpoint(states,
                            is_best,
                            args.path_helper['ckpt_path'],
                            filename=f'checkpoint_epoch_{epoch}.pth')
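# A minimal sketch, under assumption, of the LinearLrDecay scheduler these
# scripts construct as LinearLrDecay(optimizer, start_lr, end_lr,
# decay_start_step, decay_end_step): the learning rate falls linearly between
# the two step bounds and is clamped outside them.
class LinearLrDecaySketch:
    def __init__(self, optimizer, start_lr, end_lr, decay_start_step, decay_end_step):
        self.optimizer = optimizer
        self.start_lr = start_lr
        self.end_lr = end_lr
        self.decay_start_step = decay_start_step
        self.decay_end_step = decay_end_step

    def step(self, current_step):
        if current_step <= self.decay_start_step:
            lr = self.start_lr
        elif current_step >= self.decay_end_step:
            lr = self.end_lr
        else:
            frac = ((current_step - self.decay_start_step) /
                    (self.decay_end_step - self.decay_start_step))
            lr = self.start_lr + frac * (self.end_lr - self.start_lr)
        # write the new rate into every parameter group
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        return lr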
def main():
    args = cfg.parse_args()
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown initial type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net = Generator(bottom_width=args.bottom_width,
                        gf_dim=args.gf_dim,
                        latent_dim=args.latent_dim).cuda()
    dis_net = eval('models.' + args.model + '.Discriminator')(args=args).cuda()
    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    initial_gen_net_weight = torch.load(os.path.join(args.init_path, 'initial_gen_net.pth'),
                                        map_location="cpu")
    initial_dis_net_weight = torch.load(os.path.join(args.init_path, 'initial_dis_net.pth'),
                                        map_location="cpu")

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    exp_str = args.dir
    args.load_path = os.path.join('output', exp_str, 'pth',
                                  'epoch{}.pth'.format(args.load_epoch))

    # state dict:
    assert os.path.exists(args.load_path)
    checkpoint = torch.load(args.load_path)
    print('=> loaded checkpoint %s' % args.load_path)
    state_dict = checkpoint['generator']
    gen_net = load_subnet(args, state_dict, initial_gen_net_weight).cuda()
    avg_gen_net = deepcopy(gen_net)

    # set optimizer
    gen_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
        (args.beta1, args.beta2))
    dis_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
        (args.beta1, args.beta2))
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # set up data_loader
    dataset = datasets.ImageDataset(args)
    train_loader = dataset.train

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic / len(train_loader))

    # initial
    np.random.seed(args.random_seed)
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (25, args.latent_dim)))
    start_epoch = 0
    best_fid = 1e4

    args.path_helper = set_log_dir('logs', args.exp_name)
    logger = create_logger(args.path_helper['log_path'])
    # logger.info('=> loaded checkpoint %s (epoch %d)' % (checkpoint_file, start_epoch))
    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    gen_avg_param = copy_params(gen_net)

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler, dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if epoch and epoch % args.val_freq == 0 or epoch == int(
                args.max_epoch) - 1:
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  gen_net, writer_dict)
            logger.info('Inception score: %.4f, FID score: %.4f || @ epoch %d.' %
                        (inception_score, fid_score, epoch))
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net.load_state_dict(gen_net.state_dict())
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
        if epoch and epoch % args.val_freq == 0 or epoch == args.num_epochs - 1:
            fid_score = validate(args, fid_stat, gen_net, writer_dict, valid_loader)
            logger.info(f'FID score: {fid_score} || @ epoch {epoch}.')
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'enc_state_dict': enc_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'ae_recon_optimizer': ae_recon_optimizer.state_dict(),
                'ae_reg_optimizer': ae_reg_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])


if __name__ == '__main__':
    arg = cfg.parse_args()
    # xmp.spawn (torch_xla multiprocessing) passes the process index as the
    # first argument, so the spawned `main` must accept (index, args)
    xmp.spawn(main, args=(arg, ), nprocs=8)
import numpy as np
import torch
import torch.nn as nn
from scipy import stats
from torch.utils import data

import cfg
import datasets
import experiments as exp
import logger
import utils

args = cfg.parse_args()
exp_func = getattr(exp, args.experiment)

# Model
_model = cfg.get_model(args.model_name,
                       args.dataset,
                       scales=args.scales,
                       basemodel=args.basemodel_name)
model = nn.DataParallel(_model)
model = model.cuda()

# Optimizer
optimizer = cfg.get_optimizer(model, args.optimizer, lr=args.lr)
scheduler = cfg.get_scheduler(optimizer)

# Criterion
criterion_func = cfg.get_criterion(args.criterion, cuda=True)
criterion = {'embed': criterion_func['MSE'], 'abstr': criterion_func['CE']}
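# A hypothetical usage sketch (the function name and unit loss weighting are
# assumptions, not the project's code): the criterion dict above pairs an MSE
# loss for the embedding head with a cross-entropy loss for the abstraction
# head, so a training step would combine them roughly like this.
def combined_loss_sketch(criterion, embed_pred, embed_target, abstr_logits, abstr_labels):
    embed_loss = criterion['embed'](embed_pred, embed_target)    # MSE term
    abstr_loss = criterion['abstr'](abstr_logits, abstr_labels)  # CE term
    return embed_loss + abstr_loss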
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.rand_seed)

    # switch datasets and models
    if args.dataset == 'cifar':
        from data.cifar import inf_train_gen
        itr = inf_train_gen(args.batch_size, flip=False)
        netE = Res18_Quadratic(3, args.n_chan, 32, normalize=False, AF=nn.ELU())
        # netE = SE_Res18_Quadratic(3, args.n_chan, 32, normalize=False, AF=Swish())
    elif args.dataset == 'mnist':
        from data.mnist_32 import inf_train_gen
        itr = inf_train_gen(args.batch_size)
        netE = Res12_Quadratic(1, args.n_chan, 32, normalize=False, AF=nn.ELU())
    elif args.dataset == 'fmnist':
        from data.fashion_mnist_32 import inf_train_gen
        itr = inf_train_gen(args.batch_size)
        netE = Res12_Quadratic(1, args.n_chan, 32, normalize=False, AF=nn.ELU())
    else:
        raise NotImplementedError('{} unknown dataset'.format(args.dataset))

    # setup gpu
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    netE = netE.to(device)
    if args.n_gpus > 1:
        netE = nn.DataParallel(netE)

    # setup path
    now = datetime.now()
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    if args.cont:
        # resume: compose the existing log folder path and load the network
        root = 'logs/' + args.log + '_' + args.time
        file_name = 'netE_' + str(args.net_indx) + '.pt'
        netE.load_state_dict(torch.load(root + '/models/' + file_name))
    else:
        # start a new run: create the logging folder; the timestamp makes
        # collisions unlikely, but overwrite the folder if it already exists
        root = 'logs/' + args.log + '_' + timestamp
        if os.path.isdir(root):
            shutil.rmtree(root)
        os.makedirs(root)
        os.makedirs(root + '/models')
        os.makedirs(root + '/samples')

    writer = SummaryWriter(root)

    # setup optimizer and lr scheduler
    params = {'lr': args.max_lr, 'betas': (0.9, 0.95)}
    optimizerE = torch.optim.Adam(netE.parameters(), **params)
    if args.lr_schedule == 'exp':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizerE, int(args.n_iter / 6))
    elif args.lr_schedule == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizerE,
                                                               args.n_iter,
                                                               eta_min=1e-6,
                                                               last_epoch=-1)
    elif args.lr_schedule == 'const':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizerE, int(args.n_iter))

    # train
    print_interval = 50
    max_iter = args.n_iter + args.net_indx
    batchSize = args.batch_size
    sigma0 = 0.1
    sigma02 = sigma0**2

    # one noise level per sample in the batch
    if args.noise_distribution == 'exp':
        sigmas_np = np.logspace(np.log10(args.min_noise),
                                np.log10(args.max_noise), batchSize)
    elif args.noise_distribution == 'lin':
        sigmas_np = np.linspace(args.min_noise, args.max_noise, batchSize)
    sigmas = torch.Tensor(sigmas_np).view((batchSize, 1, 1, 1)).to(device)

    start_time = time.time()
    for i in range(args.net_indx, args.net_indx + args.n_iter):
        x_real = next(itr).to(device)
        x_noisy = x_real + sigmas * torch.randn_like(x_real)
        x_noisy = x_noisy.requires_grad_()
        E = netE(x_noisy).sum()
        grad_x = torch.autograd.grad(E, x_noisy, create_graph=True)[0]
        # the original called x_noisy.detach() without assignment, a no-op
        x_noisy = x_noisy.detach()

        optimizerE.zero_grad()
        # multiscale denoising score matching loss
        LS_loss = ((((x_real - x_noisy) / sigmas / sigma02 +
                     grad_x / sigmas)**2) / batchSize).sum()
        LS_loss.backward()
        optimizerE.step()
        scheduler.step()

        if (i + 1) % print_interval == 0:
            time_spent = time.time() - start_time
            start_time = time.time()
            netE.eval()
            E_real = netE(x_real).mean()
            E_noise = netE(torch.rand_like(x_real)).mean()
            netE.train()
            print('Iteration {}/{} ({:.0f}%), E_real {:e}, E_noise {:e}, '
                  'Normalized Loss {:e}, time {:4.1f}'.format(
                      i + 1, max_iter, 100 * ((i + 1) / max_iter),
                      E_real.item(), E_noise.item(),
                      (sigma02**2) * (LS_loss.item()), time_spent))
            writer.add_scalar('E_real',
                              E_real.item(), i + 1)
            writer.add_scalar('E_noise', E_noise.item(), i + 1)
            writer.add_scalar('loss', (sigma02**2) * LS_loss.item(), i + 1)
            del E_real, E_noise, x_real, x_noisy

        if (i + 1) % args.save_every == 0:
            print("-" * 50)
            file_name = args.file_name + str(i + 1) + '.pt'
            torch.save(netE.state_dict(), root + '/models/' + file_name)
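# A compact restatement (for illustration only, not project code) of the
# multiscale denoising score matching objective trained above: with
# x_noisy = x + sigma*eps, the energy gradient is trained so that
# -grad_E(x_noisy) approximates (x - x_noisy)/sigma0^2, and both terms are
# divided by sigma so every noise scale in the batch contributes comparably.
import torch

def dsm_loss_sketch(netE, x_real, sigmas, sigma0=0.1):
    x_noisy = (x_real + sigmas * torch.randn_like(x_real)).requires_grad_()
    energy = netE(x_noisy).sum()
    # create_graph=True so the loss can backpropagate through the gradient
    grad_x = torch.autograd.grad(energy, x_noisy, create_graph=True)[0]
    residual = (x_real - x_noisy.detach()) / sigmas / sigma0**2 + grad_x / sigmas
    return (residual**2).sum() / x_real.shape[0]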