def main():
    create_log_dir(log_dir, __file__)
    train_env = make_vec_env(train_env_num)
    test_env = make_vec_env(test_env_num)

    agent_online = make_agent_online()
    agent_train = make_agent_train()
    agent_online.load_state_dict(agent_train.state_dict())

    trainer = OnPolicyTrainer(agent_online,
                              agent_train,
                              train_env,
                              **trainer_args,
                              test_env=test_env)
    trainer.train(**train_args)

    train_env.close()
    test_env.close()
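# `make_vec_env` is not defined in this snippet. Below is a minimal, hypothetical
# sketch of such a helper, assuming Gym's built-in vector API and an illustrative
# hard-coded env id; the real helper may add task-specific wrappers.
import gym

def make_vec_env(num_envs, env_id='CartPole-v1'):
    # AsyncVectorEnv steps each copy in a separate process;
    # swap in gym.vector.SyncVectorEnv for easier debugging.
    return gym.vector.AsyncVectorEnv(
        [lambda: gym.make(env_id) for _ in range(num_envs)])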
def main(args):
    # I/O
    config_file = args.config_file
    config = imp.load_source('config', config_file)
    if args.name:
        config.name = args.name

    trainset = Dataset(config.train_dataset_path)

    network = Network()
    network.initialize(config, trainset.num_classes)

    # Initialization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)
    if config.restore_model:
        network.restore_model(config.restore_model, config.restore_scopes)

    proc_func = lambda images: preprocess(images, config, True)
    trainset.start_batch_queue(config.batch_format, proc_func=proc_func)

    # Main Loop
    print('\nStart Training\nname: {}\n# epochs: {}\nepoch_size: {}\nbatch_size: {}\n'.format(
        config.name, config.num_epochs, config.epoch_size, config.batch_format['size']))
    global_step = 0
    start_time = time.time()
    for epoch in range(config.num_epochs):
        # Training
        for step in range(config.epoch_size):
            # Prepare input
            learning_rate = utils.get_updated_learning_rate(global_step, config)
            batch = trainset.pop_batch_queue()

            wl, sm, global_step = network.train(
                batch['mu'].reshape(config.batch_format['size'], -1),
                batch['conv_final'].reshape(config.batch_format['size'], -1),
                batch['label'],
                learning_rate,
                config.keep_prob)
            wl['lr'] = learning_rate

            # Display
            if step % config.summary_interval == 0:
                duration = time.time() - start_time
                start_time = time.time()
                utils.display_info(epoch, step, duration, wl)
                summary_writer.add_summary(sm, global_step=global_step)

        # Save the model
        network.save_model(log_dir, global_step)
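# `utils.get_updated_learning_rate` is referenced but not shown. A minimal sketch,
# assuming a piecewise-constant schedule stored in the config as a
# {boundary_step: learning_rate} dict; the actual helper may support other strategies.
def get_updated_learning_rate(global_step, config):
    # e.g. config.learning_rate_schedule = {0: 1e-1, 20000: 1e-2, 40000: 1e-3}
    lr = None
    for boundary in sorted(config.learning_rate_schedule):
        if global_step >= boundary:
            lr = config.learning_rate_schedule[boundary]
    return lr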
def main(args):
    print('start main')
    test_1v1_target = 'cfp_fp,agedb_30'
    test_1v1_target = 'cfp_fp'  # overrides the line above; only cfp_fp is evaluated
    test_lfw_openset_numTrials = 0

    # I/O
    config_file = args.config_file
    config = imp.load_source('config', config_file)
    if args.name:
        config.name = args.name

    t1 = time.time()
    read_imagelist_from_file = False
    imagelist_file_for_train = 'data/list_to_train_ms1m-retinaface-t1-img.txt'
    if read_imagelist_from_file:
        trainset = Dataset(imagelist_file_for_train)
    else:
        trainset = Dataset(config.train_dataset_path)
        # trainset.write_datalist_to_file(imagelist_file_for_train)
    print('time', time.time() - t1)
    trainset.set_base_seed(config.base_random_seed)

    network = Network()
    network.initialize(config, trainset.num_classes)

    # Initialization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)
    if config.restore_model:
        print(config.restore_model)
        network.restore_model(config.restore_model, config.restore_scopes,
                              config.exclude_restore_scopes)

    test_images_lfw = None
    if test_lfw_openset_numTrials > 0 and args.dataset_path:
        lfw_paths = get_paths_all(os.path.expanduser(args.dataset_path))
        test_images_lfw = preprocess(lfw_paths, config, False)

    # Load the 1:1 verification benchmarks (e.g. cfp_fp, agedb_30) if present.
    ver_list = []
    ver_name_list = []
    for name in test_1v1_target.split(','):
        path = os.path.join(config.test_data_dir_mx, name + '.bin')
        if os.path.exists(path):
            image_size = [112, 112]
            data_list, issame_list = verification.load_bin(path, image_size)
            data_list = data_list[0].asnumpy()
            images = preprocess(data_list, network.config, False)
            ver_list.append((images, issame_list))
            ver_name_list.append(name)
            print('ver', name)

    proc_func = lambda images: preprocess(images, config, True)
    trainset.start_batch_queue(config.batch_format, proc_func=proc_func)
    # batch = trainset.pop_batch_queue()

    # Main Loop
    print('\nStart Training\nname: {}\n# epochs: {}\nepoch_size: {}\nbatch_size: {}\n'.format(
        config.name, config.num_epochs, config.epoch_size, config.batch_format['size']))
    global_step = 0
    network.save_model(log_dir, global_step)
    start_time = time.time()
    for epoch in range(config.num_epochs + 1):
        # Save the model
        network.save_model(log_dir, global_step)

        # Evaluation (skipped before the first training epoch)
        if epoch > 0:
            info_w = ''
            if test_lfw_openset_numTrials > 0 and args.dataset_path:
                mu, sigma_sq = network.extract_feature(test_images_lfw, 64, verbose=True)
                quality_score = -np.mean(np.log(sigma_sq), axis=1)
                print('sigma_sq percentile [0, 10, 30, 50, 70, 90, 100]')
                print('sigma_sq ',
                      np.percentile(quality_score.ravel(), [0, 10, 30, 50, 70, 90, 100]))
                feat_pfe = np.concatenate([mu, sigma_sq], axis=1)

                info1 = openset_lfw(mu, utils.pair_cosin_score, test_lfw_openset_numTrials)
                info_w += info1 + '\n'
                print(info1)
                info2 = openset_lfw(feat_pfe, utils.nvm_MLS_score, test_lfw_openset_numTrials)
                print(info2)
                info_w += info2 + '\n'
                info3 = openset_lfw(feat_pfe, utils.nvm_MLS_score_attention, test_lfw_openset_numTrials)
                print(info3)
                info_w += info3 + '\n'

            info_ver = ''
            for i in range(len(ver_list)):
                print('---', ver_name_list[i], '---')
                info_ver_ = verification.eval_images(ver_list[i][0], ver_list[i][1],
                                                     network, 128, 10)
                print(info_ver_)
                info_ver += '---' + ver_name_list[i] + '\n'
                info_ver += info_ver_ + '\n'
            info_w += info_ver + '\n'

            with open(os.path.join(log_dir, 'training-log.txt'), 'a') as f:
                f.write(info_w)

        if epoch == config.num_epochs:
            break

        # Training
        for step in range(config.epoch_size):
            # Prepare input
            learning_rate = utils.get_updated_learning_rate(global_step, config)
            batch = trainset.pop_batch_queue()
            # Mirror-augmented batches can hold twice as many images as labels;
            # duplicate the labels to match.
            if len(batch['image']) > len(batch['label']):
                batch['label'] = np.concatenate([batch['label'], batch['label']], axis=0)

            wl, global_step = network.train(batch['image'], batch['label'],
                                            learning_rate, config.keep_prob)
            wl['lr'] = learning_rate

            # Display
            if step % config.summary_interval == 0:
                duration = time.time() - start_time
                start_time = time.time()
                with open(os.path.join(log_dir, 'training-log.txt'), 'a') as f:
                    s = utils.display_info(epoch, step, duration, wl)
                    print(s)
                    f.write(s + '\n')
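# The evaluation branch above scores image quality as -mean(log(sigma_sq)):
# the smaller the predicted uncertainty, the higher the score. A minimal
# numeric illustration of that formula:
import numpy as np

sigma_sq = np.array([[0.01, 0.02],    # a confident embedding
                     [0.50, 0.80]])   # an uncertain embedding
quality_score = -np.mean(np.log(sigma_sq), axis=1)
print(quality_score)  # ~[4.26, 0.46]: the confident face scores higher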
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None
    SEED = 721

    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents
    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: the opponent is the first agent; the second is the learnable one.
        fixed_agents = ['first_0']

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    load_model(model, args)

    # Put networks and optimizer state into shared memory so the rollout
    # processes all update the same model.
    for individual_model in model.agents.values():
        individual_model.policy.share_memory()
        individual_model.policy_old.share_memory()
        individual_model.value.share_memory()
        ShareParameters(individual_model.optimizer)

    path = 'model/' + args.env
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + '/fictitious_'

    processes = []
    for p in range(args.num_envs):
        process = Process(target=parallel_rollout,
                          args=(p, args.env, model, writer, max_eps,
                                max_timesteps, selfplay_interval,
                                args.render, path, args.against_baseline,
                                args.selfplay, args.fictitious,
                                SEED))  # args mix shared objects (model, writer) with per-process values
        process.daemon = True  # daemon processes are terminated when the main process exits
        processes.append(process)
    [p.start() for p in processes]
    [p.join() for p in processes]  # wait for all processes to finish

    env.close()
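# `ShareParameters` moves the optimizer state into shared memory alongside the
# networks. A minimal sketch, assuming an Adam-style optimizer whose
# per-parameter state holds exp_avg/exp_avg_sq buffers; the real helper may differ.
import torch

def ShareParameters(optimizer):
    for group in optimizer.param_groups:
        for p in group['params']:
            state = optimizer.state[p]
            # Create the buffers if the optimizer has not stepped yet,
            # then place them in shared memory.
            state.setdefault('step', 0)
            state.setdefault('exp_avg', torch.zeros_like(p.data))
            state.setdefault('exp_avg_sq', torch.zeros_like(p.data))
            state['exp_avg'].share_memory_()
            state['exp_avg_sq'].share_memory_()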
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None
    SEED = 721

    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    # env = make_env(args.env, SEED, obs_type=obs_type)
    # https://github.com/thu-ml/tianshou/blob/master/tianshou/env/venvs.py
    VectorEnv = [DummyVectorEnv, SubprocVectorEnv][1]  # pick the subprocess-based implementation
    envs = VectorEnv([
        lambda: make_env(args.env, obs_type=obs_type)
        for _ in range(args.num_envs)
    ])
    envs.seed(np.random.randint(1000, size=args.num_envs).tolist())  # random seeding

    state_spaces = envs.observation_spaces[0]  # same for all env instances, so just take one
    action_spaces = envs.action_spaces[0]  # same for all env instances, so just take one
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    envs.reset()
    agents = envs.agents[0]  # same for all env instances, so just take one
    print('agents: ', agents)
    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: the opponent is the first agent; the second is the learnable one.
        fixed_agents = ['first_0']

    if obs_type == 'ram':
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'MLP', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    else:
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'CNN', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    load_model(model, args)

    path = f'model/{args.env}/'
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + 'fictitious_'

    parallel_rollout(envs, model, writer, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval,
                     render=args.render, model_path=path,
                     against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test, args=args)

    envs.close()
    # init demo buffer
    demo_data = BCDataSet(args.demo_file)
    demo_buffer = DataLoader(demo_data, args.batch_size, shuffle=True)

    # init trainer and train
    trainer = BehaviorCloningTrainer(agent, test_env, demo_buffer, args.log_dir)
    if args.load_checkpoint is not None:
        trainer.load(args.load_checkpoint)
    trainer.train(args.n_epoch, args.n_tests_per_epoch)
    test_env.close()


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', help='path to yaml config', type=str)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args_ = parse_args()
    with open(args_.config) as f:
        config = yaml.safe_load(f)
    # create log-dir and copy config into it
    create_log_dir(config['log_dir'])
    shutil.copyfile(args_.config, config['log_dir'] + 'config.yaml')
    main(config)
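# The __main__ block reads a YAML file and indexes config['log_dir'] directly,
# while the training fragment accesses fields such as demo_file and batch_size.
# A hypothetical minimal config covering only the names used in this script;
# the real schema (and file names) may differ.
import yaml

example_yaml = """
log_dir: logs/bc_run/        # trailing slash matters: 'config.yaml' is concatenated onto it
demo_file: data/demos.hdf5   # consumed by BCDataSet
batch_size: 256
n_epoch: 100
n_tests_per_epoch: 5
load_checkpoint: null        # or a path to resume from
"""
config = yaml.safe_load(example_yaml)  # e.g. config['log_dir'] == 'logs/bc_run/'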
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None
    SEED = 721

    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    # TODO: used only to provide spaces info; the SubprocVecEnv wrapper could be modified instead
    env = make_env(args.env, SEED, obs_type=obs_type)
    # https://stable-baselines.readthedocs.io/en/master/guide/vec_envs.html?highlight=multiprocessing
    envs = SubprocVecEnv([
        lambda: make_env(args.env, obs_type=obs_type)
        for _ in range(args.num_envs)
    ], start_method='spawn')
    # envs.seed(np.random.randint(1000, size=args.num_envs).tolist())  # random seeding
    envs.seed(SEED)  # fixed seeding
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    agents = env.agents
    print('agents: ', agents)
    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: the opponent is the first agent; the second is the learnable one.
        fixed_agents = ['first_0']

    if obs_type == 'ram':
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'MLP', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    else:
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'CNN', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    load_model(model, args)

    path = f"model/{args.env}/"
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + 'fictitious_'

    parallel_rollout(envs, model, writer, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval,
                     render=args.render, model_path=path,
                     against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test)

    envs.close()
def main():
    args = get_args()
    print_args(args)
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None
    SEED = 721

    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents
    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: the opponent is the first agent; the second is the learnable one.
        fixed_agents = ['first_0']

    path = f"model/{args.env}/"
    os.makedirs(path, exist_ok=True)
    data_path = f"data/{args.env}/"
    os.makedirs(data_path, exist_ok=True)

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
        path = path + 'cnn_'
    if args.selfplay:
        os.makedirs(path + 'selfplay/', exist_ok=True)
    load_model(model, args)
    if args.fictitious:
        path = path + 'fictitious_'

    eval_env = make_env(args.env, np.random.randint(0, 100), obs_type=obs_type)
    evaluater = Evaluater(eval_env, max_timesteps)

    parallel_rollout(env, model, writer, evaluater=evaluater,
                     max_eps=max_eps, max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval,
                     render=args.render, model_path=path,
                     against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test)

    env.close()
def main(args):
    # I/O
    config_file = args.config_file
    config = utils.import_file(config_file, "config")

    trainset = Dataset(config.train_dataset_path, config.mode)
    testset = Dataset(config.test_dataset_path, config.mode)

    network = AdvFaces()
    network.initialize(config, trainset.num_classes)

    # Initialization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)
    if config.restore_model:
        network.restore_model(config.restore_model, config.restore_scopes)

    proc_func = lambda images: preprocess(images, config, True)
    trainset.start_batch_queue(config.batch_size,
                               batch_format=config.batch_format,
                               proc_func=proc_func)

    # Main Loop
    print("\nStart Training\n# epochs: %d\nepoch_size: %d\nbatch_size: %d\n"
          % (config.num_epochs, config.epoch_size, config.batch_size))
    global_step = 0
    start_time = time.time()
    for epoch in range(config.num_epochs):
        if epoch == 0:
            print("Loading test set")
            originals = preprocess(testset.images, config, is_training=False)
            targets = preprocess(testset.targets, config, False)
            print("Done loading test set")
            test_images = np.squeeze(originals[np.where(testset.labels < 5)[0]])
            target_feats = network.aux_matcher_extract_feature(targets)
            output_dir = os.path.join(log_dir, "samples")
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            utils.save_manifold(test_images, os.path.join(output_dir, "original.jpg"))
            print("Computing initial success rates...")
            success_rate(network, config, originals, targets, target_feats,
                         log_dir, global_step)
            print("Testing...")
            test(network, config, test_images, targets, log_dir, global_step)

        # Training
        for step in range(config.epoch_size):
            # Prepare input
            learning_rate = utils.get_updated_learning_rate(global_step, config)
            batch = trainset.pop_batch_queue()

            wl, sm, global_step = network.train(
                batch["images"],
                batch["targets"],
                batch["labels"],
                learning_rate,
                config.keep_prob,
                trainset.num_classes,
            )
            wl["lr"] = learning_rate

            # Display
            if step % config.summary_interval == 0:
                duration = time.time() - start_time
                start_time = time.time()
                utils.display_info(epoch, step, duration, wl)
                summary_writer.add_summary(sm, global_step=global_step)

        # Compute success rate
        success_rate(network, config, originals, targets, target_feats,
                     log_dir, global_step)
        # Testing
        test(network, config, test_images, targets, log_dir, global_step)
        # Save the model
        network.save_model(log_dir, global_step)