def bc_from_dataset_and_params(dataset, bc_params, model_save_dir, num_epochs, lr, adam_eps):
    """Pretrain a behaviour-cloning agent with GAIL and persist it to disk.

    Builds the gym environment described by ``bc_params``, constructs a
    ``GAIL("MlpPolicy", ...)`` model over ``dataset``, runs supervised
    pretraining, then saves the model via ``save_bc_model``.

    Args:
        dataset: expert demonstration dataset fed to GAIL pretraining.
        bc_params: parameters consumed by ``init_gym_env`` to build the env.
        model_save_dir: subdirectory (appended to ``BC_SAVE_DIR``) where the
            model is written.
        num_epochs: number of pretraining epochs.
        lr: pretraining learning rate.
        adam_eps: epsilon for the Adam optimizer used during pretraining.

    Returns:
        The trained GAIL model.
    """
    # Environment for the policy; its spec comes from the BC parameters.
    env = init_gym_env(bc_params)

    # Resolve the target directory once and make sure it exists before
    # pretraining tries to write anything into it.
    save_path = BC_SAVE_DIR + model_save_dir
    create_dir_if_not_exists(save_path)

    bc_model = GAIL("MlpPolicy", env, dataset, verbose=1)
    bc_model.pretrain(
        dataset,
        n_epochs=num_epochs,
        learning_rate=lr,
        adam_epsilon=adam_eps,
        save_dir=save_path,
    )
    save_bc_model(model_save_dir, bc_model, bc_params)
    return bc_model
# Script entry: either train a behaviour-cloning (BC) agent from recorded
# expert data, or load a previously trained model for evaluation.
# NOTE(review): hard-coded absolute working directory — will break on any
# other machine; consider making this configurable.
os.chdir('/Users/cusgadmin/Documents/UCB/Academics/SSastry/Multi_agent_competition/')
# `args` is presumably an argparse namespace with .train, .exp_file,
# .total_iters, .model, .num_test — TODO confirm against the parser
# definition elsewhere in this file.
if args.train:
    # Timestamp used later to tag the saved model filename.
    now = datetime.datetime.now()
    print(colored('Loading expert data from {}!'.format(args.exp_file),'red'))
    # Raw .npz archive; 'episode_returns' length = number of expert episodes.
    exp_data = np.load(args.exp_file)
    print(colored('Expert evader has won {} games!'\
        .format(len(exp_data['episode_returns'])),'red'))
    # Same file re-read as a stable-baselines ExpertDataset for pretraining.
    dataset = ExpertDataset(expert_path=args.exp_file, verbose=1)
    start_time = time.time()
    # GAIL model on the small pursuit-evasion env; only the supervised
    # pretrain step below is used (behaviour cloning), not GAIL training.
    model = GAIL('MlpPolicy', 'gym_pursuitevasion_small:pursuitevasion_small-v0', dataset, verbose=1)
    print(colored('Training a behaviour cloning agent for {} iterations!'.format(int(args.total_iters)),'red'))
    model.pretrain(dataset=dataset,n_epochs=int(args.total_iters))
    # Filename encodes: #expert games, #epochs, and the date (YYYYMMDD).
    model.save('games{}_iters{}_{}_bc_pursuitevasion_small'.format(len(exp_data['episode_returns']),\
        int(args.total_iters),str(now.strftime('%Y%m%d'))))
    end_time = time.time()
    print(colored('Training time: {:.2f}s = {:.2f}min = {:.4f}hrs'.format(end_time-start_time,\
        (end_time-start_time)/60,(end_time-start_time)/3600),'red'))
    print(colored('Trained BC policy','red'))
else: #test
    # Evaluation path: load a saved model and roll it out in the env.
    print(colored('Trained on expert data from {}!'.format(args.exp_file),'red'))
    # exp_data = np.load(args.exp_file)s
    print(colored('Testing learnt policy from model file {} for {} games!'.\
        format(args.model,int(args.num_test)),'red'))
    start_time = time.time()
    model = GAIL.load(args.model)
    env = gym.make('gym_pursuitevasion_small:pursuitevasion_small-v0')