def main():
    """Evaluate a trained heuristic nnet with GBFS on a saved test dataset."""
    # parse arguments
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument('--model_dir', type=str, required=True, help="Directory of nnet model")
    parser.add_argument('--data_dir', type=str, required=True, help="Directory of data")
    parser.add_argument('--env', type=str, required=True, help="Environment: cube3, 15-puzzle, 24-puzzle")
    # fixed typo in help text: "number ofsteps" -> "number of steps"
    parser.add_argument('--max_steps', type=int, default=None,
                        help="Maximum number of steps to take when solving "
                             "with GBFS. If none is given, then this "
                             "is set to the maximum number of "
                             "backwards steps taken to create the "
                             "data")

    args = parser.parse_args()

    # environment
    env: Environment = env_utils.get_environment(args.env)

    # get device and nnet
    on_gpu: bool
    device: torch.device
    device, devices, on_gpu = nnet_utils.get_device()
    print("device: %s, devices: %s, on_gpu: %s" % (device, devices, on_gpu))

    # heuristic function backed by the loaded nnet (no clipping at zero)
    heuristic_fn = nnet_utils.load_heuristic_fn(args.model_dir, device, on_gpu,
                                                env.get_nnet_model(), env, clip_zero=False)

    gbfs_test(args.data_dir, env, heuristic_fn, max_solve_steps=args.max_steps)
def main():
    """Train a puzzle8 cost-to-go DNN with supervised learning and report per-cost MSE."""
    # get environment
    env: Environment = env_utils.get_environment("puzzle8")

    # get nnet model
    nnet: nn.Module = get_nnet_model()
    device = torch.device('cpu')
    batch_size: int = 100
    num_itrs: int = 10000

    # get data
    print("Preparing Data\n")
    with open("data/data.pkl", "rb") as data_f:  # close the handle (was a leaked open())
        data = pickle.load(data_f)
    states_nnet, outputs = sample_training_data(data['states'], data['output'], env,
                                                batch_size * num_itrs)

    # train with supervised learning
    print("Training DNN\n")
    nnet.train()
    train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs, 0)

    # get performance: per-cost-to-go MSE of the DNN output vs the true value
    print("Evaluating DNN\n")
    nnet.eval()
    for cost_to_go in np.unique(data["output"]):
        # np.ndarray is the array type; np.array is a constructor function
        idxs_targ: np.ndarray = np.where(data["output"] == cost_to_go)[0]
        states_targ: List[State] = [data["states"][idx] for idx in idxs_targ]
        states_targ_nnet: np.ndarray = env.state_to_nnet_input(states_targ)
        out_nnet = nnet(states_nnet_to_pytorch_input(states_targ_nnet, device)).cpu().data.numpy()

        mse = float(np.mean((out_nnet - cost_to_go) ** 2))
        print("Cost-To-Go: %i, Ave DNN Output: %f, MSE: %f" % (cost_to_go, float(np.mean(out_nnet)), mse))
def main(): torch.set_num_threads(4) #Set up TesnorBoard writer #writer = SummaryWriter() # get environment env: Environment = env_utils.get_environment("puzzle8") # get nnet model nnet: nn.Module = get_nnet_model() device = torch.device('cpu') batch_size: int = 100 num_itrs_per_vi_update: int = 200 #num_vi_updates: int = 1 num_vi_updates: int = 50 # get data print("Preparing Data\n") data = pickle.load(open("data/data.pkl", "rb")) ''' # train with supervised learning print("Training DNN\n") train_itr: int = 0 for vi_update in range(num_vi_updates): #for vi_update in range(1): print("--- Value Iteration Update: %i ---" % vi_update) states: List[State] = env.generate_states(batch_size*num_itrs_per_vi_update, (0, 500)) #states: List[State] = env.generate_states(200, (0, 500)) states_nnet: np.ndarray = env.state_to_nnet_input(states) inputs_tensor = torch.from_numpy(states_nnet).float() #writer.add_graph(nnet, inputs_tensor) outputs_np = value_iteration(nnet, device, env, states) outputs = np.expand_dims(np.array(outputs_np), 1) nnet.train() train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs_per_vi_update, train_itr) nnet.eval() evaluate_cost_to_go(nnet, device, env, data["states"], data["output"]) #pdb.set_trace() train_itr = train_itr + num_itrs_per_vi_update #writer.close() #pdb.set_trace() FILE = "model.pth" torch.save(nnet.state_dict(), FILE) ''' FILE = "model.pth" nnet.load_state_dict(torch.load(FILE)) generate_plot(nnet, device, env, data["states"], data["output"])
def main():
    """Solve a file of states with batch-weighted A* (BWAS) and save results to disk."""
    # parse arguments
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument('--states', type=str, required=True, help="File containing states to solve")
    parser.add_argument('--model_dir', type=str, required=True, help="Directory of nnet model")
    parser.add_argument('--env', type=str, required=True, help="Environment: cube3, 15-puzzle, 24-puzzle")

    parser.add_argument('--batch_size', type=int, default=1, help="Batch size for BWAS")
    parser.add_argument('--weight', type=float, default=1.0, help="Weight of path cost")

    parser.add_argument('--language', type=str, default="python", help="python or cpp")

    parser.add_argument('--results_dir', type=str, required=True, help="Directory to save results")
    parser.add_argument('--start_idx', type=int, default=0, help="")
    parser.add_argument('--nnet_batch_size', type=int, default=None,
                        help="Set to control how many states per GPU are "
                             "evaluated by the neural network at a time. "
                             "Does not affect final results, "
                             "but will help if nnet is running out of "
                             "memory.")

    parser.add_argument('--verbose', action='store_true', default=False, help="Set for verbose")
    parser.add_argument('--debug', action='store_true', default=False, help="Set when debugging")

    args = parser.parse_args()

    if not os.path.exists(args.results_dir):
        os.makedirs(args.results_dir)

    results_file: str = "%s/results.pkl" % args.results_dir
    output_file: str = "%s/output.txt" % args.results_dir
    if not args.debug:
        # tee stdout to the output file for non-debug runs
        sys.stdout = data_utils.Logger(output_file, "w")

    # get data; skip states before start_idx (e.g. to resume a partial run)
    with open(args.states, "rb") as states_f:  # close the handle (was a leaked open())
        input_data = pickle.load(states_f)
    states: List[State] = input_data['states'][args.start_idx:]
    print(len(states))

    # environment
    env: Environment = env_utils.get_environment(args.env)

    # initialize results
    results: Dict[str, Any] = dict()
    results["states"] = states

    if args.language == "python":
        solns, paths, times, num_nodes_gen = bwas_python(args, env, states)
    elif args.language == "cpp":
        solns, paths, times, num_nodes_gen = bwas_cpp(args, env, states, results_file)
    else:
        raise ValueError("Unknown language %s" % args.language)

    results["solutions"] = solns
    results["paths"] = paths
    results["times"] = times
    results["num_nodes_generated"] = num_nodes_gen

    with open(results_file, "wb") as results_f:  # close the handle (was a leaked open())
        pickle.dump(results, results_f, protocol=-1)
def main():
    """Exercise 1: supervised training of a puzzle8 cost-to-go DNN, logging to a file."""
    torch.set_num_threads(1)

    # get environment
    env: Environment = env_utils.get_environment("puzzle8")

    # get nnet model
    nnet: nn.Module = get_nnet_model()

    # get optimizer and lr scheduler
    optimizer = torch.optim.Adam(nnet.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.996)
    criterion = nn.MSELoss()

    device = torch.device('cpu')
    batch_size: int = 100
    num_itrs: int = 10000

    with open("sample_outputs/exercise_1_akash.txt", 'w') as f:
        # get data
        f.write("Preparing Data\n")
        with open("data/data.pkl", "rb") as data_f:  # close the handle (was a leaked open())
            data = pickle.load(data_f)
        states_nnet, outputs = sample_training_data(data['states'], data['output'], env,
                                                    batch_size * num_itrs)

        # train with supervised learning
        f.write("Training DNN\n")
        nnet.train()
        train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs, 0, criterion,
                   optimizer, scheduler, f)

        # get performance: per-cost-to-go MSE of the DNN output vs the true value
        f.write("Evaluating DNN\n")
        nnet.eval()
        for cost_to_go in np.unique(data["output"]):
            # np.ndarray is the array type; np.array is a constructor function
            idxs_targ: np.ndarray = np.where(data["output"] == cost_to_go)[0]
            states_targ: List[State] = [data["states"][idx] for idx in idxs_targ]
            states_targ_nnet: np.ndarray = env.state_to_nnet_input(states_targ)
            out_nnet = nnet(states_nnet_to_pytorch_input(states_targ_nnet,
                                                         device).float()).cpu().data.numpy()

            mse = float(np.mean((out_nnet - cost_to_go) ** 2))
            f.write("Cost-To-Go: %i, Ave DNN Output: %f, MSE: %f \n" %
                    (cost_to_go, float(np.mean(out_nnet)), mse))
def main():
    """Exercise 2: approximate value-iteration training of a puzzle8 cost-to-go DNN."""
    torch.set_num_threads(1)

    # get environment
    env: Environment = env_utils.get_environment("puzzle8")

    # get nnet model
    nnet: nn.Module = get_nnet_model()

    # get optimizer and lr scheduler
    optimizer = torch.optim.Adam(nnet.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.996)
    criterion = nn.MSELoss()

    device = torch.device('cpu')
    batch_size: int = 100
    num_itrs_per_vi_update: int = 200
    num_vi_updates: int = 50

    with open("sample_outputs/exercise_2_akash.txt", 'w') as f:
        # get data (held-out states with ground-truth costs for evaluation)
        f.write("Preparing Data\n")
        with open("data/data.pkl", "rb") as data_f:  # close the handle (was a leaked open())
            data = pickle.load(data_f)

        # train with value iteration: each update regenerates states, computes
        # bootstrapped targets from the current nnet, then trains on them
        f.write("Training DNN\n")
        train_itr: int = 0
        for vi_update in range(num_vi_updates):
            f.write("--- Value Iteration Update: %i --- \n" % vi_update)
            states: List[State] = env.generate_states(batch_size * num_itrs_per_vi_update,
                                                      (0, 500))
            states_nnet: np.ndarray = env.state_to_nnet_input(states)
            outputs_np = value_iteration(nnet, device, env, states)
            outputs = np.expand_dims(np.array(outputs_np), 1)

            nnet.train()
            train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs_per_vi_update,
                       train_itr, criterion, optimizer, scheduler, f)

            nnet.eval()
            evaluate_cost_to_go(nnet, device, env, data["states"], data["output"], f)

            train_itr = train_itr + num_itrs_per_vi_update
def main():
    """Solve loaded states with weighted A* and compare costs to shortest-path costs."""
    arg_parser: ArgumentParser = ArgumentParser()
    arg_parser.add_argument('--env', type=str, default="puzzle8", help="")
    arg_parser.add_argument('--weight_g', type=float, default=1.0, help="")
    arg_parser.add_argument('--weight_h', type=float, default=1.0, help="")
    args = arg_parser.parse_args()

    # environment and pretrained value network
    env, viz, states = env_utils.get_environment(args.env)
    torch.set_num_threads(1)

    value_net = load_nnet("nnets/value_net.pt")
    value_net.eval()

    def heuristic_fn_help(states_inp):
        # heuristic backed by the loaded value network
        return heuristic_fn(value_net, states_inp, env)

    # start states and their known shortest-path costs-to-go
    states, ctgs_shortest = load_data()

    num_exs: int = len(states)
    costs: List[float] = []
    overall_start = time.time()
    for idx, curr_state in enumerate(states):
        solve_start = time.time()
        soln = astar(curr_state, env, heuristic_fn_help, args.weight_g, args.weight_h)
        assert is_valid_soln(curr_state, soln, env), "solution must be valid"

        soln_cost = get_soln_cost(curr_state, soln, env)
        costs.append(soln_cost)

        print("%i/%i - cost: %i, shortest path cost: %i, "
              "time: %.2f" % (idx + 1, num_exs, soln_cost, ctgs_shortest[idx],
                              time.time() - solve_start))

    print("Avg cost: %.2f, Avg shortest path cost: %.2f "
          "Total time: %s" % (float(np.mean(costs)), float(np.mean(ctgs_shortest)),
                              time.time() - overall_start))
def main():
    """Train a puzzle8 cost-to-go DNN with approximate value iteration (stdout logging)."""
    torch.set_num_threads(1)

    # get environment
    env: Environment = env_utils.get_environment("puzzle8")

    # get nnet model
    nnet: nn.Module = get_nnet_model()
    device = torch.device('cpu')
    batch_size: int = 100
    num_itrs_per_vi_update: int = 200
    num_vi_updates: int = 50

    # get data (held-out states with ground-truth costs for evaluation)
    print("Preparing Data\n")
    with open("data/data.pkl", "rb") as data_f:  # close the handle (was a leaked open())
        data = pickle.load(data_f)

    # train with value iteration: each update regenerates states, computes
    # bootstrapped targets from the current nnet, then trains on them
    print("Training DNN\n")
    train_itr: int = 0
    for vi_update in range(num_vi_updates):
        print("--- Value Iteration Update: %i ---" % vi_update)
        states: List[State] = env.generate_states(batch_size * num_itrs_per_vi_update,
                                                  (0, 500))
        states_nnet: np.ndarray = env.state_to_nnet_input(states)
        outputs_np = value_iteration(nnet, device, env, states)
        outputs = np.expand_dims(np.array(outputs_np), 1)

        nnet.train()
        train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs_per_vi_update, train_itr)

        nnet.eval()
        evaluate_cost_to_go(nnet, device, env, data["states"], data["output"])

        train_itr = train_itr + num_itrs_per_vi_update
def main():
    """Run a tabular RL algorithm (policy/value iteration or Q-learning) on an environment."""
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument('--env', type=str, required=True, help="")
    parser.add_argument('--algorithm', type=str, required=True,
                        help="policy_iteration, value_iteration, "
                             "q_learning")
    parser.add_argument('--epsilon', type=float, default=0.1, help="epsilon-greedy policy")
    parser.add_argument('--learning_rate', type=float, default=0.5, help="learning rate")
    parser.add_argument('--discount', type=float, default=1.0, help="Discount")
    args = parser.parse_args()

    # get environment
    env, viz, states = env_utils.get_environment(args.env)

    # dispatch table from algorithm name to its runner
    dispatch = {
        "policy_iteration": lambda: run_policy_iteration(states, env, args.discount, viz),
        "value_iteration": lambda: run_value_iteration(states, env, args.discount, viz),
        "q_learning": lambda: run_q_learning(states, env, args.discount, args.epsilon,
                                             args.learning_rate, viz),
    }
    runner = dispatch.get(args.algorithm)
    if runner is None:
        raise ValueError("Unknown algorithm %s" % args.algorithm)
    runner()

    print("DONE")

    # keep the visualization window open, if one was created
    if viz is not None:
        viz.mainloop()
def main():
    """Run supervised learning, deep value iteration, or deep Q-learning on an environment."""
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument('--env', type=str, required=True, help="")
    parser.add_argument('--algorithm', type=str, required=True,
                        help="supervised, value_iteration, q_learning")
    parser.add_argument('--epsilon', type=float, default=0.5, help="epsilon-greedy policy")
    parser.add_argument('--discount', type=float, default=1.0, help="Discount")
    parser.add_argument('--grade', default=False, action='store_true', help="")
    args = parser.parse_args()

    # get environment
    env, viz, states = env_utils.get_environment(args.env)
    torch.set_num_threads(1)

    if args.algorithm == "supervised":
        start_time = time.time()
        data_file: str = "data/puzzle8.pkl"
        with open(data_file, "rb") as data_f:  # close the handle (was a leaked open())
            data_dict = pickle.load(data_f)
        states_nnet = env.states_to_nnet_input(data_dict['states'])
        values_gt = -data_dict['output']  # negate cost-to-go to get value targets

        value_net: nn.Module = get_value_net()
        supervised(value_net, states_nnet, values_gt, 100, 10000)

        # test: overall MSE on the training data
        value_net.eval()
        out_nnet = value_net(torch.tensor(states_nnet)).cpu().data.numpy()
        mse_total: float = float(np.mean((out_nnet - values_gt) ** 2))
        print("Final MSE: %f" % mse_total)

        print("Time: %f (secs)" % (time.time() - start_time))
    elif args.algorithm == "value_iteration":
        start_time = time.time()
        value_net: nn.Module = get_value_net()
        deep_vi(value_net, env, 20000, 50, 200, 100)

        # test: fraction of start states solved by greedily following the value net
        num_test: int = 1000
        states_test: List[State] = env.sample_start_states(num_test)
        num_solved: int = 0
        for state in states_test:
            state_end = follow_greedy_policy(value_net, env, state, 30)
            if env.is_terminal(state_end):
                num_solved += 1

        print("Solved: %i/%i" % (num_solved, num_test))
        print("Time: %f (secs)" % (time.time() - start_time))
    elif args.algorithm == "q_learning":
        start_time = time.time()
        dqn: nn.Module = deep_q_learning(env, args.epsilon, args.discount, 1000, 30, 100,
                                         10000, 20, viz)

        # test (disabled; kept for reference)
        # data_file: str = "data/action_vals_aifarm_0.pkl"
        # action_vals: Dict[State, List[float]] = pickle.load(open(data_file, "rb"))
        # get_action_val_diff(dqn, env, states, action_vals)

        print("Time: %f (secs)" % (time.time() - start_time))
    else:
        raise ValueError("Unknown algorithm %s" % args.algorithm)

    print("DONE")

    # keep the visualization window open, if one was created
    if viz is not None:
        viz.mainloop()
def main():
    """DeepCubeA-style training loop: generate targets, train, test, promote target net.

    Repeats until max_itrs: build training targets with the frozen target network,
    train the current network on them, run a GBFS test, and promote the current
    network to be the new target whenever the training loss drops below a threshold.
    """
    # arguments
    parser: ArgumentParser = ArgumentParser()
    args_dict: Dict[str, Any] = parse_arguments(parser)

    if not args_dict["debug"]:
        # append stdout to the run's log file for non-debug runs
        sys.stdout = data_utils.Logger(args_dict["output_save_loc"], "a")

    # environment
    env: Environment = env_utils.get_environment(args_dict['env'])

    # get device
    on_gpu: bool
    device: torch.device
    device, devices, on_gpu = nnet_utils.get_device()
    print("device: %s, devices: %s, on_gpu: %s" % (device, devices, on_gpu))

    # load nnet (resumes from curr_dir if a checkpoint exists)
    nnet: nn.Module
    itr: int
    update_num: int
    nnet, itr, update_num = load_nnet(args_dict['curr_dir'], env)
    nnet.to(device)
    if on_gpu and (not args_dict['single_gpu_training']):
        nnet = nn.DataParallel(nnet)

    # training
    while itr < args_dict['max_itrs']:
        # update: compute training targets with the frozen target network.
        # If no target network has been saved yet, use all-zero heuristic values.
        targ_file: str = "%s/model_state_dict.pt" % args_dict['targ_dir']
        all_zeros: bool = not os.path.isfile(targ_file)
        heur_fn_i_q, heur_fn_o_qs, heur_procs = nnet_utils.start_heur_fn_runners(args_dict['num_update_procs'],
                                                                                 args_dict['targ_dir'],
                                                                                 device, on_gpu, env,
                                                                                 all_zeros=all_zeros,
                                                                                 clip_zero=True,
                                                                                 batch_size=args_dict[
                                                                                     "update_nnet_batch_size"])

        states_nnet: List[np.ndarray]
        outputs: np.ndarray
        states_nnet, outputs = do_update(args_dict["back_max"], update_num, env,
                                         args_dict['max_update_steps'], args_dict['update_method'],
                                         args_dict['states_per_update'], args_dict['eps_max'],
                                         heur_fn_i_q, heur_fn_o_qs)

        nnet_utils.stop_heuristic_fn_runners(heur_procs, heur_fn_i_q)

        # train nnet
        # NOTE(review): np.ceil returns a float, so num_train_itrs (and itr after the
        # += below) become floats despite the int annotation; "%i" formatting still
        # works, but confirm the loop bound comparison behaves as intended.
        num_train_itrs: int = args_dict['epochs_per_update'] * np.ceil(outputs.shape[0] / args_dict['batch_size'])
        print("Training model for update number %i for %i iterations" % (update_num, num_train_itrs))
        last_loss = nnet_utils.train_nnet(nnet, states_nnet, outputs, device,
                                          args_dict['batch_size'], num_train_itrs,
                                          itr, args_dict['lr'], args_dict['lr_d'])
        itr += num_train_itrs

        # save nnet checkpoint plus iteration/update counters for resuming
        torch.save(nnet.state_dict(), "%s/model_state_dict.pt" % args_dict['curr_dir'])
        pickle.dump(itr, open("%s/train_itr.pkl" % args_dict['curr_dir'], "wb"), protocol=-1)
        pickle.dump(update_num, open("%s/update_num.pkl" % args_dict['curr_dir'], "wb"), protocol=-1)

        # test with GBFS; solve depth grows with the number of completed updates
        start_time = time.time()
        heuristic_fn = nnet_utils.get_heuristic_fn(nnet, device, env,
                                                   batch_size=args_dict['update_nnet_batch_size'])
        max_solve_steps: int = min(update_num + 1, args_dict['back_max'])
        gbfs_test(args_dict['num_test'], args_dict['back_max'], env, heuristic_fn,
                  max_solve_steps=max_solve_steps)

        print("Test time: %.2f" % (time.time() - start_time))

        # clear cuda memory
        torch.cuda.empty_cache()

        print("Last loss was %f" % last_loss)
        if last_loss < args_dict['loss_thresh']:
            # Update nnet: promote the current network to be the new target network
            print("Updating target network")
            copy_files(args_dict['curr_dir'], args_dict['targ_dir'])
            update_num = update_num + 1
            pickle.dump(update_num, open("%s/update_num.pkl" % args_dict['curr_dir'], "wb"),
                        protocol=-1)

    print("Done")
def main():
    """Generate files of training states in parallel worker processes."""
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument('--env', type=str, required=True, help="Environment")
    parser.add_argument('--back_max', type=int, required=True,
                        help="Maximum number of steps to take backwards from "
                             "goal")
    parser.add_argument('--data_dir', type=str, required=True, help="Directory to save files")
    parser.add_argument('--num_per_file', type=int, default=int(1e6),
                        help="Number of states per file")
    parser.add_argument('--num_files', type=int, default=100, help="Number of files")
    parser.add_argument('--num_procs', type=int, default=1,
                        help="Number of processors to use when generating "
                             "data")
    parser.add_argument('--start_idx', type=int, default=0, help="Start index for file name")

    args = parser.parse_args()

    env: Environment = env_utils.get_environment(args.env)

    # fixed garbled assertion message ("greater than the or equal to the")
    assert args.num_per_file >= args.back_max, "Number of states per file should be greater than or equal to the " \
                                               "number of backwards steps"

    if not os.path.exists(args.data_dir):
        os.makedirs(args.data_dir)

    # make filepath queue: one entry per output file; workers pull until they see None
    filepath_queue = Queue()
    filepaths = ["%s/data_%i.pkl" % (args.data_dir, train_idx + args.start_idx)
                 for train_idx in range(args.num_files)]
    for filepath in filepaths:
        filepath_queue.put(filepath)

    # start data runners
    data_procs = []
    for _ in range(args.num_procs):
        data_proc = Process(target=generate_and_save_states,
                            args=(env, args.num_per_file, args.back_max, filepath_queue))
        data_proc.daemon = True
        data_proc.start()
        data_procs.append(data_proc)

    # stop data runners: one sentinel per worker
    for _ in range(len(data_procs)):
        filepath_queue.put(None)

    for data_proc in data_procs:
        data_proc.join()
def main():
    """Benchmark environment operations (state generation, expansion, nnet input
    conversion, heuristic evaluation) and multiprocessing queue transfer times."""
    # parse arguments
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument('--env', type=str, required=True, help="")
    parser.add_argument('--num_states', type=int, default=100, help="")
    parser.add_argument('--back_max', type=int, default=30, help="")
    args = parser.parse_args()

    # get environment
    env: Environment = env_utils.get_environment(args.env)

    # generate goal states
    start_time = time.time()
    states: List[State] = env.generate_goal_states(args.num_states)
    elapsed_time = time.time() - start_time
    states_per_sec = len(states) / elapsed_time
    print("Generated %i goal states in %s seconds (%.2f/second)" % (len(states), elapsed_time,
                                                                    states_per_sec))

    # get data: random states scrambled 0..back_max steps back from goal
    start_time = time.time()
    states: List[State]
    states, _ = env.generate_states(args.num_states, (0, args.back_max))
    elapsed_time = time.time() - start_time
    states_per_sec = len(states) / elapsed_time
    print("Generated %i states in %s seconds (%.2f/second)" % (len(states), elapsed_time,
                                                               states_per_sec))

    # expand
    start_time = time.time()
    env.expand(states)
    elapsed_time = time.time() - start_time
    states_per_sec = len(states) / elapsed_time
    print("Expanded %i states in %s seconds (%.2f/second)" % (len(states), elapsed_time,
                                                              states_per_sec))

    # nnet format
    start_time = time.time()
    states_nnet = env.state_to_nnet_input(states)
    elapsed_time = time.time() - start_time
    states_per_sec = len(states) / elapsed_time
    print("Converted %i states to nnet format in "
          "%s seconds (%.2f/second)" % (len(states), elapsed_time, states_per_sec))

    # get heuristic fn
    on_gpu: bool
    device: torch.device
    device, devices, on_gpu = nnet_utils.get_device()
    print("device: %s, devices: %s, on_gpu: %s" % (device, devices, on_gpu))

    nnet: nn.Module = env.get_nnet_model()
    nnet.to(device)
    if on_gpu:
        nnet = nn.DataParallel(nnet)

    # nnet initialize: warm-up call so the timed call below excludes one-time setup
    print("")
    heuristic_fn = nnet_utils.get_heuristic_fn(nnet, device, env)
    heuristic_fn(states)

    # compute
    start_time = time.time()
    heuristic_fn(states)
    nnet_time = time.time() - start_time
    states_per_sec = len(states) / nnet_time
    print("Computed heuristic for %i states in %s seconds (%.2f/second)" % (len(states),
                                                                            nnet_time,
                                                                            states_per_sec))

    # multiprocessing: time sending/receiving the nnet input through spawn-context queues
    print("")
    start_time = time.time()
    ctx = get_context("spawn")
    queue1: ctx.Queue = ctx.Queue()
    queue2: ctx.Queue = ctx.Queue()

    proc = ctx.Process(target=data_runner, args=(queue1, queue2))
    proc.daemon = True
    proc.start()
    print("Process start time: %.2f" % (time.time() - start_time))

    # warm-up round trip so the timed send/receive below excludes process startup
    queue1.put(states_nnet)
    queue2.get()

    start_time = time.time()
    queue1.put(states_nnet)
    print("State nnet send time: %s" % (time.time() - start_time))

    start_time = time.time()
    queue2.get()
    print("States nnet receive time: %.2f" % (time.time() - start_time))

    start_time = time.time()
    proc.join()
    print("Process join time: %.2f" % (time.time() - start_time))