def state_to_tensor(self, state):
    # Cache the converted tensor by the state's string key so repeated visits
    # to the same state reuse the tensor already on the target device.
    key = str(state)
    tensor = self.tensor_cache.get(key)
    if tensor is None:
        tensor = SIMULATOR.state_to_tensor(state).to(Device.get_device())
        self.tensor_cache[key] = tensor
    return tensor
def run_exper(model, steps, get_features, pre_proc_features):
    r_tup, e_tup = [], []
    rover_poss = []
    total_stats = {'total': 0, 'good': 0}

    from environment import SIMULATOR

    # initializing our environment
    my_sim = SIMULATOR()

    # beginning of an episode
    state_temp = my_sim.reset()
    observation = my_sim.state_to_tensor(state_temp)
    state_obs = observation
    total_moves = 0

    # main loop (MAX_STEPS and INPUT_SIZE are expected to be module-level constants)
    prev_input = None
    for i in range(steps):
        # preprocess the observation: flatten the map, crop it, and drop border cells
        cur_input = observation
        x = cur_input.astype(float).ravel() if prev_input is not None else np.zeros(70)
        x = x[10:80] if prev_input is not None else x
        x = np.array([x[i] for i in range(len(x)) if not (i % 10 == 0)])
        x = np.array([x[i] for i in range(len(x)) if not ((i - 8) % 9 == 0)])
        prev_input = cur_input

        x, rover_pos = get_rover_pos(x, r_tup, e_tup, rover_poss)
        rover_poss.append(rover_pos)
        x = np.array(x)
        # disabled alternative preprocessing:
        """
        x = x[x != 0]
        if(len(x) == 1):
            x = np.zeros(4)
        x = x.tolist()
        x.append(-7.)
        x = np.array(x)
        """
        x_t = pre_proc_features.fit_transform(x.reshape(-1, 1))
        x_t = x_t.reshape(1, INPUT_SIZE)[0]

        # forward the policy network and take the most probable action
        proba = model.predict(np.expand_dims(x_t, axis=1).T)
        action = proba.argmax()

        # run one step
        state_temp, reward, done, r_tup, e_tup = my_sim.step(action)
        observation = my_sim.state_to_tensor(state_temp)
        # my_sim.render()
        total_moves += 1
        if total_moves == MAX_STEPS:
            done = True
            total_moves = 0

        # if the episode is over, record the outcome and reset to the beginning
        if done:
            total_stats['total'] += 1
            so = np.asarray(state_obs).ravel().tolist()
            o = np.asarray(observation).ravel().tolist()
            try:
                index_obs = so.index(7.0)
            except ValueError:
                index_obs = -1
            try:
                index_curr = o.index(7.0)
            except ValueError:
                index_curr = -1
            # the episode counts as "good" when the 7.0 marker that was present
            # in the initial observation is gone from the final one
            if index_obs != -1 and index_curr == -1:
                total_stats['good'] += 1
            state_temp = my_sim.reset()
            observation = my_sim.state_to_tensor(state_temp)
            state_obs = observation
            rover_poss = []
            # my_sim.render()

    return total_stats
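# The evaluation loop above returns raw counters; a small helper like the one
# below can turn them into a readable success rate. This helper is a sketch
# added for illustration and is not part of the original project; it only
# assumes the {'total': ..., 'good': ...} dict returned by run_exper.
def print_success_rate(stats):
    """Print how many evaluation episodes cleared the 7.0 goal marker."""
    total = stats.get('total', 0)
    good = stats.get('good', 0)
    rate = 100.0 * good / total if total else 0.0
    print("Solved %d / %d episodes (%.1f%%)" % (good, total, rate))

# Example usage (assuming `model`, `get_features` and `pre_proc_features` come
# from the setup script further below):
#   print_success_rate(run_exper(model, 1000, get_features, pre_proc_features))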
def run_exper(model, steps, get_features, pre_proc_features):
    import time  # used for per-step timing and the render delay below
    from environment import SIMULATOR

    # initializing our environment
    my_sim = SIMULATOR()

    # beginning of an episode
    state_temp = my_sim.reset()
    observation = my_sim.state_to_tensor(state_temp)
    r_tup, e_tup, rover_poss = [], [], []

    # main loop
    prev_input = None
    total_moves = 0
    MAX_MOVES = 25
    for i in range(steps):
        total_moves += 1
        start = time.perf_counter()

        # preprocess the observation: flatten the map, crop it, and drop border cells
        cur_input = observation
        x = cur_input.astype(float).ravel() if prev_input is not None else np.zeros(70)
        x = x[10:80] if prev_input is not None else x
        x = np.array([x[i] for i in range(len(x)) if not (i % 10 == 0)])
        x = np.array([x[i] for i in range(len(x)) if not ((i - 8) % 9 == 0)])

        x, rov_pos = get_rover_pos(x, r_tup, e_tup, rover_poss)
        x = np.array(x)
        rover_poss.append(rov_pos)
        # disabled alternative preprocessing:
        """
        x = x[x != 0]
        if(len(x) == 1):
            x = np.zeros(4)
        x = x.tolist()
        x.append(-7.)
        x = np.array(x)
        """
        x_t = pre_proc_features.fit_transform(x.reshape(-1, 1))
        x_t = x_t.reshape(1, INPUT_SIZE)[0]
        print("Shape = ", x_t.shape)
        prev_input = cur_input

        # forward the policy network and take the most probable action
        proba = model.predict(np.expand_dims(x_t, axis=1).T)
        end = time.perf_counter()
        action = proba[0].argmax()
        print("Time taken = ", end - start)

        # run one step and render it
        state_temp, reward, done, r_tup, e_tup = my_sim.step(action)
        observation = my_sim.state_to_tensor(state_temp)
        my_sim.render()
        time.sleep(1)

        if total_moves == MAX_MOVES:
            total_moves = 0
            done = True

        # if the episode is over, reset to the beginning
        if done:
            state_temp = my_sim.reset()
            observation = my_sim.state_to_tensor(state_temp)
            my_sim.render()
            rover_poss = []
import argparse

# command-line option selecting the feature representation
parser = argparse.ArgumentParser()
parser.add_argument('--data_type', default='sparse', type=str,
                    help='Choose between encoded or sparse')
args = parser.parse_args()
data_type = args.data_type
model = get_model(data_type)

import numpy as np
import gym

# gym-style environment initialization
from environment import SIMULATOR
my_sim = SIMULATOR()
state_temp = my_sim.reset()
observation = my_sim.state_to_tensor(state_temp)
prev_input = None

# discount factor used when computing discounted rewards
gamma = 0.99

# initialization of variables used in the main loop
x_train, y_train, y_pred, rewards, r_tup, e_tup, rover_poss = [], [], [], [], [], [], []
reward_sum = 0
episode_nb = 0
resume = True
running_reward = None
EPOCHS_BEFORE_SAVING = 50
moves_count = 0
MAX_NEG_REWARD = -100

get_features, pre_proc_features = get_pre_proc_info(data_type)
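# The setup above only declares `gamma` and the `rewards` buffer; the
# discounting step itself is outside this excerpt. The sketch below shows one
# standard way to compute discounted returns from a finished episode's reward
# list; the helper name `discount_rewards` is hypothetical and not confirmed
# by this file.
def discount_rewards(episode_rewards, gamma=0.99):
    """Return the discounted return G_t = r_t + gamma * G_{t+1} for each step."""
    discounted = np.zeros(len(episode_rewards))
    running = 0.0
    for t in reversed(range(len(episode_rewards))):
        running = episode_rewards[t] + gamma * running
        discounted[t] = running
    return discounted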