def node_delete(project, label):
    e = edges(project)
    n = nodes(project)
    edata = load(e)
    ndata = load(n)
    node_del_record(ndata[0], ndata[1], edata[0], edata[1], label)
    # note: edge data is handed to node_del_record but only the node file is saved here;
    # if deleting a node is also meant to drop incident edges, a save(e, edata) is likely needed
    save(n, ndata)
def edge_update(project, record):
    e = edges(project)
    n = nodes(project)
    edata = load(e)
    ndata = load(n)
    edge_upd_record(ndata[0], ndata[1], edata[0], edata[1], record)
    save(e, edata)  # persist the updated edge data (the original saved ndata here, which looks like a bug)
def run(episodes=2500, render=False, experiment='ERA5-v0', max_actions=1000, knn=0.1,
        save_dir=None):

    env = gym.make(experiment)
    print(env.observation_space)
    print(env.action_space)

    steps = env.spec.timestep_limit  # pulls from the init file where it is registered

    # agent = DDPGAgent(env)
    agent = WolpertingerAgent(env, max_actions=max_actions, k_ratio=knn)

    timer = Timer()

    data = util.data.Data()
    data.set_agent(agent.get_name(),
                   int(agent.action_space.get_number_of_actions()),
                   agent.k_nearest_neighbors, 3)
    data.set_experiment(experiment, agent.low.tolist(), agent.high.tolist(), episodes)

    agent.add_data_fetch(data)
    print(data.get_file_name())

    full_epoch_timer = Timer()
    reward_sum = 0

    # EREZ ADD: track a moving average of recent episode rewards
    num_avg = 40
    recent_rew_list = []

    for ep in range(episodes):
        timer.reset()
        observation = env.reset()

        total_reward = 0
        print('Episode ', ep, '/', episodes - 1, 'started...', end='')
        for t in range(steps):
            if render:
                env.render()

            action = agent.act(observation)

            data.set_action(action.tolist())
            data.set_state(observation.tolist())

            prev_observation = observation
            observation, reward, done, info = env.step(action[0] if len(action) == 1 else action)

            data.set_reward(reward)

            episode = {'obs': prev_observation,
                       'action': action,
                       'reward': reward,
                       'obs2': observation,
                       'done': done,
                       't': t}

            agent.observe(episode)

            total_reward += reward

            if done or (t == steps - 1):
                t += 1
                reward_sum += total_reward
                time_passed = timer.get_time()

                # NOTE: shouldn't we be reporting the average over recent episodes?
                # EREZ ADDED: report the average over the last num_avg episodes instead of the
                # running average over all episodes, with better print formatting
                recent_rew_list.append(total_reward)
                if ep < num_avg:
                    recent_avg = sum(recent_rew_list) / len(recent_rew_list)
                else:
                    recent_avg = sum(recent_rew_list[-num_avg:]) / num_avg

                print('\tReward:{:05.3f} \tSteps:{} \tt:{} \t({}/step) \tCur avg={:04.4f}'.format(
                    total_reward, t, time_passed, round(time_passed / t), recent_avg))

                # TODO -- look into these and change how we write out directories
                data.finish_and_store_episode()
                break
    # end of episodes
    time = full_epoch_timer.get_time()
    print('Run {} episodes in {} seconds and got {} average reward'.format(
        episodes, time / 1000, reward_sum / episodes))

    if save_dir is None:
        data.save()  # EREZ ADDED ARG
    else:
        data.save(path=save_dir)  # EREZ ADDED ARG
        agent.save(save_dir)
        # could add a separate call to data.save from within this!

    # Code added below
    return agent
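# The "EREZ ADD" code above keeps the full list of episode rewards and slices the last
# num_avg entries to compute a moving average. Below is a minimal sketch of the same idea
# using collections.deque as a bounded buffer (old rewards are dropped automatically);
# the names here are illustrative and not part of the original code.
from collections import deque

def make_reward_tracker(window=40):
    recent = deque(maxlen=window)  # keeps only the last `window` rewards

    def record(episode_reward):
        recent.append(episode_reward)
        return sum(recent) / len(recent)  # average over up to `window` recent episodes

    return record

# usage (hypothetical): tracker = make_reward_tracker(40); recent_avg = tracker(total_reward)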
def run(experiment, episodes, max_actions, knn,
        action_space_config=['off', 'square', 1000, 10],
        result_dir=PROJECT_DIR,
        render=False,
        load_agent=True, agent_to_load=None,
        save_agent=True, save_data=True,
        training_flag=True,
        id=0, comment="run",
        close_session=True, silent=False, tempsave=True,
        save_action_space=False):

    env = gym.make(experiment)
    print(env.observation_space)
    print(env.action_space)

    steps = env.spec.timestep_limit

    agent = WolpertingerAgent(env, result_dir,
                              max_actions=max_actions, k_ratio=knn,
                              training_flag=training_flag,
                              action_space_config=action_space_config,
                              save_action_space=save_action_space)

    if load_agent:
        if agent_to_load is not None:
            agent.load_agent(agent_name=agent_to_load[0], comment=agent_to_load[1])
        else:
            agent.load_agent(comment=comment)

    timer = Timer()

    if save_data:
        data = util.data.Data(agent.get_dir(), comment=comment, tempsave=tempsave)
        data.set_agent(agent.get_name(),
                       int(agent.action_space.get_size()),
                       agent.k_nearest_neighbors,
                       agent.get_version())
        data.set_experiment(experiment, agent.low.tolist(), agent.high.tolist(), episodes)
        data.set_id(id)
        agent.add_data_fetch(data)
        print(data.get_file_name())

    # if render:
    #     monitor = Monitor(400, env.observation_space.shape[0], env.action_space.shape[0], 50,
    #                       [agent.low.tolist()[0], agent.high.tolist()[0]])

    full_epoch_timer = Timer()
    reward_sum = 0

    ou = OUNoise(1, mu=0, theta=0.5, sigma=.1)  # exploration noise (not used in the loop below)
    # temp_buffer = [0] * 150

    for ep in range(episodes):
        timer.reset()
        observation = env.reset()

        total_reward = 0
        if not silent:
            print('Episode ', ep, '/', episodes - 1, end='. ')
        for t in range(steps):
            if render:
                env.render()

            action = agent.act(observation)

            if save_data:
                data.set_action(action.tolist())
                data.set_state(observation.tolist())

            prev_observation = observation
            # some environments need the action as a scalar value, others as an array
            # for scalar: action[0] if len(action) == 1 else action
            observation, reward, done, info = env.step(action.flatten())

            # if render:
            #     monitor.add_data(observation, action, reward)
            #     monitor.repaint()

            if save_data:
                data.set_reward(reward)

            episode = {
                'obs': prev_observation,
                'action': action,
                'reward': reward,
                'obs2': observation,
                'done': done,
                't': t
            }

            agent.observe(episode)

            total_reward += reward
            # print(episode['obs'], episode['action'], episode['obs2'], episode['reward'])

            if done or (t == steps - 1):
                # if render:
                #     monitor.end_of_episode()
                t += 1
                reward_sum += total_reward
                time_passed = timer.get_time()
                if not silent:
                    print('Reward:{} Steps:{} t:{} ({}/step) Curr avg={}, {} actions({})'.format(
                        total_reward, t, time_passed, round(time_passed / t),
                        round(reward_sum / (ep + 1)),
                        agent.get_action_space_size(),
                        agent.get_action_space_size() / max_actions))
                if save_data:
                    data.finish_and_store_episode()
                break
    # end of episodes
    time = full_epoch_timer.get_time()
    print('Run {} episodes in {} seconds and got {} average reward'.format(
        episodes, time / 1000, reward_sum / episodes))

    if save_data:
        data.save()
    if save_agent:
        agent.save_agent(force=True, comment=comment)
    if close_session:
        agent.close_session()
    print("END")
    return agent
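# The run() above constructs OUNoise(1, mu=0, theta=0.5, sigma=.1) but does not call it in the
# loop shown. For reference, here is a minimal sketch of an Ornstein-Uhlenbeck noise process as
# commonly used for DDPG-style exploration; it is a generic implementation under the assumption
# that the project's OUNoise behaves similarly, not the project's actual class.
import numpy as np

class SimpleOUNoise:
    def __init__(self, size, mu=0.0, theta=0.5, sigma=0.1):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.state = self.mu.copy()

    def reset(self):
        # restart the process at its mean
        self.state = self.mu.copy()

    def sample(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): mean-reverting, temporally correlated noise
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state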
def edge_delete(project, source, target):
    e = edges(project)
    data = load(e)
    edge_del_record(data[0], data[1], source, target)
    save(e, data)
def node_create(project, record):
    n = nodes(project)
    data = load(n)
    node_add_record(data[0], data[1], record)
    save(n, data)
def create(which, f, attr):
    data = load(f)
    create_it(which, data[0], attr)
    save(f, data)
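# The node/edge helpers in this section all follow the same load-modify-save pattern over
# per-project files. The `nodes`, `edges`, `load`, and `save` helpers themselves are not shown
# here; the sketch below is only an assumption of what they might look like (JSON files holding
# a (header, records) pair), not the project's actual implementation.
import json
import os

def _nodes_path(project):
    # hypothetical: location of the project's node file
    return os.path.join(project, 'nodes.json')

def _edges_path(project):
    # hypothetical: location of the project's edge file
    return os.path.join(project, 'edges.json')

def _load(path):
    # hypothetical: returns a (header, records) pair that callers index as data[0], data[1]
    with open(path) as fh:
        payload = json.load(fh)
    return payload['header'], payload['records']

def _save(path, data):
    # hypothetical: writes the (header, records) pair back to disk
    header, records = data
    with open(path, 'w') as fh:
        json.dump({'header': header, 'records': records}, fh, indent=2)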
def run(episodes=2500, render=False, experiment='InvertedPendulum-v1', max_actions=1000, knn=0.1):

    env = gym.make(experiment)
    print(env.observation_space)
    print(env.action_space)

    steps = env.spec.timestep_limit

    # agent = DDPGAgent(env)
    agent = WolpertingerAgent(env, max_actions=max_actions, k_ratio=knn)

    timer = Timer()

    data = util.data.Data()
    data.set_agent(agent.get_name(),
                   int(agent.action_space.get_number_of_actions()),
                   agent.k_nearest_neighbors, 3)
    data.set_experiment(experiment, agent.low.tolist(), agent.high.tolist(), episodes)

    agent.add_data_fetch(data)
    print(data.get_file_name())

    full_epoch_timer = Timer()
    reward_sum = 0

    for ep in range(episodes):
        timer.reset()
        observation = env.reset()

        total_reward = 0
        print('Episode ', ep, '/', episodes - 1, 'started...', end='')
        for t in range(steps):
            if render:
                env.render()

            action = agent.act(observation)

            data.set_action(action.tolist())
            data.set_state(observation.tolist())

            prev_observation = observation
            observation, reward, done, info = env.step(action[0] if len(action) == 1 else action)

            data.set_reward(reward)

            episode = {
                'obs': prev_observation,
                'action': action,
                'reward': reward,
                'obs2': observation,
                'done': done,
                't': t
            }

            agent.observe(episode)

            total_reward += reward

            if done or (t == steps - 1):
                t += 1
                reward_sum += total_reward
                time_passed = timer.get_time()
                print('Reward:{} Steps:{} t:{} ({}/step) Cur avg={}'.format(
                    total_reward, t, time_passed, round(time_passed / t),
                    round(reward_sum / (ep + 1))))

                data.finish_and_store_episode()
                break
    # end of episodes
    time = full_epoch_timer.get_time()
    print('Run {} episodes in {} seconds and got {} average reward'.format(
        episodes, time / 1000, reward_sum / episodes))

    data.save()
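# A minimal sketch of how the run() above might be invoked; the episode count and rendering
# flag below are illustrative values, not settings taken from the original code.
if __name__ == '__main__':
    run(episodes=10, render=True, experiment='InvertedPendulum-v1', max_actions=1000, knn=0.1)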