Example #1
def node_delete(project, label):
    e = edges(project)
    n = nodes(project)
    edata = load(e)
    ndata = load(n)
    # Edge data is passed in, presumably so edges incident to the node can be
    # pruned; write both files back so any such pruning is persisted.
    node_del_record(ndata[0], ndata[1], edata[0], edata[1], label)
    save(n, ndata)
    save(e, edata)
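The helpers this wrapper calls (nodes, edges, load, save, node_del_record) are not shown in any of these examples. A minimal sketch of what they might look like, assuming each project is a directory holding one JSON file per collection and that load returns a two-element [header, records] list:

import json
import os

def nodes(project):
    # Hypothetical: path of the project's node file.
    return os.path.join(project, 'nodes.json')

def edges(project):
    # Hypothetical: path of the project's edge file.
    return os.path.join(project, 'edges.json')

def load(path):
    # Hypothetical: each file holds a [header, records] pair.
    with open(path) as f:
        return json.load(f)

def save(path, data):
    with open(path, 'w') as f:
        json.dump(data, f, indent=2)

def node_del_record(nheader, nrecords, eheader, erecords, label):
    # Hypothetical: drop the node and prune edges touching it, in place.
    nrecords[:] = [r for r in nrecords if r.get('label') != label]
    erecords[:] = [r for r in erecords
                   if label not in (r.get('source'), r.get('target'))]

The other *_record helpers (node_add_record, edge_upd_record, edge_del_record) would follow the same mutate-in-place pattern.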
Example #2
def edge_update(project, record):
    e = edges(project)
    n = nodes(project)
    edata = load(e)
    ndata = load(n)
    edge_upd_record(ndata[0], ndata[1], edata[0], edata[1], record)
    save(e, edata)  # write the updated edge data back to the edge file
Example #3
def run(episodes=2500,
        render=False,
        experiment='ERA5-v0',            
        max_actions=1000,
        knn=0.1,
        save_dir=None):

    env = gym.make(experiment)

    print(env.observation_space)
    print(env.action_space)

    steps = env.spec.timestep_limit     # set in the init file where the environment is registered

    # agent = DDPGAgent(env)
    agent = WolpertingerAgent(env, max_actions=max_actions, k_ratio=knn)

    timer = Timer()

    data = util.data.Data()
    data.set_agent(agent.get_name(), int(agent.action_space.get_number_of_actions()),
                   agent.k_nearest_neighbors, 3)
    data.set_experiment(experiment, agent.low.tolist(), agent.high.tolist(), episodes)

    agent.add_data_fetch(data)
    print(data.get_file_name())

    full_epoch_timer = Timer()
    reward_sum = 0


    # EREZ ADD: track a moving average over the most recent episodes
    num_avg = 40
    recent_rew_list = []

    for ep in range(episodes):

        timer.reset()
        observation = env.reset()

        total_reward = 0
        print('Episode ', ep, '/', episodes - 1, 'started...', end='')



        for t in range(steps):

            if render:
                env.render()

            action = agent.act(observation)

            data.set_action(action.tolist())

            data.set_state(observation.tolist())

            prev_observation = observation
            observation, reward, done, info = env.step(action[0] if len(action) == 1 else action)

            data.set_reward(reward)

            episode = {'obs': prev_observation,
                       'action': action,
                       'reward': reward,
                       'obs2': observation,
                       'done': done,
                       't': t}

            agent.observe(episode)

            total_reward += reward

            if done or (t == steps - 1):
                t += 1
                reward_sum += total_reward
                time_passed = timer.get_time()

                # NOTE: shouldn't we be reporting the average over recent episodes?
                # Added better print formatting.

                # EREZ ADDED 
                recent_rew_list.append(total_reward)

                if ep < num_avg:
                    recent_avg = sum(recent_rew_list)/len(recent_rew_list)
                else:
                    recent_avg = sum(recent_rew_list[-num_avg:])/num_avg

                print('\tReward:{:05.3f} \tSteps:{} \tt:{} \t({}/step) \tCur avg={:04.4f}'
                      .format(total_reward, t, time_passed,
                              round(time_passed / t), recent_avg))

                # TODO -- look into these and change how we write out directories 
                data.finish_and_store_episode()

                break
    # end of episodes
    time = full_epoch_timer.get_time()
    print('Run {} episodes in {} seconds and got {} average reward'.format(
        episodes, time / 1000, reward_sum / episodes))

    if save_dir is None:
        data.save() # EREZ ADDED ARG 
    else:
        data.save(path=save_dir) # EREZ ADDED ARG 
        agent.save(save_dir)  # could add a separate call to data.save from within this!


    # Added: return the trained agent so callers can reuse it.
    return agent
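A minimal driver for the run() above, assuming ERA5-v0 is registered with gym and the surrounding imports (gym, util.data, WolpertingerAgent, Timer) resolve; the episode count and output directory below are illustrative, not taken from the source:

if __name__ == '__main__':
    # Short smoke-test run; the function defaults to 2500 episodes.
    trained = run(episodes=50,
                  experiment='ERA5-v0',
                  max_actions=1000,
                  knn=0.1,
                  save_dir='results/era5_smoke')  # hypothetical output directory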
Example #4
def run(experiment,
        episodes,
        max_actions,
        knn,
        action_space_config=['off', 'square', 1000, 10],
        result_dir=PROJECT_DIR,
        render=False,
        load_agent=True,
        agent_to_load=None,
        save_agent=True,
        save_data=True,
        training_flag=True,
        id=0,
        comment="run",
        close_session=True,
        silent=False,
        tempsave=True,
        save_action_space=False):

    env = gym.make(experiment)

    print(env.observation_space)
    print(env.action_space)

    steps = env.spec.timestep_limit

    agent = WolpertingerAgent(env,
                              result_dir,
                              max_actions=max_actions,
                              k_ratio=knn,
                              training_flag=training_flag,
                              action_space_config=action_space_config,
                              save_action_space=save_action_space)
    if load_agent:
        if agent_to_load is not None:
            agent.load_agent(agent_name=agent_to_load[0],
                             comment=agent_to_load[1])
        else:
            agent.load_agent(comment=comment)
    timer = Timer()

    if save_data:
        data = util.data.Data(agent.get_dir(),
                              comment=comment,
                              tempsave=tempsave)
        data.set_agent(agent.get_name(), int(agent.action_space.get_size()),
                       agent.k_nearest_neighbors, agent.get_version())
        data.set_experiment(experiment, agent.low.tolist(),
                            agent.high.tolist(), episodes)
        data.set_id(id)

        agent.add_data_fetch(data)
        print(data.get_file_name())

    # if render:
    #     monitor = Monitor(400, env.observation_space.shape[0], env.action_space.shape[0], 50,
    #                       [agent.low.tolist()[0], agent.high.tolist()[0]])

    full_epoch_timer = Timer()
    reward_sum = 0

    ou = OUNoise(1, mu=0, theta=0.5, sigma=.1)

    # temp_buffer = [0] * 150

    for ep in range(episodes):

        timer.reset()
        observation = env.reset()

        total_reward = 0
        if not silent:
            print('Episode ', ep, '/', episodes - 1, end='. ')

        for t in range(steps):

            if render:
                env.render()

            action = agent.act(observation)

            if save_data:
                data.set_action(action.tolist())
                data.set_state(observation.tolist())

            prev_observation = observation
            # Some environments need the action as a scalar value, others as an array.
            # For the scalar case: action[0] if len(action) == 1 else action
            observation, reward, done, info = env.step(action.flatten())

            # if render:
            #     monitor.add_data(observation, action, reward)
            #     monitor.repaint()
            if save_data:
                data.set_reward(reward)

            episode = {
                'obs': prev_observation,
                'action': action,
                'reward': reward,
                'obs2': observation,
                'done': done,
                't': t
            }
            agent.observe(episode)

            total_reward += reward

            # print(episode['obs'], episode['action'], episode['obs2'], episode['reward'])
            if done or (t == steps - 1):
                # if render:
                #     monitor.end_of_episode()

                t += 1
                reward_sum += total_reward
                time_passed = timer.get_time()
                if not silent:
                    print(
                        'Reward:{} Steps:{} t:{} ({}/step) Curr avg={}, {} actions({})'
                        .format(total_reward, t, time_passed,
                                round(time_passed / t),
                                round(reward_sum / (ep + 1)),
                                agent.get_action_space_size(),
                                agent.get_action_space_size() / max_actions))
                if save_data:
                    data.finish_and_store_episode()

                break

    # end of episodes
    time = full_epoch_timer.get_time()
    print('Run {} episodes in {} seconds and got {} average reward'.format(
        episodes, time / 1000, reward_sum / episodes))

    if save_data:
        data.save()
    if save_agent:
        agent.save_agent(force=True, comment=comment)

    if close_session:
        agent.close_session()
    print("END")
    return agent
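Here knn is forwarded as k_ratio. In Wolpertinger-style agents this ratio typically sets how many of the max_actions discretized actions are retrieved as nearest neighbors around the proto-action. The exact computation lives inside WolpertingerAgent, so the mapping below is an assumption rather than the library's code:

def k_from_ratio(max_actions, k_ratio):
    # Assumed behavior: query at least one neighbor, at most the whole action set.
    return max(1, min(max_actions, int(max_actions * k_ratio)))

print(k_from_ratio(1000, 0.1))  # -> 100 candidate actions per step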
Example #5
def edge_delete(project, source, target):
    # Remove the source->target edge record and write the edge file back.
    e = edges(project)
    data = load(e)
    edge_del_record(data[0], data[1], source, target)
    save(e, data)
Example #6
def node_create(project, record):
    # Append a node record and write the node file back.
    n = nodes(project)
    data = load(n)
    node_add_record(data[0], data[1], record)
    save(n, data)
Example #7
def create(which, f, attr):
    data = load(f)
    create_it(which, data[0], attr)
    save(f, data)
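Taken together, the wrappers in Examples #1, #2, #5, #6, and #7 form a small file-backed CRUD layer: load the affected file(s), mutate the in-memory records, write them back. An illustrative session, assuming the stub helpers sketched under Example #1 plus analogous node_add_record, edge_upd_record, and edge_del_record implementations:

project = 'demo_project'  # hypothetical project directory with nodes.json / edges.json
node_create(project, {'label': 'a'})
node_create(project, {'label': 'b'})
edge_update(project, {'source': 'a', 'target': 'b', 'weight': 1.0})
edge_delete(project, 'a', 'b')
node_delete(project, 'a')  # also prunes any remaining edges touching 'a'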
Example #8
def run(episodes=2500,
        render=False,
        experiment='InvertedPendulum-v1',
        max_actions=1000,
        knn=0.1):

    env = gym.make(experiment)

    print(env.observation_space)
    print(env.action_space)

    steps = env.spec.timestep_limit

    # agent = DDPGAgent(env)
    agent = WolpertingerAgent(env, max_actions=max_actions, k_ratio=knn)

    timer = Timer()

    data = util.data.Data()
    data.set_agent(agent.get_name(),
                   int(agent.action_space.get_number_of_actions()),
                   agent.k_nearest_neighbors, 3)
    data.set_experiment(experiment, agent.low.tolist(), agent.high.tolist(),
                        episodes)

    agent.add_data_fetch(data)
    print(data.get_file_name())

    full_epoch_timer = Timer()
    reward_sum = 0

    for ep in range(episodes):

        timer.reset()
        observation = env.reset()

        total_reward = 0
        print('Episode ', ep, '/', episodes - 1, 'started...', end='')
        for t in range(steps):

            if render:
                env.render()

            action = agent.act(observation)

            data.set_action(action.tolist())

            data.set_state(observation.tolist())

            prev_observation = observation
            observation, reward, done, info = env.step(
                action[0] if len(action) == 1 else action)

            data.set_reward(reward)

            episode = {
                'obs': prev_observation,
                'action': action,
                'reward': reward,
                'obs2': observation,
                'done': done,
                't': t
            }

            agent.observe(episode)

            total_reward += reward

            if done or (t == steps - 1):
                t += 1
                reward_sum += total_reward
                time_passed = timer.get_time()
                print('Reward:{} Steps:{} t:{} ({}/step) Cur avg={}'.format(
                    total_reward, t, time_passed, round(time_passed / t),
                    round(reward_sum / (ep + 1))))

                data.finish_and_store_episode()

                break
    # end of episodes
    time = full_epoch_timer.get_time()
    print('Run {} episodes in {} seconds and got {} average reward'.format(
        episodes, time / 1000, reward_sum / episodes))

    data.save()
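All three run() examples read env.spec.timestep_limit, which only exists in older gym releases; later versions renamed the field to max_episode_steps. A small compatibility shim, if these examples are run against a newer gym:

def episode_steps(env, default=1000):
    # timestep_limit (old gym) was superseded by max_episode_steps (newer gym).
    spec = env.spec
    return (getattr(spec, 'max_episode_steps', None)
            or getattr(spec, 'timestep_limit', None)
            or default)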