Example No. 1
    def execute(self, agent: Agent, state: SimState) -> None:
        if agent.state() is not AgentState.INFECTIVE:
            return

        if np.random.random() < state.remove_prob():
            agent.set_state(AgentState.REMOVED)
        else:
            agent.update_sick_days()
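
Because each infective agent is removed with probability state.remove_prob() independently at every step, the number of sick days follows a geometric distribution. A minimal standalone sketch of that behaviour, using a made-up removal probability of 0.2 (not taken from any example here):

import numpy as np

remove_prob = 0.2  # hypothetical value, analogous to state.remove_prob()
rng = np.random.default_rng(0)

durations = []
for _ in range(10_000):
    days = 0
    while rng.random() >= remove_prob:  # agent stays INFECTIVE for another step
        days += 1
    durations.append(days)

# mean sick days should be close to (1 - p) / p for this counting convention
print(np.mean(durations), (1 - remove_prob) / remove_prob)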
Example No. 2
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        processor = Processor(config, logger)
        env = Environment(logger, config, processor.price_blocks,
                          processor.timestamp_blocks)
        agent = Agent(sess, logger, config, env)
        agent.train()

        agent.summary_writer.close()
Example No. 3
    def move_agent(self, agent: Agent, state: SimState) -> None:
        grid = agent.grid()
        if grid.is_fully_occupied():
            return

        if agent.state() is AgentState.DEAD or agent.is_quarantined():
            return  # We don't want zombies

        move_probability = np.random.randint(low=0, high=100)
        if move_probability <= state.get_mixing_value_m() * 100:
            new_grid_pos = get_free_pos(grid)
            old_grid_pos = agent.get_pos()
            grid.move_agent(old_grid_pos, new_grid_pos)
Example No. 4
def play_game(p1: Agent, p2: Agent, env: Environment, draw=False):
    print("play game!")
    current_player = None
    
    while not env.game_over():
        # alternate between players
        if current_player == p1:
            current_player = p2
        else:
            current_player = p1
            
        # draw the board before the user who wants to see it makes a move
        if draw:
            if draw == 1 and current_player == p1:
                env.draw_board()
            if draw == 2 and current_player == p2:
                env.draw_board()            

            
        # make an action
        current_player.take_action(env)

        # update state history
        state = env.get_state()
        p1.update_state_history(state)
        p2.update_state_history(state)
    
    if draw:
        env.draw_board()
        
    # do the value function update
    p1.update(env)
    p2.update(env)
Example No. 5
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        processor = Processor(config, logger)
        env = Environment(logger, config, processor.diff_blocks,
                          processor.price_blocks,
                          processor.timestamp_blocks)
        agent = Agent(sess, logger, config, env)
        agent.train()

        agent.summary_writer.close()
Example No. 6
    def execute_all(agent: Agent):
        try:
            # `filtered` and `state` are presumably defined in the enclosing scope (not shown in this snippet)
            func_list = filtered[agent.state()]
            for func in func_list:
                func[0].execute(agent, state)
        except KeyError:
            pass
Example No. 7
    def __init__(self):
        LOG.info('init master')

        self.__loop_count = 0
        self.__train_step = 0

        self.__args = self._set_args()
        LOG.info("the args is{}".format(self.__args))
        self.rainbow = Agent(self.__args, ACTION_SPACE)
        self.rainbow.train()

        self.__count_list = list()
        self.__queue_list = list()
        self.__memory_list = list()
        for _ in range(MAX_WORKER_COUNT):
            self.__count_list.append(0)
            self.__queue_list.append(queue.Queue())
            self.__memory_list.append(ReplayMemory(self.__args, self.__args.memory_capacity))

        self.__priority_weight_increase = (1 - self.__args.priority_weight) / (
                self.__args.T_max - self.__args.learn_start)
Example No. 8
def agent_form():
    agent = Agent()
    # raw_input is the Python 2 API; on Python 3 this would be input()
    agent.name = raw_input("Agent name:\n")
    agent.address = raw_input("Agent address:\n")
    agent.telephone = raw_input("Agent telephone:\n")
    agent.category = raw_input("Category the agent belongs to:\n")
    agent_service.add_agent(agent)
Example No. 9
    def move_agent(self, agent: Agent, state: SimState) -> None:
        grid = agent.grid()
        if grid.is_fully_occupied():
            return

        if agent.state() is AgentState.DEAD or agent.is_quarantined():
            return  # We don't want zombies

        move_probability = np.random.randint(low=0, high=100)
        if move_probability <= state.get_mixing_value_m() * 100:
            radius = state.movement_limit_radius()

            if state.movement_limit_high_distances_are_uncommon():
                # Recalculate radius -> lower radius is more probable
                mean = 0
                standard_deviation = radius / 3

                radius = min(
                    max(
                        1,
                        int(
                            np.round(
                                np.abs(
                                    norm.rvs(size=1,
                                             loc=mean,
                                             scale=standard_deviation)[0])))),
                    radius)

            try:
                new_grid_pos = get_free_pos_limited(
                    grid,
                    pos=agent.get_pos(),
                    radius=radius,
                    metric=state.movement_limit_metric(),
                )
                old_grid_pos = agent.get_pos()
                grid.move_agent(old_grid_pos, new_grid_pos)
            finally:
                # The bare return in finally swallows any exception from get_free_pos_limited:
                # if no free position is found within the radius, the agent simply stays put.
                return
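
The radius recalculation in this example draws the absolute value of a normal with standard deviation radius / 3 and clips the result to [1, radius], so short moves are far more likely than long ones. A minimal standalone sketch of the same sampling rule, with a made-up movement limit of 9:

import numpy as np
from scipy.stats import norm

radius = 9  # hypothetical movement limit
standard_deviation = radius / 3

samples = np.abs(norm.rvs(size=10_000, loc=0, scale=standard_deviation))
limited = np.minimum(np.maximum(1, np.round(samples).astype(int)), radius)

# counts per sampled radius 0..9; small radii clearly dominate
print(np.bincount(limited, minlength=radius + 1))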
Example No. 10
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        preprocessor = Preprocessor(config, logger)
        env = Environment(logger, config, preprocessor.price_blocks)
        agent = Agent(sess, logger, config, env)

        summary_writer = tf.summary.FileWriter(config[TENSORBOARD_LOG_DIR])
        summary_writer.add_graph(sess.graph)
        summary_writer.close()
Example No. 11
    def execute(self, agent: Agent, state: SimState) -> None:
        if agent.state() is not AgentState.INCUBATION:
            return

        if agent.incubation_days() == state.incubation_period():
            agent.set_state(AgentState.INFECTIVE)
        else:
            agent.update_incubation_days()
Example No. 12
    def spawn_agent(self, grid_pos: GridPos, agent_state: AgentState) -> None:
        """
        Create an agent with the given state at the given position, if it is not already occupied.
        Author: Beil Benedikt
        :param grid_pos:
        :param agent_state:
        :return: Nothing
        """
        if self.is_occupied(grid_pos):
            raise ValueError(
                "This field is already occupied. No agent can be created here."
            )
        self.set_agent(Agent(self.__scheduler, grid_pos, agent_state, self),
                       grid_pos)
Example No. 13
def main(config_path, train_mode=True, weights_path=None):
    """Load the environment, create an agent, and train it.
    """
    config = get_config(config_path)
    env, brain_name = load_environment()
    state_size, action_size = get_env_metadata(env, brain_name)
    agent = Agent(state_size=state_size,
                  action_size=action_size,
                  config=config,
                  random_seed=10)

    scores = ddpg(env, brain_name, agent, config, train_mode, weights_path)
    env.close()

    return scores
Example No. 14
    def execute(self, agent: Agent, state: SimState) -> None:
        """Basically the same method as in the DefaultStatusStrategy, but adding the lethality check.

        :param agent: Agent to update
        :param state: State the simulation is in
        """
        if agent.state() is not AgentState.INFECTIVE:
            return

        if np.random.random() < state.remove_prob():
            if np.random.random() < state.lethality():
                agent.set_state(AgentState.DEAD)
            else:
                agent.set_state(AgentState.IMMUNE)
        else:
            agent.update_sick_days()
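
Here the per-step probability that an infective agent dies is remove_prob * lethality, while recovery to IMMUNE happens with probability remove_prob * (1 - lethality). A quick standalone check of that split, using made-up values for both parameters:

import numpy as np

remove_prob, lethality = 0.2, 0.1  # hypothetical parameters
rng = np.random.default_rng(1)

outcomes = {"DEAD": 0, "IMMUNE": 0, "STILL_SICK": 0}
for _ in range(100_000):
    if rng.random() < remove_prob:
        outcomes["DEAD" if rng.random() < lethality else "IMMUNE"] += 1
    else:
        outcomes["STILL_SICK"] += 1

# expected fractions: roughly 0.02 / 0.18 / 0.80
print({k: v / 100_000 for k, v in outcomes.items()})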
Example No. 15
    def execute(self, agent: Agent, state: SimState) -> None:
        if agent.is_quarantined():
            return

        if agent.state() in (AgentState.INFECTIVE, AgentState.INCUBATION):
            infection_radius = state.infection_env_radius()
            infection_env_size = infection_radius * 2 + 1
            size = agent.grid().get_size()
            check_list = list()
            grid_pos = agent.get_pos()
            x = grid_pos.row()
            y = grid_pos.col()

            if state.infection_env_metric() == EnvironmentMetric.MANHATTAN:
                for r in range(0, infection_env_size):
                    offset = abs(infection_radius - r)
                    check_row = y - infection_radius + r
                    for c in range(offset, infection_env_size - offset):
                        check_column = x - infection_radius + c
                        check_list.append((check_column, check_row))

            elif state.infection_env_metric() == EnvironmentMetric.EUCLIDEAN:
                for r in range(0, infection_env_size):
                    check_row = y - infection_radius + r
                    for c in range(0, infection_env_size):
                        check_column = x - infection_radius + c
                        distance = np.round(np.sqrt((infection_radius - r) ** 2 + (infection_radius - c) ** 2))
                        if 0 < distance <= infection_radius:
                            check_list.append((check_column, check_row))

            else:
                raise ValueError('Metric not implemented')

            check_list = list(filter(lambda pos: 0 <= pos[0] < size and 0 <= pos[1] < size, check_list))

            for check_pos in check_list:
                to_check = agent.grid().get_agent(GridPos(np.uint(check_pos[0]), np.uint(check_pos[1])))
                if to_check is not None and to_check.state() is AgentState.SUSCEPTIBLE:
                    if np.random.random() < state.infection_prob():
                        if state.incubation_period_enabled():
                            to_check.set_state(AgentState.INCUBATION)
                        else:
                            to_check.set_state(AgentState.INFECTIVE)
                        agent.update_infected_count()
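
The Manhattan branch above enumerates the diamond-shaped neighbourhood row by row, widening towards the middle row and narrowing again. The sketch below runs that loop in isolation for a hypothetical radius of 2 centred on the origin, so the generated offsets can be inspected directly:

infection_radius = 2  # hypothetical radius
infection_env_size = infection_radius * 2 + 1
x = y = 0  # centre the neighbourhood on the origin for readability

offsets = []
for r in range(infection_env_size):
    offset = abs(infection_radius - r)
    check_row = y - infection_radius + r
    for c in range(offset, infection_env_size - offset):
        check_column = x - infection_radius + c
        offsets.append((check_column, check_row))

# 13 cells: all (dx, dy) with |dx| + |dy| <= 2, including the centre
print(len(offsets), offsets)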
Example No. 16
def generate_quick_agent_observation(reduce_A=True,
                                     num_neighbours=2,
                                     reduce_A_policies=True,
                                     reduce_A_inference=True):

    idea_levels = 2  # number of belief levels agents can hold about the idea (e.g. 'True' vs. 'False' when idea_levels == 2)
    num_H = 2  # number of hashtags, i.e. observations that can shed light on the idea
    h_idea_mapping = np.eye(num_H)
    h_idea_mapping[:, 0] = utils.softmax(h_idea_mapping[:, 0] * 1.0)
    h_idea_mapping[:, 1] = utils.softmax(h_idea_mapping[:, 1] * 1.0)
    agent_params = {
        "neighbour_params": {
            "ecb_precisions": np.array([[8.0, 8.0], [8.0, 8.0]]),
            "num_neighbours": num_neighbours,
            "env_determinism": 9.0,
            "belief_determinism": np.array([7.0, 7.0])
        },
        "idea_mapping_params": {
            "num_H": num_H,
            "idea_levels": idea_levels,
            "h_idea_mapping": h_idea_mapping
        },
        "policy_params": {
            "initial_action": [np.random.randint(num_H), 0],
            "belief2tweet_mapping": np.eye(num_H),
            "E_lr": 0.7
        },
        "C_params": {
            "preference_shape": None,
            "cohesion_exp": None,
            "cohesion_temp": None
        }
    }
    observation = np.zeros(num_neighbours + 3)
    observation[2] = 1
    agent = Agent(**agent_params,
                  reduce_A=reduce_A,
                  reduce_A_policies=reduce_A_policies,
                  reduce_A_inferennce=reduce_A_inference)

    return agent, observation
Example No. 17
import pyglet
from pyglet.window import key
from model.agent import Agent
from game.view import View
import numpy as np
import sys

# define constants
_width, _height = 300, 400


if __name__ == '__main__':
    _screen = View(_width, _height, 'Tetris')
    _board = _screen._board

    if len(sys.argv) == 1:
        _agent = Agent()
        _agent.run(_board)
    else:
        model = sys.argv[1]
        _screen.use_trained_agent(model)


    pyglet.app.run()
Example No. 18
class Master:
    """
    master, train AI model
    """

    def __init__(self):
        LOG.info('init master')

        self.__loop_count = 0
        self.__train_step = 0

        self.__args = self._set_args()
        LOG.info("the args is{}".format(self.__args))
        self.rainbow = Agent(self.__args, ACTION_SPACE)
        self.rainbow.train()

        self.__count_list = list()
        self.__queue_list = list()
        self.__memory_list = list()
        for _ in range(MAX_WORKER_COUNT):
            self.__count_list.append(0)
            self.__queue_list.append(queue.Queue())
            self.__memory_list.append(ReplayMemory(self.__args, self.__args.memory_capacity))

        self.__priority_weight_increase = (1 - self.__args.priority_weight) / (
                self.__args.T_max - self.__args.learn_start)

    def send_transition(self, index, state, action_index, reward, done):
        self.__queue_list[index].put((state, action_index, reward, done))
        return

    def __get_action_data(self, idx):
        while True:
            if not self.__queue_list[idx].empty():
                (state, action_index, reward, done) = self.__queue_list[idx].get()
                self.__memory_list[idx].append(state, action_index, reward, done)
                self.__count_list[idx] += 1
                return True
            return False

    def __get_train_data(self):
        index_list = list()
        for idx in range(MAX_WORKER_COUNT):
            if self.__get_action_data(idx) is True:
                index_list.append(idx)
        return index_list

    def __save_train_model(self):
        if self.__train_step % 2e4 == 0:
            st = time.time()
            self.rainbow.save('./Model/', name='model_{}.pth'.format(self.__train_step))
            et = time.time()
            cost_time = (et - st) * 1000
            LOG.info('saving rainbow costs {} ms at train step {}'.format(cost_time, self.__train_step))

    def __print_progress_log(self, start_time):
        if self.__loop_count % LOG_FREQUENCY == 0:
            cost_time = (time.time() - start_time) * 1000
            LOG.info('training rainbow took {} ms at loop count {}'.format(cost_time, self.__loop_count))

    def train(self):

        start_time = time.time()
        index_list = self.__get_train_data()

        if len(index_list) == 0:
            return

        for _ in range(3):
            i = np.random.randint(len(index_list))
            idx = index_list[i]

            if self.__count_list[idx] >= self.__args.learn_start:

                # Anneal importance sampling weight β to 1
                self.__memory_list[idx].priority_weight = min(
                    self.__memory_list[idx].priority_weight + self.__priority_weight_increase, 1)

                if self.__loop_count % self.__args.replay_frequency == 0:
                    start_time = time.time()
                    self.rainbow.learn(self.__memory_list[idx])  # Train with n-step distributional double-Q learning
                    self.__print_progress_log(start_time)
                    self.__save_train_model()
                    self.__train_step += 1

        # Update target network
        if self.__loop_count % self.__args.target_update == 0:
            # LOG.info('master updates target net at train step {}'.format(self.__trainStep))
            self.rainbow.update_target_net()

        if self.__loop_count % LOG_FREQUENCY == 0:
            LOG.info('train time is {} ms at loop count {}'.format(((time.time() - start_time) * 1000),
                                                                   self.__loop_count))

        self.__loop_count += 1

        return

    # pylint: disable=R0201
    def _set_args(self):
        parser = argparse.ArgumentParser(description='Rainbow')
        parser.add_argument('--enable-cuda', action='store_true', help='Enable CUDA')
        parser.add_argument('--enable-cudnn', action='store_true', help='Enable cuDNN')

        parser.add_argument('--T-max', type=int, default=int(50e6), metavar='STEPS',
                            help='Number of training steps (4x number of frames)')

        parser.add_argument('--architecture', type=str, default='canonical', choices=['canonical', 'data-efficient'],
                            metavar='ARCH', help='Network architecture')
        parser.add_argument('--history-length', type=int, default=4, metavar='T',
                            help='Number of consecutive states processed')
        parser.add_argument('--hidden-size', type=int, default=512, metavar='SIZE', help='Network hidden size')
        parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ',
                            help='Initial standard deviation of noisy linear layers')
        parser.add_argument('--atoms', type=int, default=51, metavar='C', help='Discretised size of value distribution')
        parser.add_argument('--V-min', type=float, default=-10, metavar='V',
                            help='Minimum of value distribution support')
        parser.add_argument('--V-max', type=float, default=10, metavar='V',
                            help='Maximum of value distribution support')

        parser.add_argument('--model', type=str, metavar='PARAMS', help='Pretrained model (state dict)')
        parser.add_argument('--memory-capacity', type=int, default=int(40000), metavar='CAPACITY',
                            help='Experience replay memory capacity')
        parser.add_argument('--replay-frequency', type=int, default=1, metavar='k',
                            help='Frequency of sampling from memory')
        parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω',
                            help='Prioritised experience replay exponent (originally denoted α)')
        parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β',
                            help='Initial prioritised experience replay importance sampling weight')
        parser.add_argument('--multi-step', type=int, default=3, metavar='n',
                            help='Number of steps for multi-step return')
        parser.add_argument('--discount', type=float, default=0.99, metavar='γ', help='Discount factor')
        parser.add_argument('--target-update', type=int, default=int(1e3), metavar='τ',
                            help='Number of steps after which to update target network')
        parser.add_argument('--learning-rate', type=float, default=1e-4, metavar='η', help='Learning rate')
        parser.add_argument('--adam-eps', type=float, default=1.5e-4, metavar='ε', help='Adam epsilon')
        parser.add_argument('--batch-size', type=int, default=32, metavar='SIZE', help='Batch size')
        parser.add_argument('--learn-start', type=int, default=int(400), metavar='STEPS',
                            help='Number of steps before starting training')

        # Setup
        args = parser.parse_args()

        # set random seed
        np.random.seed(123)
        torch.manual_seed(np.random.randint(1, 10000))

        args.enable_cuda = True
        args.enable_cudnn = True

        # set torch device
        if torch.cuda.is_available() and args.enable_cuda:
            args.device = torch.device('cuda')
            torch.cuda.manual_seed(np.random.randint(1, 10000))
            torch.backends.cudnn.enabled = args.enable_cudnn
        else:
            args.device = torch.device('cpu')

        return args
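
With the default arguments above (priority_weight 0.4, T_max 50e6, learn_start 400), the increment used to anneal the importance-sampling weight towards 1 is tiny. A quick check of the schedule, assuming exactly those defaults:

priority_weight = 0.4   # initial importance-sampling weight (beta)
T_max = int(50e6)       # total training steps
learn_start = 400       # steps before learning starts

priority_weight_increase = (1 - priority_weight) / (T_max - learn_start)
steps_to_one = (1 - priority_weight) / priority_weight_increase

print(priority_weight_increase)            # ~1.2e-08 per increment
print(learn_start + round(steps_to_one))   # == T_max: beta is scheduled to reach 1 at the end of training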
Example No. 19
import pytorch_lightning as pl
from data.rufspiel_lange_karte import RufspielLangeKarteDataModule
from model.agent import Agent

trainer = pl.Trainer()
model = Agent(embedding_dim=16)
data = RufspielLangeKarteDataModule('games/rufspiel_lange_karte')
trainer.fit(model, data)
Example No. 20
import pyglet
from model.agent import Agent
from game.view import View
import sys

# define constants
WIDTH, HEIGHT = 300, 400

if __name__ == '__main__':
    screen = View(WIDTH, HEIGHT, 'Tetris')
    board = screen.board

    if len(sys.argv) == 2:
        if sys.argv[1] == 'train':
            _agent = Agent()
            _agent.run(board)
    elif len(sys.argv) >= 3:
        model = sys.argv[2]
        if sys.argv[1] == 'play':
            screen.use_trained_agent(model)
        if sys.argv[1] == 'train':
            _agent = Agent(model_num=model)
            _agent.run(board)

    pyglet.app.run()
Example No. 21
    return agent_params


# %%
fig, axs = plt.subplots(2, 2)  # plt.figure(figsize=(12,8))
fig.set_figheight(20)
fig.set_figwidth(20)
env_d = 8
c = 0
for i, ecb in enumerate(np.linspace(3, 9, 2)):
    print("ECB")
    print(ecb)
    for j, belief_d in enumerate(np.linspace(3, 9, 2)):
        print("BELIEF D")
        print(belief_d)
        agent_params = agent_p(belief_d=belief_d, env_d=env_d, ecb=ecb)

        agent = Agent(**agent_params, reduce_A=True)
        T = 100

        neighbour_0_tweets = 1*np.ones(T) # neighbour 1 tweets a bunch of Hashtag 1's
        neighbour_1_tweets = 2*np.ones(T) # neighbour 2 tweets a bunch of Hashtag 2's

        my_first_neighbour = 0
        my_first_tweet = 0

        if my_first_neighbour == 0:
            observation = (my_first_tweet, int(neighbour_0_tweets[0]), 0, my_first_neighbour)
        elif my_first_neighbour == 1:
            observation = (my_first_tweet, 0, int(neighbour_1_tweets[0]), my_first_neighbour)

        history_of_idea_beliefs = np.zeros((T, idea_levels))  # history of my own posterior over the truth/falsity of the idea
        history_of_beliefs_about_other = np.zeros((T, agent.genmodel.num_states[1], num_neighbours))  # history of my posterior beliefs about my two neighbours' beliefs about the truth/falsity of the idea
Example No. 22
import numpy as np

ENV_NAME = 'BreakoutDeterministic-v4'

# Create environment
game_wrapper = GameWrapper(MAX_NOOP_STEPS)
print("The environment has the following {} actions: {}".format(
    game_wrapper.env.action_space.n,
    game_wrapper.env.unwrapped.get_action_meanings()))

# Create agent
MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, input_shape=INPUT_SHAPE)

print('Loading model...')
# We only want to load the replay buffer when resuming training
agent.load('./saved_models/save-02502048/', load_replay_buffer=False)
print('Loaded.')

terminal = True
eval_rewards = []
evaluate_frame_number = 0

for frame in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)
        life_lost = True
        episode_reward_sum = 0
Example No. 23
def main_function(location, num_of_panels, num_of_turbines, num_of_batteries):
    # Get arguments
    if len(sys.argv) > 1:
        episodes_num = int(sys.argv[1])
    else:
        episodes_num = 2000

    # House dependent parameters
    # location = 'California'
    # num_of_panels = 30   # Number of 250-watts solar panels
    # num_of_turbines = 2  # Number of 400 KW wind turbines
    # num_of_batteries = 2

    house = House(location, num_of_panels, num_of_turbines, num_of_batteries)

    # Main dependent parameters
    num_of_months = 12
    num_of_days = 30  # number of days per episode
    num_time_states = 4
    epsilon = 0.5
    alpha = 0.8

    # Initiate Agent
    agent = Agent()
    Q = agent.initialize_Q()
    avg_Q_old = np.mean(Q)

    # For printing and plots
    print_iteration = 50
    # ARMAN: What is a print_flag?
    print_flag = False

    # ARMAN: Needs comments
    rList = []
    solarList = []
    windList = []
    ffList = []
    battstorageList = []
    battusedList = []
    energyList = []

    solarSubList = []
    windSubList = []
    ffSubList = []
    battstorageSubList = []
    battusedSubList = []

    final_itr = []
    final_list = []

    final_solar = []
    solar_dict = {0: [], 1: [], 2: [], 3: []}

    final_wind = []
    wind_dict = {0: [], 1: [], 2: [], 3: []}

    final_ff = []
    ff_dict = {0: [], 1: [], 2: [], 3: []}

    final_battery = []
    battery_dict = {0: [], 1: [], 2: [], 3: []}

    ## for realtime plotting
    # fig, ax = plt.subplots()
    # ax.set_ylabel("Energy (kWh)")
    # ax.set_title("Evolution of Energy Use")

    for itr in range(episodes_num):
        if itr % print_iteration == 0:
            print_flag = True

        # The house stays constant for every episode
        env = EnergyEnvironment(house)
        cur_state = env.state
        total_reward = 0

        solar_avg = 0
        wind_avg = 0
        ff_avg = 0
        batt_storage_avg = 0
        batt_used_avg = 0

        # for month in range(num_of_months):
        #     env.state[env.month_index] = month

        for day in range(num_of_days):
            total_solar_energy = 0
            total_wind_energy = 0
            total_grid_energy = 0
            total_battery_used = 0

            for i in range(num_time_states):
                action, cur_state_index, action_index = agent.get_action(
                    cur_state, Q, epsilon)
                reward, next_state = env.step(action, cur_state)

                Q = agent.get_Q(action, cur_state, Q, epsilon, cur_state_index,
                                action_index, reward, alpha)

                cur_state = next_state
                total_reward += reward

                # calculate total
                total_solar_energy += env.solar_energy
                total_wind_energy += env.wind_energy
                total_grid_energy += env.grid_energy
                total_battery_used += env.battery_used

                if itr == (episodes_num - 1):
                    solar_dict[i].append(env.solar_energy)
                    wind_dict[i].append(env.wind_energy)
                    ff_dict[i].append(env.grid_energy)
                    battery_dict[i].append(env.battery_used)

            # store how much is stored in the battery at the end of each day
            total_battery_stored = env.battery_energy

            # save total daily energy produced from different sources
            solarSubList.append(total_solar_energy)
            windSubList.append(total_wind_energy)
            ffSubList.append(total_grid_energy)
            battstorageSubList.append(total_battery_stored)
            battusedSubList.append(total_battery_used)

            solar_avg = np.mean(solarSubList)
            wind_avg = np.mean(windSubList)
            ff_avg = np.mean(ffSubList)
            batt_storage_avg = np.mean(battstorageSubList)
            batt_used_avg = np.mean(battusedSubList)

        if print_flag:
            avg_Q_new = np.mean(Q)
            avg_Q_change = abs(avg_Q_new - avg_Q_old)
            utils.print_info(itr, env, solar_avg, wind_avg, ff_avg,
                             batt_storage_avg, batt_used_avg, avg_Q_change)
            avg_Q_old = avg_Q_new
            solarList.append(solar_avg)
            windList.append(wind_avg)
            ffList.append(ff_avg)
            battstorageList.append(batt_storage_avg)
            battusedList.append(np.mean(batt_used_avg))

            # plt.ion()
            # plots.real_time_plot([[solar_avg], [wind_avg], [ff_avg],
            #                                [batt_storage_avg], [batt_used_avg]],
            #                     colors=['b', 'g', 'r', 'purple', 'gray'],
            #                     legends=["Solar Energy", "Wind Energy", "Fossil Fuel Energy", "Battery Storage",
            #                              "Battery Usage"], ax=ax)

            solarSubList = []
            windSubList = []
            ffSubList = []
            battstorageSubList = []
            battusedSubList = []

        print_flag = False

        # total reward per episode, appended for learning-curve visualization
        rList.append(total_reward)

        # decrease the exploration factor a little every episode
        epsilon = max(0, epsilon - 0.0005)
        alpha = max(0, alpha - 0.0005)

    # plt.close()
    print("Score over time: " + str(sum(rList) / episodes_num))
    print("Q-values:", Q)

    plots.plot_learning_curve(rList)

    for i in range(num_time_states):
        final_solar.append(np.mean(solar_dict[i]))
        final_wind.append(np.mean(wind_dict[i]))
        final_ff.append(np.mean(ff_dict[i]))
        final_battery.append(np.mean(battery_dict[i]))

    energyList.append(solarList)
    energyList.append(windList)
    energyList.append(ffList)
    # energyList.append(battstorageList)
    energyList.append(battusedList)

    final_itr.append(final_solar)
    final_itr.append(final_wind)
    final_itr.append(final_ff)
    final_itr.append(final_battery)

    # plots.multiBarPlot_final(list(range(4)), final_itr, colors=['b', 'g', 'r', 'purple', 'gray'], ylabel="Energy (kWh)",
    #              title="Final Iteration of Energy Use", legends=["Solar Energy",  "Wind Energy", "Fossil Fuel Energy", "Battery Storage", "Battery Usage"])
    #
    # plots.multiBarPlot(list(range(len(solarList))), energyList, colors=['b', 'g', 'r', 'purple', 'gray'], ylabel="Energy (kWh)",
    #              title="Evolution of Energy Use", legends=["Solar Energy",  "Wind Energy", "Fossil Fuel Energy", "Battery Storage", "Battery Usage"])

    return list(range(len(solarList))), energyList, list(
        range(len(final_solar))), final_itr, list(range(len(rList))), rList
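
The exploration rate epsilon and the learning rate alpha above decay linearly by 0.0005 per episode and are clipped at 0, so with the default 2000 episodes exploration effectively ends about halfway through training. A short sketch of that schedule, using the same starting values:

epsilon, alpha = 0.5, 0.8   # starting values used above
episodes_num = 2000

zero_at = None
for itr in range(episodes_num):
    epsilon = max(0, epsilon - 0.0005)
    alpha = max(0, alpha - 0.0005)
    if zero_at is None and epsilon == 0:
        zero_at = itr

# epsilon hits 0 roughly halfway through the 2000 episodes
print("epsilon reaches 0 at episode", zero_at)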
Example No. 24
    def execute(self, agent: Agent, state: SimState) -> None:
        """Updates the agent's 'vaccine' before executing other checks."""
        if agent.state() == AgentState.SUSCEPTIBLE and self.days == state.vaccine_time() \
                and np.random.random() < state.vaccine_share():
            agent.set_state(AgentState.IMMUNE)
Example No. 25
# TODO: Move this to another module
# Create or load the agent
if LOAD_FROM is None:
    frame_number = 0
    rewards = []
    loss_list = []

    # Build main and target networks
    MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
    TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

    replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
    agent = Agent(MAIN_DQN,
                  TARGET_DQN,
                  replay_buffer,
                  input_shape=INPUT_SHAPE,
                  batch_size=BATCH_SIZE)
else:
    # TODO: LOADING IS A LITTLE BROKEN AT THE MOMENT!
    # Load the agent instead
    print('Loading from', LOAD_FROM)
    meta = agent.load(LOAD_FROM, LOAD_REPLAY_BUFFER)

    # Apply information loaded from meta
    frame_number = meta['frame_number']
    rewards = meta['rewards']
    loss_list = meta['loss_list']

    print('Loaded')
Example No. 26
def run_Game(model, env_name, lifes, episodes):
    if model == 'DQN':
        from model.cnnBrain import DQN_Brain as Brain
        from model.agent import DQN_Agent as Agent
    elif model == 'DDQN':
        from model.cnnBrain import DDQN_Brain as Brain
        from model.agent import DDQN_Agent as Agent
    elif model == 'PDQN':
        from model.cnnBrain import PDQN_Brain as Brain
        from model.agent import PDQN_Agent as Agent
    elif model == 'PDDQN':
        from model.cnnBrain import PDDQN_Brain as Brain
        from model.agent import PDDQN_Agent as Agent
    elif model == 'DQN_PER':
        from model.cnnBrain import DQN_PER_Brain as Brain
        from model.agent import DQN_PER_Agent as Agent
    elif model == 'DDQN_PER':
        from model.cnnBrain import DDQN_PER_Brain as Brain
        from model.agent import DDQN_PER_Agent as Agent

    # lifes = 5
    # env_name = 'Breakout'
    env = gym.make("{}NoFrameskip-v4".format(env_name))  # 定义使用 gym 库中的那一个环境

    print('\nThe config:\n', configs, '\n')
    filters_per_layer = configs['Brain']['filters_per_layer']
    kernel_size_per_layer = configs['Brain']['kernel_size_per_layer']
    conv_strides_per_layer = configs['Brain']['conv_strides_per_layer']
    learning_rate = configs['Brain']['learning_rate']
    output_graph = configs['Brain']['output_graph']
    restore = configs['Brain']['restore']

    reward_decay = configs['Agent']['reward_decay']
    replace_target_iter = configs['Agent']['replace_target_iter']
    memory_size = configs['Agent']['memory_size']
    batch_size = configs['Agent']['batch_size']
    MAX_EPSILON = configs['Agent']['MAX_EPSILON']
    MIN_EPSILON = configs['Agent']['MIN_EPSILON']
    LAMBDA = configs['Agent']['LAMBDA']

    replay_start_size = configs['ExperienceReplay']['replay_start_size']
    update_frequency = configs['ExperienceReplay']['update_frequency']

    brain = Brain(n_actions=env.action_space.n,
                  observation_width=84,
                  observation_height=84,
                  observation_depth=4,
                  learning_rate=learning_rate,
                  filters_per_layer=filters_per_layer,
                  kernel_size_per_layer=kernel_size_per_layer,
                  conv_strides_per_layer=conv_strides_per_layer,
                  restore=restore,
                  output_graph=output_graph,
                  checkpoint_dir=(env_name + '_' + model + '_CNN_Net'))
    agent = Agent(
        brain=brain,
        n_actions=env.action_space.n,
        observation_space_shape=env.observation_space.shape,
        reward_decay=reward_decay,
        MAX_EPSILON=MAX_EPSILON,  # maximum epsilon value
        MIN_EPSILON=MIN_EPSILON,  # minimum epsilon value
        LAMBDA=LAMBDA,
        replace_target_iter=replace_target_iter,
        memory_size=memory_size,
        batch_size=batch_size,
    )
    dataStorage = DataStorage()

    env = wrap_env(env)
    run_AtariGame(episodes, model, env, env_name, lifes, agent, brain,
                  dataStorage, replay_start_size, update_frequency,
                  False)  # set the last param to True to record q values
Example No. 27
def initialize_network(G, agent_constructor_params, T):
    """
    Initializes a network object G that stores agent-level information (e.g. parameters of individual
    generative models, global node-indices, ...) and information about the generative process.
    """

    single_node_attrs = {
        'agent': {},
        'self_global_label_mapping': {},
        'qs': {},
        'q_pi': {},
        'o': {},
        'selected_actions': {},
        'my_tweet': {},
        'other_tweet': {},
        'sampled_neighbors': {}
    }

    single_node_attrs['stored_data'] = {
        i: list(single_node_attrs.keys())
        for i in G.nodes()
    }

    for agent_i in G.nodes():

        agent = Agent(**agent_constructor_params[agent_i])
        self_global_label_mapping = dict(
            zip(range(G.degree(agent_i)), list(nx.neighbors(G, agent_i))))

        single_node_attrs['agent'][agent_i] = agent

        single_node_attrs['self_global_label_mapping'][
            agent_i] = self_global_label_mapping

        single_node_attrs['qs'][agent_i] = np.empty(
            (T, agent.genmodel.num_factors), dtype=object
        )  # history of the posterior beliefs  about hidden states of `agent_i`

        single_node_attrs['q_pi'][agent_i] = np.empty(
            (T, len(agent.genmodel.policies)), dtype=object
        )  # history of the posterior beliefs about policies of `agent_i`

        single_node_attrs['o'][agent_i] = np.zeros(
            (T + 1, agent.genmodel.num_modalities), dtype=int
        )  # history of the indices of the observations made by `agent_i`. One extra time index for the last timestep, which has no subsequent active inference loop

        single_node_attrs['selected_actions'][agent_i] = np.zeros(
            (T, 2),
            dtype=int)  # history indices of the actions selected by `agent_i`

        single_node_attrs['my_tweet'][agent_i] = np.zeros(
            T + 1
        )  # history of indices of `my_tweet` (same as G.nodes()[agent_i][`o`][:,0])

        single_node_attrs['other_tweet'][agent_i] = np.zeros(
            T + 1
        )  # history of indices of `other_tweet` (same as G.nodes()[agent_i][`o`][t,n+1]) where `n` is the index of the selected neighbour at time t

        single_node_attrs['sampled_neighbors'][agent_i] = np.zeros(T + 1)

    for attr, attr_dict in single_node_attrs.items():

        nx.set_node_attributes(G, attr_dict, attr)

    return G
Example No. 28
    def execute(self, agent: Agent, state: SimState) -> None:
        """
        Isolate (Remove from Grid) a given share of infected people for the sickness-duration.
        Afterwards they need to be added again to the Grid as removed/dead/immune.
        """
        if agent.is_quarantined():
            if agent.state() in (AgentState.DEAD, AgentState.IMMUNE, AgentState.REMOVED):

                grid = agent.grid()
                for row in range(grid.get_size()):
                    for col in range(grid.get_size()):
                        grid_pos = GridPos(np.uint(row), np.uint(col))
                        if not grid.is_occupied(grid_pos):
                            grid.set_agent(agent, grid_pos)
                            agent.set_pos(grid_pos)
                            agent.set_quarantined(False)
                            agent.grid().get_quarantinedAgents().remove(agent)
                            state.add_to_quarantined_count(-1)
                            return

        else:
            isolate_share = state.quarantine_share()  # Share of infected cells to isolate
            infected = state.infected_count()

            if agent.state() == AgentState.INFECTIVE and state.get_quarantined_count() < isolate_share * (
                    infected + state.get_quarantined_count()):
                agent.set_quarantined(True)
                agent.grid().get_quarantinedAgents().append(agent)
                agent.grid().set_agent(None, agent.get_pos())
                agent.get_scheduler().update_gui_state(agent.get_pos(), AgentState.EMPTY)
                state.add_to_quarantined_count(1)
Example No. 29
from model.agent import Agent
from model.environment import Environment
from model.human import Human
from state_util import initialV_x, initialV_o, play_game, get_state_hash_and_winner

if __name__ == '__main__':
    # train the agent
    p1 = Agent()
    p2 = Agent()

    # set initial V for p1 and p2
    env = Environment()
    state_winner_triples = get_state_hash_and_winner(env)


    Vx = initialV_x(env, state_winner_triples)
    p1.setV(Vx)
    Vo = initialV_o(env, state_winner_triples)
    p2.setV(Vo)

    # give each player their symbol
    p1.set_symbol(env.x)
    p2.set_symbol(env.o)

    T = 1000000
    for t in range(T):
        if t % 1000 == 0:
            print(t)
        play_game(p1, p2, Environment())
    
    # play human vs. agent
Example No. 30
    # Set global parameters
    rewards = {
        'lose_reward': -2.5,
        'win_reward': 2.5,
        'yolo_reward': 0.1,
        'rep_point_reward': -0.5,
        'open_point_reward': 2.2
    }

    pictures = [
        'open.png', '1.png', '2.png', '3.png', '4.png', '5.png', '6.png',
        '7.png', '8.png', 'close.png', 'mine.png', 'open_mine.png'
    ]

    agent = Agent(decision_field, checkpoint_dir)

    # Set initial parameters for minesweeper
    layout = start_layout()
    window = sg.Window('Minesweeper', layout)

    # Game loop
    while True:
        event, values = window.read()

        # Case of "exit" button
        if event in (sg.WIN_CLOSED, 'Exit'):
            break

        # Case of "Start game" button
        if event == 'grid':
Example No. 31
def run_Game(model, env_name, episodes):
    if model == 'DQN':
        from model.mlpBrain import DQN_Brain as Brain
        from model.agent import DQN_Agent as Agent
    elif model == 'DDQN':
        from model.mlpBrain import DDQN_Brain as Brain
        from model.agent import DDQN_Agent as Agent
    elif model == 'PDQN':
        from model.mlpBrain import PDQN_Brain as Brain
        from model.agent import PDQN_Agent as Agent
    elif model == 'PDDQN':
        from model.mlpBrain import PDDQN_Brain as Brain
        from model.agent import PDDQN_Agent as Agent
    elif model == 'DQN_PER':
        from model.mlpBrain import DQN_PER_Brain as Brain
        from model.agent import DQN_PER_Agent as Agent
    elif model == 'DDQN_PER':
        from model.mlpBrain import DDQN_PER_Brain as Brain
        from model.agent import DDQN_PER_Agent as Agent
    elif model == 'DQN_InAday':
        from model.mlpBrain import DQN_InAday_Brain as Brain
        from model.agent import DQN_InAday_Agent as Agent
    elif model == 'DQN_PER_Ipm':
        from model.mlpBrain import DQN_PER_Ipm_Brain as Brain
        from model.agent import DQN_PER_Ipm_Agent as Agent
    elif model == 'DDQN_PER_Ipm':
        from model.mlpBrain import DDQN_PER_Ipm_Brain as Brain
        from model.agent import DDQN_PER_Ipm_Agent as Agent
    elif model == 'PDQN_RePER':
        from model.mlpBrain import PDQN_RePER_Brain as Brain
        from model.agent import PDQN_RePER_Agent as Agent

    env = gym.make(env_name)  # choose which gym environment to use
    # env = env.unwrapped  # if left commented out, the total reward per episode is capped at 200

    n_actions = 11 if env_name == 'Pendulum-v0' else env.action_space.n

    print('\nThe config:\n', configs, '\n')
    neurons_per_layer = configs['Brain']['neurons_per_layer']
    learning_rate = configs['Brain']['learning_rate']
    output_graph = configs['Brain']['output_graph']
    restore = configs['Brain']['restore']

    reward_decay = configs['Agent']['reward_decay']
    replace_target_iter = configs['Agent']['replace_target_iter']
    memory_size = configs['Agent']['memory_size']
    batch_size = configs['Agent']['batch_size']
    MAX_EPSILON = configs['Agent']['MAX_EPSILON']
    MIN_EPSILON = configs['Agent']['MIN_EPSILON']
    LAMBDA = configs['Agent']['LAMBDA']

    # learning_rate is important
    # tune restore together with MAX_EPSILON
    brain = Brain(n_actions=n_actions,
                  n_features=env.observation_space.shape[0],
                  neurons_per_layer=neurons_per_layer,
                  learning_rate=learning_rate,
                  output_graph=output_graph,
                  restore=restore,
                  checkpoint_dir=(env_name + '_' + model + '_MLP_Net'))
    agent = Agent(
        brain=brain,
        n_actions=n_actions,
        observation_space_shape=env.observation_space.shape,
        reward_decay=reward_decay,
        replace_target_iter=replace_target_iter,
        memory_size=memory_size,
        batch_size=batch_size,
        MAX_EPSILON=MAX_EPSILON,
        MIN_EPSILON=MIN_EPSILON,
        LAMBDA=LAMBDA,
    )

    dataStorage = DataStorage()

    if env_name == 'Pendulum-v0':
        run_Pendulum(episodes, env, agent, False)
    else:
        run_controlGame(episodes, env, agent, dataStorage,
                        True)  # set the 4th param to True to record q values
    writeData2File(model, env_name, brain, agent, dataStorage)