Example #1
    def init(self, info):
        self.field = info['field']
        self.max_linear_velocity = info['max_linear_velocity']
        self.goal = info['goal']
        self.number_of_robots = info['number_of_robots']
        self.end_of_frame = False
        self._frame = 0 
        self.wheels = [0 for _ in range(25)]
        self.cur_posture = []
        self.prev_posture = []
        self.cur_posture_opp = []
        self.cur_ball = []
        self.prev_ball = []
        self.set_default_formation()

        self.previous_frame = Frame()
        self.frame_skip = 2 # number of frames to skip
        self.obs_size = 3 # state size
        self.act_size = 7 # number of discrete actions

        # for RL
        self.action = 0
        self.previous_action = 0

        self.num_inputs = self.obs_size
        # RL algorithm class
        self.load = True
        self.play = True
        self.trainer = DQN(self.number_of_robots, self.obs_size, self.act_size, self.load, self.play)

        helper.printConsole("Initializing variables...")
Example #2
 def init(self, info):
     self.field = info['field']
     self.max_linear_velocity = info['max_linear_velocity']
     self.robot_size = info['robot_size'][0]
     self.goal = info['goal']
     self.penalty_area = info['penalty_area']
     self.goal_area = info['goal_area']
     self.number_of_robots = info['number_of_robots']
     self.end_of_frame = False
     self._frame = 0
     self.speeds = [0 for _ in range(30)]
     self.cur_posture = []
     self.cur_posture_opp = []
     self.cur_ball = []
     self.previous_ball = []
     self.previous_posture = []
     self.previous_posture_opp = []
     self.predicted_ball = []
     self.idx = 0
     self.idx_opp = 0
     self.previous_frame = Frame()
     self.defense_angle = 0
     self.attack_angle = 0
     self.team = TeamK(self.field, self.goal, self.penalty_area,
                       self.goal_area, self.robot_size,
                       self.max_linear_velocity)
     helper.printConsole("Initializing variables...")
Example #3
 def print_loss(self, loss, iteration):
     if iteration % 100 == 0:  # Print information every 100 iterations
         helper.printConsole(
             "======================================================")
         helper.printConsole("Epsilon: " + str(self.epsilon))
         helper.printConsole("Iterations: " + str(iteration))
         helper.printConsole("Loss: " + str(loss))
         helper.printConsole(
             "======================================================")
Example #4
    def init(self, info): 
        self.field = info['field']
        self.max_linear_velocity = info['max_linear_velocity']
        self.robot_size = info['robot_size'][0]
        self.goal = info['goal']
        self.penalty_area = info['penalty_area']
        self.goal_area = info['goal_area']
        self.number_of_robots = info['number_of_robots']
        self.end_of_frame = False
        self._frame = 0 
        self.speeds = [0 for _ in range(30)]
        self.cur_posture = []
        self.cur_posture_opp = []
        self.cur_ball = []
        self.previous_ball = []
        self.predicted_ball = []
        self.idx = 0
        self.idx_opp = 0
        self.previous_frame = Frame()
        self.defense_angle = 0
        self.attack_angle = 0
        self.gk_index = 0
        self.d1_index = 1
        self.d2_index = 2
        self.f1_index = 3
        self.f2_index = 4
        
        self.GK = Goalkeeper(self.field, self.goal, self.penalty_area, 
                                self.goal_area, self.robot_size,
                                self.max_linear_velocity)
        self.D1 = Defender_1(self.field, self.goal, self.penalty_area, 
                                self.goal_area, self.robot_size,
                                self.max_linear_velocity)
        self.D2 = Defender_2(self.field, self.goal, self.penalty_area, 
                                self.goal_area, self.robot_size,
                                self.max_linear_velocity)
        self.F1 = Forward_1(self.field, self.goal, self.penalty_area, 
                                self.goal_area, self.robot_size,
                                self.max_linear_velocity)
        self.F2 = Forward_2(self.field, self.goal, self.penalty_area,
                                self.goal_area, self.robot_size,
                                self.max_linear_velocity)

        helper.printConsole("Initializing variables...")
Example #5
    def init(self, info):
        self.field = info['field']
        self.max_linear_velocity = info['max_linear_velocity']
        self.goal = info['goal']
        self.number_of_robots = info['number_of_robots']
        self.end_of_frame = False
        self._frame = 0
        self.speeds = [0 for _ in range(20)]
        self.cur_posture = []
        self.prev_posture = []
        self.cur_posture_opp = []
        self.cur_ball = []
        self.prev_ball = []
        self.set_default_formation()

        self.previous_frame = Frame()
        self.frame_skip = 2  # number of frames to skip
        self.obs_size = 3  # state size
        self.act_size = 7  # number of discrete actions

        # for RL
        self.number_of_agents = 1  # in this example, just F2 is trained
        self.action = 0
        self.previous_action = 0
        self.state = []
        self.previous_state = []
        self.reward = 0
        self.previous_reward = 0

        # RL algorithm class
        self.trainer = DQN(self.number_of_agents, self.obs_size, self.act_size)

        # log rewards
        self.total_reward = 0
        self.rew = np.zeros(4)
        # for logging rewards
        self.t = 0
        self.episode = 1
        self.plot_reward = Logger()
        self.save_png_interval = 10
        helper.printConsole("Initializing variables...")
Example #6
    def __init__(self, n_agents, dim_obs, dim_act, load=False, play=False):
        self.n_agents = n_agents
        self._iterations = 0
        self.update_steps = 100  # Update Target Network
        self.epsilon_steps = 20000  # Decrease epsilon
        self.play = play
        if self.play:
            self.epsilon = 0  # Greedy choice if play is True
        else:
            self.epsilon = 0.95  # Initial epsilon value
        self.final_epsilon = 0.05  # Final epsilon value
        self.dec_epsilon = 0.025  # Decrease rate of epsilon for every generation

        self.observation_steps = 300  # Number of iterations to observe before training every generation
        self.save_num = 10000  # Save checkpoint # default: 100
        self.batch_size = 64
        self.discount_factor = 0.99

        self.num_inputs = dim_obs
        self.act_size = dim_act
        self.memory = Memory(50000)  # replay buffer
        self.net = Agent(self.num_inputs, self.act_size)
        self.target_net = Agent(self.num_inputs, self.act_size)
        self.load = load
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        if self.load:
            self.net.load_state_dict(
                torch.load(CHECKPOINT, map_location=self.device))
            helper.printConsole("Loading variables...")

        update_target_model(self.net, self.target_net)
        self.net.train()
        self.target_net.train()
        self.net.to(self.device)
        self.target_net.to(self.device)
        self.gamma = 0.99
        self.grad_norm_clip = 10
        self.loss = 0
        self.lr = 0.0001
        self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
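The constructor above calls update_target_model, which does not appear in any of these snippets. A minimal sketch, assuming a hard update that simply copies the online network's weights into the target network:

import torch.nn as nn


def update_target_model(net: nn.Module, target_net: nn.Module) -> None:
    # Hard update: overwrite the target network's parameters with the
    # online network's current parameters.
    target_net.load_state_dict(net.state_dict())

Example #7 below re-invokes this helper every update_steps iterations, so the target network tracks the online network in discrete jumps rather than with a soft (Polyak) average.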
Example #7
    def update_policy(self):

        batch = self.memory.sample(self.batch_size)

        states = torch.Tensor(batch.state).to(self.device)
        next_states = torch.Tensor(batch.next_state).to(self.device)
        actions = torch.Tensor(batch.action).long().to(self.device)
        rewards = torch.Tensor(batch.reward).to(self.device)

        q_values = self.net(states).squeeze(1)
        max_next_q_values = self.target_net(next_states).squeeze(1).max(1)[0]

        one_hot_action = torch.zeros(self.batch_size,
                                     q_values.size(-1)).to(self.device)
        one_hot_action.scatter_(1, actions.unsqueeze(1), 1)
        chosen_q_values = torch.sum(q_values.mul(one_hot_action), dim=1)

        target = rewards + self.discount_factor * max_next_q_values

        td_error = (chosen_q_values - target.detach())
        loss = (td_error**2).sum()
        self.loss = loss.detach().cpu().numpy()

        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.net.parameters(),
                                       self.grad_norm_clip)
        self.optimizer.step()

        if self._iterations % self.update_steps == 0:
            update_target_model(self.net, self.target_net)
            helper.printConsole("Updated target model.")

        if self._iterations % self.epsilon_steps == 0:
            self.epsilon = max(self.epsilon - self.dec_epsilon,
                               self.final_epsilon)
            helper.printConsole("New Episode! New Epsilon:" +
                                str(self.epsilon))

        return self.loss
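update_policy samples from self.memory, the Memory(50000) replay buffer created in Example #6, and expects the sampled batch to expose state, action, reward and next_state fields. The buffer class itself is not shown here; a minimal sketch under that assumption, using a namedtuple transition and uniform random sampling:

import random
from collections import deque, namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'reward', 'next_state'))


class Memory:
    def __init__(self, capacity):
        # Bounded buffer: the oldest transitions are dropped first.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state):
        self.buffer.append(Transition(state, action, reward, next_state))

    def sample(self, batch_size):
        transitions = random.sample(self.buffer, batch_size)
        # Transpose the list of Transitions into a single Transition of
        # tuples, so batch.state, batch.action, etc. are column-wise
        # sequences that can be fed to torch.Tensor as in update_policy.
        return Transition(*zip(*transitions))

    def __len__(self):
        return len(self.buffer)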
Example #8
 def save_checkpoint(self, iteration):
     if iteration % self.save_num == 0:
         self.net.save_model(self.net, CHECKPOINT)
         helper.printConsole("Saved Checkpoint.")