def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.idx = 0
    self.wheels = [0 for _ in range(10)]

    self.state_size = 5   # the number of possible states
    self.action_size = 2  # the number of possible actions
    self.agent = DDPGAgent(self.state_size, self.action_size)

    self.global_step = 0           # iteration step
    self.inner_step = 0
    self.save_every_steps = 12000  # save the model
    self.step = 0                  # statistic step
    self.stats_steps = 500         # for tensorboard
    self.reward_sum = 0
    self.score_op = False
    self.score_op_sum = 0
    self.boal_far = True
    return
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []

    # self.set_seed(0)
    random.seed(0)
    np.random.seed(0)
    tf.set_random_seed(0)

    self.state_dim = 3     # 3*my robots, relative to the ball position
    self.history_size = 4  # frame history size
    self.action_dim = 2    # 2
    self.agent = DDPGAgent(self.state_dim * self.history_size,
                           self.action_dim, self.max_linear_velocity)

    self.wheels = np.zeros(self.number_of_robots * 2)
    history = np.zeros([self.state_dim, self.history_size])
    self.histories = [history for _ in range(self.number_of_robots)]
    action = np.zeros(self.action_dim)
    self.actions = [action for _ in range(self.number_of_robots)]
    return
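# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original agent code): how the
# per-robot frame history initialized above is maintained in the variants of
# this listing. Each robot keeps a [state_dim x history_size] buffer; the
# newest observation is inserted as column 0, the oldest column is dropped,
# and the buffer is flattened before being fed to DDPGAgent.get_action().
# The names push_history/obs/flat_input are illustrative only.
import numpy as np

STATE_DIM = 3     # matches self.state_dim above
HISTORY_SIZE = 4  # matches self.history_size above

def push_history(history, obs):
    """Prepend the newest observation and drop the oldest column."""
    obs = np.reshape(obs, (STATE_DIM, 1))
    return np.append(obs, history[:, :HISTORY_SIZE - 1], axis=1)

history = np.zeros([STATE_DIM, HISTORY_SIZE])
history = push_history(history, [0.12, -0.30, 0.05])  # e.g. dx, dy, th
flat_input = np.reshape(history, -1)                  # shape (12,), the agent input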
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]
    self.wheels = [0 for _ in range(10)]

    self.state_size = 32   # 3*robot + 2*ball
    self.action_size = 10  # 2*my robot
    self.agent = DDPGAgent(self.state_size, self.action_size,
                           self.max_linear_velocity)
    # self.printConsole("max velocity: " + str(self.max_linear_velocity))

    self.global_step = 0           # iteration step
    self.save_every_steps = 12000  # save the model
    self.stats_steps = 6000        # for tensorboard
    self.reward_sum = 0
    self.score_sum = 0
    self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]
    return
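# --------------------------------------------------------------------------
# Illustrative sketch (not from the original code): a compact way to build
# the 32-dimensional state used by this variant, equivalent to the long
# hand-written list in its on_event() later in this listing
# (5 teammates x 3 + 5 opponents x 3 + ball x/y = 32). The half-field
# normalization constants 2.05 and 1.35 are the ones used throughout this
# code; X, Y, TH are the coordinate indices assumed by the framework.
import math

X, Y, TH = 0, 1, 2  # coordinate indices, as used throughout this listing

def build_state(cur_my_posture, cur_op_posture, cur_ball):
    state = []
    for posture in list(cur_my_posture) + list(cur_op_posture):
        state.append(round(posture[X] / 2.05, 2))
        state.append(round(posture[Y] / 1.35, 2))
        state.append(round(posture[TH] / (2 * math.pi), 2))
    state.append(round(cur_ball[X] / 2.05, 2))
    state.append(round(cur_ball[Y] / 1.35, 2))
    return state  # length 32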
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]

    self.arglist = Argument()
    # Create agent trainers
    self.obs_shape_n = [3 for i in range(1)]
    self.num_adversaries = 0
    self.num_good = 1
    self.state_dim = 3     # 3*my robots, relative to the ball position
    self.history_size = 4  # frame history size
    self.action_dim = 2    # 2
    self.trainers = get_trainers(self.num_adversaries, self.obs_shape_n,
                                 self.action_dim, self.arglist)
    self.agent = DDPGAgent(self.state_dim * self.history_size,
                           self.action_dim, self.max_linear_velocity)

    self.global_step = 0           # iteration step
    self.save_every_steps = 12000  # save the model every 10 minutes
    self.stats_steps = 6000        # for tensorboard
    self.reward_sum = 0
    self.score_sum = 0
    self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]
    self.inner_step = 0

    self.wheels = np.zeros(self.number_of_robots * 2)
    self.history = np.zeros([self.state_dim, self.history_size])
    self.action = np.zeros(self.action_dim)
    return
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.tunning_velocity = self.max_linear_velocity
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False
    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]

    # self.set_seed(0)
    random.seed(0)
    np.random.seed(0)
    tf.set_random_seed(0)

    self.state_dim = 3   # x, y, th of the controlled robot (normalized)
    self.action_dim = 2  # left and right wheel speed offsets
    self.agent = DDPGAgent(self.state_dim, self.action_dim,
                           self.tunning_velocity, -self.tunning_velocity)
    # self.printConsole("max velocity: " + str(self.max_linear_velocity))

    self.global_step = 0           # iteration step
    self.save_every_steps = 12000  # save the model every 10 minutes
    self.stats_steps = 6000        # for tensorboard
    self.reward_sum = 0
    self.score_sum = 0
    self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]
    self.inner_step = 0

    self.wheels = np.zeros(self.number_of_robots * 2)
    self.action = np.zeros(self.action_dim)
    return
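# --------------------------------------------------------------------------
# The snippets in this section rely on a small `helper` module
# (helper.distance, helper.degree2radian, helper.unit, helper.dipole_potential)
# whose source is not reproduced here. The sketch below shows plausible
# implementations of the purely geometric helpers, with signatures inferred
# from the call sites in this listing; it is an assumption, not the original
# module. helper.dipole_potential is intentionally omitted because its exact
# form is not given in this listing.
import math

def distance(x1, x2, y1, y2):
    """Euclidean distance between (x1, y1) and (x2, y2)."""
    return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

def degree2radian(deg):
    """Convert degrees to radians."""
    return deg * math.pi / 180

def unit(vec):
    """Scale a 2-D vector to unit length (zero vector returned unchanged)."""
    norm = math.sqrt(vec[0] ** 2 + vec[1] ** 2)
    return [vec[0], vec[1]] if norm == 0 else [vec[0] / norm, vec[1] / norm]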
class Component(ApplicationSession): def __init__(self, config): ApplicationSession.__init__(self, config) def printConsole(self, message): print(message) sys.__stdout__.flush() def onConnect(self): self.join(self.config.realm) @inlineCallbacks def onJoin(self, details): ############################################################################## def init_variables(self, info): # Here you have the information of the game (virtual init() in random_walk.cpp) # List: game_time, goal, number_of_robots, penalty_area, codewords, # robot_height, robot_radius, max_linear_velocity, field, team_info, # {rating, name}, axle_length, resolution, ball_radius # self.game_time = info['game_time'] self.field = info['field'] self.robot_size = 2*info['robot_radius'] self.goal = info['goal'] self.max_linear_velocity = info['max_linear_velocity'] self.tunning_velocity = self.max_linear_velocity self.number_of_robots = info['number_of_robots'] self.end_of_frame = False self.cur_my_posture = [] self.cur_op_posture = [] self.cur_ball = [] self.pre_ball = [0, 0] # self.set_seed(0) random.seed(0) np.random.seed(0) tf.set_random_seed(0) self.state_dim = 3 # #3*my robots + 2*op robots + 2 self.action_dim = 2 # 2 self.agent = DDPGAgent(self.state_dim, self.action_dim, self.tunning_velocity, -self.tunning_velocity) # self.printConsole("max velocity: " + str(self.max_linear_velocity)) self.global_step = 0 # iteration step self.save_every_steps = 12000 # save the model every 10 minutes self.stats_steps = 6000 # for tensorboard self.reward_sum = 0 self.score_sum = 0 self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]] self.inner_step = 0 self.wheels = np.zeros(self.number_of_robots*2) self.action = np.zeros(self.action_dim) return ############################################################################## try: info = yield self.call(u'aiwc.get_info', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: try: self.sub = yield self.subscribe(self.on_event, args.key) except Exception as e2: self.printConsole("Error: {}".format(e2)) init_variables(self, info) try: yield self.call(u'aiwc.ready', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: self.printConsole("I am ready for the game!") def get_coord(self, received_frame): self.cur_ball = received_frame.coordinates[BALL] self.cur_my_posture = received_frame.coordinates[MY_TEAM] self.cur_op_posture =received_frame.coordinates[OP_TEAM] def get_reward(self, reset_reason): # pre_potential = helper.dipole_potential(self.pre_ball[X], self.pre_ball[Y], 2.1, 3) # cur_potential = helper.dipole_potential(self.cur_ball[X], self.cur_ball[Y], 2.1, 3) reward = helper.dipole_potential(self.cur_my_posture[4][X], self.cur_my_posture[4][Y], 2.1, 0.3) # Add dead lock penalty # if(reset_reason == SCORE_MYTEAM): # self.score_sum += 1 # reward += 24 # minimum 24 # self.printConsole("my team goal") # if(reset_reason == SCORE_OPPONENT): # self.score_sum -= 1 # reward -= 24 # maxmimum -24 # self.printConsole("op team goal") self.printConsole("reward: " + str(reward)) self.pre_ball = self.cur_ball return reward def set_wheel_velocity(self, robot_id, left_wheel, right_wheel): multiplier = 1 if(abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity): if (abs(left_wheel) > abs(right_wheel)): multiplier = self.max_linear_velocity / abs(left_wheel) else: multiplier = self.max_linear_velocity / abs(right_wheel) self.wheels[2*robot_id] = left_wheel*multiplier self.wheels[2*robot_id + 1] = 
right_wheel*multiplier def position(self, robot_id, x, y): damping = 0.35 mult_lin = 3.5 mult_ang = 0.4 ka = 0 sign = 1 dx = x - self.cur_my_posture[robot_id][X] dy = y - self.cur_my_posture[robot_id][Y] d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2)) desired_th = (math.pi/2) if (dx == 0 and dy == 0) else math.atan2(dy, dx) d_th = desired_th - self.cur_my_posture[robot_id][TH] while(d_th > math.pi): d_th -= 2*math.pi while(d_th < -math.pi): d_th += 2*math.pi if (d_e > 1): ka = 17/90 elif (d_e > 0.5): ka = 19/90 elif (d_e > 0.3): ka = 21/90 elif (d_e > 0.2): ka = 23/90 else: ka = 25/90 if (d_th > helper.degree2radian(95)): d_th -= math.pi sign = -1 elif (d_th < helper.degree2radian(-95)): d_th += math.pi sign = -1 if (abs(d_th) > helper.degree2radian(85)): self.set_wheel_velocity(robot_id, -mult_ang*d_th, mult_ang*d_th) else: if (d_e < 5 and abs(d_th) < helper.degree2radian(40)): ka = 0.1 ka *= 4 self.set_wheel_velocity(robot_id, sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) - mult_ang * ka * d_th), sign * (mult_lin * (1 / (1 + math.exp(-3*d_e)) - damping) + mult_ang * ka * d_th)) @inlineCallbacks def on_event(self, f): @inlineCallbacks def set_wheel(self, robot_wheels): yield self.call(u'aiwc.set_speed', args.key, robot_wheels) return # initiate empty frame received_frame = Frame() if 'time' in f: received_frame.time = f['time'] if 'score' in f: received_frame.score = f['score'] if 'reset_reason' in f: received_frame.reset_reason = f['reset_reason'] if 'coordinates' in f: received_frame.coordinates = f['coordinates'] if 'EOF' in f: self.end_of_frame = f['EOF'] #self.printConsole(received_frame.time) #self.printConsole(received_frame.score) #self.printConsole(received_frame.reset_reason) #self.printConsole(self.end_of_frame) ############################################################################## if (self.end_of_frame): # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[BALL][X]) #self.printConsole(received_frame.coordinates[BALL][Y]) self.global_step += 1 self.get_coord(received_frame) ############################################################################## # Next state next_state = [round(self.cur_my_posture[4][X]/2.05, 2), round(self.cur_my_posture[4][Y]/1.35, 2), round(self.cur_my_posture[4][TH]/(2*math.pi), 2)] next_state = np.reshape([next_state], (1, self.state_dim, 1)) next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2) # Reward reward = self.get_reward(received_frame.reset_reason) # Reset if(received_frame.reset_reason != NONE): reset = True self.printConsole("reset reason: " + str(received_frame.reset_reason)) else: reset = False self.agent.train_agent(np.reshape(self.agent.history, -1), self.action, reward, np.reshape(next_history, -1), reset, self.global_step) # save 
the history and get action self.agent.history = next_history # fine tuning with RL self.position(4, 2.05, 0) self.printConsole(" rule-based action: " + str(self.wheels[8:10])) self.action = self.agent.get_action(np.reshape(self.agent.history, -1)) self.printConsole(" tuning action: " + str(self.action[:2])) self.wheels[8:10] += self.action self.printConsole(" fine-tuned action: " + str(self.wheels[8:10])) set_wheel(self, self.wheels.tolist()) self.reward_sum += reward ############################################################################## # for tensorboard # self.agent.avg_q_max += self.agent.critic.predict([np.reshape(self.agent.history, (1, -1)), # np.reshape(self.agent.action, (1, -1))])[0] ############################################################################## # if self.global_step % 50 == 0: # # print current status # self.printConsole("step: " + str(self.global_step) + ", Epsilon: " + str(self.agent.epsilon)) if self.global_step % self.save_every_steps == 0: # save the model self.agent.saver.save(self.agent.sess, self.agent.save_file) self.printConsole("Saved model") if reset: # plot the statics self.printConsole("add data to tensorboard") stats = [self.reward_sum, self.agent.avg_q_max / float(self.stats_steps), self.agent.loss_sum / float(self.stats_steps), self.score_sum] for i in range(len(stats)): self.agent.sess.run(self.agent.update_ops[i], feed_dict={ self.agent.summary_placeholders[i]: float(stats[i]) }) summary_str = self.agent.sess.run(self.agent.summary_op) self.agent.summary_writer.add_summary(summary_str, self.inner_step) self.reward_sum, self.agent.avg_q_max, self.agent.loss_sum = 0, 0, 0 self.score_sum = 0 self.inner_step += 1 ############################################################################## if(received_frame.reset_reason == GAME_END): #(virtual finish() in random_walk.cpp) #save your data with open(args.datapath + '/result.txt', 'w') as output: #output.write('yourvariables') output.close() #unsubscribe; reset or leave yield self.sub.unsubscribe() try: yield self.leave() except Exception as e: self.printConsole("Error: {}".format(e)) self.end_of_frame = False ############################################################################## def onDisconnect(self): if reactor.running: reactor.stop()
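# --------------------------------------------------------------------------
# Sketch (assumed, not taken from the original DDPGAgent source): the
# TensorBoard plumbing referenced above (summary_placeholders, update_ops,
# summary_op, summary_writer) follows the usual TensorFlow 1.x pattern of
# scalar variables updated through placeholders. setup_summary() and the
# scalar tags below are hypothetical names; the four statistics mirror the
# stats list used in this listing (reward, average max Q, loss, score).
import tensorflow as tf

def setup_summary():
    episode_reward = tf.Variable(0.)
    episode_avg_q = tf.Variable(0.)
    episode_loss = tf.Variable(0.)
    episode_score = tf.Variable(0.)

    tf.summary.scalar('Reward/Episode', episode_reward)
    tf.summary.scalar('Average_Max_Q/Episode', episode_avg_q)
    tf.summary.scalar('Loss/Episode', episode_loss)
    tf.summary.scalar('Score/Episode', episode_score)

    summary_vars = [episode_reward, episode_avg_q, episode_loss, episode_score]
    summary_placeholders = [tf.placeholder(tf.float32) for _ in summary_vars]
    update_ops = [summary_vars[i].assign(summary_placeholders[i])
                  for i in range(len(summary_vars))]
    summary_op = tf.summary.merge_all()
    return summary_placeholders, update_ops, summary_op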
class Component(ApplicationSession): def __init__(self, config): ApplicationSession.__init__(self, config) def printConsole(self, message): print(message) sys.__stdout__.flush() def onConnect(self): self.join(self.config.realm) @inlineCallbacks def onJoin(self, details): ############################################################################## def init_variables(self, info): # Here you have the information of the game (virtual init() in random_walk.cpp) # List: game_time, goal, number_of_robots, penalty_area, codewords, # robot_height, robot_radius, max_linear_velocity, field, team_info, # {rating, name}, axle_length, resolution, ball_radius # self.game_time = info['game_time'] self.field = info['field'] self.robot_size = 2 * info['robot_radius'] self.goal = info['goal'] self.max_linear_velocity = info['max_linear_velocity'] self.number_of_robots = info['number_of_robots'] self.end_of_frame = False self.cur_my_posture = [] self.cur_op_posture = [] self.cur_ball = [] # self.set_seed(0) random.seed(0) np.random.seed(0) tf.set_random_seed(0) self.state_dim = 3 # 3*my robots, relative to the ball position self.history_size = 4 # frame history size self.action_dim = 2 # 2 self.agent = DDPGAgent(self.state_dim * self.history_size, self.action_dim, self.max_linear_velocity) self.wheels = np.zeros(self.number_of_robots * 2) history = np.zeros([self.state_dim, self.history_size]) self.histories = [history for _ in range(self.number_of_robots)] action = np.zeros(self.action_dim) self.actions = [action for _ in range(self.number_of_robots)] return ############################################################################## try: info = yield self.call(u'aiwc.get_info', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: try: self.sub = yield self.subscribe(self.on_event, args.key) except Exception as e2: self.printConsole("Error: {}".format(e2)) init_variables(self, info) try: yield self.call(u'aiwc.ready', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: self.printConsole("I am ready for the game!") def get_coord(self, received_frame): self.cur_ball = received_frame.coordinates[BALL] self.cur_my_posture = received_frame.coordinates[MY_TEAM] self.cur_op_posture = received_frame.coordinates[OP_TEAM] @inlineCallbacks def on_event(self, f): @inlineCallbacks def set_wheel(self, robot_wheels): yield self.call(u'aiwc.set_speed', args.key, robot_wheels) return # initiate empty frame received_frame = Frame() if 'time' in f: received_frame.time = f['time'] if 'score' in f: received_frame.score = f['score'] if 'reset_reason' in f: received_frame.reset_reason = f['reset_reason'] if 'coordinates' in f: received_frame.coordinates = f['coordinates'] if 'EOF' in f: self.end_of_frame = f['EOF'] #self.printConsole(received_frame.time) #self.printConsole(received_frame.score) #self.printConsole(received_frame.reset_reason) #self.printConsole(self.end_of_frame) ############################################################################## if (self.end_of_frame): # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X]) 
#self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[BALL][X]) #self.printConsole(received_frame.coordinates[BALL][Y]) self.get_coord(received_frame) ############################################################################## for i in range(self.number_of_robots): next_state = [ round(self.cur_ball[X], 2) - round(self.cur_my_posture[i][X], 2), round(self.cur_ball[Y], 2) - round(self.cur_my_posture[i][Y], 2), round(self.cur_my_posture[i][TH] / (2 * math.pi), 2) ] next_state = np.reshape([next_state], (self.state_dim, 1)) self.histories[i] = np.append( next_state, self.histories[i][:, :self.history_size - 1], axis=1) self.wheels[2 * i:2 * i + 2] = self.agent.get_action( np.reshape(self.histories[i], -1)) set_wheel(self, self.wheels.tolist()) ############################################################################## if (received_frame.reset_reason == GAME_END): #(virtual finish() in random_walk.cpp) #save your data with open(args.datapath + '/result.txt', 'w') as output: #output.write('yourvariables') output.close() #unsubscribe; reset or leave yield self.sub.unsubscribe() try: yield self.leave() except Exception as e: self.printConsole("Error: {}".format(e)) self.end_of_frame = False ############################################################################## def onDisconnect(self): if reactor.running: reactor.stop()
class Component(ApplicationSession): def __init__(self, config): ApplicationSession.__init__(self, config) def printConsole(self, message): print(message) sys.__stdout__.flush() def onConnect(self): self.join(self.config.realm) @inlineCallbacks def onJoin(self, details): ############################################################################## def init_variables(self, info): # Here you have the information of the game (virtual init() in random_walk.cpp) # List: game_time, goal, number_of_robots, penalty_area, codewords, # robot_height, robot_radius, max_linear_velocity, field, team_info, # {rating, name}, axle_length, resolution, ball_radius # self.game_time = info['game_time'] self.field = info['field'] self.robot_size = 2 * info['robot_radius'] self.goal = info['goal'] self.max_linear_velocity = info['max_linear_velocity'] self.number_of_robots = info['number_of_robots'] self.end_of_frame = False self.cur_my_posture = [] self.cur_op_posture = [] self.cur_ball = [0, 0] self.pre_ball = [0, 0] self.state_size = 32 # 3*robot + 2*ball self.action_size = 10 # 2*my robot self.agent = DDPGAgent(self.state_size, self.action_size, self.max_linear_velocity) # self.printConsole("max velocity: " + str(self.max_linear_velocity)) self.global_step = 0 # iteration step self.save_every_steps = 12000 # save the model self.stats_steps = 6000 # for tensorboard self.reward_sum = 0 self.score_sum = 0 self.active_flag = [[True for _ in range(5)], [True for _ in range(5)]] self.distances = [[i for i in range(5)], [i for i in range(5)]] # distances to the ball self.idxs = [[i for i in range(5)], [i for i in range(5)]] self.deadlock_cnt = 0 self.avoid_deadlock_cnt = 0 self.wheels = np.zeros(self.action_size) return ############################################################################## try: info = yield self.call(u'aiwc.get_info', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: try: self.sub = yield self.subscribe(self.on_event, args.key) except Exception as e2: self.printConsole("Error: {}".format(e2)) init_variables(self, info) try: yield self.call(u'aiwc.ready', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: self.printConsole("I am ready for the game!") def get_coord(self, received_frame): self.cur_ball = received_frame.coordinates[BALL] self.cur_my_posture = received_frame.coordinates[MY_TEAM] self.cur_op_posture = received_frame.coordinates[OP_TEAM] def get_reward(self, reset_reason): pre_potential = helper.dipole_potential(self.pre_ball[X], self.pre_ball[Y], 2.1, 5) cur_potential = helper.dipole_potential(self.cur_ball[X], self.cur_ball[Y], 2.1, 5) reward = cur_potential - pre_potential # if my robot become deactive, recieve penalty for i in range(self.number_of_robots): if (self.cur_my_posture[i][ACTIVE] == False) and (self.active_flag[MY_TEAM][i] is True): reward -= 1 self.printConsole("my team deactive") self.active_flag[MY_TEAM][i] = self.cur_my_posture[i][ACTIVE] # if opponent robot become deactive, recieve penalty for i in range(self.number_of_robots): if (self.cur_op_posture[i][ACTIVE] == False) and (self.active_flag[OP_TEAM][i] is True): reward += 1 self.printConsole("opponent team deactive") self.active_flag[OP_TEAM][i] = self.cur_op_posture[i][ACTIVE] if (reset_reason == SCORE_MYTEAM): self.score_sum += 1 reward += 50 # minimum 25 self.printConsole("my team goal") if (reset_reason == SCORE_OPPONENT): self.score_sum -= 1 reward -= 50 # maxmimum -25 self.printConsole("op team goal") self.printConsole("reward: " + 
str(reward)) self.pre_ball = self.cur_ball return reward def sort_by_distance_to_ball(self, received_frame, team): # sort according to distant to the ball for i in range(self.number_of_robots): self.distances[team][i] = helper.distance( self.cur_ball[X], received_frame.coordinates[team][i][X], self.cur_ball[Y], received_frame.coordinates[team][i][Y]) self.idxs[MY_TEAM] = sorted(range(len(self.distances[team])), key=lambda k: self.distances[team][k]) def count_deadlock(self): d_ball = helper.distance(self.cur_ball[X], self.pre_ball[X], self.cur_ball[Y], self.pre_ball[Y]) # delta of ball #self.printConsole("boal delta: " + str(d_ball)) if (abs(self.cur_ball[Y]) > 0.65) and (d_ball < 0.02): #self.printConsole("boal stop") self.deadlock_cnt += 1 else: self.deadlock_cnt = 0 self.avoid_deadlock_cnt = 0 def count_goal_area(self): count = 0 for i in range(self.number_of_robots): if (abs(self.cur_my_posture[i][X]) > 1.6) and (abs( self.cur_my_posture[i][Y]) < 0.43): count += 1 return count def count_penalty_area(self): count = 0 for i in range(self.number_of_robots): if (abs(self.cur_my_posture[i][X]) > 1.3) and (abs( self.cur_my_posture[i][Y]) < 0.7): count += 1 return count def set_wheel_velocity(self, robot_id, left_wheel, right_wheel): multiplier = 1 if (abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity): if (abs(left_wheel) > abs(right_wheel)): multiplier = self.max_linear_velocity / abs(left_wheel) else: multiplier = self.max_linear_velocity / abs(right_wheel) self.wheels[2 * robot_id] = left_wheel * multiplier self.wheels[2 * robot_id + 1] = right_wheel * multiplier def position(self, robot_id, x, y): damping = 0.35 mult_lin = 3.5 mult_ang = 0.4 ka = 0 sign = 1 dx = x - self.cur_my_posture[robot_id][X] dy = y - self.cur_my_posture[robot_id][Y] d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2)) desired_th = (math.pi / 2) if (dx == 0 and dy == 0) else math.atan2(dy, dx) d_th = desired_th - self.cur_my_posture[robot_id][TH] while (d_th > math.pi): d_th -= 2 * math.pi while (d_th < -math.pi): d_th += 2 * math.pi if (d_e > 1): ka = 17 / 90 elif (d_e > 0.5): ka = 19 / 90 elif (d_e > 0.3): ka = 21 / 90 elif (d_e > 0.2): ka = 23 / 90 else: ka = 25 / 90 if (d_th > helper.degree2radian(95)): d_th -= math.pi sign = -1 elif (d_th < helper.degree2radian(-95)): d_th += math.pi sign = -1 if (abs(d_th) > helper.degree2radian(85)): self.set_wheel_velocity(robot_id, -mult_ang * d_th, mult_ang * d_th) else: if (d_e < 5 and abs(d_th) < helper.degree2radian(40)): ka = 0.1 ka *= 4 self.set_wheel_velocity( robot_id, sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) - mult_ang * ka * d_th), sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) + mult_ang * ka * d_th)) @inlineCallbacks def on_event(self, f): @inlineCallbacks def set_wheel(self, robot_wheels): yield self.call(u'aiwc.set_speed', args.key, robot_wheels) return def midfielder(self, robot_id): goal_dist = helper.distance(self.cur_my_posture[robot_id][X], self.field[X] / 2, self.cur_my_posture[robot_id][Y], 0) shoot_mul = 1 dribble_dist = 0.426 v = 5 goal_to_ball_unit = helper.unit( [self.field[X] / 2 - self.cur_ball[X], -self.cur_ball[Y]]) delta = [ self.cur_ball[X] - self.cur_my_posture[robot_id][X], self.cur_ball[Y] - self.cur_my_posture[robot_id][Y] ] if (self.distances[MY_TEAM][robot_id] < 0.5) and (delta[X] > 0): self.position(robot_id, self.cur_ball[X] + v * delta[X], self.cur_ball[Y] + v * delta[Y]) else: self.position( robot_id, self.cur_ball[X] - dribble_dist * goal_to_ball_unit[X], 
self.cur_ball[Y] - dribble_dist * goal_to_ball_unit[Y]) ############################################################################## # new rule def chase(self, robot_id, x, y): self.position(robot_id, x, y) def avoid_deadlock(self): self.position(0, self.cur_ball[X], 0) self.position(1, self.cur_ball[X], 0) self.position(2, self.cur_ball[X], 0) self.position(3, self.cur_ball[X], 0) self.position(4, self.cur_ball[X], 0) if (self.distances[MY_TEAM][self.idxs[MY_TEAM][0]] > 0.2) or (self.avoid_deadlock_cnt > 20): self.printConsole( " return to the ball" ) midfielder(self, self.idxs[MY_TEAM][0]) chase(self, self.idxs[MY_TEAM][1], self.cur_my_posture[self.idxs[MY_TEAM][0]][X], self.cur_my_posture[self.idxs[MY_TEAM][0]][Y]) chase(self, self.idxs[MY_TEAM][2], self.cur_my_posture[self.idxs[MY_TEAM][1]][X], self.cur_my_posture[self.idxs[MY_TEAM][1]][Y]) chase(self, self.idxs[MY_TEAM][3], self.cur_my_posture[self.idxs[MY_TEAM][2]][X], self.cur_my_posture[self.idxs[MY_TEAM][2]][Y]) chase(self, self.idxs[MY_TEAM][4], self.cur_my_posture[self.idxs[MY_TEAM][3]][X], self.cur_my_posture[self.idxs[MY_TEAM][3]][Y]) def avoid_goal_foul(self): midfielder(self, self.idxs[MY_TEAM][0]) chase(self, self.idxs[MY_TEAM][1], self.cur_my_posture[self.idxs[MY_TEAM][0]][X], self.cur_my_posture[self.idxs[MY_TEAM][0]][Y]) self.position(self.idxs[MY_TEAM][2], 0, 0) self.position(self.idxs[MY_TEAM][3], 0, 0) self.position(self.idxs[MY_TEAM][4], 0, 0) def avoid_penalty_foul(self): midfielder(self, self.idxs[MY_TEAM][0]) chase(self, self.idxs[MY_TEAM][1], self.cur_my_posture[self.idxs[MY_TEAM][0]][X], self.cur_my_posture[self.idxs[MY_TEAM][0]][Y]) chase(self, self.idxs[MY_TEAM][2], self.cur_my_posture[self.idxs[MY_TEAM][1]][X], self.cur_my_posture[self.idxs[MY_TEAM][1]][Y]) self.position(self.idxs[MY_TEAM][3], 0, 0) self.position(self.idxs[MY_TEAM][4], 0, 0) ############################################################################## # initiate empty frame received_frame = Frame() if 'time' in f: received_frame.time = f['time'] if 'score' in f: received_frame.score = f['score'] if 'reset_reason' in f: received_frame.reset_reason = f['reset_reason'] if 'coordinates' in f: received_frame.coordinates = f['coordinates'] if 'EOF' in f: self.end_of_frame = f['EOF'] #self.printConsole(received_frame.time) #self.printConsole(received_frame.score) #self.printConsole(received_frame.reset_reason) #self.printConsole(self.end_of_frame) ############################################################################## if (self.end_of_frame): # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[BALL][X]) #self.printConsole(received_frame.coordinates[BALL][Y]) self.get_coord(received_frame) ############################################################################## 
self.sort_by_distance_to_ball(received_frame, MY_TEAM) self.count_deadlock() goal_area_count = self.count_goal_area() penalty_area_count = self.count_penalty_area() # Next state next_state = [ round(self.cur_my_posture[0][X] / 2.05, 2), round(self.cur_my_posture[0][Y] / 1.35, 2), round(self.cur_my_posture[0][TH] / (2 * math.pi), 2), round(self.cur_my_posture[1][X] / 2.05, 2), round(self.cur_my_posture[1][Y] / 1.35, 2), round(self.cur_my_posture[1][TH] / (2 * math.pi), 2), round(self.cur_my_posture[2][X] / 2.05, 2), round(self.cur_my_posture[2][Y] / 1.35, 2), round(self.cur_my_posture[2][TH] / (2 * math.pi), 2), round(self.cur_my_posture[3][X] / 2.05, 2), round(self.cur_my_posture[3][Y] / 1.35, 2), round(self.cur_my_posture[3][TH] / (2 * math.pi), 2), round(self.cur_my_posture[4][X] / 2.05, 2), round(self.cur_my_posture[4][Y] / 1.35, 2), round(self.cur_my_posture[4][TH] / (2 * math.pi), 2), round(self.cur_op_posture[0][X] / 2.05, 2), round(self.cur_op_posture[0][Y] / 1.35, 2), round(self.cur_op_posture[0][TH] / (2 * math.pi), 2), round(self.cur_op_posture[1][X] / 2.05, 2), round(self.cur_op_posture[1][Y] / 1.35, 2), round(self.cur_op_posture[1][TH] / (2 * math.pi), 2), round(self.cur_op_posture[2][X] / 2.05, 2), round(self.cur_op_posture[2][Y] / 1.35, 2), round(self.cur_op_posture[2][TH] / (2 * math.pi), 2), round(self.cur_op_posture[3][X] / 2.05, 2), round(self.cur_op_posture[3][Y] / 1.35, 2), round(self.cur_op_posture[3][TH] / (2 * math.pi), 2), round(self.cur_op_posture[4][X] / 2.05, 2), round(self.cur_op_posture[4][Y] / 1.35, 2), round(self.cur_op_posture[4][TH] / (2 * math.pi), 2), round(self.cur_ball[X] / 2.05, 2), round(self.cur_ball[Y] / 1.35, 2) ] next_state = np.reshape([next_state], (1, self.state_size, 1)) next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2) # Reward reward = self.get_reward(received_frame.reset_reason) # Reset if (received_frame.reset_reason != NONE): reset = True self.printConsole("reset reason: " + str(received_frame.reset_reason)) else: reset = False # save the sample <s, a, r, s'> to the replay memory self.agent.replay_memory(self.agent.history, self.agent.action, reward, next_history, reset) # every time step do the training if len(self.agent.memory) >= self.agent.train_start: self.agent.train_replay() # save the history self.agent.history = next_history # exploration with rule-based exploration if np.random.rand() <= self.agent.epsilon: if goal_area_count > 2: self.printConsole( " goal foul!" ) avoid_goal_foul(self) elif penalty_area_count > 3: self.printConsole( " penalty foul!" 
) avoid_penalty_foul(self) elif self.deadlock_cnt > 15: self.printConsole( " warning: deadlock") # self.printConsole("robot " + str(self.idxs[MY_TEAM][0]) + " distance: " + str(self.distances[MY_TEAM][self.idxs[MY_TEAM][0]])) avoid_deadlock(self) self.avoid_deadlock_cnt += 1 else: midfielder(self, self.idxs[MY_TEAM][0]) midfielder(self, self.idxs[MY_TEAM][1]) chase(self, self.idxs[MY_TEAM][2], self.cur_my_posture[self.idxs[MY_TEAM][1]][X], self.cur_my_posture[self.idxs[MY_TEAM][1]][Y]) chase(self, self.idxs[MY_TEAM][3], self.cur_my_posture[self.idxs[MY_TEAM][2]][X], self.cur_my_posture[self.idxs[MY_TEAM][2]][Y]) chase(self, self.idxs[MY_TEAM][4], self.cur_my_posture[self.idxs[MY_TEAM][3]][X], self.cur_my_posture[self.idxs[MY_TEAM][3]][Y]) self.agent.noise = self.agent.ou_noise(self.agent.noise, 0.0, 0.15, 0.2) self.printConsole("noise: " + str(self.agent.noise[:2])) self.wheels = self.wheels + self.agent.noise * self.agent.action_max * max( self.agent.epsilon, 0) self.wheels = np.maximum(self.wheels, -self.agent.action_max) self.wheels = np.minimum(self.wheels, self.agent.action_max) self.agent.action = self.wheels self.printConsole("rule-based action") else: self.agent.action = self.agent.get_action( np.reshape(self.agent.history, (1, -1))) self.wheels = self.agent.action self.printConsole("policy network action") self.printConsole("wheels: " + str(self.agent.action[:2])) set_wheel(self, self.wheels.tolist()) # go one step self.global_step += 1 self.reward_sum += reward ############################################################################## # for tensorboard self.agent.avg_q_max += self.agent.critic.predict([ np.reshape(self.agent.history, (1, -1)), np.reshape(self.agent.action, (1, -1)) ])[0] ############################################################################## if self.global_step % 50 == 0: # print current status self.printConsole("step: " + str(self.global_step) + ", Epsilon: " + str(self.agent.epsilon)) if self.global_step % self.save_every_steps == 0: # save the model self.agent.save_model("./save_model/simple_ddpg") self.printConsole("Saved model") if self.global_step % self.stats_steps == 0: # plot the statics self.printConsole("add data to tensorboard") stats = [ self.reward_sum, self.agent.avg_q_max / float(self.stats_steps), self.agent.loss_sum / float(self.stats_steps), self.score_sum ] for i in range(len(stats)): self.agent.sess.run(self.agent.update_ops[i], feed_dict={ self.agent.summary_placeholders[i]: float(stats[i]) }) summary_str = self.agent.sess.run(self.agent.summary_op) self.agent.summary_writer.add_summary( summary_str, self.global_step / self.stats_steps) self.reward_sum, self.agent.avg_q_max, self.agent.loss_sum = 0, 0, 0 self.score_sum = 0 ############################################################################## if (received_frame.reset_reason == GAME_END): #(virtual finish() in random_walk.cpp) #save your data with open(args.datapath + '/result.txt', 'w') as output: #output.write('yourvariables') output.close() #unsubscribe; reset or leave yield self.sub.unsubscribe() try: yield self.leave() except Exception as e: self.printConsole("Error: {}".format(e)) self.end_of_frame = False ############################################################################## def onDisconnect(self): if reactor.running: reactor.stop()
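# --------------------------------------------------------------------------
# Sketch (assumed): the agent.ou_noise(noise, mu, theta, sigma) call in the
# rule-based exploration branch above looks like a discrete Ornstein-Uhlenbeck
# process, which keeps exploration noise temporally correlated across frames.
# The minimal form below matches that call signature but is not taken from
# the original DDPGAgent source.
import numpy as np

def ou_noise(x, mu=0.0, theta=0.15, sigma=0.2):
    """One Euler step of an Ornstein-Uhlenbeck process pulling x toward mu."""
    return x + theta * (mu - x) + sigma * np.random.randn(*np.shape(x))

noise = np.zeros(10)     # one value per wheel command in this variant
noise = ou_noise(noise)  # called once per received frame, as above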
class Component(ApplicationSession): def __init__(self, config): ApplicationSession.__init__(self, config) def printConsole(self, message): print(message) sys.__stdout__.flush() def onConnect(self): self.join(self.config.realm) @inlineCallbacks def onJoin(self, details): ############################################################################## def init_variables(self, info): # Here you have the information of the game (virtual init() in random_walk.cpp) # List: game_time, goal, number_of_robots, penalty_area, codewords, # robot_height, robot_radius, max_linear_velocity, field, team_info, # {rating, name}, axle_length, resolution, ball_radius # self.game_time = info['game_time'] self.field = info['field'] self.robot_size = 2 * info['robot_radius'] self.goal = info['goal'] self.max_linear_velocity = info['max_linear_velocity'] self.number_of_robots = info['number_of_robots'] self.end_of_frame = False self.cur_my_posture = [] self.cur_op_posture = [] self.cur_ball = [] self.prev_ball = [0, 0] # self.set_seed(0) random.seed(0) np.random.seed(0) tf.set_random_seed(0) self.state_dim = 3 # 3*my robots, relative to the ball position self.history_size = 4 # frame history size self.action_dim = 2 # 2 self.agent = DDPGAgent(self.state_dim * self.history_size, self.action_dim, self.max_linear_velocity) self.wheels = np.zeros(self.number_of_robots * 2) history = np.zeros([self.state_dim, self.history_size]) self.histories = [history for _ in range(self.number_of_robots)] action = np.zeros(self.action_dim) self.actions = [action for _ in range(self.number_of_robots)] self.distances = [[i for i in range(self.number_of_robots)], [i for i in range(self.number_of_robots)] ] # distances to the ball self.idxs = [[i for i in range(self.number_of_robots)], [i for i in range(self.number_of_robots)]] self.deadlock_cnt = 0 self.avoid_deadlock_cnt = 0 return ############################################################################## try: info = yield self.call(u'aiwc.get_info', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: try: self.sub = yield self.subscribe(self.on_event, args.key) except Exception as e2: self.printConsole("Error: {}".format(e2)) init_variables(self, info) try: yield self.call(u'aiwc.ready', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: self.printConsole("I am ready for the game!") def get_coord(self, received_frame): self.cur_ball = received_frame.coordinates[BALL] self.cur_my_posture = received_frame.coordinates[MY_TEAM] self.cur_op_posture = received_frame.coordinates[OP_TEAM] def sort_by_distance_to_ball(self, received_frame, team): # sort according to distant to the ball for i in range(self.number_of_robots): self.distances[team][i] = helper.distance( self.cur_ball[X], received_frame.coordinates[team][i][X], self.cur_ball[Y], received_frame.coordinates[team][i][Y]) self.idxs[MY_TEAM] = sorted(range(len(self.distances[team])), key=lambda k: self.distances[team][k]) def count_deadlock(self): d_ball = helper.distance(self.cur_ball[X], self.prev_ball[X], self.cur_ball[Y], self.prev_ball[Y]) # delta of ball #self.printConsole("boal delta: " + str(d_ball)) if (abs(self.cur_ball[Y]) > 0.65) and (d_ball < 0.02): #self.printConsole("boal stop") self.deadlock_cnt += 1 else: self.deadlock_cnt = 0 self.avoid_deadlock_cnt = 0 def count_goal_area(self): count = 0 for i in range(self.number_of_robots): if (abs(self.cur_my_posture[i][X]) > 1.6) and (abs( self.cur_my_posture[i][Y]) < 0.43): count += 1 return count def 
count_penalty_area(self): count = 0 for i in range(self.number_of_robots): if (abs(self.cur_my_posture[i][X]) > 1.3) and (abs( self.cur_my_posture[i][Y]) < 0.7): count += 1 return count def set_wheel_velocity(self, robot_id, left_wheel, right_wheel): multiplier = 1 if (abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity): if (abs(left_wheel) > abs(right_wheel)): multiplier = self.max_linear_velocity / abs(left_wheel) else: multiplier = self.max_linear_velocity / abs(right_wheel) self.wheels[2 * robot_id] = left_wheel * multiplier self.wheels[2 * robot_id + 1] = right_wheel * multiplier def position(self, robot_id, x, y): damping = 0.35 mult_lin = 3.5 mult_ang = 0.4 ka = 0 sign = 1 dx = x - self.cur_my_posture[robot_id][X] dy = y - self.cur_my_posture[robot_id][Y] d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2)) desired_th = (math.pi / 2) if (dx == 0 and dy == 0) else math.atan2(dy, dx) d_th = desired_th - self.cur_my_posture[robot_id][TH] while (d_th > math.pi): d_th -= 2 * math.pi while (d_th < -math.pi): d_th += 2 * math.pi if (d_e > 1): ka = 17 / 90 elif (d_e > 0.5): ka = 19 / 90 elif (d_e > 0.3): ka = 21 / 90 elif (d_e > 0.2): ka = 23 / 90 else: ka = 25 / 90 if (d_th > helper.degree2radian(95)): d_th -= math.pi sign = -1 elif (d_th < helper.degree2radian(-95)): d_th += math.pi sign = -1 if (abs(d_th) > helper.degree2radian(85)): self.set_wheel_velocity(robot_id, -mult_ang * d_th, mult_ang * d_th) else: if (d_e < 5 and abs(d_th) < helper.degree2radian(40)): ka = 0.1 ka *= 4 self.set_wheel_velocity( robot_id, sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) - mult_ang * ka * d_th), sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) + mult_ang * ka * d_th)) @inlineCallbacks def on_event(self, f): @inlineCallbacks def set_wheel(self, robot_wheels): yield self.call(u'aiwc.set_speed', args.key, robot_wheels) return def midfielder(self, robot_id): goal_dist = helper.distance(self.cur_my_posture[robot_id][X], self.field[X] / 2, self.cur_my_posture[robot_id][Y], 0) shoot_mul = 1 dribble_dist = 0.426 v = 5 goal_to_ball_unit = helper.unit( [self.field[X] / 2 - self.cur_ball[X], -self.cur_ball[Y]]) delta = [ self.cur_ball[X] - self.cur_my_posture[robot_id][X], self.cur_ball[Y] - self.cur_my_posture[robot_id][Y] ] # if (self.distances[MY_TEAM][robot_id] < 0.5) and (delta[X] > 0): if self.distances[MY_TEAM][robot_id] < 0.5: # near self.wheels[2 * robot_id:2 * robot_id + 2] = self.agent.get_action( np.reshape(self.histories[robot_id], -1)) # ddpg action self.printConsole("robot id " + str(robot_id) + " distance: " + str(self.distances[MY_TEAM][robot_id])) self.printConsole("robot id " + str(robot_id) + "ddpg action") else: # far self.position( robot_id, self.cur_ball[X] - dribble_dist * goal_to_ball_unit[X], self.cur_ball[Y] - dribble_dist * goal_to_ball_unit[Y]) ############################################################################## # new rule def chase(self, robot_id, x, y): self.position(robot_id, x, y) def avoid_deadlock(self): self.position(0, self.cur_ball[X], 0) self.position(1, self.cur_ball[X], 0) self.position(2, self.cur_ball[X], 0) self.position(3, self.cur_ball[X], 0) self.position(4, self.cur_ball[X], 0) if (self.distances[MY_TEAM][self.idxs[MY_TEAM][0]] > 0.2) or (self.avoid_deadlock_cnt > 20): self.printConsole( " return to the ball" ) midfielder(self, self.idxs[MY_TEAM][0]) chase(self, self.idxs[MY_TEAM][1], self.cur_my_posture[self.idxs[MY_TEAM][0]][X], self.cur_my_posture[self.idxs[MY_TEAM][0]][Y]) chase(self, 
self.idxs[MY_TEAM][2], self.cur_my_posture[self.idxs[MY_TEAM][1]][X], self.cur_my_posture[self.idxs[MY_TEAM][1]][Y]) chase(self, self.idxs[MY_TEAM][3], self.cur_my_posture[self.idxs[MY_TEAM][2]][X], self.cur_my_posture[self.idxs[MY_TEAM][2]][Y]) chase(self, self.idxs[MY_TEAM][4], self.cur_my_posture[self.idxs[MY_TEAM][3]][X], self.cur_my_posture[self.idxs[MY_TEAM][3]][Y]) def avoid_goal_foul(self): midfielder(self, self.idxs[MY_TEAM][0]) chase(self, self.idxs[MY_TEAM][1], self.cur_my_posture[self.idxs[MY_TEAM][0]][X], self.cur_my_posture[self.idxs[MY_TEAM][0]][Y]) self.position(self.idxs[MY_TEAM][2], 0, 0) self.position(self.idxs[MY_TEAM][3], 0, 0) self.position(self.idxs[MY_TEAM][4], 0, 0) def avoid_penalty_foul(self): midfielder(self, self.idxs[MY_TEAM][0]) chase(self, self.idxs[MY_TEAM][1], self.cur_my_posture[self.idxs[MY_TEAM][0]][X], self.cur_my_posture[self.idxs[MY_TEAM][0]][Y]) chase(self, self.idxs[MY_TEAM][2], self.cur_my_posture[self.idxs[MY_TEAM][1]][X], self.cur_my_posture[self.idxs[MY_TEAM][1]][Y]) self.position(self.idxs[MY_TEAM][3], 0, 0) self.position(self.idxs[MY_TEAM][4], 0, 0) ############################################################################## # initiate empty frame received_frame = Frame() if 'time' in f: received_frame.time = f['time'] if 'score' in f: received_frame.score = f['score'] if 'reset_reason' in f: received_frame.reset_reason = f['reset_reason'] if 'coordinates' in f: received_frame.coordinates = f['coordinates'] if 'EOF' in f: self.end_of_frame = f['EOF'] #self.printConsole(received_frame.time) #self.printConsole(received_frame.score) #self.printConsole(received_frame.reset_reason) #self.printConsole(self.end_of_frame) ############################################################################## if (self.end_of_frame): # How to get the robot and ball coordinates: (ROBOT_ID can be 0,1,2,3,4) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE]) #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH]) #self.printConsole(received_frame.coordinates[BALL][X]) #self.printConsole(received_frame.coordinates[BALL][Y]) self.get_coord(received_frame) ############################################################################## self.sort_by_distance_to_ball(received_frame, MY_TEAM) self.count_deadlock() goal_area_count = self.count_goal_area() penalty_area_count = self.count_penalty_area() # update state for i in range(self.number_of_robots): next_state = [ round(self.cur_ball[X], 2) - round(self.cur_my_posture[i][X], 2), round(self.cur_ball[Y], 2) - round(self.cur_my_posture[i][Y], 2), round(self.cur_my_posture[i][TH] / (2 * math.pi), 2) ] next_state = np.reshape([next_state], (self.state_dim, 1)) self.histories[i] = np.append( next_state, self.histories[i][:, :self.history_size - 1], axis=1) if goal_area_count > 2: # self.printConsole(" goal foul!") avoid_goal_foul(self) elif penalty_area_count > 3: # self.printConsole(" penalty foul!") avoid_penalty_foul(self) elif 
self.deadlock_cnt > 15: # self.printConsole(" warning: deadlock") # self.printConsole("robot " + str(self.idxs[MY_TEAM][0]) + " distance: " + str(self.distances[MY_TEAM][self.idxs[MY_TEAM][0]])) avoid_deadlock(self) self.avoid_deadlock_cnt += 1 else: midfielder(self, self.idxs[MY_TEAM][0]) midfielder(self, self.idxs[MY_TEAM][1]) chase(self, self.idxs[MY_TEAM][2], self.cur_my_posture[self.idxs[MY_TEAM][1]][X], self.cur_my_posture[self.idxs[MY_TEAM][1]][Y]) chase(self, self.idxs[MY_TEAM][3], self.cur_my_posture[self.idxs[MY_TEAM][2]][X], self.cur_my_posture[self.idxs[MY_TEAM][2]][Y]) chase(self, self.idxs[MY_TEAM][4], self.cur_my_posture[self.idxs[MY_TEAM][3]][X], self.cur_my_posture[self.idxs[MY_TEAM][3]][Y]) self.prev_ball = self.cur_ball set_wheel(self, self.wheels.tolist()) ############################################################################## if (received_frame.reset_reason == GAME_END): #(virtual finish() in random_walk.cpp) #save your data with open(args.datapath + '/result.txt', 'w') as output: #output.write('yourvariables') output.close() #unsubscribe; reset or leave yield self.sub.unsubscribe() try: yield self.leave() except Exception as e: self.printConsole("Error: {}".format(e)) self.end_of_frame = False ############################################################################## def onDisconnect(self): if reactor.running: reactor.stop()
class Component(ApplicationSession): """ AI Base + Skeleton """ def __init__(self, config): ApplicationSession.__init__(self, config) def printConsole(self, message): print(message) sys.__stdout__.flush() def onConnect(self): self.join(self.config.realm) @inlineCallbacks def onJoin(self, details): ############################################################################## def init_variables(self, info): # Here you have the information of the game (virtual init() in random_walk.cpp) # List: game_time, goal, number_of_robots, penalty_area, codewords, # robot_height, robot_radius, max_linear_velocity, field, team_info, # {rating, name}, axle_length, resolution, ball_radius # self.game_time = info['game_time'] self.field = info['field'] self.robot_size = 2 * info['robot_radius'] self.goal = info['goal'] self.max_linear_velocity = info['max_linear_velocity'] self.number_of_robots = info['number_of_robots'] self.end_of_frame = False self.cur_my_posture = [] self.cur_op_posture = [] self.cur_ball = [] self.idx = 0 self.wheels = [0 for _ in range(10)] self.state_size = 5 # the number of possible states self.action_size = 2 # the number of possible actions self.agent = DDPGAgent(self.state_size, self.action_size) self.global_step = 0 # iteration step self.inner_step = 0 self.save_every_steps = 12000 # save the model self.step = 0 # statistic step self.stats_steps = 500 # for tensorboard self.reward_sum = 0 self.score_op = False self.score_op_sum = 0 self.boal_far = True return ############################################################################## try: info = yield self.call(u'aiwc.get_info', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: try: self.sub = yield self.subscribe(self.on_event, args.key) except Exception as e2: self.printConsole("Error: {}".format(e2)) init_variables(self, info) try: yield self.call(u'aiwc.ready', args.key) except Exception as e: self.printConsole("Error: {}".format(e)) else: self.printConsole("I am ready for the game!") def get_coord(self, received_frame): self.cur_ball = received_frame.coordinates[BALL] self.cur_my_posture = received_frame.coordinates[MY_TEAM] self.cur_op_posture = received_frame.coordinates[OP_TEAM] def find_closest_robot(self): min_idx = 0 min_distance = 9999.99 for i in range(self.number_of_robots - 1): measured_distance = helper.distance(self.cur_ball[X], self.cur_my_posture[i][X], self.cur_ball[Y], self.cur_my_posture[i][Y]) if (measured_distance < min_distance): min_distance = measured_distance min_idx = i self.idx = min_idx def set_wheel_velocity(self, robot_id, left_wheel, right_wheel): multiplier = 1 if (abs(left_wheel) > self.max_linear_velocity or abs(right_wheel) > self.max_linear_velocity): if (abs(left_wheel) > abs(right_wheel)): multiplier = self.max_linear_velocity / abs(left_wheel) else: multiplier = self.max_linear_velocity / abs(right_wheel) self.wheels[2 * robot_id] = left_wheel * multiplier self.wheels[2 * robot_id + 1] = right_wheel * multiplier def position(self, robot_id, x, y): damping = 0.35 mult_lin = 3.5 mult_ang = 0.4 ka = 0 sign = 1 dx = x - self.cur_my_posture[robot_id][X] dy = y - self.cur_my_posture[robot_id][Y] d_e = math.sqrt(math.pow(dx, 2) + math.pow(dy, 2)) desired_th = (math.pi / 2) if (dx == 0 and dy == 0) else math.atan2(dy, dx) d_th = desired_th - self.cur_my_posture[robot_id][TH] while (d_th > math.pi): d_th -= 2 * math.pi while (d_th < -math.pi): d_th += 2 * math.pi if (d_e > 1): ka = 17 / 90 elif (d_e > 0.5): ka = 19 / 90 elif (d_e > 0.3): ka = 21 / 90 elif (d_e > 
0.2):
            ka = 23 / 90
        else:
            ka = 25 / 90

        if (d_th > helper.degree2radian(95)):
            d_th -= math.pi
            sign = -1
        elif (d_th < helper.degree2radian(-95)):
            d_th += math.pi
            sign = -1

        if (abs(d_th) > helper.degree2radian(85)):
            self.set_wheel_velocity(robot_id, -mult_ang * d_th, mult_ang * d_th)
        else:
            if (d_e < 5 and abs(d_th) < helper.degree2radian(40)):
                ka = 0.1
            ka *= 4
            self.set_wheel_velocity(
                robot_id,
                sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) - mult_ang * ka * d_th),
                sign * (mult_lin * (1 / (1 + math.exp(-3 * d_e)) - damping) + mult_ang * ka * d_th))

    @inlineCallbacks
    def on_event(self, f):

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        def goalie(self, robot_id):
            # The goalie just tracks the ball's Y position at a fixed position on the X axis
            x = (-self.field[X] / 2) + (self.robot_size / 2) + 0.05
            y = max(min(self.cur_ball[Y], (self.goal[Y] / 2 - self.robot_size / 2)),
                    -self.goal[Y] / 2 + self.robot_size / 2)
            self.position(robot_id, x, y)

        def defender(self, robot_id, idx, offset_y):
            ox = 0.1
            oy = 0.075
            min_x = (-self.field[X] / 2) + (self.robot_size / 2) + 0.05

            # If the ball is in the offensive half
            if (self.cur_ball[X] > 0):
                # If the ball is in the upper part of the field (y > 0)
                if (self.cur_ball[Y] > 0):
                    self.position(robot_id,
                                  (self.cur_ball[X] - self.field[X] / 2) / 2,
                                  (min(self.cur_ball[Y], self.field[Y] / 3)) + offset_y)
                # If the ball is in the lower part of the field (y < 0)
                else:
                    self.position(robot_id,
                                  (self.cur_ball[X] - self.field[X] / 2) / 2,
                                  (max(self.cur_ball[Y], -self.field[Y] / 3)) + offset_y)
            # If the ball is in the defensive half
            else:
                # If the robot is in front of the ball
                if (self.cur_my_posture[robot_id][X] > self.cur_ball[X] - ox):
                    # If this defender is the nearest defender to the ball
                    if (robot_id == idx):
                        self.position(robot_id,
                                      (self.cur_ball[X] - ox),
                                      ((self.cur_ball[Y] + oy)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_ball[Y] - oy)))
                    else:
                        self.position(robot_id,
                                      (max(self.cur_ball[X] - 0.03, min_x)),
                                      ((self.cur_my_posture[robot_id][Y] + 0.03)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_my_posture[robot_id][Y] - 0.03)))
                # If the robot is behind the ball
                else:
                    if (robot_id == idx):
                        self.position(robot_id, self.cur_ball[X], self.cur_ball[Y])
                    else:
                        self.position(robot_id,
                                      (max(self.cur_ball[X] - 0.03, min_x)),
                                      ((self.cur_my_posture[robot_id][Y] + 0.03)
                                       if (self.cur_my_posture[robot_id][Y] < 0)
                                       else (self.cur_my_posture[robot_id][Y] - 0.03)))

        def midfielder(self, robot_id, idx, offset_y):
            ox = 0.1
            oy = 0.075
            ball_dist = helper.distance(self.cur_my_posture[robot_id][X], self.cur_ball[X],
                                        self.cur_my_posture[robot_id][Y], self.cur_ball[Y])
            goal_dist = helper.distance(self.cur_my_posture[robot_id][X], self.field[X] / 2,
                                        self.cur_my_posture[robot_id][Y], 0)

            if (robot_id == idx):
                if (ball_dist < 0.04):
                    # if near the ball and near the opponent's goal
                    if (goal_dist < 1.0):
                        self.position(robot_id, self.field[X] / 2, 0)
                    else:
                        # if near and in front of the ball
                        if (self.cur_ball[X] < self.cur_my_posture[robot_id][X] - 0.044):
                            x_suggest = self.cur_ball[X] - 0.044
                            self.position(robot_id, x_suggest, self.cur_ball[Y])
                        # if near and behind the ball
                        else:
                            self.position(robot_id, self.field[X] + self.goal[X], -self.goal[Y] / 2)
                else:
                    if (self.cur_ball[X] < self.cur_my_posture[robot_id][X]):
                        if (self.cur_ball[Y] > 0):
                            self.position(robot_id, self.cur_ball[X] - ox,
                                          min(self.cur_ball[Y] - oy, 0.45 * self.field[Y]))
                        else:
                            self.position(robot_id, self.cur_ball[X] - ox,
                                          min(self.cur_ball[Y] + oy, -0.45 * self.field[Y]))
                    else:
                        self.position(robot_id, self.cur_ball[X], self.cur_ball[Y])
            else:
                self.position(robot_id, self.cur_ball[X] - 0.1, self.cur_ball[Y] + offset_y)

        # initiate an empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        ##############################################################################
        if (self.end_of_frame):

            # How to get the robot and ball coordinates: (ROBOT_ID can be 0, 1, 2, 3, 4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])

            self.get_coord(received_frame)
            ##############################################################################

            reward = 0
            if (received_frame.reset_reason == SCORE_OPPONENT):
                self.score_op = True

            # if the ball is near my goal post and the goalie is active, update the memory and train
            if (self.cur_ball[X] < -1.3) and (abs(self.cur_ball[Y]) < 0.75) and self.cur_my_posture[4][ACTIVE]:
                # next state
                self.find_closest_robot()
                next_state = [
                    round(self.cur_my_posture[4][X] / 2.05, 2),
                    round(self.cur_my_posture[4][Y] / 1.35, 2),
                    round(self.cur_my_posture[4][TH] / (2 * math.pi), 2),
                    round(self.cur_ball[X] / 2.05, 2),
                    round(self.cur_ball[Y] / 1.35, 2)
                ]
                next_state = np.reshape([next_state], (1, self.state_size, 1))
                next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2)

                # Reward
                # reward = 0
                if (self.cur_ball[X] < -1.4) and (-0.65 < self.cur_ball[Y] < 0.65):
                    # if the ball is near my goal post -> penalty
                    reward += (self.cur_ball[X] + 1.4) + (abs(self.cur_ball[Y]) - 0.65)
                if self.score_op:
                    # if the opponent scored -> penalty
                    reward -= 100
                    self.score_op_sum += 1
                    self.printConsole("score opponent")
                    self.score_op = False
                if reward != 0:
                    self.printConsole("step reward: " + str(reward))

                # Reset
                self.printConsole("boal far: " + str(self.boal_far))

                # save the sample <s, a, r, s'> to the replay memory
                self.agent.replay_memory(self.agent.history, self.agent.action, reward,
                                         next_history, self.boal_far)

                # every time step do the training
                if len(self.agent.memory) >= self.agent.train_start:
                    self.agent.train_replay()

                # save the history and get the next action
                self.agent.history = next_history
                self.agent.action = self.agent.get_action(np.reshape(self.agent.history, (1, -1)))
                #self.printConsole("agent action: " + str(self.agent.action))

                # Set goalkeeper wheels
                self.wheels[2 * 4] = self.agent.action[0] * self.max_linear_velocity
                self.wheels[2 * 4 + 1] = self.agent.action[1] * self.max_linear_velocity

                self.boal_far = False
                self.inner_step += 1
                self.step += 1
            else:
                # self.printConsole("stay at goal post")
                goalie(self, 4)
                self.boal_far = True

            # other robot functions
            #goalie(self, 4)
            #defender(self, 3, self.idx, 0.2)
            #defender(self, 2, self.idx, -0.2)
            #midfielder(self, 1, self.idx, 0.15)
            #midfielder(self, 0, self.idx, -0.15)

            # self.printConsole("left wheel: " + str(self.wheels[8]) + " right wheel: " + str(self.wheels[9]))
            set_wheel(self, self.wheels)

            # go one step
            self.global_step += 1
            self.reward_sum += reward

            ##############################################################################
            # for tensorboard
            self.agent.avg_q_max += self.agent.critic.predict([
                np.reshape(self.agent.history, (1, -1)),
                np.reshape(self.agent.action, (1, -1))
            ])[0]
            ##############################################################################

            if self.global_step % 50 == 0:
                # print the current status
                self.printConsole("step: " + str(self.global_step) +
                                  ", Epsilon: " + str(self.agent.epsilon))
                self.printConsole("update step: " + str(self.inner_step))

            if self.global_step % self.save_every_steps == 0:
                # save the model
                self.agent.save_model("./save_model/goalie_ddpg")
                self.printConsole("Saved model")

            if (self.global_step % self.stats_steps == 0) and (self.step != 0):
                # plot the statistics
                self.printConsole("add data to tensorboard")
                stats = [
                    self.reward_sum,
                    self.agent.avg_q_max / float(self.step),
                    self.agent.loss_sum / float(self.step),
                    self.score_op_sum
                ]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i],
                                        feed_dict={self.agent.summary_placeholders[i]: float(stats[i])})
                summary_str = self.agent.sess.run(self.agent.summary_op)
                self.agent.summary_writer.add_summary(summary_str,
                                                      self.global_step / self.stats_steps)
                self.reward_sum, self.agent.avg_q_max, self.agent.loss_sum = 0, 0, 0
                self.score_op_sum, self.step = 0, 0
            ##############################################################################

            if (received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()

                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
            ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
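# The position controller and the role functions above call helper.degree2radian()
# and helper.distance(), which live in a separate helper module that is not part of
# this file. The following is only a minimal sketch of plausible implementations,
# inferred from how the two functions are used (distance(x1, x2, y1, y2) as a planar
# Euclidean distance, degree2radian(deg) as a unit conversion); the actual helper
# module may differ.
import math


def distance(x1, x2, y1, y2):
    # Euclidean distance between the points (x1, y1) and (x2, y2)
    return math.sqrt(math.pow(x1 - x2, 2) + math.pow(y1 - y2, 2))


def degree2radian(deg):
    # convert an angle given in degrees to radians
    return deg * math.pi / 180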
class Component(ApplicationSession):

    def __init__(self, config):
        ApplicationSession.__init__(self, config)

    def printConsole(self, message):
        print(message)
        sys.__stdout__.flush()

    def onConnect(self):
        self.join(self.config.realm)

    @inlineCallbacks
    def onJoin(self, details):

        ##############################################################################
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]

            self.wheels = [0 for _ in range(10)]

            self.state_size = 32  # 3*robot + 2*ball
            self.action_size = 10  # 2*my robot
            self.agent = DDPGAgent(self.state_size, self.action_size)

            self.global_step = 0  # iteration step
            self.save_every_steps = 12000  # save the model
            self.stats_steps = 6000  # for tensorboard
            self.reward_sum = 0
            # self.score_op = False
            self.score_sum = 0
            self.deadlock_flag = [[False for _ in range(5)], [False for _ in range(5)]]
            return
        ##############################################################################

        try:
            info = yield self.call(u'aiwc.get_info', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            try:
                self.sub = yield self.subscribe(self.on_event, args.key)
            except Exception as e2:
                self.printConsole("Error: {}".format(e2))

        init_variables(self, info)

        try:
            yield self.call(u'aiwc.ready', args.key)
        except Exception as e:
            self.printConsole("Error: {}".format(e))
        else:
            self.printConsole("I am ready for the game!")

    def get_coord(self, received_frame):
        self.cur_ball = received_frame.coordinates[BALL]
        self.cur_my_posture = received_frame.coordinates[MY_TEAM]
        self.cur_op_posture = received_frame.coordinates[OP_TEAM]

    def get_reward(self, reset_reason):
        pre_potential = helper.dipole_potential(self.pre_ball[X], self.pre_ball[Y], 2.1, 3)
        cur_potential = helper.dipole_potential(self.cur_ball[X], self.cur_ball[Y], 2.1, 3)
        reward = cur_potential - pre_potential
        # Add a deadlock penalty

        if (reset_reason == SCORE_MYTEAM):
            self.score_sum += 1
            reward += 10
            self.printConsole("my team goal")

        if (reset_reason == SCORE_OPPONENT):
            self.score_sum -= 1
            reward -= 10
            self.printConsole("op team goal")

        self.printConsole("reward: " + str(reward))
        self.pre_ball = self.cur_ball
        return reward

    @inlineCallbacks
    def on_event(self, f):

        @inlineCallbacks
        def set_wheel(self, robot_wheels):
            yield self.call(u'aiwc.set_speed', args.key, robot_wheels)
            return

        # initiate an empty frame
        received_frame = Frame()

        if 'time' in f:
            received_frame.time = f['time']
        if 'score' in f:
            received_frame.score = f['score']
        if 'reset_reason' in f:
            received_frame.reset_reason = f['reset_reason']
        if 'coordinates' in f:
            received_frame.coordinates = f['coordinates']
        if 'EOF' in f:
            self.end_of_frame = f['EOF']

        #self.printConsole(received_frame.time)
        #self.printConsole(received_frame.score)
        #self.printConsole(received_frame.reset_reason)
        #self.printConsole(self.end_of_frame)

        ##############################################################################
        if (self.end_of_frame):

            # How to get the robot and ball coordinates: (ROBOT_ID can be 0, 1, 2, 3, 4)
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[MY_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][X])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][Y])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TH])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][ACTIVE])
            #self.printConsole(received_frame.coordinates[OP_TEAM][ROBOT_ID][TOUCH])
            #self.printConsole(received_frame.coordinates[BALL][X])
            #self.printConsole(received_frame.coordinates[BALL][Y])

            self.get_coord(received_frame)
            ##############################################################################

            # Next state
            next_state = [
                round(self.cur_my_posture[0][X] / 2.05, 2),
                round(self.cur_my_posture[0][Y] / 1.35, 2),
                round(self.cur_my_posture[0][TH] / (2 * math.pi), 2),
                round(self.cur_my_posture[1][X] / 2.05, 2),
                round(self.cur_my_posture[1][Y] / 1.35, 2),
                round(self.cur_my_posture[1][TH] / (2 * math.pi), 2),
                round(self.cur_my_posture[2][X] / 2.05, 2),
                round(self.cur_my_posture[2][Y] / 1.35, 2),
                round(self.cur_my_posture[2][TH] / (2 * math.pi), 2),
                round(self.cur_my_posture[3][X] / 2.05, 2),
                round(self.cur_my_posture[3][Y] / 1.35, 2),
                round(self.cur_my_posture[3][TH] / (2 * math.pi), 2),
                round(self.cur_my_posture[4][X] / 2.05, 2),
                round(self.cur_my_posture[4][Y] / 1.35, 2),
                round(self.cur_my_posture[4][TH] / (2 * math.pi), 2),
                round(self.cur_op_posture[0][X] / 2.05, 2),
                round(self.cur_op_posture[0][Y] / 1.35, 2),
                round(self.cur_op_posture[0][TH] / (2 * math.pi), 2),
                round(self.cur_op_posture[1][X] / 2.05, 2),
                round(self.cur_op_posture[1][Y] / 1.35, 2),
                round(self.cur_op_posture[1][TH] / (2 * math.pi), 2),
                round(self.cur_op_posture[2][X] / 2.05, 2),
                round(self.cur_op_posture[2][Y] / 1.35, 2),
                round(self.cur_op_posture[2][TH] / (2 * math.pi), 2),
                round(self.cur_op_posture[3][X] / 2.05, 2),
                round(self.cur_op_posture[3][Y] / 1.35, 2),
                round(self.cur_op_posture[3][TH] / (2 * math.pi), 2),
                round(self.cur_op_posture[4][X] / 2.05, 2),
                round(self.cur_op_posture[4][Y] / 1.35, 2),
                round(self.cur_op_posture[4][TH] / (2 * math.pi), 2),
                round(self.cur_ball[X] / 2.05, 2),
                round(self.cur_ball[Y] / 1.35, 2)
            ]
            next_state = np.reshape([next_state], (1, self.state_size, 1))
            next_history = np.append(next_state, self.agent.history[:, :, :3], axis=2)

            # Reward
            reward = self.get_reward(received_frame.reset_reason)

            # Reset
            if (received_frame.reset_reason != NONE):
                reset = True
                self.printConsole("reset reason: " + str(received_frame.reset_reason))
            else:
                reset = False

            # save the sample <s, a, r, s'> to the replay memory
            self.agent.replay_memory(self.agent.history, self.agent.action, reward,
                                     next_history, reset)

            # every time step do the training
            if len(self.agent.memory) >= self.agent.train_start:
                self.agent.train_replay()

            # save the history and get the next action
            self.agent.history = next_history
            self.agent.action = self.agent.get_action(np.reshape(self.agent.history, (1, -1)))

            # Set wheels
            for i in range(10):
                self.wheels[i] = self.agent.action[i] * self.max_linear_velocity

            # self.printConsole("left wheel: " + str(self.wheels[8]) + " right wheel: " + str(self.wheels[9]))
            self.printConsole("wheels: " + str(self.wheels[:2]))
            set_wheel(self, self.wheels)

            # go one step
            self.global_step += 1
            self.reward_sum += reward

            ##############################################################################
            # for tensorboard
            self.agent.avg_q_max += self.agent.critic.predict([
                np.reshape(self.agent.history, (1, -1)),
                np.reshape(self.agent.action, (1, -1))
            ])[0]
            ##############################################################################

            if self.global_step % 50 == 0:
                # print the current status
                self.printConsole("step: " + str(self.global_step) +
                                  ", Epsilon: " + str(self.agent.epsilon))

            if self.global_step % self.save_every_steps == 0:
                # save the model
                self.agent.save_model("./save_model/simple_ddpg")
                self.printConsole("Saved model")

            if self.global_step % self.stats_steps == 0:
                # plot the statistics
                self.printConsole("add data to tensorboard")
                stats = [
                    self.reward_sum,
                    self.agent.avg_q_max / float(self.stats_steps),
                    self.agent.loss_sum / float(self.stats_steps),
                    self.score_sum
                ]
                for i in range(len(stats)):
                    self.agent.sess.run(self.agent.update_ops[i],
                                        feed_dict={self.agent.summary_placeholders[i]: float(stats[i])})
                summary_str = self.agent.sess.run(self.agent.summary_op)
                self.agent.summary_writer.add_summary(summary_str,
                                                      self.global_step / self.stats_steps)
                self.reward_sum, self.agent.avg_q_max, self.agent.loss_sum = 0, 0, 0
                self.score_sum = 0
            ##############################################################################

            if (received_frame.reset_reason == GAME_END):
                #(virtual finish() in random_walk.cpp)
                #save your data
                with open(args.datapath + '/result.txt', 'w') as output:
                    #output.write('yourvariables')
                    output.close()

                #unsubscribe; reset or leave
                yield self.sub.unsubscribe()
                try:
                    yield self.leave()
                except Exception as e:
                    self.printConsole("Error: {}".format(e))

            self.end_of_frame = False
            ##############################################################################

    def onDisconnect(self):
        if reactor.running:
            reactor.stop()
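# get_reward() above shapes the step reward with helper.dipole_potential(x, y, 2.1, 3),
# whose implementation is not included in this file. The sketch below is only one
# hypothetical form consistent with how it is called: a dipole-style potential whose
# positive pole sits at the opponent goal (+d, 0) and whose negative pole sits at our
# goal (-d, 0), scaled by c, so that cur_potential - pre_potential rewards pushing the
# ball toward the opponent goal. The real helper module may use a different formula.
import math


def dipole_potential(x, y, d, c):
    # distances from the ball at (x, y) to the two poles, with a small epsilon to
    # avoid division by zero if the ball ever sits exactly on a pole
    eps = 1e-6
    to_opponent_goal = math.sqrt((d - x) ** 2 + y ** 2)
    to_own_goal = math.sqrt((d + x) ** 2 + y ** 2)
    # larger when the ball is close to the opponent goal, smaller near our own goal
    return c * (1.0 / (to_opponent_goal + eps) - 1.0 / (to_own_goal + eps))


# usage mirroring get_reward(): the step reward is the change in potential between frames
# reward = dipole_potential(cur_ball_x, cur_ball_y, 2.1, 3) - dipole_potential(pre_ball_x, pre_ball_y, 2.1, 3)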