Example #1
    def __init__(self, sim=None, memory=None):
        '''init memory space and sim space'''

        # sim space
        self.sim = sim if sim else Sim()

        # memory space with vars and funs
        if not memory:
            self.memory = {**sys_funs}
        else:
            self.memory = memory
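
A minimal usage sketch for the constructor above, assuming the surrounding module defines Sim and sys_funs; "Interpreter" is a hypothetical stand-in for the class name, which the snippet does not show:

# Hypothetical usage; Interpreter stands in for the class defined above.
default_interp = Interpreter()                 # fresh Sim(), memory seeded with sys_funs
custom_interp = Interpreter(sim=Sim(),         # share an existing simulation object
                            memory={'x': 42})  # caller-supplied memory; sys_funs is not merged in this branch
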
Example #2
def simulate(starting_temp, ambient_temp, controller, renderer, t=.036):
    sim = Sim(starting_temp, ambient_temp)
    
    for i in range(NUM_TICS):
        renderer.render(sim, controller)
        controller.before_tick(sim, tictime=t)
        sim.tick(tictime=t)
        controller.after_tick(sim, tictime=t)
        
    sim.cleanup()
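
A usage sketch for simulate() with no-op stand-ins; the Controller/Renderer stubs below are assumptions, while Sim and NUM_TICS are expected to come from the surrounding module:

class NoOpController:
    def before_tick(self, sim, tictime):
        pass
    def after_tick(self, sim, tictime):
        pass

class NullRenderer:
    def render(self, sim, controller):
        pass  # a real renderer would draw or log the sim state here

simulate(starting_temp=90.0, ambient_temp=20.0,
         controller=NoOpController(), renderer=NullRenderer())
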
Example #3
class Trainer(object):
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.ddpg = DDPG(a_dim=A_DIM, s_dim=S_DIM, batch_size=64, memory_capacity=100000, gamma=GAMMA, lr_a=0.001) #gamma=0.98
        self.ep_reward = 0.0
        self.current_ep = 0
        self.current_step = 0
        self.current_action = np.array([.0, .0, .0])
        self.done = True # if the episode is done
        self.var = VAR_INIT
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized DDPG")

        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)
        """
        self.sub = rospy.Subscriber('robot_pose', PA, self.callback, queue_size=1)
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.state = PA()
        self.updated = False # if s is updated
        self.got_callback1 = False
        self.got_callback2 = False
        print("Initialized communication")
        """

        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.scaler = 1/0.3
        self.sample_target()
        print("Read target data")

        #self.ddpg.restore_momery()
        #self.ddpg.restore_model()

        while not (rospy.is_shutdown()):
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    #rospy.sleep(0.5)
                    p = self.sim.current_pose
                    s = np.vstack((p, self.target))
                    s = s[3:,:]
                    s = self.normalize_state(s)
                    norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                    noise_a = np.random.normal(norm_a, self.var)
                    action = np.clip(noise_a, -1.0, 1.0)
                    self.current_action = action*A_BOUND + A_BOUND
                    try:
                        p_ = self.sim.update_pose(self.current_action)
                    except:
                        print self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                        print s.reshape(6)[-S_DIM:]
                    s_ = np.vstack((p_, self.target))

                    s_ = s_[3:,:]
                    s_ = self.normalize_state(s_)
                    self.compute_reward(s[0,:], s_[1,:])

                    self.current_step += 1

                    if self.reward > GOT_GOAL:
                        #self.reward += 1.0
                        self.ep_reward += self.reward
                        if self.current_ep % 10 == 0:
                            self.reward_record.append(self.ep_reward / self.current_step)
                            self.ep_record.append(self.current_ep)
                            plt.plot(self.ep_record, self.reward_record)
                            plt.ylim([-1.5,0.0])
                            self.fig.canvas.draw()
                            self.fig.savefig('learning.png')
                            print('\n')
                            print "Target Reached"
                            print("Normalized action:")
                            print norm_a
                            print("Noise action:")
                            print action
                            print("Output action:")
                            print self.current_action
                            print("Reward: %f" % self.reward)
                            print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,)
                            print('*' * 40)
                            self.ddpg.save_model()
                            self.ddpg.save_memory()
                        self.done = True
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        self.ep_reward = 0
                        #self.ddpg.save_memory()
                        #self.ddpg.save_model()
                        """
                        self.current_action = np.array([.0, .0, .0])
                        self.action_V3.x, self.action_V3.y, self.action_V3.z \
                            = self.current_action[0], self.current_action[1], self.current_action[2]
                        self.run_action(self.action_V3)
                        """

                    else:
                        self.ep_reward += self.reward
                        if self.current_step == MAX_EP_STEPS:
                            if self.current_ep % 10 == 0:
                                self.reward_record.append(self.ep_reward / self.current_step)
                                self.ep_record.append(self.current_ep)
                                plt.plot(self.ep_record, self.reward_record)
                                plt.ylim([-1.5, 0.0])
                                self.fig.canvas.draw()
                                self.fig.savefig('learning.png')
                                print('\n')
                                print "Target Failed"
                                print("Normalized action:")
                                print norm_a
                                print("Noise action:")
                                print action
                                print("Output action:")
                                print self.current_action
                                print("Reward: %f" % self.reward)
                                print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,)
                                print('*' * 40)
                                self.ddpg.save_model()
                                self.ddpg.save_memory()
                            self.done = False
                            self.current_step = 0
                            self.current_ep += 1
                            self.sample_target()
                            self.ep_reward = 0
                            #self.ddpg.save_memory()
                            #self.ddpg.save_model()
                            """
                            self.current_action = np.array([.0, .0, .0])
                            self.action_V3.x, self.action_V3.y, self.action_V3.z \
                                = self.current_action[0], self.current_action[1], self.current_action[2]
                            self.run_action(self.action_V3)
                            """
                    self.ddpg.store_transition(s.reshape(6)[-S_DIM:], action, self.reward, s_.reshape(6)[-S_DIM:])
                    if self.ddpg.pointer > TRAIN_POINT:
                        #if (self.current_step % 10 == 0):
                        #self.var *= VAR_DECAY
                        self.var = max(0.0, self.var - 1.02 / (MAX_EP_STEPS * MAX_EPISODES))
                        self.ddpg.learn()
                    self.pub_state(s_)

            else:
                p = self.sim.current_pose
                s = np.vstack((p, self.target))
                s = s[3:,:]
                s = self.normalize_state(s)
                norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                self.current_action = norm_a * A_BOUND + A_BOUND
                print("Normalized action:")
                print norm_a
                print("Current action:")
                print self.current_action

                p_ = self.sim.update_pose(self.current_action)
                s_ = np.vstack((p_, self.target))
                s_ = s_[3:,:]
                print("Distance: %f" % np.linalg.norm(s_[0,:]-s_[1,:]))
                s_ = self.normalize_state(s_)

                self.compute_reward(s_[0, :], s_[1, :])
                print('Explore: %.2f' % self.var,)
                print("Reward: %f" % self.reward)
                rospy.sleep(1)
                #print
                print self.ddpg.get_value(s.reshape(6)[-S_DIM:],norm_a,self.reward.reshape((-1,1)),s_.reshape(6)[-S_DIM:])
                print '\n'
                self.pub_state(s_)

                if self.reward > GOT_GOAL:
                    self.done = True
                    self.current_step = 0
                    self.current_ep += 1
                    self.sample_target()
                    print "Target Reached"
                    print("Episode %i Ended" % self.current_ep)
                    print("Reward: %f" % self.reward)
                    print('Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,)
                    print('*' * 40)
                    self.ep_reward = 0
                    # self.ddpg.save_memory()
                    # self.ddpg.save_model()
                    """
                    self.current_action = np.array([.0, .0, .0])
                    self.action_V3.x, self.action_V3.y, self.action_V3.z \
                        = self.current_action[0], self.current_action[1], self.current_action[2]
                    self.run_action(self.action_V3)
                    """

                else:
                    self.done = False
                    self.current_step += 1
                    if self.current_step == 2:
                        print "Target Failed"
                        print("Reward: %f" % self.reward)
                        print("Episode %i Ends" % self.current_ep)
                        print(
                            'Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,)
                        print('*' * 40)
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        self.ep_reward = 0
                        # self.ddpg.save_memory()
                        # self.ddpg.save_model()
                        """
                        self.current_action = np.array([.0, .0, .0])
                        self.action_V3.x, self.action_V3.y, self.action_V3.z \
                            = self.current_action[0], self.current_action[1], self.current_action[2]
                        self.run_action(self.action_V3)
                        """


    """
    def callback(self, pa):
        n_px = (np.array([pa.poses[i].position.x for i in range(4)]) - self.x_offset)*self.scaler
        n_py = (np.array([pa.poses[i].position.y for i in range(4)]) - self.y_offset)*self.scaler
        n_pz = (np.array([pa.poses[i].position.z for i in range(4)]) - self.z_offset)*self.scaler
        self.end = np.array((n_px[3], n_py[3], n_pz[3]))
        self.n_t = (self.target - np.array([0,0,Z_OFFSET]))*self.scaler
        self.s = np.concatenate((n_px, n_py, n_pz, self.n_t))
        self.pub_state(n_px, n_py, n_pz, self.n_t)
        self.updated = True
    """


    def sample_target(self):
        self.target = self.ends[np.random.randint(self.ends.shape[0])]
    """
    def run_action(self,control):
        try:
            client = rospy.ServiceProxy('airpress_control', OneSeg)
            resp = client(control)
            return resp.status
        except rospy.ServiceException, e:
            print "Service call failed: %s"%e
    """

    def compute_reward(self,end,target):
        error = target - end
        self.reward = -np.linalg.norm(error)
        #print np.linalg.norm(error)

    """
    def pub_state(self, n_px, n_py, n_pz, n_t):
        pts = list()
        for i in range(4):
            pt = Point()
            pt.x = n_px[i]
            pt.y = n_py[i]
            pt.z = n_pz[i]
            pts.append(pt)
        pt = Point()
        pt.x, pt.y, pt.z = n_t[0], n_t[1], n_t[2]
        pts.append(pt)
        self.pc.points = pts
        self.pub.publish(self.pc)
    """
    def pub_state(self, state):
        pts = list()
        for i in range(state.shape[0]):
            pt = Point()
            pt.x = state[i,0]
            pt.y = state[i,1]
            pt.z = state[i,2]
            pts.append(pt)
        self.pc.points = pts
        self.pub.publish(self.pc)
        pts = list()
        for i in range(state.shape[0]):
            pt = Point()
            pt.x = state[i, 0] / 10.0
            pt.y = state[i, 1] / 10.0
            pt.z = state[i, 2] / 35.0 + 0.42
            pts.append(pt)
        self.pc.points = pts
        self.pub1.publish(self.pc)


    def normalize_state(self,state):
        offset = np.array([0,0,0.42])
        scaler = np.array([10,10,35])
        s = state - offset
        s = np.multiply(s, scaler)
        return s

    def calculate_dist(self, state):
        offset = np.array([0, 0, 0.42])
        scaler = np.array([10, 10, 35])
        s = np.multiply(state,1.0/scaler)
        s += offset
        return np.linalg.norm(s)
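
normalize_state() above subtracts the fixed offset [0, 0, 0.42] and scales by [10, 10, 35], so a raw pose near (0, 0, 0.42) maps to roughly the normalized origin; calculate_dist() applies the inverse mapping before taking the norm. A quick sketch checking that round trip (the numbers are illustrative only):

import numpy as np

offset = np.array([0.0, 0.0, 0.42])
scaler = np.array([10.0, 10.0, 35.0])

raw = np.array([0.05, -0.02, 0.43])   # an illustrative pose
norm = (raw - offset) * scaler        # what normalize_state() computes
back = norm / scaler + offset         # the inverse, as in calculate_dist()
assert np.allclose(back, raw)         # the two transforms undo each other
print(norm)                           # -> [ 0.5  -0.2   0.35]
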
Example #4
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.ddpg = DDPG(a_dim=A_DIM, s_dim=S_DIM, batch_size=64, memory_capacity=100000, gamma=GAMMA, lr_a=0.001) #gamma=0.98
        self.ep_reward = 0.0
        self.current_ep = 0
        self.current_step = 0
        self.current_action = np.array([.0, .0, .0])
        self.done = True # if the episode is done
        self.var = VAR_INIT
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized DDPG")

        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)
        """
        self.sub = rospy.Subscriber('robot_pose', PA, self.callback, queue_size=1)
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.state = PA()
        self.updated = False # if s is updated
        self.got_callback1 = False
        self.got_callback2 = False
        print("Initialized communication")
        """

        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.scaler = 1/0.3
        self.sample_target()
        print("Read target data")

        #self.ddpg.restore_momery()
        #self.ddpg.restore_model()

        while not (rospy.is_shutdown()):
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    #rospy.sleep(0.5)
                    p = self.sim.current_pose
                    s = np.vstack((p, self.target))
                    s = s[3:,:]
                    s = self.normalize_state(s)
                    norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                    noise_a = np.random.normal(norm_a, self.var)
                    action = np.clip(noise_a, -1.0, 1.0)
                    self.current_action = action*A_BOUND + A_BOUND
                    try:
                        p_ = self.sim.update_pose(self.current_action)
                    except:
                        print self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                        print s.reshape(6)[-S_DIM:]
                    s_ = np.vstack((p_, self.target))

                    s_ = s_[3:,:]
                    s_ = self.normalize_state(s_)
                    self.compute_reward(s[0,:], s_[1,:])

                    self.current_step += 1

                    if self.reward > GOT_GOAL:
                        #self.reward += 1.0
                        self.ep_reward += self.reward
                        if self.current_ep % 10 == 0:
                            self.reward_record.append(self.ep_reward / self.current_step)
                            self.ep_record.append(self.current_ep)
                            plt.plot(self.ep_record, self.reward_record)
                            plt.ylim([-1.5,0.0])
                            self.fig.canvas.draw()
                            self.fig.savefig('learning.png')
                            print('\n')
                            print "Target Reached"
                            print("Normalized action:")
                            print norm_a
                            print("Noise action:")
                            print action
                            print("Output action:")
                            print self.current_action
                            print("Reward: %f" % self.reward)
                            print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,)
                            print('*' * 40)
                            self.ddpg.save_model()
                            self.ddpg.save_memory()
                        self.done = True
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        self.ep_reward = 0
                        #self.ddpg.save_memory()
                        #self.ddpg.save_model()
                        """
                        self.current_action = np.array([.0, .0, .0])
                        self.action_V3.x, self.action_V3.y, self.action_V3.z \
                            = self.current_action[0], self.current_action[1], self.current_action[2]
                        self.run_action(self.action_V3)
                        """

                    else:
                        self.ep_reward += self.reward
                        if self.current_step == MAX_EP_STEPS:
                            if self.current_ep % 10 == 0:
                                self.reward_record.append(self.ep_reward / self.current_step)
                                self.ep_record.append(self.current_ep)
                                plt.plot(self.ep_record, self.reward_record)
                                plt.ylim([-1.5, 0.0])
                                self.fig.canvas.draw()
                                self.fig.savefig('learning.png')
                                print('\n')
                                print "Target Failed"
                                print("Normalized action:")
                                print norm_a
                                print("Noise action:")
                                print action
                                print("Output action:")
                                print self.current_action
                                print("Reward: %f" % self.reward)
                                print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,)
                                print('*' * 40)
                                self.ddpg.save_model()
                                self.ddpg.save_memory()
                            self.done = False
                            self.current_step = 0
                            self.current_ep += 1
                            self.sample_target()
                            self.ep_reward = 0
                            #self.ddpg.save_memory()
                            #self.ddpg.save_model()
                            """
                            self.current_action = np.array([.0, .0, .0])
                            self.action_V3.x, self.action_V3.y, self.action_V3.z \
                                = self.current_action[0], self.current_action[1], self.current_action[2]
                            self.run_action(self.action_V3)
                            """
                    self.ddpg.store_transition(s.reshape(6)[-S_DIM:], action, self.reward, s_.reshape(6)[-S_DIM:])
                    if self.ddpg.pointer > TRAIN_POINT:
                        #if (self.current_step % 10 == 0):
                        #self.var *= VAR_DECAY
                        self.var = max(0.0, self.var - 1.02 / (MAX_EP_STEPS * MAX_EPISODES))
                        self.ddpg.learn()
                    self.pub_state(s_)

            else:
                p = self.sim.current_pose
                s = np.vstack((p, self.target))
                s = s[3:,:]
                s = self.normalize_state(s)
                norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:])
                self.current_action = norm_a * A_BOUND + A_BOUND
                print("Normalized action:")
                print norm_a
                print("Current action:")
                print self.current_action

                p_ = self.sim.update_pose(self.current_action)
                s_ = np.vstack((p_, self.target))
                s_ = s_[3:,:]
                print("Distance: %f" % np.linalg.norm(s_[0,:]-s_[1,:]))
                s_ = self.normalize_state(s_)

                self.compute_reward(s_[0, :], s_[1, :])
                print('Explore: %.2f' % self.var,)
                print("Reward: %f" % self.reward)
                rospy.sleep(1)
                #print
                print self.ddpg.get_value(s.reshape(6)[-S_DIM:],norm_a,self.reward.reshape((-1,1)),s_.reshape(6)[-S_DIM:])
                print '\n'
                self.pub_state(s_)

                if self.reward > GOT_GOAL:
                    self.done = True
                    self.current_step = 0
                    self.current_ep += 1
                    self.sample_target()
                    print "Target Reached"
                    print("Episode %i Ended" % self.current_ep)
                    print("Reward: %f" % self.reward)
                    print('Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,)
                    print('*' * 40)
                    self.ep_reward = 0
                    # self.ddpg.save_memory()
                    # self.ddpg.save_model()
                    """
                    self.current_action = np.array([.0, .0, .0])
                    self.action_V3.x, self.action_V3.y, self.action_V3.z \
                        = self.current_action[0], self.current_action[1], self.current_action[2]
                    self.run_action(self.action_V3)
                    """

                else:
                    self.done = False
                    self.current_step += 1
                    if self.current_step == 2:
                        print "Target Failed"
                        print("Reward: %f" % self.reward)
                        print("Episode %i Ends" % self.current_ep)
                        print(
                            'Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,)
                        print('*' * 40)
                        self.current_step = 0
                        self.current_ep += 1
                        self.sample_target()
                        self.ep_reward = 0
                        # self.ddpg.save_memory()
                        # self.ddpg.save_model()
                        """
Example #5
def train():
    print('dqn_setting')
    sess = tf.Session()

    game = Sim(SCREEN_WIDTH, SCREEN_HEIGHT, show_game=False)
    brain = DQN(sess, SCREEN_WIDTH, SCREEN_HEIGHT, NUM_ACTION)

    rewards = tf.placeholder(tf.float32, [None])
    tf.summary.scalar('avg.reward/ep.', tf.reduce_mean(rewards))

    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    writer = tf.summary.FileWriter('logs', sess.graph)
    summary_merged = tf.summary.merge_all()

    # initialize target network
    brain.update_target_network()

    # epsilon starts at 1.0 and decays so that, over time, actions come from the DQN instead of random choice
    epsilon = 1.0
    # number of frames
    time_step = 0
    total_reward_list = []

    # main training loop over episodes
    for episode in range(MAX_EPISODE):
        terminal = False
        total_reward = 0

        # reset the game and get the initial state (= position data)
        state = game.reset()
        brain.init_state(state)  # set state of dqn

        while not terminal:
            # with probability epsilon take a random action, otherwise ask the DQN;
            # random actions become rarer as epsilon decays
            if np.random.rand() < epsilon:
                action = random.randrange(NUM_ACTION)
            else:
                action = brain.get_action()

            # decay epsilon once past the observation period
            if episode > OBSERVE:
                epsilon -= 1 / 1000

            # advance the game one step and read back (state, reward, is_gameover)
            state, reward, terminal = game.step(action)
            total_reward += reward

            # store the transition in replay memory
            brain.remember(state, action, reward, terminal)

            # after the observation period, train at a fixed interval
            if time_step > OBSERVE and time_step % TRAIN_INTERVAL == 0:
                brain.train()

            # periodically sync the online network into the target network
            if time_step % TARGET_UPDATE_INTERVAL == 0:
                brain.update_target_network()

            time_step += 1

        print('Number of game: %d  Score: %d' % (episode + 1, total_reward))

        total_reward_list.append(total_reward)

        if episode % 10 == 0:
            summary = sess.run(summary_merged,
                               feed_dict={rewards: total_reward_list})
            writer.add_summary(summary, time_step)
            total_reward_list = []

        if episode % 100 == 0:
            saver.save(sess, 'model/dqn.ckpt', global_step=time_step)
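
For reference, the epsilon-greedy choice inside the loop above can be isolated into a small helper; this is a sketch under the same assumptions as the example (the brain object exposes get_action() and there are NUM_ACTION discrete actions):

import random
import numpy as np

def select_action(brain, epsilon, num_actions):
    """Epsilon-greedy selection mirroring the training loop: a random action
    with probability epsilon, otherwise the DQN's greedy action."""
    if np.random.rand() < epsilon:
        return random.randrange(num_actions)
    return brain.get_action()
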
Example #6
def main():

    parser = argparse.ArgumentParser()

    parser.add_argument("n", type=int, help="Samples per task")
    parser.add_argument("db_name", help="Name of the database to use")
    parser.add_argument("bot_name", help="Name of the bot folder")
    parser.add_argument("features", help="Path to json file with bot features")

    parser.add_argument("-db_ip", default="localhost", help="Database address")
    parser.add_argument("-db_pwd", default="root", help="User's password")
    parser.add_argument("-db_user", default="root", help="Database user")
    parser.add_argument("-db_port", default="3306", help="Database port")

    parser.add_argument("-log_file",
                        default="/var/www/vui/state.json",
                        help="Log file to save the state")

    parser.add_argument('-e',
                        '--epochs',
                        default=180,
                        type=int,
                        help="Epochs for training")
    parser.add_argument('-m',
                        '--max_history',
                        default=5,
                        type=int,
                        help="History size feature")
    parser.add_argument('-b',
                        '--batch_size',
                        default=32,
                        type=int,
                        help="Batch size for training")
    parser.add_argument('-v',
                        '--validation_split',
                        default=0.3,
                        type=float,
                        help="Validation splits for training")

    args = parser.parse_args()
    database = "mysql://{}:{}@{}:{}/{}".format(args.db_user, args.db_pwd,
                                               args.db_ip, args.db_port,
                                               args.db_name)

    db = Database.get_instance(database=database)
    #logger = Logger.get_instance(args.log_file)

    frame_factory = FrameFactory()
    goal_factory = GoalFactory()

    with open(args.features, "r") as f:
        tasks = json.load(f)['tasks']

    frames, goals = [], []
    for dial in range(int(args.n)):
        for task in tasks:
            frames.append(frame_factory.build_frames(task))
            goals.append(goal_factory.build_goals(task))

    #trainer = Seq2ActionAgentTrainer(sum(frames, []), database, RuleBasedEngine())
    trainer = FullAgentTrainer(sum(frames, []), database, RuleBasedEngine())
    #trainer = DialogManagerTrainer(sum(frames, []), database)

    #dumper = #dumper(args.bot_name, trainer, domain_file_path="/var/www/vui/",
    #							 virtual_env="/var/www/vui/python_virtualenv/",
    #									 base_path="/var/www/vui/python/BOTs/",
    #										python_path="/var/www/vui/python/")

    im = InteractionManager(trainer)
    dm, sim = DM(im), Sim(im)

    # generate validation dialogs
    n_valid_samples = round(len(frames) * args.validation_split)
    generate_dialogs(n_valid_samples,
                     im,
                     dm,
                     sim,
                     trainer,
                     frames[:n_valid_samples],
                     goals[:n_valid_samples],
                     story_file_name="valid")
    # generate training dialogs
    n_train_samples = len(frames) - n_valid_samples
    generate_dialogs(n_train_samples,
                     im,
                     dm,
                     sim,
                     trainer,
                     frames[n_valid_samples:],
                     goals[n_valid_samples:],
                     story_file_name="story")

    print("[...] building domain.yml...")
    trainer.build_domain_yml()
    if isinstance(trainer, FullAgentTrainer):
        print("[...] building nlu training file...")
        trainer.build_phrases_file()
        print("[...] building dialogs.json file...")
        trainer.build_file_for_hit()
        print("[...] cleaning transcript file...")
        trainer.build_raw_transcript()
    if isinstance(trainer, Seq2ActionAgentTrainer):
        print("[...] building user lexicon file...")
        trainer.build_user_lexicon()
        print("[...] computing sentence embeddings...")
        trainer.compute_sentence_embeddings()
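
Going by the argparse definitions above, a hypothetical invocation of this script could look as follows; the script name and file paths are placeholders, while the flags match the parser:

python train_bot.py 10 vui_db my_bot ./features.json \
    -db_ip localhost -db_user root -db_pwd root -db_port 3306 \
    --epochs 180 --max_history 5 --batch_size 32 --validation_split 0.3
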
Example #7
#!/usr/bin/env python
"""
generate data by simulation
Zhiang Chen, Oct 30, 2017

MIT License
"""
import pickle
import numpy as np
import rospy
from simulator import Sim

rospy.init_node('simulator', anonymous=True)
sim = Sim()
actions = list()
poses = list()
for x in range(21):
    for y in range(21):
        for z in range(21):
            a = np.array([x,y,z]).astype(np.float32)
            p = sim.update_pose(a)
            actions.append(a)
            poses.append(p)

sim_data = {'actions':actions,'poses':poses}
pickle.dump(sim_data, open("./data/sim_data.p", "wb"))
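
The script sweeps a 21 x 21 x 21 grid of actions and pickles the resulting action/pose pairs. A short sketch of loading that file back into arrays, assuming it is read by the same Python version that wrote it:

import pickle
import numpy as np

with open('./data/sim_data.p', 'rb') as f:
    sim_data = pickle.load(f)

actions = np.stack(sim_data['actions'])  # shape (9261, 3): one row per grid point (21**3)
poses = np.stack(sim_data['poses'])      # shape depends on what Sim.update_pose returns
print(actions.shape, poses.shape)
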
Example #8
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.pfn = PFN(a_dim=A_DIM,
                       s_dim=S_DIM,
                       batch_size=8,
                       memory_capacity=MEMORY_NUM,
                       lr=0.001,
                       bound=A_BOUND)
        self.ep_reward = 0.0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized PFN")
        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)

        self.sub = rospy.Subscriber('Robot_1/pose',
                                    PS,
                                    self.callback,
                                    queue_size=1)
        rospy.wait_for_service('airpress_control', timeout=5)
        self.target_PS = PS()
        self.action_V3 = Vector3()
        self.updated = False  # if s is updated
        self.got_target = False
        print("Initialized communication")
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.sample_target()
        print("Read target data")

        #self.pfn.restore_momery()
        self.pfn.restore_model('model_pfn')

        memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100
        self.current_ep = 0
        self.current_step = 0
        while not (rospy.is_shutdown()):
            self.updated = False
            while (not self.updated) & (not rospy.is_shutdown()):
                rospy.sleep(0.1)
            real_target = self.real_target.copy()
            s = self.normalize_state(real_target)
            action, act_var = self.pfn.choose_action2(s)
            self.action_V3.x, self.action_V3.y, self.action_V3.z \
                = action[0], action[1], action[2]
            self.run_action(self.action_V3)
            print '\n'
            #rospy.sleep(1.0)
            '''
Example #9
File: gui.py  Project: furbrain/SAP5
    def __init__(self, *args, **kwds):
        # begin wxGlade: MyFrame.__init__
        kwds["style"] = kwds.get("style", 0) | wx.DEFAULT_FRAME_STYLE
        wx.Frame.__init__(self, *args, **kwds)
        self.SetSize((400, 300))

        # Menu Bar
        self.frame_menubar = wx.MenuBar()
        wxglade_tmp_menu = wx.Menu()
        wxglade_tmp_menu.Append(wx.ID_SAVE, "Screenshot", "")
        self.Bind(wx.EVT_MENU, self.OnSave, id=wx.ID_SAVE)
        wxglade_tmp_menu.Append(wx.ID_EXIT, "Quit", "")
        self.Bind(wx.EVT_MENU, self.OnQuit, id=wx.ID_EXIT)
        self.frame_menubar.Append(wxglade_tmp_menu, "File")
        self.SetMenuBar(self.frame_menubar)
        # Menu Bar end
        self.sim_image = ImagePanel(self, wx.ID_ANY)
        self.sim = Sim()
        self.single_click = wx.Button(self, wx.ID_ANY, "Single")
        self.double_click = wx.Button(self, wx.ID_ANY, "Double")
        self.single_click_copy_1 = wx.Button(self, wx.ID_ANY, "Long")
        self.down_flip = wx.Button(self, wx.ID_ANY, "Down")
        self.up_flip = wx.Button(self, wx.ID_ANY, "Up")
        self.left_flip = wx.Button(self, wx.ID_ANY, "Left")
        self.right_flip = wx.Button(self, wx.ID_ANY, "Right")
        self.azimuth = wx.Slider(self,
                                 wx.ID_ANY,
                                 0,
                                 0,
                                 359,
                                 style=wx.SL_HORIZONTAL | wx.SL_LABELS)
        self.inclination = wx.Slider(self,
                                     wx.ID_ANY,
                                     0,
                                     -90,
                                     90,
                                     style=wx.SL_HORIZONTAL | wx.SL_LABELS)
        self.roll = wx.Slider(self,
                              wx.ID_ANY,
                              0,
                              -180,
                              180,
                              style=wx.SL_HORIZONTAL | wx.SL_LABELS)

        self.__set_properties()
        self.__do_layout()

        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.SINGLE_CLICK),
                  self.single_click)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.DOUBLE_CLICK),
                  self.double_click)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.LONG_CLICK),
                  self.single_click_copy_1)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.FLIP_DOWN),
                  self.down_flip)
        self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_UP),
                  self.up_flip)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.FLIP_LEFT),
                  self.left_flip)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.FLIP_RIGHT),
                  self.right_flip)
        self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed,
                  self.azimuth)
        self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed,
                  self.inclination)
        self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.roll)
Example #10
File: gui.py  Project: furbrain/SAP5
class MyFrame(wx.Frame):
    def __init__(self, *args, **kwds):
        # begin wxGlade: MyFrame.__init__
        kwds["style"] = kwds.get("style", 0) | wx.DEFAULT_FRAME_STYLE
        wx.Frame.__init__(self, *args, **kwds)
        self.SetSize((400, 300))

        # Menu Bar
        self.frame_menubar = wx.MenuBar()
        wxglade_tmp_menu = wx.Menu()
        wxglade_tmp_menu.Append(wx.ID_SAVE, "Screenshot", "")
        self.Bind(wx.EVT_MENU, self.OnSave, id=wx.ID_SAVE)
        wxglade_tmp_menu.Append(wx.ID_EXIT, "Quit", "")
        self.Bind(wx.EVT_MENU, self.OnQuit, id=wx.ID_EXIT)
        self.frame_menubar.Append(wxglade_tmp_menu, "File")
        self.SetMenuBar(self.frame_menubar)
        # Menu Bar end
        self.sim_image = ImagePanel(self, wx.ID_ANY)
        self.sim = Sim()
        self.single_click = wx.Button(self, wx.ID_ANY, "Single")
        self.double_click = wx.Button(self, wx.ID_ANY, "Double")
        self.single_click_copy_1 = wx.Button(self, wx.ID_ANY, "Long")
        self.down_flip = wx.Button(self, wx.ID_ANY, "Down")
        self.up_flip = wx.Button(self, wx.ID_ANY, "Up")
        self.left_flip = wx.Button(self, wx.ID_ANY, "Left")
        self.right_flip = wx.Button(self, wx.ID_ANY, "Right")
        self.azimuth = wx.Slider(self,
                                 wx.ID_ANY,
                                 0,
                                 0,
                                 359,
                                 style=wx.SL_HORIZONTAL | wx.SL_LABELS)
        self.inclination = wx.Slider(self,
                                     wx.ID_ANY,
                                     0,
                                     -90,
                                     90,
                                     style=wx.SL_HORIZONTAL | wx.SL_LABELS)
        self.roll = wx.Slider(self,
                              wx.ID_ANY,
                              0,
                              -180,
                              180,
                              style=wx.SL_HORIZONTAL | wx.SL_LABELS)

        self.__set_properties()
        self.__do_layout()

        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.SINGLE_CLICK),
                  self.single_click)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.DOUBLE_CLICK),
                  self.double_click)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.LONG_CLICK),
                  self.single_click_copy_1)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.FLIP_DOWN),
                  self.down_flip)
        self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_UP),
                  self.up_flip)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.FLIP_LEFT),
                  self.left_flip)
        self.Bind(wx.EVT_BUTTON,
                  lambda evt: self.sim.set_input(Input.FLIP_RIGHT),
                  self.right_flip)
        self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed,
                  self.azimuth)
        self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed,
                  self.inclination)
        self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.roll)
        # end wxGlade

    def __set_properties(self):
        # begin wxGlade: MyFrame.__set_properties
        self.SetTitle("frame")
        # end wxGlade

    def __do_layout(self):
        # begin wxGlade: MyFrame.__do_layout
        sizer_2 = wx.BoxSizer(wx.VERTICAL)
        sizer_3 = wx.BoxSizer(wx.HORIZONTAL)
        sizer_6 = wx.StaticBoxSizer(
            wx.StaticBox(self, wx.ID_ANY, "Orientation"), wx.HORIZONTAL)
        grid_sizer_1 = wx.FlexGridSizer(3, 2, 0, 4)
        sizer_5 = wx.StaticBoxSizer(wx.StaticBox(self, wx.ID_ANY, "Flip"),
                                    wx.VERTICAL)
        sizer_4 = wx.StaticBoxSizer(wx.StaticBox(self, wx.ID_ANY, "Button"),
                                    wx.VERTICAL)
        sizer_2.Add(self.sim_image, 1, wx.EXPAND, 0)
        sizer_4.Add(self.single_click, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_4.Add(self.double_click, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_4.Add(self.single_click_copy_1, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_3.Add(sizer_4, 0, wx.ALL | wx.EXPAND, 2)
        sizer_5.Add(self.down_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_5.Add(self.up_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_5.Add(self.left_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_5.Add(self.right_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2)
        sizer_3.Add(sizer_5, 0, wx.ALL | wx.EXPAND, 2)
        label_1 = wx.StaticText(self,
                                wx.ID_ANY,
                                "Azimuth:",
                                style=wx.ALIGN_RIGHT)
        grid_sizer_1.Add(label_1, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_RIGHT,
                         0)
        grid_sizer_1.Add(self.azimuth, 0, wx.ALL | wx.EXPAND, 0)
        label_2 = wx.StaticText(self,
                                wx.ID_ANY,
                                "Inclination:",
                                style=wx.ALIGN_RIGHT)
        grid_sizer_1.Add(label_2, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_RIGHT,
                         0)
        grid_sizer_1.Add(self.inclination, 0, wx.ALL | wx.EXPAND, 0)
        label_3 = wx.StaticText(self, wx.ID_ANY, "Roll:", style=wx.ALIGN_RIGHT)
        grid_sizer_1.Add(label_3, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_RIGHT,
                         0)
        grid_sizer_1.Add(self.roll, 0, wx.ALL | wx.EXPAND, 0)
        grid_sizer_1.AddGrowableCol(1)
        sizer_6.Add(grid_sizer_1, 1, wx.ALL | wx.EXPAND, 3)
        sizer_3.Add(sizer_6, 4, wx.ALL, 2)
        sizer_2.Add(sizer_3, 0, wx.EXPAND, 0)
        self.SetSizer(sizer_2)
        self.Layout()
        # end wxGlade

    def orientation_changed(self, event):  # wxGlade: MyFrame.<event_handler>
        self.sim.sensor.set_orientation(self.azimuth.GetValue(),
                                        self.inclination.GetValue(),
                                        self.roll.GetValue())

    def message_handler(self, message):
        result = self.sim.handleMessage(message)
        if message[0:2] == [0x01, 0x78]:
            self.sim_image.paintNow(self.sim.get_image())
        return result

    def OnSave(self, event):  # wxGlade: MyFrame.<event_handler>
        fname = "screenshot.png"
        self.sim.get_image().save(fname, "PNG")

    def OnQuit(self, event):  # wxGlade: MyFrame.<event_handler>
        self.Close()
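
As a usage sketch, the frame above would typically be launched from a standard wxPython main loop; the snippet below is an assumption about the surrounding application, not code from the project:

import wx

if __name__ == "__main__":
    app = wx.App(False)
    frame = MyFrame(None, wx.ID_ANY, "")
    frame.Show()
    app.MainLoop()
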
Example #11
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.pfn = PFN(a_dim=A_DIM,
                       s_dim=S_DIM,
                       batch_size=8,
                       memory_capacity=MEMORY_NUM,
                       lr=0.001,
                       bound=A_BOUND)
        self.ep_reward = 0.0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized PFN")
        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.sample_target()
        print("Read target data")

        self.pfn.restore_momery()
        self.pfn.restore_model()

        memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100
        self.current_ep = 0
        self.current_step = 0
        while not (rospy.is_shutdown()):
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    #rospy.sleep(0.5)
                    s = self.normalize_state(self.target)
                    #print 'x'
                    #print s
                    action, prob = self.pfn.choose_action(s)
                    s_ = self.sim.update_pose(action)[-1, :]
                    self.compute_reward(self.target, s_)

                    #print action
                    #print self.pfn.raw_action(s)
                    #print self.target
                    #print s_
                    #print np.linalg.norm(self.target - s_)
                    #print self.reward
                    transition = np.hstack(
                        (self.target, action, self.reward, prob))
                    memory_ep[self.current_step] = transition
                    self.current_step += 1

                    state = np.array([s_, self.target])
                    self.pub_state(state)

                    if self.pfn.pointer > TRAIN_POINT:
                        self.pfn.learn()

                else:

                    best_i = np.argsort(memory_ep[:, -2])[-1]
                    best_action = memory_ep[best_i, 3:6]
                    best_s_ = self.sim.update_pose(best_action)[-1, :]
                    best_distance = np.linalg.norm(self.target - best_s_)
                    """ #best action
                    print '*'
                    j = np.argsort(memory_ep[:,-2])[0]
                    print memory_ep[best_i, :]
                    print memory_ep[j, :]
                    print best_s_
                    print self.target
                    print best_action
                    print best_distance
                    """

                    self.current_step = 0
                    mean_action, act_var = self.pfn.choose_action2(s)
                    mean_s_ = self.sim.update_pose(mean_action)[-1, :]
                    mean_distance = np.linalg.norm(mean_s_ - self.target)

                    target_action, w = self.compute_weighted_action(memory_ep)
                    s_ = self.sim.update_pose(target_action)[-1, :]
                    target_distance = np.linalg.norm(s_ - self.target)
                    self.compute_reward(self.target, s_)

                    self.pfn.store_transition(s, target_action, mean_distance,
                                              np.var(w))
                    self.pfn.store_transition(s, best_action, best_distance,
                                              w[best_i])
                    self.current_ep += 1
                    self.sample_target()
                    memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100

                    if self.current_ep % 10 == 0:
                        self.reward_record.append(mean_distance)
                        self.ep_record.append(self.current_ep)
                        plt.plot(self.ep_record, self.reward_record)
                        plt.ylim([0.0, 0.1])
                        self.fig.canvas.draw()
                        self.fig.savefig('learning.png')
                        print('\n')
                        #print s
                        print('Episode:', self.current_ep)
                        print("Mean Action:")
                        print mean_action
                        print("Mean Distance:")
                        print mean_distance
                        print("Action Variance:")
                        print act_var
                        print("Target Action:")
                        print target_action
                        print("Target Distance:")
                        print target_distance
                        print("Weights Variance:")
                        print np.var(w)
                        print('*' * 40)
                        self.pfn.save_model()
                        self.pfn.save_memory()

            else:
                s = self.normalize_state(self.target)
                action, act_var = self.pfn.choose_action2(s)
                s_ = self.sim.update_pose(action)[-1, :]
                self.compute_reward(self.target, s_)

                print("Mean Action:")
                print action
                print("Action Variance:")
                print act_var
                print("Distance: %f" % np.linalg.norm(self.target - s_))
                print("Reward: %f" % self.reward)
                print '\n'
                state = np.array([s_, self.target])
                self.pub_state(state)
                rospy.sleep(1)
                self.sample_target()
Example #12
class Trainer(object):
    def __init__(self):
        """ Initializing DDPG """
        self.sim = Sim()
        self.pfn = PFN(a_dim=A_DIM,
                       s_dim=S_DIM,
                       batch_size=8,
                       memory_capacity=MEMORY_NUM,
                       lr=0.001,
                       bound=A_BOUND)
        self.ep_reward = 0.0
        self.current_action = np.array([.0, .0, .0])
        self.done = True  # if the episode is done
        self.reward_record = list()
        self.ep_record = list()
        self.fig = plt.gcf()
        self.fig.show()
        self.fig.canvas.draw()
        print("Initialized PFN")
        """ Setting communication"""
        self.pc = PC()
        self.pc.header.frame_id = 'world'
        self.pub = rospy.Publisher('normalized_state', PC, queue_size=10)
        self.pub1 = rospy.Publisher('state', PC, queue_size=10)
        """ Reading targets """
        """ The data should be w.r.t origin by base position """
        self.ends = pickle.load(open('./data/targets.p', 'rb'))
        self.x_offset = X_OFFSET
        self.y_offset = Y_OFFSET
        self.z_offset = Z_OFFSET
        self.sample_target()
        print("Read target data")

        self.pfn.restore_momery()
        self.pfn.restore_model()

        memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100
        self.current_ep = 0
        self.current_step = 0
        while not (rospy.is_shutdown()):
            if self.current_ep < MAX_EPISODES:
                if self.current_step < MAX_EP_STEPS:
                    #rospy.sleep(0.5)
                    s = self.normalize_state(self.target)
                    #print 'x'
                    #print s
                    action, prob = self.pfn.choose_action(s)
                    s_ = self.sim.update_pose(action)[-1, :]
                    self.compute_reward(self.target, s_)

                    #print action
                    #print self.pfn.raw_action(s)
                    #print self.target
                    #print s_
                    #print np.linalg.norm(self.target - s_)
                    #print self.reward
                    transition = np.hstack(
                        (self.target, action, self.reward, prob))
                    memory_ep[self.current_step] = transition
                    self.current_step += 1

                    state = np.array([s_, self.target])
                    self.pub_state(state)

                    if self.pfn.pointer > TRAIN_POINT:
                        self.pfn.learn()

                else:

                    best_i = np.argsort(memory_ep[:, -2])[-1]
                    best_action = memory_ep[best_i, 3:6]
                    best_s_ = self.sim.update_pose(best_action)[-1, :]
                    best_distance = np.linalg.norm(self.target - best_s_)
                    """ #best action
                    print '*'
                    j = np.argsort(memory_ep[:,-2])[0]
                    print memory_ep[best_i, :]
                    print memory_ep[j, :]
                    print best_s_
                    print self.target
                    print best_action
                    print best_distance
                    """

                    self.current_step = 0
                    mean_action, act_var = self.pfn.choose_action2(s)
                    mean_s_ = self.sim.update_pose(mean_action)[-1, :]
                    mean_distance = np.linalg.norm(mean_s_ - self.target)

                    target_action, w = self.compute_weighted_action(memory_ep)
                    s_ = self.sim.update_pose(target_action)[-1, :]
                    target_distance = np.linalg.norm(s_ - self.target)
                    self.compute_reward(self.target, s_)

                    self.pfn.store_transition(s, target_action, mean_distance,
                                              np.var(w))
                    self.pfn.store_transition(s, best_action, best_distance,
                                              w[best_i])
                    self.current_ep += 1
                    self.sample_target()
                    memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100

                    if self.current_ep % 10 == 0:
                        self.reward_record.append(mean_distance)
                        self.ep_record.append(self.current_ep)
                        plt.plot(self.ep_record, self.reward_record)
                        plt.ylim([0.0, 0.1])
                        self.fig.canvas.draw()
                        self.fig.savefig('learning.png')
                        print('\n')
                        #print s
                        print('Episode:', self.current_ep)
                        print("Mean Action:")
                        print mean_action
                        print("Mean Distance:")
                        print mean_distance
                        print("Action Variance:")
                        print act_var
                        print("Target Action:")
                        print target_action
                        print("Target Distance:")
                        print target_distance
                        print("Weights Variance:")
                        print np.var(w)
                        print('*' * 40)
                        self.pfn.save_model()
                        self.pfn.save_memory()

            else:
                s = self.normalize_state(self.target)
                action, act_var = self.pfn.choose_action2(s)
                s_ = self.sim.update_pose(action)[-1, :]
                self.compute_reward(self.target, s_)

                print("Mean Action:")
                print action
                print("Action Variance:")
                print act_var
                print("Distance: %f" % np.linalg.norm(self.target - s_))
                print("Reward: %f" % self.reward)
                print '\n'
                state = np.array([s_, self.target])
                self.pub_state(state)
                rospy.sleep(1)
                self.sample_target()

    def compute_weighted_action(self, memory_ep):
        sum = np.sum(memory_ep[:, -2]) + 1e-6
        try:
            w = memory_ep[:, -2] / sum
        except:
            print("Sum %f" % sum)
            print w
        target_action = np.average(memory_ep[:, S_DIM:S_DIM + A_DIM],
                                   axis=0,
                                   weights=w)
        return target_action, w

    def sample_target(self):
        self.target = self.ends[np.random.randint(self.ends.shape[0])]

    def compute_reward(self, end, target):
        error = target - end
        self.reward = 20**(-np.log2(2.5 * np.linalg.norm(error)))
        #print np.linalg.norm(error)

    def pub_state(self, state):
        pts = list()
        for i in range(state.shape[0]):
            pt = Point()
            pt.x = state[i, 0]
            pt.y = state[i, 1]
            pt.z = state[i, 2]
            pts.append(pt)
        self.pc.points = pts
        self.pub.publish(self.pc)
        pts = list()
        for i in range(state.shape[0]):
            pt = Point()
            pt.x = state[i, 0] / 10.0
            pt.y = state[i, 1] / 10.0
            pt.z = state[i, 2] / 35.0 + 0.42
            pts.append(pt)
        self.pc.points = pts
        self.pub1.publish(self.pc)

    def normalize_state(self, state):
        offset = np.array([0, 0, 0.42])
        scaler = np.array([10, 10, 35])
        s = state - offset
        s = np.multiply(s, scaler)
        return s

    def calculate_dist(self, state):
        offset = np.array([0, 0, 0.42])
        scaler = np.array([10, 10, 35])
        s = np.multiply(state, 1.0 / scaler)
        s += offset
        return np.linalg.norm(s)
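
compute_reward() in this example uses reward = 20 ** (-log2(2.5 * ||error||)), which is algebraically the same as (2.5 * ||error||) ** (-log2 20): the reward is 1.0 at an error of 0.4, 20 at 0.2, and grows quickly as the end point approaches the target. A small check of that behaviour:

import numpy as np

def reward_of(distance):
    # same formula as compute_reward() above, written for a scalar distance
    return 20 ** (-np.log2(2.5 * distance))

for d in (0.8, 0.4, 0.2, 0.1):
    print(d, reward_of(d))  # -> 0.05, 1.0, 20.0, 400.0 respectively
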
Example #13
            for i in range(len(self.world)):
                for j in range(i + 1, len(self.world)):
                    # stop collision with itself
                    if i == j:
                        continue
                    if check_circle_collision(self.world[i], self.world[j]):
                        handle_circle_collision(self.world[i], self.world[j])

        # window edge collision
        for obj in self.world:
            if obj.min_x() <= 0:
                obj.vx = positive(obj.vx)
            elif obj.max_x() >= Sim.WIDTH:
                obj.vx = negative(obj.vx)

            if obj.min_y() <= 0:
                obj.vy = positive(obj.vy)
            elif obj.max_y() >= Sim.HEIGHT:
                obj.vy = negative(obj.vy)

        # rendering
        Sim.DRAW.rect(Sim.SURFACE, Sim.COLOURS["BLACK"], Sim.SCREEN_RECT)
        for obj in self.world:
            pos = (obj.x, obj.y)
            Sim.DRAW.circle(Sim.SURFACE, obj.colour, pos, obj.r)
        Sim.DISPLAY.flip()

if __name__ == "__main__":
    rgs = Rigid_Physics_Simulation()
    sim = Sim(rgs)
    sim.main()
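
The edge-bounce logic in this fragment calls positive() and negative() helpers that are not shown; presumably they just force the sign of a velocity component so the object moves back into the window. A sketch of what they might look like:

def positive(v):
    # presumed helper: make the velocity component point away from the 0 edge
    return abs(v)

def negative(v):
    # presumed helper: make the velocity component point away from the far edge
    return -abs(v)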