Exemplo n.º 1
0
def get_weights_fn(policy):
    # TODO: Check if following needed
    import tensorflow as tf
    with tf.device(policy.device):
        return [
            policy.actor.weights, policy.critic.weights,
            policy.critic_target.weights
        ]


def set_weights_fn(policy, weights):
    actor_weights, critic_weights, critic_target_weights = weights
    update_target_variables(policy.actor.weights, actor_weights, tau=1.)
    update_target_variables(policy.critic.weights, critic_weights, tau=1.)
    update_target_variables(policy.critic_target.weights,
                            critic_target_weights,
                            tau=1.)


args.env_name

if __name__ == '__main__':
    rospy.init_node("runtest")
    armenv = ArmEnvironment()
    parser = apex_argument()
    parser.add_argument('--env-name', type=str, default="Pendulum-v0")
    parser = DDPG.get_argument(parser)
    args = parser.parse_args()

    run(args, env_fn(armenv), policy_fn, get_weights_fn, set_weights_fn)
    parser.set_defaults(model_dir='model_PPO'+slow_suffix)
    parser.set_defaults(logdir='results/PPO'+slow_suffix)
parser.set_defaults(normalise_obs=normalise_obs)
parser.set_defaults(save_model_interval=100)
parser.set_defaults(save_summary_interval=100)
parser.set_defaults(test_interval=500)

#parser.set_defaults(horizon=1024)
#parser.set_defaults(batch_size=512)
parser.set_defaults(gpu=-1)
parser.set_defaults(max_steps=100000000)
parser.set_defaults(n_warmup=0)
#parser.set_defaults(enable_gae=True)
args = parser.parse_args()

env = ArmEnvironment(static_goal=True,slow_step=slow_step)
test_env = ArmEnvironment(static_goal=True,slow_step=slow_step)

policy = PPO(
        state_shape=env.observation_space.shape,
        action_dim=get_act_dim(env.action_space),
        is_discrete=is_discrete(env.action_space),
        max_action=None if is_discrete(
            env.action_space) else env.action_space.high[0],
        batch_size=args.batch_size,
        actor_units=(64, 64),
        critic_units=(64, 64),
        n_epoch=10,
        lr_actor=3e-4,
        lr_critic=3e-4,
        hidden_activation_actor="tanh",
from __future__ import print_function
from arm_env import ArmEnvironment
from ddpg import OUNoise, DDPG
import numpy as np
import rospy

env = ArmEnvironment()
state_shape = env.state_shape
action_shape = env.action_shape
agent = DDPG(state_shape,
             action_shape,
             batch_size=128,
             gamma=0.995,
             tau=0.001,
             actor_lr=0.0001,
             critic_lr=0.001,
             use_layer_norm=True)
print('DDPG agent configured')
agent.load_model(agent.current_path + '/model/model.ckpt')
agent.load_memory()

max_episode = 10000
tot_rewards = []
print('env reset')
observation, done = env.reset()
action = agent.act(observation)
print(action)
rospy.sleep(0.8)
observation, reward, done = env.step(action)
rospy.sleep(0.8)
noise_sigma = 0.15
import rospy
from arm_env import ArmEnvironment, AllJoints
import position_command
import actionlib
from trajectory_msgs.msg import JointTrajectory, JointTrajectoryPoint
from control_msgs.msg import JointTrajectoryControllerState, FollowJointTrajectoryAction, FollowJointTrajectoryActionGoal, FollowJointTrajectoryGoal
from std_srvs.srv import Empty
from sensor_msgs.msg import JointState
from gazebo_msgs.srv import SetModelState, SetModelStateRequest, SetModelConfiguration, SetModelConfigurationRequest
from gazebo_msgs.srv import GetModelState, GetModelStateRequest
from gazebo_msgs.msg import ModelState
import numpy as np
envboi = ArmEnvironment()
envboi.reset()
zero = np.array([0, 0, 0, 0])

testpos = np.array([0, 0.9, 0.5, 1.5])

#rospy.init_node('testing_node')

# jointboi = AllJoints(['plat_joint','shoulder_joint','forearm_joint','wrist_joint'])

# jointboi.move(testpos)

# rospy.sleep(1)

# TODO fix reset function

# rospy.sleep(1)
# print(envboi.get_latest_state())
# envboi.reset()
Exemplo n.º 5
0
import rospy
from arm_env import ArmEnvironment, AllJoints
import numpy as np

rospy.init_node("test")

env = ArmEnvironment(static_goal=True)
env.reset()
rospy.sleep(1)
print("Goal Distance: ", env.get_goal_distance())

while (True):
    action = input("Enter an action:").split(' ')
    floatarr = []
    for i in action:
        if (i == '0'):
            floatarr.append(0)
        else:
            floatarr.append(float(i))
    action = np.array(floatarr)
    env.step(floatarr)
    print("Goal Distance: ", env.get_goal_distance())
    reset = eval(input("Reset goal?"))
    if (reset):
        env.set_new_goal()

# alljoints.move(pos)
# #rospy.sleep(3)
# env.pause_physics()
# env.unpause_physics()
Exemplo n.º 6
0
    def __init__(self, gui=False, audio=False):
        
        self.t = 0
        
        # ARM CONFIG
        
        arm_cfg = dict(
                        m_mins=[-1.] * 3 * 7, 
                        m_maxs=[1.] * 3 * 7, 
                        s_mins=[-1.] * 3 * 50,
                        s_maxs=[1.] * 3 * 50,
                        lengths=[0.5, 0.3, 0.2], 
                        angle_shift=0.5,
                        rest_state=[0., 0., 0.],
                        n_dmps=3, 
                        n_bfs=6, 
                        timesteps=50,
                        gui=gui)
        
        
        # SOUND CONFIG
        
        self.v_o = list(np.log2([500, 900]))
        self.v_y = list(np.log2([300, 1700]))
        self.v_u = list(np.log2([300, 800]))
        self.v_e = list(np.log2([400, 2200]))
        self.v_i = list(np.log2([300, 2300]))
        
        
        self.vowels = dict(o=self.v_o, y=self.v_y, u=self.v_u, e=self.v_e, i=self.v_i)
        
        self.human_sounds = ['oey', 'uye', 'iuo', 'eyu', 'eou', 'yeo']
        random.shuffle(self.human_sounds)
        print "human sounds", self.human_sounds
        
        
        def compute_s_sound(sound):
            s1 = self.vowels[sound[0]]
            s2 = [(self.vowels[sound[0]][0] + self.vowels[sound[1]][0]) / 2., (self.vowels[sound[0]][1] + self.vowels[sound[1]][1]) / 2.]
            s3 = self.vowels[sound[1]]
            s4 = [(self.vowels[sound[1]][0] + self.vowels[sound[2]][0]) / 2., (self.vowels[sound[1]][1] + self.vowels[sound[2]][1]) / 2.]
            s5 = self.vowels[sound[2]]
            rdm = 0.0 * (2.*np.random.random((1,10))[0] - 1.)
            return list(rdm + np.array([f[0] for f in [s1, s2, s3, s4, s5]] + [f[1] for f in [s1, s2, s3, s4, s5]]))
        
        
        self.human_sounds_traj = dict()
        self.human_sounds_traj_std = dict()
        self.best_vocal_errors = {}
        self.best_vocal_errors_evolution = []
        for hs in self.human_sounds:
            self.best_vocal_errors[hs] = 10.
            self.human_sounds_traj[hs] = compute_s_sound(hs)
            self.human_sounds_traj_std[hs] = [d - 8.5 for d in self.human_sounds_traj[hs][:5]] + [d - 10.25 for d in self.human_sounds_traj[hs][5:]]    
    
            
        self.sound_tol = 0.4
    

        # DIVA CONFIG
        
        diva_cfg = dict(
                        m_mins = np.array([-1, -1, -1, -1, -1, -1, -1]),
                        m_maxs = np.array([1, 1, 1, 1, 1, 1, 1]),
                        s_mins = np.array([ 7.5,  9.25]),
                        s_maxs = np.array([ 9.5 ,  11.25]),
                        m_used = range(7),
                        s_used = range(1, 3),
                        rest_position_diva = list([0]*7),
                        audio = audio,
                        diva_use_initial = True,
                        diva_use_goal = True,
                        used_diva = list([True]*7),
                        n_dmps_diva = 7,
                        n_bfs_diva = 2,
                        move_steps = 50,
                        )
        
        
        
        self.arm = ArmEnvironment(**arm_cfg)
        self.diva = DivaEnvironment(**diva_cfg)
        
        self.timesteps = 50
        
        
        # OBJECTS CONFIG
        
        self.caregiver_gives_obj_factor = 0.01
        self.tool_length = 0.5
        self.handle_tol = 0.2
        self.handle_tol_sq = self.handle_tol * self.handle_tol
        self.handle_noise = 0.
        self.object_tol_hand = 0.2
        self.object_tol_hand_sq = self.object_tol_hand * self.object_tol_hand
        self.object_tol_tool = 0.2
        self.object_tol_tool_sq = self.object_tol_tool * self.object_tol_tool
        
        self.diva_traj = None
        self.produced_sound = None
        
        Environment.__init__(self, 
                             m_mins= [-1.] * (21+28),
                             m_maxs= [1.] * (21+28),
                             s_mins= [-1.] * 80,
                             s_maxs= [1.] * 80)
        
        
        self.current_tool = [-0.5, 0., 0.5, 0.]
        self.current_toy1 = [0.5, 0.5, 0.]
        self.current_toy2 = [0.7, 0.7, 0.]
        self.current_toy3 = [0.9, 0.9, 0.]
        self.current_caregiver = [0., 1.7]
        self.reset()
        self.compute_tool()
        
        self.purge_logs()
        
        if self.arm.gui:
            self.init_plot()
            
        

        self.count_diva = 0
        self.count_arm = 0
        self.count_tool = 0
        self.count_toy1_by_tool = 0
        self.count_toy2_by_tool = 0
        self.count_toy3_by_tool = 0
        self.count_toy1_by_hand = 0
        self.count_toy2_by_hand = 0
        self.count_toy3_by_hand = 0
        self.count_parent_give_label = 0
        self.count_parent_give_object = 0
        self.count_produced_sounds = {}
        for hs in self.human_sounds:
            self.count_produced_sounds[hs] = 0
        
        self.time_arm = 0.
        self.time_diva = 0. 
        self.time_arm_per_it = 0.
        self.time_diva_per_it = 0. 
Exemplo n.º 7
0
class CogSci2017Environment(Environment):
    def __init__(self, gui=False, audio=False):
        
        self.t = 0
        
        # ARM CONFIG
        
        arm_cfg = dict(
                        m_mins=[-1.] * 3 * 7, 
                        m_maxs=[1.] * 3 * 7, 
                        s_mins=[-1.] * 3 * 50,
                        s_maxs=[1.] * 3 * 50,
                        lengths=[0.5, 0.3, 0.2], 
                        angle_shift=0.5,
                        rest_state=[0., 0., 0.],
                        n_dmps=3, 
                        n_bfs=6, 
                        timesteps=50,
                        gui=gui)
        
        
        # SOUND CONFIG
        
        self.v_o = list(np.log2([500, 900]))
        self.v_y = list(np.log2([300, 1700]))
        self.v_u = list(np.log2([300, 800]))
        self.v_e = list(np.log2([400, 2200]))
        self.v_i = list(np.log2([300, 2300]))
        
        
        self.vowels = dict(o=self.v_o, y=self.v_y, u=self.v_u, e=self.v_e, i=self.v_i)
        
        self.human_sounds = ['oey', 'uye', 'iuo', 'eyu', 'eou', 'yeo']
        random.shuffle(self.human_sounds)
        print "human sounds", self.human_sounds
        
        
        def compute_s_sound(sound):
            s1 = self.vowels[sound[0]]
            s2 = [(self.vowels[sound[0]][0] + self.vowels[sound[1]][0]) / 2., (self.vowels[sound[0]][1] + self.vowels[sound[1]][1]) / 2.]
            s3 = self.vowels[sound[1]]
            s4 = [(self.vowels[sound[1]][0] + self.vowels[sound[2]][0]) / 2., (self.vowels[sound[1]][1] + self.vowels[sound[2]][1]) / 2.]
            s5 = self.vowels[sound[2]]
            rdm = 0.0 * (2.*np.random.random((1,10))[0] - 1.)
            return list(rdm + np.array([f[0] for f in [s1, s2, s3, s4, s5]] + [f[1] for f in [s1, s2, s3, s4, s5]]))
        
        
        self.human_sounds_traj = dict()
        self.human_sounds_traj_std = dict()
        self.best_vocal_errors = {}
        self.best_vocal_errors_evolution = []
        for hs in self.human_sounds:
            self.best_vocal_errors[hs] = 10.
            self.human_sounds_traj[hs] = compute_s_sound(hs)
            self.human_sounds_traj_std[hs] = [d - 8.5 for d in self.human_sounds_traj[hs][:5]] + [d - 10.25 for d in self.human_sounds_traj[hs][5:]]    
    
            
        self.sound_tol = 0.4
    

        # DIVA CONFIG
        
        diva_cfg = dict(
                        m_mins = np.array([-1, -1, -1, -1, -1, -1, -1]),
                        m_maxs = np.array([1, 1, 1, 1, 1, 1, 1]),
                        s_mins = np.array([ 7.5,  9.25]),
                        s_maxs = np.array([ 9.5 ,  11.25]),
                        m_used = range(7),
                        s_used = range(1, 3),
                        rest_position_diva = list([0]*7),
                        audio = audio,
                        diva_use_initial = True,
                        diva_use_goal = True,
                        used_diva = list([True]*7),
                        n_dmps_diva = 7,
                        n_bfs_diva = 2,
                        move_steps = 50,
                        )
        
        
        
        self.arm = ArmEnvironment(**arm_cfg)
        self.diva = DivaEnvironment(**diva_cfg)
        
        self.timesteps = 50
        
        
        # OBJECTS CONFIG
        
        self.caregiver_gives_obj_factor = 0.01
        self.tool_length = 0.5
        self.handle_tol = 0.2
        self.handle_tol_sq = self.handle_tol * self.handle_tol
        self.handle_noise = 0.
        self.object_tol_hand = 0.2
        self.object_tol_hand_sq = self.object_tol_hand * self.object_tol_hand
        self.object_tol_tool = 0.2
        self.object_tol_tool_sq = self.object_tol_tool * self.object_tol_tool
        
        self.diva_traj = None
        self.produced_sound = None
        
        Environment.__init__(self, 
                             m_mins= [-1.] * (21+28),
                             m_maxs= [1.] * (21+28),
                             s_mins= [-1.] * 80,
                             s_maxs= [1.] * 80)
        
        
        self.current_tool = [-0.5, 0., 0.5, 0.]
        self.current_toy1 = [0.5, 0.5, 0.]
        self.current_toy2 = [0.7, 0.7, 0.]
        self.current_toy3 = [0.9, 0.9, 0.]
        self.current_caregiver = [0., 1.7]
        self.reset()
        self.compute_tool()
        
        self.purge_logs()
        
        if self.arm.gui:
            self.init_plot()
            
        

        self.count_diva = 0
        self.count_arm = 0
        self.count_tool = 0
        self.count_toy1_by_tool = 0
        self.count_toy2_by_tool = 0
        self.count_toy3_by_tool = 0
        self.count_toy1_by_hand = 0
        self.count_toy2_by_hand = 0
        self.count_toy3_by_hand = 0
        self.count_parent_give_label = 0
        self.count_parent_give_object = 0
        self.count_produced_sounds = {}
        for hs in self.human_sounds:
            self.count_produced_sounds[hs] = 0
        
        self.time_arm = 0.
        self.time_diva = 0. 
        self.time_arm_per_it = 0.
        self.time_diva_per_it = 0. 
        
          
          
    def save(self):
        return dict(t=self.t,
                    human_sounds=self.human_sounds,
                    best_vocal_errors=self.best_vocal_errors,
                    best_vocal_errors_evolution=self.best_vocal_errors_evolution,
                    count_diva=self.count_diva,
                    count_arm=self.count_arm,
                    count_tool=self.count_tool,
                    count_toy1_by_tool=self.count_toy1_by_tool,
                    count_toy2_by_tool=self.count_toy2_by_tool,
                    count_toy3_by_tool=self.count_toy3_by_tool,
                    count_toy1_by_hand=self.count_toy1_by_hand,
                    count_toy2_by_hand=self.count_toy2_by_hand,
                    count_toy3_by_hand=self.count_toy3_by_hand,
                    count_parent_give_label=self.count_parent_give_label,
                    count_parent_give_object=self.count_parent_give_object,
                    count_produced_sounds=self.count_produced_sounds,
                    )

    def reset(self):
        
        if self.t % 20 == 0: 
            self.reset_toys()
        
        self.current_tool[3] = 0.
        self.current_toy1[2] = 0.
        self.current_toy2[2] = 0.
        self.current_toy3[2] = 0.
        self.reset_caregiver()
        self.current_context = self.get_current_context()  
        
        self.hand = []
        self.tool = []
        self.toy1 = []
        self.toy2 = []
        self.toy3 = []
        self.caregiver = []
        
        
    def purge_logs(self):
        self.logs_tool = []
        self.logs_toy1 = []
        self.logs_toy2 = []
        self.logs_toy3 = []
        self.logs_caregiver = []
            
    def reset_tool(self):
        self.current_tool[:2] = self.reset_rand2d(region=1)
        self.current_tool[2] = np.random.random()
        self.compute_tool()
        
    def set_tool(self, pos, angle):
        self.current_tool[:2] = pos
        self.current_tool[2] = angle
        self.compute_tool()
        
    def reset_toys(self, region=0):
        self.current_toy1[:2] = self.reset_rand2d(region=region)
        self.current_toy2[:2] = self.reset_rand2d(region=region)
        self.current_toy3[:2] = self.reset_rand2d(region=region)
        
    def set_toys(self, pos1, pos2, pos3):
        self.current_toy1[:2] = pos1
        self.current_toy2[:2] = pos2
        self.current_toy3[:2] = pos3
        
    def reset_caregiver(self):
        self.current_caregiver = self.reset_rand2d()
        
    def set_caregiver(self, pos):
        self.current_caregiver = pos
                
    def reset_rand2d(self, region=None):
        if region == 0:
            rdm = np.random.random()
            if rdm < 1. / 3.:
                return self.reset_rand2d(region=1)
            elif rdm < 2. / 3.:
                return self.reset_rand2d(region=2)
            else:
                return self.reset_rand2d(region=3)
        elif region == 1:
            alpha = 2. * np.pi * np.random.random()
            r = np.random.random()
            return [r * np.cos(alpha), r * np.sin(alpha)]
        elif region == 2:
            alpha = 2. * np.pi * np.random.random()
            r = 1. + 0.5 * np.random.random()
            return [r * np.cos(alpha), r * np.sin(alpha)]  
        elif region == 3:
            alpha = 2. * np.pi * np.random.random()
            r = 1.5 + 0.5 * np.random.random()
            return [r * np.cos(alpha), r * np.sin(alpha)]            
        elif region is None:
            return [4. * np.random.random() - 2., 4. * np.random.random() - 2.]
        
        
    def get_current_context(self):
        return [d / 2. for d in self.current_tool[:2] + self.current_toy1[:2] + self.current_toy2[:2] + self.current_toy3[:2] + self.current_caregiver]
    
    def compute_tool(self):
        a = np.pi * self.current_tool[2]
        self.tool_end_pos = [self.current_tool[0] + np.cos(a) * self.tool_length, 
                        self.current_tool[1] + np.sin(a) * self.tool_length]
        
    def compute_motor_command(self, m_ag):
        return bounds_min_max(m_ag, self.conf.m_mins, self.conf.m_maxs)
    
    def motor_babbling(self, arm=False, audio=False):
        m = rand_bounds(self.conf.m_bounds)[0]
        if arm:
            r = 1.
        elif audio:
            r = 0.
        else:
            r = np.random.random()
        if r < 0.5:
            m[:21] = 0.
        else:
            m[21:] = 0.
        return m
    
    def is_hand_free(self):
        return self.current_tool[3] == 0 and (not self.current_toy1[2] == 1) and (not self.current_toy2[2] == 1) and (not self.current_toy3[2] == 1)
    
    def is_tool_free(self):
        return (not self.current_toy1[2] == 2) and (not self.current_toy2[2] == 2) and (not self.current_toy3[2] == 2) 

    def give_label(self, toy):
        
        if toy == "toy1":
            #print "Caregiver says", self.human_sounds[0] 
            return self.human_sounds_traj[self.human_sounds[0]]
        elif toy == "toy2":
            #print "Caregiver says", self.human_sounds[1]
            return self.human_sounds_traj[self.human_sounds[1]]
        elif toy == "toy3":
            #print "Caregiver says", self.human_sounds[2]
            return self.human_sounds_traj[self.human_sounds[2]]
        elif toy == "random":
            sound_id = np.random.choice([3, 4, 5])
            #print "Caregiver says", self.human_sounds[sound_id]
            return self.human_sounds_traj[self.human_sounds[sound_id]]
        else:
            raise NotImplementedError
        
    def analysis_sound(self, diva_traj):
        #return self.human_sounds[2]
        for hs in self.human_sounds:          
            error = np.linalg.norm(np.array(self.human_sounds_traj[hs]) - np.array([f[0] for f in diva_traj[[0, 12, 24, 37, 49]]] + [f[1] for f in diva_traj[[0, 12, 24, 37, 49]]]))
            if error < self.best_vocal_errors[hs]:
                self.best_vocal_errors[hs] = error
            if error < self.sound_tol:
                print "***********Agent says", hs
                return hs
        return None
    
    def caregiver_moves_obj(self, caregiver_pos, current_toy):
        middle = [caregiver_pos[0]/2, caregiver_pos[1]/2]
        return [current_toy[0] + self.caregiver_gives_obj_factor * (middle[0] - current_toy[0]), current_toy[1] + self.caregiver_gives_obj_factor * (middle[1] - current_toy[1]), current_toy[2]]
        
        
    def compute_sensori_effect(self, m):
        t = time.time()
        
        m_arm = m[:21]
        m_diva = m[21:]
        
        assert np.linalg.norm(m_arm) * np.linalg.norm(m_diva) == 0.
        
        if np.linalg.norm(m_arm) > 0.:
            cmd = "arm"
        else:
            cmd = "diva"
            
        self.purge_logs()
        
        arm_traj = self.arm.update(m_arm)
        #print "arm traj", arm_traj
        
        if cmd == "diva":
            diva_traj = self.diva.update(m_diva)
            self.diva_traj = diva_traj
            self.produced_sound = self.analysis_sound(self.diva_traj)
            if self.produced_sound is not None:
                self.count_produced_sounds[self.produced_sound] += 1
                if self.produced_sound in self.human_sounds[:3]:                    
                    self.count_parent_give_object += 1 
        else:
            diva_traj = np.zeros((50,2))
            self.produced_sound = None
        
        
        for i in range(self.timesteps):
            
            # Arm
            arm_x, arm_y, arm_angle = arm_traj[i]
            
            # Tool
            if not self.current_tool[3]:
                if self.is_hand_free() and ((arm_x - self.current_tool[0]) ** 2. + (arm_y - self.current_tool[1]) ** 2. < self.handle_tol_sq):
                    self.current_tool[0] = arm_x
                    self.current_tool[1] = arm_y
                    self.current_tool[2] = np.mod(arm_angle + self.handle_noise * np.random.randn() + 1, 2) - 1
                    self.compute_tool()
                    self.current_tool[3] = 1
            else:
                self.current_tool[0] = arm_x
                self.current_tool[1] = arm_y
                self.current_tool[2] = np.mod(arm_angle + self.handle_noise * np.random.randn() + 1, 2) - 1
                self.compute_tool()
            self.logs_tool.append([self.current_tool[:2], 
                          self.current_tool[2], 
                          self.tool_end_pos, 
                          self.current_tool[3]])
            
            
            if cmd == "arm":
                # Toy 1
                if self.current_toy1[2] == 1 or (self.is_hand_free() and ((arm_x - self.current_toy1[0]) ** 2 + (arm_y - self.current_toy1[1]) ** 2 < self.object_tol_hand_sq)):
                    self.current_toy1[0] = arm_x
                    self.current_toy1[1] = arm_y
                    self.current_toy1[2] = 1
                if self.current_toy1[2] == 2 or ((not self.current_toy1[2] == 1) and self.is_tool_free() and ((self.tool_end_pos[0] - self.current_toy1[0]) ** 2 + (self.tool_end_pos[1] - self.current_toy1[1]) ** 2 < self.object_tol_tool_sq)):
                    self.current_toy1[0] = self.tool_end_pos[0]
                    self.current_toy1[1] = self.tool_end_pos[1]
                    self.current_toy1[2] = 2
                self.logs_toy1.append([self.current_toy1])
                
                # Toy 2
                if self.current_toy2[2] == 1 or (self.is_hand_free() and ((arm_x - self.current_toy2[0]) ** 2 + (arm_y - self.current_toy2[1]) ** 2 < self.object_tol_hand_sq)):
                    self.current_toy2[0] = arm_x
                    self.current_toy2[1] = arm_y
                    self.current_toy2[2] = 1
                if self.current_toy2[2] == 2 or ((not self.current_toy2[2] == 1) and self.is_tool_free() and ((self.tool_end_pos[0] - self.current_toy2[0]) ** 2 + (self.tool_end_pos[1] - self.current_toy2[1]) ** 2 < self.object_tol_tool_sq)):
                    self.current_toy2[0] = self.tool_end_pos[0]
                    self.current_toy2[1] = self.tool_end_pos[1]
                    self.current_toy2[2] = 2
                self.logs_toy2.append([self.current_toy2])
                
                # Toy 3
                if self.current_toy3[2] == 1 or (self.is_hand_free() and ((arm_x - self.current_toy3[0]) ** 2 + (arm_y - self.current_toy3[1]) ** 2 < self.object_tol_hand_sq)):
                    self.current_toy3[0] = arm_x
                    self.current_toy3[1] = arm_y
                    self.current_toy3[2] = 1
                if self.current_toy3[2] == 2 or ((not self.current_toy3[2] == 1) and self.is_tool_free() and ((self.tool_end_pos[0] - self.current_toy3[0]) ** 2 + (self.tool_end_pos[1] - self.current_toy3[1]) ** 2 < self.object_tol_tool_sq)):
                    self.current_toy3[0] = self.tool_end_pos[0]
                    self.current_toy3[1] = self.tool_end_pos[1]
                    self.current_toy3[2] = 2
                self.logs_toy3.append([self.current_toy3])
                
                self.logs_caregiver.append([self.current_caregiver])
                
        
            else:
                # parent gives object if label is produced
                if self.produced_sound == self.human_sounds[0]:
                    self.current_toy1 =  self.caregiver_moves_obj(self.current_caregiver, self.current_toy1)
                elif self.produced_sound == self.human_sounds[1]:
                    self.current_toy2 =  self.caregiver_moves_obj(self.current_caregiver, self.current_toy2)
                elif self.produced_sound == self.human_sounds[2]:
                    self.current_toy3 =  self.caregiver_moves_obj(self.current_caregiver, self.current_toy3)
                    
                
                self.logs_toy1.append([self.current_toy1])
                self.logs_toy2.append([self.current_toy2])
                self.logs_toy3.append([self.current_toy3])
                self.logs_caregiver.append([self.current_caregiver])
        
#             if i in [0, 10, 20, 30, 40, 49]:
#                 self.hand = self.hand + [arm_x, arm_y]
#                 self.tool = self.tool + self.current_tool[:2]
#                 self.toy1 = self.toy1 + self.current_toy1[:2]
#                 self.toy2 = self.toy2 + self.current_toy2[:2]
#                 self.toy3 = self.toy3 + self.current_toy3[:2]
                
            if i in [0, 12, 24, 37, 49]:
                self.hand = self.hand + [arm_x, arm_y]
                self.tool = self.tool + self.current_tool[:2]
                self.toy1 = self.toy1 + self.current_toy1[:2]
                self.toy2 = self.toy2 + self.current_toy2[:2]
                self.toy3 = self.toy3 + self.current_toy3[:2]
                self.caregiver = self.caregiver + self.current_caregiver
        
            if self.arm.gui:
                if i % 5 == 0:
                    self.plot_step(self.ax, i)
                    
                
        if cmd == "arm":
            # parent gives label if object is touched by hand 
            if self.current_toy1[2] == 1:
                label = self.give_label("toy1")
            elif self.current_toy2[2] == 1:
                label = self.give_label("toy2")
            elif self.current_toy3[2] == 1:
                label = self.give_label("toy3")
            else:
                label = self.give_label("random")
            self.sound = label
            #print "parent sound", label, self.sound
        else:
            self.sound = [f for formants in diva_traj[[0, 12, 24, 37, 49]] for f in formants]
            self.sound = self.sound[0::2] + self.sound[1::2]
            
        
        # Sort dims            
        self.hand = self.hand[0::2] + self.hand[1::2]
        self.tool = self.tool[0::2] + self.tool[1::2]
        self.toy1 = self.toy1[0::2] + self.toy1[1::2]
        self.toy2 = self.toy2[0::2] + self.toy2[1::2]
        self.toy3 = self.toy3[0::2] + self.toy3[1::2]
        self.caregiver = self.caregiver[0::2] + self.caregiver[1::2]
        #self.sound = self.sound[0::2] + self.sound[1::2]
        

        # Analysis
        if np.linalg.norm(m[21:]) > 0:
            self.count_diva += 1
        else:
            self.count_arm += 1
        if self.current_tool[3]:
            self.count_tool += 1
        if self.current_toy1[2] == 1:
            self.count_toy1_by_hand += 1
        elif self.current_tool[3] and self.current_toy1[2] == 2:
            self.count_toy1_by_tool += 1
        if self.current_toy2[2] == 1:
            self.count_toy2_by_hand += 1
        elif self.current_tool[3] and self.current_toy2[2] == 2:
            self.count_toy2_by_tool += 1
        if self.current_toy3[2] == 1:
            self.count_toy3_by_hand += 1
        elif self.current_tool[3] and self.current_toy3[2] == 2:
            self.count_toy3_by_tool += 1
        self.count_parent_give_label = self.count_toy1_by_hand + self.count_toy2_by_hand + self.count_toy3_by_hand

        if cmd == "arm":
            self.time_arm += time.time() - t
            self.time_arm_per_it = self.time_arm / self.count_arm
        else:
            self.time_diva += time.time() - t
            self.time_diva_per_it = self.time_diva / self.count_diva
            
        #print "previous context", len(self.current_context), self.current_context
        #print "s_hand", len(self.hand), self.hand
        #print "s_tool", len(self.tool), self.tool
        #print "s_toy1", len(self.toy1), self.toy1
        #print "s_toy2", len(self.toy2), self.toy2
        #print "s_toy3", len(self.toy3), self.toy3
        #print "s_sound", len(self.sound), self.sound
        #print "s_caregiver", len(self.caregiver), self.caregiver
        
        self.t += 1
        if self. t % 100 == 0:
            self.best_vocal_errors_evolution += [self.best_vocal_errors.copy()]
        if self.t % 1000 == 0:
            print "best_vocal_errors", [(hs,self.best_vocal_errors[hs]) for hs in self.human_sounds]
        
        
        context = self.current_context
        
        # MAP TO STD INTERVAL
        hand = [d/2 for d in self.hand]
        tool = [d/2 for d in self.tool]
        toy1 = [d/2 for d in self.toy1]
        toy2 = [d/2 for d in self.toy2]
        toy3 = [d/2 for d in self.toy3]
        sound = [d - 8.5 for d in self.sound[:5]] + [d - 10.25 for d in self.sound[5:]]
        caregiver = [d/2 for d in self.caregiver]
        
        # MAP to Delta
#         hand = [hand[1] - hand[0], hand[2] - hand[1], hand[3] - hand[2], hand[4] - hand[3], hand[5] - hand[4],
#                 hand[7] - hand[6], hand[8] - hand[7], hand[9] - hand[8], hand[10] - hand[9], hand[11] - hand[10]]
#         
#         tool = [tool[1] - tool[0], tool[2] - tool[1], tool[3] - tool[2], tool[4] - tool[3], tool[5] - tool[4],
#                 tool[7] - tool[6], tool[8] - tool[7], tool[9] - tool[8], tool[10] - tool[9], tool[11] - tool[10]]
#         
#         toy1 = [toy1[1] - toy1[0], toy1[2] - toy1[1], toy1[3] - toy1[2], toy1[4] - toy1[3], toy1[5] - toy1[4],
#                 toy1[7] - toy1[6], toy1[8] - toy1[7], toy1[9] - toy1[8], toy1[10] - toy1[9], toy1[11] - toy1[10]]
#         
#         toy2 = [toy2[1] - toy2[0], toy2[2] - toy2[1], toy2[3] - toy2[2], toy2[4] - toy2[3], toy2[5] - toy2[4],
#                 toy2[7] - toy2[6], toy2[8] - toy2[7], toy2[9] - toy2[8], toy2[10] - toy2[9], toy2[11] - toy2[10]]
#         
#         toy3 = [toy3[1] - toy3[0], toy3[2] - toy3[1], toy3[3] - toy3[2], toy3[4] - toy3[3], toy3[5] - toy3[4],
#                 toy3[7] - toy3[6], toy3[8] - toy3[7], toy3[9] - toy3[8], toy3[10] - toy3[9], toy3[11] - toy3[10]]
                
        
        
        s = context + hand + tool + toy1 + toy2 + toy3 + sound + caregiver
        #print "s_sound", sound
        return bounds_min_max(s, self.conf.s_mins, self.conf.s_maxs)
    
    
    def update(self, m_ag, reset=True, log=True):
        """ Computes sensorimotor values from motor orders.

        :param numpy.array m_ag: a motor command with shape (self.conf.m_ndims, ) or a set of n motor commands of shape (n, self.conf.m_ndims)

        :param bool log: emit the motor and sensory values for logging purpose (default: True).

        :returns: an array of shape (self.conf.ndims, ) or (n, self.conf.ndims) according to the shape of the m_ag parameter, containing the motor values (which can be different from m_ag, e.g. bounded according to self.conf.m_bounds) and the corresponding sensory values.

        .. note:: self.conf.ndims = self.conf.m_ndims + self.conf.s_ndims is the dimensionality of the sensorimotor space (dim of the motor space + dim of the sensory space).
        """

        if len(np.array(m_ag).shape) == 1:
            s = self.one_update(m_ag, log)
        else:
            s = []
            for m in m_ag:
                s.append(self.one_update(m, log))
            s = np.array(s)
        if reset:
            self.reset()
        return s
    

    def print_stats(self):
        print"\n----------------------\nEnvironment Statistics\n----------------------\n"
        print "# Iterations:", self.t
        print "# Arm trials:", self.count_arm
        print "# Vocal trials:", self.count_diva
        print "# Tool actions:", self.count_tool
        print "# Produced sounds:", self.count_produced_sounds
        print "# Toy1 was reached by tool:", self.count_toy1_by_tool
        print "# Toy2 was reached by tool:", self.count_toy2_by_tool
        print "# Toy3 was reached by tool:", self.count_toy3_by_tool
        print "# Toy1 was reached by hand:", self.count_toy1_by_hand
        print "# Toy2 was reached by hand:", self.count_toy2_by_hand
        print "# Toy3 was reached by hand:", self.count_toy3_by_hand
        print "# Parent gave vocal labels:", self.count_parent_give_label
        print "# Parent gave object:", self.count_parent_give_object
        print

    def init_plot(self):
        #plt.ion()
        self.ax = plt.subplot()
        plt.gcf().set_size_inches(6., 6., forward=True)
        plt.gca().set_aspect('equal')
        #plt.show(block=False)
        plt.axis([-2, 2,-2, 2])
        plt.draw()
    
    def plot_tool_step(self, ax, i, **kwargs_plot):
        handle_pos = self.logs_tool[i][0]
        end_pos = self.logs_tool[i][2]
        
        ax.plot([handle_pos[0], end_pos[0]], [handle_pos[1], end_pos[1]], '-', color=colors_config['stick'], lw=6, **kwargs_plot)
        ax.plot(handle_pos[0], handle_pos[1], 'o', color = colors_config['gripper'], ms=12, **kwargs_plot)
        ax.plot(end_pos[0], end_pos[1], 'o', color = colors_config['magnetic'], ms=12, **kwargs_plot)                    
    
    def plot_toy1_step(self, ax, i, **kwargs_plot):
        pos = self.logs_toy1[i][0]
        rectangle = plt.Rectangle((pos[0] - 0.1, pos[1] - 0.1), 0.2, 0.2, color = colors[3], **kwargs_plot)
        ax.add_patch(rectangle) 
    
    def plot_toy2_step(self, ax, i, **kwargs_plot):
        pos = self.logs_toy2[i][0]
        rectangle = plt.Rectangle((pos[0] - 0.1, pos[1] - 0.1), 0.2, 0.2, color = colors[4], **kwargs_plot)
        ax.add_patch(rectangle) 
    
    def plot_toy3_step(self, ax, i, **kwargs_plot):
        pos = self.logs_toy3[i][0]
        rectangle = plt.Rectangle((pos[0] - 0.1, pos[1] - 0.1), 0.2, 0.2, color = colors[5], **kwargs_plot)
        ax.add_patch(rectangle) 
        
    def plot_caregiver_step(self, ax, i, **kwargs_plot):
        pos = self.logs_caregiver[i][0]
        rectangle = plt.Rectangle((pos[0] - 0.1, pos[1] - 0.1), 0.2, 0.2, color = "black", **kwargs_plot)
        ax.add_patch(rectangle) 
        
    def plot_step(self, ax, i, clean=True, **kwargs_plot):
        #t0 = time.time()
        plt.pause(0.0001)
        #print "t1", time.time() - t0
        if clean:
            plt.cla()
        #print "t2", time.time() - t0
        self.arm.plot_step(ax, i, **kwargs_plot)
        self.plot_tool_step(ax, i, **kwargs_plot)
        self.plot_toy1_step(ax, i, **kwargs_plot)
        self.plot_toy2_step(ax, i, **kwargs_plot)
        self.plot_toy3_step(ax, i, **kwargs_plot)
        self.plot_caregiver_step(ax, i, **kwargs_plot)
        #print "t3", time.time() - t0
        
        if clean:
            plt.xlim([-2, 2])
            plt.ylim([-2, 2])
            plt.gca().set_xticklabels([])
            plt.gca().set_yticklabels([])
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.xlabel("")
            plt.ylabel("")
            
        plt.draw()
        plt.show(block=False)
Exemplo n.º 8
0
    def __init__(self, gui=False, audio=False):

        self.t = 0

        # ARM CONFIG

        arm_cfg = dict(m_mins=[-1.] * 3 * 7,
                       m_maxs=[1.] * 3 * 7,
                       s_mins=[-1.] * 3 * 50,
                       s_maxs=[1.] * 3 * 50,
                       lengths=[0.5, 0.3, 0.2],
                       angle_shift=0.5,
                       rest_state=[0., 0., 0.],
                       n_dmps=3,
                       n_bfs=6,
                       timesteps=50,
                       gui=gui)

        # SOUND CONFIG

        self.v_o = list(np.log2([500, 900]))
        self.v_y = list(np.log2([300, 1700]))
        self.v_u = list(np.log2([300, 800]))
        self.v_e = list(np.log2([400, 2200]))
        self.v_i = list(np.log2([300, 2300]))

        self.vowels = dict(o=self.v_o,
                           y=self.v_y,
                           u=self.v_u,
                           e=self.v_e,
                           i=self.v_i)

        # Retrieve caregiver sounds and trajectories from json
        self.rospack = RosPack()
        with open(
                join(self.rospack.get_path('pobax_playground'), 'config',
                     'human_sounds.pickle')) as f:
            self.full_human_motor_traj, self.full_human_sounds_traj = pickle.load(
                f)
        self.human_sounds = self.full_human_sounds_traj.keys()
        rospy.loginfo('Voice node using the word %s for culbuto name' %
                      self.human_sounds[0])

        def compress_sound_traj(sound):
            assert (len(sound) == 100)
            f1s = sound[:50]
            f3s = sound[50:]
            return np.append(f1s[np.array([0, 12, 24, 37, 49])],
                             f3s[np.array([0, 12, 24, 37, 49])])

        #reduce number of sounds
        self.human_sounds = ['eyu', 'oey', 'eou', 'oyi']
        #random.shuffle(self.human_sounds)

        print self.human_sounds
        self.human_sounds_traj = dict()
        self.human_sounds_traj_std = dict()
        self.best_vocal_errors = {}
        self.best_vocal_errors_evolution = []
        for hs in self.human_sounds:
            self.best_vocal_errors[hs] = 10.
            self.human_sounds_traj[hs] = compress_sound_traj(
                self.full_human_sounds_traj[hs])
            self.human_sounds_traj_std[hs] = [
                d - 8.5 for d in self.human_sounds_traj[hs][:5]
            ] + [d - 10.25 for d in self.human_sounds_traj[hs][5:]]

        self.sound_tol = 0.4

        # DIVA CONFIG

        diva_cfg = dict(
            m_mins=np.array([-1, -1, -1, -1, -1, -1, -1]),
            m_maxs=np.array([1, 1, 1, 1, 1, 1, 1]),
            s_mins=np.array([7.5, 9.25]),
            s_maxs=np.array([9.5, 11.25]),
            m_used=range(7),
            s_used=range(1, 3),
            rest_position_diva=list([0] * 7),
            audio=audio,
            diva_use_initial=True,
            diva_use_goal=True,
            used_diva=list([True] * 7),
            n_dmps_diva=7,
            n_bfs_diva=2,
            move_steps=50,
        )

        self.arm = ArmEnvironment(**arm_cfg)
        self.diva = DivaEnvironment(**diva_cfg)

        self.timesteps = 50

        # OBJECTS CONFIG

        self.caregiver_gives_obj_factor = 0.01
        self.tool_length = 0.5
        self.handle_tol = 0.2
        self.handle_tol_sq = self.handle_tol * self.handle_tol
        self.handle_noise = 0.
        self.object_tol_hand = 0.2
        self.object_tol_hand_sq = self.object_tol_hand * self.object_tol_hand
        self.object_tol_tool = 0.2
        self.object_tol_tool_sq = self.object_tol_tool * self.object_tol_tool

        self.diva_traj = None
        self.produced_sound = None

        Environment.__init__(self,
                             m_mins=[-1.] * (21 + 28),
                             m_maxs=[1.] * (21 + 28),
                             s_mins=[-1.] * 56,
                             s_maxs=[1.] * 56)

        self.current_tool = [-0.5, 0., 0.5, 0.]
        self.current_toy1 = [0.5, 0.5, 0.]
        #self.current_toy2 = [0.7, 0.7, 0.]
        #self.current_toy3 = [0.9, 0.9, 0.]
        self.current_caregiver = [0., 1.7]
        self.reset()
        self.compute_tool()

        self.purge_logs()

        if self.arm.gui:
            self.init_plot()

        self.count_diva = 0
        self.count_arm = 0
        self.count_tool = 0
        self.count_toy1_by_tool = 0
        #self.count_toy2_by_tool = 0
        #self.count_toy3_by_tool = 0
        self.count_toy1_by_hand = 0
        #self.count_toy2_by_hand = 0
        #self.count_toy3_by_hand = 0
        self.count_parent_give_label = 0
        self.count_parent_give_object = 0
        self.count_produced_sounds = {}
        for hs in self.human_sounds:
            self.count_produced_sounds[hs] = 0

        self.time_arm = 0.
        self.time_diva = 0.
        self.time_arm_per_it = 0.
        self.time_diva_per_it = 0.
Exemplo n.º 9
0
from tf2rl.td3.policies import TD3Policy
from tf2rl.td3.td3 import TD3
from arm_env import ArmEnvironment
import rospy

rospy.init_node("testing")
env = ArmEnvironment()  #gym.make("LunarLanderContinuous-v2")
model = TD3(TD3Policy, env, log_dir_path="td3_results_3")
print(env.action_space)
print(env.observation_space)
model.learn(total_timesteps=1000000)