# NOTE(review): this chunk is one of several concatenated example scripts; the
# training loop below is cut off mid-body (it ends at `step_num = 0`).
# Configure the DDPG agent. `DDPG`, `OUNoise`, `env`, `state_shape` and
# `action_shape` are presumably defined in an earlier, unseen part of the
# file -- TODO confirm.
agent = DDPG(state_shape,
             action_shape,
             batch_size=128,        # replay-minibatch size
             gamma=0.995,           # reward discount factor
             tau=0.001,             # soft target-network update rate
             actor_lr=0.0001,
             critic_lr=0.001,
             use_layer_norm=True)
print('DDPG agent configured')
# Resume training: restore network weights from the checkpoint next to the
# script and reload the saved replay memory.
agent.load_model(agent.current_path + '/model/model.ckpt')
agent.load_memory()

max_episode = 10000
tot_rewards = []
print('env reset')
observation, done = env.reset()
# Warm-up transition: take one action and step the simulator before the
# training loop starts (sleeps give the ROS/Gazebo side time to settle).
action = agent.act(observation)
print(action)
rospy.sleep(0.8)
observation, reward, done = env.step(action)
rospy.sleep(0.8)
noise_sigma = 0.15                      # initial OU exploration-noise sigma
save_cutoff = 1
cutoff_count = 0
save_count = 0
curr_highest_eps_reward = -1000.0       # best episode reward seen so far
# NOTE(review): `xrange` means this snippet targets Python 2.
for i in xrange(max_episode):
    # Anneal exploration: every 100 episodes, reinstall the OU noise process
    # with the current sigma, then halve sigma (floor at ~0.03).
    if i % 100 == 0 and noise_sigma > 0.03:
        agent.noise = OUNoise(agent.nb_actions, sigma=noise_sigma)
        noise_sigma /= 2.0
    step_num = 0
import rospy
from arm_env import ArmEnvironment, AllJoints
import position_command
import actionlib
from trajectory_msgs.msg import JointTrajectory, JointTrajectoryPoint
from control_msgs.msg import JointTrajectoryControllerState, FollowJointTrajectoryAction, FollowJointTrajectoryActionGoal, FollowJointTrajectoryGoal
from std_srvs.srv import Empty
from sensor_msgs.msg import JointState
from gazebo_msgs.srv import SetModelState, SetModelStateRequest, SetModelConfiguration, SetModelConfigurationRequest
from gazebo_msgs.srv import GetModelState, GetModelStateRequest
from gazebo_msgs.msg import ModelState
import numpy as np
# Smoke-test script: instantiate the simulated arm environment and reset it.
envboi = ArmEnvironment()
envboi.reset()
# 4-element joint vectors; ordering is presumably (plat, shoulder, forearm,
# wrist) per the commented-out AllJoints call below -- TODO confirm.
zero = np.array([0, 0, 0, 0])

testpos = np.array([0, 0.9, 0.5, 1.5])

# NOTE(review): the commented-out experiments below were left by the author;
# kept as-is for reference.
#rospy.init_node('testing_node')

# jointboi = AllJoints(['plat_joint','shoulder_joint','forearm_joint','wrist_joint'])

# jointboi.move(testpos)

# rospy.sleep(1)

# TODO fix reset function

# rospy.sleep(1)
# print(envboi.get_latest_state())
# envboi.reset()
# Example #3  (scraped separator; original read "Пример #3" with a stray vote count "0")
import rospy
from arm_env import ArmEnvironment, AllJoints
import numpy as np

rospy.init_node("test")

# Interactive smoke test: the user types space-separated joint actions, the
# script steps the environment and prints the remaining goal distance, and
# optionally resamples a new goal between steps.
env = ArmEnvironment(static_goal=True)
env.reset()
rospy.sleep(1)
print("Goal Distance: ", env.get_goal_distance())


def _parse_action(text):
    """Parse a space-separated string of numbers into a float64 numpy array.

    float('0') == 0.0, so the original special case that appended the int 0
    for the token '0' was unnecessary -- and harmful: an all-zero input
    produced an int-dtype array instead of floats.
    """
    return np.array([float(tok) for tok in text.split(' ')])


while True:
    action = _parse_action(input("Enter an action:"))
    # Fix: the original built `action = np.array(floatarr)` and then passed
    # the raw list `floatarr` to step(), leaving the array unused. The array
    # was clearly the intent.
    env.step(action)
    print("Goal Distance: ", env.get_goal_distance())
    # SECURITY NOTE(review): eval() on raw user input executes arbitrary
    # code. Kept for behavioral compatibility (users type True/False here),
    # but this should be replaced with an explicit yes/no prompt parser.
    reset = eval(input("Reset goal?"))
    if reset:
        env.set_new_goal()

# alljoints.move(pos)
# #rospy.sleep(3)
# env.pause_physics()
# env.unpause_physics()