def build_training_graph(num_agents):
    """Assemble the TF graph that trains the CBF and action networks.

    Args:
        num_agents: number of agents; used as the leading dimension of the
            state placeholder ``[num_agents, 4]`` and the goal placeholder
            ``[num_agents, 2]``.

    Returns:
        Tuple ``(s, g, a, loss_list, loss, acc_list)``: the state and goal
        placeholders, the output of ``core.network_action``, the list of
        weighted loss terms, the total loss (10 x the sum of the weighted
        loss terms and the L2 weight-decay terms), and the list of accuracy
        tensors.
    """
    s = tf.placeholder(tf.float32, [num_agents, 4])
    g = tf.placeholder(tf.float32, [num_agents, 2])

    # Pairwise state differences: x[i, j] = s[i] - s[j].
    s_rows = tf.expand_dims(s, 1)
    s_cols = tf.expand_dims(s, 0)
    x = s_rows - s_cols

    min_dist = config.DIST_MIN_THRES
    ttc = config.TIME_TO_COLLISION

    # The mask returned by the CBF network is not needed for training.
    h, _mask, indices = core.network_cbf(x=x, r=min_dist, indices=None)
    a = core.network_action(
        s=s, g=g, obs_radius=config.OBS_RADIUS, indices=indices)

    barrier_terms = core.loss_barrier(
        h=h, s=s, r=min_dist, ttc=ttc, indices=indices)
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_terms

    deriv_terms = core.loss_derivatives(
        s=s, a=a, h=h, x=x, r=min_dist, indices=indices, ttc=ttc,
        alpha=config.ALPHA_CBF)
    (loss_dang_deriv, loss_safe_deriv,
     acc_dang_deriv, acc_safe_deriv) = deriv_terms

    loss_action = core.loss_actions(s=s, g=g, a=a, r=min_dist, ttc=ttc)

    # Relative weights of the individual loss terms.
    loss_list = [
        2 * loss_dang,
        loss_safe,
        2 * loss_dang_deriv,
        loss_safe_deriv,
        0.01 * loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    # L2 weight decay over every trainable variable currently in the graph.
    weight_loss = []
    for var in tf.trainable_variables():
        weight_loss.append(config.WEIGHT_DECAY * tf.nn.l2_loss(var))

    loss = 10 * tf.math.add_n(loss_list + weight_loss)
    return s, g, a, loss_list, loss, acc_list
def build_training_graph(num_agents):
    """Build the training graph for the 8-dimensional-state agents.

    Args:
        num_agents: number of agents; leading dimension of the state and
            reference-state placeholders (both ``[num_agents, 8]``).

    Returns:
        Tuple ``(s, s_ref, u, loss_list, loss, acc_list)``: the state
        placeholder, the goal (reference) state placeholder, the control
        action from ``core.network_action``, the weighted loss terms, the
        total loss (10 x the sum of the weighted terms plus L2 weight decay)
        and the accuracy tensors.
    """
    # Agent states and their goal states.
    s = tf.placeholder(tf.float32, [num_agents, 8])
    s_ref = tf.placeholder(tf.float32, [num_agents, 8])

    # State of every agent relative to every other agent:
    # x[i, j] = s[i] - s[j].
    x = tf.expand_dims(s, 1) - tf.expand_dims(s, 0)

    # CBF value per agent for its nearest neighbours, plus the neighbour
    # indices that the later calls reuse. The mask is unused here.
    cbf_out = core.network_cbf(x=x, r=config.DIST_MIN_THRES, indices=None)
    h, _mask, indices = cbf_out

    # Control action of each agent.
    u = core.network_action(
        s=s, s_ref=s_ref, obs_radius=config.OBS_RADIUS, indices=indices)

    # Barrier losses / accuracies:
    #   *_dang -> h(s) < 0 on the dangerous set
    #   *_safe -> h(s) >= 0 on the safe set
    barrier_out = core.loss_barrier(h=h, s=s, indices=indices)
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_out

    # Derivative-condition losses / accuracies (doth(s) + alpha h(s) >= 0)
    # on the dangerous set, the safe set, and the states in neither set
    # ("medium").
    deriv_out = core.loss_derivatives(s=s, u=u, h=h, x=x, indices=indices)
    (loss_dang_deriv, loss_safe_deriv, loss_medium_deriv,
     acc_dang_deriv, acc_safe_deriv, acc_medium_deriv) = deriv_out

    # Distance between u and the nominal control.
    loss_action = core.loss_actions(s=s, u=u, s_ref=s_ref, indices=indices)

    # The weight of each loss item requires careful tuning.
    loss_list = [
        loss_dang,
        loss_safe,
        3 * loss_dang_deriv,
        loss_safe_deriv,
        2 * loss_medium_deriv,
        0.5 * loss_action,
    ]
    acc_list = [
        acc_dang,
        acc_safe,
        acc_dang_deriv,
        acc_safe_deriv,
        acc_medium_deriv,
    ]

    # L2 regularisation over all trainable variables.
    weight_loss = [
        config.WEIGHT_DECAY * tf.nn.l2_loss(var)
        for var in tf.trainable_variables()
    ]
    all_terms = loss_list + weight_loss
    loss = 10 * tf.math.add_n(all_terms)

    return s, s_ref, u, loss_list, loss, acc_list
def opt_body(u_res, loop_count, is_safe):
    """Perform one gradient-descent refinement step on the residual ``u_res``.

    Uses the free names ``s``, ``u``, ``h``, ``mask`` and ``indices``, which
    must be available in the surrounding scope.

    Args:
        u_res: current action residual added on top of ``u``.
        loop_count: current iteration counter.
        is_safe: passed through unchanged.

    Returns:
        Tuple ``(u_res, loop_count, is_safe)`` with the updated residual and
        the counter incremented by one.
    """
    # One-step prediction of the next state under the refined action.
    state_rate = core.quadrotor_dynamics_tf(s, u + u_res)
    next_state = s + state_rate * config.TIME_STEP_EVAL
    next_diff = tf.expand_dims(next_state, 1) - tf.expand_dims(next_state, 0)
    next_h, next_mask, _ = core.network_cbf(
        x=next_diff, r=config.DIST_MIN_THRES, indices=indices)

    # Discrete-time CBF condition; only negative entries contribute to the
    # error below.
    cbf_cond = next_h - h + config.TIME_STEP_EVAL * config.ALPHA_CBF * h
    cbf_cond = cbf_cond * mask * next_mask
    violation = tf.reduce_sum(tf.math.maximum(-cbf_cond, 0), axis=1)

    # Step the residual against the gradient of the violation.
    grad = tf.gradients(violation, u_res)[0]
    return (
        u_res - config.REFINE_LEARNING_RATE * grad,
        loop_count + 1,
        is_safe,
    )
def opt_body(a_res, loop_count):
    """Apply one gradient-descent update to the action residual ``a_res``.

    Uses the free names ``s``, ``a``, ``h``, ``mask`` and ``indices``, which
    must be available in the surrounding scope.

    Args:
        a_res: current action residual added on top of ``a``.
        loop_count: current iteration counter.

    Returns:
        Tuple ``(a_res, loop_count)`` with the updated residual and the
        counter incremented by one.
    """
    # Predict the next state when acting with a + a_res.
    velocity = core.dynamics(s, a + a_res)
    s_pred = s + velocity * config.TIME_STEP
    x_pred = tf.expand_dims(s_pred, 1) - tf.expand_dims(s_pred, 0)
    h_pred, mask_pred, _ = core.network_cbf(
        x=x_pred, r=config.DIST_MIN_THRES, indices=indices)

    # CBF condition residual; negative entries are violations.
    margin = h_pred - h + config.TIME_STEP * config.ALPHA_CBF * h
    margin = margin * mask * mask_pred
    penalty = tf.reduce_sum(tf.math.maximum(-margin, 0), axis=1)

    step = tf.gradients(penalty, a_res)[0]
    new_res = a_res - config.REFINE_LEARNING_RATE * step
    new_count = loop_count + 1
    return new_res, new_count
def opt_body(a_res, loop_count):
    """Loop body that updates ``a_res`` by one gradient-descent step.

    Uses the free names ``s``, ``a``, ``h``, ``mask`` and ``indices``, which
    must be available in the surrounding scope.

    Args:
        a_res: current action residual added on top of ``a``.
        loop_count: current iteration counter.

    Returns:
        Tuple ``(a_res, loop_count)``: the residual after one update and the
        counter incremented by one.
    """
    # Compute s_next under the action a + a_res.
    refined_action = a + a_res
    dsdt = core.dynamics(s, refined_action)
    s_next = s + dsdt * config.TIME_STEP
    x_next = tf.expand_dims(s_next, 1) - tf.expand_dims(s_next, 0)
    h_next, mask_next, _ = core.network_cbf(
        x=x_next, r=config.DIST_MIN_THRES, indices=indices)

    # The masked quantity below should be >= 0; where it is not, its
    # magnitude is accumulated into the error.
    cbf_residual = (h_next - h + config.TIME_STEP * config.ALPHA_CBF * h)
    cbf_residual = cbf_residual * mask * mask_next
    error = tf.reduce_sum(tf.math.maximum(-cbf_residual, 0), axis=1)

    # Gradient of the error w.r.t. a_res drives the update.
    error_gradient = tf.gradients(error, a_res)[0]
    return (a_res - config.REFINE_LEARNING_RATE * error_gradient,
            loop_count + 1)
def build_training_graph(num_agents):
    """Construct the training graph (8-dimensional agent states).

    Args:
        num_agents: number of agents; leading dimension of the two
            ``[num_agents, 8]`` placeholders.

    Returns:
        Tuple ``(s, s_ref, u, loss_list, loss, acc_list)``: state
        placeholder, reference-state placeholder, action tensor from
        ``core.network_action``, weighted loss terms, total loss, and
        accuracy tensors.
    """
    state_shape = [num_agents, 8]
    s = tf.placeholder(tf.float32, state_shape)
    s_ref = tf.placeholder(tf.float32, state_shape)

    # x[i, j] = s[i] - s[j]
    s_as_rows = tf.expand_dims(s, 1)
    s_as_cols = tf.expand_dims(s, 0)
    x = s_as_rows - s_as_cols

    # Only h and the neighbour indices are used below; the mask is dropped.
    h, _, indices = core.network_cbf(
        x=x, r=config.DIST_MIN_THRES, indices=None)
    u = core.network_action(
        s=s, s_ref=s_ref, obs_radius=config.OBS_RADIUS, indices=indices)

    (loss_dang, loss_safe,
     acc_dang, acc_safe) = core.loss_barrier(h=h, s=s, indices=indices)

    (loss_dang_deriv, loss_safe_deriv, loss_medium_deriv,
     acc_dang_deriv, acc_safe_deriv, acc_medium_deriv) = (
        core.loss_derivatives(s=s, u=u, h=h, x=x, indices=indices))

    loss_action = core.loss_actions(s=s, u=u, s_ref=s_ref, indices=indices)

    # The weight of each loss item requires careful tuning.
    loss_list = [
        loss_dang,
        loss_safe,
        3 * loss_dang_deriv,
        loss_safe_deriv,
        2 * loss_medium_deriv,
        0.5 * loss_action,
    ]
    acc_list = [
        acc_dang, acc_safe,
        acc_dang_deriv, acc_safe_deriv, acc_medium_deriv,
    ]

    # L2 weight decay, one term per trainable variable.
    weight_loss = []
    for variable in tf.trainable_variables():
        weight_loss.append(config.WEIGHT_DECAY * tf.nn.l2_loss(variable))

    loss = 10 * tf.math.add_n(loss_list + weight_loss)
    return s, s_ref, u, loss_list, loss, acc_list
def build_evaluation_graph(num_agents):
    """Build the evaluation graph with test-time action refinement.

    The action ``u`` from the network is corrected by a residual ``u_res``
    that is refined with gradient descent inside a ``tf.while_loop``; the
    loop stops after ``config.REFINE_LOOPS`` iterations or immediately when
    ``is_safe`` is true.

    Args:
        num_agents: number of agents; leading dimension of the two
            ``[num_agents, 8]`` placeholders.

    Returns:
        Tuple ``(s, s_ref, u_opt, loss_list, acc_list)``: state placeholder,
        reference-state placeholder, refined action ``u + u_res``, the
        (unweighted) loss terms and the accuracy tensors.
    """
    s = tf.placeholder(tf.float32, [num_agents, 8])
    s_ref = tf.placeholder(tf.float32, [num_agents, 8])

    # Pairwise state differences: x[i, j] = s[i] - s[j].
    x = tf.expand_dims(s, 1) - tf.expand_dims(s, 0)
    h, mask, indices = core.network_cbf(
        x=x, r=config.DIST_MIN_THRES, indices=None)
    u = core.network_action(
        s=s, s_ref=s_ref, obs_radius=config.OBS_RADIUS, indices=indices)

    # is_safe is true when the mean of the safe mask equals 1.
    safe_mask = core.compute_safe_mask(s, r=config.DIST_SAFE, indices=indices)
    safe_fraction = tf.reduce_mean(tf.cast(safe_mask, tf.float32))
    is_safe = tf.equal(safe_fraction, 1)

    # Loop state: action residual and iteration counter.
    u_res = tf.Variable(tf.zeros_like(u), name='u_res')
    loop_count = tf.Variable(0, name='loop_count')

    def opt_body(u_res, loop_count, is_safe):
        # One-step prediction under the refined action u + u_res.
        state_rate = core.quadrotor_dynamics_tf(s, u + u_res)
        next_state = s + state_rate * config.TIME_STEP_EVAL
        next_diff = (tf.expand_dims(next_state, 1)
                     - tf.expand_dims(next_state, 0))
        next_h, next_mask, _ = core.network_cbf(
            x=next_diff, r=config.DIST_MIN_THRES, indices=indices)

        # Negative entries of the masked condition are violations.
        cbf_cond = next_h - h + config.TIME_STEP_EVAL * config.ALPHA_CBF * h
        cbf_cond = cbf_cond * mask * next_mask
        violation = tf.reduce_sum(tf.math.maximum(-cbf_cond, 0), axis=1)

        grad = tf.gradients(violation, u_res)[0]
        return (
            u_res - config.REFINE_LEARNING_RATE * grad,
            loop_count + 1,
            is_safe,
        )

    def opt_cond(u_res, loop_count, is_safe):
        # Keep refining while under the iteration budget and not safe.
        under_budget = tf.less(loop_count, config.REFINE_LOOPS)
        not_safe = tf.logical_not(is_safe)
        return tf.logical_and(under_budget, not_safe)

    # Reset the residual and the counter before running the refinement loop.
    reset_ops = [u_res.assign(tf.zeros_like(u)), loop_count.assign(0)]
    with tf.control_dependencies(reset_ops):
        u_res, _, _ = tf.while_loop(
            opt_cond, opt_body, [u_res, loop_count, is_safe])
        u_opt = u + u_res

    barrier_out = core.loss_barrier(h=h, s=s, indices=indices)
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_out

    deriv_out = core.loss_derivatives(
        s=s, u=u_opt, h=h, x=x, indices=indices)
    (loss_dang_deriv, loss_safe_deriv, loss_medium_deriv,
     acc_dang_deriv, acc_safe_deriv, acc_medium_deriv) = deriv_out

    loss_action = core.loss_actions(
        s=s, u=u_opt, s_ref=s_ref, indices=indices)

    loss_list = [
        loss_dang,
        loss_safe,
        loss_dang_deriv,
        loss_safe_deriv,
        loss_medium_deriv,
        loss_action,
    ]
    acc_list = [
        acc_dang,
        acc_safe,
        acc_dang_deriv,
        acc_safe_deriv,
        acc_medium_deriv,
    ]
    return s, s_ref, u_opt, loss_list, acc_list
def build_evaluation_graph(num_agents):
    """Build the evaluation graph with test-time action refinement.

    The network action ``a`` is corrected by a residual ``a_res`` refined for
    ``config.REFINE_LOOPS`` gradient-descent iterations. The barrier and
    derivative losses are then evaluated at the predicted next state under
    the refined action.

    Args:
        num_agents: number of agents; leading dimension of the state
            placeholder ``[num_agents, 4]`` and goal placeholder
            ``[num_agents, 2]``.

    Returns:
        Tuple ``(s, g, a_opt, loss_list, acc_list)``: state placeholder, goal
        placeholder, refined action ``a + a_res``, the loss terms and the
        accuracy tensors.
    """
    # Agent states and goal states.
    s = tf.placeholder(tf.float32, [num_agents, 4])
    g = tf.placeholder(tf.float32, [num_agents, 2])

    # Difference between the state of each agent and every other agent:
    # x[i, j] = s[i] - s[j].
    x = tf.expand_dims(s, 1) - tf.expand_dims(s, 0)

    # CBF value for the nearest neighbours of each agent, with its mask and
    # the neighbour indices reused by the later calls.
    h, mask, indices = core.network_cbf(x=x, r=config.DIST_MIN_THRES)

    # Control action of each agent.
    a = core.network_action(
        s=s, g=g, obs_radius=config.OBS_RADIUS, indices=indices)

    # a_res is a correction to a: when a does not satisfy the CBF conditions
    # we look for an a_res such that a + a_res does.
    a_res = tf.Variable(tf.zeros_like(a), name='a_res')
    loop_count = tf.Variable(0, name='loop_count')

    def opt_body(a_res, loop_count):
        # One iteration of updating a_res.
        # Predict s_next under a + a_res.
        velocity = core.dynamics(s, a + a_res)
        s_pred = s + velocity * config.TIME_STEP
        x_pred = tf.expand_dims(s_pred, 1) - tf.expand_dims(s_pred, 0)
        h_pred, mask_pred, _ = core.network_cbf(
            x=x_pred, r=config.DIST_MIN_THRES, indices=indices)

        # The masked condition should be >= 0; if not, a_res is updated by
        # gradient descent on the accumulated violation.
        margin = h_pred - h + config.TIME_STEP * config.ALPHA_CBF * h
        margin = margin * mask * mask_pred
        penalty = tf.reduce_sum(tf.math.maximum(-margin, 0), axis=1)

        # Gradient used to update a_res.
        step = tf.gradients(penalty, a_res)[0]
        return (a_res - config.REFINE_LEARNING_RATE * step,
                loop_count + 1)

    def opt_cond(a_res, loop_count):
        # Update a_res for REFINE_LOOPS iterations.
        return tf.less(loop_count, config.REFINE_LOOPS)

    # Reset the residual and the counter before running the refinement loop.
    reset_ops = [a_res.assign(tf.zeros_like(a)), loop_count.assign(0)]
    with tf.control_dependencies(reset_ops):
        a_res, _ = tf.while_loop(opt_cond, opt_body, [a_res, loop_count])
        a_opt = a + a_res

    # Predicted next state (and its CBF value) under the refined action.
    dsdt = core.dynamics(s, a_opt)
    s_next = s + dsdt * config.TIME_STEP
    x_next = tf.expand_dims(s_next, 1) - tf.expand_dims(s_next, 0)
    h_next, mask_next, _ = core.network_cbf(
        x=x_next, r=config.DIST_MIN_THRES, indices=indices)

    # Barrier losses / accuracies, evaluated at the predicted next state:
    #   *_dang -> h(s) < 0 on the dangerous set
    #   *_safe -> h(s) >= 0 on the safe set
    barrier_out = core.loss_barrier(
        h=h_next, s=s_next, r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION, eps=[0, 0])
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_out

    # Derivative-condition losses / accuracies (doth(s) + alpha h(s) >= 0)
    # on the dangerous and the safe set.
    deriv_out = core.loss_derivatives(
        s=s_next, a=a_opt, h=h_next, x=x_next, r=config.DIST_MIN_THRES,
        ttc=config.TIME_TO_COLLISION, alpha=config.ALPHA_CBF,
        indices=indices)
    (loss_dang_deriv, loss_safe_deriv,
     acc_dang_deriv, acc_safe_deriv) = deriv_out

    # Distance between the action and the nominal action.
    # NOTE(review): this passes the unrefined action `a`, not `a_opt` -
    # confirm that is intended.
    loss_action = core.loss_actions(
        s, g, a, r=config.DIST_MIN_THRES, ttc=config.TIME_TO_COLLISION)

    loss_list = [
        loss_dang,
        loss_safe,
        loss_dang_deriv,
        loss_safe_deriv,
        loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    return s, g, a_opt, loss_list, acc_list
def build_evaluation_graph(num_agents):
    """Construct the evaluation graph, including action refinement.

    A residual ``a_res`` is added to the network action ``a`` and refined by
    ``config.REFINE_LOOPS`` gradient-descent steps in a ``tf.while_loop``.
    Barrier and derivative losses are computed at the one-step-ahead state
    reached with the refined action.

    Args:
        num_agents: number of agents; leading dimension of the
            ``[num_agents, 4]`` state and ``[num_agents, 2]`` goal
            placeholders.

    Returns:
        Tuple ``(s, g, a_opt, loss_list, acc_list)``.
    """
    s = tf.placeholder(tf.float32, [num_agents, 4])
    g = tf.placeholder(tf.float32, [num_agents, 2])

    # x[i, j] = s[i] - s[j]
    s_as_rows = tf.expand_dims(s, 1)
    s_as_cols = tf.expand_dims(s, 0)
    x = s_as_rows - s_as_cols

    h, mask, indices = core.network_cbf(x=x, r=config.DIST_MIN_THRES)
    a = core.network_action(
        s=s, g=g, obs_radius=config.OBS_RADIUS, indices=indices)

    # Loop state: action residual and iteration counter.
    a_res = tf.Variable(tf.zeros_like(a), name='a_res')
    loop_count = tf.Variable(0, name='loop_count')

    def opt_body(a_res, loop_count):
        # Predict the next state when acting with a + a_res.
        refined = a + a_res
        rate = core.dynamics(s, refined)
        nxt_s = s + rate * config.TIME_STEP
        nxt_x = tf.expand_dims(nxt_s, 1) - tf.expand_dims(nxt_s, 0)
        nxt_h, nxt_mask, _ = core.network_cbf(
            x=nxt_x, r=config.DIST_MIN_THRES, indices=indices)

        # Negative entries of the masked CBF condition are violations.
        cond_value = nxt_h - h + config.TIME_STEP * config.ALPHA_CBF * h
        cond_value = cond_value * mask * nxt_mask
        violation = tf.reduce_sum(tf.math.maximum(-cond_value, 0), axis=1)

        grad = tf.gradients(violation, a_res)[0]
        updated_res = a_res - config.REFINE_LEARNING_RATE * grad
        updated_count = loop_count + 1
        return updated_res, updated_count

    def opt_cond(a_res, loop_count):
        # Run exactly REFINE_LOOPS iterations.
        return tf.less(loop_count, config.REFINE_LOOPS)

    # Zero the residual and the counter before the loop runs.
    with tf.control_dependencies(
            [a_res.assign(tf.zeros_like(a)), loop_count.assign(0)]):
        a_res, _ = tf.while_loop(opt_cond, opt_body, [a_res, loop_count])
        a_opt = a + a_res

    # One-step-ahead state and CBF value under the refined action.
    dsdt = core.dynamics(s, a_opt)
    s_next = s + dsdt * config.TIME_STEP
    x_next = tf.expand_dims(s_next, 1) - tf.expand_dims(s_next, 0)
    h_next, mask_next, _ = core.network_cbf(
        x=x_next, r=config.DIST_MIN_THRES, indices=indices)

    min_dist = config.DIST_MIN_THRES
    ttc = config.TIME_TO_COLLISION

    (loss_dang, loss_safe,
     acc_dang, acc_safe) = core.loss_barrier(
        h=h_next, s=s_next, r=min_dist, ttc=ttc, eps=[0, 0])

    (loss_dang_deriv, loss_safe_deriv,
     acc_dang_deriv, acc_safe_deriv) = core.loss_derivatives(
        s=s_next, a=a_opt, h=h_next, x=x_next, r=min_dist, ttc=ttc,
        alpha=config.ALPHA_CBF, indices=indices)

    # NOTE(review): the action loss is computed from the unrefined action
    # `a`, not `a_opt` - confirm that is intended.
    loss_action = core.loss_actions(s, g, a, r=min_dist, ttc=ttc)

    loss_list = [
        loss_dang, loss_safe,
        loss_dang_deriv, loss_safe_deriv,
        loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]
    return s, g, a_opt, loss_list, acc_list
def build_training_graph(num_agents):
    """Build the graph that trains the CBF and action networks.

    Args:
        num_agents: number of agents; leading dimension of the state
            placeholder ``[num_agents, 4]`` and the goal placeholder
            ``[num_agents, 2]``.

    Returns:
        Tuple ``(s, g, a, loss_list, loss, acc_list)``: state placeholder,
        goal placeholder, control action from ``core.network_action``,
        weighted loss terms, total loss (10 x the sum of the weighted terms
        plus L2 weight decay) and accuracy tensors.
    """
    # Agent states and goal states.
    s = tf.placeholder(tf.float32, [num_agents, 4])
    g = tf.placeholder(tf.float32, [num_agents, 2])

    # Difference between the state of each agent and every other agent:
    # x[i, j] = s[i] - s[j].
    x = tf.expand_dims(s, 1) - tf.expand_dims(s, 0)

    # CBF value for the nearest neighbours of each agent and the neighbour
    # indices reused below. The returned mask is not needed for training.
    cbf_out = core.network_cbf(x=x, r=config.DIST_MIN_THRES, indices=None)
    h, _mask, indices = cbf_out

    # Control action of each agent.
    a = core.network_action(
        s=s, g=g, obs_radius=config.OBS_RADIUS, indices=indices)

    # Barrier losses / accuracies:
    #   *_dang -> h(s) < 0 on the dangerous set
    #   *_safe -> h(s) >= 0 on the safe set
    barrier_out = core.loss_barrier(
        h=h, s=s, r=config.DIST_MIN_THRES, ttc=config.TIME_TO_COLLISION,
        indices=indices)
    loss_dang, loss_safe, acc_dang, acc_safe = barrier_out

    # Derivative-condition losses / accuracies (doth(s) + alpha h(s) >= 0)
    # on the dangerous and the safe set.
    deriv_out = core.loss_derivatives(
        s=s, a=a, h=h, x=x, r=config.DIST_MIN_THRES, indices=indices,
        ttc=config.TIME_TO_COLLISION, alpha=config.ALPHA_CBF)
    (loss_dang_deriv, loss_safe_deriv,
     acc_dang_deriv, acc_safe_deriv) = deriv_out

    # Distance between a and the nominal a.
    loss_action = core.loss_actions(
        s=s, g=g, a=a, r=config.DIST_MIN_THRES, ttc=config.TIME_TO_COLLISION)

    # Relative weights of the individual loss terms.
    loss_list = [
        2 * loss_dang,
        loss_safe,
        2 * loss_dang_deriv,
        loss_safe_deriv,
        0.01 * loss_action,
    ]
    acc_list = [acc_dang, acc_safe, acc_dang_deriv, acc_safe_deriv]

    # L2 regularisation over all trainable variables.
    weight_loss = [
        config.WEIGHT_DECAY * tf.nn.l2_loss(var)
        for var in tf.trainable_variables()
    ]
    all_terms = loss_list + weight_loss
    loss = 10 * tf.math.add_n(all_terms)

    return s, g, a, loss_list, loss, acc_list