def solve(x_0, covMatrix, bounds):
    global covMatrix_, n_grad
    n_grad = 0
    covMatrix_ = covMatrix

    inf = 1.0e20
    n = len(x_0)

    snopt = SNOPT_solver()
    snopt.setOption("Verbose", False)
    snopt.setOption("Specs file", "/home/federico/workspace/TestMosek/algorithms/specs.spec")

    # Bounds on the variables and on F = [objective, linear constraint row].
    # Row 2 is fixed to 1 (Flow[1] == Fupp[1] == 1.0).
    xlow = np.array([bounds[0]] * n)
    xupp = np.array([bounds[1]] * n)
    Flow = np.array([0.0, 1.0])
    Fupp = np.array([inf, 1.0])
    ObjRow = 1

    # A carries the linear part of the Jacobian (row 2); G carries the
    # nonlinear sparsity pattern (row 1, the objective).
    A = np.array([[0] * n,
                  [1] * n])
    G = np.array([[1] * n,
                  [0] * n])

    [exe_time, iterations] = snopt.snopta(x0=x_0, xlow=xlow, xupp=xupp,
                                          Flow=Flow, Fupp=Fupp,
                                          ObjRow=ObjRow, A=A, G=G,
                                          usrfun=objFG)

    return exe_time, snopt.x, iterations
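# The usrfun above (objFG) is referenced but not defined in this snippet. Given
# the covariance-matrix global and the sum(x) == 1 row fixed through Flow/Fupp,
# it plausibly evaluates a portfolio-variance objective. A hypothetical sketch,
# not the author's code:
def objFG(status, x, needF, needG, cu, iu, ru):
    global covMatrix_, n_grad
    n_grad += 1
    # Row 1: nonlinear objective x' * Sigma * x. Row 2 is purely linear and is
    # already supplied through A, so it contributes 0 here.
    F = np.array([x.dot(covMatrix_).dot(x), 0.0])
    # Dense gradient of the objective row: 2 * Sigma * x.
    G = 2.0 * covMatrix_.dot(x)
    return status, F, G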
    xp = xp.values
    [xm, ym] = inter_min(xp, inter_par)
    return xm, ym, cse

# MATLAB reference for the metric computed here:
# p = interpolate_val(x, inter_par);
# e = R2 - (x - xc)' * (x - xc);
# M = -e / (p - y0);
# if p < y0
#     M = -inf;
# end
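# A hypothetical Python translation of the MATLAB reference above, assuming
# interpolate_val and the variables R2, xc, y0 exist as in the comment:
def search_metric(x, inter_par, xc, R2, y0, inf=1.0e20):
    p = interpolate_val(x, inter_par)   # surrogate value at x
    e = R2 - np.dot(x - xc, x - xc)     # remaining slack inside the ball
    M = -e / (p - y0)
    if p < y0:
        M = -inf
    return M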
def optimize(p_eval, psi_eval,
             t_nominal, user_progress_nominal, dt_nominal,
             const_vals_ti,
             x_min_ti, x_max_ti,
             u_min_ti, u_max_ti):

    assert allclose(psi_eval, 0.0)

    print "flashlight.trajectory_optimization.quadrotor3d_direct_transcription_nonconst_dt: Initializing optimization problem..."
    sys_time_begin = time.time()
    solver_time_begin = sys_time_begin

    #
    # find numerically stable and feasible trajectories to initialize the solver
    #
    numerically_stable_infeasible_trajectory = quadrotor3d_gaussian_time_stretch.optimize_numerically_stable_infeasible(
        p_eval, psi_eval,
        t_nominal, user_progress_nominal, dt_nominal,
        x_min_ti, x_max_ti,
        u_min_ti, u_max_ti,
        max_stretch_iters_numerically_stable,
        gauss_width_in_terms_of_dt_numerically_stable,
        gauss_max_in_terms_of_dt_numerically_stable,
        0)

    x_numerically_stable, u_numerically_stable, t_numerically_stable, user_progress_numerically_stable, dt_numerically_stable = numerically_stable_infeasible_trajectory

    if use_gaussian_time_stretching_for_feasible:

        # use gaussian time stretching to find a feasible trajectory
        feasible_trajectory = quadrotor3d_gaussian_time_stretch.optimize_feasible(
            p_eval, psi_eval,
            t_numerically_stable, user_progress_numerically_stable, dt_numerically_stable,
            x_min_ti, x_max_ti,
            u_min_ti, u_max_ti,
            max_stretch_iters_feasible,
            gauss_width_in_terms_of_dt_feasible,
            gauss_max_in_terms_of_dt_feasible,
            extra_iters_feasible)

        x_feasible, u_feasible, t_feasible, user_progress_feasible, dt_feasible = feasible_trajectory

    else:

        # use uniform time stretching to find a feasible trajectory
        p_nominal, _, _, _ = curveutils.reparameterize_curve(p_eval, user_progress_nominal)
        psi_nominal, _, _, _ = curveutils.reparameterize_curve(psi_eval, user_progress_nominal)

        feasible_trajectory = quadrotor3d_uniform_time_stretch.optimize_feasible(
            p_nominal, psi_nominal, dt_nominal,
            x_min_ti, x_max_ti,
            u_min_ti, u_max_ti,
            max_bin_search_iters_feasible,
            dt_upper_init_feasible)

        x_feasible, u_feasible, dt_scale_feasible = feasible_trajectory

        t_feasible = t_nominal * dt_scale_feasible * dt_scale_extra_stretch_feasible
        user_progress_feasible = user_progress_nominal
        dt_feasible = dt_nominal * dt_scale_feasible * dt_scale_extra_stretch_feasible

    # return user_progress_numerically_stable,None,None,None,None,t_numerically_stable,t_numerically_stable[-1]
    # return user_progress_feasible,None,None,None,None,t_feasible,t_feasible[-1]

    sys_time_end = time.time()
    print "flashlight.optimize.quadrotor3d_fixed_path: Finished initializing optimization problem (%.03f seconds)." % (sys_time_end - sys_time_begin)

    #
    # set up optimization problem constants
    #
    num_trajectory_samples = p_eval.shape[0]

    num_x_dims = quadrotor3d.num_x_dims
    num_u_dims = quadrotor3d.num_u_dims
    num_dt_dims = 1
    num_alpha_dims = num_x_dims + num_u_dims + num_dt_dims

    x_p_inds = arange(0, 3)
    x_e_inds = arange(3, 6)
    num_x_p_inds = x_p_inds.size

    # soft control effort constraints
    lamb_J_control_effort = 0.0 * ones(num_trajectory_samples)

    # soft position waypoint constraints
    num_dims_J_x_p_waypoint_ref_ti = 3
    lamb_J_x_p_waypoint = 0.01 * ones(num_trajectory_samples)
    J_x_p_waypoint_ref = x_numerically_stable[:, 0:3]

    # soft dt constraints
    num_dims_J_dt_ref_ti = 1
    lamb_J_dt = 0.0001 * ones(num_trajectory_samples)
    J_dt_ref = dt_numerically_stable * ones(num_trajectory_samples)

    # hard dynamics constraints
    num_dims_g_dynamics_ti = num_x_dims

    # hard state space waypoint constraints
    num_dims_g_x_waypoint_ti = num_x_dims
    num_dims_x_waypoint_ref_ti = num_x_dims
    lamb_g_x_waypoint = zeros(num_trajectory_samples)
    lamb_g_x_waypoint[[0, -1]] = 1
    X_waypoint_ref = zeros((num_trajectory_samples, num_dims_x_waypoint_ref_ti))
    X_waypoint_ref[0] = array([p_eval[0, 0], p_eval[0, 1], p_eval[0, 2],
                               0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    X_waypoint_ref[-1] = array([p_eval[-1, 0], p_eval[-1, 1], p_eval[-1, 2],
                                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    lamb_g_x_waypoint_nonzero = nonzero(lamb_g_x_waypoint)[0]
    num_lamb_g_x_waypoint_nonzero = len(lamb_g_x_waypoint_nonzero)
    lamb_g_x_waypoint_ti_to_ti_sparse = -1 * ones_like(lamb_g_x_waypoint, dtype=int32)
    lamb_g_x_waypoint_ti_to_ti_sparse[lamb_g_x_waypoint_nonzero] = arange(num_lamb_g_x_waypoint_nonzero)

    # hard position waypoint constraints
    num_dims_g_x_p_waypoint_ti = num_x_p_inds
    num_dims_x_p_waypoint_ref_ti = num_x_p_inds
    lamb_g_x_p_waypoint = zeros(num_trajectory_samples)
    X_p_waypoint_ref = zeros((num_trajectory_samples, num_dims_x_p_waypoint_ref_ti))
    lamb_g_x_p_waypoint_nonzero = nonzero(lamb_g_x_p_waypoint)[0]
    num_lamb_g_x_p_waypoint_nonzero = len(lamb_g_x_p_waypoint_nonzero)
    lamb_g_x_p_waypoint_ti_to_ti_sparse = -1 * ones_like(lamb_g_x_p_waypoint, dtype=int32)
    lamb_g_x_p_waypoint_ti_to_ti_sparse[lamb_g_x_p_waypoint_nonzero] = arange(num_lamb_g_x_p_waypoint_nonzero)

    # hard dt constraints
    num_dims_g_dt_ti = 1
    num_dims_dt_ref_ti = 1
    lamb_g_dt = zeros(num_trajectory_samples)
    dt_ref = zeros(num_trajectory_samples)
    lamb_g_dt_nonzero = nonzero(lamb_g_dt)[0]
    num_lamb_g_dt_nonzero = len(lamb_g_dt_nonzero)
    lamb_g_dt_ti_to_ti_sparse = -1 * ones_like(lamb_g_dt, dtype=int32)
    lamb_g_dt_ti_to_ti_sparse[lamb_g_dt_nonzero] = arange(num_lamb_g_dt_nonzero)

    dt_min_ti = dt_numerically_stable * 0.45
    dt_max_ti = dt_feasible * 1.55

    # stack all the const, lamb, and ref values
    const_vals = tile(const_vals_ti, (num_trajectory_samples, 1))
    lamb_vals = c_[lamb_J_control_effort, lamb_J_x_p_waypoint, lamb_J_dt,
                   lamb_g_x_waypoint, lamb_g_x_p_waypoint, lamb_g_dt]
    ref_vals = c_[J_x_p_waypoint_ref, J_dt_ref, X_waypoint_ref, X_p_waypoint_ref, dt_ref]

    # number of constraints and decision variables
    num_constraints_g_dynamics = num_trajectory_samples - 1
    num_dims_g_dynamics_ti = num_x_dims
    num_constraints_1d_g_dynamics = num_constraints_g_dynamics * num_dims_g_dynamics_ti
    num_constraints_1d_g_x_waypoint = num_lamb_g_x_waypoint_nonzero * num_dims_g_x_waypoint_ti
    num_constraints_1d_g_x_p_waypoint = num_lamb_g_x_p_waypoint_nonzero * num_dims_g_x_p_waypoint_ti
    num_constraints_1d_g_dt = num_lamb_g_dt_nonzero * num_dims_g_dt_ti

    num_decision_vars_1d_X = num_trajectory_samples * num_x_dims
    num_decision_vars_1d_U = num_trajectory_samples * num_u_dims
    num_decision_vars_1d_DT = num_trajectory_samples * num_dt_dims

    def _unpack_Alpha_1d(Alpha_1d):

        X_1d_begin, X_1d_end = 0, 0 + num_trajectory_samples * num_x_dims
        U_1d_begin, U_1d_end = X_1d_end, X_1d_end + num_trajectory_samples * num_u_dims
        DT_1d_begin, DT_1d_end = U_1d_end, U_1d_end + num_trajectory_samples * num_dt_dims

        X_1d = Alpha_1d[X_1d_begin:X_1d_end]
        U_1d = Alpha_1d[U_1d_begin:U_1d_end]
        DT_1d = Alpha_1d[DT_1d_begin:DT_1d_end]

        X = X_1d.reshape((num_trajectory_samples, num_x_dims))
        U = U_1d.reshape((num_trajectory_samples, num_u_dims))
        DT = DT_1d.reshape((num_trajectory_samples, num_dt_dims))

        return X, U, DT

    def _compute_common_vals(ti, X, U, DT):

        lamb_J_control_effort_ti = lamb_J_control_effort[ti]
        lamb_J_x_p_waypoint_ti = lamb_J_x_p_waypoint[ti]
        lamb_J_dt_ti = lamb_J_dt[ti]
        lamb_g_x_waypoint_ti = lamb_g_x_waypoint[ti]
        lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
        lamb_g_dt_ti = lamb_g_dt[ti]

        J_x_p_waypoint_ref_ti = matrix(J_x_p_waypoint_ref[ti]).T
        J_dt_ref_ti = J_dt_ref[ti]
        x_waypoint_ref_ti = matrix(X_waypoint_ref[ti]).T
        x_p_waypoint_ref_ti = matrix(X_p_waypoint_ref[ti]).T
        dt_ref_ti = dt_ref[ti]

        x_ti = matrix(X[ti]).T
        u_ti = matrix(U[ti]).T
        dt_ti = DT[ti]

        lamb_vals_ti = hstack([lamb_J_control_effort_ti, lamb_J_x_p_waypoint_ti, lamb_J_dt_ti,
                               lamb_g_x_waypoint_ti, lamb_g_x_p_waypoint_ti, lamb_g_dt_ti])
        ref_vals_ti = hstack([matrix(J_x_p_waypoint_ref_ti).A1, J_dt_ref_ti,
                              matrix(x_waypoint_ref_ti).A1, matrix(x_p_waypoint_ref_ti).A1, dt_ref_ti])
        var_vals_ti = hstack([x_ti.A1, u_ti.A1, dt_ti])
        common_vals_ti = hstack([lamb_vals_ti, ref_vals_ti, var_vals_ti])

        return common_vals_ti

    def _compute_sparse_jacobian_indices(ti, ti_to_ti_sparse, num_dims_gi):

        ti_sparse = ti_to_ti_sparse[ti]

        gi_begin = (ti_sparse + 0) * num_dims_gi
        gi_end = (ti_sparse + 1) * num_dims_gi
        xi_begin = (ti + 0) * num_x_dims
        xi_end = (ti + 1) * num_x_dims
        ui_begin = (ti + 0) * num_u_dims
        ui_end = (ti + 1) * num_u_dims
        dti_begin = (ti + 0) * num_dt_dims
        dti_end = (ti + 1) * num_dt_dims

        return gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, dti_begin, dti_end

    # Define objective function
    def _obj_func(Alpha_1d):

        global snopt_major_iter_count
        global snopt_obj_vals

        X, U, DT = _unpack_Alpha_1d(Alpha_1d)

        common_vals = c_[lamb_vals, ref_vals, X, U, DT]
        const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals = c_[
            const_vals[:-1], X[:-1], X[1:], U[:-1], DT[:-1]]

        J_ti = J_ti_vectorized_autowrap(common_vals)
        g_dynamics = quadrotor3d.g_dynamics_ti_vectorized_autowrap(
            const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)

        g_x_waypoint = zeros((num_lamb_g_x_waypoint_nonzero, num_dims_g_x_waypoint_ti))
        g_x_p_waypoint = zeros((num_lamb_g_x_p_waypoint_nonzero, num_dims_g_x_p_waypoint_ti))
        g_dt = zeros((num_lamb_g_dt_nonzero, num_dims_g_dt_ti))

        for ti in range(num_trajectory_samples):

            common_vals_ti = _compute_common_vals(ti, X, U, DT)

            lamb_g_x_waypoint_ti = lamb_g_x_waypoint[ti]
            lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
            lamb_g_dt_ti = lamb_g_dt[ti]

            if lamb_g_x_waypoint_ti != 0:
                g_x_waypoint[lamb_g_x_waypoint_ti_to_ti_sparse[ti]] = sympyutils.evaluate_anon_func(
                    g_x_waypoint_ti_autowrap, common_vals_ti).T

            if lamb_g_x_p_waypoint_ti != 0:
                g_x_p_waypoint[lamb_g_x_p_waypoint_ti_to_ti_sparse[ti]] = sympyutils.evaluate_anon_func(
                    g_x_p_waypoint_ti_autowrap, common_vals_ti).T

            if lamb_g_dt_ti != 0:
                g_dt[lamb_g_dt_ti_to_ti_sparse[ti]] = sympyutils.evaluate_anon_func(
                    g_dt_ti_autowrap, common_vals_ti)

        J = sum(J_ti)
        g_1d = hstack([matrix(g_dynamics).A1, matrix(g_x_waypoint).A1,
                       matrix(g_x_p_waypoint).A1, matrix(g_dt).A1])

        snopt_obj_vals[snopt_major_iter_count, 0] = J
        snopt_obj_vals[snopt_major_iter_count, 1] = sum(norm(g_dynamics, axis=1))
        snopt_major_iter_count = snopt_major_iter_count + 1

        set_printoptions(suppress=True)
        print "SNOPT major iteration: %d, Objective value: %f, Total g_dynamics error: %f" % (
            snopt_major_iter_count, J, sum(square(g_dynamics)))

        fail = 0
        return J, g_1d, fail

    # Define gradient function
    def _grad_func(Alpha_1d, J, g_1d, compute_nonzero_only=False):

        X, U, DT = _unpack_Alpha_1d(Alpha_1d)

        dJ_dX = zeros((num_trajectory_samples, num_x_dims))
        dJ_dU = zeros((num_trajectory_samples, num_u_dims))
        dJ_dDT = zeros((num_trajectory_samples, num_dt_dims))

        dgdynamics_dX = zeros((num_constraints_1d_g_dynamics, num_decision_vars_1d_X))
        dgdynamics_dU = zeros((num_constraints_1d_g_dynamics, num_decision_vars_1d_U))
        dgdynamics_dDT = zeros((num_constraints_1d_g_dynamics, num_decision_vars_1d_DT))

        dgxwaypoint_dX = zeros((num_constraints_1d_g_x_waypoint, num_decision_vars_1d_X))
        dgxwaypoint_dU = zeros((num_constraints_1d_g_x_waypoint, num_decision_vars_1d_U))
        dgxwaypoint_dDT = zeros((num_constraints_1d_g_x_waypoint, num_decision_vars_1d_DT))

        dgxpwaypoint_dX = zeros((num_constraints_1d_g_x_p_waypoint, num_decision_vars_1d_X))
        dgxpwaypoint_dU = zeros((num_constraints_1d_g_x_p_waypoint, num_decision_vars_1d_U))
        dgxpwaypoint_dDT = zeros((num_constraints_1d_g_x_p_waypoint, num_decision_vars_1d_DT))

        dgdt_dX = zeros((num_constraints_1d_g_dt, num_decision_vars_1d_X))
        dgdt_dU = zeros((num_constraints_1d_g_dt, num_decision_vars_1d_U))
        dgdt_dDT = zeros((num_constraints_1d_g_dt, num_decision_vars_1d_DT))

        if not compute_nonzero_only:

            common_vals = c_[lamb_vals, ref_vals, X, U, DT]
            const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals = c_[
                const_vals[:-1], X[:-1], X[1:], U[:-1], DT[:-1]]

            dJ_dX = dJti_dxti_vectorized_autowrap(common_vals)
            dJ_dU = dJti_duti_vectorized_autowrap(common_vals)
            dJ_dDT = dJti_ddtti_vectorized_autowrap(common_vals)

            dgdynamics_dX_current_block = quadrotor3d.dgdynamicsti_dxcurrent_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dX_next_block = quadrotor3d.dgdynamicsti_dxnext_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dU_current_block = quadrotor3d.dgdynamicsti_ducurrent_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)
            dgdynamics_dDT_current_block = quadrotor3d.dgdynamicsti_ddtcurrent_vectorized_autowrap(
                const_and_xcurrent_and_xnext_and_ucurrent_and_dtcurrent_vals)

        for ti in range(num_trajectory_samples):
            if compute_nonzero_only:
                dJ_dX[ti] = 1
                dJ_dU[ti] = 1
                dJ_dDT[ti] = 1

        for ti in range(num_constraints_g_dynamics):

            gi_begin = (ti + 0) * num_dims_g_dynamics_ti
            gi_end = (ti + 1) * num_dims_g_dynamics_ti
            ai_x_current_begin = (ti + 0) * num_x_dims
            ai_x_current_end = (ti + 1) * num_x_dims
            ai_x_next_begin = (ti + 1) * num_x_dims
            ai_x_next_end = (ti + 2) * num_x_dims
            ai_u_current_begin = (ti + 0) * num_u_dims
            ai_u_current_end = (ti + 1) * num_u_dims
            ai_dt_current_begin = (ti + 0) * num_dt_dims
            ai_dt_current_end = (ti + 1) * num_dt_dims

            if compute_nonzero_only:
                dgdynamics_dX[gi_begin:gi_end, ai_x_current_begin:ai_x_current_end] = 1
                dgdynamics_dX[gi_begin:gi_end, ai_x_next_begin:ai_x_next_end] = 1
                dgdynamics_dU[gi_begin:gi_end, ai_u_current_begin:ai_u_current_end] = 1
                dgdynamics_dDT[gi_begin:gi_end, ai_dt_current_begin:ai_dt_current_end] = 1
            else:
                dgdynamics_dX[gi_begin:gi_end, ai_x_current_begin:ai_x_current_end] = dgdynamics_dX_current_block[ti]
                dgdynamics_dX[gi_begin:gi_end, ai_x_next_begin:ai_x_next_end] = dgdynamics_dX_next_block[ti]
                dgdynamics_dU[gi_begin:gi_end, ai_u_current_begin:ai_u_current_end] = dgdynamics_dU_current_block[ti]
                dgdynamics_dDT[gi_begin:gi_end, ai_dt_current_begin:ai_dt_current_end] = matrix(
                    dgdynamics_dDT_current_block[ti]).T

        for ti in range(num_trajectory_samples):

            common_vals_ti = _compute_common_vals(ti, X, U, DT)

            lamb_g_x_waypoint_ti = lamb_g_x_waypoint[ti]
            lamb_g_x_p_waypoint_ti = lamb_g_x_p_waypoint[ti]
            lamb_g_dt_ti = lamb_g_dt[ti]

            if lamb_g_x_waypoint_ti != 0:
                gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, li_begin, li_end = _compute_sparse_jacobian_indices(
                    ti, lamb_g_x_waypoint_ti_to_ti_sparse, num_dims_g_x_waypoint_ti)
                dgxwaypoint_dX[gi_begin:gi_end, xi_begin:xi_end] = sympyutils.evaluate_anon_func(
                    dgxwaypointti_dxti_autowrap, common_vals_ti)

            if lamb_g_x_p_waypoint_ti != 0:
                gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, li_begin, li_end = _compute_sparse_jacobian_indices(
                    ti, lamb_g_x_p_waypoint_ti_to_ti_sparse, num_dims_g_x_p_waypoint_ti)
                dgxpwaypoint_dX[gi_begin:gi_end, xi_begin:xi_end] = sympyutils.evaluate_anon_func(
                    dgxpwaypointti_dxti_autowrap, common_vals_ti)

            if lamb_g_dt_ti != 0:
                gi_begin, gi_end, xi_begin, xi_end, ui_begin, ui_end, dti_begin, dti_end = _compute_sparse_jacobian_indices(
                    ti, lamb_g_dt_ti_to_ti_sparse, num_dims_g_dt_ti)
                dgdt_dDT[gi_begin:gi_end, dti_begin:dti_end] = sympyutils.evaluate_anon_func(
                    dgdtti_ddtti_autowrap, common_vals_ti)

        dJ_dAlpha_1d = hstack([matrix(dJ_dX).A1, matrix(dJ_dU).A1, matrix(dJ_dDT).A1])

        dgdynamics_dAlpha = c_[dgdynamics_dX, dgdynamics_dU, dgdynamics_dDT]
        dgxwaypoint_dAlpha = c_[dgxwaypoint_dX, dgxwaypoint_dU, dgxwaypoint_dDT]
        dgxpwaypoint_dAlpha = c_[dgxpwaypoint_dX, dgxpwaypoint_dU, dgxpwaypoint_dDT]
        dgdt_dAlpha = c_[dgdt_dX, dgdt_dU, dgdt_dDT]
        dg_dAlpha = r_[dgdynamics_dAlpha, dgxwaypoint_dAlpha, dgxpwaypoint_dAlpha, dgdt_dAlpha]

        fail = 0
        return matrix(dJ_dAlpha_1d).A, dg_dAlpha, fail

    def _obj_grad_func(status, Alpha_1d, needF, needG, cu, iu, ru):

        J, g_1d, fail = _obj_func(Alpha_1d)
        dJ_dAlpha_1d, dg_dAlpha, fail = _grad_func(Alpha_1d, J, g_1d)

        J_g_1d = hstack([J, g_1d, snopt_dummy_val])
        dJ_dAlpha_dg_dAlpha = r_[dJ_dAlpha_1d, dg_dAlpha]
        dJ_dAlpha_dg_dAlpha_nonzero_vals = dJ_dAlpha_dg_dAlpha[dJ_dAlpha_dg_dAlpha_nonzero_inds]

        return status, J_g_1d, dJ_dAlpha_dg_dAlpha_nonzero_vals

    inf = 1.0e20

    snopt = SNOPT_solver()
    snopt.setOption('Verbose', False)
    snopt.setOption('Solution print', False)
    snopt.setOption('Major print level', 0)
    snopt.setOption('Print level', 0)

    snopt_obj_row = 1
    snopt_num_funcs_1d = num_constraints_1d_g_dynamics + num_constraints_1d_g_x_waypoint + \
        num_constraints_1d_g_x_p_waypoint + num_constraints_1d_g_dt + 1
    snopt_num_vars_1d = num_decision_vars_1d_X + num_decision_vars_1d_U + num_decision_vars_1d_DT

    snopt_dummy_val = 0.0
    snopt_dummy_array = zeros((1, snopt_num_vars_1d))

    global snopt_major_iter_count
    global snopt_obj_vals
    snopt_major_iter_count = 0
    snopt_obj_vals = -1 * ones((10000, 2))

    X_min = tile(x_min_ti.A1, (1, num_trajectory_samples))
    X_max = tile(x_max_ti.A1, (1, num_trajectory_samples))
    U_min = tile(u_min_ti.A1, (1, num_trajectory_samples))
    U_max = tile(u_max_ti.A1, (1, num_trajectory_samples))
    DT_min = tile(dt_min_ti, (1, num_trajectory_samples))
    DT_max = tile(dt_max_ti, (1, num_trajectory_samples))

    Alpha_min = hstack([matrix(X_min).A1, matrix(U_min).A1, matrix(DT_min).A1])
    Alpha_max = hstack([matrix(X_max).A1, matrix(U_max).A1, matrix(DT_max).A1])

    X_0 = x_feasible
    U_0 = u_feasible
    DT_0 = dt_feasible * ones(num_trajectory_samples)
    Alpha_0 = hstack([matrix(X_0).A1, matrix(U_0).A1, matrix(DT_0).A1])

    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Calculating objective value on initial guess..."
    _obj_func(Alpha_0)

    J_g_1d_min = hstack([-inf,
                         zeros(num_constraints_1d_g_dynamics),
                         zeros(num_constraints_1d_g_x_waypoint),
                         zeros(num_constraints_1d_g_x_p_waypoint),
                         zeros(num_constraints_1d_g_dt),
                         snopt_dummy_val])
    J_g_1d_max = hstack([inf,
                         zeros(num_constraints_1d_g_dynamics),
                         zeros(num_constraints_1d_g_x_waypoint),
                         zeros(num_constraints_1d_g_x_p_waypoint),
                         zeros(num_constraints_1d_g_dt),
                         snopt_dummy_val])

    dJ_dAlpha_dg_dAlpha_const = r_[zeros((snopt_num_funcs_1d, snopt_num_vars_1d)), snopt_dummy_array]
    dJ_dAlpha_dg_dAlpha_const[-1, 0] = 10e-9

    dJ_dAlpha_nonzero, dg_dAlpha_nonzero, fail = _grad_func(Alpha_0, J=None, g_1d=None, compute_nonzero_only=True)
    dJ_dAlpha_dg_dAlpha_nonzero = r_[dJ_dAlpha_nonzero, dg_dAlpha_nonzero, snopt_dummy_array]
    dJ_dAlpha_dg_dAlpha_nonzero_inds = dJ_dAlpha_dg_dAlpha_nonzero.nonzero()

    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Solving optimization problem..."
    sys_time_begin = time.time()

    snopt.snopta(name="quadrotor_3d_fixed_path_optimization_test",
                 usrfun=_obj_grad_func,
                 x0=Alpha_0,
                 xlow=Alpha_min,
                 xupp=Alpha_max,
                 Flow=J_g_1d_min,
                 Fupp=J_g_1d_max,
                 ObjRow=snopt_obj_row,
                 A=dJ_dAlpha_dg_dAlpha_const,
                 G=dJ_dAlpha_dg_dAlpha_nonzero,
                 xnames=None,
                 Fnames=None)

    sys_time_end = time.time()
    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Finished solving optimization problem (%.03f seconds)." % (sys_time_end - sys_time_begin)

    solver_time_end = sys_time_end
    solver_time = solver_time_end - solver_time_begin
    print "flashlight.trajectory_optimization.quadrotor3d_fixed_path: Total solver time was %.03f seconds." % solver_time

    Alpha_opt_1d = snopt.x
    X_opt, U_opt, DT_opt = _unpack_Alpha_1d(Alpha_opt_1d)

    dt_opt_cumsum = cumsum(DT_opt[:-1])
    t_opt = hstack([t_numerically_stable[0], t_numerically_stable[0] + dt_opt_cumsum])
    T_final_opt = dt_opt_cumsum[-1]

    return X_opt, U_opt, DT_opt, t_opt, T_final_opt, solver_time, snopt_obj_vals
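# Hypothetical call site, assuming the nominal trajectory samples and the
# per-timestep bound vectors are already defined elsewhere in this module:
X_opt, U_opt, DT_opt, t_opt, T_final_opt, solver_time, snopt_obj_vals = \
    optimize(p_eval, psi_eval,
             t_nominal, user_progress_nominal, dt_nominal,
             const_vals_ti,
             x_min_ti, x_max_ti,
             u_min_ti, u_max_ti)
print "Optimized trajectory duration: %.3f seconds" % T_final_opt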
    sum = x[0] + x[1] + x[2]

    # Nonlinear objective term only
    fObj = 0.0
    if mode == 0 or mode == 2:
        fObj = sum**2

    gObj = np.zeros(nnObj, float)
    if mode == 1 or mode == 2:
        gObj[0] = 2.0 * sum
        gObj[1] = 2.0 * sum
        gObj[2] = 2.0 * sum

    return mode, fObj, gObj


snoptb = SNOPT_solver()
inf = 1.0e+20

snoptb.setOption('Infinite bound', inf)
snoptb.setOption('Print file', 'sntoyb.out')

m = 4
n = 4
nnCon = 2
nnJac = 2
nnObj = 3

# J contains the sparsity pattern of the Jacobian matrix.
# For nonlinear elements, enter any nonzero number (in this case 100).
# Linear elements must be correctly defined.
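# A hypothetical illustration of the convention described above (placeholder
# data, not the actual sntoyb matrix): the nonlinear block occupies the first
# nnCon rows and nnJac columns and is flagged with 100; the remaining entries
# carry true linear coefficients. Imports repeated so the sketch stands alone.
import numpy as np
import scipy.sparse as sp

J_example = sp.coo_matrix(np.array([[100.0, 100.0, 1.0, 0.0],
                                    [0.0, 100.0, 0.0, 1.0],
                                    [2.0, 4.0, 0.0, 0.0],
                                    [0.0, 0.0, 1.0, 1.0]]))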
""" import numpy as np import scipy.sparse as sp from optimize.snopt7 import SNOPT_solver def dieta_fun(status, x, needF, needG, cu, iu, ru): # LP has no nonlinear terms in the objective F = [] G = [] return status, F, G inf = 1.0e20 snopt = SNOPT_solver() snopt.setOption('Print file', 'dieta.out') snopt.setOption('Minor print level', 1) snopt.setOption('Summary frequency', 1) snopt.setOption('Print frequency', 1) nF = 4 ObjRow = 4 n = 6 # We provide the linear components of the Jacobian # matrix as a dense matrix. A = np.array([[110, 205, 160, 160, 420, 260], [4, 32, 13, 8, 4, 14], [2, 12, 54, 285, 22, 80], [3, 24, 13, 9, 20, 19]], float)
    return status, F


def sntoya_objFG(status, x, needF, needG, cu, iu, ru):
    F = np.array([x[1],                          # objective row
                  x[0]**2 + 4.0 * x[1]**2,
                  (x[0] - 2.0)**2 + x[1]**2])
    G = np.array([2 * x[0], 8 * x[1],
                  2 * (x[0] - 2), 2 * x[1]])
    return status, F, G


inf = 1.0e20

snopt = SNOPT_solver()
snopt.setOption('Verbose', True)
snopt.setOption('Solution print', True)
snopt.setOption('Print file', 'sntoya.out')

# Either dtype works, but the names for x and F have to be of
# the correct length, else they are both ignored by SNOPT:
xNames = np.array(['      x0', '      x1'])
FNames = np.array(['      F0', '      F1', '      F2'], dtype='c')

x0 = np.array([1.0, 1.0])
xlow = np.array([0.0, -inf])
xupp = np.array([inf, inf])
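# The snippet ends before the F bounds and the solve call. For this standard
# toy problem (minimize x1 subject to x0**2 + 4*x1**2 <= 4 and
# (x0 - 2)**2 + x1**2 <= 5), the continuation presumably reads:
Flow = np.array([-inf, -inf, -inf])
Fupp = np.array([inf, 4.0, 5.0])

snopt.snopta(name='sntoya', usrfun=sntoya_objFG, x0=x0, xlow=xlow, xupp=xupp,
             Flow=Flow, Fupp=Fupp, ObjRow=1, xnames=xNames, Fnames=FNames)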
    gObj[0] = x[6]
    gObj[1] = -x[5]
    gObj[2] = -x[6] + x[7]
    gObj[3] = x[8]
    gObj[4] = -x[7]
    gObj[5] = -x[1]
    gObj[6] = -x[2] + x[0]
    gObj[7] = -x[4] + x[2]
    gObj[8] = x[3]

    return mode, fObj, gObj


inf = 1.0e+20

snoptb = SNOPT_solver()
snoptb.setOption('Infinite bound', inf)
snoptb.setOption('Specs file', 'snmainb.spc')
snoptb.setOption('Print file', 'snmainb.out')

m = 18
n = 9
nnCon = 14
ne = 52
nnJac = n
nnObj = n

bl = -inf * np.ones(n + m)
bu = inf * np.ones(n + m)

# Nonlinear constraints
def optimize_blending_function_between_two_distance_sigmas(
        sigmaA, sigmaB, personA, personB, min_distA, min_distB, params,
        constrain_at_endpoints=False):
    '''
    This function finds a blend between two trajectories that respects two
    minimum distance constraints.
    '''

    # Some important parameters here
    nsamples = params['nsamples'] if 'nsamples' in params else 50
    ndims = 6
    dt = 0.01
    xdot5_limit = 0.001
    inf = 1.0e20
    lambda_snap = 1  # (1/dt)**4 -- snap must be scaled down to be comparable to position.
    lambda_pos = 1

    # A few derived quantities
    nvars = ndims * nsamples
    nconstraints_continuity = (ndims - 1) * nsamples
    nconstraints_obstacles = 2 * nsamples
    nconstraints = 1 + nconstraints_continuity + nconstraints_obstacles

    # Solver configuration
    snopt = SNOPT_solver()
    snopt.setOption('Verbose', False)
    snopt.setOption('Solution print', False)
    snopt.setOption('Print file', 'test5.out')
    snopt.setOption('Iteration limit', 8000)
    snopt.setOption('Print level', 3)
    snopt.setOption('Major optimality', 2e-6)
    snopt.setOption('Verify level', 3)  # Turn to 3 to carefully check gradients

    # 1. Set up decision variables
    x = np.array([0.5] * nsamples)  # Initialize to 0.5
    xdot1 = np.array([0.0] * nsamples)
    xdot2 = np.array([0.0] * nsamples)
    xdot3 = np.array([0.0] * nsamples)
    xdot4 = np.array([0.0] * nsamples)
    v = np.array([0.0] * nsamples)  # C4 continuity control variable

    # Interleave [x[0], xdot1[0], xdot2[0], ...]
    x0 = np.matrix(np.c_[x, xdot1, xdot2, xdot3, xdot4, v]).A1

    # 2. Set up the bounds on x
    low_x = np.array([0.0] * nsamples)  # x must be greater than or equal to 0
    low_xdot1 = np.array([-inf] * nsamples)
    low_xdot2 = np.array([-inf] * nsamples)
    low_xdot3 = np.array([-inf] * nsamples)
    low_xdot4 = np.array([-inf] * nsamples)
    low_v = np.array([-xdot5_limit] * nsamples)  # Bound control variable arbitrarily
    if constrain_at_endpoints:
        low_x[0] = 0.5
        low_x[nsamples - 1] = 0.5
    xlow = np.matrix(np.c_[low_x, low_xdot1, low_xdot2, low_xdot3, low_xdot4, low_v]).A1

    upp_x = np.array([1.0] * nsamples)  # x must be less than or equal to 1
    upp_xdot1 = np.array([inf] * nsamples)
    upp_xdot2 = np.array([inf] * nsamples)
    upp_xdot3 = np.array([inf] * nsamples)
    upp_xdot4 = np.array([inf] * nsamples)
    upp_v = np.array([xdot5_limit] * nsamples)  # Bound control variable arbitrarily
    if constrain_at_endpoints:
        upp_x[0] = 0.5
        upp_x[nsamples - 1] = 0.5
    xupp = np.matrix(np.c_[upp_x, upp_xdot1, upp_xdot2, upp_xdot3, upp_xdot4, upp_v]).A1

    # 3. Set up the objective function
    M = np.array([[0, 1, 0, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 0, 0, 1, 0],
                  [0, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0]])
    N = np.array([0, 0, 0, 0, 1])

    def grad_function(xM, compute_nonzero_only=False, compute_linear=False):
        G = np.zeros((nconstraints, nvars))

        # Set up the jacobian structure of the cost function.
        # This only impacts the w_i and wdot4_i variables
        obj_col = G[0, :]
        if not compute_nonzero_only:
            obj_col[::6] = 2 * dt * lambda_pos * (xM[:, 0] - 0.5)
            obj_col[4::6] = 2 * dt * lambda_snap * xM[:, 4]
        elif not compute_linear:
            obj_col[::6] = 1
            obj_col[4::6] = 1

        if compute_linear:
            # The C4 continuity constraint is linear
            stupidcounter = 0
            add_to_fi = 0
            for fi in range(1, nconstraints_continuity - 5):
                # Looping over the objective function
                fi_row = G[fi, :]
                fi += add_to_fi
                fi_row[fi - 1] = 1
                fi_row[fi] = dt
                fi_row[fi + 5] = -1
                stupidcounter += 1
                if stupidcounter == 5:
                    add_to_fi += 1
                    stupidcounter = 0
        return G

    def calc_obj(xM):
        # Our objective is the sum of
        #   the L2 norm of our position error away from 0.5
        #   the L2 norm of our 4th derivative error away from 0
        obj_pos = dt * np.sum((xM[:, 0] - 0.5)**2)
        obj_snap = dt * np.sum((xM[:, 4])**2)
        objective = lambda_pos * obj_pos + lambda_snap * obj_snap
        return (objective, obj_pos, obj_snap)

    def calc_obstacle_constraints(xM):
        blend = xM[:, 0]
        sigmaBlended = (blend[:, np.newaxis] * sigmaA +
                        (1 - blend)[:, np.newaxis] * sigmaB)
        constraintA = la.norm(sigmaBlended - personA, axis=1) - min_distA
        constraintB = la.norm(sigmaBlended - personB, axis=1) - min_distB
        return np.r_[constraintA, constraintB]

    def blend_test3_objFG(status, x, needF, needG, cu, iu, ru):
        xM = x.reshape(nsamples, ndims)

        objective, obj_pos, obj_snap = calc_obj(xM)

        # Evaluate the current continuity constraints
        continuity_x = np.zeros((nsamples, 5))
        for i in range(nsamples - 1):
            si = xM[i, 0:5]
            vi = xM[i, 5]
            si1 = xM[i + 1, 0:5]
            continuity_x[i] = si + (M.dot(si) + N.dot(vi)) * dt - si1
        continuity_x = np.matrix(continuity_x).A1

        obstacles = calc_obstacle_constraints(xM)

        F = np.concatenate(([objective], continuity_x, obstacles))
        #G = grad_function(xM)
        return status, F  #, G[G_nonzero_inds]

    # 4. Set up bounds on F
    # [objective can be anything, equal-to-zero for continuity,
    #  greater-than-zero for obstacles along the trajectory]
    low_F = np.concatenate(([-inf], np.array([0, 0, 0, 0, 0] * nsamples), [0, 0] * nsamples))
    upp_F = np.concatenate(([inf], np.array([0, 0, 0, 0, 0] * nsamples), [inf, inf] * nsamples))

    # F indices use Fortran-style (1-based) numbering
    ObjRow = 1

    # Set up the linear and nonlinear structure of the jacobian matrix
    xM = x0.reshape(nsamples, ndims)
    G = grad_function(xM, compute_nonzero_only=True, compute_linear=False)
    G_nonzero_inds = G.nonzero()
    A = grad_function(xM, compute_nonzero_only=True, compute_linear=True)

    # Now we solve
    a = time.time()
    snopt.snopta(name='blend_test3', usrfun=blend_test3_objFG, x0=x0,
                 xlow=xlow, xupp=xupp, Flow=low_F, Fupp=upp_F, ObjRow=ObjRow)
    b = time.time()

    print "Solved in %.4fs" % (b - a)
    print "Value of objective function: %.8f" % snopt.F[0]
    print "  lambda_pos: %f, lambda_snap: %f, " % (lambda_pos, lambda_snap)

    # Reshape the solution before reporting the final objective terms
    xM = snopt.x.reshape(nsamples, ndims)
    print "  objective: %f, obj_pos: %f, obj_snap: %f" % calc_obj(xM)
    return (xM, snopt)
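# Hypothetical usage with synthetic 3-D trajectories; sigmaA/sigmaB need one
# row per sample, and personA/personB are fixed points to keep away from:
nsamples = 50
t = np.linspace(0.0, 1.0, nsamples)
sigmaA = np.c_[t, np.zeros(nsamples), np.ones(nsamples)]
sigmaB = np.c_[t, np.ones(nsamples), np.ones(nsamples)]
personA = np.array([0.5, -1.0, 1.0])
personB = np.array([0.5, 2.0, 1.0])

xM, result = optimize_blending_function_between_two_distance_sigmas(
    sigmaA, sigmaB, personA, personB,
    min_distA=0.5, min_distB=0.5, params={'nsamples': nsamples})
blend = xM[:, 0]  # optimized blending weights in [0, 1]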
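# ---------------------------------------------------------------------------
# Aside: every usrfun callback in this file follows the same snopta
# convention: F stacks the objective row with the constraint rows, and G
# returns the derivatives of the entries flagged as nonlinear in the G
# pattern. A minimal self-contained sketch under that convention (the toy
# problem and the import path are our own assumptions, not from this code):
#
#   minimize (x0 - 1)^2 + (x1 - 2)^2   subject to   x0 + x1 <= 4
import numpy as np
from optimize.snopt7 import SNOPT_solver  # import path assumed

def toy_objFG(status, x, needF, needG, cu, iu, ru):
    # F[1] is the purely linear row; SNOPT reconstructs it from A, so we
    # report 0 for it here.
    F = np.array([(x[0] - 1.0)**2 + (x[1] - 2.0)**2, 0.0])
    # Derivatives of the nonlinear entries flagged in the G pattern below.
    G = np.array([2.0*(x[0] - 1.0), 2.0*(x[1] - 2.0)])
    return status, F, G

inf = 1.0e20
toy = SNOPT_solver()
toy.setOption('Verbose', False)
toy.snopta(name='toy_qp', usrfun=toy_objFG,
           x0=np.array([0.0, 0.0]),
           xlow=np.array([-inf, -inf]), xupp=np.array([inf, inf]),
           Flow=np.array([-inf, -inf]), Fupp=np.array([inf, 4.0]),
           ObjRow=1,
           A=np.array([[0, 0], [1, 1]]),   # linear part: row 2 is x0 + x1
           G=np.array([[1, 1], [0, 0]]))   # nonzero pattern of the nonlinear part
# the optimum is at toy.x == [1.0, 2.0], where the constraint is inactive
# ---------------------------------------------------------------------------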
class ControlNN:
    def __init__(self, conf, load_path=None):
        tf_random_seed = None
        nonlinearity = tf.nn.relu
        self.keep_prob_train_val = 1.0
        self.floatX = 'float32'

        def unif_fanin_mat(shape, name):
            b = np.sqrt(3 * self.keep_prob_train_val / shape[0])
            initial = tf.random_uniform(shape, minval=-b, maxval=b,
                                        seed=tf_random_seed, dtype=self.floatX)
            return tf.Variable(initial, name=name)

        def bias(shape, name):
            initial = tf.constant(0.1, shape=shape, dtype=self.floatX)
            return tf.Variable(initial, name=name)

        self.snopt = SNOPT_solver()
        self.snopt.setOption('Major print level', 0)
        #self.snopt.setOption('Major feasibility', 1.0e-6)
        #self.snopt.setOption('Minor feasibility', 1.0e-6)
        #self.snopt.setOption('Major optimality', 1.0e-6)
        #self.snopt.setOption('Linesearch tolerance', 0.9)
        #self.snopt.setOption('Major iterations', 100)
        #self.snopt.setOption('Minor iterations', 50)
        #self.snopt.setOption('Scale option', 0)
        #self.snopt_max_count = conf['snopt_max_count']
        self.snopt.setOption('Iteration limit', conf['snopt_max_count'])

        self.profiler = Profiler()
        self.n_s = conf['n_s']
        self.n_a = conf['n_a']
        self.n_sa = self.n_s + self.n_a
        n_hidden = conf['n_hidden']
        self.n_1 = n_hidden
        self.n_2 = n_hidden
        self.n_q = conf['n_q']

        error_functions = [tf.square, tf.abs]
        self.error_function = error_functions[conf['error_type']]
        self.one_layer_only = conf['one_layer_only']
        self.n_minibatch = conf['minibatch_size']
        self.n_batches = conf['num_batches']
        self.n_megabatch = self.n_minibatch * self.n_batches
        self.max_a_min_iters = 5

        self.max_abs_torque = conf['max_torque']
        #self.a_tolerance = conf['a_tolerance']
        self.max_torques = np.array([[self.max_abs_torque]], dtype=self.floatX)
        self.max_torques_p = np.ones((self.n_megabatch,1)) * np.array([[self.max_abs_torque]], dtype=self.floatX)
        self.min_torques = np.array([[-self.max_abs_torque]], dtype=self.floatX)
        self.min_torques_p = np.ones((self.n_megabatch,1)) * np.array([[-self.max_abs_torque]], dtype=self.floatX)

        self.set_standardizer((np.zeros(self.n_sa), np.ones(self.n_sa)),
                              (np.zeros(self.n_q), np.ones(self.n_q)))

        self.sess = tf.Session()
        self.keep_prob = tf.placeholder(self.floatX)
        self.sa_learn = tf.placeholder(self.floatX, shape=[None, self.n_sa])

        self.W_sa_1 = unif_fanin_mat([self.n_sa, self.n_1], 'W_sa_1')
        self.b_1 = bias([self.n_1], 'b_1')
        self.W_1_2 = unif_fanin_mat([self.n_1, self.n_2], 'W_1_2')
        self.b_2 = bias([self.n_2], 'b_2')
        self.W_2_q = unif_fanin_mat([self.n_2, self.n_q], 'W_2_q')
        self.b_q = bias([self.n_q], 'b_q')

        name_var_pairs = zip(['W_sa_1', 'b_1', 'b_q'], [self.W_sa_1, self.b_1, self.b_q])
        #if not self.one_layer_only:
        name_var_pairs.extend(zip(['W_1_2', 'b_2', 'W_2_q'], [self.W_1_2, self.b_2, self.W_2_q]))
        self.name_var_dict = {i: j for (i, j) in name_var_pairs}

        # # run collapse once to set number of params
        # self.collapse_params()

        def q_from_input(i):
            o1 = tf.nn.dropout(nonlinearity(tf.matmul(i, self.W_sa_1) + self.b_1), self.keep_prob)
            if self.one_layer_only:
                return tf.matmul(o1, self.W_2_q) + self.b_q
            o2 = tf.nn.dropout(nonlinearity(tf.matmul(o1, self.W_1_2) + self.b_2), self.keep_prob)
            return tf.matmul(o2, self.W_2_q) + self.b_q

        self.o1 = nonlinearity(tf.matmul(self.sa_learn, self.W_sa_1) + self.b_1)
        self.q_learn = q_from_input(self.sa_learn)
        self.y_learn = tf.placeholder(self.floatX, shape=[None, self.n_q])
        self.learn_delta = tf.square(self.y_learn - self.q_learn)
        self.learn_error = tf.reduce_mean(self.learn_delta)
        self.max_a_time_limit = conf['max_a_time_limit']

        global_step = tf.Variable(0, trainable=False)
        self.learn_rate = tf.train.exponential_decay(
            conf['initial_learn_rate'] / self.n_minibatch, global_step,
            conf['learn_rate_half_life'], 0.5, staircase=False)
        self.learn_opt = tf.train.AdamOptimizer(self.learn_rate).minimize(self.learn_error, global_step=global_step)

        self.learn_opts = []
        self.feed_ys = []
        for i in range(self.n_q):
            feed_y = tf.placeholder(self.floatX)
            self.feed_ys.append(feed_y)
            learn_error = self.error_function(feed_y - self.q_learn[0,i])
            learn_opt = tf.train.AdamOptimizer(self.learn_rate).minimize(learn_error, global_step=global_step)
            self.learn_opts.append(learn_opt)

        if self.n_a > 0:
            def query_setup(n_sample):
                s_query = tf.placeholder('float', shape=[n_sample, self.n_s])
                a_query = unif_fanin_mat([n_sample, self.n_a], 'a_query')
                min_cutoff = tf.matmul(np.ones((n_sample, 1), dtype=self.floatX), self.min_torques)
                max_cutoff = tf.matmul(np.ones((n_sample, 1), dtype=self.floatX), self.max_torques)
                # print "min cutoff:", self.sess.run(min_cutoff)
                # print "max cutoff:", self.sess.run(max_cutoff)
                a_query_clipped = tf.minimum(tf.maximum(min_cutoff, a_query), max_cutoff)
                #sa_query = tf.concat(1, [s_query, a_query_clipped])
                sa_query = tf.concat(1, [s_query, a_query])
                q_query = q_from_input(sa_query)
                q_query_mean = tf.reduce_mean(q_query)
                query_opt = tf.train.AdamOptimizer(0.1)
                # compute_gradients returns a list of (gradient, variable) tuples;
                # negating the gradient turns the minimizer into gradient *ascent* on q.
                query_grads_and_vars = query_opt.compute_gradients(q_query_mean, [a_query])
                query_grads_and_vars[0] = (-query_grads_and_vars[0][0], query_grads_and_vars[0][1])
                apply_query_grads = query_opt.apply_gradients(query_grads_and_vars)
                return s_query, a_query, a_query_clipped, sa_query, q_query, q_query_mean, apply_query_grads

            self.s_query, self.a_query, self.a_query_clipped, self.sa_query, self.q_query, \
                self.q_query_mean, self.apply_query_grads = query_setup(1)
            self.s_query_p, self.a_query_p, self.a_query_clipped_p, self.sa_query_p, self.q_query_p, \
                self.q_query_mean_p, self.apply_query_grads_p = query_setup(self.n_megabatch)
            self.sym_grad_p = tf.gradients(self.q_query_mean_p, self.a_query_p)
            self.sym_grad = tf.gradients(self.q_query_mean, self.a_query)

        self.saver = tf.train.Saver(self.name_var_dict)
        self.init_op = tf.initialize_all_variables()
        self.sess.run(self.init_op)
        if load_path is not None:
            self.load_model(load_path)

    def __del__(self):
        self.sess.close()

    def set_standardizer(self, sa_mean_std, q_mean_std):
        def check_stuff(thing, l):
            assert len(thing) == 2
            assert thing[0].shape == thing[1].shape
            assert len(thing[0].shape) == 1
            assert thing[1].shape[0] == l
        check_stuff(sa_mean_std, self.n_sa)
        check_stuff(q_mean_std, self.n_q)
        self.sa_mean_std = sa_mean_std
        self.q_mean_std = q_mean_std

    def print_params(self):
        # iterate over (name, variable) pairs; enumerate() over the dict
        # would yield (index, key) pairs instead
        for name, param in self.name_var_dict.items():
            print name
            print self.sess.run(param)
            print

    #def collapse_params(self):
    #    self.print_params()
    #    ans = []
    #    for name, param in self.name_var_dict.items():
    #        ans.extend(self.sess.run(param).flatten().tolist())
    #    self.num_params = len(ans)
    #    print self.num_params
    #    return np.array(ans)

    def q_from_sa(self, sa_vals):
        net_q = self.sess.run(self.q_learn,
                              feed_dict={self.sa_learn: sa_vals, #standardize(sa_vals, self.sa_mean_std),
                                         self.keep_prob: 1.0})
        #print net_q.shape
        #return unstandardize(net_q, self.q_mean_std)
        return net_q

    def q_query_from_s(self, s_vals):
        return self.sess.run(self.q_query, feed_dict={self.s_query: s_vals[np.newaxis,:],
                                                      self.keep_prob: 1.0})

    def q_query_from_s_p(self, s_vals):
        return self.sess.run(self.q_query_p, feed_dict={self.s_query_p: s_vals,
                                                        self.keep_prob: 1.0})

    def o1_from_sa(self, sa_vals):
        return self.sess.run(self.o1, feed_dict={self.sa_learn: sa_vals,
                                                 self.keep_prob: 1.0})
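    # The next few methods form a brute-force baseline for the SNOPT-based
    # action maximization further below: s_const_grid tiles one state s
    # against n evenly spaced actions, and manual_max_a evaluates Q on that
    # grid and picks the best action, which is handy for sanity-checking the
    # solver's answer.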
    def s_const_grid(self, s, xr, n=10000):
        # [[s x_1], [s x_2], ..., [s x_n]]
        s = np.array(s)
        xs = np.linspace(xr[0], xr[1], n)[:,np.newaxis]
        return xs, np.concatenate((np.ones((n,1)) * s[np.newaxis,:], xs), 1)

    def manual_max_a(self, s, xr=None):
        assert self.n_a == 1
        if xr is None:
            xr = self.max_abs_torque * np.array([-1., 1.])
        xs, inputs = self.s_const_grid(s, xr)
        outputs = self.q_from_sa(inputs).flatten()
        print outputs
        best_input = np.argmax(outputs)
        return np.array([inputs[best_input][-1], outputs[best_input]])

    def manual_max_a_p(self, s, xr=None):
        assert self.n_a == 1
        ans = []
        for i in s:
            ans.append(self.manual_max_a(i, xr))
        return np.array(ans)

    def q_from_s_discrete(self, s):
        return self.q_from_sa(s)

    def q_from_sa_discrete(self, s, a):
        qs = self.q_from_s_discrete(s)
        chosen_qs = np.array([r[i] for r, i in zip(qs, a)])
        return chosen_qs[:, np.newaxis]

    def get_best_a_discrete(self, s):
        qs = self.q_from_s_discrete(s[np.newaxis,:])
        return np.argmax(qs)

    def get_best_q_discrete(self, s):
        assert s.shape[1] == self.n_s
        qs = self.q_from_s_discrete(s)
        return np.max(qs, 1)
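    # get_best_a_p maximizes the learned Q over actions with SNOPT by
    # minimizing -Q: row 1 of F is the nonlinear objective -mean(Q), whose
    # gradient comes from tf.gradients via the G callback, while row 2 is a
    # dummy linear row assembled from A. xlow/xupp bound each action to
    # [-max_torque, +max_torque], and the solve is restarted num_tries times
    # from random initializations, keeping the best Q per batch element.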
    def get_best_a_p(self, s, is_p, num_tries, init_a=None, tolerance=0.01):
        #TODO: benchmark different init methods
        assert (is_p and len(s.shape) == 2 and s.shape[0] == self.n_megabatch) or \
               (not is_p and len(s.shape) == 1)
        n_batch = s.shape[0] if is_p else 1
        inf = 1.0e20
        obj_row = 1
        x_names = np.array(['x' + str(i) for i in range(n_batch)])
        F_names = np.array(['F1', 'F2'])
        xlow = np.array([-self.max_abs_torque for i in range(n_batch)])
        xupp = np.array([ self.max_abs_torque for i in range(n_batch)])
        Flow = np.array([-inf, -inf])
        Fupp = np.array([ inf,  inf])
        A = np.array([[0 for _ in range(n_batch)], [1 for _ in range(n_batch)]])
        G = np.array([[2 for _ in range(n_batch)], [0 for _ in range(n_batch)]])
        vs = {}
        vs['count'] = 0
        vs['old_a'] = None
        ans_a, ans_q = None, None

        def check_timeout(start_time, time_limit):
            if time.time() - start_time > time_limit:
                err_msg = 'error!!! max a timeout: s=%s is_p=%s num_tries=%s init_a=%s' % (s, is_p, num_tries, init_a)
                print err_msg
                return True
            return False

        # Note: this helper is only referenced from commented-out call sites
        # below, and self.snopt_max_count is commented out in __init__.
        def update_tolerance_conditions(a):
            vs['count'] += 1
            #if vs['old_a'] != None and np.max(np.abs(a - vs['old_a'])) < self.a_tolerance:
            #    print 'vs', vs
            #    return -1
            #vs['old_a'] = a
            if vs['count'] > self.snopt_max_count:
                return -1
            return 0

        def inner_p(init_a, time_limit):
            start_time = time.time()
            if init_a is None:
                init_a = self.min_torques_p + (self.max_torques_p - self.min_torques_p) * \
                    np.random.random((self.n_megabatch, self.n_a)) / 2.0

            def grad(a):
                g = self.sess.run(self.sym_grad_p, feed_dict={self.s_query_p: s,
                                                              self.a_query_p: a[:,np.newaxis],
                                                              self.keep_prob: 1.0})
                ret_grad = -g[0].flatten()
                return ret_grad

            def objFG(status, a, needF, needG, cu, iu, ru):
                F = np.array([-self.sess.run(self.q_query_mean_p,
                                             feed_dict={self.s_query_p: s,
                                                        self.a_query_p: a[:,np.newaxis],
                                                        self.keep_prob: 1.0}), 0])
                G = grad(a)
                #status = update_tolerance_conditions(a)
                return status, F, G

            self.snopt.snopta(n=n_batch, nF=2, usrfun=objFG, x0=init_a,
                              xlow=xlow, xupp=xupp, Flow=Flow, Fupp=Fupp,
                              ObjRow=obj_row, A=A, G=G,
                              xnames=x_names, Fnames=F_names)
            res_x = self.snopt.x
            q_final = self.sess.run(self.q_query_p, feed_dict={self.s_query_p: s,
                                                               self.a_query_p: res_x,
                                                               self.keep_prob: 1.0})
            return res_x, q_final

        def inner(init_a, time_limit):
            start_time = time.time()
            if init_a is None:
                #init_a = np.zeros([1, self.n_a])
                init_a = self.min_torques + (self.max_torques - self.min_torques) * \
                    np.random.random((1, self.n_a))

            def grad(a):
                g = self.sess.run(self.sym_grad, feed_dict={self.s_query: s[np.newaxis,:],
                                                            self.a_query: a[:,np.newaxis],
                                                            self.keep_prob: 1.0})
                ret_grad = -g[0].flatten()
                return ret_grad

            def objFG(status, a, needF, needG, cu, iu, ru):
                F = np.array([-self.sess.run(self.q_query_mean,
                                             feed_dict={self.s_query: s[np.newaxis,:],
                                                        self.a_query: a[:,np.newaxis],
                                                        self.keep_prob: 1.0}), 0])
                G = grad(a)
                #status = update_tolerance_conditions(a)
                return status, F, G

            self.snopt.snopta(n=n_batch, nF=2, usrfun=objFG, x0=init_a,
                              xlow=xlow, xupp=xupp, Flow=Flow, Fupp=Fupp,
                              ObjRow=obj_row, A=A, G=G) #, xnames=x_names, Fnames=F_names)
            res_x = self.snopt.x
            print init_a, res_x
            q_final = self.sess.run(self.q_query, feed_dict={self.s_query: s[np.newaxis,:],
                                                             self.a_query: res_x,
                                                             self.keep_prob: 1.0})
            return res_x, q_final

        inner_function = inner_p if is_p else inner
        iter_time_limit = self.max_a_time_limit / num_tries
        for i in range(num_tries):
            a, q = inner_function(init_a, iter_time_limit) if i == 0 else inner_function(None, iter_time_limit)
            if i == 0:
                ans_a, ans_q = a, q
                continue
            # use j here: reusing i would shadow the retry counter above
            for j in range(len(q)):
                if q[j] > ans_q[j]:
                    ans_q[j] = q[j]
                    ans_a[j] = a[j]
        return ans_a, ans_q

    def get_learn_rate(self):
        return self.sess.run(self.learn_rate)

    def learn_delta_q(self, sa_vals, y_vals):
        return self.sess.run(self.learn_delta, feed_dict={self.sa_learn: sa_vals,
                                                          self.y_learn: y_vals,
                                                          self.keep_prob: 1.0})

    def mse_q(self, sa_vals, y_vals):
        return self.sess.run(self.learn_error, feed_dict={self.sa_learn: sa_vals,
                                                          self.y_learn: y_vals,
                                                          self.keep_prob: 1.0})

    def train(self, sa_vals, y_vals):
        self.sess.run(self.learn_opt, feed_dict={self.y_learn: y_vals,
                                                 self.sa_learn: sa_vals,
                                                 self.keep_prob: self.keep_prob_train_val})
        #print 'learn_rate', self.sess.run(self.learn_rate)

    def train_discrete(self, s_vals, a_vals, y_vals):
        for (s, a, y) in zip(s_vals, a_vals, y_vals):
            self.sess.run(self.learn_opts[a], feed_dict={self.sa_learn: s[np.newaxis,:],
                                                         self.feed_ys[a]: y,
                                                         self.keep_prob: 1.0})
    def save_model(self, save_path):
        self.saver.save(self.sess, save_path)

    def load_model(self, load_path):
        print 'loading model from', load_path
        self.saver.restore(self.sess, '/Users/jeffreyyan/drake-distro/drake/examples/NN/' + load_path)
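# ---------------------------------------------------------------------------
# A hypothetical usage sketch for ControlNN. All conf values below are
# illustrative placeholders (the original code never shows a conf dict), and
# it assumes numpy/tensorflow plus the Profiler and SNOPT_solver dependencies
# used above are importable:
import numpy as np

conf = {
    'n_s': 4, 'n_a': 1, 'n_q': 1, 'n_hidden': 64,
    'error_type': 0,            # index into [tf.square, tf.abs]
    'one_layer_only': False,
    'minibatch_size': 32, 'num_batches': 4,
    'max_torque': 1.0,
    'max_a_time_limit': 5.0,    # seconds, split across num_tries
    'snopt_max_count': 200,     # SNOPT iteration limit
    'initial_learn_rate': 1e-3,
    'learn_rate_half_life': 10000,
}
net = ControlNN(conf)
s = np.zeros(conf['n_s'], dtype='float32')   # a single state, shape (n_s,)
best_a, best_q = net.get_best_a_p(s, is_p=False, num_tries=3)
# ---------------------------------------------------------------------------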