def __init__(self, env_name='Pushing2D-v1', mpc_params=None):
    """Set up an experiment that plans with ground-truth dynamics.

    Args:
        env_name: Gym environment id to instantiate.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.agent = Agent(self.env)  # Does not need model
    self.warmup = False
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = True
    # No learned model (None): both policies plan against the true dynamics.
    self.cem_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE, NUM_ELITES,
                          MAX_ITERS, **mpc_params, use_random_optimizer=False)
    self.random_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE, NUM_ELITES,
                             MAX_ITERS, **mpc_params, use_random_optimizer=True)
def __init__(self, A, B, C, Q, R, RD, umin, umax, N):
    """Store the state-space matrices and construct the underlying MPC."""
    self.A = A
    self.B = B
    self.C = C
    self.num_outputs = C.shape[0]
    self.num_inputs = B.shape[1]
    # mtype 0 marks the single-input/single-output case, 1 everything else.
    self.mtype = 0 if self.num_inputs == self.num_outputs == 1 else 1
    self.mpc = MPC(A, B, C, Q, R, RD, umin, umax, N)
def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
    """Set up an experiment that learns dynamics with a PENN ensemble.

    Args:
        env_name: Gym environment id to instantiate.
        num_nets: Number of networks in the PENN ensemble.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.agent = Agent(self.env)
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = False
    self.model = PENN(num_nets, STATE_DIM,
                      len(self.env.action_space.sample()), LR)
    self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                          NUM_ELITES, MAX_ITERS, **mpc_params,
                          use_random_optimizer=False)
    self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                             NUM_ELITES, MAX_ITERS, **mpc_params,
                             use_random_optimizer=True)
    self.random_policy_no_mpc = RandomPolicy(
        len(self.env.action_space.sample()))
    # Per-evaluation success-rate logs for the two optimizers.
    self.log_cem_success = []
    self.log_rnd_success = []
def __init__(self, A, B, C, Q, R, RD, umin, umax, N):
    """Store the plant matrices, classify the model, and build the MPC."""
    self.A = A
    self.B = B
    self.C = C
    self.num_outputs = C.shape[0]
    self.num_inputs = B.shape[1]
    # mtype 0 marks the single-input/single-output case, 1 everything else.
    self.mtype = 0 if self.num_inputs == self.num_outputs == 1 else 1
    self.mpc = MPC(A, B, C, Q, R, RD, umin, umax, N)
    # NOTE(review): this sets a process-global matplotlib option from a
    # constructor — confirm that is intentional.
    plt.rcParams['savefig.facecolor'] = 'xkcd:black'
def __init__(self, env_name='Pushing2D-v1', mpc_params=None):
    """Set up a ground-truth-dynamics experiment with MPC or plain CEM.

    Args:
        env_name: Gym environment id to instantiate.
        mpc_params: Dict of MPC options; must contain 'use_mpc' to choose
            between MPC-wrapped optimizers and the standalone policies.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.agent = Agent(self.env)  # Does not need model
    self.warmup = False
    # Copy so the caller's dict is never mutated (the original wrote
    # 'use_gt_dynamics' back into the caller's object).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = True
    if mpc_params['use_mpc']:
        self.cem_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE,
                              NUM_ELITES, MAX_ITERS, INITIAL_MU,
                              INITIAL_SIGMA, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, INITIAL_MU,
                                 INITIAL_SIGMA, **mpc_params,
                                 use_random_optimizer=True)
    else:
        # Standalone (non-MPC) policies share the same planning settings.
        self.cem_policy = CEMPolicy(
            self.env, len(self.env.action_space.low), INITIAL_MU,
            INITIAL_SIGMA, PLAN_HORIZON, POPSIZE, NUM_ELITES, MAX_ITERS,
            self.env.action_space.high, self.env.action_space.low,
            mpc_params['use_gt_dynamics'])
        self.random_policy = RandomPolicy(
            self.env, len(self.env.action_space.low), INITIAL_MU,
            INITIAL_SIGMA, PLAN_HORIZON, POPSIZE, MAX_ITERS,
            self.env.action_space.high, self.env.action_space.low,
            mpc_params['use_gt_dynamics'])
def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None,
             exp_dir=None):
    """Set up a learned-dynamics experiment and its output directory.

    Args:
        env_name: Gym environment id to instantiate.
        num_nets: Number of networks in the PENN ensemble.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
        exp_dir: Optional subdirectory (relative to cwd) for experiment output.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.num_nets = num_nets
    self.agent = Agent(self.env)
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = False
    self.model = PENN(num_nets, STATE_DIM,
                      len(self.env.action_space.sample()), LR)
    self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                          NUM_ELITES, MAX_ITERS, **mpc_params,
                          use_random_optimizer=False)
    self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                             NUM_ELITES, MAX_ITERS, **mpc_params,
                             use_random_optimizer=True)
    self.random_policy_no_mpc = RandomPolicy(
        len(self.env.action_space.sample()))
    # Resolve the experiment directory (cwd when no exp_dir is given).
    if exp_dir:
        self.exp_dir = os.path.join(os.getcwd(), exp_dir)
    else:
        self.exp_dir = os.getcwd()
    self.create_dirs(self.exp_dir)
    self.samples = []
def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
    """Set up a learned-dynamics experiment with tensorboard logging.

    Args:
        env_name: Gym environment id to instantiate.
        num_nets: Number of networks in the PENN ensemble.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
    """
    self.env = gym.make(env_name)
    # self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.device = torch.device('cpu')
    self.task_horizon = TASK_HORIZON
    # Tensorboard logging.
    self.timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    self.environment_name = "pusher"
    self.logdir = 'logs/%s/%s' % (self.environment_name, self.timestamp)
    self.summary_writer = SummaryWriter(self.logdir)
    self.agent = Agent(self.env)
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = False
    self.model = PENN(num_nets, STATE_DIM,
                      len(self.env.action_space.sample()), LR, self.device,
                      self.summary_writer, self.timestamp,
                      self.environment_name)
    self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                          NUM_ELITES, MAX_ITERS, use_random_optimizer=False,
                          **mpc_params)
    self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                             NUM_ELITES, MAX_ITERS, use_random_optimizer=True,
                             **mpc_params)
    self.random_policy_no_mpc = RandomPolicy(
        len(self.env.action_space.sample()))
def __init__(self, moos_community, moos_port):
    """Initiates MOOSComms, sets the callbacks and runs the loop"""
    super(mpcMOOS, self).__init__()
    self.server = moos_community
    self.port = moos_port
    self.name = 'mpcMOOS'
    self.lock = threading.Lock()
    # Wire up the connection / mail callbacks.
    self.set_on_connect_callback(self.__on_connect)
    self.set_on_mail_callback(self.__on_new_mail)
    # Path coordinates and vessel state arrive on dedicated active queues.
    self.add_active_queue('path_queue', self.on_path)
    self.add_message_route_to_active_queue('path_queue', 'PATH_X')
    self.add_message_route_to_active_queue('path_queue', 'PATH_Y')
    self.add_active_queue('control_queue', self.on_vessel_state)
    self.add_message_route_to_active_queue('control_queue', 'VESSEL_STATE')
    self.path_x = []
    self.path_y = []
    self.mpc = MPC()
    print('MPC created')
    self.run(self.server, self.port, self.name)
def __init__(self, chain_length=4, alpha=pi / 4):
    """Assemble the brush: silane anchor, initiator, PMPC chain, methyl cap."""
    super(Brush, self).__init__()

    # Add parts
    self.add(Silane(), label='silane')
    self.add(Initiator(), label='initiator')
    self.add(mb.recipes.Polymer(MPC(alpha=alpha), n=chain_length,
                                port_labels=('up', 'down')),
             label='pmpc')
    self.add(CH3(), label='methyl')

    # Stitch the parts together at their ports.
    mb.force_overlap(self['initiator'], self['initiator']['down'],
                     self['silane']['up'])
    mb.force_overlap(self['pmpc'], self['pmpc']['down'],
                     self['initiator']['up'])
    mb.force_overlap(self['methyl'], self['methyl']['up'],
                     self['pmpc']['up'])

    # Make self.port point to silane.bottom_port
    self.add(self['silane']['down'], label='down', containment=False)
class PyBlaster:
    """Daemon for PiBlaster project."""

    def __init__(self):
        """Whole project is run from this constructor."""
        # +++++++++++++++ Init +++++++++++++++ #
        self.keep_run = 1  # used in run for daemon loop, reset by SIGTERM
        self.ret_code = 0  # return code to command line (10 = shutdown)

        # +++++++++++++++ Objects +++++++++++++++ #
        # Each inner object will get reference to PyBlaster as self.main.
        # Exceptions in child threads are put here.
        self.ex_queue = queue.Queue()
        self.log = Log(self)
        self.settings = Settings(self)
        self.settings.parse()
        PB_GPIO.init_gpio(self)
        self.led = LED(self)
        self.buttons = Buttons(self)
        self.lirc = Lirc(self)
        self.dbhandle = DBHandle(self)
        self.cmd = Cmd(self)
        self.mpc = MPC(self)
        self.bt = RFCommServer(self)
        self.alsa = AlsaMixer(self)
        self.i2c = I2C(self)
        self.usb = UsbDrive(self)

        # +++++++++++++++ Init Objects +++++++++++++++ #
        # Make sure to run init functions in proper order!
        # Some might depend upon others ;)
        self.led.init_leds()
        self.dbhandle.dbconnect()
        self.mpc.connect()
        self.bt.start_server_thread()
        self.alsa.init_alsa()
        self.i2c.open_bus()
        self.usb.start_uploader_thread()

        # +++++++++++++++ Daemonize +++++++++++++++ #
        self.check_pidfile()
        self.daemonize()
        self.create_pidfile()

        # +++++++++++++++ Daemon loop +++++++++++++++ #
        self.led.show_init_done()
        self.buttons.start()
        self.lirc.start()
        self.run()

        # +++++++++++++++ Finalize +++++++++++++++ #
        self.log.write(log.MESSAGE, "Joining threads...")
        # Join remaining threads.
        self.usb.join()
        self.bt.join()
        self.buttons.join()
        self.lirc.join()
        self.mpc.join()
        self.led.join()
        self.log.write(log.MESSAGE, "leaving...")
        # Cleanup.
        self.mpc.exit_client()
        self.delete_pidfile()
        PB_GPIO.cleanup(self)

    def run(self):
        """Daemon loop."""
        # Expensive operations like new usb drive check
        # should not be run every loop run.
        poll_count = 0
        led_count = 1
        # -e flag is set, run only init and exit directly.
        # self.keep_run = 0 if self.settings.exitafterinit else 1

        # # # # # # DAEMON LOOP ENTRY # # # # # #
        self.log.write(log.MESSAGE, "Entering daemon loop...")
        while self.keep_run:
            poll_count += 1
            time.sleep(50. / 1000.)  # 50ms default in config
            self.buttons.read_buttons()
            self.mpc.process_idler_events()
            self.lirc.read_lirc()
            self.bt.check_incomming_commands()
            # TODO: play LEDs while playing -- if paused, do something else...
            if poll_count % 10 == 0:
                self.led.play_leds(led_count)
                led_count += 1
            # try:
            #     exc = self.ex_queue.get(block=False)
            # except queue.Empty:
            #     pass
            # else:
            #     exc_type, exc_obj, exc_trace = exc
            #     print(exc_type, exc_obj)
            #     print(exc_trace)
            #     self.ret_code = 1
            #     self.keep_run = False
            #     self.led.indicate_error()
        # End daemon loop.
        # # # # # # # DAEMON LOOP EXIT # # # # # #

    def daemonize(self):
        """Fork process and disable print in log object."""
        signal.signal(signal.SIGTERM, self.term_handler)
        signal.signal(signal.SIGINT, self.term_handler)
        if not self.settings.daemonize:
            self.log.init_log()
            return
        self.log.write(log.DEBUG1, "daemonizing")
        # Classic double fork so the daemon is fully detached from the tty.
        try:
            pid = os.fork()
        except OSError:
            # self.log.write(log.EMERGENCY, "Failed to fork daemon")
            raise
        if pid == 0:
            os.setsid()
            try:
                pid = os.fork()
            except OSError:
                # self.log.write(log.EMERGENCY, "Failed to fork daemon")
                raise
            if pid == 0:
                os.chdir("/tmp")
                os.umask(0)
            else:
                exit(0)
        else:
            exit(0)
        self.settings.is_daemonized = True
        self.log.init_log()
        self.log.write(log.MESSAGE, "daemonized.")

    def term_handler(self, *args):
        """Signal handler to stop daemon loop."""
        self.log.write(log.MESSAGE, "Got TERM or INT signal -- leaving!")
        self.keep_run = 0

    def check_pidfile(self):
        """Check if daemon already running, throw if pid file found."""
        if os.path.exists(self.settings.pidfile):
            self.log.write(log.EMERGENCY, "Found pid file for pyblaster, "
                                          "another process running?")
            raise Exception("pid file found")

    def create_pidfile(self):
        """Write getpid() to file after daemonize()."""
        try:
            fpid = open(self.settings.pidfile, "w")
        except IOError:
            self.log.write(log.EMERGENCY, "failed to create pidfile %s" %
                           self.settings.pidfile)
            raise
        fpid.write("%s\n" % os.getpid())

    def delete_pidfile(self):
        """Try to remove pid file after daemon should exit."""
        if os.path.exists(self.settings.pidfile):
            try:
                os.remove(self.settings.pidfile)
            except OSError:
                self.log.write(log.EMERGENCY, "failed to remove pidfile %s" %
                               self.settings.pidfile)
                raise

    def kill_other_pyblaster(self):
        """Check if pid found in pid file and try to kill this (old) process."""
        if not os.path.exists(self.settings.pidfile):
            return
        try:
            f = open(self.settings.pidfile, "r")
        except IOError:
            self.log.write(log.EMERGENCY, "failed to read pidfile %s" %
                           self.settings.pidfile)
            raise
        pid = int(f.readline().strip())
        print("Trying to kill old process with pid %s..." % pid)
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError:
            self.log.write(log.EMERGENCY, "failed to kill process with pid %s" %
                           pid)
            raise
        exit(0)
def __init__(self): """Whole project is run from this constructor """ # +++++++++++++++ Init +++++++++++++++ # self.keep_run = 1 # used in run for daemon loop, reset by SIGTERM self.ret_code = 0 # return code to command line (10 = shutdown) # +++++++++++++++ Objects +++++++++++++++ # # Each inner object will get reference to PyBlaster as self.main. # exceptions in child threads are put here self.ex_queue = queue.Queue() self.log = Log(self) self.settings = Settings(self) self.settings.parse() PB_GPIO.init_gpio(self) self.led = LED(self) self.buttons = Buttons(self) self.lirc = Lirc(self) self.dbhandle = DBHandle(self) self.cmd = Cmd(self) self.mpc = MPC(self) self.bt = RFCommServer(self) self.alsa = AlsaMixer(self) self.i2c = I2C(self) self.usb = UsbDrive(self) # +++++++++++++++ Init Objects +++++++++++++++ # # Make sure to run init functions in proper order! # Some might depend upon others ;) self.led.init_leds() self.dbhandle.dbconnect() self.mpc.connect() self.bt.start_server_thread() self.alsa.init_alsa() self.i2c.open_bus() self.usb.start_uploader_thread() # +++++++++++++++ Daemoninze +++++++++++++++ # self.check_pidfile() self.daemonize() self.create_pidfile() # +++++++++++++++ Daemon loop +++++++++++++++ # self.led.show_init_done() self.buttons.start() self.lirc.start() self.run() # +++++++++++++++ Finalize +++++++++++++++ # self.log.write(log.MESSAGE, "Joining threads...") # join remaining threads self.usb.join() self.bt.join() self.buttons.join() self.lirc.join() self.mpc.join() self.led.join() self.log.write(log.MESSAGE, "leaving...") # cleanup self.mpc.exit_client() self.delete_pidfile() PB_GPIO.cleanup(self)
class mpcMOOS(pymoos.comms):
    """MOOS client that receives a path and vessel state and runs MPC."""

    def __init__(self, moos_community, moos_port):
        """Initiates MOOSComms, sets the callbacks and runs the loop."""
        super(mpcMOOS, self).__init__()
        self.server = moos_community
        self.port = moos_port
        self.name = 'mpcMOOS'
        self.lock = threading.Lock()
        self.set_on_connect_callback(self.__on_connect)
        self.set_on_mail_callback(self.__on_new_mail)
        # Path coordinates and vessel state arrive on dedicated active queues.
        self.add_active_queue('path_queue', self.on_path)
        self.add_message_route_to_active_queue('path_queue', 'PATH_X')
        self.add_message_route_to_active_queue('path_queue', 'PATH_Y')
        self.add_active_queue('control_queue', self.on_vessel_state)
        self.add_message_route_to_active_queue('control_queue', 'VESSEL_STATE')
        self.path_x = []
        self.path_y = []
        self.mpc = MPC()
        print('MPC created')
        self.run(self.server, self.port, self.name)

    def __on_connect(self):
        """OnConnect callback."""
        print("Connected to", self.server, self.port,
              "under the name ", self.name)
        self.register('PATH_X', 0)
        self.register('PATH_Y', 0)
        self.register('VESSEL_STATE', 0)
        return True

    def __on_new_mail(self):
        """OnNewMail callback."""
        print("on_mail activated by")
        return True

    def on_path(self, msg):
        """Special callback for path."""
        with self.lock:
            # print("on_path activated by", msg.key(), "with value", msg.string())
            if msg.key() == 'PATH_X':
                # Parse the path x coords from their string representation.
                text = msg.string().replace('[', '')
                text = text.replace(']', '')
                text = text.replace(' ', '')
                self.path_x = [float(i) for i in text.split(',')]
                print('Received {} x coordinates'.format(len(self.path_x)))
            elif msg.key() == 'PATH_Y':
                # Parse the path y coords (may carry a numpy 'array(...)' wrapper).
                text = msg.string().replace('[', '')
                text = text.replace(']', '')
                text = text.replace('array(', '')
                text = text.replace(')', '')
                text = text.replace(' ', '')
                self.path_y = [float(i) for i in text.split(',')]
                print('Received {} y coordinates'.format(len(self.path_y)))
        return True

    def on_vessel_state(self, msg):
        """Run the optimizer once a path and a vessel state are available."""
        while len(self.path_x) == 0 or len(self.path_y) == 0:
            # Wait till there is a path present.
            time.sleep(0.5)
        self.lock.acquire()
        try:
            if msg.key() == 'VESSEL_STATE':
                states = msg.string().split(',')
                print('states received')
                print(states)
                vessel = Vessel(float(states[0]), float(states[1]),
                                float(states[4]), float(states[3]),
                                float(states[2]), 0.1, 180, -180)
                print('vessel object created')
                elapsed_time = []
                path_xte = []
                mpc_xte = []
                path_heading_error = []
                start = time.time()
                print('size pathx: {}'.format(len(self.path_x)))
                print('size pathy: {}'.format(len(self.path_y)))
                print(self.mpc.ready)
                rrot = self.mpc.optimize_simple(self.path_x, self.path_y,
                                                vessel)
                stop = time.time()
                print("elapsed time: {}".format(stop - start))
                elapsed_time.append(stop - start)
                self.notify('RROT', rrot, -1)
        except Exception as e:
            print(e)
        finally:
            self.lock.release()
        return True
class Simulator:
    """Closed-loop simulator that tracks a reference trajectory with MPC."""

    def __init__(self, A, B, C, Q, R, RD, umin, umax, N):
        """Store the plant matrices, classify the model, and build the MPC."""
        self.A = A
        self.B = B
        self.C = C
        self.num_outputs = C.shape[0]
        self.num_inputs = B.shape[1]
        # mtype 0 marks the single-input/single-output case, 1 everything else.
        self.mtype = 0 if self.num_inputs == self.num_outputs == 1 else 1
        self.mpc = MPC(A, B, C, Q, R, RD, umin, umax, N)

    def get_reference_trajectory(self, n, model_type=0):
        """Build an n-step reference: a square wave (SISO) or 4 stacked rows."""
        self.t = np.linspace(1, n, n)
        if model_type == 0:
            self.ref_traj = signal.square(self.t / 6)[np.newaxis, :]
        else:
            rx = 3 * np.ones(len(self.t))
            ry = np.ones(len(self.t))
            rz = signal.square(self.t / 32)
            ryaw = 2 * signal.square(self.t / 1000)
            self.ref_traj = np.row_stack((rx, ry, rz, ryaw))

    def simulate(self):
        """Roll the plant forward, asking the MPC for an input each step."""
        U, X = self.establish_starting_state()
        for step in range(self.ref_traj.shape[1]):
            if step == 0:
                self.X_hist = X
                self.U_hist = U
                self.Y_hist = self.C @ X
            else:
                self.X_hist = np.column_stack((self.X_hist, X))
                self.U_hist = np.column_stack((self.U_hist, U))
                self.Y_hist = np.column_stack((self.Y_hist, self.C @ X))
            remaining_traj = self.ref_traj[:, step:]
            tic = time.time()
            U = self.mpc.get_control_input(X, U, remaining_traj)
            print(time.time() - tic)
            X = self.update_states(X, U)

    def establish_starting_state(self):
        """Return zero input and zero state column vectors."""
        U = np.zeros((self.B.shape[1], 1))
        X = np.zeros((self.A.shape[1], 1))
        return U, X

    def update_states(self, X, U):
        """One step of the discrete dynamics x' = A x + B u."""
        return self.A @ X + self.B @ U

    def plot(self):
        """Plot tracking history on a dark theme (SISO or 4-output layout)."""
        self.t = self.t * .04
        if self.mtype == 0:
            fig, ax = plt.subplots()
            fig.patch.set_facecolor('xkcd:black')
            ax.set_facecolor('xkcd:black')
            ax.tick_params(color='xkcd:white', labelcolor='xkcd:white')
            for side in ('bottom', 'top', 'right', 'left'):
                ax.spines[side].set_color('xkcd:white')
            ax.plot(self.t, self.ref_traj[0, :])
            ax.plot(self.t, self.X_hist[0, :])
        else:
            figY, axY = plt.subplots(nrows=self.num_outputs)
            figU, axU = plt.subplots(nrows=self.num_outputs)
            figY.suptitle('Reference Tracking', color='xkcd:white', y=1.0)
            figU.suptitle('Control Usage', color='xkcd:white', y=1.0)
            figY.patch.set_facecolor('xkcd:black')
            figU.patch.set_facecolor('xkcd:black')
            Y_labels = ['X Position', 'Y Position', 'Z Position',
                        'Yaw Position']
            U_labels = ['Roll', 'Pitch', 'Yaw', 'Thrust']
            # Reorder state history rows to match the output labels.
            X_change = np.vstack((self.X_hist[1, :], self.X_hist[2, :],
                                  self.X_hist[0, :], self.X_hist[3, :]))
            for i in range(self.num_outputs):
                axY[i].plot(self.t, self.ref_traj[i, :])
                axY[i].plot(self.t, X_change[i, :])
                axY[i].set_title(Y_labels[i], color='xkcd:white')
                axU[i].plot(self.t, self.U_hist[i, :])
                axU[i].set_title(U_labels[i], color='xkcd:white')
                axY[i].set_facecolor('xkcd:black')
                axU[i].set_facecolor('xkcd:black')
                axY[i].tick_params(color='xkcd:white', labelcolor='xkcd:white')
                axU[i].tick_params(color='xkcd:white', labelcolor='xkcd:white')
                for side in ('bottom', 'top', 'right', 'left'):
                    axY[i].spines[side].set_color('xkcd:white')
                    axU[i].spines[side].set_color('xkcd:white')
            figY.tight_layout()
            figU.tight_layout()
        plt.show()
class System(object):
    """Fixed-timestep simulation loop driving one body, optionally via MPC."""

    def __init__(self, window, dt=0.01):
        self.use_mpc = True
        self.clock = sf.Clock()
        self.window = window
        self.dt = dt
        self.time = 0
        self.mpc_horizon = 10
        self.counter = -1

        spring_damper = SpringDamper(window, pos_x=0.9, pos_y=0.5)
        spring_damper.set_properties(k=10., m=1., c=0.1, x0=0.5)
        spring_damper.integration_type = spring_damper.RUNGE_KUTTA

        pendulum = Pendulum(window)
        pendulum.set_properties(m=1.0, g=-9.8, L=0.25, k=0.1)
        pendulum.set_position(0.5, 0.5)
        pendulum.set_rotation(np.pi * 0.95)
        pendulum.integration_type = pendulum.RUNGE_KUTTA
        pendulum.nonlinear_mode = True

        cart = CartPole(window)
        cart.set_properties(m=0.2, M=0.5, I=0.006, g=-9.8, l=0.25, b=0.5)
        cart.set_position(0.5, 0.5)
        cart.set_rotation(np.pi * 0.99)
        cart.nonlinear_mode = True
        cart.integration_type = cart.RUNGE_KUTTA

        self.mpc = MPC(dt, self.mpc_horizon)

        # This works for springdamper and pendulum
        R = np.matrix(np.eye(2) * 1e-5)
        Q = np.matrix(np.matrix([[100, 0], [0.0, 1.0]]))
        T = np.matrix(np.eye(2) * 1e-5)
        self.mpc.set_cost_weights(Q, R, T)
        self.mpc.Y_prime = np.matrix(np.zeros(shape=(2, self.mpc_horizon)))
        self.mpc.Y_prime[0, :] = 0.7
        self.mpc.C = np.matrix(np.eye(2))

        # cartpole
        # R = np.matrix(np.eye(4)*0.001)
        # Q = np.matrix([[100, 0, 0, 0],
        #                [0, 0, 0, 0],
        #                [0, 0, 100, 0],
        #                [0, 0, 0, 0]])
        #
        # self.mpc.set_cost_weights(Q, R)
        # self.mpc.set_constraints(cart.cons)
        #
        # self.mpc.Y_prime = np.matrix(np.zeros(shape=(4, self.mpc_horizon)))
        # self.mpc.Y_prime[0,:] = np.pi
        # self.mpc.Y_prime[2,:] = 0.5
        # self.C = np.matrix([[1,0, 1, 0]])

        # self.bodies = [spring_damper, pendulum]
        self.bodies = [spring_damper]
        self.mpc.set_body(self.bodies[0])

    def simulate(self):
        """Advance one tick; re-plan with MPC every mpc_horizon ticks."""
        self.counter += 1
        if self.use_mpc:
            if self.counter % self.mpc_horizon == 0:
                self.counter = 0
                self.mpc.calc_control()
            u = self.mpc.U[:, self.counter]
            self.mpc.body.simulate(self.dt, np.matrix(u).transpose())
        else:
            for body in self.bodies:
                body.simulate(self.dt)

    def render(self):
        """Clear the window, draw every body and the clock, then display."""
        self.window.clear(sf.Color.WHITE)
        for body in self.bodies:
            body.render()
        add_time(self.window, self.time)
        self.window.display()

    def step(self):
        """Run one simulate/render cycle when a full dt has elapsed."""
        if self.clock.elapsed_time.milliseconds >= self.dt * 1000:
            self.clock.restart()
            self.time += self.dt
            self.simulate()
            self.render()
pendulum = Pendulum()

# Get the system discrete-time dynamics
A, B, Bw, C = pendulum.get_discrete_system_matrices_at_eq()

# Solve the ARE for our system to extract the terminal weight matrix P
Q = np.eye(4) * 1
R = np.eye(1) * 1
P = np.eye(4) * 1

# Instantiate controller
ctl = MPC(model=pendulum,
          dynamics=pendulum.discrete_time_dynamics,
          horizon=7,
          Q=Q, R=R, P=P,
          ulb=-5, uub=5,
          xlb=[-2, -10, -np.pi / 2, -np.pi / 2],
          xub=[12, 10, np.pi / 2, np.pi / 2])

# Solve without disturbance
ctl.set_reference(x_sp=np.array([10, 0, 0, 0]))
sim_env_full = EmbeddedSimEnvironment(
    model=pendulum,
    dynamics=pendulum.pendulum_linear_dynamics_with_disturbance,
    controller=ctl.mpc_controller,
    time=6)
sim_env_full.run([0, 0, 0, 0])

# Solve with disturbance
# NOTE(review): `import mpc.MPC as MPC` binds the *module* mpc.MPC, and the
# call below then invokes it — confirm whether `from mpc import MPC` was meant.
import mpc.MPC as MPC

if __name__ == '__main__':
    MpcMachine = MPC()
class ExperimentModelDynamics:
    """Learned-dynamics experiment that logs losses, RMSE and success rates."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None,
                 exp_dir=None):
        self.env = gym.make(env_name)
        self.task_horizon = TASK_HORIZON
        self.num_nets = num_nets
        self.agent = Agent(self.env)
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))
        # Experiment output directory (cwd when no exp_dir is given).
        if exp_dir:
            self.exp_dir = os.path.join(os.getcwd(), exp_dir)
        else:
            self.exp_dir = os.getcwd()
        self.create_dirs(self.exp_dir)

    def test(self, num_episodes, optimizer='cem'):
        """Roll out episodes with the chosen optimizer; return mean return/success."""
        samples = []
        print("optimizer: {}".format(optimizer))
        for episode in range(num_episodes):
            print('Test episode {}'.format(episode))
            policy = self.cem_policy if optimizer == 'cem' else self.random_policy
            samples.append(self.agent.sample(self.task_horizon, policy))
        avg_return = np.mean([s["reward_sum"] for s in samples])
        avg_success = np.mean([s["rewards"][-1] == 0 for s in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = [self.agent.sample(self.task_horizon,
                                     self.random_policy_no_mpc)
                   for _ in range(num_episodes)]
        epoch_loss_arr, epoch_rmse_arr = self.cem_policy.train(
            [s["obs"] for s in samples],
            [s["ac"] for s in samples],
            [s["rewards"] for s in samples],
            epochs=num_epochs)
        self.save_data(epoch_loss_arr, "loss_warmup", self.exp_dir)
        self.save_data(epoch_rmse_arr, "rmse_warmup", self.exp_dir)
        self.plot_prop(epoch_loss_arr, "loss_warmup", self.exp_dir)
        self.plot_prop(epoch_rmse_arr, "rmse_warmup", self.exp_dir)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval):
        """Jointly training the model and the policy."""
        random_SR = []
        cem_SR = []
        loss_arr = []
        rmse_arr = []
        for i in range(num_train_epochs):
            print("####################################################################")
            print("Starting training epoch %d." % (i + 1))
            samples = []
            for _ in range(num_episodes_per_epoch):
                samples.append(self.agent.sample(self.task_horizon,
                                                 self.cem_policy))
            print("Rewards obtained:", [s["reward_sum"] for s in samples])
            loss_arr_curr, rmse_arr_curr = self.cem_policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples],
                epochs=5)
            loss_arr.append(np.mean(loss_arr_curr))
            rmse_arr.append(np.mean(rmse_arr_curr))
            print("mean_loss: {}".format(np.mean(loss_arr_curr)))
            print("mean_rmse: {}".format(np.mean(rmse_arr_curr)))
            self.save_data(loss_arr, "loss", self.exp_dir)
            self.save_data(rmse_arr, "rmse", self.exp_dir)
            if (i + 1) % evaluation_interval == 0:
                avg_return, avg_success = self.test(20, optimizer='cem')
                cem_SR.append(avg_success * 1.0)
                print('Test success CEM + MPC:', avg_success)
                avg_return, avg_success = self.test(20, optimizer='random')
                random_SR.append(avg_success * 1.0)
                print('Test success Random + MPC:', avg_success)
                self.save_data(cem_SR, "SR_CEM", self.exp_dir)
                self.save_data(random_SR, "SR_random", self.exp_dir)
                self.plot_prop(cem_SR, "SR_CEM", self.exp_dir)
                self.plot_prop(random_SR, "SR_random", self.exp_dir)
        # Persist the final success-rate curves once more after training.
        self.save_data(cem_SR, "SR_CEM", self.exp_dir)
        self.save_data(random_SR, "SR_random", self.exp_dir)
        self.plot_prop(cem_SR, "SR_CEM", self.exp_dir)
        self.plot_prop(random_SR, "SR_random", self.exp_dir)

    def plot_prop(self, prop, prop_name, plots_path):
        """Save a line plot of `prop` as <plots_path>/<prop_name>.png."""
        fig = plt.figure(figsize=(16, 9))
        plt.plot(prop, label=prop_name, color='navy')
        plt.xlabel("num_epochs / 50")
        plt.ylabel(prop_name)
        plt.legend()
        plt.savefig(os.path.join(plots_path, "{}.png".format(prop_name)))
        plt.clf()
        plt.close()

    def save_data(self, prop, prop_name, save_dir):
        """Persist `prop` as a .npy file under save_dir."""
        np.save(os.path.join(save_dir, prop_name + ".npy"), prop)

    def create_dirs(self, path):
        """Create `path` if it does not exist yet."""
        if not os.path.exists(path):
            os.mkdir(path)
class ExperimentModelDynamics:
    """Learned-dynamics experiment: warm up a PENN model, then train with MPC."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        self.task_horizon = TASK_HORIZON
        self.agent = Agent(self.env)
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Roll out episodes with the chosen optimizer; return mean return/success."""
        samples = []
        for episode in range(num_episodes):
            print('Test episode {}'.format(episode))
            policy = self.cem_policy if optimizer == 'cem' else self.random_policy
            samples.append(self.agent.sample(self.task_horizon, policy))
        avg_return = np.mean([s["reward_sum"] for s in samples])
        avg_success = np.mean([s["rewards"][-1] == 0 for s in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = [self.agent.sample(self.task_horizon,
                                     self.random_policy_no_mpc)
                   for _ in range(num_episodes)]
        self.cem_policy.train(
            [s["obs"] for s in samples],
            [s["ac"] for s in samples],
            [s["rewards"] for s in samples],
            epochs=num_epochs)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval):
        """Jointly training the model and the policy."""
        for i in range(num_train_epochs):
            print("####################################################################")
            print("Starting training epoch %d." % (i + 1))
            samples = []
            for _ in range(num_episodes_per_epoch):
                samples.append(self.agent.sample(self.task_horizon,
                                                 self.cem_policy))
            print("Rewards obtained:", [s["reward_sum"] for s in samples])
            self.cem_policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples],
                epochs=5)
            if (i + 1) % evaluation_interval == 0:
                avg_return, avg_success = self.test(50, optimizer='cem')
                print('Test success CEM + MPC:', avg_success)
                avg_return, avg_success = self.test(50, optimizer='random')
                print('Test success Random + MPC:', avg_success)
from mpc import MPC
from control_room import ControlRoom
import cvxpy as cp
from cvxpy.atoms.elementwise.abs import abs as cp_abs
import random

random.seed(3)

# construct MPC class
mpc = MPC(
    ctr_mu=ctr_mu,
    n_slots=n_slots,
    ad_opportunities_params=ad_opportunities_params,
    ad_opportunities_rate_initial=ad_opportunities_rate_initial,
    b_star_params=b_star_params,
    b_star_initial=b_star_initial,
    ctr_params=ctr_params,
    ctr_initial=ctr_initial,
    cov=cov,
    bid_price_initial=bid_price_initial,
    bid_uncertainty_initial=bid_uncertainty_initial
)

# 0. Initialize campaign without MPC informed bidding
for i in range(100):
    ad_data = mpc.simulate_data()
    cost = ad_data["cost"]
    # Update historic cost data
    past_costs = mpc.update_history(past_costs, cost)
obs5 = Obstacle(cx=0.27, cy=-1.0, radius=0.05)
obs6 = Obstacle(cx=0.78, cy=-1.47, radius=0.05)
obs7 = Obstacle(cx=0.73, cy=-0.9, radius=0.07)
obs8 = Obstacle(cx=1.2, cy=0.0, radius=0.08)
obs9 = Obstacle(cx=0.67, cy=-0.05, radius=0.06)
map.add_obstacles([obs1, obs2, obs3, obs4, obs5, obs6, obs7, obs8, obs9])

"""
Get configured do-mpc modules:
"""
vehicle = simple_bycicle_model(length=0.12, width=0.06, Ts=0.05,
                               reference_path=reference_path)
model = vehicle.model
controller = MPC(vehicle)
mpc = controller.mpc
simulator = Simulator(vehicle).simulator

# Compute speed profile
ay_max = 4.0  # m/s^2
a_min = -0.1  # m/s^2
a_max = 0.5  # m/s^2
SpeedProfileConstraints = {
    'a_min': a_min,
    'a_max': a_max,
    'v_min': 0.0,
    'v_max': 1.0,
    'ay_max': ay_max,
}
class Experiment:
    """PETS-style experiment: train a PENN model and evaluate the MPC policy."""

    def __init__(self):
        self.env = gym.make('Pushing2D-v1')
        self.task_hor = TASK_HORIZON
        self.agent = Agent(self.env)
        self.model = PENN(NUM_NETS, STATE_DIM, ACTION_DIM, LR)
        self.policy = MPC(self.env, NUM_PARTICLES, PLAN_HOR, self.model,
                          POPSIZE, NUM_ELITES, MAX_ITERS)

    def test(self, num_episodes):
        """Roll out episodes with the MPC policy; return the success rate."""
        samples = []
        for _ in range(num_episodes):
            samples.append(self.agent.sample(self.task_hor, self.policy))
        print("Rewards obtained:",
              np.mean([s["reward_sum"] for s in samples]))
        print("Percent success:",
              np.mean([s["rewards"][-1] == 0 for s in samples]))
        return np.mean([s["rewards"][-1] == 0 for s in samples])

    def train(self):
        """Seed the model with random rollouts, then iterate MPC training."""
        traj_obs, traj_acs, traj_rets, traj_rews = [], [], [], []
        test_results = []
        samples = []
        rand_pol = RandomPolicy(2)
        for _ in range(NINIT_ROLLOUTS):
            samples.append(self.agent.sample(self.task_hor, rand_pol))
            traj_obs.append(samples[-1]["obs"])
            traj_acs.append(samples[-1]["ac"])
            traj_rews.append(samples[-1]["rewards"])
        if NINIT_ROLLOUTS > 0:
            self.policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples],
                epochs=10)
        for i in range(NTRAIN_ITERS):
            print("####################################################################")
            print("Starting training iteration %d." % (i + 1))
            samples = []
            for _ in range(NROLLOUTS_PER_ITER):
                samples.append(self.agent.sample(self.task_hor, self.policy))
            print("Rewards obtained:", [s["reward_sum"] for s in samples])
            traj_obs.extend([s["obs"] for s in samples])
            traj_acs.extend([s["ac"] for s in samples])
            traj_rets.extend([s["reward_sum"] for s in samples])
            traj_rews.extend([s["rewards"] for s in samples])
            if i % 50 == 0:
                # Periodic checkpoint + evaluation snapshot.
                self.model.save_models()
                test_results.append((i, self.test(20)))
                test_file = open("test_graph.txt", "w")
                test_file.writelines([str(epoch) + "," + str(result) + "\n"
                                      for (epoch, result) in test_results])
                test_file.close()
            self.policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples])
class ExperimentModelDynamics:
    """Train a probabilistic-ensemble dynamics model (PENN) jointly with
    CEM / random-shooting MPC policies on the Pushing2D environment."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        self.env.seed(1024)  # fixed seed for reproducible episodes
        self.task_horizon = TASK_HORIZON
        self.agent = Agent(self.env)
        # BUG FIX: the original subscripted ``mpc_params`` directly, which
        # raised TypeError for the default ``mpc_params=None`` and mutated
        # the caller's dict in place. Copy (or create) the dict first.
        mpc_params = dict(mpc_params) if mpc_params else {}
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Evaluate the CEM (or random-shooting) MPC policy.

        Returns:
            (avg_return, avg_success): mean episode return and fraction of
            episodes whose final reward is 0 (success).
        """
        samples = []
        for j in range(num_episodes):
            print('Test episode {}'.format(j))
            samples.append(
                self.agent.sample(
                    self.task_horizon,
                    self.cem_policy if optimizer == 'cem'
                    else self.random_policy))
        avg_return = np.mean([sample["reward_sum"] for sample in samples])
        avg_success = np.mean(
            [sample["rewards"][-1] == 0 for sample in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy.

        Returns the collected warm-up samples so the caller can seed the
        replay buffer passed to :meth:`train`.
        """
        samples = []
        for i in range(num_episodes):
            samples.append(
                self.agent.sample(self.task_horizon,
                                  self.random_policy_no_mpc))
        obs = [sample["obs"] for sample in samples]
        ac = [sample["ac"] for sample in samples]
        rewards = [sample["rewards"] for sample in samples]
        self.cem_policy.train(obs, ac, rewards, epochs=num_epochs,
                              save_fig=True)
        return samples

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval, samples_):
        """Jointly train the model and the policy.

        ``samples_`` seeds a bounded replay buffer (most recent 100
        episodes); new rollouts push out the oldest entries.
        """
        samples = deque(maxlen=100)
        samples.extend(samples_)
        print('Sample size:', len(samples))
        rmse_list, loss_list = [], []
        for i in range(num_train_epochs):
            print(
                "####################################################################"
            )
            print("Starting training epoch %d." % (i + 1))
            for j in range(num_episodes_per_epoch):
                samples.append(
                    self.agent.sample(self.task_horizon, self.cem_policy))
            rmse, loss = self.cem_policy.train(
                [sample["obs"] for sample in samples],
                [sample["ac"] for sample in samples],
                [sample["rewards"] for sample in samples],
                epochs=5,
                save_fig=False)
            rmse_list.append(rmse)
            loss_list.append(loss)
            if (i + 1) % evaluation_interval == 0:
                # Checkpoint the TF model, then evaluate both optimizers.
                self.model.saver.save(
                    self.model.sess,
                    './model/v1/1.3/model_' + str(i) + '.ckpt')
                avg_return, avg_success = self.test(20, optimizer='cem')
                print('Test success CEM + MPC:', avg_success)
                avg_return, avg_success = self.test(20, optimizer='random')
                print('Test success Random + MPC:', avg_success)
        # Training curves for model error and Gaussian NLL.
        plt.figure(3)
        plt.plot(range(len(rmse_list)), rmse_list)
        plt.xlabel('Epochs')
        plt.ylabel("RMSE")
        plt.savefig('rmse_.png')
        plt.figure(4)
        plt.plot(range(len(loss_list)), loss_list)
        plt.xlabel('Epochs')
        plt.ylabel("Gauss Loss")
        plt.savefig('GLoss_.png')
# Hardware-in-the-loop cart-pole setup: locate the motor controller and
# encoder boards by their USB serial descriptors, then zero the pendulum
# angle before control starts.
import time
import numpy as np
import serial.tools.list_ports
import scipy.linalg as linalg
import matplotlib.pyplot as plt
from analyzer import EncoderAnalyzer, ImageAnalyzer
from motor_control import SabreControl
from cart_pole import CartPole
import serial.tools.list_ports  # NOTE(review): duplicate of the import above
from mpc import MPC

# MPC controller; argument semantics depend on the project's MPC signature
# — presumably timestep/gains. TODO confirm against mpc.py.
mpc = MPC(0.5, 0, 0, 0)

ports = list(serial.tools.list_ports.comports())
print(ports)
for p in ports:
    print(p)
    # Match devices by the exact USB VID:PID + serial-number string.
    if p[2] == "USB VID:PID=268b:0201 SNR=160045F45953":
        sabre_port = p[0]  # Sabertooth motor controller
    elif p[2] == "USB VID:PID=2341:0043 SNR=75334323935351D0D022":
        ard_port = p[0]  # board running the encoder firmware
# NOTE(review): if neither device is attached, sabre_port/ard_port are
# unbound and the constructors below raise NameError.

motor = SabreControl(port=sabre_port)
encoder = EncoderAnalyzer(port=ard_port)
#image = ImageAnalyzer(0,show_image=True)
# The encoder analyzer is reused for cart position, angle, and velocity.
cart = CartPole(motor, encoder, encoder, encoder)
cart.zeroAngleAnalyzer()
#encoder.setAngleZero()
class ExperimentModelDynamics:
    """PETS experiment: PENN dynamics ensemble + CEM/random MPC, with an
    optional bounded replay buffer for model training."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        self.task_horizon = TASK_HORIZON
        self.agent = Agent(self.env)
        # BUG FIX: the original subscripted ``mpc_params`` directly, which
        # raised TypeError for the default ``mpc_params=None`` and mutated
        # the caller's dict in place. Copy (or create) the dict first.
        mpc_params = dict(mpc_params) if mpc_params else {}
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Evaluate the CEM (or random-shooting) MPC policy.

        Returns:
            (avg_return, avg_success): mean episode return and fraction of
            episodes whose final reward is 0 (success).
        """
        samples = list()
        for j in range(num_episodes):
            print('Test episode {}'.format(j + 1))
            samples.append(
                self.agent.sample(
                    self.task_horizon,
                    self.cem_policy if optimizer == 'cem'
                    else self.random_policy))
        avg_return = np.mean([sample["reward_sum"] for sample in samples])
        avg_success = np.mean(
            [sample["rewards"][-1] == 0 for sample in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = list()
        for i in range(num_episodes):
            samples.append(
                self.agent.sample(self.task_horizon,
                                  self.random_policy_no_mpc))
        print('')
        print('*' * 20, 'TRAINING', '*' * 20)
        self.cem_policy.train([sample["obs"] for sample in samples],
                              [sample["ac"] for sample in samples],
                              [sample["rewards"] for sample in samples],
                              epochs=num_epochs)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval, use_buffer=True):
        """Jointly train the model and the policy.

        When ``use_buffer`` is True the model trains on a bounded replay
        buffer (100 episodes x 40 steps); otherwise only on the episodes
        collected in the current epoch.
        """
        # Buffer has a max size of 100 episodes worth of data
        obs_buffer = deque(maxlen=40 * 100)
        ac_buffer = deque(maxlen=40 * 100)
        rewards_buffer = deque(maxlen=40 * 100)
        success_cem = list()
        success_random = list()
        success_name_cem = 'success_pets_cem.npy'
        success_name_ran = 'success_pets_random.npy'
        for i in range(num_train_epochs):
            print("Starting training epoch %d." % (i + 1))
            # Collect data using the current dynamics model
            samples = list()
            for j in range(num_episodes_per_epoch):
                samples.append(
                    self.agent.sample(self.task_horizon, self.cem_policy))
            # Update replay buffers
            new_obs = [sample["obs"] for sample in samples]
            new_ac = [sample["ac"] for sample in samples]
            new_rewards = [sample["rewards"] for sample in samples]
            obs_buffer.extend(new_obs)
            ac_buffer.extend(new_ac)
            rewards_buffer.extend(new_rewards)
            print("Rewards obtained:",
                  [sample["reward_sum"] for sample in samples])
            # Train the dynamics model using the data
            if use_buffer:
                self.cem_policy.train(obs_buffer, ac_buffer,
                                      rewards_buffer, epochs=5)
            else:
                self.cem_policy.train(new_obs, new_ac, new_rewards,
                                      epochs=5)
            # Test and persist success curves (trivial save() wrapper from
            # the original inlined to a direct np.save call).
            num_test_episodes = 20
            if (i + 1) % evaluation_interval == 0:
                avg_return, avg_success_cem = self.test(num_test_episodes,
                                                        optimizer='cem')
                print('Test success CEM + MPC:', avg_success_cem)
                success_cem.append(avg_success_cem)
                np.save(success_name_cem, success_cem)
                avg_return, avg_success_ran = self.test(num_test_episodes,
                                                        optimizer='random')
                print('Test success Random + MPC:', avg_success_ran)
                success_random.append(avg_success_ran)
                np.save(success_name_ran, success_random)
# pendulum = Pendulum(window) # pendulum.set_properties(m=1.0, g=-9.8, L=0.25, k=0.1) # pendulum.set_position(0.5, 0.5) # pendulum.set_rotation(np.pi * 0.95) # pendulum.integration_type = pendulum.RUNGE_KUTTA # pendulum.nonlinear_mode = True # # cart = CartPole(window) # cart.set_properties(m=0.2, M=0.5, I=0.006, g=-9.8, l=0.25, b=0.5) # cart.set_position(0.5, 0.5) # cart.set_rotation(np.pi * 0.99) # cart.nonlinear_mode = True # cart.integration_type = cart.RUNGE_KUTTA mpc = MPC(dt, mpc_horizon) # This works for springdamper and pendulum R = np.matrix(np.eye(2) * 1.0e-4) Q = np.matrix(np.matrix([[100, 0], [0.0, 0.0]])) T = np.matrix(np.eye(2) * 1e-4) mpc.set_cost_weights(Q, R, T) mpc.Y_prime = np.matrix(np.zeros(shape=(2, mpc_horizon))) mpc.Y_prime[0, :] = 0.7 mpc.C = np.matrix(np.eye(2)) mpc.set_body(spring_damper) Xs = np.zeros(shape=(mpc.body.get_state().shape[0], time.shape[0])) Us = np.zeros(shape=(mpc.body.u.shape[0], time.shape[0]))
class ExperimentModelDynamics:
    """PETS experiment (PyTorch): PENN ensemble + CEM/random MPC with
    TensorBoard logging of test returns and success rates."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        # CPU is deliberately forced; switch to cuda manually if desired.
        self.device = torch.device('cpu')
        self.task_horizon = TASK_HORIZON
        # Tensorboard logging.
        self.timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        self.environment_name = "pusher"
        self.logdir = 'logs/%s/%s' % (self.environment_name, self.timestamp)
        self.summary_writer = SummaryWriter(self.logdir)
        self.agent = Agent(self.env)
        # BUG FIX: the original subscripted ``mpc_params`` directly, which
        # raised TypeError for the default ``mpc_params=None`` and mutated
        # the caller's dict in place. Copy (or create) the dict first.
        mpc_params = dict(mpc_params) if mpc_params else {}
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR,
                          self.device, self.summary_writer, self.timestamp,
                          self.environment_name)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS,
                              use_random_optimizer=False, **mpc_params)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS,
                                 use_random_optimizer=True, **mpc_params)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Evaluate the CEM (or random-shooting) MPC policy.

        Returns:
            (avg_return, avg_success): mean episode return and fraction of
            episodes whose final reward is 0 (success).
        """
        samples = []
        for j in range(num_episodes):
            samples.append(
                self.agent.sample(
                    self.task_horizon,
                    self.cem_policy if optimizer == 'cem'
                    else self.random_policy))
            print('Test episode {}: {}'.format(j, samples[-1]["rewards"][-1]))
        avg_return = np.mean([sample["reward_sum"] for sample in samples])
        avg_success = np.mean(
            [sample["rewards"][-1] == 0 for sample in samples])
        print('MPC PushingEnv: avg_reward: {}, avg_success: {}'.format(
            avg_return, avg_success))
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = []
        for i in range(num_episodes):
            print("Warmup Episode %d" % (i + 1))
            samples.append(
                self.agent.sample(self.task_horizon,
                                  self.random_policy_no_mpc))
        self.cem_policy.train([sample["obs"] for sample in samples],
                              [sample["ac"] for sample in samples],
                              [sample["rewards"] for sample in samples],
                              epochs=num_epochs)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval):
        """Jointly train the model and the policy.

        Keeps a sliding window of at most 10 epochs' worth of episodes.
        """
        samples = []
        for i in range(num_train_epochs):
            print(
                "####################################################################"
            )
            print("Starting training epoch %d." % (i + 1))
            for j in range(num_episodes_per_epoch):
                new_sample = self.agent.sample(self.task_horizon,
                                               self.cem_policy)
                samples.append(new_sample)
            # Drop the oldest epoch once the window exceeds 10 epochs.
            if len(samples) > 10 * num_episodes_per_epoch:
                samples = samples[num_episodes_per_epoch:]
            print("Rewards obtained:",
                  [sample["reward_sum"] for sample in samples])
            self.cem_policy.train([sample["obs"] for sample in samples],
                                  [sample["ac"] for sample in samples],
                                  [sample["rewards"] for sample in samples],
                                  epochs=5)
            if (i + 1) % evaluation_interval == 0:
                cem_avg_return, cem_avg_success = self.test(
                    20, optimizer='cem')
                print('Test success CEM + MPC:', cem_avg_success)
                rand_avg_return, rand_avg_success = self.test(
                    20, optimizer='random')
                print('Test success Random + MPC:', rand_avg_success)
                self.summary_writer.add_scalar("test/CEM-AverageSuccess",
                                               cem_avg_success, i)
                self.summary_writer.add_scalar("test/Rand-AverageSuccess",
                                               rand_avg_success, i)
                self.summary_writer.add_scalar("test/CEM-AverageReturn",
                                               cem_avg_return, i)
                self.summary_writer.add_scalar("test/Rand-AverageReturn",
                                               rand_avg_return, i)
# Plot the desired trajectory knots (circles) against the fine
# interpolation for cart position and pole angle.
fig, ax = plt.subplots(nrows=2, ncols=1)
ax[0].plot(t_des, q_des[0].T, 'o')
ax[0].plot(t_fine, y_fine[0].T)
ax[0].set_title('Desired Position')
ax[1].plot(t_des, q_des[1].T, 'o')
ax[1].plot(t_fine, y_fine[1].T)
ax[1].set_title('Desired Angle')
plt.show()

# initialize MPC
controller = MPC(m1=m1, m2=m2, l=l, g=g, u_max=u_max, d_max=d_max,
                 tf=tf, N=N, Q=Q, Qf=Qf, R=R, verbose=verbose)

# cartpole dynamics used in integration
def qddot(x, u):
    # Cart acceleration. State is indexed as x[1] = angle, x[3] = angular
    # rate — presumably x = [q1, q2, q1dot, q2dot]; TODO confirm against
    # the MPC model's state convention.
    q1ddot = (l*m2*np.sin(x[1])*x[3]**2 + u + \
        m2*g*np.cos(x[1])*np.sin(x[1]))/ \
        (m1 + m2*(1 - np.cos(x[1])**2))
    # Pole angular acceleration.
    q2ddot = -1*(l*m2*np.cos(x[1])*np.sin(x[1])*x[3]**2 \
        + u*np.cos(x[1]) + (m1 + m2)*g*np.sin(x[1])) / \
        (l*m1 + l*m2*(1 - np.cos(x[1])**2))
    # NOTE(review): no return statement is visible in this excerpt — the
    # function body appears truncated here; verify against the full file.
P = np.matrix(scipy.linalg.solve_discrete_are(A, B, Q, R)) # Instantiate controller max_fz = 4 * Quadcopter_orginal.m * Quadcopter_orginal.g max_tau = (max_fz / 2) * (17.5 / 100) tau_z = 0.01 ctl = MPC( model=Quadcopter_orginal, dynamics=Quadcopter_orginal.discrete_time_dynamics, # Linear MPC Q=Q, R=R, P=P, horizon=0.5, ulb=[-max_fz, -max_tau, -max_tau, -tau_z], uub=[max_fz, max_tau, max_tau, tau_z], xlb=[ -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -np.pi / 2, -np.pi / 2, -np.pi / 2, -np.inf, -np.inf, -np.inf ], xub=[ np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.pi / 2, np.pi / 2, np.pi / 2, np.inf, np.inf, np.inf ], integrator=False) ctl.set_reference(x_sp=np.array([0, 0, 0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0])) # Q2-1 ctl.set_constant_control([0, 0, 0, 0]) sim_env = EmbeddedSimEnvironment( model=Quadcopter_orginal,
# pendulum = Pendulum(window) # pendulum.set_properties(m=1.0, g=-9.8, L=0.25, k=0.1) # pendulum.set_position(0.5, 0.5) # pendulum.set_rotation(np.pi * 0.95) # pendulum.integration_type = pendulum.RUNGE_KUTTA # pendulum.nonlinear_mode = True # # cart = CartPole(window) # cart.set_properties(m=0.2, M=0.5, I=0.006, g=-9.8, l=0.25, b=0.5) # cart.set_position(0.5, 0.5) # cart.set_rotation(np.pi * 0.99) # cart.nonlinear_mode = True # cart.integration_type = cart.RUNGE_KUTTA mpc = MPC(dt, mpc_horizon) # This works for springdamper and pendulum R = np.matrix(np.eye(2) * 1.0e-4) Q = np.matrix(np.matrix([[100, 0], [0.0, 0.0]])) T = np.matrix(np.eye(2) * 1e-4) mpc.set_cost_weights(Q, R, T) mpc.Y_prime = np.matrix(np.zeros(shape=(2, mpc_horizon))) mpc.Y_prime[0, :] = 0.7 mpc.C = np.matrix(np.eye(2)) mpc.set_body(spring_damper) Xs = np.zeros(shape=(mpc.body.get_state().shape[0], time.shape[0]))
# Persist the FTOCP predicted trajectories for later analysis.
save(folder + 'ftocp_sPred.npy', ftocp.sPred)
save(folder + 'ftocp_xPred.npy', ftocp.xPred)
# BUG FIX: removed a third save that duplicated the sPred line above and
# simply rewrote the same file. NOTE(review): the original author may have
# intended to save ftocp.uPred here (it is used by mpc.addTraj below).
pdb.set_trace()  # NOTE(review): leftover debugger breakpoint — remove for
# unattended runs.

if newCost == 1:
    # Cheap cost: lightly weight states/inputs and allow many iterations.
    Q = 10**(-4) * np.diag([1, 10, 1, 1])
    R = 10**(-4) * np.diag([1, 0.1])
    maxIt = 41
else:
    maxIt = 1

if tracking == 0:
    # Regulation MPC toward the fixed reference xref.
    mpc = MPC(N_mpc, n, d, Q, R, Qf, xlb, xub, ulb, uub, dt, xref)
else:
    # Tracking MPC: heavier state weights, follows the FTOCP trajectory.
    Q = 100 * np.diag([1, 1, 1, 1])
    R = np.diag([1, 1])
    mpc = MPC_tracking(N_mpc, n, d, Q, R, Qf, xlb, xub, ulb, uub, dt)
    mpc.addTraj(ftocp.xPred, ftocp.uPred, ftocp.sPred, region)

tMax = 600
pltFlag = 0
# Initial conditions for the closed-loop experiments (4-dim state).
IC = []
IC.append(np.array([0.0, 0.0, 0.0, 0.0]))  # 0
IC.append(np.array([0.0, -0.03, 0.0, 0.0]))  # 1
IC.append(np.array([0.005, 0.0, 0.0, 0.0]))  # 2
IC.append(np.array([0.0, 0.0, 0.0, 0.2]))  # 3