def __init__(self, env_name='Pushing2D-v1', mpc_params=None):
    """Set up an experiment that plans with ground-truth dynamics.

    Args:
        env_name: Gym environment id to instantiate.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.agent = Agent(self.env)  # Does not need model
    self.warmup = False
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = True
    # No learned model (None): both policies plan against the true dynamics.
    self.cem_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE, NUM_ELITES,
                          MAX_ITERS, **mpc_params, use_random_optimizer=False)
    self.random_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE, NUM_ELITES,
                             MAX_ITERS, **mpc_params, use_random_optimizer=True)
def __init__(self, A, B, C, Q, R, RD, umin, umax, N):
    """Store the state-space matrices and construct the underlying MPC."""
    self.A = A
    self.B = B
    self.C = C
    self.num_outputs = C.shape[0]
    self.num_inputs = B.shape[1]
    # mtype 0 marks the single-input/single-output case, 1 everything else.
    self.mtype = 0 if self.num_inputs == self.num_outputs == 1 else 1
    self.mpc = MPC(A, B, C, Q, R, RD, umin, umax, N)
def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
    """Set up an experiment that learns dynamics with a PENN ensemble.

    Args:
        env_name: Gym environment id to instantiate.
        num_nets: Number of networks in the PENN ensemble.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.agent = Agent(self.env)
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = False
    self.model = PENN(num_nets, STATE_DIM,
                      len(self.env.action_space.sample()), LR)
    self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                          NUM_ELITES, MAX_ITERS, **mpc_params,
                          use_random_optimizer=False)
    self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                             NUM_ELITES, MAX_ITERS, **mpc_params,
                             use_random_optimizer=True)
    self.random_policy_no_mpc = RandomPolicy(
        len(self.env.action_space.sample()))
    # Per-evaluation success-rate logs for the two optimizers.
    self.log_cem_success = []
    self.log_rnd_success = []
def __init__(self, A, B, C, Q, R, RD, umin, umax, N):
    """Store the plant matrices, classify the model, and build the MPC."""
    self.A = A
    self.B = B
    self.C = C
    self.num_outputs = C.shape[0]
    self.num_inputs = B.shape[1]
    # mtype 0 marks the single-input/single-output case, 1 everything else.
    self.mtype = 0 if self.num_inputs == self.num_outputs == 1 else 1
    self.mpc = MPC(A, B, C, Q, R, RD, umin, umax, N)
    # NOTE(review): this sets a process-global matplotlib option from a
    # constructor — confirm that is intentional.
    plt.rcParams['savefig.facecolor'] = 'xkcd:black'
def __init__(self, env_name='Pushing2D-v1', mpc_params=None):
    """Set up a ground-truth-dynamics experiment with MPC or plain CEM.

    Args:
        env_name: Gym environment id to instantiate.
        mpc_params: Dict of MPC options; must contain 'use_mpc' to choose
            between MPC-wrapped optimizers and the standalone policies.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.agent = Agent(self.env)  # Does not need model
    self.warmup = False
    # Copy so the caller's dict is never mutated (the original wrote
    # 'use_gt_dynamics' back into the caller's object).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = True
    if mpc_params['use_mpc']:
        self.cem_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE,
                              NUM_ELITES, MAX_ITERS, INITIAL_MU,
                              INITIAL_SIGMA, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, None, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, INITIAL_MU,
                                 INITIAL_SIGMA, **mpc_params,
                                 use_random_optimizer=True)
    else:
        # Standalone (non-MPC) policies share the same planning settings.
        self.cem_policy = CEMPolicy(
            self.env, len(self.env.action_space.low), INITIAL_MU,
            INITIAL_SIGMA, PLAN_HORIZON, POPSIZE, NUM_ELITES, MAX_ITERS,
            self.env.action_space.high, self.env.action_space.low,
            mpc_params['use_gt_dynamics'])
        self.random_policy = RandomPolicy(
            self.env, len(self.env.action_space.low), INITIAL_MU,
            INITIAL_SIGMA, PLAN_HORIZON, POPSIZE, MAX_ITERS,
            self.env.action_space.high, self.env.action_space.low,
            mpc_params['use_gt_dynamics'])
def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None,
             exp_dir=None):
    """Set up a learned-dynamics experiment and its output directory.

    Args:
        env_name: Gym environment id to instantiate.
        num_nets: Number of networks in the PENN ensemble.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
        exp_dir: Optional subdirectory (relative to cwd) for experiment output.
    """
    self.env = gym.make(env_name)
    self.task_horizon = TASK_HORIZON
    self.num_nets = num_nets
    self.agent = Agent(self.env)
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = False
    self.model = PENN(num_nets, STATE_DIM,
                      len(self.env.action_space.sample()), LR)
    self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                          NUM_ELITES, MAX_ITERS, **mpc_params,
                          use_random_optimizer=False)
    self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                             NUM_ELITES, MAX_ITERS, **mpc_params,
                             use_random_optimizer=True)
    self.random_policy_no_mpc = RandomPolicy(
        len(self.env.action_space.sample()))
    # Resolve the experiment directory (cwd when no exp_dir is given).
    if exp_dir:
        self.exp_dir = os.path.join(os.getcwd(), exp_dir)
    else:
        self.exp_dir = os.getcwd()
    self.create_dirs(self.exp_dir)
    self.samples = []
def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
    """Set up a learned-dynamics experiment with tensorboard logging.

    Args:
        env_name: Gym environment id to instantiate.
        num_nets: Number of networks in the PENN ensemble.
        mpc_params: Optional dict of extra keyword arguments forwarded to MPC.
    """
    self.env = gym.make(env_name)
    # self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.device = torch.device('cpu')
    self.task_horizon = TASK_HORIZON
    # Tensorboard logging.
    self.timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    self.environment_name = "pusher"
    self.logdir = 'logs/%s/%s' % (self.environment_name, self.timestamp)
    self.summary_writer = SummaryWriter(self.logdir)
    self.agent = Agent(self.env)
    # Copy so the caller's dict is never mutated, and tolerate the None
    # default (the original crashed with TypeError when mpc_params was None).
    mpc_params = dict(mpc_params or {})
    mpc_params['use_gt_dynamics'] = False
    self.model = PENN(num_nets, STATE_DIM,
                      len(self.env.action_space.sample()), LR, self.device,
                      self.summary_writer, self.timestamp,
                      self.environment_name)
    self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                          NUM_ELITES, MAX_ITERS, use_random_optimizer=False,
                          **mpc_params)
    self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                             NUM_ELITES, MAX_ITERS, use_random_optimizer=True,
                             **mpc_params)
    self.random_policy_no_mpc = RandomPolicy(
        len(self.env.action_space.sample()))
def __init__(self, moos_community, moos_port):
    """Initiates MOOSComms, sets the callbacks and runs the loop"""
    super(mpcMOOS, self).__init__()
    self.server = moos_community
    self.port = moos_port
    self.name = 'mpcMOOS'
    self.lock = threading.Lock()
    # Wire up the connection / mail callbacks.
    self.set_on_connect_callback(self.__on_connect)
    self.set_on_mail_callback(self.__on_new_mail)
    # Path coordinates and vessel state arrive on dedicated active queues.
    self.add_active_queue('path_queue', self.on_path)
    self.add_message_route_to_active_queue('path_queue', 'PATH_X')
    self.add_message_route_to_active_queue('path_queue', 'PATH_Y')
    self.add_active_queue('control_queue', self.on_vessel_state)
    self.add_message_route_to_active_queue('control_queue', 'VESSEL_STATE')
    self.path_x = []
    self.path_y = []
    self.mpc = MPC()
    print('MPC created')
    self.run(self.server, self.port, self.name)
def __init__(self, chain_length=4, alpha=pi / 4):
    """Assemble the brush: silane anchor, initiator, PMPC chain, methyl cap."""
    super(Brush, self).__init__()

    # Add parts
    self.add(Silane(), label='silane')
    self.add(Initiator(), label='initiator')
    self.add(mb.recipes.Polymer(MPC(alpha=alpha), n=chain_length,
                                port_labels=('up', 'down')),
             label='pmpc')
    self.add(CH3(), label='methyl')

    # Stitch the parts together at their ports.
    mb.force_overlap(self['initiator'], self['initiator']['down'],
                     self['silane']['up'])
    mb.force_overlap(self['pmpc'], self['pmpc']['down'],
                     self['initiator']['up'])
    mb.force_overlap(self['methyl'], self['methyl']['up'],
                     self['pmpc']['up'])

    # Make self.port point to silane.bottom_port
    self.add(self['silane']['down'], label='down', containment=False)
class PyBlaster:
    """Daemon for PiBlaster project."""

    def __init__(self):
        """Whole project is run from this constructor."""
        # +++++++++++++++ Init +++++++++++++++ #
        self.keep_run = 1  # used in run for daemon loop, reset by SIGTERM
        self.ret_code = 0  # return code to command line (10 = shutdown)

        # +++++++++++++++ Objects +++++++++++++++ #
        # Each inner object will get reference to PyBlaster as self.main.
        # Exceptions in child threads are put here.
        self.ex_queue = queue.Queue()
        self.log = Log(self)
        self.settings = Settings(self)
        self.settings.parse()
        PB_GPIO.init_gpio(self)
        self.led = LED(self)
        self.buttons = Buttons(self)
        self.lirc = Lirc(self)
        self.dbhandle = DBHandle(self)
        self.cmd = Cmd(self)
        self.mpc = MPC(self)
        self.bt = RFCommServer(self)
        self.alsa = AlsaMixer(self)
        self.i2c = I2C(self)
        self.usb = UsbDrive(self)

        # +++++++++++++++ Init Objects +++++++++++++++ #
        # Make sure to run init functions in proper order!
        # Some might depend upon others ;)
        self.led.init_leds()
        self.dbhandle.dbconnect()
        self.mpc.connect()
        self.bt.start_server_thread()
        self.alsa.init_alsa()
        self.i2c.open_bus()
        self.usb.start_uploader_thread()

        # +++++++++++++++ Daemonize +++++++++++++++ #
        self.check_pidfile()
        self.daemonize()
        self.create_pidfile()

        # +++++++++++++++ Daemon loop +++++++++++++++ #
        self.led.show_init_done()
        self.buttons.start()
        self.lirc.start()
        self.run()

        # +++++++++++++++ Finalize +++++++++++++++ #
        self.log.write(log.MESSAGE, "Joining threads...")
        # Join remaining threads.
        self.usb.join()
        self.bt.join()
        self.buttons.join()
        self.lirc.join()
        self.mpc.join()
        self.led.join()
        self.log.write(log.MESSAGE, "leaving...")
        # Cleanup.
        self.mpc.exit_client()
        self.delete_pidfile()
        PB_GPIO.cleanup(self)

    def run(self):
        """Daemon loop."""
        # Expensive operations like new usb drive check
        # should not be run every loop run.
        poll_count = 0
        led_count = 1
        # -e flag is set, run only init and exit directly.
        # self.keep_run = 0 if self.settings.exitafterinit else 1

        # # # # # # DAEMON LOOP ENTRY # # # # # #
        self.log.write(log.MESSAGE, "Entering daemon loop...")
        while self.keep_run:
            poll_count += 1
            time.sleep(50. / 1000.)  # 50ms default in config
            self.buttons.read_buttons()
            self.mpc.process_idler_events()
            self.lirc.read_lirc()
            self.bt.check_incomming_commands()
            # TODO: play LEDs while playing -- if paused, do something else...
            if poll_count % 10 == 0:
                self.led.play_leds(led_count)
                led_count += 1
            # try:
            #     exc = self.ex_queue.get(block=False)
            # except queue.Empty:
            #     pass
            # else:
            #     exc_type, exc_obj, exc_trace = exc
            #     print(exc_type, exc_obj)
            #     print(exc_trace)
            #     self.ret_code = 1
            #     self.keep_run = False
            #     self.led.indicate_error()
        # End daemon loop.
        # # # # # # # DAEMON LOOP EXIT # # # # # #

    def daemonize(self):
        """Fork process and disable print in log object."""
        signal.signal(signal.SIGTERM, self.term_handler)
        signal.signal(signal.SIGINT, self.term_handler)
        if not self.settings.daemonize:
            self.log.init_log()
            return
        self.log.write(log.DEBUG1, "daemonizing")
        # Classic double fork so the daemon is fully detached from the tty.
        try:
            pid = os.fork()
        except OSError:
            # self.log.write(log.EMERGENCY, "Failed to fork daemon")
            raise
        if pid == 0:
            os.setsid()
            try:
                pid = os.fork()
            except OSError:
                # self.log.write(log.EMERGENCY, "Failed to fork daemon")
                raise
            if pid == 0:
                os.chdir("/tmp")
                os.umask(0)
            else:
                exit(0)
        else:
            exit(0)
        self.settings.is_daemonized = True
        self.log.init_log()
        self.log.write(log.MESSAGE, "daemonized.")

    def term_handler(self, *args):
        """Signal handler to stop daemon loop."""
        self.log.write(log.MESSAGE, "Got TERM or INT signal -- leaving!")
        self.keep_run = 0

    def check_pidfile(self):
        """Check if daemon already running, throw if pid file found."""
        if os.path.exists(self.settings.pidfile):
            self.log.write(log.EMERGENCY, "Found pid file for pyblaster, "
                                          "another process running?")
            raise Exception("pid file found")

    def create_pidfile(self):
        """Write getpid() to file after daemonize()."""
        try:
            fpid = open(self.settings.pidfile, "w")
        except IOError:
            self.log.write(log.EMERGENCY, "failed to create pidfile %s" %
                           self.settings.pidfile)
            raise
        fpid.write("%s\n" % os.getpid())

    def delete_pidfile(self):
        """Try to remove pid file after daemon should exit."""
        if os.path.exists(self.settings.pidfile):
            try:
                os.remove(self.settings.pidfile)
            except OSError:
                self.log.write(log.EMERGENCY, "failed to remove pidfile %s" %
                               self.settings.pidfile)
                raise

    def kill_other_pyblaster(self):
        """Check if pid found in pid file and try to kill this (old) process."""
        if not os.path.exists(self.settings.pidfile):
            return
        try:
            f = open(self.settings.pidfile, "r")
        except IOError:
            self.log.write(log.EMERGENCY, "failed to read pidfile %s" %
                           self.settings.pidfile)
            raise
        pid = int(f.readline().strip())
        print("Trying to kill old process with pid %s..." % pid)
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError:
            self.log.write(log.EMERGENCY, "failed to kill process with pid %s" %
                           pid)
            raise
        exit(0)
def __init__(self): """Whole project is run from this constructor """ # +++++++++++++++ Init +++++++++++++++ # self.keep_run = 1 # used in run for daemon loop, reset by SIGTERM self.ret_code = 0 # return code to command line (10 = shutdown) # +++++++++++++++ Objects +++++++++++++++ # # Each inner object will get reference to PyBlaster as self.main. # exceptions in child threads are put here self.ex_queue = queue.Queue() self.log = Log(self) self.settings = Settings(self) self.settings.parse() PB_GPIO.init_gpio(self) self.led = LED(self) self.buttons = Buttons(self) self.lirc = Lirc(self) self.dbhandle = DBHandle(self) self.cmd = Cmd(self) self.mpc = MPC(self) self.bt = RFCommServer(self) self.alsa = AlsaMixer(self) self.i2c = I2C(self) self.usb = UsbDrive(self) # +++++++++++++++ Init Objects +++++++++++++++ # # Make sure to run init functions in proper order! # Some might depend upon others ;) self.led.init_leds() self.dbhandle.dbconnect() self.mpc.connect() self.bt.start_server_thread() self.alsa.init_alsa() self.i2c.open_bus() self.usb.start_uploader_thread() # +++++++++++++++ Daemoninze +++++++++++++++ # self.check_pidfile() self.daemonize() self.create_pidfile() # +++++++++++++++ Daemon loop +++++++++++++++ # self.led.show_init_done() self.buttons.start() self.lirc.start() self.run() # +++++++++++++++ Finalize +++++++++++++++ # self.log.write(log.MESSAGE, "Joining threads...") # join remaining threads self.usb.join() self.bt.join() self.buttons.join() self.lirc.join() self.mpc.join() self.led.join() self.log.write(log.MESSAGE, "leaving...") # cleanup self.mpc.exit_client() self.delete_pidfile() PB_GPIO.cleanup(self)
class mpcMOOS(pymoos.comms):
    """MOOS client that receives a path and vessel state and runs MPC."""

    def __init__(self, moos_community, moos_port):
        """Initiates MOOSComms, sets the callbacks and runs the loop."""
        super(mpcMOOS, self).__init__()
        self.server = moos_community
        self.port = moos_port
        self.name = 'mpcMOOS'
        self.lock = threading.Lock()
        self.set_on_connect_callback(self.__on_connect)
        self.set_on_mail_callback(self.__on_new_mail)
        # Path coordinates and vessel state arrive on dedicated active queues.
        self.add_active_queue('path_queue', self.on_path)
        self.add_message_route_to_active_queue('path_queue', 'PATH_X')
        self.add_message_route_to_active_queue('path_queue', 'PATH_Y')
        self.add_active_queue('control_queue', self.on_vessel_state)
        self.add_message_route_to_active_queue('control_queue', 'VESSEL_STATE')
        self.path_x = []
        self.path_y = []
        self.mpc = MPC()
        print('MPC created')
        self.run(self.server, self.port, self.name)

    def __on_connect(self):
        """OnConnect callback."""
        print("Connected to", self.server, self.port,
              "under the name ", self.name)
        self.register('PATH_X', 0)
        self.register('PATH_Y', 0)
        self.register('VESSEL_STATE', 0)
        return True

    def __on_new_mail(self):
        """OnNewMail callback."""
        print("on_mail activated by")
        return True

    def on_path(self, msg):
        """Special callback for path."""
        with self.lock:
            # print("on_path activated by", msg.key(), "with value", msg.string())
            if msg.key() == 'PATH_X':
                # Parse the path x coords from their string representation.
                text = msg.string().replace('[', '')
                text = text.replace(']', '')
                text = text.replace(' ', '')
                self.path_x = [float(i) for i in text.split(',')]
                print('Received {} x coordinates'.format(len(self.path_x)))
            elif msg.key() == 'PATH_Y':
                # Parse the path y coords (may carry a numpy 'array(...)' wrapper).
                text = msg.string().replace('[', '')
                text = text.replace(']', '')
                text = text.replace('array(', '')
                text = text.replace(')', '')
                text = text.replace(' ', '')
                self.path_y = [float(i) for i in text.split(',')]
                print('Received {} y coordinates'.format(len(self.path_y)))
        return True

    def on_vessel_state(self, msg):
        """Run the optimizer once a path and a vessel state are available."""
        while len(self.path_x) == 0 or len(self.path_y) == 0:
            # Wait till there is a path present.
            time.sleep(0.5)
        self.lock.acquire()
        try:
            if msg.key() == 'VESSEL_STATE':
                states = msg.string().split(',')
                print('states received')
                print(states)
                vessel = Vessel(float(states[0]), float(states[1]),
                                float(states[4]), float(states[3]),
                                float(states[2]), 0.1, 180, -180)
                print('vessel object created')
                elapsed_time = []
                path_xte = []
                mpc_xte = []
                path_heading_error = []
                start = time.time()
                print('size pathx: {}'.format(len(self.path_x)))
                print('size pathy: {}'.format(len(self.path_y)))
                print(self.mpc.ready)
                rrot = self.mpc.optimize_simple(self.path_x, self.path_y,
                                                vessel)
                stop = time.time()
                print("elapsed time: {}".format(stop - start))
                elapsed_time.append(stop - start)
                self.notify('RROT', rrot, -1)
        except Exception as e:
            print(e)
        finally:
            self.lock.release()
        return True
class Simulator:
    """Closed-loop simulator that tracks a reference trajectory with MPC."""

    def __init__(self, A, B, C, Q, R, RD, umin, umax, N):
        """Store the plant matrices, classify the model, and build the MPC."""
        self.A = A
        self.B = B
        self.C = C
        self.num_outputs = C.shape[0]
        self.num_inputs = B.shape[1]
        # mtype 0 marks the single-input/single-output case, 1 everything else.
        self.mtype = 0 if self.num_inputs == self.num_outputs == 1 else 1
        self.mpc = MPC(A, B, C, Q, R, RD, umin, umax, N)

    def get_reference_trajectory(self, n, model_type=0):
        """Build an n-step reference: a square wave (SISO) or 4 stacked rows."""
        self.t = np.linspace(1, n, n)
        if model_type == 0:
            self.ref_traj = signal.square(self.t / 6)[np.newaxis, :]
        else:
            rx = 3 * np.ones(len(self.t))
            ry = np.ones(len(self.t))
            rz = signal.square(self.t / 32)
            ryaw = 2 * signal.square(self.t / 1000)
            self.ref_traj = np.row_stack((rx, ry, rz, ryaw))

    def simulate(self):
        """Roll the plant forward, asking the MPC for an input each step."""
        U, X = self.establish_starting_state()
        for step in range(self.ref_traj.shape[1]):
            if step == 0:
                self.X_hist = X
                self.U_hist = U
                self.Y_hist = self.C @ X
            else:
                self.X_hist = np.column_stack((self.X_hist, X))
                self.U_hist = np.column_stack((self.U_hist, U))
                self.Y_hist = np.column_stack((self.Y_hist, self.C @ X))
            remaining_traj = self.ref_traj[:, step:]
            tic = time.time()
            U = self.mpc.get_control_input(X, U, remaining_traj)
            print(time.time() - tic)
            X = self.update_states(X, U)

    def establish_starting_state(self):
        """Return zero input and zero state column vectors."""
        U = np.zeros((self.B.shape[1], 1))
        X = np.zeros((self.A.shape[1], 1))
        return U, X

    def update_states(self, X, U):
        """One step of the discrete dynamics x' = A x + B u."""
        return self.A @ X + self.B @ U

    def plot(self):
        """Plot tracking history on a dark theme (SISO or 4-output layout)."""
        self.t = self.t * .04
        if self.mtype == 0:
            fig, ax = plt.subplots()
            fig.patch.set_facecolor('xkcd:black')
            ax.set_facecolor('xkcd:black')
            ax.tick_params(color='xkcd:white', labelcolor='xkcd:white')
            for side in ('bottom', 'top', 'right', 'left'):
                ax.spines[side].set_color('xkcd:white')
            ax.plot(self.t, self.ref_traj[0, :])
            ax.plot(self.t, self.X_hist[0, :])
        else:
            figY, axY = plt.subplots(nrows=self.num_outputs)
            figU, axU = plt.subplots(nrows=self.num_outputs)
            figY.suptitle('Reference Tracking', color='xkcd:white', y=1.0)
            figU.suptitle('Control Usage', color='xkcd:white', y=1.0)
            figY.patch.set_facecolor('xkcd:black')
            figU.patch.set_facecolor('xkcd:black')
            Y_labels = ['X Position', 'Y Position', 'Z Position',
                        'Yaw Position']
            U_labels = ['Roll', 'Pitch', 'Yaw', 'Thrust']
            # Reorder state history rows to match the output labels.
            X_change = np.vstack((self.X_hist[1, :], self.X_hist[2, :],
                                  self.X_hist[0, :], self.X_hist[3, :]))
            for i in range(self.num_outputs):
                axY[i].plot(self.t, self.ref_traj[i, :])
                axY[i].plot(self.t, X_change[i, :])
                axY[i].set_title(Y_labels[i], color='xkcd:white')
                axU[i].plot(self.t, self.U_hist[i, :])
                axU[i].set_title(U_labels[i], color='xkcd:white')
                axY[i].set_facecolor('xkcd:black')
                axU[i].set_facecolor('xkcd:black')
                axY[i].tick_params(color='xkcd:white', labelcolor='xkcd:white')
                axU[i].tick_params(color='xkcd:white', labelcolor='xkcd:white')
                for side in ('bottom', 'top', 'right', 'left'):
                    axY[i].spines[side].set_color('xkcd:white')
                    axU[i].spines[side].set_color('xkcd:white')
            figY.tight_layout()
            figU.tight_layout()
        plt.show()
class System(object):
    """Fixed-timestep simulation loop driving one body, optionally via MPC."""

    def __init__(self, window, dt=0.01):
        self.use_mpc = True
        self.clock = sf.Clock()
        self.window = window
        self.dt = dt
        self.time = 0
        self.mpc_horizon = 10
        self.counter = -1

        spring_damper = SpringDamper(window, pos_x=0.9, pos_y=0.5)
        spring_damper.set_properties(k=10., m=1., c=0.1, x0=0.5)
        spring_damper.integration_type = spring_damper.RUNGE_KUTTA

        pendulum = Pendulum(window)
        pendulum.set_properties(m=1.0, g=-9.8, L=0.25, k=0.1)
        pendulum.set_position(0.5, 0.5)
        pendulum.set_rotation(np.pi * 0.95)
        pendulum.integration_type = pendulum.RUNGE_KUTTA
        pendulum.nonlinear_mode = True

        cart = CartPole(window)
        cart.set_properties(m=0.2, M=0.5, I=0.006, g=-9.8, l=0.25, b=0.5)
        cart.set_position(0.5, 0.5)
        cart.set_rotation(np.pi * 0.99)
        cart.nonlinear_mode = True
        cart.integration_type = cart.RUNGE_KUTTA

        self.mpc = MPC(dt, self.mpc_horizon)

        # This works for springdamper and pendulum
        R = np.matrix(np.eye(2) * 1e-5)
        Q = np.matrix(np.matrix([[100, 0], [0.0, 1.0]]))
        T = np.matrix(np.eye(2) * 1e-5)
        self.mpc.set_cost_weights(Q, R, T)
        self.mpc.Y_prime = np.matrix(np.zeros(shape=(2, self.mpc_horizon)))
        self.mpc.Y_prime[0, :] = 0.7
        self.mpc.C = np.matrix(np.eye(2))

        # cartpole
        # R = np.matrix(np.eye(4)*0.001)
        # Q = np.matrix([[100, 0, 0, 0],
        #                [0, 0, 0, 0],
        #                [0, 0, 100, 0],
        #                [0, 0, 0, 0]])
        #
        # self.mpc.set_cost_weights(Q, R)
        # self.mpc.set_constraints(cart.cons)
        #
        # self.mpc.Y_prime = np.matrix(np.zeros(shape=(4, self.mpc_horizon)))
        # self.mpc.Y_prime[0,:] = np.pi
        # self.mpc.Y_prime[2,:] = 0.5
        # self.C = np.matrix([[1,0, 1, 0]])

        # self.bodies = [spring_damper, pendulum]
        self.bodies = [spring_damper]
        self.mpc.set_body(self.bodies[0])

    def simulate(self):
        """Advance one tick; re-plan with MPC every mpc_horizon ticks."""
        self.counter += 1
        if self.use_mpc:
            if self.counter % self.mpc_horizon == 0:
                self.counter = 0
                self.mpc.calc_control()
            u = self.mpc.U[:, self.counter]
            self.mpc.body.simulate(self.dt, np.matrix(u).transpose())
        else:
            for body in self.bodies:
                body.simulate(self.dt)

    def render(self):
        """Clear the window, draw every body and the clock, then display."""
        self.window.clear(sf.Color.WHITE)
        for body in self.bodies:
            body.render()
        add_time(self.window, self.time)
        self.window.display()

    def step(self):
        """Run one simulate/render cycle when a full dt has elapsed."""
        if self.clock.elapsed_time.milliseconds >= self.dt * 1000:
            self.clock.restart()
            self.time += self.dt
            self.simulate()
            self.render()
pendulum = Pendulum()

# Get the system discrete-time dynamics
A, B, Bw, C = pendulum.get_discrete_system_matrices_at_eq()

# Solve the ARE for our system to extract the terminal weight matrix P
Q = np.eye(4) * 1
R = np.eye(1) * 1
P = np.eye(4) * 1

# Instantiate controller
ctl = MPC(model=pendulum,
          dynamics=pendulum.discrete_time_dynamics,
          horizon=7,
          Q=Q, R=R, P=P,
          ulb=-5, uub=5,
          xlb=[-2, -10, -np.pi / 2, -np.pi / 2],
          xub=[12, 10, np.pi / 2, np.pi / 2])

# Solve without disturbance
ctl.set_reference(x_sp=np.array([10, 0, 0, 0]))
sim_env_full = EmbeddedSimEnvironment(
    model=pendulum,
    dynamics=pendulum.pendulum_linear_dynamics_with_disturbance,
    controller=ctl.mpc_controller,
    time=6)
sim_env_full.run([0, 0, 0, 0])

# Solve with disturbance
# NOTE(review): `import mpc.MPC as MPC` binds the *module* mpc.MPC, and the
# call below then invokes it — confirm whether `from mpc import MPC` was meant.
import mpc.MPC as MPC

if __name__ == '__main__':
    MpcMachine = MPC()
class ExperimentModelDynamics:
    """Learned-dynamics experiment that logs losses, RMSE and success rates."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None,
                 exp_dir=None):
        self.env = gym.make(env_name)
        self.task_horizon = TASK_HORIZON
        self.num_nets = num_nets
        self.agent = Agent(self.env)
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))
        # Experiment output directory (cwd when no exp_dir is given).
        if exp_dir:
            self.exp_dir = os.path.join(os.getcwd(), exp_dir)
        else:
            self.exp_dir = os.getcwd()
        self.create_dirs(self.exp_dir)

    def test(self, num_episodes, optimizer='cem'):
        """Roll out episodes with the chosen optimizer; return mean return/success."""
        samples = []
        print("optimizer: {}".format(optimizer))
        for episode in range(num_episodes):
            print('Test episode {}'.format(episode))
            policy = self.cem_policy if optimizer == 'cem' else self.random_policy
            samples.append(self.agent.sample(self.task_horizon, policy))
        avg_return = np.mean([s["reward_sum"] for s in samples])
        avg_success = np.mean([s["rewards"][-1] == 0 for s in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = [self.agent.sample(self.task_horizon,
                                     self.random_policy_no_mpc)
                   for _ in range(num_episodes)]
        epoch_loss_arr, epoch_rmse_arr = self.cem_policy.train(
            [s["obs"] for s in samples],
            [s["ac"] for s in samples],
            [s["rewards"] for s in samples],
            epochs=num_epochs)
        self.save_data(epoch_loss_arr, "loss_warmup", self.exp_dir)
        self.save_data(epoch_rmse_arr, "rmse_warmup", self.exp_dir)
        self.plot_prop(epoch_loss_arr, "loss_warmup", self.exp_dir)
        self.plot_prop(epoch_rmse_arr, "rmse_warmup", self.exp_dir)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval):
        """Jointly training the model and the policy."""
        random_SR = []
        cem_SR = []
        loss_arr = []
        rmse_arr = []
        for i in range(num_train_epochs):
            print("####################################################################")
            print("Starting training epoch %d." % (i + 1))
            samples = []
            for _ in range(num_episodes_per_epoch):
                samples.append(self.agent.sample(self.task_horizon,
                                                 self.cem_policy))
            print("Rewards obtained:", [s["reward_sum"] for s in samples])
            loss_arr_curr, rmse_arr_curr = self.cem_policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples],
                epochs=5)
            loss_arr.append(np.mean(loss_arr_curr))
            rmse_arr.append(np.mean(rmse_arr_curr))
            print("mean_loss: {}".format(np.mean(loss_arr_curr)))
            print("mean_rmse: {}".format(np.mean(rmse_arr_curr)))
            self.save_data(loss_arr, "loss", self.exp_dir)
            self.save_data(rmse_arr, "rmse", self.exp_dir)
            if (i + 1) % evaluation_interval == 0:
                avg_return, avg_success = self.test(20, optimizer='cem')
                cem_SR.append(avg_success * 1.0)
                print('Test success CEM + MPC:', avg_success)
                avg_return, avg_success = self.test(20, optimizer='random')
                random_SR.append(avg_success * 1.0)
                print('Test success Random + MPC:', avg_success)
                self.save_data(cem_SR, "SR_CEM", self.exp_dir)
                self.save_data(random_SR, "SR_random", self.exp_dir)
                self.plot_prop(cem_SR, "SR_CEM", self.exp_dir)
                self.plot_prop(random_SR, "SR_random", self.exp_dir)
        # Persist the final success-rate curves once more after training.
        self.save_data(cem_SR, "SR_CEM", self.exp_dir)
        self.save_data(random_SR, "SR_random", self.exp_dir)
        self.plot_prop(cem_SR, "SR_CEM", self.exp_dir)
        self.plot_prop(random_SR, "SR_random", self.exp_dir)

    def plot_prop(self, prop, prop_name, plots_path):
        """Save a line plot of `prop` as <plots_path>/<prop_name>.png."""
        fig = plt.figure(figsize=(16, 9))
        plt.plot(prop, label=prop_name, color='navy')
        plt.xlabel("num_epochs / 50")
        plt.ylabel(prop_name)
        plt.legend()
        plt.savefig(os.path.join(plots_path, "{}.png".format(prop_name)))
        plt.clf()
        plt.close()

    def save_data(self, prop, prop_name, save_dir):
        """Persist `prop` as a .npy file under save_dir."""
        np.save(os.path.join(save_dir, prop_name + ".npy"), prop)

    def create_dirs(self, path):
        """Create `path` if it does not exist yet."""
        if not os.path.exists(path):
            os.mkdir(path)
class ExperimentModelDynamics:
    """Learned-dynamics experiment: warm up a PENN model, then train with MPC."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        self.task_horizon = TASK_HORIZON
        self.agent = Agent(self.env)
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Roll out episodes with the chosen optimizer; return mean return/success."""
        samples = []
        for episode in range(num_episodes):
            print('Test episode {}'.format(episode))
            policy = self.cem_policy if optimizer == 'cem' else self.random_policy
            samples.append(self.agent.sample(self.task_horizon, policy))
        avg_return = np.mean([s["reward_sum"] for s in samples])
        avg_success = np.mean([s["rewards"][-1] == 0 for s in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = [self.agent.sample(self.task_horizon,
                                     self.random_policy_no_mpc)
                   for _ in range(num_episodes)]
        self.cem_policy.train(
            [s["obs"] for s in samples],
            [s["ac"] for s in samples],
            [s["rewards"] for s in samples],
            epochs=num_epochs)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval):
        """Jointly training the model and the policy."""
        for i in range(num_train_epochs):
            print("####################################################################")
            print("Starting training epoch %d." % (i + 1))
            samples = []
            for _ in range(num_episodes_per_epoch):
                samples.append(self.agent.sample(self.task_horizon,
                                                 self.cem_policy))
            print("Rewards obtained:", [s["reward_sum"] for s in samples])
            self.cem_policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples],
                epochs=5)
            if (i + 1) % evaluation_interval == 0:
                avg_return, avg_success = self.test(50, optimizer='cem')
                print('Test success CEM + MPC:', avg_success)
                avg_return, avg_success = self.test(50, optimizer='random')
                print('Test success Random + MPC:', avg_success)
from mpc import MPC
from control_room import ControlRoom
import cvxpy as cp
from cvxpy.atoms.elementwise.abs import abs as cp_abs
import random

random.seed(3)

# construct MPC class
mpc = MPC(
    ctr_mu=ctr_mu,
    n_slots=n_slots,
    ad_opportunities_params=ad_opportunities_params,
    ad_opportunities_rate_initial=ad_opportunities_rate_initial,
    b_star_params=b_star_params,
    b_star_initial=b_star_initial,
    ctr_params=ctr_params,
    ctr_initial=ctr_initial,
    cov=cov,
    bid_price_initial=bid_price_initial,
    bid_uncertainty_initial=bid_uncertainty_initial
)

# 0. Initialize campaign without MPC informed bidding
for i in range(100):
    ad_data = mpc.simulate_data()
    cost = ad_data["cost"]
    # Update historic cost data
    past_costs = mpc.update_history(past_costs, cost)
obs5 = Obstacle(cx=0.27, cy=-1.0, radius=0.05)
obs6 = Obstacle(cx=0.78, cy=-1.47, radius=0.05)
obs7 = Obstacle(cx=0.73, cy=-0.9, radius=0.07)
obs8 = Obstacle(cx=1.2, cy=0.0, radius=0.08)
obs9 = Obstacle(cx=0.67, cy=-0.05, radius=0.06)
map.add_obstacles([obs1, obs2, obs3, obs4, obs5, obs6, obs7, obs8, obs9])

"""
Get configured do-mpc modules:
"""
vehicle = simple_bycicle_model(length=0.12, width=0.06, Ts=0.05,
                               reference_path=reference_path)
model = vehicle.model
controller = MPC(vehicle)
mpc = controller.mpc
simulator = Simulator(vehicle).simulator

# Compute speed profile
ay_max = 4.0  # m/s^2
a_min = -0.1  # m/s^2
a_max = 0.5  # m/s^2
SpeedProfileConstraints = {
    'a_min': a_min,
    'a_max': a_max,
    'v_min': 0.0,
    'v_max': 1.0,
    'ay_max': ay_max,
}
class Experiment:
    """PETS-style experiment: train a PENN model and evaluate the MPC policy."""

    def __init__(self):
        self.env = gym.make('Pushing2D-v1')
        self.task_hor = TASK_HORIZON
        self.agent = Agent(self.env)
        self.model = PENN(NUM_NETS, STATE_DIM, ACTION_DIM, LR)
        self.policy = MPC(self.env, NUM_PARTICLES, PLAN_HOR, self.model,
                          POPSIZE, NUM_ELITES, MAX_ITERS)

    def test(self, num_episodes):
        """Roll out episodes with the MPC policy; return the success rate."""
        samples = []
        for _ in range(num_episodes):
            samples.append(self.agent.sample(self.task_hor, self.policy))
        print("Rewards obtained:",
              np.mean([s["reward_sum"] for s in samples]))
        print("Percent success:",
              np.mean([s["rewards"][-1] == 0 for s in samples]))
        return np.mean([s["rewards"][-1] == 0 for s in samples])

    def train(self):
        """Seed the model with random rollouts, then iterate MPC training."""
        traj_obs, traj_acs, traj_rets, traj_rews = [], [], [], []
        test_results = []
        samples = []
        rand_pol = RandomPolicy(2)
        for _ in range(NINIT_ROLLOUTS):
            samples.append(self.agent.sample(self.task_hor, rand_pol))
            traj_obs.append(samples[-1]["obs"])
            traj_acs.append(samples[-1]["ac"])
            traj_rews.append(samples[-1]["rewards"])
        if NINIT_ROLLOUTS > 0:
            self.policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples],
                epochs=10)
        for i in range(NTRAIN_ITERS):
            print("####################################################################")
            print("Starting training iteration %d." % (i + 1))
            samples = []
            for _ in range(NROLLOUTS_PER_ITER):
                samples.append(self.agent.sample(self.task_hor, self.policy))
            print("Rewards obtained:", [s["reward_sum"] for s in samples])
            traj_obs.extend([s["obs"] for s in samples])
            traj_acs.extend([s["ac"] for s in samples])
            traj_rets.extend([s["reward_sum"] for s in samples])
            traj_rews.extend([s["rewards"] for s in samples])
            if i % 50 == 0:
                # Periodic checkpoint + evaluation snapshot.
                self.model.save_models()
                test_results.append((i, self.test(20)))
                test_file = open("test_graph.txt", "w")
                test_file.writelines([str(epoch) + "," + str(result) + "\n"
                                      for (epoch, result) in test_results])
                test_file.close()
            self.policy.train(
                [s["obs"] for s in samples],
                [s["ac"] for s in samples],
                [s["rewards"] for s in samples])
class ExperimentModelDynamics:
    """Train a probabilistic-ensemble dynamics model (PENN) jointly with
    CEM / random-shooting MPC policies on the Pushing2D environment."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        self.env.seed(1024)  # fixed seed for reproducible episodes
        self.task_horizon = TASK_HORIZON
        self.agent = Agent(self.env)
        # BUG FIX: the original subscripted ``mpc_params`` directly, which
        # raised TypeError for the default ``mpc_params=None`` and mutated
        # the caller's dict in place. Copy (or create) the dict first.
        mpc_params = dict(mpc_params) if mpc_params else {}
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Evaluate the CEM (or random-shooting) MPC policy.

        Returns:
            (avg_return, avg_success): mean episode return and fraction of
            episodes whose final reward is 0 (success).
        """
        samples = []
        for j in range(num_episodes):
            print('Test episode {}'.format(j))
            samples.append(
                self.agent.sample(
                    self.task_horizon,
                    self.cem_policy if optimizer == 'cem'
                    else self.random_policy))
        avg_return = np.mean([sample["reward_sum"] for sample in samples])
        avg_success = np.mean(
            [sample["rewards"][-1] == 0 for sample in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy.

        Returns the collected warm-up samples so the caller can seed the
        replay buffer passed to :meth:`train`.
        """
        samples = []
        for i in range(num_episodes):
            samples.append(
                self.agent.sample(self.task_horizon,
                                  self.random_policy_no_mpc))
        obs = [sample["obs"] for sample in samples]
        ac = [sample["ac"] for sample in samples]
        rewards = [sample["rewards"] for sample in samples]
        self.cem_policy.train(obs, ac, rewards, epochs=num_epochs,
                              save_fig=True)
        return samples

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval, samples_):
        """Jointly train the model and the policy.

        ``samples_`` seeds a bounded replay buffer (most recent 100
        episodes); new rollouts push out the oldest entries.
        """
        samples = deque(maxlen=100)
        samples.extend(samples_)
        print('Sample size:', len(samples))
        rmse_list, loss_list = [], []
        for i in range(num_train_epochs):
            print(
                "####################################################################"
            )
            print("Starting training epoch %d." % (i + 1))
            for j in range(num_episodes_per_epoch):
                samples.append(
                    self.agent.sample(self.task_horizon, self.cem_policy))
            rmse, loss = self.cem_policy.train(
                [sample["obs"] for sample in samples],
                [sample["ac"] for sample in samples],
                [sample["rewards"] for sample in samples],
                epochs=5,
                save_fig=False)
            rmse_list.append(rmse)
            loss_list.append(loss)
            if (i + 1) % evaluation_interval == 0:
                # Checkpoint the TF model, then evaluate both optimizers.
                self.model.saver.save(
                    self.model.sess,
                    './model/v1/1.3/model_' + str(i) + '.ckpt')
                avg_return, avg_success = self.test(20, optimizer='cem')
                print('Test success CEM + MPC:', avg_success)
                avg_return, avg_success = self.test(20, optimizer='random')
                print('Test success Random + MPC:', avg_success)
        # Training curves for model error and Gaussian NLL.
        plt.figure(3)
        plt.plot(range(len(rmse_list)), rmse_list)
        plt.xlabel('Epochs')
        plt.ylabel("RMSE")
        plt.savefig('rmse_.png')
        plt.figure(4)
        plt.plot(range(len(loss_list)), loss_list)
        plt.xlabel('Epochs')
        plt.ylabel("Gauss Loss")
        plt.savefig('GLoss_.png')
# Hardware-in-the-loop cart-pole setup: locate the motor controller and
# encoder boards by their USB serial descriptors, then zero the pendulum
# angle before control starts.
import time
import numpy as np
import serial.tools.list_ports
import scipy.linalg as linalg
import matplotlib.pyplot as plt
from analyzer import EncoderAnalyzer, ImageAnalyzer
from motor_control import SabreControl
from cart_pole import CartPole
import serial.tools.list_ports  # NOTE(review): duplicate of the import above
from mpc import MPC

# MPC controller; argument semantics depend on the project's MPC signature
# — presumably timestep/gains. TODO confirm against mpc.py.
mpc = MPC(0.5, 0, 0, 0)

ports = list(serial.tools.list_ports.comports())
print(ports)
for p in ports:
    print(p)
    # Match devices by the exact USB VID:PID + serial-number string.
    if p[2] == "USB VID:PID=268b:0201 SNR=160045F45953":
        sabre_port = p[0]  # Sabertooth motor controller
    elif p[2] == "USB VID:PID=2341:0043 SNR=75334323935351D0D022":
        ard_port = p[0]  # board running the encoder firmware
# NOTE(review): if neither device is attached, sabre_port/ard_port are
# unbound and the constructors below raise NameError.

motor = SabreControl(port=sabre_port)
encoder = EncoderAnalyzer(port=ard_port)
#image = ImageAnalyzer(0,show_image=True)
# The encoder analyzer is reused for cart position, angle, and velocity.
cart = CartPole(motor, encoder, encoder, encoder)
cart.zeroAngleAnalyzer()
#encoder.setAngleZero()
class ExperimentModelDynamics:
    """PETS experiment: PENN dynamics ensemble + CEM/random MPC, with an
    optional bounded replay buffer for model training."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        self.task_horizon = TASK_HORIZON
        self.agent = Agent(self.env)
        # BUG FIX: the original subscripted ``mpc_params`` directly, which
        # raised TypeError for the default ``mpc_params=None`` and mutated
        # the caller's dict in place. Copy (or create) the dict first.
        mpc_params = dict(mpc_params) if mpc_params else {}
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS, **mpc_params,
                              use_random_optimizer=False)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS, **mpc_params,
                                 use_random_optimizer=True)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Evaluate the CEM (or random-shooting) MPC policy.

        Returns:
            (avg_return, avg_success): mean episode return and fraction of
            episodes whose final reward is 0 (success).
        """
        samples = list()
        for j in range(num_episodes):
            print('Test episode {}'.format(j + 1))
            samples.append(
                self.agent.sample(
                    self.task_horizon,
                    self.cem_policy if optimizer == 'cem'
                    else self.random_policy))
        avg_return = np.mean([sample["reward_sum"] for sample in samples])
        avg_success = np.mean(
            [sample["rewards"][-1] == 0 for sample in samples])
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = list()
        for i in range(num_episodes):
            samples.append(
                self.agent.sample(self.task_horizon,
                                  self.random_policy_no_mpc))
        print('')
        print('*' * 20, 'TRAINING', '*' * 20)
        self.cem_policy.train([sample["obs"] for sample in samples],
                              [sample["ac"] for sample in samples],
                              [sample["rewards"] for sample in samples],
                              epochs=num_epochs)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval, use_buffer=True):
        """Jointly train the model and the policy.

        When ``use_buffer`` is True the model trains on a bounded replay
        buffer (100 episodes x 40 steps); otherwise only on the episodes
        collected in the current epoch.
        """
        # Buffer has a max size of 100 episodes worth of data
        obs_buffer = deque(maxlen=40 * 100)
        ac_buffer = deque(maxlen=40 * 100)
        rewards_buffer = deque(maxlen=40 * 100)
        success_cem = list()
        success_random = list()
        success_name_cem = 'success_pets_cem.npy'
        success_name_ran = 'success_pets_random.npy'
        for i in range(num_train_epochs):
            print("Starting training epoch %d." % (i + 1))
            # Collect data using the current dynamics model
            samples = list()
            for j in range(num_episodes_per_epoch):
                samples.append(
                    self.agent.sample(self.task_horizon, self.cem_policy))
            # Update replay buffers
            new_obs = [sample["obs"] for sample in samples]
            new_ac = [sample["ac"] for sample in samples]
            new_rewards = [sample["rewards"] for sample in samples]
            obs_buffer.extend(new_obs)
            ac_buffer.extend(new_ac)
            rewards_buffer.extend(new_rewards)
            print("Rewards obtained:",
                  [sample["reward_sum"] for sample in samples])
            # Train the dynamics model using the data
            if use_buffer:
                self.cem_policy.train(obs_buffer, ac_buffer,
                                      rewards_buffer, epochs=5)
            else:
                self.cem_policy.train(new_obs, new_ac, new_rewards,
                                      epochs=5)
            # Test and persist success curves (trivial save() wrapper from
            # the original inlined to a direct np.save call).
            num_test_episodes = 20
            if (i + 1) % evaluation_interval == 0:
                avg_return, avg_success_cem = self.test(num_test_episodes,
                                                        optimizer='cem')
                print('Test success CEM + MPC:', avg_success_cem)
                success_cem.append(avg_success_cem)
                np.save(success_name_cem, success_cem)
                avg_return, avg_success_ran = self.test(num_test_episodes,
                                                        optimizer='random')
                print('Test success Random + MPC:', avg_success_ran)
                success_random.append(avg_success_ran)
                np.save(success_name_ran, success_random)
# pendulum = Pendulum(window) # pendulum.set_properties(m=1.0, g=-9.8, L=0.25, k=0.1) # pendulum.set_position(0.5, 0.5) # pendulum.set_rotation(np.pi * 0.95) # pendulum.integration_type = pendulum.RUNGE_KUTTA # pendulum.nonlinear_mode = True # # cart = CartPole(window) # cart.set_properties(m=0.2, M=0.5, I=0.006, g=-9.8, l=0.25, b=0.5) # cart.set_position(0.5, 0.5) # cart.set_rotation(np.pi * 0.99) # cart.nonlinear_mode = True # cart.integration_type = cart.RUNGE_KUTTA mpc = MPC(dt, mpc_horizon) # This works for springdamper and pendulum R = np.matrix(np.eye(2) * 1.0e-4) Q = np.matrix(np.matrix([[100, 0], [0.0, 0.0]])) T = np.matrix(np.eye(2) * 1e-4) mpc.set_cost_weights(Q, R, T) mpc.Y_prime = np.matrix(np.zeros(shape=(2, mpc_horizon))) mpc.Y_prime[0, :] = 0.7 mpc.C = np.matrix(np.eye(2)) mpc.set_body(spring_damper) Xs = np.zeros(shape=(mpc.body.get_state().shape[0], time.shape[0])) Us = np.zeros(shape=(mpc.body.u.shape[0], time.shape[0]))
class ExperimentModelDynamics:
    """PETS experiment (PyTorch): PENN ensemble + CEM/random MPC with
    TensorBoard logging of test returns and success rates."""

    def __init__(self, env_name='Pushing2D-v1', num_nets=1, mpc_params=None):
        self.env = gym.make(env_name)
        # CPU is deliberately forced; switch to cuda manually if desired.
        self.device = torch.device('cpu')
        self.task_horizon = TASK_HORIZON
        # Tensorboard logging.
        self.timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        self.environment_name = "pusher"
        self.logdir = 'logs/%s/%s' % (self.environment_name, self.timestamp)
        self.summary_writer = SummaryWriter(self.logdir)
        self.agent = Agent(self.env)
        # BUG FIX: the original subscripted ``mpc_params`` directly, which
        # raised TypeError for the default ``mpc_params=None`` and mutated
        # the caller's dict in place. Copy (or create) the dict first.
        mpc_params = dict(mpc_params) if mpc_params else {}
        mpc_params['use_gt_dynamics'] = False
        self.model = PENN(num_nets, STATE_DIM,
                          len(self.env.action_space.sample()), LR,
                          self.device, self.summary_writer, self.timestamp,
                          self.environment_name)
        self.cem_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                              NUM_ELITES, MAX_ITERS,
                              use_random_optimizer=False, **mpc_params)
        self.random_policy = MPC(self.env, PLAN_HORIZON, self.model, POPSIZE,
                                 NUM_ELITES, MAX_ITERS,
                                 use_random_optimizer=True, **mpc_params)
        self.random_policy_no_mpc = RandomPolicy(
            len(self.env.action_space.sample()))

    def test(self, num_episodes, optimizer='cem'):
        """Evaluate the CEM (or random-shooting) MPC policy.

        Returns:
            (avg_return, avg_success): mean episode return and fraction of
            episodes whose final reward is 0 (success).
        """
        samples = []
        for j in range(num_episodes):
            samples.append(
                self.agent.sample(
                    self.task_horizon,
                    self.cem_policy if optimizer == 'cem'
                    else self.random_policy))
            print('Test episode {}: {}'.format(j, samples[-1]["rewards"][-1]))
        avg_return = np.mean([sample["reward_sum"] for sample in samples])
        avg_success = np.mean(
            [sample["rewards"][-1] == 0 for sample in samples])
        print('MPC PushingEnv: avg_reward: {}, avg_success: {}'.format(
            avg_return, avg_success))
        return avg_return, avg_success

    def model_warmup(self, num_episodes, num_epochs):
        """Train a single probabilistic model using a random policy."""
        samples = []
        for i in range(num_episodes):
            print("Warmup Episode %d" % (i + 1))
            samples.append(
                self.agent.sample(self.task_horizon,
                                  self.random_policy_no_mpc))
        self.cem_policy.train([sample["obs"] for sample in samples],
                              [sample["ac"] for sample in samples],
                              [sample["rewards"] for sample in samples],
                              epochs=num_epochs)

    def train(self, num_train_epochs, num_episodes_per_epoch,
              evaluation_interval):
        """Jointly train the model and the policy.

        Keeps a sliding window of at most 10 epochs' worth of episodes.
        """
        samples = []
        for i in range(num_train_epochs):
            print(
                "####################################################################"
            )
            print("Starting training epoch %d." % (i + 1))
            for j in range(num_episodes_per_epoch):
                new_sample = self.agent.sample(self.task_horizon,
                                               self.cem_policy)
                samples.append(new_sample)
            # Drop the oldest epoch once the window exceeds 10 epochs.
            if len(samples) > 10 * num_episodes_per_epoch:
                samples = samples[num_episodes_per_epoch:]
            print("Rewards obtained:",
                  [sample["reward_sum"] for sample in samples])
            self.cem_policy.train([sample["obs"] for sample in samples],
                                  [sample["ac"] for sample in samples],
                                  [sample["rewards"] for sample in samples],
                                  epochs=5)
            if (i + 1) % evaluation_interval == 0:
                cem_avg_return, cem_avg_success = self.test(
                    20, optimizer='cem')
                print('Test success CEM + MPC:', cem_avg_success)
                rand_avg_return, rand_avg_success = self.test(
                    20, optimizer='random')
                print('Test success Random + MPC:', rand_avg_success)
                self.summary_writer.add_scalar("test/CEM-AverageSuccess",
                                               cem_avg_success, i)
                self.summary_writer.add_scalar("test/Rand-AverageSuccess",
                                               rand_avg_success, i)
                self.summary_writer.add_scalar("test/CEM-AverageReturn",
                                               cem_avg_return, i)
                self.summary_writer.add_scalar("test/Rand-AverageReturn",
                                               rand_avg_return, i)
# Plot the desired trajectory knots (circles) against the fine
# interpolation for cart position and pole angle.
fig, ax = plt.subplots(nrows=2, ncols=1)
ax[0].plot(t_des, q_des[0].T, 'o')
ax[0].plot(t_fine, y_fine[0].T)
ax[0].set_title('Desired Position')
ax[1].plot(t_des, q_des[1].T, 'o')
ax[1].plot(t_fine, y_fine[1].T)
ax[1].set_title('Desired Angle')
plt.show()

# initialize MPC
controller = MPC(m1=m1, m2=m2, l=l, g=g, u_max=u_max, d_max=d_max,
                 tf=tf, N=N, Q=Q, Qf=Qf, R=R, verbose=verbose)

# cartpole dynamics used in integration
def qddot(x, u):
    # Cart acceleration. State is indexed as x[1] = angle, x[3] = angular
    # rate — presumably x = [q1, q2, q1dot, q2dot]; TODO confirm against
    # the MPC model's state convention.
    q1ddot = (l*m2*np.sin(x[1])*x[3]**2 + u + \
        m2*g*np.cos(x[1])*np.sin(x[1]))/ \
        (m1 + m2*(1 - np.cos(x[1])**2))
    # Pole angular acceleration.
    q2ddot = -1*(l*m2*np.cos(x[1])*np.sin(x[1])*x[3]**2 \
        + u*np.cos(x[1]) + (m1 + m2)*g*np.sin(x[1])) / \
        (l*m1 + l*m2*(1 - np.cos(x[1])**2))
    # NOTE(review): no return statement is visible in this excerpt — the
    # function body appears truncated here; verify against the full file.
P = np.matrix(scipy.linalg.solve_discrete_are(A, B, Q, R)) # Instantiate controller max_fz = 4 * Quadcopter_orginal.m * Quadcopter_orginal.g max_tau = (max_fz / 2) * (17.5 / 100) tau_z = 0.01 ctl = MPC( model=Quadcopter_orginal, dynamics=Quadcopter_orginal.discrete_time_dynamics, # Linear MPC Q=Q, R=R, P=P, horizon=0.5, ulb=[-max_fz, -max_tau, -max_tau, -tau_z], uub=[max_fz, max_tau, max_tau, tau_z], xlb=[ -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -np.pi / 2, -np.pi / 2, -np.pi / 2, -np.inf, -np.inf, -np.inf ], xub=[ np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.pi / 2, np.pi / 2, np.pi / 2, np.inf, np.inf, np.inf ], integrator=False) ctl.set_reference(x_sp=np.array([0, 0, 0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0])) # Q2-1 ctl.set_constant_control([0, 0, 0, 0]) sim_env = EmbeddedSimEnvironment( model=Quadcopter_orginal,
# pendulum = Pendulum(window) # pendulum.set_properties(m=1.0, g=-9.8, L=0.25, k=0.1) # pendulum.set_position(0.5, 0.5) # pendulum.set_rotation(np.pi * 0.95) # pendulum.integration_type = pendulum.RUNGE_KUTTA # pendulum.nonlinear_mode = True # # cart = CartPole(window) # cart.set_properties(m=0.2, M=0.5, I=0.006, g=-9.8, l=0.25, b=0.5) # cart.set_position(0.5, 0.5) # cart.set_rotation(np.pi * 0.99) # cart.nonlinear_mode = True # cart.integration_type = cart.RUNGE_KUTTA mpc = MPC(dt, mpc_horizon) # This works for springdamper and pendulum R = np.matrix(np.eye(2) * 1.0e-4) Q = np.matrix(np.matrix([[100, 0], [0.0, 0.0]])) T = np.matrix(np.eye(2) * 1e-4) mpc.set_cost_weights(Q, R, T) mpc.Y_prime = np.matrix(np.zeros(shape=(2, mpc_horizon))) mpc.Y_prime[0, :] = 0.7 mpc.C = np.matrix(np.eye(2)) mpc.set_body(spring_damper) Xs = np.zeros(shape=(mpc.body.get_state().shape[0], time.shape[0]))
# Persist the FTOCP predicted trajectories for later analysis.
save(folder + 'ftocp_sPred.npy', ftocp.sPred)
save(folder + 'ftocp_xPred.npy', ftocp.xPred)
# BUG FIX: removed a third save that duplicated the sPred line above and
# simply rewrote the same file. NOTE(review): the original author may have
# intended to save ftocp.uPred here (it is used by mpc.addTraj below).
pdb.set_trace()  # NOTE(review): leftover debugger breakpoint — remove for
# unattended runs.

if newCost == 1:
    # Cheap cost: lightly weight states/inputs and allow many iterations.
    Q = 10**(-4) * np.diag([1, 10, 1, 1])
    R = 10**(-4) * np.diag([1, 0.1])
    maxIt = 41
else:
    maxIt = 1

if tracking == 0:
    # Regulation MPC toward the fixed reference xref.
    mpc = MPC(N_mpc, n, d, Q, R, Qf, xlb, xub, ulb, uub, dt, xref)
else:
    # Tracking MPC: heavier state weights, follows the FTOCP trajectory.
    Q = 100 * np.diag([1, 1, 1, 1])
    R = np.diag([1, 1])
    mpc = MPC_tracking(N_mpc, n, d, Q, R, Qf, xlb, xub, ulb, uub, dt)
    mpc.addTraj(ftocp.xPred, ftocp.uPred, ftocp.sPred, region)

tMax = 600
pltFlag = 0
# Initial conditions for the closed-loop experiments (4-dim state).
IC = []
IC.append(np.array([0.0, 0.0, 0.0, 0.0]))  # 0
IC.append(np.array([0.0, -0.03, 0.0, 0.0]))  # 1
IC.append(np.array([0.005, 0.0, 0.0, 0.0]))  # 2
IC.append(np.array([0.0, 0.0, 0.0, 0.2]))  # 3