def prologue(self):
    """Build the single-trial environment, the supervisor, and the learner,
    then override the environment's cart force magnitude."""
    # Single-trial environment; the Random variant below was used when
    # running over multiple trials with random initial states.
    # self.env = gym.envs.make('CartPoleAltRandom-v0')
    environment = gym.envs.make('CartPoleAlt-v0')
    self.env = environment
    self.sup = Supervisor(self.act)
    regressor = LRC(self.alpha, self.eta, intercept=False)
    self.lnr = Learner(regressor)
    # Show the default force before overriding it with the experiment's value.
    print(environment.env.force_mag)
    environment.env.force_mag = self.force_mag
def prologue(self):
    """Resolve the expert checkpoint path, build the environment, and wire up
    the Gaussian supervisor and the learner.

    Returns the (mutated) params dict for convenience.
    """
    params = self.params
    params['filename'] = './experts/' + params['envname'] + '.pkl'
    self.env = gym.envs.make(params['envname'])
    d = self.env.action_space.shape[0]
    params['d'] = d
    session = tf.Session()
    expert_policy = load_policy.load_policy(params['filename'])
    network_supervisor = Supervisor(expert_policy, session)
    # Zero initial covariance: the Gaussian supervisor starts noiseless.
    gaussian_sup = GaussianSupervisor(network_supervisor, np.zeros((d, d)))
    _, learner = self.reset_learner(params)
    self.lnr = learner
    self.sup = gaussian_sup
    self.net_sup = network_supervisor
    return params
class CartpoleDagger():
    """DAgger experiment on a modified CartPole environment.

    Repeatedly rolls out the learner, relabels visited states with the
    supervisor (a pre-trained deepq policy), retrains on the aggregated
    data, and records dynamic/static regret statistics and plots.
    """

    def __init__(self, force_mag, reg):
        """
        Args:
            force_mag: cart force magnitude to impose on the environment.
            reg: if True, enable adaptive regularization and write results
                under the 'reg_' data directory.
        """
        self.reg = reg
        self.iters = 100            # DAgger iterations per trial
        self.T = 200                # horizon per trajectory
        self.trials = 1
        self.alpha = 0.1            # initial regularization strength
        self.lambda_prior = list(np.ones(10))
        self.eta = 1.0
        self.inner_eta = self.eta
        self.params = {}
        self.params['T'] = self.T
        self.params['iters'] = self.iters
        # Pre-trained supervisor policy.
        self.act = deepq.load("cartpole_model_alt2.pkl")
        if self.reg:
            self.base_dir = 'data/reg_cartpole_force_mag' + str(force_mag)
        else:
            self.base_dir = 'data/cartpole_force_mag' + str(force_mag)
        self.dir = os.path.join(self.base_dir, 'dagger')
        self.prefix = 'dagger'
        self.path = os.path.join(self.dir, self.prefix)
        self.force_mag = force_mag
        self.t = .01                # smoothing rate for adaptive alpha updates

    def prologue(self):
        """Create a fresh environment, supervisor, and learner for a trial."""
        # self.env = gym.envs.make('CartPoleAltRandom-v0') # Used over multiple trials
        self.env = gym.envs.make('CartPoleAlt-v0')  # Used for just one trial
        self.sup = Supervisor(self.act)
        self.lnr = Learner(LRC(self.alpha, self.eta, intercept=False))
        print(self.env.env.force_mag)
        self.env.env.force_mag = self.force_mag

    def run_trials(self):
        """Run all trials, aggregate their results, and pickle them.

        Returns the list of per-trial results dicts.
        """
        all_results = []
        # Used for multiple trials with random initial states
        # init_states = np.load("data/init_states.npy")
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        for trial in range(self.trials):
            self.prologue()
            # print("Init state: " + str(self.env.env.init_state))
            # self.env.env.init_state = init_states[trial, :]
            # print("Setting init state: " + str(self.env.env.init_state))
            self.path = os.path.join(self.dir, self.prefix) + '_trial' + str(trial)
            results = self.run_iters()
            all_results.append(results)
        self.aggregate(all_results)
        filepath = os.path.join(self.dir, self.prefix) + '.p'
        # Context manager guarantees the file is closed even on error.
        with open(filepath, 'wb') as f:
            pickle.dump(all_results, f)
        return all_results

    def aggregate(self, all_results):
        """Stack per-trial result vectors and plot mean +/- SEM curves."""
        n = len(all_results)
        d = self.iters
        lnr_costs = np.zeros((n, d))
        opt_costs = np.zeros((n, d))
        diff_costs = np.zeros((n, d))
        lnr_batch_costs = np.zeros((n, d))
        opt_batch_costs = np.zeros((n, d))
        static_regret = np.zeros((n, d))
        for t, result in enumerate(all_results):
            lnr_costs[t, :] = result['lnr_costs']
            opt_costs[t, :] = result['opt_costs']
            diff_costs[t, :] = result['lnr_costs'] - result['opt_costs']
            lnr_batch_costs[t, :] = result['lnr_batch_costs']
            opt_batch_costs[t, :] = result['opt_batch_costs']
            static_regret[t, :] = result['static_regret']
        lnr_mean, lnr_std = statistics.mean_sem(lnr_costs)
        opt_mean, opt_std = statistics.mean_sem(opt_costs)
        diff_mean, diff_std = statistics.mean_sem(diff_costs)
        lnr_batch_mean, lnr_batch_std = statistics.mean_sem(lnr_batch_costs)
        opt_batch_mean, opt_batch_std = statistics.mean_sem(opt_batch_costs)
        static_regret_mean, static_regret_sem = statistics.mean_sem(
            static_regret)
        x_axis = np.arange(len(lnr_mean))

        # Dynamic Regret
        plt.subplot(211)
        plt.title("Actual loss")
        plt.errorbar(x_axis, lnr_mean, yerr=lnr_std, label='lnr costs')
        plt.errorbar(x_axis, opt_mean, yerr=opt_std, label='opt costs')
        plt.legend()
        plt.subplot(212)
        plt.title("Difference")
        plt.errorbar(x_axis, diff_mean, yerr=diff_std)
        plt.tight_layout()
        filepath = os.path.join(self.dir, self.prefix) + '.pdf'
        plt.savefig(filepath)
        plt.close()
        plt.cla()
        plt.clf()

        # Static Regret
        plt.subplot(211)
        plt.title("Batch loss")
        # Fixed copy-paste bug: these curves previously reused the non-batch
        # error bars (lnr_std / opt_std / diff_std) instead of the batch SEMs
        # computed above.
        plt.errorbar(x_axis, lnr_batch_mean, yerr=lnr_batch_std, label='lnr costs')
        plt.errorbar(x_axis, opt_batch_mean, yerr=opt_batch_std, label='opt costs')
        plt.legend()
        plt.subplot(212)
        plt.title("Static Regret")
        plt.errorbar(x_axis, static_regret_mean, yerr=static_regret_sem)
        plt.tight_layout()
        filepath = os.path.join(self.dir, self.prefix) + '_batch.pdf'
        plt.savefig(filepath)
        plt.close()
        plt.cla()
        plt.clf()

    def compute_statistics(self, iteration, results):
        """Roll out the current learner, relabel with the supervisor, and
        append this iteration's loss/regret statistics to ``results``."""
        states, tmp_actions, _, reward = statistics.collect_traj(
            self.env, self.lnr, self.params['T'], False)
        actions = [self.sup.intended_action(s) for s in states]
        d = self.env.observation_space.shape[0]
        # states += [np.zeros(d), np.zeros(d)]
        # actions += [1, 0]
        # Best-in-hindsight regressor fit on this iteration's data only.
        est = LRC(self.lnr.est.alpha, self.inner_eta, intercept=False)
        lh, ph = est.fit(states, actions)
        lnr_cost = self.lnr.est.loss(states, actions)
        opt_cost = est.loss(states, actions)
        print("\tlnr_cost: " + str(lnr_cost))
        print("\topt_cost: " + str(opt_cost))
        results['lnr_costs'].append(lnr_cost)
        results['opt_costs'].append(opt_cost)
        results['rewards'].append(reward)
        results['alphas'].append(self.lnr.est.alpha)
        curr_coef_ = self.lnr.est.coef_.copy()
        curr_opt_coef_ = est.coef_.copy()
        results['param_norms'].append(np.linalg.norm(curr_coef_))
        results['opt_param_norms'].append(np.linalg.norm(curr_opt_coef_))
        # Variation/beta need the previous iteration's coefficients, so skip
        # the first iteration.  (Was `not iteration is 0` -- an identity test
        # on an int, which is implementation-dependent.)
        if iteration != 0:
            variation = np.linalg.norm(self.last_coef_ - curr_coef_)
            opt_variation = np.linalg.norm(self.last_opt_coef_ - curr_opt_coef_)
            last_gradient = est.gradient(self.last_states, self.last_actions,
                                         curr_coef_)
            curr_gradient = est.gradient(states, actions, curr_coef_)
            # Empirical smoothness estimate.
            beta = np.linalg.norm(last_gradient - curr_gradient) / variation
            results['variations'].append(variation)
            results['opt_variations'].append(opt_variation)
            results['lambdas'].append(opt_variation / variation)
            results['betas'].append(beta)
        self.last_coef_ = curr_coef_.copy()
        self.last_opt_coef_ = curr_opt_coef_.copy()
        self.last_states = states
        self.last_actions = actions
        # Static (batch) comparator over all data collected so far.
        static_est = LRC(self.lnr.est.alpha, self.inner_eta, intercept=False)
        batch_states = self.data_states + states
        batch_actions = self.data_actions + actions
        lh_batch, ph_batch = static_est.fit(batch_states, batch_actions)
        opt_batch_cost = static_est.loss(batch_states, batch_actions)
        lnr_batch_cost = np.mean(results['lnr_costs'])
        static_regret = lnr_batch_cost - opt_batch_cost
        print("\tlnr_batch_cost: " + str(lnr_batch_cost))
        print("\topt_batch_cost: " + str(opt_batch_cost))
        print()
        results['lnr_batch_costs'].append(lnr_batch_cost)
        results['opt_batch_costs'].append(opt_batch_cost)
        results['static_regret'].append(static_regret)
        return results

    def compute_results(self, results):
        """Plot the per-trial curves and pickle the results dict to self.path.

        Expects the values in ``results`` to already be numpy arrays (see
        run_iters), since it subtracts them elementwise.
        """
        _, _, _, sup_reward = statistics.collect_traj(
            self.env, self.sup, self.params['T'], False)
        results['sup_rewards'] = [sup_reward] * len(results['rewards'])

        # DYNAMIC REGRET
        plt.subplot(211)
        plt.title("Actual loss")
        plt.plot(results['lnr_costs'], label='lnr costs')
        plt.plot(results['opt_costs'], label='opt costs')
        plt.legend()
        difference = results['lnr_costs'] - results['opt_costs']
        plt.subplot(212)
        plt.title("Difference")
        plt.plot(difference)
        plt.tight_layout()
        filepath = self.path + '.pdf'
        plt.savefig(filepath)
        plt.close()
        plt.cla()
        plt.clf()

        # STATIC REGRET
        plt.subplot(211)
        plt.title("Batch costs")
        plt.plot(results['lnr_batch_costs'], label='lnr costs')
        plt.plot(results['opt_batch_costs'], label='opt costs')
        plt.legend()
        plt.subplot(212)
        plt.title("Static regret (lnr batch - opt batch)")
        plt.plot(results['static_regret'])
        plt.tight_layout()
        filepath = self.path + '_batch.pdf'
        plt.savefig(filepath)
        plt.close()
        plt.cla()
        plt.clf()

        # REWARDS
        plt.subplot(111)
        plt.title("Rewards")
        plt.plot(results['rewards'], label='Learner rewards')
        plt.plot(results['sup_rewards'], label='Supervisor Rewards')
        plt.legend()
        plt.ylim(0, 20)
        filepath = self.path + '_reward.pdf'
        plt.savefig(filepath)
        plt.close()
        plt.cla()
        plt.clf()

        filepath = self.path + '.p'
        with open(filepath, 'wb') as f:
            pickle.dump(results, f)

    def run_iters(self):
        """Main DAgger loop for one trial; returns a dict of stat arrays."""
        results = {
            'lnr_costs': [],
            'opt_costs': [],
            'variations': [],
            'opt_variations': [],
            'param_norms': [],
            'opt_param_norms': [],
            'lambdas': [],
            'lnr_batch_costs': [],
            'opt_batch_costs': [],
            'static_regret': [],
            'rewards': [],
            'betas': [],
            'alphas': [],
        }
        d = self.env.observation_space.shape[0]
        # self.data_states = [np.zeros(d), np.zeros(d)]
        # self.data_actions = [1, 0]
        self.data_states = []
        self.data_actions = []
        for iteration in range(self.iters):
            print("\tIteration: " + str(iteration))
            print("\tData states: " + str(len(self.data_states)))
            self.compute_statistics(iteration, results)
            # Roll out the learner and aggregate supervisor-labeled data.
            states, tmp_actions, _, _ = statistics.collect_traj(
                self.env, self.lnr, self.params['T'], False)
            i_actions = [self.sup.intended_action(s) for s in states]
            self.data_states += states
            self.data_actions += i_actions
            self.lnr.set_data(self.data_states, self.data_actions)
            self.lnr.train()
            # Adaptive regularization: every 10 iterations, nudge alpha toward
            # mean(lambda) * alpha with smoothing rate self.t.
            if self.reg and (iteration + 1) % 10 == 0:
                # mean_lambda = np.mean(results['lambdas'][-10:] + self.lambda_prior)
                mean_lambda = np.mean(results['lambdas'][-10:])
                next_alpha = mean_lambda * self.lnr.est.alpha
                self.lnr.est.alpha = self.t * next_alpha + (
                    1 - self.t) * self.lnr.est.alpha
                print("\n\n\t\t Updated alpha: " + str(self.lnr.est.alpha))
                print("\t\t Lambda was: " + str(mean_lambda))
        for key in results:
            results[key] = np.array(results[key])
        self.compute_results(results)
        return results
def main():
    """Collect demonstration trajectories in the simulator and, after each
    trajectory, retrain an HIMGP model whose optimized noise is fed back
    into the supervisors used for the next trajectory."""
    global state, action
    ##clear whole file from data dir
    clear()
    ####### Initialize Parameters
    dataNumber = 1
    Max_trajectory = 10
    init_noise = 0.0
    noise = [init_noise, init_noise]  # [x, y] supervisor sampling noise
    old_sigma = []  # warm-start state carried between learning calls
    N_K = []
    sampling_flag = False
    save_flag = False
    fail_flag = False
    robot = Robot(Num_goal)
    result = {'model_Num': [], 'Noise': [], 'Number_of_Mixture': []}
    row = 0
    col = 0
    initialize()
    rospy.init_node('Demo', anonymous=True, disable_signals=True)
    rospy.on_shutdown(shutdown)
    rate = rospy.Rate(10)  # 10 Hz control loop
    for t in range(Max_trajectory):
        # Supervisors use the noise optimized by the previous learning pass.
        Sup_x = Supervisor(noise[0])
        Sup_y = Supervisor(noise[1])
        [a_x, E_x, IE_x] = [0.0, 0.0, 0.0]
        [a_y, E_y, IE_y] = [0.0, 0.0, 0.0]
        button = True  # True -> (re)start the simulation on the next tick
        k = 0  # goal index, cycled modulo Num_goal
        while True:
            # s = [Sub.goal_1,Sub.goal_2,Sub.goal_3,Sub.endeffector_pose]
            s = [Sub.goal_1, Sub.goal_2, Sub.endeffector_pose]
            fail = Fail(s)
            a_x, a_y = robot.policy(s, k)
            axes = [a_y, a_x]
            a = axes
            temp_state, temp_action = save.tempDataframe(s, a, Num_goal)
            fail_flag = fail.fail_check(Sub.simulationTime)
            if button:
                # (Re)start: fresh robot, reset sim, clear the dataframes.
                robot = Robot(Num_goal)
                Pub.reset(t)
                initialize()
                sampling_flag = True
                button = False
            elif fail_flag or (Sub.simulationState == 0):
                # Failure or sim stopped: discard this attempt and restart.
                Pub.sim_stop()
                initialize()
                sampling_flag = False
                button = True
                fail_flag = False
            if sampling_flag:
                # Sample noisy supervisor actions and log the transition.
                action1 = Sup_y.sample_action(axes[0])
                action2 = Sup_x.sample_action(axes[1])
                sample_action = [action1, action2]
                # temp_action['v_y1'], temp_action['v_x1']= action1, action2
                state = save.dataAppend(state, temp_state)
                action = save.dataAppend(action, temp_action)
                Pub.actionInput(sample_action)
                if Sub.simulationTime > 1.0:
                    fail.simple_success()
                if (Sub.success == True) or (fail.success == True):
                    save_flag = True
            if save_flag:
                # Trajectory succeeded: persist it and advance to next goal.
                # k = 0
                k += 1
                k %= Num_goal
                Pub.sim_stop()
                save.dataSave(state, action, dataNumber)
                save_flag = False
                sampling_flag = False
                Sub.success = False
                fail.success = False
                button = True
                if (dataNumber) % Num_goal == 0:
                    # Finished a full goal cycle: leave the inner loop.
                    dataNumber += 1
                    break
                dataNumber += 1
            rate.sleep()
        if ((dataNumber - 1) % 2 == 0):
            initialize()
        # Count saved action files, reload everything collected so far, and
        # retrain the model with warm-start state (old_sigma / N_K).
        Num_data = int(subprocess.check_output(command + " action | wc -l", shell=True))
        for i in range(Num_data):
            _state, _action = load.dataLoad(i + 1)
            state = save.dataAppend(state, _state)
            action = save.dataAppend(action, _action)
        N = state.shape[0]
        X = state
        Y = action
        Y1 = Y['v_x1']
        Y2 = Y['v_y1']
        model = Learning('HIMGP', 30, X, Y, old_sigma=old_sigma, K=N_K)
        model.learning(int((dataNumber - 1) / Num_goal))
        old_sigma = model.model.old_sigma
        N_K = model.model.N_K
        K = len(model.model.N_K) - 1
        # Feed the optimized noise back into the next trajectory's supervisors.
        noise = [model.model.Noise[K], model.model.Noise[K]]
        result['Noise'].append(noise[0])
        result['Number_of_Mixture'].append(model.model.M)
        result['model_Num'].append(i + 1)
        df = pd.DataFrame(result)
        df.to_excel('data/Learning_state/LS.xlsx')
        print("=" * 40)
        print("Optimized Noise x: %f, Noise y: %f" % (noise[0], noise[1]))
        print(" \t model saved")
        print(" \t Number of step %i " % (N))
        print("=" * 40)
    rospy.spin()
def main():
    """Collect demonstration trajectories with noiseless supervisors and
    train an IMGP model on the accumulated data."""
    global state, action
    ##clear whole file from data dir
    clear()
    ####### Initialize Parameters
    dataNumber = 1
    Max_trajectory = 10
    sampling_flag = False
    save_flag = False
    fail_flag = False
    robot = Robot(Num_goal)
    row = 0
    col = 0
    initialize()
    rospy.init_node('Demo', anonymous=True, disable_signals=True)
    rospy.on_shutdown(shutdown)
    rate = rospy.Rate(10)  # 10 Hz control loop
    for t in range(Max_trajectory):
        # Zero sampling noise in this variant: supervisors are deterministic.
        Sup_x = Supervisor(0.0)
        Sup_y = Supervisor(0.0)
        [a_x, E_x, IE_x] = [0.0, 0.0, 0.0]
        [a_y, E_y, IE_y] = [0.0, 0.0, 0.0]
        button = True  # True -> (re)start the simulation on the next tick
        k = 1  # goal index, cycled modulo Num_goal
        while True:
            # s = [Sub.goal_1,Sub.goal_2,Sub.goal_3,Sub.endeffector_pose]
            s = [Sub.goal_1, Sub.goal_2, Sub.endeffector_pose]
            fail = Fail(s)
            a_x, a_y = robot.policy(s, k)
            axes = [a_y, a_x]
            a = axes
            temp_state, temp_action = save.tempDataframe(s, a, Num_goal)
            fail_flag = fail.fail_check(Sub.simulationTime)
            if button:
                # (Re)start: fresh robot, reset sim, clear the dataframes.
                robot = Robot(Num_goal)
                Pub.reset(t)
                initialize()
                sampling_flag = True
                button = False
            elif fail_flag or (Sub.simulationState == 0):
                # Failure or sim stopped: discard this attempt and restart.
                Pub.sim_stop()
                initialize()
                sampling_flag = False
                button = True
                fail_flag = False
            if sampling_flag:
                # Log the transition, then sample and publish the actions.
                # temp_action['v_y1'], temp_action['v_x1']= action1, action2
                state = save.dataAppend(state, temp_state)
                action = save.dataAppend(action, temp_action)
                action1 = Sup_y.sample_action(axes[0])
                action2 = Sup_x.sample_action(axes[1])
                sample_action = [action1, action2]
                Pub.actionInput(sample_action)
                if Sub.simulationTime > 1.0:
                    fail.simple_success()
                if (Sub.success == True) or (fail.success == True):
                    save_flag = True
            if save_flag:
                # Trajectory succeeded: persist it and advance to next goal.
                k += 1
                k %= Num_goal
                Pub.sim_stop()
                save.dataSave(state, action, dataNumber)
                save_flag = False
                sampling_flag = False
                Sub.success = False
                fail.success = False
                button = True
                if (dataNumber) % Num_goal == 0:
                    # Finished a full goal cycle: leave the inner loop.
                    dataNumber += 1
                    break
                dataNumber += 1
            rate.sleep()
        if ((dataNumber - 1) % 2 == 0):
            initialize()
        # NOTE(review): learning stage placed inside the trajectory loop to
        # mirror the HIMGP variant of this script — confirm intended nesting.
        Num_data = int(
            subprocess.check_output(command + " action | wc -l", shell=True))
        for i in range(Num_data):
            _state, _action = load.dataLoad(i + 1)
            state = save.dataAppend(state, _state)
            action = save.dataAppend(action, _action)
        N = state.shape[0]
        X = state
        Y = action
        Y1 = Y['v_x1']
        Y2 = Y['v_y1']
        model = Learning('IMGP', 30, X, Y)
        # model = Learning('HIMGP',30,X,Y)
        model.learning(int((dataNumber - 1) / Num_goal))
        print("=" * 40)
        print(" \t model saved")
        print(" \t Number of step %i " % (N))
        print("=" * 40)
    rospy.spin()
from tools.Data.Subscriber import Subscriber
from tools.Data.Publisher import Publisher
from tools.Data.Save import Save
from tools.Data.Clear import clear
from tools.Fail_condition import Fail
from tools.supervisor import Supervisor
import rospy

# Module-level helpers and ROS endpoints shared by the routines below.
save = Save('data/')
Sub = Subscriber()
Pub = Publisher()
noise = 0.1980  # fixed supervisor sampling noise for this script
Sup_x = Supervisor(noise)
Sup_y = Supervisor(noise)
Num_goal = 2  # number of goals cycled during data collection


def initialize():
    """Reset the global state/action dataframes to fresh, empty ones."""
    global state, action
    state, action = save.initDataframe(Num_goal)


def shutdown():
    """rospy shutdown hook: just announce the shutdown."""
    print('ros shutdown')


# NOTE(review): this definition is truncated at the end of this chunk; the
# remainder of its body continues beyond the visible source.
def main():
    global state, action
    init_data_num = 3
    dataNumber = init_data_num
    sampling_flag = False
    save_flag = False