Пример #1
0
    def run_dp_train(self, n_iters=0):
        """Fit ``self.current_net`` to a precomputed state-action cost table.

        The table is unpickled from ``../Pendulum/dp_sa_cost.p`` (a path
        relative to the current working directory) and flattened.  Flat index
        ``i`` is decoded into ``(action, s2, s1)`` grid indices with ``s1``
        varying fastest, and every entry becomes one sample row
        ``[s1, s2, a, cost]``.  The network is given ``(s1, s2, a)`` as input
        and ``-cost`` as target.  Finally a Q heat map of the network is
        drawn through ``NetVisualizer``.

        Args:
            n_iters: Number of loop iterations to run.  Iterations whose
                index is a multiple of 100 do not train: they compute the
                MSE on the whole table, print it together with the learning
                rate and append both to ``self.mse_hist`` and
                ``self.learn_rate_hist``.  Every other iteration trains on a
                random batch of 50 rows drawn without replacement.  The
                default of 0 preserves the previous behaviour of this
                method, which skipped the loop and only drew the heat map.

        Raises:
            IndexError: If the table holds more entries than the
                9 x 51 x 51 grid can address.
        """
        # NOTE(review): pickle.load can execute arbitrary code while
        # deserialising; only use this with a trusted, locally produced file.
        with open('../Pendulum/dp_sa_cost.p', 'rb') as f:
            sa_costs = pickle.load(f).flatten()

        # Grid axes of the table.  NOTE(review): presumably s1 is the
        # pendulum angle and s2 the angular velocity - confirm against the
        # code that produced the cost file.
        a_grid = np.linspace(-2, 2, 9)
        s1_grid = np.linspace(0, 2*np.pi, 51)
        s2_grid = np.linspace(-10, 10, 51)

        # Decode every flat index into (action, s2, s1) grid indices.  Floor
        # division keeps the indices integral regardless of the division
        # semantics in effect, and the sizes come from the grids themselves
        # instead of repeated magic numbers.
        n_s1 = len(s1_grid)
        plane = len(s2_grid) * n_s1
        flat = np.arange(len(sa_costs))
        a_idx = flat // plane
        in_plane = flat % plane
        s2_idx = in_plane // n_s1
        s1_idx = in_plane % n_s1

        # One row per table entry: [s1, s2, a, cost].
        sa_samples = np.column_stack(
            (s1_grid[s1_idx], s2_grid[s2_idx], a_grid[a_idx], sa_costs))

        n = len(sa_samples)
        for i in range(n_iters):
            is_eval = i % 100 == 0
            # Evaluation uses the whole table (in shuffled order); training
            # uses a mini-batch of 50 rows.
            sample_size = n if is_eval else 50
            inds = np.random.choice(n, sample_size, replace=False)
            batch = sa_samples[inds]
            inputs = batch[:, 0:3]
            # The network is fitted to the negated cost, as a column vector.
            targets = -batch[:, 3:4]

            if is_eval:
                # Single-argument print() emits the same text under the
                # Python 2 print statement and the Python 3 print function.
                print('iter %s' % (i,))
                mse = self.current_net.mse_q(inputs, targets)
                learn_rate = self.current_net.get_learn_rate()
                print('mse %s learn_rate %s' % (mse, learn_rate))
                self.mse_hist.append(mse)
                self.learn_rate_hist.append(learn_rate)
            else:
                self.current_net.train(inputs, targets)

        vis = NetVisualizer(self.current_net)
        vis.q_heat_map()