示例#1
0
    def finish(self, nb_train_steps, nb_epoch_steps, nb_warmup_steps=0):
        """Plot the recorded series in ``self.records`` and save them as PNGs.

        Each value of ``self.records`` is converted to a NumPy array and
        plotted against the tail of the learning-step axis
        ``range(0, nb_train_steps + 1, nb_epoch_steps)``:

        * a 3-D record yields one figure per index ``i`` of its second axis,
          written to ``<save_dir>/<key>_eval_<i>.png``;
        * a record whose key is ``test_reward``, ``mean_nlogdetcov`` or
          ``online_reward`` yields a single figure written to
          ``<save_dir>/<key>.png`` (2-D data are summarised, 1-D data are
          drawn as-is);
        * empty records and all other keys are skipped.

        Summarised figures draw the first sequence returned by ``util.mstd``
        as a line and shade the region between the other two.
        NOTE(review): the names suggest a central curve with a 25/75 band;
        confirm against ``util.mstd``.

        Every figure is closed after it has been saved (or if plotting
        fails), so repeated calls do not accumulate open figures.

        Args:
            nb_train_steps: Last value (inclusive bound) of the x-axis.
            nb_epoch_steps: Spacing between consecutive x-axis values.
            nb_warmup_steps: If the first plotted x value is smaller than
                this, a black vertical line is drawn at this x position.
        """
        import matplotlib
        matplotlib.use('Agg')
        from matplotlib import pyplot as plt
        from util import mstd

        x_vals = range(0, nb_train_steps + 1, nb_epoch_steps)
        single_figure_keys = ('test_reward', 'mean_nlogdetcov',
                              'online_reward')

        def render(key, xs, data, filename, banded):
            # Draw one figure, save it under self.save_dir and always
            # release it, even when plotting or saving raises.
            fig, ax = plt.subplots()
            try:
                if banded:
                    m, ids25, ids75 = mstd(data)
                    ax.plot(xs, m, color='b')
                    ax.fill_between(xs, list(ids75), list(ids25),
                                    facecolor='b', alpha=0.2)
                else:
                    ax.plot(xs, data, color='b')
                if key == 'mean_nlogdetcov':
                    ax.set_ylim(-1500, 3000)
                ax.grid()
                ax.set_ylabel(key)
                ax.set_xlabel('Learning Steps')
                if xs[0] < nb_warmup_steps:
                    ax.axvline(x=nb_warmup_steps, color='k')
                fig.savefig(os.path.join(self.save_dir, filename))
            finally:
                plt.close(fig)

        for (k, v) in self.records.items():
            v = np.array(v)
            if v.size == 0:
                # Nothing to draw; also avoids x_vals[-0:], which would
                # select the whole axis instead of an empty one.
                continue

            # If the number of warmup steps exceeds the number of epoch
            # steps, or no episode was completed, v can be shorter than
            # x_vals; align the data with the last len(v) x positions.
            xs = x_vals[-len(v):]

            if len(v.shape) == 3:
                for i in range(v.shape[1]):
                    render(k, xs, v[:, i, :].T,
                           "%s_eval_%d.png" % (k, i), banded=True)
            elif k in single_figure_keys:
                if len(v.shape) == 2:
                    render(k, xs, v.T, "%s.png" % k, banded=True)
                else:
                    render(k, xs, v, "%s.png" % k, banded=False)
示例#2
0
def batch_plot(list_records, save_dir, nb_train_steps, nb_epoch_steps, is_target_tracking=False):
    """Aggregate several runs' records and save one summary plot per metric.

    For each metric (``online_reward``, ``test_reward`` and, when
    ``is_target_tracking`` is true, ``mean_nlogdetcov``) the per-run arrays
    are transposed, stacked, and flattened to a 2-D array whose second axis
    is plotted against the tail of
    ``range(0, nb_train_steps + 1, nb_epoch_steps)``.

    Each figure shows the three sequences returned by ``util.mstd`` (first
    as a black line, the other two as a shaded band) plus the per-step
    maximum (blue) and minimum (red) over the first axis.
    NOTE(review): the names suggest a central curve with a 25/75 band;
    confirm against ``util.mstd``.

    * When the stacked array is 4-D, one figure per index ``i`` of its third
      axis is written to ``<save_dir>/<metric>_eval_<i>.png``.
    * Otherwise a single figure is written to ``<save_dir>/<metric>.png``.
    * Metrics with no data are skipped.

    The record dictionaries in ``list_records`` are only read, never
    modified. Every figure is closed after saving, or if plotting fails.

    Args:
        list_records: Iterable of mappings; each must contain every plotted
            metric key.
        save_dir: Directory the PNG files are written to.
        nb_train_steps: Last value (inclusive bound) of the x-axis.
        nb_epoch_steps: Spacing between consecutive x-axis values.
        is_target_tracking: Also plot ``mean_nlogdetcov`` when true.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    from util import mstd

    metric_keys = ['online_reward', 'test_reward']
    if is_target_tracking:
        metric_keys.append('mean_nlogdetcov')

    # Convert into fresh arrays instead of writing them back into the
    # caller's record dictionaries.
    results = dict((k, []) for k in metric_keys)
    for r in list_records:
        for k in metric_keys:
            results[k].append(np.array(r[k]).T)

    x_vals = range(0, nb_train_steps + 1, nb_epoch_steps)

    def render(key, samples, filename):
        # Draw one summary figure, save it and always release it.
        xs = x_vals[-samples.shape[1]:]
        fig, ax = plt.subplots()
        try:
            m, ids25, ids75 = mstd(samples)
            ax.plot(xs, m, color='k')
            ax.fill_between(xs, list(ids75), list(ids25),
                            facecolor='k', alpha=0.2)
            ax.plot(xs, np.max(samples, axis=0), color='b')
            ax.plot(xs, np.min(samples, axis=0), color='r')
            ax.grid()
            if key == 'mean_nlogdetcov':
                ax.set_ylim(-1500, 3000)
            ax.set_ylabel(key)
            ax.set_xlabel('Learning Steps')
            fig.savefig(os.path.join(save_dir, filename))
        finally:
            plt.close(fig)

    for k in metric_keys:
        v = np.array(results[k])
        if v.size == 0:
            # No runs or no recorded values: nothing to draw, and
            # x_vals[-0:] would select the whole axis.
            continue
        if len(v.shape) == 4:
            for i in range(v.shape[2]):
                # Merge the first two axes so every row is one sample.
                v_i = np.concatenate(v[:, :, i, :], axis=0)
                render(k, v_i, "%s_eval_%d.png" % (k, i))
        else:
            if len(v.shape) == 3:
                v = np.concatenate(v, axis=0)
            render(k, v, k + ".png")