Example #1
File: UCB.py  Project: HeySue/eheye

import numpy as np
import matplotlib.pyplot as plt
from qreg import QRegressor
class QuantUCB(object):
    """Perform QuantUCB algorithm on environment
    
    Attributes
    --------------------------------
    
    environment: instance of DummyEnvironment, generates samples from x
    x, t: all candidate sample inputs and their values (labels)
    maxlabel: maximum label in t

    beta: parameter trading off predictions against uncertainty;
        beta can be fixed, as specified by the arguments,
        or adjusted over the epochs
    max_method: acquisition rule, 'MUB' (maximise the upper bound)
        or 'MPV' (maximise prediction plus beta-scaled spread)
    uq, lq: upper and lower quantile levels
    uq_rate: rate at which the upper quantile is increased during updating

    predict, ub, lb: predicted median and the upper and lower bounds
        based on the specified uq and lq

    D: kernel dimensions (set but unused in this version)
    sampler: kernel sampler (unused in this version)

    X, T: lists of observed sample inputs and labels
    cumu_regret: cumulative regret
    regret_list: cumulative regret recorded at each epoch
    """
    def __init__(self,
                 x,
                 t,
                 init_list,
                 max_method='MUB',
                 uq=0.9,
                 lq=0.1,
                 uq_rate=0.0,
                 beta=1.):

        self.x = x
        self.t = t
        self.maxlabel = np.max(self.t)

        self.max_method = max_method
        self.beta = beta
        self.uq = uq
        self.lq = lq
        self.uq_rate = uq_rate

        self.X = list(self.x[init_list])
        self.T = list(self.t[init_list])
        self.cumu_regret = 0
        self.regret_list = []

        self.predict = np.zeros_like(self.x)
        self.ub = 0.5 * np.ones_like(self.x)
        self.lb = -0.5 * np.ones_like(self.x)

        self.D = 50
        self.reg = QRegressor(C=1e2,
                              probs=[lq, 0.5, uq],
                              gamma_out=1e-2,
                              max_iter=1e4,
                              verbose=False,
                              lag_tol=1e-3,
                              active_set=True)

    def argmax_ucb(self, max_method, epoch):
        if max_method == 'MUB':
            # use upper bound
            if self.uq < 0.95:
                self.uq += self.uq_rate
            return np.argmax(self.ub)
        elif max_method == 'MPV':
            # use predict + uncertainty
            if len(self.X) > 0:
                self.beta = np.log(len(self.X) * (epoch + 1.0)) / 20
            return np.argmax(self.predict +
                             0.5 * abs(self.ub - self.lb) * self.beta)
        else:
            raise ValueError('Unknown max_method: %s' % max_method)

    def regret(self):
        self.cumu_regret += self.maxlabel - self.T[-1]
        self.regret_list.append(self.cumu_regret)

    def learn(self, epoch):

        self.reg.fit(self.X, self.T, [self.lq, 0.5, self.uq])
        pred = self.reg.predict(self.x)
        self.lb = pred[0]
        self.predict = pred[1]
        self.ub = pred[2]
        idx = self.argmax_ucb(self.max_method, epoch)
        self.sample(idx)
        self.regret()

    def sample(self, idx):
        self.X.append(self.x[idx])
        self.T.append(self.t[idx])

    def plot(self):
        fig, ax = plt.subplots()

        min_val = min(self.x)
        max_val = max(self.x)
        test_range = np.arange(min_val - 1, max_val + 1, 0.1)

        self.pred = self.reg.predict(test_range)

        ax.plot(test_range,
                self.pred[1],
                alpha=0.5,
                color='g',
                label='predict')
        ax.fill_between(test_range,
                        self.pred[0],
                        self.pred[2],
                        facecolor='k',
                        alpha=0.2)
        ax.scatter(self.X,
                   self.T,
                   c='r',
                   marker='o',
                   alpha=1.0,
                   label='sample')
        plt.legend()
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.title('QuantUCB')
        plt.show()
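
A minimal driver sketch for the class above (assumed usage: the toy reward function, grid size, epoch count, and random init_list below are illustrative and not part of the original project):

import numpy as np

x = np.linspace(0, 10, 100)                     # candidate arms
t = np.sin(x) + 0.1 * np.random.randn(len(x))   # noisy labels per arm (toy data)
init_list = np.random.choice(len(x), 5, replace=False)

ucb = QuantUCB(x, t, init_list, max_method='MUB', uq=0.9, lq=0.1)
for epoch in range(30):
    ucb.learn(epoch)   # fit quantiles, pick an arm, record regret
ucb.plot()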
Example #2
File: UCB_new.py  Project: HeySue/eheye

import numpy as np
import matplotlib.pyplot as plt
from qreg import QRegressor
# (the UCB base class is defined elsewhere in this project)
class QuantUCB_MUB(UCB):
    """Perform QuantUCB algorithm on environment
    
    Attributes
    --------------------------------
    
    environment: instance of DummyEnvironment, generate samples using x
    x, t: list of all index and values of samples
    maxlabel: max label of t

    beta: parameter for adjust predictions and variance 
        beta can be fixed as specified by arguments
        beta also can be changed along with epoches
    uq, lq: upper and lower quantile
    uq_rate: adjust upper quantile during updating 

    predict, ub, lb: prediction median, 
        upper and lower bound based on the specidied uq and lq

    D: kernel dimensions
    sampler: kernel sampler

    X, T: list of observed samples 
    cumu_regret: cumulative regret
    regret_list: list for regret for each epoch 
    """
    def __init__(self, env, x):

        super().__init__(env, x)

        self.alpha = 0.5

        self.predict = np.zeros_like(self.x)

        self.reg = QRegressor(C=1e2,
                              probs=[0.1, 0.5, 0.9],
                              gamma_out=1e-2,
                              max_iter=1e4,
                              verbose=False,
                              lag_tol=1e-3,
                              active_set=True)

    def argmax_ucb(self, t, num_rounds):

        self.alpha = 0.5 + np.log(t + 1) / (num_rounds * 2)
        return self.X[np.argmax(self.quantile)]

    def learn(self, t, num_rounds):

        self.reg.fit(self.X, self.T, [self.alpha])
        self.quantile = self.reg.predict(self.x)[0]
        idx = self.argmax_ucb(t, num_rounds)
        self.sample(idx)
        self.regret(t)

    def plot(self):
        ax = plt.axes()

        min_val = min(self.x)
        max_val = max(self.x)
        test_range = np.arange(min_val - 1, max_val + 1, 0.1)
        pred = self.reg.predict(test_range)

        ax.plot(test_range, pred[0], alpha=0.5, color='g', label='predict')
        init_len = len(self.x)
        ax.scatter(self.X[:init_len],
                   self.T[:init_len],
                   c='b',
                   marker='o',
                   alpha=1.0,
                   label='init sample')
        ax.scatter(self.X[init_len:],
                   self.T[init_len:],
                   c='r',
                   marker='o',
                   alpha=1.0,
                   label='selected sample')
        plt.legend()
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.title('QuantUCB')
        plt.show()
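
The selection rule above tracks a single conditional quantile whose level alpha grows from the median towards higher quantiles over the run (see argmax_ucb). A quick numeric check of that schedule (the round indices and horizon below are illustrative):

import numpy as np

num_rounds = 100
for t in (0, 9, 99):
    print(t, round(0.5 + np.log(t + 1) / (num_rounds * 2), 3))
# prints: 0 0.5, 9 0.512, 99 0.523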
Example #3

import numpy as np
import matplotlib.pyplot as plt
from qreg import QRegressor, toy_data

if __name__ == '__main__':
    # NOTE: probs, eps, and algorithms are assumed here for runnability;
    # the original excerpt defines them earlier (not shown). The solver
    # names are illustrative: 'qp' appears in Example #4, and names ending
    # in '_eps' select the epsilon-insensitive variant below.
    probs = np.linspace(0.1, 0.9, 5)
    eps = 0.1
    algorithms = ['qp', 'qp_eps']
    x_train, y_train, z_train = toy_data(50)
    x_test, y_test, z_test = toy_data(1000, t_min=-0.2, t_max=1.7, probs=probs)
    reg = QRegressor(C=1e2, probs=probs, gamma_out=1e-2, max_iter=1e4, verbose=False, lag_tol=1e-3, active_set=True)

    res = []  # List for resulting coefficients
    plt.figure(figsize=(12, 7))
    for it, alg in enumerate(algorithms):
        if 'eps' in alg.lower():
            reg.alg = alg[:-4]
            reg.eps = eps
        else:
            reg.alg = alg
            reg.eps = 0.

        # Fit on training data and predict on test data
        reg.fit(x_train, y_train)
        pred = reg.predict(x_test)

        # Plot the estimated conditional quantiles
        plt.subplot(1, len(algorithms), it+1)
        plt.plot(x_train, y_train, '.')
        for q in pred:
            plt.plot(x_test, q, '-')
        for q in z_test:
            plt.plot(x_test, q, '--')
        plt.title(alg.upper())

        # Print the optimal objective value
        print(alg.upper() + ":")
        print("   objective value: %f" % reg.obj)
        print("   training time: %0.2fs" % reg.time)
Example #4
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
from qreg import QRegressor, QRegMTL, toy_data

if __name__ == '__main__':
    probs = np.linspace(0.1, 0.9, 5)  # Joint quantile regression
    x_train, y_train, z_train = toy_data(50)
    x_test, y_test, z_test = toy_data(1000, probs=probs)

    # QR with operator-valued kernel
    ovk = QRegressor(C=1e2, probs=probs, gamma_out=1e-2, alg='qp')

    # Fit on training data and predict on test data
    print("Learn QRegressor")
    ovk.fit(x_train, y_train)
    pred = ovk.predict(x_test)

    # Plot the estimated conditional quantiles
    plt.close('all')
    plt.figure(figsize=(12, 7))
    plt.subplot(231)
    plt.plot(x_train, y_train, '.')
    for q in pred:
        plt.plot(x_test, q, '-')
    for q in z_test:
        plt.plot(x_test, q, '--')
    plt.title('Operator-valued kernel')

    # QR with multi-task learning
    mtl = QRegMTL(C=1e2, probs=probs, n_landmarks=0.2)