Пример #1
0
def local_batchify(*arrays, **kwargs):
    """Yield locality-sampled batches forever, alternating two sampler setups.

    Two sets of sampling structures are built from ``arrays[0]`` with
    ``gen_Qw``: a "mean" configuration and a "var" configuration.  Successive
    batches are drawn with ``locality_sampler2`` from the mean configuration,
    then the var configuration, then the mean configuration again, and so on.

    Args:
        *arrays: Indexable arrays that share their first axis.  ``arrays[0]``
            is the one handed to ``gen_Qw``.
        **kwargs: Accepted but not used.

    Yields:
        A list holding every input array indexed by the sampled batch,
        followed by the mean weights and the var weights indexed the same way.
    """
    from locality_sampler import gen_Qw, locality_sampler2

    reference = arrays[0]

    # (psu, ssu, M) for each of the two sampler configurations.
    mean_cfg = (1, 100, 200)
    var_cfg = (1, 50, 60)

    mean_Q, mean_w = gen_Qw(reference, *mean_cfg)
    var_Q, var_w = gen_Qw(reference, *var_cfg)

    # The weights travel with the data so callers receive them per batch.
    payload = (*arrays, mean_w, var_w)

    # Positional arguments for locality_sampler2; index 0 = mean, 1 = var.
    sampler_args = (
        (mean_cfg[0], mean_cfg[1], mean_Q, mean_w),
        (var_cfg[0], var_cfg[1], var_Q, var_w),
    )

    turn = 0
    while True:
        idx = locality_sampler2(*sampler_args[turn]).astype(np.int32)
        turn ^= 1  # flip between the mean and var configuration
        yield [column[idx] for column in payload]
Пример #2
0
def local_batchify(*arrays, **kwargs):
    """Yield locality-sampled batches forever using the mean configuration.

    Sampling structures for a "mean" and a "var" configuration are built from
    ``arrays[0]`` with ``gen_Qw``.  Every batch is drawn with
    ``locality_sampler2`` from the mean configuration; the var configuration
    only contributes its weights to the yielded data.

    Args:
        *arrays: Indexable arrays that share their first axis.  ``arrays[0]``
            is the one handed to ``gen_Qw``.
        **kwargs: Accepted but not used.

    Yields:
        A list holding every input array indexed by the sampled batch,
        followed by the mean weights and the var weights indexed the same way.
    """
    from locality_sampler import gen_Qw, locality_sampler, locality_sampler2

    mean_psu, mean_ssu, mean_M = 1, 100, 150
    var_psu, var_ssu, var_M = 3, 7, 15

    reference = arrays[0]
    mean_Q, mean_w = gen_Qw(reference, mean_psu, mean_ssu, mean_M)
    # Only the weights of the var configuration are needed in this variant.
    _, var_w = gen_Qw(reference, var_psu, var_ssu, var_M)

    columns = (*arrays, mean_w, var_w)
    while True:
        idx = locality_sampler2(mean_psu, mean_ssu, mean_Q, mean_w)
        idx = idx.astype(np.int32)
        yield [column[idx] for column in columns]
Пример #3
0
def john(args, X, y, Xval, yval):
    """Train a mean/variance regression network and track validation scores.

    The network has a mean head, a Gamma-parameterised variance head
    (``alph`` / ``bet``) and a learned distance gate (``trans``) that blends
    the sampled variance with ``y_std ** 2`` based on the distance to the
    nearest k-means centre.  During the first half of ``args.iters`` only the
    mean head is trained on ordinary mini-batches with a fixed variance of
    0.05.  Afterwards the mean head and the variance heads are updated on
    batches drawn by ``locality_sampler2``.

    Relies on ``np``, ``torch``, ``normalize_y``, ``batchify`` and
    ``t_likelihood`` being available at module level.

    Args:
        args: Run configuration.  Reads ``dataset``, ``n_clusters`` (clamped
            in place to the number of training rows), ``cuda``,
            ``batch_size``, ``shuffel`` and ``iters``.
        X: Training inputs, 2-D (``X.shape[1]`` is the feature count).
        y: Training targets, 2-D; normalised with ``normalize_y``.
        Xval: Validation inputs.
        yval: Validation targets, compared with de-normalised predictions.

    Returns:
        Tuple ``(ll, rmse, mean, var)``.  ``ll`` and ``rmse`` are the logged
        validation log-likelihood and RMSE at the checkpoint with the highest
        log-likelihood.  ``mean`` and ``var`` are NumPy arrays with the final
        model's de-normalised predictions on ``Xval``.
    """
    from sklearn.cluster import KMeans
    from utils import dist
    from itertools import chain
    from torch import distributions as D
    from locality_sampler import gen_Qw, locality_sampler2
    from sklearn.decomposition import PCA

    n_neurons = 100 if args.dataset in ('protein', 'year_prediction') else 50
    args.n_clusters = min(args.n_clusters, X.shape[0])

    y, y_mean, y_std = normalize_y(y)

    # Settings forwarded to gen_Qw / locality_sampler2 for the two heads.
    mean_psu = 1
    mean_ssu = 40
    mean_M = 50

    var_psu = 2
    var_ssu = 10
    var_M = 10

    num_draws_train = 20

    use_cuda = torch.cuda.is_available() and args.cuda
    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    kmeans = KMeans(n_clusters=args.n_clusters)
    if args.dataset != 'year_prediction':
        kmeans.fit(np.concatenate([X], axis=0))
    else:
        # Fit on 10000 randomly drawn rows (with replacement) only.
        kmeans.fit(X[np.random.randint(0, X.shape[0], size=(10000))])
    # Build the centre tensor once and move it; re-wrapping an existing
    # tensor in torch.tensor() is discouraged and emits a warning.
    c = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32).to(device)

    class translatedSigmoid(torch.nn.Module):
        """Sigmoid of ``(x + alpha) / beta`` with a learnable scale."""

        def __init__(self):
            super(translatedSigmoid, self).__init__()
            self.beta = torch.nn.Parameter(torch.tensor([1.5]))

        def forward(self, x):
            beta = torch.nn.functional.softplus(self.beta)
            alpha = -beta*(6.9077542789816375)
            return torch.sigmoid((x+alpha)/beta)

    class GPNNModel(torch.nn.Module):
        """Mean head plus Gamma-sampled variance gated by centre distance."""

        def __init__(self):
            super(GPNNModel, self).__init__()
            self.mean = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                            torch.nn.ReLU(),
                                            torch.nn.Linear(n_neurons, y.shape[1]))
            self.alph = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                            torch.nn.ReLU(),
                                            torch.nn.Linear(n_neurons, y.shape[1]),
                                            torch.nn.Softplus())
            self.bet = torch.nn.Sequential(torch.nn.Linear(X.shape[1], n_neurons),
                                           torch.nn.ReLU(),
                                           torch.nn.Linear(n_neurons, y.shape[1]),
                                           torch.nn.Softplus())
            self.trans = translatedSigmoid()

        def forward(self, x, switch):
            d = dist(x, c)
            d_min = d.min(dim=1, keepdim=True)[0]
            s = self.trans(d_min)
            mean = self.mean(x)
            if switch:
                a = self.alph(x)
                b = self.bet(x)
                gamma_dist = D.Gamma(a+1e-8, 1.0/(b+1e-8))
                # Few draws while training, many for evaluation.
                n_draws = num_draws_train if self.training else 2000
                samples_var = gamma_dist.rsample(torch.Size([n_draws]))
                x_var = (1.0/(samples_var+1e-8))
                # y_std is the enclosing function's variable; it has been
                # converted to a tensor before the first forward pass.
                var = (1-s) * x_var + s * y_std ** 2
            else:
                var = 0.05*torch.ones_like(mean)
            return mean, var

    model = GPNNModel()
    model.to(device)

    optimizer = torch.optim.Adam(model.mean.parameters(), lr=1e-2)
    optimizer2 = torch.optim.Adam(chain(model.alph.parameters(),
                                        model.bet.parameters(),
                                        model.trans.parameters()), lr=1e-4)
    mean_Q, mean_w = gen_Qw(X, mean_psu, mean_ssu, mean_M)

    if X.shape[0] > 100000 and X.shape[1] > 10:
        # Large, wide data: build the variance structures on a PCA projection.
        pca = PCA(n_components=0.5)
        temp = pca.fit_transform(X)
        var_Q, var_w = gen_Qw(temp, var_psu, var_ssu, var_M)
    else:
        var_Q, var_w = gen_Qw(X, var_psu, var_ssu, var_M)

    opt_switch = 1
    mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
    var_w = torch.tensor(var_w).to(torch.float32).to(device)
    model.train()

    X = torch.tensor(X).to(torch.float32).to(device)
    y = torch.tensor(y).to(torch.float32).to(device)
    batches = batchify(X, y, batch_size = args.batch_size, shuffel=args.shuffel)

    # validation data and performance measures
    ll_list = []
    mae_list = []
    rmse_list = []
    x_eval = torch.tensor(Xval).to(torch.float32).to(device)
    y_eval = torch.tensor(yval).to(torch.float32).to(device)
    y_mean = torch.tensor(y_mean).to(torch.float32).to(device)
    y_std = torch.tensor(y_std).to(torch.float32).to(device)

    def _validate(switch):
        """Return (log-likelihood, de-normalised mean) on the validation set."""
        model.eval()
        with torch.no_grad():
            m, v = model(x_eval, switch)
        # Always go back to training mode.  Previously this only happened
        # inside the logging branch, so the model stayed in eval mode (2000
        # variance draws per step) for most of the run.
        model.train()
        m = m * y_std + y_mean
        v = v * y_std ** 2
        if switch == 0:
            ll = t_likelihood(y_eval, m, v.unsqueeze(0)).item()
        else:
            ll = t_likelihood(y_eval, m, v).item()
        return ll, m

    def _log(ll, m):
        """Append one validation record to the metric lists."""
        ll_list.append(ll)
        error = torch.norm(y_eval - m, p=2, dim=1)
        mae_list.append(error.abs().mean().item())
        rmse_list.append((error ** 2).mean().sqrt().item())

    it = 0
    its_per_epoch = int(np.ceil(X.shape[0] / args.batch_size))
    while it < args.iters:
        switch = 1.0 if it > args.iters/2.0 else 0.0

        # change between var and mean optimizer
        # NOTE(review): `it % 11` is truthy on every iteration that is not a
        # multiple of 11, so the choice flips on most iterations; confirm
        # whether `it % 11 == 0` was intended.
        if it % 11:
            opt_switch = opt_switch + 1
        # NOTE(review): anomaly detection is a debugging aid and slows
        # training; kept because it changes how NaNs in backward surface.
        with torch.autograd.detect_anomaly():
            data, label = next(batches)
            if not switch:
                optimizer.zero_grad()
                m, v = model(data, switch)
                loss = -t_likelihood(label, m, v.unsqueeze(0))
                loss.backward()
                optimizer.step()
            elif opt_switch % 2 == 0:
                optimizer.zero_grad()
                batch = locality_sampler2(mean_psu,mean_ssu,mean_Q,mean_w)
                m, v = model(X[batch], switch)
                loss = -t_likelihood(y[batch], m, v, mean_w[batch])
                loss.backward()
                optimizer.step()
            else:
                optimizer2.zero_grad()
                batch = locality_sampler2(var_psu,var_ssu,var_Q,var_w)
                m, v = model(X[batch], switch)
                loss = -t_likelihood(y[batch], m, v, var_w[batch])
                loss.backward()
                optimizer2.step()

        # test on validation set once per epoch
        if it % its_per_epoch == 0:
            ll, m = _validate(switch)

            # log validation performance after we are stable in the second
            # optimization regime
            if it > args.iters * 0.60:
                _log(ll, m)

                # early stop check
                if len(ll_list) - np.argmax(ll_list) > 50:
                    print('Early Stop!')
                    break

        it+=1

    if not ll_list:
        # No epoch boundary fell inside the logging window (for example when
        # one epoch is longer than the whole run).  Score the final model
        # once so the run does not fail on an empty list below.
        _log(*_validate(1.0))

    # get best LL
    i_best = np.argmax(ll_list)

    # evaluate model moments
    model.eval()
    with torch.no_grad():
        m, v = model(x_eval, 1.0)
        m = m * y_std + y_mean
        v = v * y_std ** 2

    return ll_list[i_best], rmse_list[i_best], m.cpu().numpy(), v.cpu().numpy()
def ens_john(X, y, x):
    """Fit an ensemble of five mean/variance networks and predict at ``x``.

    Each member trains its mean head on the full data for the first 5000
    iterations with a fixed variance of 0.05.  It then alternates between
    mean and variance updates on batches drawn by ``locality_sampler2``.  The
    members' predictions are combined into one mean and one standard
    deviation.

    Relies on ``np``, ``torch``, ``nn``, ``D``, ``n_neurons``, ``gen_Qw``,
    ``get_pseupoch``, ``locality_sampler2`` and ``t_likelihood`` being
    available at module level; none of them is imported here.

    Args:
        X: Training inputs, fed directly to the networks (one input feature,
            since the first layers are ``nn.Linear(1, n_neurons)``).
        y: Training targets; used as ``m.flatten() - y``, so presumably 1-D.
        x: Inputs at which the trained ensemble is evaluated.

    Returns:
        Tuple ``(mean, std)`` of NumPy arrays: the ensemble mean and the
        square root of the combined ensemble variance at ``x``.
    """
    from sklearn.cluster import KMeans
    from utils import dist
    from itertools import chain

    # Settings forwarded to gen_Qw / locality_sampler2 for the two heads.
    mean_psu = 1
    mean_ssu = 50
    mean_M = 60

    var_psu = 3
    var_ssu = 7
    var_M = 10

    kmeans = KMeans(n_clusters=10)
    kmeans.fit(np.concatenate([X], axis=0))
    c = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32)

    class translatedSigmoid(nn.Module):
        """Sigmoid of ``(x + alpha) / beta`` with a learnable scale."""

        def __init__(self):
            super(translatedSigmoid, self).__init__()
            self.beta = nn.Parameter(torch.tensor([1.5]))

        def forward(self, x):
            beta = torch.nn.functional.softplus(self.beta)
            alpha = -beta * (6.9077542789816375)
            return torch.sigmoid((x + alpha) / beta)

    class GPNNModel(nn.Module):
        """Mean head plus Gamma-sampled variance gated by centre distance."""

        def __init__(self):
            super(GPNNModel, self).__init__()
            self.mean = nn.Sequential(nn.Linear(1, n_neurons), nn.Sigmoid(),
                                      nn.Linear(n_neurons, 1))
            self.alph = nn.Sequential(nn.Linear(1, n_neurons), nn.Sigmoid(),
                                      nn.Linear(n_neurons, 1), nn.Softplus())
            self.bet = nn.Sequential(nn.Linear(1, n_neurons), nn.Sigmoid(),
                                     nn.Linear(n_neurons, 1), nn.Softplus())
            self.trans = translatedSigmoid()

        def forward(self, x, switch):
            d = dist(x, c)
            d_min = d.min(dim=1, keepdim=True)[0]
            s = self.trans(d_min)
            mean = self.mean(x)
            if switch:
                a = self.alph(x)
                b = self.bet(x)
                gamma_dist = D.Gamma(a + 1e-8, 1.0 / (b + 1e-8))
                # Few draws while training, many for evaluation.
                n_draws = 50 if self.training else 2000
                samples_var = gamma_dist.rsample(torch.Size([n_draws]))
                x_var = (1.0 / (samples_var + 1e-8))
                # 3.5**2 is the variance used far from the data (HYPERPARAMETER).
                var = (1 - s) * x_var + s * torch.tensor([3.5**2])
            else:
                var = torch.tensor([0.05])
            return mean, var

    ens_mean, ens_var = [], []
    for _ in range(5):
        model = GPNNModel()
        optimizer = torch.optim.Adam(model.mean.parameters(), lr=1e-2)
        optimizer2 = torch.optim.Adam(chain(model.alph.parameters(),
                                            model.bet.parameters(),
                                            model.trans.parameters()),
                                      lr=1e-3)

        n_iter = 6000
        mean_Q, mean_w = gen_Qw(X, mean_psu, mean_ssu, mean_M)
        var_Q, var_w = gen_Qw(X, var_psu, var_ssu, var_M)
        mean_pseupoch = get_pseupoch(mean_w, 0.5)
        var_pseupoch = get_pseupoch(var_w, 0.5)
        opt_switch = 1
        mean_w = torch.Tensor(mean_w)
        var_w = torch.Tensor(var_w)

        for it in range(n_iter):
            # The progress report below leaves the model in eval mode.
            model.train()
            switch = 1.0 if it > 5000 else 0.0

            # change between var and mean optimizer
            # NOTE(review): `it % 11` is truthy on every iteration that is
            # not a multiple of 11, so the choice flips on most iterations;
            # confirm whether `it % 11 == 0` was intended.
            if it % 11:
                opt_switch = opt_switch + 1

            if not switch:
                optimizer.zero_grad()
                m, v = model(X, switch)
                loss = -(-v.log() - (m.flatten() - y)**2 / (2 * v)).sum()
                loss.backward()
                optimizer.step()
            elif opt_switch % 2 == 0:
                for _ in range(mean_pseupoch):
                    optimizer.zero_grad()
                    batch = locality_sampler2(mean_psu, mean_ssu, mean_Q,
                                              mean_w)
                    m, v = model(X[batch], switch)
                    loss = -t_likelihood(y[batch], m, v, mean_w[batch])
                    loss = loss.sum()
                    loss.backward()
                    optimizer.step()
            else:
                for _ in range(var_pseupoch):
                    optimizer2.zero_grad()
                    batch = locality_sampler2(var_psu, var_ssu, var_Q,
                                              var_w)
                    m, v = model(X[batch], switch)
                    loss = -t_likelihood(y[batch], m, v, var_w[batch])
                    loss = loss.sum()
                    loss.backward()
                    optimizer2.step()

            if it % 500 == 0:
                model.eval()
                # Reporting only: no autograd graph is needed.
                with torch.no_grad():
                    m, v = model(X, switch)
                    # Shape the squared error as (1, N, 1) so it lines up
                    # with the (draws, N, 1) variance.  A flat (N,) vector
                    # would broadcast to (draws, N, N) in the variance stage.
                    sq_err = ((m.flatten() - y)**2).reshape(1, -1, 1)
                    loss = -(-v.log() - sq_err / (2 * v)).mean()
                print('Iter {0}/{1}, Loss {2}'.format(it, n_iter, loss.item()))

        model.eval()
        with torch.no_grad():
            mean, var = model(x, switch)
        ens_mean.append(mean)
        # Average over the sampled variance draws (leading axis).
        ens_var.append(var.mean(dim=0))

    ens_mean = torch.stack(ens_mean)
    ens_var = torch.stack(ens_var)

    # Mixture moments: E[var + mean^2] - (E[mean])^2.
    mean = ens_mean.mean(dim=0)
    var = (ens_var + ens_mean**2).mean(dim=0) - mean**2

    return mean.numpy(), var.sqrt().numpy()
Пример #5
0
def john(args, X, y, Xval, yval):
    """Train a mean/variance regression network and score it on ``Xval``.

    The network has a mean head, a Gamma-parameterised variance head
    (``alph`` / ``bet``) and a learned distance gate (``trans``) that blends
    the sampled variance with ``y_std ** 2`` based on the distance to the
    nearest k-means centre.  During the first half of ``args.iters`` only the
    mean head is trained on ordinary mini-batches with a fixed variance of
    0.05.  Afterwards the mean head and the variance heads are updated on
    batches drawn by ``locality_sampler2``.

    Relies on ``np``, ``torch``, ``normalize_y``, ``batchify`` and
    ``t_likelihood`` being available at module level.

    Args:
        args: Run configuration.  Reads ``dataset``, ``n_clusters`` (clamped
            in place to the number of training rows), ``cuda``,
            ``batch_size``, ``shuffel`` and ``iters``.
        X: Training inputs, 2-D (``X.shape[1]`` is the feature count).
        y: Training targets; normalised with ``normalize_y``.
        Xval: Validation inputs.
        yval: Validation targets, compared with de-normalised predictions.

    Returns:
        Tuple ``(log_px, rmse)`` of Python floats: the mean of the scaled
        ``t_likelihood`` on the validation data and the validation RMSE.
    """
    from sklearn.cluster import KMeans
    from utils import dist
    from itertools import chain
    from torch import distributions as D
    from locality_sampler import gen_Qw, locality_sampler2
    from sklearn.decomposition import PCA

    n_neurons = 100 if args.dataset in ('protein', 'year_prediction') else 50
    args.n_clusters = min(args.n_clusters, X.shape[0])

    y, y_mean, y_std = normalize_y(y)

    # Settings forwarded to gen_Qw / locality_sampler2 for the two heads.
    mean_psu = 1
    mean_ssu = 40
    mean_M = 50

    var_psu = 2
    var_ssu = 10
    var_M = 10

    num_draws_train = 20

    # Decide the device once instead of re-checking CUDA in two places.
    use_cuda = torch.cuda.is_available() and args.cuda
    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    kmeans = KMeans(n_clusters=args.n_clusters)
    if args.dataset != 'year_prediction':
        kmeans.fit(np.concatenate([X], axis=0))
    else:
        # Fit on 10000 randomly drawn rows (with replacement) only.
        kmeans.fit(X[np.random.randint(0, X.shape[0], size=(10000))])
    # Build the centre tensor once and move it; re-wrapping an existing
    # tensor in torch.tensor() is discouraged and emits a warning.
    c = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32).to(device)

    class translatedSigmoid(torch.nn.Module):
        """Sigmoid of ``(x + alpha) / beta`` with a learnable scale."""

        def __init__(self):
            super(translatedSigmoid, self).__init__()
            self.beta = torch.nn.Parameter(torch.tensor([1.5]))

        def forward(self, x):
            beta = torch.nn.functional.softplus(self.beta)
            alpha = -beta * (6.9077542789816375)
            return torch.sigmoid((x + alpha) / beta)

    class GPNNModel(torch.nn.Module):
        """Mean head plus Gamma-sampled variance gated by centre distance."""

        def __init__(self):
            super(GPNNModel, self).__init__()
            self.mean = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1))
            self.alph = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1), torch.nn.Softplus())
            self.bet = torch.nn.Sequential(
                torch.nn.Linear(X.shape[1], n_neurons), torch.nn.ReLU(),
                torch.nn.Linear(n_neurons, 1), torch.nn.Softplus())
            self.trans = translatedSigmoid()

        def forward(self, x, switch):
            d = dist(x, c)
            d_min = d.min(dim=1, keepdim=True)[0]
            s = self.trans(d_min)
            mean = self.mean(x)
            if switch:
                a = self.alph(x)
                b = self.bet(x)
                gamma_dist = D.Gamma(a + 1e-8, 1.0 / (b + 1e-8))
                # Few draws while training, many for evaluation.
                n_draws = num_draws_train if self.training else 1000
                samples_var = gamma_dist.rsample(torch.Size([n_draws]))
                x_var = (1.0 / (samples_var + 1e-8))
                # y_std**2 is the variance used far from the data
                # (HYPERPARAMETER).
                var = (1 - s) * x_var + s * torch.tensor(
                    [y_std**2], device=x.device)
            else:
                var = 0.05 * torch.ones_like(mean)
            return mean, var

    model = GPNNModel()
    model.to(device)

    optimizer = torch.optim.Adam(model.mean.parameters(), lr=1e-2)
    optimizer2 = torch.optim.Adam(chain(model.alph.parameters(),
                                        model.bet.parameters(),
                                        model.trans.parameters()),
                                  lr=1e-4)
    mean_Q, mean_w = gen_Qw(X, mean_psu, mean_ssu, mean_M)

    if X.shape[0] > 100000 and X.shape[1] > 10:
        # Large, wide data: build the variance structures on a PCA projection.
        pca = PCA(n_components=0.5)
        temp = pca.fit_transform(X)
        var_Q, var_w = gen_Qw(temp, var_psu, var_ssu, var_M)
    else:
        var_Q, var_w = gen_Qw(X, var_psu, var_ssu, var_M)

    opt_switch = 1
    mean_w = torch.tensor(mean_w).to(torch.float32).to(device)
    var_w = torch.tensor(var_w).to(torch.float32).to(device)
    model.train()

    X = torch.tensor(X).to(torch.float32).to(device)
    y = torch.tensor(y).to(torch.float32).to(device)
    batches = batchify(X, y, batch_size=args.batch_size, shuffel=args.shuffel)

    it = 0
    while it < args.iters:
        switch = 1.0 if it > args.iters / 2.0 else 0.0

        # change between var and mean optimizer
        # NOTE(review): `it % 11` is truthy on every iteration that is not a
        # multiple of 11, so the choice flips on most iterations; confirm
        # whether `it % 11 == 0` was intended.
        if it % 11:
            opt_switch = opt_switch + 1
        # NOTE(review): anomaly detection is a debugging aid and slows
        # training; kept because it changes how NaNs in backward surface.
        with torch.autograd.detect_anomaly():
            data, label = next(batches)
            if not switch:
                optimizer.zero_grad()
                m, v = model(data, switch)
                loss = -t_likelihood(label.reshape(-1, 1), m,
                                     v.reshape(1, -1, 1)) / X.shape[0]
                loss.backward()
                optimizer.step()
            elif opt_switch % 2 == 0:
                optimizer.zero_grad()
                batch = locality_sampler2(mean_psu, mean_ssu, mean_Q,
                                          mean_w)
                m, v = model(X[batch], switch)
                loss = -t_likelihood(y[batch].reshape(-1, 1), m, v,
                                     mean_w[batch]) / X.shape[0]
                loss.backward()
                optimizer.step()
            else:
                optimizer2.zero_grad()
                batch = locality_sampler2(var_psu, var_ssu, var_Q, var_w)
                m, v = model(X[batch], switch)
                loss = -t_likelihood(y[batch].reshape(-1, 1), m, v,
                                     var_w[batch]) / X.shape[0]
                loss.backward()
                optimizer2.step()

        if it % 500 == 0:
            # Progress report on the most recent mini-batch; nothing is
            # back-propagated, so skip building the autograd graph.
            with torch.no_grad():
                m, v = model(data, switch)
                loss = -(-v.log() / 2 -
                         ((m.flatten() - label)**2).reshape(1, -1, 1) /
                         (2 * v)).mean()
            print('Iter {0}/{1}, Loss {2}'.format(it, args.iters, loss.item()))
        it += 1

    model.eval()

    data = torch.tensor(Xval).to(torch.float32).to(device)
    label = torch.tensor(yval).to(torch.float32).to(device)
    with torch.no_grad():
        # `switch` still holds the value from the last training iteration.
        m, v = model(data, switch)
    # Undo the target normalisation before scoring.
    m = m * y_std + y_mean
    v = v * y_std**2
    # NOTE(review): the original author marked this scaling as "check".
    log_px = t_likelihood(label.reshape(-1, 1), m, v) / Xval.shape[0]
    rmse = ((label - m.flatten())**2).mean().sqrt()
    return log_px.mean().item(), rmse.item()