Example #1
def train_scene_discriminator(x):
    netC.zero_grad()

    if has_cuda:
        target = torch.cuda.FloatTensor(opt.batch_size, 1)
    else:
        target = torch.FloatTensor(opt.batch_size, 1)

    x1 = x[0]
    x2 = x[1]
    h_p1 = netEP(x1).detach()
    h_p2 = netEP(x2).detach()

    half = int(opt.batch_size/2)
    if has_cuda:
        rp = torch.randperm(half).cuda()
    else:
        rp = torch.randperm(half).cpu()
    h_p2[:half] = h_p2[rp]
    target[:half] = 1
    target[half:] = 0

    out = netC([h_p1, h_p2])
    bce = bce_criterion(out, Variable(target))

    bce.backward()
    optimizerC.step()

    acc = out[:half].gt(0.5).sum() + out[half:].le(0.5).sum()
    return bce.data.cpu().numpy(), acc.data.cpu().numpy()/opt.batch_size
Example #2
def main():
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-n', '--num-epochs', default=1000, type=int)
    parser.add_argument('-b', '--batch-size', default=N, type=int)
    parser.add_argument('--cuda', action='store_true')
    args = parser.parse_args()
    data = build_linear_dataset(N, p)
    if args.cuda:
        # make tensors and modules CUDA
        data = data.cuda()
        softplus.cuda()
        regression_model.cuda()
    for j in range(args.num_epochs):
        if args.batch_size == N:
            # use the entire data set
            epoch_loss = svi.step(data)
        else:
            # mini batch
            epoch_loss = 0.0
            perm = torch.randperm(N) if not args.cuda else torch.randperm(N).cuda()
            # shuffle data
            data = data[perm]
            # get indices of each batch
            all_batches = get_batch_indices(N, args.batch_size)
            for ix, batch_start in enumerate(all_batches[:-1]):
                batch_end = all_batches[ix + 1]
                batch_data = data[batch_start: batch_end]
                epoch_loss += svi.step(batch_data)
        if j % 100 == 0:
            print("epoch avg loss {}".format(epoch_loss/float(N)))
Example #3
        def random(nin, nout, nto):
            nker = nto * nout
            tbl = torch.Tensor(nker, 2)
            fi = torch.randperm(nin)
            frcntr = 0
            nfi = math.floor(nin / nto)  # number of distinct nto chunks
            totbl = tbl.select(1, 1)
            frtbl = tbl.select(1, 0)
            fitbl = fi.narrow(0, 0, (nfi * nto))  # part of fi that covers distinct chunks
            ufrtbl = frtbl.unfold(0, nto, nto)
            utotbl = totbl.unfold(0, nto, nto)
            ufitbl = fitbl.unfold(0, nto, nto)

            # start fill_ing frtbl
            for i in range(nout):  # for each unit in the target map
                ufrtbl.select(0, i).copy_(ufitbl.select(0, frcntr))
                frcntr += 1
                if frcntr - 1 == nfi:  # reset fi
                    fi.copy_(torch.randperm(nin))
                    frcntr = 1

            for tocntr in range(utotbl.size(0)):
                utotbl.select(0, tocntr).fill_(tocntr)

            return tbl
Example #4
def main(args):
    pyro.clear_param_store()
    data = build_linear_dataset(N, p)
    if args.cuda:
        # make tensors and modules CUDA
        data = data.cuda()
        softplus.cuda()
        regression_model.cuda()
    for j in range(args.num_epochs):
        if args.batch_size == N:
            # use the entire data set
            epoch_loss = svi.step(data)
        else:
            # mini batch
            epoch_loss = 0.0
            perm = torch.randperm(N) if not args.cuda else torch.randperm(N).cuda()
            # shuffle data
            data = data[perm]
            # get indices of each batch
            all_batches = get_batch_indices(N, args.batch_size)
            for ix, batch_start in enumerate(all_batches[:-1]):
                batch_end = all_batches[ix + 1]
                batch_data = data[batch_start: batch_end]
                epoch_loss += svi.step(batch_data)
        if j % 100 == 0:
            print("epoch avg loss {}".format(epoch_loss/float(N)))
Example #5
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    if alpha > 0.:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1.
    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam
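
Mixup pairs naturally with a mixed loss: the prediction on `mixed_x` is scored against both label sets with the same `lam` weighting. A short companion sketch (names such as `net` and `criterion` are placeholders, not from the snippet above):

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # convex combination of the losses against the two original label sets
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

# typical use, assuming net and criterion exist:
#   mixed_x, y_a, y_b, lam = mixup_data(x, y, alpha=1.0, use_cuda=False)
#   loss = mixup_criterion(criterion, net(mixed_x), y_a, y_b, lam)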
Example #6
 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     optimizer = Adam(self.mdl.parameters())
     #optimizer = SGD(self.mdl.parameters(), lr=1e-4)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         rand_idx = t.randperm(n_train)
         src = src[rand_idx]
         rel = rel[rand_idx]
         dst = dst[rand_idx]
         src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
         src_cuda = src.cuda()
         rel_cuda = rel.cuda()
         dst_cuda = dst.cuda()
         src_corrupted = src_corrupted.cuda()
         dst_corrupted = dst_corrupted.cuda()
         for s0, r, t0, s1, t1 in batch_by_num(n_batch, src_cuda, rel_cuda, dst_cuda, src_corrupted, dst_corrupted,
                                               n_sample=n_train):
             self.mdl.zero_grad()
             loss = t.sum(self.mdl.pair_loss(Variable(s0), Variable(r), Variable(t0), Variable(s1), Variable(t1)))
             loss.backward()
             optimizer.step()
             self.mdl.constraint()
             epoch_loss += loss.data[0]
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(os.path.join(config().task.dir, self.config.model_file))
                 best_perf = test_perf
     return best_perf
Example #7
def optimize_model(model, x, y, x_test, y_test, batch_size=32, learning_rate=1e-4, weight_decay=1e-4):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    N = y.size(0)
    num_one_epoch = np.floor(N / batch_size).astype(int)
    num_epoch = np.floor(3000 / num_one_epoch).astype(int)
    for epoch in range(num_epoch):
        index = torch.randperm(N)
        for t in range(num_one_epoch):
            idx_start = t*batch_size
            idx_end = (t+1)*batch_size
            y_pred = model(x[index[idx_start:idx_end], :])
            loss = torch.nn.MSELoss()(y_pred, y[index[idx_start:idx_end]])
            # print(epoch, t, loss.data[0])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    y_pred = model(x)
    loss = torch.nn.MSELoss()(y_pred, y)
    y_test_pred = model(x_test)
    test_loss = torch.nn.MSELoss()(y_test_pred, y_test)
    # print(test_loss.data[0])
    print(loss.data[0], test_loss.data[0])
    return loss.data[0], test_loss.data[0]
Example #8
def train_valid_splitter(x, y, split, shuffle=True):
    ''' Generate training and validation tensors from whole dataset data and label tensors
    
    :param x: Data tensor for whole dataset
    :type x: torch.Tensor
    :param y: Label tensor for whole dataset
    :type y: torch.Tensor
    :param split: Fraction of dataset to be used for validation
    :type split: float
    :param shuffle: If True, randomize the tensor order before splitting; otherwise keep the original order
    :type shuffle: bool
    :return: Training and validation tensors (training data, training labels, validation data, validation labels)
    :rtype: tuple
    '''
    num_samples_x = x.size()[0]
    num_valid_samples = math.floor(num_samples_x * split)

    if shuffle:
        indices = torch.randperm(num_samples_x)
        x, y = x[indices], y[indices]

    x_val, y_val = x[:num_valid_samples], y[:num_valid_samples]
    x, y = x[num_valid_samples:], y[num_valid_samples:]

    return x, y, x_val, y_val
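
A self-contained usage sketch for the splitter above, on random toy data (the splitter itself also needs `math` and `torch` in scope):

import math
import torch

x = torch.randn(100, 10)           # 100 samples, 10 features
y = torch.randint(0, 2, (100,))    # binary labels

x_train, y_train, x_val, y_val = train_valid_splitter(x, y, split=0.2)
print(x_train.shape, x_val.shape)  # torch.Size([80, 10]) torch.Size([20, 10])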
Example #9
 def _generate_perms_and_inverses(feature_size, num_perms):
     perms = [torch.randperm(feature_size)
              for _ in range(num_perms)]
     inv_perms = [torch.cat([(perm == i).nonzero()
                             for i in range(feature_size)], 0).squeeze()
                  for perm in perms]
     return perms, inv_perms
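
The inverse permutations above are built by locating each index with `nonzero`; an equivalent, simpler route (a sketch, not taken from the snippet) is `torch.argsort`, since sorting a permutation's values recovers its inverse:

import torch

perm = torch.randperm(8)
inv_perm = torch.argsort(perm)             # inverse permutation of perm
x = torch.randn(8)
assert torch.equal(x[perm][inv_perm], x)   # permuting and un-permuting round-trips
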
def drop_exp_2(r_feat_val, r_feat_train, pred):
    # incep_score, mode_score, fid
    n_mode = len(Counter(pred))
    scores = np.zeros((n_mode, 3))
    t_feat = r_feat_train.clone()
    collapsed_order = torch.randperm(n_mode).long()
    index = torch.arange(0, r_feat_train.size(0)).long()
    collapsed = torch.zeros(r_feat_train.size(0)).byte()
    Mxx = distance(r_feat_val, r_feat_val, sqrt=True)
    
    for i in range(n_mode):
        # Compute Score
        Mxy = distance(r_feat_val, t_feat, sqrt=True)
        Myy = distance(t_feat, t_feat, sqrt=True)
        scores[i, 0] = inception_score(t_feat)
        scores[i, 1] = mode_score(t_feat, r_feat_val)
        scores[i, 2] = fid(t_feat, r_feat_val)
        
        # Do drop -- fill dropped slots with remaining samples
        c = collapsed_order[i]
        collapsed[pred.eq(c)] = 1
        cidx = index[collapsed.eq(1)]
        ncidx = index[collapsed.ne(1)]
        if ncidx.dim() == 0 or cidx.dim() == 0 or ncidx.size(0) == 0:
            continue
        for j in cidx:
            copy_idx = np.random.randint(0, ncidx.size(0))
            t_feat[j] = t_feat[ncidx[copy_idx]]
            
    return scores
Example #11
def sparse_(tensor, sparsity, std=0.01):
    r"""Fills the 2D input `Tensor` as a sparse matrix, where the
    non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in "Deep learning via
    Hessian-free optimization" - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    rows, cols = tensor.shape
    num_zeros = int(math.ceil(sparsity * rows))

    with torch.no_grad():
        tensor.normal_(0, std)
        for col_idx in range(cols):
            row_indices = torch.randperm(rows)
            zero_indices = row_indices[:num_zeros]
            tensor[zero_indices, col_idx] = 0
    return tensor
Example #12
 def __call__(self, *inputs):
     order = th.randperm(inputs[0].dim())
     outputs = []
     for idx, _input in enumerate(inputs):
         _input = _input.index_select(0, order)
         outputs.append(_input)
     return outputs if idx > 1 else outputs[0]
def collapse_exp_1(r_feat_val, r_feat, c_feat, pred):
    # emd, mmd, acc_t, acc_f
    n_mode = c_feat.size(0)
    c_feat_repeat = c_feat[pred]
    scores = np.zeros((n_mode, 4))
    t_feat = r_feat.clone()
    index = torch.arange(0, 2000).long()
    collapsed_order = torch.randperm(n_mode).long()
    Mxx = distance(r_feat_val, r_feat_val, sqrt=False)
    
    for i in range(n_mode):
        # Compute Score
        Mxy = distance(r_feat_val, t_feat, sqrt=False)
        Myy = distance(t_feat, t_feat, sqrt=False)
        scores[i, 0] = wasserstein(Mxy, True)
        scores[i, 1] = mmd(Mxx, Mxy, Myy, 1)
        s = knn(Mxx, Mxy, Myy, 1, True)
        scores[i, 2], scores[i, 3] = s.acc_t, s.acc_f
        
        # Do collapse 
        c = collapsed_order[i]
        cidx = index[pred.eq(c)]
        t_feat[cidx] = c_feat_repeat[cidx]
        
    return scores
Example #14
 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     optimizer = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         if epoch % self.config.sample_freq == 0:
             rand_idx = t.randperm(n_train)
             src = src[rand_idx]
             rel = rel[rand_idx]
             dst = dst[rand_idx]
             src_corrupted, rel_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
             src_corrupted = src_corrupted.cuda()
             rel_corrupted = rel_corrupted.cuda()
             dst_corrupted = dst_corrupted.cuda()
         for ss, rs, ts in batch_by_num(n_batch, src_corrupted, rel_corrupted, dst_corrupted, n_sample=n_train):
             self.mdl.zero_grad()
             label = t.zeros(len(ss)).type(t.LongTensor).cuda()
             loss = t.sum(self.mdl.softmax_loss(Variable(ss), Variable(rs), Variable(ts), label))
             loss.backward()
             optimizer.step()
             epoch_loss += loss.data[0]
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(os.path.join(config().task.dir, self.config.model_file))
                 best_perf = test_perf
     return best_perf
Example #15
def val(spatial_size, Scale, precomputeStride):
    d = pickle.load(open('pickle/test.pickle', 'rb'))
    d = torchnet.dataset.ListDataset(d)
    randperm = torch.randperm(len(d))

    def perm(idx, size):
        return randperm[idx]

    def merge(tbl):
        inp = scn.InputBatch(2, spatial_size)
        center = spatial_size.float().view(1, 2) / 2
        p = torch.LongTensor(2)
        v = torch.FloatTensor([1, 0, 0])
        for char in tbl['input']:
            inp.addSample()
            for stroke in char:
                stroke = stroke.float() * (Scale - 0.01) / 255 - 0.5 * (Scale - 0.01)
                stroke += center.expand_as(stroke)
                scn.dim_fn(
                    2,
                    'drawCurve')(
                    inp.metadata.ffi,
                    inp.features,
                    stroke)
        inp.precomputeMetadata(precomputeStride)
        return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
    bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge)
    tdi = scn.threadDatasetIterator(bd)

    def iter():
        randperm = torch.randperm(len(d))
        return tdi()
    return iter
Example #16
    def test(self):
        if opt['model'] == 'CharCNN':
            X_train = self.dataset.df_train['text_parsed'].values
            X_test = self.dataset.df_test['text_parsed'].values
        else:
            X_train = self.dataset.df_train['ids'].values
            X_test = self.dataset.df_test['ids'].values

        Y_train = self.dataset.df_train['label'].values
        Y_test = self.dataset.df_test['label'].values        

        m_train = len(X_train)
        permutation = torch.randperm(m_train)

        accuracies = []
        for start_idx in range(0, m_train, opt['batch_size']):
            indices = permutation[start_idx:start_idx + opt['batch_size']]

            if opt['model'] == 'CharCNN':
                X_train_batch, X_train_mask_batch, Y_train_batch = self.create_batch_char(X_train, Y_train, indices)
            else:
                X_train_batch, X_train_mask_batch, Y_train_batch = self.create_batch(X_train, Y_train, indices)
            Y_predict = self.model(X_train_batch, X_train_mask_batch)
            loss = self.loss(Y_predict, Y_train_batch)

            accuracy, _ = self.calculate_accuracy(Y_train_batch, Y_predict)
            accuracies.append(accuracy)
            print(loss.cpu().data.numpy(), accuracy)

            del X_train_batch, X_train_mask_batch, Y_train_batch, Y_predict

        print(sum(accuracies)/len(accuracies))
def drop_exp_1(r_feat_val, r_feat_train, pred):
    # emd, mmd, acc_t, acc_f
    n_mode = len(Counter(pred))
    scores = np.zeros((n_mode, 4))
    t_feat = r_feat_train.clone()
    collapsed_order = torch.randperm(n_mode).long()
    index = torch.arange(0, r_feat_train.size(0)).long()
    collapsed = torch.zeros(r_feat_train.size(0)).byte()
    Mxx = distance(r_feat_val, r_feat_val, sqrt=True)
    
    for i in range(n_mode):
        # Compute Score
        Mxy = distance(r_feat_val, t_feat, sqrt=True)
        Myy = distance(t_feat, t_feat, sqrt=True)
        scores[i, 0] = wasserstein(Mxy, False)
        scores[i, 1] = mmd(Mxx, Mxy, Myy, 1)
        s = knn(Mxx, Mxy, Myy, 1, True)
        scores[i, 2], scores[i, 3] = s.acc_t, s.acc_f
        
        # Do drop -- fill dropped slots with remaining samples
        c = collapsed_order[i]
        collapsed[pred.eq(c)] = 1
        cidx = index[collapsed.eq(1)]
        ncidx = index[collapsed.ne(1)]
        if ncidx.dim() == 0 or cidx.dim() == 0 or ncidx.size(0) == 0:
            continue
        for j in cidx:
            copy_idx = np.random.randint(0, ncidx.size(0))
            t_feat[j] = t_feat[ncidx[copy_idx]]
            
    return scores
Example #18
    def __iter__(self):
        rand_num = torch.randperm(self.num_per_batch).view(-1,1) * self.batch_size
        self.rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range

        self.rand_num_view = self.rand_num.view(-1)

        if self.leftover_flag:
            self.rand_num_view = torch.cat((self.rand_num_view, self.leftover),0)

        return iter(self.rand_num_view)
Example #19
    def __call__(self, data, subsample=True):

        # deterministically shuffle based on epoch
        g = torch.Generator()
        g.manual_seed(self.epoch)

        indices = list(torch.randperm(len(data), generator=g))
        if not subsample:
            return [data[i] for i in indices]
        return [data[i] for i in self.subsample(indices)]
Example #20
    def _shuffle_training_data(self):
        """
        Shuffles the training data.

        :return: None
        """
        num_examples = len(self.train_x)
        shuffled_indices = torch.randperm(num_examples)
        self.train_x = self.train_x[shuffled_indices]
        self.train_y = self.train_y[shuffled_indices]
Example #21
 def __iter__(self):
     indices = torch.randperm(self.num_samples)
     ret = []
     for i in indices:
         pid = self.pids[i]
         t = self.index_dic[pid]
         if len(t) >= self.num_instances:
             t = np.random.choice(t, size=self.num_instances, replace=False)
         else:
             t = np.random.choice(t, size=self.num_instances, replace=True)
         ret.extend(t)
     return iter(ret)
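
Samplers like this are consumed through the `sampler` argument of `DataLoader` (in which case `shuffle` must be left unset). A self-contained sketch of that hand-off, using the built-in `SubsetRandomSampler` with a `randperm`-drawn subset for illustration:

import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

dataset = TensorDataset(torch.arange(10).float().unsqueeze(1))
subset = torch.randperm(len(dataset))[:6]            # random 6-sample subset
loader = DataLoader(dataset, batch_size=3,
                    sampler=SubsetRandomSampler(subset.tolist()))
for (batch,) in loader:
    print(batch.view(-1))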
Example #22
def random_split(dataset, lengths):
    """
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    Arguments:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths of splits to be produced
    """
    if sum(lengths) != len(dataset):
        raise ValueError("Sum of input lengths does not equal the length of the input dataset!")

    indices = randperm(sum(lengths))
    return [Subset(dataset, indices[offset - length:offset]) for offset, length in zip(_accumulate(lengths), lengths)]
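
The same `random_split` utility ships in `torch.utils.data`; a minimal usage sketch with a toy dataset:

import torch
from torch.utils.data import TensorDataset, random_split

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
train_set, val_set = random_split(dataset, [80, 20])   # non-overlapping random subsets
print(len(train_set), len(val_set))                    # 80 20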
Example #23
 def sample(self):
     """
     :returns: a random subsample of `range(size)`
     :rtype: torch.autograd.Variable of torch.LongTensor
     """
     subsample_size = self.subsample_size
     if subsample_size is None or subsample_size > self.size:
         subsample_size = self.size
     if subsample_size == self.size:
         result = Variable(torch.LongTensor(list(range(self.size))))
     else:
         result = Variable(torch.randperm(self.size)[:self.subsample_size])
     return result.cuda() if self.use_cuda else result
Example #24
 def _train_on_instance_mixup(self, z, x, **kwargs):
     """Perform mixup in the pixel space"""
     self._train()
     x.requires_grad = True # for dnorm
     # Train the generator.
     self.optim['g'].zero_grad()
     alpha = self.sample_lambda(x.size(0))
     fake = self.g(z)
     xz = Variable(alpha*x.data + (1.-alpha)*fake.data)
     if self.mixup_ff:
         perm = torch.randperm(fake.size(0)).view(-1).long()
         fake_perm = fake[perm]
         xz_ff = Variable(alpha*fake.data + (1.-alpha)*fake_perm.data)
     _, d_fake = self.d(fake)
     gen_loss = self.g_loss(d_fake)
     if (kwargs['iter']-1) % self.update_g_every == 0:
         gen_loss.backward()
         self.optim['g'].step()
     # Train the discriminator.
     self.optim['d'].zero_grad()
     _, d_xz = self.d(xz.detach())
     _, d_real = self.d(x)
     _, d_fake = self.d(fake.detach())
     d_loss = self.d_loss_fake(d_xz) + self.d_loss_real(d_real) + \
              self.d_loss_fake(d_fake)
     if self.mixup_ff:
         _, d_xz_ff = self.d(xz_ff.detach())
         d_loss += self.d_loss_fake(d_xz_ff)
     d_loss.backward()
     self.optim['d'].step()
     ##################################
     # Also compute the gradient norm.
     # Grad norm for D_REAL
     _, d_real = self.d(x)
     g_norm_x = self.grad_norm(d_real, x)
     if self.dnorm > 0.:
         self.optim['d'].zero_grad()
         (g_norm_x*self.dnorm).backward()
         self.optim['d'].step()
     self.optim['d'].zero_grad()
     ##################################
     losses = {
         'g_loss': gen_loss.data.item(),
         'd_loss': d_loss.data.item(),
         'd_real_norm': g_norm_x.data.item(),
     }
     outputs = {
         'x': x.detach(),
         'gz': fake.detach(),
     }
     return losses, outputs
def prepare(dataset):
    real_idx = torch.randperm(len(dataset)).long()
    r_imgs = torch.stack([dataset[i][0] for i in tqdm(real_idx[:2000])], 0)
    r2_imgs = torch.stack([dataset[i][0] for i in tqdm(real_idx[2000:4000])], 0)
    kmeans = KMeans(n_clusters=50, n_jobs=12)
    X = r_imgs.view(2000, -1).numpy()
    kmeans.fit(X)
    centers = torch.from_numpy(kmeans.cluster_centers_).view(-1, 3, 64, 64).float()
    r_feat = get_features(r_imgs)
    r2_feat = get_features(r2_imgs)
    c_feat = get_features(centers)
    pred = distance(r_imgs, centers, False).min(1)[1].squeeze_()

    return r_imgs, r2_imgs, centers, r_feat, r2_feat, c_feat, pred
Example #26
        def __init__(self, *args, idx=None, split=.8, **kwargs):
            super().__init__(*args, **kwargs)
            self.idx = idx if idx is not None else torch.randperm(len(self))
            tensors_ = []

            for i in range(len(self.tensors)):
                if split > 0:
                    tensors_.append(
                        self.tensors[i][self.idx][:int(split * len(self))])
                else:
                    tensors_.append(
                        self.tensors[i][self.idx][int(split * len(self)) - 1:])

            self.tensors = tuple(tensors_)
    def set_model_permutations(self):
        self.model_permutations = []
        self.model_unpermutations = []
        for n in range(1, self.N):
            permutation = list(range(2 ** (n - 1)))
            if n > 1:
                while permutation == list(range(2 ** (n - 1))):
                    permutation = torch.randperm(2 ** (n - 1)).numpy().tolist()
            self.model_permutations.append(permutation)

            unpermutation = list(range(len(permutation)))
            for i in range(len(permutation)):
                unpermutation[permutation[i]] = i
            self.model_unpermutations.append(unpermutation)
Example #28
    def forward(self, x):
        lrt_mean = 0.0
        if self.bias is not None:
            lrt_mean = self.bias

        sigma2 = self.sigma * self.sigma
        if self.permute_sigma:
            sigma2 = sigma2.view(-1)[torch.randperm(self.weight.nelement()).cuda()].view(self.weight.shape)

        lrt_std = Variable.sqrt(1e-16 + self.op_nobias(x * x, sigma2))
        if self.training:
            eps = Variable(lrt_std.data.new(lrt_std.size()).normal_())
        else:
            eps = 0.0
        return lrt_mean + lrt_std * eps
Example #29
    def forward(self, x):
        if self.zero_mean:
            lrt_mean = self.op_bias(x, 0.0 * self.weight)
        else:
            lrt_mean = self.op_bias(x, self.weight)

        sigma2 = Variable.exp(self.log_alpha) * self.weight * self.weight
        if self.permute_sigma:
            sigma2 = sigma2.view(-1)[torch.randperm(self.weight.nelement()).cuda()].view(self.weight.shape)

        lrt_std = Variable.sqrt(1e-16 + self.op_nobias(x * x, sigma2))
        if self.training:
            eps = Variable(lrt_std.data.new(lrt_std.size()).normal_())
        else:
            eps = 0.0
        return lrt_mean + lrt_std * eps
Example #30
    def __iter__(self):
        # deterministically shuffle based on epoch
        g = torch.Generator()
        g.manual_seed(self.epoch)
        indices = list(torch.randperm(len(self.dataset), generator=g))

        # add extra samples to make it evenly divisible
        indices += indices[:(self.total_size - len(indices))]
        assert len(indices) == self.total_size

        # subsample
        offset = self.num_samples * self.rank
        indices = indices[offset:offset + self.num_samples]
        assert len(indices) == self.num_samples

        return iter(indices)
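
The shuffle above is fully determined by `self.epoch`, so every distributed rank draws the identical permutation and then takes its own disjoint slice; the epoch has to change between passes for the order to change. A standalone sketch of that seeding trick:

import torch

def epoch_perm(n, epoch):
    # same permutation on every rank for a given epoch, a new one each epoch
    g = torch.Generator()
    g.manual_seed(epoch)
    return torch.randperm(n, generator=g)

print(torch.equal(epoch_perm(8, 0), epoch_perm(8, 0)))  # True: reproducible
print(torch.equal(epoch_perm(8, 0), epoch_perm(8, 1)))  # almost surely False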
Example #31
 def shuffle(self):
     data = list(zip(self.src, self.tgt))
     self.src, self.tgt = zip(*[data[i] for i in torch.randperm(len(data))])
Example #32
# In[6]:

t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0,
       21.0]  # Temperature in degrees Celsius
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4,
       68.4]  # Unknown units
t_c = torch.tensor(t_c).unsqueeze(
    1)  # Add a dimension to get B x N_inputs
t_u = torch.tensor(t_u).unsqueeze(
    1)  # Add a dimension to get B x N_inputs

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

# In[15]:
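
A minimal sketch of how such a split is typically consumed: fit a small model on the normalized training tensors and monitor the loss on the held-out validation tensors (the linear model and hyperparameters below are illustrative assumptions, not taken from the notebook):

import torch
from torch import nn, optim

model = nn.Linear(1, 1)                      # t_c ≈ w * (0.1 * t_u) + b
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.MSELoss()

for epoch in range(3000):
    train_loss = loss_fn(model(train_t_un), train_t_c)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    with torch.no_grad():
        val_loss = loss_fn(model(val_t_un), val_t_c)

print(train_loss.item(), val_loss.item())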
Example #33
def get_data(dataset, num_bits, train=True, valid_frac=None):
    train_dataset = None
    valid_dataset = None
    test_dataset = None

    if train:
        assert valid_frac is not None

    if dataset == 'imagenet-64-fast':
        root = dataset_root('imagenet64_fast')
        c, h, w = (3, 64, 64)

        if train:
            train_dataset = data.ImageNet64Fast(root=root,
                                                train=True,
                                                download=True,
                                                transform=Preprocess(num_bits))

            num_train = len(train_dataset)
            valid_size = int(np.floor(num_train * valid_frac))
            train_size = num_train - valid_size
            train_dataset, valid_dataset = random_split(
                train_dataset, (train_size, valid_size))
        else:
            test_dataset = data.ImageNet64Fast(root=root,
                                               train=False,
                                               download=True,
                                               transform=Preprocess(num_bits))

    elif dataset == 'cifar-10-fast' or dataset == 'cifar-10':
        root = dataset_root('cifar-10')
        c, h, w = (3, 32, 32)

        if dataset == 'cifar-10-fast':
            dataset_class = data.CIFAR10Fast
            train_transform = tvt.Compose(
                [RandomHorizontalFlipTensor(),
                 Preprocess(num_bits)])
            test_transform = Preprocess(num_bits)
        else:
            dataset_class = datasets.CIFAR10
            train_transform = tvt.Compose([
                tvt.RandomHorizontalFlip(),
                tvt.ToTensor(),
                Preprocess(num_bits)
            ])
            test_transform = tvt.Compose(
                [tvt.ToTensor(), Preprocess(num_bits)])

        if train:
            train_dataset = dataset_class(root=root,
                                          train=True,
                                          download=True,
                                          transform=train_transform)

            valid_dataset = dataset_class(
                root=root,
                train=True,
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]

            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = dataset_class(root=root,
                                         train=False,
                                         download=True,
                                         transform=test_transform)
    elif dataset == 'imagenet-32' or dataset == 'imagenet-64':
        if dataset == 'imagenet-32':
            root = dataset_root('imagenet32')
            c, h, w = (3, 32, 32)
            dataset_class = data.ImageNet32
        else:
            root = dataset_root('imagenet64')
            c, h, w = (3, 64, 64)
            dataset_class = data.ImageNet64

        if train:
            train_dataset = dataset_class(
                root=root,
                train=True,
                download=True,
                transform=tvt.Compose([tvt.ToTensor(),
                                       Preprocess(num_bits)]))

            num_train = len(train_dataset)
            valid_size = int(np.floor(num_train * valid_frac))
            train_size = num_train - valid_size
            train_dataset, valid_dataset = random_split(
                train_dataset, (train_size, valid_size))
        else:
            test_dataset = dataset_class(
                root=root,
                train=False,
                download=True,
                transform=tvt.Compose([tvt.ToTensor(),
                                       Preprocess(num_bits)]))
    elif dataset == 'celeba-hq-64-fast':
        root = dataset_root('celeba_hq_64_fast')
        c, h, w = (3, 64, 64)

        train_transform = tvt.Compose(
            [RandomHorizontalFlipTensor(),
             Preprocess(num_bits)])
        test_transform = Preprocess(num_bits)

        if train:
            train_dataset = data.CelebAHQ64Fast(root=root,
                                                train=True,
                                                download=True,
                                                transform=train_transform)

            valid_dataset = data.CelebAHQ64Fast(
                root=root,
                train=True,
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]

            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = data.CelebAHQ64Fast(root=root,
                                               train=False,
                                               download=True,
                                               transform=test_transform)

    elif dataset == 'mnist':
        root = dataset_root('mnist')
        c, h, w = (1, 28, 28)

        train_transform = tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)])

        test_transform = tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)])

        if train:
            train_dataset = datasets.MNIST(root=root,
                                           train=True,
                                           download=True,
                                           transform=train_transform)

            valid_dataset = datasets.MNIST(
                root=root,
                train=True,
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]

            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = datasets.MNIST(root=root,
                                          train=False,
                                          download=True,
                                          transform=test_transform)

    elif dataset == 'svhn':
        root = dataset_root('svhn')
        c, h, w = (3, 32, 32)

        train_transform = tvt.Compose([
            tvt.ToTensor(),
            RandomHorizontalFlipTensor(),
            Preprocess(num_bits)
        ])
        test_transform = tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)])

        if train:
            train_dataset = datasets.SVHN(root=root,
                                          split='train',
                                          download=True,
                                          transform=train_transform)

            valid_dataset = datasets.SVHN(
                root=root,
                split='train',
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]

            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = datasets.SVHN(root=root,
                                         split='test',
                                         download=True,
                                         transform=test_transform)
    else:
        raise RuntimeError('Unknown dataset')

    if train:
        return train_dataset, valid_dataset, (c, h, w)
    else:
        return test_dataset, (c, h, w)
Example #34
def train(epoch, data):
    net.train().to(device)
    # zero the parameter gradients
    optimizer.zero_grad()
    inputs, labels = data
    # print(type(inputs))
    inputs = torch.from_numpy(np.asarray(inputs).astype(np.float32))
    permutation = torch.randperm(inputs.size()[0])
    running_loss = 0
    # print(inputs.size()[0])
    count = 0
    batch_losses = []
    for batch_idx in range(0, inputs.size()[0], BATCH_SIZE):
        t0 = time.time()
        count += 1
        optimizer.zero_grad()
        indices = permutation[batch_idx:batch_idx + BATCH_SIZE]
        batch_x, batch_y = inputs[indices], labels[indices]
        #  print(batch_x.shape)
        batch_x = batch_x.reshape(batch_x.size()[0], 1,
                                  batch_x.size()[1],
                                  batch_x.size()[2])
        # print("###### ", batch_x.shape)
        outputs = net(batch_x.to(device)).to(device)
        #  print(outputs.shape)
        loss = criterion(outputs.to(device), batch_y.to(device))
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        batch_losses.append(loss.item())

        sys.stdout.write('\r')
        sys.stdout.write(" Train data epoch %d [%-100s] %d/%d \t Loss:%f" %
                         (epoch, '=' * int(
                             (batch_idx / inputs.size()[0]) * 100), batch_idx,
                          inputs.size()[0], loss.item()))
        sys.stdout.flush()
        time.sleep(0.25)
        if batch_idx % inputs.size()[0] == 0:
            test_output = net(batch_x.to(device)).to(device)
            pred_y = torch.argmax(test_output, dim=1)
            print(
                float(
                    np.array([(x == y) for x, y in zip(batch_y, pred_y)
                              ]).astype(int).sum()) / float(batch_y.size()[0]))
            accuracy = float(
                np.array([(x == y) for x, y in zip(batch_y, pred_y)
                          ]).astype(int).sum()) / float(batch_y.size()[0])
            print(
                "numerateur:",
                float(
                    np.array([(x == y) for x, y in zip(batch_y, pred_y)
                              ]).astype(int).sum()))
            print('Epoch: ', epoch,
                  '| train loss: %.4f' % loss.cpu().data.numpy(),
                  '| train accuracy: %.2f' % accuracy)

    print("\n")
    print('Epoch {}, loss {}, took {} seconds'.format(epoch, loss.item(),
                                                      time.time() - t0))
    print("\n")
Example #35
def main():
    resnet = resnet34(pretrained=True, progress=True)
    extractor = nn.Sequential(*list(resnet.children())[:-1]).eval().cuda()
    resnet = list(resnet.children())[-1].eval().cuda()

    # Load an existing DeepMDS model or train a new one.
    if os.path.exists('DeepMDS/weights.pt'):
        with open('DeepMDS/layerSizes.pkl', 'rb') as f:
            layerSizes = pickle.load(f)
        deepMDS = DeepMDS(layerSizes)
        deepMDS.load_state_dict(th.load('DeepMDS/weights.pt'))
        deepMDS = deepMDS.cuda()
    else:
        layerSizes = (512, 256, 128, 64, 32)
        with open('DeepMDS/layerSizes.pkl', 'wb') as f:
            pickle.dump(layerSizes, f)
        deepMDS = DeepMDS(layerSizes)
        deepMDS = deepMDS.train().cuda()

        numLayers = len(layerSizes)
        epochs = 10
        batchSize = 1024
        lr = .005
        optimizer = th.optim.Adam(deepMDS.parameters(), lr=lr)

        while True:
            X = getDataLoader(512)
            embeddings = []
            with th.no_grad():
                for x, _ in tqdm(X):
                    x = x.cuda()
                    embeddings.append(extractor(x).squeeze(2).squeeze(2))
            X = th.cat(embeddings)
            # embeddings, _ = loadImageNetEmbeddings()
            # X = th.cuda.FloatTensor(embeddings).cuda()

            N = len(X)
            numBatches = N // batchSize
            for layersToTrain in range(1, numLayers + 1):
                print()
                print('Training layers less than:', layersToTrain)
                for epoch in range(1, epochs + 1):
                    lossSum = 0.
                    print('epoch:', epoch, '\tloss:', end=' ')
                    X = X[th.randperm(N)]
                    for b in range(numBatches):
                        x = X[b * batchSize:(b + 1) * batchSize]

                        activations = deepMDS.trainForward(x, layersToTrain)

                        dist_in1 = th.norm(x[:batchSize // 2] -
                                           x[batchSize // 2:],
                                           dim=1,
                                           keepdim=True)
                        dist_in2 = th.norm(x[0::2] - x[1::2],
                                           dim=1,
                                           keepdim=True)

                        losses = []
                        for y in activations:
                            crit = criterion(dist_in1, y[:batchSize // 2],
                                             y[batchSize // 2:])
                            if crit:
                                losses += [crit]
                            crit = criterion(dist_in2, y[0::2], y[1::2])
                            if crit:
                                losses += [crit]

                        for py, y in zip(activations, activations[1:]):
                            din = th.norm(py[:batchSize // 2] -
                                          py[batchSize // 2:],
                                          dim=1,
                                          keepdim=True)
                            crit = criterion(din, y[:batchSize // 2],
                                             y[batchSize // 2:])
                            if crit:
                                losses += [crit]
                            din = th.norm(py[0::2] - py[1::2],
                                          dim=1,
                                          keepdim=True)
                            crit = criterion(din, y[0::2], y[1::2])
                            if crit:
                                losses += [crit]

                        loss = 0.
                        if len(losses):
                            loss = sum(losses) / len(losses)

                        if loss:
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                        lossSum += float(loss)

                    print(lossSum / numBatches)
            th.save(deepMDS.state_dict(), 'DeepMDS/weights.pt')

    classifier = nn.Linear(layerSizes[-1], 1000).cuda()
    # either train both mds and a linear layer
    net = nn.Sequential(deepMDS, classifier).cuda()
    # or just a new linear layer
    # net = classifier

    if os.path.exists('DeepMDS/classifierWeights.pt'):
        net.load_state_dict(th.load('DeepMDS/classifierWeights.pt'))

    lossF = nn.CrossEntropyLoss()
    # lr = .00987654321
    lr = .00287654321
    optimizer = th.optim.Adam(net.parameters(), lr)
    epochs = 20

    batchSize = 256
    while True:
        X = getDataLoader(batchSize)
        embeddings = []
        with th.no_grad():
            for x, _ in tqdm(X):
                x = x.cuda()
                embeddings.append(extractor(x).squeeze(2).squeeze(2))
            X = th.cat(embeddings)
            del x, embeddings
        #     embeddings, _ = loadImageNetEmbeddings()
        #     X = th.cuda.FloatTensor(embeddings).cuda()
        numBatches = len(X) // batchSize
        for epoch in range(1, epochs):
            avg_loss = 0.
            trainacc = 0.
            N = X.shape[0]
            X = X[th.randperm(N)]
            for b in range(numBatches):
                with th.no_grad():
                    emb = X[b * batchSize:(b + 1) * batchSize]
                    y = resnet(emb).argmax(axis=1)
                # forward
                # yhat = net(deepMDS(emb))
                yhat = net(emb)
                # compute error
                loss = lossF(yhat, y)
                avg_loss += float(loss)
                # backprop
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                trainacc += float(
                    (yhat.argmax(axis=1) == y).sum()) / float(batchSize)
            print('epoch', epoch)
            print('TrainAcc:', trainacc / numBatches)
            th.save(net.state_dict(), 'DeepMDS/classifierWeights.pt')
        del loss, emb, y, yhat
Example #36
def makeData(srcFile, tgtFile, train_oracle_file, train_src_rouge_file,
             srcDicts, tgtDicts):
    src, tgt = [], []
    src_raw = []
    src_rouge = []
    oracle = []
    sizes = []
    count, ignored = 0, 0

    logger.info('Processing %s & %s ...' % (srcFile, tgtFile))
    srcF = open(srcFile, encoding='utf-8')
    tgtF = open(tgtFile, encoding='utf-8')
    oracleF = open(train_oracle_file, encoding='utf-8')
    src_rougeF = open(train_src_rouge_file, encoding='utf-8')

    while True:
        sline = srcF.readline()
        tline = tgtF.readline()
        oline = oracleF.readline()
        src_rouge_line = src_rougeF.readline()

        # normal end of file
        if sline == "" and tline == "":
            break

        # source or target does not have same number of lines
        if sline == "" or tline == "" or src_rouge_line == "":
            logger.info(
                'WARNING: source and target do not have the same number of sentences'
            )
            break

        sline = sline.strip()
        tline = tline.strip()
        oline = oline.strip()
        src_rouge_line = src_rouge_line.strip()

        # source and/or target are empty
        if sline == "" or tline == "" or ('None'
                                          in oline) or ('nan'
                                                        in src_rouge_line):
            logger.info('WARNING: ignoring an empty line (' + str(count + 1) +
                        ')')
            continue

        srcSents = sline.split('##SENT##')[:max_doc_len]
        tgtSents = tline.split('##SENT##')
        rouge_gains = src_rouge_line.split('\t')[1:]
        srcWords = [x.split(' ')[:seq_length] for x in srcSents]
        tgtWords = ' '.join(tgtSents)
        oracle_combination = make_tuple(oline.split('\t')[0])
        # oracle_combination = [(x + 1) for x in oracle_combination] + [0]
        oracle_combination = [x for x in oracle_combination]  # no sentinel

        index_out_of_range = [x >= max_doc_len for x in oracle_combination]
        if any(index_out_of_range):
            logger.info('WARNING: oracle exceeds max_doc_len, ignoring (' +
                        str(count + 1) + ')')
            continue

        src_raw.append(srcSents)

        src.append([
            srcDicts.convertToIdx(word, neusum.Constants.UNK_WORD)
            for word in srcWords
        ])
        tgt.append(tgtWords)

        oracle.append(torch.LongTensor(oracle_combination))
        rouge_gains = [[float(gain) for gain in x.split(' ')]
                       for x in rouge_gains]
        # rouge_gains = [torch.FloatTensor(x) for x in rouge_gains]
        # rouge_gains = [(x - torch.min(x)) / (torch.max(x) - torch.min(x)) for x in rouge_gains][:1]
        rouge_gains = [numpy.array(x) for x in rouge_gains]
        rouge_gains = [(x - numpy.min(x)) / (numpy.max(x) - numpy.min(x))
                       for x in rouge_gains]
        rouge_gains = [
            torch.from_numpy(np_softmax(x, norm_lambda)).float()
            for x in rouge_gains
        ]
        src_rouge.append(rouge_gains)

        sizes += [len(srcWords)]

        count += 1

        if count % report_every == 0:
            logger.info('... %d sentences prepared' % count)

    srcF.close()
    tgtF.close()
    oracleF.close()
    src_rougeF.close()

    if shuffle == 1:
        logger.info('... shuffling sentences')
        perm = torch.randperm(len(src))
        src = [src[idx] for idx in perm]
        src_raw = [src_raw[idx] for idx in perm]
        tgt = [tgt[idx] for idx in perm]
        oracle = [oracle[idx] for idx in perm]
        src_rouge = [src_rouge[idx] for idx in perm]
        sizes = [sizes[idx] for idx in perm]

    logger.info('... sorting sentences by size')
    _, perm = torch.sort(torch.Tensor(sizes))
    src = [src[idx] for idx in perm]
    src_raw = [src_raw[idx] for idx in perm]
    tgt = [tgt[idx] for idx in perm]
    oracle = [oracle[idx] for idx in perm]
    src_rouge = [src_rouge[idx] for idx in perm]

    logger.info(
        'Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
        (len(src), ignored, seq_length))
    return src, src_raw, tgt, oracle, src_rouge
Example #37
    def __call__(self, data: Union[Data, HeteroData]):
        edge_types = self.edge_types
        rev_edge_types = self.rev_edge_types

        train_data, val_data, test_data = copy(data), copy(data), copy(data)

        if isinstance(data, HeteroData):
            if edge_types is None:
                raise ValueError(
                    "The 'RandomLinkSplit' transform expects 'edge_types' to"
                    "be specified when operating on 'HeteroData' objects")

            if not isinstance(edge_types, list):
                edge_types = [edge_types]
                rev_edge_types = [rev_edge_types]

            stores = [data[edge_type] for edge_type in edge_types]
            train_stores = [train_data[edge_type] for edge_type in edge_types]
            val_stores = [val_data[edge_type] for edge_type in edge_types]
            test_stores = [test_data[edge_type] for edge_type in edge_types]
        else:
            rev_edge_types = [None]
            stores = [data._store]
            train_stores = [train_data._store]
            val_stores = [val_data._store]
            test_stores = [test_data._store]

        for item in zip(stores, train_stores, val_stores, test_stores,
                        rev_edge_types):
            store, train_store, val_store, test_store, rev_edge_type = item

            is_undirected = self.is_undirected
            is_undirected &= not store.is_bipartite()
            is_undirected &= (rev_edge_type is None
                              or store._key == data[rev_edge_type]._key)

            edge_index = store.edge_index
            if is_undirected:
                mask = edge_index[0] <= edge_index[1]
                perm = mask.nonzero(as_tuple=False).view(-1)
                perm = perm[torch.randperm(perm.size(0), device=perm.device)]
            else:
                device = edge_index.device
                perm = torch.randperm(edge_index.size(1), device=device)

            num_val = self.num_val
            if isinstance(num_val, float):
                num_val = int(num_val * perm.numel())
            num_test = self.num_test
            if isinstance(num_test, float):
                num_test = int(num_test * perm.numel())

            num_train = perm.numel() - num_val - num_test
            if num_train <= 0:
                raise ValueError("Insufficient number of edges for training")

            train_edges = perm[:num_train]
            val_edges = perm[num_train:num_train + num_val]
            test_edges = perm[num_train + num_val:]
            train_val_edges = perm[:num_train + num_val]

            num_disjoint = self.disjoint_train_ratio
            if isinstance(num_disjoint, float):
                num_disjoint = int(num_disjoint * train_edges.numel())
            if num_train - num_disjoint <= 0:
                raise ValueError("Insufficient number of edges for training")

            # Create data splits:
            self._split(train_store, train_edges[num_disjoint:], is_undirected,
                        rev_edge_type)
            self._split(val_store, train_edges, is_undirected, rev_edge_type)
            self._split(test_store, train_val_edges, is_undirected,
                        rev_edge_type)

            # Create negative samples:
            num_neg_train = 0
            if self.add_negative_train_samples:
                if num_disjoint > 0:
                    num_neg_train = int(num_disjoint * self.neg_sampling_ratio)
                else:
                    num_neg_train = int(num_train * self.neg_sampling_ratio)
            num_neg_val = int(num_val * self.neg_sampling_ratio)
            num_neg_test = int(num_test * self.neg_sampling_ratio)

            num_neg = num_neg_train + num_neg_val + num_neg_test

            size = store.size()
            if store._key is None or store._key[0] == store._key[-1]:
                size = size[0]
            neg_edge_index = negative_sampling(edge_index,
                                               size,
                                               num_neg_samples=num_neg,
                                               method='sparse')

            # Create labels:
            if num_disjoint > 0:
                train_edges = train_edges[:num_disjoint]
            self._create_label(
                store,
                train_edges,
                neg_edge_index[:, num_neg_val + num_neg_test:],
                out=train_store,
            )
            self._create_label(
                store,
                val_edges,
                neg_edge_index[:, :num_neg_val],
                out=val_store,
            )
            self._create_label(
                store,
                test_edges,
                neg_edge_index[:, num_neg_val:num_neg_val + num_neg_test],
                out=test_store,
            )

        return train_data, val_data, test_data
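
A hedged usage sketch for the transform above (this mirrors `RandomLinkSplit` from `torch_geometric.transforms`; the toy homogeneous graph here is an assumption for illustration):

import torch
from torch_geometric.data import Data
from torch_geometric.transforms import RandomLinkSplit

edge_index = torch.tensor([[0, 1, 2, 3, 4, 5, 0, 2],
                           [1, 2, 3, 4, 5, 0, 3, 5]])
data = Data(edge_index=edge_index, num_nodes=6)

transform = RandomLinkSplit(num_val=0.25, num_test=0.25, is_undirected=False)
train_data, val_data, test_data = transform(data)
# each split carries edge_label / edge_label_index with positives and sampled negatives
print(train_data.edge_label_index.shape, val_data.edge_label_index.shape)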
Example #38
def main():
    parser = argparse.ArgumentParser(
        description="OGBL-Citation2 (Cluster-GCN)")
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--log_steps", type=int, default=1)
    parser.add_argument("--num_partitions", type=int, default=15000)
    parser.add_argument("--num_workers", type=int, default=12)
    parser.add_argument("--num_layers", type=int, default=3)
    parser.add_argument("--hidden_channels", type=int, default=256)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--epochs", type=int, default=200)
    parser.add_argument("--eval_steps", type=int, default=10)
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name="ogbl-citation2")
    split_edge = dataset.get_edge_split()
    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    cluster_data = ClusterData(
        data,
        num_parts=args.num_partitions,
        recursive=False,
        save_dir=dataset.processed_dir,
    )

    loader = ClusterLoader(
        cluster_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )

    # We randomly pick some training samples that we want to evaluate on:
    torch.manual_seed(12345)
    idx = torch.randperm(split_edge["train"]["source_node"].numel())[:86596]
    split_edge["eval_train"] = {
        "source_node": split_edge["train"]["source_node"][idx],
        "target_node": split_edge["train"]["target_node"][idx],
        "target_node_neg": split_edge["valid"]["target_node_neg"],
    }

    model = GCN(
        data.x.size(-1),
        args.hidden_channels,
        args.hidden_channels,
        args.num_layers,
        args.dropout,
    ).to(device)
    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name="ogbl-citation2")
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(list(model.parameters()) +
                                     list(predictor.parameters()),
                                     lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, loader, optimizer, device)
            print(f"Run: {run + 1:02d}, Epoch: {epoch:02d}, Loss: {loss:.4f}")

            if epoch > 49 and epoch % args.eval_steps == 0:
                result = test(
                    model,
                    predictor,
                    data,
                    split_edge,
                    evaluator,
                    batch_size=64 * 1024,
                    device=device,
                )
                logger.add_result(run, result)

                train_mrr, valid_mrr, test_mrr = result
                print(f"Run: {run + 1:02d}, "
                      f"Epoch: {epoch:02d}, "
                      f"Loss: {loss:.4f}, "
                      f"Train: {train_mrr:.4f}, "
                      f"Valid: {valid_mrr:.4f}, "
                      f"Test: {test_mrr:.4f}")

        print("ClusterGCN")
        logger.print_statistics(run)
    print("ClusterGCN")
    logger.print_statistics()
Example #39
# shape = (64, 64, 4)
# denoise_model = Models.get_denoise_model(shape)

#   ===================================== Experiment 6 =====================================

# Data used for training and testing are contained into an external hard disk 'Seagate Expansion Drive'
Inputs = np.load(
    '/media/federico/Seagate Expansion Drive/DataProject/EDS_Data/10_diffShapesBEST/Params_DataSet.npy'
)
Labels = np.load(
    '/media/federico/Seagate Expansion Drive/DataProject/EDS_Data/10_diffShapesBEST/Labels_DataSet.npy'
)

# Generate random train and test dataset
split = 0.75
random_indices_poly = torch.randperm(len(Inputs))
train_split_poly = int(split * len(Inputs))
train_random_indices_poly = random_indices_poly[:train_split_poly]
test_random_indices_poly = random_indices_poly[train_split_poly:]

# Generator classes for loading polygon sequences from a sequence folder
denoise_generator_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=train_random_indices_poly,
    inputs=Inputs,
    labels=Labels,
    batch_size=50)
denoise_generator_val_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=test_random_indices_poly,
    inputs=Inputs,
    labels=Labels,
    batch_size=50)
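
A compact sketch of the same random split, with small synthetic arrays standing in for the .npy files loaded above:

import numpy as np
import torch

inputs = np.random.rand(100, 64, 64, 4)     # placeholder data
labels = np.random.rand(100, 10)            # placeholder labels

split = 0.75
perm = torch.randperm(len(inputs))
n_train = int(split * len(inputs))
train_idx = perm[:n_train].numpy()
test_idx = perm[n_train:].numpy()
train_inputs, train_labels = inputs[train_idx], labels[train_idx]
test_inputs, test_labels = inputs[test_idx], labels[test_idx]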
Exemplo n.º 40
0
def makeData(srcFile, tgtFile, srcDicts, tgtDicts):
    src, tgt = [], []
    sizes = []
    count, ignored = 0, 0

    print('Processing %s & %s ...' % (srcFile, tgtFile))
    srcF = codecs.open(srcFile, "r", "utf-8")
    tgtF = codecs.open(tgtFile, "r", "utf-8")

    while True:
        sline = srcF.readline()
        tline = tgtF.readline()

        # normal end of file
        if sline == "" and tline == "":
            break

        # source or target does not have the same number of lines
        if sline == "" or tline == "":
            print(
                'WARNING: source and target do not have the same number of sentences'
            )
            break

        sline = sline.strip()
        tline = tline.strip()

        # source and/or target are empty
        if sline == "" or tline == "":
            print('WARNING: ignoring an empty line (' + str(count + 1) + ')')
            continue

        srcWords = sline.split()
        tgtWords = tline.split()

        if len(srcWords) <= opt.seq_length and len(tgtWords) <= opt.seq_length:

            srcTensor, sunky = srcDicts.convertToIdx(srcWords,
                                                     onmt.Constants.UNK_WORD)
            tgtTensor, tunky = tgtDicts.convertToIdx(tgtWords,
                                                     onmt.Constants.UNK_WORD,
                                                     onmt.Constants.BOS_WORD,
                                                     onmt.Constants.EOS_WORD)
            if (not sunky and not tunky) or not opt.remove_unk:
                src += [srcTensor]
                tgt += [tgtTensor]

                sizes += [len(srcWords)]
            else:
                ignored += 1
        else:
            ignored += 1

        count += 1

        if count % opt.report_every == 0:
            print('... %d sentences prepared' % count)

    srcF.close()
    tgtF.close()

    if opt.shuffle == 1:
        print('... shuffling sentences')
        perm = torch.randperm(len(src))
        src = [src[idx] for idx in perm]
        tgt = [tgt[idx] for idx in perm]
        sizes = [sizes[idx] for idx in perm]

    print('... sorting sentences by size')
    _, perm = torch.sort(torch.Tensor(sizes))
    src = [src[idx] for idx in perm]
    tgt = [tgt[idx] for idx in perm]

    print('Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
          (len(src), ignored, opt.seq_length))

    return src, tgt
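
The shuffle step at the end of makeData indexes several parallel Python lists with one shared permutation so that sentence pairs stay aligned; a minimal sketch of that idiom on toy data:

import torch

src = ['a b c', 'd e', 'f g h i']
tgt = ['x y z', 'u v', 'p q r s']
sizes = [len(s.split()) for s in src]

perm = torch.randperm(len(src))               # one permutation shared by all lists
src = [src[i] for i in perm]
tgt = [tgt[i] for i in perm]
sizes = [sizes[i] for i in perm]

_, order = torch.sort(torch.Tensor(sizes))    # then sort by length, as above
src = [src[i] for i in order]
tgt = [tgt[i] for i in order]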
Exemplo n.º 41
0
 def __iter__(self):
     return iter(th.randperm(self.num_samples).long())
Exemplo n.º 42
0
    def Reset(self):

        self.unuse_index = torch.randperm(self.num_train_sample).tolist()
Exemplo n.º 43
0
def train(**options):
    logs_dir = os.path.join(options['logdir'], options['name'])
    writer = SummaryWriter(log_dir=logs_dir)

    f_train_x = h5py.File(os.path.join(options['data_path'], 'train_x.hdf5'),
                          'r')
    X_train = f_train_x['train_x'][:]
    f_train_x.close()

    f_train_y = h5py.File(os.path.join(options['data_path'], 'train_y.hdf5'),
                          'r')
    y_train = f_train_y['train_y'][:]
    f_train_y.close()

    f_val_x = h5py.File(os.path.join(options['data_path'], 'val_x.hdf5'), 'r')
    X_val = f_val_x['val_x'][:]
    f_val_x.close()

    f_val_y = h5py.File(os.path.join(options['data_path'], 'val_y.hdf5'), 'r')
    y_val = f_val_y['val_y'][:]
    f_val_y.close()

    # Define datasets
    train_dataset = ClassifierDataset(
        torch.from_numpy(X_train).float(),
        torch.from_numpy(y_train).long())
    val_dataset = ClassifierDataset(
        torch.from_numpy(X_val).float(),
        torch.from_numpy(y_val).long())

    if not options['under']:
        target_list = []
        for _, t in train_dataset:
            target_list.append(t)

        target_list = torch.tensor(target_list)
        target_list = target_list[torch.randperm(len(target_list))]

        class_count = [i for i in get_class_distribution(y_train).values()]
        class_weights = 1. / torch.tensor(class_count, dtype=torch.float)
        print(class_weights)

        class_weights_all = class_weights[target_list]

        weighted_sampler = WeightedRandomSampler(
            weights=class_weights_all,
            num_samples=len(class_weights_all),
            replacement=True)

    # Run on CUDA if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    model = AuctionNet()
    # Check if should load checkpoints
    if options['checkpoints']:
        model.load_state_dict(torch.load(options['checkpoints']))
    model.to(device)

    # Define data loaders
    if not options['under']:
        train_loader = DataLoader(dataset=train_dataset,
                                  num_workers=12,
                                  batch_size=options['batch_size'],
                                  sampler=weighted_sampler)
    else:
        train_loader = DataLoader(dataset=train_dataset,
                                  num_workers=12,
                                  batch_size=options['batch_size'],
                                  shuffle=True)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=12,
                            batch_size=options['batch_size'])

    if not options['under']:
        criterion = torch.nn.CrossEntropyLoss(weight=class_weights.to(device))
    else:
        criterion = torch.nn.CrossEntropyLoss()
    # optimizer = torch.optim.AdamW(model.parameters(), lr=options['lr'], weight_decay=1e-6, amsgrad=True)
    optimizer = torch.optim.AdamW(model.parameters())
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.5,
                                  patience=10,
                                  verbose=True)

    accuracy_stats = {'train': [], "val": []}
    loss_stats = {'train': [], "val": []}

    print("Begin training.")
    for e in tqdm(range(1, options['num_epochs'] + 1)):
        # TRAINING
        train_epoch_loss = 0
        train_epoch_acc = 0
        model.train()
        for X_train_batch, y_train_batch in train_loader:
            X_train_batch, y_train_batch = X_train_batch.to(
                device), y_train_batch.to(device)
            optimizer.zero_grad()
            y_train_pred = model(X_train_batch)
            train_loss = criterion(y_train_pred, y_train_batch)
            train_acc = multi_acc(y_train_pred, y_train_batch)
            train_loss.backward()
            optimizer.step()

            train_epoch_loss += train_loss.item()
            train_epoch_acc += train_acc.item()

        # VALIDATION
        with torch.no_grad():
            val_epoch_loss = 0
            val_epoch_acc = 0
            model.eval()
            for X_val_batch, y_val_batch in val_loader:
                X_val_batch, y_val_batch = X_val_batch.to(
                    device), y_val_batch.to(device)
                y_val_pred = model(X_val_batch)
                val_loss = criterion(y_val_pred, y_val_batch)
                val_acc = multi_acc(y_val_pred, y_val_batch)
                val_epoch_loss += val_loss.item()
                val_epoch_acc += val_acc.item()

        val_epoch_loss /= len(val_loader)
        val_epoch_acc /= len(val_loader)
        train_epoch_loss /= len(train_loader)
        train_epoch_acc /= len(train_loader)
        scheduler.step(val_epoch_loss)
        loss_stats['train'].append(train_epoch_loss)
        loss_stats['val'].append(val_epoch_loss)
        accuracy_stats['train'].append(train_epoch_acc)
        accuracy_stats['val'].append(val_epoch_acc)
        writer.add_scalar("Test-loss-avg", val_epoch_loss, global_step=e)
        writer.add_scalar("Train-loss-avg", train_epoch_loss, global_step=e)
        writer.add_scalar("Test-accuracy", val_epoch_acc, global_step=e)
        writer.add_scalar("Train-accuracy", train_epoch_acc, global_step=e)
        print(f'Epoch {e + 0:03}: | Train Loss: {train_epoch_loss:.5f} | '
              f'Val Loss: {val_epoch_loss:.5f} | '
              f'Train Acc: {train_epoch_acc:.3f}| '
              f'Val Acc: {val_epoch_acc:.3f}')
        torch.save(
            model.state_dict(),
            '/home/wingman2/models/pref/varijanta_under_2m_1/auction' +
            str(e) + ".pt")
Exemplo n.º 44
0
def get_zipped_dataloaders(
        data_path: str,
        batch_size: int,
        num_worker=1,
        use_valid=False) -> (DataLoader, DataLoader, DataLoader):
    """
        Returns dataloader instances of the ZippedDataset for training,
        validation and testing.

        data_path -- Path to the zip-file for the ZippedDataset
        batch_size -- Amount of samples contained per returned tensor of the dataset

        Keyword arguments: 
        num_worker -- Used for the DataSet class, should be 1 or results in runtime errors
        use_valid -- If True, the validation and test set are seperate DataSets (default False)
    """
    train_transforms = transforms.Compose([
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

    train_set = ZippedDataset(os.path.join(data_path, 'index-train.zip'),
                              os.path.join(data_path, 'index-train.txt'),
                              transform=train_transforms)
    val_set = ZippedDataset(os.path.join(data_path, 'index-val.zip'),
                            os.path.join(data_path, 'index-val.txt'))

    if use_valid:
        num_sample_valid = int(len(train_set) * 0.1)
        train_set_index = torch.randperm(len(train_set))
        train_loader = DataLoader(train_set,
                                  sampler=torch.utils.data.SubsetRandomSampler(
                                      train_set_index[:-num_sample_valid]),
                                  batch_size=batch_size,
                                  num_workers=num_worker,
                                  pin_memory=True)

        val_loader = DataLoader(train_set,
                                sampler=torch.utils.data.SubsetRandomSampler(
                                    train_set_index[-num_sample_valid:]),
                                batch_size=batch_size,
                                num_workers=num_worker,
                                pin_memory=True)

        test_loader = DataLoader(val_set,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 pin_memory=True)
    else:
        train_loader = DataLoader(train_set,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  pin_memory=True)
        val_loader = DataLoader(val_set,
                                batch_size=batch_size,
                                shuffle=True,
                                pin_memory=True)
        test_loader = val_loader

    return train_loader, val_loader, test_loader
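
The use_valid branch carves a validation split out of the training set by permuting its indices once and handing the two halves to SubsetRandomSampler; a minimal sketch of that split on a generic dataset:

import torch
from torch.utils.data import DataLoader, SubsetRandomSampler, TensorDataset

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
num_valid = int(len(dataset) * 0.1)
indices = torch.randperm(len(dataset))

train_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(indices[:-num_valid]))
val_loader = DataLoader(dataset, batch_size=16,
                        sampler=SubsetRandomSampler(indices[-num_valid:]))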
Exemplo n.º 45
0
def process_train_MAM_data(spec, config=None):
    """Process training data for the masked acoustic model"""

    dr = config['downsample_rate'] if config is not None else DR
    hidden_size = config['hidden_size'] if config is not None else HIDDEN_SIZE
    mask_proportion = config[
        'mask_proportion'] if config is not None else MASK_PROPORTION
    mask_consecutive = config[
        'mask_consecutive'] if config is not None else MASK_CONSECUTIVE

    with torch.no_grad():
        # if self.duo_feature: dataloader will output `source_spec` and `target_spec`
        if len(spec) == 2:
            source_spec = spec[0]
            target_spec = spec[1]
        elif len(spec) == 1:
            source_spec = spec[0]
            target_spec = copy.deepcopy(spec[0])
        else:
            raise NotImplementedError(
                'Input spec should be either (spec,) or (source_spec, target_spec), where `spec` has shape BxTxD.'
            )

        # Down sample
        spec_masked = down_sample_frames(
            source_spec, dr)  # (batch_size, seq_len, mel_dim * dr)
        spec_stacked = down_sample_frames(
            target_spec, dr)  # (batch_size, seq_len, mel_dim * dr)
        assert (spec_masked.shape[1] == spec_stacked.shape[1]
                ), 'Input and output spectrogram should have the same shape'

        # Record length for each uttr
        spec_len = np.sum(np.sum(spec_stacked.data.numpy(), axis=-1) != 0,
                          axis=-1)
        spec_len = [int(sl) for sl in spec_len]

        batch_size = spec_stacked.shape[0]
        seq_len = spec_stacked.shape[1]

        pos_enc = position_encoding(
            seq_len, hidden_size,
            batch_size)  # (batch_size, seq_len, hidden_size)
        mask_label = np.zeros_like(spec_stacked)
        attn_mask = np.ones((batch_size, seq_len))  # (batch_size, seq_len)

        for idx in range(len(spec_stacked)):

            # determine whether to mask / random / or do nothing to the frame
            dice = torch.rand(1).data.cpu()
            valid_index_range = int(
                spec_len[idx] - mask_consecutive -
                1)  # compute valid len for consecutive masking
            proportion = int(spec_len[idx] * mask_proportion //
                             mask_consecutive)
            # draw `proportion` samples from the range [0, valid_index_range) without replacement
            chosen_index = torch.randperm(
                valid_index_range).data.cpu().numpy()[:proportion]

            # mask to zero
            if bool(dice < 0.8):
                for i in range(mask_consecutive):
                    spec_masked[idx][chosen_index + i] = 0
            # replace to random frames
            elif bool(dice >= 0.8) and bool(dice < 0.9):
                random_index = torch.randperm(
                    valid_index_range).data.cpu().numpy()[:proportion]
                for i in range(mask_consecutive):
                    spec_masked[idx][chosen_index +
                                     i] = spec_masked[idx][random_index + i]
            # do nothing
            else:
                pass

            # the gradients will be calculated on all chosen frames
            mask_label[idx][chosen_index] = 1

            # zero vectors for padding dimension
            pos_enc[idx][spec_len[idx]:] = 0
            attn_mask[idx][spec_len[idx]:] = 0

        spec_masked = spec_masked.to(dtype=torch.float32)
        pos_enc = torch.FloatTensor(pos_enc).to(dtype=torch.float32)
        mask_label = torch.ByteTensor(mask_label).to(dtype=torch.uint8)
        attn_mask = torch.FloatTensor(attn_mask).to(dtype=torch.float32)
        spec_stacked = spec_stacked.to(dtype=torch.float32)

    return spec_masked, pos_enc, mask_label, attn_mask, spec_stacked
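
Stripped of batching and positional encodings, the masking strategy draws a fixed proportion of start frames without replacement and zeroes a short consecutive span after each one; a toy sketch of just that index selection:

import torch

seq_len, mel_dim = 100, 80
mask_proportion, mask_consecutive = 0.15, 3

spec = torch.randn(seq_len, mel_dim)                    # one toy utterance
proportion = int(seq_len * mask_proportion // mask_consecutive)
valid_index_range = seq_len - mask_consecutive - 1
chosen_index = torch.randperm(valid_index_range)[:proportion]   # no replacement
for i in range(mask_consecutive):
    spec[chosen_index + i] = 0                          # zero each consecutive span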
Exemplo n.º 46
0
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        train_x,
                                                        batch_size=batch_size)
    valid_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        valid_x,
                                                        batch_size=batch_size)
    test_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                       test_x,
                                                       batch_size=batch_size)
    print("[{}]   train LL {}   valid LL {}   test LL {}".format(
        epoch_count, train_ll / train_N, valid_ll / valid_N, test_ll / test_N))
    einet.train()
    #####

    idx_batches = torch.randperm(train_N, device=device).split(batch_size)

    total_ll = 0.0
    for idx in idx_batches:
        batch_x = train_x[idx, :]
        outputs = einet.forward(batch_x)
        ll_sample = EinsumNetwork.log_likelihoods(outputs)
        log_likelihood = ll_sample.sum()
        log_likelihood.backward()

        einet.em_process_batch()
        total_ll += log_likelihood.detach().item()

    einet.em_update()

if fashion_mnist:
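
The idx_batches line is a compact way to run one epoch over shuffled mini-batches: permute all row indices once, then split the permutation into chunks. A standalone sketch of that idiom:

import torch

train_x = torch.randn(1000, 28 * 28)    # toy training matrix
batch_size = 100
for idx in torch.randperm(len(train_x)).split(batch_size):
    batch = train_x[idx]                # shuffled mini-batch; the last chunk may be smaller
    # ... forward/backward pass on `batch` goes here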
Exemplo n.º 47
0
def swd(image1,
        image2,
        n_pyramids=None,
        slice_size=7,
        n_descriptors=128,
        n_repeat_projection=128,
        proj_per_repeat=4,
        device="cpu",
        return_by_resolution=False,
        pyramid_batchsize=128):
    # n_repeat_projection * proj_per_repeat = 512
    # Please change these values according to memory usage.
    # original = n_repeat_projection=4, proj_per_repeat=128
    assert image1.size() == image2.size()
    assert image1.ndim == 4 and image2.ndim == 4

    if n_pyramids is None:
        n_pyramids = int(np.rint(np.log2(image1.size(2) // 16)))
    with torch.no_grad():
        # minibatch laplacian pyramid for cuda memory reasons
        pyramid1 = minibatch_laplacian_pyramid(image1,
                                               n_pyramids,
                                               pyramid_batchsize,
                                               device=device)
        pyramid2 = minibatch_laplacian_pyramid(image2,
                                               n_pyramids,
                                               pyramid_batchsize,
                                               device=device)
        result = []

        for i_pyramid in range(n_pyramids + 1):
            # indices
            n = (pyramid1[i_pyramid].size(2) -
                 6) * (pyramid1[i_pyramid].size(3) - 6)
            indices = torch.randperm(n)[:n_descriptors]

            # extract patches on CPU
            # patch : 2rank (n_image*n_descriptors, slice_size**2*ch)
            p1 = extract_patches(pyramid1[i_pyramid],
                                 indices,
                                 slice_size=slice_size,
                                 device="cpu")
            p2 = extract_patches(pyramid2[i_pyramid],
                                 indices,
                                 slice_size=slice_size,
                                 device="cpu")

            p1, p2 = p1.to(device), p2.to(device)

            distances = []
            for j in range(n_repeat_projection):
                # random
                rand = torch.randn(p1.size(1), proj_per_repeat).to(
                    device)  # (slice_size**2*ch)
                rand = rand / torch.std(rand, dim=0, keepdim=True)  # normalize
                # projection
                proj1 = torch.matmul(p1, rand)
                proj2 = torch.matmul(p2, rand)
                proj1, _ = torch.sort(proj1, dim=0)
                proj2, _ = torch.sort(proj2, dim=0)
                d = torch.abs(proj1 - proj2)
                distances.append(torch.mean(d))

            # swd
            result.append(torch.mean(torch.stack(distances)))

        # average over resolution
        result = torch.stack(result) * 1e3
        if return_by_resolution:
            return result.cpu()
        else:
            return torch.mean(result).cpu()
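
The inner loop of swd is the core of the sliced Wasserstein distance: project both patch sets onto a few random directions, sort the projections, and average the absolute differences. A self-contained sketch of that step for two toy descriptor sets:

import torch

p1 = torch.randn(500, 7 * 7 * 3)        # toy patch descriptors (n_points, slice_size**2 * ch)
p2 = torch.randn(500, 7 * 7 * 3)

distances = []
for _ in range(8):                      # repeat with fresh random directions
    rand = torch.randn(p1.size(1), 4)
    rand = rand / torch.std(rand, dim=0, keepdim=True)
    proj1, _ = torch.sort(p1 @ rand, dim=0)
    proj2, _ = torch.sort(p2 @ rand, dim=0)
    distances.append(torch.mean(torch.abs(proj1 - proj2)))
swd_value = torch.mean(torch.stack(distances))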
Exemplo n.º 48
0
    def add_whole_word_mask(self, source, p):
        is_word_start = self.word_starts(source)
        num_to_mask = int(math.ceil(is_word_start.float().sum() * p))
        num_inserts = 0
        if num_to_mask == 0:
            return source

        if self.mask_span_distribution is not None:
            lengths = self.mask_span_distribution.sample(sample_shape=(num_to_mask,))

            # Make sure we have enough to mask
            cum_length = torch.cumsum(lengths, 0)
            while cum_length[-1] < num_to_mask:
                lengths = torch.cat([lengths, self.mask_span_distribution.sample(sample_shape=(num_to_mask,))], dim=0)
                cum_length = torch.cumsum(lengths, 0)

            # Trim to masking budget
            i = 0
            while cum_length[i] < num_to_mask:
                i += 1
            lengths[i] = num_to_mask - (0 if i == 0 else cum_length[i - 1])
            num_to_mask = i + 1
            lengths = lengths[:num_to_mask]

            # Handle 0-length mask (inserts) separately
            lengths = lengths[lengths > 0]
            num_inserts = num_to_mask - lengths.size(0)
            num_to_mask -= num_inserts
            if num_to_mask == 0:
                return self.add_insertion_noise(source, num_inserts / source.size(0))

            assert (lengths > 0).all()
        else:
            lengths = torch.ones((num_to_mask,)).long()
        assert is_word_start[-1] == 0
        word_starts = is_word_start.nonzero()
        indices = word_starts[torch.randperm(word_starts.size(0))[:num_to_mask]].squeeze(1)
        mask_random = torch.FloatTensor(num_to_mask).uniform_() < self.random_ratio

        source_length = source.size(0)
        assert source_length - 1 not in indices
        to_keep = torch.ones(source_length, dtype=torch.bool)
        is_word_start[-1] = 255 # acts as a long length, so spans don't go over the end of doc
        if self.replace_length == 0:
            to_keep[indices] = 0
        else:
            # keep index, but replace it with [MASK]
            source[indices] = self.mask_idx
            source[indices[mask_random]] = torch.randint(1, len(self.vocab), size=(mask_random.sum(),))

        if self.mask_span_distribution is not None:
            assert len(lengths.size()) == 1
            assert lengths.size() == indices.size()
            lengths -= 1
            while indices.size(0) > 0:
                assert lengths.size() == indices.size()
                lengths -= is_word_start[indices + 1].long()
                uncompleted = lengths >= 0
                indices = indices[uncompleted] + 1
                mask_random = mask_random[uncompleted]
                lengths = lengths[uncompleted]
                if self.replace_length != -1:
                    # delete token
                    to_keep[indices] = 0
                else:
                    # keep index, but replace it with [MASK]
                    source[indices] = self.mask_idx
                    source[indices[mask_random]] = torch.randint(1, len(self.vocab), size=(mask_random.sum(),))
        else:
            # A bit faster when all lengths are 1
            while indices.size(0) > 0:
                uncompleted = is_word_start[indices + 1] == 0
                indices = indices[uncompleted] + 1
                mask_random = mask_random[uncompleted]
                if self.replace_length != -1:
                    # delete token
                    to_keep[indices] = 0
                else:
                    # keep index, but replace it with [MASK]
                    source[indices] = self.mask_idx
                    source[indices[mask_random]] = torch.randint(1, len(self.vocab), size=(mask_random.sum(),))

                assert source_length - 1 not in indices

        source = source[to_keep]

        if num_inserts > 0:
            source = self.add_insertion_noise(source, num_inserts / source.size(0))

        return source
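
Stripped of span handling, the masking above samples num_to_mask word-start positions at random and replaces each with the mask id (or, with probability random_ratio, a random token). A reduced sketch under those assumptions, with toy tensors and a hypothetical vocabulary size:

import math
import torch

source = torch.tensor([5, 12, 7, 9, 30, 4, 2])        # toy token ids, last one is EOS
is_word_start = torch.tensor([1, 1, 0, 1, 1, 0, 0])   # toy word-start flags
p, random_ratio, mask_idx, vocab_size = 0.3, 0.1, 3, 100

num_to_mask = int(math.ceil(is_word_start.float().sum() * p))
word_starts = is_word_start.nonzero()
indices = word_starts[torch.randperm(word_starts.size(0))[:num_to_mask]].squeeze(1)
mask_random = torch.FloatTensor(num_to_mask).uniform_() < random_ratio
source[indices] = mask_idx
source[indices[mask_random]] = torch.randint(1, vocab_size, size=(int(mask_random.sum()),))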
Exemplo n.º 49
0
 def add_permuted_noise(self, tokens, p):
     num_words = len(tokens)
     num_to_permute = math.ceil(((num_words * 2) * p) / 2.0)
     substitutions = torch.randperm(num_words - 2)[:num_to_permute] + 1
     tokens[substitutions] = tokens[substitutions[torch.randperm(num_to_permute)]]
     return tokens
Exemplo n.º 50
0
def get_indices_for_classes(data, data_classes):
    # Creates a list of indices of samples from the dataset, corresponding to given classes
    indices = torch.FloatTensor(
        list((data.tensors[1].long() == class_).tolist()
             for class_ in data_classes)).sum(0).nonzero().long().squeeze()
    return indices[torch.randperm(len(indices))]
Exemplo n.º 51
0
def train_func_MI(params,
                  lstm,
                  large,
                  fc,
                  domain_fc,
                  criterion,
                  optimizer,
                  raw_train_set,
                  train_set,
                  teach_lstm=None,
                  teach_fc=None):
    train_loss = 0
    domain_loss = 0
    output_cluster_loss = 0

    mmd = MMD_loss()

    if teach_lstm != None:
        raw_set = pred_label(raw_train_set[1], teach_lstm, teach_fc,
                             params.num_to_return)
        centers = get_centers(train_set + raw_set, lstm)  #+ raw_set, lstm)
        anchor = Anchor1(params.embed_size, params.nclass)

    data = []
    for train in raw_train_set:
        data.append(
            DataLoader(train,
                       batch_size=params.batch_size,
                       shuffle=True,
                       collate_fn=generate_batch))

    enu_data = []
    for d in data[0]:
        enu_data.append(d)
    data[0] = enu_data
    enu_data = []
    for d in data[1]:
        enu_data.append(d)
    data[1] = enu_data

    source_labeled_data = DataLoader(train_set,
                                     batch_size=params.text_batch_size,
                                     shuffle=True,
                                     collate_fn=generate_batch)
    enu_data = []
    for d in source_labeled_data:
        enu_data.append(d)
    source_labeled_data = enu_data

    if teach_lstm != None:
        raw_set_batch = int(
            float(len(raw_set)) * params.text_batch_size / len(train_set))
        raw_set = DataLoader(raw_set,
                             batch_size=raw_set_batch,
                             shuffle=True,
                             collate_fn=generate_batch)
        enu_data = []
        for d in raw_set:
            enu_data.append(d)
        raw_set = enu_data
    else:
        raw_set = source_labeled_data

    random.shuffle(source_labeled_data)
    random.shuffle(data[0])
    random.shuffle(data[1])
    random.shuffle(raw_set)

    print(len(data[0]))
    print(len(data[1]))
    print(len(source_labeled_data))
    print(len(raw_set))

    for (text1, cls1), (text2, cls2), (text3, cls3), (text4, cls4) in zip(
            data[0], data[1], source_labeled_data, raw_set):
        batch_ul, N_, dim_ = text1.size()
        batch_l, _, _ = text3.size()
        optimizer.zero_grad()
        text1, cls1 = text1.to(device), cls1.to(device)
        text2, cls2 = text2.to(device), cls2.to(device)
        text3, cls3 = text3.to(device), cls3.to(device)
        text4, cls4 = text4.to(device), cls4.to(device)

        z1 = lstm(text1)  #(batch, hidden_size)
        z2 = lstm(text2)
        z3 = lstm(text3)
        z4 = lstm(text4)

        # 1. cluster, 2. pesudo loss
        if teach_lstm != None and params.cluster_lamda >= 0:
            output_z4 = anchor(z4, centers, cls4)  #pred_t_ul)
            output_z3 = anchor(z3, centers, cls3)
            cluster_loss = output_z4 + output_z3  #+ anchor(z2, centers_t, pred_t_ul) + anchor(z3, centers_s, cls3)   #+= criterion(output_z3, cls3)

            output_cluster_loss += cluster_loss

            if params.pseudo_t_lamda >= 0:
                output_z4_super = fc(z4)
                pseudo_t_loss = criterion(output_z4_super, cls4)  #pred_t_ul)

        text1 = torch.sum(text1, dim=1).view(batch_ul, dim_)
        text2 = torch.sum(text2, dim=1).view(batch_ul, dim_)
        text3 = torch.sum(text3, dim=1).view(batch_l, dim_)

        neg_n = params.neg_n

        # I (x1, z1) + I (x1, z2)
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        # I (x1, z1)
        z_ave_1 = text1
        z_z_ave_1_score = cos(large[0](z1), z_ave_1).view(-1, 1)
        z_z_ave_1_shuffle_score = []

        z_ave_2 = text2
        z_z_ave_2_score = cos(large[0](z2), z_ave_2).view(-1, 1)
        z_z_ave_2_shuffle_score = []

        local_loss = 0
        if params.mi_lamda_t != 0 and params.mi_lamda_s != 0:
            for i in range(neg_n):
                r = torch.randperm(z1.size(0))
                z_z_ave_1_shuffle_score.append(
                    cos(large[0](z1), z_ave_1[r]).view(
                        -1, 1))  #(global_dis(z, z_ave[r]).view(-1,1))

                r = torch.randperm(z2.size(0))
                z_z_ave_2_shuffle_score.append(
                    cos(large[0](z2), z_ave_2[r]).view(-1, 1))

                local_loss += -torch.mean(
                    z_z_ave_1_score - z_z_ave_1_shuffle_score[i]
                ) * params.mi_lamda_s - torch.mean(
                    z_z_ave_2_score -
                    z_z_ave_2_shuffle_score[i]) * params.mi_lamda_t

        elif params.mi_lamda_t == 0 and params.mi_lamda_s != 0:
            for i in range(neg_n):
                r = torch.randperm(z1.size(0))
                z_z_ave_1_shuffle_score.append(
                    cos(large[0](z1), z_ave_1[r]).view(
                        -1, 1))  #(global_dis(z, z_ave[r]).view(-1,1))
                local_loss += -torch.mean(
                    z_z_ave_1_score -
                    z_z_ave_1_shuffle_score[i]) * params.mi_lamda_s
        elif params.mi_lamda_t != 0 and params.mi_lamda_s == 0:
            for i in range(neg_n):
                r = torch.randperm(z2.size(0))
                z_z_ave_2_shuffle_score.append(
                    cos(large[0](z2), z_ave_2[r]).view(-1, 1))
                ll = (z_z_ave_2_score - z_z_ave_2_shuffle_score[i])
                local_loss += -torch.mean(ll) * params.mi_lamda_t

        if params.lamda != 0:
            if params.domain_mode == 'kl':
                z_s = torch.mean(z1, dim=0).view(-1)
                z_s = F.softmax(z_s, -1)
                z_t = torch.mean(z2, dim=0).view(-1)
                z_t = F.softmax(z_t, -1)
                div_loss = torch.nn.KLDivLoss(size_average=True)(z_s.log(), z_t)\
                                              + torch.nn.KLDivLoss(size_average=True)(z_t.log(), z_s)
            elif params.domain_mode == 'mmd':
                z_s = z1
                z_t = z2
                div_loss = mmd(z_s, z_t)
            elif params.domain_mode == 'adv':
                z_s = grad_reverse(z1)
                z_t = grad_reverse(z2)
                domain_output_s = domain_fc(z_s)
                domain_output_t = domain_fc(z_t)
                div_loss = criterion(domain_output_s, cls1) + criterion(
                    domain_output_t, cls2)
        else:
            div_loss = 0

        ### supervised loss
        output = fc(z3)
        super_loss = criterion(output, cls3)

        if teach_lstm == None:
            whole_loss = local_loss + params.lamda * div_loss + super_loss  #+ entropy_loss * params.entropy_lamda #+ whole_contract_loss * params.contract_lamda
        else:
            whole_loss = local_loss + params.lamda * div_loss + super_loss + pseudo_t_loss * params.pseudo_t_lamda + cluster_loss * params.cluster_lamda

        if params.mi_lamda_t == 0 and params.mi_lamda_s == 0:
            train_loss = 0
        else:
            train_loss += local_loss.item()  # (local_loss.item() - params.alpha * loss_d.item())
        if params.lamda == 0:
            domain_loss = 0
        else:
            domain_loss += div_loss.item()
        whole_loss.backward()
        optimizer.step()

    if teach_lstm != None:
        print('cluster_loss: %lf' % output_cluster_loss.item())

    return train_loss / len(train_set), domain_loss / len(train_set)
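
The mutual-information terms above build their negatives by shuffling the batch dimension, so each encoding is also scored against a mismatched summary vector; a toy sketch of one positive/negative pair of scores:

import torch
import torch.nn as nn

batch, hidden = 16, 64
z = torch.randn(batch, hidden)              # toy encoder outputs
z_ave = torch.randn(batch, hidden)          # toy per-example summaries (e.g. summed embeddings)

cos = nn.CosineSimilarity(dim=1, eps=1e-6)
pos_score = cos(z, z_ave).view(-1, 1)       # matched pairs
r = torch.randperm(batch)
neg_score = cos(z, z_ave[r]).view(-1, 1)    # shuffled (negative) pairs
loss = -torch.mean(pos_score - neg_score)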
Exemplo n.º 52
0
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    global extracted_grads
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    if args.mix_option == 1:
        logger.info("Random Mixup")
    else:
        logger.info("No Mixup")

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    processor = processors[args.task_name]()
    attacker = get_attacker(args.attacker)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.train_batch_size,
                                  shuffle=True)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(
            args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt")):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True,
        )

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0

    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(
        epochs_trained,
        int(args.num_train_epochs),
        desc="Epoch",
        disable=args.local_rank not in [-1, 0],
    )
    set_seed(args)  # Added here for reproducibility
    ## Add Mixup in Batch
    epoch = 0
    for _ in train_iterator:
        epoch += 1

        if epoch > 1 and args.iterative:
            ## augment the current train dataset with a new batch of adversarial examples generated by the current model
            orig_data = load_custom_dataset(os.path.join(
                args.data_dir, "train.tsv"),
                                            all_data=True,
                                            number=args.num_adv)
            clsf = ModelClassifier(tokenizer, model, args)
            attack_eval = OpenAttack.attack_evals.DefaultAttackEval(
                attacker, clsf, progress_bar=True)
            adv_egs = attack_eval.eval(orig_data,
                                       visualize=False,
                                       return_examples=True)
            adv_examples = processor._create_examples(adv_egs, "adv_train")
            logger.info(
                "Epoch: {}, Number of adversarial examples added to training: {}"
                .format(epoch, len(adv_examples)))
            adv_dataset = convert_examples_dataset(args, adv_examples,
                                                   tokenizer)
            train_dataset = ConcatDataset([train_dataset, adv_dataset])

            ## start training on augmented data (we will shuffle the training data)
            # train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
            train_dataloader = DataLoader(train_dataset,
                                          batch_size=args.train_batch_size,
                                          shuffle=True)

            logger.info("Current Num examples = %d", len(train_dataset))

        epoch_iterator = train_dataloader
        for step, batch in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)

            ## normal training
            ## for now, just ignore token type ids
            input_ids = batch[0]  #(bsz, len)
            attention_mask = batch[1]
            batch_size = input_ids.size(0)
            length = input_ids.size(1)
            labels = batch[3]  #(bsz,)
            logits, outputs = model(input_ids,
                                    attention_mask)  #(bsz, num_labels)
            # x_embeddings = outputs[2] # (bsz, len, dim)
            # x_embeddings.register_hook(save_grad("x_emb"))
            # logger.info("#outputs 1: " + str(len(outputs[-1])))
            L_ori = nn.CrossEntropyLoss()(logits.view(-1, args.num_labels),
                                          labels.view(-1))

            ## RandomMix
            if args.mix_option == 1:
                idx = torch.randperm(batch_size)
                input_ids_2 = input_ids[idx]
                labels_2 = labels[idx]
                attention_mask_2 = attention_mask[idx]
                ## convert the labels to one-hot
                labels = torch.zeros(batch_size,
                                     args.num_labels).to(args.device).scatter_(
                                         1, labels.view(-1, 1), 1)
                labels_2 = torch.zeros(batch_size, args.num_labels).to(
                    args.device).scatter_(1, labels_2.view(-1, 1), 1)

                l = np.random.beta(args.alpha, args.alpha)
                # l = max(l, 1-l) ## not needed when only using labeled examples
                mixed_labels = l * labels + (1 - l) * labels_2

                mix_layer = np.random.choice(args.mix_layers_set, 1)[0]
                mix_layer = mix_layer - 1

                logits, outputs = model(input_ids, attention_mask, input_ids_2,
                                        attention_mask_2, l, mix_layer)
                probs = torch.softmax(logits, dim=1)  #(bsz, num_labels)
                L_mix = F.kl_div(probs.log(), mixed_labels, None, None,
                                 'batchmean')

                loss = L_ori + L_mix

            else:
                loss = L_ori

            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            tr_loss += loss.item()

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    logs = {}
                    if (
                            args.local_rank == -1
                            and args.evaluate_during_training
                    ):  # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            eval_key = "eval_{}".format(key)
                            logs[eval_key] = value

                    loss_scalar = (tr_loss - logging_loss) / args.logging_steps
                    learning_rate_scalar = scheduler.get_lr()[0]
                    logs["learning_rate"] = learning_rate_scalar
                    logs["loss"] = loss_scalar
                    logging_loss = tr_loss

                    for key, value in logs.items():
                        tb_writer.add_scalar(key, value, global_step)
                    # print(json.dumps({**logs, **{"step": global_step}}))

                    logging.info("Global Step: " + str(global_step))
                    logging.info("Loss: " + str(loss_scalar))

        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    ## save the final epoch only
    if args.local_rank in [-1, 0]:
        # Save model checkpoint
        output_dir = os.path.join(args.output_dir, "final-checkpoint")
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)

        torch.save(args, os.path.join(output_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", output_dir)

        torch.save(optimizer.state_dict(),
                   os.path.join(output_dir, "optimizer.pt"))
        torch.save(scheduler.state_dict(),
                   os.path.join(output_dir, "scheduler.pt"))
        logger.info("Saving optimizer and scheduler states to %s", output_dir)

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
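
The RandomMix branch pairs every example with a randomly permuted partner from the same batch and mixes their one-hot labels with a Beta-distributed coefficient; a minimal sketch of that pairing (model and logits left out):

import numpy as np
import torch

batch_size, num_labels, alpha = 8, 3, 0.4
labels = torch.randint(0, num_labels, (batch_size,))

idx = torch.randperm(batch_size)                       # partner index for every example
labels_1h = torch.zeros(batch_size, num_labels).scatter_(1, labels.view(-1, 1), 1)
labels_2 = labels_1h[idx]

l = np.random.beta(alpha, alpha)                       # mixing coefficient
mixed_labels = l * labels_1h + (1 - l) * labels_2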
Exemplo n.º 53
0
 def reset_permutation(self):
     n = len(self.data_source)
     perm = torch.randperm(n) if self.shuffle else torch.arange(n)
     self._perm = perm.tolist()
Exemplo n.º 54
0
 def __iter__(self):
     for i in range(self.n_episodes):
         yield torch.randperm(self.n_classes)[:self.n_way]
Exemplo n.º 55
0
# Set device
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using", DEVICE)

# Load previous model and loss history
model, hyperparameters, loss_history = RecurrentTemporalPrediction.load(
    args.path, DEVICE)
# Set epochs with passed argument
print("Loaded model from", args.path)

# Shuffle the recurrent weights, then resample them from a normal distribution with matching mean and std
weights = model.rnn.weight_hh_l0
shape = weights.shape
weights = weights.reshape(-1)
weights = weights[torch.randperm(weights.shape[0])]
weights_ = np.random.normal(loc=torch.mean(weights).item(),
                            scale=torch.std(weights).item(),
                            size=(1500, 1500))
model.rnn.weight_hh_l0 = torch.nn.Parameter(torch.Tensor(weights_))

# Get data loader for test data
data_loader = model.data_loader([args.dataset], split='all')
print("Loaded dataset from", args.dataset)

# Save history here
test_history = {
    'loss': [],
    'MSE_1': [],
    'MSE_2': [],
    'L1': [],
Exemplo n.º 56
0
        print("Epoch %d/%d" % (t + 1, args.epochs))

        data_iterator = read_data_tensors(args.dataset_path,
                                          args.wv_path,
                                          batch_size=args.batch_size,
                                          maxlen=args.maxlen)

        for item_number, (x, texts) in enumerate(data_iterator):

            x = torch.from_numpy(x)

            # extract negative samples from the very same batch; not sure whether this is OK, so TODO
            negative_samples = torch.stack(
                tuple([
                    x[torch.randperm(x.shape[0])[:args.neg_samples]]
                    for _ in range(args.batch_size)
                ]))

            # prediction
            y_pred = model(x, negative_samples)

            # error computation
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # scheduler.step(epoch=t)

            if item_number % 1000 == 0:
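
The negative samples above are drawn per batch element by permuting the batch and keeping the first few rows; a sketch of that sampling with made-up shapes:

import torch

batch_size, maxlen, dim, neg_samples = 32, 20, 100, 5
x = torch.randn(batch_size, maxlen, dim)
negative_samples = torch.stack(
    tuple(x[torch.randperm(x.shape[0])[:neg_samples]] for _ in range(batch_size)))
# negative_samples: (batch_size, neg_samples, maxlen, dim)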
Exemplo n.º 57
0
    def forward(self, x, adj, *args):
        B, S, C, H, W = x.size()
        x = x.view(B * S, C, H, W)
        x4_1, x4_2 = self.featuremaps(x)
        _, c, h, w = x4_1.shape

        # global branch
        x4_1 = x4_1.view(B, S, c, h, w).transpose(1, 2).contiguous()
        g_f = self.global_avg_pool(x4_1).view(B, -1)
        g_bn = self.global_bottleneck(g_f)

        # attention branch
        v_f = list()
        for idx, n in enumerate(self.total_split_list):
            v_f.append(self.parts_avgpool[idx](x4_2).view(B, S, c, n))
        v_f = torch.cat(v_f, dim=3)
        f = v_f.transpose(2, 3).contiguous().view(B, S * self.total_split, c)

        # graph propagation
        for i in range(self.num_gb):
            f = self.graph_layers[i](f, adj)
        f = f.view(B, S, self.total_split, c)

        f_fuse = self._attention_op(f)

        att_f = f_fuse.mean(dim=1).view(B, -1)
        att_bn = self.att_bottleneck(att_f)

        if not self.training:
            return torch.cat([g_bn, att_bn], dim=1)

        g_out = self.global_classifier(g_bn)
        att_out = self.att_classifier(att_bn)

        # consistent
        if self.consistent_loss and self.training:
            satt_f_list = list()
            satt_out_list = list()
            # random select sub frames
            assert S >= 5
            for num_frame in [S-3, S-2, S-1]:
                sub_index = torch.randperm(S)[:num_frame]
                sub_index = torch.sort(sub_index)[0]
                sub_index = sub_index.long().to(f.device)
                sf = torch.gather(f, dim=1, index=sub_index.view(1, num_frame, 1, 1).repeat(B, 1, self.total_split, c))
                sf_fuse = self._attention_op(sf)
                satt_f = sf_fuse.mean(dim=1).view(B, -1)
                satt_bn = self.att_bottleneck(satt_f)
                satt_out = self.att_classifier(satt_bn)
                satt_f_list.append(satt_f)
                satt_out_list.append(satt_out)

        if self.loss == {'xent'}:
            out_list = [g_out, att_out]
            if self.consistent_loss:
                out_list.extend(satt_out_list)
            return out_list
        elif self.loss == {'xent', 'htri'}:
            out_list = [g_out, att_out]
            f_list = [g_f, att_f]
            if self.consistent_loss:
                out_list.extend(satt_out_list)
                f_list.extend(satt_f_list)
            return out_list, f_list
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))
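
Sampling an ordered subset of frames, as in the consistency branch above, pairs randperm with sort so that temporal order is preserved; a tiny sketch:

import torch

S, num_frame, feat_dim = 8, 5, 256
frames = torch.randn(S, feat_dim)                           # toy per-frame features
sub_index = torch.sort(torch.randperm(S)[:num_frame])[0]    # random frames, kept in order
sub_frames = frames[sub_index]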
Exemplo n.º 58
0
    def recurrent_generator(self, advantages, num_mini_batch):
        num_processes = self.rewards.size(1)
        assert num_processes >= num_mini_batch, (
            "PPO requires the number of processes ({}) "
            "to be greater than or equal to the number of "
            "PPO mini batches ({}).".format(num_processes, num_mini_batch)
        )
        num_envs_per_batch = num_processes // num_mini_batch
        perm = torch.randperm(num_processes)
        for start_ind in range(0, num_processes, num_envs_per_batch):
            observations_batch = defaultdict(list)

            recurrent_hidden_states_batch = []
            actions_batch = []
            value_preds_batch = []
            return_batch = []
            masks_batch = []
            old_action_log_probs_batch = []
            adv_targ = []

            for offset in range(num_envs_per_batch):
                ind = perm[start_ind + offset]

                for sensor in self.observations:
                    observations_batch[sensor].append(
                        self.observations[sensor][:-1, ind]
                    )

                recurrent_hidden_states_batch.append(
                    self.recurrent_hidden_states[0:1, ind]
                )

                actions_batch.append(self.actions[:, ind])
                value_preds_batch.append(self.value_preds[:-1, ind])
                return_batch.append(self.returns[:-1, ind])
                masks_batch.append(self.masks[:-1, ind])
                old_action_log_probs_batch.append(
                    self.action_log_probs[:, ind]
                )

                adv_targ.append(advantages[:, ind])

            T, N = self.num_steps, num_envs_per_batch

            # These are all tensors of size (T, N, -1)
            for sensor in observations_batch:
                observations_batch[sensor] = torch.stack(
                    observations_batch[sensor], 1
                )

            actions_batch = torch.stack(actions_batch, 1)
            value_preds_batch = torch.stack(value_preds_batch, 1)
            return_batch = torch.stack(return_batch, 1)
            masks_batch = torch.stack(masks_batch, 1)
            old_action_log_probs_batch = torch.stack(
                old_action_log_probs_batch, 1
            )
            adv_targ = torch.stack(adv_targ, 1)

            # States is just a (N, -1) tensor
            recurrent_hidden_states_batch = torch.stack(
                recurrent_hidden_states_batch, 1
            ).view(N, -1)

            # Flatten the (T, N, ...) tensors to (T * N, ...)
            for sensor in observations_batch:
                observations_batch[sensor] = _flatten_helper(
                    T, N, observations_batch[sensor]
                )

            actions_batch = _flatten_helper(T, N, actions_batch)
            value_preds_batch = _flatten_helper(T, N, value_preds_batch)
            return_batch = _flatten_helper(T, N, return_batch)
            masks_batch = _flatten_helper(T, N, masks_batch)
            old_action_log_probs_batch = _flatten_helper(
                T, N, old_action_log_probs_batch
            )
            adv_targ = _flatten_helper(T, N, adv_targ)

            yield (
                observations_batch,
                recurrent_hidden_states_batch,
                actions_batch,
                value_preds_batch,
                return_batch,
                masks_batch,
                old_action_log_probs_batch,
                adv_targ,
            )
Exemplo n.º 59
0
Arquivo: main.py Projeto: Vestaia/nSAI
from analysis import cnn
from generate import scatter1d
import torch
import matplotlib.pyplot as plt
from torchsummary import summary

BATCH_SIZE = 50

data, target = scatter1d.gen_data(2000)
data = torch.log10(data)
ind = torch.randperm(data.size(2))
data2 = data[:,:,ind]

# plt.scatter(data[0,0], data[0,1], s=2)
# plt.show()
target = torch.log10(target)

model = cnn.Net().cuda()
optim = torch.optim.Adam(model.parameters(), lr = 1e-4)
summary(model, input_size=(2, 450), batch_size=BATCH_SIZE)

cnn.train(model, optim, data, target, batch_size=BATCH_SIZE, validation_size=200, epochs=50)
Exemplo n.º 60
0
 def __iter__(self):
     if sequential:
         return iter(self.indices)
     else:
         return iter(self.indices[x]
                     for x in torch.randperm(len(self.indices)).long())