Example #1
def ts_loader(ts: TrainingSet, batch_size: int = 32) -> data.DataLoader:
    '''Training set loader'''
    class TSDataset(data.Dataset):
        def __init__(self, ts: TrainingSet):
            stateaction, reward, state_prime = (torch.tensor(x).float()
                                                for x in ts)

            self.stateaction, self.reward = stateaction, reward
            self.stateaction_prime = torch.cat(
                [  # all combinations of x' and u'
                    state_prime.unsqueeze(1).expand(-1, len(U), -1),
                    torch.tensor(U).expand(len(state_prime), -1).unsqueeze(-1)
                ],
                dim=-1)

        def __len__(self) -> int:
            return len(self.stateaction)

        def __getitem__(
                self,
                i: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
            return (self.stateaction[i], self.reward[i],
                    self.stateaction_prime[i])

    dataset = TSDataset(ts)
    loader = data.DataLoader(dataset,
                             batch_size=None,
                             sampler=data.BatchSampler(
                                 data.RandomSampler(dataset), batch_size,
                                 False))

    return loader
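Note that this example wires the BatchSampler in through the sampler= argument with batch_size=None, so the DataLoader hands each whole list of indices to __getitem__ in a single call. The more common wiring, used by most of the examples below, passes it as batch_sampler=. A minimal self-contained sketch of that wiring on toy data (the dataset and shapes here are illustrative, not taken from the project above):

import torch
from torch.utils import data

# toy dataset: 10 rows of 3 features
dataset = data.TensorDataset(torch.randn(10, 3))

# group shuffled indices into batches of 4 and keep the final short batch
batch_sampler = data.BatchSampler(data.RandomSampler(dataset),
                                  batch_size=4,
                                  drop_last=False)

loader = data.DataLoader(dataset, batch_sampler=batch_sampler)
for (xb,) in loader:
    print(xb.shape)  # torch.Size([4, 3]) twice, then torch.Size([2, 3])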
Example #2
File: demos.py Project: qxcv/mtil
def make_loader_mt(dataset, batch_size):
    """Construct sampler that randomly chooses N items from N-sample dataset,
    weighted so that it's even across all tasks (so no task implicitly has
    higher priority than the others). Assumes the given dataset is a
    TensorDataset produced by trajectories_to_dataset_mt."""
    task_ids = dataset.tensor_dict['obs'].task_id
    assert len(task_ids) > 0 and batch_size > 0, \
        f"either {len(task_ids)}=0 task IDs or {batch_size}=0 batch size"
    unique_ids, frequencies = torch.unique(task_ids,
                                           return_counts=True,
                                           sorted=True)
    # all tasks must be present for this to work
    assert torch.all(unique_ids == torch.arange(len(unique_ids))), (unique_ids)
    freqs_total = torch.sum(frequencies).to(torch.float)
    unique_weights = freqs_total / frequencies.to(torch.float)
    unique_weights = unique_weights / unique_weights.sum()
    weights = unique_weights[task_ids]

    # even out the number of samples to be a multiple of batch size, always
    n_samples = len(weights) + (-len(weights)) % batch_size
    assert n_samples >= len(weights) and 0 == n_samples % batch_size, \
        (batch_size, n_samples)
    weighted_sampler = data.WeightedRandomSampler(weights,
                                                  n_samples,
                                                  replacement=True)
    batch_sampler = data.BatchSampler(weighted_sampler,
                                      batch_size=batch_size,
                                      drop_last=True)

    loader = data.DataLoader(dataset,
                             pin_memory=False,
                             batch_sampler=batch_sampler,
                             collate_fn=fixed_default_collate)

    return loader
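The balancing idea from the docstring in isolation: weight every sample by the inverse frequency of its task so that all tasks are drawn with roughly equal probability, then group the weighted draws into batches. This is a standalone sketch on made-up task IDs, not code from the mtil project:

import torch
from torch.utils import data

task_ids = torch.tensor([0, 0, 0, 0, 1, 1, 2])       # three tasks, unbalanced
_, counts = torch.unique(task_ids, return_counts=True, sorted=True)
weights = (1.0 / counts.float())[task_ids]            # per-sample inverse frequency

sampler = data.WeightedRandomSampler(weights, num_samples=8, replacement=True)
for batch in data.BatchSampler(sampler, batch_size=4, drop_last=True):
    print(batch)                                       # lists of 4 dataset indices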
Example #3
    def __init__(self, args):
        self.args = args
        self.log_str_path = ('./train_log/' + args.model + str(args.number) +
                             args.mod + args.dataset +
                             'GL' + str(args.global_lr) +
                             'LE' + str(args.local_epoch) +
                             'LL' + str(args.local_lr) + '.log')
        self.mod_str_path = ('./saved_models/' + args.model + str(args.number) +
                             args.mod + args.dataset +
                             'GL' + str(args.global_lr) +
                             'LE' + str(args.local_epoch) +
                             'LL' + str(args.local_lr))
        self.log = Logging(self.log_str_path)

        (self.user_num, self.item_num, self.train_data, self.train_support_mat,
         self.val_negative_dict, self.val_negative, self.train_mat,
         self.sup_max, self.query_max) = data_prepare.load_all(self.args)

        self.Train_data = data_prepare.Train_data(self.train_data,
                                                  self.item_num,
                                                  self.train_support_mat,
                                                  self.sup_max, self.query_max,
                                                  self.args)
        self.Train_data.ng_train_sample()
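        # build the string "<args.model>(self.item_num,args).cuda()" and eval() it
        # to instantiate the model class named on the command line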
        eval_ = args.model + "(self.item_num,args).cuda()"
        self.model = eval(eval_)
        self.maml_train_batch_sampler = data.BatchSampler(
            data.RandomSampler(range(self.Train_data.idx)),
            batch_size=self.args.batch_size,
            drop_last=False)
Example #4
 def val_on(self, dset):
     self.enc.eval()
     self.dec.eval()
     self.mlp.eval()
     pred_list = []
     idx_list = []
     dl = data.DataLoader(dset,
                          batch_sampler=data.BatchSampler(
                              data.RandomSampler(dset),
                              self.batch_size,
                              drop_last=False),
                          pin_memory=False)
     for batch_idx, (xb, yb, idx) in enumerate(dl):
         latent = self.enc(xb)
         label_pred = self.mlp(latent) if latent.ndim == 2 else self.mlp(
             latent[:, :, 0, 0])
         pred_list.append(label_pred.argmax(axis=1).detach().cpu().numpy())
         idx_list.append(idx.detach().cpu().numpy())
         if ARGS.test: break
     pred_array = np.concatenate(pred_list)
     if ARGS.test:
         return -1, -1, dummy_labels(self.num_classes, len(dset.y))
     idx_array = np.concatenate(idx_list)
     pred_array_ordered = np.array([
         item[0]
         for item in sorted(zip(pred_array, idx_array), key=lambda x: x[1])
     ])
     if get_num_labels(dset.y) == 1: set_trace()
     acc = -1 if ARGS.test else accuracy(pred_array_ordered,
                                         dset.y.detach().cpu().numpy())
     f1 = -1 if ARGS.test else mean_f1(pred_array_ordered,
                                       dset.y.detach().cpu().numpy())
     return acc, f1, pred_array_ordered
Example #5
    def batchPredict(self, dataset: LargePatentDataset, batchSize):
        batchSampler = data.BatchSampler(data.SequentialSampler(dataset),
                                         batch_size=batchSize, drop_last=False)
        dataGenerator = data.DataLoader(dataset, batch_sampler=batchSampler,
                                        collate_fn=dataset.collate)
        with torch.no_grad():
            with tqdm(total=len(dataset), unit='st', unit_scale=True, file=sys.stdout) as t:
                t.set_description('Predicting')
                for sentsTokens, batchGoldTags in dataGenerator:
                    actualBatchLen = len(sentsTokens)
                    # truncate sentence to maxInputLen (-2 to account for [CLS] & [SEP] tokens)
                    sentsTokens = [["[CLS]"] + sent[:self.maxInputLen-2] + ["[SEP]"] for sent in sentsTokens]
                    sentsTokenIds = [
                        convertTokensToIds(sent, '[UNK]', lambda token: dataset.tokenizer.vocab[token])
                        for sent in sentsTokens]
                    paddedSentsTokenIds = torch.tensor(padBatchTokenIds(sentsTokenIds, 0, len(sentsTokenIds[0])),
                                                       dtype=torch.long, device=self.device)
                    attentionMask = torch.tensor([[float(i > 0) for i in sent] for sent in paddedSentsTokenIds],
                                                 device=self.device)
                    batchLogits = self.model(input_ids=paddedSentsTokenIds, attention_mask=attentionMask).sigmoid()
                    batchLogits: np.ndarray = batchLogits.detach().cpu().numpy()
                    # print(batchLogits)
                    maxIndices = batchLogits.argmax(axis=1)
                    # print(maxIndices)
                    batchPredTags = self.tagDict.indices2words(maxIndices)
                    t.update(actualBatchLen)

                    yield batchPredTags, batchGoldTags
Example #6
def get_dataloader(data_dir, batch_size, num_workers, input_size, mean, std,
                   distributed):
    """Get dataloader."""
    def val_batch_fn(batch, device):
        data = batch[0].to(device)
        scale = batch[1]
        center = batch[2]
        score = batch[3]
        imgid = batch[4]
        return data, scale, center, score, imgid

    val_dataset = COCOKeyPoints(data_dir,
                                aspect_ratio=4. / 3.,
                                splits=('person_keypoints_val2017'))

    meanvec = [float(i) for i in mean.split(',')]
    stdvec = [float(i) for i in std.split(',')]
    transform_val = SimplePoseDefaultValTransform(
        num_joints=val_dataset.num_joints,
        joint_pairs=val_dataset.joint_pairs,
        image_size=input_size,
        mean=meanvec,
        std=stdvec)
    val_tmp = val_dataset.transform(transform_val)
    sampler = make_data_sampler(val_tmp, False, distributed)
    batch_sampler = data.BatchSampler(sampler=sampler,
                                      batch_size=batch_size,
                                      drop_last=False)
    val_data = data.DataLoader(val_tmp,
                               batch_sampler=batch_sampler,
                               num_workers=num_workers)

    return val_dataset, val_data, val_batch_fn
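make_data_sampler is a project-level helper rather than part of torch.utils.data. Judging only from how it is called throughout these examples (dataset, shuffle flag, distributed flag), a plausible minimal implementation would be the sketch below; treat it as an assumption about the helper, not its actual source:

from torch.utils import data

def make_data_sampler(dataset, shuffle, distributed):
    # in distributed runs each process should see its own shard of the data
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
    if shuffle:
        return data.RandomSampler(dataset)
    return data.SequentialSampler(dataset)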
Example #7
    def __init__(self, dataset, batch_size, device='cpu'):
        super().__init__()
        self._dataset = dataset
        self.batch_size = batch_size
        self.device = device

        self._sampler = data.BatchSampler(data.RandomSampler(
            self._dataset,
            replacement=False),
            self.batch_size, False)
Example #8
def get_dataloader(batch_size, num_workers, data_root, distributed):
    transform_test = transforms.Compose([
        transforms_cv.ToTensor(),
        transforms_cv.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])
    val_dataset = CIFAR10(root=data_root, train=False, transform=transform_test, download=True)

    sampler = make_data_sampler(val_dataset, False, distributed)
    batch_sampler = data.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False)
    val_loader = data.DataLoader(val_dataset, batch_sampler=batch_sampler, num_workers=num_workers)
    return val_loader
Example #9
def get_dataloader(val_dataset, batch_size, num_workers, distributed, coco=False):
    """Get dataloader."""
    if coco:
        batchify_fn = Tuple(Stack(), Pad(pad_val=-1), Empty())
    else:
        batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
    sampler = make_data_sampler(val_dataset, False, distributed)
    batch_sampler = data.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False)
    val_loader = data.DataLoader(val_dataset, batch_sampler=batch_sampler, collate_fn=batchify_fn,
                                 num_workers=num_workers)
    return val_loader
Example #10
def get_dataloader(val_dataset, batch_size, num_workers, distributed, coco=False):
    """Get dataloader."""
    if coco:
        batchify_fn = Tuple(*[Append() for _ in range(3)], Empty())
    else:
        batchify_fn = Tuple(*[Append() for _ in range(3)])
    sampler = make_data_sampler(val_dataset, False, distributed)
    batch_sampler = data.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False)
    val_loader = data.DataLoader(val_dataset, batch_sampler=batch_sampler, collate_fn=batchify_fn,
                                 num_workers=num_workers)
    return val_loader
Example #11
        def batches(self, size=None):
            # if self.data_loader is None:
            #     self.data_loader = torch_data.DataLoader(self, batch_size=size, shuffle=self.shuffle_batches)
            # for sample in self.data_loader:
            #    yield sample

            sampler_name = torch_data.RandomSampler if self.shuffle_batches else torch_data.SequentialSampler
            sampler = torch_data.BatchSampler(sampler_name(range(len(self))),
                                              size,
                                              drop_last=False)

            for batch_idx in sampler:
                # let wpid_batch and wplabel_batch contain the list of ids and labels, respectively
                wpid_batch = [self.wpids[idx] for idx in batch_idx]
                wplabel_batch = [self.wplabels[idx] for idx in batch_idx]

                # transform a list of sampled id sequences to a batch matrix of a fixed size by trimming and padding
                # self.max_length-2: leave space for [SEP] and [CLS]
                input_ids = np.zeros((size, self.max_length - 2), dtype=int)
                for i, wpid in enumerate(wpid_batch):
                    l = min([len(wpid), self.max_length - 2])
                    input_ids[i, :l] = wpid[:l]

                # add [SEP] at the end of each sentence (just before padding)
                flipped_input_ids = self._flip_nonzero_columns(input_ids)
                flipped_input_ids = np.c_[
                    np.tile(self.SEP, input_ids.shape[0]), flipped_input_ids]
                input_ids = self._flip_nonzero_columns(flipped_input_ids)

                # add [CLS] at the beginning of each sentence
                input_ids = np.c_[np.tile(self.CLS, input_ids.shape[0]),
                                  input_ids]

                # all tokens belong to the same sentence => all zeros
                token_type_ids = np.zeros((size, self.max_length), dtype=int)

                # highlight padding (zeros)
                attention_mask = (input_ids != 0).astype(int)

                # transform a list of sampled labels to a batch matrix of a fixed size by trimming and padding
                # input_ids[i, 1:l+1] : the 0th item corresponds to the [CLS] token - use 0 label for it
                labels = np.zeros((size, self.max_length), dtype=int)
                for i, wplabel in enumerate(wplabel_batch):
                    l = min([len(wplabel), self.max_length - 2])
                    labels[i, 1:l + 1] = wplabel[:l]

                yield SrcComplexityDataset.Batch(
                    input_ids=input_ids,
                    token_type_ids=token_type_ids,
                    attention_mask=attention_mask,
                    labels=labels,
                )
Example #12
def get_dataloader(opt, distributed):
    input_size = opt.input_size
    crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
    resize = int(math.ceil(input_size / crop_ratio))
    transform_test = transforms_cv.Compose([
        transforms_cv.Resize((resize, resize)),
        transforms_cv.CenterCrop(input_size),
        transforms_cv.ToTensor(),
        transforms_cv.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = ImageNet(opt.data_dir, train=False, transform=transform_test)

    sampler = make_data_sampler(val_dataset, False, distributed)
    batch_sampler = data.BatchSampler(sampler=sampler, batch_size=opt.batch_size, drop_last=False)
    val_loader = data.DataLoader(val_dataset, batch_sampler=batch_sampler, num_workers=opt.num_workers)
    return val_loader
Example #13
 def get_latents(self, dset):
     self.enc.eval()
     collected_latents = []
     determin_dl = data.DataLoader(dset,
                                   batch_sampler=data.BatchSampler(
                                       data.SequentialSampler(dset),
                                       self.batch_size,
                                       drop_last=False),
                                   pin_memory=False)
     for idx, (xb, yb, tb) in enumerate(determin_dl):
         batch_latents = self.enc(xb)
         batch_latents = batch_latents.view(batch_latents.shape[0],
                                            -1).detach().cpu().numpy()
         collected_latents.append(batch_latents)
     collected_latents = np.concatenate(collected_latents, axis=0)
     return collected_latents
Example #14
  def train_network(self, X_inputs, Y_labels):
    optimizer = optim.Adam(self.neural_network.parameters())
    X_inputs = torch.from_numpy(X_inputs).double()
    Y_labels = torch.from_numpy(Y_labels).double().view(len(Y_labels), 1)

    self.neural_network.train(True)
    for iteration in range(self.nb_iters):
      for batch in tdata.BatchSampler(
              tdata.RandomSampler(range(len(X_inputs)), replacement=False),
              batch_size=self.batch_size, drop_last=False):
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
          outputs = self.neural_network(X_inputs[batch])
          loss = nn.MSELoss(reduction="mean")(outputs, Y_labels[batch])
          loss.backward()
          optimizer.step()
Example #15
File: train.py Project: ririw/mac
    def main(self, clevr_dir, preproc_dir, results_loc, log_loc=None):
        logging.basicConfig(level=logging.INFO)
        utils.cuda_message()
        np.set_printoptions(linewidth=139)

        clevr_fs = open_fs(clevr_dir, create=False)
        preproc_fs = open_fs(preproc_dir, create=True)

        dataset = datasets.TaskDataset(clevr_fs, preproc_fs, "train")
        total_words = len(dataset.word_ix) + 1
        logging.info("Total words: %s", total_words)
        sampler = data.BatchSampler(data.RandomSampler(dataset), 32, False)

        net = mac.MACNet(mac.MACRec(12, 512),
                         total_words).to(config.torch_device())
        opt = torch.optim.Adam(net.parameters())

        if log_loc:
            now = datetime.datetime.now()
            log_dir = f"{log_loc}/new-{now}"
            writer = tensorboardX.SummaryWriter(log_dir)
        else:
            writer = None

        step = 0
        rolling_accuracy = 0
        for epoch in range(10):
            bar = tqdm(sampler)
            for batch_ix in bar:
                opt.zero_grad()
                images, qns, qn_lens, answers = dataset[batch_ix]
                predictions = net(images, qns, qn_lens)

                loss = functional.cross_entropy(predictions, answers)
                loss.backward()
                opt.step()
                hard_preds = np.argmax(predictions.detach().cpu().numpy(), 1)
                accuracy = (
                    hard_preds == answers.detach().cpu().numpy()).mean()
                if writer is not None:
                    writer.add_scalar("loss", loss.item(), step)
                    writer.add_scalar("accuracy", accuracy, step)

                rolling_accuracy = rolling_accuracy * 0.95 + accuracy * 0.05
                bar.set_description("Accuracy: {}".format(rolling_accuracy))

                step += 1
Example #16
    def __init__(self,
                 dataset: data.Dataset,
                 mask: bool,
                 batch_size: int,
                 initial_temperature: float,
                 drop_last: bool = False,
                 device='cpu'):
        super().__init__()
        self._dataset = dataset
        self.mask = mask
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.device = device

        self._temperature = initial_temperature

        # * TODO: A better than random sampler that take into account sample length
        self._sampler = data.BatchSampler(data.RandomSampler(
            self._dataset, replacement=False),
            batch_size=self.batch_size,
            drop_last=self.drop_last)
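The TODO above asks for a sampler that accounts for sample length. A common answer is a bucketing batch sampler: sort indices by length, cut the sorted order into batches, and shuffle only the batch order so that each batch stays length-homogeneous. The class below is an illustrative sketch (assumed names, not part of this project); it can be passed to a DataLoader via batch_sampler= exactly like data.BatchSampler:

import random
from torch.utils import data

class LengthBucketBatchSampler(data.Sampler):
    """Yield batches of indices whose samples have similar lengths."""

    def __init__(self, lengths, batch_size, drop_last=False):
        self.lengths = list(lengths)
        self.batch_size = batch_size
        self.drop_last = drop_last

    def __iter__(self):
        order = sorted(range(len(self.lengths)), key=lambda i: self.lengths[i])
        batches = [order[i:i + self.batch_size]
                   for i in range(0, len(order), self.batch_size)]
        if self.drop_last and batches and len(batches[-1]) < self.batch_size:
            batches = batches[:-1]
        random.shuffle(batches)  # randomize batch order, keep each batch homogeneous
        return iter(batches)

    def __len__(self):
        n = len(self.lengths)
        return n // self.batch_size if self.drop_last else (n + self.batch_size - 1) // self.batch_size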
Example #17
def main():
    args = parse_args()

    config = load_config(os.path.join(args.models_path, args.exp_name))

    if config['model'] == 'TextTransposeModel':
        test_set = DatasetTextImages(path=args.test_data_path, patch_size=None,
                                     aug_resize_factor_range=None, scale=config['scale'])
    else:
        test_set = DatasetFromSingleImages(path=args.test_data_path, patch_size=None,
                                           aug_resize_factor_range=None, scale=config['scale'])

    batch_sampler = Data.BatchSampler(
        sampler=Data.SequentialSampler(test_set),
        batch_size=1,
        drop_last=True
    )

    evaluation_data_loader = Data.DataLoader(dataset=test_set, num_workers=0, batch_sampler=batch_sampler)

    trainer = Trainer(name=args.exp_name, models_root=args.models_path, resume=True)
    trainer.load_best()

    psnr = PSNR(name='PSNR', border=config['border'])

    tic = time.time()
    count = 0
    for batch in tqdm(evaluation_data_loader):
        output = trainer.predict(batch=batch)
        psnr.update(batch[1], output)
        count += 1

    toc = time.time()

    print('FPS: {}, SAMPLES: {}'.format(float(count) / (toc - tic), count))
    print('PSNR: {}'.format(psnr.get()))
Example #18
    def __init__(self, args):
        self.device = torch.device(args.device)
        # network
        net_name = '_'.join(('yolo3', args.network, args.dataset))
        self.save_prefix = net_name
        self.net = get_model(net_name, pretrained_base=True)
        if args.distributed:
            self.net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(self.net)
        if args.resume.strip():
            logger.info("Resume from the model {}".format(args.resume))
            self.net.load_state_dict(torch.load(args.resume.strip()))
        else:
            logger.info("Init from base net {}".format(args.network))
        classes, anchors = self.net.num_class, self.net.anchors
        self.net.set_nms(nms_thresh=0.45, nms_topk=400)
        if args.label_smooth:
            self.net._target_generator._label_smooth = True
        self.net.to(self.device)
        if args.distributed:
            self.net = torch.nn.parallel.DistributedDataParallel(
                self.net, device_ids=[args.local_rank], output_device=args.local_rank)

        # dataset and dataloader
        train_dataset = get_train_data(args.dataset, args.mixup)
        width, height = args.data_shape, args.data_shape
        batchify_fn = Tuple(*([Stack() for _ in range(6)] + [Pad(axis=0, pad_val=-1) for _ in range(1)]))
        train_dataset = train_dataset.transform(
            YOLO3DefaultTrainTransform(width, height, classes, anchors, mixup=args.mixup))
        args.per_iter = len(train_dataset) // (args.num_gpus * args.batch_size)
        args.max_iter = args.epochs * args.per_iter
        if args.distributed:
            sampler = data.DistributedSampler(train_dataset)
        else:
            sampler = data.RandomSampler(train_dataset)
        train_sampler = data.sampler.BatchSampler(sampler=sampler, batch_size=args.batch_size,
                                                  drop_last=False)
        train_sampler = IterationBasedBatchSampler(train_sampler, num_iterations=args.max_iter)
        if args.no_random_shape:
            self.train_loader = data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True,
                                                collate_fn=batchify_fn, num_workers=args.num_workers)
        else:
            transform_fns = [YOLO3DefaultTrainTransform(x * 32, x * 32, classes, anchors, mixup=args.mixup)
                             for x in range(10, 20)]
            self.train_loader = RandomTransformDataLoader(transform_fns, train_dataset, batch_sampler=train_sampler,
                                                          collate_fn=batchify_fn, num_workers=args.num_workers)
        if args.eval_epoch > 0:
            # TODO: rewrite it
            val_dataset, self.metric = get_test_data(args.dataset)
            val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
            val_dataset = val_dataset.transform(YOLO3DefaultValTransform(width, height))
            val_sampler = make_data_sampler(val_dataset, False, args.distributed)
            val_batch_sampler = data.BatchSampler(val_sampler, args.test_batch_size, False)
            self.val_loader = data.DataLoader(val_dataset, batch_sampler=val_batch_sampler,
                                              collate_fn=val_batchify_fn, num_workers=args.num_workers)

        # optimizer and lr scheduling
        self.optimizer = optim.SGD(self.net.parameters(), lr=args.lr, momentum=args.momentum,
                                   weight_decay=args.wd)
        if args.lr_mode == 'cos':
            self.scheduler = WarmupCosineLR(optimizer=self.optimizer, T_max=args.max_iter,
                                            warmup_factor=args.warmup_factor, warmup_iters=args.warmup_iters)
        elif args.lr_mode == 'step':
            lr_decay = float(args.lr_decay)
            milestones = sorted([float(ls) * args.per_iter for ls in args.lr_decay_epoch.split(',') if ls.strip()])
            self.scheduler = WarmupMultiStepLR(optimizer=self.optimizer, milestones=milestones, gamma=lr_decay,
                                               warmup_factor=args.warmup_factor, warmup_iters=args.warmup_iters)
        else:
            raise ValueError('illegal scheduler type')
        self.args = args
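IterationBasedBatchSampler above is not part of torch.utils.data; it is usually a thin wrapper (in the maskrcnn-benchmark style) that keeps re-iterating the wrapped batch sampler until a fixed number of batches has been produced, which is what lets max_iter rather than epoch count drive this training loop. A sketch under that assumption:

from torch.utils import data

class IterationBasedBatchSampler(data.Sampler):
    """Re-iterate a batch sampler until num_iterations batches have been yielded."""

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration < self.num_iterations:
            for batch in self.batch_sampler:
                iteration += 1
                if iteration > self.num_iterations:
                    return
                yield batch

    def __len__(self):
        return self.num_iterations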
Example #19
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])

    data_kwargs = {
        'base_size': args.base_size,
        'crop_size': args.crop_size,
        'transform': input_transform
    }

    val_dataset = get_segmentation_dataset(args.dataset,
                                           split=args.split,
                                           mode=args.mode,
                                           **data_kwargs)
    sampler = make_data_sampler(val_dataset, False, distributed)
    batch_sampler = data.BatchSampler(sampler=sampler,
                                      batch_size=args.batch_size,
                                      drop_last=False)
    val_data = data.DataLoader(val_dataset,
                               shuffle=False,
                               batch_sampler=batch_sampler,
                               num_workers=args.num_workers)
    metric = SegmentationMetric(val_dataset.num_class)

    metric = validate(model, val_data, metric, device)
    ptutil.synchronize()
    pixAcc, mIoU = ptutil.accumulate_metric(metric)
    if ptutil.is_main_process():
        print('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
Example #20
 def pseudo_label_train(self, pseudo_label_dset, mask, num_epochs):
     self.enc.train()
     pseudo_label_dl = data.DataLoader(
         pseudo_label_dset,
         batch_sampler=data.BatchSampler(
             data.RandomSampler(pseudo_label_dset),
             self.batch_size,
             drop_last=False),
         pin_memory=False)
     for epoch in range(num_epochs):
         epoch_loss = 0
         best_loss = np.inf
         pred_list = []
         idx_list = []
         for batch_idx, (xb, yb, idx) in enumerate(pseudo_label_dl):
             batch_mask = mask[idx]
             latent = self.enc(xb)
             latent = noiseify(latent, ARGS.noise)
             if batch_mask.any():
                 try:
                     pseudo_label_pred = self.mlp(latent[:, :, 0, 0])
                 except ValueError:
                     set_trace()
                 pseudo_label_loss = self.pseudo_label_lf(
                     pseudo_label_pred[batch_mask],
                     yb.long()[batch_mask])
             else:
                 pseudo_label_loss = torch.tensor(0, device=self.device)
             if not batch_mask.all():
                 latents_to_rec_train = latent[~batch_mask]
                 rec_pred = self.dec(latents_to_rec_train)
                 rec_loss = self.rec_lf(
                     rec_pred, xb[~batch_mask]) / (~batch_mask).sum()
             else:
                 rec_loss = torch.tensor(0, device=pseudo_label_dset.device)
             loss = pseudo_label_loss + rec_loss
             if math.isnan(loss): set_trace()
             loss.backward()
             self.enc_opt.step()
             self.enc_opt.zero_grad()
             self.dec_opt.step()
             self.dec_opt.zero_grad()
             self.mlp_opt.step()
             self.mlp_opt.zero_grad()
             pred_list.append(
                 pseudo_label_pred.argmax(axis=1).detach().cpu().numpy())
             idx_list.append(idx.detach().cpu().numpy())
             if ARGS.test: break
         if ARGS.test:
             pred_array_ordered = dummy_labels(self.num_classes,
                                               len(pseudo_label_dset))
             break
         pred_array = np.concatenate(pred_list)
         idx_array = np.concatenate(idx_list)
         pred_array_ordered = np.array([
             item[0] for item in sorted(zip(pred_array, idx_array),
                                        key=lambda x: x[1])
         ])
         if epoch_loss < best_loss:
             best_loss = epoch_loss
             count = 0
         else:
             count += 1
         if count > 4: break
     return pred_array_ordered
Example #21
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_lm_id_x.py' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_att2seq_id_X.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_att2seq_id_X' % (conf.out_path,
                                                     conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    train_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    review_val_dataset = data_utils.TestData(val_data)
    review_val_sampler = data.BatchSampler(data.RandomSampler(\
        range(review_val_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    review_test_dataset = data_utils.TestData(test_data)
    review_test_sampler = data.BatchSampler(data.RandomSampler(\
        range(review_test_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    # Start Training !!!
    max_bleu = 0.0
    for epoch in range(1, conf.train_epochs + 1):
        t0 = time()
        model.train()
Example #22
        ))
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

    import pdb
    pdb.set_trace()
    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    test_dataset = data_utils.TestData(test_data)
    test_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(test_dataset.length)),
                                           batch_size=1,
                                           drop_last=False)

    print('test dataset length:%d' % test_dataset.length)
    word_dict = constructDict()

    t0 = time()
    count = 0
    bleu_score = []
    bleu_list_1, bleu_list_2, bleu_list_3, bleu_list_4 = [], [], [], []
    rouge_1_list, rouge_2_list, rouge_L_list = [], [], []
    out_probit = []
    target = []
    for batch_idx_list in test_batch_sampler:
        user_list, item_list, review_input_list, _, real_review_list = test_dataset.get_batch(
            batch_idx_list)
Example #23
     {},
     [],
     {"data_source": dummy_dataset},
     data.Sampler(data_source=dummy_dataset),
     id="SamplerConf",
 ),
 pytest.param(
     "utils.data.sampler",
     "BatchSampler",
     {
         "batch_size": 4,
         "drop_last": False
     },
     [],
     {"sampler": dummy_sampler},
     data.BatchSampler(
         sampler=dummy_sampler, batch_size=4, drop_last=False),
     id="BatchSamplerConf",
 ),
 pytest.param(
     "utils.data.sampler",
     "RandomSampler",
     {},
     [],
     {"data_source": dummy_dataset},
     data.RandomSampler(data_source=dummy_dataset),
     id="RandomSamplerConf",
 ),
 pytest.param(
     "utils.data.sampler",
     "SequentialSampler",
     {},
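The parametrized cases above exercise sampler construction from config. As a quick standalone illustration of the behaviour being configured (not part of the test suite above), BatchSampler simply groups whatever its wrapped sampler yields into fixed-size index lists:

from torch.utils import data

def test_batch_sampler_groups_indices():
    sampler = data.SequentialSampler(range(10))
    batches = list(data.BatchSampler(sampler, batch_size=4, drop_last=False))
    assert batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]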
Example #24
test_data = KBDataset(os.path.join("data", args.task_dir, 'test2id.txt'),
                      args.cuda)
chk = DataChecker(args.task_dir)

# init data loader
train_loader = data.DataLoader(train_data,
                               batch_size=args.batch_size,
                               shuffle=True)
valid_loader = data.DataLoader(valid_data,
                               batch_size=args.batch_size,
                               shuffle=True)
test_loader = data.DataLoader(test_data,
                              batch_size=args.batch_size,
                              shuffle=True)
test_sampler = data.BatchSampler(data.RandomSampler(test_data),
                                 batch_size=args.sample_size,
                                 drop_last=True)

ent_size = chk.ent_size
rel_size = chk.rel_size

from models.seq2seq_egreedy import Seq2Seq

gen = Seq2Seq(args, rel_size, ent_size)

g_opt = None
d_opt = None
if args.optim == "SGD":
    g_opt = optim.SGD(gen.parameters(), lr=args.lr, momentum=0.9)
elif args.optim == "Adam":
    g_opt = optim.Adam(gen.parameters())
Example #25
	def get_sampler(self, batch_size, drop_last=False, **kwargs):
		return data.BatchSampler(BucketSampler(self, batch_size, len_step=3), batch_size, drop_last=drop_last)
Example #26
    ############################## LOAD MODEL ##############################
    from language_model import language_model
    model = language_model()

    model.load_state_dict(
        torch.load(
            '/content/drive/My Drive/task/aspect_based_rs/out/amazon_clothing/train_amazon_clothing_language_model_id_0X.mod'
        ))

    model.cuda()

    ########################### TEST STAGE #####################################
    test_dataset = data_utils.TrainData(test_data)

    test_batch_sampler = data.BatchSampler(data.SequentialSampler(
        range(test_dataset.length)),
                                           batch_size=conf.batch_size,
                                           drop_last=False)

    # Start Training !!!
    t0 = time()
    model.eval()

    beam_size = 4

    for batch_idx_list in test_batch_sampler:
        user, item, _, review_input, review_output = test_dataset.get_batch(
            batch_idx_list)
        generate(batch_idx_list, user, item,
                 tensorToScalar(review_output.transpose(0, 1)).tolist())
Example #27
def main():

    opt = parser.parse_args()
    print(opt)
    print(torch.__version__)
    opt.seed = 1337
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)

    series = [
        f for f in os.listdir(opt.train_path[0])
        if os.path.isdir(os.path.join(opt.train_path[0], f))
    ]
    series.sort()

    print("===> Building model")
    enc_layers = [1, 2, 2, 4]
    dec_layers = [1, 1, 1, 1]
    number_of_channels = [int(8 * 2**i) for i in range(1, 1 + len(enc_layers))]
    model = UNet(depth=len(enc_layers),
                 encoder_layers=enc_layers,
                 decoder_layers=dec_layers,
                 number_of_channels=number_of_channels,
                 number_of_outputs=3)
    model.apply(weight_init.weight_init)
    model = torch.nn.DataParallel(module=model, device_ids=range(opt.gpus))

    trainer = train.Trainer(model=model,
                            name=opt.name,
                            models_root=opt.models_path,
                            rewrite=False)
    trainer.cuda()

    gc.collect()

    opt.seed = 1337  # random.randint(1, 10000)
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)

    cudnn.benchmark = True

    print("===> Loading datasets")
    print('Train data:', opt.train_path)

    series_val = [
        'BraTS19_2013_0_1', 'BraTS19_2013_12_1', 'BraTS19_2013_16_1',
        'BraTS19_2013_2_1', 'BraTS19_2013_23_1', 'BraTS19_2013_26_1',
        'BraTS19_2013_29_1', 'BraTS19_CBICA_AAB_1', 'BraTS19_CBICA_AAP_1',
        'BraTS19_CBICA_AMH_1', 'BraTS19_CBICA_AQD_1', 'BraTS19_CBICA_ATX_1',
        'BraTS19_CBICA_AZH_1', 'BraTS19_CBICA_BHB_1', 'BraTS19_TCIA12_101_1',
        'BraTS19_TCIA01_150_1', 'BraTS19_TCIA10_152_1', 'BraTS19_TCIA04_192_1',
        'BraTS19_TCIA08_205_1', 'BraTS19_TCIA06_211_1', 'BraTS19_TCIA02_222_1',
        'BraTS19_TCIA12_298_1', 'BraTS19_TCIA13_623_1', 'BraTS19_CBICA_ANV_1',
        'BraTS19_CBICA_BBG_1', 'BraTS19_TMC_15477_1'
    ]

    series_train = [f for f in series if f not in series_val]
    print('Train {}'.format(series_train))
    print('Val {}'.format(series_val))

    train_set = dataloader.SimpleReader(
        paths=opt.train_path,
        patch_size=(144, 144, 128),
        series=[
            series_train,
        ] + [None for i in range(len(opt.train_path) - 1)],
        annotation_path=opt.annotation_path,
        images_in_epoch=8000,
        patches_from_single_image=1)
    val_set = dataloader.FullReader(path=opt.train_path[0], series=series_val)

    training_data_loader = DataLoader(dataset=train_set,
                                      num_workers=opt.threads,
                                      batch_size=opt.batchSize,
                                      shuffle=True,
                                      drop_last=True,
                                      worker_init_fn=worker_init_fn)

    batch_sampler = Data.BatchSampler(sampler=Data.SequentialSampler(val_set),
                                      batch_size=1,
                                      drop_last=True)

    evaluation_data_loader = DataLoader(dataset=val_set,
                                        num_workers=0,
                                        batch_sampler=batch_sampler)

    criterion = [
        loss.Dice_loss_joint(index=0, priority=1).cuda(),
        loss.BCE_Loss(index=0, bg_weight=1e-2).cuda(),
    ]
    print("===> Building model")

    print("===> Training")

    trainer.train(criterion=criterion,
                  optimizer=optim.Adam,
                  optimizer_params={"lr": 2e-5,
                                    "weight_decay": 1e-6,
                                    "amsgrad": True,
                                    },
                  scheduler=torch.optim.lr_scheduler.StepLR,
                  scheduler_params={"step_size": 16000,
                                    "gamma": 0.5,
                                    },
                  training_data_loader=training_data_loader,
                  evaluation_data_loader=evaluation_data_loader,
                  split_into_tiles=False,
                  pretrained_weights=None,
                  train_metrics=[metrics.Dice(name='Dice', input_index=0, target_index=0, classes=4), \
                                 ],
                  val_metrics=[metrics.Dice(name='Dice', input_index=0, target_index=0, classes=4),
                               metrics.Hausdorff_ITK(name='Hausdorff_ITK', input_index=0, target_index=0, classes=4),
                               ],
                  track_metric='Dice',
                  epoches=opt.nEpochs,
                  default_val=np.array([0, 0, 0, 0, 0]),
                  comparator=lambda x, y: np.min(x) + np.mean(x) > np.min(y) + np.mean(y),
                  eval_cpu=False,
                  continue_form_pretraining=False
                  )
Example #28
    args = parse_args()
    print("start to load data")
    (user_num, item_num, train_data, val_negative_dict, val_negative,
     train_mat, val_mat) = data_prepare.load_all(args)
    print('data loaded')
    hrs, ngs, losss = [], [], []
    pmfloss = PMFLoss(user_num, item_num).cuda()
    Train_data = data_prepare.Test_data(train_data, item_num, train_mat, 5)
    Train_data.ng_test_sample()

    hrmax = 0.0
    ndcgmax = 0.0
    for epoch in range(1, 100):
        sampler = data.BatchSampler(data.RandomSampler(range(Train_data.idx)),
                                    batch_size=10240,
                                    drop_last=False)
        pmfloss.train()
        loss_sum = 0.0
        for u_list in sampler:
            user_list, item_list, label_list = Train_data.get_batch(u_list)
            loss = pmfloss.forward(user_list, item_list, label_list)
            pmfloss.optim.zero_grad()
            loss.backward()
            pmfloss.optim.step()
            loss_sum += loss.item()
        pmfloss.eval()
        hrtmp, ndcgtmp = metrics_pmf(pmfloss, val_negative_dict, val_negative,
                                     10)
        str_log = 'epoch:{}----loss:{}----hr:{}----ndcg{}'.format(
            epoch, loss_sum, hrtmp, ndcgtmp)
Example #29
    print('System start to load data...')
    t0 = time()
    train_data, val_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### TRAINING STAGE ##################################
    check_dir('%s/train_log' % conf.out_path)
    log = Logging('%s/train_%s_nrms.log' % (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_nrms.mod' % (conf.out_path, conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TestData(val_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    val_batch_sampler = data.BatchSampler(data.SequentialSampler(
        range(val_dataset.length)), batch_size=conf.batch_size, drop_last=True)

    # Start Training !!!
    max_auc = 0
    for epoch in range(1, conf.train_epochs+1):
        t0 = time()
        model.train()
        
        train_loss = []
        count = 0
        for batch_idx_list in train_batch_sampler:
            
            his_input_title, pred_input_title, labels = \
                train_dataset._get_batch(batch_idx_list)
Example #30
            'transform': input_transform
        }
    else:
        data_kwargs = {
            'base_size': config.DATASET.BASE_SIZE,
            'crop_size': config.DATASET.CROP_SIZE,
            'transform': None
        }

    val_dataset = get_segmentation_dataset(config.DATASET.NAME,
                                           split=config.TEST.TEST_SPLIT,
                                           mode=config.TEST.MODE,
                                           **data_kwargs)
    sampler = make_data_sampler(val_dataset, False, is_distributed)
    batch_sampler = data.BatchSampler(sampler=sampler,
                                      batch_size=config.TEST.TEST_BATCH_SIZE,
                                      drop_last=False)
    val_data = data.DataLoader(val_dataset,
                               shuffle=False,
                               batch_sampler=batch_sampler,
                               num_workers=config.DATASET.WORKERS)
    metric = SegmentationMetric(val_dataset.NUM_CLASS)

    model = get_segmentation_model(config.TEST.MODEL_NAME,
                                   nclass=val_dataset.NUM_CLASS)
    if not os.path.exists(config.TEST.PRETRAINED):
        raise RuntimeError('cannot find the pretrained file in {}'.format(
            config.TEST.PRETRAINED))
    model.load_state_dict(torch.load(config.TEST.PRETRAINED))
    model.keep_shape = True if config.TEST.MODE == 'testval' else False
    model.to(device)