Example #1
def train_loop(model_in, train_dl, epochs):

    optimizer = optim.Adam(model_in.parameters(), lr=0.001, weight_decay=5e-5)

    loss_function = nn.BCELoss()

    model_in.train()

    for i in range(epochs):

        for i_batch, sample_batched in enumerate(tqdm(train_dl)):

            batch_X = sample_batched['mri'].to(device)
            batch_Xb = sample_batched['clin_t'].to(device)
            batch_y = sample_batched['label'].to(device)

            model_in.zero_grad()
            outputs = model_in((batch_X, batch_Xb))

            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()

        tqdm.write("Epoch: {}/{}, train loss: {}".format(
            i, epochs, round(loss.item(), 5)))
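A minimal invocation sketch, assuming a Dataset whose items are dicts carrying the keys the loop reads ('mri', 'clin_t', 'label'); train_dataset and model below are illustrative placeholders, not names from the original:

from torch.utils.data import DataLoader

train_dl = DataLoader(train_dataset, batch_size=16, shuffle=True)
# Each batch is then a dict of tensors:
#   batch['mri']    - imaging input
#   batch['clin_t'] - clinical-feature tensor
#   batch['label']  - BCE target in [0, 1], matching the model's output shape
train_loop(model, train_dl, epochs=10)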
Example #2
def shardify_openwebtext(input_dir: str,
                         output_dir: str,
                         n_processes: int,
                         n_shards: int = 20):
    input_dir = Path(input_dir)
    assert input_dir.exists()

    output_dir = Path(output_dir)
    output_dir.mkdir()

    subset_dirs = [
        subset_dir for subset_dir in input_dir.iterdir()
        if subset_dir.is_dir()
    ]
    with mp.Pool(processes=n_processes) as pool:
        # Get list of sorted files in OpenWebText
        text_files = sorted(
            list(chain.from_iterable(pool.map(text_files_in_dir,
                                              subset_dirs))))

        # Split list of files into shards
        shard_files = np.array_split(text_files, n_shards)

        # Save shards and associated filenames
        for i, split in enumerate(tqdm(shard_files)):
            tqdm.write("Loading text in shard...")
            shard = pool.map(read_text, split)

            tqdm.write("Saving shard...")
            shard_name = f'owtc{i:02d}'
            dump(shard, output_dir / f'{shard_name}.joblib')
            filenames = map(lambda f: f.stem, split)
            with open(output_dir / f'{shard_name}_filenames.txt', 'w') as f:
                print(*filenames, file=f, sep='\n')
Example #3
    def _validate(self, epoch, hide_progress):
        assert self.dl_valid is not None
        self.model.eval()

        with torch.no_grad():
            for img_batch, mask_batch in tqdm(self.dl_valid,
                                              desc="Validating",
                                              disable=hide_progress,
                                              leave=False,
                                              position=0):
                img_batch, mask_batch = img_batch.to(
                    self.device), mask_batch.to(self.device)
                model_output = self.model(img_batch)
                loss = self.criterion(model_output, mask_batch)

                self.recorder.update_record_on_batch_end(
                    epoch,
                    loss.item(),
                    mask_batch,
                    model_output.squeeze(),
                    img_batch.size(0),
                    self.num_validation_samples,
                    training=False)
            self.recorder.finalize_record_on_epoch_end(training=False)
            tqdm.write(self.recorder.get_latest_epoch_message(training=False))
Example #4
    def _train_one_epoch(self, epoch, scheduler, hide_progress):
        self.model.train()

        for img_batch, mask_batch in tqdm(self.dl_train,
                                          desc=f"Epoch {epoch}",
                                          leave=False,
                                          disable=hide_progress,
                                          position=0):
            img_batch, mask_batch = img_batch.to(self.device), mask_batch.to(
                self.device)

            self.optimizer.zero_grad()

            model_output = self.model(img_batch)

            loss = self.criterion(model_output, mask_batch)

            with torch.no_grad():
                self.recorder.update_record_on_batch_end(
                    epoch, loss.item(), mask_batch, model_output.squeeze(),
                    img_batch.size(0), self.num_train_samples)
            loss.backward()
            self.optimizer.step()
            scheduler.step()

        with torch.no_grad():
            self.recorder.finalize_record_on_epoch_end()

        tqdm.write(self.recorder.get_latest_epoch_message(training=True))
Example #5
def oracc_download(p):
    """Downloads ZIPs with JSON files from the
    ORACC server. Takes a list of ORACC project
    names and returns the same list with
    duplicates and non-existent projects
    removed."""

    CHUNK = 16 * 1024
    p = list(set(p))  # remove duplicates
    projects = p.copy()
    for project in p:
        proj = project.replace('/', '-')
        url = "http://build-oracc.museum.upenn.edu/json/" + proj + ".zip"
        file = 'jsonzip/' + proj + '.zip'
        with requests.get(url, stream=True) as r:
            if r.status_code == 200:
                tqdm.write("Saving " + url + " as " + file)
                with open(file, 'wb') as f:
                    for c in tqdm(r.iter_content(chunk_size=CHUNK),
                                  desc=project):
                        f.write(c)
            else:
                tqdm.write(url + " does not exist.")
                projects.remove(project)
    return projects
Example #6
    def run(self):
        with self.device:
            self._init_vars()
            self._power_method()
            self.L_init = []
            self.R_init = []
            for j in range(self.J):
                self.L_init.append(sp.to_device(self.L[j]))
                self.R_init.append(sp.to_device(self.R[j]))

            done = False
            while not done:
                try:
                    self.L = []
                    self.R = []
                    for j in range(self.J):
                        self.L.append(sp.to_device(self.L_init[j],
                                                   self.device))
                        self.R.append(sp.to_device(self.R_init[j],
                                                   self.device))

                    self._sgd()
                    done = True
                except OverflowError:
                    self.alpha *= self.beta
                    if self.show_pbar:
                        tqdm.write('\nReconstruction diverged. '
                                   'Scaling step-size by {}.'.format(
                                       self.beta))

            if self.comm is None or self.comm.rank == 0:
                return MultiScaleLowRankImage(
                    (self.T, ) + self.img_shape,
                    [sp.to_device(L_j, sp.cpu_device) for L_j in self.L],
                    [sp.to_device(R_j, sp.cpu_device) for R_j in self.R])
Example #7
 def download_kanji(self, kanji: str, force=False) -> bool:
     path = self.out_dir / (str(ord(kanji)) + ".html")
     if not force and path.exists():
         tqdm.write("Skipping existing kanji {}".format(kanji))
         return False
     self._download(self._build_url(kanji), path)
     return True
Example #8
def GetLungSegData(initial_dir):
    # First, let's get all the subject directories. We'll do this by proceeding
    # through the directory structure and grabbing the ones we want.
    # We'll use the package glob for finding directories
    # The input to this function was the LCTSC directory

    # Now we'll get all the subject directories using glob
    subj_dirs = glob.glob(os.path.join(initial_dir, 'LCTSC*'))
    # and feed those directories into another function that loads
    # the dicoms and masks for each
    data = [GetLCTSCdata(d) for d in tqdm(subj_dirs, desc='Loading data:')]
    # get all images together as inputs
    inputs = np.concatenate([d[0] for d in data])
    # get all masks together as targets
    targets = np.concatenate([d[1] for d in data])
    # add a singleton dimension to the input and target arrays
    inputs = inputs[..., np.newaxis]
    targets = targets[..., np.newaxis]
    # Get the total number of slices
    num_slices = inputs.shape[0]
    # Find the cutoff - set to 90% train and 10% validation
    split_ind = int(0.1 * num_slices)  # np.int was removed in NumPy 1.24
    # split into training and validation sets using the cutoff
    x_val = inputs[:split_ind]
    y_val = targets[:split_ind]
    x_train = inputs[split_ind:]
    y_train = targets[split_ind:]
    tqdm.write('Data loaded')
    return x_train, y_train, x_val, y_val
Example #9
def get_grid_panoids(grid, closest=False, proxies=None, max_retry=3):
    """Get the closest panoramas (ids) for the GPS coordinates list.
    
    If the 'closest' boolean parameter is set to true, only the closest panorama
    will be gotten (at all the available dates)
    """
    def _gen_result(result):
        for r in result:
            yield r

    pan_urls = []
    for lat, lon in grid:
        pan_urls.append(get_panoids_url(lat, lon))
    pan_urls = np.array(pan_urls)
    
    # Try to get the panorama's data
    responses = np.zeros(len(pan_urls), dtype=bool)
    result = []
    n_retry = 0
    while not responses.all() and n_retry < max_retry:
        if n_retry > 0:
            tqdm.write("[get_grid_panoids] Connection error. Trying again in 2 seconds.")
            time.sleep(2)
        idxs = np.nonzero(~responses)[0]
        rs = (grequests.get(p_url, stream=True) for p_url in pan_urls[idxs])
        res = grequests.map(rs)
        for i, res_i in zip(idxs, res):
            if res_i is not None:
                pans = postprocess_panoids(res_i.text, closest=closest, proxies=proxies)
                result.extend(pans)
                # result.append((grid[i], pans))
                # yield (grid[i], pans)
                responses[i] = True
        n_retry += 1
    return result
Example #10
def _cvat_export_dataset_cli(
    cvat_params, task_id, dataset_format, position, unzip=True
):
    """Download a dataset from CVAT (with progress bar)"""

    output = Path(f"{dataset_format}_{task_id}")
    output_zip = output.with_suffix(".zip")
    if output.exists():
        print(f"{output} already exists, skipping download")
        return

    with tqdm(
        desc=f"Exporting dataset for task {task_id}",
        position=position,
        leave=False,
    ) as pbar:
        try:
            cvat_export_dataset(
                cvat_params, task_id, output_zip, dataset_format, progress=pbar
            )

        except requests.exceptions.HTTPError as exc:
            tqdm.write(f"Failed exporting dataset {task_id}: {exc}")
            return

        if unzip:
            tqdm.write(f"Unpacking {output_zip}")
            unzip_dataset(output_zip)
Example #11
 def write(self, buf):
     # Set default streams back
     # They are required for correct tqdm.write work
     # See tqdm.tqdm.external_write_mode()
     with self._std_streams():
         for line in buf.rstrip().splitlines():
             tqdm.write(line.rstrip(), self._real_stream)
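A usage sketch for a wrapper like this, assuming a hypothetical TqdmStream class built around the write method above (its constructor is an assumption, not part of the original): swapping it in for sys.stdout keeps ordinary prints from corrupting active progress bars.

import sys
from tqdm import tqdm

sys.stdout = TqdmStream(sys.stdout)  # hypothetical wrapper around the write above
for i in tqdm(range(100)):
    if i % 10 == 0:
        print(f"step {i}")           # rendered above the bar instead of through it
sys.stdout = sys.__stdout__          # restore the real stream when done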
Example #12
def train(ae, dataloader, criterion, optimizer, use_gpu=True, epochs=5):
    t_begin = time.time()

    if use_gpu:
        ae.cuda()
        criterion.cuda()
        
    losses = []
    for epoch in tqdm(range(epochs), desc='Epoch'):
        for step, example in enumerate(tqdm(dataloader, desc='Batch')):
            if use_gpu:
                example = example.cuda()
                
            optimizer.zero_grad()
            prediction = ae(example)
            loss = criterion(example, prediction)
            loss.backward()
            optimizer.step()
            
            losses.append(float(loss))
            if (step % 300) == 0:
                tqdm.write('Loss: {}\n'.format(loss.item()))
                
    t_end = time.time()
    timestamp = datetime.datetime.fromtimestamp(t_end).strftime('%Y-%m-%d-%H-%M-%S')
    time_training = t_end - t_begin
    return losses, timestamp, time_training
Example #13
def main():
    device='cuda'
    batch_size = 256
    normalize = {
        'mean': [0.485, 0.456, 0.406],
        'std': [0.229, 0.224, 0.225]
    }
    lr = 1e-1
    epochs = 200
    scaler = torch.cuda.amp.GradScaler()

    # transform = common_train((224, 224))
    trainset = ImageNet(os.environ['DATAROOT'], transform=common_train((224, 224)), train=True, subset=50)
    testset = ImageNet(os.environ['DATAROOT'], transform=common_test((224, 224)), train=False, subset=50)
    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, pin_memory=False, num_workers=12)
    test_loader = DataLoader(testset, batch_size=batch_size, shuffle=True, pin_memory=False, num_workers=12)


    model = resnet18(**normalize, class_num=50).to(device)
    model = nn.DataParallel(model, device_ids=list(range(torch.cuda.device_count())))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4, nesterov=True)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, 1e-4, -1)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 90], gamma=0.1, last_epoch=-1)

    runner = Runner(model, train_loader, test_loader, criterion, optimizer, scheduler, scaler, epochs, 10)
    
    tqdm.write("Start training with Resnet18.")
    runner.train()
Example #14
 def write(self, msg: str) -> int:
     try:
         tqdm.write(msg, end='')
     except BrokenPipeError:
         sys.stderr.write(msg)
         return 0
     return len(msg)
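Because this write returns the number of characters handled, an object carrying it satisfies enough of the text-stream protocol to serve as a print target; a minimal sketch, assuming a hypothetical TqdmFile class exposing the method above:

from tqdm import tqdm

out = TqdmFile()  # hypothetical class carrying the write method above
for _ in tqdm(range(10)):
    print("tick", file=out)  # routed through tqdm.write; falls back to stderr on BrokenPipeError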
Example #15
def download_tiles(tiles, directory, disp=False):
    """
    Downloads all the tiles in a Google Stree View panorama into a directory.

    Params:
        tiles - the list of tiles. This is generated by get_tiles_info(panoid).
        directory - the directory to dump the tiles to.
    """

    for i, (x, y, fname, url) in enumerate(tiles):

        if disp and i % 20 == 0:
            print("Image %d / %d" % (i, len(tiles)))

        # Try to download the image file
        while True:
            try:
                response = requests.get(url, stream=True)
                break
            except requests.ConnectionError:
                tqdm.write("Connection error. Trying again in 2 seconds.")
                time.sleep(2)

        with open(directory + '/' + fname, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        del response
Example #16
def main():
    # Distributed setting.
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    device_id = get_device_id()
    torch.cuda.set_device(device_id)
    device = f'cuda:{device_id}'

    # Automatic mixed precision.
    scaler = torch.cuda.amp.GradScaler()

    batch_size = 128
    normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}
    lr = 1e-1
    epochs = 200

    trainset = ImageNet(os.environ['DATAROOT'],
                        transform=common_train((224, 224)),
                        train=True,
                        subset=50)
    testset = ImageNet(os.environ['DATAROOT'],
                       transform=common_test((224, 224)),
                       train=False,
                       subset=50)
    # Use distributed sampler to map data parts to different CUDA devices.
    trainsampler = torch.utils.data.distributed.DistributedSampler(trainset)
    testsampler = torch.utils.data.distributed.DistributedSampler(testset)
    train_loader = DataLoader(trainset,
                              batch_size=batch_size,
                              sampler=trainsampler,
                              pin_memory=False,
                              num_workers=12)
    test_loader = DataLoader(testset,
                             batch_size=batch_size,
                             sampler=testsampler,
                             pin_memory=False,
                             num_workers=12)

    model = resnet18(**normalize, class_num=50).to(device)
    # Distributed: convert the BN layers of the model into sync-BN layers.
    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    # Distributed: create DDP model.
    model = nn.parallel.DistributedDataParallel(model,
                                                device_ids=[device_id],
                                                output_device=device_id)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=1e-4,
                          nesterov=True)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, 1e-4,
                                                     -1)

    # Use DistRunner to run DDP train and evaluation.
    runner = DistRunner(model, train_loader, test_loader, criterion, optimizer,
                        scheduler, scaler, epochs, 10)

    tqdm.write("Start training with Resnet18.")
    runner.train()
Example #17
def parsedNames(path, verbose):
    """Used to parse names from a document of names."""
    names = []
    if verbose: tqdm.write(f"Parsing names in file {os.path.basename(path)}")
    with open(path, "r", encoding="utf-8") as nameList:
        for line in nameList:
            names.append(line.rstrip())
    return names
Example #18
 def __call__(self, epoch, train_loss, val_loss):
     log_string = (f"Epoch [{epoch}/{self.num_epochs}], "
                   f"Train Loss: {train_loss:.4f}, "
                   f"Val Loss: {val_loss:.4f} ")
     if epoch % self.log_interval == 0:
         tqdm.write(log_string)
     self.log.append(log_string)
     if self.best is None or val_loss < self.best:
         self.best = val_loss
Example #19
File: logger.py Project: zyxue/pyro
 def emit(self, record):
     try:
         msg = self.format(record)
         self.flush()
         tqdm.write(msg, file=sys.stderr)
     except (KeyboardInterrupt, SystemExit) as e:
         raise e
     except Exception:
         self.handleError(record)
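A sketch of how an emit like this is typically wired in, assuming a hypothetical TqdmLoggingHandler subclass of logging.Handler built around the method above:

import logging
from tqdm import tqdm

logger = logging.getLogger(__name__)
logger.addHandler(TqdmLoggingHandler())  # hypothetical handler class
logger.setLevel(logging.INFO)

for i in tqdm(range(50)):
    if i % 10 == 0:
        logger.info("processed %d items", i)  # log lines land above the active bar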
Example #20
def feature_extract(eval_set, model, device, opt, config):
    if not exists(opt.output_features_dir):
        makedirs(opt.output_features_dir)

    output_local_features_prefix = join(opt.output_features_dir, 'patchfeats')
    output_global_features_filename = join(opt.output_features_dir,
                                           'globalfeats.npy')

    pool_size = int(config['global_params']['num_pcs'])

    test_data_loader = DataLoader(
        dataset=eval_set,
        num_workers=int(config['global_params']['threads']),
        batch_size=int(config['feature_extract']['cacheBatchSize']),
        shuffle=False,
        pin_memory=(not opt.nocuda))

    model.eval()
    with torch.no_grad():
        tqdm.write('====> Extracting Features')
        db_feat = np.empty((len(eval_set), pool_size), dtype=np.float32)

        for iteration, (input_data, indices) in \
                enumerate(tqdm(test_data_loader, position=1, leave=False, desc='Test Iter'.rjust(15)), 1):
            indices_np = indices.detach().numpy()
            input_data = input_data.to(device)
            image_encoding = model.encoder(input_data)
            if config['global_params']['pooling'].lower() == 'patchnetvlad':
                vlad_local, vlad_global = model.pool(image_encoding)

                vlad_global_pca = get_pca_encoding(model, vlad_global)
                db_feat[indices_np, :] = vlad_global_pca.detach().cpu().numpy()

                for this_iter, this_local in enumerate(vlad_local):
                    this_patch_size = model.pool.patch_sizes[this_iter]

                    db_feat_patches = np.empty(
                        (this_local.size(0), pool_size, this_local.size(2)),
                        dtype=np.float32)
                    grid = np.indices((1, this_local.size(0)))
                    this_local_pca = get_pca_encoding(model, this_local.permute(2, 0, 1).reshape(-1, this_local.size(1))).\
                        reshape(this_local.size(2), this_local.size(0), pool_size).permute(1, 2, 0)
                    db_feat_patches[
                        grid, :, :] = this_local_pca.detach().cpu().numpy()

                    for i, val in enumerate(indices_np):
                        image_name = os.path.splitext(
                            os.path.basename(eval_set.images[val]))[0]
                        filename = output_local_features_prefix + '_' + 'psize{}_'.format(
                            this_patch_size) + image_name + '.npy'
                        np.save(filename, db_feat_patches[i, :, :])
            else:
                vlad_global = model.pool(image_encoding)
                vlad_global_pca = get_pca_encoding(model, vlad_global)
                db_feat[indices_np, :] = vlad_global_pca.detach().cpu().numpy()

    np.save(output_global_features_filename, db_feat)
Example #21
 def emit(self, record: Any) -> None:
     try:
         msg = self.format(record)
         tqdm.write(msg)
         self.flush()
     except (KeyboardInterrupt, SystemExit):
         raise
     except Exception:
         self.handleError(record)
Example #22
 def emit(self, record: logging.LogRecord):
     if record.levelno < self.level:
         return
     try:
         msg = self.format(record)
         tqdm.write(msg, file=sys.stderr)
         self.flush()
     except Exception:  # noqa
         self.handleError(record)
Example #23
    def train(self, train_set, dev_set):
        self.iterations, self.nb_tr_steps, self.tr_loss = 0, 0, 0
        self.best_valid_metric, self.unimproved_iters = 0, 0
        self.early_stop = False
        if self.args.gradient_accumulation_steps < 1:
            raise ValueError(
                "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
                .format(self.args.gradient_accumulation_steps))

        self.args.batch_size = (self.args.batch_size //
                                self.args.gradient_accumulation_steps)
        self.init_optimizer()

        train_dataset = convert_df_to_ids(train_set, self.word2id,
                                          self.args.max_seq_length)

        dev_dataset = convert_df_to_ids(dev_set, self.word2id,
                                        self.args.max_seq_length)

        train_dataloader = DataLoader(
            train_dataset,
            sampler=RandomSampler(train_dataset),
            batch_size=self.args.batch_size,
        )
        dev_dataloader = DataLoader(
            dev_dataset,
            sampler=SequentialSampler(dev_dataset),
            batch_size=self.args.batch_size,
        )

        for epoch in tqdm(range(int(self.args.epochs))):
            self.tr_loss = self.train_an_epoch(train_dataloader)[0]
            tqdm.write(f"[Epoch {epoch}] loss: {self.tr_loss}".format(
                epoch, self.best_valid_metric))
            self.tr_loss = 0
            eval_result = self.eval(dev_dataloader)
            # Update validation results
            if eval_result[self.args.valid_metric] > self.best_valid_metric:
                self.unimproved_iters = 0
                self.best_valid_metric = eval_result[self.args.valid_metric]
                print_dict_as_table(
                    eval_result,
                    tag=f"[Epoch {epoch}]performance on validation set",
                    columns=["metrics", "values"],
                )
                ensureDir(self.args.model_save_dir)
                self.save_pretrained(self.args.model_save_dir)
            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write(
                        "Early Stopping. Epoch: {}, best_valid_metric ({}): {}"
                        .format(epoch, self.args.valid_metric,
                                self.best_valid_metric))
                    break
Example #24
def train(
    self: Union[AnimeHeadsTrainer, BirdsTrainer],  # requires: from typing import Union
    n_step,
    step_per_epoch=500,  # print loss each step_per_epoch
    saveck_every=None,
    ck_path: str = None,
):
    assert n_step % step_per_epoch == 0, f'n_step: {n_step} % step_per_epoch: {step_per_epoch} should be 0'
    losses = []
    total_start_t = time.time()
    log_start_t = time.time()
    dl = InfiniteDl(self.dls.train)

    pb = tqdm(range(1, n_step + 1))
    for step in pb:
        self.rnn_encoder.train()
        self.cnn_encoder.train()
        self.optim.zero_grad()

        cap, cap_len, img = to_device(dl.next(), device=self.device)
        cap, cap_len, img = self.after_batch_tfm(cap, cap_len, img)
        word_features, cnn_code = self.cnn_encoder(img)
        sent_emb, word_emb = self.rnn_encoder(cap, cap_len)

        s_loss0, s_loss1 = compute_sent_loss(cnn_code,
                                             sent_emb,
                                             gamma3=self.gamma3)
        w_loss0, w_loss1, attn_maps = compute_word_loss(word_features,
                                                        word_emb,
                                                        cap_len,
                                                        gamma1=self.gamma1,
                                                        gamma2=self.gamma2,
                                                        gamma3=self.gamma3)
        loss = s_loss0 + s_loss1 + w_loss0 + w_loss1
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.rnn_encoder.parameters(), 0.25)
        self.optim.step()

        losses.append(loss.detach().cpu())
        pb.set_postfix(loss=loss.detach().cpu().numpy())

        if saveck_every and step % saveck_every == 0:
            self.save_checkpoint(path=ck_path + f'-{step//saveck_every}.pt')
        if step % step_per_epoch == 0:  # Each epoch
            # Decay the LR until it reaches a tenth of the base rate.
            if self.lr_scheduler.get_last_lr()[0] > self.lr_scheduler.base_lrs[0] / 10:
                self.lr_scheduler.step()
            duration = time.time() - log_start_t
            msg = f'{step//step_per_epoch}, time: {duration:.1f}s, loss: {torch.tensor(losses).mean():.4f}'
            tqdm.write(msg)
            losses = []
            log_start_t = time.time()

    pb.close()
    tqdm.write(f'total_time: {(time.time()-total_start_t)/60:.1f}min')
Example #25
    def train(self, model: nn.Module, train_interactions: np.ndarray,
              test_interactions: np.ndarray, is_sparse: bool):

        optimizer: optim.Optimizer

        if is_sparse:
            optimizer = optim.SparseAdam(model.parameters(), lr=self.LR)
        else:
            optimizer = optim.Adam(model.parameters(),
                                   lr=self.LR,
                                   weight_decay=self.WEIGHT_DECAY)

        train_loss_history = []
        test_loss_history = []

        train_dataset = get_dataset(train_interactions)
        test_dataset = get_dataset(test_interactions)
        test_users, test_movies, test_ratings = test_dataset.tensors

        data_loader = DataLoader(train_dataset, batch_size=self.BATCH_SIZE)

        model.to(DEVICE)

        for epoch in tqdm(range(0, self.EPOCHS), desc='Training'):
            train_loss = 0

            for users_batch, movies_batch, ratings_batch in data_loader:
                optimizer.zero_grad()

                prediction = model(users_batch, movies_batch)
                loss = self.loss(prediction, ratings_batch)

                for regularizer in self.regularizers:
                    loss += regularizer(prediction)

                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            test_prediction = model(test_users, test_movies)
            test_loss = self.loss(test_prediction, test_ratings).item()
            for regularizer in self.regularizers:
                test_loss += regularizer(test_prediction).item()

            train_loss /= len(data_loader)

            train_loss_history.append(train_loss)
            test_loss_history.append(test_loss)

            if self.VERBOSE:
                msg = f'Train loss: {train_loss:.3f}, '
                msg += f'Test loss: {test_loss:.3f}'
                tqdm.write(msg)

        return train_loss_history, test_loss_history
Example #26
 def delete(self):
     """Deletes internal `message_id`."""
     try:
         future = self.submit(
             self.session.post, self.API + '%s/deleteMessage' % self.token,
             data={'chat_id': self.chat_id, 'message_id': self.message_id})
     except Exception as e:
         tqdm_auto.write(str(e))
     else:
         return future
Example #27
 def request(prompts: List[str]):
     # Retry request (handles connection errors, timeouts, and overloaded API)
     while True:
         try:
             return openai.Completion.create(engine=model_name_or_path,
                                             prompt=prompts,
                                             max_tokens=max_len,
                                             n=1)
         except Exception as e:
             tqdm.write(str(e))
             tqdm.write("Retrying...")
Example #28
 def __init__(self, token, channel_id):
     """Creates a new message in the given `channel_id`."""
     super(DiscordIO, self).__init__()
     config = ClientConfig()
     config.token = token
     client = Client(config)
     self.text = self.__class__.__name__
     try:
         self.message = client.api.channels_messages_create(channel_id, self.text)
     except Exception as e:
         tqdm_auto.write(str(e))
Example #29
def download_panorama_v4(panoid, zoom=5, max_retry=3):
    '''
    v4: keep tile data in memory buffers (v2 saved each image to disk, then read it back)
    input:
        panoid: the id of a panorama on Google Maps
        zoom: larger number -> higher resolution, from 1 to 5; best kept at 3 or below,
            since some locations fail when zoom is larger than 3
    output:
        panorama image (uncropped)
    '''
    tile_height, tile_width = 512, 512
    # img_w, img_h = int(np.ceil(416*(2**zoom)/tile_width)*tile_width), int(np.ceil(416*( 2**(zoom-1) )/tile_width)*tile_width)
    img_w, img_h = 416*(2**zoom), 416*( 2**(zoom-1) )
    tiles = get_tiles_info(panoid, zoom=zoom)
    valid_tiles = []
    tile_urls = []
    # same tile-selection logic as download_tiles
    for i, tile in enumerate(tiles):
        x, y, fname, url = tile
        if x*tile_width < img_w and y*tile_height < img_h: # tile is valid
            tile_urls.append(url)
    tile_urls = np.array(tile_urls, dtype=str)
    
    # Try to download the image file
    valid_tiles = np.array([None] * len(tile_urls), dtype=object)
    n_retry = 0
    while any(valid_tiles == None) and n_retry < max_retry:
        idxs = (valid_tiles == None).nonzero()[0]
        if n_retry > 0:
            tqdm.write(f"[download_panorama_v3] Connection error. Trying again in 2 seconds. {len(idxs)}/{len(valid_tiles)}")
            time.sleep(2)
        rs = (grequests.get(t_url, stream=True) for t_url in tile_urls[idxs])
        res = grequests.map(rs)
        for i, res_i in zip(idxs, res):
            if res_i is not None:
                try:
                    valid_tiles[i] = Image.open(BytesIO(res_i.content))
                except PIL.UnidentifiedImageError:
                    continue
        n_retry += 1
    # If a tile is still missing after the retries, fill it with black pixels
    idxs = (valid_tiles == None).nonzero()[0]
    if idxs.size > 0:
        for idx in idxs:
            valid_tiles[idx] = Image.new('RGB', (tile_width, tile_height))

    # stitch the tiles into the full panorama
    panorama = Image.new('RGB', (img_w, img_h))
    i = 0
    for x, y, fname, url in tiles:
        if x*tile_width < img_w and y*tile_height < img_h: # tile is valid
            tile = valid_tiles[i]
            i += 1
            panorama.paste(im=tile, box=(x*tile_width, y*tile_height))
    return np.array(panorama)
Example #30
    def _print_hyper_parameters(hyper_parameters):
        """
        Prints parameters.

        :param hyper_parameters: Dict with the hyper parameters.
        """

        tqdm.write('-' * 80)
        tqdm.write('Hyper-Parameters')
        tqdm.write('-' * 80)
        tqdm.write(pprint.pformat(hyper_parameters))
        tqdm.write('-' * 80)