Example #1
    def _init_TRADE_TAQ(self):
        # DataFrame.append was removed in pandas 2.0; collect the frames and concat once.
        taq_frames = []
        for i in progress_bar(self.TAQ_dict):
            df = self.TAQ_dict[i]
            df = df[[
                'Symbol', 'Market', 'BuyPrice01', 'SellPrice01', 'BuyVolume01',
                'SellVolume01', 'TotalBuyOrderVolume', 'TotalSellOrderVolume',
                'WtAvgSellPrice', 'WtAvgBuyPrice'
            ]]
            taq_frames.append(df)
        self.TAQ_df = pd.concat(taq_frames)

        trade_frames = [self.TRADE_dict[i] for i in progress_bar(self.TRADE_dict)]
        self.TRADE_df = pd.concat(trade_frames)

        self.TRADE_df = self.TRADE_df.sort_index()
        self.TAQ_df = self.TAQ_df.sort_index()

        start = datetime.datetime(year=int(self.date[:4]),
                                  month=int(self.date[4:6]),
                                  day=int(self.date[6:8]),
                                  hour=9,
                                  minute=30,
                                  second=0,
                                  microsecond=0)
        cutoff = start.strftime('%Y-%m-%d %H:%M:%S.000')
        self.TRADE_df = self.TRADE_df.loc[self.TRADE_df.index.values >= cutoff]
        self.TAQ_df = self.TAQ_df.loc[self.TAQ_df.index.values >= cutoff]
Example #2
    def fit(self, epochs):
        self.logger.log_info(epochs, self.lr)
        mb = master_bar(range(epochs))
        for epoch in mb:
            self.model.train()
            for xb, yb in progress_bar(self.train_dl, parent=mb):
                loss = self.loss_func(self.model(xb), yb)
                loss.backward()
                self.opt.step()
                self.opt.zero_grad()

            self.model.eval()
            with torch.no_grad():
                tot_loss, tot_acc = 0., 0.
                for xb, yb in progress_bar(self.valid_dl, parent=mb):
                    pred = self.model(xb)
                    temp = self.loss_func(pred, yb)
                    tot_loss += temp
                    tot_acc += self.metric(pred, yb) if self.metric else 1 - temp
            nv = len(self.valid_dl)
            val_loss = tot_loss / nv
            acc = (tot_acc / nv) * 100.
            mb.write('Epoch: {:3}, train loss: {: .4f}, val loss: {: .4f}, '
                     'Acc: {: .4f}%'.format(epoch + 1, loss, val_loss, acc))
            self.logger.log([loss.cpu(), val_loss.cpu(), acc.cpu()])

        self.logger.done()
        io.save(self.model, self.logger.full_path)
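This fit loop shows the core fastprogress pattern used throughout these examples: a `master_bar` over epochs with `progress_bar` children over batches, plus `mb.write` for persistent per-epoch lines. A minimal runnable sketch of just that skeleton (the sleep is a stand-in for real work):

from fastprogress.fastprogress import master_bar, progress_bar
import time

epochs, batches = 3, 10
mb = master_bar(range(epochs))
for epoch in mb:
    for batch in progress_bar(range(batches), parent=mb):
        time.sleep(0.01)                 # stand-in for a training/validation step
        mb.child.comment = 'Train loop'  # transient text next to the child bar
    mb.write(f'Epoch {epoch + 1} done')  # persistent line printed under the bars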
Example #3
    def fit(self, train_dl, valid_dl, epochs, lr, metrics=None, optimizer=None, scheduler=None):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(device)
        optimizer = optimizer or Adam(self.model.parameters(), lr)
        if scheduler is not False:
            scheduler = scheduler or OneCycleLR(optimizer, lr, epochs * len(train_dl))
        else:
            scheduler = None
        self.train_stats = TrainTracker(metrics, validate=(valid_dl is not None))
        bar = master_bar(range(epochs))
        bar.write(self.train_stats.metrics_names, table=True)

        for epoch in bar:
            self.model.train()
            for batch in progress_bar(train_dl, parent=bar):
                batch = batch_to_device(batch, device)
                loss = self._train_batch(batch, optimizer, scheduler)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                if scheduler:
                    scheduler.step()
                self.train_stats.update_train_loss(loss)

            valid_outputs = []
            if valid_dl:
                self.model.eval()
                for batch in progress_bar(valid_dl, parent=bar):
                    batch = batch_to_device(batch, device)
                    output = self._valid_batch(batch)
                    valid_outputs.append(output)

            self.train_stats.log_epoch_results(valid_outputs)
            bar.write(self.train_stats.get_metrics_values(), table=True)
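`batch_to_device` is not defined in this snippet; a minimal sketch of what such a helper typically looks like (an assumption, not the repository's actual implementation):

import torch

def batch_to_device(batch, device):
    # Recursively move tensors in common containers to `device`.
    if torch.is_tensor(batch):
        return batch.to(device)
    if isinstance(batch, (list, tuple)):
        return type(batch)(batch_to_device(b, device) for b in batch)
    if isinstance(batch, dict):
        return {k: batch_to_device(v, device) for k, v in batch.items()}
    return batch  # leave non-tensor leaves (ints, strings, ...) untouched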
Example #4
    def fit_siamese(self, epochs):
        mb = master_bar(range(epochs))
        for epoch in mb:
            self.model.train()
            for x1b, x2b, rdm in progress_bar(self.train_dl, parent=mb):
                mb.child.comment = 'Train loop'
                out1 = self.model(x1b)
                out2 = self.model(x2b)
                loss = self.loss_func(out1, out2, rdm)
                loss.backward()
                self.opt.step()
                self.opt.zero_grad()

            self.model.eval()
            with torch.no_grad():
                tot_loss = 0.
                for x1b, x2b, rdm in progress_bar(self.valid_dl, parent=mb):
                    out1 = self.model(x1b)
                    out2 = self.model(x2b)
                    temp = self.loss_func(out1, out2, rdm)
                    tot_loss += temp
            nv = len(self.valid_dl)
            val_loss = tot_loss / nv
            mb.write(
                'Epoch: {}, train loss: {: .6f}, val loss: {: .6f}'.format(
                    epoch + 1, loss, val_loss))
            self.logger.log([loss.cpu(), val_loss.cpu()])
Example #5
    def fit_supervised(self, epochs):
        mb = master_bar(range(epochs))
        for epoch in mb:
            self.model.train()
            for xb, yb in progress_bar(self.train_dl, parent=mb):
                mb.child.comment = 'Train loop'
                loss = self.loss_func(self.model(xb), yb)
                loss.backward()
                self.opt.step()
                self.opt.zero_grad()

            self.model.eval()
            with torch.no_grad():
                tot_loss, tot_acc = 0., 0.
                for xb, yb in progress_bar(self.valid_dl, parent=mb):
                    mb.child.comment = 'Valid loop'
                    pred = self.model(xb)
                    temp = self.loss_func(pred, yb)
                    tot_loss += temp
                    tot_acc += self.metric(pred, yb) if self.metric else 1 - temp
            nv = len(self.valid_dl)
            val_loss = tot_loss / nv
            acc = (tot_acc / nv) * 100.
            mb.write('Epoch: {:3}, train loss: {: .4f}, val loss: {: .4f}, '
                     'Acc: {: .4f}%'.format(epoch + 1, loss, val_loss, acc))
            self.logger.log([loss.cpu(), val_loss.cpu(), acc.cpu()])
Example #6
def parallel(func, arr:Collection, max_workers:int=None):
    "Call `func` on every element of `arr` in parallel using `max_workers`."
    max_workers = ifnone(max_workers, defaults.cpus)
    if max_workers<2: results = [func(o,i) for i,o in progress_bar(enumerate(arr), total=len(arr))]
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as ex:
            futures = [ex.submit(func,o,i) for i,o in enumerate(arr)]
            results = []
            for f in progress_bar(concurrent.futures.as_completed(futures), total=len(arr)): results.append(f.result())
    if any([o is not None for o in results]): return results
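`parallel` passes each element together with its index and collects results as the futures complete. A minimal usage sketch (`square` is a hypothetical worker; with `ProcessPoolExecutor` it must be defined at module level so it can be pickled):

def square(o, i):
    # `parallel` calls func(element, index)
    return o * o

results = parallel(square, list(range(10)), max_workers=2)
# Note: results arrive in completion order (as_completed), not submission order.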
Example #7
    def _pre_process(self):
        x, y = self.x, self.y
        cfg = x.config

        if len(x.items) > 0:
            if not cfg.resample_to:
                _set_sr(x.items[0], x.config, x.path)
            if cfg._nchannels is None:
                _set_nchannels(x.items[0], x.config, x.path)
            if cfg.downmix or cfg.remove_silence or cfg.segment_size or cfg.resample_to:
                items = list(zip(x.items, y.items))

                def concat(x, y):
                    return np.concatenate((x, y)) if len(y) > 0 else x

                if x.config.downmix:
                    print("Preprocessing: Downmixing to Mono")
                    cfg._nchannels = 1
                    items = [
                        downmix_item(i, x.config, x.path)
                        for i in progress_bar(items)
                    ]
                    items = reduce(concat, items, np.empty((0, 2)))

                if x.config.resample_to:
                    print("Preprocessing: Resampling to", x.config.resample_to)
                    cfg._sr = x.config.resample_to
                    items = [
                        resample_item(i, x.config, x.path)
                        for i in progress_bar(items)
                    ]
                    items = reduce(concat, items, np.empty((0, 2)))

                if x.config.remove_silence:
                    print("Preprocessing: Removing Silence")
                    items = [
                        remove_silence(i, x.config, x.path)
                        for i in progress_bar(items)
                    ]
                    items = reduce(concat, items, np.empty((0, 2)))

                if x.config.segment_size:
                    print("Preprocessing: Segmenting Items")
                    items = [
                        segment_items(i, x.config, x.path)
                        for i in progress_bar(items)
                    ]
                    items = reduce(concat, items, np.empty((0, 2)))

                nx, ny = tuple(zip(*items))
                x.items, y.items = np.array(nx), np.array(ny)

        self.x, self.y = x, y
        self.y.x = x
Example #8
    def __init__(self,
                 tokenizer,
                 file_path,
                 cache_path,
                 logger,
                 block_size=512):
        assert os.path.isfile(file_path)

        if os.path.exists(cache_path):
            logger.info("Loading features from cached file %s", cache_path)
            with open(cache_path, "rb") as handle:
                self.examples = pickle.load(handle)
        else:
            logger.info("Creating features from dataset file %s", file_path)

            self.examples = []
            text = (line.strip() for line in open(file_path, encoding="utf-8"))
            text = progress_bar(list(text))
            text = map(
                lambda x: tokenizer.convert_tokens_to_ids(tokenizer.tokenize(x)),
                text)
            text = itertools.chain.from_iterable(text)
            text = more_itertools.chunked(text, block_size)
            self.examples = list(text)[:-1]
            # Note that we are losing the last truncated example here for the sake of simplicity (no padding).
            # If your dataset is small, first you should look for a bigger one :-) and second you
            # can change this behavior by adding (model-specific) padding.

            logger.info("Saving features into cached file %s", cache_path)
            with open(cache_path, "wb") as handle:
                pickle.dump(self.examples,
                            handle,
                            protocol=pickle.HIGHEST_PROTOCOL)
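The streaming pipeline above (tokenize, flatten, chunk, drop the truncated tail) is easy to sanity-check on toy data; a minimal sketch:

import itertools
import more_itertools

token_streams = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]    # stand-in for tokenized lines
flat = itertools.chain.from_iterable(token_streams)  # one continuous token stream
blocks = list(more_itertools.chunked(flat, 4))       # [[1, 2, 3, 4], [5, 6, 7, 8], [9]]
examples = blocks[:-1]                               # drop the truncated tail, as above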
Example #9
def dtlz2_test():
    # Run the DTLZ2 benchmark
    errors = 0
    num_inputs = 6
    num_objectives = 2
    lab = DTLZ2(num_inputs=num_inputs, num_objectives=num_objectives)
    models = {
        f'y_{i}': GPyModel(Exponential(input_dim=num_inputs, ARD=True))
        for i in range(num_objectives)
    }

    warnings.filterwarnings("ignore", category=RuntimeWarning)
    tsemo = TSEMO(lab.domain, models=models, random_rate=0.00)
    experiments = tsemo.suggest_experiments(5 * num_inputs)

    mb = master_bar(range(1))
    for j in mb:
        mb.main_bar.comment = 'Repeats'
        for i in progress_bar(range(100), parent=mb):
            mb.child.comment = 'Iteration'
            # Run experiments
            experiments = lab.run_experiments(experiments)

            # Get suggestions
            try:
                experiments = tsemo.suggest_experiments(
                    1, experiments, **tsemo_options)
            except Exception as e:
                print(e)
                errors += 1

        tsemo.save(f'new_tsemo_params_{j}.json')
Example #10
    def _download_images(cat_list, path_images, max_images, remove_crowded):
        cat_ids = CocoData.coco.getCatIds(catNms=cat_list)
        idx2cat = {e['id']:e['name'] for e in CocoData.coco.loadCats(CocoData.coco.getCatIds())}
        img_id2fn = {}
        print(f"Found {len(cat_ids)} valid categories.")
        print([idx2cat[e] for e in cat_ids])
        print("Starting download.")
        mb = master_bar(range(len(cat_ids)))
        for i in mb:
            c_id = cat_ids[i]
            print(f"Downloading images of category {idx2cat[c_id]}")
            img_ids = CocoData.coco.getImgIds(catIds=c_id)
            # small function to filter images with crowded objects
            def _f(iid):
                annos = CocoData.coco.loadAnns(CocoData.coco.getAnnIds(imgIds=iid))
                annos = [a for a in annos if idx2cat[a["category_id"]] in cat_list]
                is_crowd = [a["iscrowd"] for a in annos]
                return 1 in is_crowd
            if remove_crowded:
                img_ids = [iid for iid in img_ids if not _f(iid)]
            if max_images is not None:
                img_ids = img_ids[:max_images]
            for iid in img_ids:
                img_id2fn[iid] = path_images/(str(iid).zfill(12)+".jpg")
            # use a separate index so the outer category-loop variable `i` is not clobbered
            for j in progress_bar(range(len(img_ids)), parent=mb):
                with contextlib.redirect_stdout(io.StringIO()):
                    CocoData.coco.download(path_images, [img_ids[j]])

        print(len([fn for fn in path_images.ls()]), "images downloaded.")
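`CocoData.coco` is assumed to be an already-initialized pycocotools `COCO` handle; a minimal setup sketch (the annotation path is hypothetical):

from pycocotools.coco import COCO

# Hypothetical local annotation file from https://cocodataset.org
CocoData.coco = COCO('annotations/instances_train2017.json')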
Example #11
    def clear_cache(self):
        '''Delete the files and empty dirs in the cache folder'''
        num_removed = 0
        parent_dirs = set()
        if not os.path.exists(self.cache_dir / "cache_contents.txt"):
            print(
                "Cache contents not found, try calling again after creating your AudioList"
            )
            return

        with open(self.cache_dir / "cache_contents.txt", 'r') as f:
            pb = progress_bar(f.read().split('\n')[:-1])
            for line in pb:
                if not os.path.exists(line):
                    continue
                else:
                    try:
                        os.remove(line)
                    except Exception as e:
                        print(
                            f"Warning: Failed to remove {line}, due to error {str(e)}...continuing"
                        )
                    else:
                        parent = Path(line).parents[0]
                        parent_dirs.add(parent)
                        num_removed += 1
        for parent in parent_dirs:
            if (os.path.exists(parent) and len(parent.ls()) == 0):
                try:
                    os.rmdir(str(parent))
                except Exception as e:
                    print(
                        f"Warning: Unable to remove empty dir {parent}, due to error {str(e)}...continuing"
                    )
        os.remove(self.cache_dir / "cache_contents.txt")
        print(f"{num_removed} files removed")
Example #12
    def run(self, photos):
        crop_photos = self.rec_model.get_crops(photos)
        for c in progress_bar(crop_photos):
            c.embedding = self.rec_model.get_embedding(c)
        crop_embeddings = np.stack([x.embedding[256:] for x in crop_photos])
        cluster_labels = self.get_cluster_labels(crop_embeddings)
        return crop_photos, cluster_labels
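`get_cluster_labels` is not shown; one plausible sketch clusters the embeddings with DBSCAN (an assumption — the actual method and its parameters may differ):

from sklearn.cluster import DBSCAN

def get_cluster_labels(self, embeddings):
    # Hypothetical implementation: cosine DBSCAN over the embedding rows;
    # label -1 marks noise points that joined no cluster.
    return DBSCAN(eps=0.4, min_samples=2, metric='cosine').fit_predict(embeddings)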
Example #13
def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=False):

    if amp:
        scaler = torch.cuda.amp.GradScaler()

    model.train()
    # Iterate over the batches of the dataset
    for images, targets in progress_bar(train_loader, parent=mb):

        if torch.cuda.is_available():
            images = images.cuda()
            targets = targets.cuda()

        images = batch_transforms(images)

        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                out = model(images)
                train_loss = cross_entropy(out, targets)
            scaler.scale(train_loss).backward()
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            out = model(images)
            train_loss = cross_entropy(out, targets)
            train_loss.backward()
            optimizer.step()
        scheduler.step()

        mb.child.comment = f"Training loss: {train_loss.item():.6}"
Example #14
    def train_epoch(self, mb: MasterBar) -> Tuple[torch.Tensor, torch.Tensor]:
        "One epoch used for training"
        self.mdl.train()
        trn_loss = SmoothenValue(0.9)
        trn_acc = SmoothenValue(0.9)
        for batch in progress_bar(self.data.train_dl, parent=mb):
            # Increment number of iterations
            self.num_it += 1
            for b in batch.keys():
                batch[b] = batch[b].to(self.device)
            self.optimizer.zero_grad()
            out = self.mdl(batch)
            loss = self.loss_fn(out, batch)
            loss = loss.mean()
            loss.backward()
            self.optimizer.step()
            metric = self.eval_fn(out, batch)
            trn_loss.add_value(loss.detach().cpu())
            trn_acc.add_value(metric.detach().cpu())
            mb.child.comment = (
                f'LossB {loss: .4f} | SmLossB {trn_loss.smooth: .4f} | AccB {trn_acc.smooth: .4f}')

        del batch
        self.optimizer.zero_grad()
        return trn_loss.smooth, trn_acc.smooth
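`SmoothenValue` keeps a debiased exponential moving average of the values it is fed; a minimal sketch consistent with its use above (patterned on the fastai helper of the same name):

class SmoothenValue:
    "Exponential moving average of added values, debiased during warm-up."
    def __init__(self, beta):
        self.beta, self.n, self.mov_avg = beta, 0, 0.
        self.smooth = 0.
    def add_value(self, val):
        self.n += 1
        self.mov_avg = self.beta * self.mov_avg + (1 - self.beta) * val
        self.smooth = self.mov_avg / (1 - self.beta ** self.n)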
Example #15
def compute_miou(model,
                 dl,
                 mean,
                 num_classes,
                 show_progress,
                 ignore_mapped_class=[]):

    ious = []
    model.learn.model.eval()
    with torch.no_grad():
        for input, target in progress_bar(dl, display=show_progress):
            pred = model.learn.model(input)
            target = target.squeeze(1)
            if ignore_mapped_class != []:
                _, total_classes, _, _ = pred.shape
                for k in ignore_mapped_class:
                    pred[:, k] = -1
                pred = pred.argmax(dim=1)
            else:
                pred = pred.argmax(dim=1)
            mask1 = []
            mask2 = []
            for i in range(pred.shape[0]):
                mask1.append(pred[i].to(model._device) ==
                             num_classes[:, None, None].to(model._device))
                mask2.append(target[i].to(model._device) ==
                             num_classes[:, None, None].to(model._device))
            mask1 = torch.stack(mask1)
            mask2 = torch.stack(mask2)
            iou = mask_iou(mask1, mask2)
            ious.append(iou.tolist())

    return np.mean(ious, 0)
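`mask_iou` is not shown here; a plausible sketch for boolean per-class masks (an assumption, not necessarily the library's implementation):

def mask_iou(mask1, mask2, eps=1e-6):
    # mask1, mask2: boolean tensors of shape (batch, classes, H, W)
    inter = (mask1 & mask2).sum(dim=(-2, -1)).float()
    union = (mask1 | mask2).sum(dim=(-2, -1)).float()
    return inter / (union + eps)  # eps guards classes absent from both masks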
Example #16
def fit_one_epoch(model, train_loader, optimizer, scheduler, mb, amp=False):
    if amp:
        scaler = torch.cuda.amp.GradScaler()

    model.train()
    # Iterate over the batches of the dataset
    for images, targets in progress_bar(train_loader, parent=mb):

        targets = convert_to_abs_coords(targets, images.shape)
        if torch.cuda.is_available():
            images = images.cuda()
            targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                loss_dict = model(images, targets)
                loss = sum(v for v in loss_dict.values())
            scaler.scale(loss).backward()
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            loss_dict = model(images, targets)
            loss = sum(v for v in loss_dict.values())
            loss.backward()
            optimizer.step()

        mb.child.comment = f'Training loss: {loss.item()}'
    scheduler.step()
Example #17
def test_tsemo(test_num_improve_iter=2, save=False):
    num_inputs = 2
    num_objectives = 2
    lab = VLMOP2()
    strategy = TSEMO(lab.domain)
    experiments = strategy.suggest_experiments(5 * num_inputs)
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    num_improve_iter = 0
    best_hv = None
    pb = progress_bar(range(20))
    for i in pb:
        # Run experiments
        experiments = lab.run_experiments(experiments)

        # Get suggestions
        experiments = strategy.suggest_experiments(1, experiments)

        if save:
            strategy.save("tsemo_settings.json")
        y_pareto, _ = pareto_efficient(lab.data[["y_0", "y_1"]].to_numpy(),
                                       maximize=False)
        hv = hypervolume(y_pareto, [11, 11])
    if best_hv is None:
        best_hv = hv
        elif hv > best_hv:
            best_hv = hv
            num_improve_iter += 1
        pb.comment = f"Hypervolume: {hv}"
        if num_improve_iter >= test_num_improve_iter:
            print(
                "Requirement to improve fbest in at least {} iterations satisfied, test stopped."
                .format(test_num_improve_iter))
            break
Example #18
    def validate(self, mb, model, device):
        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for ind, xy in enumerate(progress_bar(self.valid_dl, parent=mb)):
                y_tag = xy.pop('target_tag')
                y_pos = xy.pop('target_pos')
                x = xy
                inputs = {key: x_.to(device) for key, x_ in x.items()}
                target_tag = y_tag.to(device)
                target_pos = y_pos.to(device)
                # pass the tensors that were moved to `device`, not the CPU originals
                *out, loss = model(**inputs,
                                   target_tag=target_tag,
                                   target_pos=target_pos)

                valid_loss += loss.item()

                if ind % 500 == 0:
                    self.log(
                        f'Batch: {ind}, Valid loss: {valid_loss / (ind+1) :.3f}'
                    )

                mb.child.comment = f'{valid_loss / (ind+1) :.3f}'
        return valid_loss / len(self.valid_dl)
Example #19
    def train(self, mb, model, opt, device, sched=None):
        model.train()
        train_loss = 0
        for ind, xy in enumerate(progress_bar(self.train_dl, parent=mb)):
            y_tag = xy.pop('target_tag')
            y_pos = xy.pop('target_pos')
            x = xy
            inputs = {key: x_.to(device) for key, x_ in x.items()}
            target_tag = y_tag.to(device)
            target_pos = y_pos.to(device)
            opt.zero_grad()
            # pass the tensors that were moved to `device`, not the CPU originals
            *out, loss = model(**inputs, target_tag=target_tag, target_pos=target_pos)
            loss.backward()
            opt.step()
            if sched is not None:
                sched.step()
            train_loss += loss.item()

            if ind % 500 == 0:
                self.log(
                    f'Batch: {ind}, Train loss: {train_loss / (ind+1) :.3f}'
                )

            mb.child.comment = f'{train_loss / (ind+1) :.3f}'
        return train_loss / len(self.train_dl)
Example #20
def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=False):

    if amp:
        scaler = torch.cuda.amp.GradScaler()

    model.train()
    # Iterate over the batches of the dataset
    for images, targets in progress_bar(train_loader, parent=mb):

        if torch.cuda.is_available():
            images = images.cuda()
        images = batch_transforms(images)

        optimizer.zero_grad()
        if amp:
            with torch.cuda.amp.autocast():
                train_loss = model(images, targets)['loss']
            scaler.scale(train_loss).backward()
            # Gradient clipping
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            # Update the params
            scaler.step(optimizer)
            scaler.update()
        else:
            train_loss = model(images, targets)['loss']
            train_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

        scheduler.step()

        mb.child.comment = f'Training loss: {train_loss.item():.6}'
Example #21
def get_preds_cyclegan(learn,
                       test_path,
                       pred_path,
                       convert_to='B',
                       bs=4,
                       num_workers=4,
                       device='cuda',
                       suffix='tif'):
    """
    A prediction function that takes the Learner object `learn` with the trained model, the `test_path` folder with the images to perform
    batch inference on, and the output folder `pred_path` where the predictions will be saved. The function will convert images to the domain
    specified by `convert_to` (default is 'B'). The other arguments are the batch size `bs` (default=4), `num_workers` (default=4), the `device`
    to run inference on (default='cuda') and suffix of the prediction images `suffix` (default='tif').
    """

    assert os.path.exists(test_path)

    if not os.path.exists(pred_path):
        os.mkdir(pred_path)

    test_dl = load_dataset(test_path, bs, num_workers)
    if convert_to == 'B': model = learn.model.G_B.to(device)
    else: model = learn.model.G_A.to(device)
    for i, xb in progress_bar(enumerate(test_dl), total=len(test_dl)):
        fn, im = xb
        preds = (model(im.to(device)) / 2 + 0.5)
        for j in range(len(fn)):
            new_fn = os.path.join(
                pred_path, '.'.join([
                    os.path.basename(fn[j]).split('.')[0] +
                    f'_fake{convert_to}', suffix
                ]))
            torchvision.utils.save_image(preds[j], new_fn)
Example #22
    def predict(self, device='cuda:0', pbar=None):
        """
        Evaluate the model on a validation set
        :param device: str (defaults to 'cuda:0')
        :param pbar: fast_progress progress bar (defaults to None)
        :returns: overall_val_loss (float), accuracies (dict{'acc': value}, preds (dict)
        """
        current_size = len(self.val_data_loader.dataset)
        preds_dict = {
            'y_true': np.zeros([current_size, 11]),
            'y_pred': np.zeros([current_size, 11])
        }
        overall_val_loss = 0.0
        self.model.eval()
        with torch.no_grad():
            index_dict = 0
            for step, batch in enumerate(
                    progress_bar(self.val_data_loader,
                                 parent=pbar,
                                 leave=(pbar is not None))):
                loss, num_rows, y_pred, targets = self.model(batch, device)
                overall_val_loss += loss.item() * num_rows

                current_index = index_dict
                preds_dict['y_true'][current_index:current_index +
                                     num_rows, :] = targets
                preds_dict['y_pred'][current_index:current_index +
                                     num_rows, :] = y_pred
                index_dict += num_rows

        overall_val_loss = overall_val_loss / len(self.val_data_loader.dataset)
        return overall_val_loss, preds_dict
Example #23
def download_url(url, dest, overwrite=False, pbar=None, show_progress=True, chunk_size=1024*1024,
                 timeout=4, retries=5):
    "Download `url` to `dest` unless it exists and not `overwrite`"
    if os.path.exists(dest) and not overwrite: return

    s = requests.Session()
    s.mount('http://',requests.adapters.HTTPAdapter(max_retries=retries))
    # additional line to identify as a firefox browser, see fastai/#2438
    s.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'})
    u = s.get(url, stream=True, timeout=timeout)
    try: file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError): show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress: pbar = progress_bar(range(file_size), leave=False, parent=pbar)
        try:
            if show_progress: pbar.update(0)
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress: pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            fname = url.split('/')[-1]
            data_dir = dest.parent
            print(f'\n Download of {url} has failed after {retries} retries\n'
                  f' Fix the download manually:\n'
                  f'$ mkdir -p {data_dir}\n'
                  f'$ cd {data_dir}\n'
                  f'$ wget -c {url}\n'
                  f'$ tar xf {fname}\n'
                  f' And re-run your code once the download is successful\n')
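A minimal usage sketch for this `download_url` (URL and destination are hypothetical):

from pathlib import Path

dest = Path('data/sample.tgz')
dest.parent.mkdir(parents=True, exist_ok=True)  # the function expects the parent dir to exist
download_url('https://example.com/sample.tgz', dest)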
Example #24
def download_url(url: str,
                 dest: str,
                 overwrite: bool = False,
                 show_progress=True,
                 chunk_size=1024 * 1024,
                 timeout=4,
                 retries=5) -> None:
    "Download `url` to `dest` unless it exists and not `overwrite`."
    if os.path.exists(dest) and not overwrite: return

    s = requests.Session()
    s.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError):
        show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size),
                                auto_update=False,
                                leave=False)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress: pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            print(f'Try downloading your file manually from {url}')
            import sys
            sys.exit(1)
Example #25
def validate_w_dropout(
        model: nn.Module,
        dl: DataLoader,
        loss_func: OptLossFunc = None,
        cb_handler: Optional[CallbackHandler] = None,
        pbar: Optional[PBar] = None,
        average=True,
        n_batch: Optional[int] = None
) -> Iterator[Tuple[Union[Tensor, int], ...]]:
    "Calculate `loss_func` of `model` on `dl` in evaluation mode."
    model.train()
    with torch.no_grad():
        val_losses, nums = [], []
        if cb_handler: cb_handler.set_dl(dl)
        for xb, yb in progress_bar(dl, parent=pbar, leave=(pbar is not None)):
            if cb_handler:
                xb, yb = cb_handler.on_batch_begin(xb, yb, train=False)
            val_loss = loss_batch(model,
                                  xb,
                                  yb,
                                  loss_func,
                                  cb_handler=cb_handler)
            val_losses.append(val_loss)
            if not is_listy(yb): yb = [yb]
            nums.append(yb[0].shape[0])
            if cb_handler and cb_handler.on_batch_end(val_losses[-1]): break
            if n_batch and (len(nums) >= n_batch): break
        nums = np.array(nums, dtype=np.float32)
        if average:
            return (to_np(torch.stack(val_losses)) * nums).sum() / nums.sum()
        else:
            return val_losses
Example #26
def download_url(url:str, dest:str, overwrite:bool=False, pbar:ProgressBar=None,
                 show_progress=True, chunk_size=1024*1024, timeout=4, retries=5)->None:
    "Download `url` to `dest` unless it exists and not `overwrite`."
    if os.path.exists(dest) and not overwrite: return

    s = requests.Session()
    s.mount('http://',requests.adapters.HTTPAdapter(max_retries=retries))
    u = s.get(url, stream=True, timeout=timeout)
    try: file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError): show_progress = False

    with open(dest, 'wb') as f:
        nbytes = 0
        if show_progress: pbar = progress_bar(range(file_size), auto_update=False, leave=False, parent=pbar)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress: pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError as e:
            fname = url.split('/')[-1]
            from fastai.datasets import Config
            data_dir = Config().data_path()
            timeout_txt = (f'\n Download of {url} has failed after {retries} retries\n'
                          f' Fix the download manually:\n'
                          f'$ mkdir -p {data_dir}\n'
                          f'$ cd {data_dir}\n'
                          f'$ wget -c {url}\n'
                          f'$ tar -zxvf {fname}\n\n'
                          f'And re-run your code once the download is successful\n')
            print(timeout_txt)
            import sys
            sys.exit(1)
Example #27
    def validate_old(self):
        self.logger.info("Running evaluation")

        all_logits = None
        all_labels = None

        self.model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0

        validation_scores = {metric['name']: 0. for metric in self.metrics}

        for step, batch in enumerate(progress_bar(self.data.val_dl)):
            batch = tuple(t.to(self.device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch

            if self.is_fp16 and self.multi_label:
                label_ids = label_ids.half()

            with torch.no_grad():
                outputs = self.model(input_ids, segment_ids, input_mask,
                                     label_ids)
                tmp_eval_loss, logits = outputs[:2]
#                logits = self.model(input_ids, segment_ids, input_mask)

            tmp_eval_accuracy = self.metrics[0]['function'](logits, label_ids)
            if all_logits is None:
                all_logits = logits
            else:
                all_logits = torch.cat((all_logits, logits), 0)

            if all_labels is None:
                all_labels = label_ids
            else:
                all_labels = torch.cat((all_labels, label_ids), 0)

            eval_loss += tmp_eval_loss.mean().item()

            nb_eval_examples += input_ids.size(0)
            nb_eval_steps += 1

        eval_loss = eval_loss / nb_eval_steps

        # Evaluation metrics
        for metric in self.metrics:
            validation_scores[metric['name']] = metric['function'](all_logits,
                                                                   all_labels)

        result = {'eval_loss': eval_loss, 'metrics': validation_scores}

        self.logger.info("Eval results:")
        for key in sorted(result.keys()):
            self.logger.info("  %s = %s", key, str(result[key]))

        self.logger.info(
            "--------------------------------------------------------------------------------"
        )

        return result
Example #28
    def _df_predict(self, dataframe):
        # from fastai.data_block import split_kwargs_by_func, grab_idx, DatasetType
        # ds_type = DatasetType.Valid
        # ds = self.learn.dl(ds_type).dataset
        #
        # current_databunch = self.learn.data
        # databunch_half, databunch_second_half = self._data._prepare_validation_databunch(dataframe)
        # return dataframe, databunch_half, databunch_second_half
        # try:
        #     self.learn.data = databunch
        #     preds = self.learn.get_preds(ds_type)[0]
        # except Exception as e:
        #     raise e
        # finally:
        #     self.learn.data = current_databunch
        #
        # analyze_kwargs, kwargs = split_kwargs_by_func({}, ds.y.analyze_pred)
        # preds = [ds.y.analyze_pred(grab_idx(preds, i), **analyze_kwargs) for i in range(len(preds))]
        # preds = [ds.y.reconstruct(z) for z in preds]
        if not HAS_NUMPY:
            raise Exception("This function requires numpy.")

        preds = []

        for i in progress_bar(range(len(dataframe))):
            prediction = self._predict(dataframe.iloc[i])[0].obj
            if isinstance(prediction, (list, np.ndarray)):
                prediction = prediction[0]
            preds.append(prediction)

        return preds
Example #29
def get_preds_cyclegan(learn,
                       test_path,
                       pred_path,
                       bs=4,
                       num_workers=4,
                       suffix='tif'):
    """
    A prediction function that takes the Learner object `learn` with the trained model, the `test_path` folder with the images to perform
    batch inference on, and the output folder `pred_path` where the predictions will be saved, with a batch size `bs`, `num_workers`,
    and suffix of the prediction images `suffix` (default='png').
    """

    assert os.path.exists(test_path)

    if not os.path.exists(pred_path):
        os.mkdir(pred_path)

    test_dl = load_dataset(test_path, bs, num_workers)
    model = learn.model.G_B.cuda()
    for i, xb in progress_bar(enumerate(test_dl), total=len(test_dl)):
        fn, im = xb
        preds = (model(im.cuda()) / 2 + 0.5)
        for j in range(len(fn)):
            new_fn = os.path.join(
                pred_path, '.'.join(
                    [os.path.basename(fn[j]).split('.')[0] + '_fakeB',
                     suffix]))
            torchvision.utils.save_image(preds[j], new_fn)
Example #30
def run_chains_parallel(chains, progressbar, to_run, params, random_seed,
                        kernel_kwargs, cores):
    pbar = progress_bar((), total=100, display=progressbar)
    pbar.update(0)
    pbars = [pbar] + [None] * (chains - 1)

    pool = mp.Pool(cores)

    # "manually" (de)serialize params before/after multiprocessing
    params = tuple(cloudpickle.dumps(p) for p in params)
    kernel_kwargs = {
        key: cloudpickle.dumps(value)
        for key, value in kernel_kwargs.items()
    }
    results = _starmap_with_kwargs(
        pool,
        to_run,
        [(*params, random_seed[chain], chain, pbars[chain])
         for chain in range(chains)],
        repeat(kernel_kwargs),
    )
    results = tuple(cloudpickle.loads(r) for r in results)
    pool.close()
    pool.join()
    return results
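`_starmap_with_kwargs` spreads positional and keyword arguments over `Pool.starmap`, which only accepts positionals; a sketch of how such a helper is commonly written (an assumption, since it is not shown above):

from itertools import repeat

def _starmap_with_kwargs(pool, fn, args_iter, kwargs_iter):
    # Pack fn with each args tuple and a kwargs dict, then unpack in the worker.
    return pool.starmap(_apply_args_and_kwargs,
                        zip(repeat(fn), args_iter, kwargs_iter))

def _apply_args_and_kwargs(fn, args, kwargs):
    return fn(*args, **kwargs)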