Пример #1
0
                                           (batch_i + 1))
            log_str += f"\n---- ETA {time_left}"

            print(log_str)

            model.seen += imgs.size(0)

        if epoch % opt.evaluation_interval == 0:

            print("\n---- Evaluating Model ----")
            # Evaluate the model on the validation set
            precision, recall, AP, f1, ap_class = evaluate(
                model,
                path=valid_path,
                iou_thres=0.5,
                conf_thres=0.001,
                nms_thres=0.5,
                img_size=opt.input_len,
                batch_size=8,
                dim=opt.dim,
            )

            evaluation_metrics = [
                ("val_precision", precision.mean()),
                ("val_recall", recall.mean()),
                ("val_mAP", AP.mean()),
                ("val_f1", f1.mean()),
            ]
            logger.list_of_scalars_summary(evaluation_metrics, epoch)

            # Print class APs and mAP
            ap_table = [["Index", "Class name", "AP"]]
Пример #2
0
            log_str += f"\n---- ETA {time_left}"

            print(log_str)

            model.seen += imgs.size(0)

        print('Epoch', epoch, opt.evaluation_interval)

        if epoch % opt.evaluation_interval == 0:
            print("\n---- Evaluating Model ----")
            # Evaluate the model on the validation set
            result = evaluate(
                model,
                path=valid_path,
                iou_thres=0.5,
                conf_thres=0.5,
                nms_thres=0.5,
                img_size=opt.img_size,
                batch_size=8,
            )
            if result:
                print('result', result)
                precision, recall, AP, f1, ap_class = result
                evaluation_metrics = [
                    ("val_precision", precision.mean()),
                    ("val_recall", recall.mean()),
                    ("val_mAP", AP.mean()),
                    ("val_f1", f1.mean()),
                ]
                logger.list_of_scalars_summary(evaluation_metrics, epoch)
Пример #3
0
            writer.add_scalar('loss', errD.data.cpu().numpy(), iters)
            train_info += ' loss: {:.4f}'.format(errD.data.cpu().numpy())

            print(train_info)

            # Output training stats
            if i % 50 == 0:
                print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                    % (epoch, num_epochs, i, len(dataloader),
                        errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

            # Save Losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())
           
        print(epoch%args.val_epoch)
        if epoch%args.val_epoch == 0:
            #print('evaluate')
            ''' evaluate the model '''
            acc = test.evaluate(model, dataloader_target, alpha)
            writer.add_scalar('val_acc', acc, iters)
            #print('Epoch: [{}] ACC:{}'.format(epoch, acc))

            ''' save best model '''
            if acc > best_acc:
                save_model(model, os.path.join(args.save_dir, 'model_best.pth.tar'))
                best_acc = acc

        ''' save model '''
        save_model(model, os.path.join(args.save_dir, 'model_{}.pth.tar'.format(epoch)))
Пример #4
0
            if step != 0:
                vis.add_scalar(loss_dict, epoch,
                               epoch * len(train_loader) + step)
            # Visualization
            res_dict = model.test(*data, epoch=epoch, save_every=save_every)
            # vis.add_images(model.get_visuals(), epoch, epoch * len(train_loader) + step, prefix='train')
            # Random sample test data
            idx = np.random.randint(len(val_loader.dataset))
            inputs = val_loader.dataset[idx:idx + 1]
            res_dict = model.test(*inputs, epoch=1, save_every=save_every)
            if step != 0:
                vis.add_scalar(res_dict, epoch,
                               epoch * len(train_loader) + step)
            # vis.add_images(model.get_visuals(), epoch, epoch * len(train_loader) + step, prefix='test')

    logger.print('Epoch {}/{}:{}'.format(epoch, opt.n_epochs - 1, mode))
    if epoch > 400:
        break

    # Evaluate on val set
    if opt.evaluate_every > 0 and (epoch + 1) % opt.evaluate_every == 0 and \
        opt.n_frames_output > 0:
        results = evaluate(val_opt, val_loader, model)
        vis.add_scalar(results, epoch)
        for metric in results.keys():
            logger.print('{}: {}'.format(metric, results[metric]))

    # Save model checkpoints
    if (epoch + 1
        ) % opt.save_every == 0 and epoch > 0 or epoch == opt.n_epochs - 1:
        model.save(opt.ckpt_path, epoch + 1)
Пример #5
0
def train():
    """Train the Darknet model configured by the module-level ``args``.

    The function builds the model and an SGD optimizer with three parameter
    groups, optionally resumes from a ``.pth`` checkpoint or loads darknet
    weights, then runs the epoch loop: burn-in, optional multi-scale
    resizing, gradient accumulation, EMA update, per-epoch evaluation,
    ``results.txt`` / TensorBoard logging and checkpointing under
    ``weights/``.

    Returns:
        The latest evaluation ``results`` tuple (initialised to seven zeros).
        If the loss becomes non-finite, a warning is issued and the results
        collected so far are returned immediately.

    Raises:
        KeyError: if the checkpoint given by ``args.weights`` contains
            parameters that do not exist in the model built from ``args.cfg``.
    """
    cfg = args.cfg
    data = args.data
    if len(args.image_size) == 2:
        image_size, image_size_val = args.image_size[0], args.image_size[1]
    else:
        image_size, image_size_val = args.image_size[0], args.image_size[0]

    epochs = args.epochs
    batch_size = args.batch_size
    accumulate = args.accumulate
    weights = args.weights

    # Initialize
    gs = 32  # (pixels) grid size
    assert math.fmod(image_size,
                     gs) == 0, f"--image-size must be a {gs}-multiple"

    init_seeds()
    # Placeholders only: both bounds are overwritten below when multi-scale
    # training is enabled and are not read otherwise.
    image_size_min = 6.6
    image_size_max = 28.5
    if args.multi_scale:
        image_size_min = round(image_size / gs / 1.5) + 1
        image_size_max = round(image_size / gs * 1.5)
        image_size = image_size_max * gs  # initiate with maximum multi_scale size
        print(f"Using multi-scale {image_size_min * gs} - {image_size}")

    # Configure run
    dataset_dict = parse_data_config(data)
    train_path = dataset_dict["train"]
    valid_path = dataset_dict["valid"]
    num_classes = 1 if args.single_cls else int(dataset_dict["classes"])

    # Remove previous results
    for files in glob.glob("results.txt"):
        os.remove(files)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for model_key, model_value in dict(model.named_parameters()).items():
        if ".bias" in model_key:
            pg2 += [model_value]  # biases
        elif "Conv2d.weight" in model_key:
            pg1 += [model_value]  # apply weight_decay
        else:
            pg0 += [model_value]  # all else

    optimizer = torch.optim.SGD(pg0,
                                lr=parameters["lr0"],
                                momentum=parameters["momentum"],
                                nesterov=True)
    optimizer.add_param_group({
        "params": pg1,
        # add pg1 with weight_decay
        "weight_decay": parameters["weight_decay"]
    })
    optimizer.add_param_group({"params": pg2})  # add pg2 with biases
    del pg0, pg1, pg2

    epoch = 0
    start_epoch = 0
    best_fitness = 0.0
    context = None
    if weights.endswith(".pth"):
        state = torch.load(weights, map_location=device)
        # load model
        try:
            # Build the model's state dict once instead of once per key.
            model_state = model.state_dict()
            # Keep only tensors whose element count matches the model's;
            # an unknown key raises KeyError and is reported below.
            state["state_dict"] = {
                k: v
                for k, v in state["state_dict"].items()
                if model_state[k].numel() == v.numel()
            }
            model.load_state_dict(state["state_dict"], strict=False)
        except KeyError as e:
            error_msg = f"{args.weights} is not compatible with {args.cfg}. "
            error_msg += f"Specify --weights `` or specify a --cfg "
            error_msg += f"compatible with {args.weights}. "
            raise KeyError(error_msg) from e

        # load optimizer
        if state["optimizer"] is not None:
            optimizer.load_state_dict(state["optimizer"])
            best_fitness = state["best_fitness"]

        # load results
        if state.get("training_results") is not None:
            with open("results.txt", "w") as file:
                file.write(state["training_results"])  # write results.txt

        start_epoch = state["epoch"] + 1
        del state

    elif len(weights) > 0:
        # possible weights are "*.weights", "yolov3-tiny.conv.15",  "darknet53.conv.74" etc.
        load_darknet_weights(model, weights)
    else:
        print("Pre training model weight not loaded.")

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        # skip print amp info
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
    # Cosine-shaped LR multiplier: 1.0 at epoch 0, 0.05 at epoch == epochs.
    # source https://arxiv.org/pdf/1812.01187.pdf
    lr_lambda = lambda x: ((
        (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.95 + 0.05
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=lr_lambda,
                                                  last_epoch=start_epoch - 1)

    # Initialize distributed training
    # Remember whether the process group was created here so that teardown
    # at the end of training mirrors exactly this condition.
    distributed = False
    if device.type != "cpu" and torch.cuda.device_count(
    ) > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend="nccl",  # "distributed backend"
            # distributed training init method
            init_method="tcp://127.0.0.1:8888",
            # number of nodes for distributed training
            world_size=1,
            # distributed training node rank
            rank=0)
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers
        distributed = True

    # Dataset
    # Apply augmentation hyperparameters (option: rectangular training)
    train_dataset = LoadImagesAndLabels(train_path,
                                        image_size,
                                        batch_size,
                                        augment=True,
                                        hyp=parameters,
                                        rect=args.rect,
                                        cache_images=args.cache_images,
                                        single_cls=args.single_cls)
    # No apply augmentation hyperparameters and rectangular inference
    valid_dataset = LoadImagesAndLabels(valid_path,
                                        image_size_val,
                                        batch_size,
                                        augment=False,
                                        hyp=parameters,
                                        rect=True,
                                        cache_images=args.cache_images,
                                        single_cls=args.single_cls)
    collate_fn = train_dataset.collate_fn
    # Dataloader
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=args.workers,
                                                   shuffle=not args.rect,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)
    valid_dataloader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=args.workers,
                                                   shuffle=False,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)

    # Model parameters
    model.nc = num_classes  # attach number of classes to model
    model.hyp = parameters  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    # attach class weights
    model.class_weights = labels_to_class_weights(train_dataset.labels,
                                                  num_classes).to(device)

    # Model EMA
    ema = ModelEMA(model, decay=0.9998)

    # Start training
    batches_num = len(train_dataloader)  # number of batches
    burns = max(3 * batches_num,
                500)  # burn-in iterations, max(3 epochs, 500 iterations)
    maps = np.zeros(num_classes)  # mAP per class
    # "P", "R", "mAP", "F1", "val GIoU", "val Objectness", "val Classification"
    results = (0, 0, 0, 0, 0, 0, 0)
    print(f"Using {args.workers} dataloader workers.")
    print(f"Starting training for {args.epochs} epochs...")

    start_time = time.time()
    for epoch in range(start_epoch, args.epochs):
        model.train()

        # Update image weights (optional)
        if train_dataset.image_weights:
            # class weights
            class_weights = model.class_weights.cpu().numpy() * (1 - maps)**2
            image_weights = labels_to_image_weights(
                train_dataset.labels,
                num_classes=num_classes,
                class_weights=class_weights)
            # rand weighted index
            train_dataset.indices = random.choices(
                range(train_dataset.image_files_num),
                weights=image_weights,
                k=train_dataset.image_files_num)

        mean_losses = torch.zeros(4).to(device)
        print("\n")
        print(("%10s" * 8) % ("Epoch", "memory", "GIoU", "obj", "cls", "total",
                              "targets", " image_size"))
        progress_bar = tqdm(enumerate(train_dataloader), total=batches_num)
        for index, (images, targets, paths, _) in progress_bar:
            # number integrated batches (since train start)
            ni = index + batches_num * epoch
            # uint8 to float32, 0 - 255 to 0.0 - 1.0
            images = images.to(device).float() / 255.0
            targets = targets.to(device)

            # Hyperparameter Burn-in
            if ni <= burns * 2:
                # giou loss ratio (obj_loss = 1.0 or giou)
                model.gr = np.interp(ni, [0, burns * 2], [0.0, 1.0])

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(ni, [0, burns], [
                        0.1 if j == 2 else 0.0,
                        x["initial_lr"] * lr_lambda(epoch)
                    ])
                    if "momentum" in x:
                        x["momentum"] = np.interp(
                            ni, [0, burns], [0.9, parameters["momentum"]])

            # Multi-Scale training
            if args.multi_scale:
                # Draw a new image size whenever ni is a multiple of
                # `accumulate`, i.e. once per optimizer step.
                if ni / accumulate % 1 == 0:
                    image_size = random.randrange(image_size_min,
                                                  image_size_max + 1) * gs
                scale_ratio = image_size / max(images.shape[2:])
                if scale_ratio != 1:
                    # new shape (stretched to 32-multiple)
                    new_size = [
                        math.ceil(size * scale_ratio / gs) * gs
                        for size in images.shape[2:]
                    ]
                    images = F.interpolate(images,
                                           size=new_size,
                                           mode="bilinear",
                                           align_corners=False)

            # Run model
            output = model(images)

            # Compute loss
            loss, loss_items = compute_loss(output, targets, model)
            if not torch.isfinite(loss):
                warnings.warn(
                    f"WARNING: Non-finite loss, ending training {loss_items}")
                return results

            # Scale the loss for gradient accumulation; the factor below
            # reduces to 1 / accumulate.
            loss *= batch_size / (batch_size * accumulate)

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print batch results
            # update mean losses
            mean_losses = (mean_losses * index + loss_items) / (index + 1)
            memory = f"{torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0:.2f}G"
            context = ("%10s" * 2 + "%10.3g" * 6) % ("%g/%g" %
                                                     (epoch, args.epochs - 1),
                                                     memory, *mean_losses,
                                                     len(targets), image_size)
            progress_bar.set_description(context)

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not args.notest or final_epoch:  # Calculate mAP
            coco = any([
                coco_name in data for coco_name in
                ["coco.data", "coco2014.data", "coco2017.data"]
            ]) and model.nc == 80
            results, maps = evaluate(cfg,
                                     data,
                                     batch_size=batch_size,
                                     image_size=image_size_val,
                                     model=ema.ema,
                                     save_json=final_epoch and coco,
                                     single_cls=args.single_cls,
                                     dataloader=valid_dataloader)

        # Write epoch results
        with open("results.txt", "a") as f:
            # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            f.write(context + "%10.3g" * 7 % results)
            f.write("\n")

        # Write Tensorboard results
        if tb_writer:
            tags = [
                "train/giou_loss", "train/obj_loss", "train/cls_loss",
                "metrics/precision", "metrics/recall", "metrics/mAP_0.5",
                "metrics/F1", "val/giou_loss", "val/obj_loss", "val/cls_loss"
            ]
            for x, tag in zip(list(mean_losses[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        # fitness_i = weighted combination of [P, R, mAP, F1]
        fitness_i = fitness(np.array(results).reshape(1, -1))
        if fitness_i > best_fitness:
            best_fitness = fitness_i

        # Save training results
        save = (not args.nosave) or (final_epoch and not args.evolve)
        if save:
            with open("results.txt", "r") as f:
                # Create checkpoint
                state = {
                    "epoch":
                    epoch,
                    "best_fitness":
                    best_fitness,
                    "training_results":
                    f.read(),
                    "state_dict":
                    ema.ema.module.state_dict()
                    if hasattr(model, "module") else ema.ema.state_dict(),
                    "optimizer":
                    None if final_epoch else optimizer.state_dict()
                }

            # Everything below needs `state`, so it must stay inside the
            # `if save:` branch; otherwise `state` is unbound when saving
            # is disabled.

            # Save last checkpoint
            torch.save(state, "weights/checkpoint.pth")

            # Save best checkpoint
            if (best_fitness == fitness_i) and not final_epoch:
                state = {
                    "epoch": -1,
                    "best_fitness": None,
                    "training_results": None,
                    "state_dict": model.state_dict(),
                    "optimizer": None
                }
                torch.save(state, "weights/model_best.pth")

            # Delete checkpoint
            del state

    if not args.evolve:
        plot_results()  # save as results.png
    # The loop covers range(start_epoch, args.epochs); clamp at zero for the
    # case where the resumed checkpoint is already past the last epoch.
    completed_epochs = max(args.epochs - start_epoch, 0)
    print(f"{completed_epochs} epochs completed "
          f"in "
          f"{(time.time() - start_time) / 3600:.3f} hours.\n")
    # Only tear down the process group if this function created it.
    if distributed:
        dist.destroy_process_group()
    torch.cuda.empty_cache()

    return results
Пример #6
0
def bayesian_opt(w, m, g, a, lcoor, lno, iou_thresh, iou_type, bayes_opt=True):
    """Train one YOLO configuration and return its validation mAP.

    Args:
        w: Stored as ``hyperparameters['weight_decay']``.
        m: Stored as ``hyperparameters['momentum']``.
        g: Stored as ``hyperparameters['gamma']``.
        a: Stored as ``hyperparameters['alpha']``.
        lcoor: Stored as ``hyperparameters['lcoord']``.
        lno: Stored as ``hyperparameters['lno_obj']``.
        iou_thresh: Stored as ``hyperparameters['iou_ignore_thresh']``.
        iou_type: Number rounded to the nearest integer and mapped to a
            3-tuple: 0 -> (0, 0, 0), 1 -> (1, 0, 0), 2 -> (0, 1, 0),
            anything else -> (0, 0, 1).
        bayes_opt: When true, train on a 10% subset and skip TensorBoard
            summaries and checkpoint saving; otherwise use the full training
            set, write summaries and save a checkpoint after every epoch.

    Returns:
        ``0`` as soon as ``train_one_epoch`` reports ``outcome['broken'] == 1``;
        otherwise the value returned by ``test.evaluate`` for the last epoch
        (``0`` if no epoch was run).
    """
    # Translate the (possibly fractional) selector into the flag triple.
    iou_type = {
        0: (0, 0, 0),
        1: (1, 0, 0),
        2: (0, 1, 0),
    }.get(int(round(iou_type)), (0, 0, 1))

    hyperparameters = {
        'lr': 0.0001,
        'epochs': 1,
        'resume_from': 0,
        'coco_version': '2017',  #can be either '2014' or '2017'
        'batch_size': 16,
        'weight_decay': w,
        'momentum': m,
        'optimizer': 'sgd',
        'alpha': a,
        'gamma': g,
        'lcoord': lcoor,
        'lno_obj': lno,
        'iou_type': iou_type,
        'iou_ignore_thresh': iou_thresh,
        'inf_confidence': 0.01,
        'inf_iou_threshold': 0.5,
        'wasserstein': False,
        'tfidf': True,
        'idf_weights': True,
        'tfidf_col_names': ['img_freq', 'none', 'none', 'none', 'no_softmax'],
        'augment': 1,
        'workers': 4,
        'pretrained': False,
        'path': 'yolo2017_semiprtnd',
        'reduction': 'sum'
    }

    mode = {
        'bayes_opt': bayes_opt,
        'multi_scale': False,
        'debugging': False,
        'show_output': False,
        'multi_gpu': True,
        'show_temp_summary': False,
        'save_summary': bayes_opt == False
    }

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model, optimizer, hyperparameters = init_model.init_model(hyperparameters,
                                                              mode,
                                                              show=False)

    # DataParallel wraps the real network in `.module`.
    if isinstance(model, nn.DataParallel):
        inp_dim = model.module.inp_dim
    else:
        inp_dim = model.inp_dim
    coco_version = hyperparameters['coco_version']

    if bayes_opt == True:
        tr_subset = 0.1
        ts_subset = 1
    else:
        tr_subset = 1
        ts_subset = 1

    if (mode['save_summary'] == True):
        writer = SummaryWriter('../results/' + hyperparameters['path'])

    if hyperparameters['augment'] > 0:
        train_dataset = Coco(partition='train',
                             coco_version=coco_version,
                             subset=tr_subset,
                             transform=transforms.Compose([
                                 Augment(hyperparameters['augment']),
                                 ResizeToTensor(inp_dim)
                             ]))
    else:
        # Use the same training subset as the augmented branch; the original
        # referenced an undefined name `subset` here.
        train_dataset = Coco(partition='train',
                             coco_version=coco_version,
                             subset=tr_subset,
                             transform=transforms.Compose(
                                 [ResizeToTensor(inp_dim)]))

    batch_size = hyperparameters['batch_size']

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  collate_fn=helper.collate_fn,
                                  num_workers=hyperparameters['workers'])

    # Defined up front so the final `return` is valid even when the epoch
    # loop below does not execute.
    mAP = 0
    for i in range(hyperparameters['epochs']):
        outcome = yolo_function.train_one_epoch(model, optimizer,
                                                train_dataloader,
                                                hyperparameters, mode)

        if outcome['broken'] == 1:
            return 0
        else:
            mAP = test.evaluate(
                model,
                device,
                coco_version,
                confidence=hyperparameters['inf_confidence'],
                iou_threshold=hyperparameters['inf_iou_threshold'],
                subset=ts_subset)
        if (mode['save_summary'] == True):

            writer.add_scalar('Loss/train', outcome['avg_loss'],
                              hyperparameters['resume_from'])
            writer.add_scalar('AIoU/train', outcome['avg_iou'],
                              hyperparameters['resume_from'])
            writer.add_scalar('PConf/train', outcome['avg_conf'],
                              hyperparameters['resume_from'])
            writer.add_scalar('NConf/train', outcome['avg_no_conf'],
                              hyperparameters['resume_from'])
            writer.add_scalar('PClass/train', outcome['avg_pos'],
                              hyperparameters['resume_from'])
            writer.add_scalar('NClass/train', outcome['avg_neg'],
                              hyperparameters['resume_from'])
            writer.add_scalar('mAP/valid', mAP, hyperparameters['resume_from'])

            # `resume_from` doubles as the summary step counter.
            hyperparameters['resume_from'] = hyperparameters['resume_from'] + 1
        if (mode['bayes_opt'] == False):

            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'avg_loss': outcome['avg_loss'],
                    'avg_iou': outcome['avg_iou'],
                    'avg_pos': outcome['avg_pos'],
                    'avg_neg': outcome['avg_neg'],
                    'avg_conf': outcome['avg_conf'],
                    'avg_no_conf': outcome['avg_no_conf'],
                    'epoch': hyperparameters['resume_from']
                }, PATH + hyperparameters['path'] + '.tar')

    return mAP
Пример #7
0
def main():
    """Build the RU/EN datasets, train the Seq2Seq model, save and evaluate it.

    Steps, in order: seed everything via ``make_deterministic``; load the
    Yandex and ParaCrawl corpora into two shared ``Language`` objects; drop
    infrequent words; train for ``H.epochs`` epochs with Adam and a summed
    cross-entropy loss; store the languages and model in ``data/data.pt``;
    finally run ``evaluate`` on the 100 samples that follow the training
    slice of the Yandex corpus.
    """
    make_deterministic()

    # region Prepare data
    with Timer('\nData preparation time: %s\n'):
        ru_lang, en_lang = Language(), Language()

        yandex = Yandex(
            'datasets/yandex/corpus.en_ru.1m.ru',
            'datasets/yandex/corpus.en_ru.1m.en',
            ru_lang,
            en_lang,
            data_slice=H.dataset_slice,
        )

        # An empty slice: the corpus object is created but given no range.
        paracrawl = ParaCrawl(
            'datasets/paracrawl/en-ru.txt',
            ru_lang,
            en_lang,
            data_slice=slice(0),
        )

        # Russian: drop whichever is larger, the percentage-based count or
        # the number of words below the minimum frequency.
        rare_ru = ru_lang.lower_than(H.ru_word_count_minimum)
        ru_drop_count = max(
            ceil(ru_lang.words_n * H.infrequent_words_percent),
            len(rare_ru),
        )
        if ru_drop_count > 0:
            ru_lang.drop_words(ru_lang.lowk(ru_drop_count))
            print(f'{ru_drop_count:,} infrequent Russian words are dropped')

        # English: drop exactly the words below the minimum frequency.
        rare_en = en_lang.lower_than(H.en_word_count_minimum)
        if len(rare_en) > 0:
            en_lang.drop_words(*rare_en)
            print(f'{len(rare_en):,} infrequent English words are dropped')

        for label, lang in (('Russian', ru_lang), ('English', en_lang)):
            print(
                f'{label} language: {lang.words_n:,} words, {lang.sentence_length:,} words in a sentence'
            )

        batch_size = H.batch_size
        dataset = ConcatDataset((yandex, paracrawl))
        train_loader = DataLoader(dataset, batch_size, shuffle=True)
    # endregion

    # region Models and optimizers
    # Sub-modules are created in the same order as in the nested call form
    # (encoder, attention, decoder) so seeded initialisation is unaffected.
    encoder = Encoder(ru_lang.words_n, H.encoder_embed_dim,
                      H.encoder_hidden_dim, H.encoder_bi, H.decoder_hd)
    attention = Attention(H.encoder_hd, H.decoder_hd)
    decoder = Decoder(en_lang.words_n, H.decoder_embed_dim,
                      H.decoder_hidden_dim, H.encoder_hd)
    model = Seq2Seq(encoder, attention, decoder).to(Device).train()

    optimizer = Adam(model.parameters(), lr=H.learning_rate)
    criterion = CrossEntropyLoss(ignore_index=Token_PAD, reduction='sum')
    # endregion

    # region Training
    teaching_percent = H.teaching_percent
    sample_count = len(dataset)
    log_interval = max(5, round(sample_count / batch_size / 1000))

    for epoch in range(1, H.epochs + 1):
        with Printer() as printer:
            printer.print(f'Train epoch {epoch}: starting...')
            for step, ((ru, ru_l), en_sos, en_eos) in enumerate(train_loader, 1):
                # Standard optimisation step: reset grads, forward, loss,
                # backward, clip, update.
                optimizer.zero_grad()
                predictions = model(ru, ru_l, en_sos, teaching_percent)
                loss = criterion(predictions, en_eos)
                loss.backward()
                clip_grad_norm_(model.parameters(), H.gradient_norm_clip)
                optimizer.step()

                # Periodic progress report.
                if step % log_interval == 0:
                    seen = step * batch_size
                    printer.print(
                        f'Train epoch {epoch}: {seen / sample_count:.1%} [{seen:,}/{sample_count:,}]'
                    )

            printer.print(f'Train epoch {epoch}: completed')
    # endregion

    # Persist both vocabularies together with the model (moved to CPU and
    # switched to eval mode first).
    ru_state = ru_lang.__getnewargs__()
    en_state = en_lang.__getnewargs__()
    model_state = model.cpu().eval().data
    torch.save((ru_state, en_state, model_state), 'data/data.pt')

    # Evaluate on the 100 samples right after the training slice.
    eval_start = H.dataset_slice.stop + 1
    evaluate(model.to(Device), ru_lang, en_lang,
             'datasets/yandex/corpus.en_ru.1m.ru',
             slice(eval_start, eval_start + 100))
Пример #8
0
                                                wer_results=wer_results, cer_results=cer_results, avg_loss=avg_loss),
                           file_path)
            del loss, out, float_out

        avg_loss /= len(train_sampler)

        epoch_time = time.time() - start_epoch_time
        print('Training Summary Epoch: [{0}]\t'
              'Time taken (s): {epoch_time:.0f}\t'
              'Average Loss {loss:.3f}\t'.format(epoch + 1, epoch_time=epoch_time, loss=avg_loss))

        start_iter = 0  # Reset start iteration for next epoch
        with torch.no_grad():
            wer, cer, output_data = evaluate(test_loader=test_loader,
                                             device=device,
                                             model=model,
                                             decoder=decoder,
                                             target_decoder=decoder)
        loss_results[epoch] = avg_loss
        wer_results[epoch] = wer
        cer_results[epoch] = cer
        print('Validation Summary Epoch: [{0}]\t'
              'Average WER {wer:.3f}\t'
              'Average CER {cer:.3f}\t'.format(
            epoch + 1, wer=wer, cer=cer))

        values = {
            'loss_results': loss_results,
            'cer_results': cer_results,
            'wer_results': wer_results
        }
            source_loader,
            0):  # source_data: (128,3,28,28), source_data_label: (128,1)
        train_info = 'Epoch: [{0}][{1}/{2}]'.format(epoch, i + 1,
                                                    len(source_loader))
        source_data, source_data_label = source_data.to(
            device), source_data_label.to(device)

        optimizer.zero_grad()
        cls_output = model(source_data)

        label_loss = class_criterion(cls_output, source_data_label.squeeze())

        label_loss.backward()
        optimizer.step()

        train_info += ' loss: {:.4f}'.format(label_loss.data.cpu().numpy())
        if i % 50 == 0:
            print(train_info)

    if (epoch + 1) % 1 == 0:
        print("testing.... ")
        acc = evaluate(model, target_loader, False)
        print("acc: ", acc)
        print("best acc so far... ", best_acc)
        if acc > best_acc:
            best_acc = acc
            print("This is the best model!!!")
            save_model(model, os.path.join(save_dir, 'model_best.pth.tar'))

    save_model(model, os.path.join(save_dir, 'model_{}.pth.tar'.format(epoch)))
#import pdb;pdb.set_trace();
# Build the encoder/decoder pair on the target device.
encoder = EncoderRNN(N_word, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, CLASS_size, dropout_p=0.1, max_length=max_length).to(device)
n_iterations = train_df.shape[0]
# NOTE(review): training currently runs for a single iteration; the full run
# over the whole training frame is the commented-out call below.
#trainIters(encoder, decoder, n_iterations, print_every=50, plot_every=10)
# The leftover `pdb.set_trace()` breakpoint that used to sit here was removed:
# it blocked every non-interactive run of this script.
trainIters(encoder, decoder, 1, print_every=50, plot_every=10)

# Smoke-test the trained pair on the first training example.
sentence = train_df.iloc[0]["description"]
sentence = normalizeString(sentence)
input_tensor = embeddedTensorFromSentence(sentence,device,word_emb,N_word)
target_class = train_df.iloc[0]["department_new"]
class_index = []
target_index = class_dict[target_class]
print(target_index)
#y_true.append(target_index)
output, attention = evaluate(encoder, decoder, input_tensor,max_length,device)
# Top-1 value and index of the evaluation output.
topv, topi = output.topk(1)
#torch.save(encoder.state_dict(), "encoder")
#torch.save(decoder.state_dict(), "decoder")
#encoder = torch.load("encoder")
#decoder = torch.load("decoder")
#desc1 = full_table.iloc[0]["description"]
#dep1 = full_table.iloc[0]["department"]
#input_tensor = embeddedTensorFromSentence(desc1,device,word_emb,N_word)
#print(classes_)
#evaluateTest(encoder,decoder)
#output, attention = evaluate(encoder, decoder, input_tensor,max_length,device)
#showAttention(desc2, attention)
Пример #11
0
def _build_gan_component(config, name):
    """Assemble everything the trainer needs for one network of the GAN.

    Args:
        config: project configuration object; this helper uses its
            ``get_logger`` and ``initialize`` methods and indexes it with
            ``name``.
        name: config section and logger name, ``'generator'`` or
            ``'discriminator'``.

    Returns:
        dict with the keys ``logger``, ``data_loader``, ``valid_data_loader``,
        ``model``, ``loss_fn``, ``metric_fns``, ``optimizer`` and
        ``lr_scheduler``.
    """
    component_logger = config.get_logger(name)

    # setup data_loader instances
    data_loader = config.initialize('data_loader', module_data, name)
    valid_data_loader = data_loader.split_validation()

    # build model architecture, then log it
    model = config.initialize('arch', model_arch, name)
    component_logger.info(model)

    # get function handles of loss and metrics
    loss_fn = getattr(module_loss, config[name]['loss'])
    metric_fns = [getattr(module_metric, met)
                  for met in config[name]['metrics']]

    # build optimizer and learning rate scheduler over the trainable
    # parameters only
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.initialize('optimizer', torch.optim, name,
                                  trainable_params)
    lr_scheduler = config.initialize('lr_scheduler',
                                     torch.optim.lr_scheduler, name,
                                     optimizer)

    return {
        'logger': component_logger,
        'data_loader': data_loader,
        'valid_data_loader': valid_data_loader,
        'model': model,
        'loss_fn': loss_fn,
        'metric_fns': metric_fns,
        'optimizer': optimizer,
        'lr_scheduler': lr_scheduler,
    }


def main(config):
    """Build the generator and discriminator, train them, then log an evaluation.

    Args:
        config: project configuration object with ``'generator'`` and
            ``'discriminator'`` sections.
    """
    # ===== Generator / Discriminator =====
    # Built in this order so the side effects (logger creation, data loading,
    # model logging) happen in the same sequence as before.
    generator = _build_gan_component(config, 'generator')
    discriminator = _build_gan_component(config, 'discriminator')

    # ===== Training =====
    trainer = Trainer(generator, discriminator, config)
    trainer.train()

    # ===== Testing =====
    # The evaluation has always run on the discriminator's objects (they were
    # the last ones bound to the shared local names); that is now explicit.
    log = evaluate(discriminator['model'], discriminator['metric_fns'],
                   discriminator['data_loader'], discriminator['loss_fn'])

    # NOTE(review): `logger` is not bound inside this function; this only
    # works if a module-level `logger` exists -- confirm, or switch to one of
    # the component loggers.
    logger.info('< Evaluation >')
    for key, value in log.items():
        logger.info('    {:15s}: {}'.format(str(key), value))
Пример #12
0
def train(cfg):
    """Train a YOLOv3 model as configured by ``cfg``, checkpointing every epoch.

    Besides ``cfg`` the function reads the module-level names ``device``,
    ``mixed_precision``, ``tb_writer`` and ``args``.

    Side effects: deletes any existing ``results.txt``, appends one line per
    epoch to it, and writes ``weights/checkpoint.pth`` (every epoch) and
    ``weights/model_best.pth`` (whenever the epoch's mAP equals the best so
    far).

    Args:
        cfg: configuration object; the ``cfg.TRAIN.*`` fields and
            ``cfg.CONFIG_FILE`` are read here.

    Raises:
        KeyError: if the ``.pth`` checkpoint named by ``cfg.TRAIN.WEIGHTS``
            does not match the model built from ``cfg``.
    """
    # Initialize
    init_seeds()
    # Placeholder values: both are only read when MULTI_SCALE is enabled, and
    # in that case they are recomputed just below.
    image_size_min = 6.6
    image_size_max = 28.5
    if cfg.TRAIN.MULTI_SCALE:
        image_size_min = round(cfg.TRAIN.IMAGE_SIZE / 32 / 1.5)
        image_size_max = round(cfg.TRAIN.IMAGE_SIZE / 32 * 1.5)
        image_size = image_size_max * 32  # initiate with maximum multi_scale size
        print(f"Using multi-scale {image_size_min * 32} - {image_size}")

    # Remove previous results
    for files in glob.glob("results.txt"):
        os.remove(files)

    # Initialize model
    model = YOLOv3(cfg).to(device)

    # Optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=cfg.TRAIN.LR,
                          momentum=cfg.TRAIN.MOMENTUM,
                          weight_decay=cfg.TRAIN.DECAY,
                          nesterov=True)

    # Define the loss function calculation formula of the model
    compute_loss = YoloV3Loss(cfg)

    epoch = 0
    start_epoch = 0
    best_maps = 0.0
    context = None

    # Dataset
    # Apply augmentation hyperparameters
    train_dataset = VocDataset(anno_file_type=cfg.TRAIN.DATASET,
                               image_size=cfg.TRAIN.IMAGE_SIZE,
                               cfg=cfg)
    # Dataloader
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=cfg.TRAIN.MINI_BATCH_SIZE,
                                  num_workers=cfg.TRAIN.WORKERS,
                                  shuffle=cfg.TRAIN.SHUFFLE,
                                  pin_memory=cfg.TRAIN.PIN_MENORY)

    if cfg.TRAIN.WEIGHTS.endswith(".pth"):
        state = torch.load(cfg.TRAIN.WEIGHTS, map_location=device)
        # load model
        try:
            # Keep only the tensors whose element count matches the model's.
            state["state_dict"] = {
                k: v
                for k, v in state["state_dict"].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(state["state_dict"], strict=False)
        except KeyError as e:
            error_msg = f"{cfg.TRAIN.WEIGHTS} is not compatible with {cfg.CONFIG_FILE}. "
            error_msg += f"Specify --weights `` or specify a --config-file "
            error_msg += f"compatible with {cfg.TRAIN.WEIGHTS}. "
            raise KeyError(error_msg) from e

        # load optimizer
        if state["optimizer"] is not None:
            optimizer.load_state_dict(state["optimizer"])
            best_maps = state["best_maps"]

        # load results
        if state.get("training_results") is not None:
            with open("results.txt", "w") as file:
                file.write(state["training_results"])  # write results.txt

        # Resume at the epoch following the last recorded batch. The
        # parentheses are required: `a + 1 // b` parses as `a + (1 // b)`,
        # which yielded the raw batch count (and -1 for the best-model
        # checkpoint, which stores `batches = -1`).
        start_epoch = (state["batches"] + 1) // len(train_dataloader)
        del state

    elif len(cfg.TRAIN.WEIGHTS) > 0:
        # possible weights are "*.weights", "yolov3-tiny.conv.15",  "darknet53.conv.74" etc.
        load_darknet_weights(model, cfg.TRAIN.WEIGHTS)
    else:
        print("Pre training model weight not loaded.")

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        # skip print amp info
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)

    # source https://arxiv.org/pdf/1812.01187.pdf
    scheduler = CosineDecayLR(optimizer,
                              max_batches=cfg.TRAIN.MAX_BATCHES,
                              lr=cfg.TRAIN.LR,
                              warmup=cfg.TRAIN.WARMUP_BATCHES)

    # Initialize distributed training
    if device.type != "cpu" and torch.cuda.device_count(
    ) > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend="nccl",  # "distributed backend"
            # distributed training init method
            init_method="tcp://127.0.0.1:9999",
            # number of nodes for distributed training
            world_size=1,
            # distributed training node rank
            rank=0)
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.backbone = model.module.backbone

    # Model EMA
    # TODO: ema = ModelEMA(model, decay=0.9998)

    # Start training
    batches_num = len(train_dataloader)  # number of batches
    # Mini-batches whose gradients are accumulated before one optimizer step;
    # clamped to 1 so a BATCH_SIZE below MINI_BATCH_SIZE cannot yield a zero
    # divisor.
    accumulate = max(cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.MINI_BATCH_SIZE, 1)
    # 'loss_GIOU', 'loss_Confidence', 'loss_Classification' 'loss'
    results = (0, 0, 0, 0)
    epochs = cfg.TRAIN.MAX_BATCHES // len(train_dataloader)
    print(f"Using {cfg.TRAIN.WORKERS} dataloader workers.")
    print(
        f"Starting training {cfg.TRAIN.MAX_BATCHES} batches for {epochs} epochs..."
    )

    start_time = time.time()
    for epoch in range(start_epoch, epochs):
        model.train()

        # init batches
        batches = 0
        mean_losses = torch.zeros(4)
        print("\n")
        print(
            ("%10s" * 7) %
            ("Batch", "memory", "GIoU", "conf", "cls", "total", " image_size"))
        progress_bar = tqdm(enumerate(train_dataloader), total=batches_num)
        for index, (images, small_label_bbox, medium_label_bbox,
                    large_label_bbox, small_bbox, medium_bbox,
                    large_bbox) in progress_bar:

            # number integrated batches (since train start)
            batches = index + len(train_dataloader) * epoch

            scheduler.step(batches)

            images = images.to(device)

            small_label_bbox = small_label_bbox.to(device)
            medium_label_bbox = medium_label_bbox.to(device)
            large_label_bbox = large_label_bbox.to(device)

            small_bbox = small_bbox.to(device)
            medium_bbox = medium_bbox.to(device)
            large_bbox = large_bbox.to(device)

            # Hyper parameter Burn-in
            if batches <= cfg.TRAIN.WARMUP_BATCHES:
                for m in model.named_modules():
                    if m[0].endswith('BatchNorm2d'):
                        m[1].track_running_stats = batches == cfg.TRAIN.WARMUP_BATCHES

            # Run model
            pred, raw = model(images)

            # Compute loss
            loss, loss_giou, loss_conf, loss_cls = compute_loss(
                pred, raw, small_label_bbox, medium_label_bbox,
                large_label_bbox, small_bbox, medium_bbox, large_bbox)

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient. The former condition
            # `batches % BATCH_SIZE // MINI_BATCH_SIZE == 0` parsed as
            # `(batches % BATCH_SIZE) // MINI_BATCH_SIZE`, so it did not step
            # once per accumulation window.
            if batches % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                # TODO: ema.update(model)

            # Print batch results
            # update mean losses
            loss_items = torch.tensor([loss_giou, loss_conf, loss_cls, loss])
            mean_losses = (mean_losses * index + loss_items) / (index + 1)
            memory = f"{torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0:.2f}G"
            context = ("%10s" * 2 + "%10.3g" * 5) % (
                "%g/%g" % (batches + 1, cfg.TRAIN.MAX_BATCHES), memory,
                *mean_losses, train_dataset.image_size)
            progress_bar.set_description(context)

            # Multi-Scale training
            if cfg.TRAIN.MULTI_SCALE:
                #  adjust img_size (67% - 150%) every RESIZE_INTERVAL batches
                if batches % cfg.TRAIN.RESIZE_INTERVAL == 0:
                    train_dataset.image_size = random.randrange(
                        image_size_min, image_size_max + 1) * 32

            # Write Tensorboard results
            if tb_writer:
                # 'loss_GIOU', 'loss_Confidence', 'loss_Classification' 'loss'
                titles = ["GIoU", "Confidence", "Classification", "Train loss"]
                # NOTE(review): the step is the per-epoch `index`, not the
                # global `batches` counter -- confirm this is intended.
                for xi, title in zip(
                        list(mean_losses) + list(results), titles):
                    tb_writer.add_scalar(title, xi, index)

        # Process epoch results
        # TODO: ema.update_attr(model)
        final_epoch = epoch + 1 == epochs

        # Calculate mAP
        # skip first epoch
        maps = 0.
        if epoch > 0:
            maps = evaluate(cfg, args)

        # Write epoch results
        with open("results.txt", "a") as f:
            # 'loss_GIOU', 'loss_Confidence', 'loss_Classification' 'loss', 'maps'
            f.write(context + "%10.3g" * 1 % maps)
            f.write("\n")

        # Update best mAP
        if maps > best_maps:
            best_maps = maps

        # Save training results
        with open("results.txt", 'r') as f:
            # Create checkpoint
            state = {
                'batches': batches,
                # Store the running best, not this epoch's value, so a
                # resumed run keeps comparing against the true best score.
                'best_maps': best_maps,
                'training_results': f.read(),
                'state_dict': model.state_dict(),
                'optimizer': None if final_epoch else optimizer.state_dict()
            }

        # Save last checkpoint
        torch.save(state, "weights/checkpoint.pth")

        # Save best checkpoint
        if best_maps == maps:
            state = {
                'batches': -1,
                'best_maps': None,
                'training_results': None,
                'state_dict': model.state_dict(),
                'optimizer': None
            }
            torch.save(state, "weights/model_best.pth")

        # Delete checkpoint
        del state

    print(f"{epoch - start_epoch} epochs completed "
          f"in {(time.time() - start_time) / 3600:.3f} hours.\n")
    if torch.cuda.device_count() > 1:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
Пример #13
0
def main(log_dir, model_path, augmentation, dataset, batch_size, learning_rate,
         num_workers, restore_dir, lr_value, lr_steps):
    """Train the model defined in ``model_path`` on SHREC17 for up to 2000 epochs.

    A new ``log_dir`` is created and receives a copy of this script, a copy of
    the model file, ``log.txt``, and after every epoch ``state.pkl`` and
    ``dynamics.pkl``. Every 100th epoch (epoch 0 included) the saved state is
    evaluated on the "val" split and ``best_state.pkl`` is written when the
    summed micro+macro mAP improves.

    Args:
        log_dir: directory to create for this run; must not exist yet.
        model_path: Python file defining ``Model``; copied into ``log_dir``
            and loaded from there.
        augmentation: forwarded as ``repeat`` to the voxel cache.
        dataset: forwarded to ``Shrec17`` as its second argument.
        batch_size: training batch size.
        learning_rate: base rate, scaled by the matching ``lr_value`` entry.
        num_workers: number of data loader workers.
        restore_dir: directory holding ``state.pkl`` and ``dynamics.pkl`` to
            resume from, or ``None`` for a fresh start.
        lr_value: multipliers, one more entry than ``lr_steps``.
        lr_steps: epoch boundaries; ``lr_value[k]`` applies while the epoch
            is below ``lr_steps[k]``.
    """
    # Must stay the first statement: it snapshots the call arguments while
    # they are the only locals.
    arguments = copy.deepcopy(locals())

    os.mkdir(log_dir)
    model_copy = os.path.join(log_dir, "model.py")
    shutil.copy2(__file__, os.path.join(log_dir, "script.py"))
    shutil.copy2(model_path, model_copy)

    # Log to the console and to <log_dir>/log.txt, discarding old handlers.
    logger = logging.getLogger("train")
    logger.setLevel(logging.DEBUG)
    logger.handlers = []
    console_handler = logging.StreamHandler()
    logger.addHandler(console_handler)
    file_handler = logging.FileHandler(os.path.join(log_dir, "log.txt"))
    logger.addHandler(file_handler)

    logger.info("%s", repr(arguments))

    torch.backends.cudnn.benchmark = True

    device = torch.device("cuda:0")

    # Load the model from the copy placed inside the run directory
    source_loader = importlib.machinery.SourceFileLoader('model', model_copy)
    model_module = types.ModuleType(source_loader.name)
    source_loader.exec_module(model_module)

    model = torch.nn.DataParallel(model_module.Model(55))
    model.to(device)

    if restore_dir is not None:
        saved_state = torch.load(os.path.join(restore_dir, "state.pkl"))
        model.load_state_dict(saved_state)

    parameter_count = sum(p.numel() for p in model.parameters())
    logger.info("{} paramerters in total".format(parameter_count))

    # Load the dataset
    # Increasing `repeat` will generate more cached files
    cache = CacheNPY("v64d",
                     transform=Obj2Voxel(64, double=True, rotate=True),
                     repeat=augmentation)

    def transform(x):
        """Turn a sample into a filtered float tensor with a leading axis."""
        voxels = cache(x)
        tensor = torch.from_numpy(voxels.astype(np.float32)).unsqueeze(0) / 8
        return low_pass_filter(tensor, 2)

    # The target is the position of a sample's class id in this sequence.
    class_ids = (
        '02691156', '02747177', '02773838', '02801938', '02808440',
        '02818832', '02828884', '02843684', '02871439', '02876657',
        '02880940', '02924116', '02933112', '02942699', '02946921',
        '02954340', '02958343', '02992529', '03001627', '03046257',
        '03085013', '03207941', '03211117', '03261776', '03325088',
        '03337140', '03467517', '03513137', '03593526', '03624134',
        '03636649', '03642806', '03691459', '03710193', '03759954',
        '03761084', '03790512', '03797390', '03928116', '03938244',
        '03948459', '03991062', '04004475', '04074963', '04090263',
        '04099429', '04225987', '04256520', '04330267', '04379243',
        '04401088', '04460130', '04468005', '04530566', '04554684',
    )

    def target_transform(x):
        """Map the first element of ``x`` to its index among the class ids."""
        return class_ids.index(x[0])

    train_set = Shrec17("shrec17_data",
                        dataset,
                        perturbed=True,
                        download=True,
                        transform=transform,
                        target_transform=target_transform)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               sampler=EqSampler(train_set),
                                               num_workers=num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    # The real learning rate is written into the param groups every epoch.
    optimizer = torch.optim.Adam(model.parameters(), lr=0)

    def train_step(data, target):
        """Run one optimisation step; return (loss value, number correct)."""
        model.train()
        data = data.to(device)
        target = target.to(device)

        prediction = model(data)
        loss = F.cross_entropy(prediction, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        hits = prediction.argmax(1).eq(target).long().sum().item()
        return loss.item(), hits

    def get_learning_rate(epoch):
        """Return the learning rate scheduled for ``epoch``."""
        assert len(lr_value) == len(lr_steps) + 1
        for boundary, factor in zip(lr_steps, lr_value):
            if epoch < boundary:
                return factor * learning_rate
        return lr_value[-1] * learning_rate

    dynamics = []
    first_epoch = 0

    if restore_dir is not None:
        # Resume the per-batch history and continue after its last epoch.
        dynamics = torch.load(os.path.join(restore_dir, "dynamics.pkl"))
        first_epoch = dynamics[-1]['epoch'] + 1

    score = best_score = 0

    for epoch in range(first_epoch, 2000):

        lr = get_learning_rate(epoch)
        logger.info("learning rate = {} and batch size = {}".format(
            lr, train_loader.batch_size))
        for group in optimizer.param_groups:
            group['lr'] = lr

        total_loss = 0
        total_correct = 0
        time_before_load = time.perf_counter()
        for batch_idx, (data, target) in enumerate(train_loader):
            time_after_load = time.perf_counter()
            time_before_step = time.perf_counter()
            loss, correct = train_step(data, target)

            total_loss += loss
            total_correct += correct

            batches_seen = batch_idx + 1
            avg_loss = total_loss / batches_seen
            avg_correct = total_correct / len(data) / batches_seen

            message = (
                "[{}:{}/{}] LOSS={:.2} <LOSS>={:.2} ACC={:.2} <ACC>={:.2} time={:.2}+{:.2}"
                .format(epoch, batch_idx, len(train_loader), loss, avg_loss,
                        correct / len(data), avg_correct,
                        time_after_load - time_before_load,
                        time.perf_counter() - time_before_step))
            logger.info(message)
            time_before_load = time.perf_counter()

            record = {
                'epoch': epoch,
                'batch_idx': batch_idx,
                'step': epoch * len(train_loader) + batch_idx,
                'learning_rate': lr,
                'batch_size': len(data),
                'loss': loss,
                'correct': correct,
                'avg_loss': avg_loss,
                'avg_correct': avg_correct,
                'best_score': best_score,
                'score': score,
            }
            dynamics.append(record)

        torch.save(model.state_dict(), os.path.join(log_dir, "state.pkl"))
        torch.save(dynamics, os.path.join(log_dir, "dynamics.pkl"))

        if epoch % 100 != 0:
            continue

        # Periodic validation on the state that was just written to disk.
        micro, macro = evaluate(log_dir, 1, "val", 20, 1, "state.pkl")
        score = micro["mAP"] + macro["mAP"]
        logger.info("Score={} Best={}".format(score, best_score))
        if score > best_score:
            best_score = score
            torch.save(model.state_dict(),
                       os.path.join(log_dir, "best_state.pkl"))
Пример #14
0
                                           (time.time() - start_time) /
                                           (batch_i + 1))
            log_str += f"\n---- ETA {time_left}"

            print(log_str)

            model.seen += imgs.size(0)

        if epoch % opt.evaluation_interval == 0:
            print("\n---- Evaluating Model ----")
            # Evaluate the model on the validation set
            precision, recall, AP, f1, ap_class = evaluate(
                model,
                path=valid_path,
                iou_thres=0.5,
                conf_thres=0.5,
                nms_thres=0.5,
                img_size=opt.img_size,
                batch_size=8,
            )
            evaluation_metrics = [
                ("val_precision", precision.mean()),
                ("val_recall", recall.mean()),
                ("val_mAP", AP.mean()),
                ("val_f1", f1.mean()),
            ]
            logger.list_of_scalars_summary(evaluation_metrics, epoch)

            # Print class APs and mAP
            ap_table = [["Index", "Class name", "AP"]]
            for i, c in enumerate(ap_class):
Пример #15
0
        if epoch % config['checkpoint_interval'] == 0:
            torch.save(
                model.state_dict(), config['checkpoint_path'] +
                f"yolov3_%s_%d.pth" % (config['type'], epoch))
            # torch.save(model.state_dict(),
            #     f"checkpoints/yolov3_ckpt_%d.pth" % epoch)

        if epoch % config['evaluation_interval'] == 0:
            print("\n---- Evaluating Model ----")
            # Evaluate the model on the validation set
            precision, recall, AP, f1, ap_class, landm = evaluate(
                model,
                path=valid_path,
                iou_thres=0.5,
                conf_thres=0.5,
                nms_thres=0.5,
                img_size=config['img_size'],
                batch_size=config['vbatch_size'],
                type=config['type'],
            )
            evaluation_metrics = [
                ("val_precision", precision.mean()),
                ("val_recall", recall.mean()),
                ("val_mAP", AP.mean()),
                ("val_f1", f1.mean()),
            ]
            if model.type in landm_set:
                evaluation_metrics.append(("landm", landm.mean()))
            logger.list_of_scalars_summary(evaluation_metrics, epoch)
            val_acc.append(evaluation_metrics)
            with open(config['val_metrics'].format(config['type']), 'w') as f:
Пример #16
0
            #
            # if opt.verbose: print(log_str)

            model.module.seen += imgs.size(0)
            # if batch_i > 30: break

        scheduler.step()

        if epoch % opt.evaluation_interval == 0:
            print("\n---- Evaluating Model ----")
            # Evaluate the model on the validation set
            metrics_output = evaluate(
                model,
                Loss,
                path=valid_path,
                iou_thres=0.5,
                conf_thres=0.5,
                nms_thres=0.5,
                img_size=opt.img_size,
                batch_size=20,
            )

            if metrics_output is not None:
                precision, recall, AP, f1, ap_class = metrics_output
                evaluation_metrics = [
                    ("validation/precision", precision.mean()),
                    ("validation/recall", recall.mean()),
                    ("validation/mAP", AP.mean()),
                    ("validation/f1", f1.mean()),
                ]
                logger.list_of_scalars_summary(evaluation_metrics, epoch)
Пример #17
0
# Number of freshly generated test datasets to evaluate and pool.
ev_iterations = 100

n_units = 40

print(' ')
print('number of hidden units: ' + str(n_units))
print(' ')

# Imported once, after the banner above, instead of on every loop iteration.
import test

test_data_stack = []
test_hh_stack = []
for _ in range(ev_iterations):
    # Evaluate on test dataset
    test_data = test.generate_test_dataset()
    test_perp, test_hh, test_y_bin_error_sum = test.evaluate(test_data,
                                                             test=True)

    test_data_stack.extend(test_data['coordinates'])
    test_hh_stack.extend(test_hh)

input_data = test_hh_stack
# Collapse each coordinate pair into one integer label.
# NOTE(review): the factor 9 presumably is the grid width -- confirm.
output_data = [coords[0] + coords[1] * 9 for coords in test_data_stack]

print('')

# data allocation
X_train, X_test, y_train, y_test = train_test_split(input_data, output_data)

# parameters
Пример #18
0
        b_size = target_data.size()[0]
        domain_label = torch.full((b_size, ),
                                  1,
                                  dtype=torch.long,
                                  device=device)  # all 1
        _, dom_output = model(target_data, alpha)
        target_domain_loss = domain_criterion(dom_output, domain_label)

        loss = label_loss + source_domain_loss + target_domain_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        iters += 1

        train_info += ' loss: {:.4f}'.format(label_loss.data.cpu().numpy())
        if i % 50 == 0:
            print(train_info)

    if (epoch + 1) % 1 == 0:
        print("testing.... ")
        acc = evaluate(model, test_loader, 0, False)
        print("acc: ", acc)
        print("best acc so far... ", best_acc)
        if acc > best_acc:
            best_acc = acc
            print("This is the best model!!!")
            save_model(model, os.path.join(save_dir, 'model_best.pth.tar'))

    save_model(model, os.path.join(save_dir, 'model_{}.pth.tar'.format(epoch)))
Пример #19
0

#%%
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Darknet(opt.model_def).to(device)
model.load_state_dict(torch.load(opt.model))  # load the model weights

# Parse the data config file
data_config = parse_data_config(opt.data_config)
valid_path = data_config["valid"]  # path of the validation set
class_names = load_classes(data_config["names"])  # names matching the classes


def eval_model(model):
    """Evaluate ``model`` on the validation set with fixed thresholds."""
    return evaluate(model,
                    path=valid_path,
                    iou_thres=0.5,
                    conf_thres=0.01,
                    nms_thres=0.5,
                    img_size=model.img_size,
                    batch_size=8)


def obtain_num_parameters(model):
    """Return the total number of elements over all parameters of ``model``."""
    return sum(param.nelement() for param in model.parameters())


# Metrics of the sparsity-trained model (not pruned yet)
origin_model_metric = eval_model(model)
# Parameter count of the sparsity-trained model
origin_nparameters = obtain_num_parameters(model)

# Returns the ids of the CBL components, the ids of the standalone Conv
# layers, and the ids of the layers that are to be pruned
CBL_idx, Conv_idx, prune_idx = parse_module_defs(model.module_defs)

# Collect the BN-layer weights of the CBL components, i.e. the gamma
# parameters; pruning is decided from these values
bn_weights = gather_bn_weights(model.module_list, prune_idx)
Пример #20
0
def train(args):
    """Train an actor-critic agent on Procgen and periodically evaluate it.

    The function builds the vectorised environment, the policy, the rollout
    storage and the agent selected through ``args`` (``use_ucb``,
    ``use_meta_learning``, ``use_rl2``, otherwise the ``ConvDrAC`` branch).
    It then alternates between collecting ``args.num_steps`` transitions per
    process and calling ``agent.update``.  Every ``args.log_interval`` updates
    (once more than one episode has finished) the losses, the training
    rewards and the rewards returned by ``evaluate`` are written via
    ``logger``.

    Args:
        args: Run configuration namespace.  ``args.cuda`` is set here from
            ``args.no_cuda`` and CUDA availability.

    Returns:
        None.
    """
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    log_dir = os.path.expanduser(args.log_dir)
    utils.cleanup_log_dir(log_dir)

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    log_file = '-{}-{}-reproduce-s{}'.format(args.run_name, args.env_name,
                                             args.seed)
    # Use the expanded path so the logger writes into the very directory
    # that cleanup_log_dir prepared above (matters for paths such as "~/x").
    logger.configure(dir=log_dir,
                     format_strs=['csv', 'stdout'],
                     log_suffix=log_file)

    venv = ProcgenEnv(num_envs=args.num_processes, env_name=args.env_name, \
        num_levels=args.num_levels, start_level=args.start_level, \
        distribution_mode=args.distribution_mode)
    venv = VecExtractDictObs(venv, "rgb")
    venv = VecMonitor(venv=venv, filename=None, keep_buf=100)
    venv = VecNormalize(venv=venv, ob=False)
    envs = VecPyTorchProcgen(venv, device)

    obs_shape = envs.observation_space.shape
    actor_critic = Policy(obs_shape,
                          envs.action_space.n,
                          base_kwargs={
                              'recurrent': False,
                              'hidden_size': args.hidden_size
                          })
    actor_critic.to(device)

    # NOTE(review): `modelbased` is not defined inside this function;
    # presumably a module-level flag -- confirm it exists in the full file.
    if modelbased:
        rollouts = BiggerRolloutStorage(
            args.num_steps,
            args.num_processes,
            envs.observation_space.shape,
            envs.action_space,
            actor_critic.recurrent_hidden_state_size,
            aug_type=args.aug_type,
            split_ratio=args.split_ratio)
    else:
        rollouts = RolloutStorage(args.num_steps,
                                  args.num_processes,
                                  envs.observation_space.shape,
                                  envs.action_space,
                                  actor_critic.recurrent_hidden_state_size,
                                  aug_type=args.aug_type,
                                  split_ratio=args.split_ratio)

    batch_size = int(args.num_processes * args.num_steps / args.num_mini_batch)

    if args.use_ucb:
        aug_id = data_augs.Identity
        aug_list = [aug_to_func[t](batch_size=batch_size) for t in aug_to_func]

        agent = algo.UCBDrAC(actor_critic,
                             args.clip_param,
                             args.ppo_epoch,
                             args.num_mini_batch,
                             args.value_loss_coef,
                             args.entropy_coef,
                             lr=args.lr,
                             eps=args.eps,
                             max_grad_norm=args.max_grad_norm,
                             aug_list=aug_list,
                             aug_id=aug_id,
                             aug_coef=args.aug_coef,
                             num_aug_types=len(aug_to_func),
                             ucb_exploration_coef=args.ucb_exploration_coef,
                             ucb_window_length=args.ucb_window_length)

    elif args.use_meta_learning:
        aug_id = data_augs.Identity
        aug_list = [aug_to_func[t](batch_size=batch_size) for t in aug_to_func]

        aug_model = AugCNN()
        aug_model.to(device)

        agent = algo.MetaDrAC(actor_critic,
                              aug_model,
                              args.clip_param,
                              args.ppo_epoch,
                              args.num_mini_batch,
                              args.value_loss_coef,
                              args.entropy_coef,
                              meta_grad_clip=args.meta_grad_clip,
                              meta_num_train_steps=args.meta_num_train_steps,
                              meta_num_test_steps=args.meta_num_test_steps,
                              lr=args.lr,
                              eps=args.eps,
                              max_grad_norm=args.max_grad_norm,
                              aug_id=aug_id,
                              aug_coef=args.aug_coef)

    elif args.use_rl2:
        aug_id = data_augs.Identity
        aug_list = [aug_to_func[t](batch_size=batch_size) for t in aug_to_func]

        rl2_obs_shape = [envs.action_space.n + 1]
        rl2_learner = Policy(rl2_obs_shape,
                             len(aug_to_func),
                             base_kwargs={
                                 'recurrent': True,
                                 'hidden_size': args.rl2_hidden_size
                             })
        rl2_learner.to(device)

        agent = algo.RL2DrAC(actor_critic,
                             rl2_learner,
                             args.clip_param,
                             args.ppo_epoch,
                             args.num_mini_batch,
                             args.value_loss_coef,
                             args.entropy_coef,
                             args.rl2_entropy_coef,
                             lr=args.lr,
                             eps=args.eps,
                             rl2_lr=args.rl2_lr,
                             rl2_eps=args.rl2_eps,
                             max_grad_norm=args.max_grad_norm,
                             aug_list=aug_list,
                             aug_id=aug_id,
                             aug_coef=args.aug_coef,
                             num_aug_types=len(aug_to_func),
                             recurrent_hidden_size=args.rl2_hidden_size,
                             num_actions=envs.action_space.n,
                             device=device)

    # NOTE(review): the two `elif False` branches below can never run; they
    # are kept only as switchable alternatives to the model-based default.
    elif False:  # Regular Drac
        aug_id = data_augs.Identity
        aug_func = aug_to_func[args.aug_type](batch_size=batch_size)

        agent = algo.DrAC(actor_critic,
                          args.clip_param,
                          args.ppo_epoch,
                          args.num_mini_batch,
                          args.value_loss_coef,
                          args.entropy_coef,
                          lr=args.lr,
                          eps=args.eps,
                          max_grad_norm=args.max_grad_norm,
                          aug_id=aug_id,
                          aug_func=aug_func,
                          aug_coef=args.aug_coef,
                          env_name=args.env_name)
    elif False:  # Model Free Planning Drac
        aug_id = data_augs.Identity
        aug_func = aug_to_func[args.aug_type](batch_size=batch_size)

        actor_critic = PlanningPolicy(obs_shape,
                                      envs.action_space.n,
                                      base_kwargs={
                                          'recurrent': False,
                                          'hidden_size': args.hidden_size
                                      })
        actor_critic.to(device)

        agent = algo.DrAC(actor_critic,
                          args.clip_param,
                          args.ppo_epoch,
                          args.num_mini_batch,
                          args.value_loss_coef,
                          args.entropy_coef,
                          lr=args.lr,
                          eps=args.eps,
                          max_grad_norm=args.max_grad_norm,
                          aug_id=aug_id,
                          aug_func=aug_func,
                          aug_coef=args.aug_coef,
                          env_name=args.env_name)
    else:  # Model based Drac
        aug_id = data_augs.Identity
        aug_func = aug_to_func[args.aug_type](batch_size=batch_size)

        # Replaces the Policy built above with the model-based variant.
        actor_critic = ModelBasedPolicy(obs_shape,
                                        envs.action_space.n,
                                        base_kwargs={
                                            'recurrent': False,
                                            'hidden_size': args.hidden_size
                                        })
        actor_critic.to(device)

        agent = algo.ConvDrAC(actor_critic,
                              args.clip_param,
                              args.ppo_epoch,
                              args.num_mini_batch,
                              args.value_loss_coef,
                              args.entropy_coef,
                              lr=args.lr,
                              eps=args.eps,
                              max_grad_norm=args.max_grad_norm,
                              aug_id=aug_id,
                              aug_func=aug_func,
                              aug_coef=args.aug_coef,
                              env_name=args.env_name)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    if modelbased:
        rollouts.next_obs[0].copy_(obs)  # TODO: is this right?
    rollouts.to(device)

    # Rolling window over the rewards of the 10 most recent episodes.
    episode_rewards = deque(maxlen=10)
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes

    for j in trange(num_updates):
        actor_critic.train()
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                obs_id = aug_id(rollouts.obs[step])
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    obs_id, rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])

            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)

        with torch.no_grad():
            obs_id = aug_id(rollouts.obs[-1])
            next_value = actor_critic.get_value(
                obs_id, rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.gamma, args.gae_lambda)

        if args.use_ucb and j > 0:
            agent.update_ucb_values(rollouts)
        if isinstance(agent, algo.ConvDrAC):
            value_loss, action_loss, dist_entropy, transition_model_loss, reward_model_loss = agent.update(
                rollouts)
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        # Log every log_interval-th update, once more than one episode has
        # finished so that mean/median rewards are meaningful.
        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print(
                "\nUpdate {}, step {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}"
                .format(j, total_num_steps, len(episode_rewards),
                        np.mean(episode_rewards), np.median(episode_rewards)))

            logger.logkv("train/nupdates", j)
            logger.logkv("train/total_num_steps", total_num_steps)

            logger.logkv("losses/dist_entropy", dist_entropy)
            logger.logkv("losses/value_loss", value_loss)
            logger.logkv("losses/action_loss", action_loss)
            if isinstance(agent, algo.ConvDrAC):
                logger.logkv("losses/transition_model_loss",
                             transition_model_loss)
                logger.logkv("losses/reward_model_loss", reward_model_loss)

            logger.logkv("train/mean_episode_reward", np.mean(episode_rewards))
            logger.logkv("train/median_episode_reward",
                         np.median(episode_rewards))

            ### Eval on the Full Distribution of Levels ###
            eval_episode_rewards = evaluate(args,
                                            actor_critic,
                                            device,
                                            aug_id=aug_id)

            logger.logkv("test/mean_episode_reward",
                         np.mean(eval_episode_rewards))
            logger.logkv("test/median_episode_reward",
                         np.median(eval_episode_rewards))

            logger.dumpkvs()
Пример #21
0
            loss = criterion(output, cls)  # compute loss

            optimizer.zero_grad()  # set grad of all parameters to zero
            loss.backward()  # compute gradient for each parameters
            optimizer.step()  # update parameters
            ''' write out information to tensorboard '''
            writer.add_scalar('loss vs iters',
                              loss.data.cpu().numpy(),
                              iters)  # training loss vs num of iterations
            train_info += ' loss: {:.4f}'.format(loss.data.cpu().numpy())

            print(train_info)

        if epoch % args.val_epoch == 0:
            ''' evaluate the model '''
            acc = test.evaluate(model, val_loader, 0)
            writer.add_scalar(
                'val_acc vs epoch', acc,
                epoch)  # mIOU score on validation set vs num of epochs
            writer.add_scalar(
                'val_acc vs iters', acc,
                iters)  # mIOU score on validation set vs num of iterations
            print('Epoch: [{}] ACC:{}'.format(epoch, acc))
            ''' save best model '''
            if acc > best_acc:
                if args.model_level == 'baseline':
                    save_model(
                        model,
                        os.path.join(args.save_dir, 'baseline_model.pth.tar'))
                else:
                    save_model(
Пример #22
0
def main(args):
    """Train a ``Waveunet`` separation model, then test the best checkpoint.

    Steps performed:

    1. Build the model (optionally wrapped in ``model_utils.DataParallel``
       and moved to the GPU when ``args.cuda`` is set).
    2. Build train/val/test ``SeparationDataset`` objects from the folds
       returned by ``get_musdb_folds``.
    3. Train epoch by epoch with a cyclic learning rate until the validation
       loss has not improved for ``args.patience`` consecutive epochs,
       saving a checkpoint after every epoch.
    4. Reload the best checkpoint, compute the test loss and the metrics
       returned by ``evaluate``, pickle the metrics into
       ``args.checkpoint_dir`` and log SDR/SIR averages to TensorBoard.

    Args:
        args: Run configuration namespace (model, data, optimisation and
            logging options).

    Raises:
        NotImplementedError: If ``args.loss`` is neither ``"L1"`` nor
            ``"L2"``.
    """
    #torch.backends.cudnn.benchmark=True # This makes dilated conv much faster for CuDNN 7.5

    # MODEL
    num_features = [args.features*i for i in range(1, args.levels+1)] if args.feature_growth == "add" else \
                   [args.features*2**i for i in range(0, args.levels)]
    target_outputs = int(args.output_size * args.sr)
    model = Waveunet(args.channels,
                     num_features,
                     args.channels,
                     args.instruments,
                     kernel_size=args.kernel_size,
                     target_output_size=target_outputs,
                     depth=args.depth,
                     strides=args.strides,
                     conv_type=args.conv_type,
                     res=args.res,
                     separate=args.separate)

    if args.cuda:
        model = model_utils.DataParallel(model)
        print("move model to gpu")
        model.cuda()

    print('model: ', model)
    print('parameter count: ', str(sum(p.numel() for p in model.parameters())))

    writer = SummaryWriter(args.log_dir)

    ### DATASET
    musdb = get_musdb_folds(args.dataset_dir)
    # If not data augmentation, at least crop targets to fit model output shape
    crop_func = partial(crop_targets, shapes=model.shapes)
    # Data augmentation function for training
    augment_func = partial(random_amplify,
                           shapes=model.shapes,
                           min=0.7,
                           max=1.0)
    train_data = SeparationDataset(musdb,
                                   "train",
                                   args.instruments,
                                   args.sr,
                                   args.channels,
                                   model.shapes,
                                   True,
                                   args.hdf_dir,
                                   audio_transform=augment_func)
    val_data = SeparationDataset(musdb,
                                 "val",
                                 args.instruments,
                                 args.sr,
                                 args.channels,
                                 model.shapes,
                                 False,
                                 args.hdf_dir,
                                 audio_transform=crop_func)
    test_data = SeparationDataset(musdb,
                                  "test",
                                  args.instruments,
                                  args.sr,
                                  args.channels,
                                  model.shapes,
                                  False,
                                  args.hdf_dir,
                                  audio_transform=crop_func)

    dataloader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        worker_init_fn=utils.worker_init_fn)

    ##### TRAINING ####

    # Set up the loss function
    if args.loss == "L1":
        criterion = nn.L1Loss()
    elif args.loss == "L2":
        criterion = nn.MSELoss()
    else:
        raise NotImplementedError("Couldn't find this loss!")

    # Set up optimiser
    optimizer = Adam(params=model.parameters(), lr=args.lr)

    # Set up training state dict that will also be saved into checkpoints.
    # np.inf is used because the np.Inf alias was removed in NumPy 2.0.
    state = {"step": 0, "worse_epochs": 0, "epochs": 0, "best_loss": np.inf}

    # LOAD MODEL CHECKPOINT IF DESIRED
    if args.load_model is not None:
        print("Continuing training full model from checkpoint " +
              str(args.load_model))
        state = model_utils.load_model(model, optimizer, args.load_model,
                                       args.cuda)

    print('TRAINING START')
    while state["worse_epochs"] < args.patience:
        print("Training one epoch from iteration " + str(state["step"]))
        avg_time = 0.
        model.train()
        with tqdm(total=len(train_data) // args.batch_size) as pbar:
            np.random.seed()
            for example_num, (x, targets) in enumerate(dataloader):
                if args.cuda:
                    x = x.cuda()
                    for k in list(targets.keys()):
                        targets[k] = targets[k].cuda()

                t = time.time()

                # Set LR for this iteration
                utils.set_cyclic_lr(optimizer, example_num,
                                    len(train_data) // args.batch_size,
                                    args.cycles, args.min_lr, args.lr)
                writer.add_scalar("lr", utils.get_lr(optimizer), state["step"])

                # Compute loss for each instrument/model
                optimizer.zero_grad()
                outputs, avg_loss = model_utils.compute_loss(model,
                                                             x,
                                                             targets,
                                                             criterion,
                                                             compute_grad=True)

                optimizer.step()

                state["step"] += 1

                # Running mean of the per-iteration wall-clock time.
                t = time.time() - t
                avg_time += (1. / float(example_num + 1)) * (t - avg_time)

                writer.add_scalar("train_loss", avg_loss, state["step"])

                if example_num % args.example_freq == 0:
                    input_centre = torch.mean(
                        x[0, :, model.shapes["output_start_frame"]:model.
                          shapes["output_end_frame"]],
                        0)  # Stereo not supported for logs yet
                    writer.add_audio("input",
                                     input_centre,
                                     state["step"],
                                     sample_rate=args.sr)

                    for inst in outputs.keys():
                        writer.add_audio(inst + "_pred",
                                         torch.mean(outputs[inst][0], 0),
                                         state["step"],
                                         sample_rate=args.sr)
                        writer.add_audio(inst + "_target",
                                         torch.mean(targets[inst][0], 0),
                                         state["step"],
                                         sample_rate=args.sr)

                pbar.update(1)

        # VALIDATE
        val_loss = validate(args, model, criterion, val_data)
        print("VALIDATION FINISHED: LOSS: " + str(val_loss))
        writer.add_scalar("val_loss", val_loss, state["step"])

        # EARLY STOPPING CHECK
        checkpoint_path = os.path.join(args.checkpoint_dir,
                                       "checkpoint_" + str(state["step"]))
        if val_loss >= state["best_loss"]:
            state["worse_epochs"] += 1
        else:
            print("MODEL IMPROVED ON VALIDATION SET!")
            state["worse_epochs"] = 0
            state["best_loss"] = val_loss
            state["best_checkpoint"] = checkpoint_path

        # CHECKPOINT
        print("Saving model...")
        model_utils.save_model(model, optimizer, state, checkpoint_path)

        state["epochs"] += 1

    #### TESTING ####
    # Test loss
    print("TESTING")

    # Load best model based on validation loss. "best_checkpoint" is only
    # recorded once an epoch improves on best_loss, so it can be absent
    # (e.g. no epoch improved, or patience <= 0); in that case the current
    # weights are tested instead of failing with a KeyError.
    best_checkpoint = state.get("best_checkpoint")
    if best_checkpoint is not None:
        state = model_utils.load_model(model, None, best_checkpoint,
                                       args.cuda)
    else:
        print("No best checkpoint recorded, testing current model weights")
    test_loss = validate(args, model, criterion, test_data)
    print("TEST FINISHED: LOSS: " + str(test_loss))
    writer.add_scalar("test_loss", test_loss, state["step"])

    # Mir_eval metrics
    test_metrics = evaluate(args, musdb["test"], model, args.instruments)

    # Dump all metrics results into pickle file for later analysis if needed
    with open(os.path.join(args.checkpoint_dir, "results.pkl"), "wb") as f:
        pickle.dump(test_metrics, f)

    # Write most important metrics into Tensorboard log
    avg_SDRs = {
        inst: np.mean([np.nanmean(song[inst]["SDR"]) for song in test_metrics])
        for inst in args.instruments
    }
    avg_SIRs = {
        inst: np.mean([np.nanmean(song[inst]["SIR"]) for song in test_metrics])
        for inst in args.instruments
    }
    for inst in args.instruments:
        writer.add_scalar("test_SDR_" + inst, avg_SDRs[inst], state["step"])
        writer.add_scalar("test_SIR_" + inst, avg_SIRs[inst], state["step"])
    overall_SDR = np.mean(list(avg_SDRs.values()))
    # Logged at the same step as the per-instrument test scalars above.
    writer.add_scalar("test_SDR", overall_SDR, state["step"])
    print("SDR: " + str(overall_SDR))

    writer.close()
Пример #23
0
            ''' move data to gpu '''
            imgs, seg = imgs.cuda(), seg.cuda()
            ''' forward path '''
            output = model(imgs)
            ''' compute loss, backpropagation, update parameters '''
            loss = criterion(output, seg)  # compute loss

            optimizer.zero_grad()  # set grad of all parameters to zero
            loss.backward()  # compute gradient for each parameters
            optimizer.step()  # update parameters
            ''' write out information to tensorboard '''
            writer.add_scalar('loss', loss.data.cpu().numpy(), iters)
            train_info += ' loss: {:.4f}'.format(loss.data.cpu().numpy())

            print(train_info)

        if epoch % args.val_epoch == 0:
            ''' evaluate the model '''
            acc = evaluate(model, val_loader)
            writer.add_scalar('val_acc', acc, iters)
            print('Epoch: [{}] ACC:{}'.format(epoch, acc))
            ''' save best model '''
            if acc > best_acc:
                save_model(
                    model,
                    os.path.join(args.save_improved_dir, 'model_best.pth.tar'))
                best_acc = acc
        ''' save model '''
        #save_model(model, os.path.join(args.save_improved_dir, 'model_{}.pth.tar'.format(epoch)))
        sched.step()
Пример #24
0
def train(args, seeds):
    global last_checkpoint_time
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if 'cuda' in device.type:
        print('Using CUDA\n')

    torch.set_num_threads(1)

    utils.seed(args.seed)

    # Configure logging
    if args.xpid is None:
        args.xpid = "lr-%s" % time.strftime("%Y%m%d-%H%M%S")
    log_dir = os.path.expandvars(os.path.expanduser(args.log_dir))
    plogger = FileWriter(
        xpid=args.xpid,
        xp_args=args.__dict__,
        rootdir=log_dir,
        seeds=seeds,
    )
    stdout_logger = HumanOutputFormat(sys.stdout)

    checkpointpath = os.path.expandvars(
        os.path.expanduser("%s/%s/%s" % (log_dir, args.xpid, "model.tar")))

    # Configure actor envs
    start_level = 0
    if args.full_train_distribution:
        num_levels = 0
        level_sampler_args = None
        seeds = None
    else:
        num_levels = 1
        level_sampler_args = dict(
            num_actors=args.num_processes,
            strategy=args.level_replay_strategy,
            replay_schedule=args.level_replay_schedule,
            score_transform=args.level_replay_score_transform,
            temperature=args.level_replay_temperature,
            eps=args.level_replay_eps,
            rho=args.level_replay_rho,
            nu=args.level_replay_nu,
            alpha=args.level_replay_alpha,
            staleness_coef=args.staleness_coef,
            staleness_transform=args.staleness_transform,
            staleness_temperature=args.staleness_temperature)
    envs, level_sampler = make_lr_venv(
        num_envs=args.num_processes,
        env_name=args.env_name,
        seeds=seeds,
        device=device,
        num_levels=num_levels,
        start_level=start_level,
        no_ret_normalization=args.no_ret_normalization,
        distribution_mode=args.distribution_mode,
        paint_vel_info=args.paint_vel_info,
        level_sampler_args=level_sampler_args)

    is_minigrid = args.env_name.startswith('MiniGrid')

    actor_critic = model_for_env_name(args, envs)
    actor_critic.to(device)
    print(actor_critic)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)

    batch_size = int(args.num_processes * args.num_steps / args.num_mini_batch)

    def checkpoint():
        """Write model, optimizer and argument state to ``checkpointpath``.

        Does nothing when ``args.disable_checkpoint`` is set.
        """
        if not args.disable_checkpoint:
            logging.info("Saving checkpoint to %s", checkpointpath)
            # Assemble the payload first, then serialise it in one call.
            snapshot = {
                "model_state_dict": actor_critic.state_dict(),
                "optimizer_state_dict": agent.optimizer.state_dict(),
                "args": vars(args),
            }
            torch.save(snapshot, checkpointpath)

    agent = algo.PPO(actor_critic,
                     args.clip_param,
                     args.ppo_epoch,
                     args.num_mini_batch,
                     args.value_loss_coef,
                     args.entropy_coef,
                     lr=args.lr,
                     eps=args.eps,
                     max_grad_norm=args.max_grad_norm,
                     env_name=args.env_name)

    level_seeds = torch.zeros(args.num_processes)
    if level_sampler:
        obs, level_seeds = envs.reset()
    else:
        obs = envs.reset()
    level_seeds = level_seeds.unsqueeze(-1)
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes

    timer = timeit.default_timer
    update_start_time = timer()
    for j in range(num_updates):
        actor_critic.train()
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                obs_id = rollouts.obs[step]
                value, action, action_log_dist, recurrent_hidden_states = actor_critic.act(
                    obs_id, rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
                action_log_prob = action_log_dist.gather(-1, action)

            # Obser reward and next obs
            obs, reward, done, infos = envs.step(action)

            # Reset all done levels by sampling from level sampler
            for i, info in enumerate(infos):
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

                if level_sampler:
                    level_seeds[i][0] = info['level_seed']

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])

            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, action_log_dist, value, reward,
                            masks, bad_masks, level_seeds)

        with torch.no_grad():
            obs_id = rollouts.obs[-1]
            next_value = actor_critic.get_value(
                obs_id, rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.gamma, args.gae_lambda)

        # Update level sampler
        if level_sampler:
            level_sampler.update_with_rollouts(rollouts)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()
        if level_sampler:
            level_sampler.after_update()

        # Log stats every log_interval updates or if it is the last update
        if (j % args.log_interval == 0
                and len(episode_rewards) > 1) or j == num_updates - 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps

            update_end_time = timer()
            num_interval_updates = 1 if j == 0 else args.log_interval
            sps = num_interval_updates * (args.num_processes *
                                          args.num_steps) / (update_end_time -
                                                             update_start_time)
            update_start_time = update_end_time

            logging.info(f"\nUpdate {j} done, {total_num_steps} steps\n  ")
            logging.info(
                f"\nEvaluating on {args.num_test_seeds} test levels...\n  ")
            eval_episode_rewards, transitions = evaluate(
                args, actor_critic, args.num_test_seeds, device)
            plogger._save_data(transitions, f'test_trajectories_{j}.pkl')

            logging.info(
                f"\nEvaluating on {args.num_test_seeds} train levels...\n  ")
            train_eval_episode_rewards, transitions = evaluate(
                args,
                actor_critic,
                args.num_test_seeds,
                device,
                start_level=0,
                num_levels=args.num_train_seeds,
                seeds=seeds,
                level_sampler=level_sampler)

            stats = {
                "step":
                total_num_steps,
                "pg_loss":
                action_loss,
                "value_loss":
                value_loss,
                "dist_entropy":
                dist_entropy,
                "train:mean_episode_return":
                np.mean(episode_rewards),
                "train:median_episode_return":
                np.median(episode_rewards),
                "test:mean_episode_return":
                np.mean(eval_episode_rewards),
                "test:median_episode_return":
                np.median(eval_episode_rewards),
                "train_eval:mean_episode_return":
                np.mean(train_eval_episode_rewards),
                "train_eval:median_episode_return":
                np.median(train_eval_episode_rewards),
                "sps":
                sps,
            }
            if is_minigrid:
                stats["train:success_rate"] = np.mean(
                    np.array(episode_rewards) > 0)
                stats["train_eval:success_rate"] = np.mean(
                    np.array(train_eval_episode_rewards) > 0)
                stats["test:success_rate"] = np.mean(
                    np.array(eval_episode_rewards) > 0)

            if j == num_updates - 1:
                logging.info(
                    f"\nLast update: Evaluating on {args.num_test_seeds} test levels...\n  "
                )
                final_eval_episode_rewards, transitions = evaluate(
                    args, actor_critic, args.final_num_test_seeds, device)

                mean_final_eval_episode_rewards = np.mean(
                    final_eval_episode_rewards)
                median_final_eval_episide_rewards = np.median(
                    final_eval_episode_rewards)

                plogger.log_final_test_eval({
                    'num_test_seeds':
                    args.final_num_test_seeds,
                    'mean_episode_return':
                    mean_final_eval_episode_rewards,
                    'median_episode_return':
                    median_final_eval_episide_rewards
                })

            plogger.log(stats)
            if args.verbose:
                stdout_logger.writekvs(stats)

        # Log level weights
        if level_sampler and j % args.weight_log_interval == 0:
            plogger.log_level_weights(level_sampler.sample_weights())

        # Checkpoint
        timer = timeit.default_timer
        if last_checkpoint_time is None:
            last_checkpoint_time = timer()
        try:
            if j == num_updates - 1 or \
                (args.save_interval > 0 and timer() - last_checkpoint_time > args.save_interval * 60):  # Save every 10 min.
                checkpoint()
                last_checkpoint_time = timer()
        except KeyboardInterrupt:
            return
Пример #25
0
        'D:\py_pro\YOLOv3-PyTorch\yolo_cfg\\' + model_name + '.cfg',
        'weights': 'D:\py_pro\YOLOv3-PyTorch\weights\\' + map_name +
        '\\yolov3_ep43-map82.67-loss0.15187.pt',
        'train_path':
        'D:\py_pro\YOLOv3-PyTorch\data\\' + map_name + '\\train.txt',
        'val_path': 'D:\py_pro\YOLOv3-PyTorch\data\\' + map_name + '\\val.txt',
        'prune_num': 16,  # YOLOv3标准网络中有23个res块,这里代表剪掉多少块
    }
    # Build the network from the configured .cfg file, move it to the GPU and
    # load the configured weights.
    model = YOLOv3(import_param['cfg_path']).cuda()
    model.load_state_dict(torch.load(import_param['weights']))

    # Baseline evaluation on the validation list before any pruning; only the
    # AP values (before_AP) are used in the lines visible here.
    precision, recall, before_AP, f1, ap_class = evaluate(
        model,
        path=import_param['val_path'],
        iou_thres=import_param['iou_thres'],
        conf_thres=import_param['conf_thres'],
        nms_thres=import_param['nms_thres'],
        img_size=import_param['img_size'],
        batch_size=import_param['batch_size'],
    )
    # Total number of model parameters before pruning
    before_parameters = sum([param.nelement() for param in model.parameters()])
    print(f'稀疏化训练后模型mAP:{before_AP.mean():.4f}')

    CBL_idx, _, shortcut_idx = parse_blocks_layer(model.blocks)

    # Copy the absolute-valued gamma parameters of all BN layers that are to
    # be pruned into the one-dimensional tensor bn_weights
    bn_weights = gather_bn_weights(model.module_list, shortcut_idx)
    # torch.sort returns (values, indices): the sorted values and the index
    # each value had before sorting; ascending order by default
    sorted_bn = torch.sort(bn_weights)[0]
Пример #26
0
    print('Finished Training!')


if __name__ == "__main__":
    # Script entry point: fine-tune the classifier, report the elapsed
    # wall-clock time, then reload a saved checkpoint and evaluate it on the
    # test split.
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(device)

    # Alternative checkpoint: "idb-ita/gilberto-uncased-from-camembert"
    bert_model = "Musixmatch/umberto-commoncrawl-cased-v1"
    num_classes = 11

    bert = UmbertoCustom(bert_model=bert_model, num_classes=num_classes)
    bert = bert.to(device)

    train_iter, valid_iter, test_iter = get_train_valid_test_fine(
        bert_model=bert_model,
        max_seq_lenght=512,
    )

    opt = optim.Adam(bert.parameters(), lr=2e-5)

    init_time = time.time()
    train(
        model=bert,
        optimizer=opt,
        train_loader=train_iter,
        valid_loader=valid_iter,
        num_epochs=5,
        # Validate twice per pass over the training loader.
        eval_every=len(train_iter) // 2,
        file_path="../data/models/",
    )
    tot_time = time.time() - init_time

    # divmod gives the same floor-quotient / remainder pair as `//` and `%`.
    elapsed_minutes, elapsed_seconds = divmod(tot_time, 60)
    print("time taken:", int(elapsed_minutes), "minutes",
          int(elapsed_seconds), "seconds")

    # Fresh model instance that receives the weights stored in the checkpoint.
    best_model = UmbertoCustom(bert_model=bert_model, num_classes=num_classes)
    best_model = best_model.to(device)
    load_checkpoint("../data/models" + '/model2.pt', best_model, device)

    evaluate(best_model, test_iter, num_classes, device)
def _parse_train_args(argv):
    """Parse the training command line.

    Args:
        argv: Sequence of argument strings handed to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed namespace, with ``debug``, ``multiscale`` and ``augment``
        converted from the strings ``'True'``/``'False'`` to booleans.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs",
                        type=int,
                        default=10000,
                        help="number of epochs")
    parser.add_argument("--batch_size",
                        type=int,
                        default=20,
                        help="size of each image batch")
    parser.add_argument("--data_config",
                        type=str,
                        default="config/adc.data",
                        help="path to data config file")
    # No default: training starts from scratch unless a checkpoint path is
    # given, e.g. models/model1/yolov3_ckpt_73.pth
    parser.add_argument("--pretrained_weights", type=str)
    parser.add_argument(
        "--n_cpu",
        type=int,
        default=0,
        help="number of cpu threads to use during batch generation")
    # NOTE(review): the default is a two-element list, but type=int yields a
    # single int when the flag is given on the command line -- confirm that
    # ListDataset and evaluate accept both forms.
    parser.add_argument("--img_size",
                        type=int,
                        default=[768, 1024],
                        help="size of each image dimension")
    parser.add_argument("--evaluation_interval",
                        type=int,
                        default=1,
                        help="interval evaluations on validation set")
    parser.add_argument("--multiscale",
                        default='False',
                        choices=['True', 'False'])
    parser.add_argument("--augment",
                        default='False',
                        choices=['True', 'False'])
    parser.add_argument("--save_path",
                        type=str,
                        default='models/weights_1350_0102',
                        help="save model path")
    parser.add_argument("--debug",
                        type=str,
                        default='False',
                        choices=['True', 'False'],
                        help="debug")
    parser.add_argument("--lr", type=float, default=0.01, help="learning rate")
    args = parser.parse_args(argv)

    # The flags arrive as the strings 'True'/'False'; turn them into booleans.
    args.debug = args.debug == 'True'
    args.multiscale = args.multiscale == 'True'
    args.augment = args.augment == 'True'
    return args


def main(argv):
    """Train the detector, checkpointing and evaluating it periodically.

    Parses the command line, builds the model, dataset and evaluator, then
    runs ``args.epochs`` epochs of Adam training with an exponentially
    decayed learning rate. Every ``args.evaluation_interval`` epochs the
    weights are saved under ``args.save_path`` and the evaluator is run at
    several confidence thresholds.

    Args:
        argv: Sequence of argument strings handed to argparse.
    """
    args = _parse_train_args(argv)
    print_args(args)

    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    if args.debug:
        print('debug...')
        # In debug mode, validate after every epoch.
        args.evaluation_interval = 1

    # Checkpoints are written into save_path during training, so make sure
    # the directory exists (an existing directory is reused, which keeps
    # resumed runs working).
    if args.save_path:
        os.makedirs(args.save_path, exist_ok=True)

    # The data config (adc.data) holds the paths of the train and valid
    # lists and of the anchors file.
    data_config = parse_data_config(args.data_config)
    train_path = data_config["train"]
    valid_path = data_config["valid"]
    if args.debug:
        # Debug runs evaluate on the training list itself.
        valid_path = train_path
    anchors = get_anchors(data_config['anchors']).to('cuda')

    model = ResNet(anchors).to('cuda')
    if args.pretrained_weights:
        print('pretrained weights: ', args.pretrained_weights)
        model.load_pretrained_weights(args.pretrained_weights)

    dataset = ListDataset(train_path,
                          img_size=args.img_size,
                          augment=args.augment,
                          multiscale=args.multiscale)
    # Named `evaluator` rather than `eval` so the builtin is not shadowed.
    evaluator = evaluate(path=valid_path,
                         img_size=args.img_size,
                         batch_size=args.batch_size,
                         debug=args.debug)

    if args.debug:
        # Keep debug epochs short: at most ten batches of images.
        dataset.img_files = dataset.img_files[:10 * args.batch_size]
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.n_cpu,
        collate_fn=dataset.collate_fn,
    )
    print('Number train sample: ', len(dataset))
    # Open question from the original author: should the optimizer and the
    # learning rate be tuned here?
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=5e-5)
    print('\n### train ...')
    for epoch in range(args.epochs):
        model.train()

        # Exponential decay: 5% smaller each epoch, floored at 1e-10.
        lr = max(1e-10, args.lr * (0.95**epoch))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        for batch_i, (imgs, targets, _) in enumerate(dataloader):
            imgs = Variable(imgs.to('cuda'))
            # The training set goes through augment_sequential, while the
            # validation set does not.
            # targets=([[0.0000, 0.7328, 0.2808, 0.0934, 0.0808],
            #         [1.0000, 0.5255, 0.5466, 0.0596, 0.1587],
            #         [1.0000, 0.5585, 0.8077, 0.0553, 0.2250],
            #         [3.0000, 0.4519, 0.4351, 0.1365, 0.2048]], device='cuda:0')
            targets = Variable(targets.to('cuda'), requires_grad=False)

            yolo_map, _ = model(imgs)
            # yolo_map.shape: [4,]; each yolo_map is laid out as
            # batch, featuremap_h, featuremap_w, anchor_num, (x, y, w, h, conf)
            loss, metrics = model.loss(yolo_map, targets)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Log every 100 batches and on the last batch of the epoch.
            if (batch_i + 1) % 100 == 0 or (batch_i + 1) == len(dataloader):
                time_str = datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                current_lr = optimizer.param_groups[0]['lr']
                loss_str = 'loss: {:<8.2f}'.format(metrics["loss"])
                loss_str += 'xy: {:<8.2f}'.format(metrics["xy"])
                loss_str += 'wh: {:<8.2f}'.format(metrics["wh"])
                loss_str += 'conf: {:<8.2f}'.format(metrics["conf"])
                epoch_str = 'Epoch: {:4}({:4}/{:4})'.format(
                    epoch, batch_i + 1, len(dataloader))
                print('[{}]{} {} lr:{}'.format(time_str, epoch_str, loss_str,
                                               current_lr))
        print()
        if epoch % args.evaluation_interval == 0:
            print("\n---- Evaluating Model ----")
            save_model_epoch = 'yolov3_ckpt_{}.pth'.format(epoch)
            model.save_weights(os.path.join(args.save_path, save_model_epoch))
            print(save_model_epoch)
            # save_path is only handed to the first evaluator call; the
            # remaining thresholds get None.
            example_save_path = args.save_path
            for conf_thres in [0.1, 0.3, 0.5, 0.7]:
                eval_metrics = evaluator(model,
                                         iou_thres=0.5,
                                         conf_thres=conf_thres,
                                         nms_thres=0.5,
                                         save_path=example_save_path)
                example_save_path = None
                print(
                    'image_acc: {}\t{}\tbbox_acc: {}\tbbox_recall: {}'.format(
                        *eval_metrics[1:]))

                names = ['image', 'ture', 'det', 'box_acc', 'image_acc']
                print('{:<10}{:<10}{:<10}{:<10}{:<10}'.format(*names))
                print('{:<10}{:<10}{:<10}{:<10}{:<10}'.format(
                    *eval_metrics[0][0]))
                print('{:<10}{:<10}{:<10}{:<10}{:<10}'.format(
                    *eval_metrics[0][1]))
                print()
Пример #28
0
# Collect hidden-state outputs and coordinates over repeated evaluations of
# freshly generated test data, then split them into train/test sets.
ev_iterations = 100

n_units = 40

print(' ')
print('number of hidden units: ' + str(n_units))
print(' ')

# Project-local `test` module; imported once instead of on every iteration.
import test

test_data_stack = []
test_hh_stack = []
for _ in range(ev_iterations):
    # Evaluate on test dataset
    test_data = test.generate_test_dataset()
    test_perp, test_hh = test.evaluate(test_data, test=True)

    test_data_stack.extend(test_data['coordinates'])
    test_hh_stack.extend(test_hh)

input_data = test_hh_stack
# Fold each coordinate pair into one integer label.
# NOTE(review): the factor 9 presumably is the grid width -- confirm.
output_data = [coord[0] + coord[1] * 9 for coord in test_data_stack]

print('')

# data allocation
X_train, X_test, y_train, y_test = train_test_split(input_data, output_data)

# parameters
Пример #29
0
def train(check_every=0, save_every=5):
    """Run the training loop from the global ``epoch`` up to ``num_epochs``.

    Works on module-level state: ``model``, ``epoch`` and ``step`` are
    declared ``global``; ``epoch`` and ``step`` are advanced in place so a
    later call resumes where this one stopped.

    Args:
        check_every: Evaluate mAP every this many steps. ``0`` disables the
            periodic check.
        save_every: Call ``save()`` every this many epochs. ``0`` disables
            periodic saving (the save at each decay epoch still happens).
    """
    global model, epoch, step

    optimizer = get_optimizer()
    model.train()
    start = tic = time()

    # Seed the displayed mAP values from the most recent summary entries.
    train_mAP = test_mAP = 0.
    if summary['map']['train']:
        train_mAP = summary['map']['train'][-1][1]
        test_mAP = summary['map']['test'][-1][1]
    if pretty:
        pretty_head()

    for e in range(epoch, num_epochs):
        if not pretty:
            print('- Epoch {}'.format(e))

        for x, y, a in loader_train:
            if len(y) == 0:
                continue  # no target in this image

            loss = train_step(x, y, a, optimizer)

            toc = time()
            iter_time = toc - tic
            tic = toc

            if check_every and step > 0 and step % check_every == 0:
                # evaluate the mAP

                # Keep quiet while evaluating
                voc_train.mute = True
                voc_test.mute = True

                if pretty:
                    pretty_tail()
                    print('Checking mAP ...')
                train_mAP = evaluate(model, loader_val, 200)
                summary['map']['train'].append((step, train_mAP))
                test_mAP = evaluate(model, loader_test, 200)
                summary['map']['test'].append((step, test_mAP))
                if pretty:
                    pretty_head()
                else:
                    print('train mAP = {:.1f}%'.format(100 * train_mAP))
                    print('test mAP = {:.1f}%'.format(100 * test_mAP))

                # Restore the mute setting that matches the output mode.
                voc_train.mute = pretty
                voc_test.mute = pretty

            step += 1

            if pretty:
                pretty_body(summary, start, iter_time, learning_rate, epoch,
                            step, a['image_id'], train_mAP, test_mAP)
            else:
                print('Use time: {:.2f}s'.format(iter_time))
                print('-- Iteration {it}, loss = {loss:.4f}\n'.format(
                    it=step, loss=loss))

        epoch += 1

        # save model; a falsy save_every disables periodic saving instead of
        # raising ZeroDivisionError, mirroring how check_every is handled
        if save_every and epoch % save_every == 0:
            save()

        if epoch in decay_epochs:
            save()
            optimizer = lr_decay()

    if pretty:
        pretty_tail()
    # Label ids of the face parts that get recoloured below.
    table = {'left_eye': 4, 'right_eye': 5, 'upper_lip': 12, 'lower_lip': 13}

    image_path = args.img_path
    # Checkpoint path handed to evaluate() below.
    cp = 'cp/79999_iter.pth'

    try:
        image = cv2.imread(image_path)
        # Presumably cv2.imread returned None when the path is unreadable, in
        # which case .copy() raises the AttributeError handled below.
        ori = image.copy()
        im2 = image.copy()
        # Overpaint im2 with a very thick white rectangle; presumably this
        # yields a blank white canvas for the mask-only view.
        im2 = cv2.rectangle(im2, (0, 0), (1080, 1080), (255, 255, 255),
                            thickness=1080)
    except AttributeError:
        print('Image not found. Please enter a valid path.')
        quit()

    parsing = evaluate(image_path, cp)
    # Nearest-neighbour interpolation keeps the values discrete label ids.
    # NOTE(review): cv2.resize takes dsize as (width, height), while
    # image.shape[0:2] is (height, width) -- this looks transposed for
    # non-square images; confirm and consider image.shape[1::-1].
    parsing = cv2.resize(parsing,
                         image.shape[0:2],
                         interpolation=cv2.INTER_NEAREST)

    parts = [
        table['left_eye'], table['right_eye'], table['upper_lip'],
        table['lower_lip']
    ]
    color = [139, 0, 139]

    # Apply the same colour mask for every part to both the photo and im2.
    for part in parts:
        image = mask(image, parsing, part, color)
        im2 = mask(im2, parsing, part, color)

    # Show the untouched original, scaled to 512x512.
    cv2.imshow('image', cv2.resize(ori, (512, 512)))
Пример #31
0
            # Tensorboard logging
            tensorboard_log = []
            for j, yolo in enumerate(yolo_layers):
                for name, metric in yolo.metrics.items():
                    if name != "grid_size":
                        tensorboard_log += [(f"{name}_{j + 1}", metric)]
            tensorboard_log += [("loss", loss.item())]
            logger_fakenight.list_of_scalars_summary(tensorboard_log, epoch)

            print("\n---- Evaluating Model on Daytime ----")
            # Evaluate the model on the validation set
            precision, recall, AP, f1, ap_class = evaluate(
                model_list,
                path=valid_path_daytime,
                iou_thres=0.5,
                conf_thres=0.5,
                nms_thres=0.5,
                img_size=opt.img_size,
                batch_size=8,
            )
            evaluation_metrics = [
                ("val_precision", precision.mean()),
                ("val_recall", recall.mean()),
                ("val_mAP", AP.mean()),
                ("val_f1", f1.mean()),
            ]
            logger_daytime.list_of_scalars_summary(evaluation_metrics, epoch)

            # Print class APs and mAP
            ap_table = [["Index", "Class name", "AP"]]
            for i, c in enumerate(ap_class):
Пример #32
0
# Collect hidden-state outputs and coordinates over repeated evaluations of
# freshly generated test data and derive integer labels from the coordinates.
ev_iterations = 100

n_units = 40

print(' ')
print('number of hidden units: ' + str(n_units))
print(' ')

# Project-local `test` module; imported once instead of on every iteration.
import test

test_data_stack = []
test_hh_stack = []
for _ in range(ev_iterations):
    # Evaluate on test dataset
    test_data = test.generate_test_dataset()
    test_perp, test_hh, test_y_bin_error_sum = test.evaluate(test_data, test=True)

    test_data_stack.extend(test_data['coordinates'])
    test_hh_stack.extend(test_hh)

# 4/5 of the samples for training, 1/5 for testing.
# NOTE(review): under Python 3 `/` makes these floats; confirm how they are
# consumed before using them as sizes or indices.
N_train = ev_iterations * 100 * 4 / 5
N_test = ev_iterations * 100 / 5

input_data = test_hh_stack
# Fold each coordinate pair into one integer label.
# NOTE(review): the factor 9 presumably is the grid width -- confirm.
output_data = [coord[0] + coord[1] * 9 for coord in test_data_stack]

print('')

# data allocation
Пример #33
0
#!/usr/bin/env python3.4

import os
import sys

from data import DataDir
from test import evaluate

# Command-line entry: the first argument selects the data set, the optional
# second argument selects the classifier.
if len(sys.argv) == 1:
    # Called without any argument: print the usage text and exit with a
    # failure status.
    for usage_line in (
        "Execute with:",
        "   ./evaluate.py foo [classifier]",
        "Where foo indicates a classify-foo.txt file in data/,",
        "and classifier is one of "
        "baseline|tandem|reluctant-tandem|bigram|frequency.)",
    ):
        print(usage_line)
    sys.exit(1)

env = sys.argv[1]
# The classifier falls back to "tandem" when no second argument is given.
if len(sys.argv) >= 3:
    classifier = sys.argv[2]
else:
    classifier = "tandem"
evaluate(env, classifier)