def triplet_loss_with_knn_exp(device='3',
                              ckpt_prefix='Run01',
                              lr=1e-3,
                              pretrain_epochs=50,
                              batch_all_epochs=30,
                              batch_hard_epochs=80,
                              n_classes=10,
                              n_samples=12,
                              margin=0.3,
                              log_interval=50,
                              log_level="INFO",
                              k=3,
                              squared=False,
                              embed_dims=64,
                              embed_net='vgg',
                              is_train_embedding_model=False,
                              using_pretrain=False,
                              batch_size=128,
                              select_method='batch_all_and_hard',
                              soft_margin=True):
    """
    knn as classifier.
    :param device:
    :param lr:
    :param n_epochs:
    :param n_classes:
    :param n_samples:
    :param k: kNN parameter
    :return:
    """
    SEED = 0
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
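    # (Seeds cover the torch, CUDA, and numpy RNGs; full determinism would
    # also require torch.backends.cudnn.deterministic = True.)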

    kwargs = locals()  # snapshot of the call arguments (plus SEED) for logging
    log_file = '{}/ckpt/{}_with_knn_exp/{}.log'.format(ROOT_DIR, select_method,
                                                       ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train',
                           device='a',
                           transform=Compose(
                               [Normalize(mean=mu, std=sigma),
                                ToTensor()]))
    test_dataset = DevSet(mode='test',
                          device='a',
                          transform=Compose(
                              [Normalize(mean=mu, std=sigma),
                               ToTensor()]))

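    # Each balanced batch holds n_classes * n_samples clips (10 * 12 = 120
    # with the defaults), the layout online triplet mining expects.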
    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=1)

    if embed_net == 'vgg':
        model = networks.vggish_bn(classify=using_pretrain)
    elif embed_net == 'shallow':
        model = networks.embedding_net_shallow()
    else:
        raise ValueError("embed_net '{}' doesn't exist!".format(embed_net))

    model = model.cuda()
    if is_train_embedding_model:
        if using_pretrain:
            pt_loss_fn = nn.CrossEntropyLoss()
            pt_optimizer = optim.Adam(model.parameters(), lr=lr)
            pt_scheduler = lr_scheduler.StepLR(optimizer=pt_optimizer,
                                               step_size=30,
                                               gamma=0.5)
            pt_train_hist = History(name='pretrain_train/a')
            pt_val_hist = History(name='pretrain_test/a')
            pt_ckpter = CheckPoint(model=model,
                                   optimizer=pt_optimizer,
                                   path='{}/ckpt/{}_with_knn_exp'.format(
                                       ROOT_DIR, select_method),
                                   prefix=(ckpt_prefix + 'pretrain'),
                                   interval=1,
                                   save_num=1)

            for epoch in range(1, pretrain_epochs + 1):
                pt_scheduler.step()  # pre-1.1 PyTorch convention: scheduler steps at epoch start
                train_loss, metrics = train_epoch(
                    train_loader=train_batch_loader,
                    model=model,
                    loss_fn=pt_loss_fn,
                    optimizer=pt_optimizer,
                    log_interval=log_interval,
                    metrics=[AccumulatedAccuracyMetric()])
                train_logs = {'loss': train_loss}
                for metric in metrics:
                    train_logs[metric.name()] = metric.value()
                pt_train_hist.add(logs=train_logs, epoch=epoch)

                test_loss, metrics = test_epoch(
                    val_loader=test_loader,
                    model=model,
                    loss_fn=pt_loss_fn,
                    metrics=[AccumulatedAccuracyMetric()])
                test_logs = {'loss': test_loss}
                for metric in metrics:
                    test_logs[metric.name()] = metric.value()
                pt_val_hist.add(logs=test_logs, epoch=epoch)

                pt_train_hist.clear()
                pt_train_hist.plot()
                pt_val_hist.plot()
                logging.info('Epoch{:04d}, {:15}, {}'.format(
                    epoch, pt_train_hist.name, str(pt_train_hist.recent)))
                logging.info('Epoch{:04d}, {:15}, {}'.format(
                    epoch, pt_val_hist.name, str(pt_val_hist.recent)))
                pt_ckpter.check_on(epoch=epoch,
                                   monitor='acc',
                                   loss_acc=pt_val_hist.recent)

            best_pt_model_filename = Reporter(
                ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
                exp='{}_with_knn_exp'.format(select_method)).select_best(
                    run=(ckpt_prefix + 'pretrain')).selected_ckpt
            model.load_state_dict(
                torch.load(best_pt_model_filename)['model_state_dict'])
            # drop the classification head so the network emits embeddings
            model.set_classify(False)

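        # With soft_margin=True the losses below presumably replace the hinge
        # [d_ap - d_an + margin]_+ with the smooth softplus
        # log(1 + exp(d_ap - d_an)), the soft-margin triplet variant
        # (Hermans et al., 2017, "In Defense of the Triplet Loss").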
        if select_method == 'batch_all':
            loss_fn = BatchAllTripletLoss(margin=margin,
                                          squared=squared,
                                          soft_margin=soft_margin)
        elif select_method == 'batch_hard':
            loss_fn = BatchHardTripletLoss(margin=margin,
                                           squared=squared,
                                           soft_margin=soft_margin)
        elif select_method == 'random_hard':
            loss_fn = RandomHardTripletLoss(
                margin=margin,
                triplet_selector=RandomNegativeTripletSelector(margin=margin),
                squared=squared,
                soft_margin=soft_margin)
        elif select_method == 'batch_all_and_hard':
            loss_fn_ba = BatchAllTripletLoss(margin=margin,
                                             squared=squared,
                                             soft_margin=soft_margin)
            loss_fn_bh = BatchHardTripletLoss(margin=margin,
                                              squared=squared,
                                              soft_margin=soft_margin)
        else:
            raise ValueError("select_method '{}' is not defined!".format(
                select_method))

        optimizer = optim.Adam(model.parameters(), lr=lr)
        scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                        step_size=30,
                                        gamma=0.5)

        train_hist = History(name='train/a')
        val_hist = History(name='test/a')
        ckpter = CheckPoint(model=model,
                            optimizer=optimizer,
                            path='{}/ckpt/{}_with_knn_exp'.format(
                                ROOT_DIR, select_method),
                            prefix=ckpt_prefix,
                            interval=1,
                            save_num=1)

        for epoch in range(1, batch_all_epochs + batch_hard_epochs + 1):
            scheduler.step()
            if select_method == 'batch_all_and_hard':
                # warm up with batch-all mining, then switch to batch-hard
                cur_loss_fn = loss_fn_ba if epoch <= batch_all_epochs else loss_fn_bh
            else:
                cur_loss_fn = loss_fn
            train_loss, metrics = train_epoch(
                train_loader=train_batch_loader,
                model=model,
                loss_fn=cur_loss_fn,
                optimizer=optimizer,
                log_interval=log_interval,
                metrics=[AverageNoneZeroTripletsMetric()])
            train_logs = dict()
            train_logs['loss'] = train_loss
            for metric in metrics:
                train_logs[metric.name()] = metric.value()
            train_hist.add(logs=train_logs, epoch=epoch)

            # TODO sklearn knn
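            # A minimal sklearn replacement for the custom kNN (a sketch,
            # assuming extract_embeddings returns flat (N, embed_dims) arrays):
            #   from sklearn.neighbors import KNeighborsClassifier
            #   tr_emb, tr_lbl = extract_embeddings(train_batch_loader, model, embed_dims)
            #   te_emb, te_lbl = extract_embeddings(test_loader, model, embed_dims)
            #   test_acc = KNeighborsClassifier(n_neighbors=k).fit(
            #       tr_emb, tr_lbl).score(te_emb, te_lbl)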
            test_acc = kNN(model=model,
                           train_loader=train_batch_loader,
                           test_loader=test_loader,
                           k=k)
            test_logs = {'acc': test_acc}
            val_hist.add(logs=test_logs, epoch=epoch)

            train_hist.clear()
            train_hist.plot()
            val_hist.plot()
            logging.info('Epoch{:04d}, {:15}, {}'.format(
                epoch, train_hist.name, str(train_hist.recent)))
            logging.info('Epoch{:04d}, {:15}, {}'.format(
                epoch, val_hist.name, str(val_hist.recent)))
            ckpter.check_on(epoch=epoch,
                            monitor='acc',
                            loss_acc=val_hist.recent)

    # reload best embedding model
    best_model_filename = Reporter(
        ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
        exp='{}_with_knn_exp'.format(select_method)).select_best(
            run=ckpt_prefix).selected_ckpt
    model.load_state_dict(torch.load(best_model_filename)['model_state_dict'])

    train_embedding, train_labels = extract_embeddings(train_batch_loader,
                                                       model, embed_dims)
    test_embedding, test_labels = extract_embeddings(test_loader, model,
                                                     embed_dims)

    xgb_cls(train_data=train_embedding,
            train_label=train_labels,
            val_data=test_embedding,
            val_label=test_labels,
            exp_dir=os.path.dirname(log_file))

    # TODO plot all curve
    if using_pretrain:
        pt_train_hist.plot()
        pt_val_hist.plot()
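

# A possible invocation (a sketch; assumes the repo's DCASE18 data and
# checkpoint layout are in place):
#   triplet_loss_with_knn_exp(device='0', ckpt_prefix='Run01',
#                             select_method='batch_all_and_hard',
#                             is_train_embedding_model=True)
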
def batch_hard_with_knn_exp(device='3',
                            ckpt_prefix='Run01',
                            lr=1e-3,
                            embedding_epochs=10,
                            classify_epochs=100,
                            n_classes=10,
                            n_samples=12,
                            margin=0.3,
                            log_interval=50,
                            log_level="INFO",
                            k=3,
                            squared=False):
    """
    knn as classifier.
    :param device:
    :param lr:
    :param n_epochs:
    :param n_classes:
    :param n_samples:
    :param k: kNN parameter
    :return:
    """
    SEED = 0
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)

    kwargs = locals()
    log_file = '{}/ckpt/batch_hard_with_knn_exp/{}.log'.format(
        ROOT_DIR, ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train',
                           device='a',
                           transform=Compose(
                               [Normalize(mean=mu, std=sigma),
                                ToTensor()]))
    test_dataset = DevSet(mode='test',
                          device='a',
                          transform=Compose(
                              [Normalize(mean=mu, std=sigma),
                               ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)

    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
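    # batch-hard mining (hardest=True): each anchor pairs with its hardest
    # positive and hardest negative inside the balanced batch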
    loss_fn = HardTripletLoss(margin=margin, hardest=True, squared=squared)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                    step_size=30,
                                    gamma=0.5)

    # fit(train_loader=train_batch_loader, val_loader=test_batch_loader, model=model, loss_fn=loss_fn,
    #     optimizer=optimizer, scheduler=scheduler, n_epochs=embedding_epochs, log_interval=log_interval,
    #     metrics=[AverageNoneZeroTripletsMetric()])
    train_hist = History(name='train/a')
    val_hist = History(name='test/a')
    ckpter = CheckPoint(
        model=model,
        optimizer=optimizer,
        path='{}/ckpt/batch_hard_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix,
        interval=1,
        save_num=1)

    for epoch in range(1, embedding_epochs + 1):
        scheduler.step()
        train_loss, metrics = train_epoch(
            train_loader=train_batch_loader,
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            log_interval=log_interval,
            metrics=[AverageNoneZeroTripletsMetric()])
        train_logs = dict()
        train_logs['loss'] = train_loss
        for metric in metrics:
            train_logs[metric.name()] = metric.value()
        train_hist.add(logs=train_logs, epoch=epoch)

        test_acc = kNN(model=model,
                       train_loader=train_batch_loader,
                       test_loader=test_batch_loader,
                       k=k)
        test_logs = {'acc': test_acc}
        val_hist.add(logs=test_logs, epoch=epoch)

        train_hist.clear()
        train_hist.plot()
        val_hist.plot()
        logging.info('Epoch{:04d}, {:15}, {}'.format(epoch, train_hist.name,
                                                     str(train_hist.recent)))
        logging.info('Epoch{:04d}, {:15}, {}'.format(epoch, val_hist.name,
                                                     str(val_hist.recent)))
        ckpter.check_on(epoch=epoch, monitor='acc', loss_acc=val_hist.recent)

    # train classifier using learned embeddings.
    classify_model = networks.classifier()
    classify_model = classify_model.cuda()
    classify_loss_fn = nn.CrossEntropyLoss()
    classify_optimizer = optim.Adam(classify_model.parameters(), lr=lr)
    classify_scheduler = lr_scheduler.StepLR(optimizer=classify_optimizer,
                                             step_size=30,
                                             gamma=0.5)
    classify_train_hist = History(name='classify_train/a')
    classify_val_hist = History(name='classify_val/a')
    classify_ckpter = CheckPoint(
        model=classify_model,
        optimizer=classify_optimizer,
        path='{}/ckpt/batch_hard_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix,
        interval=1,
        save_num=1)
    # reload best embedding model
    best_model_filename = Reporter(ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
                                   exp='batch_hard_with_knn_exp').select_best(
                                       run=ckpt_prefix).selected_ckpt
    model.load_state_dict(torch.load(best_model_filename)['model_state_dict'])

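    # 128 below is presumably the output dimensionality of
    # embedding_net_shallow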
    train_embedding, train_labels = extract_embeddings(train_batch_loader,
                                                       model, 128)
    test_embedding, test_labels = extract_embeddings(test_batch_loader, model,
                                                     128)

    classify_train_dataset = DatasetWrapper(data=train_embedding,
                                            labels=train_labels,
                                            transform=ToTensor())
    classify_test_dataset = DatasetWrapper(data=test_embedding,
                                           labels=test_labels,
                                           transform=ToTensor())
    classify_train_loader = DataLoader(dataset=classify_train_dataset,
                                       batch_size=128,
                                       shuffle=True,
                                       num_workers=1)
    classify_test_loader = DataLoader(dataset=classify_test_dataset,
                                      batch_size=128,
                                      shuffle=False,
                                      num_workers=1)

    fit(train_loader=classify_train_loader,
        val_loader=classify_test_loader,
        model=classify_model,
        loss_fn=classify_loss_fn,
        optimizer=classify_optimizer,
        scheduler=classify_scheduler,
        n_epochs=classify_epochs,
        log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()],
        train_hist=classify_train_hist,
        val_hist=classify_val_hist,
        ckpter=classify_ckpter,
        logging=logging)


# Example 3

def hard_triplet_baseline_exp(device='3',
                              ckpt_prefix='Run01',
                              lr=1e-3,
                              n_epochs=300,
                              n_classes=10,
                              n_samples=12,
                              margin=0.3,
                              log_interval=50,
                              log_level="INFO"):
    """

    :param device:
    :param lr:
    :param n_epochs:
    :param n_classes:
    :param n_samples:
    :return:
    """
    kwargs = locals()
    log_file = '{}/ckpt/hard_triplet_baseline_exp/{}.log'.format(
        ROOT_DIR, ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train',
                           device='a',
                           transform=Compose(
                               [Normalize(mean=mu, std=sigma),
                                ToTensor()]))
    test_dataset = DevSet(mode='test',
                          device='a',
                          transform=Compose(
                              [Normalize(mean=mu, std=sigma),
                               ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)

    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
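    # random-hard mining: for each (anchor, positive) pair the selector
    # presumably samples a random negative among those violating the margin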
    loss_fn = RandomHardTripletLoss(
        margin=margin,
        triplet_selector=RandomNegativeTripletSelector(margin=margin))
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                    step_size=30,
                                    gamma=0.5)

    fit(train_loader=train_batch_loader,
        val_loader=test_batch_loader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        scheduler=scheduler,
        n_epochs=n_epochs,
        log_interval=log_interval,
        metrics=[AverageNoneZeroTripletsMetric()])

    verification(model=model)
    train_embedding_tl, train_labels_tl = extract_embeddings(
        train_batch_loader, model, 64)
    # utils.plot_embeddings(embeddings=train_embedding_tl, targets=train_labels_tl, title='train set')
    test_embedding_tl, test_labels_tl = extract_embeddings(
        test_batch_loader, model, 64)
    # utils.plot_embeddings(embeddings=test_embedding_tl, targets=test_labels_tl, title='test set')

    model2 = networks.classifier()
    model2 = model2.cuda()
    loss_fn2 = nn.CrossEntropyLoss()
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)
    scheduler2 = lr_scheduler.StepLR(optimizer=optimizer2,
                                     step_size=30,
                                     gamma=0.5)
    train_dataset2 = DatasetWrapper(data=train_embedding_tl,
                                    labels=train_labels_tl,
                                    transform=ToTensor())
    test_dataset2 = DatasetWrapper(data=test_embedding_tl,
                                   labels=test_labels_tl,
                                   transform=ToTensor())
    train_loader2 = DataLoader(dataset=train_dataset2,
                               batch_size=128,
                               shuffle=True,
                               num_workers=1)
    test_loader2 = DataLoader(dataset=test_dataset2,
                              batch_size=128,
                              shuffle=False,
                              num_workers=1)

    train_hist = History(name='train/a')
    val_hist = History(name='val/a')
    ckpter = CheckPoint(
        model=model2,  # checkpoint the classifier being trained below
        optimizer=optimizer2,
        path='{}/ckpt/hard_triplet_baseline_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix,
        interval=1,
        save_num=1)
    fit(train_loader=train_loader2,
        val_loader=test_loader2,
        model=model2,
        loss_fn=loss_fn2,
        optimizer=optimizer2,
        scheduler=scheduler2,
        n_epochs=n_epochs,
        log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()],
        train_hist=train_hist,
        val_hist=val_hist,
        ckpter=ckpter,
        logging=logging)


# Example 4

def hard_triplet_baseline_exp(device='3',
                              lr=1e-3,
                              n_epochs=300,
                              n_classes=10,
                              n_samples=12,
                              margin=0.3,
                              log_interval=50):
    """

    :param device:
    :param lr:
    :param n_epochs:
    :param n_classes:
    :param n_samples:
    :return:
    """

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train',
                           device='a',
                           transform=Compose(
                               [Normalize(mean=mu, std=sigma),
                                ToTensor()]))
    test_dataset = DevSet(mode='test',
                          device='a',
                          transform=Compose(
                              [Normalize(mean=mu, std=sigma),
                               ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)

    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
    loss_fn = OnlineTripletLoss(
        margin=margin,
        triplet_selector=utils.RandomNegativeTripletSelector(margin=margin))
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                    step_size=30,
                                    gamma=0.5)

    fit(train_loader=train_batch_loader,
        val_loader=test_batch_loader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        scheduler=scheduler,
        n_epochs=n_epochs,
        log_interval=log_interval,
        metrics=[AverageNoneZeroTripletsMetric()])

    train_embedding_tl, train_labels_tl = utils.extract_embeddings(
        train_batch_loader, model)
    utils.plot_embeddings(train_embedding_tl, train_labels_tl)
    test_embedding_tl, test_labels_tl = utils.extract_embeddings(
        test_batch_loader, model)
    utils.plot_embeddings(test_embedding_tl, test_labels_tl)
def batch_all_total_with_knn_exp(device='0',
                                 ckpt_prefix='Run01',
                                 lr=1e-3,
                                 embedding_epochs=10,
                                 classify_epochs=100,
                                 n_classes=10,
                                 n_samples=12,
                                 batch_size=128,
                                 margin=0.3,
                                 log_interval=50,
                                 log_level="INFO",
                                 k=3,
                                 squared=False,
                                 embed_dims=64,
                                 embed_net='vgg'):
    """
    Using the entire data set, including device A, B, C.
    Using the batch all method to select the triplets, kNN as the verification
    Finally, using a linear classifier to classify the extraction into embeddings.
    :param device:
    :param lr:
    :param n_epochs:
    :param n_classes:
    :param n_samples:
    :param k: kNN parameter
    :return:
    """
    SEED = 0
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)

    kwargs = locals()
    log_file = '{}/ckpt/batch_all_total_with_knn_exp/{}.log'.format(
        ROOT_DIR, ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='abc')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train',
                           device='abc',
                           transform=Compose(
                               [Normalize(mean=mu, std=sigma),
                                ToTensor()]))

    # get the normalized test datasets (the loop below reuses the name
    # `device` for dataset device labels; the GPU id was consumed above)
    test_dataset = {}
    device_list = ['a', 'b', 'c', 'bc']
    for device in device_list:
        test_dataset[device] = DevSet(
            mode='test',
            device=device,
            transform=Compose([Normalize(mean=mu, std=sigma),
                               ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)

    test_loader = {}
    for device in device_list:
        test_loader[device] = DataLoader(dataset=test_dataset[device],
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=1)

    # network architecture
    if embed_net == 'vgg':
        model = networks.vggish_bn()
    elif embed_net == 'shallow':
        model = networks.embedding_net_shallow()
    else:
        raise ValueError("{} network doesn't exist.".format(embed_net))

    # to gpu
    model = model.cuda()
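    # batch-all mining: every valid (anchor, positive, negative) triplet in
    # the batch contributes; the usual formulation averages over the
    # non-zero-loss triplets (cf. AverageNoneZeroTripletsMetric below)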
    loss_fn = BatchAllTripletLoss(margin=margin,
                                  squared=squared,
                                  soft_margin=False)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer,
                                    step_size=30,
                                    gamma=0.5)

    train_hist = History(name='train/a')
    val_hist = {}
    for device in device_list:
        val_hist[device] = History(name=('test/' + str(device)))

    # learning embedding checkpointer.
    ckpter = CheckPoint(
        model=model,
        optimizer=optimizer,
        path='{}/ckpt/batch_all_total_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix,
        interval=1,
        save_num=1)

    # training embedding network
    for epoch in range(1, embedding_epochs + 1):
        scheduler.step()
        train_loss, metrics = train_epoch(
            train_loader=train_batch_loader,
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            log_interval=log_interval,
            metrics=[AverageNoneZeroTripletsMetric()])
        train_logs = {'loss': train_loss}
        for metric in metrics:
            train_logs[metric.name()] = metric.value()
        train_hist.add(logs=train_logs, epoch=epoch)

        for device in device_list:
            test_acc = kNN(model=model,
                           train_loader=train_batch_loader,
                           test_loader=test_loader[device],
                           k=k)
            test_logs = {'acc': test_acc}
            val_hist[device].add(logs=test_logs, epoch=epoch)

        train_hist.clear()
        train_hist.plot()
        logging.info('Epoch{:04d}, {:15}, {}'.format(epoch, train_hist.name,
                                                     str(train_hist.recent)))

        for device in device_list:
            val_hist[device].plot()
            logging.info('Epoch{:04d}, {:15}, {}'.format(
                epoch, val_hist[device].name, str(val_hist[device].recent)))

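        # checkpoint on device b+c accuracy, the mismatched recording devices
        # of interest in DCASE18 Task B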
        ckpter.check_on(epoch=epoch,
                        monitor='acc',
                        loss_acc=val_hist['bc'].recent)

    # train classifier using learned embeddings.
    classify_model = networks.classifier()
    classify_model = classify_model.cuda()
    classify_loss_fn = nn.CrossEntropyLoss()
    classify_optimizer = optim.Adam(classify_model.parameters(), lr=lr)
    classify_scheduler = lr_scheduler.StepLR(optimizer=classify_optimizer,
                                             step_size=30,
                                             gamma=0.5)
    classify_train_hist = History(name='classify_train/a')
    classify_val_hist = {}
    for device in device_list:
        classify_val_hist[device] = History(name=('classify_val/' +
                                                  str(device)))
    classify_ckpter = CheckPoint(
        model=classify_model,
        optimizer=classify_optimizer,
        path='{}/ckpt/batch_all_total_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix,
        interval=1,
        save_num=1)
    # reload best embedding model
    best_model_filename = Reporter(
        ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
        exp='batch_all_total_with_knn_exp').select_best(
            run=ckpt_prefix).selected_ckpt
    model.load_state_dict(torch.load(best_model_filename)['model_state_dict'])

    # learned best embeddings
    train_embedding, train_labels = extract_embeddings(train_batch_loader,
                                                       model, embed_dims)
    test_embedding, test_labels = {}, {}
    for device in device_list:
        test_embedding[device], test_labels[device] = extract_embeddings(
            test_loader[device], model, embed_dims)

    # wrap embeddings(numpy) to Dataset
    classify_train_dataset = DatasetWrapper(data=train_embedding,
                                            labels=train_labels,
                                            transform=ToTensor())
    classify_test_dataset = {}
    for device in device_list:
        classify_test_dataset[device] = DatasetWrapper(
            data=test_embedding[device],
            labels=test_labels[device],
            transform=ToTensor())
    classify_train_loader = DataLoader(dataset=classify_train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=1)
    classify_test_loader = {}
    for device in device_list:
        classify_test_loader[device] = DataLoader(
            dataset=classify_test_dataset[device],
            batch_size=batch_size,
            shuffle=False,
            num_workers=1)

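    # Note: val_loader and val_hist are dicts keyed by device here, so this
    # call presumably relies on fit() accepting per-device validation loaders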
    fit(train_loader=classify_train_loader,
        val_loader=classify_test_loader,
        model=classify_model,
        loss_fn=classify_loss_fn,
        optimizer=classify_optimizer,
        scheduler=classify_scheduler,
        n_epochs=classify_epochs,
        log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()],
        train_hist=classify_train_hist,
        val_hist=classify_val_hist,
        ckpter=classify_ckpter,
        logging=logging)


# Example 6

                                                   embed_dims)
        train_embedding = np.concatenate(
            (embedding_A, embedding_B, embedding_C))
        train_labels = np.concatenate((labels_A, labels_B, labels_C))
        test_embedding, test_labels = extract_embeddings(
            test_loader, model, embed_dims)
    else:
        train_embedding, train_labels = extract_embeddings(
            train_loader, model, embed_dims)
        test_embedding, test_labels = extract_embeddings(
            test_loader, model, embed_dims)

    distance_matrix = get_distance_matrix2(test_embedding, train_embedding)
    sorted_index = np.argsort(distance_matrix, axis=1)
    predict_labels = []
    for i in range(len(test_embedding)):
        class_cnt = np.zeros([cls_num])
        k_neighbor = train_labels[sorted_index[i]]
        for j in range(k):
            class_cnt[int(k_neighbor[j])] += 1
        predict_labels.append(np.argmax(class_cnt))
    predict_labels = np.array(predict_labels)
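    # A vectorized alternative to the voting loop (a sketch; assumes integer
    # class labels):
    #   k_nearest = train_labels[sorted_index[:, :k]].astype(int)  # (N_test, k)
    #   predict_labels = np.array([np.bincount(row, minlength=cls_num).argmax()
    #                              for row in k_nearest])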
    # test_acc = (test_labels == predict_labels).sum() / len(test_labels)
    test_acc = np.mean(test_labels == predict_labels)
    return test_acc


if __name__ == '__main__':
    model = networks.embedding_net_shallow()
    model = model.cuda()
    verification(model=model)