Example #1
def fraction_of_datasets(datasets, fraction, attackers_idx=[]):
    """Extract a fraction of data from each dataset and return 
    the aggregated data as a FLCustomDataset.

    Args:
        datasets (dict[FLCustomDataset]): 
        fraction (float): Fraction between 0.0 and 1.0

    Returns:
        [FLCustomDataset]:
    """    
    logging.info("Extracting {}% of users data (total: {}) to be sent to the server...".format(
        fraction * 100.0, int(fraction * len(datasets) * len(list(datasets.values())[0].targets))))
    images, labels = [], []
    for ww_id, dataset in datasets.items():
        idx = torch.randperm(len(dataset.targets))[:int(fraction * len(dataset.targets))]
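        # Workers in attackers_idx contribute poisoned data: random noise is
        # added to their sampled images before aggregation.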
        if ww_id in attackers_idx:
            images.append(
                (dataset.data[idx.tolist()] +
                 np.random.randint(0, 1024, (len(idx), 28, 28))).byte()
            )
        else:
            images.append(dataset.data[idx.tolist()])
        labels.append(dataset.targets[idx.tolist()])
    aggregate_dataset = FLCustomDataset(
        torch.cat(images), torch.cat(labels),
        transform=transforms.Compose([
            transforms.ToTensor()])
    )
    logging.info("Extracted... Ok, The size of the extracted data: {}".format(
        aggregate_dataset.data.shape))
    return aggregate_dataset
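A minimal usage sketch (not part of the original source); the FLCustomDataset constructor and attributes are inferred from the function above, and the sketch reuses the same torch / torchvision.transforms imports the function already assumes:

# Hypothetical worker datasets with fake 28x28 images and labels.
workers = {
    ww_id: FLCustomDataset(
        torch.randint(0, 256, (100, 28, 28), dtype=torch.uint8),
        torch.randint(0, 10, (100,)),
        transform=transforms.Compose([transforms.ToTensor()]))
    for ww_id in range(5)
}
# Pull 10% of every worker's data; workers 0 and 3 act as attackers.
server_data = fraction_of_datasets(workers, fraction=0.1, attackers_idx=[0, 3])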
Example #2
def create_label_mapping(datasets):
    """Map each unique ``func`` label across all datasets to a 1-based integer id."""
    labels = set()
    for ds in datasets.values():
        labels.update(ds["func"].unique())
    # Sort the labels so the mapping is deterministic across runs.
    return {label: idx for idx, label in enumerate(sorted(labels), start=1)}
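A small input/output sketch (assumption, not from the source: each value in `datasets` is a pandas DataFrame with a `func` column, which is all the function touches):

import pandas as pd

toy = {
    "train": pd.DataFrame({"func": ["add", "sub", "add"]}),
    "test": pd.DataFrame({"func": ["mul"]}),
}
print(create_label_mapping(toy))  # -> {'add': 1, 'mul': 2, 'sub': 3}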
Example #3
def _create_split_schemes(self, datasets):
    """Wrap each dataset in a ``Subset`` when it defines explicit split indices."""
    output = {}
    for transform_type, v1 in datasets.items():
        output[transform_type] = {}
        for split_name, v2 in v1.items():
            indices = v2.get_split_indices(split_name)
            if indices is not None:
                # Restrict the dataset to the indices registered for this split.
                output[transform_type][split_name] = torch.utils.data.Subset(v2, indices)
            else:
                # No explicit indices: use the full dataset for this split.
                output[transform_type][split_name] = v2
    return {self.get_split_scheme_name(0): output}
Example #4
def get_dataloaders(datasets, batch_size=None, shuffle=None):
    """Build a DataLoader for each phase (key) in ``datasets``."""
    batch_size, shuffle = _parse_batch_size_shuffle(batch_size, shuffle)
    assert isinstance(datasets, dict), \
        f"Expected datasets to be a dict but found {type(datasets)}"

    loaders = {
        phase: DataLoader(dataset,
                          batch_size=batch_size[phase],
                          shuffle=shuffle[phase])
        for phase, dataset in datasets.items()
    }
    return loaders
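A hypothetical call (assumptions: `_parse_batch_size_shuffle`, which is not shown, accepts per-phase dicts and returns them keyed by phase, as the indexing above implies; `train_set` and `val_set` stand for any map-style Datasets):

loaders = get_dataloaders(
    {"train": train_set, "val": val_set},
    batch_size={"train": 32, "val": 64},
    shuffle={"train": True, "val": False},
)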
Example #5
#%%
import logging
import torch

import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# %%
# Use the detected device rather than hard-coding "cuda".
dataset = models.GardenData(dat, device=device)
train_len = int(len(dataset) * 0.9)
d = torch.utils.data.random_split(
    dataset, [train_len, len(dataset) - train_len])
datasets = {'train': d[0], 'valid': d[1]}
dataloaders = {
    phase: torch.utils.data.DataLoader(dataset=ds,
                                       batch_size=32,
                                       num_workers=0,
                                       shuffle=True)
    for phase, ds in datasets.items()
}
dataset_sizes = {name: len(dl.dataset) for name, dl in dataloaders.items()}

logging.info("Dataset sizes:")
logging.info(dataset_sizes)

#depth = dataset.__getitem__(2)['depth_frame']

#%%
from torch import nn
model = models.ConvNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
Example #6
def count_labels(datasets):
    """Count the distinct ``func`` labels across all datasets."""
    labels = set()
    for ds in datasets.values():
        labels.update(ds["func"].unique())
    return len(labels)
Example #7
def siam_vsone_train():
    r"""
    CommandLine:
        python -m clab.live.siam_train siam_vsone_train --dry
        python -m clab.live.siam_train siam_vsone_train
        python -m clab.live.siam_train siam_vsone_train --db PZ_Master1
        python -m clab.live.siam_train siam_vsone_train --db PZ_MTEST --dry
        python -m clab.live.siam_train siam_vsone_train --db PZ_MTEST
        python -m clab.live.siam_train siam_vsone_train --db RotanTurtles

        python -m clab.live.siam_train siam_vsone_train --db humpbacks_fb

    Example:
        >>> # DISABLE_DOCTEST
        >>> from clab.live.siam_train import *  # NOQA
        >>> siam_vsone_train()
    """
    dbname = ub.argval('--db', default='PZ_MTEST')
    # train_dataset, vali_dataset, test_dataset = ibeis_datasets('GZ_Master')
    dim = 512
    datasets = randomized_ibeis_dset(dbname, dim=dim)
    workdir = ub.ensuredir(
        os.path.expanduser('~/data/work/siam-ibeis2/' + dbname))
    print('workdir = {!r}'.format(workdir))

    # train_dataset, vali_dataset, test_dataset = att_faces_datasets()
    # workdir = os.path.expanduser('~/data/work/siam-att')
    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    batch_size = 6

    print('batch_size = {!r}'.format(batch_size))
    # class_weights = train_dataset.class_weights()
    # print('class_weights = {!r}'.format(class_weights))

    pretrained = 'resnet50'
    # pretrained = 'resnet50'
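    # Look up an ImageNet-pretrained torchvision backbone by name; it becomes
    # the shared branch of the Siamese model constructed below.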
    branch = getattr(torchvision.models, pretrained)(pretrained=True)
    model = models.SiameseLP(p=2, branch=branch, input_shape=(1, 3, dim, dim))
    print(model)

    hyper = hyperparams.HyperParams(criterion=(criterions.ContrastiveLoss, {
        'margin': 4,
        'weight': None,
    }),
                                    optimizer=(torch.optim.SGD, {
                                        'weight_decay': .0005,
                                        'momentum': 0.9,
                                        'nesterov': True,
                                    }),
                                    scheduler=('Exponential', {
                                        'gamma': 0.99,
                                        'base_lr': 0.001,
                                        'stepsize': 2,
                                    }),
                                    other={
                                        'n_classes': 2,
                                        'augment': datasets['train'].augment,
                                    })

    def custom_metrics(harn, output, label):
        from .torch import metrics
        metrics_dict = metrics._siamese_metrics(output,
                                                label,
                                                margin=harn.criterion.margin)
        return metrics_dict

    dry = ub.argflag('--dry')
    from .torch.sseg_train import directory_structure
    train_dpath, test_dpath = directory_structure(
        workdir,
        model.__class__.__name__,
        datasets,
        pretrained=pretrained,
        train_hyper_id=hyper.hyper_id(),
        suffix='_' + hyper.other_id())

    xpu = xpu_device.XPU.from_argv()
    harn = fit_harness.FitHarness(
        model=model,
        hyper=hyper,
        datasets=datasets,
        xpu=xpu,
        batch_size=batch_size,
        train_dpath=train_dpath,
        dry=dry,
    )
    harn.add_metric_hook(custom_metrics)
    harn.run()