Example #1
0
def dataSetStatistics(data_dir, batch_size, num_data):
    """Estimate per-channel mean and std of the images under ``data_dir``.

    Recursively collects image files (any nesting depth) relative to
    ``data_dir``, wraps them in an ``UnsuperviseDataset``, and accumulates
    per-channel statistics until at least ``num_data`` samples are seen.

    Args:
        data_dir: root directory holding image files and/or subdirectories.
        batch_size: dataloader batch size.
        num_data: stop accumulating once this many samples have been seen.

    Returns:
        (mean, std): per-channel tensors averaged over the sampled images.
        NOTE: averaging per-image std values approximates, but does not
        equal, the true global std.
    """
    import os

    transform = transforms.Compose([transforms.ToTensor()])

    # Collect every file under data_dir, storing paths relative to data_dir
    # with '/' separators — same "<subdir>/<file>" convention as before, but
    # os.walk generalizes the hand-rolled three-level traversal to any depth.
    img_list = []
    for root, _, files in os.walk(data_dir):
        rel = os.path.relpath(root, data_dir)
        for fname in files:
            if rel == '.':
                img_list.append(fname)
            else:
                img_list.append(rel.replace(os.sep, '/') + '/' + fname)

    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    # len() is the idiomatic form of dataset.__len__()
    print('length of entire dataset:', len(dataset))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=16)

    # Accumulate per-channel statistics batch by batch.
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        # Flatten spatial dims so mean/std are taken per channel.
        data = data.view(batch_samples, data.size(1), -1)
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:
            break
    mean = mean / m
    std = std / m
    return mean, std
def dataSetStatistics(data_dir, batch_size, num_data):
    """Estimate per-channel mean and std of images directly under ``data_dir``.

    Unlike the recursive variant, only top-level files of ``data_dir`` are
    considered; subdirectories are ignored.

    Args:
        data_dir: directory containing the image files.
        batch_size: dataloader batch size.
        num_data: stop accumulating once this many samples have been seen.

    Returns:
        (mean, std): per-channel tensors averaged over the sampled images.
        NOTE: averaging per-image std values approximates, but does not
        equal, the true global std.
    """
    # Detect if we have a GPU available (informational only — the statistics
    # below are computed wherever the dataloader yields tensors, i.e. CPU).
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Current device: '+str(device))

    transform = transforms.Compose([transforms.ToTensor()])
    # Top-level files only; subdirectories are skipped by the isfile filter.
    img_list = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    total = len(dataset)  # idiomatic len() instead of dataset.__len__()
    print('length of entire dataset:', total)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=16)

    # Accumulate per-channel statistics batch by batch.
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # flatten spatial dims
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:
            break
    mean = mean / m
    std = std / m
    print('mean:',mean)
    print('std:',std)
    return mean, std
Example #3
0
def feature_vec_gen_bionoi(device, model, src_dir, feature_dir, classes,
                           normalize):
    """
    Generate feature vectors for bionoi images for 10-fold cross-validation.

    Args:
        device: torch device the model runs on (forwarded to feature_vec_gen).
        model: feature-extraction model (forwarded to feature_vec_gen).
        src_dir: root of source images, laid out as cv<k>/<task>/<class>/.
        feature_dir: root where features are written; subfolders mirroring
            the source layout are created if missing.
        classes: iterable of class-subfolder names.
        normalize: if truthy, normalize inputs with the hard-coded mean/std.
    """
    # data configuration — per-channel statistics for the bionoi images
    # (NOTE(review): mean and std are identical here; verify these values)
    data_mean = [0.6150, 0.4381, 0.6450]
    data_std = [0.6150, 0.4381, 0.6450]
    if normalize:  # truthiness test instead of the '== True' anti-pattern
        print('normalizing data:')
        print('mean:', data_mean)
        print('std:', data_std)
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((data_mean[0], data_mean[1], data_mean[2]),
                                 (data_std[0], data_std[1], data_std[2]))
        ])
    else:
        transform = transforms.Compose([transforms.ToTensor()])

    # generating features for each fold / split / class folder
    for i in range(10):
        k = i + 1
        for task in ('train/', 'val/'):
            for cls in classes:  # renamed from 'type' to avoid shadowing the builtin
                src_dir_sub = src_dir + 'cv' + str(k) + '/' + task + cls + '/'
                feature_dir_sub = feature_dir + 'cv' + str(
                    k) + '/' + task + cls + '/'
                if not os.path.exists(feature_dir_sub):
                    os.makedirs(feature_dir_sub)
                print('generating features from:', src_dir_sub)
                print('generated features stored at:', feature_dir_sub)
                # listdir already returns a list; no copy comprehension needed
                img_list = listdir(src_dir_sub)
                dataset = UnsuperviseDataset(src_dir_sub,
                                             img_list,
                                             transform=transform)
                feature_vec_gen(device, model, dataset, feature_dir_sub)
Example #4
0
# Gather image file paths up to three directory levels deep, stored
# relative to data_dir with '/' separators.
img_list = []
for entry in listdir(data_dir):
    entry_path = join(data_dir, entry)
    if isfile(entry_path):
        img_list.append(entry)
    elif isdir(entry_path):
        for sub in listdir(entry_path):
            sub_path = join(entry_path, sub)
            if isfile(sub_path):
                img_list.append(entry + '/' + sub)
            elif isdir(sub_path):
                for leaf in listdir(sub_path):
                    if isfile(join(sub_path, leaf)):
                        img_list.append(entry + '/' + sub + '/' + leaf)

dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)


# Instantiate the requested autoencoder architecture. Lambdas keep
# construction (and name resolution) lazy, exactly like the original
# if/elif chain; an unrecognized style leaves model unassigned as before.
_model_builders = {
    'conv': lambda: ConvAutoencoder(),
    'dense': lambda: DenseAutoencoder(input_size, feature_size),
    'conv_dense_out': lambda: ConvAutoencoder_dense_out(feature_size),
    'conv_1x1': lambda: ConvAutoencoder_conv1x1(),
    'conv_1x1_test': lambda: ConvAutoencoder_conv1x1_layertest(),
    'conv_deeper': lambda: ConvAutoencoder_deeper1(),
}
if style in _model_builders:
    model = _model_builders[style]()
Example #5
0
    # Gather image file paths (up to three directory levels deep) relative
    # to data_dir, joined with '/' separators.
    img_list = []
    for item in listdir(data_dir):
        if isfile(join(data_dir, item)):
            img_list.append(item)  # file directly under data_dir
        elif isdir(join(data_dir, item)):
            update_data_dir = join(data_dir, item)
            for f in listdir(update_data_dir):
                if isfile(join(update_data_dir, f)):
                    img_list.append(item + '/' + f)  # one level down
                elif isdir(join(update_data_dir, f)):
                    deeper_data_dir = join(update_data_dir, f)
                    for y in listdir(deeper_data_dir):
                        if isfile(join(deeper_data_dir, y)):
                            # two levels down
                            img_list.append(item + '/' + f + '/' + y)

    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)

    if style == 'conv_1x1':

        # create dataloader
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True,
                                                 num_workers=32)

        # get some random training images to show
        #dataiter = iter(dataloader)
        #images, filename = dataiter.next()

        # print(images.shape)
        # imshow(torchvision.utils.make_grid(images))