Example #1
    def __init__(self, root, segs_root, split='train'):
        with open('data/' + split + '.pkl', 'rb') as f:
            self.data = pickle.load(f)

        self.root_dir = root
        self.mat_dir = segs_root + '/' + split + '_mat'  # e.g. '/home/mi/RelationalReasoning/CLEVR_seg/images/train_mat_full_single'
        self.apps_dir = segs_root + '/' + split + '_apps_single'  # e.g. '/home/mi/RelationalReasoning/CLEVR_seg/images/train_apps_single'

        self.transform0 = transforms.Compose([
            Scale([128, 128]),
            transforms.Pad(4),
            transforms.RandomCrop([128, 128]),
            transforms.ToTensor(),
        ])

        self.transform1 = transforms.Compose([
            Scale([128, 128]),
            transforms.Pad(4),
            transforms.RandomCrop([128, 128]),
            #    transforms.ToTensor(),
            #    transforms.Normalize(mean=[0.5, 0.5, 0.5],
            #                        std=[0.5, 0.5, 0.5])
        ])

        self.transform2 = transforms.Compose([
            transforms.ToTensor(),
        ])
        self.if_aug = (split == 'train')

        self.transform_app = transforms.Compose([
            #                                Scale([128, 128]),
            transforms.ToTensor(),
        ])
        self.split = split
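The `Scale` used throughout these snippets is a project-local transform, not a torchvision built-in. A minimal sketch compatible with the `Scale([h, w])` calls above, assuming it simply wraps PIL's resize (like torchvision's old `transforms.Scale`, today's `Resize`):

from PIL import Image

class Scale:
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size  # target (width, height)
        self.interpolation = interpolation

    def __call__(self, img):
        # resize a PIL Image; PIL transforms such as Pad/RandomCrop can follow
        return img.resize(tuple(self.size), self.interpolation)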
Example #2
    def __init__(self, root, split='train'):

        with open(os.path.join(root, split + '.pkl'), 'rb') as f:
            self.data = pickle.load(f)

        self.root_dir = root
        self.img_dir = os.path.join(root, 'smallnorb_' + split)
        self.transform = transforms.Compose([
            # transforms.Scale(48, 48),
            Scale([48, 48]),
            transforms.CenterCrop([32, 32]),
            transforms.ToTensor()
        ])
        self.transform_aug = transforms.Compose([
            # transforms.Scale(48, 48),
            Scale([48, 48]),
            transforms.Pad(1),
            transforms.CenterCrop([32, 32]),
            transforms.RandomRotation(5),
            transforms.ColorJitter(brightness=0.1, contrast=0.1),
            transforms.ToTensor()
        ])

        self.if_aug = (split == 'train')

        self.split = split
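A minimal `__getitem__` sketch showing how this dataset would use the two pipelines; the transform selection follows the snippet above, while the `(filename, label)` record layout and the PIL loading are assumptions:

    def __getitem__(self, index):
        fname, label = self.data[index]  # assumed record layout
        img = Image.open(os.path.join(self.img_dir, fname)).convert('RGB')
        transform = self.transform_aug if self.if_aug else self.transform
        return transform(img), label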
Example #3
    def _init_transform(self):
        if self.thetas and not self.scales:
            return Rotate(self.theta_range_1, self.theta_range_2)
        if self.scales and not self.thetas:
            return Scale(self.scale_range_1, self.scale_range_2)
        if self.scales and self.thetas:
            return ScaleRotate(self.scale_range_1, self.scale_range_2,
                               self.theta_range_1, self.theta_range_2)
        # neither rotation nor scaling requested: plain tensor conversion
        return transforms.ToTensor()
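Note that `Scale` varies from project to project on this page: Example #2 resizes with `Scale([48, 48])`, this example draws a random scale from a range, Example #5 below calls `Scale(size=128)`, and the audio examples call `Scale()` with no arguments.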
Example #4
    def __init__(self, g_conv_dim, z_size, d_conv_dim):
        super(Net, self).__init__()

        self.z_size = z_size

        self.generator = Generator(z_size, g_conv_dim)
        self.discriminator = Discriminator(d_conv_dim)

        self.criterion = nn.BCEWithLogitsLoss()

        self.scale = Scale()

        self.samples = []
Example #5
def test_augment(img, mask=None, model='scale'):
    if model == 'scale':
        return DualCompose([
            Scale(size=128),
            ImageOnly(
                Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225)))
        ])(img, mask=None)
    else:
        return DualCompose([
            Centerpad(size=(128, 128)),
            ImageOnly(
                Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225)))
        ])(img, mask=None)
Example #6
def train_augment(img, mask, prob=0.5):
    # `pad` is a module-level target size defined elsewhere in this project.
    return DualCompose([
        HorizontalFlip(prob=0.5),
        ShiftScale(limit=4, prob=0.5),
        OneOf([
            # ImageOnly(CLAHE(clipLimit=2.0, tileGridSize=(8, 8))),
            ImageOnly(Brightness_shift(limit=0.1)),
            ImageOnly(do_Gamma(limit=0.08)),
            ImageOnly(Brightness_multiply(limit=0.08)),
        ], prob=0.5),
        ImageOnly(Median_blur(ksize=3, prob=0.15)),
        Scale(size=pad),
        # Centerpad(size=(pad, pad)),
        ImageOnly(Normalize(mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225))),
        # OneOf([Scale(size=128), Randompadding(size=(128, 128))], prob=1),
        # RandomErasing(probability=0.22, sl=0.02, sh=0.2, r1=0.2,
        #               mean=[0.4914, 0.4822, 0.4465]),
    ])(img, mask)
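`DualCompose` and `ImageOnly` are project-local helpers that keep the image and its mask geometrically in sync. Plausible minimal versions (an assumption, not the project's actual code):

class DualCompose:
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, x, mask=None):
        # each dual transform takes and returns an (image, mask) pair
        for t in self.transforms:
            x, mask = t(x, mask)
        return x, mask

class ImageOnly:
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, x, mask=None):
        # apply the wrapped transform to the image; pass the mask through untouched
        return self.transform(x), mask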
Example #7
 def __init__(self, root=""):
     self.detected_file = os.path.join(root, self.detected_file_name)
     self.annotations = get_classes(os.path.join(root,
                                                 self.label_file_name))
     self._root_dir = root
     self.item_dict = {
         ItemEnum.IMAGE_PATH: [],
         ItemEnum.BOX_COORDS: [],
         ItemEnum.LABEL: []
     }
     self.load_label_file()
     self.transform = Compose([
         Scale(0.1),
         Rotate(),
     ])
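As Example #8 shows next, this `Compose` variant is applied to an `(img, bbox)` pair, so `Scale(0.1)` and `Rotate()` transform the boxes together with the image.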
Example #8
def view_detection_dataset(loader, annotations):
    transform = Compose([
        Scale(0.2),
        Rotate()
    ])
    for img, bbox, label in tqdm(loader):
        img = img.permute(1, 2, 0).cpu().numpy()
        bbox = bbox.cpu().numpy()
        label = label.cpu().numpy()
        img, bbox = transform(img, bbox)
        for i in range(bbox.shape[0]):
            img = cv2.rectangle(img,
                                (int(bbox[i, 0]), int(bbox[i, 1])),
                                (int(bbox[i, 2]), int(bbox[i, 3])),
                                (255, 0, 0), 2)
            cv2.putText(img, get_class_name_from_id(annotations, label[i]),
                        (int(bbox[i, 0]), int(bbox[i, 1] - 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (100, 255, 100), 2)

        plt.figure(dpi=120)
        plt.imshow(img, cmap="jet")
        plt.waitforbuttonpress(0)
        plt.close()
Example #9
        img_id = int(imgfile.rsplit('_', 1)[1][:-4])  # numeric image id parsed from the file name
        img = torch.from_numpy(self.img[img_id])

        if self.split == "train":
            c_mask = torch.from_numpy(self.constraints[index])
        else:
            c_mask = None

        return img, question, len(question), answer, family, c_mask

    def __len__(self):
        return len(self.data)


transform = transforms.Compose([
    Scale([224, 224]),
    transforms.Pad(4),
    transforms.RandomCrop([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


def collate_data(batch):
    images, lengths, answers, families, c_masks = [], [], [], [], []
    batch_size = len(batch)

    max_len = max(map(lambda x: len(x[1]), batch))

    questions = np.zeros((batch_size, max_len), dtype=np.int64)
    sort_by_len = sorted(batch, key=lambda x: len(x[1]), reverse=True)
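    # The snippet is cut off here. A plausible completion, assuming each batch
    # item is the (img, question, q_len, answer, family, c_mask) tuple produced
    # by the __getitem__ in Example #9 (a sketch, not the project's exact code):
    for i, (image, question, q_len, answer, family, c_mask) in enumerate(sort_by_len):
        images.append(image)
        lengths.append(q_len)
        answers.append(answer)
        families.append(family)
        c_masks.append(c_mask)
        questions[i, :q_len] = question

    return (torch.stack(images), torch.from_numpy(questions), lengths,
            torch.LongTensor(answers), families, c_masks)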
Example #10
File: cli.py Project: mehdidc/wavae
def train(*,
          data_folder='data',
          nb=None,
          lr=1e-3,
          model_name='VAE_CPPN',
          batch_size=32,
          epochs=1000,
          input_dim=1,
          depth=8,
          max_len=500,
          log_interval=50,
          latent_size=10,
          ensemble_dim=1,
          cuda=False):
    mod = getattr(model, model_name)
    vae = mod(
        latent_size=latent_size,
        output_dim=max_len,
        ensemble_dim=ensemble_dim,
        depth=depth,
    )
    if cuda:
        vae = vae.cuda()
    optimizer = optim.Adam(
        vae.parameters(),
        lr=lr,
    )
    vae.train()
    epoch_start = 1
    transform = Compose([
        PadTrim(max_len=max_len),
        Scale(),
    ])
    if nb:
        nb = int(nb)
    dataset = Dataset(data_folder, transform=transform, nb=nb)
    print(len(dataset))
    dataloader = DataLoader(dataset, batch_size=batch_size)
    nb_iter = 0
    for epoch in range(epoch_start, epochs + 1):
        for batch_index, data in enumerate(dataloader):
            x = data
            x = x.cuda() if cuda else x
            vae.zero_grad()
            xrec, mu, logvar = vae(x)
            loss = vae.loss_function(x, xrec, mu, logvar)
            loss.backward()
            optimizer.step()
            if nb_iter % log_interval == 0:
                print(f'niter: {nb_iter:05d} loss: {loss.item():.4f}')
                x = x.detach().cpu().numpy()
                xrec = xrec.detach().cpu().numpy()
                signal = x[0:3, 0].T
                fake_signal = xrec[0:3, 0].T
                for i in range(len(xrec)):
                    s = xrec[i, 0]
                    wavfile.write(f'out/fake_{i:03d}.wav', 16000, s)
                for i in range(len(x)):
                    s = x[i, 0]
                    wavfile.write(f'out/real_{i:03d}.wav', 16000, s)
                fig = plt.figure(figsize=(50, 10))
                plt.plot(signal, color='blue', label='true')
                plt.plot(fake_signal, color='orange', label='fake')
                #plt.legend()
                plt.savefig('out.png')
                plt.close(fig)
            nb_iter += 1
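In the wavae snippets, `Scale()` takes no arguments and runs on raw waveforms, in the style of early torchaudio's `transforms.Scale`, which divides integer PCM samples down to floats in roughly [-1, 1]. A minimal stand-in (the 16-bit factor is an assumption):

class Scale:
    def __init__(self, factor=2 ** 15):
        self.factor = factor  # full scale of 16-bit PCM

    def __call__(self, signal):
        # integer sample tensor -> float tensor in roughly [-1.0, 1.0]
        return signal.float() / self.factor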
Example #11
import os
import sys
import json
import pickle as pkl
import constraints
from typing import List, Type, Dict
from transforms import Scale

import torch
import h5py
from PIL import Image
from tqdm import tqdm

from torchvision import transforms

transform = transforms.Compose([Scale([224, 224]), transforms.ToTensor()])

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def parse_scene_into_dict(scene_file_path: str,
                          split: str = "train") -> Dict[str, Dict]:

    scene_dict = {}

    with open(scene_file_path) as fin:
        scene_data = json.load(fin)

    for scene in scene_data["scenes"]:
        _img_fname = "CLEVR_{}_{}.png".format(
            split,
Example #12
def train(*,
          data_folder='data',
          nb=None,
          lr=1e-04,
          weight_decay=1e-04,
          beta1=0.5,
          beta2=.999,
          batch_size=32,
          epochs=1000,
          input_dim=1,
          max_len=500,
          log_interval=50,
          cppn=True,
          cuda=False):
    # define the optimizers.
    if cppn:
        output_dim = 1
    else:
        output_dim = max_len
    generator = Generator(input_dim=input_dim, output_dim=output_dim)
    discriminator = Discriminator(input_dim=1,
                                  output_dim=1,
                                  input_size=max_len)
    if cuda:
        discriminator = discriminator.cuda()
        generator = generator.cuda()
    generator_optimizer = optim.Adam(generator.parameters(),
                                     lr=lr,
                                     betas=(beta1, beta2),
                                     weight_decay=weight_decay)
    discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                         lr=lr,
                                         betas=(beta1, beta2),
                                         weight_decay=weight_decay)

    # prepare the model and statistics.
    generator.train()
    discriminator.train()
    epoch_start = 1
    transform = Compose([
        PadTrim(max_len=max_len),
        Scale(),
    ])
    if nb:
        nb = int(nb)
    dataset = Dataset(data_folder, transform=transform, nb=nb)
    print(len(dataset))
    dataloader = DataLoader(dataset, batch_size=batch_size)
    nb_iter = 0
    for epoch in range(epoch_start, epochs + 1):
        for batch_index, data in enumerate(dataloader):
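            # WGAN-style weight clipping: clamp the critic's weights to keep it roughly Lipschitz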
            for p in discriminator.parameters():
                p.data.clamp_(-0.1, 0.1)
            x = data
            x = x.cuda() if cuda else x
            discriminator.zero_grad()
            dreal = discriminator(x).mean()

            l = torch.randn(batch_size, generator.input_dim - 1)
            t = torch.linspace(-1, 1, discriminator.input_size)

            if cppn:
                z = torch.zeros(batch_size, discriminator.input_size,
                                generator.input_dim)
                if cuda:
                    z = z.cuda()
                z[:, :, 1:] = l.view(l.size(0), 1,
                                     l.size(1)).expand(l.size(0), z.size(1),
                                                       l.size(1))
                z[:, :, 0] = t
                z = z.contiguous()
                z_ = z.view(z.size(0) * z.size(1), -1)
                z_ = z_.contiguous()
                xfake = generator(z_)
                xfake = xfake.view(z.size(0), 1, z.size(1))
            else:
                z = torch.randn(batch_size, generator.input_dim)
                if cuda:
                    z = z.cuda()
                xfake = generator(z)
                xfake = xfake.view(xfake.size(0), 1, xfake.size(1))
            if nb_iter % 2 == 0:
                dfake = discriminator(xfake).mean()
                discr_loss = dfake - dreal
                discr_loss.backward(retain_graph=True)
                discriminator_optimizer.step()

                generator.zero_grad()
                dfake = discriminator(xfake).mean()
                gen_loss = -dfake
                gen_loss.backward()
                generator_optimizer.step()
            if nb_iter % log_interval == 0:
                print(
                    f'niter: {nb_iter:05d} gen_loss: {gen_loss.item():.4f} discr_loss: {discr_loss.item():.4f}'
                )
                x = x.detach().cpu().numpy()
                xfake = xfake.detach().cpu().numpy()
                signal = x[0:3, 0].T
                fake_signal = xfake[0:3, 0].T
                for i in range(len(xfake)):
                    s = xfake[i, 0]
                    wavfile.write(f'out/fake_{i:03d}.wav', 16000, s)
                for i in range(len(x)):
                    s = x[i, 0]
                    wavfile.write(f'out/real_{i:03d}.wav', 16000, s)
                fig = plt.figure(figsize=(50, 10))
                plt.plot(signal, color='blue', label='true')
                plt.plot(fake_signal, color='orange', label='fake')
                #plt.legend()
                plt.savefig('out.png')
                plt.close(fig)
            nb_iter += 1
Example #13
    def __getitem__(self, index):
        imgfile, question, answer, family = self.data[index]
        stem = imgfile[0:len(imgfile) - 4]  # image file name without its extension
        dir_path = os.path.join(self.apps_dir, stem)
        mat_path = os.path.join(self.mat_dir, stem + '.mat')
        mat = sio.loadmat(mat_path)
        num_layers = mat['num_layers'] - 1  # exclude the background layer

        masks = np.zeros((MX_N, 32, 32))
        masks_part = mat['masks']
        masks[:int(num_layers)] = masks_part[1:]

        # load the per-object appearance crops, resized to 128x128
        apps = np.zeros((MX_N, 128, 128, 3), dtype=np.uint8)
        for l in range(1, int(num_layers) + 1):
            app_path = os.path.join(dir_path, stem + '_' + str(l) + '.png')
            app_img = imread(app_path)
            app_img = imresize(app_img, [128, 128])
            apps[l - 1] = app_img[:, :, 0:3]

        coors = np.zeros((MX_N, 2))
        coors_part = mat['coors']
        coors[:int(num_layers)] = coors_part[1:int(num_layers) + 1]

        if self.if_aug:
            # training: random crop plus a small random rotation per object
            apps_tensor = []
            for l in range(int(num_layers)):
                transform_tmp = transforms.Compose([
                    transforms.ToPILImage(),
                    Scale([32, 32]),
                    transforms.Pad(1),
                    transforms.RandomCrop([32, 32]),
                ])
                apps_tmp = transform_tmp(apps[l])
                # uniform angle in [-2.8648, 2.8648] degrees (about +/-0.05 rad)
                angle = random.random() * 2.8648 * 2 - 2.8648
                apps_tmp = apps_tmp.rotate(angle, resample=Image.BILINEAR)
                apps_tmp = self.transform2(apps_tmp)
                apps_tensor.append(apps_tmp)
            apps_tensor = torch.stack(apps_tensor)
        else:
            # evaluation: deterministic resize only
            apps_tensor = []
            for l in range(int(num_layers)):
                transform_tmp = transforms.Compose([
                    transforms.ToPILImage(),
                    Scale([32, 32]),
                    transforms.ToTensor(),
                ])
                apps_tmp = transform_tmp(apps[l])
                apps_tensor.append(apps_tmp)
            apps_tensor = torch.stack(apps_tensor)

        # zero-pad the per-object tensors out to the fixed maximum object count
        apps_tensor_pad = torch.FloatTensor(np.zeros((MX_N, 3, 32, 32)))
        apps_tensor_pad[:int(num_layers)].copy_(apps_tensor)

        coors = torch.FloatTensor(coors)
        masks_tensor_pad = torch.FloatTensor(masks)

        return apps_tensor_pad, masks_tensor_pad, int(
            num_layers), question, len(question), answer, family
Example #14
def valid_augment(img, mask):
    return DualCompose([
        Scale(size=pad),
        ImageOnly(Normalize(mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225))),
    ])(img, mask)
Example #15
File: data.py Project: mehdidc/wavae
class Dataset:
    def __init__(self, folder, transform=None, nb=None):
        self.folder = folder
        self.classes = os.listdir(folder)
        self.filenames = glob(os.path.join(folder, '**', '*.wav'))
        if nb:
            self.filenames = self.filenames[0:nb]
        self.transform = transform

    def __getitem__(self, idx):
        sample_rate, signal = wavfile.read(self.filenames[idx])
        signal = signal.copy()
        signal = torch.from_numpy(signal)
        signal = signal.view(1, -1)
        if self.transform:
            signal = self.transform(signal)
        return signal

    def __len__(self):
        return len(self.filenames)


if __name__ == '__main__':
    transform = Compose([
        Scale(),
        PadTrim(max_len=16000),
    ])
    dataset = Dataset('data', transform=transform)
    print(dataset[0].size())
    dataloader = DataLoader(dataset, batch_size=32)
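`PadTrim` (used alongside `Scale` in Examples #10 and #12) fixes every signal to a common length so waveforms can be batched. A minimal sketch under that assumption:

import torch

class PadTrim:
    def __init__(self, max_len):
        self.max_len = max_len

    def __call__(self, signal):
        # signal: (1, n) tensor; trim or zero-pad the time axis to exactly max_len
        n = signal.size(1)
        if n >= self.max_len:
            return signal[:, :self.max_len]
        pad = torch.zeros(1, self.max_len - n, dtype=signal.dtype)
        return torch.cat([signal, pad], dim=1)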