Example #1
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build Models
    # encoder = EncoderCNN(args.embed_size)
    # encoder.eval()  # evaluation mode (BN uses moving mean/variance)
    # decoder = DecoderRNN(args.embed_size, args.hidden_size,
    #                     len(vocab), args.num_layers)
    generator = Generator(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers)
    generator.encoder.eval()
    # Load the trained model parameters
    # encoder.load_state_dict(torch.load(args.encoder_path))
    # decoder.load_state_dict(torch.load(args.decoder_path))
    generator.load_state_dict(
        torch.load(args.gen_path, map_location=lambda storage, loc: storage))

    # Prepare Image
    image = load_image(args.image, transform)
    image_tensor = to_var(image, volatile=True)  # volatile: legacy (pre-0.4) PyTorch inference flag

    # If use gpu
    if torch.cuda.is_available():
        # encoder.cuda()
        # decoder.cuda()
        generator.cuda()

    # Generate caption from image
    # feature = encoder(image_tensor)

    # sampled_ids = decoder.sample(feature)
    sampled_ids = generator.sample(image_tensor)
    sampled_ids = sampled_ids.cpu().data.numpy()[0]

    # Decode word_ids to words
    sampled_caption = []
    for word_id in sampled_ids:
        word = vocab.idx2word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the generated caption.
    print(sentence)
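The snippet above targets pre-0.4 PyTorch (Variable with volatile=True). For reference, a minimal sketch of the same inference path on modern PyTorch, assuming the same Generator, load_image, and vocab interfaces as above:

import pickle

import torch
from torchvision import transforms


def caption_image(args):
    # Same preprocessing as above
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    generator = Generator(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers).to(device)
    generator.load_state_dict(torch.load(args.gen_path, map_location=device))
    generator.eval()

    image = load_image(args.image, transform).to(device)
    with torch.no_grad():  # replaces to_var(..., volatile=True)
        sampled_ids = generator.sample(image)
    sampled_ids = sampled_ids.cpu().numpy()[0]

    # Decode word ids to words, stopping at the end token
    caption = []
    for word_id in sampled_ids:
        word = vocab.idx2word[word_id]
        caption.append(word)
        if word == '<end>':
            break
    print(' '.join(caption))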
Example #2
    def __init__(self, corpus, config, action2name):
        super(GanRnnAgent, self).__init__()
        self.use_gpu = config.use_gpu
        
        if config.state_type=='rnn':
            self.vocab = corpus.vocab
            self.rev_vocab = corpus.rev_vocab
            self.vocab_size = len(self.vocab)
            self.go_id = self.rev_vocab[BOS]
            self.eos_id = self.rev_vocab[EOS]
            self.context_encoder = ContEncoder(corpus, config)
            
        self.action2name=action2name
        self.lookupProb_ = LookupProb(action2name, config)
        self.discriminator = Discriminator(config)
        self.generator = Generator(config)

        self.loss_BCE = nn.BCELoss()
        self.config = config
Example #3
    def __init__(self, context: DeepSpeedTrialContext) -> None:
        self.context = context
        self.hparams = AttrDict(self.context.get_hparams())
        self.data_config = AttrDict(self.context.get_data_config())
        self.logger = TorchWriter()
        num_channels = data.CHANNELS_BY_DATASET[self.data_config.dataset]
        gen_net = Generator(
            self.hparams.generator_width_base, num_channels, self.hparams.noise_length
        )
        gen_net.apply(weights_init)
        disc_net = Discriminator(self.hparams.discriminator_width_base, num_channels)
        disc_net.apply(weights_init)
        gen_parameters = filter(lambda p: p.requires_grad, gen_net.parameters())
        disc_parameters = filter(lambda p: p.requires_grad, disc_net.parameters())
        ds_config = overwrite_deepspeed_config(
            self.hparams.deepspeed_config, self.hparams.get("overwrite_deepspeed_args", {})
        )
        generator, _, _, _ = deepspeed.initialize(
            model=gen_net, model_parameters=gen_parameters, config=ds_config
        )
        discriminator, _, _, _ = deepspeed.initialize(
            model=disc_net, model_parameters=disc_parameters, config=ds_config
        )

        self.generator = self.context.wrap_model_engine(generator)
        self.discriminator = self.context.wrap_model_engine(discriminator)
        self.fixed_noise = self.context.to_device(
            torch.randn(
                self.context.train_micro_batch_size_per_gpu, self.hparams.noise_length, 1, 1
            )
        )
        self.criterion = nn.BCELoss()
        # TODO: Test fp16
        self.fp16 = generator.fp16_enabled()
        self.gradient_accumulation_steps = generator.gradient_accumulation_steps()
        # Manually perform gradient accumulation.
        if self.gradient_accumulation_steps > 1:
            logging.info("Disabling automatic gradient accumulation.")
            self.context.disable_auto_grad_accumulation()
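Manual accumulation with a DeepSpeed engine usually amounts to calling backward() and step() once per micro-batch; the engine scales the loss internally and only runs the optimizer at accumulation boundaries. A minimal sketch under those assumptions (micro_batches and compute_loss are hypothetical names, not part of this trial):

for micro_batch in micro_batches:                    # hypothetical iterable
    loss = compute_loss(model_engine, micro_batch)   # hypothetical loss fn
    model_engine.backward(loss)  # DeepSpeed scales by 1/gradient_accumulation_steps
    model_engine.step()          # optimizer update only at the boundary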
Example #4
    def __init__(self, corpus, config, action2name):
        super(GanRnnAgent, self).__init__()
        self.use_gpu = config.use_gpu
        self.vocab = corpus.vocab
        self.rev_vocab = corpus.rev_vocab
        self.vocab_size = len(self.vocab)
        self.go_id = self.rev_vocab[BOS]
        self.eos_id = self.rev_vocab[EOS]
        self.action2name = action2name
        self.lookupProb_ = LookupProb(action2name, config)
        # self.lookupProb_ = None
        # self.generator = generator
        # self.discriminator = discriminator
        # self.cont_encoder = state_encoder
        self.context_encoder = ContEncoder(corpus, config)
        self.discriminator = Discriminator(config)
        self.generator = Generator(config)
        # FNN to get Y
        self.p_fc1 = nn.Linear(config.ctx_cell_size, config.ctx_cell_size)
        self.p_y = nn.Linear(config.ctx_cell_size, config.y_size * config.k)

        self.loss_BCE = nn.BCELoss()
        self.config = config
Example #5
def anomaly_test():
    print('Anomaly')

    category = 'hand'
    img_file = requestFolderName + '/image-16.jpg'

    print('saved ', img_file, ' category: ', category)
    count = 16

    files = []
    for i in range(65):
        files.append(img_file)

    data = {'0': np.array(files)}

    mura_valid_df = pd.DataFrame(data)
    print(mura_valid_df.head())
    # NOTE: the next line immediately overwrites this assignment, so the
    # dataset below actually receives the inverse transform.
    transforms = transform(False, True, True, True, True, True, True, False)
    transforms = inverse_transform(False, True, True, True, True, True, True,
                                   False)

    # resize image to 256 X 256 to construct the output image

    noresize_transform = transform(False, False, False, True, True, True, True,
                                   False)
    img = cv2.imread(img_file)
    print(img.shape)
    img = noresize_transform(img)
    print(img.shape)

    transforms1 = transform(False, True, False, False, False, False, True,
                            False)
    resized_input_img = transforms1(img)

    # transforms2 = transform(False, True, False, False, False, False, True, False)
    # resized_input_img = transforms2(img)

    # rotation, hflip, resize, totensor, normalize, centercrop, to_pil, gray

    # valid_dataset = MURA_dataset(mura_valid_df, '/content/drive/Shared drives/MeanSquare-Drive/Advanced-DeepLearning/', transforms)
    valid_dataset = MURA_dataset(mura_valid_df, '', transforms)
    valid_dataloader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                                   batch_size=64,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   drop_last=False)
    if category == 'hand':
        out = 'models/XR_HAND/'
    else:
        out = 'models/XR_ELBOW/'

    max_auc = 0
    latent_dim = 128
    channels = 3
    batch_size = 64

    generator = Generator(dim=64, zdim=latent_dim, nc=channels)
    discriminator = Discriminator(dim=64,
                                  zdim=latent_dim,
                                  nc=channels,
                                  out_feat=True)
    encoder = Encoder(dim=64, zdim=latent_dim, nc=channels)
    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    device = 'cpu'  # force CPU inference, overriding the CUDA device above
    generator.load_state_dict(
        torch.load(out + 'G_epoch5000.pt', map_location=torch.device('cpu')))
    discriminator.load_state_dict(
        torch.load(out + 'D_epoch5000.pt', map_location=torch.device('cpu')))

    generator.to(device)
    encoder.to(device)
    discriminator.to(device)

    with torch.no_grad():
        labels = torch.zeros(size=(len(valid_dataloader.dataset), ),
                             dtype=torch.long,
                             device=device)

        scores = torch.empty(size=(len(valid_dataloader.dataset), ),
                             dtype=torch.float32,
                             device=device)
        for i, (imgs, lbls) in enumerate(valid_dataloader):
            print('imgs. shape ', imgs.shape)
            imgs = imgs.to(device)
            lbls = lbls.to(device)

            labels[i * batch_size:(i + 1) * batch_size].copy_(lbls)
            emb_query = encoder(imgs)
            print('emb_query. shape ', emb_query.shape)

            fake_imgs = generator(emb_query)
            emb_fake = encoder(fake_imgs)

            image_feats = discriminator(imgs)
            recon_feats = discriminator(fake_imgs)

            diff = imgs - fake_imgs

            image1_tensor = diff[0]
            im = tensor2im(imgs)

            im2 = tensor2im(fake_imgs)
            print(lbls)

            im3 = tensor2im(diff)
            # plt.figure(1)
            # plt.subplot(311)
            # plt.title('Real image')
            # plt.imshow(im)

            # plt.subplot(312)
            # plt.title('Fake img')
            # plt.imshow(im2)
            # plt.show()

            img = cv2.GaussianBlur(im3, (5, 5), 0)
            img_gray = rgb2gray(img)
            #plt.imshow(img_gray)
            thresh = threshold_otsu(img_gray)
            binary = img_gray > thresh

            #plt.imshow(binary)
            # PIL cannot build an image from a bool array, so convert first
            im_rgb = np.array(
                Image.fromarray((binary * 255).astype(np.uint8)).convert('RGB'))
            mask = binary.copy()
            mask[mask > 0.5] = 1
            mask[mask <= 0.5] = 0

            mask3 = np.stack((mask, mask, mask), axis=2)

            all_labels = measure.label(mask)
            all_labels[all_labels >= 1] = 255
            all_labels[all_labels < 1] = 0
            all_labels3 = np.stack((all_labels, all_labels, all_labels),
                                   axis=2)

            #             kernel = np.ones((6, 6), np.uint8)

            #             # Using cv2.erode() method
            #             image = cv2.erode(Image.fromarray(mask3), kernel, cv2.BORDER_REFLECT)

            black_pixels_mask = np.all(mask3 == 1, axis=2)
            non_black_pixels_mask = np.any(mask3 > [0, 0, 0], axis=-1)

            all_labels3[non_black_pixels_mask] = [255, 0, 0]

            # plt.subplot(313)
            # plt.title('Difference')
            # plt.imshow(im3)
            # plt.show()
            #
            # plt.subplot(321)
            # plt.title('colored mask')
            # plt.imshow(all_labels3)
            # plt.show()

            gray = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)

            # Find Canny edges
            edged = cv2.Canny(gray, 30, 200)

            # Finding Contours
            # Use a copy of the image e.g. edged.copy()
            # since findContours alters the image
            contours, hierarchy = cv2.findContours(edged, cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_NONE)

            # plt.subplot(322)
            # plt.imshow(edged)
            # plt.title('Edged')
            # plt.show()

            print("Number of Contours found = " + str(len(contours)))

            # Draw all contours
            # -1 signifies drawing all contours
            print('im3: ', im3.shape)
            backtorgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
            print('contours: ', len(contours))
            img_contours = np.zeros(backtorgb.shape)

            cv2.drawContours(img_contours, contours, -1, (220, 0, 0), 1)
            resized_output_image = cv2.resize(img_contours, (256, 256))

            cv2.imshow('output blue', resized_output_image)
            cv2.waitKey(0)

            cv2.imwrite('output_files/output-image-' + str(count) + '.jpg',
                        resized_output_image)
            #Image.fromarray(resized_output_image).save('output_files/output-image-' + str(count) + '.jpg')
            print('resize: ', resized_output_image.shape,
                  np.asarray(resized_input_img).shape)

            mix_img = cv2.addWeighted(np.asarray(resized_input_img),
                                      0.3,
                                      resized_output_image,
                                      0.7,
                                      0,
                                      dtype=cv2.CV_32F)
            #Image.fromarray(mix_img).save('output_files/mix-image-' + str(count) + '.jpg')
            cv2.imwrite('output_files/mix-image-' + str(count) + '.jpg',
                        mix_img)

            # plt.subplot(323)
            # plt.title('contour')
            # plt.imshow(gray)

            # plt.show()

            thresh = 50
            ret, thresh_img = cv2.threshold(gray, thresh, 255,
                                            cv2.THRESH_BINARY)

            contours, hierarchy = cv2.findContours(thresh_img, cv2.RETR_TREE,
                                                   cv2.CHAIN_APPROX_SIMPLE)
            print('contours second time : ', len(contours))

            backtorgb1 = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

            cv2.drawContours(backtorgb1, contours, -1, (0, 255, 0), 1)

            #backtorgb = cv2.cvtColor(gray,cv2.COLOR_GRAY2RGB)

            cv2.imshow('output', backtorgb1)
            cv2.waitKey(0)

            # Image.fromarray(backtorgb1).save('output_files/image-' + str(count) + '.jpg')
            # cv2.imwrite('output_files/cv-image-' + str(count) + '.jpg', backtorgb1)
            #break

            image_distance = torch.mean(torch.pow(imgs - fake_imgs, 2),
                                        dim=[1, 2, 3])
            feat_distance = torch.mean(torch.pow(image_feats - recon_feats, 2),
                                       dim=1)
            print(emb_query.shape, emb_fake.shape)
            z_distance = mse_loss(emb_query, emb_fake)
            print('z_distance=', z_distance)
            scores[i * batch_size:(i + 1) * batch_size].copy_(feat_distance)
            print('feat_distance ', feat_distance[0])
            break

    output = {}
    output['status'] = 'done'
    return output['status']
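The three distances computed in the loop (pixel space, discriminator-feature space, and latent space) are the usual ingredients of GANomaly/f-AnoGAN-style anomaly scores; here only feat_distance is kept as the score. A minimal sketch of a combined per-image score, with the weight kappa as an assumption rather than something this code uses:

import torch


def anomaly_score(imgs, fake_imgs, image_feats, recon_feats, kappa=1.0):
    # Pixel-space reconstruction error, one scalar per image
    residual = torch.mean(torch.pow(imgs - fake_imgs, 2), dim=[1, 2, 3])
    # Discriminator feature-space error, one scalar per image
    feat = torch.mean(torch.pow(image_feats - recon_feats, 2), dim=1)
    return residual + kappa * feat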
Example #6
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    if not os.path.exists(args.figure_path):
        os.makedirs(args.figure_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models (Gen)
    generator = Generator(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers)

    # Build the models (Disc)
    discriminator = Discriminator(args.embed_size, args.hidden_size,
                                  len(vocab), args.num_layers)

    if torch.cuda.is_available():
        generator.cuda()
        discriminator.cuda()

    # Loss and Optimizer (Gen)
    mle_criterion = nn.CrossEntropyLoss()
    params_gen = list(generator.parameters())
    optimizer_gen = torch.optim.Adam(params_gen)

    # Loss and Optimizer (Disc)
    params_disc = list(discriminator.parameters())
    optimizer_disc = torch.optim.Adam(params_disc)

    if int(args.pretraining) == 1:
        # Pre-training: train generator with MLE and discriminator with 3 losses (real + fake + wrong)
        total_steps = len(data_loader)
        print(total_steps)
        disc_losses = []
        gen_losses = []
        print('pre-training')
        generator.load_state_dict(torch.load(args.pretrained_gen_path))
        discriminator.load_state_dict(torch.load(args.pretrained_disc_path))
        for epoch in range(
                max([
                    int(args.gen_pretrain_num_epochs),
                    int(args.disc_pretrain_num_epochs)
                ])):
            if epoch < 5:
                continue  # skip epochs already covered by the loaded checkpoints
            for i, (images, captions, lengths, wrong_captions,
                    wrong_lengths) in enumerate(data_loader):
                images = to_var(images, volatile=True)
                captions = to_var(captions)
                wrong_captions = to_var(wrong_captions)
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                if epoch < int(args.gen_pretrain_num_epochs):
                    generator.zero_grad()
                    outputs, _ = generator(images, captions, lengths)
                    loss_gen = mle_criterion(outputs, targets)
                    # gen_losses.append(loss_gen.cpu().data.numpy()[0])
                    loss_gen.backward()
                    optimizer_gen.step()

                if epoch < int(args.disc_pretrain_num_epochs):
                    discriminator.zero_grad()
                    rewards_real = discriminator(images, captions, lengths)
                    # rewards_fake = discriminator(images, sampled_captions, sampled_lengths)
                    rewards_wrong = discriminator(images, wrong_captions,
                                                  wrong_lengths)
                    real_loss = -torch.mean(torch.log(rewards_real))
                    # fake_loss = -torch.mean(torch.clamp(torch.log(1 - rewards_fake), min=-1000))
                    wrong_loss = -torch.mean(
                        torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
                    loss_disc = real_loss + wrong_loss  # + fake_loss, no fake_loss because this is pretraining

                    # disc_losses.append(loss_disc.cpu().data.numpy()[0])
                    loss_disc.backward()
                    optimizer_disc.step()
                if (i + 1) % args.log_step == 0:
                    print(
                        'Epoch [%d], Step [%d], Disc Loss: %.4f, Gen Loss: %.4f'
                        % (epoch + 1, i + 1, loss_disc, loss_gen))
                if (i + 1) % 500 == 0:
                    torch.save(
                        discriminator.state_dict(),
                        os.path.join(
                            args.model_path,
                            'pretrained-discriminator-%d-%d.pkl' %
                            (int(epoch) + 1, i + 1)))
                    torch.save(
                        generator.state_dict(),
                        os.path.join(
                            args.model_path, 'pretrained-generator-%d-%d.pkl' %
                            (int(epoch) + 1, i + 1)))

        # Save pretrained models
        torch.save(
            discriminator.state_dict(),
            os.path.join(
                args.model_path, 'pretrained-discriminator-%d.pkl' %
                int(args.disc_pretrain_num_epochs)))
        torch.save(
            generator.state_dict(),
            os.path.join(
                args.model_path, 'pretrained-generator-%d.pkl' %
                int(args.gen_pretrain_num_epochs)))

        # Plot pretraining figures
        # plt.plot(disc_losses, label='pretraining_disc_loss')
        # plt.savefig(args.figure_path + 'pretraining_disc_losses.png')
        # plt.clf()
        #
        # plt.plot(gen_losses, label='pretraining_gen_loss')
        # plt.savefig(args.figure_path + 'pretraining_gen_losses.png')
        # plt.clf()

    else:
        generator.load_state_dict(torch.load(args.pretrained_gen_path))
        discriminator.load_state_dict(torch.load(args.pretrained_disc_path))

    # # Skip the rest for now
    # return

    # Train the Models
    total_step = len(data_loader)
    disc_gan_losses = []
    gen_gan_losses = []
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths, wrong_captions,
                wrong_lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            wrong_captions = to_var(wrong_captions)

            generator.zero_grad()
            outputs, packed_lengths = generator(images, captions, lengths)
            outputs = PackedSequence(outputs, packed_lengths)
            outputs = pad_packed_sequence(outputs,
                                          batch_first=True)  # (b, T, V)

            Tmax = outputs[0].size(1)
            if torch.cuda.is_available():
                rewards = torch.zeros_like(outputs[0]).type(
                    torch.cuda.FloatTensor)
            else:
                rewards = torch.zeros_like(outputs[0]).type(torch.FloatTensor)

            # getting rewards from disc

            # for t in tqdm(range(2, Tmax, 4)):
            for t in range(2, Tmax, 2):
                # for t in range(2, 4):
                # TODO this early exit makes things easier, but min(lengths)
                # could be too short
                if t >= min(lengths):
                    break

                gen_samples = to_var(torch.zeros(
                    (captions.size(0), Tmax)).type(torch.FloatTensor),
                                     volatile=True)
                # part 1: taken from real caption
                gen_samples[:, :t] = captions[:, :t].data

                predicted_ids, saved_states = generator.pre_compute(
                    gen_samples, t)
                # for v in range(predicted_ids.size(1)):
                v = predicted_ids
                # pdb.set_trace()
                # part 2: taken from all possible vocabs
                # gen_samples[:,t] = predicted_ids[:,v]
                gen_samples[:, t] = v
                # part 3: taken from rollouts
                gen_samples[:, t:] = generator.rollout(gen_samples, t,
                                                       saved_states)

                sampled_lengths = []
                # finding sampled_lengths
                for batch in range(int(captions.size(0))):
                    for b_t in range(Tmax):
                        if gen_samples[batch,
                                       b_t].cpu().data.numpy() == 2:  # <end>
                            sampled_lengths.append(b_t + 1)
                            break
                        elif b_t == Tmax - 1:
                            sampled_lengths.append(Tmax)

                # sort sampled_lengths
                sampled_lengths = np.array(sampled_lengths)
                sampled_lengths[::-1].sort()
                sampled_lengths = sampled_lengths.tolist()

                # get rewards from disc
                rewards[:, t, v] = discriminator(images, gen_samples.detach(),
                                                 sampled_lengths)

            # rewards = rewards.detach()
            # pdb.set_trace()
            rewards_detached = rewards.data
            rewards_detached = to_var(rewards_detached)

            loss_gen = torch.dot(outputs[0], -rewards_detached)
            # gen_gan_losses.append(loss_gen.cpu().data.numpy()[0])
            # pdb.set_trace()

            loss_gen.backward()
            optimizer_gen.step()

            # TODO get sampled_captions
            sampled_ids = generator.sample(images)
            # sampled_captions = torch.zeros_like(sampled_ids).type(torch.LongTensor)
            sampled_lengths = []
            # finding sampled_lengths
            for batch in range(int(captions.size(0))):
                for b_t in range(20):
                    #pdb.set_trace()
                    #sampled_captions[batch, b_t].data = sampled_ids[batch, b_t].cpu().data.numpy()[0]
                    if sampled_ids[batch,
                                   b_t].cpu().data.numpy() == 2:  # <end>
                        sampled_lengths.append(b_t + 1)
                        break
                    elif b_t == 20 - 1:
                        sampled_lengths.append(20)
            # sort sampled_lengths
            sampled_lengths = np.array(sampled_lengths)
            sampled_lengths[::-1].sort()
            sampled_lengths = sampled_lengths.tolist()

            # Train discriminator
            discriminator.zero_grad()
            images.volatile = False
            captions.volatile = False
            wrong_captions.volatile = False
            rewards_real = discriminator(images, captions, lengths)
            rewards_fake = discriminator(images, sampled_ids, sampled_lengths)
            rewards_wrong = discriminator(images, wrong_captions,
                                          wrong_lengths)
            real_loss = -torch.mean(torch.log(rewards_real))
            fake_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_fake), min=-1000))
            wrong_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
            loss_disc = real_loss + fake_loss + wrong_loss

            # disc_gan_losses.append(loss_disc.cpu().data.numpy()[0])
            loss_disc.backward()
            optimizer_disc.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Disc Loss: %.4f, Gen Loss: %.4f'
                    % (epoch, args.num_epochs, i, total_step, loss_disc,
                       loss_gen))

            # Save the models
            # if (i+1) % args.save_step == 0:
            if (i + 1) % args.log_step == 0:  # jm: saving at the last iteration instead
                torch.save(
                    generator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'generator-gan-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    discriminator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'discriminator-gan-%d-%d.pkl' % (epoch + 1, i + 1)))
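For reference, the generator update above is a policy-gradient-style objective: with outputs[0] holding the generator's (log-)scores p_theta(v | w_<t, I) padded to shape (b, T, V), and rewards holding the discriminator scores at the positions filled in by the rollouts, the loss is

    L_G = - sum_{b,t,v} outputs[0][b, t, v] * rewards[b, t, v]

which is exactly the torch.dot(outputs[0], -rewards_detached) term; the rewards are detached first so gradients flow only through the generator, not the discriminator.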
Example #7
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    # encoder = EncoderCNN(args.embed_size)
    # decoder = DecoderRNN(args.embed_size, args.hidden_size,
    #                     len(vocab), args.num_layers)

    generator = Generator(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers)

    if torch.cuda.is_available():
        # encoder.cuda()
        # decoder.cuda()
        generator.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    # params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    params = list(generator.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        # for i, (images, captions, lengths) in enumerate(data_loader):
        for i, (images, captions, lengths, wrong_captions,
                wrong_lengths) in enumerate(data_loader):

            # Set mini-batch dataset
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            # decoder.zero_grad()
            # encoder.zero_grad()
            generator.zero_grad()
            # features = encoder(images)
            # outputs = decoder(features, captions, lengths)
            outputs, _ = generator(images, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

            # Save the models
            if (i + 1) % args.save_step == 0:
                #torch.save(decoder.state_dict(),
                #           os.path.join(args.model_path,
                #                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                #torch.save(encoder.state_dict(),
                #           os.path.join(args.model_path,
                #                        'encoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(
                    generator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'pretrained-generator-%d.pkl' % int(args.num_epochs)))
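pack_padded_sequence flattens a padded batch into a single time-major stream with the padding removed, which is why targets above can be fed straight to CrossEntropyLoss against the generator's packed outputs. A small standalone illustration (the toy values are made up):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

captions = torch.tensor([[1, 5, 6, 2],    # length 4
                         [1, 4, 2, 0]])   # length 3, trailing 0 is padding
lengths = [4, 3]                          # sorted in decreasing order
packed = pack_padded_sequence(captions, lengths, batch_first=True)
print(packed.data)  # tensor([1, 1, 5, 4, 6, 2, 2]) -- time-major, padding gone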
Example #8
def model(lines_ch, lines):
    # Construct Input data
    real_inputs = tf.placeholder(tf.float32, shape=[BATCH_SIZE, SEQ_LEN, len(charmap)])
    fake_inputs = Generator(BATCH_SIZE)

    # Input Discriminator
    disc_real = Discriminator(real_inputs)
    disc_fake = Discriminator(fake_inputs)

    # Compute D/G cost
    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # WGAN-GP L constraints
    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=0., maxval=1.)
    differences = fake_inputs - real_inputs
    interpolates = real_inputs + (alpha*differences)
    gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
    gradient_penalty = tf.reduce_mean((slopes-1.)**2)
    disc_cost += LAMBDA*gradient_penalty

    # Get parameters
    gen_params = lib.params_with_name('Generator')
    disc_params = lib.params_with_name('Discriminator')

    # Construct optimizer
    # Optimizer?
    gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(gen_cost, var_list=gen_params)
    disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(disc_cost, var_list=disc_params)

    # During training we monitor the JS divergence between the true and
    # generated n-gram distributions for n = 2, 3, 4, 5.
    # To get an idea of the optimal values, we evaluate these statistics on a held-out set first.
    true_char_ngram_lms = [language_helpers.NgramLanguageModel(i, lines_ch[100*BATCH_SIZE:], tokenize=False) for i in range(2, 6)]
    validation_char_ngram_lms = [language_helpers.NgramLanguageModel(i, lines_ch[:100*BATCH_SIZE], tokenize=False) for i in range(2, 6)]
    for i in range(0, 4):
        print("validation set JSD for n={}: {}".format(i+2, true_char_ngram_lms[i].js_with(validation_char_ngram_lms[i])))
    true_char_ngram_lms = [language_helpers.NgramLanguageModel(i, lines_ch[:], tokenize=False) for i in range(2, 6)]

    # Start run the graph
    saver = tf.train.Saver()
    with tf.Session() as session:
        # initialize
        session.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated

        # Generate fake samples
        def generate_samples():
            samples = session.run(fake_inputs)
            samples = np.argmax(samples, axis=2)
            # print(samples.shape)
            decoded_samples = []
            for i in range(len(samples)):
                decoded = ''
                for j in samples[i]:
                    decoded += inv_charmap[j]
                decoded_samples.append(decoded)
            # print(len(decoded_samples), len(decoded_samples[0]), decoded_samples[0])
            return decoded_samples

        # Generate real samples
        gen = data_real_gen(lines)

        # Start iteration
        for iteration in range(ITERS):
            start_time = time.time()
            # Train generator
            _gen_cost = 0
            if iteration > 0:
                _gen_cost, _ = session.run([gen_cost, gen_train_op])
            # Train discriminator
            _disc_cost = 0
            for i in range(CRITIC_ITERS):
                _data = next(gen)
                _disc_cost, _ = session.run([disc_cost, disc_train_op], feed_dict={real_inputs: _data})
            # Plot curve
            lib.plot.plot('time', time.time() - start_time)
            lib.plot.plot('train gen cost', _gen_cost)
            lib.plot.plot('train disc cost', _disc_cost)

            if iteration % 100 == 99:
                saver.save(session, 'checkpoint/Mymodel_'+str(iteration))
                samples = []
                for i in range(20):
                    samples.extend(generate_samples())
                for i in range(0, 4):
                    lm = language_helpers.NgramLanguageModel(i+2, samples, tokenize=False)
                    lib.plot.plot('js{}'.format(i+2), true_char_ngram_lms[i].js_with(lm))
                with open('samples_{}.txt'.format(iteration), 'w', encoding="utf-8") as f:
                    for s in samples:
                        s = "".join(s)
                        f.write(s + "\n")
            if iteration % 100 == 99:
                lib.plot.flush()
            lib.plot.tick()
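For reference, the penalty block above implements the WGAN-GP critic objective of Gulrajani et al.: with x_hat = real + alpha * (fake - real) the random interpolate,

    L_D = E[D(fake)] - E[D(real)] + LAMBDA * E[(||grad_{x_hat} D(x_hat)||_2 - 1)^2]

disc_cost carries the first two terms and gradient_penalty the third.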
Example #9
def train(args):
    writer = SummaryWriter(log_dir=args.tensorboard_path)
    create_folder(args.outf)
    set_seed(args.manualSeed)
    cudnn.benchmark = True
    dataset, nc = get_dataset(args)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batchSize,
                                             shuffle=True,
                                             num_workers=int(args.workers))
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)  # alt: torch.device("cuda:0" if args.cuda else "cpu")
    ngpu = 0
    nz = int(args.nz)
    ngf = int(args.ngf)
    ndf = int(args.ndf)

    netG = Generator(ngpu, ngf, nc, nz).to(device)
    netG.apply(weights_init)
    if args.netG != '':
        netG.load_state_dict(torch.load(args.netG))

    netD = Discriminator(ngpu, ndf, nc).to(device)
    netD.apply(weights_init)
    if args.netD != '':
        netD.load_state_dict(torch.load(args.netD))

    criterion = nn.BCELoss()

    fixed_noise = torch.randn(args.batchSize, nz, 1, 1, device=device)
    real_label = 1
    fake_label = 0

    # setup optimizer
    optimizerD = torch.optim.Adam(netD.parameters(),
                                  lr=args.lr,
                                  betas=(args.beta1, 0.999))
    optimizerG = torch.optim.Adam(netG.parameters(),
                                  lr=args.lr,
                                  betas=(args.beta1, 0.999))

    model_engineD, optimizerD, _, _ = deepspeed.initialize(
        args=args,
        model=netD,
        model_parameters=netD.parameters(),
        optimizer=optimizerD)
    model_engineG, optimizerG, _, _ = deepspeed.initialize(
        args=args,
        model=netG,
        model_parameters=netG.parameters(),
        optimizer=optimizerG)

    torch.cuda.synchronize()
    start = time()
    for epoch in range(args.epochs):
        for i, data in enumerate(dataloader, 0):
            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            # train with real
            netD.zero_grad()
            real = data[0].to(device)
            batch_size = real.size(0)
            label = torch.full((batch_size, ),
                               real_label,
                               dtype=real.dtype,
                               device=device)
            output = netD(real)
            errD_real = criterion(output, label)
            model_engineD.backward(errD_real)
            D_x = output.mean().item()

            # train with fake
            noise = torch.randn(batch_size, nz, 1, 1, device=device)
            fake = netG(noise)
            label.fill_(fake_label)
            output = netD(fake.detach())
            errD_fake = criterion(output, label)
            model_engineD.backward(errD_fake)
            D_G_z1 = output.mean().item()
            errD = errD_real + errD_fake
            #optimizerD.step() # alternative (equivalent) step
            model_engineD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            netG.zero_grad()
            label.fill_(real_label)  # fake labels are real for generator cost
            output = netD(fake)
            errG = criterion(output, label)
            model_engineG.backward(errG)
            D_G_z2 = output.mean().item()
            #optimizerG.step() # alternative (equivalent) step
            model_engineG.step()

            print(
                '[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
                % (epoch, args.epochs, i, len(dataloader), errD.item(),
                   errG.item(), D_x, D_G_z1, D_G_z2))
            writer.add_scalar("Loss_D", errD.item(),
                              epoch * len(dataloader) + i)
            writer.add_scalar("Loss_G", errG.item(),
                              epoch * len(dataloader) + i)
            if i % 100 == 0:
                vutils.save_image(real,
                                  '%s/real_samples.png' % args.outf,
                                  normalize=True)
                fake = netG(fixed_noise)
                vutils.save_image(fake.detach(),
                                  '%s/fake_samples_epoch_%03d.png' %
                                  (args.outf, epoch),
                                  normalize=True)

        # do checkpointing
        #torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (args.outf, epoch))
        #torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (args.outf, epoch))
    torch.cuda.synchronize()
    stop = time()
    print(
        f"total wall clock time for {args.epochs} epochs is {stop-start} secs")
Example #10
# output_path = '/home/lilioo826/hw4_output/'
output_path = sys.argv[1]
# fig2_2
loss_G = np.load('GAN/loss_G.npy')
loss_D = np.load('GAN/loss_D.npy')
Dx = np.load('GAN/Dx.npy')
DG1 = np.load('GAN/DG1.npy')
DG2 = np.load('GAN/DG2.npy')

# x = np.arange(1206)
plt.figure(figsize=(30, 10))
plt.subplot(121)
plt.plot(loss_G[::10], label='loss G')
plt.plot(loss_D[::10], label='loss D')
plt.legend()
plt.title('loss G and loss D')
# plt.plot(loss_D)
plt.subplot(122)
# plt.plot(x, Dx, 'r', x, DG1, 'b')
plt.plot(Dx[::10], label='Real')
plt.plot(DG1[::10], label='Fake')
plt.legend()
plt.title('mean of output of Discriminator')
plt.savefig(output_path + '/fig2_2.jpg')

# fig2_3
netG = Generator(64, 256)
netG.load_state_dict(torch.load('GAN/gan_netG.pth'))
ran_vec = Variable(torch.randn(32, 256, 1, 1))
fake = netG(ran_vec)
save_image(fake.data, output_path + '/fig2_3.jpg', nrow=8, normalize=True)
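Variable is a no-op wrapper on PyTorch >= 0.4; under that assumption the sampling step above can be written more simply as:

with torch.no_grad():
    fake = netG(torch.randn(32, 256, 1, 1))
save_image(fake, output_path + '/fig2_3.jpg', nrow=8, normalize=True)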
Example #11
def train_gan():

    batch_size = 64
    epochs = 100
    disc_update = 1
    gen_update = 5
    latent_dimension = 100
    lambduh = 10

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    # load data
    train_loader, valid_loader, test_loader = get_data_loader(
        'data', batch_size)

    disc_model = Discriminator().to(device)
    gen_model = Generator(latent_dimension).to(device)

    disc_optim = Adam(disc_model.parameters(), lr=1e-4, betas=(0.5, 0.9))
    gen_optim = Adam(gen_model.parameters(), lr=1e-4, betas=(0.5, 0.9))

    for e in range(epochs):
        disc_loss = 0
        gen_loss = 0
        for i, (images, _) in enumerate(train_loader):
            images = images.to(device)
            b_size = images.shape[0]
            step = i + 1
            if step % disc_update == 0:
                disc_model.zero_grad()
                # sample noise
                noise = torch.randn((b_size, latent_dimension), device=device)

                # loss on fake
                inputs = gen_model(noise).detach()
                f_outputs = disc_model(inputs)
                loss = f_outputs.mean()

                # loss on real
                r_outputs = disc_model(images)
                loss -= r_outputs.mean()

                # add gradient penalty
                loss += lambduh * gradient_penalty(disc_model, images, inputs,
                                                   device)

                disc_loss += loss.detach()  # detach so the accumulator keeps no graph
                loss.backward()
                disc_optim.step()

            if step % gen_update == 0:
                gen_model.zero_grad()

                noise = torch.randn((b_size, latent_dimension)).to(device)
                inputs = gen_model(noise)
                outputs = disc_model(inputs)
                loss = -outputs.mean()

                gen_loss += loss.detach()  # detach so the accumulator keeps no graph
                loss.backward()
                gen_optim.step()

        torch.save(
            {
                'epoch': e,
                'disc_model': disc_model.state_dict(),
                'gen_model': gen_model.state_dict(),
                'disc_loss': disc_loss,
                'gen_loss': gen_loss,
                'disc_optim': disc_optim.state_dict(),
                'gen_optim': gen_optim.state_dict()
            }, "upsample/checkpoint_{}.pth".format(e))
        print("Epoch: {} Disc loss: {}".format(
            e + 1,
            disc_loss.item() / len(train_loader)))
        print("Epoch: {} Gen loss: {}".format(
            e + 1,
            gen_loss.item() / len(train_loader)))
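The gradient_penalty helper is called above but not shown in this example; a typical WGAN-GP implementation matching the call signature gradient_penalty(disc_model, real, fake, device) might look like the following sketch (not the example's actual helper):

import torch


def gradient_penalty(disc_model, real, fake, device):
    # Random per-sample interpolation factor, broadcast over (C, H, W)
    alpha = torch.rand(real.size(0), 1, 1, 1, device=device)
    interpolates = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_out = disc_model(interpolates)
    grads = torch.autograd.grad(outputs=d_out,
                                inputs=interpolates,
                                grad_outputs=torch.ones_like(d_out),
                                create_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return ((grads.norm(2, dim=1) - 1) ** 2).mean()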
Example #12
            with open('./defensive_models/dev_disc_costs.pickle', 'wb') as fp:
                pickle.dump(dev_disc_costs, fp)

            print('batch {:>3}/{:>3}, validation disc cost : {:.4f}'.format(
                iteration, ITERS, costs_avg))


if __name__ == "__main__":

    args = get_args()

    device_D = torch.device(args.deviceD)
    device_G = torch.device(args.deviceG)

    # load generator and discriminator model
    netG = Generator()
    summary(netG, input_size=(INPUT_LATENT, ), device='cpu')

    netD = Discriminator()
    summary(netD, input_size=(3, 32, 32), device='cpu')

    # set folder to save model checkpoints
    model_folder = os.path.abspath('./defensive_models')
    if not os.path.exists(model_folder):
        os.mkdir(model_folder)

    check_point_path = './defensive_models/snapshots.pth'

    if os.path.exists(check_point_path):
        checkpoint = torch.load(check_point_path)
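The snippet is truncated right after the checkpoint load; a typical resume step would restore both networks from it, with the state-dict key names below being hypothetical, not taken from this code:

        netG.load_state_dict(checkpoint['netG_state_dict'])  # hypothetical key
        netD.load_state_dict(checkpoint['netD_state_dict'])  # hypothetical key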
Example #13
batch_size = 64
d = 64
latent_size = 256
mode = 'lat256'

transform2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

train_faceDataset = FaceDataset(data_path+'train', data_path+'train.csv', transform2)
test_faceDataset = FaceDataset(data_path+'test', data_path+'test.csv', transform2)
train_dataloader = DataLoader(ConcatDataset([train_faceDataset, test_faceDataset]), batch_size=batch_size, num_workers=1)

netG = Generator(d, latent_size)
netD = Discriminator(d)
if cuda:
    netG = netG.cuda()
    netD = netD.cuda()
#print(netG)
#summary(netG, (1, 128))
# print(netD)
# summary(netD, (3, 64, 64))
# exit()

criterion = nn.BCELoss()

optimizerG = optim.Adam(netG.parameters(), lr=0.002, betas=(0.5, 0.999))
optimizerD = optim.Adam(netD.parameters(), lr=0.002, betas=(0.5, 0.999))
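The setup stops at the optimizers; with BCELoss and the usual DCGAN real/fake label convention, one training step would look roughly like this sketch (assuming netD ends in a sigmoid and returns one score per image):

real_label, fake_label = 1., 0.
for imgs, _ in train_dataloader:
    if cuda:
        imgs = imgs.cuda()
    b = imgs.size(0)
    noise = torch.randn(b, latent_size, 1, 1, device=imgs.device)

    # Discriminator: push real images toward 1, generated ones toward 0
    netD.zero_grad()
    loss_real = criterion(netD(imgs),
                          torch.full((b,), real_label, device=imgs.device))
    fake = netG(noise)
    loss_fake = criterion(netD(fake.detach()),
                          torch.full((b,), fake_label, device=imgs.device))
    (loss_real + loss_fake).backward()
    optimizerD.step()

    # Generator: make the discriminator score fakes as real
    netG.zero_grad()
    loss_g = criterion(netD(fake),
                       torch.full((b,), real_label, device=imgs.device))
    loss_g.backward()
    optimizerG.step()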