Example #1
    def __init__(self, config):
        self.train_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(config.train_data_dir, transforms.Compose([
                transforms.RandomResizedCrop(config.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=config.batch_size, shuffle=True,
            num_workers=config.workers, pin_memory=True)

        os.makedirs(f'{config.save_dir}/{config.version}', exist_ok=True)

        self.loss_dir = f'{config.save_dir}/{config.version}/loss'
        self.model_state_dir = f'{config.save_dir}/{config.version}/model_state'
        self.image_dir = f'{config.save_dir}/{config.version}/image'
        self.psnr_dir = f'{config.save_dir}/{config.version}/psnr'

        os.makedirs(self.loss_dir, exist_ok=True)
        os.makedirs(self.model_state_dir, exist_ok=True)
        os.makedirs(self.image_dir, exist_ok=True)
        os.makedirs(self.psnr_dir, exist_ok=True)

        self.encoder = Encoder(True).cuda()
        self.decoder = Decoder(False, True).cuda()
        self.D = VGG16_mid().cuda()

        self.config = config
Example #2
    def __init__(
            self,
            n_cap_vocab, cap_max_seq, dim_language=768,
            d_word_vec=512, d_model=512, d_inner=2048,
            n_layers=6, n_head=8, d_k=64, d_v=64, dropout=0.1,
            c3d_path=False, tgt_emb_prj_weight_sharing=True):

        super().__init__()

        # Project pre-extracted C3D (Kinetics/self pre-trained) video features
        # from 1024-d into the 768-d language space.
        self.encoder = nn.Linear(1024, 768)

        self.decoder = Decoder(
            n_tgt_vocab=n_cap_vocab, len_max_seq=cap_max_seq,
            d_word_vec=d_word_vec, d_model=d_model, d_inner=d_inner,
            n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v,
            dropout=dropout)

        self.cap_word_prj = nn.Linear(d_model, n_cap_vocab, bias=False)

        nn.init.xavier_normal_(self.cap_word_prj.weight)

        assert d_model == d_word_vec, \
            'To facilitate the residual connections, ' \
            'the dimensions of all module outputs shall be the same.'

        if tgt_emb_prj_weight_sharing:
            # Share the weight matrix between target word embedding & the final logit dense layer
            self.cap_word_prj.weight = self.decoder.tgt_word_emb.weight
            self.x_logit_scale = (d_model ** -0.5)
        else:
            self.x_logit_scale = 1.
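When tgt_emb_prj_weight_sharing is on, the projection layer reuses the embedding matrix, and x_logit_scale compensates for the magnitude of the tied weights. A minimal sketch of how the scale is typically applied when producing logits; the forward pass is not shown above, so the method name and dec_output shape are assumptions:

    # Hypothetical forward tail, assuming dec_output is (batch, seq_len, d_model).
    def project_to_vocab(self, dec_output):
        # With tied weights, scale logits by d_model ** -0.5 so their
        # magnitude stays comparable to the untied case.
        seq_logit = self.cap_word_prj(dec_output) * self.x_logit_scale
        return seq_logit.view(-1, seq_logit.size(2))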
Example #3
    def __init__(  # TODO move parameters to config file
            self,
            pset,
            batch_size=64,
            max_size=100,
            vocab_inp_size=32,
            vocab_tar_size=32,
            embedding_dim=64,
            units=128,
            hidden_size=128,
            alpha=0.1,
            epochs=200,
            epoch_decay=1,
            min_epochs=10,
            verbose=True):
        self.alpha = alpha
        self.batch_size = batch_size
        self.max_size = max_size
        self.epochs = epochs
        self.epoch_decay = epoch_decay
        self.min_epochs = min_epochs
        self.train_steps = 0

        self.verbose = verbose

        self.enc = Encoder(vocab_inp_size, embedding_dim, units, batch_size)
        self.dec = Decoder(vocab_inp_size, vocab_tar_size, embedding_dim,
                           units, batch_size)
        self.surrogate = Surrogate(hidden_size)
        self.population = Population(pset, max_size, batch_size)
        self.prob = 0.5

        self.optimizer = tf.keras.optimizers.Adam()
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False, reduction='none')
Example #4
    def __init__(self, model_name: str, args):
        self.model_name = model_name
        self.args = args
        self.encoder = Encoder(self.args.add_noise).to(self.args.device)
        self.decoder = Decoder(self.args.upsample_mode).to(self.args.device)
        self.pretrainDataset = None
        self.pretrainDataloader = None
        self.pretrainOptimizer = None
        self.pretrainScheduler = None
        self.RHO_tensor = None
        self.pretrain_batch_cnt = 0
        self.writer = None
        self.svmDataset = None
        self.svmDataloader = None
        self.testDataset = None
        self.testDataloader = None
        self.svm = SVC(C=self.args.svm_c,
                       kernel=self.args.svm_ker,
                       verbose=True,
                       max_iter=self.args.svm_max_iter)
        self.resnet = Resnet(use_pretrained=True,
                             num_classes=self.args.classes,
                             resnet_depth=self.args.resnet_depth,
                             dropout=self.args.resnet_dropout).to(
                                 self.args.device)
        self.resnetOptimizer = None
        self.resnetScheduler = None
        self.resnetLossFn = None
Example #5
    def __init__(self, opt, vocabs):
        super(S2SModel, self).__init__()

        self.opt = opt
        self.vocabs = vocabs
        self.encoder = Encoder(vocabs, opt)
        self.decoder = Decoder(vocabs, opt)
        self.generator = ProdGenerator(self.opt.decoder_rnn_size, vocabs,
                                       self.opt)
Example #6
    def __init__(self, config):

        content_trans = transforms.Compose([
            transforms.RandomResizedCrop(256),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])

        self.train_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(config.train_data_dir, content_trans),
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=config.workers,
            pin_memory=True,
            drop_last=True)

        self.trans = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

        style_trans = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

        self.loss_dir = f'{config.save_dir}/loss'
        self.model_state_dir = f'{config.save_dir}/model_state'
        self.image_dir = f'{config.save_dir}/image'
        self.psnr_dir = f'{config.save_dir}/psnr'

        os.makedirs(self.loss_dir, exist_ok=True)
        os.makedirs(self.model_state_dir, exist_ok=True)
        os.makedirs(self.image_dir, exist_ok=True)
        os.makedirs(self.psnr_dir, exist_ok=True)

        self.encoder = Encoder().cuda()
        self.transformer = Attention().cuda()
        self.decoder = Decoder().cuda()

        self.wavepool = WavePool(256).cuda()

        self.decoder.load_state_dict(torch.load("./decoder.pth"))

        S_path = os.path.join(config.style_dir, str(config.S))
        style_images = glob.glob(S_path + '/*.jpg')
        s = Image.open(style_images[0])
        s = s.resize((512, 320), 0)  # 0 = nearest-neighbour resampling
        s = style_trans(s).cuda()
        self.style_image = s.unsqueeze(0)
        self.style_target = torch.stack([s for i in range(config.batch_size)],
                                        0)

        self.config = config
Example #7
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = config

        # Embedding layer
        self.embedding = Embedding(config.num_vocab,  # vocabulary size
                                   config.embedding_size,  # embedding dimension
                                   config.pad_id,  # pad_id
                                   config.dropout)

        # Post encoder
        self.post_encoder = Encoder(config.post_encoder_cell_type,  # RNN type
                                    config.embedding_size,  # input dimension
                                    config.post_encoder_output_size,  # output dimension
                                    config.post_encoder_num_layers,  # number of RNN layers
                                    config.post_encoder_bidirectional,  # bidirectional or not
                                    config.dropout)  # dropout probability

        # Response encoder
        self.response_encoder = Encoder(config.response_encoder_cell_type,
                                        config.embedding_size,  # input dimension
                                        config.response_encoder_output_size,  # output dimension
                                        config.response_encoder_num_layers,  # number of RNN layers
                                        config.response_encoder_bidirectional,  # bidirectional or not
                                        config.dropout)  # dropout probability

        # Prior network
        self.prior_net = PriorNet(config.post_encoder_output_size,  # post input dimension
                                  config.latent_size,  # latent variable dimension
                                  config.dims_prior)  # hidden layer dimensions

        # Recognition network
        self.recognize_net = RecognizeNet(config.post_encoder_output_size,  # post input dimension
                                          config.response_encoder_output_size,  # response input dimension
                                          config.latent_size,  # latent variable dimension
                                          config.dims_recognize)  # hidden layer dimensions

        # Prepare the initial decoder state
        self.prepare_state = PrepareState(config.post_encoder_output_size+config.latent_size,
                                          config.decoder_cell_type,
                                          config.decoder_output_size,
                                          config.decoder_num_layers)

        # Decoder
        self.decoder = Decoder(config.decoder_cell_type,  # RNN type
                               config.embedding_size,  # input dimension
                               config.decoder_output_size,  # output dimension
                               config.decoder_num_layers,  # number of RNN layers
                               config.dropout)  # dropout probability

        # Output layer
        self.projector = nn.Sequential(
            nn.Linear(config.decoder_output_size, config.num_vocab),
            nn.Softmax(-1)
        )
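The prior network sees only the post, while the recognition network also sees the response; a conditional VAE like this typically samples the latent variable with the reparameterization trick and penalizes the KL divergence between the two distributions. A hedged sketch under those assumptions (the mu/logvar interface of PriorNet and RecognizeNet is not shown above):

    import torch

    def sample_latent(mu, logvar):
        # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I).
        eps = torch.randn_like(mu)
        return mu + torch.exp(0.5 * logvar) * eps

    def gaussian_kl(mu_q, logvar_q, mu_p, logvar_p):
        # KL(q || p) between diagonal Gaussians, summed over latent dims.
        return 0.5 * torch.sum(
            logvar_p - logvar_q
            + (logvar_q.exp() + (mu_q - mu_p) ** 2) / logvar_p.exp()
            - 1.0,
            dim=-1)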
Example #8
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        print("Model name: DA, 1 layer, fixed subspaces")
        self.vocab = vocab
        self.encoder = Encoder(args, vocab, n_dim, image_dim, layers, dropout,
                               num_choice).cuda()
        self.decoder = Decoder(args, vocab, n_dim, image_dim, layers, dropout,
                               num_choice).cuda()
Example #9
    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 input_vocab_size,
                 target_vocab_size,
                 dropout=0.1):
        super(Transformer, self).__init__()

        self.encoder = Encoder(num_layers, d_model, num_heads, dff,
                               input_vocab_size, dropout)

        self.decoder = Decoder(num_layers, d_model, num_heads, dff,
                               target_vocab_size, dropout)

        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
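Instantiation only needs the two vocabulary sizes and the usual Transformer hyperparameters; a minimal sketch with placeholder values:

    # Hypothetical instantiation; the vocabulary sizes are placeholders.
    transformer = Transformer(num_layers=4,
                              d_model=128,
                              num_heads=8,
                              dff=512,
                              input_vocab_size=8500,
                              target_vocab_size=8000,
                              dropout=0.1)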
Example #10
    def __init__(self, config):
        content_trans = transforms.Compose([
            transforms.Resize(config.image_size),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])

        self.train_loader = torch.utils.data.DataLoader(
            HDR_LDR(config.ldr_dir, config.hdr_dir, content_trans),
            batch_size=1,
            shuffle=True,
            num_workers=config.workers,
            pin_memory=True,
            drop_last=True)

        os.makedirs(f'{config.save_dir}/{config.version}', exist_ok=True)

        self.loss_dir = f'{config.save_dir}/{config.version}/loss'
        self.model_state_dir = f'{config.save_dir}/{config.version}/model_state'
        self.image_dir = f'{config.save_dir}/{config.version}/image'
        self.psnr_dir = f'{config.save_dir}/{config.version}/psnr'
        self.code_dir = f'{config.save_dir}/{config.version}/code'

        os.makedirs(self.loss_dir, exist_ok=True)
        os.makedirs(self.model_state_dir, exist_ok=True)
        os.makedirs(self.image_dir, exist_ok=True)
        os.makedirs(self.psnr_dir, exist_ok=True)
        os.makedirs(self.code_dir, exist_ok=True)

        script_name = 'trainer_' + config.script_name + '.py'
        shutil.copyfile(os.path.join('scripts', script_name),
                        os.path.join(self.code_dir, script_name))
        shutil.copyfile('components/transformer.py',
                        os.path.join(self.code_dir, 'transformer.py'))
        shutil.copyfile('model/Fusion.py',
                        os.path.join(self.code_dir, 'Fusion.py'))

        self.encoder = Encoder().cuda()
        self.attention = Transformer(config.topk, True, False).cuda()
        self.decoder = Decoder().cuda()

        self.decoder.load_state_dict(torch.load("./hdr_decoder.pth"))

        self.config = config
Example #11
    def __init__(self,
                 num_encoder_layers: int = 6,
                 num_decoder_layers: int = 6,
                 dim_embedding: int = 512,
                 num_heads: int = 6,
                 dim_feedfordward: int = 512,
                 dropout: float = 0.1,
                 activation: nn.Module = nn.ReLU()):
        super().__init__()
        self.encoder = Encoder(num_layers=num_encoder_layers,
                               dim_embedding=dim_embedding,
                               num_heads=num_heads,
                               dim_feedfordward=dim_feedfordward,
                               dropout=dropout)

        self.decoder = Decoder(num_layers=num_decoder_layers,
                               dim_embedding=dim_embedding,
                               num_heads=num_heads,
                               dim_feedfordward=dim_feedfordward,
                               dropout=dropout)
        self.criterion = nn.CrossEntropyLoss()
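One caveat with the defaults above: most multi-head attention implementations split dim_embedding evenly across heads, and 512 is not divisible by num_heads=6. If the underlying Encoder/Decoder work that way (an assumption, since their internals are not shown), a guard at the top of __init__ catches the mismatch early:

        # Assumed invariant: attention splits dim_embedding across heads.
        assert dim_embedding % num_heads == 0, (
            f'dim_embedding ({dim_embedding}) must be divisible by '
            f'num_heads ({num_heads})')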
Example #12
    def __init__(self):
        super(Model, self).__init__()

        self.encoder = Encoder()
        self.decoder = Decoder()
        self.embeds = nn.Embedding(config.vocab_size, config.emb_dim)
        init_wt.init_wt_normal(self.embeds.weight)

        self.encoder = get_cuda(self.encoder)
        self.decoder = get_cuda(self.decoder)
        self.embeds = get_cuda(self.embeds)


Example #13
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        print("Model name: DA")
        self.vocab = vocab
        self.encoder = Encoder(args, vocab, n_dim, image_dim, layers, dropout,
                               num_choice).cuda()
        # Alternative encoders/decoders kept for reference:
        # self.encoder = TransformerEncoder(args, vocab, n_dim, image_dim, layers, dropout, num_choice).cuda()
        # self.encoder = DAEncoder(args, vocab, n_dim, image_dim, layers, dropout, num_choice).cuda()
        # self.encoder = MHEncoder(args, vocab, n_dim, image_dim, layers, dropout, num_choice).cuda()
        # self.encoder = HierarchicalDA(args, vocab, n_dim, image_dim, layers, dropout, num_choice).cuda()
        # self.decoder = Disc(args, vocab, n_dim, image_dim, layers, dropout, num_choice)
        # self.decoder = SumDisc(args, vocab, n_dim, image_dim, layers, dropout, num_choice)
        self.decoder = Decoder(args, vocab, n_dim, image_dim, layers, dropout,
                               num_choice).cuda()
Example #14
def make_model(src_vocab,
               tar_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = GeneralEncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embedding(d_model, src_vocab), c(position)),
        nn.Sequential(Embedding(d_model, tar_vocab), c(position)),
        Generator(d_model, tar_vocab))

    # Randomly initialize the parameters (Xavier uniform); this matters a lot.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
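A quick smoke test of the factory; the vocabulary sizes and N below are placeholders:

    # Hypothetical usage: build a small model and count its parameters.
    model = make_model(src_vocab=11, tar_vocab=11, N=2)
    n_params = sum(p.numel() for p in model.parameters())
    print(f'parameters: {n_params:,}')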
Example #15
def main():
    
    #create tensorboard summary writer
    writer = SummaryWriter(args.experiment_id)
    #[TODO] may need to resize input image
    cudnn.enabled = True
    #create model: Encoder
    model_encoder = Encoder()
    model_encoder.train()
    model_encoder.cuda(args.gpu)
    optimizer_encoder = optim.Adam(model_encoder.parameters(), lr=args.learning_rate, betas=(0.95, 0.99))
    optimizer_encoder.zero_grad()

    #create model: Decoder
    model_decoder = Decoder()
    model_decoder.train()
    model_decoder.cuda(args.gpu)
    optimizer_decoder = optim.Adam(model_decoder.parameters(), lr=args.learning_rate, betas=(0.95, 0.99))
    optimizer_decoder.zero_grad()
    
    l2loss = nn.MSELoss()
    
    #load data
    for i in range(1, 360002, 30000):
        train_data, valid_data = get_data(i)
        for e in range(1, args.epoch + 1):
            train_loss_value = 0
            validation_loss_value = 0
            for j in range(0, int(args.train_size/4), args.batch_size):
                optimizer_decoder.zero_grad()
                optimizer_encoder.zero_grad()
                image = Variable(torch.tensor(train_data[j: j + args.batch_size, :, :])).cuda(args.gpu)
                latent = model_encoder(image)
                img_recon = model_decoder(latent)
                img_recon = F.interpolate(img_recon, size=image.shape[2:], mode='bilinear', align_corners=True) 
                loss = l2loss(img_recon, image)
                train_loss_value += loss.data.cpu().numpy() / args.batch_size
                loss.backward()
                optimizer_decoder.step()
                optimizer_encoder.step()
            print("data load: {:8d}".format(i))
            print("epoch: {:8d}".format(e))
            print("train_loss: {:08.6f}".format(train_loss_value / (args.train_size / args.batch_size)))
            for j in range(0, int(args.validation_size / 4), args.batch_size):
                model_encoder.eval()
                model_decoder.eval() 
                image = Variable(torch.tensor(valid_data[j: j + args.batch_size, :, :])).cuda(args.gpu)
                latent = model_encoder(image)
                img_recon = model_decoder(latent)
                img_1 = img_recon[0][0]
                img = image[0][0]
                img_recon = F.interpolate(img_recon, size=image.shape[2:], mode='bilinear', align_corners=True) 
                save_image(img_1, args.image_dir + '/fake' + str(i) + "_" + str(j) + ".png")
                save_image(img, args.image_dir + '/real' + str(i) + "_" + str(j) + ".png")
                # Compare the reconstruction against the validation batch.
                loss = l2loss(img_recon, image)
                validation_loss_value += loss.data.cpu().numpy() / args.batch_size
            model_encoder.train()
            model_decoder.train()
            print("train_loss: {:08.6f}".format(validation_loss_value / (args.validation_size / args.batch_size)))
        torch.save({'encoder_state_dict': model_encoder.state_dict()}, osp.join(args.checkpoint_dir, 'AE_encoder.pth'))
        torch.save({'decoder_state_dict': model_decoder.state_dict()}, osp.join(args.checkpoint_dir, 'AE_decoder.pth'))
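The validation loop above still records autograd history; wrapping it in torch.no_grad() is the usual fix and also saves memory. A minimal sketch of the guarded pattern, reusing the variable names from the loop above:

    # Sketch: run validation without building the autograd graph.
    model_encoder.eval()
    model_decoder.eval()
    with torch.no_grad():
        for j in range(0, int(args.validation_size / 4), args.batch_size):
            image = torch.tensor(valid_data[j: j + args.batch_size]).cuda(args.gpu)
            img_recon = model_decoder(model_encoder(image))
            validation_loss_value += l2loss(img_recon, image).item() / args.batch_size
    model_encoder.train()
    model_decoder.train()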
Example #16
    sample_cell = encoder.initialize_cell_state()
    sample_output, sample_hidden, cell_hidden = encoder(
        example_input_batch, [sample_hidden, sample_cell])
    print(
        'Encoder output shape: (batch size, sequence length, units) {}'.format(
            sample_output.shape))
    print('Encoder Hidden state shape: (batch size, units) {}'.format(
        sample_hidden.shape))
    print('Encoder Cell state shape: (batch size, units) {}'.format(
        cell_hidden.shape))

    # Attention
    attention_layer = Attention()
    attention_result, attention_weights = attention_layer(
        sample_hidden, sample_output)

    print("Attention result shape: (batch size, units) {}".format(
        attention_result.shape))
    print(
        "Attention weights shape: (batch_size, sequence_length, 1) {}".format(
            attention_weights.shape))

    # Decoder
    decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

    sample_decoder_output, _, _, _ = decoder(
        tf.random.uniform((BATCH_SIZE, 1)), sample_hidden, sample_output)

    print('Decoder output shape: (batch_size, vocab size) {}'.format(
        sample_decoder_output.shape))
Example #17
    def create_model(self):
        self.encoder = Encoder(True).cuda()
        self.decoder = Decoder(True, True).cuda()
        self.D = VGG16_mid().cuda()
        self.attention1 = Transformer(4, 512, self.config.topk, True,
                                      False).cuda()
Example #18
    def __init__(self,
                 n_cap_vocab,
                 n_cms_vocab,
                 cap_max_seq,
                 cms_max_seq,
                 vis_emb=2048,
                 d_word_vec=512,
                 d_model=512,
                 d_inner=2048,
                 n_layers=6,
                 rnn_layers=1,
                 n_head=8,
                 d_k=64,
                 d_v=64,
                 dropout=0.1,
                 tgt_emb_prj_weight_sharing=True):

        super().__init__()

        # Setting RNN layers to 1 or 2 yields better performance.
        self.vis_emb = nn.Linear(vis_emb, d_model)
        self.encoder = Encoder(40,
                               d_model,
                               rnn_layers,
                               n_head,
                               d_k,
                               d_v,
                               d_model,
                               d_inner,
                               dropout=0.1)

        self.decoder = Decoder(n_tgt_vocab=n_cap_vocab,
                               len_max_seq=cap_max_seq,
                               d_word_vec=d_word_vec,
                               d_model=d_model,
                               d_inner=d_inner,
                               n_layers=n_layers,
                               n_head=n_head,
                               d_k=d_k,
                               d_v=d_v,
                               dropout=dropout)

        self.cms_decoder = Decoder(n_tgt_vocab=n_cms_vocab,
                                   len_max_seq=cms_max_seq,
                                   d_word_vec=d_word_vec,
                                   d_model=d_model,
                                   d_inner=d_inner,
                                   n_layers=n_layers,
                                   n_head=n_head,
                                   d_k=d_k,
                                   d_v=d_v,
                                   dropout=dropout)

        self.cap_word_prj = nn.Linear(d_model, n_cap_vocab, bias=False)
        self.cms_word_prj = nn.Linear(d_model, n_cms_vocab, bias=False)

        nn.init.xavier_normal_(self.cap_word_prj.weight)
        nn.init.xavier_normal_(self.cms_word_prj.weight)

        assert d_model == d_word_vec, \
            'To facilitate the residual connections, ' \
            'the dimensions of all module outputs shall be the same.'

        if tgt_emb_prj_weight_sharing:
            # Share the weight matrix between target word embedding & the final logit dense layer
            self.cap_word_prj.weight = self.decoder.tgt_word_emb.weight
            self.cms_word_prj.weight = self.cms_decoder.tgt_word_emb.weight
            self.x_logit_scale = (d_model**-0.5)
        else:
            self.x_logit_scale = 1.
Example #19
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=2)

testset = torchvision.datasets.ImageFolder(root='./data/Test',
                                           transform=transform_test)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=200,
                                         shuffle=False,
                                         num_workers=2)

# Model
print('==> Building model..')
encoder = Encoder(mask=mask)
decoder = Decoder(mask=mask)
classifier = Classifier()
encoder = encoder.to(device)
decoder = decoder.to(device)
classifier = classifier.to(device)
if device == 'cuda':
    cudnn.benchmark = True
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt_' + codir + '.t7')
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])
    classifier.load_state_dict(checkpoint['classifier'])
    best_loss = checkpoint['loss']
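The resume branch expects a checkpoint dict keyed by encoder, decoder, classifier, and loss; a matching writer (invoked wherever the best loss improves, which is an assumption here) might look like:

    # Hypothetical saver matching the resume keys above.
    state = {
        'encoder': encoder.state_dict(),
        'decoder': decoder.state_dict(),
        'classifier': classifier.state_dict(),
        'loss': best_loss,
    }
    os.makedirs('checkpoint', exist_ok=True)
    torch.save(state, './checkpoint/ckpt_' + codir + '.t7')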