Example #1
    def build(self):

        # Build Modules
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).cuda()
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).cuda()
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).cuda()
        self.model = nn.ModuleList(
            [self.linear_compress, self.summarizer, self.discriminator])

        if self.config.mode == 'train':
            # Build Optimizers
            self.s_e_optimizer = optim.Adam(
                list(self.summarizer.s_lstm.parameters()) +
                list(self.summarizer.vae.e_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.d_optimizer = optim.Adam(
                list(self.summarizer.vae.d_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.c_optimizer = optim.Adam(
                list(self.discriminator.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.discriminator_lr)

            self.writer = TensorboardWriter(str(self.config.log_dir))
Example #2
    def build(self):
        # code added by me
        torch.backends.cudnn.enabled = False

        # code added by me / GPU info
        USE_CUDA = torch.cuda.is_available()
        print(USE_CUDA)
        device = torch.device('cuda:0' if USE_CUDA else 'cpu')
        print('Training device:', device)
        print('cuda index:', torch.cuda.current_device())
        print('gpu count:', torch.cuda.device_count())
        print('graphic name:', torch.cuda.get_device_name())
        # setting device on GPU if available, else CPU
        print('Using device:', device)

        # Build Modules
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).cuda()
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).cuda()
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).cuda()
        self.model = nn.ModuleList(
            [self.linear_compress, self.summarizer, self.discriminator])

        if self.config.mode == 'train':
            # Build Optimizers
            self.s_e_optimizer = optim.Adam(
                list(self.summarizer.s_lstm.parameters()) +
                list(self.summarizer.vae.e_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.d_optimizer = optim.Adam(
                list(self.summarizer.vae.d_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.c_optimizer = optim.Adam(
                list(self.discriminator.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.discriminator_lr)
            print(self.model)
            self.model.train()
            # self.model.apply(apply_weight_norm)

            # Overview Parameters
            # train only the VAE
            # print('Model Parameters')
            # for name, param in self.model.named_parameters():
            #     print('\t' + name + '\t', list(param.size()))
            #     if 'vae' not in name:
            #         param.requires_grad = False
            #     print('\t train: ' + '\t', param.requires_grad)

            # Tensorboard (commenting out done by me)
            self.writer = TensorboardWriter(self.config.log_dir)
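
Note that this variant computes a device object but still places every module with .cuda(), which fails on CPU-only machines. A device-agnostic sketch of the same module setup (assuming the Summarizer and Discriminator classes and config fields from the snippet above) would be:

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).to(device)
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).to(device)
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).to(device)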
Example #3
 def __init__(self, n_in, n_h, activation):
     super(GMI, self).__init__()
     self.gcn1 = GCN(
         n_in, n_h, activation
     )  # on Citeseer and PubMed the encoder is a 1-layer GCN, so you need to modify this accordingly
     self.gcn2 = GCN(n_h, n_h, activation)
     self.disc1 = Discriminator(n_in, n_h)
     self.disc2 = Discriminator(n_h, n_h)
     self.avg_neighbor = AvgNeighbor()
     self.prelu = nn.PReLU()
     self.sigm = nn.Sigmoid()
Example #4
    def __init__(self,
                 features,
                 adj_lists,
                 ft_size,
                 n_h,
                 activation,
                 num_sample=[10, 10],
                 skip_connection=False,
                 gcn=True):
        super(DGI_ind, self).__init__()
        self.features = features
        self.skip_connection = skip_connection
        self.agg1 = MeanAggregator(features,
                                   cuda=torch.cuda.is_available(),
                                   gcn=gcn,
                                   name='l1')
        self.enc1 = Encoder(features,
                            ft_size,
                            n_h,
                            adj_lists,
                            self.agg1,
                            num_sample=num_sample[0],
                            gcn=gcn,
                            cuda=torch.cuda.is_available(),
                            activation=activation,
                            skip_connection=skip_connection,
                            name='l2')
        self.agg2 = MeanAggregator(lambda nodes: self.enc1(nodes),
                                   cuda=torch.cuda.is_available(),
                                   gcn=gcn,
                                   name='l3')
        self.enc2 = Encoder(lambda nodes: self.enc1(nodes),
                            self.enc1.embed_dim,
                            n_h,
                            adj_lists,
                            self.agg2,
                            num_sample=num_sample[1],
                            base_model=self.enc1,
                            gcn=gcn,
                            cuda=torch.cuda.is_available(),
                            activation=activation,
                            skip_connection=skip_connection,
                            name='l4')
        self.read = AvgReadout()
        self.sigm = nn.Sigmoid()

        if skip_connection:
            self.disc = Discriminator(2 * n_h)
        else:
            self.disc = Discriminator(n_h)
Example #5
    def __init__(self,
                 image_size,
                 input_features,
                 hidden_features,
                 optim_cfg,
                 generator_cfg,
                 discriminator_cfg,
                 image_dataset,
                 output_dir="../generated_images",
                 add_layers_iters: int = 10000,
                 sample_every: int = 100,
                 num_samples: int = 4,
                 log_every: int = 10,
                 gp_every: int = 4,
                 batch_size: int = 32,
                 loss_mode: str = "relu"):
        super(piGAN, self).__init__()
        self.input_features = input_features
        self.hidden_features = hidden_features
        self.optim_cfg = optim_cfg
        self.batch_size = batch_size
        self.image_dataset = image_dataset
        self.log_every = log_every
        self.gp_every = gp_every
        self.sample_every = sample_every
        self.add_layers_iters = add_layers_iters
        self.output_dir = output_dir
        self.num_samples = num_samples

        # create the directory for generated images if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        self.G = Generator(image_size=image_size,
                           input_features=input_features,
                           hidden_features=hidden_features,
                           **generator_cfg)

        self.D = Discriminator(image_size=image_size, **discriminator_cfg)

        # set up the initial resolution for the loaded images
        self.image_dataset.set_transforms(self.D.init_resolution)

        self.iterations = 0
        self.last_loss_D = 0
        self.last_loss_G = 0

        self.discriminator_loss, self.generator_loss = get_GAN_losses(
            loss_mode)
Example #6
 def __init__(self, n_in, n_h, activation):
     super(DGI, self).__init__()
     self.gcn = GCN(n_in, n_h, activation)
     self.read = AvgReadout()
     self.sigm = nn.Sigmoid()
     self.disc = Discriminator(n_h)
     self.disc2 = Discriminator2(n_h)
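
These DGI-style constructors all wire up the same four pieces: a GCN encoder, an average readout, a sigmoid, and a discriminator. For orientation, a minimal forward pass in the style of the reference DGI implementation (a sketch; the exact signature and corruption scheme vary across the repositories shown here) pairs them like this:

    def forward(self, seq1, seq2, adj, sparse, msk, samp_bias1, samp_bias2):
        h_1 = self.gcn(seq1, adj, sparse)   # embeddings of the original graph
        c = self.sigm(self.read(h_1, msk))  # graph-level summary vector
        h_2 = self.gcn(seq2, adj, sparse)   # embeddings of the corrupted graph
        # score (summary, node) pairs: positives from h_1, negatives from h_2
        return self.disc(c, h_1, h_2, samp_bias1, samp_bias2)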
Example #7
File: dgi.py Project: HekpoMaH/DGI
    def __init__(self,
                 n_in,
                 n_h,
                 activation,
                 update_rule="GCNConv",
                 batch_size=1,
                 K=None,
                 drop_sigma=False):
        super(DGI, self).__init__()

        if "GraphSkip" in update_rule:
            self.gnn = GraphSkip.GraphSkip(n_in,
                                           n_h,
                                           activation,
                                           convolution=update_rule,
                                           K=K)
            # has reset parameters and activation in constructor
        else:
            self.gnn = GNNPlusAct(n_in,
                                  n_h,
                                  activation,
                                  update_rule,
                                  K=K,
                                  drop_sigma=drop_sigma)
            # has reset parameters and activation in constructor

        self.read = AvgReadout()

        self.sigm = nn.Sigmoid()

        self.disc = Discriminator(n_h, batch_size)
Example #8
 def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
              neighbor_pooling_type, device):
     super(DGI, self).__init__()
     self.gin = GraphCNN(num_layers, num_mlp_layers, input_dim, hidden_dim,
                         neighbor_pooling_type, device)
     self.read = AvgReadout()
     self.sigm = nn.Sigmoid()
     self.disc = Discriminator(hidden_dim)
Example #9
File: DGI.py Project: isxinli/DMGI
    def __init__(self, args):
        super(modeler, self).__init__()
        self.args = args
        self.gcn = GCN(args.ft_size, args.hid_units, args.activation,
                       args.drop_prob, args.isBias)

        # one discriminator
        self.disc = Discriminator(args.hid_units)
        self.readout_func = self.args.readout_func
Example #10
    def __init__(self, nfeat, nhid, shid, P, act):
        super(DGI, self).__init__()
        self.hgcn = HGCN(nfeat, nhid, shid, P, act)
        
        self.read = AvgReadout()

        self.sigm = nn.Sigmoid()

        self.disc = Discriminator(nhid)
Example #11
    def build(self):

        # Build Modules
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).cuda()
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).cuda()
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).cuda()
        self.model = nn.ModuleList(
            [self.linear_compress, self.summarizer, self.discriminator])

        if self.config.mode == 'train':
            # Build Optimizers
            self.s_e_optimizer = optim.Adam(
                list(self.summarizer.s_lstm.parameters()) +
                list(self.summarizer.vae.e_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.d_optimizer = optim.Adam(
                list(self.summarizer.vae.d_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.c_optimizer = optim.Adam(
                list(self.discriminator.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.discriminator_lr)

            self.model.train()
            # self.model.apply(apply_weight_norm)

            # Overview Parameters
            # print('Model Parameters')
            # for name, param in self.model.named_parameters():
            #     print('\t' + name + '\t', list(param.size()))

            # Tensorboard
            self.writer = TensorboardWriter(self.config.log_dir)
Example #12
    def __init__(self, n_in, n_h, activation, k, device, hard, batch_size):
        super(DGI, self).__init__()
        self.gcn = GCN(n_in, n_h, activation)
        self.assign = Assign(k, n_h, device, hard)

        self.sigm = nn.Sigmoid()

        self.disc = Discriminator(n_h)

        self.k = k
Example #13
    def __init__(self, n_nb, n_in, n_h, activation, num_clusters, beta, adj):
        super(GIC_GCN, self).__init__()

        self.gcn = net_gcn_baseline(embedding_dim=[n_in, 512, n_h], adj=adj)
        self.read = AvgReadout()
        self.sigm = nn.Sigmoid()
        self.disc = Discriminator(n_h)
        self.disc_c = Discriminator_cluster(n_h, n_h, n_nb, num_clusters)
        self.beta = beta
        self.cluster = Clusterator(n_h, num_clusters)
Example #14
    def __init__(self, n_nb, n_in, n_h, activation, num_clusters, beta, graph):
        super(GIC_GIN, self).__init__()

        self.gcn = GINNet(net_params=[n_in, 512, n_h], graph=graph)
        self.read = AvgReadout()
        self.sigm = nn.Sigmoid()
        self.disc = Discriminator(n_h)
        self.disc_c = Discriminator_cluster(n_h, n_h, n_nb, num_clusters)
        self.beta = beta
        self.cluster = Clusterator(n_h, num_clusters)
Example #15
    def __init__(self,
                 n_in,
                 n_h,
                 input_size=128,
                 hidden_size=128,
                 n_layers=1,
                 dropout=0.5):

        super(PIM, self).__init__()

        self.encoder_path = EncoderLSTM(input_size=input_size,
                                        hidden_size=hidden_size,
                                        n_layers=n_layers,
                                        dropout=dropout)

        self.disc_node = Discriminator(n_in, n_h)
        self.disc_path = Discriminator(n_in, n_h)

        self.read = Readout()

        # can be initialized with results from graph embedding methods, e.g. node2vec
        self.embeddingn = nn.Embedding(8894, 128)
Example #16
    def __init__(self,n_nb, n_in, n_h, activation, num_clusters, beta):
        super(GIC, self).__init__()
        self.gcn = GCN(n_in, n_h, activation)
        
        self.read = AvgReadout()

        self.sigm = nn.Sigmoid()

        self.disc = Discriminator(n_h)
        self.disc_c = Discriminator_cluster(n_h,n_h,n_nb,num_clusters)
        
        
        self.beta = beta
        
        self.cluster = Clusterator(n_h,num_clusters)
Example #17
    def __init__(self, args):
        super(modeler, self).__init__()
        self.args = args
        self.gcn = nn.ModuleList([GCN(args.ft_size, args.hid_units, args.activation, args.drop_prob, args.isBias) for _ in range(args.nb_graphs)])

        self.disc = Discriminator(args.hid_units)
        self.H = nn.Parameter(torch.FloatTensor(1, args.nb_nodes, args.hid_units))
        self.readout_func = self.args.readout_func
        if args.isAttn:
            self.attn = nn.ModuleList([Attention(args) for _ in range(args.nheads)])

        if args.isSemi:
            self.logistic = LogReg(args.hid_units, args.nb_classes).to(args.device)

        self.init_weight()
Example #18
    def __init__(self,
                 n_in,
                 n_h,
                 activation,
                 critic="bilinear",
                 dataset=None,
                 attack_model=True):
        super(DGI, self).__init__()
        self.gcn = GCN(n_in, n_h, activation)
        self.read = AvgReadout()

        self.sigm = nn.Sigmoid()

        self.disc = Discriminator(n_h,
                                  critic=critic,
                                  dataset=dataset,
                                  attack_model=attack_model)
Example #19
# Create the dataset
dataset = ImageFolder(root=dataroot,
                        transform=transforms.Compose([
                            transforms.Resize(image_size),
                            transforms.CenterCrop(image_size),
                            transforms.ToTensor(),
                            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                        ]))
# Create the dataloader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=workers)

##############################################################################
# Instance initialization
##############################################################################
netD = Discriminator().to(device)
netG = Generator(z_dim=z_dim).to(device)

optimizerD = optim.Adam(netD.parameters(), lr=lr_D, betas=(beta1, beta2))
optimizerG = optim.Adam(netG.parameters(), lr=lr_G, betas=(beta1, beta2))

def train(show_every_epoch=1, resume=False, resume_at_epoch=1):
    if resume:
        netG.load_state_dict(torch.load('generator_epoch{}.pth'.format(resume_at_epoch)))
        netD.load_state_dict(torch.load('discriminator_epoch{}.pth'.format(resume_at_epoch)))
    netD.train()
    netG.train()

    for epoch in range(1, epochs + 1):
        print('Training epoch {}'.format(epoch))
        D_running_loss = 0
Example #20
BATCH_SIZE = 1
N_EPOCHES = 200
LAMBDA = 10
D_RATIO = 2
N_BLOCKS = 9
LR_LAMBDA = lambda epoch: min(1, 1 - (epoch - 100) / 100)
IMG_SIZE = 286
INPUT_SIZE = 256

# device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Networks
G_A2B = Generator(input_dim=3, n_blocks=N_BLOCKS).to(device)
G_B2A = Generator(input_dim=3, n_blocks=N_BLOCKS).to(device)
D_A = Discriminator(input_dim=3).to(device)
D_B = Discriminator(input_dim=3).to(device)
G_A2B.apply(init_weights)
G_B2A.apply(init_weights)
D_A.apply(init_weights)
D_B.apply(init_weights)

# ImagePool
fake_A_pool = ImagePool(size=50)
fake_B_pool = ImagePool(size=50)

# loss
Loss_GAN = nn.MSELoss()
Loss_cyc = nn.L1Loss()

# optimizer , betas=(0.5, 0.999)
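
The snippet is cut off before the optimizers are built. A sketch consistent with the truncated comment and the LR_LAMBDA schedule above (the learning rate of 2e-4 is an assumption; the betas follow the comment) would be:

import itertools

optimizer_G = optim.Adam(itertools.chain(G_A2B.parameters(), G_B2A.parameters()),
                         lr=2e-4, betas=(0.5, 0.999))
optimizer_D = optim.Adam(itertools.chain(D_A.parameters(), D_B.parameters()),
                         lr=2e-4, betas=(0.5, 0.999))

# LR_LAMBDA keeps the rate constant for the first 100 epochs, then decays it
# linearly to zero over the remaining 100
scheduler_G = optim.lr_scheduler.LambdaLR(optimizer_G, lr_lambda=LR_LAMBDA)
scheduler_D = optim.lr_scheduler.LambdaLR(optimizer_D, lr_lambda=LR_LAMBDA)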
Example #21
    def __init__(self, hps):
        super(MixPoetAUS, self).__init__()
        self.hps = hps

        self.vocab_size = hps.vocab_size
        self.n_class1 = hps.n_class1
        self.n_class2 = hps.n_class2
        self.emb_size = hps.emb_size
        self.hidden_size = hps.hidden_size
        self.factor_emb_size = hps.factor_emb_size
        self.latent_size = hps.latent_size
        self.context_size = hps.context_size
        self.poem_len = hps.poem_len
        self.sens_num = hps.sens_num
        self.sen_len = hps.sen_len

        self.pad_idx = hps.pad_idx
        self.bos_idx = hps.bos_idx

        self.bos_tensor = torch.tensor(hps.bos_idx, dtype=torch.long, device=device).view(1, 1)

        self.gumbel_tool = GumbelSampler()

        # build positional inputs to distinguish lines at different positions
        # [sens_num, sens_num], each line is a one-hot input
        self.pos_inps = F.one_hot(torch.arange(0, self.sens_num), self.sens_num)
        self.pos_inps = self.pos_inps.type(torch.FloatTensor).to(device)


        # ----------------------------
        # build components
        self.layers = nn.ModuleDict()
        self.layers['embed'] = nn.Embedding(self.vocab_size, self.emb_size, padding_idx=self.pad_idx)

        self.layers['encoder'] = BidirEncoder(self.emb_size, self.hidden_size, drop_ratio=hps.drop_ratio)

        # p(x|z, w, y)
        self.layers['decoder'] = Decoder(self.hidden_size, self.hidden_size, drop_ratio=hps.drop_ratio)

        # RNN to combine characters to form the representation of a word
        self.layers['word_encoder'] = BidirEncoder(self.emb_size, self.emb_size, cell='Elman',
            drop_ratio=hps.drop_ratio)

        # p(y_1|x,w), p(y_2|x,w)
        self.layers['cl_xw1'] = MLP(self.hidden_size*2+self.emb_size*2,
            layer_sizes=[self.hidden_size, 128, self.n_class1], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)
        self.layers['cl_xw2'] = MLP(self.hidden_size*2+self.emb_size*2,
            layer_sizes=[self.hidden_size, 128, self.n_class2], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)

        # p(y_1|w), p(y_2|w)
        self.layers['cl_w1'] = MLP(self.emb_size*2,
            layer_sizes=[self.emb_size, 64, self.n_class1], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)
        self.layers['cl_w2'] = MLP(self.emb_size*2,
            layer_sizes=[self.emb_size, 64, self.n_class2], activs=['relu', 'relu', None],
            drop_ratio=hps.drop_ratio)

        # factor embedding
        self.layers['factor_embed1'] = nn.Embedding(self.n_class1, self.factor_emb_size)
        self.layers['factor_embed2'] = nn.Embedding(self.n_class2, self.factor_emb_size)

        # posteriori and prior
        self.layers['prior'] = PriorGenerator(
            self.emb_size*2+int(self.latent_size//2),
            self.latent_size, self.n_class1, self.n_class2, self.factor_emb_size)

        self.layers['posteriori'] = PosterioriGenerator(
            self.hidden_size*2+self.emb_size*2, self.latent_size,
            self.n_class1, self.n_class2, self.factor_emb_size)


        # for adversarial training
        self.layers['discriminator'] = Discriminator(self.n_class1, self.n_class2,
            self.factor_emb_size, self.latent_size, drop_ratio=hps.drop_ratio)

        #--------------
        # project the decoder hidden state to a vocabulary-size output logit
        self.layers['out_proj'] = nn.Linear(hps.hidden_size, hps.vocab_size)

        # MLP to calculate the initial decoder state
        # NOTE: Here we use a two-dimensional one-hot vector as the input length embedding o_i,
        #   since there are only two kinds of line length, 5 chars and 7 chars, for Chinese
        #   classical quatrains.
        self.layers['dec_init'] = MLP(self.latent_size+self.emb_size*2+self.factor_emb_size*2,
            layer_sizes=[self.hidden_size-6],
            activs=['tanh'], drop_ratio=hps.drop_ratio)



        self.layers['map_x'] = MLP(self.context_size+self.emb_size,
            layer_sizes=[self.hidden_size],
            activs=['tanh'], drop_ratio=hps.drop_ratio)

        # update the context vector
        self.layers['context'] = ContextLayer(self.hidden_size, self.context_size)


        # two annealing parameters
        self.__tau = 1.0
        self.__teach_ratio = 1.0

        # only for pre-training
        self.layers['dec_init_pre'] = MLP(self.hidden_size*2+self.emb_size*2,
            layer_sizes=[self.hidden_size-6],
            activs=['tanh'], drop_ratio=hps.drop_ratio)
Example #22
class Solver(object):
    def __init__(self, config=None, train_loader=None, test_loader=None):
        """Class that Builds, Trains and Evaluates AC-SUM-GAN model"""
        self.config = config
        self.train_loader = train_loader
        self.test_loader = test_loader

    def build(self):

        # Build Modules
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).cuda()
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).cuda()
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).cuda()
        self.actor = Actor(state_size=self.config.action_state_size,
                           action_size=self.config.action_state_size).cuda()
        self.critic = Critic(state_size=self.config.action_state_size,
                             action_size=self.config.action_state_size).cuda()
        self.model = nn.ModuleList([
            self.linear_compress, self.summarizer, self.discriminator,
            self.actor, self.critic
        ])

        if self.config.mode == 'train':
            # Build Optimizers
            self.e_optimizer = optim.Adam(
                self.summarizer.vae.e_lstm.parameters(), lr=self.config.lr)
            self.d_optimizer = optim.Adam(
                self.summarizer.vae.d_lstm.parameters(), lr=self.config.lr)
            self.c_optimizer = optim.Adam(
                list(self.discriminator.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.discriminator_lr)
            self.optimizerA_s = optim.Adam(
                list(self.actor.parameters()) +
                list(self.summarizer.s_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.optimizerC = optim.Adam(self.critic.parameters(),
                                         lr=self.config.lr)

            self.writer = TensorboardWriter(str(self.config.log_dir))

    def reconstruction_loss(self, h_origin, h_sum):
        """L2 loss between original-regenerated features at cLSTM's last hidden layer"""

        return torch.norm(h_origin - h_sum, p=2)

    def prior_loss(self, mu, log_variance):
        """KL( q(e|x) || N(0,1) )"""
        return 0.5 * torch.sum(-1 + log_variance.exp() + mu.pow(2) -
                               log_variance)

    def sparsity_loss(self, scores):
        """Summary-Length Regularization"""

        return torch.abs(
            torch.mean(scores) - self.config.regularization_factor)

    criterion = nn.MSELoss()

    def AC(self, original_features, seq_len, action_fragments):
        """ Function that makes the actor's actions, in the training steps where the actor and critic components are not trained"""
        scores = self.summarizer.s_lstm(original_features)  # [seq_len, 1]

        fragment_scores = np.zeros(
            self.config.action_state_size)  # [num_fragments, 1]
        for fragment in range(self.config.action_state_size):
            fragment_scores[fragment] = scores[action_fragments[
                fragment, 0]:action_fragments[fragment, 1] + 1].mean()
        state = fragment_scores

        previous_actions = [
        ]  # save all the actions (the selected fragments of each episode)
        reduction_factor = (
            self.config.action_state_size -
            self.config.termination_point) / self.config.action_state_size
        action_scores = (torch.ones(seq_len) * reduction_factor).cuda()
        action_fragment_scores = (torch.ones(
            self.config.action_state_size)).cuda()

        counter = 0
        for ACstep in range(self.config.termination_point):

            state = torch.FloatTensor(state).cuda()
            # select an action
            dist = self.actor(state)
            action = dist.sample(
                )  # returns a scalar between 0 and action_state_size

            if action not in previous_actions:
                previous_actions.append(action)
                action_factor = (self.config.termination_point - counter) / (
                    self.config.action_state_size - counter) + 1

                action_scores[action_fragments[action,
                                               0]:action_fragments[action, 1] +
                              1] = action_factor
                action_fragment_scores[action] = 0

                counter = counter + 1

            next_state = state * action_fragment_scores
            next_state = next_state.cpu().detach().numpy()
            state = next_state

        weighted_scores = action_scores.unsqueeze(1) * scores
        weighted_features = weighted_scores.view(-1, 1, 1) * original_features

        return weighted_features, weighted_scores

    def train(self):

        step = 0
        for epoch_i in trange(self.config.n_epochs, desc='Epoch', ncols=80):
            self.model.train()
            recon_loss_init_history = []
            recon_loss_history = []
            sparsity_loss_history = []
            prior_loss_history = []
            g_loss_history = []
            e_loss_history = []
            d_loss_history = []
            c_original_loss_history = []
            c_summary_loss_history = []
            actor_loss_history = []
            critic_loss_history = []
            reward_history = []

            # Train in batches of batch_size videos
            num_batches = int(len(self.train_loader) / self.config.batch_size)
            iterator = iter(self.train_loader)
            for batch in range(num_batches):
                list_image_features = []
                list_action_fragments = []

                print(f'batch: {batch}')

                # ---- Train eLSTM ----#
                if self.config.verbose:
                    tqdm.write('Training eLSTM...')
                self.e_optimizer.zero_grad()
                for video in range(self.config.batch_size):
                    image_features, action_fragments = next(iterator)

                    action_fragments = action_fragments.squeeze(0)
                    # [batch_size, seq_len, input_size]
                    # [seq_len, input_size]
                    image_features = image_features.view(
                        -1, self.config.input_size)

                    list_image_features.append(image_features)
                    list_action_fragments.append(action_fragments)

                    # [seq_len, input_size]
                    image_features_ = Variable(image_features).cuda()
                    seq_len = image_features_.shape[0]

                    # [seq_len, 1, hidden_size]
                    original_features = self.linear_compress(
                        image_features_.detach()).unsqueeze(1)

                    weighted_features, scores = self.AC(
                        original_features, seq_len, action_fragments)
                    h_mu, h_log_variance, generated_features = self.summarizer.vae(
                        weighted_features)

                    h_origin, original_prob = self.discriminator(
                        original_features)
                    h_sum, sum_prob = self.discriminator(generated_features)

                    if self.config.verbose:
                        tqdm.write(
                            f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                        )

                    reconstruction_loss = self.reconstruction_loss(
                        h_origin, h_sum)
                    prior_loss = self.prior_loss(h_mu, h_log_variance)

                    tqdm.write(
                        f'recon loss {reconstruction_loss.item():.3f}, prior loss: {prior_loss.item():.3f}'
                    )

                    e_loss = reconstruction_loss + prior_loss
                    e_loss = e_loss / self.config.batch_size
                    e_loss.backward()

                    prior_loss_history.append(prior_loss.data)
                    e_loss_history.append(e_loss.data)

                # Update e_lstm parameters every 'batch_size' iterations
                torch.nn.utils.clip_grad_norm_(
                    self.summarizer.vae.e_lstm.parameters(), self.config.clip)
                self.e_optimizer.step()

                #---- Train dLSTM (decoder/generator) ----#
                if self.config.verbose:
                    tqdm.write('Training dLSTM...')
                self.d_optimizer.zero_grad()
                for video in range(self.config.batch_size):
                    image_features = list_image_features[video]
                    action_fragments = list_action_fragments[video]

                    # [seq_len, input_size]
                    image_features_ = Variable(image_features).cuda()
                    seq_len = image_features_.shape[0]

                    # [seq_len, 1, hidden_size]
                    original_features = self.linear_compress(
                        image_features_.detach()).unsqueeze(1)

                    weighted_features, _ = self.AC(original_features, seq_len,
                                                   action_fragments)
                    h_mu, h_log_variance, generated_features = self.summarizer.vae(
                        weighted_features)

                    h_origin, original_prob = self.discriminator(
                        original_features)
                    h_sum, sum_prob = self.discriminator(generated_features)

                    tqdm.write(
                        f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                    )

                    reconstruction_loss = self.reconstruction_loss(
                        h_origin, h_sum)
                    g_loss = self.criterion(sum_prob, original_label)

                    orig_features = original_features.squeeze(
                        1)  # [seq_len, hidden_size]
                    gen_features = generated_features.squeeze(1)  # [seq_len, hidden_size]
                    recon_losses = []
                    for frame_index in range(seq_len):
                        recon_losses.append(
                            self.reconstruction_loss(
                                orig_features[frame_index, :],
                                gen_features[frame_index, :]))
                    reconstruction_loss_init = torch.stack(recon_losses).mean()

                    if self.config.verbose:
                        tqdm.write(
                            f'recon loss {reconstruction_loss.item():.3f}, g loss: {g_loss.item():.3f}'
                        )

                    d_loss = reconstruction_loss + g_loss
                    d_loss = d_loss / self.config.batch_size
                    d_loss.backward()

                    recon_loss_init_history.append(
                        reconstruction_loss_init.data)
                    recon_loss_history.append(reconstruction_loss.data)
                    g_loss_history.append(g_loss.data)
                    d_loss_history.append(d_loss.data)

                # Update d_lstm parameters every 'batch_size' iterations
                torch.nn.utils.clip_grad_norm_(
                    self.summarizer.vae.d_lstm.parameters(), self.config.clip)
                self.d_optimizer.step()

                #---- Train cLSTM ----#
                if self.config.verbose:
                    tqdm.write('Training cLSTM...')
                self.c_optimizer.zero_grad()
                for video in range(self.config.batch_size):
                    image_features = list_image_features[video]
                    action_fragments = list_action_fragments[video]

                    # [seq_len, input_size]
                    image_features_ = Variable(image_features).cuda()
                    seq_len = image_features_.shape[0]

                    # Train with original loss
                    # [seq_len, 1, hidden_size]
                    original_features = self.linear_compress(
                        image_features_.detach()).unsqueeze(1)
                    h_origin, original_prob = self.discriminator(
                        original_features)
                    c_original_loss = self.criterion(original_prob,
                                                     original_label)
                    c_original_loss = c_original_loss / self.config.batch_size
                    c_original_loss.backward()

                    # Train with summary loss
                    weighted_features, _ = self.AC(original_features, seq_len,
                                                   action_fragments)
                    h_mu, h_log_variance, generated_features = self.summarizer.vae(
                        weighted_features)
                    h_sum, sum_prob = self.discriminator(
                        generated_features.detach())
                    c_summary_loss = self.criterion(sum_prob, summary_label)
                    c_summary_loss = c_summary_loss / self.config.batch_size
                    c_summary_loss.backward()

                    tqdm.write(
                        f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                    )

                    c_original_loss_history.append(c_original_loss.data)
                    c_summary_loss_history.append(c_summary_loss.data)

                # Update c_lstm parameters every 'batch_size' iterations
                torch.nn.utils.clip_grad_norm_(
                    list(self.discriminator.parameters()) +
                    list(self.linear_compress.parameters()), self.config.clip)
                self.c_optimizer.step()

                #---- Train sLSTM and actor-critic ----#
                if self.config.verbose:
                    tqdm.write('Training sLSTM, actor and critic...')
                self.optimizerA_s.zero_grad()
                self.optimizerC.zero_grad()
                for video in range(self.config.batch_size):
                    image_features = list_image_features[video]
                    action_fragments = list_action_fragments[video]

                    # [seq_len, input_size]
                    image_features_ = Variable(image_features).cuda()
                    seq_len = image_features_.shape[0]

                    # [seq_len, 1, hidden_size]
                    original_features = self.linear_compress(
                        image_features_.detach()).unsqueeze(1)
                    scores = self.summarizer.s_lstm(
                        original_features)  # [seq_len, 1]

                    fragment_scores = np.zeros(
                        self.config.action_state_size)  # [num_fragments, 1]
                    for fragment in range(self.config.action_state_size):
                        fragment_scores[fragment] = scores[action_fragments[
                            fragment,
                            0]:action_fragments[fragment, 1] + 1].mean()

                    state = fragment_scores  # [action_state_size, 1]

                    previous_actions = [
                    ]  # save all the actions (the selected fragments of each step)
                    reduction_factor = (self.config.action_state_size -
                                        self.config.termination_point
                                        ) / self.config.action_state_size
                    action_scores = (torch.ones(seq_len) *
                                     reduction_factor).cuda()
                    action_fragment_scores = (torch.ones(
                        self.config.action_state_size)).cuda()

                    log_probs = []
                    values = []
                    rewards = []
                    masks = []
                    entropy = 0

                    counter = 0
                    for ACstep in range(self.config.termination_point):
                        # select an action, get a value for the current state
                        state = torch.FloatTensor(
                            state).cuda()  # [action_state_size, 1]
                        dist, value = self.actor(state), self.critic(state)
                        action = dist.sample(
                        )  # returns a scalar between 0 and action_state_size

                        if action in previous_actions:

                            reward = 0

                        else:

                            previous_actions.append(action)
                            action_factor = (
                                self.config.termination_point - counter
                            ) / (self.config.action_state_size - counter) + 1

                            action_scores[action_fragments[
                                action, 0]:action_fragments[action, 1] +
                                          1] = action_factor
                            action_fragment_scores[action] = 0

                            weighted_scores = action_scores.unsqueeze(
                                1) * scores
                            weighted_features = weighted_scores.view(
                                -1, 1, 1) * original_features

                            h_mu, h_log_variance, generated_features = self.summarizer.vae(
                                weighted_features)

                            h_origin, original_prob = self.discriminator(
                                original_features)
                            h_sum, sum_prob = self.discriminator(
                                generated_features)

                            tqdm.write(
                                f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                            )

                            rec_loss = self.reconstruction_loss(
                                h_origin, h_sum)
                            reward = 1 - rec_loss.item(
                            )  # the smaller the distance, the higher the reward
                            counter = counter + 1

                        next_state = state * action_fragment_scores
                        next_state = next_state.cpu().detach().numpy()

                        log_prob = dist.log_prob(action).unsqueeze(0)
                        entropy += dist.entropy().mean()

                        log_probs.append(log_prob)
                        values.append(value)
                        rewards.append(
                            torch.tensor([reward],
                                         dtype=torch.float,
                                         device=device))

                        if ACstep == self.config.termination_point - 1:
                            masks.append(
                                torch.tensor([0],
                                             dtype=torch.float,
                                             device=device))
                        else:
                            masks.append(
                                torch.tensor([1],
                                             dtype=torch.float,
                                             device=device))

                        state = next_state

                    next_state = torch.FloatTensor(next_state).to(device)
                    next_value = self.critic(next_state)
                    returns = compute_returns(next_value, rewards, masks)

                    log_probs = torch.cat(log_probs)
                    returns = torch.cat(returns).detach()
                    values = torch.cat(values)

                    advantage = returns - values

                    actor_loss = -((log_probs * advantage.detach()).mean() +
                                   (self.config.entropy_coef /
                                    self.config.termination_point) * entropy)
                    sparsity_loss = self.sparsity_loss(scores)
                    critic_loss = advantage.pow(2).mean()

                    actor_loss = actor_loss / self.config.batch_size
                    sparsity_loss = sparsity_loss / self.config.batch_size
                    critic_loss = critic_loss / self.config.batch_size
                    actor_loss.backward()
                    sparsity_loss.backward()
                    critic_loss.backward()

                    reward_mean = torch.mean(torch.stack(rewards))
                    reward_history.append(reward_mean)
                    actor_loss_history.append(actor_loss)
                    sparsity_loss_history.append(sparsity_loss)
                    critic_loss_history.append(critic_loss)

                    if self.config.verbose:
                        tqdm.write('Plotting...')

                    self.writer.update_loss(original_prob.data, step,
                                            'original_prob')
                    self.writer.update_loss(sum_prob.data, step, 'sum_prob')

                    step += 1

                # Update s_lstm, actor and critic parameters every 'batch_size' iterations
                torch.nn.utils.clip_grad_norm_(
                    list(self.actor.parameters()) +
                    list(self.linear_compress.parameters()) +
                    list(self.summarizer.s_lstm.parameters()) +
                    list(self.critic.parameters()), self.config.clip)
                self.optimizerA_s.step()
                self.optimizerC.step()

            recon_loss_init = torch.stack(recon_loss_init_history).mean()
            recon_loss = torch.stack(recon_loss_history).mean()
            prior_loss = torch.stack(prior_loss_history).mean()
            g_loss = torch.stack(g_loss_history).mean()
            e_loss = torch.stack(e_loss_history).mean()
            d_loss = torch.stack(d_loss_history).mean()
            c_original_loss = torch.stack(c_original_loss_history).mean()
            c_summary_loss = torch.stack(c_summary_loss_history).mean()
            sparsity_loss = torch.stack(sparsity_loss_history).mean()
            actor_loss = torch.stack(actor_loss_history).mean()
            critic_loss = torch.stack(critic_loss_history).mean()
            reward = torch.mean(torch.stack(reward_history))

            # Plot
            if self.config.verbose:
                tqdm.write('Plotting...')
            self.writer.update_loss(recon_loss_init, epoch_i,
                                    'recon_loss_init_epoch')
            self.writer.update_loss(recon_loss, epoch_i, 'recon_loss_epoch')
            self.writer.update_loss(prior_loss, epoch_i, 'prior_loss_epoch')
            self.writer.update_loss(g_loss, epoch_i, 'g_loss_epoch')
            self.writer.update_loss(e_loss, epoch_i, 'e_loss_epoch')
            self.writer.update_loss(d_loss, epoch_i, 'd_loss_epoch')
            self.writer.update_loss(c_original_loss, epoch_i,
                                    'c_original_loss_epoch')
            self.writer.update_loss(c_summary_loss, epoch_i,
                                    'c_summary_loss_epoch')
            self.writer.update_loss(sparsity_loss, epoch_i,
                                    'sparsity_loss_epoch')
            self.writer.update_loss(actor_loss, epoch_i, 'actor_loss_epoch')
            self.writer.update_loss(critic_loss, epoch_i, 'critic_loss_epoch')
            self.writer.update_loss(reward, epoch_i, 'reward_epoch')

            # Save parameters at checkpoint
            ckpt_path = str(self.config.save_dir) + f'/epoch-{epoch_i}.pkl'
            if self.config.verbose:
                tqdm.write(f'Save parameters at {ckpt_path}')
            torch.save(self.model.state_dict(), ckpt_path)

            self.evaluate(epoch_i)

    def evaluate(self, epoch_i):

        self.model.eval()

        out_dict = {}

        for image_features, video_name, action_fragments in tqdm(
                self.test_loader, desc='Evaluate', ncols=80, leave=False):
            # [seq_len, batch_size=1, input_size]
            image_features = image_features.view(-1, self.config.input_size)
            image_features_ = Variable(image_features).cuda()

            # [seq_len, 1, hidden_size]
            original_features = self.linear_compress(
                image_features_.detach()).unsqueeze(1)
            seq_len = original_features.shape[0]

            with torch.no_grad():

                _, scores = self.AC(original_features, seq_len,
                                    action_fragments)

                scores = scores.squeeze(1)
                scores = scores.cpu().numpy().tolist()

                out_dict[video_name] = scores

            score_save_path = self.config.score_dir.joinpath(
                f'{self.config.video_type}_{epoch_i}.json')
            with open(score_save_path, 'w') as f:
                if self.config.verbose:
                    tqdm.write(f'Saving score at {str(score_save_path)}.')
                json.dump(out_dict, f)
            score_save_path.chmod(0o777)
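
The compute_returns helper called in the actor-critic block above is not part of the snippet; a common discounted-return implementation that matches how it is used here (gamma is an assumed hyperparameter) is:

def compute_returns(next_value, rewards, masks, gamma=0.99):
    # walk the episode backwards, bootstrapping from the critic's
    # value estimate of the state after the final step
    R = next_value
    returns = []
    for step in reversed(range(len(rewards))):
        R = rewards[step] + gamma * R * masks[step]
        returns.insert(0, R)
    return returns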
Example #23
def main_worker(gpu, config):
    # GPU is assigned
    config.gpu = gpu
    config.rank = gpu
    print(f'Launching at GPU {gpu}')

    if config.distributed:
        dist.init_process_group(
            backend='nccl',
            init_method='tcp://127.0.0.1:9001',
            # init_method="env://",
            world_size=config.world_size,
            rank=config.rank)

    if config.clustering:
        feat_dim = config.emb_dim  # feat_dim
        imsize = config.resize_input_size  # imsize
        n_centroids = config.n_centroids
        n_iter = config.n_iter
        encoder = config.encoder
        cluster_src = config.cluster_src

        centroid_dir = Path('../datasets/cluster_centroids/').resolve()
        if config.im_ratio == 'original':
            centroid_path = centroid_dir.joinpath(
                f'{encoder}_{cluster_src}_centroids{n_centroids}_iter{n_iter}_d{feat_dim}_grid{config.n_grid}.npy'
            )
        else:
            centroid_path = centroid_dir.joinpath(
                f'{encoder}_{cluster_src}_centroids{n_centroids}_iter{n_iter}_d{feat_dim}_grid{config.n_grid}_imsize{imsize}.npy'
            )
        centroids = np.load(centroid_path)

        Emb = nn.Embedding.from_pretrained(torch.from_numpy(centroids),
                                           freeze=True)
    else:
        Emb = None

    if config.classifier is None:
        E = None
    elif config.classifier == 'resnet101':
        E = ResNetEncoder('resnet101')
    elif config.classifier == 'resnet50':
        E = ResNetEncoder('resnet50')

    G = Generator(
        base_dim=config.g_base_dim,
        emb_dim=config.emb_dim,
        mod_dim=config.y_mod_dim,
        n_channel=config.n_channel,
        target_size=config.resize_target_size,
        extra_layers=config.g_extra_layers,
        init_H=config.n_grid,
        init_W=config.n_grid,
        norm_type=config.g_norm_type,
        SN=config.SN,
        codebook_dim=config.codebook_dim,
    )

    if config.gan:
        D = Discriminator(base_dim=config.d_base_dim,
                          emb_dim=config.emb_dim,
                          n_channel=config.n_channel,
                          target_size=config.resize_target_size,
                          extra_layers=config.d_extra_layers,
                          init_H=config.n_grid,
                          init_W=config.n_grid,
                          SN=config.SN,
                          ACGAN=config.ACGAN,
                          n_classes=config.n_centroids)
        if config.ACGAN:
            D.emb_classifier.weight = Emb.weight
    else:
        D = None

    # Logging
    if config.gpu == 0:
        logger = logging.getLogger('mylogger')
        file_handler = logging.FileHandler(config.log_dir.joinpath('log.txt'))
        stream_handler = logging.StreamHandler()
        logger.addHandler(file_handler)
        # logger.addHandler(stream_handler)
        logger.setLevel(logging.DEBUG)

        print('#===== (Trainable) Parameters =====#')

        def count_parameters(model):
            return sum(p.numel() for p in model.parameters()
                       if p.requires_grad)

        n_params = 0
        for model_name, model in [('E', E), ('G', G), ('D', D), ('Emb', Emb)]:
            if model is not None:
                # print(model)
                logger.info(model)
                # for name, p in model.named_parameters():
                #     print(name, '\t', list(p.size()))
                n_param = count_parameters(model)
                log_str = f'# {model_name} Parameters: {n_param}'
                print(log_str)
                logger.info(log_str)
                n_params += n_param
        log_str = f'# Total Parameters: {n_params}'
        logger.info(log_str)
        print(log_str)

        config.save(config.log_dir.joinpath('config.yaml'))

        # Save scripts for backup
        log_src_dir = config.log_dir.joinpath('src')
        log_src_dir.mkdir(exist_ok=True)
        proj_dir = Path(__file__).resolve().parent
        for path in proj_dir.glob('*.py'):
            tgt_path = log_src_dir.joinpath(path.name)
            shutil.copy(path, tgt_path)
    else:
        logger = None

    if config.distributed:
        torch.cuda.set_device(config.gpu)

    if config.distributed:
        if 'bn' in config.g_norm_type:
            G = nn.SyncBatchNorm.convert_sync_batchnorm(G)
        G = G.cuda(config.gpu)

        params = G.parameters()

        g_optim = optim.Adam(
            params,
            lr=config.g_lr,
            betas=[config.g_adam_beta1, config.g_adam_beta2],
            eps=config.adam_eps,
        )

        if config.mixed_precision:
            G, g_optim = amp.initialize(G, g_optim, opt_level='O1')

        G = DDP(G,
                device_ids=[config.gpu],
                find_unused_parameters=True,
                broadcast_buffers=not config.SN)
    else:
        G = G.cuda()

        params = G.parameters()

        g_optim = optim.Adam(
            params,
            lr=config.g_lr,
            betas=[config.g_adam_beta1, config.g_adam_beta2],
            eps=config.adam_eps,
        )
        if config.multiGPU:
            G = nn.DataParallel(G)

    e_optim = None
    if config.classifier:
        if config.distributed:
            E = E.cuda(config.gpu)
        else:
            E = E.cuda()

        E = E.eval()
        if not config.distributed and config.multiGPU:
            E = nn.DataParallel(E)
    else:
        e_optim = None

    if config.gan:
        if config.distributed:
            D = D.cuda(config.gpu)

            d_optim = optim.Adam(
                D.parameters(),
                lr=config.d_lr,
                betas=[config.d_adam_beta1, config.d_adam_beta2],
                eps=config.adam_eps,
            )

            if config.mixed_precision:
                D, d_optim = amp.initialize(D, d_optim, opt_level='O1')

            D = DDP(D,
                    device_ids=[config.gpu],
                    find_unused_parameters=True,
                    broadcast_buffers=not config.SN)
        else:
            D = D.cuda()

            d_optim = optim.Adam(
                D.parameters(),
                lr=config.d_lr,
                betas=[config.d_adam_beta1, config.d_adam_beta2],
                eps=config.adam_eps,
            )
            if config.multiGPU:
                D = nn.DataParallel(D)
    else:
        d_optim = None
    if config.clustering:
        if config.distributed:
            Emb = Emb.cuda(config.gpu)

        else:
            Emb = Emb.cuda()
            if config.multiGPU:
                Emb = nn.DataParallel(Emb)

    train_transform = transforms.Compose([
        transforms.Resize((config.resize_input_size, config.resize_input_size),
                          interpolation=Image.LANCZOS),
    ])
    valid_transform = transforms.Compose([
        transforms.Resize((config.resize_input_size, config.resize_input_size),
                          interpolation=Image.LANCZOS),
    ])

    data_out = ['img']
    if config.clustering:
        data_out.append('cluster_id')

    train_set = 'mscoco_train'
    if config.run_minival:
        train_set = 'mscoco_minival'

    train_loader = get_loader(config,
                              train_set,
                              mode='train',
                              batch_size=config.batch_size,
                              distributed=config.distributed,
                              gpu=config.gpu,
                              workers=config.workers,
                              transform=train_transform,
                              topk=config.train_topk,
                              data_out=data_out)

    if config.distributed:
        valid_batch_size = config.batch_size
    else:
        valid_batch_size = config.batch_size // 4

    val_loader = get_loader(config,
                            'mscoco_minival',
                            mode='val',
                            batch_size=valid_batch_size,
                            distributed=config.distributed,
                            gpu=config.gpu,
                            workers=0,
                            transform=valid_transform,
                            topk=config.valid_topk,
                            data_out=data_out)

    trainer = Trainer(config, E, G, D, Emb, g_optim, d_optim, e_optim,
                      train_loader, val_loader, logger)
    trainer.train()
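
Note the ordering in the distributed branches above: amp.initialize must patch the model and optimizer before the DDP wrapper is applied, so that DDP registers its gradient hooks on the parameters AMP actually updates. A minimal sketch of that ordering, assuming NVIDIA Apex is installed and torch.distributed.init_process_group has already run; model is a hypothetical stand-in for G:

# Sketch only: AMP-before-DDP ordering, assumes Apex and an initialized
# process group.
import torch.nn as nn
import torch.optim as optim
from apex import amp
from torch.nn.parallel import DistributedDataParallel as DDP

model = nn.Linear(128, 128).cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# 1) Patch model + optimizer for mixed precision first...
model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
# 2) ...then wrap in DDP so its hooks see the patched parameters.
model = DDP(model, device_ids=[0], find_unused_parameters=True)
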
Exemplo n.º 24
0
    def __init__(self, n_features, n_hidden):
        super(DGI, self).__init__()
        self.gcn = GCN(n_features, n_hidden)
        self.readout = Readout()
        self.sigmoid = nn.Sigmoid()
        self.discriminator = Discriminator(n_hidden)
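
The constructor above only wires the Deep Graph Infomax modules together; the matching forward pass is not shown. A sketch of a typical DGI forward, assuming the usual signatures (GCN(x, adj) returns node embeddings, Discriminator(summary, h_pos, h_neg) returns logits):

# Sketch only: assumed signatures, not the repository's actual forward().
def dgi_forward(model, x, x_corrupted, adj):
    h_real = model.gcn(x, adj)                # embeddings of the real graph
    h_fake = model.gcn(x_corrupted, adj)      # embeddings of the corrupted graph
    s = model.sigmoid(model.readout(h_real))  # global summary vector
    # Bilinear scores: positive pairs (s, h_real) vs. negative pairs (s, h_fake)
    return model.discriminator(s, h_real, h_fake)
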
Exemplo n.º 25
0
train_data = train_data / 255.
train_labels = np.asarray(train_labels, dtype=np.int32)

train_data = train_data[0:49920]
train_labels = train_labels[0:49920]

tf.random.set_seed(69)
operation_seed = None

# for train
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
train_dataset = train_dataset.shuffle(buffer_size=60000)
train_dataset = train_dataset.batch(batch_size=batch_size)

generator = Generator()
discriminator = Discriminator()

# Defun for performance boost
#generator.call = tf.contrib.eager.defun(generator.call)
#discriminator.call = tf.contrib.eager.defun(discriminator.call)

#discriminator_optimizer = tf.train.AdamOptimizer(learning_rate_D, beta1=0.5)
#discriminator_optimizer = tf.train.RMSPropOptimizer(learning_rate_D)
#generator_optimizer = tf.train.AdamOptimizer(learning_rate_G, beta1=0.5)

discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate_D, beta_1=0.5)
if second_unpaired:
    discriminator_optimizer_2 = tf.keras.optimizers.Adam(learning_rate_D,
                                                         beta_1=0.5)
generator_optimizer = tf.keras.optimizers.Adam(learning_rate_G, beta_1=0.5)
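
In TF2 these optimizers are typically driven from a tf.function train step with tf.GradientTape. A minimal sketch under the assumption of standard DCGAN losses on logits; the loss helper below is illustrative, not taken from the snippet:

# Sketch only: assumes the discriminator outputs logits.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

@tf.function
def train_step(real_images, noise):
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake_images = generator(noise, training=True)
        real_logits = discriminator(real_images, training=True)
        fake_logits = discriminator(fake_images, training=True)

        g_loss = cross_entropy(tf.ones_like(fake_logits), fake_logits)
        d_loss = (cross_entropy(tf.ones_like(real_logits), real_logits) +
                  cross_entropy(tf.zeros_like(fake_logits), fake_logits))

    g_grads = g_tape.gradient(g_loss, generator.trainable_variables)
    d_grads = d_tape.gradient(d_loss, discriminator.trainable_variables)
    generator_optimizer.apply_gradients(zip(g_grads, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(d_grads, discriminator.trainable_variables))
    return g_loss, d_loss
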
Exemplo n.º 26
0
def trainer(cfg: DictConfig) -> None:
    
    os.environ["L5KIT_DATA_FOLDER"] = cfg.l5kit_data_folder
    dm = LocalDataManager(None)

    logger = logging.getLogger(__name__)

    logger.info("Working directory : {}".format(os.getcwd()))

    logger.info("Load dataset...")

    train_cfg = cfg["train_data_loader"]
    valid_cfg = cfg["valid_data_loader"]

    # rasterizer
    rasterizer = build_rasterizer(cfg, dm)

    train_path = train_cfg["key"]
    train_zarr = ChunkedDataset(dm.require(train_path)).open(cached=False)

    logger.info(f"train_zarr {type(train_zarr)}")

    # loading custom mask (we mask static agents)
    logger.info(f"Loading mask in path {train_cfg['mask_path']}")
    custom_mask = np.load(train_cfg['mask_path'])
    logger.info(f"Length of training mask is: {custom_mask.sum()}")

    train_agent_dataset = AgentDataset(cfg, train_zarr, rasterizer, agents_mask=custom_mask)

    # transform dataset to the proper frame of reference
    train_dataset = TransformDataset(train_agent_dataset, cfg)

    if train_cfg['subset'] != -1:
        train_dataset = Subset(train_dataset, np.arange(train_cfg['subset']))

    train_loader = DataLoader(train_dataset,
                              shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"],
                              num_workers=train_cfg["num_workers"])

    logger.info(train_agent_dataset)

    # loading custom mask for validation dataset
    logger.info(f"Loading val mask in path {valid_cfg['mask_path']}")
    val_custom_mask = np.load(valid_cfg['mask_path'])
    logger.info(f"Length of validation mask is: {val_custom_mask.sum()}")

    valid_path = valid_cfg["key"]
    valid_zarr = ChunkedDataset(dm.require(valid_path)).open(cached=False)

    logger.info(f"valid_zarr {type(train_zarr)}")

    valid_agent_dataset = AgentDataset(cfg, valid_zarr, rasterizer, agents_mask=val_custom_mask)

    # transform validation dataset to the proper frame of reference
    valid_dataset = TransformDataset(valid_agent_dataset, cfg)

    if valid_cfg['subset'] != -1:
        valid_dataset = Subset(valid_dataset, np.arange(valid_cfg['subset']))

    valid_loader = DataLoader(
        valid_dataset,
        shuffle=valid_cfg["shuffle"],
        batch_size=valid_cfg["batch_size"],
        num_workers=valid_cfg["num_workers"]
    )

    logger.info(valid_agent_dataset)
    logger.info(f"# Full AgentDataset train: {len(train_agent_dataset)} #valid: {len(valid_agent_dataset)}")
    logger.info(f"# Actual AgentDataset train: {len(train_dataset)} #valid: {len(valid_dataset)}")

    n_epochs = cfg['train_params']['num_epochs']

    d_steps = cfg['train_params']['num_d_steps']
    g_steps = cfg['train_params']['num_g_steps']

    noise_dim = cfg['gan_params']['noise_dim']
    g_learning_rate = cfg['train_params']['g_learning_rate']
    d_learning_rate = cfg['train_params']['d_learning_rate']

    if cfg['gan_params']['gan_type'] == 'vanilla':
        cross_entropy = nn.BCELoss()

    generator = Generator(input_dim=cfg['gan_params']['input_dim'],
                          embedding_dim=cfg['gan_params']['embedding_dim'],
                          decoder_dim=cfg['gan_params']['decoder_dim'],
                          trajectory_dim=cfg['model_params']['future_num_frames'],
                          noise_dim=noise_dim,
                          backbone_type=cfg['gan_params']['backbone_type'],
                          embedding_type=cfg['gan_params']['embedding_type']
                          )

    generator.to(cfg['device'])
    generator.train()  # train mode
    
    W = cfg['raster_params']['raster_size'][0]
    discriminator = Discriminator(width=W,
                                  h_0=cfg['raster_params']['ego_center'][0]*W,
                                  w_0=cfg['raster_params']['ego_center'][1]*W,
                                  r=cfg['raster_params']['pixel_size'][0],
                                  sigma=cfg['gan_params']['sigma'],
                                  channels_num=cfg['model_params']['future_num_frames']+3,
                                  num_disc_feats=cfg['gan_params']['num_disc_feats'],
                                  input_dim=cfg['gan_params']['input_dim'],
                                  device=cfg['device'],
                                  gan_type=cfg['gan_params']['gan_type'],
                                  embedding_type=cfg['gan_params']['embedding_type'],
                                  lstm_embedding_dim=cfg['gan_params']['embedding_dim']
                                  )

    discriminator.to(cfg['device'])
    discriminator.apply(weights_init)
    discriminator.train()  # train mode

    if cfg['gan_params']['gan_type'] == 'wasserstein':
        optimizer_g = optim.RMSprop(generator.parameters(), lr=g_learning_rate)
        optimizer_d = optim.RMSprop(discriminator.parameters(), lr=d_learning_rate)
    elif cfg['gan_params']['gan_type'] == 'wasserstein_gp':
        betas = (0.0, 0.9)
        optimizer_g = optim.Adam(generator.parameters(), lr=g_learning_rate, betas=betas)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=d_learning_rate, betas=betas)
    else:
        optimizer_g = optim.Adam(generator.parameters(), lr=g_learning_rate)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=d_learning_rate)

    d_steps_left = d_steps
    g_steps_left = g_steps

    # variables for statistics
    d_full_loss = []
    g_full_loss = []
    gp_values = []
    l2_variety_values = []
    metric_vals = []

    # checkpoint dictionary
    checkpoint = {
        'G_losses': defaultdict(list),
        'D_losses': defaultdict(list),
        'counters': {
            't': None,
            'epoch': None,
        },
        'g_state': None,
        'g_optim_state': None,
        'd_state': None,
        'd_optim_state': None
    }

    id_batch = 0

    # total number of batches
    len_of_epoch = len(train_loader)

    for epoch in range(n_epochs):
        for batch in train_loader:
            batch = [tensor.to(cfg['device']) for tensor in batch]

            # Creates single raster image from sequence of images from l5kit's AgentDataset
            batch[0] = f_get_raster_image(cfg=cfg,
                                          images=batch[0],
                                          history_weight=cfg['model_params']['history_fading_weight'])

            (image, target_positions, target_availabilities,
             history_positions, history_yaws, centroid, world_to_image) = batch

            actor_state = (history_positions, history_yaws)

            batch_size = image.shape[0]

            # noise for generator
            noise = torch.normal(size=(batch_size, noise_dim),
                                 mean=0.0,
                                 std=1.0,
                                 dtype=torch.float32,
                                 device=cfg['device'])

            #######################################
            #       TRAIN DISCRIMINATOR
            #######################################

            # train discriminator (d_steps_left) times (using different batches)
            # train generator (g_steps_left) times (using different batches)

            if d_steps_left > 0:
                d_steps_left -= 1

                for pd in discriminator.parameters():  # reset requires_grad
                    pd.requires_grad = True  # they are set to False below in generator update

                # freeze generator while training discriminator
                for pg in generator.parameters():
                    pg.requires_grad = False

                discriminator.zero_grad()

                # generate fake trajectories (batch_size, target_size, 2) for current batch
                fake_trajectory = generator(image, actor_state, noise)

                # discriminator predictions (batch_size, 1) on real and fake trajectories
                d_real_pred = discriminator(target_positions, image, actor_state)
                d_g_pred = discriminator(fake_trajectory, image, actor_state)

                # loss
                if cfg['gan_params']['gan_type'] == 'vanilla':
                    # tensor with true/fake labels of size (batch_size, 1)
                    real_labels = torch.full((batch_size,), 1, dtype=torch.float, device=cfg['device'])
                    fake_labels = torch.full((batch_size,), 0, dtype=torch.float, device=cfg['device'])

                    real_loss = cross_entropy(d_real_pred, real_labels)
                    fake_loss = cross_entropy(d_g_pred, fake_labels)

                    total_loss = real_loss + fake_loss
                elif cfg['gan_params']['gan_type'] == 'wasserstein':  # D(fake) - D(real)
                    total_loss = torch.mean(d_g_pred) - torch.mean(d_real_pred)
                elif cfg['gan_params']['gan_type'] == 'wasserstein_gp':
                    gp_loss = gradient_penalty(discrim=discriminator,
                                               real_trajectory=target_positions,
                                               fake_trajectory=fake_trajectory,
                                               in_image=image,
                                               in_actor_state=actor_state,
                                               lambda_gp=cfg['losses']['lambda_gp'],
                                               device=cfg['device'])

                    total_loss = torch.mean(d_g_pred) - torch.mean(d_real_pred) + gp_loss
                else:
                    raise NotImplementedError

                # calculate gradients for this batch
                total_loss.backward()
                optimizer_d.step()

                # weight clipping for discriminator in pure Wasserstein GAN
                if cfg['gan_params']['gan_type'] == 'wasserstein':
                    c = cfg['losses']['weight_clip']
                    for p in discriminator.parameters():
                        p.data.clamp_(-c, c)

                d_full_loss.append(total_loss.item())

                if cfg['gan_params']['gan_type'] == 'wasserstein_gp':
                    gp_values.append(gp_loss.item())

            #######################################
            #         TRAIN GENERATOR
            #######################################

            elif g_steps_left > 0:  # we either train generator or discriminator on current batch
                g_steps_left -= 1

                for pd in discriminator.parameters():
                    pd.requires_grad = False  # avoid discriminator training

                # unfreeze generator
                for pg in generator.parameters():
                    pg.requires_grad = True

                generator.zero_grad()

                if cfg['losses']['use_variety_l2']:
                    l2_variety_loss, fake_trajectory = l2_loss_kmin(traj_real=target_positions,
                                                                    generator_=generator,
                                                                    image=image,
                                                                    actor_state=actor_state,
                                                                    cfg=cfg,
                                                                    kmin=cfg['losses']['k_min'],
                                                                    return_best_traj=True)
                else:
                    fake_trajectory = generator(image, actor_state, noise)

                d_g_pred = discriminator(fake_trajectory, image, actor_state)

                if cfg['gan_params']['gan_type'] == 'vanilla':
                    # while training generator we associate generated fake examples
                    # with real labels in order to measure generator quality
                    real_labels = torch.full((batch_size,), 1, dtype=torch.float, device=cfg['device'])
                    fake_loss = cross_entropy(d_g_pred, real_labels)
                elif cfg['gan_params']['gan_type'] in ['wasserstein', 'wasserstein_gp']:  # -D(fake)
                    fake_loss = -torch.mean(d_g_pred)
                else:
                    raise NotImplementedError

                if cfg['losses']['use_variety_l2']:
                    fake_loss += cfg['losses']['weight_variety_l2'] * l2_variety_loss

                    l2_variety_values.append(l2_variety_loss.item())

                fake_loss.backward()
                optimizer_g.step()

                g_full_loss.append(fake_loss.item())

            # renew d_steps_left, g_steps_left at the end of full discriminator-generator training cycle
            if d_steps_left == 0 and g_steps_left == 0:
                d_steps_left = d_steps
                g_steps_left = g_steps

            # print current model state on train dataset
            if (id_batch > 0) and (id_batch % cfg['train_params']['print_every_n_steps'] == 0):

                print_statistics(logger=logger,
                                 cfg=cfg,
                                 epoch=epoch,
                                 len_of_epoch=len_of_epoch,
                                 id_batch=id_batch,
                                 d_full_loss=d_full_loss,
                                 g_full_loss=g_full_loss,
                                 gp_values=gp_values,
                                 l2_variety_values=l2_variety_values,
                                 print_over_n_last=1000)

                # save rasterized image of 0th element of current batch
                plot_traj_on_map(cfg, 0, batch, generator, save_name=str(id_batch),
                                 save_directory=cfg['train_params']['image_sample_dir'])

            # Save checkpoint and evaluate the model
            if (id_batch > 0) and (id_batch % cfg['train_params']['checkpoint_every_n_steps'] == 0):
                checkpoint['counters']['t'] = id_batch
                checkpoint['counters']['epoch'] = epoch

                # Check stats on the validation set
                logger.info('Checking stats on val ...')
                metrics_val = evaluate(cfg, generator, valid_loader)
                metric_vals.append(metrics_val)

                with open('metric_vals_list.pkl', 'wb') as handle:
                    pickle.dump(metric_vals, handle, protocol=pickle.HIGHEST_PROTOCOL)

                for k, v in sorted(metrics_val.items()):
                    logger.info('  [val] {}: {:.3f}'.format(k, v))

                checkpoint['g_state'] = generator.state_dict()
                checkpoint['g_optim_state'] = optimizer_g.state_dict()
                checkpoint['d_state'] = discriminator.state_dict()
                checkpoint['d_optim_state'] = optimizer_d.state_dict()
                checkpoint_path = os.path.join(os.getcwd(), f"{cfg['model_name']}_{id_batch}.pt")
                logger.info('Saving checkpoint to {}'.format(checkpoint_path))
                torch.save(checkpoint, checkpoint_path)
                logger.info('Done.')

                results_df, metric_df = get_results_plot(d_full_loss,
                                                         g_full_loss,
                                                         metric_vals,
                                                         train_window_size=100,
                                                         val_window_size=10,
                                                         is_save=True)

                results_df.to_excel('results.xlsx', index=False)
                metric_df.to_excel('val_metrics.xlsx', index=False)

            id_batch = id_batch + 1
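
The gradient_penalty helper called in the wasserstein_gp branch is not shown. Below is a plausible WGAN-GP implementation (Gulrajani et al., 2017) whose signature mirrors the call site; the body is an assumption, not the repository's code:

# Sketch only: standard WGAN-GP penalty on trajectories of shape (B, T, 2).
import torch

def gradient_penalty(discrim, real_trajectory, fake_trajectory,
                     in_image, in_actor_state, lambda_gp, device):
    batch_size = real_trajectory.size(0)
    # One random interpolation coefficient per sample, broadcast over (T, 2)
    alpha = torch.rand(batch_size, 1, 1, device=device)
    interp = (alpha * real_trajectory + (1 - alpha) * fake_trajectory).detach()
    interp.requires_grad_(True)

    d_interp = discrim(interp, in_image, in_actor_state)

    grads = torch.autograd.grad(outputs=d_interp,
                                inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True,
                                retain_graph=True)[0]
    grads = grads.view(batch_size, -1)
    # Penalize deviation of the per-sample gradient norm from 1
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()
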
Exemplo n.º 27
0
class piGAN(pl.LightningModule):
    def __init__(self,
                 image_size,
                 input_features,
                 hidden_features,
                 optim_cfg,
                 generator_cfg,
                 discriminator_cfg,
                 image_dataset,
                 output_dir="../generated_images",
                 add_layers_iters: int = 10000,
                 sample_every: int = 100,
                 num_samples: int = 4,
                 log_every: int = 10,
                 gp_every: int = 4,
                 batch_size: int = 32,
                 loss_mode: str = "relu"):
        super(piGAN, self).__init__()
        self.input_features = input_features
        self.hidden_features = hidden_features
        self.optim_cfg = optim_cfg
        self.batch_size = batch_size
        self.image_dataset = image_dataset
        self.log_every = log_every
        self.gp_every = gp_every
        self.sample_every = sample_every
        self.add_layers_iters = add_layers_iters
        self.output_dir = output_dir
        self.num_samples = num_samples

        # in case the directory for generated images doesn't exist - create it
        os.makedirs(output_dir, exist_ok=True)

        self.G = Generator(image_size=image_size,
                           input_features=input_features,
                           hidden_features=hidden_features,
                           **generator_cfg)

        self.D = Discriminator(image_size=image_size, **discriminator_cfg)

        # setup initial resolution for loaded_images
        self.image_dataset.set_transforms(self.D.init_resolution)

        self.iterations = 0
        self.last_loss_D = 0
        self.last_loss_G = 0

        self.discriminator_loss, self.generator_loss = get_GAN_losses(
            loss_mode)

    def configure_optimizers(self):
        lr_discr = self.optim_cfg["discriminator"]["learning_rate"]
        target_lr_discr = self.optim_cfg["discriminator"][
            "target_learning_rate"]

        lr_gen = self.optim_cfg["generator"]["learning_rate"]
        target_lr_gen = self.optim_cfg["generator"]["target_learning_rate"]

        lr_decay_span = self.optim_cfg["learning_rate_decay_span"]

        self.optim_D = Adam(self.D.parameters(), betas=(0, 0.9), lr=lr_discr)
        self.optim_G = Adam(self.G.parameters(), betas=(0, 0.9), lr=lr_gen)

        D_decay_fn = lambda i: max(1 - i / lr_decay_span, 0) + (
            target_lr_discr / lr_discr) * min(i / lr_decay_span, 1)
        G_decay_fn = lambda i: max(1 - i / lr_decay_span, 0) + (
            target_lr_gen / lr_gen) * min(i / lr_decay_span, 1)

        self.sched_D = LambdaLR(self.optim_D, D_decay_fn)
        self.sched_G = LambdaLR(self.optim_G, G_decay_fn)

        return [self.optim_D, self.optim_G], [self.sched_D, self.sched_G]

    def forward(self, x):
        return self.G(x)

    def generate_samples(self, num_samples: int):
        rand_latents = torch.randn(num_samples, self.input_features)
        rand_latents = rand_latents.to(self.device)
        return self.forward(rand_latents)

    def training_step(self, batch, batch_idx, optimizer_idx):
        images = batch

        # gp
        apply_gp = self.iterations % self.gp_every == 0

        # train discriminator
        if optimizer_idx == 0:
            images = images.requires_grad_()

            # increase resolution
            if self.iterations % self.add_layers_iters == 0:
                if self.iterations != 0:
                    self.D.increase_resolution_()

                image_size = self.D.resolution.item()
                self.G.set_image_size(image_size)
                self.image_dataset.set_transforms(image_size)

            real_out = self.D(images)
            real_labels = torch.ones_like(real_out)

            fake_images = self.generate_samples(self.batch_size)
            fake_out = self.D(fake_images.clone().detach())
            fake_labels = torch.zeros_like(fake_out)

            loss_D = self.discriminator_loss(real_out, real_labels) \
                   + self.discriminator_loss(fake_out, fake_labels)

            if apply_gp:
                gp = gradient_penalty(images, real_out)
                self.last_loss_gp = get_item(gp)
                loss = loss_D + gp
            else:
                loss = loss_D

            self.last_loss_D = loss_D

            tqdm_dict = {'loss_D': loss_D}
            output = OrderedDict({
                'loss': loss,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })
            return output

        # train generator
        if optimizer_idx == 1:
            fake_images = self.generate_samples(self.batch_size)
            fake_out = self.D(fake_images)
            fake_labels = torch.ones_like(fake_out)

            loss_G = self.generator_loss(fake_out, fake_labels)

            self.last_loss_G = loss_G

            tqdm_dict = {'loss_G': loss_G}
            output = OrderedDict({
                'loss': loss_G,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })

            return output

    def training_epoch_end(self, outputs):
        self.D.update_iter_()
        self.iterations += 1

        if self.iterations % self.sample_every == 0:
            imgs = self.generate_samples(self.num_samples)
            imgs.clamp_(0., 1.)
            save_image(
                imgs,
                f'{self.output_dir}/generated_image_{self.iterations}.png',
                nrow=max(2, self.num_samples))
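
get_GAN_losses is not shown; for loss_mode='relu' it plausibly returns the hinge ("relu") objective. A sketch under that assumption, with both callables taking (scores, labels) as in training_step above:

# Sketch only: assumed hinge-loss implementation, the real helper may differ.
import torch
import torch.nn.functional as F

def get_GAN_losses(loss_mode: str):
    if loss_mode == "relu":
        def discriminator_loss(scores, labels):
            # labels are 1 for real, 0 for fake (see training_step above)
            real = labels.bool()
            per_sample = torch.where(real,
                                     F.relu(1.0 - scores),  # push real scores >= +1
                                     F.relu(1.0 + scores))  # push fake scores <= -1
            return per_sample.mean()

        def generator_loss(scores, labels):
            return -scores.mean()  # generator maximizes D's score on fakes
        return discriminator_loss, generator_loss
    raise NotImplementedError(f"loss_mode={loss_mode!r}")
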
Exemplo n.º 28
0
class Solver(object):
    def __init__(self, config=None, train_loader=None, test_loader=None):
        """Class that Builds, Trains and Evaluates SUM-GAN model"""
        self.config = config
        self.train_loader = train_loader
        self.test_loader = test_loader

    def build(self):

        # Build Modules
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).cuda()
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).cuda()
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).cuda()
        self.model = nn.ModuleList(
            [self.linear_compress, self.summarizer, self.discriminator])

        if self.config.mode == 'train':
            # Build Optimizers
            self.s_e_optimizer = optim.Adam(
                list(self.summarizer.s_lstm.parameters()) +
                list(self.summarizer.vae.e_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.d_optimizer = optim.Adam(
                list(self.summarizer.vae.d_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.c_optimizer = optim.Adam(
                list(self.discriminator.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.discriminator_lr)

            self.model.train()
            # self.model.apply(apply_weight_norm)

            # Overview Parameters
            # print('Model Parameters')
            # for name, param in self.model.named_parameters():
            #     print('\t' + name + '\t', list(param.size()))

            # Tensorboard
            self.writer = TensorboardWriter(self.config.log_dir)

    @staticmethod
    def freeze_model(module):
        for p in module.parameters():
            p.requires_grad = False

    def reconstruction_loss(self, h_origin, h_fake):
        """L2 loss between original-regenerated features at cLSTM's last hidden layer"""

        return torch.norm(h_origin - h_fake, p=2)

    def prior_loss(self, mu, log_variance):
        """KL( q(e|x) || N(0,1) )"""
        return 0.5 * torch.sum(-1 + log_variance.exp() + mu.pow(2) -
                               log_variance)

    def sparsity_loss(self, scores):
        """Summary-Length Regularization"""

        return torch.abs(torch.mean(scores) - self.config.summary_rate)

    def gan_loss(self, original_prob, fake_prob, uniform_prob):
        """Typical GAN loss + Classify uniformly scored features"""

        gan_loss = torch.mean(
            torch.log(original_prob) + torch.log(1 - fake_prob) +
            torch.log(1 - uniform_prob))  # Discriminate uniform score

        return gan_loss

    def train(self):
        step = 0
        for epoch_i in trange(self.config.n_epochs, desc='Epoch', ncols=80):
            s_e_loss_history = []
            d_loss_history = []
            c_loss_history = []
            for batch_i, image_features in enumerate(
                    tqdm(self.train_loader,
                         desc='Batch',
                         ncols=80,
                         leave=False)):

                # Skip overly long videos, presumably to keep the recurrent
                # models within GPU memory limits
                if image_features.size(1) > 10000:
                    continue

                # [batch_size=1, seq_len, 2048]
                # [seq_len, 2048]
                image_features = image_features.view(-1,
                                                     self.config.input_size)

                # [seq_len, 2048]
                image_features_ = Variable(image_features).cuda()

                #---- Train sLSTM, eLSTM ----#
                if self.config.verbose:
                    tqdm.write('\nTraining sLSTM and eLSTM...')

                # [seq_len, 1, hidden_size]
                original_features = self.linear_compress(
                    image_features_.detach()).unsqueeze(1)

                scores, h_mu, h_log_variance, generated_features = self.summarizer(
                    original_features)
                _, _, _, uniform_features = self.summarizer(original_features,
                                                            uniform=True)

                h_origin, original_prob = self.discriminator(original_features)
                h_fake, fake_prob = self.discriminator(generated_features)
                h_uniform, uniform_prob = self.discriminator(uniform_features)

                tqdm.write(
                    f'original_p: {original_prob.item():.3f}, fake_p: {fake_prob.item():.3f}, uniform_p: {uniform_prob.item():.3f}'
                )

                reconstruction_loss = self.reconstruction_loss(
                    h_origin, h_fake)
                prior_loss = self.prior_loss(h_mu, h_log_variance)
                sparsity_loss = self.sparsity_loss(scores)

                tqdm.write(
                    f'recon loss {reconstruction_loss.item():.3f}, prior loss: {prior_loss.item():.3f}, sparsity loss: {sparsity_loss.item():.3f}'
                )

                s_e_loss = reconstruction_loss + prior_loss + sparsity_loss

                self.s_e_optimizer.zero_grad()
                s_e_loss.backward()
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config.clip)
                self.s_e_optimizer.step()

                s_e_loss_history.append(s_e_loss.data)

                #---- Train dLSTM ----#
                if self.config.verbose:
                    tqdm.write('Training dLSTM...')

                # [seq_len, 1, hidden_size]
                original_features = self.linear_compress(
                    image_features_.detach()).unsqueeze(1)

                scores, h_mu, h_log_variance, generated_features = self.summarizer(
                    original_features)
                _, _, _, uniform_features = self.summarizer(original_features,
                                                            uniform=True)

                h_origin, original_prob = self.discriminator(original_features)
                h_fake, fake_prob = self.discriminator(generated_features)
                h_uniform, uniform_prob = self.discriminator(uniform_features)

                tqdm.write(
                    f'original_p: {original_prob.item():.3f}, fake_p: {fake_prob.item():.3f}, uniform_p: {uniform_prob.item():.3f}'
                )

                reconstruction_loss = self.reconstruction_loss(
                    h_origin, h_fake)
                gan_loss = self.gan_loss(original_prob, fake_prob,
                                         uniform_prob)

                tqdm.write(
                    f'recon loss {reconstruction_loss.item():.3f}, gan loss: {gan_loss.item():.3f}'
                )

                d_loss = reconstruction_loss + gan_loss

                self.d_optimizer.zero_grad()
                d_loss.backward()
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config.clip)
                self.d_optimizer.step()

                d_loss_history.append(d_loss.data)

                #---- Train cLSTM ----#
                if batch_i > self.config.discriminator_slow_start:
                    if self.config.verbose:
                        tqdm.write('Training cLSTM...')
                    # [seq_len, 1, hidden_size]
                    original_features = self.linear_compress(
                        image_features_.detach()).unsqueeze(1)

                    scores, h_mu, h_log_variance, generated_features = self.summarizer(
                        original_features)
                    _, _, _, uniform_features = self.summarizer(
                        original_features, uniform=True)

                    h_origin, original_prob = self.discriminator(
                        original_features)
                    h_fake, fake_prob = self.discriminator(generated_features)
                    h_uniform, uniform_prob = self.discriminator(
                        uniform_features)
                    tqdm.write(
                        f'original_p: {original_prob.item():.3f}, fake_p: {fake_prob.item():.3f}, uniform_p: {uniform_prob.item():.3f}'
                    )

                    # Maximization
                    c_loss = -1 * self.gan_loss(original_prob, fake_prob,
                                                uniform_prob)

                    tqdm.write(f'gan loss: {gan_loss.item():.3f}')

                    self.c_optimizer.zero_grad()
                    c_loss.backward()
                    # Gradient clipping
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   self.config.clip)
                    self.c_optimizer.step()

                    c_loss_history.append(c_loss.data)

                if self.config.verbose:
                    tqdm.write('Plotting...')

                self.writer.update_loss(reconstruction_loss.data, step,
                                        'recon_loss')
                self.writer.update_loss(prior_loss.data, step, 'prior_loss')
                self.writer.update_loss(sparsity_loss.data, step,
                                        'sparsity_loss')
                self.writer.update_loss(gan_loss.data, step, 'gan_loss')

                # self.writer.update_loss(s_e_loss.data, step, 's_e_loss')
                # self.writer.update_loss(d_loss.data, step, 'd_loss')
                # self.writer.update_loss(c_loss.data, step, 'c_loss')

                self.writer.update_loss(original_prob.data, step,
                                        'original_prob')
                self.writer.update_loss(fake_prob.data, step, 'fake_prob')
                self.writer.update_loss(uniform_prob.data, step,
                                        'uniform_prob')

                step += 1

            s_e_loss = torch.stack(s_e_loss_history).mean()
            d_loss = torch.stack(d_loss_history).mean()
            c_loss = torch.stack(c_loss_history).mean()

            # Plot
            if self.config.verbose:
                tqdm.write('Plotting...')
            self.writer.update_loss(s_e_loss, epoch_i, 's_e_loss_epoch')
            self.writer.update_loss(d_loss, epoch_i, 'd_loss_epoch')
            self.writer.update_loss(c_loss, epoch_i, 'c_loss_epoch')

            # Save parameters at checkpoint
            ckpt_path = str(self.config.save_dir) + f'_epoch-{epoch_i}.pkl'
            tqdm.write(f'Save parameters at {ckpt_path}')
            torch.save(self.model.state_dict(), ckpt_path)

            self.evaluate(epoch_i)

            self.model.train()

    def evaluate(self, epoch_i):
        # checkpoint = self.config.ckpt_path
        # print(f'Load parameters from {checkpoint}')
        # self.model.load_state_dict(torch.load(checkpoint))

        self.model.eval()

        out_dict = {}

        for video_tensor, video_name in tqdm(self.test_loader,
                                             desc='Evaluate',
                                             ncols=80,
                                             leave=False):

            # [seq_len, batch=1, 2048]
            video_tensor = video_tensor.view(-1, self.config.input_size)
            with torch.no_grad():
                video_feature = video_tensor.cuda()

                # [seq_len, 1, hidden_size]
                video_feature = self.linear_compress(
                    video_feature).unsqueeze(1)

                # [seq_len]
                scores = self.summarizer.s_lstm(video_feature).squeeze(1)

                scores = scores.cpu().numpy().tolist()

            out_dict[video_name] = scores

            score_save_path = self.config.score_dir.joinpath(
                f'{self.config.video_type}_{epoch_i}.json')
            with open(score_save_path, 'w') as f:
                tqdm.write(f'Saving score at {str(score_save_path)}.')
                json.dump(out_dict, f)
            score_save_path.chmod(0o777)

    def pretrain(self):
        pass
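
The prior_loss above is the closed-form KL divergence between the encoder's diagonal Gaussian and a standard normal. A quick numerical check against torch.distributions (not part of the solver):

# Sanity check: the closed-form prior_loss equals
# KL( N(mu, sigma^2) || N(0, 1) ) summed over dimensions.
import torch
from torch.distributions import Normal, kl_divergence

mu = torch.randn(8)
log_variance = torch.randn(8)

closed_form = 0.5 * torch.sum(-1 + log_variance.exp() + mu.pow(2) - log_variance)
reference = kl_divergence(Normal(mu, (0.5 * log_variance).exp()),
                          Normal(torch.zeros(8), torch.ones(8))).sum()
assert torch.allclose(closed_form, reference, atol=1e-5)
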
Exemplo n.º 29
0
class Solver(object):
    def __init__(self, config=None, train_loader=None, test_loader=None):
        """Class that Builds, Trains and Evaluates SUM-GAN-sl model"""
        self.config = config
        self.train_loader = train_loader
        self.test_loader = test_loader

    def build(self):

        # Build Modules
        self.linear_compress = nn.Linear(self.config.input_size,
                                         self.config.hidden_size).cuda()
        self.summarizer = Summarizer(input_size=self.config.hidden_size,
                                     hidden_size=self.config.hidden_size,
                                     num_layers=self.config.num_layers).cuda()
        self.discriminator = Discriminator(
            input_size=self.config.hidden_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.num_layers).cuda()
        self.model = nn.ModuleList(
            [self.linear_compress, self.summarizer, self.discriminator])

        if self.config.mode == 'train':
            # Build Optimizers
            self.s_e_optimizer = optim.Adam(
                list(self.summarizer.s_lstm.parameters()) +
                list(self.summarizer.vae.e_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.d_optimizer = optim.Adam(
                list(self.summarizer.vae.d_lstm.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.lr)
            self.c_optimizer = optim.Adam(
                list(self.discriminator.parameters()) +
                list(self.linear_compress.parameters()),
                lr=self.config.discriminator_lr)

            self.writer = TensorboardWriter(str(self.config.log_dir))

    def reconstruction_loss(self, h_origin, h_sum):
        """L2 loss between original-regenerated features at cLSTM's last hidden layer"""

        return torch.norm(h_origin - h_sum, p=2)

    def prior_loss(self, mu, log_variance):
        """KL( q(e|x) || N(0,1) )"""
        return 0.5 * torch.sum(-1 + log_variance.exp() + mu.pow(2) -
                               log_variance)

    def sparsity_loss(self, scores):
        """Summary-Length Regularization"""

        return torch.abs(
            torch.mean(scores) - self.config.regularization_factor)

    # Shared MSE criterion: the adversarial losses below regress the
    # discriminator's probability outputs toward 1 (original) / 0 (summary),
    # in least-squares (LSGAN-style) fashion
    criterion = nn.MSELoss()

    def train(self):
        step = 0
        # Regression targets for the MSE adversarial objective below
        original_label = torch.tensor(1.0).cuda()
        summary_label = torch.tensor(0.0).cuda()
        for epoch_i in trange(self.config.n_epochs, desc='Epoch', ncols=80):
            s_e_loss_history = []
            d_loss_history = []
            c_original_loss_history = []
            c_summary_loss_history = []
            for batch_i, image_features in enumerate(
                    tqdm(self.train_loader,
                         desc='Batch',
                         ncols=80,
                         leave=False)):

                self.model.train()

                # [batch_size=1, seq_len, 1024]
                # [seq_len, 1024]
                image_features = image_features.view(-1,
                                                     self.config.input_size)

                # [seq_len, 1024]
                image_features_ = Variable(image_features).cuda()

                #---- Train sLSTM, eLSTM ----#
                if self.config.verbose:
                    tqdm.write('\nTraining sLSTM and eLSTM...')

                # [seq_len, 1, hidden_size]
                original_features = self.linear_compress(
                    image_features_.detach()).unsqueeze(1)

                scores, h_mu, h_log_variance, generated_features = self.summarizer(
                    original_features)

                h_origin, original_prob = self.discriminator(original_features)
                h_sum, sum_prob = self.discriminator(generated_features)

                tqdm.write(
                    f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                )

                reconstruction_loss = self.reconstruction_loss(h_origin, h_sum)
                prior_loss = self.prior_loss(h_mu, h_log_variance)
                sparsity_loss = self.sparsity_loss(scores)

                tqdm.write(
                    f'recon loss {reconstruction_loss.item():.3f}, prior loss: {prior_loss.item():.3f}, sparsity loss: {sparsity_loss.item():.3f}'
                )

                s_e_loss = reconstruction_loss + prior_loss + sparsity_loss

                self.s_e_optimizer.zero_grad()
                s_e_loss.backward()
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config.clip)
                self.s_e_optimizer.step()

                s_e_loss_history.append(s_e_loss.data)

                #---- Train dLSTM (generator) ----#
                if self.config.verbose:
                    tqdm.write('Training dLSTM...')

                # [seq_len, 1, hidden_size]
                original_features = self.linear_compress(
                    image_features_.detach()).unsqueeze(1)

                scores, h_mu, h_log_variance, generated_features = self.summarizer(
                    original_features)

                h_origin, original_prob = self.discriminator(original_features)
                h_sum, sum_prob = self.discriminator(generated_features)

                tqdm.write(
                    f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                )

                reconstruction_loss = self.reconstruction_loss(h_origin, h_sum)
                g_loss = self.criterion(sum_prob, original_label)

                tqdm.write(
                    f'recon loss {reconstruction_loss.item():.3f}, g loss: {g_loss.item():.3f}'
                )

                d_loss = reconstruction_loss + g_loss

                self.d_optimizer.zero_grad()
                d_loss.backward()
                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config.clip)
                self.d_optimizer.step()

                d_loss_history.append(d_loss.data)

                #---- Train cLSTM ----#
                if self.config.verbose:
                    tqdm.write('Training cLSTM...')

                self.c_optimizer.zero_grad()

                # Train with original loss
                # [seq_len, 1, hidden_size]
                original_features = self.linear_compress(
                    image_features_.detach()).unsqueeze(1)
                h_origin, original_prob = self.discriminator(original_features)
                c_original_loss = self.criterion(original_prob, original_label)
                c_original_loss.backward()

                # Train with summary loss
                scores, h_mu, h_log_variance, generated_features = self.summarizer(
                    original_features)
                h_sum, sum_prob = self.discriminator(
                    generated_features.detach())
                c_summary_loss = self.criterion(sum_prob, summary_label)
                c_summary_loss.backward()

                tqdm.write(
                    f'original_p: {original_prob.item():.3f}, summary_p: {sum_prob.item():.3f}'
                )
                tqdm.write(f'gen loss: {g_loss.item():.3f}')

                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config.clip)
                self.c_optimizer.step()

                c_original_loss_history.append(c_original_loss.data)
                c_summary_loss_history.append(c_summary_loss.data)

                if self.config.verbose:
                    tqdm.write('Plotting...')

                self.writer.update_loss(reconstruction_loss.data, step,
                                        'recon_loss')
                self.writer.update_loss(prior_loss.data, step, 'prior_loss')
                self.writer.update_loss(sparsity_loss.data, step,
                                        'sparsity_loss')
                self.writer.update_loss(g_loss.data, step, 'gen_loss')

                self.writer.update_loss(original_prob.data, step,
                                        'original_prob')
                self.writer.update_loss(sum_prob.data, step, 'sum_prob')

                step += 1

            s_e_loss = torch.stack(s_e_loss_history).mean()
            d_loss = torch.stack(d_loss_history).mean()
            c_original_loss = torch.stack(c_original_loss_history).mean()
            c_summary_loss = torch.stack(c_summary_loss_history).mean()

            # Plot
            if self.config.verbose:
                tqdm.write('Plotting...')
            self.writer.update_loss(s_e_loss, epoch_i, 's_e_loss_epoch')
            self.writer.update_loss(d_loss, epoch_i, 'd_loss_epoch')
            self.writer.update_loss(c_original_loss, epoch_i, 'c_original_loss')
            self.writer.update_loss(c_summary_loss, epoch_i, 'c_summary_loss')

            # Save parameters at checkpoint
            ckpt_path = str(self.config.save_dir) + f'/epoch-{epoch_i}.pkl'
            tqdm.write(f'Save parameters at {ckpt_path}')
            torch.save(self.model.state_dict(), ckpt_path)

            self.evaluate(epoch_i)

    def evaluate(self, epoch_i):

        self.model.eval()

        out_dict = {}

        for video_tensor, video_name in tqdm(self.test_loader,
                                             desc='Evaluate',
                                             ncols=80,
                                             leave=False):

            # [seq_len, batch=1, 1024]
            video_tensor = video_tensor.view(-1, self.config.input_size)
            video_feature = Variable(video_tensor).cuda()

            # [seq_len, 1, hidden_size]
            video_feature = self.linear_compress(
                video_feature.detach()).unsqueeze(1)

            # [seq_len]
            with torch.no_grad():
                scores = self.summarizer.s_lstm(video_feature).squeeze(1)
                scores = scores.cpu().numpy().tolist()

                out_dict[video_name] = scores

            score_save_path = self.config.score_dir.joinpath(
                f'{self.config.video_type}_{epoch_i}.json')
            with open(score_save_path, 'w') as f:
                tqdm.write(f'Saving score at {str(score_save_path)}.')
                json.dump(out_dict, f)
            score_save_path.chmod(0o777)

    def pretrain(self):
        pass
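
For completeness, a hedged sketch of how a Solver like the two above is usually driven; get_config and get_loader are hypothetical helpers, not confirmed parts of this codebase:

# Hypothetical driver; helper names are assumptions.
if __name__ == '__main__':
    config = get_config(mode='train')
    train_loader = get_loader(config, mode='train')
    test_loader = get_loader(config, mode='test')

    solver = Solver(config, train_loader, test_loader)
    solver.build()
    solver.train()  # checkpoints and calls evaluate() after every epoch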