Example #1
    def forward(self, words, dropout=0.1, scale=None):
        if dropout:
            # Drop entire embedding rows: one mask value per vocabulary entry,
            # as dictated by size = (vocab_size, 1).
            size = (self.embed.weight.size(0), 1)
            mask = Variable(dropout_mask(self.embed.weight.data, size, dropout))
            masked_embed_weight = mask * self.embed.weight
        else:
            masked_embed_weight = self.embed.weight

        if scale:
            masked_embed_weight = scale * masked_embed_weight

        padding_idx = self.embed.padding_idx

        if padding_idx is None:
            padding_idx = -1

        if IS_TORCH_04:
            X = F.embedding(words,
                masked_embed_weight, padding_idx, self.embed.max_norm,
                self.embed.norm_type, self.embed.scale_grad_by_freq, self.embed.sparse)
        else:
            X = self.embed._backend.Embedding.apply(words,
                masked_embed_weight, padding_idx, self.embed.max_norm,
                self.embed.norm_type, self.embed.scale_grad_by_freq, self.embed.sparse)

        return X
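For reference, a self-contained sketch of the same row-wise embedding dropout on current PyTorch, without Variable or the IS_TORCH_04 branch; vocab_size, emb_dim and the dropout probability are illustrative values, and the 1/(1 - p) rescaling mirrors what a dropout_mask helper is normally expected to do.

# Minimal sketch of embedding dropout (illustrative sizes, not the original module).
import torch
import torch.nn.functional as F

vocab_size, emb_dim, p = 1000, 50, 0.1
embed = torch.nn.Embedding(vocab_size, emb_dim)
words = torch.randint(0, vocab_size, (4, 7))              # (batch, seq_len)

# One Bernoulli draw per vocabulary row, so whole embedding vectors are dropped,
# rescaled by 1/(1 - p) to keep the expected activation unchanged.
mask = embed.weight.new_empty((vocab_size, 1)).bernoulli_(1 - p) / (1 - p)
masked_weight = mask * embed.weight

out = F.embedding(words, masked_weight, embed.padding_idx, embed.max_norm,
                  embed.norm_type, embed.scale_grad_by_freq, embed.sparse)
print(out.shape)                                          # torch.Size([4, 7, 50])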
Example #2
    def positional_encoder(embedded_sentence):
        # embedded_sentence.size() = (batch_size, num_sentences, num_tokens, embedding_length)
        # l.size() = (num_tokens, embedding_length)
        # output.size() = (batch_size, num_sentences, embedding_length)
        # The outputs are basically f1, f2, f3, ..., which go into the input fusion layer in the next step
        # to share information between sentences via a bidirectional GRU module.

        batch_size, num_sentences, num_tokens, embedding_length = embedded_sentence.size()
        l = []  # Same for all sentences in all batches, since num_tokens and embedding_length are fixed for the entire dataset.
        for j in range(num_tokens):
            x = []
            for d in range(embedding_length):
                x.append((1 - (j / (num_tokens - 1))) -
                         (d / (embedding_length - 1)) * (1 - 2 * j /
                                                         (num_tokens - 1)))
            l.append(x)

        l = torch.FloatTensor(l)
        l = l.unsqueeze(0)  # add an extra dimension in the first place for batch_size
        l = l.unsqueeze(1)  # add an extra dimension in the second place for num_sentences
        l = l.expand_as(embedded_sentence)  # so that l.size() = (batch_size, num_sentences, num_tokens, embedding_length)

        mat = embedded_sentence * Variable(l.cuda())
        f_ids = torch.sum(mat, dim=2).squeeze(2)  # sum along token dimension

        return f_ids
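The double loop above builds the position-encoding matrix l[j][d] = (1 - j/(J-1)) - (d/(D-1)) * (1 - 2j/(J-1)) with J = num_tokens and D = embedding_length. A vectorized sketch of the same matrix, assuming num_tokens > 1 and embedding_length > 1 (the loop already requires this):

# Vectorized position-encoding matrix (same values as the nested loop above).
import torch

def position_encoding_matrix(num_tokens, embedding_length):
    j = torch.arange(num_tokens, dtype=torch.float32).unsqueeze(1)        # (num_tokens, 1)
    d = torch.arange(embedding_length, dtype=torch.float32).unsqueeze(0)  # (1, embedding_length)
    J, D = num_tokens - 1, embedding_length - 1
    return (1 - j / J) - (d / D) * (1 - 2 * j / J)

l = position_encoding_matrix(5, 8)
print(l.shape)   # torch.Size([5, 8])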
Example #3
 def forward(self, x):
     # batch_first=True is an argument of the RNN module itself, not of torch.zeros
     h0 = Variable(
         torch.zeros(self.num_layers, x.size(0), self.hidden_size))
     out, _ = self.rnn(x, h0)
     out = self.fc(out[:, -1, :])
     return out
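For context, a minimal sketch of the kind of module this forward() appears to belong to; the layer sizes, the choice of nn.RNN, and batch_first=True are assumptions for illustration only.

import torch
import torch.nn as nn

class SimpleRNNClassifier(nn.Module):
    def __init__(self, input_size=28, hidden_size=128, num_layers=2, num_classes=10):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True is passed to the RNN module, not to torch.zeros.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):                  # x: (batch, seq_len, input_size)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
        out, _ = self.rnn(x, h0)           # out: (batch, seq_len, hidden_size)
        return self.fc(out[:, -1, :])      # classify from the last time step

model = SimpleRNNClassifier()
print(model(torch.randn(4, 28, 28)).shape)  # torch.Size([4, 10])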
Example #4
def prediction(k_data):
    # k_data (json) contains the previous k data points of the given currency.
    # k is determined by the number of previous points used for training (currently k = 5).
    m = int(k_data['next'])
    k_data = np.array(k_data['data'])
    # Model directory here
    model = torch.load('../prediction/model.pt')
    output = []
    with torch.no_grad():
        for i in range(m):
            data = Variable(torch.from_numpy(k_data))
            out = model(data)[0].cpu().float().numpy()
            output.append(out)
            # Slide the window: append the prediction and drop the oldest point.
            k_data = np.append(k_data, out)[1:]
    return jsonify(output)
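A stand-alone sketch of the sliding-window recursion used above, with a dummy stand-in for the loaded model so it can be run without Flask or the saved checkpoint; the one-value-per-step convention and the window length are assumptions.

import numpy as np
import torch

def dummy_model(x):                        # stands in for the loaded model
    return x.mean().unsqueeze(0)           # one predicted value per call

k_data = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)  # last k points
m = 3                                                            # steps to predict
output = []
with torch.no_grad():
    for _ in range(m):
        out = dummy_model(torch.from_numpy(k_data))[0].item()
        k_data = np.append(k_data, out)[1:]   # slide the window by one point
        output.append(out)
print(output)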
Example #5
    def forward(self, facts, G):
        # facts.size() = (batch_size, num_sentences, embedding_length)
        # fact.size() = (batch_size, embedding_length=hidden_size)
        # G.size() = (batch_size, num_sentences)
        # g.size() = (batch_size, )

        h_0 = Variable(torch.zeros(self.hidden_size)).cuda()

        for sen in range(facts.size()[1]):
            fact = facts[:, sen, :]
            g = G[:, sen]
            if sen == 0:  # Initialization for first sentence only
                hi_1 = h_0.unsqueeze(0).expand_as(fact)
            hi_1 = self.AttnGRUCell(fact, hi_1, g)
        C = hi_1  # Final hidden vector as the contextual vector used for updating memory

        return C
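self.AttnGRUCell is assumed to implement the attention-based GRU from the DMN+ paper (Xiong et al., 2016), in which the usual update gate is replaced by the scalar attention gate g. A hedged sketch of such a cell:

import torch
import torch.nn as nn

class AttnGRUCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.Wr = nn.Linear(input_size, hidden_size)
        self.Ur = nn.Linear(hidden_size, hidden_size)
        self.W = nn.Linear(input_size, hidden_size)
        self.U = nn.Linear(hidden_size, hidden_size)

    def forward(self, fact, hi_1, g):
        # fact, hi_1: (batch_size, hidden_size); g: (batch_size,)
        r = torch.sigmoid(self.Wr(fact) + self.Ur(hi_1))
        h_tilde = torch.tanh(self.W(fact) + r * self.U(hi_1))
        g = g.unsqueeze(1)                     # broadcast the gate over hidden units
        return g * h_tilde + (1 - g) * hi_1    # gate between candidate and previous state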
Example #6
    def forward(self, input, word_embedding):
        # input.size() = (batch_size, num_sentences, num_tokens)
        # word_embedding -> (batch_size, num_sentences, num_tokens, embedding_length)
        # positional_encoder(word_embedding(input)) -> (batch_size, num_sentences, embedding_length)
        # Now BidirectionalGRU blocks receive their input, the output of the positional encoder and finally give facts
        # facts.size() = (batch_size, num_sentences, embedding_length) embedding_length = hidden_size

        batch_size, num_sentences, num_tokens = input.size()
        input = input.view(batch_size, -1)  # flatten to (batch_size, num_sentences * num_tokens) for the embedding lookup
        input = word_embedding(input)
        input = input.view(batch_size, num_sentences, num_tokens, -1)  # restore the per-sentence token dimension
        input = self.positional_encoder(input)
        input = self.dropout(input)

        h0 = Variable(
            torch.zeros(2, input.size()[0], self.hidden_size).cuda()
        )  # initial hidden state of the bidirectional GRU (t = 0)
        facts, hdn = self.gru(input, h0)
        facts = facts[:, :, :self.hidden_size] + facts[:, :, self.hidden_size:]  # sum the forward and backward GRU outputs

        return facts
Example #7
def predict_model(image, checkpoint, topk=5, labels='', gpu=False):
    
    if args.image:
        image=args.image
    
    if args.checkpoint:
        checkpoint=args.checkpoint
    
    if args.topk:
        topk=args.topk
    
    if args.labels:
        labels=args.labels
        
    if args.gpu:
        gpu=args.gpu
    
    checkpoint_dict=torch.load(checkpoint)
    arch= checkpoint_dict['arch']
    num_labels= len(checkpoint_dict['class_to_idx'])
    hidden_units= checkpoint_dict['hidden_units']
    
    model= load_model(arch=arch, num_labels=num_labels, hidden_units=hidden_units)
    
    if gpu and torch.cuda.is_available():
        model.cuda()
    
    was_training = model.training
    model.eval()
    
    image=process_image(image)
    
    image=Variable(torch.FloatTensor(image), requires_grad=True)
    image=image.unsqueeze(0)
    
    if gpu and torch.cuda.is_available():
         image=image.cuda()
    
    result = model(image).topk(topk)
    
    probs = torch.nn.functional.softmax(result[0].data, dim=1).cpu().numpy()[0]
    classes = result[1].data.cpu().numpy()[0]
    
    if labels:
        with open(labels, 'r') as f:
            cat_to_name = json.load(f)
        
        labels= list(cat_to_name.values())
        classes= [labels[x] for x in classes]
        
    model.train(mode=was_training)
    
    
    if args.image:
        print('Prediction and probabilities:', list(zip(classes, probs)))
    
    return probs, classes
Example #8
 def init_hidden(self, batch_size):
     return Variable(
         torch.zeros(self.num_layers, batch_size, self.hidden_size))
Example #9
f_scheduler = optim.lr_scheduler.StepLR(f_opt, step_size=5000, gamma=0.1)
g_scheduler = optim.lr_scheduler.StepLR(g_opt, step_size=1000, gamma=0.1)

# Gradient Penalty Hyper-parameters.
c = 1e-2
batch_size = 128
lmbda = 10

max_iters = 1000
sample_gen = mog_gen(d)
train_log = open('train.log', 'w')
# This loop implements the gradient penalty and Pac-GAN learning algorithm.
for it in range(max_iters):
    for t_critic in range(5):
        data = sample_gen.get_random_sample(batch_size)
        x = Variable(torch.from_numpy(data).cuda().float(), requires_grad=True)
        z = Variable(torch.randn(batch_size, d),
                     requires_grad=True).cuda().float().mul(1)
        f_x = f(x)
        g_z = g(z)
        fg_z = f(g_z)
        eps = Variable(torch.rand(batch_size),
                       requires_grad=True).cuda().float()
        x1 = Variable(torch.matmul(torch.diag(eps), x), requires_grad=True)
        x2 = Variable(torch.matmul(torch.diag(1 - eps), g_z),
                      requires_grad=True)
        x_hat = Variable(x1 + x2, requires_grad=True)
        f_xh = f(x_hat)
        grad_xh_norm = torch.zeros(batch_size).cuda().float()
        for b in range(f_xh.size()[0]):
            g_x_hat = ag.grad(f_xh[b][0], x_hat, retain_graph=True)[0]
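For comparison, a vectorized sketch of the WGAN-GP gradient penalty (Gulrajani et al., 2017) that avoids the per-sample gradient loop above; critic stands in for f, and the inputs are assumed to be (batch_size, d) tensors on the same device.

import torch
import torch.autograd as autograd

def gradient_penalty(critic, x_real, x_fake, lmbda=10.0):
    batch_size = x_real.size(0)
    eps = torch.rand(batch_size, 1, device=x_real.device)       # per-sample mixing weight
    x_hat = (eps * x_real + (1 - eps) * x_fake).requires_grad_(True)
    f_xh = critic(x_hat)
    grads = autograd.grad(outputs=f_xh, inputs=x_hat,
                          grad_outputs=torch.ones_like(f_xh),
                          create_graph=True, retain_graph=True)[0]
    return lmbda * ((grads.norm(2, dim=1) - 1) ** 2).mean()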
Example #10
 def one_hidden(self, l):
     nh = (self.n_hid if l != self.n_layers - 1 else self.emb_sz)//self.ndir
     if IS_TORCH_04: return Variable(self.weights.new(self.ndir, self.bs, nh).zero_())
     else: return Variable(self.weights.new(self.ndir, self.bs, nh).zero_(), volatile=not self.training)
Example #11
                train_load = DataLoader(
                    dataset,
                    batch_size=100,
                    shuffle=True,
                    collate_fn=pad_collate)  ### Load the bAbI dataset

                model.train()  ### put the network in training mode
                if not early_stop_flag:
                    total_acc = 0
                    count = 0
                    for batch_id, data in enumerate(train_load):
                        optim.zero_grad()
                        context, questions, answers = data
                        batch_size = context.size()[0]
                        context = Variable(context.long())  ## context.size() = (batch_size, num_sentences, num_tokens)
                        questions = Variable(questions.long())  ## questions.size() = (batch_size, num_tokens)
                        answers = Variable(answers)

                        total_loss, acc = model.loss(
                            context, questions, answers
                        )  ## Loss is calculated and gradients are backpropagated through the layers.
                        total_loss.backward()
                        total_acc += acc * batch_size
                        count += batch_size

                        if batch_id % 20 == 0:
                            print('training error')
                            print('task ' + str(task_id) + ',epoch ' +
Example #12
 def make_std_mask(tgt, pad):
     "Create the mask so that we cannot attend to future words."
     tgt_mask = (tgt != pad).unsqueeze(-2)
     tgt_mask = tgt_mask & Variable(
         subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data))
     return tgt_mask
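subsequent_mask is assumed to be the usual Annotated Transformer helper that hides positions after the current one; below is a common definition plus a small usage example, treating make_std_mask as a free function and using pad index 0 purely for illustration.

import numpy as np
import torch
from torch.autograd import Variable  # make_std_mask above relies on Variable

def subsequent_mask(size):
    "Mask out subsequent positions: position i may attend only to positions <= i."
    attn_shape = (1, size, size)
    mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(mask) == 0

tgt = torch.tensor([[2, 5, 7, 0, 0]])   # one sequence, padded with 0
tgt_mask = make_std_mask(tgt, pad=0)
print(tgt_mask.shape)                   # torch.Size([1, 5, 5])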