Example #1
import torch


def train(model, trainLoader, criterion, optimizer, evalData=None,
          epoch=1, echoStep=100, evalStep=1000, saveStep=5000, savePath="./"):

    if evalData is not None:
        evalX, evalY = evalData
        if torch.cuda.is_available():
            evalY = evalY.cuda()
            if isinstance(evalX, list):
                evalX = [t.cuda() for t in evalX]
            else:
                evalX = evalX.cuda()

    batchLen = len(trainLoader)
    for epochIdx in range(epoch):
        for i, batch in enumerate(trainLoader, batchLen * epochIdx + 1):
            x, y = batch
            if torch.cuda.is_available():
                y = y.cuda()
                if isinstance(x, list):
                    x = [t.cuda() for t in x]
                else:
                    x = x.cuda()
            out = model(x)
            loss = criterion(out, y)

            # softmax is monotonic, so the argmax of the raw logits already
            # gives the predicted class
            pred = torch.argmax(out, dim=1)
            correct = pred.eq(y).sum()
            acc = float(correct) / len(y)

            # print loss
            if i % echoStep == 0:
                print("Step %d/%d/%d : Loss %.4f , Acc %.4f"
                      % (i, batchLen * epoch, epochIdx + 1, float(loss), acc))
            # evaluate
            if i % evalStep == 0 and evalData is not None:
                with torch.no_grad():
                    evalOut = model(evalX)
                    evalLoss = criterion(evalOut, evalY)
                    correct = torch.argmax(evalOut, dim=1).eq(evalY).sum()
                evalAcc = float(correct) / len(evalY)
                print("------------------------------------------------")
                print("Evaluate %d Sample : Loss %.4f , Acc %.4f"
                      % (evalY.size(0), float(evalLoss), evalAcc))
                print()
            # save model
            if i % saveStep == 0:
                outFile = "%s/m_%d_%d.pt" % (savePath, i, epochIdx + 1)
                torch.save(model.state_dict(), outFile)
                print("Save model : %s" % outFile)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    outFile = "%s/final.pt" % savePath
    torch.save(model.state_dict(), outFile)
    print("Save model : %s" % outFile)
Example #2
    def run(self):
        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()
        params = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = self.opt.optimizer(params, lr=self.opt.learning_rate)

        max_test_acc = 0
        global_step = 0
        for epoch in range(self.opt.num_epoch):
            print('>' * 100)
            print('epoch: ', epoch)
            n_correct, n_total = 0, 0
            for i_batch, sample_batched in enumerate(self.train_data_loader):
                global_step += 1

                # switch model to training mode, clear gradient accumulators
                self.model.train()
                optimizer.zero_grad()

                inputs = [sample_batched[col].to(self.opt.device) for col in self.opt.inputs_cols]
                targets = sample_batched['polarity'].to(self.opt.device)
                outputs = self.model(inputs)

                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                if global_step % self.opt.log_step == 0:
                    n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
                    n_total += len(outputs)
                    train_acc = n_correct / n_total

                    # switch model to evaluation mode
                    self.model.eval()
                    n_test_correct, n_test_total = 0, 0
                    with torch.no_grad():
                        for t_batch, t_sample_batched in enumerate(self.test_data_loader):
                            t_inputs = [t_sample_batched[col].to(self.opt.device) for col in self.opt.inputs_cols]
                            t_targets = t_sample_batched['polarity'].to(self.opt.device)
                            t_outputs = self.model(t_inputs)

                            n_test_correct += (torch.argmax(t_outputs, -1) == t_targets).sum().item()
                            n_test_total += len(t_outputs)
                        test_acc = n_test_correct / n_test_total
                        if test_acc > max_test_acc:
                            max_test_acc = test_acc

                        print('loss: {:.4f}, acc: {:.4f}, test_acc: {:.4f}'.format(loss.item(), train_acc, test_acc))

                        # log
                        self.writer.add_scalar('loss', loss, global_step)
                        self.writer.add_scalar('acc', train_acc, global_step)
                        self.writer.add_scalar('test_acc', test_acc, global_step)

        self.writer.close()

        print('max_test_acc: {0}'.format(max_test_acc))
        return max_test_acc
Example #3
    def __init__(self, probs=None, logits=None, validate_args=None):
        if probs is not None:
            new_probs = torch.zeros_like(probs, dtype=torch.float)
            new_probs[torch.argmax(probs, dim=0)] = 1.0
            probs = new_probs
        elif logits is not None:
            new_logits = torch.full_like(logits, -1e8, dtype=torch.float)
            max_idx = torch.argmax(logits, dim=0)
            new_logits[max_idx] = logits[max_idx]
            logits = new_logits

        super(Argmax, self).__init__(probs=probs, logits=logits, validate_args=validate_args)
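The constructor above collapses the distribution onto its argmax. A standalone sketch (mine, independent of the Argmax class) shows what the logits branch does:

import torch

# Every entry except the argmax is pushed to -1e8, so after softmax the
# distribution is (numerically) a point mass on the most likely category.
logits = torch.tensor([0.5, 2.0, -1.0])
new_logits = torch.full_like(logits, -1e8)
max_idx = torch.argmax(logits, dim=0)
new_logits[max_idx] = logits[max_idx]
print(torch.softmax(new_logits, dim=0))  # tensor([0., 1., 0.])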
Example #4
    def sample_relax(logits, surrogate):
        cat = Categorical(logits=logits)
        u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
        gumbels = -torch.log(-torch.log(u))
        z = logits + gumbels
        b = torch.argmax(z, dim=1) #.view(B,1)
        logprob = cat.log_prob(b).view(B,1)


        # czs = []
        # for j in range(1):
        #     z = sample_relax_z(logits)
        #     surr_input = torch.cat([z, x, logits.detach()], dim=1)
        #     cz = surrogate.net(surr_input)
        #     czs.append(cz)
        # czs = torch.stack(czs)
        # cz = torch.mean(czs, dim=0)#.view(1,1)
        surr_input = torch.cat([z, x, logits.detach()], dim=1)
        cz = surrogate.net(surr_input)


        cz_tildes = []
        for j in range(1):
            z_tilde = sample_relax_given_b(logits, b)
            surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1)
            cz_tilde = surrogate.net(surr_input)
            cz_tildes.append(cz_tilde)
        cz_tildes = torch.stack(cz_tildes)
        cz_tilde = torch.mean(cz_tildes, dim=0) #.view(B,1)

        return b, logprob, cz, cz_tilde
Example #5
File: crf.py Project: zysite/post
    def viterbi(self, emit, mask):
        T, B, N = emit.shape
        lens = mask.sum(dim=0)
        delta = torch.zeros(T, B, N)
        paths = torch.zeros(T, B, N, dtype=torch.long)

        delta[0] = self.strans + emit[0]  # [B, N]

        for i in range(1, T):
            trans_i = self.trans.unsqueeze(0)  # [1, N, N]
            emit_i = emit[i].unsqueeze(1)  # [B, 1, N]
            scores = trans_i + emit_i + delta[i - 1].unsqueeze(2)  # [B, N, N]
            delta[i], paths[i] = torch.max(scores, dim=1)

        predicts = []
        for i, length in enumerate(lens):
            prev = torch.argmax(delta[length - 1, i] + self.etrans)

            predict = [prev]
            for j in reversed(range(1, length)):
                prev = paths[j, i, prev]
                predict.append(prev)
            # reverse the predicted sequence and save it
            predicts.append(torch.tensor(predict).flip(0))

        return torch.cat(predicts)
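One detail worth spelling out (an aside, not from the source file): torch.max over dim=1 returns both the best scores and their indices, which is exactly what fills delta[i] and paths[i] in the loop above:

import torch

scores = torch.randn(2, 4, 4)          # [B, N, N] transition scores
best, backptr = torch.max(scores, dim=1)
print(best.shape, backptr.shape)       # torch.Size([2, 4]) torch.Size([2, 4])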
Example #6
def test(model):
    game_state = GameState()

    # initial action is do nothing
    action = torch.zeros([model.number_of_actions], dtype=torch.float32)
    action[0] = 1
    image_data, reward, terminal = game_state.frame_step(action)
    image_data = resize_and_bgr2gray(image_data)
    image_data = image_to_tensor(image_data)
    state = torch.cat((image_data, image_data, image_data, image_data)).unsqueeze(0)

    while True:
        # get output from the neural network
        output = model(state)[0]

        action = torch.zeros([model.number_of_actions], dtype=torch.float32)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action = action.cuda()

        # get action
        action_index = torch.argmax(output)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action_index = action_index.cuda()
        action[action_index] = 1

        # get next state
        image_data_1, reward, terminal = game_state.frame_step(action)
        image_data_1 = resize_and_bgr2gray(image_data_1)
        image_data_1 = image_to_tensor(image_data_1)
        state_1 = torch.cat((state.squeeze(0)[1:, :, :], image_data_1)).unsqueeze(0)

        # set state to be state_1
        state = state_1
Example #7
    def get_pm_loss(self, image,
                        alpha = 0.0,
                        topk = 0,
                        use_baseline = True,
                        use_term_one_baseline = True,
                        n_samples = 1):

        class_weights = self.pixel_attention(image)
        log_q = torch.log(class_weights)

        # kl term
        kl_pixel_probs = (class_weights * log_q).sum()

        f_pixel = lambda i : self.get_loss_cond_pixel_1d(image, i) + \
                    kl_pixel_probs

        avg_pm_loss = 0.0
        # TODO: n_samples would be more elegant as an
        # argument to get_partial_marginal_loss
        for k in range(n_samples):
            pm_loss = pm_lib.get_partial_marginal_loss(f_pixel, log_q, alpha, topk,
                                        use_baseline = use_baseline,
                                        use_term_one_baseline = use_term_one_baseline)

            avg_pm_loss += pm_loss / n_samples

        map_locations = torch.argmax(log_q.detach(), dim = 1)
        map_cond_losses = f_pixel(map_locations).mean()

        return avg_pm_loss, map_cond_losses
Example #8
    def sample_relax(probs):
        # sample z with the Gumbel-max trick
        u = torch.rand(B, C).clamp(1e-12, 1. - 1e-12)
        gumbels = -torch.log(-torch.log(u))
        z = torch.log(probs) + gumbels

        b = torch.argmax(z, dim=1)
        cat = Categorical(probs=probs)
        logprob = cat.log_prob(b)

        # sample z_tilde conditioned on the outcome b
        u_b = torch.rand(B, 1).clamp(1e-12, 1. - 1e-12)
        z_tilde_b = -torch.log(-torch.log(u_b))
        u = torch.rand(B, C).clamp(1e-12, 1. - 1e-12)
        z_tilde = -torch.log((- torch.log(u) / probs) - torch.log(u_b))

        # write the conditional sample into the chosen coordinate;
        # scatter_ (as in the other variants below) handles batched b
        # correctly, unlike the original z_tilde[:, b] = z_tilde_b
        z_tilde.scatter_(dim=1, index=b.view(B, 1), src=z_tilde_b)

        return z, b, logprob, z_tilde
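These samplers rely on the Gumbel-max trick: adding independent Gumbel noise to log-probabilities and taking the argmax draws an exact categorical sample. A quick empirical check (my own sketch, not part of the source):

import torch

torch.manual_seed(0)
probs = torch.tensor([0.2, 0.5, 0.3])
u = torch.rand(100000, 3).clamp(1e-12, 1. - 1e-12)
z = torch.log(probs) + (-torch.log(-torch.log(u)))
b = torch.argmax(z, dim=1)
print(torch.bincount(b) / b.numel())  # approximately tensor([0.2, 0.5, 0.3])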
Example #9
	def forward_greedy(self,z,num_steps,temperature,x=None):
		batch_size = z.size(0)
		predictions = []
		z = self.batchnorm1(self.activation(self.z2m(z)))
		# detach memory such that we don't backprop through the whole dataset
		memory = self.initMemory(batch_size).to(z.device)
		memory = memory.detach()
		previous_output = z.new_zeros(size=(z.size(0),),dtype=torch.long)
		previous_output[:] = self.SOS_TOKEN # <sos> token
		for i in range(num_steps):
			input = self.activation(self.embedding(previous_output))
			if x is not None:
				input = self.input_dropout(input)
			step_input = torch.cat([input,z],dim=1)
			step_input = self.batchnorm2(step_input)
			memory = self.relRNN(step_input,memory)
			out = self.m2o(memory.view(memory.size(0),-1))
			out = self.last_activation(out,temperature)
			if x is not None: # teacher forcing
				previous_output = x[:,i]
			else: # use prediction as input
				previous_output = torch.argmax(out,dim=-1)
				previous_output = previous_output.detach()
			predictions.append(out)
		output = torch.stack(predictions).transpose(1,0)
		return output
Example #10
	def forward_greedy(self,z,num_steps,temperature,x=None):
		if num_steps > self.max_seq_len:
			raise ValueError("num_steps ({}) must be less or equal to max_seq_len ({})".format(num_steps,self.max_seq_len))
		predictions = []
		z = self.batchnorm1(self.activation(self.z2h(z)))
		hx = z # initialize the hidden state
		hm = z # initialize the hidden state for memory
		memory = self.memory
		memory = memory[:num_steps,:]
		memory = memory.expand(z.size(0),-1,-1) # copy the memory for each batch position
		previous_output = z.new_zeros(size=(z.size(0),),dtype=torch.long)
		previous_output[:] = self.SOS_TOKEN # <sos> token
		# run rnn
		for i in range(num_steps):
			# for the input
			input = self.activation(self.embedding(previous_output)) # previous_output is of size [batch,hidden_size]
			step_input = torch.cat([input,z],dim=1) # step_input is of size [batch,step_input_size]
			step_input = self.batchnorm2(step_input)
			step_mem = memory[:,i,:] # step_mem is of size [batch,step_input_size]
			out,hx,hm = self.memcell(step_input,step_mem,hx=hx,hm=hm)
			out = self.activation(out)
			out = self.batchnorm3(out)
			out = self.h2o(out)
			out = self.last_activation(out,temperature)
			if x is not None: # teacher forcing
				previous_output = x[:,i]
			else: # use prediction as input
				previous_output = torch.argmax(out,dim=-1)
				previous_output = previous_output.detach()
			predictions.append(out)
		output = torch.stack(predictions).transpose(1,0)
		return output
Example #11
def sample_relax(logits):
    u = torch.rand(B, C).clamp(1e-12, 1. - 1e-12)
    gumbels = -torch.log(-torch.log(u))
    z = logits + gumbels
    b = torch.argmax(z, dim=1)

    cat = Categorical(logits=logits)
    logprob = cat.log_prob(b).view(B,1)

    v_k = torch.rand(B,1).clamp(1e-12, 1.-1e-12)
    z_tilde_b = -torch.log(-torch.log(v_k))
    # this alternative seems biased, even though it shouldn't be:
    # v_k = torch.gather(input=u, dim=1, index=b.view(B,1))
    # z_tilde_b = torch.gather(input=z, dim=1, index=b.view(B,1))

    v = torch.rand(B, C).clamp(1e-12, 1. - 1e-12)
    probs = torch.softmax(logits, dim=1).repeat(B, 1)

    z_tilde = -torch.log((- torch.log(v) / probs) - torch.log(v_k))
    z_tilde.scatter_(dim=1, index=b.view(B, 1), src=z_tilde_b)

    return z, b, logprob, z_tilde
Example #12
	def forward_greedy(self,z,num_steps,temperature,x=None):
		predictions = []
		batch_size = z.size(0)
		next_input = z.new_zeros(size=(batch_size,num_steps),dtype=torch.long,requires_grad=False)
		next_input[:,:] = self.PAD_TOKEN
		next_input[:,0] = self.SOS_TOKEN # <sos> token
		z = self.activation(self.z2h(z)).view(batch_size,1,-1).repeat(1,num_steps,1)
		for i in range(num_steps):
			input = next_input
			step_input = self.embedding(input)
			step_input = self.pos_embedding(step_input)
			step_input = torch.cat([step_input,z],dim=2) # step_input is of size [batch,seq_len,step_input_size]
			step_input = self.activation(self.s2h(step_input))
			non_pad_mask = get_non_pad_mask(input,self.PAD_TOKEN)
			slf_attn_mask_subseq = get_subsequent_mask(input)
			slf_attn_mask_keypad = get_attn_key_pad_mask(input,self.PAD_TOKEN)
			attn_mask = (slf_attn_mask_keypad + slf_attn_mask_subseq).gt(0)
			out = self.transformer(step_input,non_pad_mask=non_pad_mask,attn_mask=attn_mask)
			out = out[:,i,:]
			out = self.activation(out)
			out = self.h2o(out)
			out = self.last_activation(out,temperature)
			if x is not None: # teacher forcing
				previous_output = x[:,i]
			else: # use prediction as input
				previous_output = torch.argmax(out,dim=-1)
				previous_output = previous_output.detach()
			next_input = torch.cat([input[:,:i+1],previous_output.view(-1,1),input[:,i+2:]],dim=1).detach()
			predictions.append(out)
		output = torch.stack(predictions).transpose(1,0)
		return output
Example #13
    def get_pm_loss(self, image, image_so_far, var_so_far,
                            alpha = 0.0,
                            topk = 0,
                            use_baseline = True,
                            use_term_one_baseline = True,
                            n_samples = 1):

        resid_image = image - image_so_far
        class_weights = self.get_pixel_probs(resid_image, var_so_far)
        log_q = torch.log(class_weights)

        # kl term
        kl_a = (class_weights * log_q).sum()

        f_z = lambda i : self.get_loss_conditional_a(resid_image, image_so_far, var_so_far, i)[0] + kl_a

        avg_pm_loss = 0.0
        # TODO: n_samples would be more elegant as an
        # argument to get_partial_marginal_loss
        for k in range(n_samples):
            pm_loss = pm_lib.get_partial_marginal_loss(f_z, log_q, alpha, topk,
                                        use_baseline = use_baseline,
                                        use_term_one_baseline = use_term_one_baseline)
            avg_pm_loss += pm_loss / n_samples

        map_locations = torch.argmax(log_q.detach(), dim = 1)
        map_cond_losses = f_z(map_locations).mean()

        return avg_pm_loss, map_cond_losses
Example #14
def train(model, trn_loader, optimizer, loss_func, device):
    model.train()
    training_loss = 0
    training_acc = 0
    counter = 0
    for data, target in trn_loader:
        data = data.to(device)      # Variable wrappers are obsolete since PyTorch 0.4
        target = target.to(device)
        # Forward pass
        output = model(data)
        tloss = loss_func(output, target)
        training_loss += tloss.item()
        # Zero the gradients
        optimizer.zero_grad()
        # Backward pass
        tloss.backward()
        # Update parameters
        optimizer.step()
        # Compute prediction's score
        pred = torch.argmax(output.data, 1)
        training_acc += accuracy_score(target.data.cpu().numpy(),
                                       pred.data.cpu().numpy())
        counter += 1
    avg_loss = training_loss / float(counter)
    avg_acc = training_acc / float(counter)
    return avg_loss, avg_acc
Example #15
def predict(model, data, device):
    model.eval()
    with torch.no_grad():
        data = data.to(device)
        output = model(data)
        pred = torch.argmax(output.data, 1)
    return output, pred
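A possible invocation, using a hypothetical stand-in classifier:

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.Linear(4, 3).to(device)
output, pred = predict(model, torch.randn(5, 4), device)
print(pred)  # e.g. tensor([2, 0, 1, 1, 0])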
Example #16
    def _get_state_action_values(self, transitions):
        batch_size = len(transitions)
        batch = Transition(*zip(*transitions))

        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), dtype=torch.bool, device=self.config.device)  # bool masks replace the deprecated uint8 masks

        state_batch = torch.cat(batch.state).to(torch.float32)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward).to(torch.float32)
        
        next_state_values = torch.zeros(batch_size).to(self.config.device, dtype=torch.float32)

        next_states = [s for s in batch.next_state if s is not None]
        if len(next_states) != 0:
            with torch.no_grad():
                non_final_next_state = torch.cat(next_states).to(torch.float32)

                Q = self._get_Q(self.model, non_final_next_state)
                best_actions = torch.argmax(Q, dim=1, keepdim=True)

                Q_target = self._get_Q(self.target_model, non_final_next_state)
                next_state_values[non_final_mask] = Q_target.gather(1, best_actions).squeeze()

        gamma = self.config.gamma ** self.config.num_multi_step_reward
        expected_values = reward_batch + gamma * next_state_values

        with torch.set_grad_enabled(self.model.training):
            Q = self._get_Q(self.model, state_batch)
            values = torch.squeeze(Q.gather(1, action_batch))
            values = values.to(self.config.device, dtype=torch.float32)  # .to() is not in-place

        return (values, expected_values)
Example #17
def predict(model, batches, weights_matrix):
    model.embedding, _ = create_emb_layer(weights_matrix)
    model.embedding = model.embedding.cuda()
    model.eval()
    # Only 1 batch and 1 item in that batch
    for batch in batches:
        pred = model(batch)
        return torch.argmax(F.softmax(pred, dim=1), 1)
Example #18
def predict_image(image_path):
    img = Image.open(image_path)
    img = transformations(img)

    pred = model.predict(img,apply_softmax=True)

    # print the predicted class and its top softmax probability
    print("Prediction: {} , Confidence: {} ".format(torch.argmax(pred), torch.max(pred)))
Example #19
def predict_loader(data_loader):

    #compute predictions and apply softmax
    predictions = model.predict(data_loader,apply_softmax=True)

    # print the predicted class and its top softmax probability for each prediction
    for pred in predictions:
        print("Prediction: {} , Confidence: {} ".format(torch.argmax(pred), torch.max(pred)))
Example #20
    def predict(self, X):
        """ sklearn interface without creating graph """
        X = X.to(device=self.cf_a.device)
        if self.cf_a.task_type == "regression":
            with torch.no_grad():
                return self.forward(X)
        elif self.cf_a.task_type == "classification":
            with torch.no_grad():
                return torch.argmax(self.forward(X), 1)
Example #21
    def test(model, batches, weights_matrix):
        model.embedding, _ = create_emb_layer(weights_matrix)
        model.embedding =  model.embedding.cuda()
        model.eval()
        total_acc = 0
        count = 0
        cm = torch.zeros(3,3)
        for batch in batches:
            batch = create_sorted_batch(batch)
            label = batch['sentiment']
            pred = model(batch)
            acc = accuracy(torch.argmax(F.softmax(pred,dim=1),1).float(), label.float().cuda())
            cm += torch.from_numpy(confusion_matrix(label, torch.argmax(pred, 1).cpu(),
                    labels=[0, 1, 2])).float()
            total_acc += acc.item() 
            count += len(label)

        return total_acc/count, cm
Example #22
    def eval_(model, batches, criterion):
        model.eval()
        total_loss = 0
        total_acc = 0
        count = 0
        cm = torch.zeros(3,3)
        for batch in batches:
            batch = create_sorted_batch(batch)
            label = batch['sentiment']
            pred = model(batch)
            loss = criterion(pred, label.cuda())
            acc = accuracy(torch.argmax(F.softmax(pred,dim=1),1).float(), label.float().cuda())
            cm += torch.from_numpy(confusion_matrix(label, torch.argmax(pred, 1).cpu(),
                    labels=[0, 1, 2])).float()
            total_loss += loss.item()
            total_acc += acc.item() 
            count += len(label)

        return total_loss/len(batches), total_acc/count, cm
Example #23
    def perform_inference_helper(self, test_features):
        """
            Helper function for ClassifierModel's perform_inference(self, test_features, test_labels)
        """

        # Use inference model
        self.model.eval()

        predictions = (self.model((torch.from_numpy(test_features).float()).to(self.device))).cpu().detach()
        predictions = (torch.argmax(predictions, 1)).numpy()

        return predictions
Example #24
def run(input_data):
    input_data = torch.tensor(json.loads(input_data)['data'])

    # get prediction
    with torch.no_grad():
        output = model(input_data)
        classes = ['ants', 'bees']
        softmax = nn.Softmax(dim=1)
        pred_probs = softmax(output).numpy()[0]
        index = torch.argmax(output, 1).item()  # plain int for list/array indexing below

    result = {"label": classes[index], "probability": str(pred_probs[index])}
    return result
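To exercise run() end to end, something like the following could work; the stand-in model and the input shape are assumptions for illustration:

import json
import torch
import torch.nn as nn

# Hypothetical two-class model over 3x8x8 inputs; the real service loads a
# trained network instead.
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 2))
model.eval()

payload = json.dumps({"data": torch.randn(1, 3, 8, 8).tolist()})
print(run(payload))  # e.g. {'label': 'bees', 'probability': '0.53...'}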
Example #25
def sample_with_temperature(logits, sampling_temp, keep_topk):
    """Select next tokens randomly from the top k possible next tokens.

    Samples from a categorical distribution over the ``keep_topk`` words using
    the category probabilities ``logits / sampling_temp``.

    Args:
        logits (FloatTensor): Shaped ``(batch_size, vocab_size)``.
            These can be logits (``(-inf, inf)``) or log-probs (``(-inf, 0]``).
            (The distribution actually uses the log-probabilities
            ``logits - logits.logsumexp(-1)``, which equals the logits if
            they are log-probabilities summing to 1.)
        sampling_temp (float): Used to scale down logits. The higher the
            value, the more likely it is that a non-max word will be
            sampled.
        keep_topk (int): This many words could potentially be chosen. The
            other logits are set to have probability 0.

    Returns:
        (LongTensor, FloatTensor):

        * topk_ids: Shaped ``(batch_size, 1)``. These are
          the sampled word indices in the output vocab.
        * topk_scores: Shaped ``(batch_size, 1)``. These
          are essentially ``(logits / sampling_temp)[topk_ids]``.
    """

    if sampling_temp == 0.0 or keep_topk == 1:
        # For temp=0.0, take the argmax to avoid divide-by-zero errors.
        # keep_topk=1 is also equivalent to argmax.
        topk_scores, topk_ids = logits.topk(1, dim=-1)
        if sampling_temp > 0:
            topk_scores /= sampling_temp
    else:
        logits = torch.div(logits, sampling_temp)

        if keep_topk > 0:
            top_values, top_indices = torch.topk(logits, keep_topk, dim=1)
            kth_best = top_values[:, -1].view([-1, 1])
            kth_best = kth_best.repeat([1, logits.shape[1]]).float()

            # Set all logits that are not in the top-k to -10000.
            # This puts the probabilities close to 0.
            ignore = torch.lt(logits, kth_best)
            logits = logits.masked_fill(ignore, -10000)

        dist = torch.distributions.Multinomial(
            logits=logits, total_count=1)
        topk_ids = torch.argmax(dist.sample(), dim=1, keepdim=True)
        topk_scores = logits.gather(dim=1, index=topk_ids)
    return topk_ids, topk_scores
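A brief usage sketch (a one-row batch over a three-token vocabulary, chosen for illustration):

import torch

torch.manual_seed(0)
logits = torch.log(torch.tensor([[0.1, 0.2, 0.7]]))

# temperature 0 reduces to greedy argmax
ids, scores = sample_with_temperature(logits, sampling_temp=0.0, keep_topk=1)
print(ids)  # tensor([[2]])

# sample among the top-2 tokens at temperature 0.8
ids, scores = sample_with_temperature(logits, sampling_temp=0.8, keep_topk=2)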
Example #26
    def get_unlabeled_pm_loss(self, image, topk = 0, use_baseline = True,
                                    true_labels = None):

        # true labels for debugging only:
        if true_labels is None:
            log_q = self.classifier(image)
            assert np.all(log_q.detach().cpu().numpy() <= 0)

        else:
            # print('using true labels')
            batch_size = image.shape[0]
            q = torch.zeros((batch_size, self.n_classes)) + 1e-12
            seq_tensor = torch.LongTensor([i for i in range(batch_size)])
            q[seq_tensor, true_labels] = 1 - 1e-12 * (self.n_classes - 1)
            log_q = torch.log(q).to(device)

        f_z = lambda z : self.get_conditional_loss(image, z)

        if true_labels is None:
            pm_loss_z = pm_lib.get_partial_marginal_loss(f_z, log_q,
                                        alpha = 0.0,
                                        topk = topk,
                                        use_baseline = use_baseline,
                                        use_term_one_baseline = True)
        else:
            # we set log_q here to be the true labels
            pm_loss_z = self.get_conditional_loss(image, \
                                torch.argmax(log_q, dim = 1).detach())

        # kl term for class weights
        # (assuming uniform prior)
        kl_q_z = (torch.exp(log_q) * log_q).sum()

        # sampled loss:
        map_weights = torch.argmax(log_q, dim = 1)
        map_loss = f_z(map_weights)

        return pm_loss_z + kl_q_z, map_loss.sum()
Example #27
def main():
    # Load model
    if torch.cuda.is_available():
        model = torch.load("trained_models/whole_model_quickdraw")
    else:
        model = torch.load("trained_models/whole_model_quickdraw", map_location=lambda storage, loc: storage)
    model.eval()
    image = np.zeros((480, 640, 3), dtype=np.uint8)
    cv2.namedWindow("Canvas")
    global ix, iy, is_drawing
    is_drawing = False

    def paint_draw(event, x, y, flags, param):
        global ix, iy, is_drawing
        if event == cv2.EVENT_LBUTTONDOWN:
            is_drawing = True
            ix, iy = x, y
        elif event == cv2.EVENT_MOUSEMOVE:
            if is_drawing:
                cv2.line(image, (ix, iy), (x, y), WHITE_RGB, 5)
                ix = x
                iy = y
        elif event == cv2.EVENT_LBUTTONUP:
            is_drawing = False
            cv2.line(image, (ix, iy), (x, y), WHITE_RGB, 5)
            ix = x
            iy = y
        return x, y

    cv2.setMouseCallback('Canvas', paint_draw)
    while True:
        cv2.imshow('Canvas', 255 - image)
        key = cv2.waitKey(10)
        if key == ord(" "):
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            ys, xs = np.nonzero(image)
            min_y = np.min(ys)
            max_y = np.max(ys)
            min_x = np.min(xs)
            max_x = np.max(xs)
            image = image[min_y:max_y, min_x: max_x]

            image = cv2.resize(image, (28, 28))
            image = np.array(image, dtype=np.float32)[None, None, :, :]
            image = torch.from_numpy(image)
            logits = model(image)
            print(CLASSES[torch.argmax(logits[0])])
            image = np.zeros((480, 640, 3), dtype=np.uint8)
            ix = -1
            iy = -1
Example #28
def accuracy2(pred, label):

    # Accuracy, Precision, Recall, F1

    """
    Create confusion matrix and do the calculations
    C[i][j] = number of observations known to be in group i but predicted to be in group j

    Precision = True Positive / (True Positive + False Positive) = True positive / Total predicted
    Recall    = True Positive / (True Positive + False Negative) = True positive / Total actual
    F1 score  = 2* (Precision * Recall) / (Precision + Recall) 
    """
    cm = confusion_matrix(torch.argmax(pred, 1), label, labels=[0, 1, 2])
    print(cm)
    print(cm.shape)
Example #29
def sample_relax(logits):
    cat = Categorical(logits=logits)

    u = torch.rand(B,C).clamp(1e-8, 1.-1e-8)#.cuda()
    gumbels = -torch.log(-torch.log(u))
    z = logits + gumbels

    b = torch.argmax(z, dim=1)
    logprob = cat.log_prob(b).view(B,1)

    u_b = torch.gather(input=u, dim=1, index=b.view(B,1))
    z_tilde_b = -torch.log(-torch.log(u_b))
    
    z_tilde = -torch.log((- torch.log(u) / torch.softmax(logits, dim=1)) - torch.log(u_b))
    z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b)
    return z, b, logprob, z_tilde
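A property worth noting (my own check, not from the source): z_tilde always attains its maximum at the sampled index b, because every other coordinate equals -log(x - log(u_b)) for some x > 0 and therefore lies strictly below z_tilde_b:

import torch
from torch.distributions import Categorical

torch.manual_seed(0)
B, C = 4, 5  # sample_relax reads these as globals
z, b, logprob, z_tilde = sample_relax(torch.randn(B, C))
print(torch.equal(torch.argmax(z_tilde, dim=1), b))  # True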
Example #30
def test(model, vali_loader, loss_func, device):
    model.eval()
    test_loss = 0
    counter = 0
    preds = list()
    for data, target in vali_loader:
        with torch.no_grad():
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            test_loss += loss_func(output, target).item()
            pred = torch.argmax(output.data, 1)
            preds += pred.cpu().numpy().tolist()
            counter += 1
    avg_loss = test_loss / float(counter)  
    return preds, avg_loss
Example #31
    def _process_feature_extraction(self,
                                    output,
                                    im_scales,
                                    im_infos,
                                    feature_name="fc6",
                                    conf_thresh=0):
        batch_size = len(output[0]["proposals"])
        n_boxes_per_image = [len(boxes) for boxes in output[0]["proposals"]]
        score_list = output[0]["scores"].split(n_boxes_per_image)
        score_list = [torch.nn.functional.softmax(x, -1) for x in score_list]
        feats = output[0][feature_name].split(n_boxes_per_image)
        cur_device = score_list[0].device

        feat_list = []
        info_list = []

        for i in range(batch_size):
            dets = output[0]["proposals"][i].bbox / im_scales[i]
            scores = score_list[i]
            max_conf = torch.zeros(scores.shape[0]).to(cur_device)
            conf_thresh_tensor = torch.full_like(max_conf, conf_thresh)
            start_index = 1
            # Column 0 of the scores matrix is for the background class
            if self.args.background:
                start_index = 0
            for cls_ind in range(start_index, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.5)
                max_conf[keep] = torch.where(
                    # Better than max one till now and minimally greater
                    # than conf_thresh
                    (cls_scores[keep] > max_conf[keep])
                    & (cls_scores[keep] > conf_thresh_tensor[keep]),
                    cls_scores[keep],
                    max_conf[keep],
                )

            sorted_scores, sorted_indices = torch.sort(max_conf,
                                                       descending=True)
            num_boxes = (sorted_scores[:self.args.num_features] != 0).sum()
            keep_boxes = sorted_indices[:self.args.num_features]
            feat_list.append(feats[i][keep_boxes])
            bbox = output[0]["proposals"][i][keep_boxes].bbox / im_scales[i]
            # Predict the class label using the scores
            objects = torch.argmax(scores[keep_boxes][:, start_index:], dim=1)

            info_list.append({
                "bbox":
                bbox.cpu().numpy(),
                "num_boxes":
                num_boxes.item(),
                "objects":
                objects.cpu().numpy(),
                "cls_prob":
                scores[keep_boxes][:, start_index:].cpu().numpy(),
                "image_width":
                im_infos[i]["width"],
                "image_height":
                im_infos[i]["height"],
            })

        return feat_list, info_list
Example #32
        # data load
        X = X.to(device)
        Y = Y.to(device)

        # compute the cost through the CNN model
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)

        # reset accumulated gradients
        optimizer.zero_grad()

        # backpropagation
        cost.backward()

        # update the model parameters
        optimizer.step()

        avg_cost += cost / total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

# evaluation-only code: the weight-update steps are skipped
with torch.no_grad():

    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())
Example #33
scale_tracked = []
scale_lower = 0.00001
scale_higher = 0.1
for global_step in range(args.total_timesteps):
    # ALGO LOGIC: put action logic here
    epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction*args.total_timesteps, global_step)
    obs = np.array(obs)

    # action, logits, _ = sampler.sample(q_network, obs, device, n, epsilon)
    logits, n, mu, scale = q_network.forward(obs.reshape((1,)+obs.shape), device)

    if on_levy == 0:
        on_levy = n.clone().detach().cpu().numpy()
        on_levy = np.clip(np.floor(on_levy) + 1, 0, args.clip_n)
        n_tracked.append(int(on_levy))
        current_levy_action = torch.argmax(logits, dim=1).tolist()[0]
        scale_tracked.append(int(scale))

    if random.random() < epsilon:
        action = env.action_space.sample() 

    elif on_levy > 0:
        action = current_levy_action
    else:
        action = torch.argmax(logits, dim=1).tolist()[0]
    on_levy = on_levy - 1 
    # EXPERIMENTAL: please fix soon
    # TRY NOT TO MODIFY: execute the game and log data.
    next_obs, reward, done, info = env.step(action)
    episode_reward += reward
    # TRY NOT TO MODIFY: record rewards for plotting purposes
Example #34
])

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
feature_extractor = CNN_VAE().to(device)
#feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().to(device)
feature_extractor.load_state_dict(torch.load('extractor_model.bin'))
label_predictor.load_state_dict(torch.load('predictor_model.bin'))

target_dataset = ImageFolder(os.path.join(sys.argv[1], 'test_data'),
                             transform=target_transform)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

result = []
label_predictor.eval()
feature_extractor.eval()
for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.to(device)

    class_logits = label_predictor(feature_extractor(test_data))

    x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
    result.append(x)

import pandas as pd
result = np.concatenate(result)

# Generate your submission
df = pd.DataFrame({'id': np.arange(0, len(result)), 'label': result})
df.to_csv(sys.argv[2], index=False)
Example #35
def train(snapshotroot, device, forestType, numTrees, depth):
    xtrain, ytrain, xtest, ytest = datasets.load_usps()

    xtrain = np.reshape(xtrain, [-1, 256])
    xtest = np.reshape(xtest, [-1, 256])

    # XXX: Other papers use val = test for this data set
    xval = xtest
    yval = ytest

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)
    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)
    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    net = Net(forestType, numTrees, depth).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    optimizer = optim.Adam(net.parameters(), lr=0.001)

    # Count parameters
    numParams = sum(params.numel() for params in net.parameters())
    numTrainable = sum(params.numel() for params in net.parameters()
                       if params.requires_grad)
    print(
        f"There are {numParams} parameters total in this model ({numTrainable} are trainable)"
    )

    numEpochs = 200
    batchSize = 23

    indices = [i for i in range(xtrain.shape[0])]

    bestEpoch = numEpochs - 1
    bestAccuracy = 0.0
    bestLoss = 1000.0

    valLosses = np.zeros([numEpochs])

    for epoch in range(numEpochs):
        random.shuffle(indices)

        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        runningLoss = 0.0
        count = 0
        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            #t = time.time()
            optimizer.zero_grad()

            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)

            loss.backward()

            optimizer.step()

            runningLoss += loss.item()  # .item() detaches, so the graph is not retained across batches
            count += 1
            #print(f"elapsed = {time.time() - t}, count = {count}")

        meanLoss = runningLoss / count

        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        runningLoss = 0.0
        count = 0

        with torch.no_grad():
            net.train(False)
            #for xbatch, ybatch in batches(xval, yval, batchSize):
            for xbatch, ybatch in zip([xval], [yval]):
                outputs = net(xbatch)
                loss = criterion(outputs, ybatch)

                runningLoss += loss.item()
                count += 1

            net.train(True)

        valLoss = runningLoss / count

        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch

        #print(f"Info: Epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}")
        valLosses[epoch] = valLoss

    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")

    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    totalCorrect = 0
    count = 0

    with torch.no_grad():
        net.train(False)
        #for xbatch, ybatch in batches(xtest, ytest, batchSize):
        for xbatch, ybatch in zip([xtest], [ytest]):
            outputs = net(xbatch)
            outputs = torch.argmax(outputs, dim=1)

            tmpCorrect = torch.sum(outputs == ybatch)

            totalCorrect += tmpCorrect
            count += xbatch.shape[0]

    accuracy = float(totalCorrect) / float(count)
    print(
        f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}"
    )

    return accuracy, valLosses
Example #36
def inference(args):
    # Check for CUDA
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.warning(f"--- Using device {device}! ---")
    # Create datasets
    visual2index = json.load(
        open(os.path.join(args.visuals_dicts_path, "visual2index.json"))
    )
    word2freq, word2index, _ = build_vocab(json.load(open(args.train_dataset_path)))
    test_dataset = InferenceDataset(
        args.test_dataset_path, word2freq, word2index, visual2index
    )
    logging.info(f"Performing inference on {len(test_dataset)}")
    # Create loaders
    test_loader = DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        num_workers=4,
        collate_fn=collate_pad_batch,
    )
    num_cliparts = len(visual2index) + 1
    vocab_size = len(word2index)
    model = nn.DataParallel(
        baseline_factory(args.baseline_name, num_cliparts, vocab_size, 256, device)
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_path, map_location=device))
    model.train(False)
    print(f"Starting inference from checkpoint {args.checkpoint_path}!")
    evaluator = Evaluator(len(test_dataset))
    for batch in tqdm(test_loader):
        # forward
        batch = {key: val.to(device) for key, val in batch.items()}
        x_scores, y_scores, o_scores = model(batch["ids_text"], batch["ids_vis"])
        if "discrete" in args.baseline_name:
            x_out, y_out = (
                torch.argmax(x_scores, dim=-1) * BUCKET_SIZE + BUCKET_SIZE / 2,
                torch.argmax(y_scores, dim=-1) * BUCKET_SIZE + BUCKET_SIZE / 2,
            )
        elif "continuous" in args.baseline_name:
            x_out, y_out = (
                x_scores * BUCKET_SIZE + BUCKET_SIZE / 2,
                y_scores * BUCKET_SIZE + BUCKET_SIZE / 2,
            )
        else:
            raise ValueError("Invalid model type!")
        o_out = torch.argmax(o_scores, dim=-1)
        evaluator.update_metrics(
            x_out,
            batch["x_lab"],
            y_out,
            batch["y_lab"],
            o_out,
            batch["o_lab"],
            batch["attn_mask"],
        )

    print(
        f"The avg ABSOLUTE sim per scene is: {evaluator.get_abs_sim()} +/- {evaluator.get_abs_error_bar()}"
    )
    print(
        f"The avg RELATIVE sim per scene is: {evaluator.get_rel_sim()} +/- {evaluator.get_rel_error_bar()}"
    )
    print(f"The avg ACCURACY for the orientation is: {evaluator.get_o_acc()}")
    if args.abs_dump_path is not None and args.rel_dump_path is not None:
        evaluator.dump_results(args.abs_dump_path, args.rel_dump_path)
Example #37
def compute_acc(pred, labels):
    """
    Compute the accuracy of prediction given the labels.
    """
    return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred)
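A quick sanity check, assuming the conventional alias import torch as th implied by the snippet:

import torch as th

# two of the three row-wise argmaxes match the labels
pred = th.tensor([[2.0, 0.1], [0.3, 0.9], [0.8, 0.2]])
labels = th.tensor([0, 1, 1])
print(compute_acc(pred, labels))  # tensor(0.6667)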
Example #38
def train_volleyball(data_loader, model, device, optimizer, epoch, cfg):

    actions_meter = AverageMeter()
    activities_meter = AverageMeter()
    loss_meter = AverageMeter()
    epoch_timer = Timer()
    for batch_data in data_loader:
        model.train()
        model.apply(set_bn_eval)

        # prepare batch data
        batch_data = [b.to(device=device) for b in batch_data]
        batch_size = batch_data[0].shape[0]
        num_frames = batch_data[0].shape[1]

        actions_in = batch_data[2].reshape(
            (batch_size, num_frames, cfg.num_boxes))
        activities_in = batch_data[3].reshape((batch_size, num_frames))

        actions_in = actions_in[:, 0, :].reshape(
            (batch_size * cfg.num_boxes, ))
        activities_in = activities_in[:, 0].reshape((batch_size, ))

        # forward
        actions_scores, activities_scores = model(
            (batch_data[0], batch_data[1]))

        # Predict actions
        actions_weights = torch.tensor(cfg.actions_weights).to(device=device)
        actions_loss = F.cross_entropy(actions_scores,
                                       actions_in,
                                       weight=actions_weights)
        actions_labels = torch.argmax(actions_scores, dim=1)
        actions_correct = torch.sum(
            torch.eq(actions_labels.int(), actions_in.int()).float())

        # Predict activities
        activities_loss = F.cross_entropy(activities_scores, activities_in)
        activities_labels = torch.argmax(activities_scores, dim=1)
        activities_correct = torch.sum(
            torch.eq(activities_labels.int(), activities_in.int()).float())

        # Get accuracy
        actions_accuracy = actions_correct.item() / actions_scores.shape[0]
        activities_accuracy = activities_correct.item(
        ) / activities_scores.shape[0]

        actions_meter.update(actions_accuracy, actions_scores.shape[0])
        activities_meter.update(activities_accuracy,
                                activities_scores.shape[0])

        # Total loss
        total_loss = activities_loss + cfg.actions_loss_weight * actions_loss
        loss_meter.update(total_loss.item(), batch_size)

        # Optim
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    train_info = {
        'time': epoch_timer.timeit(),
        'epoch': epoch,
        'loss': loss_meter.avg,
        'activities_acc': activities_meter.avg * 100,
        'actions_acc': actions_meter.avg * 100
    }

    return train_info
Example #39
    def forward(self, p, img_size, targets=None, var=None):
        if ONNX_EXPORT:
            bs, nG = 1, self.nG  # batch size, grid size
        else:
            bs, nG = p.shape[0], p.shape[-1]

            if self.img_size != img_size:
                self.create_grids(img_size, nG)

                if p.is_cuda:
                    self.grid_xy = self.grid_xy.cuda()
                    self.anchor_vec = self.anchor_vec.cuda()
                    self.anchor_wh = self.anchor_wh.cuda()

        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 80)  # (bs, anchors, grid, grid, classes + xywh)
        p = p.view(bs, self.nA, self.nC + 5, nG,
                   nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        # xy, width and height
        xy = torch.sigmoid(p[..., 0:2])
        wh = p[..., 2:4]  # wh (yolo method)
        # wh = torch.sigmoid(p[..., 2:4])  # wh (power method)

        # Training
        if targets is not None:
            MSELoss = nn.MSELoss()
            BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
            CrossEntropyLoss = nn.CrossEntropyLoss()

            # Get outputs
            p_conf = p[..., 4]  # Conf
            p_cls = p[..., 5:]  # Class

            txy, twh, mask, tcls = build_targets(targets, self.anchor_vec,
                                                 self.nA, self.nC, nG)

            tcls = tcls[mask]
            if p.is_cuda:
                txy, twh, mask, tcls = txy.cuda(), twh.cuda(), mask.cuda(
                ), tcls.cuda()

            # Compute losses
            nT = sum([len(x) for x in targets])  # number of targets
            nM = mask.sum().float()  # number of anchors (assigned to targets)
            k = 1  # nM / bs
            if nM > 0:
                lxy = k * MSELoss(xy[mask], txy[mask])
                lwh = k * MSELoss(wh[mask], twh[mask])

                lcls = (k / 4) * CrossEntropyLoss(p_cls[mask],
                                                  torch.argmax(tcls, 1))
                # lcls = (k * 10) * BCEWithLogitsLoss(p_cls[mask], tcls.float())
            else:
                FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
                lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])

            lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float())

            # Sum loss components
            loss = lxy + lwh + lconf + lcls

            return loss, loss.item(), lxy.item(), lwh.item(), lconf.item(
            ), lcls.item(), nT

        else:
            if ONNX_EXPORT:
                grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view(
                    (1, -1, 2))
                anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view(
                    (1, -1, 2)) / nG

                # p = p.view(-1, 85)
                # xy = xy + self.grid_xy[0]  # x, y
                # wh = torch.exp(wh) * self.anchor_wh[0]  # width, height
                # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
                # p_cls = F.softmax(p[:, 5:85], 1) * p_conf  # SSD-like conf
                # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()

                p = p.view(1, -1, 85)
                xy = xy + grid_xy  # x, y
                wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
                p_conf = torch.sigmoid(p[..., 4:5])  # Conf
                p_cls = p[..., 5:85]
                # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
                # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
                p_cls = torch.exp(p_cls).permute((2, 1, 0))
                p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute(
                    (2, 1, 0))  # F.softmax() equivalent
                p_cls = p_cls.permute(2, 1, 0)
                return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()

            p[..., 0:2] = xy + self.grid_xy  # xy
            p[..., 2:4] = torch.exp(wh) * self.anchor_wh  # wh yolo method
            # p[..., 2:4] = ((wh * 2) ** 2) * self.anchor_wh  # wh power method
            p[..., 4] = torch.sigmoid(p[..., 4])  # p_conf
            p[..., :4] *= self.stride

            # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
            return p.view(bs, -1, 5 + self.nC)
Example #40
    running_loss = 0.0
    dataset.reset()

    num = 0
    correct = 0

    for i, data in enumerate(dataset.getbatch()):

        inputs, labels = data
        inputs, labels = torch.tensor(
            inputs, dtype=torch.uint8).cuda(), torch.tensor(labels).cuda()
        inputs = inputs.float()
        optimizer.zero_grad()
        outputs = player(inputs)

        tmp = torch.argmax(outputs, dim=1) == labels
        correct += torch.sum(tmp)
        num += batchsize

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % print_each == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / print_each))
            print('train_acc:  %.2f' % (float(correct) * 100.0 / num))
            correct = 0
            num = 0
            running_loss = 0.0
Example #41
    def calculate_losses(self, input_states, length_masks, input_masks,
                         pooled_outputs, targets):
        # Initialize total_loss and losses
        losses = {}
        total_loss = torch.zeros(1).to(self.gpu_ids['netE'])

        input_state, length_mask, input_mask, pooled_output, target = [
            input_states[0], length_masks[0], input_masks[0],
            pooled_outputs[0], targets[0]
        ]
        if 'maskgmm' in self.args.task_types:
            # print(pooled_output['maskgmm'].size(), input_mask.size())
            pis, mus, sigmas, rhos, qs = self.decode_parameters(
                pooled_output['maskgmm'], input_mask)
            losses['mask_gmm'] = self.losses_weights['mask_gmm'] * self.losses[
                'gmm'](input_state[:, :, :2][input_mask[:, :, 0] == 0, ], pis,
                       mus, sigmas, rhos)
            pis, mus, sigmas, rhos, qs = self.decode_parameters(
                pooled_output['maskgmm'], 1 - input_mask)
            losses['rec_gmm'] = self.losses_weights['rec_gmm'] * self.losses[
                'gmm'](input_state[:, :, :2][input_mask[:, :, 0] == 1, ], pis,
                       mus, sigmas, rhos)
            total_loss = total_loss + losses['mask_gmm'] + losses['rec_gmm']

            losses['mask_type'] = self.losses_weights[
                'mask_type'] * self.losses['mask_type'](
                    qs[input_mask[:, :, 2] == 0, :],
                    torch.argmax(input_state[:, :, 2:5],
                                 dim=2)[input_mask[:, :, 2] == 0])
            losses['rec_type'] = self.losses_weights['rec_type'] * self.losses[
                'mask_type'](qs[input_mask[:, :, 2] == 1, :],
                             torch.argmax(input_state[:, :, 2:5],
                                          dim=2)[input_mask[:, :, 2] == 1])
            total_loss = total_loss + losses['mask_type']

        if 'maskrec' in self.args.task_types:
            if self.args.input_dim == 6:
                axis_dim = [0, 1, 5]
            else:
                axis_dim = [0, 1]
            # May be just compute the mask
            # print((1-input_mask)[0,:,0].sum())
            losses['mask_axis'] = self.losses_weights[
                'mask_axis'] * self.losses['mask_axis'](
                    pooled_output['maskrec'][:, :, axis_dim][
                        input_mask[:, :, axis_dim] == 0],
                    input_state[:, :, axis_dim][input_mask[:, :,
                                                           axis_dim] == 0])
            losses['rec_axis'] = self.losses_weights['rec_axis'] * self.losses[
                'mask_axis'](pooled_output['maskrec'][:, :, axis_dim][
                    input_mask[:, :, axis_dim] == 1],
                             input_state[:, :,
                                         axis_dim][input_mask[:, :,
                                                              axis_dim] == 1])
            total_loss = total_loss + losses['mask_axis'] + losses['rec_axis']
            # print(input_states[:, :, 2:5][input_mask[:, :, 2:5] == 0].size())
            #*input_mask[:, :, 2:5](Think about Whether to add it)
            losses['mask_type'] = self.losses_weights[
                'mask_type'] * self.losses['mask_type'](
                    pooled_output['maskrec'][:, :, 2:5][input_mask[:, :,
                                                                   2] == 0, :],
                    torch.argmax(input_state[:, :, 2:5],
                                 dim=2)[input_mask[:, :, 2] == 0])
            losses['rec_type'] = self.losses_weights['rec_type'] * self.losses[
                'mask_type'](
                    pooled_output['maskrec'][:, :, 2:5][input_mask[:, :,
                                                                   2] == 1, :],
                    torch.argmax(input_state[:, :, 2:5],
                                 dim=2)[input_mask[:, :, 2] == 1])
            total_loss = total_loss + losses['mask_type'] + losses['rec_type']

        if 'maskdisc' in self.args.task_types:
            losses['x_mask_disc'] = self.losses_weights[
                'mask_axis'] * self.losses['mask_disc'](
                    pooled_output['maskdisc'][0][input_mask[:, :, 0] == 0, :],
                    (input_state[:, :, 0][input_mask[:, :, 0] == 0]).to(
                        torch.long))
            losses['x_rec_disc'] = self.losses_weights[
                'rec_axis'] * self.losses['mask_disc'](
                    pooled_output['maskdisc'][0][input_mask[:, :, 0] == 1, :],
                    (input_state[:, :, 0][input_mask[:, :, 0] == 1]).to(
                        torch.long))
            losses['y_mask_disc'] = self.losses_weights[
                'mask_axis'] * self.losses['mask_disc'](
                    pooled_output['maskdisc'][1][input_mask[:, :, 1] == 0, :],
                    (input_state[:, :, 1][input_mask[:, :, 1] == 0]).to(
                        torch.long))
            losses['y_rec_disc'] = self.losses_weights[
                'rec_axis'] * self.losses['mask_disc'](
                    pooled_output['maskdisc'][1][input_mask[:, :, 1] == 1, :],
                    (input_state[:, :, 1][input_mask[:, :, 1] == 1]).to(
                        torch.long))
            losses['type_mask_disc'] = self.losses_weights[
                'mask_type'] * self.losses['mask_disc'](
                    pooled_output['maskdisc'][2][input_mask[:, :, 2] == 0, :],
                    (input_state[:, :, 2][input_mask[:, :, 2] == 0]).to(
                        torch.long))
            losses['type_rec_disc'] = self.losses_weights[
                'rec_type'] * self.losses['mask_disc'](
                    pooled_output['maskdisc'][2][input_mask[:, :, 2] == 1, :],
                    (input_state[:, :, 2][input_mask[:, :, 2] == 1]).to(
                        torch.long))
            total_loss = total_loss + losses['x_mask_disc'] + losses[
                'y_mask_disc'] + losses['type_mask_disc'] + losses[
                    'x_rec_disc'] + losses['y_rec_disc'] + losses[
                        'type_rec_disc']

        if 'sketchclsinput' in self.args.task_types:
            losses['prediction'] = self.losses_weights[
                'prediction'] * self.losses['prediction'](
                    pooled_output['sketchclsinput'], target)
            total_loss = total_loss + losses['prediction']

        if 'sketchretrieval' in self.args.task_types:
            losses['triplet'] = self.losses_weights['triplet'] * self.losses[
                'triplet'](*[
                    pooled_output['sketchretrieval']
                    for pooled_output in pooled_outputs
                ])
            total_loss = total_loss + losses['triplet']
        return total_loss, losses
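
All the branches above share one pattern: a boolean mask built from input_mask selects either the masked (== 0) or the visible (== 1) positions before the loss is applied. Below is a minimal, self-contained sketch of that pattern with hypothetical shapes and names:

import torch
import torch.nn as nn

pred = torch.randn(4, 16, 5)             # hypothetical reconstruction output
target = torch.randn(4, 16, 5)           # hypothetical ground-truth states
mask = torch.randint(0, 2, (4, 16, 5))   # 0 = masked position, 1 = visible

mse = nn.MSELoss()
# Boolean indexing flattens the selected elements into a 1-D tensor,
# so each loss term is computed only over the chosen positions.
loss_masked = mse(pred[mask == 0], target[mask == 0])   # masked reconstruction
loss_visible = mse(pred[mask == 1], target[mask == 1])  # plain reconstruction
total_loss = loss_masked + loss_visible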
Exemplo n.º 42
0
def main(args):
    device = torch.device('cuda' if args.gpu else 'cpu')

    # Load pretrained model and tokenizer
    config_cls, model_cls, tokenizer_cls = MODEL_CLASSES[args.model_type]
    config = config_cls.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=args.num_labels,
    )

    tokenizer = tokenizer_cls.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
    )

    model = model_cls.from_pretrained(
        args.model_name_or_path,
        config=config,
    )
    model.to(device)

    text_field = TextField(tokenizer)
    label_field = LabelField(
        torch.long if args.num_labels > 1 else torch.float)

    if args.do_test:
        fields = [('src', text_field), ('ref', text_field)]
    else:
        fields = [('src', text_field), ('ref', text_field),
                  ('score', label_field)]

    # Training
    if args.do_train:
        # setup dataset
        print('Loading training data ...')
        train_data = Dataset(
            path_to_file=args.data,
            fields=fields,
            filter_pred=lambda ex: args.src_min <= len(ex.src) <= args.src_max \
                and args.ref_min <= len(ex.ref) <= args.ref_max
        )

        train_iter = Iterator(
            dataset=train_data,
            batch_size=args.batch_size,
            shuffle=True,
            repeat=False,
        )

        train(args, train_iter, model, device)
        model.save_pretrained(args.save_dir)
        tokenizer.save_pretrained(args.save_dir)

    # Evaluation
    if args.do_eval:
        model.eval()

        # setup dataset
        print('Loading development data ...')
        valid_data = Dataset(
            path_to_file=args.data,
            fields=fields,
            filter_pred=lambda ex: args.src_min <= len(ex.src) <= args.src_max \
                and args.ref_min <= len(ex.ref) <= args.ref_max
        )

        valid_iter = Iterator(
            dataset=valid_data,
            batch_size=args.batch_size,
            shuffle=True,
            repeat=False,
        )

        preds_list = []
        refs_list = []

        for batch in tqdm(valid_iter, total=len(valid_iter)):
            input_ids = torch.cat([batch.src, batch.ref[:, 1:]],
                                  dim=1).to(device)
            labels = batch.score.squeeze(1).to(device)

            token_type_ids = [
                torch.zeros_like(batch.src),
                torch.ones_like(batch.ref[:, 1:])
            ]
            token_type_ids = torch.cat(token_type_ids, dim=1).to(device)
            outputs = model(input_ids,
                            token_type_ids=token_type_ids,
                            labels=labels)[1]

            if args.num_labels > 1:
                preds = torch.argmax(outputs, dim=1)
            else:
                preds = torch.ge(outputs, args.threshold).int().squeeze(1)

            preds_list.append(preds.to('cpu'))
            refs_list.append(labels.int().to('cpu'))

        preds_list = torch.cat(preds_list)
        refs_list = torch.cat(refs_list)

        avg = 'macro' if args.num_labels > 1 else 'micro'
        precision = precision_score(refs_list, preds_list, average=avg)
        recall = recall_score(refs_list, preds_list, average=avg)
        f1 = f1_score(refs_list, preds_list, average=avg)

        print(f"Presion: {precision * 100}", end='\t')
        print(f"Recall: {recall * 100}", end='\t')
        print(f"F1 score: {f1 * 100}")

    if args.do_test:
        model.eval()

        # setup dataset
        print('Loading test data ...')
        test_data = Dataset(
            path_to_file=args.data,
            fields=fields,
            filter_pred=lambda ex: args.src_min <= len(ex.src) <= args.src_max \
                and args.ref_min <= len(ex.ref) <= args.ref_max
        )

        test_iter = Iterator(
            dataset=test_data,
            batch_size=args.batch_size,
            shuffle=True,
            repeat=False,
        )

        for batch in tqdm(test_iter, total=len(test_iter)):
            input_ids = torch.cat([batch.src, batch.ref[:, 1:]],
                                  dim=1).to(device)

            token_type_ids = [
                torch.zeros_like(batch.src),
                torch.ones_like(batch.ref[:, 1:])
            ]
            token_type_ids = torch.cat(token_type_ids, dim=1).to(device)
            outputs = model(input_ids, token_type_ids=token_type_ids)[0]

            for src, ref, out in zip(batch.src, batch.ref, outputs):
                src = src[1:src.tolist().index(tokenizer.sep_token_id)]
                ref = ref[1:ref.tolist().index(tokenizer.sep_token_id)]
                src = tokenizer.decode(src)
                ref = tokenizer.decode(ref)
                if args.num_labels > 1:
                    out = torch.argmax(out)
                print(src + '\t' + ref + '\t' + str(out.item()))
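
For the single-logit case above, torch.ge turns a raw score into a binary prediction. A quick sketch with hypothetical scores and threshold:

import torch

outputs = torch.tensor([[0.2], [0.7], [0.55]])  # hypothetical single-logit scores
threshold = 0.5
preds = torch.ge(outputs, threshold).int().squeeze(1)
print(preds)  # tensor([0, 1, 1], dtype=torch.int32)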
def main():
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument(
        "--option",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " +
        ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        help=
        "The output directory where the model checkpoints and predictions will be written.",
    )
    parser.add_argument(
        "--model_type",
        default='bert',
        type=str,
        help="Model type selected in the list: " +
        ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default="bert-base-uncased",
        type=str,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(ALL_MODELS),
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. Sequences "
        "longer than this will be truncated, and sequences shorter than this will be padded.",
    )
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--train_file",
        default=None,
        type=str,
        help=
        "The input training file. If a data dir is specified, will look for the file there"
        +
        "If no data dir or train/predict files are specified, will run with tensorflow_datasets.",
    )
    parser.add_argument("--logging_steps",
                        type=int,
                        default=50,
                        help="Log every X updates steps.")
    parser.add_argument("--save_steps",
                        type=int,
                        default=5000,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        "--predict_file",
        default=None,
        type=str,
        help=
        "The input evaluation file. If a data dir is specified, will look for the file there"
        +
        "If no data dir or train/predict files are specified, will run with tensorflow_datasets.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--resource_dir",
        type=str,
        default='WikiTables-WithLinks/',
        help="Path to the WikiTables-WithLinks resource directory.",
    )
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.")
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--cache_dir",
        default="/tmp/",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3",
    )
    parser.add_argument(
        "--stage1_model",
        default=None,
        type=str,
        help="Where to load the trained model from",
    )
    parser.add_argument(
        "--stage2_model",
        default=None,
        type=str,
        help="Where to load the trained model from",
    )
    parser.add_argument(
        "--dim",
        default=None,
        type=int,
        help="Where to load the trained model from",
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument(
        "--fp16",
        action="store_true",
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--gpu_index",
        default=0,
        type=int,
        help="gpu_index",
    )
    args = parser.parse_args()

    device = torch.device("cuda:{}".format(args.gpu_index))
    args.n_gpu = 1
    args.device = device

    if args.do_train:
        args.output_dir = args.option
        args.output_dir = os.path.join(
            args.output_dir,
            datetime.now().strftime('%Y_%m_%d_%H_%M_%S'))
    else:
        assert args.output_dir is not None or (
            args.stage1_model
            and args.stage2_model), "You must set an output dir"

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    # Set seed
    set_seed(args)

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    args.dim = config.hidden_size

    if args.option in ['stage1', 'stage2']:
        if args.option == 'stage1':
            model = FilterModel(model_class,
                                args.model_name_or_path,
                                config,
                                args.cache_dir,
                                dim=args.dim)
            model.to(args.device)
        else:
            model = JumpModel(model_class,
                              args.model_name_or_path,
                              config,
                              args.cache_dir,
                              dim=args.dim)
            model.to(args.device)
    elif args.option == 'stage12':
        filter_model = FilterModel(model_class,
                                   args.model_name_or_path,
                                   config,
                                   args.cache_dir,
                                   dim=args.dim)
        filter_model.to(args.device)
        jump_model = JumpModel(model_class,
                               args.model_name_or_path,
                               config,
                               args.cache_dir,
                               dim=args.dim)
        jump_model.to(args.device)
    else:
        raise NotImplementedError

    logger.info("Training/evaluation parameters %s", args)

    if args.do_train:
        train_data = readGZip(args.train_file)
        dataset = Stage12Dataset(args.resource_dir,
                                 train_data,
                                 tokenizer,
                                 args.max_seq_length,
                                 args.option,
                                 retain_label=True,
                                 shuffle=True)
        loader = DataLoader(dataset,
                            batch_size=None,
                            batch_sampler=None,
                            num_workers=0,
                            shuffle=False,
                            pin_memory=True)

        # tb_writer = SummaryWriter(log_dir=args.output_dir)
        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                args.weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=args.learning_rate,
                          eps=args.adam_epsilon)

        t_total = len(
            train_data
        ) // args.gradient_accumulation_steps * args.num_train_epochs

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmup_steps,
            num_training_steps=t_total)

        # Check if saved optimizer or scheduler states exist
        if os.path.isfile(os.path.join(
                args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
                    os.path.join(args.model_name_or_path, "scheduler.pt")):
            # Load in optimizer and scheduler states
            optimizer.load_state_dict(
                torch.load(
                    os.path.join(args.model_name_or_path, "optimizer.pt")))
            scheduler.load_state_dict(
                torch.load(
                    os.path.join(args.model_name_or_path, "scheduler.pt")))

        if args.fp16:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )

            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.fp16_opt_level)

        # multi-gpu training (should be after apex fp16 initialization)
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        global_step = 0

        tr_loss, logging_loss = 0.0, 0.0
        model.train()
        model.zero_grad()

        train_iterator = trange(0, int(args.num_train_epochs), desc="Epoch")
        for epoch in train_iterator:
            for step, batch in enumerate(tqdm(loader, desc="Iteration")):
                *data, labels = tuple(
                    Variable(t).to(args.device) for t in batch)
                probs = model(*data)

                loss = torch.sum(-torch.log(probs + 1e-8) * labels)

                if args.fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                tr_loss += loss.item()
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        torch.nn.utils.clip_grad_norm_(
                            amp.master_params(optimizer), args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                # Log metrics
                # if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                #     # Only evaluate when single GPU otherwise metrics may not average well
                #     #if args.local_rank == -1 and args.evaluate_during_training:
                #     #    results = evaluate(args, model, tokenizer)
                #     #    for key, value in results.items():
                #     #        tb_writer.add_scalar("eval_{}".format(key), value, global_step)
                #     tb_writer.add_scalar("{}_lr".format(args.option), scheduler.get_last_lr()[0], global_step)
                #     tb_writer.add_scalar("{}_loss".format(args.option), (tr_loss - logging_loss) / args.logging_steps, global_step)
                #     logging_loss = tr_loss

            # Save model checkpoint
            output_dir = os.path.join(args.output_dir,
                                      "checkpoint-epoch{}".format(epoch))
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            # Take care of distributed/parallel training
            model_to_save = model.module if hasattr(model, "module") else model
            model_to_save.save_pretrained(output_dir)
            tokenizer.save_pretrained(output_dir)
            torch.save(args, os.path.join(output_dir, "training_args.bin"))

        # tb_writer.close()

    if args.do_eval and args.option in ['stage1', 'stage2']:
        dev_data = readGZip(args.predict_file)
        model.eval()
        dataset = Stage12Dataset(args.resource_dir,
                                 dev_data,
                                 tokenizer,
                                 args.max_seq_length,
                                 args.option,
                                 retain_label=True,
                                 shuffle=False)
        loader = DataLoader(dataset,
                            batch_size=None,
                            batch_sampler=None,
                            num_workers=8,
                            shuffle=False,
                            pin_memory=True)

        for model_path in os.listdir(args.output_dir):
            if model_path.startswith('checkpoint'):
                model.load_state_dict(
                    torch.load(
                        os.path.join(args.output_dir, model_path,
                                     'pytorch_model.bin')))
                logger.info("Loading model from {}".format(model_path))

                eval_loss = 0
                for step, batch in enumerate(tqdm(loader, desc="Evaluation")):
                    *data, labels = tuple(
                        Variable(t).to(args.device) for t in batch)
                    probs = model(*data)
                    loss = torch.sum(-torch.log(probs + 1e-8) * labels)
                    eval_loss += loss.item()
                eval_loss = eval_loss / len(loader)

                logger.info("{} acheives average loss = {}".format(
                    model_path, eval_loss))

    elif args.do_eval and args.option == 'stage12':
        dev_data = readGZip(args.predict_file)
        # multi-gpu training (should be after apex fp16 initialization)
        filter_model.eval()
        jump_model.eval()

        #assert args.model_name_or_path is not None, "please provide the load_from argument"
        model_path = os.path.join(args.stage1_model, 'pytorch_model.bin')
        filter_model.load_state_dict(torch.load(model_path))

        model_path = os.path.join(args.stage2_model, 'pytorch_model.bin')
        jump_model.load_state_dict(torch.load(model_path))

        pred_data = copy.copy(dev_data)
        succ, total = 0, 0

        dataset = Stage12Dataset(args.resource_dir,
                                 dev_data,
                                 tokenizer,
                                 args.max_seq_length,
                                 'stage1',
                                 retain_label=False,
                                 shuffle=False)
        loader = DataLoader(dataset,
                            batch_size=None,
                            batch_sampler=None,
                            num_workers=8,
                            shuffle=False,
                            pin_memory=True)

        for step, batch in enumerate(tqdm(loader, desc="Evaluation")):
            data = tuple(Variable(t).to(args.device) for t in batch[:-1])
            probs = filter_model(*data)

            info = dev_data[batch[-1]]
            info['nodes'] = [info['nodes'][torch.argmax(probs, 0).item()]]
            info = generate_target_nodes(args.resource_dir, info)

            selected_target_nodes = []
            inner_dataset = Stage12Dataset(args.resource_dir,
                                           info,
                                           tokenizer,
                                           args.max_seq_length,
                                           'stage2',
                                           retain_label=False,
                                           shuffle=False)
            for b in inner_dataset:
                data = tuple(Variable(t).to(args.device) for t in b[:-1])
                probs = jump_model(*data)
                tmp = info[b[-1]]['target']
                selected_target_nodes.append(tmp[torch.argmax(probs,
                                                              0).item()])

            discovered_node = selected_target_nodes[0]
            pred_data[step]['target'] = discovered_node
            if not discovered_node[2]:
                pred_data[step]['pred'] = discovered_node[0]
            else:
                pred_data[step]['pred'] = [
                    discovered_node[0], discovered_node[2]
                ]

        #print("FINAL: correct = {}, total = {}, correct rate = {} \n".format(succ, total, succ / total))
        with open('predictions.intermediate.json', 'w') as f:
            json.dump(pred_data, f, indent=2)
Exemplo n.º 44
0
def select_hardest(loss_values, margin=None):
    if loss_values.max() == 0:
        return None
    return torch.argmax(loss_values)
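
A hypothetical usage sketch for the helper above, e.g. picking the hardest sample from a vector of per-sample losses during hard-example mining:

import torch

per_sample_loss = torch.tensor([0.0, 0.7, 0.2, 1.3])  # hypothetical losses
idx = select_hardest(per_sample_loss)
if idx is not None:
    print("hardest sample:", idx.item())   # -> 3
# An all-zero loss vector yields None, i.e. nothing left to mine.
assert select_hardest(torch.zeros(4)) is None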
Exemplo n.º 45
0
Arquivo: Net.py Projeto: specq/dlav
 def predict(self, x):
     outputs = self.forward(x)
     return torch.argmax(F.softmax(outputs, dim=1), dim=1)
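
Note that the softmax is not actually needed for the prediction itself: softmax is monotonic, so the argmax of the logits is identical. A quick sketch demonstrating the equivalence:

import torch
import torch.nn.functional as F

logits = torch.randn(8, 10)  # hypothetical (batch, classes) outputs
a = torch.argmax(logits, dim=1)
b = torch.argmax(F.softmax(logits, dim=1), dim=1)
assert torch.equal(a, b)  # softmax preserves the ordering, so argmax is unchanged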
Exemplo n.º 46
0
        optimizer.zero_grad()

        # forward + backward + optimize
        # outputs = net(inputs)
        outputs = model(inputs)
        # print(outputs.shape)
        # print(outputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        # print statistics
        running_loss += loss.item()
        if i % p_itr == p_itr-1:    # print every p_itr mini-batches
            pred = torch.argmax(outputs, dim=1)
            correct = pred.eq(labels)
            acc = torch.mean(correct.float())
            loss_list.append(running_loss/p_itr)
            acc_list.append(acc.item())
            print('[%d, %5d] loss: %.3f,' %
                  (epoch + 1, i + 1, running_loss / p_itr), f'ETA: {eta} seconds')
            # print(f'ETA: {eta} seconds')
            running_loss = 0.0
        t1_stop = process_time()
        total_iters = total_epoch*len(trainloader)
        remaining_iters = (total_epoch-epoch-1)*len(trainloader) + (len(trainloader)-i-1) + 0.01
        eta = (t1_stop-t1_start)/(total_iters-remaining_iters)*(remaining_iters)
        last_t1_stop = t1_stop
t1_stop = process_time()            
plt.plot(loss_list, label='loss')
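
For reference, the ETA arithmetic above boils down to elapsed time per completed iteration times the number of iterations left. A standalone sketch with hypothetical sizes:

from time import process_time

total_epochs, iters_per_epoch = 5, 100   # hypothetical sizes
t_start = process_time()
for epoch in range(total_epochs):
    for i in range(iters_per_epoch):
        # ... training step would run here ...
        done = epoch * iters_per_epoch + (i + 1)
        remaining = total_epochs * iters_per_epoch - done
        elapsed = process_time() - t_start
        eta = elapsed / done * remaining  # avg time per iteration * iterations left
    print(f"epoch {epoch + 1} done, ETA: {eta:.2f} seconds")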
Exemplo n.º 47
0
                            hidden=hidden,
                            maxlen=maxlen,
                            sample=False,
                            mask=mask_ARAE,
                            avoid_l=args.avoid_l)

                        decoded = torch.stack(decoded, dim=1).float()
                        if n_repeat > 1:
                            decoded = torch.repeat_interleave(decoded,
                                                              repeats=n_repeat,
                                                              dim=0)

                        decoded_prob = F.softmax(decoded, dim=-1)
                        decoded_prob = one_hot_prob(decoded_prob, max_indices)

                    sen_idxs = torch.argmax(decoded_prob, dim=2)
                    sen_idxs = sen_idxs.cpu().numpy()

                    output_s = list()
                    glue = ' '
                    sentence_list = list()
                    for ss in sen_idxs:
                        sentence = [ARAE_idx2word[s] for s in ss]

                        trigger_token_ids = list()
                        last_word = None
                        last_word2 = None
                        contain_sentiment_word = False
                        new_sentence = list()
                        for word in sentence:
                            cur_idx = snli_vocab.get_token_index(word)
Exemplo n.º 48
0
    def get_target(self, l, targets, anchors, in_h, in_w):
        #-----------------------------------------------------#
        #   Count how many images are in this batch
        #-----------------------------------------------------#
        bs = len(targets)
        #-----------------------------------------------------#
        #   Marks which anchor boxes contain no object
        #-----------------------------------------------------#
        noobj_mask = torch.ones(bs,
                                len(self.anchors_mask[l]),
                                in_h,
                                in_w,
                                requires_grad=False)
        #-----------------------------------------------------#
        #   Make the network pay more attention to small objects
        #-----------------------------------------------------#
        box_loss_scale = torch.zeros(bs,
                                     len(self.anchors_mask[l]),
                                     in_h,
                                     in_w,
                                     requires_grad=False)
        #-----------------------------------------------------#
        #   batch_size, 3, 13, 13, 5 + num_classes
        #-----------------------------------------------------#
        y_true = torch.zeros(bs,
                             len(self.anchors_mask[l]),
                             in_h,
                             in_w,
                             self.bbox_attrs,
                             requires_grad=False)
        for b in range(bs):
            if len(targets[b]) == 0:
                continue
            batch_target = torch.zeros_like(targets[b])
            #-------------------------------------------------------#
            #   Compute the centers of the positive samples on this feature map
            #-------------------------------------------------------#
            batch_target[:, [0, 2]] = targets[b][:, [0, 2]] * in_w
            batch_target[:, [1, 3]] = targets[b][:, [1, 3]] * in_h
            batch_target[:, 4] = targets[b][:, 4]
            batch_target = batch_target.cpu()

            #-------------------------------------------------------#
            #   Convert the ground-truth boxes to (0, 0, w, h) form
            #   num_true_box, 4
            #-------------------------------------------------------#
            gt_box = torch.FloatTensor(
                torch.cat((torch.zeros(
                    (batch_target.size(0), 2)), batch_target[:, 2:4]), 1))
            #-------------------------------------------------------#
            #   Convert the anchor boxes to (0, 0, w, h) form
            #   9, 4
            #-------------------------------------------------------#
            anchor_shapes = torch.FloatTensor(
                torch.cat((torch.zeros(
                    (len(anchors), 2)), torch.FloatTensor(anchors)), 1))
            #-------------------------------------------------------#
            #   Compute the IoU
            #   self.calculate_iou(gt_box, anchor_shapes) = [num_true_box, 9]: overlap between each ground-truth box and the 9 anchors
            #   best_ns:
            #   index of the anchor with the highest IoU for each ground-truth box
            #-------------------------------------------------------#
            best_ns = torch.argmax(self.calculate_iou(gt_box, anchor_shapes),
                                   dim=-1)

            for t, best_n in enumerate(best_ns):
                if best_n not in self.anchors_mask[l]:
                    continue
                #----------------------------------------#
                #   Determine which of this level's anchors the best-matching anchor is
                #----------------------------------------#
                k = self.anchors_mask[l].index(best_n)
                #----------------------------------------#
                #   Find which grid cell the ground-truth box falls in
                #----------------------------------------#
                i = torch.floor(batch_target[t, 0]).long()
                j = torch.floor(batch_target[t, 1]).long()
                #----------------------------------------#
                #   Get the class of the ground-truth box
                #----------------------------------------#
                c = batch_target[t, 4].long()

                #----------------------------------------#
                #   noobj_mask marks feature points that contain no object
                #----------------------------------------#
                noobj_mask[b, k, j, i] = 0
                #----------------------------------------#
                #   tx, ty are the ground-truth center-offset targets
                #----------------------------------------#
                y_true[b, k, j, i, 0] = batch_target[t, 0]
                y_true[b, k, j, i, 1] = batch_target[t, 1]
                y_true[b, k, j, i, 2] = batch_target[t, 2]
                y_true[b, k, j, i, 3] = batch_target[t, 3]
                y_true[b, k, j, i, 4] = 1
                y_true[b, k, j, i, c + 5] = 1
                #----------------------------------------#
                #   Used to get the xywh loss scale:
                #   large objects get a smaller weight, small objects a larger one
                #----------------------------------------#
                box_loss_scale[
                    b, k, j,
                    i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h
        return y_true, noobj_mask, box_loss_scale
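
The anchor assignment above hinges on torch.argmax over an IoU matrix. A toy sketch of that matching step, with hypothetical IoU values and a plain list standing in for self.anchors_mask[l]:

import torch

# Toy IoU matrix: 2 ground-truth boxes x 9 anchors (hypothetical values).
ious = torch.tensor([[0.1, 0.6, 0.3, 0.2, 0.0, 0.1, 0.4, 0.2, 0.1],
                     [0.0, 0.1, 0.2, 0.7, 0.3, 0.1, 0.0, 0.2, 0.1]])
best_ns = torch.argmax(ious, dim=-1)   # best anchor index per ground-truth box
print(best_ns)                         # tensor([1, 3])

anchors_mask = [0, 1, 2]  # hypothetical stand-in for self.anchors_mask[l]
for t, best_n in enumerate(best_ns.tolist()):
    if best_n not in anchors_mask:
        continue                        # another detection level handles this box
    k = anchors_mask.index(best_n)      # local anchor index at this level
    print(f"GT {t} -> local anchor {k}")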
# %% Load packages

import numpy as np
import torch

from sklearn.metrics import accuracy_score

from bnn_mcmc_examples.examples.mlp.penguins.constants import num_chains
from bnn_mcmc_examples.examples.mlp.penguins.dataloaders import test_dataloader
from bnn_mcmc_examples.examples.mlp.penguins.prior.constants import sampler_output_path, sampler_output_run_paths

# %% Load test data and labels

_, test_labels = next(iter(test_dataloader))

# %% Compute predictive accuracies

accuracies = np.empty(num_chains)

for i in range(num_chains):
    test_preds = np.loadtxt(
        sampler_output_run_paths[i].joinpath('preds_via_mean.txt'), skiprows=0)

    accuracies[i] = accuracy_score(torch.argmax(test_labels, 1), test_preds)

# %% Save predictive accuracies

np.savetxt(sampler_output_path.joinpath('accuracies_via_mean.txt'), accuracies)
Exemplo n.º 50
0
    inputs = t.squeeze(inputs, dim=1)
    inputs, labels = inputs.cuda(), labels.cuda()

    state_h = t.zeros(size=(batch_size, hidden_size), device=device)
    state_c = t.zeros(size=(batch_size, hidden_size), device=device)
    for ts in range(timestep):
        input_t = inputs[:, ts, :]
        f_t = t.sigmoid(
            t.matmul(input_t, kernel_f) +
            t.matmul(state_h, recurrent_kernel_f) + bias_f)
        i_t = t.sigmoid(
            t.matmul(input_t, kernel_i) +
            t.matmul(state_h, recurrent_kernel_i) + bias_i)
        o_t = t.sigmoid(
            t.matmul(input_t, kernel_o) +
            t.matmul(state_h, recurrent_kernel_o) + bias_o)
        c_t_hat = t.tanh(
            t.matmul(input_t, kernel_c) +
            t.matmul(state_h, recurrent_kernel_c) + bias_c)
        state_c = t.mul(state_c, f_t) + t.mul(c_t_hat, i_t)
        state_h = t.mul(t.tanh(state_c), o_t)

    logits = t.matmul(state_h, dense_weight) + dense_bias
    sfm = nn.Softmax(dim=1)
    pred = t.argmax(sfm(logits), dim=1)

    correct = t.sum(pred == labels)
    correct_count += correct
    total_count += inputs.shape[0]
print("test set accuracy = %f" % (correct_count / total_count))
        running_loss += loss.item()

    # Compute accuracy
    model.eval()  # inference mode
    with torch.no_grad():
        y_true = []
        y_pred = []
        for batch in dev_loader:
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            outputs = model(input_ids=input_ids,
                            token_type_ids=segment_ids,
                            attention_mask=input_mask)
            logits = outputs[0]

            logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            input_mask = input_mask.to('cpu').numpy()

            for i, label in enumerate(label_ids):
                temp_1 = []
                temp_2 = []
                # i-th sentence, j-th token; drop the leading [CLS]
                for j, m in enumerate(label):
                    if j == 0:
                        continue
                    elif label_ids[i][j] == len(label_map):
                        y_true.append(temp_1)
                        y_pred.append(temp_2)
Exemplo n.º 52
0
    def forward(self, raw: dict, img_size, labels=None):
        assert isinstance(raw, dict)
        t_xywha = raw['bbox']
        device = t_xywha.device
        nB = t_xywha.shape[0] # batch size
        nA = self.num_anchors # number of anchors
        nH, nW = t_xywha.shape[2:4] # prediction grid size
        assert t_xywha.shape[1] == nA and t_xywha.shape[-1] == 5
        conf_logits = raw['conf']
        cls_logits = raw['class']
        
        # Convert the predicted angle to radian for later use.
        p_radian = torch.sigmoid(t_xywha[..., 4]) * 2 * np.pi - np.pi

        # ----------------------- logits to prediction -----------------------
        p_xywha: torch.Tensor = t_xywha.detach().clone()
        # sigmoid activation for xy, angle, obj_conf
        y_ = torch.arange(nH, dtype=torch.float, device=device)
        x_ = torch.arange(nW, dtype=torch.float, device=device)
        mesh_y, mesh_x = torch.meshgrid(y_, x_)
        p_xywha[..., 0] = (torch.sigmoid(p_xywha[..., 0]) + mesh_x) * self.stride
        p_xywha[..., 1] = (torch.sigmoid(p_xywha[..., 1]) + mesh_y) * self.stride
        # w, h
        anch_wh = self.anchors.view(1,nA,1,1,2).to(device=device)
        p_xywha[..., 2:4] = torch.exp(p_xywha[..., 2:4]) * anch_wh
        p_xywha[..., 4] = p_radian / np.pi * 180
        p_xywha = p_xywha.cpu().contiguous()
        
        # Logistic activation for confidence score
        p_conf = torch.sigmoid(conf_logits.detach())
        # Logistic activation for categories
        if self.n_cls > 0:
            p_cls = torch.sigmoid(cls_logits.detach())
            cls_score, cls_idx = torch.max(p_cls, dim=-1, keepdim=True)
            p_conf = torch.sqrt(p_conf * cls_score)
            cls_idx = cls_idx.view(nB, nA*nH*nW).cpu()
        else:
            cls_idx = torch.zeros(nB, nA*nH*nW, dtype=torch.int64)
        preds = {
            'bbox': p_xywha.view(nB, nA*nH*nW, 5),
            'class_idx': cls_idx,
            'score': p_conf.view(nB, nA*nH*nW).cpu(),
        }
        if labels is None:
            return preds, None

        assert isinstance(labels, list)
        self.valid_gts = []
        # Initialize prediction targets
        PositiveMask = torch.zeros(nB, nA, nH, nW, dtype=torch.bool)
        IgnoredMask  = torch.zeros(nB, nA, nH, nW, dtype=torch.bool)
        weighted     = torch.zeros(nB, nA, nH, nW)
        TargetXYWH   = torch.zeros(nB, nA, nH, nW, 4)
        TargetAngle  = torch.zeros(nB, nA, nH, nW)
        TargetConf   = torch.zeros(nB, nA, nH, nW, 1)
        if self.n_cls > 0:
            TargetCls = torch.zeros(nB, nA, nH, nW, self.n_cls)
        
        # traverse all images in a batch
        for b in range(nB):
            im_labels = labels[b]
            assert isinstance(im_labels, ImageObjects)
            assert im_labels._bb_format == 'cxcywhd'
            assert im_labels.img_hw == img_size
            im_labels.sanity_check()
            nGT = len(im_labels)

            # if there is no ground truth, continue
            if nGT == 0:
                continue

            gt_bboxes = im_labels.bboxes
            gt_cls_idxs = im_labels.cats
            assert gt_bboxes.shape[1] == 5 # (cx, cy, w, h, degree)

            # IoU between all predicted bounding boxes and all GT.
            # rot bbox IoU calculation is expensive so only calculate IoU
            # using confident samples
            selected_idx = conf_logits[b] > - np.log(1/(0.005) - 1)
            selected_idx = selected_idx.squeeze(-1)
            p_selected = p_xywha[b][selected_idx]
            # if all predictions are lower than 0.005, penalize everything
            # if too many predictions are higher than 0.005, penalize everything
            if len(p_selected) < 1000 and len(p_selected) > 0:
                pred_ious = iou_rle(p_selected.view(-1,5), gt_bboxes,
                                    bb_format='cxcywhd', img_size=img_size)
                iou_with_gt, _ = pred_ious.max(dim=1)
                # do not penalize the predicted bboxes who have a high overlap
                # with any GT
                IgnoredMask[b, selected_idx] = (iou_with_gt > self.ignore_thre)
                # conf_loss_mask == 1 means give penalty

            for gi, (gt_bb, gt_cidx) in enumerate(zip(gt_bboxes, gt_cls_idxs)):
                assert gt_bb.dim() == 1 and len(gt_bb) == 5
                _gt_00wh0 = gt_bb.clone()
                _gt_00wh0[0:2] = 0
                _gt_00wh0[4] = 0
                _gt_00wh0 = _gt_00wh0.unsqueeze(0)
                anchor_ious = iou_rle(_gt_00wh0, self.anch_00wha_all,
                                    bb_format='cxcywhd', img_size=img_size)
                anch_idx_all = torch.argmax(anchor_ious, dim=1).squeeze().item()
                if not (self.anchor_indices == anch_idx_all).any():
                    # this layer is not responsible for this GT
                    continue

                self.valid_gts.append(gt_bb.clone())
                ta = anch_idx_all % nA # target anchor index
                ti = (gt_bb[0] / self.stride).long() # horizontal
                tj = (gt_bb[1] / self.stride).long() # vertical

                # positive sample
                PositiveMask[b,ta,tj,ti] = True
                # target x, y
                TargetXYWH[b,ta,tj,ti,0] = (gt_bb[0] / self.stride) % 1
                TargetXYWH[b,ta,tj,ti,1] = (gt_bb[1] / self.stride) % 1
                TargetXYWH[b,ta,tj,ti,2] = torch.log(gt_bb[2]/self.anchors[ta,0] + 1e-8)
                TargetXYWH[b,ta,tj,ti,3] = torch.log(gt_bb[3]/self.anchors[ta,1] + 1e-8)
                # use radian when calculating angle loss
                TargetAngle[b,ta,tj,ti] = gt_bb[4] / 180 * np.pi
                # target confidence score
                TargetConf[b,ta,tj,ti] = 1
                # target category
                if self.n_cls > 0:
                    TargetCls[b, ta, tj, ti, gt_cidx] = 1
                # smaller objects have higher losses
                img_area = img_size[0] * img_size[1]
                weighted[b,ta,tj,ti] = 2 - gt_bb[2] * gt_bb[3] / img_area

        # move the targets to GPU
        PositiveMask = PositiveMask.to(device=device)
        IgnoredMask = IgnoredMask.to(device=device)
        TargetXYWH = TargetXYWH.to(device=device)
        TargetAngle = TargetAngle.to(device=device)
        TargetConf = TargetConf.to(device=device)
        if self.n_cls > 0:
            TargetCls = TargetCls.to(device=device)
        weighted = weighted.unsqueeze(-1).to(device=device)[PositiveMask]
        
        bce_logits = tnf.binary_cross_entropy_with_logits
        # weighted *BCE* loss for xy
        _pxy = t_xywha[...,0:2][PositiveMask]
        _tgtxy = TargetXYWH[...,0:2][PositiveMask]
        loss_xy = bce_logits(_pxy, _tgtxy, weight=weighted, reduction='sum')
        # weighted loss for w,h
        _pwh = t_xywha[...,2:4][PositiveMask]
        _tgtwh = TargetXYWH[...,2:4][PositiveMask]
        # loss_wh = 0.5 * (weighted * (_pwh - _tgtwh).pow(2)).sum()
        weighted = torch.cat([weighted,weighted], dim=1)
        loss_wh = lossLib.smooth_L1_loss(_pwh, _tgtwh, beta=self.wh_sl1_beta,
                                         weight=weighted, reduction='sum')
        # loss for angle
        loss_angle = self.loss4angle(p_radian[PositiveMask], TargetAngle[PositiveMask])
        # confidence score
        _penalty = PositiveMask | (~IgnoredMask)
        loss_conf = bce_logits(conf_logits[_penalty], TargetConf[_penalty],
                              reduction='sum')
        if self.n_cls > 0:
            loss_cls = bce_logits(cls_logits[PositiveMask], TargetCls[PositiveMask], 
                                  reduction='sum')
        else:
            loss_cls = 0
        loss = loss_xy + loss_wh + loss_angle + loss_conf + loss_cls
        loss = loss / nB

        # logging
        pos_num = PositiveMask.sum().cpu().item()
        ngt = pos_num + 1e-16
        ignored_num = (IgnoredMask & (~PositiveMask)).sum().cpu().item()
        self.loss_str = f'level_{nH}x{nW} pos/ignore: {int(ngt)}/{ignored_num}, loss: ' \
                        f'xy/gt {loss_xy/ngt:.3f}, wh/gt {loss_wh/ngt:.3f}, ' \
                        f'angle/gt {loss_angle/ngt:.3f}, conf {loss_conf:.3f}, ' \
                        f'class {loss_cls:.3f}'
        self._assigned_num = pos_num
        return preds, loss
Exemplo n.º 53
0
train_X = X[:train_size]
train_y = y[:train_size]

test_X = X[train_size:]
test_y = y[train_size:]

print("Start to train...")
for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
        batch_x = train_X[i:i + BATCH_SIZE].view(-1, 3, 227, 227)
        batch_y = train_y[i:i + BATCH_SIZE]
        net.zero_grad()
        outputs = net(batch_x)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()

correct = 0
total = 0

print("Start to predict...")
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 3, 227, 227))[0]
        predicted_class = torch.argmax(net_out)
        if real_class == predicted_class:
            correct += 1
        total += 1

print("AlexNet Accuracy: ", round(correct / total, 3))
Exemplo n.º 54
0
    def forward(self, synonymy_scores, antonymy_scores, labels):

        correct_syn = 0
        wrong_syn = 0

        correct_ant = 0
        wrong_ant = 0

        correct_irrel = 0
        wrong_irrel = 0

        syn_size = 0
        ant_size = 0
        irrel_size = 0

        for label, syn_sc, ant_sc in zip(labels, synonymy_scores,
                                         antonymy_scores):

            total_vec = torch.cat((syn_sc, ant_sc), dim=0)

            probs = F.log_softmax(total_vec, dim=0)  #class probability
            pred = torch.argmax(probs, dim=0)  #predicted class

            if syn_sc <= 0.4 and ant_sc <= 0.4:
                pred = 2

            #word pair is synonymous
            if label == 1:
                syn_size += 1

                if pred == 0:
                    correct_syn += 1

                else:
                    wrong_syn += 1

            #word pair is antonymous
            if label == 2:
                ant_size += 1

                if pred == 1:
                    correct_ant += 1

                else:
                    wrong_ant += 1

            #word pair has no relationship
            if label == 0:
                irrel_size += 1

                if pred == 2:
                    correct_irrel += 1

                else:
                    wrong_irrel += 1

        #need to account for division by zero in training batches
        if syn_size == 0:
            syn_acc = 0
        else:
            syn_acc = (correct_syn / syn_size) * 100

        if ant_size == 0:
            ant_acc = 0
        else:
            ant_acc = (correct_ant / ant_size) * 100

        if irrel_size == 0:
            irrel_acc = 0
        else:
            irrel_acc = (correct_irrel / irrel_size) * 100

        return [syn_acc, ant_acc, irrel_acc]
def accuracy(out, labels):
    outputs = torch.argmax(out, dim=1)
    return 100 * torch.sum(outputs == labels).to(dtype=torch.double) / labels.numel()
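
For reference, a quick sanity check of the helper above (values are hypothetical):

import torch

logits = torch.tensor([[2.0, 0.1], [0.2, 1.5], [0.9, 0.3]])
labels = torch.tensor([0, 1, 1])
print(accuracy(logits, labels))  # tensor(66.6667, dtype=torch.float64): 2 of 3 correct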
Exemplo n.º 56
0
def valid_model(dataLoader, model, device, label_map, level_label_maps):

    model.eval()
    num_levels = label_map.shape[1]
    num_classes = dataLoader.dataset.get_num_classes()

    l1_cls_num = label_map[:, 0].max().item() + 1
    l2_cls_num = label_map[:, 1].max().item() + 1
    virtual_cls_num = l1_cls_num + l2_cls_num - num_classes
    l1_raw_cls_num = l1_cls_num - virtual_cls_num

    fusion_matrix1 = FusionMatrix(num_classes)
    fusion_matrix2 = FusionMatrix(l1_cls_num)
    func = torch.nn.Softmax(dim=1)

    # 20 + 80
    acc1 = AverageMeter()
    p1_acc1 = AverageMeter()
    p2_acc1 = AverageMeter()
    level_accs = [AverageMeter() for _ in range(num_levels)]
    all_labels1 = []
    all_result1 = []

    # 20 + 2
    acc2 = AverageMeter()
    p1_acc2 = AverageMeter()
    p2_acc2 = AverageMeter()
    all_labels2 = []
    all_result2 = []

    with torch.no_grad():
        for i, (image, label, meta) in enumerate(dataLoader):
            image, label = image.to(device), label.to(device)

            batch_size = label.shape[0]
            level_scores = []
            level_probs = []
            for level in range(num_levels):

                level_score = model(image, level)
                level_scores.append(level_score)

                # for each level
                level_label = label_map[label, level]
                level_mask = level_label >= 0
                level_label1 = level_label[level_mask]
                level_score1 = level_score[level_mask]
                level_top1 = torch.argmax(level_score1, 1)
                level_acc1, level_cnt1 = accuracy(level_top1.cpu().numpy(),
                                                  level_label1.cpu().numpy())
                level_accs[level].update(level_acc1, level_cnt1)

                if level == 0:
                    level_prob = func(level_score)
                    level_probs.append(level_prob)
                else:
                    high_lcid_to_curr_lcid = level_label_maps[level - 1]
                    level_prob = torch.zeros(level_score.shape).cuda()
                    for high_lcid in range(high_lcid_to_curr_lcid.shape[0]):
                        curr_lcid_mask = high_lcid_to_curr_lcid[high_lcid]
                        if curr_lcid_mask.sum().item() > 0:
                            level_prob[:, curr_lcid_mask] = func(
                                level_score[:, curr_lcid_mask])
                    level_probs.append(level_prob)

            # =================== 20 + 80 ============================
            all_probs = torch.ones((batch_size, num_classes)).cuda()
            for level in range(num_levels):
                level_prob = level_probs[level]
                related_lcids = label_map[:, level]
                related_lcids = related_lcids[related_lcids >= 0]
                unrelated_class_num1 = (label_map[:, level] < 0).sum().item()
                unrelated_class_num2 = label_map.shape[
                    0] - related_lcids.shape[0]
                assert unrelated_class_num1 == unrelated_class_num2
                all_probs[:,
                          unrelated_class_num1:] *= level_prob[:,
                                                               related_lcids]

            l1_mask1 = label < l1_raw_cls_num
            l1_scores1 = all_probs[l1_mask1]
            l1_labels1 = label[l1_mask1]
            l1_result1 = torch.argmax(l1_scores1, 1)
            l1_now_acc1, l1_cnt1 = accuracy(l1_result1.cpu().numpy(),
                                            l1_labels1.cpu().numpy())
            p1_acc1.update(l1_now_acc1, l1_cnt1)

            l2_mask1 = label >= l1_raw_cls_num
            l2_scores1 = all_probs[l2_mask1]
            l2_labels1 = label[l2_mask1]
            l2_result1 = torch.argmax(l2_scores1, 1)
            l2_now_acc1, l2_cnt1 = accuracy(l2_result1.cpu().numpy(),
                                            l2_labels1.cpu().numpy())
            p2_acc1.update(l2_now_acc1, l2_cnt1)

            now_result = torch.argmax(all_probs, 1)
            now_acc, cnt = accuracy(now_result.cpu().numpy(),
                                    label.cpu().numpy())
            acc1.update(now_acc, cnt)
            fusion_matrix1.update(now_result.cpu().numpy(),
                                  label.cpu().numpy())
            all_labels1.extend(label.cpu().numpy().tolist())
            all_result1.extend(now_result.cpu().numpy().tolist())
            # ====================================================================

            # ===================20 + 2 =================================
            l1v_scores = level_probs[0]
            l1v_labels = label_map[label, 0]

            l1_mask2 = l1v_labels < l1_raw_cls_num
            l1_scores2 = l1v_scores[l1_mask2]
            l1_labels2 = l1v_labels[l1_mask2]
            l1_result2 = torch.argmax(l1_scores2, 1)
            l1_now_acc2, l1_cnt2 = accuracy(l1_result2.cpu().numpy(),
                                            l1_labels2.cpu().numpy())
            p1_acc2.update(l1_now_acc2, l1_cnt2)

            l2_mask2 = l1v_labels >= l1_raw_cls_num
            l2_scores2 = l1v_scores[l2_mask2]
            l2_labels2 = l1v_labels[l2_mask2]
            l2_result2 = torch.argmax(l2_scores2, 1)
            l2_now_acc2, l2_cnt2 = accuracy(l2_result2.cpu().numpy(),
                                            l2_labels2.cpu().numpy())
            p2_acc2.update(l2_now_acc2, l2_cnt2)

            l1v_result = torch.argmax(l1v_scores, 1)
            l1v_now_acc, l1v_cnt = accuracy(l1v_result.cpu().numpy(),
                                            l1v_labels.cpu().numpy())
            acc2.update(l1v_now_acc, l1v_cnt)
            fusion_matrix2.update(l1v_result.cpu().numpy(),
                                  l1v_labels.cpu().numpy())
            all_labels2.extend(l1v_labels.cpu().numpy().tolist())
            all_result2.extend(l1v_result.cpu().numpy().tolist())
            # ====================================================================

    print('Acc (head+tail): %.4f %d' % (acc1.avg, acc1.count))
    print('Acc P1         : %.4f %d' % (p1_acc1.avg, p1_acc1.count))
    print('Acc P2         : %.4f %d' % (p2_acc1.avg, p2_acc1.count))
    print('Acc L1         : %.4f %d' %
          (level_accs[0].avg, level_accs[0].count))
    print('Acc L2         : %.4f %d' %
          (level_accs[1].avg, level_accs[1].count))
    print('=' * 23)
    print('Acc (head+v)   : %.4f %d' % (acc2.avg, acc2.count))
    print('Acc P1         : %.4f %d' % (p1_acc2.avg, p1_acc2.count))
    print('Acc Pv         : %.4f %d' % (p2_acc2.avg, p2_acc2.count))
    print('=' * 23)
    return fusion_matrix1, fusion_matrix2, all_labels1, all_result1, all_labels2, all_result2
Exemplo n.º 57
0
def train_cp_predictor(model, dataset, lr=1e-3, epochs=7, batch_size=64, cuda=True):
    images, tgt_padding_masks = dataset
    min_cp = 2
    num_cp = (tgt_padding_masks.shape[1] - 1 - min_cp) - torch.sum(tgt_padding_masks, dim=-1)

    if cuda:
        images = images.cuda()
        num_cp = num_cp.cuda()
        model = model.cuda()

    # Split into training and validation sets
    im_training = images[:40000]
    im_validation = images[40000:]
    num_cp_training = num_cp[:40000]
    num_cp_validation = num_cp[40000:]

    # Define the optimizer
    optimizer = Adam(model.parameters(), lr=lr)


    print("Pre-entrenamiento del predictor de num_cp")
    for epoch in range(epochs):
        print("Beginning epoch number", epoch + 1)
        # actual_covariances = cp_covariances * step_decay(cp_variance, epoch, var_drop, epochs_drop, min_variance).to(cp_covariances.device)
        total_loss = 0
        accuracy = 0
        for i in range(0, len(im_training) - batch_size + 1, batch_size):
            # Get the batch
            im = im_training[i:i + batch_size]
            groundtruth = num_cp_training[i: i+batch_size]

            # Apply the model
            probabilities = model(im)

            # Compute the loss
            loss = F.cross_entropy(probabilities, groundtruth)

            # Update the accuracy and the total_loss
            accuracy += torch.sum(torch.argmax(probabilities, dim=-1) == groundtruth)
            total_loss += loss.item()  # use .item() so the graph is not kept alive

            # Take one gradient-descent step
            loss.backward()
            optimizer.step()
            model.zero_grad()

        print("EPOCA", epoch+1, "La training loss es", total_loss/40000)
        print("EPOCA", epoch+1, "La training accuracy es", accuracy/40000)

        total_loss = 0
        accuracy = 0
        model.eval()
        with torch.no_grad():
            for j in range(0, len(im_validation) - batch_size + 1, batch_size):
                # Get the batch
                im = im_validation[j:j + batch_size]
                groundtruth = num_cp_validation[j: j + batch_size]

                # Apply the model
                probabilities = model(im)

                # Update the total_loss
                total_loss += F.cross_entropy(probabilities, groundtruth)

                # Update the accuracy
                accuracy += torch.sum(torch.argmax(probabilities, dim=-1) == groundtruth)

            print("EPOCA", epoch + 1, "La validation loss es", total_loss / 10000)
            print("EPOCA", epoch + 1, "La validation accuracy es", accuracy / 10000)
        model.train()
    return model.cpu()
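# A quick sanity check of the `num_cp` formula above: with a padding mask of
# width 8 (hypothetical toy values), each row yields
# (mask_width - 1 - min_cp) - n_padded control points beyond the minimum.
import torch

tgt_padding_masks = torch.tensor([
    [0, 0, 0, 0, 0, 1, 1, 1],   # 3 padded positions -> (8 - 1 - 2) - 3 = 2
    [0, 0, 0, 0, 0, 0, 0, 1],   # 1 padded position  -> (8 - 1 - 2) - 1 = 4
])
min_cp = 2
num_cp = (tgt_padding_masks.shape[1] - 1 - min_cp) - torch.sum(tgt_padding_masks, dim=-1)
print(num_cp)  # tensor([2, 4])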
Exemplo n.º 58
0
# Predict hidden-state features for each layer
with torch.no_grad():
    # See the models docstrings for the detail of the inputs
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
    # Transformers models always output tuples.
    # See the models docstrings for the detail of all the outputs
    # In our case, the first element is the hidden state of the last layer of the Bert model
    encoded_layers = outputs[0]
# We have encoded our input sequence in a FloatTensor of shape (batch size, sequence length, model hidden dimension)
assert tuple(encoded_layers.shape) == (1, len(indexed_tokens),
                                       model.config.hidden_size)

# Load pre-trained model (weights)
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# If you have a GPU, put everything on cuda
tokens_tensor = tokens_tensor.to('cuda')
segments_tensors = segments_tensors.to('cuda')
model.to('cuda')

# Predict all tokens
with torch.no_grad():
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
    predictions = outputs[0]

# confirm we were able to predict 'henson'
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
assert predicted_token == 'henson'
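# For reference, the tensors used above follow the standard BERT
# masked-token example; a minimal setup sketch (the exact import path and
# `masked_index` are assumptions inferred from the final assert):
import torch
from transformers import BertTokenizer, BertModel  # or pytorch_transformers, depending on version

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)

# Mask the token we later expect to recover ('henson' in the second sentence)
masked_index = 8
tokenized_text[masked_index] = '[MASK]'

indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0] * 7 + [1] * (len(tokenized_text) - 7)  # sentence A / sentence B
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

# Load pre-trained model (weights) for the hidden-states pass above
model = BertModel.from_pretrained('bert-base-uncased')
model.eval()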
Exemplo n.º 59
0
def doEpoch(dl_source, dl_target, model_source, model_target, epoch, optim=None):

    model_source.eval()
    model_target.train(optim is not None)

    mmd = MMD_loss(kernel_type='linear')

    oa_source_total = 0.0
    oa_target_total = 0.0
    loss_total = 0.0

    def loopDL(dl):
        # Cycle a DataLoader indefinitely so both loaders can be drawn from
        # for max(len(dl_source), len(dl_target)) steps.
        while True:
            for x in dl:
                yield x

    num = max(len(dl_source), len(dl_target))
    tBar = trange(num)
    iter_source = loopDL(dl_source)
    iter_target = loopDL(dl_target)
    for t in tBar:
        data_source, labels_source = next(iter_source)
        data_target, labels_target = next(iter_target)

        data_source, labels_source = data_source.to(args.device), labels_source.to(args.device)
        data_target, labels_target = data_target.to(args.device), labels_target.to(args.device)


        # predict source
        if args.freezeSource:
            with torch.no_grad():
                pred_source, fVec_source = model_source(data_source, True)

        elif optim is not None:
            pred_source, fVec_source = model_target(data_source, True)

        else:
            with torch.no_grad():
                pred_source, fVec_source = model_target(data_source, True)


        # predict target
        if optim is not None:
            optim.zero_grad()
            pred_target, fVec_target = model_target(data_target, True)
            pred_sm_target = F.softmax(pred_target, dim=1)

        else:
            with torch.no_grad():
                pred_target, fVec_target = model_target(data_target, True)
                pred_sm_target = F.softmax(pred_target, dim=1)


        # perform DA
        if daMethod == 'mmd':
            loss = mmd(fVec_source, fVec_target)

        elif daMethod == 'deepcoral':
            loss = deepCORAL(fVec_source, fVec_target)

        elif daMethod == 'deepjdot':
            loss = deepJDOT(fVec_source, fVec_target, labels_source, pred_sm_target)

        else:
            # Guard against an unrecognized `daMethod`; otherwise `loss`
            # would be undefined below.
            raise ValueError('Unknown DA method: {}'.format(daMethod))

        if optim is not None:
            # source loss
            if args.trainSource:
                if args.freezeSource:
                    # re-predict source features with target model
                    pred_source, fVec_source = model_target(data_source, True)
                loss += F.cross_entropy(pred_source, labels_source)

            loss.backward()
            optim.step()

        with torch.no_grad():
            pred_sm_source = F.softmax(pred_source, dim=1)
            yhat_source = torch.argmax(pred_sm_source, dim=1)
            yhat_target = torch.argmax(pred_sm_target, dim=1)

        oa_source_total += torch.sum(labels_source==yhat_source).item() / labels_source.size(0)
        oa_target_total += torch.sum(labels_target==yhat_target).item() / labels_target.size(0)
        loss_total += loss.item()

        tBar.set_description_str('[Ep. {}/{} {}] Loss: {:.2f}, OA source: {:.2f}  target: {:.2f}'.format(
            epoch+1, args.numEpochs,
            'train' if optim is not None else 'val',
            loss_total/(t+1),
            100*oa_source_total/(t+1),
            100*oa_target_total/(t+1)
        ))


    tBar.close()
    loss_total /= num
    oa_source_total /= num
    oa_target_total /= num

    return model_target, loss_total, oa_source_total, oa_target_total
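# The `MMD_loss` helper is not shown here; with kernel_type='linear' it
# typically reduces to the squared distance between the domain feature
# means. A minimal sketch of that linear variant (an assumption, the
# project's class may differ):
import torch

def linear_mmd(f_source: torch.Tensor, f_target: torch.Tensor) -> torch.Tensor:
    # Linear-kernel MMD: squared Euclidean distance between the mean
    # feature vectors of the source and target batches.
    delta = f_source.mean(dim=0) - f_target.mean(dim=0)
    return torch.dot(delta, delta)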
Exemplo n.º 60
0
print(repr(device))
print(q_network)
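# `linear_schedule` is used below but not shown in this snippet; a minimal
# sketch of the usual epsilon-annealing helper (an assumption, the original
# may differ):
def linear_schedule(start_e: float, end_e: float, duration: float, t: int):
    # Linearly anneal from start_e to end_e over `duration` steps, then clamp.
    slope = (end_e - start_e) / duration
    return max(slope * t + start_e, end_e)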
# TRY NOT TO MODIFY: start the game
obs = env.reset()
episode_reward = 0
for global_step in range(args.total_timesteps):
    # ALGO LOGIC: put action logic here
    epsilon = linear_schedule(args.start_e, args.end_e, args.exploration_fraction*args.total_timesteps, global_step)
    obs = np.array(obs)
    logits = q_network.forward(obs.reshape((1,)+obs.shape), device)
    if args.capture_video:
        env.set_q_values(logits.tolist())
    if random.random() < epsilon:
        action = env.action_space.sample()
    else:
        action = torch.argmax(logits, dim=1).tolist()[0]

    # TRY NOT TO MODIFY: execute the game and log data.
    next_obs, reward, done, info = env.step(action)
    episode_reward += reward
    
    # TRY NOT TO MODIFY: record rewards for plotting purposes
    if 'episode' in info.keys():
        print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
        writer.add_scalar("charts/episode_reward", info['episode']['r'], global_step)
        writer.add_scalar("charts/epsilon", epsilon, global_step)

    # TRY NOT TO MODIFY: CRUCIAL step easy to overlook 
    obs = next_obs
    if done:
        # important to note that because `EpisodicLifeEnv` wrapper is applied,