示例#1
0
def run_epoch(data, is_training, model, optimizer):
    '''
    Train model for one pass of train data, and return loss, acccuracy
    '''
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=20,
                                              shuffle=True,
                                              num_workers=4,
                                              drop_last=False)

    losses = []

    if is_training:
        model.train()
    else:
        model.eval()

    for batch in data_loader:
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1)
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1)
        rest_title = Variable(batch['rest_title'])
        rest_body = Variable(batch['rest_body'])

        pid_title_pad = torch.unsqueeze(Variable(batch['pid_title_pad']), 1)
        pid_body_pad = torch.unsqueeze(Variable(batch['pid_body_pad']), 1)
        rest_title_pad = Variable(batch['rest_title_pad'])
        rest_body_pad = Variable(batch['rest_body_pad'])

        pid_title, pid_body = pid_title.cuda(), pid_body.cuda()
        rest_title, rest_body = rest_title.cuda(), rest_body.cuda()
        pid_title_pad, pid_body_pad = pid_title_pad.cuda(), pid_body_pad.cuda()
        rest_title_pad, rest_body_pad = rest_title_pad.cuda(
        ), rest_body_pad.cuda()

        if is_training:
            optimizer.zero_grad()

        pt = model(pid_title)
        pb = model(pid_body)
        rt = model(rest_title)
        rb = model(rest_body)

        # we need to take the mean pooling taking into account the padding
        # tensors are of dim batch_size x samples x output_size x (len - kernel + 1)
        # pad tensors are of dim batch_size x samples x (len - kernel + 1)

        pid_title_pad_ex = torch.unsqueeze(pid_title_pad, 2).expand_as(pt)
        pid_body_pad_ex = torch.unsqueeze(pid_body_pad, 2).expand_as(pb)
        rest_title_pad_ex = torch.unsqueeze(rest_title_pad, 2).expand_as(rt)
        rest_body_pad_ex = torch.unsqueeze(rest_body_pad, 2).expand_as(rb)

        pt = torch.squeeze(torch.sum(pt * pid_title_pad_ex, dim=3), dim=3)
        pb = torch.squeeze(torch.sum(pb * pid_body_pad_ex, dim=3), dim=3)
        rt = torch.squeeze(torch.sum(rt * rest_title_pad_ex, dim=3), dim=3)
        rb = torch.squeeze(torch.sum(rb * rest_body_pad_ex, dim=3), dim=3)

        # tensors are not of dim batch_size x samples x output_size
        # need to scale down because not all uniformly padded

        ptp_norm = torch.sum(pid_title_pad, dim=2).clamp(min=1).expand_as(pt)
        pbp_norm = torch.sum(pid_body_pad, dim=2).clamp(min=1).expand_as(pb)
        rtp_norm = torch.sum(rest_title_pad, dim=2).clamp(min=1).expand_as(rt)
        rbp_norm = torch.sum(rest_body_pad, dim=2).clamp(min=1).expand_as(rb)

        pt = pt / ptp_norm
        pb = pb / pbp_norm
        rt = rt / rtp_norm
        rb = rb / rbp_norm

        pid_tensor = (pt + pb) / 2
        rest_tensor = (rt + rb) / 2

        if is_training:
            loss = loss_function(pid_tensor, rest_tensor)
            loss.backward()
            losses.append(loss.cpu().data[0])
            optimizer.step()
        else:
            expanded = pid_tensor.expand_as(rest_tensor)
            similarity = cs(expanded, rest_tensor, dim=2).squeeze(2)
            similarity = similarity.data.cpu().numpy()
            labels = batch['labels'].numpy()
            l = convert(similarity, labels)
            losses.extend(l)

    # Calculate epoch level scores
    if is_training:
        avg_loss = np.mean(losses)
        return avg_loss
    else:
        e = Evaluation(losses)
        MAP = e.MAP() * 100
        MRR = e.MRR() * 100
        P1 = e.Precision(1) * 100
        P5 = e.Precision(5) * 100
        return (MAP, MRR, P1, P5)
示例#2
0
def run_epoch(data, is_training, model, optimizer, transfer=False):
	
	# Make batches
	data_loader = torch.utils.data.DataLoader(
		data,
		batch_size=10,
		shuffle=True,
		num_workers=4,
		drop_last=False)

	losses = []
	actual = []
	expected = []

	if is_training:
		model.train()
	else:
		model.eval()
	
	for batch in data_loader:
		# Unpack training instances
		pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda() # Size: batch_size x 1 x body_length=100
		pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda() # Size: batch_size x 1 x body_length=100
		candidate_title = Variable(batch['candidate_titles']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_body = Variable(batch['candidate_body']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=100
		candidate_body_mask = Variable(batch['candidate_body_mask']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=40
		
		if is_training:
			optimizer.zero_grad()
		
		# Run text through model
		pid_title = model(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body = model(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title = model(candidate_title) # batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body = model(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		
		pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)# batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)

		good_title = torch.sum(pid_title * pid_title_mask, 3) # batch_size x 1 x output_size=500
		good_body = torch.sum(pid_body * pid_body_mask, 3) # batch_size x 1 x output_size=500
		cand_titles = torch.sum(candidate_title * candidate_title_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		
		good_tensor = (good_title + good_body)/2 # batch_size x 1 x output_size=500
		cand_tensor = (cand_titles + cand_bodies)/2 # batch_size x # candidates (21 in training) x output_size=500
		
		if is_training:
			l = loss(good_tensor, cand_tensor, 1.0)
			l.backward()
			losses.append(l.cpu().data[0])
			optimizer.step()
		else:
			similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
			if transfer:
				similarity = torch.FloatTensor(similarity.data.cpu().numpy())
			else:
				similarity = similarity.data.cpu().numpy()
			if transfer:
				labels = batch['labels']
			else:
				labels = batch['labels'].numpy()
			def predict(sim, labels):
				predictions = []
				for i in range(sim.shape[0]):
					sorted_cand = (-sim[i]).argsort()
					predictions.append(labels[i][sorted_cand])
				return predictions
			if transfer:
				for sim in similarity:
					actual.append(sim)
				expected.extend(labels.view(-1))
			else:
				l = predict(similarity, labels)
				losses.extend(l)

	if is_training:
		avg_loss = np.mean(losses)
		return avg_loss
	else:
		if transfer:
			auc = AUCMeter()
			auc.reset()
			auc.add(torch.cat(actual), torch.LongTensor(expected))
			return auc.value(max_fpr=0.05)
		else:
			e = Evaluation(losses)
			MAP = e.MAP()*100
			MRR = e.MRR()*100
			P1 = e.Precision(1)*100
			P5 = e.Precision(5)*100
			return (MAP, MRR, P1, P5)