Exemplo n.º 1
0
def process(input_filename, gs_filename):
    """Run crowd-size hypothesis tests over a crowdsourced dataset.

    Reads raw crowd answers from ``input_filename`` and the gold standard
    from ``gs_filename`` (both CSV), computes precision/recall/F-measure
    for every crowd size in 20..80, runs hypothesis tests on each metric,
    and writes the results to ``output.csv`` (semicolon-delimited).
    """
    # Load and clean the raw crowd answers.
    # newline='' is the documented way to open files for the csv module;
    # without it, '\r\n' translation can corrupt rows on Windows.
    with open(input_filename, newline='') as f:
        dataset = [row for row in csv.reader(f)]
    dataset = clean(dataset)

    # Load the gold standard and derive the ground-truth labels.
    with open(gs_filename, newline='') as f:
        gs = [row for row in csv.reader(f)]
    # NOTE(review): the constant 3 looks like a column index for the
    # correct answer — confirm against correct()'s signature.
    y_true = correct(clean(gs), 3)

    # One precision/recall/F row triple per crowd size.
    output = []
    for crowd_size in range(20, 81):
        p, r, f = metrics(dataset, y_true, crowd_size)
        output.append([crowd_size, 'precision'] + hypothesis_tests(p).tolist())
        output.append([crowd_size, 'recall'] + hypothesis_tests(r).tolist())
        output.append([crowd_size, 'f_measure'] + hypothesis_tests(f).tolist())

    with open('output.csv', 'w', newline='') as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerows(output)
Exemplo n.º 2
0
# Train for FLAGS.epochs epochs, logging the loss after every mini-batch
# and computing ranking metrics on the test set once per epoch.
count = 0
for epoch in range(FLAGS.epochs):
    model.train()  # enable dropout layers, if the model has any
    start_time = time.time()

    for idx, batch_data in enumerate(train_dataloader):
        # Move the batch tensors onto the GPU.
        user = batch_data['user'].long().cuda()
        item = batch_data['item'].long().cuda()
        label = batch_data['label'].float().cuda()

        # One optimization step on this mini-batch.
        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()

        # Log the batch loss against a global step counter.
        writer.add_scalar('data/loss', loss.data.item(), count)
        count += 1

    model.eval()  # disable dropout layers for evaluation
    HR, NDCG = evaluate.metrics(model, test_dataloader, FLAGS.top_k)

    elapsed_time = time.time() - start_time
    print("Epoch: %d" % epoch + " Epoch time: "
          + time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
    print("Hit ratio is %.3f\tNdcg is %.3f" % (np.mean(HR), np.mean(NDCG)))

torch.save(model, 'm.pt')
Exemplo n.º 3
0
Arquivo: main.py Projeto: senze/rstudy
    # NOTE(review): this span is the interior of a per-epoch training loop;
    # the loop header and the definitions of train_loader/model/args/etc.
    # are outside this view.
    # ng_sample() presumably redraws negative samples for the new epoch —
    # confirm in the dataset class.
    train_loader.dataset.ng_sample()

    # One optimization step per (user, item, label) mini-batch.
    for user, item, label in train_loader:
        user = user.cuda()
        item = item.cuda()
        label = label.float().cuda()

        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()
        count += 1

    # Switch to eval mode before computing ranking metrics.
    model.eval()
    HR, NDCG = evaluate.metrics(model, test_loader, args.top_k)

    elapsed_time = time.time() - start_time
    print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
          time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
    print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))

    # Keep the best checkpoint by hit ratio; persist only when --out is set.
    if HR > best_hr:
        best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
        if args.out:
            if not os.path.exists(config.model_path):
                os.mkdir(config.model_path)
            torch.save(model, '{}{}.pth'.format(config.model_path,
                                                config.model))
print("End. Best epoch {:03d}: HR = {:.3f}, NDCG = {:.3f}".format(
Exemplo n.º 4
0
    # NOTE(review): this span is the interior of a per-epoch training loop;
    # the loop header and the definitions of train_loader/model/args/etc.
    # are outside this view.
    for features, feature_values, label in train_loader:
        features = features.cuda()
        feature_values = feature_values.cuda()
        label = label.cuda()

        # One optimization step on this mini-batch.
        model.zero_grad()
        prediction = model(features, feature_values)
        loss = criterion(prediction, label)
        loss.backward()
        optimizer.step()
        # writer.add_scalar('data/loss', loss.item(), count)
        count += 1

    # Evaluate: RMSE on the training split, ranking metrics on the test split.
    model.eval()
    train_result = evaluate.metric_rmse(model, train_loader)
    hr, ndcg, HR, NDCG = evaluate.metrics(model, test_loader)

    print("Runing Epoch {:03d} ".format(epoch) + "costs " +
          time.strftime("%H: %M: %S", time.gmtime(time.time() - start_time)))
    print("Train_RMSE: {:.4f}, Test_hr: {:.4f}, Test_ndcg: {:.4f}".format(
        train_result, hr, ndcg))

    # Track the best epoch by test hit ratio; optionally checkpoint the
    # model and dump the HR/NDCG arrays (filenames suggest later t-tests).
    if hr > best_hr:
        best_hr, best_ndcg, best_epoch = hr, ndcg, epoch
        if args.out:
            if not os.path.exists(config.model_path):
                os.mkdir(config.model_path)
            torch.save(model, '{}{}.pth'.format(config.model_path,
                                                config.model))
            np.save('./hr_ttest.npy', np.array(HR))
            np.save('./ndcg_ttest.npy', np.array(NDCG))
Exemplo n.º 5
0
def main():
    """Command-line entry point for training the TranSearch model.

    Parses hyper-parameter flags, prepares train/test dataloaders with
    negative sampling, optionally loads pre-trained visual and textual
    encoders (in 'double' mode, with their dropout layers removed),
    trains for 20 epochs, and prints Mrr / Hit ratio / Ndcg after each
    epoch while tracking the best epoch by Mrr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--embed_size",
                        type=int,
                        default=32,
                        help="the final embedding size")
    parser.add_argument("--lr",
                        type=float,
                        default=0.001,
                        help="the learning rate for optimization method")
    parser.add_argument("--dropout",
                        type=float,
                        default=0.5,
                        help="the dropout rate")
    parser.add_argument("--neg_number",
                        type=int,
                        default=5,
                        help="negative numbers for training the triplet model")
    parser.add_argument("--batch_size",
                        type=int,
                        default=512,
                        help="batch size for training")
    parser.add_argument("--top_k",
                        type=int,
                        default=20,
                        help="topk rank items for evaluating")
    parser.add_argument("--is_output",
                        action='store_true',
                        default=False,
                        help="output the result for rank test")
    parser.add_argument("--mode",
                        type=str,
                        default='double',
                        help="the model mode")
    parser.add_argument("--gpu",
                        type=str,
                        default='0',
                        help="choose the gpu card number.")
    FLAGS = parser.parse_args()

    writer = SummaryWriter()  # for visualization

    # Restrict CUDA to the selected card before any CUDA initialization.
    opt_gpu = FLAGS.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = opt_gpu
    cudnn.benchmark = True

    ############################# PREPARE DATASET ##########################
    data_train = TranSearchData(FLAGS.neg_number, is_training=True)
    data_test = TranSearchData(FLAGS.neg_number, is_training=False)
    print("Sampling negative items for each positive pairs......\n")
    data_train.sample_neg()
    dataloader_train = DataLoader(data_train,
                                  batch_size=FLAGS.batch_size,
                                  shuffle=True,
                                  num_workers=4)
    data_test.sample_neg()
    # Test loader evaluates one example at a time, in order.
    dataloader_test = DataLoader(data_test, shuffle=False, batch_size=1)

    ####################### LOAD PRE-TRAIN WEIGHTS ##########################
    if os.path.exists(config.image_weights_path) and FLAGS.mode == 'double':
        visual_FC = torch.load(config.image_weights_path)
        # remove the dropout layer
        modules = list(visual_FC.children())[:2] + list(
            visual_FC.children())[3:]
        visual_FC = nn.Sequential(*modules)
        # NOTE(review): this sets a plain Python attribute on the Module;
        # it does NOT freeze the parameters (that would need
        # requires_grad_(False) or per-parameter assignment) — confirm
        # whether the encoders were meant to be frozen.
        visual_FC.requires_grad = False
        textual_FC = torch.load(config.text_weights_path)
        modules = list(textual_FC.children())[:2] + list(
            textual_FC.children())[3:]
        textual_FC = nn.Sequential(*modules)
        # NOTE(review): same caveat as visual_FC.requires_grad above.
        textual_FC.requires_grad = False
    else:
        # No pre-trained encoders available (or not running in 'double' mode).
        visual_FC = None
        textual_FC = None

    ############################## CREATE MODEL ###########################
    # The number of distinct users sizes the user embedding table.
    full_data = pd.read_csv(config.full_path, usecols=['userID'])
    user_size = len(full_data.userID.unique())

    # create model
    model = TranSearch(visual_FC,
                       textual_FC,
                       config.visual_size,
                       config.textual_size,
                       FLAGS.embed_size,
                       user_size,
                       FLAGS.mode,
                       FLAGS.dropout,
                       is_training=True)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=FLAGS.lr,
                                 weight_decay=0.0001)

    best_mrr, best_hit, best_ndcg = 0.0, 0.0, 0.0
    best_epoch = 0
    print("Start training......\n")
    for epoch in range(20):
        model.is_training = True
        model.train()
        start_time = time.time()

        for idx, batch_data in enumerate(dataloader_train):
            # Move every field of the batch to the GPU.
            user = batch_data['userID'].cuda()
            query = batch_data['query'].cuda()
            pos_vis = batch_data['pos_vis'].cuda()
            pos_text = batch_data['pos_text'].cuda()
            neg_vis = batch_data['neg_vis'].cuda()
            neg_text = batch_data['neg_text'].cuda()

            model.zero_grad()
            item_predict, pos_item, neg_items = model(user, query, pos_vis,
                                                      pos_text, neg_vis,
                                                      neg_text, False)
            loss = TripletLoss(item_predict, pos_item, neg_items)

            loss.backward()
            optimizer.step()

            # Global step = epoch * batches-per-epoch + batch index.
            writer.add_scalar('data/endtoend_loss', loss.data.item(),
                              epoch * len(dataloader_train) + idx)

        # start testing
        model.eval()
        model.is_training = False
        Mrr, Hr, Ndcg = evaluate.metrics(model, data_test, dataloader_test,
                                         FLAGS.top_k, FLAGS.is_output, epoch)

        elapsed_time = time.time() - start_time
        print("Epoch: {:d}\t".format(epoch) + "Epoch time: " +
              time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
        print("Mrr is {:.3f}.\tHit ratio is {:.3f}.\tNdcg is {:.3f}.".format(
            Mrr, Hr, Ndcg))
        # Keep the best epoch by Mrr.
        if Mrr > best_mrr:
            best_mrr = Mrr
            best_hit = Hr
            best_ndcg = Ndcg
            best_epoch = epoch

    print("\nThe best epoch is on {}".format(best_epoch), end=': ')
    print("Mrr is {:.3f}.\tHit ratio is {:.3f}.\tNdcg is {:.3f}.".format(
        best_mrr, best_hit, best_ndcg))
Exemplo n.º 6
0
	# NOTE(review): this span is the interior of a per-epoch training loop;
	# the loop header and the definitions of train_loader/model/args/etc.
	# are outside this view.
	for features, feature_values, label in train_loader:
		features = features.cuda()
		feature_values = feature_values.cuda()
		label = label.cuda()

		# One optimization step on this mini-batch.
		model.zero_grad()
		prediction = model(features, feature_values)
		loss = criterion(prediction, label) 
		# Regularization: embedding-weight norm penalty scaled by --lamda.
		loss += args.lamda * model.embeddings.weight.norm()
		loss.backward()
		optimizer.step()
		# writer.add_scalar('data/loss', loss.item(), count)
		count += 1

	# Evaluate RMSE on all three splits.
	model.eval()
	train_result = evaluate.metrics(model, train_loader)
	valid_result = evaluate.metrics(model, valid_loader)
	test_result = evaluate.metrics(model, test_loader)

	print("Runing Epoch {:03d} ".format(epoch) + "costs " + time.strftime(
						"%H: %M: %S", time.gmtime(time.time()-start_time)))
	print("Train_RMSE: {:.3f}, Valid_RMSE: {:.3f}, Test_RMSE: {:.3f}".format(
						train_result, valid_result, test_result))

	# Lower RMSE is better; checkpoint when the test score improves and
	# --out was requested.
	if test_result < best_rmse:
		best_rmse, best_epoch = test_result, epoch
		if args.out:
			if not os.path.exists(config.model_path):
				os.mkdir(config.model_path)
			torch.save(model, 
				'{}{}.pth'.format(config.model_path, config.model))
Exemplo n.º 7
0
def main():
    """Command-line entry point for training the TranSearch model.

    Parses hyper-parameter flags, builds train/test dataloaders with
    negative sampling, loads the pre-trained visual/textual encoders
    (dropout layers removed, weights frozen), trains for 20 epochs, and
    prints Mrr / Hit ratio / Ndcg on the test set after every epoch.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--dataset", default='MenClothing', type=str,
                        help="choose dataset to process.")
    parser.add_argument("--embed_size", default=32, type=int,
                        help="the final embedding size.")
    parser.add_argument("--lr", default=0.001, type=float,
                        help="the learning rate for optimization method.")
    parser.add_argument("--dropout", default=0.5, type=float,
                        help="the dropout rate.")
    parser.add_argument("--neg_number", default=5, type=int,
                        help="negative numbers for training the triplet model.")
    parser.add_argument("--batch_size", default=512, type=int,
                        help="batch size for training.")
    parser.add_argument("--top_k", default=20, type=int,
                        help="topk rank items for evaluating.")
    # BUG FIX: the original declared ``type=bool``; argparse passes the raw
    # command-line string to the type callable and bool("False") is True,
    # so ANY value supplied on the command line enabled the flag.  A
    # store_true flag keeps the False default and parses correctly.
    parser.add_argument("--is_output", default=False, action='store_true',
                        help="output the result for rank test.")
    parser.add_argument("--mode", default='double', type=str,
                        help="the model mode.")
    parser.add_argument("--gpu", default='0', type=str,
                        help="choose the gpu card number.")

    FLAGS = parser.parse_args()

    writer = SummaryWriter()  # for visualization

    # Restrict CUDA to the selected card before any CUDA initialization.
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

    ############################# PREPARE DATASET ##########################

    data_train = TranSearchData(
        FLAGS.dataset, 'train.csv', is_training=True)
    data_test = TranSearchData(
        FLAGS.dataset, 'test.csv', is_training=False)
    print("Sampling negative items for each positive pairs......\n")
    data_train.sample_neg(FLAGS.neg_number)
    dataloader_train = DataLoader(
        data_train, batch_size=FLAGS.batch_size, shuffle=True, num_workers=4)
    data_test.sample_neg(0)  # no negatives needed at evaluation time
    dataloader_test = DataLoader(data_test, shuffle=False, batch_size=1)

    ####################### LOAD PRE-TRAIN WEIGHTS ##########################

    visual_FC = torch.load('./Variable/visual_FC.pt')
    # Remove the dropout layer (index 2) from the pre-trained stack.
    modules = list(visual_FC.children())[:2] + list(visual_FC.children())[3:]
    visual_FC = nn.Sequential(*modules)
    # BUG FIX: ``visual_FC.requires_grad = False`` only set a plain Python
    # attribute on the Module and froze nothing.  Module.requires_grad_()
    # recursively disables gradients on every parameter, which matches the
    # evident intent of keeping the pre-trained encoders fixed.
    visual_FC.requires_grad_(False)

    textual_FC = torch.load('./Variable/textual_FC.pt')
    modules = list(textual_FC.children())[:2] + list(textual_FC.children())[3:]
    textual_FC = nn.Sequential(*modules)
    textual_FC.requires_grad_(False)

    ############################## CREATE MODEL ###########################

    # The number of distinct users sizes the user embedding table.
    full_data = pd.read_csv(os.path.join(ROOT_DIR,
                            FLAGS.dataset, 'full.csv'), usecols=['userID'])
    user_size = len(full_data.userID.unique())

    # 4096 and 512 are the raw visual / textual feature dimensionalities
    # fed into the pre-trained encoders.
    model = TranSearch(visual_FC, textual_FC, 4096, 512, FLAGS.embed_size,
                       user_size, FLAGS.mode, FLAGS.dropout, is_training=True)
    model.cuda()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=FLAGS.lr, weight_decay=0.0001)

    print("Start training......\n")
    for epoch in range(20):
        model.is_training = True
        model.train()
        start_time = time.time()

        for idx, batch_data in enumerate(dataloader_train):
            # Move every field of the batch to the GPU.
            user = batch_data['userID'].cuda()
            query = batch_data['query'].cuda()
            pos_vis = batch_data['pos_vis'].cuda()
            pos_text = batch_data['pos_text'].cuda()
            neg_vis = batch_data['neg_vis'].cuda()
            neg_text = batch_data['neg_text'].cuda()

            model.zero_grad()
            item_predict, pos_item, neg_items = model(
                user, query, pos_vis, pos_text, neg_vis, neg_text, False)
            loss = TripletLoss(item_predict, pos_item, neg_items)
            loss.backward()
            optimizer.step()

            # Global step = epoch * batches-per-epoch + batch index.
            writer.add_scalar('data/endtoend_loss', loss.data.item(),
                              epoch * len(dataloader_train) + idx)

        print("Epoch %d training is done!\n" % epoch)

        # Start testing
        model.eval()
        model.is_training = False
        Mrr, Hr, Ndcg = evaluate.metrics(model, data_test, dataloader_test,
                                         FLAGS.top_k, FLAGS.is_output, epoch)

        elapsed_time = time.time() - start_time
        print("Epoch: %d\t" % epoch + "Epoch time: " + time.strftime(
            "%H: %M: %S", time.gmtime(elapsed_time)))
        print("Mrr is %.3f.\nHit ratio is %.3f.\nNdcg is %.3f.\n" % (
            Mrr, Hr, Ndcg))
Exemplo n.º 8
0
                # NOTE(review): this span is the interior of nested
                # training loops (a local-update loop above this view and
                # an epoch loop around it); most variable definitions are
                # outside this view.
                loss = criterion(pred, pos, neg)
                loss.backward()

                local_optimizer.step()

            # ---------Global Update---------
            model.zero_grad()
            model.set_global()
            for i in range(len(query_item_reviews_words)):
                # ---------Construct Batch---------

                # Forward pass in 'train' mode over the i-th query group.
                pred, pos, neg = model(
                    user_reviews_words, user_reviews_lengths,
                    query_item_reviews_words[i], query_item_reviews_lengths[i],
                    query_queries[i], 'train', query_negative_reviews_words[i],
                    query_negative_reviews_lengths[i])
                loss = criterion(pred, pos, neg)
                loss.backward()
                global_optimizer.step()

        # Epoch-level evaluation; loss printed below is the last batch's.
        Mrr, Hr, Ndcg = metrics(model, test_dataset, test_loader, 20,
                                local_optimizer, criterion)
        print(
            "Running Epoch {:03d}/{:03d}".format(epoch + 1, config.epochs),
            "loss:{:.3f}".format(float(loss)),
            "Mrr {:.3f}, Hr {:.3f}, Ndcg {:.3f}".format(Mrr, Hr,
                                                        Ndcg), "costs:",
            time.strftime("%H: %M: %S", time.gmtime(time.time() - start_time)))

    print(model.local_parameters)
Exemplo n.º 9
0
            # NOTE(review): this span is the interior of an AdaptDL-managed
            # training loop; the batch loop header and most variable
            # definitions are outside this view.
            network.zero_grad()
            prediction = network(user, item)
            loss = loss_function(prediction, label)
            loss.backward()
            optimizer.step()
            count += 1
            # Read AdaptDL adaptive-batching state; the values are unused
            # here — presumably kept for inspection/debugging.
            gain = network.gain
            batchsize = train_loader.current_batch_size
            accumulation_steps = train_loader.accumulation_steps

        # Log AdaptDL data/model statistics once per epoch.
        train_loader.to_tensorboard(writer, epoch, tag_prefix="AdaptDL/Data/")
        network.to_tensorboard(writer, epoch, tag_prefix="AdaptDL/Model/")

        network.eval()
        stats = adl.Accumulator()
        HR, NDCG = evaluate.metrics(network, test_loader, args.top_k)
        stats['HR'] += HR
        stats['replicas'] += 1.0
        # Average HR across replicas before writing the scalar.
        with stats.synchronized():
            writer.add_scalar('Loss/HR', stats['HR'] / stats['replicas'],
                              epoch)

        elapsed_time = time.time() - start_time
        print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
              time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
        print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))

        # Checkpoint on improvement; only replica 0 writes to disk.
        if HR > best_hr:
            best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
            if args.out and adaptdl.env.replica_rank() == 0:
                if not os.path.exists(model_path):