def main():
    args = get_args()
    data_path = os.path.join(args.iobasedir, 'processed/downloads', args.data_set)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(args.iobasedir, 'logs', 'UB.log')
    mkdirp(log_path)
    set_logger(log_file)

    for filename in os.listdir(data_path):
        data_file = os.path.join(data_path, filename)
        topic = filename[:-5]
        docs, refs = load_data(data_file)
        if not refs:
            continue

        # Default the summary budget to the length (in words) of the first reference summary
        if not args.summary_size:
            summary_size = len(' '.join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        algos = ['UB1', 'UB2']
        for algo in algos:
            # `rouge` is expected to be a module-level Rouge instance here
            get_summary_scores(algo, docs, refs, summary_size, args.language, rouge)
        logger.info('###')
def main():
    args = get_args()
    rouge_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'rouge/RELEASE-1.5.5/')
    data_path = os.path.join(args.iobasedir, args.data_setpath)
    log_path = os.path.join(args.iobasedir, 'logs')
    log_file = os.path.join(
        args.iobasedir, 'logs',
        'baselines_%s_%s.log' % (args.data_set, args.summary_size))
    mkdirp(log_path)
    set_logger(log_file)

    for filename in os.listdir(data_path):
        data_file = os.path.join(data_path, filename)
        topic = filename[:-5]
        try:
            docs, refs = load_data(data_file)
        except Exception:
            logger.warning('Could not load %s, skipping', data_file)
            continue
        if not refs:
            continue

        # Default the summary budget to the length (in words) of the first reference summary
        if not args.summary_size:
            summary_size = len(' '.join(refs[0]).split(' '))
        else:
            summary_size = int(args.summary_size)

        logger.info('Topic ID: %s', topic)
        logger.info('###')
        logger.info('Summary_len: %d', summary_size)

        rouge = Rouge(rouge_dir)
        algos = ['UB1', 'UB2', 'ICSI', 'Luhn', 'LexRank', 'LSA', 'KL']
        for algo in algos:
            get_summary_scores(algo, docs, refs, summary_size, args.language, rouge)
        rouge._cleanup()
        logger.info('###')
def test():
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    with open(Config().NEG_SAMPLES, 'rb') as handle:
        neg_samples = pickle.load(handle)

    # Load model
    dr_model = torch.load(MODEL_DIR)
    dr_model.eval()

    item_embedding = dr_model.encode.weight
    hidden = dr_model.init_hidden(Config().batch_size)

    hitratio_numer = 0
    hitratio_denom = 0
    ndcg = 0.0

    for i, x in enumerate(dh.batch_iter(train_data, Config().batch_size,
                                        Config().seq_len, shuffle=False)):
        uids, baskets, lens = x
        dynamic_user, _ = dr_model(baskets, lens, hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            scores = []
            # Use the last (non-padded) output as the user representation
            du_latest = du[l - 1].unsqueeze(0)

            # Deal with positive samples: score <u, p> for all ground-truth test items
            positives = test_data[test_data['userID'] == uid].baskets.values[0]  # list, dim 1
            p_length = len(positives)
            positives = torch.LongTensor(positives)
            scores_pos = list(torch.mm(du_latest, item_embedding[positives].t()).data.numpy()[0])
            for s in scores_pos:
                scores.append(s)

            # Deal with negative samples
            negatives = random.sample(list(neg_samples[uid]), Config().neg_num)
            negatives = torch.LongTensor(negatives)
            scores_neg = list(torch.mm(du_latest, item_embedding[negatives].t()).data.numpy()[0])
            for s in scores_neg:
                scores.append(s)

            # Calculate hit ratio: positives occupy indices [0, p_length),
            # so a top-k index below p_length is a hit
            index_k = []
            for k in range(Config().top_k):
                index = scores.index(max(scores))
                index_k.append(index)
                scores[index] = -9999
            hitratio_numer += len(set(np.arange(0, p_length)) & set(index_k))
            hitratio_denom += p_length

            # Calculate NDCG
            u_dcg = 0
            u_idcg = 0
            for k in range(Config().top_k):
                if index_k[k] < p_length:  # indices below p_length are positives
                    u_dcg += 1 / math.log(k + 1 + 1, 2)
                u_idcg += 1 / math.log(k + 1 + 1, 2)
            ndcg += u_dcg / u_idcg

    hitratio = hitratio_numer / hitratio_denom
    ndcg = ndcg / len(train_data)
    print('Hit ratio[{0}]: {1}'.format(Config().top_k, hitratio))
    print('NDCG[{0}]: {1}'.format(Config().top_k, ndcg))
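# The evaluation loop above ranks positive and sampled negative items by their
# <user, item> dot-product score and reports hit ratio and NDCG@k. A minimal
# standalone sketch of that metric computation (hypothetical helper with toy
# inputs, not part of the repository), following the same convention that
# positives sit at indices [0, p_length) and IDCG uses all top_k positions:
import math


def ranking_metrics_at_k(scores, p_length, top_k=10):
    """Return (hit count, NDCG@k), assuming positives occupy indices [0, p_length)."""
    ranked = sorted(range(len(scores)), key=lambda j: scores[j], reverse=True)[:top_k]
    hits = sum(1 for idx in ranked if idx < p_length)
    dcg = sum(1 / math.log2(rank + 2) for rank, idx in enumerate(ranked) if idx < p_length)
    idcg = sum(1 / math.log2(rank + 2) for rank in range(top_k))
    return hits, dcg / idcg


# e.g. scores for 2 positives followed by 3 negatives:
# ranking_metrics_at_k([0.9, 0.1, 0.8, 0.2, 0.3], p_length=2, top_k=3) -> (1, ~0.47)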
def train():
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Validation data processing...")
    validation_data = dh.load_data(Config().VALIDATIONSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    with open(Config().NEG_SAMPLES, 'rb') as handle:
        neg_samples = pickle.load(handle)

    # Model config
    model = DRModel(Config())

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=Config().learning_rate)

    def bpr_loss(uids, baskets, dynamic_user, item_embedding):
        """Bayesian personalized ranking loss for implicit feedback.

        For an intro on BPR:
        https://towardsdatascience.com/recommender-system-using-bayesian-personalized-ranking-d30e98bba0b9

        Args:
            uids: batch of users' IDs
            baskets: batch of users' baskets
            dynamic_user: batch of users' dynamic representations
            item_embedding: item embedding matrix
        """
        loss = 0
        for uid, bks, du in zip(uids, baskets, dynamic_user):
            du_p_product = torch.mm(du, item_embedding.t())  # shape: [pad_len, num_item]
            loss_u = []  # loss for user
            for t, basket_t in enumerate(bks):
                # Skip padded baskets ([0]) and the first time step, which has no
                # preceding user representation (the score uses du_p_product[t - 1])
                if basket_t[0] != 0 and t != 0:
                    pos_idx = torch.LongTensor(basket_t)

                    # Sample negative products
                    neg = random.sample(list(neg_samples[uid]), len(basket_t))
                    neg_idx = torch.LongTensor(neg)

                    # Score p(u, t, v > v')
                    score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]

                    # Average negative log likelihood for basket_t
                    loss_u.append(torch.mean(-torch.nn.LogSigmoid()(score)))
            for i in loss_u:
                loss = loss + i / len(loss_u)
        avg_loss = torch.div(loss, len(baskets))
        return avg_loss

    def train_model():
        model.train()  # turn on training mode for dropout
        dr_hidden = model.init_hidden(Config().batch_size)
        train_loss = 0
        start_time = time.time()
        num_batches = ceil(len(train_data) / Config().batch_size)
        for i, x in enumerate(dh.batch_iter(train_data, Config().batch_size,
                                            Config().seq_len, shuffle=True)):
            # baskets are padded to seq_len (12) with [0];
            # lens holds the number of real (non-padded) baskets per user
            uids, baskets, lens = x
            model.zero_grad()  # gradients accumulate across backward() calls unless zeroed
            dynamic_user, _ = model(baskets, lens, dr_hidden)

            loss = bpr_loss(uids, baskets, dynamic_user, model.encode.weight)
            loss.backward()

            # Clip to avoid exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), Config().clip)

            # Parameter update
            optimizer.step()

            train_loss += loss.data

            # Logging
            if i % Config().log_interval == 0 and i > 0:
                elapsed = (time.time() - start_time) / Config().log_interval
                cur_loss = train_loss.item() / Config().log_interval  # turn tensor into float
                train_loss = 0
                start_time = time.time()
                logger.info(
                    '[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.4f} |'
                    .format(epoch, i, num_batches, elapsed, cur_loss))

    def validate_model():
        model.eval()
        dr_hidden = model.init_hidden(Config().batch_size)
        val_loss = 0
        start_time = time.time()
        num_batches = ceil(len(validation_data) / Config().batch_size)
        for i, x in enumerate(dh.batch_iter(validation_data, Config().batch_size,
                                            Config().seq_len, shuffle=False)):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            loss = bpr_loss(uids, baskets, dynamic_user, model.encode.weight)
            val_loss += loss.data

        # Logging
        elapsed = (time.time() - start_time) * 1000 / num_batches
        val_loss = val_loss.item() / num_batches
        logger.info('[Validation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.4f} |'
                    .format(epoch, elapsed, val_loss))
        return val_loss

    def test_model():
        model.eval()
        item_embedding = model.encode.weight
        dr_hidden = model.init_hidden(Config().batch_size)

        hitratio_numer = 0
        hitratio_denom = 0
        ndcg = 0.0

        for i, x in enumerate(dh.batch_iter(train_data, Config().batch_size,
                                            Config().seq_len, shuffle=False)):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            for uid, l, du in zip(uids, lens, dynamic_user):
                scores = []
                # Use the last output as the user representation
                du_latest = du[l - 1].unsqueeze(0)

                # Deal with positive samples: score <u, p> for all ground-truth test items
                positives = test_data[test_data['userID'] == uid].baskets.values[0]  # list, dim 1
                p_length = len(positives)
                positives = torch.LongTensor(positives)
                scores_pos = list(torch.mm(du_latest, item_embedding[positives].t()).data.numpy()[0])
                for s in scores_pos:
                    scores.append(s)

                # Deal with negative samples
                negatives = random.sample(list(neg_samples[uid]), Config().neg_num)
                negatives = torch.LongTensor(negatives)
                scores_neg = list(torch.mm(du_latest, item_embedding[negatives].t()).data.numpy()[0])
                for s in scores_neg:
                    scores.append(s)

                # Calculate hit ratio
                index_k = []
                for k in range(Config().top_k):
                    index = scores.index(max(scores))
                    index_k.append(index)
                    scores[index] = -9999
                hitratio_numer += len(set(np.arange(0, p_length)) & set(index_k))
                hitratio_denom += p_length

                # Calculate NDCG
                u_dcg = 0
                u_idcg = 0
                for k in range(Config().top_k):
                    if index_k[k] < p_length:  # indices below p_length are positives
                        u_dcg += 1 / math.log(k + 1 + 1, 2)
                    u_idcg += 1 / math.log(k + 1 + 1, 2)
                ndcg += u_dcg / u_idcg

        hit_ratio = hitratio_numer / hitratio_denom
        ndcg = ndcg / len(train_data)
        logger.info('[Test]| Epochs {:3d} | Hit ratio {:02.4f} | NDCG {:05.4f} |'
                    .format(epoch, hit_ratio, ndcg))
        return hit_ratio, ndcg

    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    logger.info('Save into {0}'.format(out_dir))
    checkpoint_dir = out_dir + '/model-{epoch:02d}-{hitratio:.4f}-{ndcg:.4f}.model'

    best_hit_ratio = None

    try:
        # Training
        for epoch in range(Config().epochs):
            train_model()
            logger.info('-' * 89)

            val_loss = validate_model()
            logger.info('-' * 89)

            hit_ratio, ndcg = test_model()
            logger.info('-' * 89)

            # Checkpoint on improved hit ratio
            if not best_hit_ratio or hit_ratio > best_hit_ratio:
                with open(checkpoint_dir.format(epoch=epoch, hitratio=hit_ratio, ndcg=ndcg), 'wb') as f:
                    torch.save(model, f)
                best_hit_ratio = hit_ratio
    except KeyboardInterrupt:
        logger.info('*' * 89)
        logger.info('Early Stopping!')
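# bpr_loss() above implements the BPR objective: for each time step it pushes the
# scores of observed (positive) items above those of sampled negatives by
# minimizing -log(sigmoid(s_pos - s_neg)). A minimal self-contained sketch of the
# same objective on toy tensors (hypothetical names, not the repository's API):
import torch


def bpr_pairwise_loss(user_vec, item_embedding, pos_idx, neg_idx):
    """-mean(log sigmoid(<u, v_pos> - <u, v_neg>)) for one user and one basket."""
    scores = torch.mv(item_embedding, user_vec)  # <u, v> for every item
    diff = scores[pos_idx] - scores[neg_idx]     # pairwise preference scores
    return -torch.nn.functional.logsigmoid(diff).mean()


# Toy usage: 10 items, 4-dim embeddings, 2 positives paired with 2 sampled negatives
emb = torch.randn(10, 4)
u = torch.randn(4)
loss = bpr_pairwise_loss(u, emb, torch.tensor([1, 3]), torch.tensor([7, 9]))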
if __name__ == "__main__":
    # LOAD RAW DATA & WORD VECTORS
    EVAL_DATASET = '../../dataset/PMtask_TestSet.xml'
    MODE = "eval"

    WV_PATH = '../../embeddings/PubMed-w2v.txt'
    WV_DIMS = 200

    MAX_SENT_LENGTH = 45
    MAX_SENTS = 23

    print("loading word embeddings...")
    word2idx, idx2word, embeddings = load_word_vectors(WV_PATH, WV_DIMS, True)

    docs, labels, ids = load_data(EVAL_DATASET, MODE)

    # convert strings to lists of tokens
    print("Tokenizing...")
    docs = [[text_to_word_sequence(sent) for sent in sent_tokenize(doc)]
            for doc in docs]

    # convert words to word indexes
    print("Vectorizing...")
    docs = [vectorize_doc(doc, word2idx, MAX_SENTS, MAX_SENT_LENGTH)
            for doc in docs]
    docs = numpy.array(docs)

    # LOAD SAVED MODEL
    print("Loading model from disk...", end=" ")
    model_name = "../experiments/task1_hGRU_2017-10-14 17:25:22.hdf5"
def test(saved_file):
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    # with open(Config().NEG_SAMPLES, 'rb') as handle:
    #     neg_samples = pickle.load(handle)
    neg_samples = {}
    item_list = [i for i in range(336)]

    # Load model
    MODEL_DIR = dh.load_model_file(saved_file)
    dr_model = torch.load(MODEL_DIR)
    dr_model.eval()

    item_embedding = dr_model.encode.weight
    hidden = dr_model.init_hidden(Config().batch_size)

    hitratio_numer_10 = 0
    hitratio_numer_5 = 0
    hitratio_denom = 0
    ndcg = 0.0
    ndcg_denom = 0
    hitratio_list_5 = []
    hitratio_list_10 = []
    ndcg_list = []

    for i, x in enumerate(tqdm(dh.batch_iter(train_data, Config().batch_size,
                                             Config().seq_len_test, shuffle=False))):
        uids, baskets, lens = x
        dynamic_user, _ = dr_model(baskets, lens, hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            scores = []
            du_latest = du[l - 1].unsqueeze(0)

            # Deal with positive samples: score <u, p> for all ground-truth test items
            positives = test_data[test_data['userID'] == uid].baskets.values[0][:-1]  # list, dim 1
            p_length = len(positives)
            positives = torch.LongTensor(positives)
            print("positives: ", positives)

            scores_pos = list(torch.mm(du_latest, item_embedding[positives].t()).data.cpu().numpy()[0])
            for s in scores_pos:
                scores.append(s)
            print("scores_pos: ", scores_pos)

            # Deal with negative samples: sample from items outside the positive set
            neg_item_list = list(set(item_list).difference(set(positives.tolist())))
            negatives = random.sample(neg_item_list, Config().neg_num)
            negatives = torch.LongTensor(negatives)
            scores_neg = list(torch.mm(du_latest, item_embedding[negatives].t()).data.cpu().numpy()[0])
            for s in scores_neg:
                scores.append(s)
            print("scores: ", scores)

            # Calculate hit ratio
            index_k = []
            for k in range(Config().top_k):
                index = scores.index(max(scores))
                index_k.append(index)
                scores[index] = -9999
            print("index_k: ", index_k)

            # positives occupy indices [0, p_length), so indices below p_length are hits
            hr_5_numer = len(set(np.arange(0, p_length)) & set(index_k[0:5]))
            hr_10_numer = len(set(np.arange(0, p_length)) & set(index_k))
            hitratio_numer_10 += hr_10_numer
            hitratio_numer_5 += hr_5_numer
            hitratio_denom += p_length
            hitratio_list_5.append(hr_5_numer / p_length)
            hitratio_list_10.append(hr_10_numer / p_length)

            # Calculate NDCG
            u_dcg = 0
            u_idcg = 0
            for k in range(Config().top_k):
                if index_k[k] < p_length:  # indices below p_length are positives
                    u_dcg += 1 / math.log(k + 1 + 1, 2)
                u_idcg += 1 / math.log(k + 1 + 1, 2)
            ndcg += u_dcg / u_idcg
            ndcg_denom += 1
            ndcg_list.append(u_dcg / u_idcg)

    hit_ratio_5 = hitratio_numer_5 / hitratio_denom
    hit_ratio_10 = hitratio_numer_10 / hitratio_denom
    ndcg = ndcg / ndcg_denom
    print('Hit ratio@5: {0} | Hit ratio@10: {1}'.format(hit_ratio_5, hit_ratio_10))
    print('NDCG[{0}]: {1}'.format(Config().top_k, ndcg))
    return hitratio_list_5, hitratio_list_10, ndcg_list
CORPUS = 'dataset/PMtask_Triage_TrainingSet.xml'
WV_PATH = 'embeddings/PubMed-w2v.txt'
WV_DIMS = 200
PERSIST = True  # if True, then save the model to disk

####################
MAX_SENT_LENGTH = 45
MAX_SENTS = 23

##############################################
# Prepare Data
##############################################
print("loading word embeddings...")
word2idx, idx2word, embeddings = load_word_vectors(WV_PATH, WV_DIMS, True)

print("loading data...")
docs, labels, ids = load_data(CORPUS, mode=MODE)
# word_vectors = load_word_vectors(args.embeddings)

# convert strings to lists of tokens
print("Tokenizing...")
docs = [[text_to_word_sequence(sent) for sent in sent_tokenize(doc)]
        for doc in docs]

# convert words to word indexes
print("Vectorizing...")
docs = [vectorize_doc(doc, word2idx, MAX_SENTS, MAX_SENT_LENGTH)
        for doc in docs]
docs = numpy.array(docs)

if MODE == "train":
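# vectorize_doc() (defined elsewhere in the repository) maps each tokenized document
# onto a fixed MAX_SENTS x MAX_SENT_LENGTH grid of word indices. A hedged sketch of
# what such a routine typically looks like -- illustrative only; the repository's
# actual implementation (e.g. its unknown-word handling) may differ:
import numpy


def vectorize_doc_sketch(doc, word2idx, max_sents, max_sent_length, unk_idx=0):
    """Truncate/pad `doc` (a list of token lists) into a (max_sents, max_sent_length) index matrix."""
    mat = numpy.zeros((max_sents, max_sent_length), dtype='int32')
    for i, sent in enumerate(doc[:max_sents]):
        for j, token in enumerate(sent[:max_sent_length]):
            mat[i, j] = word2idx.get(token, unk_idx)
    return mat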
if __name__ == '__main__':
    out_file = open(args.out, 'w')
    algos = ['UB1', 'UB2', 'LexRank', 'TextRank', 'Luhn', 'ICSI']
    R1 = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    R2 = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    Rl = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    Rsu = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    blog_sum = .0

    for t in types:
        cur_path = args.path + '/' + t + '/'
        file_names = os.listdir(cur_path)
        blog_sum += len(file_names)
        for filename in tqdm(file_names):
            data_file = os.path.join(cur_path, filename)
            docs, refs = load_data(data_file)
            sum_len = len(' '.join(refs[0]).split(' ')) * args.sum_len

            print('####', filename, '####')
            out_file.write(filename + '\n')
            for algo in algos:
                r1, r2, rl, rsu = get_summary_scores(algo, docs, refs, sum_len)
                print(algo, r1, r2, rl, rsu)
                out_file.write(algo + ' ' + str(r1) + ' ' + str(r2) + ' ' +
                               str(rl) + ' ' + str(rsu) + '\n')
                R1[algo] += r1
                R2[algo] += r2
                Rl[algo] += rl
                Rsu[algo] += rsu
    out_file.close()

    print('Final Results')
    for algo in algos:
        R1[algo] /= blog_sum
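# A hedged sketch (assumption, not present in the original source) of how the
# remaining per-algorithm macro-averages would presumably be finalized and reported:
#     for algo in algos:
#         R1[algo] /= blog_sum
#         R2[algo] /= blog_sum
#         Rl[algo] /= blog_sum
#         Rsu[algo] /= blog_sum
#         print(algo, R1[algo], R2[algo], Rl[algo], Rsu[algo])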
def train():
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)

    logger.info("✔︎ Validation data processing...")
    validation_data = dh.load_data(Config().VALIDATIONSET_DIR)

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data(Config().TESTSET_DIR)

    logger.info("✔︎ Load negative sample...")
    # with open(Config().NEG_SAMPLES, 'rb') as handle:
    #     neg_samples = pickle.load(handle)
    neg_samples = {}

    # Model config
    if torch.cuda.is_available():
        model = FVModel(Config()).cuda()
    else:
        model = FVModel(Config())

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=Config().learning_rate)

    def bpr_loss(uids, baskets, dynamic_user, item_list, item_embedding):
        """Bayesian personalized ranking loss for implicit feedback.

        Args:
            uids: batch of users' IDs
            baskets: batch of users' baskets (a basket holds a user's daily food categories)
            dynamic_user: batch of users' dynamic representations
            item_list: list of all item (category) IDs, used for negative sampling
            item_embedding: item embedding matrix
        """
        loss = 0
        for uid, bks, du in zip(uids, baskets, dynamic_user):
            du_p_product = torch.mm(du, item_embedding.t())  # shape: [pad_len, num_item]
            loss_u = []  # loss for user
            for t, basket_t in enumerate(bks):
                # Skip padded baskets ([0]) and the first time step
                if basket_t[0] != 0 and t != 0:
                    # Drop the last number, which is the weight-change indicator
                    basket_t = basket_t[:-1]
                    pos_idx = torch.LongTensor(basket_t)

                    # Sample negative products from items not in the basket
                    neg_item_list = list(set(item_list).difference(set(basket_t)))
                    neg = random.sample(neg_item_list, len(basket_t))
                    neg_idx = torch.LongTensor(neg)

                    # Score p(u, t, v > v')
                    score = du_p_product[t - 1][pos_idx] - du_p_product[t - 1][neg_idx]

                    # Average negative log likelihood for basket_t
                    loss_u.append(torch.mean(-torch.nn.LogSigmoid()(score)))
            for i in loss_u:
                loss = loss + i / len(loss_u)
        # avg_loss = torch.true_divide(loss, len(baskets))
        avg_loss = torch.div(loss, len(baskets))
        return avg_loss

    def train_model():
        model.train()  # turn on training mode for dropout
        dr_hidden = model.init_hidden(Config().batch_size)
        train_loss = 0
        # time.clock() was removed in Python 3.8; use perf_counter() instead
        start_time = time.perf_counter()
        num_batches = ceil(len(train_data) / Config().batch_size)
        for i, x in enumerate(tqdm(dh.batch_iter(train_data, Config().batch_size,
                                                 Config().seq_len_train, shuffle=True))):
            uids, baskets, lens = x
            model.zero_grad()  # gradients accumulate across backward() calls unless zeroed
            dynamic_user, _ = model(baskets, lens, dr_hidden)

            loss = bpr_loss(uids, baskets, dynamic_user, item_list, model.encode.weight)
            loss.backward()

            # Clip to avoid exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), Config().clip)

            # Parameter update
            optimizer.step()

            train_loss += loss.data

            # Logging
            if i % Config().log_interval == 0 and i > 0:
                elapsed = (time.perf_counter() - start_time) / Config().log_interval
                cur_loss = train_loss.item() / Config().log_interval  # turn tensor into float
                train_loss = 0
                start_time = time.perf_counter()
                logger.info(
                    '[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.4f} |'
                    .format(epoch, i, num_batches, elapsed, cur_loss))

    def validate_model():
        model.eval()
        dr_hidden = model.init_hidden(Config().batch_size)
        val_loss = 0
        start_time = time.perf_counter()
        num_batches = ceil(len(validation_data) / Config().batch_size)
        for i, x in enumerate(tqdm(dh.batch_iter(validation_data, Config().batch_size,
                                                 Config().seq_len_valid, shuffle=False))):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            loss = bpr_loss(uids, baskets, dynamic_user, item_list, model.encode.weight)
            val_loss += loss.data

        # Logging
        elapsed = (time.perf_counter() - start_time) * 1000 / num_batches
        val_loss = val_loss.item() / num_batches
        logger.info('[Validation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.4f} |'
                    .format(epoch, elapsed, val_loss))
        return val_loss

    def test_model():
        model.eval()
        item_embedding = model.encode.weight
        dr_hidden = model.init_hidden(Config().batch_size)

        hitratio_numer_10 = 0
        hitratio_numer_5 = 0
        hitratio_denom = 0
        ndcg = 0.0
        ndcg_denom = 0

        for i, x in enumerate(tqdm(dh.batch_iter(train_data, Config().batch_size,
                                                 Config().seq_len_test, shuffle=False))):
            uids, baskets, lens = x
            dynamic_user, _ = model(baskets, lens, dr_hidden)
            for uid, l, du in zip(uids, lens, dynamic_user):
                scores = []
                du_latest = du[l - 1].unsqueeze(0)

                # Deal with positive samples: score <u, p> for all ground-truth test items
                positives = test_data[test_data['userID'] == uid].baskets.values[0][:-1]  # list, dim 1
                p_length = len(positives)
                positives = torch.LongTensor(positives)
                scores_pos = list(torch.mm(du_latest, item_embedding[positives].t()).data.cpu().numpy()[0])
                for s in scores_pos:
                    scores.append(s)

                # Deal with negative samples: sample from items outside the positive set
                neg_item_list = list(set(item_list).difference(set(positives.tolist())))
                negatives = random.sample(neg_item_list, Config().neg_num)
                negatives = torch.LongTensor(negatives)
                scores_neg = list(torch.mm(du_latest, item_embedding[negatives].t()).data.cpu().numpy()[0])
                for s in scores_neg:
                    scores.append(s)

                # Build the top-k ranking list (evaluated at k = 5 and k = 10)
                index_k = []
                for k in range(Config().top_k):
                    index = scores.index(max(scores))  # index of the highest-scoring category
                    index_k.append(index)
                    scores[index] = -9999

                # positives occupy indices [0, p_length), so indices below p_length are hits
                hitratio_numer_10 += len(set(np.arange(0, p_length)) & set(index_k))
                hitratio_numer_5 += len(set(np.arange(0, p_length)) & set(index_k[0:5]))
                hitratio_denom += p_length

                # Calculate NDCG
                u_dcg = 0
                u_idcg = 0
                for k in range(Config().top_k):
                    if index_k[k] < p_length:  # indices below p_length are positives
                        u_dcg += 1 / math.log(k + 1 + 1, 2)
                    u_idcg += 1 / math.log(k + 1 + 1, 2)
                ndcg += u_dcg / u_idcg
                ndcg_denom += 1

        hit_ratio_5 = hitratio_numer_5 / hitratio_denom
        hit_ratio_10 = hitratio_numer_10 / hitratio_denom
        ndcg = ndcg / ndcg_denom
        logger.info(
            '[Test]| Epochs {:3d} | Hit ratio@5 {:02.4f} | Hit ratio@10 {:02.4f} | NDCG {:05.4f} |'
            .format(epoch, hit_ratio_5, hit_ratio_10, ndcg))
        return hit_ratio_5, hit_ratio_10, ndcg

    timestamp = str(int(time.time()))
    out_dir = os.path.join(os.path.curdir, "runs", timestamp)
    item_list = [i for i in range(336)]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    logger.info('Save into {0}'.format(out_dir))
    checkpoint_dir = out_dir + '/model-{epoch:02d}-{hitratio:.4f}-{ndcg:.4f}.model'

    best_hit_ratio = None

    # ==================== test
    # val_loss = validate_model()

    try:
        # Training
        for epoch in range(Config().epochs):
            train_model()
            logger.info('-' * 89)

            val_loss = validate_model()
            logger.info('-' * 89)

            hit_ratio_5, hit_ratio_10, ndcg = test_model()
            logger.info('-' * 89)

            # Checkpoint on improved hit ratio@10
            if not best_hit_ratio or hit_ratio_10 > best_hit_ratio:
                with open(checkpoint_dir.format(epoch=epoch, hitratio=hit_ratio_10, ndcg=ndcg), 'wb') as f:
                    torch.save(model, f)
                best_hit_ratio = hit_ratio_10
    except KeyboardInterrupt:
        logger.info('*' * 89)
        logger.info('Early Stopping!')