def dist_eval(args):
    """Run slow evaluation on one validation behaviors split and dump
    label/prediction pairs to a tab-separated text file.

    Args:
        args: parsed CLI namespace with ``ep`` (checkpoint epoch) and
            ``fsplit`` (behaviors file split suffix).

    NOTE(review): relies on module-level ``hparams``, ``seed``,
    ``model_dir`` and ``data_path`` defined elsewhere in this script.
    """
    iterator = MINDIterator
    model = NPAModel(hparams, iterator, seed=seed)
    # Restore the checkpoint saved at epoch ``args.ep``.
    model.model.load_weights(os.path.join(model_dir, "ckpt_ep{}".format(args.ep)))

    test_news_file = os.path.join(data_path, "valid", 'news.tsv')
    test_behaviors_file = os.path.join(
        data_path, "valid", 'behaviors.{}.tsv'.format(args.fsplit))
    group_impr_indexes, group_labels, group_preds = model.run_slow_eval(
        test_news_file, test_behaviors_file)

    out_path = os.path.join(
        data_path, 'results/npa-valid-prediction.{}.txt'.format(args.fsplit))
    # BUG FIX: the results directory may not exist yet; create it so the
    # open() below cannot fail with FileNotFoundError on a fresh run.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as f:
        # One line per impression: "label,label,...\tpred,pred,...".
        for labels, preds in tqdm(zip(group_labels, group_preds)):
            label_str = ",".join([str(x) for x in labels])
            pred_str = ",".join([str(x) for x in preds])
            f.write("{}\t{}\n".format(label_str, pred_str))
def test(args):
    """Score one test behaviors split and write a MIND-submission style
    prediction file (``impression_index [rank,rank,...]`` per line).

    Args:
        args: parsed CLI namespace with ``ep`` (checkpoint epoch) and
            ``fsplit`` (behaviors file split suffix).

    NOTE(review): relies on module-level ``hparams``, ``seed``,
    ``model_dir`` and ``data_path`` defined elsewhere in this script.
    """
    iterator = MINDIterator
    model = NPAModel(hparams, iterator, seed=seed, test_mode=True)
    model.model.load_weights(os.path.join(model_dir, "ckpt_ep{}".format(args.ep)))

    test_news_file = os.path.join(data_path, "test", 'news.tsv')
    test_behaviors_file = os.path.join(
        data_path, "test", 'behaviors.{}.tsv'.format(args.fsplit))
    group_impr_indexes, group_labels, group_preds = model.run_slow_eval(
        test_news_file, test_behaviors_file)

    out_path = os.path.join(
        data_path, 'results/npa-test-prediction.{}.txt'.format(args.fsplit))
    # BUG FIX: create the results directory up front so open() cannot fail
    # with FileNotFoundError on a fresh run.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as f:
        for impr_index, preds in tqdm(zip(group_impr_indexes, group_preds)):
            # Internal impression indexes are 0-based; the output format is 1-based.
            impr_index += 1
            # Double argsort turns scores into 1-based ranks (highest score -> rank 1).
            pred_rank = (np.argsort(np.argsort(preds)[::-1]) + 1).tolist()
            pred_rank = '[' + ','.join([str(i) for i in pred_rank]) + ']'
            f.write(' '.join([str(impr_index), pred_rank]) + '\n')
def test_npa_component_definition(mind_resource_path):
    """Smoke-test that constructing an NPA model builds all core components."""
    utils_dir = os.path.join(mind_resource_path, "utils")
    wordEmb_file = os.path.join(utils_dir, "embedding.npy")
    userDict_file = os.path.join(utils_dir, "uid2index.pkl")
    wordDict_file = os.path.join(utils_dir, "word_dict.pkl")
    yaml_file = os.path.join(utils_dir, r"npa.yaml")

    # Fetch the demo utils bundle only on the first run.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    model = NPAModel(hparams, MINDIterator)

    # Every trainable component must have been wired up by the constructor.
    for component in (model.model, model.scorer, model.loss, model.train_optimizer):
        assert component is not None
def test_model_npa(tmp):
    """End-to-end smoke test: evaluate and fit NPA on the tiny demo dataset."""
    yaml_file = os.path.join(tmp, "npa.yaml")
    train_file = os.path.join(tmp, "train.txt")
    valid_file = os.path.join(tmp, "test.txt")
    wordEmb_file = os.path.join(tmp, "embedding.npy")

    # Download the fixture bundle only when it is not already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp, "npa.zip"
        )

    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    assert hparams is not None

    model = NPAModel(hparams, NewsIterator)
    assert model.run_eval(valid_file) is not None
    # fit() is expected to return the (trained) model instance.
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
def test_npa_component_definition(tmp):
    """Check that building an NPA model defines all of its core components."""
    yaml_file = os.path.join(tmp, "npa.yaml")
    wordEmb_file = os.path.join(tmp, "embedding.npy")

    # Download the fixture bundle only when it is not already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp, "npa.zip"
        )

    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    model = NPAModel(hparams, NewsIterator)

    for part in (model.model, model.scorer, model.loss, model.train_optimizer):
        assert part is not None
# NOTE(review): this first call looks like the body of an
# `if not os.path.exists(...)` guard that begins before this chunk —
# confirm against the full script before relying on it running unconditionally.
download_deeprec_resources(mind_url,
                           os.path.join(data_path, 'valid'), mind_dev_dataset)

# Fetch the utils bundle (yaml config, dictionaries) only on the first run.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/',
                               os.path.join(data_path, 'utils'), mind_utils)

## Create hyper-parameters
hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file,
                          wordDict_file=wordDict_file, userDict_file=userDict_file,
                          epochs=epochs)
print(hparams)

iterator = MINDIterator

## Train the NPA model
model = NPAModel(hparams, iterator, seed=seed)

# Baseline metrics before any training.
print(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)

# Final metrics after training.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)
# NOTE(review): `pm` is presumably papermill/scrapbook — records the metric
# into the executed-notebook output; confirm against the imports.
pm.record("res_syn", res_syn)

## Save the model
model_path = os.path.join(data_path, "model")
os.makedirs(model_path, exist_ok=True)
model.model.save_weights(os.path.join(model_path, "npa_ckpt"))
show_step=10) logging.info(hparams) # ## Train the NRMS model if model_type == 'nrms': iterator = MINDIterator model = NRMSModel(hparams, iterator, seed=seed) elif model_type == 'naml': iterator = MINDAllIterator model = NAMLModel(hparams, iterator, seed=seed) elif model_type == 'npa': iterator = MINDIterator model = NPAModel(hparams, iterator, seed=seed) elif model_type == 'nrmma': iterator = MINDAllIterator model = NRMMAModel(hparams, iterator, seed=seed) else: raise NotImplementedError(f"{exp_name} is not implemented") # In[8]: model_path = os.path.join(exp_path, model_type) model_name = model_type + '_ckpt' model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file, model_path=model_path, model_name=model_name) res_syn = model.run_eval(valid_news_file, valid_behaviors_file) logging.info(res_syn)
# Checkpoints for this NPA run are saved under <data_path>/npa.
model_dir = os.path.join(data_path, "npa")

# Resolve download URL and archive names for the chosen MIND variant.
mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)

# Download train/valid splits and the utils bundle only when missing on disk.
if not os.path.exists(train_news_file):
    download_deeprec_resources(mind_url, os.path.join(data_path, 'train'),
                               mind_train_dataset)
if not os.path.exists(valid_news_file):
    download_deeprec_resources(mind_url,
                               os.path.join(data_path, 'valid'), mind_dev_dataset)
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/',
                               os.path.join(data_path, 'utils'), mind_utils)

hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file,
                          wordDict_file=wordDict_file, userDict_file=userDict_file,
                          epochs=epochs, show_step=10)
print("[NPA] Config,", hparams)

iterator = MINDIterator
model = NPAModel(hparams, iterator, seed=seed)

# Baseline metrics before training, on the fast validation split.
print("[NPA] First run:", model.run_eval(valid_news_file, fast_valid_behaviors_file))

model.fit(train_news_file, train_behaviors_file,
          valid_news_file, fast_valid_behaviors_file,
          model_save_path=model_dir)

# res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
# print(res_syn)