示例#1
0
def dist_eval(args):
    """Evaluate a trained NPA checkpoint on one validation behaviors split.

    Loads checkpoint ``ckpt_ep<args.ep>`` from ``model_dir``, runs slow
    evaluation on the behaviors split selected by ``args.fsplit``, and writes
    one ``<labels>\\t<preds>`` line per impression to the results file.
    """
    model = NPAModel(hparams, MINDIterator, seed=seed)
    ckpt_path = os.path.join(model_dir, "ckpt_ep{}".format(args.ep))
    model.model.load_weights(ckpt_path)

    valid_dir = os.path.join(data_path, "valid")
    news_file = os.path.join(valid_dir, 'news.tsv')
    behaviors_file = os.path.join(valid_dir, 'behaviors.{}.tsv'.format(args.fsplit))

    # Impression indexes are not needed for the label/prediction dump.
    _, group_labels, group_preds = model.run_slow_eval(news_file, behaviors_file)

    out_path = os.path.join(data_path, 'results/npa-valid-prediction.{}.txt'.format(args.fsplit))
    with open(out_path, 'w') as out:
        for labels, preds in tqdm(zip(group_labels, group_preds)):
            out.write("{}\t{}\n".format(
                ",".join(str(v) for v in labels),
                ",".join(str(v) for v in preds),
            ))
示例#2
0
def test(args):
    """Score one test behaviors split and write MIND-format rank lines.

    Loads checkpoint ``ckpt_ep<args.ep>``, runs slow evaluation on the test
    split selected by ``args.fsplit``, and writes ``<impression_id> [r1,r2,...]``
    lines (1-based ranks, best score first) to the results file.
    """
    model = NPAModel(hparams, MINDIterator, seed=seed, test_mode=True)
    model.model.load_weights(os.path.join(model_dir, "ckpt_ep{}".format(args.ep)))

    test_dir = os.path.join(data_path, "test")
    news_file = os.path.join(test_dir, 'news.tsv')
    behaviors_file = os.path.join(test_dir, 'behaviors.{}.tsv'.format(args.fsplit))

    # Labels are unavailable/unused for the test split.
    group_impr_indexes, _, group_preds = model.run_slow_eval(news_file, behaviors_file)

    out_path = os.path.join(data_path, 'results/npa-test-prediction.{}.txt'.format(args.fsplit))
    with open(out_path, 'w') as out:
        for impr_index, preds in tqdm(zip(group_impr_indexes, group_preds)):
            # Double argsort yields the rank of each candidate; +1 makes it 1-based.
            ranks = (np.argsort(np.argsort(preds)[::-1]) + 1).tolist()
            rank_str = '[' + ','.join(str(r) for r in ranks) + ']'
            out.write(' '.join([str(impr_index + 1), rank_str]) + '\n')
def test_npa_component_definition(mind_resource_path):
    """Smoke-test that constructing an NPA model builds all training components."""
    utils_dir = os.path.join(mind_resource_path, "utils")
    wordEmb_file = os.path.join(utils_dir, "embedding.npy")
    userDict_file = os.path.join(utils_dir, "uid2index.pkl")
    wordDict_file = os.path.join(utils_dir, "word_dict.pkl")
    yaml_file = os.path.join(utils_dir, r"npa.yaml")

    # Resources are fetched once; subsequent runs reuse the cached files.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    model = NPAModel(hparams, MINDIterator)

    # The constructor must have wired up every core component.
    for component in (model.model, model.scorer, model.loss, model.train_optimizer):
        assert component is not None
示例#4
0
def test_model_npa(tmp):
    """End-to-end smoke test: download resources, then evaluate and fit NPA."""
    yaml_file = os.path.join(tmp, "npa.yaml")
    train_file = os.path.join(tmp, "train.txt")
    valid_file = os.path.join(tmp, "test.txt")
    wordEmb_file = os.path.join(tmp, "embedding.npy")

    # Download the bundled demo data only when it is not already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp,
            "npa.zip")

    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    assert hparams is not None

    model = NPAModel(hparams, NewsIterator)

    # Evaluation must yield metrics, and fit must return the model (fluent API).
    assert model.run_eval(valid_file) is not None
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
示例#5
0
def test_npa_component_definition(tmp):
    """Verify that an NPA model exposes all of its core training components."""
    yaml_file = os.path.join(tmp, "npa.yaml")
    wordEmb_file = os.path.join(tmp, "embedding.npy")

    # First run downloads the demo bundle; later runs hit the cache.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp,
            "npa.zip")

    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    model = NPAModel(hparams, NewsIterator)

    for attr in ("model", "scorer", "loss", "train_optimizer"):
        assert getattr(model, attr) is not None
示例#6
0
    download_deeprec_resources(mind_url, \
                               os.path.join(data_path, 'valid'), mind_dev_dataset)
# Fetch the utils bundle (yaml config plus dict/embedding files) on first run.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/', \
                               os.path.join(data_path, 'utils'), mind_utils)

## Create hyper-parameters from the yaml config plus the resource file paths.
hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, \
                          wordDict_file=wordDict_file, userDict_file=userDict_file,\
                          epochs=epochs)
print(hparams)

iterator = MINDIterator

## Train the NPA model. The first run_eval reports metrics of the
## untrained model as a baseline before fitting.
model = NPAModel(hparams, iterator, seed=seed)
print(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(train_news_file, train_behaviors_file, valid_news_file,
          valid_behaviors_file)

# Final validation metrics; pm.record presumably publishes them to the
# notebook-execution framework (papermill/scrapbook) — confirm which `pm` is imported.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)
pm.record("res_syn", res_syn)

## Save the trained weights as a Keras checkpoint under <data_path>/model.
model_path = os.path.join(data_path, "model")
os.makedirs(model_path, exist_ok=True)

model.model.save_weights(os.path.join(model_path, "npa_ckpt"))
示例#7
0
                          show_step=10)
# Log the resolved hyper-parameters for reproducibility.
logging.info(hparams)


# ## Build and train the selected news-recommendation model


# Instantiate the requested architecture together with its iterator type.
if model_type == 'nrms':
    iterator = MINDIterator
    model = NRMSModel(hparams, iterator, seed=seed)
elif model_type == 'naml':
    iterator = MINDAllIterator
    model = NAMLModel(hparams, iterator, seed=seed)
elif model_type == 'npa':
    iterator = MINDIterator
    model = NPAModel(hparams, iterator, seed=seed)
elif model_type == 'nrmma':
    iterator = MINDAllIterator
    model = NRMMAModel(hparams, iterator, seed=seed)
else:
    # Bug fix: report the unrecognized selector itself. The original message
    # referenced `exp_name`, which is not the value being dispatched on and is
    # not visibly defined here — an unknown model_type could therefore raise a
    # confusing NameError instead of this NotImplementedError.
    raise NotImplementedError(f"{model_type} is not implemented")

# In[8]:
# Train on the MIND train split; model_path/model_name are presumably used by
# fit() for checkpointing under <exp_path>/<model_type> — confirm fit signature.
model_path = os.path.join(exp_path, model_type)
model_name = model_type + '_ckpt'
model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file,
          model_path=model_path, model_name=model_name)
# Report final validation metrics.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
logging.info(res_syn)
示例#8
0
# Directory where NPA checkpoints are written during training.
model_dir = os.path.join(data_path, "npa")

# Resolve the download URL and archive names for the chosen MIND variant.
mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)

# Download each resource only when it is not already cached on disk.
if not os.path.exists(train_news_file):
    download_deeprec_resources(mind_url, os.path.join(data_path, 'train'), mind_train_dataset)

if not os.path.exists(valid_news_file):
    download_deeprec_resources(mind_url, \
                               os.path.join(data_path, 'valid'), mind_dev_dataset)
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/', \
                               os.path.join(data_path, 'utils'), mind_utils)

# Build hyper-parameters from the yaml config plus resource paths.
hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, \
                          wordDict_file=wordDict_file, userDict_file=userDict_file, \
                          epochs=epochs,
                          show_step=10)
print("[NPA] Config,", hparams)

iterator = MINDIterator
model = NPAModel(hparams, iterator, seed=seed)

# Untrained baseline metrics on the fast validation behaviors split.
print("[NPA] First run:", model.run_eval(valid_news_file, fast_valid_behaviors_file))

# NOTE(review): `model_save_path` differs from the `model_path`/`model_name`
# kwargs used elsewhere in this codebase — confirm this fork's fit() accepts it.
model.fit(train_news_file, train_behaviors_file, valid_news_file, fast_valid_behaviors_file, model_save_path=model_dir)

# res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
# print(res_syn)