Пример #1
0
def test_prepare_hparams(must_exist_attributes, deeprec_resource_path):
    wordEmb_file = os.path.join(deeprec_resource_path, "mind", "utils",
                                "embedding.npy")
    userDict_file = os.path.join(deeprec_resource_path, "mind", "utils",
                                 "uid2index.pkl")
    wordDict_file = os.path.join(deeprec_resource_path, "mind", "utils",
                                 "word_dict.pkl")
    yaml_file = os.path.join(deeprec_resource_path, "mind", "utils",
                             r"nrms.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(deeprec_resource_path, "mind", "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    assert hasattr(hparams, must_exist_attributes)
def test_dkn_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Пример #3
0
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    train_file = os.path.join(data_path, "final_test_with_entity.txt")
    valid_file = os.path.join(data_path, "final_test_with_entity.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
Пример #4
0
def test_npa_component_definition(mind_resource_path):
    wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding.npy")
    userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
    wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
    yaml_file = os.path.join(mind_resource_path, "utils", r"npa.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    iterator = MINDIterator
    model = NPAModel(hparams, iterator)

    assert model.model is not None
    assert model.scorer is not None
    assert model.loss is not None
    assert model.train_optimizer is not None
Пример #5
0
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    train_file = os.path.join(data_path, r'train_mind_demo.txt')
    valid_file = os.path.join(data_path, r'valid_mind_demo.txt')
    test_file = os.path.join(data_path, r'test_mind_demo.txt')
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(yaml_file,
                              news_feature_file=news_feature_file,
                              user_history_file=user_history_file,
                              wordEmb_file=wordEmb_file,
                              entityEmb_file=entityEmb_file,
                              contextEmb_file=contextEmb_file,
                              epochs=1,
                              learning_rate=0.0001)
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    train_file = os.path.join(data_path, "final_test_with_entity.txt")
    valid_file = os.path.join(data_path, "final_test_with_entity.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
Пример #7
0
def test_dkn_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(yaml_file,
                              news_feature_file=news_feature_file,
                              user_history_file=user_history_file,
                              wordEmb_file=wordEmb_file,
                              entityEmb_file=entityEmb_file,
                              contextEmb_file=contextEmb_file,
                              epochs=1,
                              learning_rate=0.0001)
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Пример #8
0
def test_dkn_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=5,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
def test_naml_component_definition(tmp):
    wordEmb_file = os.path.join(tmp, "utils", "embedding_all.npy")
    userDict_file = os.path.join(tmp, "utils", "uid2index.pkl")
    wordDict_file = os.path.join(tmp, "utils", "word_dict_all.pkl")
    vertDict_file = os.path.join(tmp, "utils", "vert_dict.pkl")
    subvertDict_file = os.path.join(tmp, "utils", "subvert_dict.pkl")
    yaml_file = os.path.join(tmp, "utils", r"naml.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        vertDict_file=vertDict_file,
        subvertDict_file=subvertDict_file,
        epochs=1,
    )
    iterator = MINDAllIterator
    model = NAMLModel(hparams, iterator)

    assert model.model is not None
    assert model.scorer is not None
    assert model.loss is not None
    assert model.train_optimizer is not None
Пример #10
0
def test_DKN_iterator(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "dkn")
    data_file = os.path.join(data_path, r"train_mind_demo.txt")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res, impression, data_size in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)

    ###  test DKN item2item iterator
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    hparams.neg_num = 9

    iterator_item2item = DKNItem2itemTextIterator(hparams, tf.Graph())
    assert iterator_item2item is not None
    test_round = 3
    for res, impression, data_size in iterator_item2item.load_data_from_file(
        os.path.join(data_path, "doc_list.txt")
    ):
        assert isinstance(res, dict)
        test_round -= 1
        if test_round <= 0:
            break
Пример #11
0
def test_prepare_hparams(must_exist_attributes, resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    hparams = prepare_hparams(yaml_file)
    assert hasattr(hparams, must_exist_attributes)
Пример #12
0
def test_dkn_component_definition(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    ###  test DKN's item2item version
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    hparams.neg_num = 9
    assert hparams is not None
    model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)

    assert model_item2item.pred_logits is not None
    assert model_item2item.update is not None
    assert model_item2item.iterator is not None
def test_load_yaml_file(tmp):
    yaml_file = os.path.join(tmp, "utils", r"nrms.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(tmp, "utils"),
            "MINDdemo_utils.zip",
        )
    config = load_yaml(yaml_file)
    assert config is not None
Пример #14
0
def test_prepare_hparams(deeprec_resource_path, must_exist_attributes):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    hparams = prepare_hparams(yaml_file)
    assert hasattr(hparams, must_exist_attributes)
Пример #15
0
def test_load_yaml_file(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    config = load_yaml(yaml_file)
    assert config is not None
Пример #16
0
def test_load_yaml_file(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    config = load_yaml(yaml_file)
    assert config is not None
Пример #17
0
def test_DKN_iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    data_file = os.path.join(data_path, "final_test_with_entity.txt")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(yaml_file, wordEmb_file="", entityEmb_file="")
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
Пример #18
0
def test_xdeepfm_component_definition(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    model = XDeepFMModel(hparams, FFMTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Пример #19
0
def test_lstur_component_definition(tmp):
    yaml_file = os.path.join(tmp, "lstur.yaml")
    wordEmb_file = os.path.join(tmp, "embedding.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp,
            "lstur.zip")

    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    iterator = NewsIterator
    model = LSTURModel(hparams, iterator)

    assert model.model is not None
    assert model.scorer is not None
    assert model.loss is not None
    assert model.train_optimizer is not None
Пример #20
0
def test_FFM_iterator(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    iterator = FFMTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(data_file):
        assert isinstance(res, tuple)
Пример #21
0
def test_FFM_iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    iterator = FFMTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
Пример #22
0
def test_xdeepfm_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    model = XDeepFMModel(hparams, FFMTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Пример #23
0
def test_model_npa(tmp):
    yaml_file = os.path.join(tmp, "npa.yaml")
    train_file = os.path.join(tmp, "train.txt")
    valid_file = os.path.join(tmp, "test.txt")
    wordEmb_file = os.path.join(tmp, "embedding.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp,
            "npa.zip")

    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    assert hparams is not None

    iterator = NewsIterator
    model = NPAModel(hparams, iterator)

    assert model.run_eval(valid_file) is not None
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
Пример #24
0
def test_model_xdeepfm(resource_path):
    data_path = os.path.join(resource_path, '../resources/deeprec/xdeepfm')
    yaml_file = os.path.join(data_path, r'xDeepFM.yaml')
    data_file = os.path.join(data_path, r'sample_FFM_data.txt')
    output_file = os.path.join(data_path, r'output.txt')

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/', data_path,
            'xdeepfmresources.zip')

    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None

    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)

    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
Пример #25
0
def test_model_xdeepfm(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    output_file = os.path.join(data_path, "output.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None

    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)

    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
Пример #26
0
def test_model_xdeepfm(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    output_file = os.path.join(data_path, "output.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None

    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)

    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
Пример #27
0
def download_data(mind_type="small"):
    mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(mind_type)
    data_path = get_data_path(mind_type)
    train_news_file, _ = get_path("train", mind_type)
    valid_news_file, _ = get_path("valid", mind_type)
    test_news_file, _ = get_path("test", mind_type)
    if not os.path.exists(train_news_file):
        download_deeprec_resources(mind_url, os.path.join(data_path, 'train'), mind_train_dataset)
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(mind_url, os.path.join(data_path, 'valid'), mind_dev_dataset)
    if mind_type == "large":
        if not os.path.exists(test_news_file):
            download_deeprec_resources(mind_url, os.path.join(data_path, 'test'), mind_dev_dataset)
    if not os.path.exists(get_yaml_path()):
        utils_url = r'https://recodatasets.blob.core.windows.net/newsrec/'
        download_deeprec_resources(utils_url, os.path.join(get_root_path(), 'utils'), mind_utils)
Пример #28
0
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, '../resources/deeprec/dkn')
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    train_file = os.path.join(data_path, r'final_test_with_entity.txt')
    valid_file = os.path.join(data_path, r'final_test_with_entity.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/', data_path,
            'dknresources.zip')

    hparams = prepare_hparams(yaml_file,
                              wordEmb_file=wordEmb_file,
                              entityEmb_file=entityEmb_file,
                              epochs=1,
                              learning_rate=0.0001)
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert (isinstance(model.fit(train_file, valid_file), BaseModel))
    assert model.run_eval(valid_file) is not None
Пример #29
0
def test_naml_iterator(mind_resource_path):
    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
    wordEmb_file = os.path.join(mind_resource_path, "utils", "embedding_all.npy")
    userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
    wordDict_file = os.path.join(mind_resource_path, "utils", "word_dict_all.pkl")
    vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
    subvertDict_file = os.path.join(mind_resource_path, "utils", "subvert_dict.pkl")
    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")

    if not os.path.exists(train_news_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "train"),
            "MINDdemo_train.zip",
        )
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "valid"),
            "MINDdemo_dev.zip",
        )
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        vertDict_file=vertDict_file,
        subvertDict_file=subvertDict_file,
        epochs=1,
        batch_size=1024,
    )
    train_iterator = MINDAllIterator(hparams, hparams.npratio)
    test_iterator = MINDAllIterator(hparams, -1)

    assert train_iterator is not None
    for res in train_iterator.load_data_from_file(
        train_news_file, train_behaviors_file
    ):
        assert isinstance(res, dict)
        assert len(res) == 11
        break

    assert test_iterator is not None
    for res in test_iterator.load_data_from_file(valid_news_file, valid_behaviors_file):
        assert isinstance(res, dict)
        assert len(res) == 11
        break
Пример #30
0
def test_DKN_iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    data_file = os.path.join(data_path, r"train_mind_demo.txt")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res, impression, data_size in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
Пример #31
0
def test_model_naml(mind_resource_path):
    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
    train_behaviors_file = os.path.join(mind_resource_path, "train",
                                        r"behaviors.tsv")
    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(mind_resource_path, "valid",
                                        r"behaviors.tsv")
    wordEmb_file = os.path.join(mind_resource_path, "utils",
                                "embedding_all.npy")
    userDict_file = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
    wordDict_file = os.path.join(mind_resource_path, "utils",
                                 "word_dict_all.pkl")
    vertDict_file = os.path.join(mind_resource_path, "utils", "vert_dict.pkl")
    subvertDict_file = os.path.join(mind_resource_path, "utils",
                                    "subvert_dict.pkl")
    yaml_file = os.path.join(mind_resource_path, "utils", r"naml.yaml")

    if not os.path.exists(train_news_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "train"),
            "MINDdemo_train.zip",
        )
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "valid"),
            "MINDdemo_dev.zip",
        )
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            os.path.join(mind_resource_path, "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        vertDict_file=vertDict_file,
        subvertDict_file=subvertDict_file,
        epochs=1,
    )

    iterator = MINDAllIterator
    model = NAMLModel(hparams, iterator)
    assert model.run_eval(valid_news_file, valid_behaviors_file) is not None
    assert isinstance(
        model.fit(train_news_file, train_behaviors_file, valid_news_file,
                  valid_behaviors_file),
        BaseModel,
    )
def test_news_iterator(tmp):
    train_news_file = os.path.join(tmp, "train", r"news.tsv")
    train_behaviors_file = os.path.join(tmp, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(tmp, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(tmp, "valid", r"behaviors.tsv")
    wordEmb_file = os.path.join(tmp, "utils", "embedding.npy")
    userDict_file = os.path.join(tmp, "utils", "uid2index.pkl")
    wordDict_file = os.path.join(tmp, "utils", "word_dict.pkl")
    yaml_file = os.path.join(tmp, "utils", r"nrms.yaml")

    if not os.path.exists(train_news_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "train"),
            "MINDdemo_train.zip",
        )
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "valid"),
            "MINDdemo_dev.zip",
        )
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    train_iterator = MINDIterator(hparams, hparams.npratio)
    test_iterator = MINDIterator(hparams, -1)

    assert train_iterator is not None
    for res in train_iterator.load_data_from_file(train_news_file,
                                                  train_behaviors_file):
        assert isinstance(res, dict)
        assert len(res) == 5
        break

    assert test_iterator is not None
    for res in test_iterator.load_data_from_file(valid_news_file,
                                                 valid_behaviors_file):
        assert isinstance(res, dict)
        assert len(res) == 5
        break
def test_model_nrms(tmp):
    train_news_file = os.path.join(tmp, "train", r"news.tsv")
    train_behaviors_file = os.path.join(tmp, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(tmp, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(tmp, "valid", r"behaviors.tsv")
    wordEmb_file = os.path.join(tmp, "utils", "embedding.npy")
    userDict_file = os.path.join(tmp, "utils", "uid2index.pkl")
    wordDict_file = os.path.join(tmp, "utils", "word_dict.pkl")
    yaml_file = os.path.join(tmp, "utils", r"nrms.yaml")

    if not os.path.exists(train_news_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "train"),
            "MINDdemo_train.zip",
        )
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "valid"),
            "MINDdemo_dev.zip",
        )
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            os.path.join(tmp, "utils"),
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    assert hparams is not None

    iterator = MINDIterator
    model = NRMSModel(hparams, iterator)

    assert model.run_eval(valid_news_file, valid_behaviors_file) is not None
    assert isinstance(
        model.fit(train_news_file, train_behaviors_file, valid_news_file,
                  valid_behaviors_file),
        BaseModel,
    )
Пример #34
0
train_news_file = os.path.join(data_path, 'train', r'news.tsv')
train_behaviors_file = os.path.join(data_path, 'train', r'behaviors.tsv')
valid_news_file = os.path.join(data_path, 'valid', r'news.tsv')
fast_valid_behaviors_file = os.path.join(data_path, 'valid',
                                         r'behaviors.small.tsv')
wordEmb_file = os.path.join(data_path, "utils", "embedding.npy")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
wordDict_file = os.path.join(data_path, "utils", "word_dict.pkl")
yaml_file = os.path.join(data_path, "utils", r'npa.yaml')
model_dir = os.path.join(data_path, "nrms")

mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(
    MIND_type)

if not os.path.exists(train_news_file):
    download_deeprec_resources(mind_url, os.path.join(data_path, 'train'),
                               mind_train_dataset)

if not os.path.exists(valid_news_file):
    download_deeprec_resources(mind_url, \
                               os.path.join(data_path, 'valid'), mind_dev_dataset)
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/', \
                               os.path.join(data_path, 'utils'), mind_utils)

hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, \
                          wordDict_file=wordDict_file, userDict_file=userDict_file, \
                          epochs=epochs,
                          show_step=10)
print("[NRMS] Config,", hparams)

iterator = MINDIterator
Пример #35
0
data_path = os.path.abspath(os.path.join(os.getcwd(), "../../data/mind-demo-dkn"))
data_path1 = os.path.abspath(os.path.join(os.getcwd(), "../../data"))
print('data_path:', data_path)
print('data_path1:', data_path1)

yaml_file = os.path.join(data_path, r'dkn.yaml')
train_file = os.path.join(data_path, r'train_mind_demo.txt')
valid_file = os.path.join(data_path, r'valid_mind_demo.txt')
test_file = os.path.join(data_path, r'test_mind_demo.txt')
news_feature_file = os.path.join(data_path, r'doc_feature.txt')
user_history_file = os.path.join(data_path, r'user_history.txt')
wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/', data_path1, 'mind-demo-dkn.zip')


## Create hyper-parameters
epochs = 10
history_size = 50
batch_size = 100
hparams = prepare_hparams(yaml_file,
                          news_feature_file = news_feature_file,
                          user_history_file = user_history_file,
                          wordEmb_file=wordEmb_file,
                          entityEmb_file=entityEmb_file,
                          contextEmb_file=contextEmb_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
Пример #36
0
    print('There are in total {0} fields and {1} features.'.format(
        converter.field_count, converter.feature_count))

    tmpdir = TemporaryDirectory()

    data_path = tmpdir.name
    yaml_file = os.path.join(data_path, YAML_FILE_NAME)
    train_file = os.path.join(data_path, TRAIN_FILE_NAME)
    valid_file = os.path.join(data_path, VALID_FILE_NAME)
    test_file = os.path.join(data_path, TEST_FILE_NAME)
    model_file = os.path.join(data_path, MODEL_FILE_NAME)
    output_file = os.path.join(data_path, OUTPUT_FILE_NAME)

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/', data_path,
            'xdeepfmresources.zip')

    # Training task
    ffm_model = xl.create_ffm()  # Use field-aware factorization machine (ffm)
    ffm_model.setTrain(train_file)  # Set the path of training dataset
    ffm_model.setValidate(valid_file)  # Set the path of validation dataset

    # Parameters:
    #  0. task: binary classification
    #  1. learning rate: 0.2
    #  2. regular lambda: 0.002
    #  3. evaluation metric: auc
    #  4. number of epochs: 10
    #  5. optimization method: sgd
    param = {