def test_DKN_iterator(deeprec_resource_path):
    """Smoke-test ``DKNTextIterator`` and ``DKNItem2itemTextIterator``.

    Fetches ``mind-demo.zip`` into the ``dkn`` resource folder, then checks
    that the first element of every tuple yielded by each iterator's
    ``load_data_from_file`` is a ``dict``. The item2item loop stops after
    three yielded tuples.
    """
    dkn_dir = os.path.join(deeprec_resource_path, "dkn")

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn("dkn.yaml")
    doc_features = in_dkn(r"doc_feature.txt")

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    # --- DKN text iterator: embedding files are given as empty strings ---
    text_overrides = {
        "news_feature_file": doc_features,
        "user_history_file": in_dkn(r"user_history.txt"),
        "wordEmb_file": "",
        "entityEmb_file": "",
        "contextEmb_file": "",
    }
    text_hparams = prepare_hparams(config, **text_overrides)
    text_iterator = DKNTextIterator(text_hparams, tf.Graph())
    assert text_iterator is not None
    for batch, _impression, _size in text_iterator.load_data_from_file(
        in_dkn(r"train_mind_demo.txt")
    ):
        assert isinstance(batch, dict)

    # --- DKN item2item iterator ---
    item2item_overrides = {
        "news_feature_file": doc_features,
        "wordEmb_file": in_dkn("word_embeddings_100.npy"),
        "entityEmb_file": in_dkn("TransE_entity2vec_100.npy"),
        "contextEmb_file": in_dkn("TransE_context2vec_100.npy"),
        "epochs": 1,
        "is_clip_norm": True,
        "max_grad_norm": 0.5,
        "his_size": 20,
        "MODEL_DIR": in_dkn("save_models"),
        "use_entity": True,
        "use_context": True,
    }
    item2item_hparams = prepare_hparams(config, **item2item_overrides)
    item2item_hparams.neg_num = 9
    item2item_iterator = DKNItem2itemTextIterator(item2item_hparams, tf.Graph())
    assert item2item_iterator is not None

    max_rounds = 3
    doc_list = in_dkn("doc_list.txt")
    for seen, (batch, _impression, _size) in enumerate(
        item2item_iterator.load_data_from_file(doc_list), start=1
    ):
        assert isinstance(batch, dict)
        if seen >= max_rounds:
            break
def test_dkn_component_definition(deeprec_resource_path):
    """Build ``DKN`` and ``DKNItem2Item`` and check their core members exist.

    ``mind-demo.zip`` is fetched into the ``dkn`` resource folder first; both
    models are constructed from ``dkn.yaml`` with per-model overrides.
    """
    dkn_dir = os.path.join(deeprec_resource_path, "dkn")

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn("dkn.yaml")
    # Settings shared by both hyper-parameter sets below.
    shared = {
        "news_feature_file": in_dkn(r"doc_feature.txt"),
        "wordEmb_file": in_dkn(r"word_embeddings_100.npy"),
        "entityEmb_file": in_dkn(r"TransE_entity2vec_100.npy"),
        "contextEmb_file": in_dkn(r"TransE_context2vec_100.npy"),
        "epochs": 1,
    }

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    # --- DKN ---
    dkn_hparams = prepare_hparams(
        config,
        user_history_file=in_dkn(r"user_history.txt"),
        learning_rate=0.0001,
        **shared
    )
    assert dkn_hparams is not None
    dkn_model = DKN(dkn_hparams, DKNTextIterator)
    for member in ("logit", "update", "iterator"):
        assert getattr(dkn_model, member) is not None

    # --- DKN item2item version ---
    item2item_hparams = prepare_hparams(
        config,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=in_dkn("save_models"),
        use_entity=True,
        use_context=True,
        **shared
    )
    item2item_hparams.neg_num = 9
    assert item2item_hparams is not None
    item2item_model = DKNItem2Item(item2item_hparams, DKNItem2itemTextIterator)
    for member in ("pred_logits", "update", "iterator"):
        assert getattr(item2item_model, member) is not None
def test_dkn_component_definition(resource_path):
    """Construct a DKN model (5 epochs configured) and check its members.

    ``dknresources.zip`` is downloaded only when ``dkn.yaml`` is not already
    present in the DKN resource folder.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    config = os.path.join(dkn_dir, "dkn.yaml")

    if not os.path.exists(config):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    overrides = {
        "wordEmb_file": os.path.join(dkn_dir, "word_embeddings_100.npy"),
        "entityEmb_file": os.path.join(dkn_dir, "TransE_entity2vec_100.npy"),
        "epochs": 5,
        "learning_rate": 0.0001,
    }
    hparams = prepare_hparams(config, **overrides)
    assert hparams is not None

    dkn_model = DKN(hparams, DKNTextIterator)
    for member in ("logit", "update", "iterator"):
        assert getattr(dkn_model, member) is not None
def test_dkn_component_definition(resource_path):
    """Construct a DKN model (1 epoch configured) and check its members.

    The resource archive ``dknresources.zip`` is fetched only if ``dkn.yaml``
    is missing from the DKN resource folder.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn("dkn.yaml")
    word_embeddings = in_dkn("word_embeddings_100.npy")
    entity_embeddings = in_dkn("TransE_entity2vec_100.npy")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        config,
        wordEmb_file=word_embeddings,
        entityEmb_file=entity_embeddings,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None

    dkn_model = DKN(hparams, DKNTextIterator)
    assert dkn_model.logit is not None
    assert dkn_model.update is not None
    assert dkn_model.iterator is not None
def test_model_lightgcn(resource_path):
    """Fit LightGCN for one epoch on MovieLens 100k and export embeddings.

    Checks that ``run_eval`` and ``recommend_k_items`` return something and
    that the user/item embedding files written by ``infer_embedding`` are
    non-empty.
    """
    config = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "lightgcn.yaml",
    )
    # The embedding CSVs are written into the DKN resource folder.
    out_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    user_csv = os.path.join(out_dir, r"user_embeddings.csv")
    item_csv = os.path.join(out_dir, r"item_embeddings.csv")

    ratings = movielens.load_pandas_df(size="100k")
    train_df, test_df = python_stratified_split(ratings, ratio=0.75)
    dataset = ImplicitCF(train=train_df, test=test_df)

    hparams = prepare_hparams(config, epochs=1)
    lightgcn = LightGCN(hparams, dataset)

    assert lightgcn.run_eval() is not None
    lightgcn.fit()
    assert lightgcn.recommend_k_items(test_df) is not None

    lightgcn.infer_embedding(user_csv, item_csv)
    for written in (user_csv, item_csv):
        assert os.path.getsize(written) != 0
def test_model_dkn(resource_path):
    """Train DKN for one epoch on the MIND demo data and evaluate it.

    ``mind-demo.zip`` is fetched into the DKN resource folder, a ``DKN``
    model is built from ``dkn.yaml``, and the test asserts that ``fit``
    returns a ``BaseModel`` and that ``run_eval`` on the validation file
    returns something.
    """
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    train_file = os.path.join(data_path, r'train_mind_demo.txt')
    valid_file = os.path.join(data_path, r'valid_mind_demo.txt')
    # The unused ``test_file`` local was dropped: this test only trains and
    # validates.
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_model_dkn(resource_path):
    """Fit and evaluate DKN on ``final_test_with_entity.txt`` for one epoch.

    The same file serves as both training and validation input.
    ``dknresources.zip`` is downloaded only when ``dkn.yaml`` is missing.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    config = os.path.join(dkn_dir, "dkn.yaml")
    # One sample file is used for training and for validation.
    sample_file = os.path.join(dkn_dir, "final_test_with_entity.txt")

    if not os.path.exists(config):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    overrides = {
        "wordEmb_file": os.path.join(dkn_dir, "word_embeddings_100.npy"),
        "entityEmb_file": os.path.join(dkn_dir, "TransE_entity2vec_100.npy"),
        "epochs": 1,
        "learning_rate": 0.0001,
    }
    dkn_model = DKN(prepare_hparams(config, **overrides), DKNTextIterator)

    fitted = dkn_model.fit(sample_file, sample_file)
    assert isinstance(fitted, BaseModel)
    assert dkn_model.run_eval(sample_file) is not None
def test_lightgcn_component_definition(resource_path):
    """Build LightGCN on MovieLens 100k and check its graph members.

    The user and item embedding shapes are compared against
    ``[n_users, embed_size]`` and ``[n_items, embed_size]``; the remaining
    members only need to be non-``None``.
    """
    config = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "lightgcn.yaml",
    )

    ratings = movielens.load_pandas_df(size="100k")
    train_df, test_df = python_stratified_split(ratings, ratio=0.75)
    dataset = ImplicitCF(train=train_df, test=test_df)

    dim = 64
    hparams = prepare_hparams(config, embed_size=dim)
    lightgcn = LightGCN(hparams, dataset)

    assert lightgcn.norm_adj is not None
    assert lightgcn.ua_embeddings.shape == [dataset.n_users, dim]
    assert lightgcn.ia_embeddings.shape == [dataset.n_items, dim]
    remaining_members = (
        "u_g_embeddings",
        "pos_i_g_embeddings",
        "neg_i_g_embeddings",
        "batch_ratings",
        "loss",
        "opt",
    )
    for member in remaining_members:
        assert getattr(lightgcn, member) is not None
def test_dkn_component_definition(resource_path):
    """Construct DKN from the MIND demo resources and check its members.

    ``mind-demo.zip`` is fetched into the DKN resource folder before the
    hyper-parameters are prepared.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn("dkn.yaml")
    overrides = {
        "news_feature_file": in_dkn(r'doc_feature.txt'),
        "user_history_file": in_dkn(r'user_history.txt'),
        "wordEmb_file": in_dkn(r'word_embeddings_100.npy'),
        "entityEmb_file": in_dkn(r'TransE_entity2vec_100.npy'),
        "contextEmb_file": in_dkn(r'TransE_context2vec_100.npy'),
        "epochs": 1,
        "learning_rate": 0.0001,
    }

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(config, **overrides)
    assert hparams is not None

    dkn_model = DKN(hparams, DKNTextIterator)
    for member in ("logit", "update", "iterator"):
        assert getattr(dkn_model, member) is not None
def test_model_dkn(resource_path):
    """Run one DKN training epoch and an evaluation pass.

    Training and validation both read ``final_test_with_entity.txt``. The
    ``dknresources.zip`` archive is fetched only if ``dkn.yaml`` is absent.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn("dkn.yaml")
    train_path = in_dkn("final_test_with_entity.txt")
    valid_path = in_dkn("final_test_with_entity.txt")
    word_embeddings = in_dkn("word_embeddings_100.npy")
    entity_embeddings = in_dkn("TransE_entity2vec_100.npy")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        config,
        wordEmb_file=word_embeddings,
        entityEmb_file=entity_embeddings,
        epochs=1,
        learning_rate=0.0001,
    )
    dkn_model = DKN(hparams, DKNTextIterator)

    assert isinstance(dkn_model.fit(train_path, valid_path), BaseModel)
    assert dkn_model.run_eval(valid_path) is not None
def test_Sequential_Iterator(resource_path):
    """Check that ``SequentialIterator`` yields ``dict`` objects.

    When ``train_data`` is not present in the ``slirec`` resource folder, the
    review and metadata files are downloaded and preprocessed first.
    """
    slirec_dir = os.path.join(resource_path, "..", "resources", "deeprec", "slirec")

    def in_slirec(name):
        # Resolve a file name inside the SLi-Rec resource folder.
        return os.path.join(slirec_dir, name)

    config = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "sli_rec.yaml",
    )
    train_path = in_slirec(r"train_data")

    if not os.path.exists(train_path):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_path = in_slirec(reviews_name)
        meta_path = in_slirec(meta_name)

        download_and_extract(reviews_name, reviews_path)
        download_and_extract(meta_name, meta_path)
        data_preprocessing(
            reviews_path,
            meta_path,
            train_path,
            in_slirec(r"valid_data"),
            in_slirec(r"test_data"),
            in_slirec(r"user_vocab.pkl"),
            in_slirec(r"item_vocab.pkl"),
            in_slirec(r"category_vocab.pkl"),
            sample_rate=0.01,  # sample a small item set for this example
            valid_num_ngs=4,  # negatives per positive instance for validation
            test_num_ngs=9,  # negatives per positive instance for testing
        )

    hparams = prepare_hparams(config)
    seq_iterator = SequentialIterator(hparams, tf.Graph())
    assert seq_iterator is not None
    for batch in seq_iterator.load_data_from_file(train_path):
        assert isinstance(batch, dict)
def test_model_sum(deeprec_resource_path, deeprec_config_path):
    """Evaluate, fit (one epoch) and predict with ``SUMModel``.

    When ``train_data`` is not present in the ``slirec`` resource folder, the
    review and metadata files are downloaded and preprocessed first.
    """
    slirec_dir = os.path.join(deeprec_resource_path, "slirec")

    def in_slirec(name):
        # Resolve a file name inside the SLi-Rec resource folder.
        return os.path.join(slirec_dir, name)

    config = os.path.join(deeprec_config_path, "sum.yaml")
    train_path = in_slirec(r"train_data")
    valid_path = in_slirec(r"valid_data")
    test_path = in_slirec(r"test_data")
    prediction_path = in_slirec("output.txt")

    # Number of negative instances paired with each positive instance.
    negatives = {"train": 4, "valid": 4, "test": 9}

    if not os.path.exists(train_path):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_path = in_slirec(reviews_name)
        meta_path = in_slirec(meta_name)

        download_and_extract(reviews_name, reviews_path)
        download_and_extract(meta_name, meta_path)
        data_preprocessing(
            reviews_path,
            meta_path,
            train_path,
            valid_path,
            test_path,
            in_slirec(r"user_vocab.pkl"),
            in_slirec(r"item_vocab.pkl"),
            in_slirec(r"category_vocab.pkl"),
            sample_rate=0.005,  # sample a small item set for this example
            valid_num_ngs=negatives["valid"],
            test_num_ngs=negatives["test"],
        )

    hparams = prepare_hparams(
        config, learning_rate=0.01, epochs=1, train_num_ngs=negatives["train"]
    )
    assert hparams is not None

    sum_model = SUMModel(hparams, SequentialIterator)
    assert sum_model.run_eval(valid_path, num_ngs=negatives["valid"]) is not None
    fitted = sum_model.fit(train_path, valid_path, valid_num_ngs=negatives["valid"])
    assert isinstance(fitted, BaseModel)
    assert sum_model.predict(valid_path, prediction_path) is not None
def test_prepare_hparams(must_exist_attributes, resource_path):
    """Check that hparams loaded from ``xDeepFM.yaml`` expose the attribute.

    ``xdeepfmresources.zip`` is downloaded only when the YAML file is missing.
    """
    xdeepfm_dir = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    config = os.path.join(xdeepfm_dir, "xDeepFM.yaml")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    loaded = prepare_hparams(config)
    assert hasattr(loaded, must_exist_attributes)
def test_prepare_hparams(deeprec_resource_path, must_exist_attributes):
    """Check that hparams loaded from ``xDeepFM.yaml`` expose the attribute.

    The resource archive is fetched only if the YAML file does not exist yet.
    """
    xdeepfm_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config = os.path.join(xdeepfm_dir, "xDeepFM.yaml")

    if not os.path.exists(config):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    assert hasattr(prepare_hparams(config), must_exist_attributes)
def train(self, dataset: RecommendationDataset) -> None:
    """Fit a LightGCN model on ``dataset`` and keep it on ``self.model``.

    Hyper-parameters are read from ``lightgcn.yaml`` (given as a relative
    path) and overridden with fixed values plus ``self.epochs`` and
    ``self.top_size``. The model is built with ``seed=42``.

    Args:
        dataset: Data to train on; it is passed through
            ``self._wrap_dataset`` before reaching ``LightGCN``.
    """
    overrides = {
        "n_layers": 3,
        "batch_size": 1024,
        "epochs": self.epochs,
        "learning_rate": 0.005,
        "top_k": self.top_size,
    }
    hparams = prepare_hparams(
        "./recommenders/reco_utils/recommender/deeprec/config/lightgcn.yaml",
        **overrides
    )
    wrapped = self._wrap_dataset(dataset)
    self.model = LightGCN(hparams, wrapped, seed=42)
    self.model.fit()
def test_DKN_iterator(resource_path):
    """Check that ``DKNTextIterator`` yields ``dict`` objects.

    Embedding files are passed as empty strings. ``dknresources.zip`` is
    downloaded only when ``dkn.yaml`` is missing.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    config = os.path.join(dkn_dir, "dkn.yaml")
    sample_file = os.path.join(dkn_dir, "final_test_with_entity.txt")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    no_embeddings = {"wordEmb_file": "", "entityEmb_file": ""}
    hparams = prepare_hparams(config, **no_embeddings)
    dkn_iterator = DKNTextIterator(hparams, tf.Graph())
    assert dkn_iterator is not None
    for batch in dkn_iterator.load_data_from_file(sample_file):
        assert isinstance(batch, dict)
def test_FFM_iterator(resource_path):
    """Check that ``FFMTextIterator`` yields ``dict`` objects.

    ``xdeepfmresources.zip`` is downloaded only when ``xDeepFM.yaml`` is
    missing from the xDeepFM resource folder.
    """
    xdeepfm_dir = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    config = os.path.join(xdeepfm_dir, "xDeepFM.yaml")
    sample_file = os.path.join(xdeepfm_dir, "sample_FFM_data.txt")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    ffm_iterator = FFMTextIterator(prepare_hparams(config), tf.Graph())
    assert ffm_iterator is not None
    for batch in ffm_iterator.load_data_from_file(sample_file):
        assert isinstance(batch, dict)
def test_xdeepfm_component_definition(resource_path):
    """Build ``XDeepFMModel`` and check its core members are not ``None``.

    ``xdeepfmresources.zip`` is downloaded only when ``xDeepFM.yaml`` is
    missing from the xDeepFM resource folder.
    """
    xdeepfm_dir = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    config = os.path.join(xdeepfm_dir, "xDeepFM.yaml")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    xdeepfm = XDeepFMModel(prepare_hparams(config), FFMTextIterator)
    for member in ("logit", "update", "iterator"):
        assert getattr(xdeepfm, member) is not None
def test_xdeepfm_component_definition(deeprec_resource_path):
    """Build ``XDeepFMModel`` and check its core members are not ``None``.

    The resource archive is fetched only if ``xDeepFM.yaml`` does not exist.
    """
    xdeepfm_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config = os.path.join(xdeepfm_dir, "xDeepFM.yaml")

    if not os.path.exists(config):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    loaded = prepare_hparams(config)
    xdeepfm = XDeepFMModel(loaded, FFMTextIterator)
    assert xdeepfm.logit is not None
    assert xdeepfm.update is not None
    assert xdeepfm.iterator is not None
def test_FFM_iterator(deeprec_resource_path):
    """Check that ``FFMTextIterator`` yields ``tuple`` objects.

    The resource archive is fetched only if ``xDeepFM.yaml`` does not exist.
    """
    xdeepfm_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config = os.path.join(xdeepfm_dir, "xDeepFM.yaml")
    sample_file = os.path.join(xdeepfm_dir, "sample_FFM_data.txt")

    if not os.path.exists(config):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    loaded = prepare_hparams(config)
    ffm_iterator = FFMTextIterator(loaded, tf.Graph())
    assert ffm_iterator is not None
    for yielded in ffm_iterator.load_data_from_file(sample_file):
        assert isinstance(yielded, tuple)
def test_model_xdeepfm(resource_path):
    """Evaluate, fit and predict with ``XDeepFMModel`` on the sample data.

    The same sample file is used for evaluation, training, validation and
    prediction input. The resource archive is fetched only if
    ``xDeepFM.yaml`` is missing.
    """
    xdeepfm_dir = os.path.join(resource_path, '../resources/deeprec/xdeepfm')
    config, sample_file, prediction_file = (
        os.path.join(xdeepfm_dir, name)
        for name in (r'xDeepFM.yaml', r'sample_FFM_data.txt', r'output.txt')
    )

    if not os.path.exists(config):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/',
            xdeepfm_dir,
            'xdeepfmresources.zip',
        )

    hparams = prepare_hparams(config, learning_rate=0.01)
    assert hparams is not None

    xdeepfm = XDeepFMModel(hparams, FFMTextIterator)
    assert xdeepfm.run_eval(sample_file) is not None
    fitted = xdeepfm.fit(sample_file, sample_file)
    assert isinstance(fitted, BaseModel)
    assert xdeepfm.predict(sample_file, prediction_file) is not None
def test_model_xdeepfm(deeprec_resource_path):
    """Run eval, fit and predict on ``XDeepFMModel`` with the sample file.

    ``xdeepfmresources.zip`` is downloaded only when ``xDeepFM.yaml`` is
    missing from the ``xdeepfm`` resource folder.
    """
    xdeepfm_dir = os.path.join(deeprec_resource_path, "xdeepfm")

    def in_xdeepfm(name):
        # Resolve a file name inside the xDeepFM resource folder.
        return os.path.join(xdeepfm_dir, name)

    config = in_xdeepfm("xDeepFM.yaml")
    sample_file = in_xdeepfm("sample_FFM_data.txt")
    prediction_file = in_xdeepfm("output.txt")

    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(config, learning_rate=0.01)
    assert hparams is not None

    xdeepfm = XDeepFMModel(hparams, FFMTextIterator)
    assert xdeepfm.run_eval(sample_file) is not None
    assert isinstance(xdeepfm.fit(sample_file, sample_file), BaseModel)
    assert xdeepfm.predict(sample_file, prediction_file) is not None
def test_model_xdeepfm(resource_path):
    """Run eval, fit and predict on ``XDeepFMModel`` with the sample file.

    The resource archive is fetched only if ``xDeepFM.yaml`` does not exist.
    """
    xdeepfm_dir = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    paths = {
        key: os.path.join(xdeepfm_dir, name)
        for key, name in (
            ("config", "xDeepFM.yaml"),
            ("data", "sample_FFM_data.txt"),
            ("output", "output.txt"),
        )
    }

    if not os.path.exists(paths["config"]):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            xdeepfm_dir,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(paths["config"], learning_rate=0.01)
    assert hparams is not None

    xdeepfm = XDeepFMModel(hparams, FFMTextIterator)
    assert xdeepfm.run_eval(paths["data"]) is not None
    fitted = xdeepfm.fit(paths["data"], paths["data"])
    assert isinstance(fitted, BaseModel)
    assert xdeepfm.predict(paths["data"], paths["output"]) is not None
def test_model_dkn(resource_path):
    """Fit DKN for one epoch and evaluate it on the same sample file.

    ``dknresources.zip`` is downloaded only when ``dkn.yaml`` is missing.
    """
    dkn_dir = os.path.join(resource_path, '../resources/deeprec/dkn')

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn(r'dkn.yaml')
    # Training and validation read the same file.
    sample_file = in_dkn(r'final_test_with_entity.txt')

    if not os.path.exists(config):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/',
            dkn_dir,
            'dknresources.zip',
        )

    hparams = prepare_hparams(
        config,
        wordEmb_file=in_dkn(r'word_embeddings_100.npy'),
        entityEmb_file=in_dkn(r'TransE_entity2vec_100.npy'),
        epochs=1,
        learning_rate=0.0001,
    )
    dkn_model = DKN(hparams, DKNTextIterator)

    fitted = dkn_model.fit(sample_file, sample_file)
    assert isinstance(fitted, BaseModel)
    assert dkn_model.run_eval(sample_file) is not None
def test_DKN_iterator(resource_path):
    """Check the first element of each ``DKNTextIterator`` tuple is a dict.

    ``mind-demo.zip`` is fetched into the DKN resource folder first; the
    embedding files are passed to ``prepare_hparams`` as empty strings.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")

    def in_dkn(name):
        # Resolve a file name inside the DKN resource folder.
        return os.path.join(dkn_dir, name)

    config = in_dkn("dkn.yaml")
    train_sample = in_dkn(r"train_mind_demo.txt")

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    overrides = {
        "news_feature_file": in_dkn(r"doc_feature.txt"),
        "user_history_file": in_dkn(r"user_history.txt"),
        "wordEmb_file": "",
        "entityEmb_file": "",
        "contextEmb_file": "",
    }
    hparams = prepare_hparams(config, **overrides)
    dkn_iterator = DKNTextIterator(hparams, tf.Graph())
    assert dkn_iterator is not None
    for batch, _impression, _size in dkn_iterator.load_data_from_file(train_sample):
        assert isinstance(batch, dict)
# news_entities, # train_entities, # valid_entities, # max_sentence=10, # word_embedding_dim=100, # ) news_feature_file = os.path.join(data_path, 'doc_feature.txt') word_embeddings_file = os.path.join(data_path, 'word_embeddings_5w_100.npy') user_history_file = os.path.join(data_path, 'user_history.txt') entity_embeddings_file = os.path.join(data_path, 'entity_embeddings_5w_100.npy') yaml_file = os.path.join(data_path, 'dkn_MINDlarge.yaml') # yaml_file = maybe_download(url="https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml", # work_directory=data_path) hparams = prepare_hparams(yaml_file, news_feature_file=news_feature_file, user_history_file=user_history_file, wordEmb_file=word_embeddings_file, entityEmb_file=entity_embeddings_file, epochs=epochs, history_size=history_size, MODEL_DIR=os.path.join(data_path, 'save_models'), batch_size=batch_size) model = DKN(hparams, DKNTextIterator) model.fit(train_file, valid_file) res = model.run_eval(valid_file) print(res)
def test_slirec_component_definition(resource_path):
    """Build SLi-Rec and NextItNet models and check their core members.

    When ``train_data`` is not present in the ``slirec`` resource folder, the
    review and metadata files are downloaded and preprocessed first. Both
    models are then constructed with ``train_num_ngs=4`` and their ``logit``,
    ``update`` and ``iterator`` members are asserted to be non-``None``.
    """
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "slirec")
    yaml_file = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "sli_rec.yaml",
    )
    yaml_file_nextitnet = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "nextitnet.yaml",
    )
    train_file = os.path.join(data_path, r"train_data")

    if not os.path.exists(train_file):
        # train_file is already set above; the redundant reassignment to the
        # same path was removed.
        valid_file = os.path.join(data_path, r"valid_data")
        test_file = os.path.join(data_path, r"test_data")
        user_vocab = os.path.join(data_path, r"user_vocab.pkl")
        item_vocab = os.path.join(data_path, r"item_vocab.pkl")
        cate_vocab = os.path.join(data_path, r"category_vocab.pkl")

        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)

        # number of negative instances with a positive instance for validation
        valid_num_ngs = 4
        # number of negative instances with a positive instance for testing
        test_num_ngs = 9
        # sample a small item set for training and testing here for example
        sample_rate = 0.01

        input_files = [
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            test_file,
            user_vocab,
            item_vocab,
            cate_vocab,
        ]
        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)
        data_preprocessing(
            *input_files,
            sample_rate=sample_rate,
            valid_num_ngs=valid_num_ngs,
            test_num_ngs=test_num_ngs
        )

    # confirm the train_num_ngs when initializing a SLi_Rec model.
    hparams = prepare_hparams(yaml_file, train_num_ngs=4)
    model = SLI_RECModel(hparams, SequentialIterator)

    # nextitnet model
    hparams_nextitnet = prepare_hparams(yaml_file_nextitnet, train_num_ngs=4)
    model_nextitnet = NextItNetModel(hparams_nextitnet, NextItNetIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    assert model_nextitnet.logit is not None
    assert model_nextitnet.update is not None
    assert model_nextitnet.iterator is not None
# Embedding files handed to prepare_hparams below, resolved under data_path.
wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')

# Fetch the demo archive only when the YAML config is missing.
# NOTE(review): the download target is `data_path1`, while the files above are
# resolved under `data_path` — confirm that this is intentional.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/', data_path1, 'mind-demo-dkn.zip')

## Create hyper-parameters
epochs = 10
history_size = 50
batch_size = 100
hparams = prepare_hparams(yaml_file,
                          news_feature_file = news_feature_file,
                          user_history_file = user_history_file,
                          wordEmb_file=wordEmb_file,
                          entityEmb_file=entityEmb_file,
                          contextEmb_file=contextEmb_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
print(hparams)

## Train the DKN model
model = DKN(hparams, DKNTextIterator)
# Print the evaluation result on valid_file before any training happens.
print(model.run_eval(valid_file))
model.fit(train_file, valid_file)

## Evaluate the DKN model
res = model.run_eval(test_file)
# Config, data splits and prediction output, all resolved under data_path.
yaml_file = os.path.join(data_path, r'xDeepFM.yaml')
train_file = os.path.join(data_path, r'synthetic_part_0')
valid_file = os.path.join(data_path, r'synthetic_part_1')
test_file = os.path.join(data_path, r'synthetic_part_2')
output_file = os.path.join(data_path, r'output.txt')

# Fetch the resource archive only when the YAML config is missing.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/', data_path, 'xdeepfmresources.zip')
print("Data gathering complete")

# 1. prepare hyper-parameters
# EPOCHS and BATCH_SIZE are defined outside this excerpt.
hparams = prepare_hparams(yaml_file,
                          FEATURE_COUNT=1000,
                          FIELD_COUNT=10,
                          cross_l2=0.0001,
                          embed_l2=0.0001,
                          learning_rate=0.001,
                          epochs=EPOCHS,
                          batch_size=BATCH_SIZE)
print("Hyper-parameters: ")
print(hparams)

# 2. create data loader
# designate a data iterator for xDeepFM model (FFMTextIterator)
input_creator = FFMTextIterator

# 3. create model
# RANDOM_SEED is defined outside this excerpt.
model = XDeepFMModel(hparams, input_creator, seed=RANDOM_SEED)
# we can also load a pre-trained model with model.load_model(r'model_path')

# untrained model's performance
print("Untrained model's performance: {}".format(model.run_eval(test_file)))
test_file = os.path.join(data_path, r'criteo_tiny_test')

# Fetch the resource archive only when the YAML config is missing.
# yaml_file, train_file and valid_file are defined outside this excerpt.
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r'https://recodatasets.blob.core.windows.net/deeprec/',
        data_path, 'xdeepfmresources.zip')

# set hyper-parameters
# BATCH_SIZE and EPOCHS are defined outside this excerpt.
hparams = prepare_hparams(yaml_file,
                          FEATURE_COUNT=2300000,
                          FIELD_COUNT=39,
                          cross_l2=0.01,
                          embed_l2=0.01,
                          layer_l2=0.01,
                          learning_rate=0.002,
                          batch_size=BATCH_SIZE,
                          epochs=EPOCHS,
                          cross_layer_sizes=[20, 10],
                          init_value=0.1,
                          layer_sizes=[20, 20],
                          use_Linear_part=True,
                          use_CIN_part=True,
                          use_DNN_part=True)

# make model
model = XDeepFMModel(hparams, FFMTextIterator, seed=RANDOM_SEED)

# train model
model.fit(train_file, valid_file)

# profiling: save the model's train_timeliner to a JSON file
model.train_timeliner.save('xDeepFM-timeliner.json')
def train_lightgcn(params, data):
    """Build a LightGCN model, fit it, and time the fit.

    Args:
        params: Mapping unpacked as keyword arguments into
            ``prepare_hparams``.
        data: Dataset object handed to ``LightGCN``.

    Returns:
        tuple: The fitted model and the ``Timer`` that wrapped ``fit()``.
    """
    lightgcn = LightGCN(prepare_hparams(**params), data)
    with Timer() as fit_timer:
        lightgcn.fit()
    return lightgcn, fit_timer