def test_predict_recommend(self):
    def gen_rand_user_item_feature(user_num, item_num, class_num):
        user_id = random.randint(1, user_num)
        item_id = random.randint(1, item_num)
        rating = random.randint(1, class_num)
        sample = Sample.from_ndarray(np.array([user_id, item_id]), np.array([rating]))
        return UserItemFeature(user_id, item_id, sample)

    model = NeuralCF(200, 80, 5)
    data = self.sc.parallelize(range(0, 50))\
        .map(lambda i: gen_rand_user_item_feature(200, 80, 5))
    predictions = model.predict_user_item_pair(data).collect()
    print(predictions[0])
    recommended_items = model.recommend_for_user(data, max_items=3).collect()
    print(recommended_items[0])
    recommended_users = model.recommend_for_item(data, max_users=4).collect()
    print(recommended_users[0])
def _get_embed_ncf(self):
    user_max = self._config["user_max"]
    ncf = NeuralCF(user_count=user_max,
                   item_count=self._config["movie_max"],
                   class_num=self._config["rate_dim"],
                   hidden_layers=[20, 10],
                   include_mf=False)
    loaded = ncf.load_model(self._config["ncf_model_path"])
    # The first two weight tensors of the loaded model are the
    # user and item embedding matrices.
    user_embed = loaded.get_weights()[0]
    item_embed = loaded.get_weights()[1]
    user_dict = {}
    for i in range(1, user_max + 1):
        user_dict[i] = user_embed[i][:]
    item_dict = {}
    for i in range(1, self._config["movie_max"] + 1):
        item_dict[i] = item_embed[i][:]
    return (user_dict, item_dict)
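To make the purpose of the returned dictionaries concrete, here is a minimal sketch (not part of the original code) that scores user-user similarity from the extracted embeddings; the `cosine_similarity` helper is hypothetical and only NumPy is assumed.

import numpy as np

def cosine_similarity(a, b):
    # Cosine similarity between two embedding vectors.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Hypothetical usage, assuming _get_embed_ncf as defined above:
# user_dict, item_dict = self._get_embed_ncf()
# print(cosine_similarity(user_dict[1], user_dict[2]))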
def build_sample(user_id, item_id, rating):
    sample = Sample.from_ndarray(np.array([user_id, item_id]), np.array([rating]))
    return UserItemFeature(user_id, item_id, sample)

# Shift the 1-5 ratings to zero-based labels, matching the
# zero-based default of sparse_categorical_crossentropy.
pairFeatureRdds = sc.parallelize(movielens_data)\
    .map(lambda x: build_sample(x[0], x[1], x[2] - 1))
pairFeatureRdds.take(3)

# Randomly split the data into train (80%) and validation (20%)
trainPairFeatureRdds, valPairFeatureRdds = \
    pairFeatureRdds.randomSplit([0.8, 0.2], seed=1)
valPairFeatureRdds.cache()
train_rdd = trainPairFeatureRdds.map(lambda pair_feature: pair_feature.sample)
val_rdd = valPairFeatureRdds.map(lambda pair_feature: pair_feature.sample)
val_rdd.persist()

ncf = NeuralCF(user_count=max_user_id,
               item_count=max_movie_id,
               class_num=5,
               hidden_layers=[20, 10],
               include_mf=False)
ncf.compile(optimizer="adam",
            loss="sparse_categorical_crossentropy",
            metrics=['accuracy'])
ncf.fit(train_rdd, nb_epoch=10, batch_size=8000, validation_data=val_rdd)
ncf.save_model("../save_model/movie_ncf.zoomodel", over_write=True)
# weights = ncf.get_weights()
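A quick sanity check after saving, sketched here under the assumption that NeuralCF.load_model (inherited from ZooModel) restores the model written above; this is not part of the original snippet.

# Reload the saved model and run inference on the validation samples.
loaded = NeuralCF.load_model("../save_model/movie_ncf.zoomodel")
results = loaded.predict(val_rdd)
print(results.take(3))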
def test_save_load(self):
    model = NeuralCF(10000, 2000, 10)
    input_data = np.random.randint(1500, size=(300, 2))
    self.assert_save_load(model, input_data)
def test_forward_backward_with_mf(self):
    model = NeuralCF(10, 10, 5, 5, 5)
    input_data = np.random.randint(10, size=(3, 2))
    self.assert_forward_backward(model, input_data)
def test_forward_backward_without_mf(self):
    model = NeuralCF(30, 12, 2, include_mf=False)
    model.summary()
    input_data = np.random.randint(10, size=(10, 2))
    self.assert_forward_backward(model, input_data)
def test_compile_fit(self):
    def gen_rand_user_item_feature(user_num, item_num, class_num):
        user_id = random.randint(1, user_num)
        item_id = random.randint(1, item_num)
        rating = random.randint(1, class_num)
        sample = Sample.from_ndarray(np.array([user_id, item_id]), np.array([rating]))
        return UserItemFeature(user_id, item_id, sample)

    model = NeuralCF(200, 80, 5)
    model.summary()
    data = self.sc.parallelize(range(0, 50)) \
        .map(lambda i: gen_rand_user_item_feature(200, 80, 5)) \
        .map(lambda pair: pair.sample)
    model.compile(optimizer="adam",
                  loss=SparseCategoricalCrossEntropy(zero_based_label=False),
                  metrics=['accuracy'])
    tmp_log_dir = create_tmp_path()
    model.set_tensorboard(tmp_log_dir, "training_test")
    model.fit(data, nb_epoch=1, batch_size=32, validation_data=data)
    train_loss = model.get_train_summary("Loss")
    val_loss = model.get_validation_summary("Loss")
    print(np.array(train_loss))
    print(np.array(val_loss))
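The summaries collected above can also be plotted rather than printed; a minimal sketch, assuming matplotlib is available and that each scalar summary record is a (step, value, timestamp) triple as in BigDL's scalar summaries.

import matplotlib.pyplot as plt

train = np.array(model.get_train_summary("Loss"))
val = np.array(model.get_validation_summary("Loss"))
plt.plot(train[:, 0], train[:, 1], label="train loss")  # x: iteration, y: loss
plt.plot(val[:, 0], val[:, 1], label="validation loss")
plt.legend()
plt.show()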
def build_sample(user_id, item_id, rating):
    sample = Sample.from_ndarray(np.array([user_id, item_id]), np.array([rating]))
    return UserItemFeature(user_id, item_id, sample)

pairFeatureRdds = sc.parallelize(movielens_data)\
    .map(lambda x: build_sample(x[0], x[1], x[2]))
pairFeatureRdds.take(3)

# Randomly split the data into train (80%) and validation (20%)
trainPairFeatureRdds, valPairFeatureRdds = \
    pairFeatureRdds.randomSplit([0.8, 0.2], seed=1)
valPairFeatureRdds.cache()
train_rdd = trainPairFeatureRdds.map(lambda pair_feature: pair_feature.sample)
val_rdd = valPairFeatureRdds.map(lambda pair_feature: pair_feature.sample)
print(train_rdd.count())
train_rdd.take(3)

# Build Model
ncf = NeuralCF(user_count=max_user_id,
               item_count=max_movie_id,
               class_num=5,
               hidden_layers=[20, 10],
               include_mf=False)

# Setup the Optimizer. ClassNLLCriterion expects one-based labels,
# so the raw 1-5 ratings are used directly here.
optimizer = Optimizer(
    model=ncf,
    training_rdd=train_rdd,
    criterion=ClassNLLCriterion(),
    end_trigger=MaxEpoch(10),
    batch_size=2800,
    optim_method=Adam(learningrate=0.001))
optimizer.set_validation(
    batch_size=2800,
    val_rdd=val_rdd,
    trigger=EveryEpoch(),
    val_method=[MAE(), Loss(ClassNLLCriterion())])
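The snippet above configures validation but never launches training; assuming the standard BigDL Optimizer API, training would be kicked off with optimize(), sketched below.

# Launch distributed training; optimize() returns the trained model.
trained_ncf = optimizer.optimize()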