def test_fitting():
    """Fitting a Dataset with default identity features yields square
    per-entity feature matrices and interaction/feature builders whose
    shapes and nnz counts match the fitted user/item counts."""
    n_users = 10
    n_items = 100

    ds = Dataset()
    ds.fit(range(n_users), range(n_items))

    # Interaction space is (users x items).
    assert ds.interactions_shape() == (n_users, n_items)

    # With identity features enabled (the default), each entity gets its
    # own feature column, so the feature matrices are square.
    assert ds.user_features_shape() == (n_users, n_users)
    assert ds.item_features_shape() == (n_items, n_items)

    # Empty builds still produce correctly-shaped outputs: the identity
    # diagonal contributes one nonzero per entity.
    assert ds.build_interactions([])[0].shape == (n_users, n_items)
    assert ds.build_user_features([]).getnnz() == n_users
    assert ds.build_item_features([]).getnnz() == n_items
def test_fitting_no_identity():
    """With identity features disabled and no explicit features supplied,
    the feature matrices have zero columns and empty builds produce no
    nonzero entries."""
    n_users = 10
    n_items = 100

    ds = Dataset(
        user_identity_features=False,
        item_identity_features=False,
    )
    ds.fit(range(n_users), range(n_items))

    # Interaction space is unaffected by the identity-feature setting.
    assert ds.interactions_shape() == (n_users, n_items)

    # No identity features and no declared features -> zero feature columns.
    assert ds.user_features_shape() == (n_users, 0)
    assert ds.item_features_shape() == (n_items, 0)

    assert ds.build_interactions([])[0].shape == (n_users, n_items)

    # Without the identity diagonal, an empty build is entirely empty.
    assert ds.build_user_features([], normalize=False).getnnz() == 0
    assert ds.build_item_features([], normalize=False).getnnz() == 0
# Feature-column names are every column of user_feature_df except the first
# (presumably the first column is 'userCode' — verify against the DataFrame).
user_feature_names = list(user_feature_df)[1:]
# Keep only users that appear in `unique_user` (defined earlier in the file).
user_feature_df = user_feature_df[user_feature_df['userCode'].isin(unique_user)]
# Lazily yield (user_id, {feature_name: value}) pairs in the shape
# lightfm's Dataset.build_user_features expects.
user_feature_iterable = ((row['userCode'], {feature_name: row[feature_name] for feature_name in user_feature_names})for index, row in user_feature_df.iterrows())
# fit dataset
# NOTE(review): `iteam_iterable` looks like a typo for `item_iterable`, but it
# is a reference to a name defined elsewhere — confirm before renaming.
dataset.fit(users=user_iterable, items=iteam_iterable, user_features=user_feature_names, item_features=item_feature_names )
# check shape
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items: {}.'.format(num_users, num_items))
# Feature-matrix column counts (row counts equal num_users / num_items).
_, num_users_feature = dataset.user_features_shape()
_, num_items_feature = dataset.item_features_shape()
print('Num users feature: {}, num_items feature: {}.'.format(num_users_feature, num_items_feature))
# build user feature matrix
user_feature_matrix = dataset.build_user_features(user_feature_iterable, normalize=True)
# build item feature matrix
item_feature_matrix = dataset.build_item_features(item_feature_iterable, normalize=True)
# build interaction
# Each interaction is (user, item, weight); rows whose project_id is in
# `ignore_project` are skipped. `train` and `interaction_col_name` are
# defined elsewhere in the file.
(train_interactions, weights) = dataset.build_interactions(data=((row['userCode'], row['project_id'], row[interaction_col_name])for index, row in train.iterrows() if row['project_id'] not in ignore_project))
from lightfm import LightFM
# WARP loss with a fixed random_state for reproducibility.
model = LightFM(loss='warp', random_state=44, learning_schedule='adagrad')
# NOTE(review): this call is truncated at the chunk boundary — the remaining
# arguments continue beyond this view.
model.fit(train_interactions,
# Load item (book) metadata and keep only the columns used as features/labels.
item_meta = pd.read_csv('data/books.csv')
item_meta = item_meta[['book_id', 'authors', 'average_rating', 'original_title']]

# Build (item_id, [feature, ...]) pairs in the shape lightfm's
# Dataset.build_item_features expects. Iterating the columns with zip is
# both faster than per-row integer lookups and robust to a non-default
# index on item_meta (Series[i] is a *label* lookup, which only matched
# position by accident with a RangeIndex).
item_features_source = [
    (book_id, [authors, average_rating])
    for book_id, authors, average_rating in zip(
        item_meta['book_id'],
        item_meta['authors'],
        item_meta['average_rating'],
    )
]

# Construct Data-set
# set, list, and pandas Series are all accepted.
# First map the user/item indices and register user/item features, then
# fit the occurrence data. Fitting a scipy csr_matrix directly also works.
# Caution: all null values must be filled beforehand.
dataset = Dataset()
dataset.fit(users=ratings['user_id'].unique(),
            items=ratings['book_id'].unique(),
            # Register every distinct feature value from the non-id columns.
            item_features=item_meta[item_meta.columns[1:]].values.flatten()
            )
print("Num Users: {}, Num Items: {}".format(*dataset.interactions_shape()))
print(dataset.user_features_shape(), dataset.item_features_shape())

# Build the sparse interaction/weight matrices from (user, item[, weight])
# triples (`ratings_source` is defined elsewhere in the file) and the item
# feature matrix from the pairs assembled above.
interactions, weights = dataset.build_interactions(ratings_source)
item_features = dataset.build_item_features(item_features_source)
# mappings = dataset.mapping()

# Save
# mmwrite('data/interactions.mtx', interactions)
# mmwrite('data/item_features.mtx', item_features)
# mmwrite('data/weights.mtx', weights)