# Time one call to util.create_sparse_matrix on "new.csv" and print the
# elapsed wall-clock seconds.
import datetime
import math
import time

import numpy as np
import pandas as pd

import util

start = time.time()
sparse_dict = util.create_sparse_matrix("new.csv", replace=False)
elapsed = time.time() - start
print("time", elapsed)
# Take the first 2000 rows returned by util.create_sparse_matrix, re-encode
# user_id and hotel_id as contiguous integer indices, and write the result
# to data.csv.
import pandas as pd

import util

DATAFRAME_NAME = './new.csv'

# .copy() detaches the 2000-row sample from the object util returned, so the
# column assignments below modify an independent frame rather than a slice
# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
df = util.create_sparse_matrix(DATAFRAME_NAME, replace=False).iloc[:2000, :].copy()

# Distinct ids in order of first appearance. .tolist() yields native Python
# scalars so the dict keys below are plain Python values.
unique_user_ids = df['user_id'].unique().tolist()
unique_hotel_ids = df['hotel_id'].unique().tolist()

# n_movies holds the number of distinct hotel_id values; the name is kept
# unchanged in case later code refers to it.
n_users, n_movies = len(unique_user_ids), len(unique_hotel_ids)
print(n_movies)

# Enumerating the first-appearance order (instead of iterating a set) makes
# the id -> index assignment reproducible from run to run.
user_index_map = {user_id: index for index, user_id in enumerate(unique_user_ids)}
df['user_id'] = df['user_id'].map(user_index_map)

hotel_index_map = {hotel_id: index for index, hotel_id in enumerate(unique_hotel_ids)}
df['hotel_id'] = df['hotel_id'].map(hotel_index_map)

df.to_csv("data.csv")
# Training-data preparation script (its statements appear collapsed onto a
# single line here). Steps visible on this line:
#   1. Load the table with util.create_sparse_matrix(DATAFRAME_NAME, replace=True).
#   2. Count distinct user_id and hotel_id values (n_movies holds the hotel count).
#   3. Build user_index_map / hotel_index_map by pairing set(column) with
#      range(count), then replace each id column with its mapped index.
#      NOTE(review): set iteration order decides which index each id gets.
#   4. Print df.head() and split df into train/test with test_size=0.2.
#   5. Open 'user_id_map.pkl' for binary writing; the body of that `with`
#      block is not on this line.
import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.metrics import mean_absolute_error import matplotlib.pyplot as plt import keras import util import pickle import os DATAFRAME_NAME = './new.csv' df = util.create_sparse_matrix(DATAFRAME_NAME, replace=True) n_users, n_movies = len(df.user_id.unique()), len(df.hotel_id.unique()) user_index_map = { ids: i for (ids, i) in zip(list(set(df['user_id'])), list(range(n_users))) } df['user_id'] = df['user_id'].apply(lambda x: user_index_map[x]) hotel_index_map = { ids: i for (ids, i) in zip(list(set(df['hotel_id'])), list(range(n_movies))) } df['hotel_id'] = df['hotel_id'].apply(lambda x: hotel_index_map[x]) print(df.head()) train, test = train_test_split(df, test_size=0.2) with open('user_id_map.pkl', "wb") as handle: