def get_user_profile(user_id):
    """Return the genre profile of one user as a JSON string.

    The profile is a single record with one field per entry of
    ``converted_genres``. Each field is the value found in row 0 of the
    combined averages (the averages computed over all ratings) minus the
    value found in row 1 (the averages returned by ``get_user_avg_rating``
    for ``user_id``).

    Args:
        user_id: Identifier of the user; passed through unchanged to
            ``get_user_avg_rating``.

    Returns:
        ``empty_json`` when the ratings table is empty, otherwise the
        profile serialized with ``orient='records'``.
    """
    # Local import keeps the block self-contained; StringIO is only needed
    # to hand the JSON text to pandas as a file-like object.
    from io import StringIO

    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    ratings = client.get_data_table(session, keyspace, table)
    if ratings.empty:
        return empty_json

    ratings = ratings.drop(columns="rating_id")
    avg_ratings = calculate_avg_ratings(ratings, converted_genres)

    # Passing a literal JSON string to read_json is deprecated in recent
    # pandas releases; a file-like wrapper works on old and new versions.
    avg_user_ratings = pd.read_json(
        StringIO(get_user_avg_rating(user_id)), orient='records'
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # ignore_index/sort options yields the same stacked frame.
    combined = pd.concat(
        [avg_ratings, avg_user_ratings], ignore_index=True, sort=False
    )

    # NOTE(review): assumes row 0 holds the overall averages and row 1 the
    # user's averages, i.e. calculate_avg_ratings returns one row - confirm.
    overall_row = combined.iloc[0]
    user_row = combined.iloc[1]

    # Build the single profile row directly from the genre list instead of
    # seeding a placeholder row of exactly 20 ones, which only worked when
    # converted_genres had 20 entries.
    profile = pd.DataFrame(
        [[overall_row[genre] - user_row[genre] for genre in converted_genres]],
        columns=converted_genres,
    )
    return profile.to_json(orient='records')
def delete_rating(json_rating):
    """Delete the rows described by ``json_rating`` from the ratings table.

    Args:
        json_rating: JSON text parsed with ``pd.read_json`` into the frame
            that is handed to ``client.delete_rows``.

    Returns:
        The ``json_rating`` argument, unchanged.
    """
    # Prepare a session bound to the keyspace, creating the keyspace first.
    conn = client.create_session()
    client.create_keyspace(conn, keyspace)
    conn.set_keyspace(keyspace)
    conn.row_factory = dict_factory

    # NOTE(review): `records` is a bare name here, while other functions in
    # this file pass the literal 'records' - confirm it is defined at module
    # level.
    client.delete_rows(
        conn, keyspace, table, pd.read_json(json_rating, orient=records)
    )
    return json_rating
def add_rating(json_rating):
    """Insert the rows described by ``json_rating`` into the ratings table.

    Args:
        json_rating: JSON text parsed with ``pd.read_json`` into the frame
            that is handed to ``client.push_data_table``.

    Returns:
        The ``json_rating`` argument, unchanged.
    """
    # Prepare a session bound to the keyspace, creating the keyspace first.
    conn = client.create_session()
    client.create_keyspace(conn, keyspace)
    conn.set_keyspace(keyspace)
    conn.row_factory = dict_factory

    # NOTE(review): `records` is a bare name here, while other functions in
    # this file pass the literal 'records' - confirm it is defined at module
    # level.
    client.push_data_table(
        conn, keyspace, table, pd.read_json(json_rating, orient=records)
    )
    return json_rating
def get_ratings():
    """Return every stored rating as a JSON string.

    Returns:
        ``empty_json`` when the table holds no rows; otherwise the table,
        without its ``rating_id`` column, serialized with
        ``orient='records'``.
    """
    # Prepare a session bound to the keyspace, creating the keyspace first.
    conn = client.create_session()
    client.create_keyspace(conn, keyspace)
    conn.set_keyspace(keyspace)
    conn.row_factory = dict_factory

    stored = client.get_data_table(conn, keyspace, table)
    if not stored.empty:
        return stored.drop(columns="rating_id").to_json(orient='records')
    return empty_json
def get_user_avg_rating(user_id):
    """Return the average ratings of a single user as a JSON string.

    Args:
        user_id: Identifier of the user; converted with ``int`` before it
            is compared against the ``userid`` column.

    Returns:
        ``empty_json`` when the ratings table is empty; otherwise the
        result of ``calculate_avg_ratings`` over that user's rows,
        serialized with ``to_json``.

    Raises:
        ValueError: If ``user_id`` cannot be converted to ``int``.
    """
    session = client.create_session()
    client.create_keyspace(session, keyspace)
    session.set_keyspace(keyspace)
    session.row_factory = dict_factory

    ratings = client.get_data_table(session, keyspace, table)
    # Same guard the other table readers in this file use, so an empty
    # table yields the empty payload instead of failing on the missing
    # 'userid' column below.
    if ratings.empty:
        return empty_json

    # Sorting is kept because the row order reaches calculate_avg_ratings.
    # The former `ratings.set_index('userid')` call was dropped: its result
    # was discarded, so it never affected the frame.
    ratings = ratings.sort_values(by=['userid'])
    user_ratings = ratings[ratings['userid'] == int(user_id)]

    # NOTE(review): `records` is a bare name here, while other functions in
    # this file pass the literal 'records' - confirm it is defined at module
    # level.
    return calculate_avg_ratings(user_ratings, converted_genres).to_json(orient=records)
def get_avg_ratings():
    """Return the average ratings over the whole table as a JSON string.

    Returns:
        ``empty_json`` when the table holds no rows; otherwise the result
        of ``calculate_avg_ratings`` over the table without its
        ``rating_id`` column, serialized with ``to_json``.
    """
    # Prepare a session bound to the keyspace, creating the keyspace first.
    conn = client.create_session()
    client.create_keyspace(conn, keyspace)
    conn.set_keyspace(keyspace)
    conn.row_factory = dict_factory

    stored = client.get_data_table(conn, keyspace, table)
    if not stored.empty:
        without_ids = stored.drop(columns="rating_id")
        averages = calculate_avg_ratings(without_ids, converted_genres)
        # NOTE(review): `records` is a bare name here, while other functions
        # in this file pass the literal 'records' - confirm it is defined at
        # module level.
        return averages.to_json(orient=records)
    return empty_json
def __init__(self):
    """Connect to the cluster and make sure the keyspace and tables exist.

    Stores the genre list, the keyspace/table names, the cluster handle and
    the connected session on the instance. The session is switched to the
    keyspace and configured with ``dict_factory`` as its row factory.
    """
    self.genres = wtiproj03_ETL.get_genres_list()

    # Names of the keyspace and of the three tables created below.
    self.keyspace = "user_ratings"
    self.rating_table = "rating"
    self.user_avg_table = "user_avg"
    self.all_avg_table = "all_avg"

    # Cluster reachable on the loopback address at port 9042.
    self.cluster = Cluster(['127.0.0.1'], port=9042)
    self.session = self.cluster.connect()

    cass = wtiproj06_cassandra_client
    cass.create_keyspace(self.session, self.keyspace)

    # Same creation order as before: rating, per-user averages, overall
    # averages.
    table_creators = (
        (cass.create_rating_table, self.rating_table),
        (cass.create_user_avg_table, self.user_avg_table),
        (cass.create_all_avg_table, self.all_avg_table),
    )
    for create_table, table_name in table_creators:
        create_table(self.session, self.keyspace, table_name)

    self.session.set_keyspace(self.keyspace)
    self.session.row_factory = dict_factory
def init():
    """Load the rating and genre files, merge them and push the result.

    Reads ``user_ratedmovies.dat`` and ``movie_genres.dat`` as
    tab-delimited files, joins them on ``movieID``, builds the merged
    frame with ``build_dataframe`` and stores it through
    ``client.push_data_table``. Also sets NumPy's print threshold to
    ``sys.maxsize``.
    """
    np.set_printoptions(threshold=sys.maxsize)

    # Both input files share the same header row and delimiter settings.
    tsv_options = {"header": 0, "delimiter": "\t"}
    user_ratings = pd.read_csv(
        "user_ratedmovies.dat",
        usecols=['userID', 'movieID', 'rating'],
        **tsv_options,
    )
    movie_genres = pd.read_csv(
        "movie_genres.dat",
        usecols=['movieID', 'genre'],
        **tsv_options,
    )

    joined = join_tables(user_ratings, movie_genres, 'movieID')
    # build_dataframe returns a pair; only the merged frame is used here.
    merged, _genres = build_dataframe(joined, movie_genres)

    # Prepare a session bound to the keyspace, creating the keyspace first.
    conn = client.create_session()
    client.create_keyspace(conn, keyspace)
    conn.set_keyspace(keyspace)
    conn.row_factory = dict_factory

    client.push_data_table(conn, keyspace, table, merged)