def get_similar_user(user_id, nb_user=6):
    """Return the indices of the users most similar to ``user_id``.

    Every row of the factor matrix stored in ``save/user_factors.pkl`` is
    compared to the row at position ``user_id`` with cosine similarity, and
    the rows are ranked from most to least similar.

    Args:
        user_id: Row index of the reference user; coerced with ``int()``.
        nb_user: Number of neighbours to return.

    Returns:
        A list of up to ``nb_user`` row indices (NumPy integers), ordered by
        decreasing similarity.
    """
    target = int(user_id)
    # Assumes the pickle holds a 2-D torch tensor, one row per user -- TODO confirm.
    user_factors = load_pickle('save/user_factors.pkl')

    cosine = torch.nn.CosineSimilarity(dim=1)
    # [None] adds a leading axis so the single row broadcasts against all rows.
    similarity = cosine(user_factors, user_factors[target][None])

    ranking = similarity.argsort(descending=True)
    # Rank 0 is skipped: presumably it is the reference user itself, which has
    # similarity 1 with its own factors.
    neighbours = ranking[1:nb_user + 1].numpy()
    return list(neighbours)
def get_similar_book(title, series=None, authors=None, volume_number=None, nb_books=15):
    """Return the titles of the books most similar to the given book.

    The book is resolved to an id with ``get_book_id``; every row of the
    factor matrix stored in ``save/book_factors.pkl`` is then ranked by cosine
    similarity to that book's row.

    Args:
        title: Title of the reference book.
        series: Optional series name, forwarded to ``get_book_id``.
        authors: Optional authors, forwarded to ``get_book_id``.
        volume_number: Optional volume number, forwarded to ``get_book_id``.
        nb_books: Number of similar titles to return.

    Returns:
        A list of up to ``nb_books`` titles, ordered by decreasing similarity.
    """
    book_id = get_book_id(title, series, authors, volume_number)
    # Assumes the pickle holds a 2-D torch tensor, one row per book -- TODO confirm.
    factors = load_pickle('save/book_factors.pkl')

    cosine = torch.nn.CosineSimilarity(dim=1)
    # [None] adds a leading axis so the single row broadcasts against all rows.
    similarity = cosine(factors, factors[book_id][None])

    # Slice exactly once. Rank 0 (presumably the reference book itself) is
    # skipped and the next ``nb_books`` entries are kept. The earlier version
    # applied this slice twice, which also dropped the closest neighbour and
    # returned at most ``nb_books - 1`` titles.
    idx = similarity.argsort(descending=True)[1:nb_books + 1].numpy()

    # Presumably row i of the CSV is the title of book id i -- verify against
    # whatever writes 'save/titles_by_book_id.csv'.
    titles = pd.read_csv('save/titles_by_book_id.csv', header=None)[0].values
    return list(titles[idx])
def tabular_predict_from_nn(tab_fn, weights_fn, xs=None):
    """Rebuild a tabular learner from saved artefacts and optionally predict.

    Args:
        tab_fn: Path of the pickled tabular object, read with
            ``fasttab.load_pickle``.
        weights_fn: Name/path of the saved weights passed to ``learn.load``.
        xs: Optional ``pandas.DataFrame`` of rows to predict on.

    Returns:
        The predictions from ``get_preds`` when ``xs`` is a DataFrame;
        otherwise the loaded learner itself.
    """
    tabular_data = fasttab.load_pickle(tab_fn)
    loaders = tabular_data.dataloaders(1024)

    # accuracy is the metric here: BrierScore doesn't seem to work with lr_find()
    learner = fasttab.tabular_learner(loaders, metrics=fasttab.accuracy)
    learner.load(weights_fn)

    if isinstance(xs, pd.DataFrame):
        test_loader = learner.dls.test_dl(xs, bs=64)  # apply transforms
        predictions, _ = learner.get_preds(dl=test_loader)  # get prediction
        return predictions

    # Anything that is not a DataFrame (including the default None) means the
    # caller only wants the ready-to-use learner.
    return learner
def get_rating_user_book(user_id, title, series=None, authors=None, volume_number=None):
    """Predict the rating a user would give to a book.

    Args:
        user_id: Identifier of the user; coerced with ``int()``.
        title: Title of the book.
        series: Optional series name, forwarded to ``get_book_id``.
        authors: Optional authors, forwarded to ``get_book_id``.
        volume_number: Optional volume number, forwarded to ``get_book_id``.

    Returns:
        The predicted rating as a Python scalar (``Tensor.item()``).
    """
    uid = int(user_id)
    book_id = get_book_id(title, series, authors, volume_number)

    # Presumably a pickled fastai learner (it exposes .dls and .get_preds) -- TODO confirm.
    learner = load_pickle('save/nn_collab.pkl')

    # Index by book_id for the lookup, but keep the column (drop=False) so it
    # is still present in the row handed to the model.
    books = pd.read_csv('save/book_nn_data.csv').set_index('book_id', drop=False)

    # The double brackets keep the selection a DataFrame; copy before mutating
    # so the user id is written to an independent frame.
    query = books.loc[[book_id]].copy()
    query['user_id'] = uid

    test_loader = learner.dls.test_dl(query)
    prediction, _ = learner.get_preds(dl=test_loader)
    return prediction.item()
def get_book_avg(author, pub_year, language_code, genres="", series=None, volume_number=None):
    """Predict the average rating of a book from its metadata.

    A single feature row is built with one boolean entry per genre listed in
    ``save/genres.csv`` plus the author, publication year, language, series
    and volume number, and is fed to the model stored in
    ``save/avg_rating_nn.pkl``.

    Args:
        author: Value stored under the ``"authors"`` feature.
        pub_year: Original publication year; coerced with ``int()``.
        language_code: Value stored under the ``"language_code"`` feature.
        genres: Comma-separated genre names. Surrounding whitespace is
            ignored and empty entries are skipped, so ``""`` (the default)
            or ``None`` selects no genre.
        series: Optional series name.
        volume_number: Optional volume number.

    Returns:
        The predicted rating as a Python scalar (``.item()`` on the last
        element returned by ``model.predict``).
    """
    # "".split(',') yields [''], which used to add a spurious '' feature set to
    # True; strip each name and drop the empty ones instead.
    requested = [name.strip() for name in (genres or "").split(',')]
    requested = [name for name in requested if name]

    # Presumably a pickled fastai learner (it exposes .predict) -- TODO confirm.
    model = load_pickle('save/avg_rating_nn.pkl')
    all_genres = pd.read_csv('save/genres.csv', header=None)[0].values

    # Start with every known genre switched off, then enable the requested ones.
    row = {genre: False for genre in all_genres}
    for genre in requested:
        row[genre] = True

    row["authors"] = author
    row["original_publication_year"] = int(pub_year)
    row["language_code"] = language_code
    row["series"] = series
    row["volume_number"] = volume_number

    # Only the last element of predict()'s return value is used.
    *_, rating = model.predict(pd.Series(row))
    return rating.item()