def get(self, bibcode):
    """Handle a GET request for the recommendations of ``bibcode``.

    Calls ``get_recommendations(bibcode)``. If that call raises, the failure
    is logged through ``current_app.logger`` and an error payload is returned
    together with HTTP status 500.

    Arguments:
        bibcode -- identifier handed unchanged to ``get_recommendations``

    Returns:
        tuple -- ``({'msg': ...}, 500)`` when the lookup raises.
    """
    # NOTE(review): nothing after the error branch is visible here, so a
    # successful lookup falls through and returns None, and ``stime`` /
    # ``results`` are unused -- confirm against the rest of the handler.
    stime = time.time()
    try:
        results = get_recommendations(bibcode)
    except Exception as err:
        # ``except Exception, err`` is Python-2-only syntax; the ``as`` form
        # is valid on Python 2.6+ and Python 3.
        current_app.logger.error(
            'Recommender exception (%s): %s', bibcode, err)
        return {'msg': 'Unable to get results! (%s)' % err}, 500
def run_recommender():
    """
    Recommender page.

    Reads the query parameters ``movie_1`` .. ``movie_5``, keeps the
    non-empty ones and renders ``recommendation.html`` with the
    recommendations together with the titles the inputs were matched to.
    When no movie was entered a fixed fallback recommendation is shown.
    """
    n_recommendations = 5

    # Collect the non-empty movie inputs in form order.
    selections = []
    for n in range(5):
        entered = request.args[f'movie_{n+1}']
        if entered != "":
            selections.append(entered)

    if len(selections) > 0:
        movies, interpreted_choices = recommender.get_recommendations(
            selections, RATING_MATRIX, MODEL, n_recommendations)
        interpretation_message = "Based on our interpretation of your choices"
    else:
        # Nothing entered: fall back to a single default title.
        movies = ["Parasite"]
        interpreted_choices = []
        interpretation_message = ""

    return render_template(
        'recommendation.html',
        title="Your recommendations:",
        movies=movies,
        interpreted_choices=interpreted_choices,
        interpretation_message=interpretation_message,
        user_input=selections,
        inputs_to_interpretations=list(zip(selections, interpreted_choices)),
    )
def evaluation_memory_based_user(matrix, similarities, neighbours,
                                 masked_items, user, n):
    """Compute Precision@N and Recall@N for a specific user.

    Arguments:
        matrix {ndarray} -- User-Item Matrix
        similarities {ndarray} -- User Similarity Matrix
        neighbours {ndarray} -- List of N Neighbours for each User in
            User-Item Matrix
        masked_items {ndarray} -- 2-D array of masked User-Items; column 0
            is compared against ``user`` and column 1 is read as the item
        user {int} -- Index Number of User
        n {int} -- Number of Recommendations to be returned / measured by

    Returns:
        tuple -- ``(precision, recall)``. Recall is 0.0 when the user has no
        masked items and precision is 0.0 when ``n`` is 0, instead of
        raising ``ZeroDivisionError``.
    """
    # Calculate recommendations and keep only the recommended items.
    rec = get_recommendations(matrix, similarities, neighbours, user, n)
    recommended = set(rec['item'])

    # Items that were masked for this user (rows whose first column matches).
    items_masked = list(masked_items[masked_items[:, 0] == user, 1])

    # Number of masked items that were recommended back.
    hits = len(recommended.intersection(items_masked))

    recall = hits / len(items_masked) if items_masked else 0.0
    precision = hits / n if n else 0.0
    return precision, recall
def get(self, bibcode):
    """Handle a GET request for the recommendations of ``bibcode``.

    Calls ``get_recommendations(bibcode)``. If that call raises, the failure
    is logged through ``current_app.logger`` and an error payload is returned
    with HTTP status 200.

    Arguments:
        bibcode -- identifier handed unchanged to ``get_recommendations``

    Returns:
        tuple -- ``({'Error': ...}, 200)`` when the lookup raises.
    """
    # NOTE(review): nothing after the error branch is visible here, so a
    # successful lookup falls through and returns None, and ``stime`` /
    # ``results`` are unused -- confirm against the rest of the handler.
    stime = time.time()
    try:
        results = get_recommendations(bibcode)
    except Exception as err:
        # ``except Exception, err`` is Python-2-only syntax; the ``as`` form
        # is valid on Python 2.6+ and Python 3.
        current_app.logger.error(
            'Recommender exception (%s): %s', bibcode, err)
        # If the request for recommendations fails, we just want the UI to
        # ignore it and display nothing, hence the 200 status.
        return {'Error': 'Unable to get results!'}, 200
def get_recommendations(id):
    """
    Controller for the recommendations route.

    Prints the requested product ID and forwards the lookup to
    ``recommender.get_recommendations``.

    Arguments:
        id {int} -- Product ID

    Returns:
        Whatever ``recommender.get_recommendations(id)`` returns.
    """
    print(f"Product ID: {id}")
    recommendations = recommender.get_recommendations(id)
    return recommendations
def post():
    """Turn the posted movie ratings into a JSON list of recommended ids.

    The request body is read as JSON and iterated; each item supplies an
    ``imdb_id`` and a ``rating``. Leading ``t`` characters and then leading
    zeros are stripped from every id before the ``(id, rating)`` pairs are
    passed to ``get_recommendations``. Each recommended movie is formatted
    as ``'tt' + pad(movie)``.

    Returns:
        str -- JSON array of ``{"id": ...}`` objects.
    """
    req_data = request.get_json(silent=True)
    print(req_data)

    ratings = []
    for item in req_data:
        bare_id = item['imdb_id'].lstrip('t').lstrip('0')
        ratings.append((bare_id, item['rating']))

    recommendations = get_recommendations(ratings)

    data = []
    for movie in recommendations:
        data.append({'id': 'tt' + pad(movie)})
    return json.dumps(data)
def recommend():
    """Return recommendations for the ``name`` query parameter as JSON.

    The optional ``k`` query parameter is converted to ``int`` and
    incremented by one before being passed on; when it is absent or empty,
    ``None`` is passed instead.
    """
    args = request.args
    name = args.get("name")

    raw_k = args.get("k")
    k = int(raw_k) + 1 if raw_k else None

    recommendations = recommender.get_recommendations(
        name, recommender.cosine_sim, k)
    return jsonify(recommendations)
def recommendations():
    """Render the recommendations page for the movies stored for 'user1'.

    Reads all rows through ``DatabaseHandler('user1')``, passes them to
    ``get_recommendations`` and keeps element ``[1]`` of its result. Every
    entry of ``movies_list`` whose first field equals one of those values is
    collected and handed to the ``recommendations.html`` template.
    """
    # TO-DO: for some reason the recommendations seem to always be the same;
    # the cause has not been identified yet.
    # NOTE(review): the extent of the ``with`` block is reconstructed from
    # whitespace-collapsed source -- confirm against the original layout.
    with DatabaseHandler('user1') as handler:
        movies_seen = handler.read_all_rows()
        recommended = get_recommendations(movies_seen)[1]

    # From the movies currently in theater, keep those whose first field
    # matches a recommended value. Positional indexing is kept on purpose so
    # the lookups behave exactly as before for any sequence type.
    final_recommendations = [
        movies_list[j]
        for i in range(len(recommended))
        for j in range(len(movies_list))
        if recommended[i] == movies_list[j][0]
    ]

    return render_template('recommendations.html', title='Recommended',
                           movies=final_recommendations)
def get_movie():
    """Render recommendations for the three rated movies sent by the form.

    Reads ``movie1``..``movie3`` and ``rate1``..``rate3`` from the query
    string, passes the three movies followed by the three rates to
    ``recommender.get_recommendations`` and renders ``result.html``.
    """
    html_form_data = dict(request.args)

    movies = []
    rates = []
    for slot in ('1', '2', '3'):
        movies.append(html_form_data['movie' + slot])
        rates.append(html_form_data['rate' + slot])

    # Positional order expected by the recommender: movies first, then rates.
    recomended_movies = recommender.get_recommendations(*(movies + rates))

    print(html_form_data)
    return render_template('result.html',
                           rec_movies=recomended_movies,
                           title='Movie Recomender')
def cross_validate(fold_length, k, metric='euclidean', heuristic=False):
    """Cross-validate the recommender and collect per-item scores.

    The data loaded by ``NewUSsDataHandler`` is split with
    ``KFold(n_splits=fold_length)``. For every row of every test fold,
    recommendations are computed against the training fold and compared
    with the comma-separated ``TCs`` recorded for that row's ``ID_US``.

    Arguments:
        fold_length -- number of splits passed to ``KFold``
        k -- forwarded to the recommendation function
        metric -- forwarded as ``distance_metric`` (default 'euclidean')
        heuristic -- when truthy, use ``get_recommendations_heuristcs``
            instead of ``get_recommendations``

    Returns:
        tuple -- ``(precisions, recalls, f_measures)``, three lists with one
        entry per evaluated row; all three entries are 0 for a row whose
        recommendations are empty.
    """
    data_handler = NewUSsDataHandler()
    uss = data_handler.load_us_data()
    splitter = KFold(n_splits=fold_length)

    features = uss.drop(columns=['TCs'])
    targets = uss.loc[:, ['ID_US', 'TCs']]
    recommend = (get_recommendations_heuristcs if heuristic
                 else get_recommendations)

    precisions, recalls, f_measures = [], [], []
    for train_index, test_index in splitter.split(features):
        train_set = features.iloc[train_index, :]
        test_set = features.iloc[test_index, :]

        for _, story in test_set.iterrows():
            recommendations = recommend(story, train_set, k,
                                        distance_metric=metric)

            if recommendations.empty:
                # Nothing recommended: every score for this row is zero.
                precision = recall = f_measure = 0
            else:
                # Expected test cases for this row, parsed as floats.
                same_id = targets['ID_US'] == story['ID_US']
                expected = targets.loc[same_id, 'TCs'].str.split(',')
                expected = pd.Series(expected.iloc[0]).astype(float)
                recommended = pd.Series(recommendations['ID']).astype(float)

                n_hits = len(np.intersect1d(expected, recommended))
                recall = n_hits / len(expected.index)
                precision = n_hits / len(recommendations.index)
                if precision + recall > 0:
                    f_measure = calculate_fbeta(precision, recall, 2)
                else:
                    f_measure = 0

            f_measures.append(f_measure)
            precisions.append(precision)
            recalls.append(recall)

    return precisions, recalls, f_measures
def upload():
    """Accept an uploaded resume and render job recommendations for it.

    On POST the request must carry a ``file`` part:

    * a missing part or an empty filename flashes a message and redirects
      back to ``request.url``;
    * a filename rejected by ``allowed_file`` aborts with HTTP 400;
    * otherwise the file is saved, cleaned with ``final_resume_clean``,
      matched against ``cleaned_data.csv`` by ``get_recommendations`` and
      the result is rendered with ``recommendation.html``.

    Side effects:
        Rebinds the module-level ``df`` to the recommendations frame.
    """
    # NOTE(review): the block nesting is reconstructed from
    # whitespace-collapsed source; in particular the placement of the final
    # ``render_template`` inside the POST branch should be confirmed.
    global df

    if request.method == 'POST':
        # Check if the post request has the file part.
        if 'file' not in request.files:
            flash('No file attached in request')
            return redirect(request.url)

        file = request.files['file']

        # If the user does not select a file, the browser also submits an
        # empty part without a filename.
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)

        if not allowed_file(file.filename):
            # abort() raises, so the flash/redirect that used to follow it
            # could never run and has been removed.
            abort(400, 'Incorrect file extension')

        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(
                os.path.join(BASE_DIR, app.config['UPLOAD_FOLDER'], filename))
            # NOTE(review): the file is saved under BASE_DIR but read back
            # without it; the two paths only agree when UPLOAD_FOLDER is
            # absolute or the working directory is BASE_DIR -- confirm.
            resume_cleaned = final_resume_clean(
                os.path.join(app.config['UPLOAD_FOLDER'], filename), filename)

            jobs_df = pd.read_csv('cleaned_data.csv')
            df = get_recommendations(resume_cleaned, jobs_df)

            # Remove diacritics from original job descriptions.
            custom_pipeline = [preprocessing.remove_diacritics]
            df['Description'] = hero.clean(df['Description'],
                                           pipeline=custom_pipeline)

            return render_template("recommendation.html",
                                   column_names=df.columns.values,
                                   target_column="Company",
                                   hide_column="Job Description",
                                   row_data=list(df.values.tolist()),
                                   zip=zip)
def test_recommender(engine, n_movies=5, n_recommendations=5,
                     n_iterations=5000):
    """
    Perform a specified number of recommender iterations to see what the
    distribution of recommendations looks like.

    Parameters:
        engine (sqlalchemy engine object): engine the session is bound to
        n_movies (int): number of movies to select for each iteration
        n_recommendations (int): number of recommendations that should be
            made in each iteration
        n_iterations (int): number of recommendation iterations that should
            be performed

    Returns:
        pandas dataframe containing the randomly selected movies and
        corresponding recommendations for every iteration (an empty
        dataframe when ``n_iterations`` is 0)
    """
    session = sessionmaker(bind=engine)()
    try:
        _, _, rating_matrix, model = \
            recommender.prep_for_recommendations(fill_value='median')

        # Collect one frame per iteration and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # accumulated frame on every call.
        records = []
        for i in range(n_iterations):
            selections = pick_random_movies(session, n_movies)
            result, interpreted_choices = recommender.get_recommendations(
                selections, rating_matrix, model, n_recommendations)
            records.append(pd.DataFrame({
                "iteration": i,
                "user_selections": selections,
                "interpretations": interpreted_choices,
                "recommendations": result
            }))
    finally:
        # The session is local to this function; release it on every path.
        session.close()

    if not records:
        return pd.DataFrame()
    return pd.concat(records)
# Tail of a definition that begins before this chunk (it mirrors the end of
# cross_validate); reproduced unchanged at its apparent nesting depth.
            intersect_ds = pd.Series(np.intersect1d(real, rec_ids))
            recall = len(intersect_ds.index) / len(real.index)
            precision = len(intersect_ds.index) / len(recommendations.index)
            f_measure = 0
            # Only score when at least one of precision/recall is non-zero.
            if precision + recall > 0:
                f_measure = calculate_fbeta(precision, recall, 2)
            f_measures.append(f_measure)
            precisions.append(precision)
            recalls.append(recall)
    return precisions, recalls, f_measures


def calculate_fbeta(precision, recall, beta):
    """Return the F-beta score for the given precision and recall.

    Computes ``(beta**2 + 1) * P * R / (beta**2 * P + R)``.

    NOTE(review): raises ZeroDivisionError when precision and recall are both
    0; the caller visible above only calls it when their sum is positive.
    """
    # Earlier formula, superseded by the expression below:
    # return 2 * (((beta**2) * precision * recall) / (((beta**2) * precision) + recall))
    return ((beta**2 + 1) * precision * recall) / ((beta**2 * precision) + recall)


if __name__ == '__main__':
    # Manual smoke run: recommend for one hand-written user story against
    # the loaded data (without its 'TCs' column) using the Jaccard metric.
    data_handler = NewUSsDataHandler()
    uss = data_handler.load_us_data().drop(columns=['TCs'])
    newUS = {'ID_US': '#263', 'Módulo': 'Cadastro', 'Operação': 'Atualizar_dados', 'Plataforma': 'Web', 'RNFs': '1,2', 'CAs': '5,6,7,8'}
    recommendations = get_recommendations(newUS, uss, 3, distance_metric='jaccard')
    print(recommendations.to_string())
def test_everything(self):
    """Check that the full recommendation pipeline works end to end."""
    from recommender import get_recommendations

    # Besides the PostgreSQL mock, the Solr query (used by
    # 'get_article_data') has to be overridden: it must return the
    # references and citation counts for the papers found in the co-reads.
    # The documents below cover everything that was tested before.
    solr_docs = [
        {"id": "1", "bibcode": "ppr1", "first_author": "au_ppr1",
         "title": ["ttl_ppr1"], "reference": ["r1", "r2"],
         "citation": ["c1"], "citation_count": 1},
        {"id": "2", "bibcode": "ppr2", "first_author": "au_ppr2",
         "title": ["ttl_ppr2"], "reference": ["r2", "r3"],
         "citation": ["c1", "c2", "c3"], "citation_count": 3},
        {"id": "3", "bibcode": "ppr3", "first_author": "au_ppr3",
         "title": ["ttl_ppr3"], "reference": ["r2", "r3"],
         "citation": ["c2", "c3"], "citation_count": 2},
        {"id": "4", "bibcode": "foo",
         "keyword_norm": ["aberration", "ablation", "absorption"]},
        {"id": "5", "bibcode": "paper_3", "first_author": "au_paper3",
         "title": ["ttl_paper_3"]},
        {"id": "6", "bibcode": "c1", "first_author": "au_c1",
         "title": ["ttl_c1"]},
        {"id": "7", "bibcode": "r2", "first_author": "au_r2",
         "title": ["ttl_r2"]},
    ]
    solr_body = (
        '{ "responseHeader":{ "status":0, "QTime":0, '
        '"params":{ "fl":"reference,citation", "indent":"true", '
        '"wt":"json", "q":"*"}}, '
        '"response":{"numFound":10456930,"start":0,"docs":%s }}'
    ) % json.dumps(solr_docs)
    httpretty.register_uri(
        httpretty.GET,
        self.app.config.get("RECOMMENDER_SOLR_PATH"),
        content_type="application/json",
        status=200,
        body=solr_body,
    )

    # Recommendations the mock data should produce, in order.
    expected_order = ("ppr2", "ppr2", "ppr1", "c1", "r2", "ppr2")
    expected_recommendations = {
        "paper": "a",
        "recommendations": [
            {"bibcode": code,
             "author": u"au_%s,+" % code,
             "title": u"ttl_%s" % code}
            for code in expected_order
        ],
    }

    # Generate the recommendations and do the final check.
    recommendations = get_recommendations("a")
    self.assertEqual(recommendations, expected_recommendations)
def find_movie(movies_seen):
    """Return the recommendations for ``movies_seen``, printing them first.

    Arguments:
        movies_seen -- passed unchanged to ``get_recommendations``

    Returns:
        Whatever ``get_recommendations(movies_seen)`` returns.
    """
    recommended = get_recommendations(movies_seen)
    print(recommended)
    return recommended
def test_everything(self):
    '''Check that the full recommendation pipeline works end to end'''
    from recommender import get_recommendations

    # Besides the PostgreSQL mock, the Solr query (used by
    # 'get_article_data') has to be overridden: it must return the
    # references and citation counts for the papers found in the co-reads.
    # The documents below cover everything that was tested before.
    solr_docs = [
        {'id': '1', 'bibcode': 'ppr1', 'first_author': 'au_ppr1',
         'title': ['ttl_ppr1'], 'reference': ['r1', 'r2'],
         'citation': ['c1'], 'citation_count': 1},
        {'id': '2', 'bibcode': 'ppr2', 'first_author': 'au_ppr2',
         'title': ['ttl_ppr2'], 'reference': ['r2', 'r3'],
         'citation': ['c1', 'c2', 'c3'], 'citation_count': 3},
        {'id': '3', 'bibcode': 'ppr3', 'first_author': 'au_ppr3',
         'title': ['ttl_ppr3'], 'reference': ['r2', 'r3'],
         'citation': ['c2', 'c3'], 'citation_count': 2},
        {'id': '4', 'bibcode': 'foo',
         'keyword_norm': ["aberration", "ablation", "absorption"]},
        {'id': '5', 'bibcode': 'paper_3', 'first_author': 'au_paper3',
         'title': ['ttl_paper_3']},
        {'id': '6', 'bibcode': 'c1', 'first_author': 'au_c1',
         'title': ['ttl_c1']},
        {'id': '7', 'bibcode': 'r2', 'first_author': 'au_r2',
         'title': ['ttl_r2']},
    ]
    solr_body = (
        '{ "responseHeader":{ "status":0, "QTime":0, '
        '"params":{ "fl":"reference,citation", "indent":"true", '
        '"wt":"json", "q":"*"}}, '
        '"response":{"numFound":10456930,"start":0,"docs":%s }}'
    ) % json.dumps(solr_docs)
    httpretty.register_uri(
        httpretty.GET,
        self.app.config.get('RECOMMENDER_SOLR_PATH'),
        content_type='application/json',
        status=200,
        body=solr_body)

    # Recommendations the mock data should produce, in order.
    expected_order = ('ppr2', 'ppr2', 'ppr1', 'c1', 'r2', 'ppr2')
    expected_recommendations = {
        'paper': 'a',
        'recommendations': [
            {'bibcode': code,
             'author': u'au_%s,+' % code,
             'title': u'ttl_%s' % code}
            for code in expected_order
        ],
    }

    # Generate the recommendations and compare.
    self.assertEqual(get_recommendations('a'), expected_recommendations)

    # A publication that is too old must return no recommendations, and so
    # must a journal that is not in the list of allowed journals; both are
    # expected to produce the same error message.
    min_year = self.app.config.get('RECOMMENDER_FROM_YEAR')
    too_old = "%sApJ...999..999X" % (min_year - 1)
    unknown_journal = "9999XXXXX.999..999X"
    no_results = {"Error": "Unable to get results!",
                  "Error Info": "No recommendations available",
                  "Status Code": "200"}
    for rejected in (too_old, unknown_journal):
        self.assertEqual(get_recommendations(rejected), no_results)
def test_everything(self):
    '''Check that the full recommendation pipeline works end to end'''
    from recommender import get_recommendations

    # Besides the PostgreSQL mock, the Solr query (used by
    # 'get_article_data') has to be overridden: it must return the
    # references and citation counts for the papers found in the co-reads.
    # The documents below cover everything that was tested before.
    solr_docs = [
        {'id': '1', 'bibcode': 'ppr1', 'first_author': 'au_ppr1',
         'title': ['ttl_ppr1'], 'reference': ['r1', 'r2'],
         'citation': ['c1'], 'citation_count': 1},
        {'id': '2', 'bibcode': 'ppr2', 'first_author': 'au_ppr2',
         'title': ['ttl_ppr2'], 'reference': ['r2', 'r3'],
         'citation': ['c1', 'c2', 'c3'], 'citation_count': 3},
        {'id': '3', 'bibcode': 'ppr3', 'first_author': 'au_ppr3',
         'title': ['ttl_ppr3'], 'reference': ['r2', 'r3'],
         'citation': ['c2', 'c3'], 'citation_count': 2},
        {'id': '4', 'bibcode': 'foo',
         'keyword_norm': ["aberration", "ablation", "absorption"]},
        {'id': '5', 'bibcode': 'paper_3', 'first_author': 'au_paper3',
         'title': ['ttl_paper_3']},
        {'id': '6', 'bibcode': 'c1', 'first_author': 'au_c1',
         'title': ['ttl_c1']},
        {'id': '7', 'bibcode': 'r2', 'first_author': 'au_r2',
         'title': ['ttl_r2']},
    ]
    solr_body = (
        '{ "responseHeader":{ "status":0, "QTime":0, '
        '"params":{ "fl":"reference,citation", "indent":"true", '
        '"wt":"json", "q":"*"}}, '
        '"response":{"numFound":10456930,"start":0,"docs":%s }}'
    ) % json.dumps(solr_docs)
    httpretty.register_uri(
        httpretty.GET,
        self.app.config.get('RECOMMENDER_SOLR_PATH'),
        content_type='application/json',
        status=200,
        body=solr_body)

    # Recommendations the mock data should produce, in order.
    expected_order = ('ppr2', 'ppr2', 'ppr1', 'c1', 'r2', 'ppr2')
    expected_recommendations = {
        'paper': 'a',
        'recommendations': [
            {'bibcode': code,
             'author': u'au_%s,+' % code,
             'title': u'ttl_%s' % code}
            for code in expected_order
        ],
    }

    # Generate the recommendations and do the final check.
    recommendations = get_recommendations('a')
    self.assertEqual(recommendations, expected_recommendations)