def compare_models(list_of_models, metric):
    """Print per-cutoff precision/recall statistics and show a model comparison.

    Depends on names that are not passed in and must exist at module level:
    ``agg``, ``rmse_results``, ``test_data`` and ``graphlab``.

    Args:
        list_of_models: List of trained recommender models to compare. The
            same list is handed to ``graphlab.show_comparison``.
        metric: Metric forwarded to GraphLab's ``compare_models``; the
            original notes name 'rmse' and 'precision_recall'.

    Returns:
        Whatever ``graphlab.show_comparison`` returns for the comparison.
    """
    agg_list = [
        agg.AVG('precision'),
        agg.STD('precision'),
        agg.AVG('recall'),
        agg.STD('recall'),
    ]
    # Group the per-user precision/recall rows by cutoff (the number of top
    # items to look for) and apply the aggregators above to each group.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(rmse_results['precision_recall_by_user'].groupby('cutoff', agg_list))

    # The models are the second positional argument of compare_models; the
    # previous call supplied only model_names and omitted the models, which
    # cannot succeed.
    # NOTE(review): if display names are wanted, pass a separate list of
    # strings via model_names — confirm with callers.
    comparisonstruct = graphlab.recommender.util.compare_models(
        test_data, list_of_models, metric=metric)
    return graphlab.show_comparison(comparisonstruct, list_of_models)
# Example #2
# 0
# Two item-similarity recommenders trained on training_data; m2 differs from
# m1 only by passing only_top_k=1.
m1 = gl.item_similarity_recommender.create(
    training_data,
    user_id='user_id',
    item_id='wine_name',
    target='score',
)
m2 = gl.item_similarity_recommender.create(
    training_data,
    user_id='user_id',
    item_id='wine_name',
    target='score',
    only_top_k=1,
)

# Load the previously saved models from disk.
high_filter = gl.load_model('../models/high_filter')
onezero = gl.load_model('../models/onezero')
baseline = gl.load_model('../models/baseline')
gridsearch = gl.load_model('../models/gridsearch')

# Compare every model on test_data using RMSE, then display the comparison.
_compared_models = [m1, m2, baseline, gridsearch, high_filter, onezero]
_compared_names = ["m1", "m2", "baseline", "gridsearch", "high_filter", "onezero"]
model_comp = gl.recommender.util.compare_models(
    test_data,
    list(_compared_models),
    model_names=_compared_names,
    metric='rmse',
)
# Disabled alternative kept from the original (default metric, loaded models only):
# model_comp = gl.recommender.util.compare_models(test_data, [baseline, gridsearch, high_filter, onezero] )
gl.show_comparison(model_comp, list(_compared_models))

# Disabled interactive views kept from the original:
#
# Show an interactive view
# view = model.views.evaluate(test_data)
# view.show()
#
# # Explore predictions
# view = model.views.explore(item_data=items,item_name_column='wine_name')
#
# # Explore evals
# view = model.views.overview(validation_set=test_data,item_data=items,item_name_column='wine_name')
# view.show()



'''
# Train item-similarity recommenders on train_data, one per similarity type
# ('cosine' and 'jaccard'), using the user_id / movie_id / rating columns.
# NOTE(review): train_data, test_data, popularity_model and item_sim_pearson
# are not defined in this excerpt — presumably created earlier; confirm.
item_sim_cosine= graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='cosine')
item_sim_jaccard = graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='jaccard')

# Make recommendations: ask each model for k=5 items for every user id in
# range(1, 6) and print up to 25 rows of each result.
print("\n Collaborative Filtering Model(pearson)")
item_sim_recomm = item_sim_pearson.recommend(users=range(1,6),k=5)
item_sim_recomm.print_rows(num_rows=25)
print("\n Collaborative Filtering Model(cosine)")
item_sim_recomm1 = item_sim_cosine.recommend(users=range(1,6),k=5)
item_sim_recomm1.print_rows(num_rows=25)
print("\n Collaborative Filtering Model(jaccard)")
item_sim_recomm2 = item_sim_jaccard.recommend(users=range(1,6),k=5)
item_sim_recomm2.print_rows(num_rows=25)

# Disabled alternative comparison call kept from the original:
#graphlab.item_similarity_recommender.compare_models(train_data, [popularity_model, item_sim_model,item_sim_model1,item_sim_model2],metric='precision_recall')



# Evaluating recommendation engines
# Compare all models built so far on test_data and display the result; the
# original author describes this as a precision-recall comparison.
model_performance = graphlab.compare(test_data, [popularity_model, item_sim_pearson,item_sim_cosine,item_sim_jaccard])
graphlab.show_comparison(model_performance,[popularity_model, item_sim_pearson,item_sim_cosine,item_sim_jaccard])

# Factorization method (disabled in the original)
#fact_rec_model=graphlab.recommender.factorization_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', user_data=None, item_data=None, num_factors=8, regularization=1e-08, linear_regularization=1e-10, side_data_factorization=True, nmf=False, binary_target=False, max_iterations=50, sgd_step_size=0, random_seed=0, solver='auto', verbose=True)
#Make Recommendations:
#fact_sim_recomm = fact_rec_model.recommend(users=range(1,6),k=5)
#fact_sim_recomm.print_rows(num_rows=25)
# NOTE(review): set_target('ipynb') runs after canvas.show(); if the notebook
# target was meant to apply to the views above it likely needs to come
# first — confirm the intended order.
graphlab.canvas.show()
graphlab.canvas.set_target('ipynb')
# Example #4
# 0
popularity_recomm.print_rows(num_rows=25)

# Column mapping shared by both item-similarity models below.
_anime_columns = dict(user_id='user_id', item_id='anime_id', target='rating')

# Item-similarity model using Pearson similarity; recommend k=5 items for
# user ids 1..5 and print up to 25 rows.
item_sim_model = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='pearson', **_anime_columns)
item_sim_recomm = item_sim_model.recommend(users=range(1, 6), k=5)
item_sim_recomm.print_rows(num_rows=25)

# Compare the popularity model with the Pearson model.
_two_models = (popularity_model, item_sim_model)
model_performance = graphlab.compare(test_data, list(_two_models))
graphlab.show_comparison(model_performance, list(_two_models))

# Item-similarity model using Jaccard similarity, same recommendation request.
item_sim_model2 = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='jaccard', **_anime_columns)
item_sim_recomm2 = item_sim_model2.recommend(users=range(1, 6), k=5)
item_sim_recomm2.print_rows(num_rows=25)

# Compare all three models.
_three_models = [popularity_model, item_sim_model, item_sim_model2]
model_performance3 = graphlab.compare(test_data, _three_models)
graphlab.show_comparison(model_performance3,
# Example #5
# 0
# In[19]:

# Items most similar to the given song according to the personalized model.
personalized_model.get_similar_items(
    ['Chan Chan (Live) - Buena Vista Social Club'])

# # Quantitative comparison between the models
#
# We now formally compare the popularity and the personalized models using precision-recall curves.

# In[20]:

import re

# Compare the (major, minor) version numerically. The previous check compared
# the first three characters as a string, so a version such as "1.10" was
# truncated to "1.1" and wrongly treated as older than "1.6".
_version_match = re.match(r'(\d+)\.(\d+)', graphlab.version)
if _version_match:
    _has_compare_api = tuple(
        int(part) for part in _version_match.groups()) >= (1, 6)
else:
    # Unrecognized version format: fall back to the original string check.
    _has_compare_api = graphlab.version[:3] >= "1.6"

if _has_compare_api:
    # Newer releases: top-level compare / show_comparison, user_sample=0.05.
    model_performance = graphlab.compare(
        test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance,
                             [popularity_model, personalized_model])
else:
    # Older releases: enable inline matplotlib through the IPython magic and
    # use the recommender utility function instead.
    get_ipython().magic(u'matplotlib inline')
    model_performance = graphlab.recommender.util.compare_models(
        test_data, [popularity_model, personalized_model], user_sample=.05)

# The curve shows that the personalized model provides much better performance.

# In[41]:

# Number of distinct user_id values among rows whose artist is 'Kanye West'.
west = song_data[song_data['artist'] == 'Kanye West']
count_west = len(west['user_id'].unique())
count_west

# In[42]:
# Example #6
# 0
# Rows of airline_us that have an inflight entertainment rating.
# NOTE: rating_ent is not used again within this excerpt.
rating_ent = airline_us.dropna(subset=['inflight_entertainment_rating'])

# Positional split: the first 4000 rows train the models, the rest test them.
train, test = rating_all[:4000], rating_all[4000:]

# Convert both tables into GraphLab SFrames.
train_data = graphlab.SFrame(train)
test_data = graphlab.SFrame(test)

# Column mapping shared by both recommenders below.
_airline_columns = dict(
    user_id='author',
    item_id='airline_name',
    target='overall_rating',
)

# Simple popularity model. recommend() is called without a users argument;
# k=5 asks for five recommendations, and up to 25 rows are printed.
popularity_model = graphlab.popularity_recommender.create(
    train_data, **_airline_columns)
popularity_recomm = popularity_model.recommend(k=5)
popularity_recomm.print_rows(num_rows=25)

# Sanity check: airlines ordered by mean overall rating, highest first.
(train.groupby(by='airline_name')['overall_rating']
      .mean()
      .sort_values(ascending=False))

# Collaborative filtering: item-similarity model with Pearson similarity.
item_sim_model = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='pearson', **_airline_columns)
item_sim_recomm = item_sim_model.recommend(k=5)
item_sim_recomm.print_rows(num_rows=25)

# Evaluate both models on the held-out rows and display the comparison.
_both_models = (popularity_model, item_sim_model)
model_performance = graphlab.compare(test_data, list(_both_models))
graphlab.show_comparison(model_performance, list(_both_models))
# Load the tab-separated test ratings with the column names in r_cols.
ratings_test = pd.read_csv(
    'ml-100k/ub.test', sep='\t', names=r_cols, encoding='latin-1')

# Disabled debugging output kept from the original:
#print ratings_base.shape
#print ratings_test.shape

# Convert the training and test tables into SFrames.
train_data = graphlab.SFrame(ratings_base)
test_data = graphlab.SFrame(ratings_test)

#print ratings_base.groupby(by='movie_id')['rating'].mean().sort_values(ascending=False).head(20)

# Item-similarity recommender using Pearson similarity.
_movie_columns = dict(user_id='user_id', item_id='movie_id', target='rating')
item_sim_model = graphlab.item_similarity_recommender.create(
    train_data, similarity_type='pearson', **_movie_columns)

# k=20 recommendations for user ids 1..99, printing up to 2000 rows.
item_sim_recomm = item_sim_model.recommend(users=range(1, 100), k=20)
item_sim_recomm.print_rows(num_rows=2000)

# Evaluate the single model on the test data and display the result.
_evaluated = (item_sim_model,)
model_performance = graphlab.compare(test_data, list(_evaluated))
graphlab.show_comparison(model_performance, list(_evaluated))
# Example #8
# 0
def compare_models(test_data, m1, m2):
    """Evaluate two models on ``test_data`` and display their comparison.

    Args:
        test_data: Dataset handed to ``graphlab.compare``.
        m1: First model to compare.
        m2: Second model to compare.

    Returns:
        None. The comparison is only displayed via
        ``graphlab.show_comparison``.
    """
    pair = (m1, m2)
    performance = graphlab.compare(test_data, list(pair))
    graphlab.show_comparison(performance, list(pair))
# Example #9
# 0
# Item-similarity recommender using cosine similarity.
_movie_columns = dict(user_id='user_id', item_id='movie_id', target='rating')
recommender_movie_model = graphlab.item_similarity_recommender.create(
    training_data, similarity_type='cosine', **_movie_columns)

# k=6 recommendations for user ids 1..4, printing up to 24 rows.
recommend_movie = recommender_movie_model.recommend(users=range(1, 5), k=6)
recommend_movie.print_rows(num_rows=24)

# Recommendations for one specific user; replace 946 with your user_id.
recommend_movie = recommender_movie_model.recommend(users=[946], k=6)
recommend_movie.print_rows(num_rows=6)

# Evaluation: load the tab-separated test ratings and convert to an SFrame.
rating_test_data = pd.read_csv(
    'ml-100k/ua.test', sep='\t', names=r_cols, encoding='latin-1')
test_data = graphlab.SFrame(rating_test_data)
# print rating_test_data.shape

# Compare the popularity model with the item-similarity model.
_evaluated = (popularity_movie_model, recommender_movie_model)
evaluation = graphlab.compare(test_data, list(_evaluated))
graphlab.show_comparison(evaluation, list(_evaluated))
"""
Movie recommendation system
# Reference: https://www.analyticsvidhya.com/blog/2016/06/quick-guide-build-recommendation-engine-python/
"""
# Example #10
# 0

# In[ ]:

# Items most similar to the given song according to the personalized model.
personalized_model.get_similar_items(["Chan Chan (Live) - Buena Vista Social Club"])


# #Quantitative comparison between the models
#
# We now formally compare the popularity and the personalized models using precision-recall curves.

# In[ ]:

import re

# Compare the (major, minor) version numerically. The previous check compared
# the first three characters as a string, so a version such as "1.10" was
# truncated to "1.1" and wrongly treated as older than "1.6".
_version_match = re.match(r"(\d+)\.(\d+)", graphlab.version)
if _version_match:
    _has_compare_api = tuple(int(part) for part in _version_match.groups()) >= (1, 6)
else:
    # Unrecognized version format: fall back to the original string check.
    _has_compare_api = graphlab.version[:3] >= "1.6"

if _has_compare_api:
    # Newer releases: top-level compare / show_comparison, user_sample=0.05.
    model_performance = graphlab.compare(test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance, [popularity_model, personalized_model])
else:
    # Older releases: enable inline matplotlib through the IPython magic and
    # use the recommender utility function instead.
    get_ipython().magic(u"matplotlib inline")
    model_performance = graphlab.recommender.util.compare_models(
        test_data, [popularity_model, personalized_model], user_sample=0.05
    )


# The curve shows that the personalized model provides much better performance.

# In[21]:

# Item-similarity recommender on train_data keyed by user_id and song_id
# (no target column is passed).
item_similarity_model = graphlab.item_similarity_recommender.create(train_data, user_id="user_id", item_id="song_id")


# In[22]:
# Example #11
# 0
# Load the tab-separated test ratings with the column names in ratingsCol.
TestRatings = pd.read_csv(
    'Data/ml-100k/ua.test', sep='\t', names=ratingsCol, encoding='latin-1')

#TestRatings.shape
#TrainRatings.shape

import graphlab as gl

# Convert the training and test tables into SFrames.
trainData = gl.SFrame(TrainRatings)
testData = gl.SFrame(TestRatings)

# Column mapping shared by both recommenders below.
_rating_columns = dict(user_id='user_id', item_id='movie_id', target='rating')

# Popularity model: k=5 recommendations for user ids 1..5, up to 25 rows.
popModel = gl.popularity_recommender.create(trainData, **_rating_columns)
recommendedMovies = popModel.recommend(users=range(1, 6), k=5)
recommendedMovies.print_rows(num_rows=25)

# Item-similarity model (no similarity_type passed), same request.
itemItemCF = gl.item_similarity_recommender.create(trainData, **_rating_columns)
recommendedMovies2 = itemItemCF.recommend(users=range(1, 6), k=5)
recommendedMovies2.print_rows(num_rows=25)

# Evaluate both models on the test data and display the comparison.
_both_models = (popModel, itemItemCF)
performanceComparasion = gl.compare(testData, list(_both_models))
gl.show_comparison(performanceComparasion, list(_both_models))
# Example #12
# 0
# Every user gets the same recommendation, based on popularity of the product.
# NOTE(review): this remark appears to refer to a popularity model created
# before this excerpt — confirm.

# Personalised model: item-similarity recommender keyed by user_id and song
# (no target column is passed).
personalised_model = gl.item_similarity_recommender.create(train_data,user_id='user_id',item_id='song')

# Predictions for the first two entries of `users`. The results are not
# assigned; presumably they are inspected interactively.
personalised_model.recommend(users=[users[0]])
personalised_model.recommend(users=[users[1]])

# Items similar to the given song.
personalised_model.get_similar_items(['With Or Without You - U2'])

# Recommender models comparison, using user_sample=0.05.

model_performance = gl.compare(test_data, [popularity_model, personalised_model], user_sample=0.05)
gl.show_comparison(model_performance,[popularity_model, personalised_model])

# Assignment

# Number of distinct user_id values per artist. Each count is computed but
# not stored.
len(song_data[song_data['artist'] == 'Kanye West']['user_id'].unique())
len(song_data[song_data['artist'] == 'Foo Fighters']['user_id'].unique())
len(song_data[song_data['artist'] == 'Taylor Swift']['user_id'].unique())
len(song_data[song_data['artist'] == 'Lady GaGa']['user_id'].unique())

# Most and least popular artist: sum listen_count per artist, then sort.
# NOTE(review): the sort results are not assigned; presumably sort() returns
# a new SFrame that is viewed interactively — confirm.
aggregated = song_data.groupby(key_columns='artist', operations={'total_count': gl.aggregate.SUM('listen_count')})
aggregated.sort('total_count', ascending=False) # most popular
aggregated.sort('total_count')                  # least popular

# Most recommended song (no code for this step is present in this excerpt)
import graphlab

# Read the ratings CSV, parsing the "rating" column as int.
_ratings_url = "https://static.turi.com/datasets/movie_ratings/training_data.csv"
data = graphlab.SFrame.read_csv(_ratings_url, column_type_hints={"rating": int})
#print data.head()

# Random split with fraction 0.8 and a fixed seed.
data_train, data_test = data.random_split(0.8, seed=5)

# Column mapping shared by both models below.
_columns = dict(user_id="user", item_id="movie", target="rating")

# model1: recommender chosen by graphlab.recommender.create.
model1 = graphlab.recommender.create(data_train, **_columns)
#results = model.recommend(users=None, k=5)
#print results.head(20)

# model2: popularity recommender on the same columns.
model2 = graphlab.popularity_recommender.create(data_train, **_columns)
#results2 = model2.recommend(users=None, k=5)
#print results2.head(20)

# Evaluate both models on the held-out split and display the comparison.
_both_models = (model1, model2)
model_performance = graphlab.compare(data_test, list(_both_models))
graphlab.show_comparison(model_performance, list(_both_models))