Example #1
    def test_dynamic_updates(self):
        """
        TensorCoFi dynamic update
        Train a TensorCoFi model and evaluate it, then replace all the user factors with factors
        recomputed via online_user_factors and check that the performance stays roughly the same.
        """
        pyTF = PyTensorCoFi(n_factors=20, n_iterations=5, c_lambda=0.05, c_alpha=40)

        evaluator = Evaluator()
        tf = TensorCoFi(n_factors=2, n_iterations=100, c_lambda=0.05, c_alpha=40)
        df = pd.read_csv(resource_filename(testfm.__name__, "data/movielenshead.dat"), sep="::", header=None,
                         names=["user", "item", "rating", "date", "title"])
        training, testing = testfm.split.holdoutByRandom(df, 0.7)
        users = {user: list(entries) for user, entries in training.groupby("user")["item"]}

        tf.fit(training)
        map1 = evaluator.evaluate_model(tf, testing)  # map of the original model

        # Now replace the original factors with factors computed on the fly:
        # iterate over the users and the items they saw in training
        for u, items in users.items():
            #user id in the tf
            uid = tf.data_map[tf.get_user_column()][u]  # user id
            iids = [tf.data_map[tf.get_item_column()][i] for i in items]  # item ids that user has seen
            #original_factors = tf.factors["user"][uid]
            new_factors = pyTF.online_user_factors(tf.factors[1], iids, p_param=40, lambda_param=0.05)
            #replace original factors with the new factors
            tf.factors[0][uid, :] = new_factors
            #tf.update_user_factors(uid, new_factors)


        # Evaluate the model again with the factors updated on the fly
        map2 = evaluator.evaluate_model(tf, testing)
        # The MAP difference should be smaller than 20% of the original MAP
        assert abs(map1[0]-map2[0]) < 0.2*map1[0]
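For context, here is a minimal sketch of what an online user-factor update like online_user_factors typically computes, assuming the standard implicit-feedback ALS closed form (this illustrates the technique, not the library's actual implementation; the argument names mirror the call above):

import numpy as np

def online_user_factors_sketch(item_factors, item_ids, p_param=40, lambda_param=0.05):
    # item_factors is assumed to be an (n_items x n_factors) matrix,
    # item_ids the row indices of the items this user has seen.
    n_items, n_factors = item_factors.shape
    # Binary preference vector and confidence weights (1 + p_param for seen items)
    p = np.zeros(n_items)
    p[item_ids] = 1.0
    c = 1.0 + p_param * p
    # Closed-form regularized least squares: (Y^T C Y + lambda I) x = Y^T C p
    Y = item_factors
    A = Y.T.dot(Y * c[:, None]) + lambda_param * np.eye(n_factors)
    b = Y.T.dot(c * p)
    return np.linalg.solve(A, b)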
Example #2
 def tune(model, training, testing, non_relevant_count=100, **kwargs):
     '''
     Fit the model with the given parameters and return its MAP score on the testing set.
     '''
     model.setParams(**kwargs)
     model.fit(training)
     eval = Evaluator()
     # Return the MAPMeasure in position 0
     measure = eval.evaluate_model(model,testing, non_relevant_count=non_relevant_count)[0]
     print 'tried {} = {}'.format(kwargs, measure)
     return measure
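A possible way to drive tune over a small parameter grid (the grid and parameter names below are only illustrative, and model, training and testing are assumed to exist as in the snippet above):

from itertools import product

param_grid = {"n_factors": [2, 10, 20], "c_lambda": [0.01, 0.05]}
keys = sorted(param_grid)
scores = {}
for values in product(*(param_grid[k] for k in keys)):
    params = dict(zip(keys, values))
    scores[values] = tune(model, training, testing, **params)
best_params = dict(zip(keys, max(scores, key=scores.get)))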
Example #3
    def test_rmse(self):
        eval = Evaluator()

        model = ConstantModel(constant=3.0)
        testing = pd.DataFrame([{'user':10, 'item':100, 'rating':3},
                                {'user':10,'item':110,'rating':1},
                                {'user':12,'item':100,'rating':4},
        ])
        rmse = eval.evaluate_model_rmse(model, testing)
        self.assertEqual(sqrt((0+4+1)/3.0), rmse)
        eval.close()
Example #4
    def test_rmse(self):
        """
        [EVALUATOR] Test the rmse measure
        """
        eval = Evaluator()

        model = ConstantModel(constant=3.0)
        testing = pd.DataFrame([{"user": 10, "item": 100, "rating": 3},
                                {"user": 10, "item": 110, "rating": 1},
                                {"user": 12, "item": 100, "rating": 4}])
        rmse = eval.evaluate_model_rmse(model, testing)
        self.assertEqual(sqrt((0+4+1)/3.0), rmse)
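The expected value in the assertion follows directly from the RMSE definition: the constant model predicts 3.0 for every row, so the squared errors against the ratings 3, 1 and 4 are 0, 4 and 1. A standalone check in plain Python:

from math import sqrt

ratings = [3.0, 1.0, 4.0]
predictions = [3.0, 3.0, 3.0]  # ConstantModel(constant=3.0) predicts 3.0 everywhere
squared_errors = [(p - r) ** 2 for p, r in zip(predictions, ratings)]
rmse = sqrt(sum(squared_errors) / len(squared_errors))
assert rmse == sqrt((0 + 4 + 1) / 3.0)  # ~1.291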
Example #5
 def tune(model, training, testing, non_relevant_count=100, **kwargs):
     """
     Return a mean for the predictive power
     """
     model.set_params(**kwargs)
     model.fit(training)
     evaluator = Evaluator()
     # Return the MAPMeasure in position 0
     measure = evaluator.evaluate_model(
         model, testing, non_relevant_count=non_relevant_count)[0]
     print "tried {} = {}".format(kwargs, measure)
     return measure
Example #6
    def test_dynamic_updates(self):
        """
        TensorCoFi dynamic update
        Train a TensorCoFi model and evaluate it, then replace all the user factors with factors
        recomputed via online_user_factors and check that the performance stays roughly the same.
        """
        pyTF = PyTensorCoFi(n_factors=20,
                            n_iterations=5,
                            c_lambda=0.05,
                            c_alpha=40)

        evaluator = Evaluator()
        tf = TensorCoFi(n_factors=2,
                        n_iterations=100,
                        c_lambda=0.05,
                        c_alpha=40)
        df = pd.read_csv(resource_filename(testfm.__name__,
                                           "data/movielenshead.dat"),
                         sep="::",
                         header=None,
                         names=["user", "item", "rating", "date", "title"])
        training, testing = testfm.split.holdoutByRandom(df, 0.7)
        users = {
            user: list(entries)
            for user, entries in training.groupby("user")["item"]
        }

        tf.fit(training)
        map1 = evaluator.evaluate_model(tf,
                                        testing)  # map of the original model

        # Now replace the original factors with factors computed on the fly:
        # iterate over the users and the items they saw in training
        for u, items in users.items():
            #user id in the tf
            uid = tf.data_map[tf.get_user_column()][u]  # user id
            iids = [tf.data_map[tf.get_item_column()][i]
                    for i in items]  # item ids that user has seen
            #original_factors = tf.factors["user"][uid]
            new_factors = pyTF.online_user_factors(tf.factors[1],
                                                   iids,
                                                   p_param=40,
                                                   lambda_param=0.05)
            #replace original factors with the new factors
            tf.factors[0][uid, :] = new_factors
            #tf.update_user_factors(uid, new_factors)

        # Evaluate the model again with the factors updated on the fly
        map2 = evaluator.evaluate_model(tf, testing)
        # The MAP difference should be smaller than 20% of the original MAP
        assert abs(map1[0] - map2[0]) < 0.2 * map1[0]
Example #7
    def test_tensor_score_against_testfm(self):
        """
        [recommendation.models.TensorCoFi] Test tensorcofi scores with test.fm benchmark
        """
        evaluator = Evaluator()
        tc = TensorCoFi(n_users=len(self.df.user.unique()), n_items=len(self.df.item.unique()), n_factors=2)
        ptc = PyTensorCoFi()
        training, testing = testfm.split.holdoutByRandom(self.df, 0.9)

        items = training.item.unique()
        tc.fit(training)
        ptc.fit(training)
        tc_score = evaluator.evaluate_model(tc, testing, all_items=items)[0]
        ptc_score = evaluator.evaluate_model(ptc, testing, all_items=items)[0]
        assert abs(tc_score-ptc_score) < .15, \
            "TensorCoFi score is not close enough to testfm benchmark (%.3f != %.3f)" % (tc_score, ptc_score)
Example #8
    def test_popularity_score_against_testfm(self):
        """
        [recommendation.models.TensorCoFi] Test popularity scores with test.fm benchmark
        """
        evaluator = Evaluator()
        training, testing = testfm.split.holdoutByRandom(self.df, 0.9)
        items = training.item.unique()

        tc = Popularity(len(items))
        ptc = TFMPopularity()
        tc.fit(training)
        ptc.fit(training)
        tc_score = evaluator.evaluate_model(tc, testing, all_items=items)[0]
        ptc_score = evaluator.evaluate_model(ptc, testing, all_items=items)[0]
        assert abs(tc_score-ptc_score) < .1, \
            "Popularity score is not close enough to testfm benchmark (%.3f != %.3f)" % (tc_score, ptc_score)
Example #9
 def test_default(self):
     """
     [EVALUATOR] Test the measure
     """
     mapm = MAPMeasure()
     model = IdModel()
     evaluation = Evaluator()
     df = pd.DataFrame({"user": [1, 1, 3, 4], "item": [1, 2, 3, 4], "rating": [5, 3, 2, 1],
                        "date": [11, 12, 13, 14]})
     e = evaluation.evaluate_model(model, df, non_relevant_count=2, measures=[mapm])
     assert len(e) == 1, "Evaluator result is not what is expected"
     #r = mapm.measure([])
     #assert not r, "MAPMeasure for empty list is not returning NAN (%f, %s)" % (r, type(r))
     r = mapm.measure([(True, 0.)])
     assert r == 1., "MAPMeasure for 1 entry (True, 0.) list is not returning 1. (%f)" % r
     r = mapm.measure([(False, 0.)])
     assert r == 0., "MAPMeasure for 1 entry (False, 0.) list is not returning 0. (%f)" % r
     r = mapm.measure([(False, 0.01), (True, 0.00)])
     assert r == 0.5, "MAPMeasure for 2 entries (False, 0.01) and (True, 0.) list is not returning 0.5 (%f)" % r
     r = mapm.measure([(False, 0.9), (True, 0.8), (False, 0.7), (False, 0.6), (True, 0.5), (True, 0.4), (True, 0.3), 
                       (False, 0.2), (False, 0.1), (False, 0)])
     assert r == 0.4928571428571428, "Measure should be around 0.4928571428571428 (%f)" % r
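The last expected value can be reproduced by hand: average precision is the mean, over the relevant positions, of the precision at that rank. Here the relevant items sit at ranks 2, 5, 6 and 7, giving (1/2 + 2/5 + 3/6 + 4/7) / 4 ≈ 0.4929. A standalone sketch of that computation, assuming the (is_relevant, score) lists are already sorted by score in descending order as in the test:

def average_precision(ranked):
    # ranked: list of (is_relevant, score) tuples, sorted by score descending
    hits, precisions = 0, []
    for rank, (is_relevant, _score) in enumerate(ranked, start=1):
        if is_relevant:
            hits += 1
            precisions.append(float(hits) / rank)
    return sum(precisions) / len(precisions) if precisions else 0.0

ranked = [(False, 0.9), (True, 0.8), (False, 0.7), (False, 0.6), (True, 0.5),
          (True, 0.4), (True, 0.3), (False, 0.2), (False, 0.1), (False, 0.0)]
assert abs(average_precision(ranked) - 0.4928571428571428) < 1e-12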
Example #10
    def test_nogil_against_std_05(self):
        """
        [EVALUATOR] Test the groups measure differences between python and c implementations for 5% training
        """
        df = pd.read_csv(resource_filename(testfm.__name__, 'data/movielenshead.dat'),
                         sep="::", header=None, names=['user', 'item', 'rating', 'date', 'title'])
        model = PyTensorCoFi()
        ev = Evaluator(False)
        ev_nogil = Evaluator()
        results = {"implementation": [], "measure": []}
        for i in range(SAMPLE_SIZE_FOR_TEST):
            training, testing = testfm.split.holdoutByRandom(df, 0.05)
            model.fit(training)
            results["implementation"].append("Cython"), results["measure"].append(ev_nogil.evaluate_model(model, testing)[0])
            results["implementation"].append("Python"), results["measure"].append(ev.evaluate_model(model, testing)[0])

        #####################
        # ANOVA over result #
        #####################
        assert_equality_in_groups(results, alpha=ALPHA, groups="implementation", test_var="measure")
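A minimal sketch of what an equality-of-groups check like assert_equality_in_groups might do, here using a one-way ANOVA from scipy (an assumption about the helper, not its actual implementation):

from scipy.stats import f_oneway

def assert_equality_in_groups_sketch(results, alpha, groups, test_var):
    # Split the measures by group label (e.g. "Cython" vs "Python")
    by_group = {}
    for label, value in zip(results[groups], results[test_var]):
        by_group.setdefault(label, []).append(value)
    # One-way ANOVA: a p-value above alpha means we cannot reject equality of the group means
    _f_stat, p_value = f_oneway(*by_group.values())
    assert p_value > alpha, "Group means differ significantly (p=%.4f)" % p_value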
Example #11
# Tell me what models we want to evaluate
models = [
    RandomModel(),
    PopularityOkapi(hadoop_source="/data/b.ajf/hadoop1_env.sh",
                    host="igraph-01",
                    okapi_jar_dir="okapi/jar/",
                    #host='54.72.18.118', user='******',
                    #okapi_jar_dir='/Users/linas/devel/okapi/target/',
                    #okapi_jar_base_name='okapi-0.3.2-SNAPSHOT-jar-with-dependencies.jar',
                    #public_key_path='/Users/linas/.ssh/hack-okapi.pem'
                    ),
    Popularity(normalize=False),
    BPROkapi(hadoop_source="/data/b.ajf/hadoop1_env.sh",
             host="igraph-01",
             okapi_jar_dir="okapi/jar/",
             #host='54.72.18.118', user='******',
             #okapi_jar_dir='/Users/linas/devel/okapi/target/',
             #okapi_jar_base_name='okapi-0.3.2-SNAPSHOT-jar-with-dependencies.jar',
             #public_key_path='/Users/linas/.ssh/hack-okapi.pem'
             )
]

# Setup the environment
evaluator = Evaluator()

for m in models:
    m.fit(df)
    print m.get_name().ljust(50),
    print evaluator.evaluate_model(m, df)
Example #12
    sep="::", header=None, names=['user', 'item', 'rating', 'date', 'title'])
print df.head()

#tell me what models we want to evaluate
models = [
            RandomModel(),
            PopularityOkapi(host='54.72.18.118',
                            username='******',
                            #okapi_jar_dir='/Users/linas/devel/okapi/target/',
                            #okapi_jar_base_name='okapi-0.3.2-SNAPSHOT-jar-with-dependencies.jar',
                            public_key_path='/Users/linas/.ssh/hack-okapi.pem'
            ),
            Popularity(normalize=False),
            BPROkapi(host='54.72.18.118',
                            username='******',
                            #okapi_jar_dir='/Users/linas/devel/okapi/target/',
                            #okapi_jar_base_name='okapi-0.3.2-SNAPSHOT-jar-with-dependencies.jar',
                            public_key_path='/Users/linas/.ssh/hack-okapi.pem'
            )
]

#setup the environment
evaluator = Evaluator()

for m in models:
    m.fit(df)
    print m.getName().ljust(50),
    print evaluator.evaluate_model(m, df)

evaluator.close()  # Need this call to clean up the worker processes
Example #13
__author__ = "linas"

import testfm
import pandas as pd
from testfm.evaluation.evaluator import Evaluator
from testfm.models.baseline_model import Popularity, RandomModel, Item2Item
from testfm.models.tensorcofi import PyTensorCoFi, TensorCoFi, CTensorCoFi
from testfm.models.content_based import TFIDFModel, LSIModel
from testfm.models.bpr import BPR
from pkg_resources import resource_filename
import datetime


if __name__ == "__main__":
    evaluator = Evaluator()

    #prepare the data
    df = pd.read_csv(resource_filename(testfm.__name__, "data/movielenshead.dat"),
                     sep="::", header=None, names=["user", "item", "rating", "date", "title"])
    print df.head()
    training, testing = testfm.split.holdoutByRandom(df, 0.5)

    #tell me what models we want to evaluate
    models = [
        RandomModel(),
        BPR(),
        TFIDFModel("title"),
        Popularity(),
        TensorCoFi(n_factors=20, n_iterations=5, c_lambda=0.05, c_alpha=40),
        PyTensorCoFi(n_factors=20, n_iterations=5, c_lambda=0.05, c_alpha=40),
        CTensorCoFi(n_factors=20, n_iterations=5, c_lambda=0.05, c_alpha=40),
Example #14
__author__ = "linas"

import pandas as pd
import testfm
from testfm.models.tensorCoFi import TensorCoFi
from testfm.evaluation.evaluator import Evaluator
from pkg_resources import resource_filename

from testfm.evaluation.parameterTuning import ParameterTuning

eval = Evaluator()  # call this before loading the data to save memory (fork of process takes place)

# prepare the data
df = pd.read_csv(
    resource_filename(testfm.__name__, "data/movielenshead.dat"),
    sep="::",
    header=None,
    names=["user", "item", "rating", "date", "title"],
)
print df.head()
training, testing = testfm.split.holdoutByRandom(df, 0.9)

print "Tuning the parameters."
tr, validation = testfm.split.holdoutByRandom(training, 0.7)
pt = ParameterTuning()
pt.setMaxIterations(10)
pt.setZvalue(80)
tf_params = pt.getBestParams(TensorCoFi, tr, validation)
print tf_params

tf = TensorCoFi()
Example #15
__author__ = 'linas'

import pandas as pd
import testfm
from testfm.models.tensorcofi import PyTensorCoFi as TensorCoFi
from testfm.models.bpr import BPR as TensorCoFi  # Note: this alias overrides the previous import, so BPR is the class actually tuned below
from testfm.evaluation.evaluator import Evaluator
from pkg_resources import resource_filename

from testfm.evaluation.parameter_tuning import ParameterTuning

if __name__ == "__main__":
    eval = Evaluator()  # Call this before loading the data to save memory (fork of process takes place)

    # Prepare the data
    df = pd.read_csv(resource_filename(testfm.__name__,
                                       'data/movielenshead.dat'),
                     sep="::",
                     header=None,
                     names=['user', 'item', 'rating', 'date', 'title'])
    print df.head()
    training, testing = testfm.split.holdoutByRandom(df, 0.9)

    print "Tuning the parameters."
    tr, validation = testfm.split.holdoutByRandom(training, 0.7)
    pt = ParameterTuning()
    pt.set_max_iterations(100)
    pt.set_z_value(90)
    tf_params = pt.get_best_params(TensorCoFi, tr, validation)
    print tf_params
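A natural follow-up, not shown in the snippet above, is to apply the best parameters and score the model on the held-out split; this sketch continues the __main__ block and assumes the set_params / fit / evaluate_model interface used in the other examples:

    model = TensorCoFi()            # i.e. whichever class the alias currently points to
    model.set_params(**tf_params)   # apply the tuned parameters
    model.fit(training)
    map_score = eval.evaluate_model(model, testing)[0]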
Example #16
__author__ = 'linas'

import testfm
import pandas as pd
from testfm.evaluation.evaluator import Evaluator
from testfm.models.baseline_model import Popularity, RandomModel, Item2Item
from testfm.models.tensorCoFi import TensorCoFi
from testfm.models.content_based import TFIDFModel, LSIModel
from testfm.models.ensemble_models import LinearRank
from testfm.models.bpr import BPR
from pkg_resources import resource_filename

# Because of the Global Interpreter Lock we need to initialize the evaluator here (it forks worker processes)
eval = Evaluator()

#prepare the data
df = pd.read_csv(resource_filename(testfm.__name__,'data/movielenshead.dat'),
    sep="::", header=None, names=['user', 'item', 'rating', 'date', 'title'])
print df.head()
training, testing = testfm.split.holdoutByRandom(df, 0.9)

#tell me what models we want to evaluate
models = [  RandomModel(),
            BPR(),
            TFIDFModel('title'),
            Popularity(),
            TensorCoFi(dim=20, nIter=5, lamb=0.05, alph=40, user_features=['user'], item_features=['item', 'title']),
            #Item2Item(),
            #LSIModel('title'),
         ]