Exemplo n.º 1
0
 def create_user_vectors(self, alpha, epsilon=None, metric='log'):
     """
     Create user vectors as a weighted average of the document vectors
     of the items they interacted with. The weight is the confidence of
     the interaction, computed with the 'log' or 'lin' metric.

     The result is stored in the private ``__user_vectors`` attribute;
     nothing is returned.

     Args:
         alpha: hyperparameter for computing the confidence (forwarded
             to ``to_coo``).
         epsilon: hyperparameter for computing the confidence; only
             forwarded to ``to_coo`` for the 'log' metric.
         metric: metric for computing the confidence ('log' or 'lin').

     Raises:
         ValueError: if ``metric`` is neither 'log' nor 'lin'.
     """
     # Fail fast: an unknown metric used to skip the to_coo() conversion
     # and only blow up later on the unconverted object.
     if metric not in ('log', 'lin'):
         raise ValueError(
             "unsupported metric {!r}; expected 'log' or 'lin'".format(metric))

     interactions = make_sparse(self.dataframe.df, self.url_2_id, self.uid_2_id)
     if metric == 'log':
         confidence = interactions.to_coo(alpha=alpha, epsilon=epsilon, metrics='log')
     else:
         confidence = interactions.to_coo(alpha=alpha, metrics='lin')
     confidence = confidence.toarray()

     # Row-wise confidence totals as a column vector so the division
     # below broadcasts across each row of the product.
     # NOTE(review): rows are presumably users - confirm against make_sparse.
     confidence_sums = confidence.sum(axis=1)
     confidence_sums = confidence_sums.reshape((len(confidence_sums), 1))
     # Avoid division by zero for rows without any confidence.
     confidence_sums[confidence_sums == 0] = 1

     user_matrix = confidence.dot(self.doc_vectors)
     self.__user_vectors = user_matrix / confidence_sums
Exemplo n.º 2
0
    def train(self,
              train_df,
              factors=None,
              iterations=None,
              alpha=None,
              epsilon=None,
              metric='log'):
        """
        Fit an implicit ALS model on the interactions in ``train_df``.

        Builds the interaction matrix with ``make_sparse`` (called with
        ``users_in_rows=False``), converts it with the chosen metric,
        fits ``implicit.als.AlternatingLeastSquares`` on its CSC form
        and stores the model together with its user and item factors on
        the instance.

        Args:
            train_df: training interactions; also stored on the instance.
            factors: number of latent factors; when None the library
                default is used.
            iterations: number of ALS iterations; when None the library
                default is used.
            alpha: hyperparameter forwarded to ``to_coo`` ('log', 'lin').
            epsilon: hyperparameter forwarded to ``to_coo`` ('log' only).
            metric: one of 'log', 'lin' or 'bin'. Any other value makes
                the method return early without fitting a model.
        """
        print("##################### ALS model #####################")
        self.__train_df = train_df
        print("[1]  Creating sparse interaction matrix")
        interactions = make_sparse(train_df,
                                   self.__url_2_id,
                                   self.__uid_2_id,
                                   users_in_rows=False)

        if metric == 'log':
            confidence = interactions.to_coo(metrics='log', alpha=alpha, epsilon=epsilon)
        elif metric == 'lin':
            confidence = interactions.to_coo(metrics='lin', alpha=alpha)
        elif metric == 'bin':
            confidence = interactions.to_coo(metrics='bin')
        else:
            # Unknown metric: existing behaviour is to give up silently.
            return
        confidence = confidence.tocsc()

        # Only forward hyperparameters that were actually supplied, so an
        # omitted value falls back to the library default instead of
        # overriding it with None.
        als_kwargs = {'num_threads': 0}
        if factors is not None:
            als_kwargs['factors'] = factors
        if iterations is not None:
            als_kwargs['iterations'] = iterations
        self.__model = implicit.als.AlternatingLeastSquares(**als_kwargs)
        print("\n[2]  Fitting the matrix to the model")
        self.__model.fit(confidence)
        self.__user_factors = self.model.user_factors
        self.__item_factors = self.model.item_factors
        print("\n[DONE]  ALS successfull")
Exemplo n.º 3
0
 def create_user_vectors_bin(self):
     """
     Build user vectors from the document vectors of the items each
     user interacted with, using the 'bin' metric.

     The dense interaction matrix is multiplied with
     ``self.doc_vectors`` and the product is stored in the private
     ``__user_vectors`` attribute; nothing is returned.
     NOTE(review): 'bin' presumably yields 0/1 weights, which makes the
     product a plain sum of document vectors - confirm in to_coo.
     """
     interactions = make_sparse(self.dataframe.df, self.url_2_id, self.uid_2_id)
     dense = interactions.to_coo(metrics='bin').toarray()
     user_matrix = dense.dot(self.doc_vectors)
     self.__user_vectors = user_matrix
     
         
         
         
         
         
Exemplo n.º 4
0
    def train(self, train_df, alpha=None, epsilon=None, min_interactions=1, metric='bin'):
        """
        Factorise the interaction matrix of ``train_df`` with SVD.

        The interaction matrix is built with ``make_sparse``, converted
        with the chosen metric, densified and decomposed with
        ``np.linalg.svd(..., full_matrices=False)``. The three factors
        are stored in the private ``__u``, ``__s`` and ``__vh``
        attributes; nothing is returned.

        Args:
            train_df: training interactions. The frame is stored on the
                instance as passed in, before any filtering.
            alpha: hyperparameter forwarded to ``to_coo`` ('log', 'lin').
            epsilon: hyperparameter forwarded to ``to_coo`` ('log' only).
            min_interactions: when different from 1, ``train_df`` is
                first passed through ``helpers.df_min_interactions``.
            metric: one of 'log', 'bin' or 'lin'.

        Raises:
            ValueError: if ``metric`` is not one of the supported values.
        """
        # Fail fast: an unknown metric used to skip the to_coo() conversion
        # and only blow up later on the unconverted object.
        if metric not in ('log', 'bin', 'lin'):
            raise ValueError(
                "unsupported metric {!r}; expected 'log', 'bin' or 'lin'".format(metric))

        print("##################### SVD model #####################")
        self.__train_df = train_df
        if min_interactions != 1:
            train_df = helpers.df_min_interactions(df=train_df,
                                                   min_interactions=min_interactions)
        print("[1]  Making sparse interaction matrix")
        interactions = make_sparse(df=train_df,
                                   url_2_id=self.url_2_id,
                                   uid_2_id=self.uid_2_id)

        if metric == 'log':
            confidence = interactions.to_coo(metrics='log', alpha=alpha, epsilon=epsilon)
        elif metric == 'bin':
            confidence = interactions.to_coo(metrics='bin')
        else:
            confidence = interactions.to_coo(metrics='lin', alpha=alpha)

        print("[2]  Performing SVD on interaction matrix")
        self.__u, self.__s, self.__vh = np.linalg.svd(confidence.toarray(),
                                                     full_matrices=False)
        print("[DONE]  SVD successfull")
Exemplo n.º 5
0
    def mse(self, test_df):
        """
        Compute the mean squared error of the model on ``test_df``.

        For every (uid, page_url) pair in ``test_df`` the value found in
        the dense interaction matrix built from ``test_df`` is compared
        with ``self.model.predict(uid, url)``; the squared differences
        are averaged over all pairs.

        Args:
            test_df: interactions to evaluate on; must provide the 'uid'
                and 'page_url' columns used for grouping.

        Returns:
            The mean squared error, or NaN when ``test_df`` yields no
            (uid, page_url) pairs (previously a ZeroDivisionError).
        """
        print('[1] Creating sparse matrix for whole dataset')
        interactions = make_sparse(df=test_df,
                                   url_2_id=self.model.url_2_id,
                                   uid_2_id=self.model.uid_2_id)
        observed = interactions.to_coo().toarray()
        urls_by_user = test_df.groupby('uid')['page_url'].unique()
        bar = Bar('Processing', max=urls_by_user.shape[0])

        squared_error_sum = 0
        pair_count = 0
        try:
            for uid, urls in urls_by_user.items():
                # The row lookup does not depend on the url, so do it once
                # per user instead of once per pair.
                user_row = observed[self.model.uid_2_id[uid]]
                for url in urls:
                    real = user_row[self.model.url_2_id[url]]
                    score = self.model.predict(uid, url)
                    squared_error_sum += (real - score) ** 2
                    pair_count += 1
                bar.next()
        finally:
            # Finish the progress bar even when a lookup or prediction raises.
            bar.finish()

        if pair_count == 0:
            # No pairs to evaluate: the mean is undefined.
            return float('nan')
        return squared_error_sum / pair_count