Example #1
import numpy as np


def user_item_features_true_propensities_true_ratings(num_rows, num_cols,
                                                      num_components):
    # Ground-truth ratings: a random rank-num_components matrix U V^T,
    # rescaled to the integer range {1, ..., 5}.
    true_U = np.random.rand(num_rows, num_components)
    true_V = np.random.rand(num_cols, num_components)
    true_X = np.dot(true_U, true_V.T)
    true_X = (true_X - true_X.min()) / (true_X.max() - true_X.min())
    true_X = 1 + np.round(true_X * 4)

    # Latent user/item features and logistic-regression weights that drive
    # the observation propensities.
    true_U_feature = np.random.randn(num_rows, num_components) / 8
    true_V_feature = np.random.randn(num_cols, num_components) / 8

    param_U = np.random.rand(num_components)
    param_V = np.random.rand(num_components)

    # True propensity of each (user, item) rating being observed, computed
    # with a logistic model on the user and item features.
    # std_logistic_function (the standard sigmoid) is assumed to be defined
    # elsewhere in this module.
    true_P = np.ones((num_rows, num_cols))
    for u in range(num_rows):
        for i in range(num_cols):
            true_P[u, i] = \
                std_logistic_function(np.inner(true_U_feature[u], param_U) +
                                      np.inner(true_V_feature[i], param_V))

    return true_P, true_X, true_U_feature, true_V_feature
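
A minimal usage sketch (an addition, not part of the original listing): draw a small synthetic problem and sample an observed-entry mask from the true propensities. It assumes numpy is imported as np and that std_logistic_function comes from this module.

# Hypothetical usage sketch: a 100 x 50 problem with 5 latent components.
true_P, true_X, true_U_feature, true_V_feature = \
    user_item_features_true_propensities_true_ratings(100, 50, 5)
# Reveal each rating independently with probability true_P[u, i].
M = (np.random.rand(*true_P.shape) < true_P).astype(float)
X_observed = true_X * M  # ratings are visible only where M == 1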
Example #2
    def fit(self, trainset):
        # Estimate propensity scores P_hat, then complete the rating matrix
        # with inverse-propensity-weighted soft-impute.

        AlgoBase.fit(self, trainset)

        # M is the binary observation mask and X holds the observed ratings
        # (both of shape n_users x n_items).
        M = np.zeros((trainset.n_users, trainset.n_items))
        X = np.zeros((trainset.n_users, trainset.n_items))
        for u, i, r in trainset.all_ratings():
            M[u, i] = 1
            X[u, i] = r

        # Use precomputed propensity scores when they are supplied directly.
        if isinstance(self.propensity_scores, np.ndarray):
            P_hat = self.propensity_scores
        elif self.propensity_scores == '1bitmc':
            if self.verbose:
                print('Estimating propensity scores via matrix completion...')

            while True:
                # Cache the propensity estimate on disk, keyed by a hash of
                # the observation mask and the 1-bit MC hyperparameters.
                memoize_hash = hashlib.sha256(M.data.tobytes())
                memoize_hash.update(
                    json.dumps([
                        self.one_bit_mc_tau, self.one_bit_mc_gamma,
                        self.one_bit_mc_max_rank
                    ]).encode('utf-8'))
                cache_filename = os.path.join(
                    cache_dir,
                    memoize_hash.hexdigest() + '.txt')
                if not os.path.isfile(cache_filename):
                    # Cache miss: estimate propensities by 1-bit matrix
                    # completion on the fully observed mask M.
                    os.makedirs(cache_dir, exist_ok=True)
                    P_hat = \
                        one_bit_MC_fully_observed(
                            M, std_logistic_function,
                            grad_std_logistic_function,
                            self.one_bit_mc_tau,
                            self.one_bit_mc_gamma,
                            max_rank=self.one_bit_mc_max_rank)
                    try:
                        np.savetxt(cache_filename, P_hat)
                    except OSError:
                        # Caching is best-effort; ignore write failures.
                        pass
                else:
                    try:
                        P_hat = np.loadtxt(cache_filename)
                        if P_hat.shape[0] != trainset.n_users or \
                                P_hat.shape[1] != trainset.n_items:
                            print(
                                '*** WARNING: Recomputing propensity scores ' +
                                '(mismatched dimensions in cached file)')
                            try:
                                os.remove(cache_filename)
                            except OSError:
                                pass
                            continue
                    except ValueError:
                        print('*** WARNING: Recomputing propensity scores ' +
                              '(malformed numpy array encountered)')
                        try:
                            os.remove(cache_filename)
                        except OSError:
                            pass
                        continue
                break
        elif self.propensity_scores == '1bitmc_mod':
            if self.verbose:
                print('Estimating propensity scores via matrix completion...')

            while True:
                # Same caching scheme, but keyed with a '1bitmc-mod' tag so
                # the two variants never share cache files.
                memoize_hash = hashlib.sha256(M.data.tobytes())
                memoize_hash.update(
                    json.dumps([
                        '1bitmc-mod', self.one_bit_mc_tau,
                        self.one_bit_mc_gamma, self.one_bit_mc_max_rank
                    ]).encode('utf-8'))
                cache_filename = os.path.join(
                    cache_dir,
                    memoize_hash.hexdigest() + '.txt')
                if not os.path.isfile(cache_filename):
                    # Cache miss: build the modified logistic link (and its
                    # gradient) and estimate propensities with the modified
                    # 1-bit matrix completion routine.
                    one_minus_logistic_gamma \
                        = 1 - std_logistic_function(self.one_bit_mc_gamma)
                    link = lambda x: \
                        mod_logistic_function(x, self.one_bit_mc_gamma,
                                              one_minus_logistic_gamma)
                    grad_link = lambda x: \
                        grad_mod_logistic_function(x, self.one_bit_mc_gamma,
                                                   one_minus_logistic_gamma)

                    os.makedirs(cache_dir, exist_ok=True)
                    P_hat = one_bit_MC_mod_fully_observed(
                        M, link, grad_link,
                        self.one_bit_mc_tau,
                        self.one_bit_mc_gamma,
                        max_rank=self.one_bit_mc_max_rank)
                    try:
                        np.savetxt(cache_filename, P_hat)
                    except OSError:
                        # Caching is best-effort; ignore write failures.
                        pass
                else:
                    try:
                        P_hat = np.loadtxt(cache_filename)
                        if P_hat.shape[0] != trainset.n_users or \
                                P_hat.shape[1] != trainset.n_items:
                            print(
                                '*** WARNING: Recomputing propensity scores ' +
                                '(mismatched dimensions in cached file)')
                            try:
                                os.remove(cache_filename)
                            except OSError:
                                pass
                            continue
                    except ValueError:
                        print('*** WARNING: Recomputing propensity scores ' +
                              '(malformed numpy array encountered)')
                        try:
                            os.remove(cache_filename)
                        except OSError:
                            pass
                        continue
                break
        else:
            raise ValueError('Unknown propensity score method: '
                             + str(self.propensity_scores))
        self.propensity_estimates = P_hat

        if self.verbose:
            print('Running debiased matrix completion...')

        # Inverse-propensity-weighted soft-impute on the observed ratings.
        self.predictions = \
            weighted_softimpute(X, M, 1 / P_hat, self.lmbda, self.max_rank,
                                self.min_value, self.max_value)

        # Overwrite predictions at observed entries with the training ratings.
        for u, i, r in trainset.all_ratings():
            self.predictions[u, i] = r

        return self
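
A hedged usage sketch for the estimator above: the class name DebiasedMC and its constructor arguments are assumptions (only the fit() method is shown in the source), while Dataset.load_builtin and build_full_trainset are Surprise's standard API.

# Hypothetical usage sketch; the class name and constructor signature are
# assumed. fit() above sets self.propensity_estimates and self.predictions.
from surprise import Dataset

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()

algo = DebiasedMC(propensity_scores='1bitmc', lmbda=1.0, max_rank=10,
                  verbose=True)
algo.fit(trainset)

print(algo.propensity_estimates.shape)  # (trainset.n_users, trainset.n_items)
print(algo.predictions.shape)           # completed, debiased rating matrix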