示例#1
0
    def fit(self, trainSamples, trainTargets):
        """Fit the model on the training samples.

        Steps, in order:
          1. Build ``self.dataModel`` and derive ``self.mf_rate`` from two
             successive ``isvd`` reconstructions of the dense data matrix.
          2. Build ``self.personal_item_price``, a (users x items) array in
             which every row holds the per-item price.
          3. Fit the Gaussian price parameters ``self.param_mu`` and
             ``self.param_sigm``.
          4. Learn ``self.alpha`` per user from the pairs returned by
             ``self.sample``.

        Args:
            trainSamples: rows loaded into a DataFrame with the columns
                ``user``, ``item`` and ``price``.
            trainTargets: handed to ``MemeryDataModel`` with the samples.

        Reads ``self.lambda_iter`` and ``self.max_iter``.  ``self.lambda_iter``
        is decayed while training and is always restored before returning,
        even when ``self.sample`` or ``self.update`` raises.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()

        # First pass: reconstruct the dense matrix from its isvd factors.
        data = np.array(self.dataModel.getData().todense())
        u, s, v = isvd(data)
        new_data = np.dot(u, np.dot(s, v))
        # Reconstructed values below 1 are marked NaN before the second pass
        # (presumably isvd treats NaN as "missing" -- TODO confirm).
        new_data[new_data < 1] = np.nan
        u, s, v = isvd(new_data)
        self.mf_rate = np.dot(u, np.dot(s, v))

        train_data = pd.DataFrame(trainSamples,
                                  columns=['user', 'item', 'price'])
        # Plain column access instead of the deprecated ``.ix`` indexer.
        # When an item occurs several times, the last price seen wins.
        item_price = dict(zip(train_data['item'], train_data['price']))

        # The stored price depends only on the item, so one row is built and
        # then repeated for every user instead of redoing the lookups per user.
        # NOTE(review): the original also computed each user's mean price and
        # a relative price delta but never stored them; that dead computation
        # was dropped.  Confirm the raw price is what was intended here.
        price_row = np.empty(n_items)
        for item_ix in xrange(n_items):
            price_row[item_ix] = item_price[
                self.dataModel.getItemByIid(item_ix)]
        self.personal_item_price = np.tile(price_row, (n_users, 1))

        b = {'phones': train_data}
        price_df = Construction.get_user_category_buy_price(b)
        self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(
            price_df)

        self.alpha = np.random.rand(n_users)
        self.beta = np.random.rand(n_users)
        self.gamma = np.zeros(n_users)

        origin_lambda_iter = self.lambda_iter
        try:
            for user_ix in xrange(n_users):
                # Sampling happens before the reset, exactly as before.
                samples = self.sample(user_ix, self.max_iter)
                self.lambda_iter = origin_lambda_iter
                for item_1, item_2 in samples:
                    new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                    self.alpha[user_ix] = new_alpha
                    self.beta[user_ix] = new_beta
                    # Shrink the step factor after every update.
                    self.lambda_iter = self.lambda_iter * 0.9
        finally:
            # Never leave the decayed value behind, even on failure.
            self.lambda_iter = origin_lambda_iter

        # NOTE(review): beta is reset to zeros after training, so the learned
        # beta values are discarded.  Kept as in the original -- confirm.
        self.beta = np.zeros(n_users)
def construct_price_feature(df, dataModel):
    """Build one price-feature matrix per user.

    Args:
        df: DataFrame with a ``user`` column; it is grouped by that column.
        dataModel: supplies ``getUidByUser``, ``getPriceIxNum`` and
            ``getFeaturesNum``.

    Returns:
        dict mapping ``dataModel.getUidByUser(user)`` to an array of shape
        ``(getPriceIxNum(), getFeaturesNum())`` that starts as zeros and is
        handed to ``_construct_mention_feature_apply`` once per row of that
        user (presumably filled in place -- TODO confirm).
    """
    buy_prices = get_user_category_buy_price(df, dataModel)
    _mu, sigma_params = MyGaussian.gaussian_curve_fit(buy_prices)

    features_by_uid = {}
    for user, user_rows in df.groupby('user'):
        # Square root of the fitted per-user sigma parameter.
        sigma = np.sqrt(sigma_params[dataModel.getUidByUser(user)])
        shape = (dataModel.getPriceIxNum(), dataModel.getFeaturesNum())
        feature = np.zeros(shape)

        def visit_row(row):
            # Called immediately by ``apply`` below, so the closure always
            # sees this iteration's ``feature`` and ``sigma``.
            return _construct_mention_feature_apply(
                dataModel, feature, row, sigma)

        user_rows.apply(visit_row, axis=1)
        features_by_uid[dataModel.getUidByUser(user)] = feature
    return features_by_uid
    def fit(self, trainSamples, trainTargets):
        """Fit the model on the training samples.

        Steps, in order:
          1. Build ``self.dataModel`` and derive ``self.mf_rate`` from two
             successive ``isvd`` reconstructions of the dense data matrix.
          2. Build ``self.personal_item_price``, a (users x items) array in
             which every row holds the per-item price.
          3. Fit the Gaussian price parameters ``self.param_mu`` and
             ``self.param_sigm``.
          4. Learn ``self.alpha`` per user from the pairs returned by
             ``self.sample``.

        Args:
            trainSamples: rows loaded into a DataFrame with the columns
                ``user``, ``item`` and ``price``.
            trainTargets: handed to ``MemeryDataModel`` with the samples.

        Reads ``self.lambda_iter`` and ``self.max_iter``.  ``self.lambda_iter``
        is decayed while training and is always restored before returning,
        even when ``self.sample`` or ``self.update`` raises.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()

        # First pass: reconstruct the dense matrix from its isvd factors.
        data = np.array(self.dataModel.getData().todense())
        u, s, v = isvd(data)
        new_data = np.dot(u, np.dot(s, v))
        # Reconstructed values below 1 are marked NaN before the second pass
        # (presumably isvd treats NaN as "missing" -- TODO confirm).
        new_data[new_data < 1] = np.nan
        u, s, v = isvd(new_data)
        self.mf_rate = np.dot(u, np.dot(s, v))

        train_data = pd.DataFrame(trainSamples,
                                  columns=['user', 'item', 'price'])
        # Plain column access instead of the deprecated ``.ix`` indexer.
        # When an item occurs several times, the last price seen wins.
        item_price = dict(zip(train_data['item'], train_data['price']))

        # The stored price depends only on the item, so one row is built and
        # then repeated for every user instead of redoing the lookups per user.
        # NOTE(review): the original also computed each user's mean price and
        # a relative price delta but never stored them; that dead computation
        # was dropped.  Confirm the raw price is what was intended here.
        price_row = np.empty(n_items)
        for item_ix in xrange(n_items):
            price_row[item_ix] = item_price[
                self.dataModel.getItemByIid(item_ix)]
        self.personal_item_price = np.tile(price_row, (n_users, 1))

        b = {'phones': train_data}
        price_df = Construction.get_user_category_buy_price(b)
        self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(
            price_df)

        self.alpha = np.random.rand(n_users)
        self.beta = np.random.rand(n_users)
        self.gamma = np.zeros(n_users)

        origin_lambda_iter = self.lambda_iter
        try:
            for user_ix in xrange(n_users):
                # Sampling happens before the reset, exactly as before.
                samples = self.sample(user_ix, self.max_iter)
                self.lambda_iter = origin_lambda_iter
                for item_1, item_2 in samples:
                    new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                    self.alpha[user_ix] = new_alpha
                    self.beta[user_ix] = new_beta
                    # Shrink the step factor after every update.
                    self.lambda_iter = self.lambda_iter * 0.9
        finally:
            # Never leave the decayed value behind, even on failure.
            self.lambda_iter = origin_lambda_iter

        # NOTE(review): beta is reset to zeros after training, so the learned
        # beta values are discarded.  Kept as in the original -- confirm.
        self.beta = np.zeros(n_users)
def construct_price_feature(df, dataModel):
    """Build one price-feature matrix per user.

    Args:
        df: DataFrame with a ``user`` column; it is grouped by that column.
        dataModel: supplies ``getUidByUser``, ``getPriceIxNum`` and
            ``getFeaturesNum``.

    Returns:
        dict mapping ``dataModel.getUidByUser(user)`` to an array of shape
        ``(getPriceIxNum(), getFeaturesNum())`` that starts as zeros and is
        handed to ``_construct_mention_feature_apply`` once per row of that
        user (presumably filled in place -- TODO confirm).
    """
    buy_prices = get_user_category_buy_price(df, dataModel)
    _mu, sigma_params = MyGaussian.gaussian_curve_fit(buy_prices)

    features_by_uid = {}
    for user, user_rows in df.groupby('user'):
        # Square root of the fitted per-user sigma parameter.
        sigma = np.sqrt(sigma_params[dataModel.getUidByUser(user)])
        shape = (dataModel.getPriceIxNum(), dataModel.getFeaturesNum())
        feature = np.zeros(shape)

        def visit_row(row):
            # Called immediately by ``apply`` below, so the closure always
            # sees this iteration's ``feature`` and ``sigma``.
            return _construct_mention_feature_apply(
                dataModel, feature, row, sigma)

        user_rows.apply(visit_row, axis=1)
        features_by_uid[dataModel.getUidByUser(user)] = feature
    return features_by_uid
示例#5
0
def main():
    """Fit the Gaussian price model on the phones CSV and print the smallest
    and largest fitted ``'phones'`` sigma values, separated by a space."""
    phones = pd.read_csv('../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': phones})
    _mu, sigma = MyGaussian.gaussian_curve_fit(price_df)
    phone_sigma = sigma['phones']
    # Single pre-formatted string: emits the same text as the Python 2
    # statement ``print lo, hi``.
    print('%s %s' % (min(phone_sigma), max(phone_sigma)))
示例#6
0
def main():
    """Fit the Gaussian price model on the phones CSV and print the smallest
    and largest fitted ``'phones'`` sigma values, separated by a space."""
    phones = pd.read_csv('../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': phones})
    _mu, sigma = MyGaussian.gaussian_curve_fit(price_df)
    phone_sigma = sigma['phones']
    # Single pre-formatted string: emits the same text as the Python 2
    # statement ``print lo, hi``.
    print('%s %s' % (min(phone_sigma), max(phone_sigma)))