def update(self, user_ix, item_i, item_j):
    """Compute one pairwise update of a user's ``alpha`` and ``beta``.

    Nothing is written back to ``self``; the caller decides whether to store
    the returned values.

    ``alpha`` moves by ``lambda_iter * (rate_i - rate_j)``, where the rates
    come from ``self.mf_rate``. ``beta`` moves by
    ``lambda_iter * (exp(prob_i) - exp(prob_j))``, where each ``prob`` is the
    value ``MyGaussian.get_prob_from_gaussian`` returns for the item's entry
    in ``self.personal_item_price``. Both weights are then reduced by
    ``2 * lambda_regular * weight**2``.

    Earlier design notes kept in this docstring sketched a ratio-based
    gradient that also updated ``gamma``; the code below does not implement
    it and no ``gamma`` is returned.

    Args:
        user_ix: Row index of the user in the model's arrays.
        item_i: Column index of the first item of the pair (presumably the
            preferred one -- verify against ``self.sample``).
        item_j: Column index of the second item of the pair.

    Returns:
        Tuple ``(new_alpha, new_beta)``.
    """
    def price_prob(item_ix):
        # Value returned by the fitted Gaussian helper for this user's
        # 'phones' category at the item's stored price.
        return MyGaussian.get_prob_from_gaussian(
            self.dataModel.getUserByUid(user_ix),
            'phones',
            self.personal_item_price[user_ix][item_ix],
            self.param_mu,
            self.param_sigm)

    current_alpha = self.alpha[user_ix]
    current_beta = self.beta[user_ix]
    user_rates = self.mf_rate[user_ix]
    rate_gap = user_rates[item_i] - user_rates[item_j]

    first_prob = price_prob(item_i)
    second_prob = price_prob(item_j)
    prob_gap = np.exp(first_prob) - np.exp(second_prob)

    # NOTE(review): the penalty is quadratic in the weight and is not scaled
    # by lambda_iter; an L2 gradient would be linear -- confirm this is
    # intended.
    alpha_penalty = 2 * self.lambda_regular * current_alpha**2
    beta_penalty = 2 * self.lambda_regular * current_beta**2

    new_alpha = current_alpha + self.lambda_iter * rate_gap - alpha_penalty
    new_beta = current_beta + self.lambda_iter * prob_gap - beta_penalty
    return new_alpha, new_beta
def fit(self, trainSamples, trainTargets):
    """Fit the rating reconstruction, the price model and per-user weights.

    Steps, in order:

    1. Build ``self.dataModel`` and reconstruct its dense user-item matrix
       with two ``isvd`` passes. Entries of the first reconstruction that
       are below 1 are replaced by NaN before the second pass. The second
       reconstruction is stored as ``self.mf_rate``.
    2. Build ``self.personal_item_price`` (users x items) from the item
       prices found in ``trainSamples``.
    3. Fit ``self.param_mu`` / ``self.param_sigm`` with
       ``MyGaussian.gaussian_curve_fit`` on the 'phones' price frame.
    4. Randomly initialise ``alpha`` and ``beta``, zero ``gamma``, then run
       ``self.update`` over the pairs from ``self.sample`` for every user,
       multiplying ``self.lambda_iter`` by 0.9 after each pair.

    Args:
        trainSamples: Rows accepted by ``pd.DataFrame`` with the columns
            ``user``, ``item`` and ``price``, in that order.
        trainTargets: Passed through to ``MemeryDataModel`` unchanged.
    """
    self.dataModel = MemeryDataModel(trainSamples, trainTargets)

    # Two-pass reconstruction of the rating matrix.
    data = np.array(self.dataModel.getData().todense())
    u, s, v = isvd(data)
    new_data = np.dot(u, np.dot(s, v))
    new_data[new_data < 1] = np.nan
    u, s, v = isvd(new_data)
    self.mf_rate = np.dot(u, np.dot(s, v))

    users_num = self.dataModel.getUsersNum()
    items_num = self.dataModel.getItemsNum()

    train_data = pd.DataFrame(trainSamples,
                              columns=['user', 'item', 'price'])
    # Plain column access replaces the removed ``.ix`` indexer. When an item
    # occurs in several rows, the last price seen wins.
    item_price = dict(zip(train_data['item'], train_data['price']))

    # Every user row holds the same raw item prices, so look each item up
    # once and repeat the row.
    # NOTE(review): the previous revision also computed a per-user relative
    # price, (price - user_avg_price) / user_avg_price, but never stored it.
    # Confirm that the raw price is what should be stored here.
    item_prices = np.array(
        [item_price[self.dataModel.getItemByIid(item_ix)]
         for item_ix in xrange(items_num)],
        dtype=float)
    self.personal_item_price = np.tile(item_prices, (users_num, 1))

    price_df = Construction.get_user_category_buy_price(
        {'phones': train_data})
    self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    self.alpha = np.random.rand(users_num)
    self.beta = np.random.rand(users_num)
    self.gamma = np.zeros(users_num)

    origin_lambda_iter = self.lambda_iter
    try:
        for user_ix in xrange(users_num):
            samples = self.sample(user_ix, self.max_iter)
            # Each user starts again from the configured step size.
            self.lambda_iter = origin_lambda_iter
            for item_1, item_2 in samples:
                new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                # Early stopping on a target value used to be sketched here
                # but was disabled; every sampled pair is applied.
                self.alpha[user_ix] = new_alpha
                self.beta[user_ix] = new_beta
                self.lambda_iter = self.lambda_iter * 0.9
    finally:
        # Restore the configured step size even if training raises.
        self.lambda_iter = origin_lambda_iter

    # NOTE(review): this discards every beta learned above, so the price
    # term contributes nothing in model_score. Kept as in the original --
    # confirm it is a deliberate ablation.
    self.beta = np.zeros(users_num)
def model_score(self, user_ix, item_ix):
    """Return the model's score of one item for one user.

    The score is ``alpha * rate + beta * exp(prob) + gamma``. ``rate`` is
    the entry of ``self.mf_rate`` for the pair, and ``prob`` is the value
    ``MyGaussian.get_prob_from_gaussian`` returns for the item's entry in
    ``self.personal_item_price``.

    Args:
        user_ix: Row index of the user in the model's arrays.
        item_ix: Column index of the item.

    Returns:
        The score for the (user, item) pair.
    """
    rating_weight = self.alpha[user_ix]
    price_weight = self.beta[user_ix]
    bias = self.gamma[user_ix]
    rating = self.mf_rate[user_ix, item_ix]
    item_price = self.personal_item_price[user_ix][item_ix]

    price_prob = MyGaussian.get_prob_from_gaussian(
        self.dataModel.getUserByUid(user_ix),
        'phones',
        item_price,
        self.param_mu,
        self.param_sigm)

    return rating_weight * rating + price_weight * np.exp(price_prob) + bias
def construct_price_feature(df, dataModel):
    """Build one price-index x feature matrix per user.

    A Gaussian is fitted to the frame produced by
    ``get_user_category_buy_price``. Each user's matrix starts at zero and
    is filled by calling ``_construct_mention_feature_apply`` on every row
    of that user, with the square root of the user's fitted ``param_sigm``
    entry as the spread.

    Args:
        df: DataFrame with a ``user`` column, plus whatever columns
            ``_construct_mention_feature_apply`` reads from each row.
        dataModel: Object providing ``getUidByUser``, ``getPriceIxNum`` and
            ``getFeaturesNum``. It is also passed on to the helpers.

    Returns:
        Dict mapping each user's uid to an array of shape
        ``(getPriceIxNum(), getFeaturesNum())``.
    """
    price_df = get_user_category_buy_price(df, dataModel)
    _mu, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    result = {}
    for user, user_df in df.groupby('user'):
        uid = dataModel.getUidByUser(user)
        user_sigm = np.sqrt(param_sigm[uid])
        user_price_feature = np.zeros(
            (dataModel.getPriceIxNum(), dataModel.getFeaturesNum()))
        # The helper accumulates in place, so use an explicit loop. Older
        # pandas documented that DataFrame.apply may call the function twice
        # on the first row, which would double-count that row.
        for _, row in user_df.iterrows():
            _construct_mention_feature_apply(
                dataModel, user_price_feature, row, user_sigm)
        result[uid] = user_price_feature
    return result
def update(self, user_ix, item_i, item_j):
    """Compute one pairwise update of a user's ``alpha`` and ``beta``.

    Nothing is written back to ``self``; the caller decides whether to store
    the returned values.

    ``alpha`` moves by ``lambda_iter * (rate_i - rate_j)``, where the rates
    come from ``self.mf_rate``. ``beta`` moves by
    ``lambda_iter * (exp(prob_i) - exp(prob_j))``, where each ``prob`` is the
    value ``MyGaussian.get_prob_from_gaussian`` returns for the item's entry
    in ``self.personal_item_price``. Both weights are then reduced by
    ``2 * lambda_regular * weight**2``.

    Earlier design notes kept in this docstring sketched a ratio-based
    gradient that also updated ``gamma``; the code below does not implement
    it and no ``gamma`` is returned.

    Args:
        user_ix: Row index of the user in the model's arrays.
        item_i: Column index of the first item of the pair (presumably the
            preferred one -- verify against ``self.sample``).
        item_j: Column index of the second item of the pair.

    Returns:
        Tuple ``(new_alpha, new_beta)``.
    """
    def price_prob(item_ix):
        # Value returned by the fitted Gaussian helper for this user's
        # 'phones' category at the item's stored price.
        return MyGaussian.get_prob_from_gaussian(
            self.dataModel.getUserByUid(user_ix),
            'phones',
            self.personal_item_price[user_ix][item_ix],
            self.param_mu,
            self.param_sigm)

    current_alpha = self.alpha[user_ix]
    current_beta = self.beta[user_ix]
    user_rates = self.mf_rate[user_ix]
    rate_gap = user_rates[item_i] - user_rates[item_j]

    first_prob = price_prob(item_i)
    second_prob = price_prob(item_j)
    prob_gap = np.exp(first_prob) - np.exp(second_prob)

    # NOTE(review): the penalty is quadratic in the weight and is not scaled
    # by lambda_iter; an L2 gradient would be linear -- confirm this is
    # intended.
    alpha_penalty = 2 * self.lambda_regular * current_alpha**2
    beta_penalty = 2 * self.lambda_regular * current_beta**2

    new_alpha = current_alpha + self.lambda_iter * rate_gap - alpha_penalty
    new_beta = current_beta + self.lambda_iter * prob_gap - beta_penalty
    return new_alpha, new_beta
def fit(self, trainSamples, trainTargets):
    """Fit the rating reconstruction, the price model and per-user weights.

    Steps, in order:

    1. Build ``self.dataModel`` and reconstruct its dense user-item matrix
       with two ``isvd`` passes. Entries of the first reconstruction that
       are below 1 are replaced by NaN before the second pass. The second
       reconstruction is stored as ``self.mf_rate``.
    2. Build ``self.personal_item_price`` (users x items) from the item
       prices found in ``trainSamples``.
    3. Fit ``self.param_mu`` / ``self.param_sigm`` with
       ``MyGaussian.gaussian_curve_fit`` on the 'phones' price frame.
    4. Randomly initialise ``alpha`` and ``beta``, zero ``gamma``, then run
       ``self.update`` over the pairs from ``self.sample`` for every user,
       multiplying ``self.lambda_iter`` by 0.9 after each pair.

    Args:
        trainSamples: Rows accepted by ``pd.DataFrame`` with the columns
            ``user``, ``item`` and ``price``, in that order.
        trainTargets: Passed through to ``MemeryDataModel`` unchanged.
    """
    self.dataModel = MemeryDataModel(trainSamples, trainTargets)

    # Two-pass reconstruction of the rating matrix.
    data = np.array(self.dataModel.getData().todense())
    u, s, v = isvd(data)
    new_data = np.dot(u, np.dot(s, v))
    new_data[new_data < 1] = np.nan
    u, s, v = isvd(new_data)
    self.mf_rate = np.dot(u, np.dot(s, v))

    users_num = self.dataModel.getUsersNum()
    items_num = self.dataModel.getItemsNum()

    train_data = pd.DataFrame(trainSamples,
                              columns=['user', 'item', 'price'])
    # Plain column access replaces the removed ``.ix`` indexer. When an item
    # occurs in several rows, the last price seen wins.
    item_price = dict(zip(train_data['item'], train_data['price']))

    # Every user row holds the same raw item prices, so look each item up
    # once and repeat the row.
    # NOTE(review): the previous revision also computed a per-user relative
    # price, (price - user_avg_price) / user_avg_price, but never stored it.
    # Confirm that the raw price is what should be stored here.
    item_prices = np.array(
        [item_price[self.dataModel.getItemByIid(item_ix)]
         for item_ix in xrange(items_num)],
        dtype=float)
    self.personal_item_price = np.tile(item_prices, (users_num, 1))

    price_df = Construction.get_user_category_buy_price(
        {'phones': train_data})
    self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    self.alpha = np.random.rand(users_num)
    self.beta = np.random.rand(users_num)
    self.gamma = np.zeros(users_num)

    origin_lambda_iter = self.lambda_iter
    try:
        for user_ix in xrange(users_num):
            samples = self.sample(user_ix, self.max_iter)
            # Each user starts again from the configured step size.
            self.lambda_iter = origin_lambda_iter
            for item_1, item_2 in samples:
                new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                # Early stopping on a target value used to be sketched here
                # but was disabled; every sampled pair is applied.
                self.alpha[user_ix] = new_alpha
                self.beta[user_ix] = new_beta
                self.lambda_iter = self.lambda_iter * 0.9
    finally:
        # Restore the configured step size even if training raises.
        self.lambda_iter = origin_lambda_iter

    # NOTE(review): this discards every beta learned above, so the price
    # term contributes nothing in model_score. Kept as in the original --
    # confirm it is a deliberate ablation.
    self.beta = np.zeros(users_num)
def model_score(self, user_ix, item_ix):
    """Return the model's score of one item for one user.

    The score is ``alpha * rate + beta * exp(prob) + gamma``. ``rate`` is
    the entry of ``self.mf_rate`` for the pair, and ``prob`` is the value
    ``MyGaussian.get_prob_from_gaussian`` returns for the item's entry in
    ``self.personal_item_price``.

    Args:
        user_ix: Row index of the user in the model's arrays.
        item_ix: Column index of the item.

    Returns:
        The score for the (user, item) pair.
    """
    rating_weight = self.alpha[user_ix]
    price_weight = self.beta[user_ix]
    bias = self.gamma[user_ix]
    rating = self.mf_rate[user_ix, item_ix]
    item_price = self.personal_item_price[user_ix][item_ix]

    price_prob = MyGaussian.get_prob_from_gaussian(
        self.dataModel.getUserByUid(user_ix),
        'phones',
        item_price,
        self.param_mu,
        self.param_sigm)

    return rating_weight * rating + price_weight * np.exp(price_prob) + bias
def construct_price_feature(df, dataModel):
    """Build one price-index x feature matrix per user.

    A Gaussian is fitted to the frame produced by
    ``get_user_category_buy_price``. Each user's matrix starts at zero and
    is filled by calling ``_construct_mention_feature_apply`` on every row
    of that user, with the square root of the user's fitted ``param_sigm``
    entry as the spread.

    Args:
        df: DataFrame with a ``user`` column, plus whatever columns
            ``_construct_mention_feature_apply`` reads from each row.
        dataModel: Object providing ``getUidByUser``, ``getPriceIxNum`` and
            ``getFeaturesNum``. It is also passed on to the helpers.

    Returns:
        Dict mapping each user's uid to an array of shape
        ``(getPriceIxNum(), getFeaturesNum())``.
    """
    price_df = get_user_category_buy_price(df, dataModel)
    _mu, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    result = {}
    for user, user_df in df.groupby('user'):
        uid = dataModel.getUidByUser(user)
        user_sigm = np.sqrt(param_sigm[uid])
        user_price_feature = np.zeros(
            (dataModel.getPriceIxNum(), dataModel.getFeaturesNum()))
        # The helper accumulates in place, so use an explicit loop. Older
        # pandas documented that DataFrame.apply may call the function twice
        # on the first row, which would double-count that row.
        for _, row in user_df.iterrows():
            _construct_mention_feature_apply(
                dataModel, user_price_feature, row, user_sigm)
        result[uid] = user_price_feature
    return result
def _construct_mention_feature_apply(dataModel, origin_user_price_feature, row,
                                     user_sigm):
    """Add one row's Gaussian-weighted contribution to a user's matrix.

    The row's price index is mapped through ``dataModel.getPidByPriceIx``
    and used as the centre. For every price position from
    ``int(centre - user_sigm)`` up to, but not including,
    ``int(centre + user_sigm)``, clamped to ``[0, getPriceIxNum()]``, the
    value of ``MyGaussian.get_prob_from_gaussian(position, centre,
    user_sigm)`` is added to the entries of the row's features.
    ``origin_user_price_feature`` is modified in place; nothing is returned.

    Args:
        dataModel: Object providing ``getPidByPriceIx``,
            ``getFidByFeature`` and ``getPriceIxNum``.
        origin_user_price_feature: Matrix indexed as ``[price_ix][features]``
            that receives the contribution.
        row: Mapping with a ``price_ix`` entry and a ``feature`` entry. The
            latter is a string that evaluates to a sequence whose elements
            carry the feature name at position 0.
        user_sigm: Spread used both for the window width and as the third
            argument to the Gaussian helper.
    """
    centre = dataModel.getPidByPriceIx(row['price_ix'])

    # SECURITY NOTE(review): eval() executes whatever expression the
    # 'feature' cell holds. This is only acceptable for trusted data files;
    # consider ast.literal_eval if the cells are plain literals.
    feature_ids = [dataModel.getFidByFeature(entry[0])
                   for entry in eval(row['feature'])]

    # Clamp the window to the valid range of price positions.
    lower = max(0, int(centre - user_sigm))
    upper = min(int(centre + user_sigm), dataModel.getPriceIxNum())

    for position in xrange(lower, upper):
        weight = MyGaussian.get_prob_from_gaussian(position, centre, user_sigm)
        bucket = origin_user_price_feature[position]
        # NOTE(review): if this is a NumPy array, a feature id that appears
        # more than once in feature_ids is still incremented only once per
        # position -- confirm that is the intent.
        bucket[feature_ids] = bucket[feature_ids] + weight
def main():
    """Print the smallest and largest fitted 'phones' sigma.

    Reads the phones CSV, builds the price frame with
    ``Construction.get_user_category_buy_price``, fits it with
    ``MyGaussian.gaussian_curve_fit`` and prints the minimum and maximum of
    ``param_sigm['phones']`` separated by a single space.
    """
    phones = pd.read_csv('../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': phones})
    param_mu, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    phone_sigmas = param_sigm['phones']
    smallest = min(phone_sigmas)
    largest = max(phone_sigmas)
    # A single pre-formatted string prints the same text as the two-operand
    # print statement.
    print("%s %s" % (smallest, largest))
def main():
    """Print the smallest and largest fitted 'phones' sigma.

    Reads the phones CSV, builds the price frame with
    ``Construction.get_user_category_buy_price``, fits it with
    ``MyGaussian.gaussian_curve_fit`` and prints the minimum and maximum of
    ``param_sigm['phones']`` separated by a single space.
    """
    phones = pd.read_csv('../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': phones})
    param_mu, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    phone_sigmas = param_sigm['phones']
    smallest = min(phone_sigmas)
    largest = max(phone_sigmas)
    # A single pre-formatted string prints the same text as the two-operand
    # print statement.
    print("%s %s" % (smallest, largest))
def _construct_mention_feature_apply(dataModel, origin_user_price_feature, row,
                                     user_sigm):
    """Add one row's Gaussian-weighted contribution to a user's matrix.

    The row's price index is mapped through ``dataModel.getPidByPriceIx``
    and used as the centre. For every price position from
    ``int(centre - user_sigm)`` up to, but not including,
    ``int(centre + user_sigm)``, clamped to ``[0, getPriceIxNum()]``, the
    value of ``MyGaussian.get_prob_from_gaussian(position, centre,
    user_sigm)`` is added to the entries of the row's features.
    ``origin_user_price_feature`` is modified in place; nothing is returned.

    Args:
        dataModel: Object providing ``getPidByPriceIx``,
            ``getFidByFeature`` and ``getPriceIxNum``.
        origin_user_price_feature: Matrix indexed as ``[price_ix][features]``
            that receives the contribution.
        row: Mapping with a ``price_ix`` entry and a ``feature`` entry. The
            latter is a string that evaluates to a sequence whose elements
            carry the feature name at position 0.
        user_sigm: Spread used both for the window width and as the third
            argument to the Gaussian helper.
    """
    centre = dataModel.getPidByPriceIx(row['price_ix'])

    # SECURITY NOTE(review): eval() executes whatever expression the
    # 'feature' cell holds. This is only acceptable for trusted data files;
    # consider ast.literal_eval if the cells are plain literals.
    feature_ids = [dataModel.getFidByFeature(entry[0])
                   for entry in eval(row['feature'])]

    # Clamp the window to the valid range of price positions.
    lower = max(0, int(centre - user_sigm))
    upper = min(int(centre + user_sigm), dataModel.getPriceIxNum())

    for position in xrange(lower, upper):
        weight = MyGaussian.get_prob_from_gaussian(position, centre, user_sigm)
        bucket = origin_user_price_feature[position]
        # NOTE(review): if this is a NumPy array, a feature id that appears
        # more than once in feature_ids is still incremented only once per
        # position -- confirm that is the intent.
        bucket[feature_ids] = bucket[feature_ids] + weight