def fit(self, trainSamples, trainTargets):
    """Fit the reconstructed rating matrix and the per-user weights.

    The method performs these steps in order:

    1. Wraps the training data in a ``MemeryDataModel`` and densifies it.
    2. Reconstructs the dense matrix with ``isvd``, sets every
       reconstructed entry below 1 to NaN, and reconstructs again; the
       second reconstruction is stored in ``self.mf_rate``.
    3. Builds ``self.personal_item_price``, a (users x items) float
       matrix in which every row holds the item prices taken from
       ``trainSamples``.
    4. Stores the two values returned by
       ``MyGaussian.gaussian_curve_fit`` in ``self.param_mu`` and
       ``self.param_sigm``.
    5. Initialises ``self.alpha`` / ``self.beta`` randomly and
       ``self.gamma`` to zeros, then for every user applies
       ``self.update`` to each item pair from ``self.sample``, decaying
       ``self.lambda_iter`` by 0.9 after every pair.
    6. Restores ``self.lambda_iter`` and resets ``self.beta`` to zeros.

    Args:
        trainSamples: Rows convertible to a DataFrame with the columns
            ``user``, ``item`` and ``price``.
        trainTargets: Passed through to ``MemeryDataModel`` unchanged.
    """
    self.dataModel = MemeryDataModel(trainSamples, trainTargets)
    n_users = self.dataModel.getUsersNum()
    n_items = self.dataModel.getItemsNum()

    # Two-pass reconstruction: entries that the first pass rebuilds below 1
    # are marked NaN before the second pass.
    # presumably isvd treats NaN as "missing" -- TODO confirm.
    ratings = np.array(self.dataModel.getData().todense())
    u, s, v = isvd(ratings)
    reconstructed = np.dot(u, np.dot(s, v))
    reconstructed[reconstructed < 1] = np.nan
    u, s, v = isvd(reconstructed)
    self.mf_rate = np.dot(u, np.dot(s, v))

    train_data = pd.DataFrame(trainSamples,
                              columns=['user', 'item', 'price'])
    # Plain column access replaces the removed ``DataFrame.ix`` indexer.
    # When an item occurs in several rows, the price of its last row wins.
    item_price = dict(zip(train_data['item'], train_data['price']))

    # The stored price depends only on the item, so the row is built once
    # and broadcast to every user instead of being refilled per user.
    # NOTE(review): the previous code also computed a user-relative delta,
    # (item price - user mean price) / user mean price, but never stored
    # it; confirm that raw prices are what this matrix should contain.
    item_prices = [item_price[self.dataModel.getItemByIid(item_ix)]
                   for item_ix in range(n_items)]
    self.personal_item_price = np.empty((n_users, n_items))
    self.personal_item_price[:] = item_prices

    price_df = Construction.get_user_category_buy_price(
        {'phones': train_data})
    self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    # alpha is drawn before beta so the random stream is consumed in the
    # same order as before.
    self.alpha = np.random.rand(n_users)
    self.beta = np.random.rand(n_users)
    self.gamma = np.zeros(n_users)

    origin_lambda_iter = self.lambda_iter
    for user_ix in range(n_users):
        # Samples are drawn before the step size is reset, as before.
        samples = self.sample(user_ix, self.max_iter)
        self.lambda_iter = origin_lambda_iter
        for item_1, item_2 in samples:
            new_alpha, new_beta = self.update(user_ix, item_1, item_2)
            # Every update is accepted; an early-stopping check on the
            # objective value used to sit here and is disabled.
            self.alpha[user_ix] = new_alpha
            self.beta[user_ix] = new_beta
            self.lambda_iter = self.lambda_iter * 0.9

    # NOTE(review): these two statements are assumed to run once, after the
    # user loop. The reset discards every learned beta -- confirm intent.
    self.lambda_iter = origin_lambda_iter
    self.beta = np.zeros(n_users)
def construct_price_feature(df, dataModel):
    """Build one price-feature matrix per user.

    For every user group in ``df`` a zero matrix of shape
    ``(dataModel.getPriceIxNum(), dataModel.getFeaturesNum())`` is created
    and handed, together with each of the user's rows, to
    ``_construct_mention_feature_apply``.

    Args:
        df: DataFrame with at least a ``user`` column; it is also passed to
            ``get_user_category_buy_price``.
        dataModel: Object providing ``getUidByUser``, ``getPriceIxNum`` and
            ``getFeaturesNum``.

    Returns:
        dict mapping ``dataModel.getUidByUser(user)`` to that user's
        feature matrix.
    """
    price_df = get_user_category_buy_price(df, dataModel)
    # Only the second fitted parameter is needed here.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    feature_shape = (dataModel.getPriceIxNum(), dataModel.getFeaturesNum())
    result = {}
    for user, user_df in df.groupby('user'):
        uid = dataModel.getUidByUser(user)
        # presumably param_sigm holds variances, hence the square root --
        # TODO confirm against MyGaussian.gaussian_curve_fit.
        user_sigm = np.sqrt(param_sigm[uid])
        user_price_feature = np.zeros(feature_shape)
        # The helper's return value was always discarded, so it presumably
        # fills ``user_price_feature`` in place. An explicit loop calls it
        # exactly once per row; DataFrame.apply on older pandas could call
        # the function twice on the first row.
        for _, row in user_df.iterrows():
            _construct_mention_feature_apply(
                dataModel, user_price_feature, row, user_sigm)
        result[uid] = user_price_feature
    return result
def fit(self, trainSamples, trainTargets):
    """Fit the reconstructed rating matrix and the per-user weights.

    The method performs these steps in order:

    1. Wraps the training data in a ``MemeryDataModel`` and densifies it.
    2. Reconstructs the dense matrix with ``isvd``, sets every
       reconstructed entry below 1 to NaN, and reconstructs again; the
       second reconstruction is stored in ``self.mf_rate``.
    3. Builds ``self.personal_item_price``, a (users x items) float
       matrix in which every row holds the item prices taken from
       ``trainSamples``.
    4. Stores the two values returned by
       ``MyGaussian.gaussian_curve_fit`` in ``self.param_mu`` and
       ``self.param_sigm``.
    5. Initialises ``self.alpha`` / ``self.beta`` randomly and
       ``self.gamma`` to zeros, then for every user applies
       ``self.update`` to each item pair from ``self.sample``, decaying
       ``self.lambda_iter`` by 0.9 after every pair.
    6. Restores ``self.lambda_iter`` and resets ``self.beta`` to zeros.

    Args:
        trainSamples: Rows convertible to a DataFrame with the columns
            ``user``, ``item`` and ``price``.
        trainTargets: Passed through to ``MemeryDataModel`` unchanged.
    """
    self.dataModel = MemeryDataModel(trainSamples, trainTargets)
    n_users = self.dataModel.getUsersNum()
    n_items = self.dataModel.getItemsNum()

    # Two-pass reconstruction: entries that the first pass rebuilds below 1
    # are marked NaN before the second pass.
    # presumably isvd treats NaN as "missing" -- TODO confirm.
    ratings = np.array(self.dataModel.getData().todense())
    u, s, v = isvd(ratings)
    reconstructed = np.dot(u, np.dot(s, v))
    reconstructed[reconstructed < 1] = np.nan
    u, s, v = isvd(reconstructed)
    self.mf_rate = np.dot(u, np.dot(s, v))

    train_data = pd.DataFrame(trainSamples,
                              columns=['user', 'item', 'price'])
    # Plain column access replaces the removed ``DataFrame.ix`` indexer.
    # When an item occurs in several rows, the price of its last row wins.
    item_price = dict(zip(train_data['item'], train_data['price']))

    # The stored price depends only on the item, so the row is built once
    # and broadcast to every user instead of being refilled per user.
    # NOTE(review): the previous code also computed a user-relative delta,
    # (item price - user mean price) / user mean price, but never stored
    # it; confirm that raw prices are what this matrix should contain.
    item_prices = [item_price[self.dataModel.getItemByIid(item_ix)]
                   for item_ix in range(n_items)]
    self.personal_item_price = np.empty((n_users, n_items))
    self.personal_item_price[:] = item_prices

    price_df = Construction.get_user_category_buy_price(
        {'phones': train_data})
    self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    # alpha is drawn before beta so the random stream is consumed in the
    # same order as before.
    self.alpha = np.random.rand(n_users)
    self.beta = np.random.rand(n_users)
    self.gamma = np.zeros(n_users)

    origin_lambda_iter = self.lambda_iter
    for user_ix in range(n_users):
        # Samples are drawn before the step size is reset, as before.
        samples = self.sample(user_ix, self.max_iter)
        self.lambda_iter = origin_lambda_iter
        for item_1, item_2 in samples:
            new_alpha, new_beta = self.update(user_ix, item_1, item_2)
            # Every update is accepted; an early-stopping check on the
            # objective value used to sit here and is disabled.
            self.alpha[user_ix] = new_alpha
            self.beta[user_ix] = new_beta
            self.lambda_iter = self.lambda_iter * 0.9

    # NOTE(review): these two statements are assumed to run once, after the
    # user loop. The reset discards every learned beta -- confirm intent.
    self.lambda_iter = origin_lambda_iter
    self.beta = np.zeros(n_users)
def construct_price_feature(df, dataModel):
    """Build one price-feature matrix per user.

    For every user group in ``df`` a zero matrix of shape
    ``(dataModel.getPriceIxNum(), dataModel.getFeaturesNum())`` is created
    and handed, together with each of the user's rows, to
    ``_construct_mention_feature_apply``.

    Args:
        df: DataFrame with at least a ``user`` column; it is also passed to
            ``get_user_category_buy_price``.
        dataModel: Object providing ``getUidByUser``, ``getPriceIxNum`` and
            ``getFeaturesNum``.

    Returns:
        dict mapping ``dataModel.getUidByUser(user)`` to that user's
        feature matrix.
    """
    price_df = get_user_category_buy_price(df, dataModel)
    # Only the second fitted parameter is needed here.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)

    feature_shape = (dataModel.getPriceIxNum(), dataModel.getFeaturesNum())
    result = {}
    for user, user_df in df.groupby('user'):
        uid = dataModel.getUidByUser(user)
        # presumably param_sigm holds variances, hence the square root --
        # TODO confirm against MyGaussian.gaussian_curve_fit.
        user_sigm = np.sqrt(param_sigm[uid])
        user_price_feature = np.zeros(feature_shape)
        # The helper's return value was always discarded, so it presumably
        # fills ``user_price_feature`` in place. An explicit loop calls it
        # exactly once per row; DataFrame.apply on older pandas could call
        # the function twice on the first row.
        for _, row in user_df.iterrows():
            _construct_mention_feature_apply(
                dataModel, user_price_feature, row, user_sigm)
        result[uid] = user_price_feature
    return result
def main():
    """Print the smallest and largest fitted 'phones' sigma value.

    Reads the phones CSV, passes it (keyed as ``'phones'``) through
    ``Construction.get_user_category_buy_price`` and
    ``MyGaussian.gaussian_curve_fit``, then prints the minimum and maximum
    of ``param_sigm['phones']`` separated by a single space.
    """
    phones = pd.read_csv(
        '../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': phones})
    # Only the second fitted parameter is reported.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)
    phone_sigm = param_sigm['phones']
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s' % (min(phone_sigm), max(phone_sigm)))
def main():
    """Print the smallest and largest fitted 'phones' sigma value.

    Reads the phones CSV, passes it (keyed as ``'phones'``) through
    ``Construction.get_user_category_buy_price`` and
    ``MyGaussian.gaussian_curve_fit``, then prints the minimum and maximum
    of ``param_sigm['phones']`` separated by a single space.
    """
    phones = pd.read_csv(
        '../../../dataset/workplace/filter_phones_format.csv')
    price_df = Construction.get_user_category_buy_price({'phones': phones})
    # Only the second fitted parameter is reported.
    _, param_sigm = MyGaussian.gaussian_curve_fit(price_df)
    phone_sigm = param_sigm['phones']
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s' % (min(phone_sigm), max(phone_sigm)))