def execute(self):
    # self.data_path_clean = './data/ml100k/ml100k_train.dat'
    # self.data_path_attacked = './results/data_attacked/ml100k/ml100k_AUSH_0.data'
    path_test = self.data_path_clean.replace('train', 'test')

    # load the real profile matrix
    dataset_class_real = DataLoader(self.data_path_clean, path_test)
    train_data_df_real, _, n_users_real, n_items_real = dataset_class_real.load_file_as_dataFrame()
    train_matrix_real, _ = dataset_class_real.dataFrame_to_matrix(train_data_df_real, n_users_real, n_items_real)
    train_matrix_real = train_matrix_real.toarray()

    # load the fake profile matrix (fake users are appended after the real ones)
    dataset_class_attacked = DataLoader(self.data_path_attacked, path_test)
    train_data_df_attacked, _, n_users_attacked, n_items_attacked = dataset_class_attacked.load_file_as_dataFrame()
    train_matrix_attacked, _ = dataset_class_attacked.dataFrame_to_matrix(
        train_data_df_attacked, n_users_attacked, n_items_attacked)
    train_matrix_fake = train_matrix_attacked.toarray()[n_users_real:, :]

    # compute the item rating distributions of real and fake profiles
    real_item_distribution = self.get_item_distribution(train_matrix_real)
    fake_item_distribution = self.get_item_distribution(train_matrix_fake)

    # distances between the two distributions
    TVD_distance = self.get_TVD_distance(real_item_distribution, fake_item_distribution)
    JS_distance = self.get_JS_distance(real_item_distribution, fake_item_distribution)

    res_str = 'TVD:%.4f\tJS:%.4f' % (TVD_distance, JS_distance)
    print('result begin', res_str, 'result end')
    return TVD_distance, JS_distance
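# The three helpers used above (get_item_distribution, get_TVD_distance,
# get_JS_distance) are defined elsewhere in the evaluator. A minimal sketch,
# assuming each item's distribution is its histogram over the discrete rating
# values 1..5; this is not necessarily the repository's exact implementation.
import numpy as np
from scipy.spatial.distance import jensenshannon

def get_item_distribution_sketch(rating_matrix, num_ratings=5):
    # per-item counts of each rating value, lightly smoothed so every item is a valid distribution
    counts = np.stack([(rating_matrix == r).sum(axis=0) for r in range(1, num_ratings + 1)], axis=1).astype(float)
    counts += 1e-12
    return counts / counts.sum(axis=1, keepdims=True)

def get_TVD_distance_sketch(p, q):
    # total variation distance per item, averaged over items
    return float(np.mean(0.5 * np.abs(p - q).sum(axis=1)))

def get_JS_distance_sketch(p, q):
    # Jensen-Shannon distance per item (scipy returns the square root of the JS divergence), averaged over items
    return float(np.mean([jensenshannon(p_i, q_i) for p_i, q_i in zip(p, q)]))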
def prepare_data(self):
    self.path_train = './data/%s/%s_train.dat' % (self.data_set, self.data_set)
    path_test = './data/%s/%s_test.dat' % (self.data_set, self.data_set)

    dataset_class = DataLoader(self.path_train, path_test)
    self.train_data_df, self.test_data_df, self.n_users, self.n_items = dataset_class.load_file_as_dataFrame()
    train_matrix, _ = dataset_class.dataFrame_to_matrix(self.train_data_df, self.n_users, self.n_items)
    test_matrix, _ = dataset_class.dataFrame_to_matrix(self.test_data_df, self.n_users, self.n_items)
    self.train_array, self.test_array = train_matrix.toarray(), test_matrix.toarray()

    self.data_loader = torch.utils.data.DataLoader(
        dataset=torch.from_numpy(self.train_array).type(torch.float32),
        batch_size=self.batch_size_D, shuffle=True, drop_last=True)

    # users who have not yet rated the target item are the ones the attack aims at
    self.target_users = np.where(self.train_array[:, self.target_id] == 0)[0]
    attack_target = np.zeros((len(self.target_users), self.n_items))
    attack_target[:, self.target_id] = 1.0
    self.attack_target = torch.from_numpy(attack_target).type(torch.float32).to(self.device)
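# A hedged sketch of how the loader and target tensors built above are typically
# consumed in one GAN-style training epoch. self.generator, self.discriminator,
# self.optimizer_G and self.optimizer_D are assumptions about the surrounding
# attacker class, not attributes defined in this excerpt.
import torch

def train_one_epoch_sketch(self):
    bce = torch.nn.BCELoss()
    for real_profiles in self.data_loader:  # batches of shape (batch_size_D, n_items)
        real_profiles = real_profiles.to(self.device)
        fake_profiles = self.generator(real_profiles)

        # discriminator step: real profiles -> 1, fake profiles -> 0
        d_real = self.discriminator(real_profiles)
        d_fake = self.discriminator(fake_profiles.detach())
        loss_D = bce(d_real, torch.ones_like(d_real)) + bce(d_fake, torch.zeros_like(d_fake))
        self.optimizer_D.zero_grad()
        loss_D.backward()
        self.optimizer_D.step()

    # attack step: push predictions for the target users toward the target item
    target_profiles = torch.from_numpy(
        self.train_array[self.target_users]).type(torch.float32).to(self.device)
    pred = self.generator(target_profiles)
    # weighted MSE toward the maximum rating, masked by attack_target (1 only at the target item)
    loss_attack = ((pred - 5.0) ** 2 * self.attack_target).sum() / self.attack_target.sum()
    self.optimizer_G.zero_grad()
    loss_attack.backward()
    self.optimizer_G.step()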
class Attacker(object):
    def __init__(self):
        self.args = self.parse_args()
        self.data_set = self.args.data_set
        self.target_id = self.args.target_id
        self.attack_num = self.args.attack_num
        self.filler_num = self.args.filler_num
        # self.injected_path = self.args.injected_path

    @staticmethod
    def parse_args():
        parser = argparse.ArgumentParser(description="Run Attacker.")
        # supported datasets: ml100k / filmTrust / automotive
        parser.add_argument('--data_set', type=str, default='ml100k')  # , required=True)
        parser.add_argument('--target_id', type=int, default=62)  # , required=True)
        parser.add_argument('--attack_num', type=int, default=50)
        # filler_num - ml100k:90 / automotive:4
        parser.add_argument('--filler_num', type=int, default=36)  # , required=True)
        parser.add_argument('--cuda_id', type=int, default=0)  # , required=True)
        # parser.add_argument('--injected_path', type=str,
        #                     default='./results/data_attacked/ml100k/ml100k_attack_62.data')  # , required=True)
        return parser

    def prepare_data(self):
        self.path_train = './data/%s/%s_train.dat' % (self.data_set, self.data_set)
        path_test = './data/%s/%s_test.dat' % (self.data_set, self.data_set)
        self.dataset_class = DataLoader(self.path_train, path_test)
        self.train_data_df, _, self.n_users, self.n_items = self.dataset_class.load_file_as_dataFrame()

    def build_network(self):
        raise NotImplementedError

    def train(self):
        raise NotImplementedError

    def test(self, victim='SVD', detect=False, fake_array=None):
        """
        :param victim: victim recommender to attack - a single model name, 'all',
            a comma-separated list of names, or False to skip the attack step
        :param detect: whether to run the attack detectors on the injected profiles
        :param fake_array: optional pre-generated fake profile matrix
        :return: tab-separated result string
        """
        self.generate_injectedFile(fake_array)

        # detect
        res_detect_list = self.detect(detect)
        res_detect = '\t'.join(res_detect_list)

        # attack
        all_victim_models = ['SVD', 'NMF', 'SlopeOne', 'NeuMF', 'IAutoRec', 'UAutoRec']
        if victim is False:
            res_attack = ''
        elif victim in all_victim_models:
            self.attack(victim)
            res_attack_list = self.evaluate(victim)
            res_attack = '\t'.join(res_attack_list)
        else:
            if victim == 'all':
                victim_models = all_victim_models
            else:
                victim_models = victim.split(',')  # e.g. SlopeOne,SVD,NMF,IAutoRec,UAutoRec,NeuMF
            res_attack_list = []
            for victim_model in victim_models:
                self.attack(victim_model)
                cur_res_list = self.evaluate(victim_model)
                res_attack_list.append('\t:\t'.join([victim_model, '\t'.join(cur_res_list)]))
            res_attack = '\n' + '\n'.join(res_attack_list)

        res = '\t'.join([res_attack, res_detect])
        return res

    def evaluate(self, victim):
        attacker, recommender = self.__class__.__name__, victim
        args_dict = {
            'data_set': self.data_set,
            'test_path': './data/%s/%s_test.dat' % (self.data_set, self.data_set),
            'target_ids': self.target_id,
            'recommender': recommender,
            'attacker': attacker,
        }
        # predictions on the clean data are cached; compute them first if missing
        path_res_before_attack = './results/performance/mid_results/%s/%s_%s_%d.npy' % (
            self.data_set, self.data_set, recommender, self.target_id)
        if not os.path.exists(path_res_before_attack):
            print("path not exists", path_res_before_attack)
            cur_args_dict = {
                'exe_model_lib': 'recommender',
                'exe_model_class': recommender,
                'train_path': './data/%s/%s_train.dat' % (self.data_set, self.data_set),
                'model_path': './results/model_saved/%s/%s_%s' % (self.data_set, self.data_set, recommender),
                'target_prediction_path_prefix': './results/performance/mid_results/%s/%s_%s' % (
                    self.data_set, self.data_set, recommender),
            }
            cur_args_dict.update(args_dict)
            args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()])
            return_file = os.popen('%s ./execute_model.py %s' % (PythonCommand, args_str))
            return_str = return_file.read()

        result_list = []
        cur_args_dict = {
            'exe_model_lib': 'evaluator',
            'exe_model_class': 'Attack_Effect_Evaluator',
            'data_path_clean': './results/performance/mid_results/%s/%s_%s_%d.npy' % (
                self.data_set, self.data_set, recommender, self.target_id),
            'data_path_attacked': './results/performance/mid_results/%s/%s_%s_%s_%d.npy' % (
                self.data_set, self.data_set, recommender, attacker, self.target_id),
        }
        cur_args_dict.update(args_dict)
        args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()])
        return_file = os.popen('%s ./execute_model.py %s' % (PythonCommand, args_str))
        # time.sleep(5)
        return_str = return_file.read()
        return_str = return_str[return_str.find('result begin') + 13:return_str.find('result end') - 2]
        result_list += [return_str]
        return_file.close()
        # print("========evaluate %s attack %s done.========" % (attacker, recommender))
        return result_list

    def detect(self, detect):
        if not detect:
            return []
        attacker = self.__class__.__name__
        result_list = []
        cur_args_dict = {
            'exe_model_lib': 'evaluator',
            'exe_model_class': 'Attack_Effect_Evaluator',
            'data_path_clean': './data/%s/%s_train.dat' % (self.data_set, self.data_set),
            'data_path_attacked': './results/data_attacked/%s/%s_%s_%d.data' % (
                self.data_set, self.data_set, attacker, self.target_id),
        }
        evaluators = ['Profile_Distance_Evaluator', 'FAP_Detector']
        for evaluator in evaluators:
            cur_args_dict.update({'exe_model_class': evaluator})
            args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()])
            return_file = os.popen('%s ./execute_model.py %s' % (PythonCommand, args_str))
            return_str = return_file.read()
            return_str = return_str[return_str.find('result begin') + 13:return_str.find('result end') - 2]
            result_list += [return_str]
        return result_list

    def attack(self, victim):
        attacker, recommender = self.__class__.__name__, victim
        args_dict = {
            'exe_model_lib': 'recommender',
            'exe_model_class': recommender,
            'data_set': self.data_set,
            'train_path': './results/data_attacked/%s/%s_%s_%d.data' % (
                self.data_set, self.data_set, self.__class__.__name__, self.target_id),
            'test_path': './data/%s/%s_test.dat' % (self.data_set, self.data_set),
            'target_ids': self.target_id,
            'recommender': recommender,
            'attacker': attacker,
            'model_path': './results/model_saved/%s/%s_%s_%s_%d' % (
                self.data_set, self.data_set, recommender, attacker, self.target_id),
            'target_prediction_path_prefix': './results/performance/mid_results/%s/%s_%s_%s' % (
                self.data_set, self.data_set, recommender, attacker),
        }
        args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in args_dict.items()])
        # remove any stale prediction file before retraining the victim model
        target_file = "%s_%d.npy" % (args_dict['target_prediction_path_prefix'], self.target_id)
        if os.path.exists(target_file):
            os.remove(target_file)
        return_file = os.popen('%s ./execute_model.py %s' % (PythonCommand, args_str))
        # time.sleep(60 * 3)
        return_str = return_file.read()
        return

    def execute(self):
        raise NotImplementedError

    def save(self, path):
        raise NotImplementedError

    def restore(self, path):
        raise NotImplementedError

    def generate_fakeMatrix(self):
        raise NotImplementedError

    def generate_injectedFile(self, fake_array=None):
        if fake_array is None:
            fake_array = self.generate_fakeMatrix()
        # injected_path = './results/data_attacked/ml100k/ml100k_attack_62.data'
        injected_path = './results/data_attacked/%s/%s_%s_%d.data' % (
            self.data_set, self.data_set, self.__class__.__name__, self.target_id)
        if os.path.exists(injected_path):
            # print('clear data in %s' % self.injected_path)
            os.remove(injected_path)
        shutil.copyfile(self.path_train, injected_path)
        # fake user ids start right after the real users
        uids = np.where(fake_array > 0)[0] + self.n_users
        iids = np.where(fake_array > 0)[1]
        values = fake_array[fake_array > 0]
        data_to_write = np.concatenate([np.expand_dims(x, 1) for x in [uids, iids, values]], 1)
        F_tuple_encode = lambda x: '\t'.join(map(str, [int(x[0]), int(x[1]), x[2]]))
        data_to_write = '\n'.join([F_tuple_encode(tuple_i) for tuple_i in data_to_write])
        # append the fake ratings after the copied training file (assumed to end with a newline)
        with open(injected_path, 'a+') as fout:
            fout.write(data_to_write)
        # print('Inject %s successfully' % self.injected_path)
        return

    def visualize(self, results):
        import matplotlib.pyplot as plt
        fig, ax_list = plt.subplots(1, len(results), figsize=(4 * len(results), 4))
        key = sorted(list(results.keys()))
        for idx, ax in enumerate(ax_list):
            if len(results[key[idx]]) == 0:
                continue
            ax.plot(results[key[idx]])
            ax.set_xlabel("iteration")
            ax.set_title(key[idx])
        # plt.show()
        fig_path = "./results/performance/figs/%s/%s_%d.png" \
                   % (self.data_set, self.__class__.__name__, self.target_id)
        plt.savefig(fig_path)
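# A hedged sketch of how a concrete attacker plugs into the base class above.
# The class name, the random filler strategy, and the parse_args override are
# illustrative assumptions, not code from the repository.
import numpy as np

class RandomAttackerSketch(Attacker):

    @staticmethod
    def parse_args():
        # extend the base parser and parse, since __init__ reads attributes from self.args
        parser = Attacker.parse_args()
        return parser.parse_args()

    def generate_fakeMatrix(self):
        # attack_num fake users: filler_num random items rated with the rounded global mean,
        # plus the target item rated with the maximum rating
        train_array = self.dataset_class.dataFrame_to_matrix(
            self.train_data_df, self.n_users, self.n_items)[0].toarray()
        global_mean = round(float(train_array[train_array > 0].mean()))
        fake = np.zeros((self.attack_num, self.n_items))
        for row in fake:
            fillers = np.random.choice(self.n_items, self.filler_num, replace=False)
            row[fillers] = global_mean
        fake[:, self.target_id] = 5.0
        return fake

    def execute(self):
        self.prepare_data()
        print(self.test(victim='SVD', detect=False))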
def execute(self):
    import numpy as np
    import matplotlib.pyplot as plt
    import time

    # load data
    # =================================
    # alternative input: user embeddings of a trained NeuMF victim model
    # path_dir = './results/performance/mid_results/%s' % (self.data_set)
    # user_embed_path = '%s/%s_NeuMF_%s_%d_user_embed.npy' % (
    #     path_dir, self.data_set, self.attacker, self.target_id)
    # self.x = np.load(user_embed_path)
    # =================================
    train_path = './results/data_attacked/%s/%s_%s_%d.data' % (
        self.data_set, self.data_set, self.attacker, self.target_id)
    test_path = './data/%s/%s_test.dat' % (self.data_set, self.data_set)
    dataset_class_attacked = DataLoader(train_path, test_path)
    train_data_df_attacked, _, n_users_attacked, n_items_attacked = dataset_class_attacked.load_file_as_dataFrame()
    train_matrix_attacked, _ = dataset_class_attacked.dataFrame_to_matrix(
        train_data_df_attacked, n_users_attacked, n_items_attacked)
    self.x = train_matrix_attacked.toarray()
    # =================================
    # label real users 1 and the last 50 (injected) users 0
    Y = np.ones(self.x.shape[0])
    Y[-50:] = 0

    from sklearn.decomposition import PCA
    pca = PCA(n_components=2)
    pca.fit(self.x)
    data_2d = pca.transform(self.x)
    # plt.scatter(data_2d[:, 0], data_2d[:, 1], c=Y)
    # plt.show()
    # fig_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile_pca.png" \
    #            % (self.data_set, self.attacker, self.recommender, self.target_id)
    # plt.savefig(fig_path)
    data_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile_pca" \
                % (self.data_set, self.attacker, self.recommender, self.target_id)
    np.save(data_path, data_2d)
    exit()

    # ==================================
    # hand-written t-SNE; unreachable after the exit() above, kept for reference
    # ==================================
    Y = np.ones(self.x.shape[0])
    Y[-50:] = 0

    (n, d) = self.x.shape
    # randomly initialize the low-dimensional embedding y
    y = np.random.randn(n, self.no_dims)
    # dy: gradient of the KL objective
    dy = np.zeros((n, self.no_dims))
    # iy: accumulated update (momentum term)
    iy = np.zeros((n, self.no_dims))
    gains = np.ones((n, self.no_dims))

    # symmetrize the conditional probabilities p_{j|i} into the joint p_{ij}
    P = self.seach_prob()
    P = P + np.transpose(P)
    P = P / np.sum(P)
    # early exaggeration
    print("T-SNE DURING:%s" % time.perf_counter())
    P = P * 4
    P = np.maximum(P, 1e-12)

    # Run iterations
    for iter in range(self.max_iter):
        # Compute pairwise affinities q_{ij} with the Student-t kernel
        sum_y = np.sum(np.square(y), 1)
        num = 1 / (1 + np.add(np.add(-2 * np.dot(y, y.T), sum_y).T, sum_y))
        num[range(n), range(n)] = 0
        Q = num / np.sum(num)
        Q = np.maximum(Q, 1e-12)

        # Compute gradient from (p_{ij} - q_{ij})
        PQ = P - Q
        for i in range(n):
            dy[i, :] = np.sum(np.tile(PQ[:, i] * num[:, i], (self.no_dims, 1)).T * (y[i, :] - y), 0)

        # Perform the update with momentum and adaptive gains
        if iter < 20:
            momentum = self.initial_momentum
        else:
            momentum = self.final_momentum
        gains = (gains + 0.2) * ((dy > 0) != (iy > 0)) + (gains * 0.8) * ((dy > 0) == (iy > 0))
        gains[gains < self.min_gain] = self.min_gain
        iy = momentum * iy - self.eta * (gains * dy)
        y = y + iy
        y = y - np.tile(np.mean(y, 0), (n, 1))

        # Compute current value of the cost function
        if (iter + 1) % 100 == 0:
            C = np.sum(P * np.log(P / Q))
            print("Iteration ", (iter + 1), ": error is ", C)
            if (iter + 1) != 100:
                ratio = C / oldC
                print("ratio ", ratio)
                if ratio >= 0.95:
                    break
            oldC = C
        # Stop lying about P-values (end early exaggeration)
        if iter == 100:
            P = P / 4

    print("finished training!")
    data_2d = y
    # plt.scatter(data_2d[:, 0], data_2d[:, 1], c=Y)
    # plt.show()
    # fig_path = "./results/performance/figs/%s/Tsne_%s_%s_%d.png" \
    #            % (self.data_set, self.attacker, self.recommender, self.target_id)
    # plt.savefig(fig_path)
    data_path = "./results/performance/figs/%s/Tsne_%s_%s_%d_profile" \
                % (self.data_set, self.attacker, self.recommender, self.target_id)
    np.save(data_path, data_2d)
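# A hedged sketch of how the 2-D coordinates saved above can be plotted later;
# plot_saved_embedding is an illustrative helper, not part of the repository
# (np.save appends '.npy' to the data_path used above).
import numpy as np
import matplotlib.pyplot as plt

def plot_saved_embedding(data_path, n_fake=50):
    data_2d = np.load(data_path + '.npy')
    labels = np.ones(data_2d.shape[0])
    labels[-n_fake:] = 0  # the last n_fake rows are the injected profiles
    plt.scatter(data_2d[:, 0], data_2d[:, 1], c=labels, s=5)
    plt.savefig(data_path + '.png')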
class Recommender(object):
    def __init__(self):
        self.args = self.parse_args()
        # paths
        self.train_path = self.args.train_path
        self.test_path = self.args.test_path
        self.model_path = self.args.model_path
        self.target_prediction_path_prefix = self.args.target_prediction_path_prefix
        # attack-related arguments
        self.target_id_list = list(map(int, self.args.target_ids.split(',')))
        self.topk_list = list(map(int, self.args.topk.split(',')))
        os.environ["CUDA_VISIBLE_DEVICES"] = str(self.args.cuda_id)

    @staticmethod
    def parse_args():
        parser = argparse.ArgumentParser(description="Run Recommender.")
        parser.add_argument('--data_set', type=str, default='ml100k')  # , required=True)
        # paths
        parser.add_argument('--train_path', type=str,
                            default='./data/ml100k/ml100k_train.dat')  # , required=True)
        parser.add_argument('--test_path', type=str,
                            default='./data/ml100k/ml100k_test.dat')  # , required=True)
        parser.add_argument('--model_path', type=str,
                            default='./results/model_saved/automotive/automotive_NeuMF_AUSHplus_round_119')  # , required=True)
        parser.add_argument('--target_prediction_path_prefix', type=str,
                            default='./results/performance/mid_results/ml100k_Recommender')  # , required=True)
        # attack-related arguments
        parser.add_argument('--target_ids', type=str, default='0')  # , required=True)
        parser.add_argument('--topk', type=str, default='5,10,20,50')
        parser.add_argument('--cuda_id', type=int, default=0)
        return parser

    def prepare_data(self):
        self.dataset_class = DataLoader(self.train_path, self.test_path)
        self.train_data_df, self.test_data_df, self.n_users, self.n_items = self.dataset_class.load_file_as_dataFrame()
        self.train_matrix, _ = self.dataset_class.dataFrame_to_matrix(self.train_data_df, self.n_users, self.n_items)
        self.test_matrix, _ = self.dataset_class.dataFrame_to_matrix(self.test_data_df, self.n_users, self.n_items)

    def build_network(self):
        print('build Recommender model graph.')
        raise NotImplementedError

    def train(self):
        print('train.')
        raise NotImplementedError

    def test(self):
        print('test.')
        raise NotImplementedError

    def execute(self):
        print('generate target item performance on a trained Recommender model.')
        raise NotImplementedError

    def save(self, path):
        saver = tf.train.Saver()
        saver.save(self.sess, path)

    def restore(self, path):
        saver = tf.train.Saver()
        saver.restore(self.sess, path)

    def predict(self, user_id, item_id):
        raise NotImplementedError

    def generate_target_result(self):
        train_data_array = self.train_matrix.toarray()
        for target_id in self.target_id_list:
            # mask out users who already rated the target item, and the rated items of the remaining users
            mask = np.zeros_like(train_data_array)
            mask[np.where(train_data_array[:, target_id])[0]] = float('inf')
            # the remaining unrated (user, item) pairs form the test set
            test_uids, test_iids = np.where((train_data_array + mask) == 0)
            # predict ratings for those pairs
            test_predRatings = self.predict(test_uids, test_iids)
            # assemble a dataframe of predictions
            predResults = pd.DataFrame({'user_id': test_uids,
                                        'item_id': test_iids,
                                        'rating': test_predRatings})
            # per user: predicted rating of the target item plus HR@k for each k
            predResults_target = np.zeros([len(predResults.user_id.unique()), len(self.topk_list) + 2])
            for idx, (user_id, pred_result) in enumerate(predResults.groupby('user_id')):
                pred_value = pred_result[pred_result.item_id == target_id].rating.values[0]
                sorted_recommend_list = pred_result.sort_values('rating', ascending=False).item_id.values
                new_line = [user_id, pred_value] + \
                           [1 if target_id in sorted_recommend_list[:k] else 0 for k in self.topk_list]
                predResults_target[idx] = new_line
            np.save('%s_%d' % (self.target_prediction_path_prefix, target_id), predResults_target)
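# The arrays saved by generate_target_result (one row per target user:
# [user_id, predicted rating of the target item, HR@k flags]) are what the
# Attack_Effect_Evaluator compares before and after the attack. A minimal
# sketch of that comparison, assuming it reports the average prediction shift
# and the change in HR@k; compare_target_results is an illustrative helper,
# not the repository's evaluator.
import numpy as np

def compare_target_results(path_clean, path_attacked, topk_list=(5, 10, 20, 50)):
    before = np.load(path_clean)    # predictions on the clean training data
    after = np.load(path_attacked)  # predictions after injecting the fake profiles
    pred_shift = after[:, 1].mean() - before[:, 1].mean()
    hr_delta = after[:, 2:].mean(axis=0) - before[:, 2:].mean(axis=0)
    return 'shift:%.4f\t' % pred_shift + '\t'.join(
        'HR@%d:%.4f' % (k, v) for k, v in zip(topk_list, hr_delta))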