def generateCelfHeapR(self):
    """Build the initial CELF heap of profit-per-cost entries.

    Every node of ``self.graph_dict`` is seeded on its own for product 0 and
    its expected profit is estimated as the mean of ``self.monte`` calls to
    ``Diffusion.getSeedSetProfit`` (rounded to 4 decimals).  When the estimate
    is positive, one entry per product ``k`` is pushed: the estimate scaled by
    ``product_list[k][0] / product_list[0][0]`` and then divided by the node's
    seed cost, each step rounded to 4 decimals.  A node whose seed cost is 0
    contributes no entries.

    Returns:
        list: heap maintained through ``heap.heappush_max`` holding
        ``(mg_ratio, k_prod, i_node, flag)`` tuples; it starts with the
        sentinel ``(0.0, -1, '-1', 0)``.
    """
    # Entry layout: (mg_ratio, k_prod, i_node, flag)
    celf_heap = [(0.0, -1, '-1', 0)]
    diff_ss = Diffusion(self.graph_dict, self.seed_cost_dict,
                        self.product_list, self.monte)

    for node in set(self.graph_dict):
        # Seed only this node, and only for product 0.
        seeds = [set() for _ in range(self.num_product)]
        seeds[0].add(node)

        profit_total = 0.0
        for _ in range(self.monte):
            profit_total += diff_ss.getSeedSetProfit(seeds)
        ep = round(profit_total / self.monte, 4)

        if ep > 0:
            for k in range(self.num_product):
                cost = self.seed_cost_dict[node]
                if cost == 0:
                    # A free seed has no defined ratio; skip the node entirely.
                    break
                mg = round(
                    ep * self.product_list[k][0] / self.product_list[0][0], 4)
                entry = (round(mg / cost, 4), k, node, 0)
                heap.heappush_max(celf_heap, entry)

    return celf_heap
def generateCelfSequenceR(self):
    """Build the initial CELF sequence, sorted by profit-per-cost ratio.

    Every node of ``self.graph_dict`` is seeded on its own for product 0 and
    its expected profit is the mean of ``self.monte`` calls to
    ``Diffusion.getSeedSetProfit`` (rounded to 4 decimals).  For a positive
    estimate, one entry per product ``k`` is recorded: the estimate scaled by
    ``product_list[k][0] / product_list[0][0]`` and divided by the node's seed
    cost, each step rounded to 4 decimals.  Nodes with seed cost 0 are skipped.

    Returns:
        list: ``(k_prod, i_node, mg_ratio, flag)`` tuples in descending
        ``mg_ratio`` order, including the sentinel ``(-1, '-1', 0.0, 0)``.
    """
    # Entry layout: (k_prod, i_node, mg_ratio, flag)
    entries = [(-1, '-1', 0.0, 0)]
    diff_ss = Diffusion(self.graph_dict, self.seed_cost_dict,
                        self.product_list, self.monte)

    for node in set(self.graph_dict):
        # Seed only this node, and only for product 0.
        seeds = [set() for _ in range(self.num_product)]
        seeds[0].add(node)

        profit_total = 0.0
        for _ in range(self.monte):
            profit_total += diff_ss.getSeedSetProfit(seeds)
        ep = round(profit_total / self.monte, 4)

        if ep > 0:
            for k in range(self.num_product):
                cost = self.seed_cost_dict[node]
                if cost == 0:
                    # A free seed has no defined ratio; skip the node entirely.
                    break
                mg = round(
                    ep * self.product_list[k][0] / self.product_list[0][0], 4)
                entries.append((k, node, round(mg / cost, 4), 0))

    # Stable sort, highest ratio first.
    entries.sort(key=lambda entry: entry[2], reverse=True)
    return entries
def generateCelfSequenceR(self):
    """Build the initial CELF sequence, kept sorted while it is filled.

    Every node of ``self.graph_dict`` is seeded on its own for product 0 and
    its expected profit is the mean of ``self.monte`` calls to
    ``Diffusion.getSeedSetProfit`` (rounded to 4 decimals).  For a positive
    estimate, one entry per product ``k`` is inserted: the estimate scaled by
    ``product_list[k][0] / product_list[0][0]`` and divided by the node's seed
    cost, each step rounded to 4 decimals.  Nodes with seed cost 0 are skipped.

    Returns:
        list: ``[k_prod, i_node, mg_ratio, flag]`` lists in descending
        ``mg_ratio`` order, plus the sentinel tuple ``(-1, '-1', 0.0, 0)``.
        A new entry is placed ahead of existing entries with an equal ratio.
    """
    # Entry layout: [k_prod, i_node, mg_ratio, flag]
    celf_seq = [(-1, '-1', 0.0, 0)]
    diff_ss = Diffusion(self.graph_dict, self.seed_cost_dict,
                        self.product_list, self.monte)

    for node in set(self.graph_dict):
        # Seed only this node, and only for product 0.
        seeds = [set() for _ in range(self.num_product)]
        seeds[0].add(node)

        profit_total = 0.0
        for _ in range(self.monte):
            profit_total += diff_ss.getSeedSetProfit(seeds)
        ep = round(profit_total / self.monte, 4)

        if ep > 0:
            for k in range(self.num_product):
                cost = self.seed_cost_dict[node]
                if cost == 0:
                    # A free seed has no defined ratio; skip the node entirely.
                    break
                mg = round(
                    ep * self.product_list[k][0] / self.product_list[0][0], 4)
                celf_ep = [k, node, round(mg / cost, 4), 0]

                # Insert before the first entry whose ratio does not exceed
                # the new one; fall back to the end when there is none.
                slot = next(
                    (pos for pos, existing in enumerate(celf_seq)
                     if celf_ep[2] >= existing[2]),
                    len(celf_seq))
                celf_seq.insert(slot, celf_ep)

    return celf_seq
def generateCelfHeap(self, data_name):
    """Build separate initial CELF heaps for billboard and handbill nodes.

    ``SpiltHeuristicsSet(data_name)`` supplies the two node sets.  Each node is
    seeded on its own for product 0 and evaluated with a single call to
    ``Diffusion.getSeedSetProfitBCS``.  For a positive profit one entry per
    product ``k`` is pushed onto the heap of the node's set.

    Returns:
        tuple: ``(Billboard_celf_heap, Handbill_celf_heap)``, each maintained
        through ``heap.heappush_max`` and holding
        ``(mg, k_prod, i_node, flag)`` tuples.
    """
    Billboard_set, Handbill_set = SpiltHeuristicsSet(data_name)
    Billboard_celf_heap, Handbill_celf_heap = [], []
    diff = Diffusion(self.graph_dict, self.product_list,
                     self.product_weight_list)

    # -- billboard nodes: weighted profit, no cost normalisation --
    for node in Billboard_set:
        seeds = [set() for _ in range(self.num_product)]
        seeds[0].add(node)
        ep = diff.getSeedSetProfitBCS(seeds)
        if ep > 0:
            for k in range(self.num_product):
                numerator = (ep * self.product_list[k][0]
                             * self.product_weight_list[k])
                denominator = (self.product_list[0][0]
                               * self.product_weight_list[0])
                mg = safe_div(numerator, denominator)
                heap.heappush_max(Billboard_celf_heap, (mg, k, node, 0))

    # -- handbill nodes: profit divided by the per-product seed cost --
    for node in Handbill_set:
        seeds = [set() for _ in range(self.num_product)]
        seeds[0].add(node)
        ep = diff.getSeedSetProfitBCS(seeds)
        if ep > 0:
            for k in range(self.num_product):
                # NOTE(review): unlike the billboard branch, the numerator is
                # not multiplied by product_weight_list[k] -- confirm that
                # this asymmetry is intended.
                denominator = (self.product_list[0][0]
                               * self.product_weight_list[0])
                mg = safe_div(ep * self.product_list[k][0], denominator)
                mg = safe_div(mg, self.seed_cost_dict[k][node])
                heap.heappush_max(Handbill_celf_heap, (mg, k, node, 0))

    return Billboard_celf_heap, Handbill_celf_heap
def solveMultipleChoiceKnapsackProblem(self, bud, s_matrix, c_matrix):
    """Pick the most profitable affordable combination of per-product budgets.

    All combinations of budget indices (one index per product) are enumerated
    from the largest indices downwards.  A combination is kept when its total
    cost ``sum(c_matrix[k][index[k]])`` does not exceed ``bud`` and it is not
    dominated by an already kept combination, i.e. no kept combination is
    greater than or equal to it at every product position.  Each kept
    combination is then evaluated with ``self.monte`` calls to
    ``Diffusion.getSeedSetProfit`` and the best mean profit wins.

    Args:
        bud: total budget available.
        s_matrix: seed sets, read as ``s_matrix[k][budget_index][k]``.
        c_matrix: costs, read as ``c_matrix[k][budget_index]``.

    Returns:
        list: one seed set per product for the best combination, or a list of
        empty sets when no combination yields a positive mean profit.
    """
    ### bud_index: (list) the using budget index for products
    ### bud_bound_index: (list) the bound budget index for products
    bud_index = [len(k) - 1 for k in c_matrix]
    bud_bound_index = [0 for _ in range(self.num_product)]
    MCKP_list = []
    diff = Diffusion(self.graph_dict, self.product_list,
                     self.product_weight_list)

    while bud_index != bud_bound_index:
        ### bud_pmis: (float) the budget in this pmis execution
        bud_pmis = sum(c_matrix[k][bud_index[k]]
                       for k in range(self.num_product))

        if bud_pmis <= bud:
            # Compare position by position.  Looking the position up with
            # bud_index.index(value) returns the FIRST slot holding that
            # value, which is the wrong product whenever two products share
            # the same budget index.
            dominated = any(
                all(b_index <= senpai_index
                    for b_index, senpai_index in zip(bud_index, senpai_item))
                for senpai_item in MCKP_list)
            if not dominated:
                MCKP_list.append(bud_index.copy())

        # Step to the next combination: decrement the last position, and
        # reset-and-carry every position that already sits on its bound.
        pointer = self.num_product - 1
        while bud_index[pointer] == bud_bound_index[pointer]:
            bud_index[pointer] = len(c_matrix[pointer]) - 1
            pointer -= 1
        bud_index[pointer] -= 1

    mep_result = (0.0, [set() for _ in range(self.num_product)])
    for candidate in MCKP_list:
        s_set = [s_matrix[k][candidate[k]][k].copy()
                 for k in range(self.num_product)]
        ep = round(
            sum([diff.getSeedSetProfit(s_set) for _ in range(self.monte)])
            / self.monte, 4)
        if ep > mep_result[0]:
            mep_result = (ep, s_set)

    return mep_result[1]
def spectral(x_train, y, sigma, k=None, ref_neighbor=None, kernel="gaussian"):
    """Return a gap between two consecutive eigenvalues of a normalised kernel.

    A kernel matrix ``K`` is built from ``df.Knnsearch`` and
    ``df.ComputeKernel``.  The per-class diagonal blocks of ``K`` are assembled
    into a block-diagonal matrix whose inverse column sums form the diagonal
    scaling ``D``.  The eigenvalues of ``D @ K @ D`` are sorted in descending
    order and the difference between the entries at positions ``num_classes``
    and ``num_classes + 1`` is returned.

    Args:
        x_train: samples; ``x_train.shape[0]`` is the default for ``k``.
        y: per-sample class labels.
        sigma: passed to ``df.ComputeKernel`` as ``sig``.
        k: number of neighbours for ``df.Knnsearch``; defaults to all samples.
        ref_neighbor: forwarded to ``df.ComputeKernel``.
        kernel: forwarded to ``df.ComputeKernel``.

    Returns:
        Real part of ``vals[num_classes] - vals[num_classes + 1]``.

    Raises:
        IndexError: if there are fewer than ``num_classes + 2`` eigenvalues.
    """
    if k is None:
        k = x_train.shape[0]

    idx, dx = df.Knnsearch(x_train, x_train, k, metric="gaussian")
    _, K = df.ComputeKernel(idx, dx, sig=sigma, ref_neighbor=ref_neighbor,
                            kernel=kernel)

    # Compute the label set and each class' member indices exactly once.
    classes = np.unique(y)
    class_blocks = []
    for label in classes:
        members = np.where(y == label)[0]
        class_blocks.append(K[np.ix_(members, members)])
    K_bar = block_diag(*class_blocks)

    # NOTE(review): K_bar is ordered class by class while K keeps the sample
    # order of x_train, so D lines up with K only if the samples are already
    # grouped by label -- confirm against the callers.
    d = 1 / np.sum(K_bar, axis=0)
    D = np.diag(d)
    A_tilde = D @ K @ D

    vals, _ = np.linalg.eig(A_tilde)
    vals = np.sort(vals)[::-1]

    num_classes = len(classes)
    return (vals[num_classes] - vals[num_classes + 1]).real
def generateCelfHeap(self):
    """Build one initial CELF heap per product.

    Every node of ``self.graph_dict`` is seeded on its own for product 0 and
    its expected profit is the mean of ``self.monte`` calls to
    ``Diffusion.getSeedSetProfit`` (rounded to 4 decimals).  For a positive
    estimate, product ``k`` receives the entry
    ``safe_div(ep * product_list[k][0], product_list[0][0])``.

    Returns:
        list: ``self.num_product`` heaps maintained through
        ``heap.heappush_max``, each holding ``(mg, k_prod, i_node, flag)``
        tuples.
    """
    # Entry layout: (mg, k_prod, i_node, flag)
    celf_heap = [[] for _ in range(self.num_product)]
    diff_ss = Diffusion(self.graph_dict, self.product_list,
                        self.product_weight_list)

    for node in self.graph_dict:
        # Seed only this node, and only for product 0.
        seeds = [set() for _ in range(self.num_product)]
        seeds[0].add(node)

        samples = [diff_ss.getSeedSetProfit(seeds) for _ in range(self.monte)]
        ep = round(sum(samples) / self.monte, 4)

        if ep > 0:
            for k in range(self.num_product):
                mg = safe_div(ep * self.product_list[k][0],
                              self.product_list[0][0])
                heap.heappush_max(celf_heap[k], (mg, k, node, 0))

    return celf_heap
def solveMultipleChoiceKnapsackProblem(self, bud, s_matrix, c_matrix):
    """Search the per-product budget combinations for the best mean profit.

    Combinations of budget indices (one per product) are enumerated from the
    largest indices downwards.  A combination is evaluated when its total cost
    does not exceed ``bud`` and none of its indices lies below the most
    recently evaluated combination (``temp_bound_index``).  Evaluation is the
    mean of ``self.monte`` calls to ``Diffusion.getSeedSetProfit``, rounded to
    4 decimals.

    Args:
        bud: total budget available.
        s_matrix: seed sets, read as ``s_matrix[k][budget_index][k]``.
        c_matrix: costs, read as ``c_matrix[k][budget_index]``.

    Returns:
        tuple: ``(profit, seed_sets)`` of the best evaluated combination, or
        ``(0.0, [set(), ...])`` when nothing yields a positive mean profit.
    """
    mep_result = (0.0, [set() for _ in range(self.num_product)])
    ### bud_index: (list) the using budget index for products
    ### bud_bound_index: (list) the bound budget index for products
    bud_index = [len(k) - 1 for k in c_matrix]
    bud_bound_index = [0 for _ in range(self.num_product)]
    ### temp_bound_index: (list) the bound to exclude the impossible budget
    ### combination s.t. the k-budget is smaller than the temp bound
    temp_bound_index = [0 for _ in range(self.num_product)]
    diff_ss = Diffusion(self.graph_dict, self.product_list)

    while bud_index != bud_bound_index:
        ### bud_pmis: (float) the budget in this pmis execution
        bud_pmis = 0.0
        for k in range(self.num_product):
            bud_pmis += c_matrix[k][bud_index[k]]

        if bud_pmis <= bud:
            below_bound = any(temp_bound_index[k] > bud_index[k]
                              for k in range(self.num_product))
            if not below_bound:
                temp_bound_index = list(bud_index)

                # Copy only the selected seed sets; deep-copying the whole
                # matrix for every product is needlessly expensive.
                s_set = [copy.deepcopy(s_matrix[k][bud_index[k]][k])
                         for k in range(self.num_product)]

                pro_acc = 0.0
                for _ in range(self.monte):
                    pro_acc += diff_ss.getSeedSetProfit(s_set)
                pro_acc = round(pro_acc / self.monte, 4)

                if pro_acc > mep_result[0]:
                    mep_result = (pro_acc, s_set)

        # Step to the next combination: decrement the last position, and
        # reset-and-carry every position that already sits on its bound.
        pointer = self.num_product - 1
        while bud_index[pointer] == bud_bound_index[pointer]:
            bud_index[pointer] = len(c_matrix[pointer]) - 1
            pointer -= 1
        bud_index[pointer] -= 1

    return mep_result
monte_carlo, eva_monte_carlo = 10, 100 iniG = IniGraph(dataset_name) iniP = IniProduct(product_name) seed_cost_dict = iniG.constructSeedCostDict() graph_dict = iniG.constructGraphDict(cascade_model) product_list = iniP.getProductList() num_node = len(seed_cost_dict) num_product = len(product_list) # -- initialization for each budget -- start_time = time.time() ssng = SeedSelectionNG(graph_dict, seed_cost_dict, product_list, monte_carlo) diff = Diffusion(graph_dict, seed_cost_dict, product_list, monte_carlo) # -- initialization for each sample -- now_budget, now_profit = 0.0, 0.0 seed_set = [set() for _ in range(num_product)] celf_sequence = ssng.generateCelfSequence() mep_g = celf_sequence.pop(0) mep_k_prod, mep_i_node, mep_flag = mep_g[0], mep_g[1], mep_g[3] while now_budget < total_budget and mep_i_node != '-1': if now_budget + seed_cost_dict[mep_i_node] > total_budget: mep_g = celf_sequence.pop(0) mep_k_prod, mep_i_node, mep_flag = mep_g[0], mep_g[1], mep_g[3] if mep_i_node == '-1': break
validationCosts, 'b--') plt.title('loss during training') plt.xlabel('epoch') plt.xlabel('average loss') plt.legend(['training', 'validation']) plt.show() ################################################################# ####################### diffusion and SVD ###################### ################################################################# # net outputs: Z1, Z2 = siamese.getCodes(S1_test, S2_test) # diffusion E1, v1 = df.Diffusion(Z1, k=20, nEigenVals=12) E2, v2 = df.Diffusion(Z2, k=20, nEigenVals=12) # svd U1, s1, _ = np.linalg.svd(Z1) U2, s2, _ = np.linalg.svd(Z2) # plot 3 leading coordinates of diffusion / svd embedding of the test data, # colored by the value of the common variable fig, (a1) = plt.subplots(1, 1, subplot_kw={'projection': '3d'}) a1.scatter(E1[:, 0], E1[:, 1], E1[:, 2], c=x_test, cmap='gist_ncar') plt.title('diffusion embedding of sensor #1 code') fig, (a2) = plt.subplots(1, 1, subplot_kw={'projection': '3d'}) a2.scatter(E2[:, 0], E2[:, 1], E2[:, 2], c=x_test, cmap='gist_ncar') plt.title('diffusion embedding of sensor #2 code') fig, (a3) = plt.subplots(1, 1, subplot_kw={'projection': '3d'})
# Split the data into a training part (first n_train rows) and a held-out part.
n_train = 2000
n_test = n - n_train
S1_train = data[:n_train, :]
# Slice from n_train (not n_train + 1) so that row n_train is not dropped and
# the held-out arrays have exactly n_test rows.
S1_test = data[n_train:, :]
clean_test = orig_data[n_train:, :]
t_train = t[:n_train]
t_test = t[n_train:]

input_size = S1_train.shape[1]
batch_size = S1_train.shape[0]
sort_inds = np.argsort(t_train)

embedding_size = 2
k = 16
K_mat = df.ComputeLBAffinity(
    S1_train, k, sig=0.1)  # Laplace-Beltrami affinity: D^-1 * K * D^-1
P = df.makeRowStoch(K_mat)  # markov matrix
E1, v1 = df.Diffusion(
    K_mat, nEigenVals=embedding_size + 1)  # eigenvalues and eigenvectors
S1_embedding = np.matmul(E1, np.diag(v1))  # diffusion maps

# Scatter the first two columns of E1, coloured by t_train.
fig, (a1) = plt.subplots(1, 1)
a1.scatter(E1[:, 0], E1[:, 1], c=t_train, cmap='gist_ncar')
plt.title('diffusion embedding of train')
a1.set_aspect('equal')
plt.show(block=False)
plt.savefig('DM2layer_embedding_.png', bbox_inches='tight', transparent=True)

# # Diffusion Net
P = tf.cast(tf.constant(P), tf.float32)
import Diffusion

# Drive the Diffusion object until its epoch counter reaches 10000, reporting
# and saving a snapshot whenever the epoch is a multiple of 50.
# NOTE(review): assumes diff.update() advances diff.epoch; otherwise this loop
# never terminates -- confirm against the Diffusion module.
diff = Diffusion.Diffusion()
while diff.epoch < 10000:
    diff.update()
    if diff.epoch % 50:
        continue
    print("Yep\n Epoch = ", diff.epoch)
    diff.save('Diffusion{0}'.format(diff.epoch))
import networkx as nx import Diffusion as lfm distance_factor = 0.5 delta = 3 machine_out_file = open("output", "w") LFD = lfm.Diffusion("network", "week_user_artist_count", "user_artist_totalistening_periodlength") actions = LFD.get_action_list() for act in actions: asg = LFD.build_action_subgraph(act, delta) leaders = LFD.compute_action_leaders(asg) for l in leaders: l_t = nx.dfs_tree(asg, l) tribe = l_t.nodes() frontier = [] for l_n in l_t.nodes(): if l_t.out_degree(l_n) == 0: frontier.append(l_n) depth = LFD.compute_max_depth(l_t, l, frontier) mean_depth = float(depth) / len(frontier) width = LFD.compute_width(l_t, l) l_strength = LFD.compute_level_strength(l_t, l, distance_factor, act) machine_out_file.write(