def get_qual(args, topic_thresh, sim_bits, test_perc=0.4):
    # get new values on test set based on current thresholds
    args.topic_thr = topic_thresh
    create_vecs(args)
    if args.exp == 'vk':
        g, n, C1, C2, C3, C4 = get_aux_vk(args)
        learn_params(args, g, n, C1, C2, C3, C4)
    # read the learned parameters
    basefile = open(os.path.join(args.data_dir, args.exp, 'base_weights.txt'), newline='')
    basefile = csv.reader(basefile, delimiter=' ')
    qfile = open(os.path.join(args.data_dir, args.exp, 'marg_weights.txt'), newline='')
    qfile = csv.reader(qfile, delimiter=' ')
    topic_features = np.load(args.topic_features)
    user_features = np.load(args.user_features)
    b = dict()
    q = dict()
    for u, v, val in basefile:
        b[int(u), int(v)] = float(val)
    for u, v, val in qfile:
        q[int(u), int(v)] = float(val)
    # calculate tp, fp, tn, fn
    tp = []
    fp = []
    tn = []
    fn = []
    logfile = open(args.log_file, newline='')
    logs = list(csv.reader(logfile, delimiter=' '))
    num_logs = len(logs)
    templogs = logs[:int(num_logs * test_perc)]
    testlogs = []
    for log in templogs:
        nplog = np.array(log, dtype=int)
        testlogs.append(nplog)
    testlogs = np.array(testlogs)
    print("Test log in consideration: ", testlogs.shape)
    # testlogs is now a numpy array
    # to estimate auc, taking 100 points on the curve
    for mu in tqdm(np.linspace(0, 1, 100)):
        print(mu)
        tpx = 0
        fpx = 0
        tnx = 0
        fnx = 0
        for log in reversed(testlogs):
            # [v, a, t_v] = [int(x) for x in log], already converted into an integer
            v, a_v, t_v = [x for x in log]
            # v published a message in the test log
            # build the action set, deduplicated by topic similarity
            action_set = []
            for u in g.predecessors(v):
                # list of permissible actions performed by u
                a_performed_by_u = get_actions_from_logs(testlogs, u, t_v, sim_bits)
                for au in a_performed_by_u:
                    similar = False
                    for a in action_set:
                        if check_sim(topic_features[au], topic_features[a], nbits=sim_bits):
                            similar = True
                            break
                    if not similar:
                        action_set.append(au)
            # now they are unique actions
            prob = 0.0
            for a in action_set:
                for u in g.predecessors(v):
                    # get valid predecessors for the actions
                    if checklog(logs, u, a, t_v, sim_bits):
                        prob += b[(u, v)] + q[(u, v)] * get_alpha(
                            user_features[v], topic_features[a])
                prob = min(1, max(prob, 0))
                prediction = (prob > mu)
                gt = checklog(logs, v, a)
                if prediction:
                    if gt:
                        tpx += 1
                    else:
                        fpx += 1
                else:
                    if gt:
                        fnx += 1
                    else:
                        tnx += 1
        tp.append(tpx)
        fp.append(fpx)
        tn.append(tnx)
        fn.append(fnx)
    tp = np.array(tp)
    fp = np.array(fp)
    tn = np.array(tn)
    fn = np.array(fn)
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    auc = -1 * np.trapz(tpr, fpr)
    print(auc)
    return auc
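
# All four quality functions in this file repeat the same final step: turn the per-mu
# confusion counts into TPR/FPR arrays and integrate the ROC curve with np.trapz.
# A minimal sketch of that shared step as a standalone helper; the name
# _auc_from_counts is hypothetical and this assumes numpy is imported as np, as
# elsewhere in this module:
def _auc_from_counts(tp, fp, tn, fn):
    tp, fp, tn, fn = (np.array(x, dtype=float) for x in (tp, fp, tn, fn))
    tpr = tp / (tp + fn)  # true positive rate at each threshold mu
    fpr = fp / (fp + tn)  # false positive rate at each threshold mu
    # mu sweeps from 0 to 1, so fpr decreases along the curve; negate the integral
    return -1 * np.trapz(tpr, fpr)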
def _get_qual_citation(args, topic_thresh, test_perc=0.4):
    print("Calling inner function")
    create_vecs(args)
    # get new values on test set based on current thresholds
    if args.exp == 'citation':
        g, n, C1, C2, C3, C4 = get_aux_cit(args)
        b, q = learn_params(args, g, n, C1, C2, C3, C4)
    # need to reload since re-saved when creating the vectors
    topic_features = np.load(args.topic_features)
    user_features = np.load(args.user_features)
    # calculate tp, fp, tn, fn
    tp = []
    fp = []
    tn = []
    fn = []
    # `published` (user -> messages they published) and `cited` (user -> messages they
    # cited) are assumed to be available at module level here; get_qual_citation below
    # shows how they are built from the citation test log.
    # to estimate auc, taking 100 points on the curve
    for mu in tqdm(np.linspace(0, 1, 100)):
        # print(mu)
        tpx = 0
        fpx = 0
        tnx = 0
        fnx = 0
        for v in cited.keys():
            # per entry of the log
            for msg in cited[v]:
                # get action set: everything the predecessors of v have published
                action_set = set()
                for u in g.predecessors(v):
                    if u in published:
                        action_set.update(published[u])
                prob = 0.0
                # prob for an action
                for a in action_set:
                    for u in g.predecessors(v):
                        # get valid predecessors for the actions
                        if u in published and a in published[u]:
                            prob += b[(u, v)] + q[(u, v)] * get_alpha(
                                user_features[v], topic_features[a])
                    prob = min(1, max(prob, 0))
                    prediction = (prob > mu)
                    gt = (a == msg)
                    if prediction:
                        if gt:
                            tpx += 1
                        else:
                            fpx += 1
                    else:
                        if gt:
                            fnx += 1
                        else:
                            tnx += 1
        print("Results for mu = ", mu, "(tp, fp, tn, fn)", tpx, fpx, tnx, fnx)
        tp.append(tpx)
        fp.append(fpx)
        tn.append(tnx)
        fn.append(fnx)
    tp = np.array(tp)
    fp = np.array(fp)
    tn = np.array(tn)
    fn = np.array(fn)
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    print("TPR: ", tpr)
    print("FPR: ", fpr)
    auc = -1 * np.trapz(tpr, fpr)
    print('auc: ', auc)
    return auc
def _get_qual(args, topic_thresh, sim_bits):
    print("Inner qual function")
    # get new values on test set based on current thresholds
    args.topic_thr = topic_thresh
    args.nbits = sim_bits
    create_vecs(args)
    if args.exp == 'vk':
        g, n, C1, C2, C3, C4 = get_aux_vk(args)
        b, q = learn_params(args, g, n, C1, C2, C3, C4)
    topic_features = np.load(args.topic_features)
    user_features = np.load(args.user_features)
    # calculate tp, fp, tn, fn
    tp = []
    fp = []
    tn = []
    fn = []
    # testlogs (a numpy array of test log rows), actions_of (user -> actions) and
    # action_table ((user, action) -> time) are assumed to be prepared elsewhere in
    # this module before the inner function is called.
    # to estimate auc, taking 10 points on the curve
    for mu in tqdm(np.linspace(0, 1, 10)):
        tpx = 0
        fpx = 0
        tnx = 0
        fnx = 0
        for log in reversed(testlogs):
            # [v, a, t_v] = [int(x) for x in log], already converted into an integer
            v, a_v, t_v = [int(x) for x in log]
            # v published a message in the test log
            # build the action set, deduplicated by topic similarity
            action_list = []
            action_set = []
            for u in g.predecessors(v):
                # list of permissible actions performed by u before t_v
                for au in actions_of[u]:
                    if action_table[(u, au)] > t_v:
                        continue
                    action_list.append(au)
            for a1 in action_list:
                similar = False
                for a2 in action_set:
                    if check_sim(topic_features[a1], topic_features[a2], nbits=sim_bits):
                        similar = True
                        break
                if not similar:
                    action_set.append(a1)
            print("Formed action set for node: ", v)
            # now they are unique actions
            prob = 0.0
            for a in action_set:
                for u in g.predecessors(v):
                    # get valid predecessors for the actions
                    for au in actions_of[u]:
                        if action_table[(u, au)] > t_v:
                            continue
                        if check_sim(topic_features[au], topic_features[a], nbits=sim_bits):
                            prob += b[(u, v)] + q[(u, v)] * get_alpha(
                                user_features[v], topic_features[a])
                # summed up prob for a by all users and through all their actions
                prob = min(1, max(prob, 0))
                prediction = (prob > mu)
                # gt -> this log entry is similar to the action
                # gt = checklog(logs, v, a)
                gt = check_sim(topic_features[a_v], topic_features[a], nbits=sim_bits)
                if prediction:
                    if gt:
                        tpx += 1
                    else:
                        fpx += 1
                else:
                    if gt:
                        fnx += 1
                    else:
                        tnx += 1
            print("(tp, fp, tn, fn): ", tpx, fpx, tnx, fnx)
        tp.append(tpx)
        fp.append(fpx)
        tn.append(tnx)
        fn.append(fnx)
    tp = np.array(tp)
    fp = np.array(fp)
    tn = np.array(tn)
    fn = np.array(fn)
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    print("TPR: ", tpr)
    print("FPR: ", fpr)
    auc = -1 * np.trapz(tpr, fpr)
    print("auc: ", auc)
    return auc
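
# _get_qual above reads module-level testlogs, actions_of, and action_table that are
# prepared elsewhere. A rough sketch of how such an index could be built from the same
# space-delimited (user, action, time) log rows used in get_qual; the function name and
# the exact layout of the two structures are assumptions, not part of the original code:
def _build_action_index(testlogs):
    from collections import defaultdict
    actions_of = defaultdict(list)  # user -> actions that user performed
    action_table = {}               # (user, action) -> time the action happened
    for row in testlogs:
        u, a, t = (int(x) for x in row)
        actions_of[u].append(a)
        action_table[(u, a)] = t
    return actions_of, action_table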
def get_qual_citation(args, topic_thresh, test_perc=0.4):
    args.topic_thr = topic_thresh
    create_vecs(args)
    # get new values on test set based on current thresholds
    if args.exp == 'citation':
        g, n, C1, C2, C3, C4 = get_aux_cit(args)
        learn_params(args, g, n, C1, C2, C3, C4)
    # read the learned parameters
    basefile = open(os.path.join(args.data_dir, args.exp, 'base_weights.txt'), newline='')
    basefile = csv.reader(basefile, delimiter=' ')
    qfile = open(os.path.join(args.data_dir, args.exp, 'marg_weights.txt'), newline='')
    qfile = csv.reader(qfile, delimiter=' ')
    topic_features = np.load(args.topic_features)
    user_features = np.load(args.user_features)
    b = dict()
    q = dict()
    for u, v, val in basefile:
        b[int(u), int(v)] = float(val)
    for u, v, val in qfile:
        q[int(u), int(v)] = float(val)
    # calculate tp, fp, tn, fn
    tp = []
    fp = []
    tn = []
    fn = []
    logfile = open(args.log_file, newline='')
    logs = list(csv.reader(logfile, delimiter=' '))
    num_logs = len(logs)
    templogs = logs[:int(num_logs * test_perc)]
    testlogs = []
    for log in templogs:
        nplog = np.array(log, dtype=int)
        testlogs.append(nplog)
    testlogs = np.array(testlogs)
    print("Size of Test log in consideration: ", testlogs.shape)
    # testlogs is now a numpy array
    # published: user -> messages they published; cited: user -> messages they cited
    published = dict()
    cited = dict()
    for log in testlogs:
        [u, v, c, p] = [int(x) for x in log]
        # if v == 1344:
        #     print(c)
        published.setdefault(u, set()).add(c)
        published.setdefault(v, set()).add(p)
        cited.setdefault(v, set()).add(c)
    print("Preprocessed test log.")
    # to estimate auc, taking 100 points on the curve
    for mu in tqdm(np.linspace(0, 1, 100)):
        tpx = 0
        fpx = 0
        tnx = 0
        fnx = 0
        for v in cited.keys():
            # per entry of the log
            for msg in cited[v]:
                # get action set: everything the predecessors of v have published
                action_set = set()
                for u in g.predecessors(v):
                    if u in published:
                        action_set.update(published[u])
                prob = 0.0
                # prob for an action
                for a in action_set:
                    for u in g.predecessors(v):
                        # get valid predecessors for the actions
                        if u in published and a in published[u]:
                            prob += b[(u, v)] + q[(u, v)] * get_alpha(
                                user_features[v], topic_features[a])
                    prob = min(1, max(prob, 0))
                    prediction = (prob > mu)
                    gt = (a == msg)
                    if prediction:
                        if gt:
                            tpx += 1
                        else:
                            fpx += 1
                    else:
                        if gt:
                            fnx += 1
                        else:
                            tnx += 1
        tp.append(tpx)
        fp.append(fpx)
        tn.append(tnx)
        fn.append(fnx)
    tp = np.array(tp)
    fp = np.array(fp)
    tn = np.array(tn)
    fn = np.array(fn)
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    auc = -1 * np.trapz(tpr, fpr)
    print('auc: ', auc)
    return auc
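
# A hedged usage sketch (not part of the original module): the quality functions above
# expect an argparse-style namespace, so a throwaway SimpleNamespace stands in for it
# here; every attribute value and path below is a placeholder for illustration only.
def _example_threshold_sweep():
    from types import SimpleNamespace
    example_args = SimpleNamespace(
        exp='vk',                                      # or 'citation' for the citation variants
        data_dir='data',                               # placeholder directory
        log_file='data/vk/logs.txt',                   # placeholder log path
        topic_features='data/vk/topic_features.npy',   # placeholder feature files
        user_features='data/vk/user_features.npy',
        topic_thr=0.5,
        nbits=8,
    )
    # Sweep the topic threshold and report the value with the best test-set AUC.
    scores = {thr: get_qual(example_args, thr, sim_bits=8) for thr in (0.3, 0.5, 0.7)}
    best_thr = max(scores, key=scores.get)
    print("best topic_thresh:", best_thr, "auc:", scores[best_thr])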