def classification(X, params):
    """Score embeddings with a fixed, position-based train/test split.

    The first ``params["n_train"]`` rows of ``X`` are used for training and
    the remaining rows for testing.  The trainer named by
    ``params["model"]["func"]`` is looked up on ``mll``, fitted and evaluated
    ``params["times"]`` times, and the scores are averaged.

    Args:
        X: embedding matrix; it is sliced as ``X[:k, :]``, so it must support
            two-dimensional slicing.
        params: configuration mapping.  Keys read here: ``"data"``,
            ``"ground_truth"``, ``"n_train"``, ``"times"`` and ``"model"``.

    Returns:
        dict with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    # NOTE(review): the scaled matrix is never used below -- the split is
    # taken from the raw X.  Confirm whether that is intended.
    scale(X)

    labels_file = os.path.join(DATA_PATH, params["data"], params["ground_truth"])
    labels = dh.load_ground_truth(labels_file)
    labels = labels[:len(X)]

    n_train = params["n_train"]
    print("number_of_train_set", n_train)

    repeats = params["times"]
    acc_total = 0.0
    micro_total = 0.0
    macro_total = 0.0
    for _ in range(repeats):
        X_train = X[:n_train, :]
        X_test = X[n_train:, :]
        y_train = labels[:n_train]
        y_test = labels[n_train:]

        trainer = getattr(mll, params["model"]["func"])
        clf = trainer(X_train, y_train, params["model"])
        outcome = mll.infer(clf, X_test, y_test)

        # outcome[1] is accumulated as the accuracy; outcome[0] is passed to
        # f1_score as the predictions.
        acc_total += outcome[1]
        predicted = outcome[0]
        micro_total += f1_score(y_test, predicted, average='micro')
        macro_total += f1_score(y_test, predicted, average='macro')

    acc_total /= float(repeats)
    micro_total /= float(repeats)
    macro_total /= float(repeats)

    res = {"acc": acc_total, "micro_f1": micro_total, "macro_f1": macro_total}
    print(res)
    return res
def visualization(X, params):
    """Draw a 2-D scatter plot of the embeddings, coloured by label.

    Embeddings with more than two columns are first passed through
    ``ct.reduce_embedding_dim(X, 2)``; the result is then passed through
    ``scale``.  The figure is saved as a PDF and shown with ``plt.show()``.

    Args:
        X: embedding matrix; ``len(X[0])`` is taken as the number of columns.
        params: configuration mapping.  Keys read here: ``"data"``,
            ``"ground_truth"``, ``"res_home"`` and ``"embeddings_file"``.

    Returns:
        dict with a single key ``"scatter_path"`` holding the saved file path.
    """
    labels_file = os.path.join(DATA_PATH, params["data"], params["ground_truth"])
    labels = dh.load_ground_truth(labels_file)
    labels = labels[:len(X)]

    n_columns = len(X[0])
    points = ct.reduce_embedding_dim(X, 2) if n_columns > 2 else X
    points = scale(points)

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title('Scatter Plot')
    plt.xlabel('X')
    plt.ylabel('Y')

    colors = ct.label2color(labels)
    axes.scatter(points[:, 0], points[:, 1], c=colors, cmap='viridis', marker='s')

    # The output name is the embeddings file name with "scatter.pdf" appended.
    out_name = params["embeddings_file"] + "scatter.pdf"
    scatter_path = os.path.join(params["res_home"], out_name)
    plt.savefig(scatter_path)
    plt.show()
    return {"scatter_path": scatter_path}
def metric(params):
    """Run every configured metric against the ground truth.

    Args:
        params: configuration mapping.  ``params["ground_truth_file"]`` is
            joined onto ``DATA_PATH`` and loaded with
            ``dh.load_ground_truth``; ``params["metric_function"]`` is an
            iterable of mappings whose ``"func"`` entry names an attribute
            of ``Metric``.

    Returns:
        list with one result per entry of ``params["metric_function"]``, in
        the same order.  Each metric is called as ``func(ground_truth, spec)``.
    """
    truth_file = os.path.join(DATA_PATH, params["ground_truth_file"])
    G_truth = dh.load_ground_truth(truth_file)
    return [
        getattr(Metric, spec["func"])(G_truth, spec)
        for spec in params["metric_function"]
    ]
def classification(X, params):
    """Score embeddings over nine test fractions with stratified splits.

    The test fraction starts at 0.0 and is increased by 0.1 nine times.  For
    each fraction the model named by ``params["model"]["func"]`` (looked up
    on ``mll``) is trained and evaluated ``params["times"]`` times on a
    stratified split seeded with ``params["np_seed"]``, and the scores are
    averaged.

    Args:
        X: embedding matrix, passed through ``scale`` before splitting.
        params: configuration mapping.  Keys read here: ``"ground_truth"``,
            ``"times"``, ``"np_seed"`` and ``"model"``.

    Returns:
        dict mapping the test fraction formatted with ``"%.2f"`` to a dict
        with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    features = scale(X)
    labels = dh.load_ground_truth(params["ground_truth"])
    labels = labels[:len(X)]

    res = {}
    ts = 0.0
    for _step in range(9):
        # Accumulate rather than multiply so the float values (and therefore
        # the splits) stay exactly as before.
        ts += 0.1
        acc_total = 0.0
        micro_total = 0.0
        macro_total = 0.0
        for _ in range(params["times"]):
            X_train, X_test, y_train, y_test = train_test_split(
                features, labels,
                test_size=ts,
                stratify=labels,
                random_state=params["np_seed"])
            trainer = getattr(mll, params["model"]["func"])
            clf = trainer(X_train, y_train, params["model"])
            outcome = mll.infer(clf, X_test, y_test)
            acc_total += outcome[1]
            predicted = outcome[0]
            micro_total += f1_score(y_test, predicted, average='micro')
            macro_total += f1_score(y_test, predicted, average='macro')

        acc_total /= float(params["times"])
        micro_total /= float(params["times"])
        macro_total /= float(params["times"])

        print("test_size:", ts)
        summary = {"acc": acc_total, "micro_f1": micro_total, "macro_f1": macro_total}
        res["%.2f" % ts] = summary
        print(summary)
    return res
def classification(X, params):
    """Score embeddings over nine test fractions with stratified splits.

    The test fraction starts at 0 and is increased by 0.1 nine times.  For
    each fraction the model named by ``params["model"]["func"]`` (looked up
    on ``mll``) is trained and evaluated ``params["times"]`` times on a
    fresh stratified split, and the scores are averaged and printed.

    Args:
        X: embedding matrix, passed through ``scale`` before splitting.
        params: configuration mapping.  Keys read here: ``"data"``,
            ``"ground_truth"``, ``"times"`` and ``"model"``.

    Returns:
        dict with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``
        of the last test fraction evaluated.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    X_scaled = scale(X)
    ground_truth_path = os.path.join(DATA_PATH, params["data"], params["ground_truth"])
    y = dh.load_ground_truth(ground_truth_path)
    y = y[:len(X)]
    print(len(y))
    print("y_0=", y[0])

    times = params["times"]
    scores = {}
    ts = 0
    for _step in range(9):
        ts = ts + 0.1
        # Reset the accumulators for every test size.  They used to be
        # initialised once before this loop, so each averaged score was
        # carried into the next test size's sum and skewed its result.
        acc = 0.0
        micro_f1 = 0.0
        macro_f1 = 0.0
        for _ in range(times):
            X_train, X_test, y_train, y_test = train_test_split(
                X_scaled, y, test_size=ts, stratify=y)
            clf = getattr(mll, params["model"]["func"])(X_train, y_train, params["model"])
            ret = mll.infer(clf, X_test, y_test)
            # ret[1] is accumulated as the accuracy; ret[0] is passed to
            # f1_score as the predictions.
            acc += ret[1]
            y_score = ret[0]
            micro_f1 += f1_score(y_test, y_score, average='micro')
            macro_f1 += f1_score(y_test, y_score, average='macro')
        acc /= float(times)
        micro_f1 /= float(times)
        macro_f1 /= float(times)
        scores = {"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
        print("test_size:", ts)
        print(scores)
    return scores
def classification(X, params):
    """Return the mean accuracy over repeated stratified splits.

    The classifier named by ``params["classification"]["func"]`` (looked up
    on ``mll``) is trained and evaluated ``params["times"]`` times, each time
    on a fresh stratified split with test fraction ``params["test_size"]``.

    Args:
        X: embedding matrix, passed through ``scale`` before splitting.
        params: configuration mapping.  Keys read here: ``"ground_truth"``,
            ``"times"``, ``"test_size"`` and ``"classification"``.

    Returns:
        The accuracy (element 1 of ``mll.infer``'s result) averaged over all
        repetitions.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    X_scaled = scale(X)
    y = dh.load_ground_truth(os.path.join(DATA_PATH, params["ground_truth"]))
    y = y[:len(X)]

    times = params["times"]
    model_params = params["classification"]
    acc = 0.0
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3,
    # while ``range`` works on both major versions.
    for _ in range(times):
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=params["test_size"], stratify=y)
        clf = getattr(mll, model_params["func"])(X_train, y_train, model_params)
        acc += mll.infer(clf, X_test, y_test)[1]
    acc /= float(times)
    return acc
def metric(save_path, label_path):
    """Print the mean logistic-regression accuracy of pickled embeddings.

    The pickle at ``save_path`` must contain a mapping with an
    ``"embeddings"`` entry.  The number of repetitions and the test fraction
    come from the names ``metric_times`` and ``test_size``, which are not
    defined in this function and must exist in the enclosing module.

    Args:
        save_path: path of the pickle file holding the embeddings.
        label_path: path handed to ``dh.load_ground_truth``.

    Returns:
        None; the averaged accuracy is printed.

    Raises:
        ZeroDivisionError: if ``metric_times`` is 0.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(save_path, "rb") as handle:
        payload = pickle.load(handle)
    embeddings = payload["embeddings"]

    features = scale(embeddings)
    labels = dh.load_ground_truth(label_path)
    labels = labels[:len(embeddings)]

    total = 0.0
    for _ in range(metric_times):
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=test_size, stratify=labels)
        model = mll.logistic(X_train, y_train, {})
        outcome = mll.infer(model, X_test, y_test)
        total += outcome[1]
    total /= float(metric_times)
    print(total)
def classification(X, params):
    """Return averaged accuracy and F1 scores over repeated stratified splits.

    The model named by ``params["model"]["func"]`` (looked up on ``mll``) is
    trained and evaluated ``params["times"]`` times, each time on a fresh
    stratified split with test fraction ``params["test_size"]``.

    Args:
        X: embedding matrix, passed through ``scale`` before splitting.
        params: configuration mapping.  Keys read here: ``"ground_truth"``,
            ``"times"``, ``"test_size"`` and ``"model"``.

    Returns:
        dict with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    X_scaled = scale(X)
    y = dh.load_ground_truth(params["ground_truth"])
    y = y[:len(X)]

    times = params["times"]
    acc = 0.0
    micro_f1 = 0.0
    macro_f1 = 0.0
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3,
    # while ``range`` works on both major versions.
    for _ in range(times):
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=params["test_size"], stratify=y)
        clf = getattr(mll, params["model"]["func"])(X_train, y_train, params["model"])
        ret = mll.infer(clf, X_test, y_test)
        # ret[1] is accumulated as the accuracy; ret[0] is passed to
        # f1_score as the predictions.
        acc += ret[1]
        y_score = ret[0]
        micro_f1 += f1_score(y_test, y_score, average='micro')
        macro_f1 += f1_score(y_test, y_score, average='macro')
    acc /= float(times)
    micro_f1 /= float(times)
    macro_f1 /= float(times)
    return {"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
# Script section: accumulate a pairwise score matrix from the cascades and
# average it over all index pairs, over pairs present in the ground-truth
# structure G, and over pairs absent from it.
# Usage note: sys.argv[1] is parsed as a float and used as the coefficient
# inside the exponential below.

# Make the ``src`` directory next to this file importable before loading
# the project modules.
sys.path.insert(0, os.path.join(FILE_PATH, '../src'))
from utils.data_handler import DataHandler as dh
from utils.metric import Metric

# Each loaded item is indexed below as data[k][0] (used as a row/column
# index into ``res``) and data[k][1] (used in a numeric difference).
data_set = dh.load_cascades(
    os.path.join(FILE_PATH, r"../data/cascades256_sorted"))
# Hard-coded matrix size -- presumably the number of nodes in the data set;
# confirm against the data files.
n = 256
res = np.zeros((n, n), dtype=float)
cnt = np.zeros(n, dtype=float)
# NOTE: ``xrange`` only exists on Python 2.
for data in data_set:
    for i in xrange(len(data)):
        # j starts at i, so every entry is also paired with itself.
        for j in xrange(i, len(data)):
            res[data[i][0]][data[j][0]] += math.exp(
                float(sys.argv[1]) * abs(data[i][1] - data[j][1]))
        # NOTE(review): the original indentation was lost; this increment is
        # assumed to run once per i (outside the j loop) -- confirm.
        cnt[data[i][0]] += 1.0

# G is used as ``j in G[i]`` and via ``G.number_of_edges()``.
G = dh.load_ground_truth(os.path.join(FILE_PATH, r"../data/network256"))
avg = 0.0
avg_have = 0.0
avg_not = 0.0
for i in xrange(n):
    for j in xrange(n):
        # NOTE(review): cnt[i] stays 0 for an index that never occurs in any
        # cascade; with NumPy floats this division then gives nan/inf instead
        # of raising -- confirm that this cannot happen with the data used.
        ans = res[i][j] / cnt[i]
        avg += ans
        if j in G[i]:
            avg_have += ans
        else:
            avg_not += ans
# Normalise: all n*n pairs, pairs counted as edges of G, and the remainder.
avg /= float(n * n)
avg_have /= float(G.number_of_edges())
avg_not /= float(n * n - G.number_of_edges())