예제 #1
0
def classification(X, params):
    """Score a classifier on a fixed head/tail split of ``X``.

    The first ``params["n_train"]`` rows form the training set and the
    remaining rows the test set.  The fit/infer cycle is repeated
    ``params["times"]`` times and accuracy, micro-F1 and macro-F1 are
    averaged over the repetitions.

    Args:
        X: 2-D array-like that supports ``X[a:b, :]`` slicing.
        params: dict providing ``"data"`` and ``"ground_truth"`` (joined
            under ``DATA_PATH`` to locate the label file), ``"n_train"``,
            ``"times"`` and ``"model"``; ``params["model"]["func"]`` names
            the ``mll`` attribute used to build the classifier.

    Returns:
        dict with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    # NOTE(review): the scaled result is never used below -- training and
    # testing run on the raw ``X``.  The call is kept so behaviour is
    # unchanged; confirm whether the split was meant to use ``X_scaled``.
    X_scaled = scale(X)

    ground_truth_path = os.path.join(DATA_PATH, params["data"],
                                     params["ground_truth"])
    y = dh.load_ground_truth(ground_truth_path)
    y = y[:len(X)]  # keep at most as many labels as there are rows in X

    n_train = params["n_train"]
    times = params["times"]
    print("number_of_train_set", n_train)

    # The split and the trainer do not depend on the repetition, so they
    # are resolved once instead of on every pass through the loop.
    X_train, X_test = X[:n_train, :], X[n_train:, :]
    y_train, y_test = y[:n_train], y[n_train:]
    train = getattr(mll, params["model"]["func"])

    acc = 0.0
    micro_f1 = 0.0
    macro_f1 = 0.0
    for _ in range(times):
        clf = train(X_train, y_train, params["model"])
        ret = mll.infer(clf, X_test, y_test)
        # ret[1] is accumulated as the accuracy; ret[0] is handed to
        # f1_score as the predictions for y_test.
        acc += ret[1]
        y_score = ret[0]
        micro_f1 += f1_score(y_test, y_score, average='micro')
        macro_f1 += f1_score(y_test, y_score, average='macro')

    acc /= float(times)
    micro_f1 /= float(times)
    macro_f1 /= float(times)

    res = {"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
    print(res)
    return res
예제 #2
0
def visualization(X, params):
    """Draw a 2-D scatter plot of ``X`` coloured by ground-truth label.

    If the rows of ``X`` have more than two columns they are first passed
    through ``ct.reduce_embedding_dim(X, 2)``; the result is then scaled,
    plotted, saved as a PDF and shown on screen.

    Args:
        X: non-empty 2-D array-like of embeddings (``X[0]`` must exist).
        params: dict providing ``"data"`` and ``"ground_truth"`` (joined
            under ``DATA_PATH`` to locate the label file) plus
            ``"res_home"`` and ``"embeddings_file"``, which determine the
            output path.

    Returns:
        dict ``{"scatter_path": <path of the saved PDF>}``.
    """
    ground_truth_path = os.path.join(DATA_PATH, params["data"],
                                     params["ground_truth"])
    y = dh.load_ground_truth(ground_truth_path)
    y = y[:len(X)]  # keep at most as many labels as there are rows in X

    # Only reduce when there are more than two columns to begin with.
    if len(X[0]) > 2:
        X = ct.reduce_embedding_dim(X, 2)

    X = scale(X)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title('Scatter Plot')
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    point_colors = ct.label2color(y)
    ax.scatter(X[:, 0], X[:, 1], c=point_colors, cmap='viridis', marker='s')

    # The output name is the embeddings file name with "scatter.pdf"
    # appended directly (no separator), placed under params["res_home"].
    scatter_path = os.path.join(params["res_home"],
                                params["embeddings_file"] + "scatter.pdf")
    fig.savefig(scatter_path)
    plt.show()

    return {"scatter_path": scatter_path}
예제 #3
0
def metric(params):
    """Run every configured metric function against the ground truth.

    Args:
        params: dict providing ``"ground_truth_file"`` (joined under
            ``DATA_PATH``) and ``"metric_function"``, an iterable of
            dicts whose ``"func"`` entry names an attribute of ``Metric``.

    Returns:
        list with one result per entry of ``params["metric_function"]``,
        in the same order.  Each metric is called with the loaded ground
        truth and its own configuration dict.
    """
    truth_file = os.path.join(DATA_PATH, params["ground_truth_file"])
    G_truth = dh.load_ground_truth(truth_file)
    return [
        getattr(Metric, spec["func"])(G_truth, spec)
        for spec in params["metric_function"]
    ]
예제 #4
0
def classification(X, params):
    """Evaluate a classifier for test fractions 0.1, 0.2, ..., 0.9.

    For each test fraction the data is split ``params["times"]`` times
    (``stratify=y``, ``random_state=params["np_seed"]``), a classifier is
    trained and scored, and accuracy, micro-F1 and macro-F1 are averaged
    over the repetitions.

    Args:
        X: embedding matrix; it is scaled with ``scale`` before splitting.
        params: dict providing ``"ground_truth"`` (path handed to
            ``dh.load_ground_truth``), ``"times"``, ``"np_seed"`` and
            ``"model"``; ``params["model"]["func"]`` names the ``mll``
            attribute used to build the classifier.

    Returns:
        dict mapping the test fraction formatted with ``"%.2f"``
        (``"0.10"`` ... ``"0.90"``) to a dict with the averaged ``"acc"``,
        ``"micro_f1"`` and ``"macro_f1"``.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    X_scaled = scale(X)
    y = dh.load_ground_truth(params["ground_truth"])
    y = y[:len(X)]  # keep at most as many labels as there are rows in X
    times = params["times"]

    res = {}
    for step in range(1, 10):
        # Derive the fraction from the step index.  Repeatedly adding 0.1
        # drifts (the third value becomes 0.30000000000000004), which can
        # change the size of the test set that the splitter computes.
        ts = step / 10.0
        acc = 0.0
        micro_f1 = 0.0
        macro_f1 = 0.0
        for _ in range(times):
            # NOTE(review): random_state is identical on every repetition;
            # if the splitter is deterministic for a fixed seed, all
            # repetitions see the same split -- confirm this is intended.
            X_train, X_test, y_train, y_test = train_test_split(
                X_scaled, y, test_size=ts, stratify=y,
                random_state=params["np_seed"])
            clf = getattr(mll, params["model"]["func"])(
                X_train, y_train, params["model"])
            ret = mll.infer(clf, X_test, y_test)
            # ret[1] is accumulated as the accuracy; ret[0] is handed to
            # f1_score as the predictions for y_test.
            acc += ret[1]
            y_score = ret[0]
            micro_f1 += f1_score(y_test, y_score, average='micro')
            macro_f1 += f1_score(y_test, y_score, average='macro')

        acc /= float(times)
        micro_f1 /= float(times)
        macro_f1 /= float(times)

        scores = {"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
        print("test_size:", ts)
        res["%.2f" % ts] = scores
        print(scores)
    return res
예제 #5
0
def classification(X, params):
    """Evaluate a classifier for test fractions 0.1, 0.2, ..., 0.9.

    For each test fraction the data is split ``params["times"]`` times
    (``stratify=y``), a classifier is trained and scored, and the averaged
    accuracy, micro-F1 and macro-F1 are printed.

    Args:
        X: embedding matrix; it is scaled with ``scale`` before splitting.
        params: dict providing ``"data"`` and ``"ground_truth"`` (joined
            under ``DATA_PATH`` to locate the label file), ``"times"`` and
            ``"model"``; ``params["model"]["func"]`` names the ``mll``
            attribute used to build the classifier.

    Returns:
        dict with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``
        of the last test fraction evaluated (0.9).

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    X_scaled = scale(X)
    ground_truth_path = os.path.join(DATA_PATH, params["data"],
                                     params["ground_truth"])
    y = dh.load_ground_truth(ground_truth_path)
    y = y[:len(X)]  # keep at most as many labels as there are rows in X
    print(len(y))
    print("y_0=", y[0])

    times = params["times"]
    for step in range(1, 10):
        # Derive the fraction from the step index.  Repeatedly adding 0.1
        # drifts (the third value becomes 0.30000000000000004), which can
        # change the size of the test set that the splitter computes.
        ts = step / 10.0

        # Reset the accumulators for every test fraction.  They used to be
        # initialised once before the sweep, so each average leaked into
        # the sums of the next fraction.
        acc = 0.0
        micro_f1 = 0.0
        macro_f1 = 0.0
        for _ in range(times):
            X_train, X_test, y_train, y_test = train_test_split(
                X_scaled, y, test_size=ts, stratify=y)
            clf = getattr(mll, params["model"]["func"])(
                X_train, y_train, params["model"])
            ret = mll.infer(clf, X_test, y_test)
            # ret[1] is accumulated as the accuracy; ret[0] is handed to
            # f1_score as the predictions for y_test.
            acc += ret[1]
            y_score = ret[0]
            micro_f1 += f1_score(y_test, y_score, average='micro')
            macro_f1 += f1_score(y_test, y_score, average='macro')

        acc /= float(times)
        micro_f1 /= float(times)
        macro_f1 /= float(times)
        print("test_size:", ts)
        print({"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1})
    return {"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
 def classification(X, params):
     """Return the mean accuracy over repeated train/test splits.

     The data is scaled, split ``params["times"]`` times with
     ``test_size=params["test_size"]`` and ``stratify=y``, and a classifier
     is trained and scored on each split.

     Args:
         X: embedding matrix.
         params: dict providing ``"ground_truth"`` (joined under
             ``DATA_PATH``), ``"times"``, ``"test_size"`` and
             ``"classification"``; ``params["classification"]["func"]``
             names the ``mll`` attribute used to build the classifier.

     Returns:
         The accuracy (element 1 of ``mll.infer``'s result) averaged over
         the repetitions.

     Raises:
         ZeroDivisionError: if ``params["times"]`` is 0.
     """
     X_scaled = scale(X)
     y = dh.load_ground_truth(os.path.join(DATA_PATH, params["ground_truth"]))
     y = y[:len(X)]  # keep at most as many labels as there are rows in X
     times = params["times"]
     model_params = params["classification"]

     acc = 0.0
     # ``range`` instead of the Python-2-only ``xrange`` so the function
     # runs on both Python 2 and Python 3.
     for _ in range(times):
         X_train, X_test, y_train, y_test = train_test_split(
             X_scaled, y, test_size=params["test_size"], stratify=y)
         clf = getattr(mll, model_params["func"])(
             X_train, y_train, model_params)
         acc += mll.infer(clf, X_test, y_test)[1]
     acc /= float(times)
     return acc
예제 #7
0
파일: main_test.py 프로젝트: luke28/FastHNE
def metric(save_path, label_path):
    """Print the mean accuracy of a logistic model on pickled embeddings.

    The pickle at ``save_path`` must contain an ``"embeddings"`` entry.
    The embeddings are scaled and split ``metric_times`` times (module
    level setting) with the module-level ``test_size`` and ``stratify``
    set to the labels; ``mll.logistic`` is trained on each split and the
    accuracies are averaged.

    Args:
        save_path: path of the pickle file holding the embeddings.
        label_path: path handed to ``dh.load_ground_truth``.

    Returns:
        None; the averaged accuracy is printed.
    """
    with open(save_path, "rb") as handle:
        payload = pickle.load(handle)
    embeddings = payload["embeddings"]

    features = scale(embeddings)
    # Keep at most as many labels as there are embedding rows.
    labels = dh.load_ground_truth(label_path)[:len(embeddings)]

    total = 0.0
    for _ in range(metric_times):
        split = train_test_split(features, labels,
                                 test_size=test_size, stratify=labels)
        train_x, test_x, train_y, test_y = split
        model = mll.logistic(train_x, train_y, {})
        outcome = mll.infer(model, test_x, test_y)
        total += outcome[1]
    total /= float(metric_times)
    print(total)
예제 #8
0
def classification(X, params):
    """Return averaged accuracy and F1 scores over repeated splits.

    The data is scaled, split ``params["times"]`` times with
    ``test_size=params["test_size"]`` and ``stratify=y``, and a classifier
    is trained and scored on each split.

    Args:
        X: embedding matrix.
        params: dict providing ``"ground_truth"`` (path handed to
            ``dh.load_ground_truth``), ``"times"``, ``"test_size"`` and
            ``"model"``; ``params["model"]["func"]`` names the ``mll``
            attribute used to build the classifier.

    Returns:
        dict with the averaged ``"acc"``, ``"micro_f1"`` and ``"macro_f1"``.

    Raises:
        ZeroDivisionError: if ``params["times"]`` is 0.
    """
    X_scaled = scale(X)
    y = dh.load_ground_truth(params["ground_truth"])
    y = y[:len(X)]  # keep at most as many labels as there are rows in X
    times = params["times"]

    acc = 0.0
    micro_f1 = 0.0
    macro_f1 = 0.0
    # ``range`` instead of the Python-2-only ``xrange`` so the function
    # runs on both Python 2 and Python 3.
    for _ in range(times):
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=params["test_size"], stratify=y)
        clf = getattr(mll, params["model"]["func"])(
            X_train, y_train, params["model"])
        ret = mll.infer(clf, X_test, y_test)
        # ret[1] is accumulated as the accuracy; ret[0] is handed to
        # f1_score as the predictions for y_test.
        acc += ret[1]
        y_score = ret[0]
        micro_f1 += f1_score(y_test, y_score, average='micro')
        macro_f1 += f1_score(y_test, y_score, average='macro')

    acc /= float(times)
    micro_f1 /= float(times)
    macro_f1 /= float(times)
    return {"acc": acc, "micro_f1": micro_f1, "macro_f1": macro_f1}
예제 #9
0
sys.path.insert(0, os.path.join(FILE_PATH, '../src'))
from utils.data_handler import DataHandler as dh
from utils.metric import Metric

# Accumulate, for every pair of first-field indices that occur in the same
# record sequence, exp(coef * |difference of second fields|), then compare
# the per-pair average for pairs present in the ground-truth graph with
# the average for pairs that are absent.
data_set = dh.load_cascades(
    os.path.join(FILE_PATH, r"../data/cascades256_sorted"))
n = 256

# Parse the command-line coefficient once instead of inside the innermost
# loop, where it was re-parsed for every pair.
coef = float(sys.argv[1])

res = np.zeros((n, n), dtype=float)
cnt = np.zeros(n, dtype=float)
for data in data_set:
    size = len(data)
    # ``range`` instead of the Python-2-only ``xrange`` so the script runs
    # on both Python 2 and Python 3.
    for i in range(size):
        idx_i, val_i = data[i][0], data[i][1]
        # Pair position i with itself and with every later position.
        for j in range(i, size):
            res[idx_i][data[j][0]] += math.exp(
                coef * abs(val_i - data[j][1]))
        # cnt[k] counts how often k occurs as the first field of an entry.
        cnt[idx_i] += 1.0

G = dh.load_ground_truth(os.path.join(FILE_PATH, r"../data/network256"))
avg = 0.0
avg_have = 0.0
avg_not = 0.0
for i in range(n):
    for j in range(n):
        # NOTE(review): cnt[i] is 0 for an index that never occurs in
        # data_set, which makes this a division by zero -- confirm that
        # every index in range(n) appears at least once.
        ans = res[i][j] / cnt[i]
        avg += ans
        if j in G[i]:
            avg_have += ans
        else:
            avg_not += ans

# Normalise: overall mean over all n*n pairs, and the two partial sums by
# the edge count and by the number of remaining pairs respectively.
avg /= float(n * n)
avg_have /= float(G.number_of_edges())
avg_not /= float(n * n - G.number_of_edges())