def main():
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_24, y_24 = util.load_csv('../Data/ds1_x2_x4.csv', add_intercept=False)
    Gauss_analysis = GDA()
    theta = Gauss_analysis.fit(x_24, y_24)
    print(theta)
    np.savetxt('../Output/GDA_1.txt', theta)
    util.plot(x_24, y_24, theta, 'Bridge Age', 'Earthquake Magnitude',
              '../Output/GDA_visual_1.png')

    x_25, y_25 = util.load_csv('../Data/ds1_x2_x5.csv', add_intercept=False)
    Gauss_analysis = GDA()
    theta = Gauss_analysis.fit(x_25, y_25)
    print(theta)
    np.savetxt('../Output/GDA_2.txt', theta)
    util.plot(x_25, y_25, theta, 'Bridge Age', 'Distance to Epicenter',
              '../Output/GDA_visual_2.png')

    x_45, y_45 = util.load_csv('../Data/ds1_x4_x5.csv', add_intercept=False)
    Gauss_analysis = GDA()
    theta = Gauss_analysis.fit(x_45, y_45)
    print(theta)
    np.savetxt('../Output/GDA_3.txt', theta)
    util.plot(x_45, y_45, theta, 'Earthquake Magnitude',
              'Distance to Epicenter', '../Output/GDA_visual_3.png')
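The GDA class itself is defined elsewhere in the assignment. As a reference for what its fit method is assumed to compute, here is a minimal sketch using the standard closed-form GDA estimates (class prior phi, class means mu_0 and mu_1, shared covariance sigma) and the equivalent linear decision boundary theta; the class name and return shape are inferred from the calls above, and labels are assumed to be in {0, 1}:

import numpy as np

class GDA:
    def fit(self, x, y):
        """Fit GDA by maximum likelihood and return theta = [theta_0, theta_1..n]
        for the equivalent decision rule sigmoid(theta_0 + theta[1:] @ x)."""
        m, _ = x.shape
        phi = np.mean(y == 1)
        mu_0 = x[y == 0].mean(axis=0)
        mu_1 = x[y == 1].mean(axis=0)
        # shared covariance of the class-centered points
        centered = x - np.where((y == 1)[:, None], mu_1, mu_0)
        sigma = centered.T @ centered / m
        sigma_inv = np.linalg.inv(sigma)
        theta = sigma_inv @ (mu_1 - mu_0)
        theta_0 = (0.5 * (mu_0 @ sigma_inv @ mu_0 - mu_1 @ sigma_inv @ mu_1)
                   + np.log(phi / (1 - phi)))
        return np.hstack([theta_0, theta])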
Example #2
def train_imdb():
    train_texts, train_labels = util.load_csv('data/imdb_train.csv')
    val_texts, val_labels = util.load_csv('data/imdb_valid.csv')
    test_texts, test_labels = util.load_csv('data/imdb_test.csv')
    train_m_path = "saved/imdb_train_matrix.gz"
    val_m_path = "saved/imdb_val_matrix.gz"
    test_m_path = "saved/imdb_test_matrix.gz"

    if path.exists(train_m_path):
        train_matrix = np.loadtxt(train_m_path)
    else:
        train_matrix = util.transform_text_to_phon_cnts(train_texts)
        np.savetxt(train_m_path, train_matrix)

    if path.exists(val_m_path):
        val_matrix = np.loadtxt(val_m_path)
    else:
        val_matrix = util.transform_text_to_phon_cnts(val_texts)
        np.savetxt(val_m_path, val_matrix)

    if path.exists(test_m_path):
        test_matrix = np.loadtxt(test_m_path)
    else:
        test_matrix = util.transform_text_to_phon_cnts(test_texts)
        np.savetxt(test_m_path, test_matrix)

    model = {}
    model["train_matrix"] = train_matrix
    model["val_matrix"] = val_matrix
    model["test_matrix"] = test_matrix
    model["train_labels"] = train_labels
    model["val_labels"] = val_labels
    model["test_labels"] = test_labels
    return model
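The three load-or-recompute blocks above repeat one caching pattern. A small hypothetical helper (the name load_or_compute is illustrative, not part of the original module) would collapse them:

import numpy as np
from os import path

def load_or_compute(matrix_path, texts, transform):
    """Return the cached matrix at matrix_path if it exists; otherwise
    compute it with transform(texts) and cache it for the next run."""
    if path.exists(matrix_path):
        return np.loadtxt(matrix_path)
    matrix = transform(texts)
    np.savetxt(matrix_path, matrix)
    return matrix

With it, each block becomes a single call, e.g. train_matrix = load_or_compute(train_m_path, train_texts, util.transform_text_to_phon_cnts).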
Example #3
def train_perceptron(kernel_name, kernel, learning_rate):
    """Train a perceptron with the given kernel.

    This function trains a perceptron with a given kernel and then
    uses that perceptron to make predictions.
    The output predictions are saved to src/output/p05_{kernel_name}_predictions.txt.
    The output plots are saved to src/output/p05_{kernel_name}_output.pdf.

    Args:
        kernel_name: The name of the kernel.
        kernel: The kernel function.
        learning_rate: The learning rate for training.
    """
    train_x, train_y = util.load_csv('../data/ds5_train.csv')

    state = initial_state()

    for x_i, y_i in zip(train_x, train_y):
        update_state(state, kernel, learning_rate, x_i, y_i)

    # held-out test split (assumed path, mirroring ds5_train.csv)
    test_x, test_y = util.load_csv('../data/ds5_test.csv')

    plt.figure(figsize=(12, 8))
    util.plot_contour(lambda a: predict(state, kernel, a))
    util.plot_points(test_x, test_y)
    plt.savefig('./output/p05_{}_output.pdf'.format(kernel_name))

    predict_y = [
        predict(state, kernel, test_x[i, :]) for i in range(test_y.shape[0])
    ]

    np.savetxt('./output/p05_{}_predictions.txt'.format(kernel_name), predict_y)
Example #4
def main():
    # plot
    DEBUG = False
    if DEBUG:
        from util import plot_points
        from matplotlib import pyplot as plt
        plt.figure()
        Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
        Ya = (Ya == 1).astype(float)
        plot_points(Xa, Ya)

        plt.figure()
        Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
        Yb = (Yb == 1).astype(float)
        plot_points(Xb, Yb)
        plt.show()
        import sys
        sys.exit()

    # print('==== Training model on data set A ====')
    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
Example #5
def process_eth_staking(th_stake_path, beth_distr_path):
    stake_rows = util.load_csv(th_stake_path)
    distr_rows = util.load_csv(beth_distr_path)
    stake_fe = extract_stake_flow_events(stake_rows)
    distr_fe = extract_distr_flow_events(distr_rows)
    res = stake_fe + distr_fe
    return res
Example #6
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('ds1_a.csv', add_intercept=True)
    logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
Example #7
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    logistic_regression(Xa, Ya)
    plot(Xa, Ya, 'output/data_a.png')
    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    #logistic_regression(Xb, Yb)
    plot(Xb, Yb, 'output/data_b.png')
Example #8
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('ds1_a.csv', add_intercept=True)
    a1, a2, a3, a4, a5 = logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('ds1_b.csv', add_intercept=True)
    b1, b2, b3, b4, b5 = logistic_regression(Xb, Yb)
    return a1, a2, a3, a4, a5, b1, b2, b3, b4, b5
Example #9
def main(train_path, eval_path):
    x_train, y_train = util.load_csv(train_path)
    x_eval, y_eval = util.load_csv(eval_path)

    pct = Perceptron('dot', dot_kernel)
    train(pct, x_train, y_train, x_eval, y_eval)

    pct = Perceptron('rbf', rbf_kernel)
    train(pct, x_train, y_train, x_eval, y_eval)
Example #10
def load_dataset(path):
    train = util.load_csv(os.path.join(path, 'train.txt'))
    test = util.load_csv(os.path.join(path, 'test.txt'))

    X_train = train[:,1:].astype('float32')
    X_test  = test[:,1:].astype('float32')
    Y_train = train[:,0].astype('int')
    Y_test  = test[:,0].astype('int')
    
    return X_train, X_test, Y_train, Y_test
Example #11
File: p01_b.py | Project: mtjin96/CS229
def main():
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    plot_points(Xa, Ya)
    plt.savefig('output/p01_b_a.png')

    plt.figure()  # new figure so dataset B is not drawn over dataset A
    plot_points(Xb, Yb)
    plt.savefig('output/p01_b_b.png')

    logistic_regression(Xa, Ya)
Example #12
def test():
    # ==== Plot data set A ====
    x, y = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    plt.figure()
    plt.plot(x[y == 1, -2], x[y == 1, -1], 'bx', linewidth=2)
    plt.plot(x[y == -1, -2], x[y == -1, -1], 'go', linewidth=2)
    plt.show()
    # ==== Plot data set B ====
    x, y = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    plt.figure()
    plt.plot(x[y == 1, -2], x[y == 1, -1], 'bx', linewidth=2)
    plt.plot(x[y == -1, -2], x[y == -1, -1], 'go', linewidth=2)
    plt.show()
Example #13
def drawPlotInstance():
    attr1, classes1 = util.load_csv(train1)
    attr2, classes2 = util.load_csv(train2)

    # classes1/classes2 hold the label column of each training set;
    # Counter counts the number of instances of each class
    counts_1 = Counter(classes1)
    counts_2 = Counter(classes2)
    length_1 = len(counts_1)
    length_2 = len(counts_2)

    #==================== dataset 1
    # x axis: class indices; y axis: instance count per class
    x_array_1 = list(range(length_1))
    y_count_array_1 = [counts_1[i] for i in range(length_1)]

    #==================== dataset 2
    x_array_2 = list(range(length_2))
    y_count_array_2 = [counts_2[i] for i in range(length_2)]

    #plotting
    figures, axes = plt.subplots(2)

    axes[0].plot(x_array_1, y_count_array_1)
    #axes[0].set_title("Dataset 1")
    axes[0].set(ylabel="Dataset 1")
    axes[0].set(xlabel="Classes")

    axes[1].plot(x_array_2, y_count_array_2)
    #axes[1].set_title("Dataset 2")
    axes[1].set(ylabel="Dataset 2")
    axes[1].set(xlabel="Classes")


#drawPlotInstance()
Example #14
def get_ge(net_name, model_parameters, load_parameters):
    args = util.EmptySpace()
    for key, value in load_parameters.items():
        setattr(args, key, value)
    folder = "/media/rico/Data/TU/thesis/runs{}/{}".format(
        args.experiment, util.generate_folder_name(args))

    ge_x, ge_y = [], []
    lta, lva, ltl, lvl = [], [], [], []
    for run in runs:
        filename = '{}/model_r{}_{}'.format(
            folder, run, get_save_name(net_name, model_parameters))
        ge_path = '{}.exp'.format(filename)

        y_r = util.load_csv(ge_path, delimiter=' ', dtype=float)
        x_r = range(len(y_r))
        ge_x.append(x_r)
        ge_y.append(y_r)

        if show_losses or show_acc:
            ta, va, tl, vl = util.load_loss_acc(filename)
            lta.append(ta)
            lva.append(va)
            ltl.append(tl)
            lvl.append(vl)

    return ge_x, ge_y, (lta, lva, ltl, lvl)
Example #15
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('ds1_a.csv', add_intercept=True)
    plot_cost(Xa, Ya)
    # util.plot_points(Xa[:,1:], Ya, theta)
    plt.show()
    thetas = logistic_regression(Xa, Ya)
    # util.plot_points(Xa[:,1:], Ya, thetas)
    
    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('ds1_b.csv', add_intercept=True)
    plot_cost(Xb, Yb)
    # Xb += np.random.normal(scale=0.03, size=Xb.shape)
    # util.plot_points(Xb[:,1:], Yb)
    plt.show()
    thetas = logistic_regression(Xb, Yb)
Example #16
def process_trade_history(filepath):
    res = {}
    info = {}
    rows = util.load_csv(filepath)

    clean_data(rows)

    flow_events = extract_flow_events(rows, coins_used)
    pairs = util.get_all_instances(rows, tdcols["pair"])
    info["Pairs"] = pairs

    # rows_by_month = util.group_by_month(rows, tdcols["date"])
    # # TODO: Revamp
    # avg_price = {}
    # for month in rows_by_month:
    #     if month not in res:
    #         res[month] = {}
    #     for pair in pairs:
    #         pair_rows = util.filter_by_kv(rows, tdcols["pair"], pair)
    #         sell_pair_rows = util.filter_by_kv(
    #             pair_rows, tdcols["side"], "SELL")
    #         buy_pair_rows = util.filter_by_kv(pair_rows, tdcols["side"], "BUY")
    #         if len(sell_pair_rows) > 0:
    #             avg_price[f"{pair}_SELL"] = util.weighted_average(
    #                 sell_pair_rows, tdcols["executed"], tdcols["price"])
    #         if len(buy_pair_rows) > 0:
    #             avg_price[f"{pair}_BUY"] = util.weighted_average(
    #                 buy_pair_rows, tdcols["executed"], tdcols["price"])
    #         res[month]["Mean Buy Price"] = avg_price

    return flow_events, info
Example #17
def run():
    # ========= DATASET 1 ========= #
    filepath = "./output/Base-DT-DS1.csv"

    X_train, Y_train = util.load_csv(util.train_1_filepath)
    X_test, Y_test = util.load_csv(util.test_with_label_1_filepath)

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    # Train
    clf = clf.fit(X_train, Y_train)
    # Test/Predict
    Y_pred = clf.predict(X_test)
    # Confusion Matrix
    confusion_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    metrics.ConfusionMatrixDisplay.from_estimator(clf, X_test, Y_test)
    # Evaluation
    classification_report = metrics.classification_report(Y_test, Y_pred)
    # Debug print
    print_debug(1, clf, Y_pred, confusion_matrix, classification_report)
    # Save
    util.write_csv(filepath, Y_test, Y_pred, confusion_matrix)

    # ========= DATASET 2 ========= #
    filepath = "./output/Base-DT-DS2.csv"

    X_train, Y_train = util.load_csv(util.train_2_filepath)
    X_test, Y_test = util.load_csv(util.test_with_label_2_filepath)

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    # Train
    clf = clf.fit(X_train, Y_train)
    # Test/Predict
    Y_pred = clf.predict(X_test)
    # Confusion Matrix
    confusion_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    metrics.ConfusionMatrixDisplay.from_estimator(clf, X_test, Y_test)
    # Evaluation
    classification_report = metrics.classification_report(Y_test, Y_pred)
    # Debug print
    print_debug(2, clf, Y_pred, confusion_matrix, classification_report)
    # Save
    util.write_csv(filepath, Y_test, Y_pred, confusion_matrix)


# DEBUG--------------------------------------------------------------------
#run()
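The two dataset blocks in run() differ only in their paths and dataset index. A hypothetical parameterized variant (the helper name run_base_dt is mine; print_debug and util.write_csv are used exactly as in the calls above) could remove the duplication:

def run_base_dt(n, train_path, test_path):
    # train and evaluate a Base-DT on one dataset, then save the results
    X_train, Y_train = util.load_csv(train_path)
    X_test, Y_test = util.load_csv(test_path)

    clf = tree.DecisionTreeClassifier(criterion="entropy").fit(X_train, Y_train)
    Y_pred = clf.predict(X_test)

    confusion_matrix = metrics.confusion_matrix(Y_test, Y_pred)
    metrics.ConfusionMatrixDisplay.from_estimator(clf, X_test, Y_test)
    classification_report = metrics.classification_report(Y_test, Y_pred)

    print_debug(n, clf, Y_pred, confusion_matrix, classification_report)
    util.write_csv("./output/Base-DT-DS{}.csv".format(n),
                   Y_test, Y_pred, confusion_matrix)

run() then reduces to run_base_dt(1, util.train_1_filepath, util.test_with_label_1_filepath) followed by run_base_dt(2, util.train_2_filepath, util.test_with_label_2_filepath).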
Example #18
def main():
    print('==== Training logistic regression on each feature pair ====')

    X_24, Y_24 = util.load_csv('../Data/ds1_x2_x4.csv', add_intercept=True)
    theta = logistic_regression(X_24, Y_24)
    np.savetxt('../Output/lr_1.txt', theta)
    util.plot(X_24, Y_24, theta, 'Bridge Age', 'Earthquake Magnitude', '../Output/lr_visual_1.png')

    X_25, Y_25 = util.load_csv('../Data/ds1_x2_x5.csv', add_intercept=True)
    theta = logistic_regression(X_25, Y_25)
    np.savetxt('../Output/lr_2.txt', theta)
    util.plot(X_25, Y_25, theta, 'Bridge Age', 'Distance to Epicenter', '../Output/lr_visual_2.png')

    X_45, Y_45 = util.load_csv('../Data/ds1_x4_x5.csv', add_intercept=True)
    theta = logistic_regression(X_45, Y_45)
    np.savetxt('../Output/lr_3.txt', theta)
    util.plot(X_45, Y_45, theta, 'Earthquake Magnitude', 'Distance to Epicenter', '../Output/lr_visual_3.png')
Example #19
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    # logistic_regression(Xb, Yb)

    # Training on Xb, Yb does not converge the way it does on Xa, Ya.
    # Let's examine the data.

    # Each y is either {1, -1}
    # Each x has x1, x2 and an x0=1 intercept
    print("A:")
    print(Xa.shape, Ya.shape)
    print(Xa[0])
    print(set(Ya))

    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
    x1 = Xa[:, 0]
    x2 = Xa[:, 1]
    plt.figure()
    plt.scatter(x1[Ya == -1], x2[Ya == -1], color='red')
    plt.scatter(x1[Ya == 1], x2[Ya == 1], color='blue')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title("Xa")
    plt.savefig("p01_lr_Xa")

    print("B:")
    print(Xb.shape, Yb.shape)
    print(Xb[0])
    print(set(Yb))

    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
    x1 = Xb[:, 0]
    x2 = Xb[:, 1]
    plt.figure()
    plt.scatter(x1[Yb == -1], x2[Yb == -1], color='red')
    plt.scatter(x1[Yb == 1], x2[Yb == 1], color='blue')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title("Xb")
    plt.savefig("p01_lr_Xb")
Example #20
def train_perceptron(train_path, test_path, kernel_name, kernel,
                     learning_rate):
    """Train a perceptron with the given kernel.

    This function trains a perceptron with a given kernel and then uses that perceptron to make predictions.
    The output predictions are saved to src/output/p05_{kernel_name}_predictions.txt
    The output plots are saved to src/output_{kernel_name}_output.pdf

    Args:
        kernel_name: The name of the kernel
        kernel: The kernel function
        learning_rate: The learning rate for training
    """

    train_x, train_y = util.load_csv(train_path)
    test_x, test_y = util.load_csv(test_path)

    state = initial_state()

    for x_i, y_i in zip(train_x, train_y):
        update_state(state, kernel, learning_rate, x_i, y_i)

    plt.figure()
    util.plot_contour(lambda a: predict(state, kernel, a))
    util.plot_points(test_x, test_y)
    plt.title(
        f"Kernel: {kernel_name} || Test data, color corresponds to real label")
    plt.legend()
    plt.xlabel("x1")
    plt.ylabel("x2")

    y_pred = np.array(
        [predict(state, kernel, test_x[i, :]) for i in range(test_y.shape[0])])
    plt.figure()
    util.plot_points(test_x, y_pred)
    plt.title(
        f"Kernel: {kernel_name} || Test data, color corresponds to predicted label"
    )
    plt.legend()
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()
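initial_state, update_state, and predict come from the surrounding assignment file. A minimal sketch consistent with how they are called above (signatures inferred from the call sites; labels assumed to be in {0, 1}) might be:

def initial_state():
    # the state is the list of (coefficient, training point) pairs seen so far
    return []

def predict(state, kernel, x):
    # kernelized decision function: threshold sum_i beta_i * K(x_i, x) at zero
    score = sum(beta * kernel(x_i, x) for beta, x_i in state)
    return 1 if score >= 0 else 0

def update_state(state, kernel, learning_rate, x, y):
    # perceptron-style update: step proportional to the prediction error
    beta = learning_rate * (y - predict(state, kernel, x))
    state.append((beta, x))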
Example #21
def load_all_data(use_hw, data_set):
    # Load Data
    loader = util.load_data_set(data_set)
    data_set_name = str(data_set)
    total_x_attack, total_y_attack = loader({
        'use_hw': use_hw,
        'traces_path': '/media/rico/Data/TU/thesis/data'
    })
    total_key_guesses = np.transpose(
        util.load_csv(
            '/media/rico/Data/TU/thesis/data/{}/Value/key_guesses_ALL.csv'.format(data_set_name),
            delimiter=' ',
            dtype=int))
    real_key = util.load_csv(
        '/media/rico/Data/TU/thesis/data/{}/secret_key.csv'.format(data_set_name),
        dtype=int)
    return total_x_attack, total_y_attack, total_key_guesses, real_key
Example #22
def _cache(data: TextIO, model_name: Text, output: BinaryIO, **kwargs):
    cpu = require_device(prefer_cuda=False)
    model_type = models.select(model_name)
    model = ModelInterface(model_type, cpu, False)

    csv = util.load_csv(data)
    cache = {}
    for smiles in csv.keys():
        cache_key = (smiles, )  # memcached is indexed on argument list
        processed = model.process(smiles)
        cache[cache_key] = model.encode_data(processed, **kwargs)

    pickle.dump(cache, output)
Example #23
def main():
    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    #logistic_regression(Xa, Ya)
    plt.figure(1)
    for i in range(Ya.shape[0]):
        if Ya[i] == 1:
            plt.plot(Xa[i][1], Xa[i][2], 'bx')
        else:
            plt.plot(Xa[i][1], Xa[i][2], 'go')


    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
    plt.figure(2)
    for i in range(Yb.shape[0]):
        if Yb[i] == 1:
            plt.plot(Xb[i][1], Xb[i][2], 'bx')
        else:
            plt.plot(Xb[i][1], Xb[i][2], 'go')

    plt.show()
Example #24
def main():

    # # Plot dataset A and B
    # from util import plot_points
    # import matplotlib.pyplot as plt

    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
    # plt.figure()
    # plot_points(Xa, (Ya == 1).astype(int))
    # plt.savefig('output/ds1_a.png')

    # Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
    # plt.figure()
    # plot_points(Xb, (Yb == 1).astype(int))
    # plt.savefig('output/ds1_b.png')

    print('==== Training model on data set A ====')
    Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
Example #25
def main():
    # print('==== Training model on data set A ====')
    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=False)
    # util.plot_points(Xa, Ya)
    # Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=False)
    # util.plot_points(Xb, Yb)


    # Xa, Ya = util.load_csv('../data/ds1_a.csv', add_intercept=True)
    # logistic_regression(Xa, Ya)

    print('\n==== Training model on data set B ====')
    Xb, Yb = util.load_csv('../data/ds1_b.csv', add_intercept=True)
    logistic_regression(Xb, Yb)
Example #26
def run_dataset(filepath_train, filepath_test, filepath_output):
    
    x_train, y_train = util.load_csv(filepath_train)
    x_test, y_test = util.load_csv(filepath_test)
    
    clf = Perceptron()
    y_pred = clf.fit(x_train,y_train).predict(x_test)
    
    train_accuracy = clf.score(x_train, y_train)
    test_accuracy = metrics.accuracy_score(y_test, y_pred)
    
    # confusion matrix
    cmatrix = metrics.confusion_matrix(y_test, y_pred)
    metrics.ConfusionMatrixDisplay.from_estimator(clf, x_test, y_test)
    
    # evaluation
    classification_report = metrics.classification_report(y_test, y_pred)
    
    #print to output file
    util.write_csv(filepath_output, y_test, y_pred, cmatrix)
    
    #print to console for debug purposes
    print_result(clf, train_accuracy, test_accuracy, y_pred, cmatrix, classification_report, filepath_output)
Example #27
def main():
    """Problem 1(e): Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        pred_path: Path to save predictions.
    """
    # Load dataset
    x_45, y_45 = util.load_csv('../Data/ds1_x4_x5.csv', add_intercept=True)
    Gauss_analysis = GDA()
    theta = Gauss_analysis.fit(x_45, y_45)
    print(theta)
    np.savetxt('../Output/GDA_3.txt', theta)
    util.plot(x_45, y_45, theta, '../Output/GDA_visual_3.png')
Example #28
def load_data(args):
    _x_attack, _y_attack, _real_key, _dk_plain, _key_guesses = None, None, None, None, None
    ###################
    # Load the traces #
    ###################
    loader = util.load_data_set(args.data_set)
    total_x_attack, total_y_attack, plain = loader({'use_hw': args.use_hw,
                                                    'traces_path': args.traces_path,
                                                    'raw_traces': args.raw_traces,
                                                    'start': args.train_size + args.validation_size,
                                                    'size': args.attack_size,
                                                    'domain_knowledge': True,
                                                    'use_noise_data': args.use_noise_data,
                                                    'data_set': args.data_set,
                                                    'noise_level': args.noise_level})
    if plain is not None:
        _dk_plain = torch.from_numpy(plain).cuda()
    print('Loading key guesses')

    ####################################
    # Load the key guesses and the key #
    ####################################
    data_set_name = str(args.data_set)
    _key_guesses = util.load_csv(
        '{}/{}/Value/key_guesses_ALL_transposed.csv'.format(args.traces_path, data_set_name),
        delimiter=' ',
        dtype=int,
        start=args.train_size + args.validation_size,
        size=args.attack_size)
    _real_key = util.load_csv(
        '{}/{}/secret_key.csv'.format(args.traces_path, data_set_name),
        dtype=int)

    _x_attack = total_x_attack
    _y_attack = total_y_attack
    return _x_attack, _y_attack, _key_guesses, _real_key, _dk_plain
Example #29
def run_dataset(filepath_train, filepath_test, filepath_output):

    x_train, y_train = util.load_csv(filepath_train)
    x_test, y_test = util.load_csv(filepath_test)

    gnb = GaussianNB()
    y_pred = gnb.fit(x_train, y_train).predict(x_test)

    train_accuracy = gnb.score(x_train, y_train)
    test_accuracy = metrics.accuracy_score(y_test, y_pred)

    # confusion matrix
    cmatrix = metrics.confusion_matrix(y_test, y_pred)
    metrics.ConfusionMatrixDisplay.from_estimator(gnb, x_test, y_test)

    # evaluation
    classification_report = metrics.classification_report(y_test, y_pred)

    #output file
    util.write_csv(filepath_output, y_test, y_pred, cmatrix)

    #Print result to console
    print_result(gnb, train_accuracy, test_accuracy, y_pred, cmatrix,
                 classification_report, filepath_output)
Example #30
def _combine_csvs(data_dir):
    data = []
    csvs = [os.path.join(data_dir, x) for x in os.listdir(data_dir)
            if x.split('.')[-1] == 'csv']
    sub_dirs = [x for x in os.listdir(data_dir)
                if os.path.isdir(os.path.join(data_dir, x))]

    for sd in sub_dirs:
        csvs.extend([os.path.join(data_dir, sd, x) for x in os.listdir(os.path.join(data_dir, sd))
                     if x.split('.')[-1] == 'csv'])

    for csv_ in csvs:
        data.extend(util.load_csv(csv_))

    return data
Example #31
import sys

from util import load_csv  # assumed source of load_csv, as in the other examples


def edit_dist(a, b):
    """Levenshtein (edit) distance between strings a and b, computed
    with the standard two-row dynamic-programming recurrence."""
    # present[j] holds the distance between the current prefix of a
    # and the first j characters of b
    present = list(range(len(b) + 1))
    for i in range(1, len(a) + 1):
        prev = present
        present = [i]
        for j in range(1, len(b) + 1):
            if a[i - 1] == b[j - 1]:
                present.append(prev[j - 1])
            else:
                present.append(min(prev[j - 1], prev[j], present[j - 1]) + 1)
    return present[-1]


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("Usage: python validate.py <ground_truth_file> <output_label_file>")
        exit()

    ground_truth = load_csv(sys.argv[1])
    label = load_csv(sys.argv[2])

    # match each ground-truth entry against the output labels
    all_dist = 0
    for truth in ground_truth:
        idx = -1
        for i in range(0, len(label)):
            if truth[0] == label[i][0]:
                idx = i
                break
        if idx == -1:
            print("Entry not found: %s" % truth[0])
            break
        all_dist += edit_dist(truth[1], label[idx][1])
    print("Average edit distance = %f" % (float(all_dist) / len(ground_truth)))