Example No. 1
def test_all_detections_class(fn):
    folder_name = 'test'
    test_detections = preprocessing.all_detections(folder_name)
    view_name = fn.split('/')[-1].split('.feat')[0]
    with open(fn) as f:
        lines = f.readlines()
    test_detections.update(lines, view_name)
    labels = test_detections.get_all_tp_fp()
    print(labels)  # inspect the TP/FP labels
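A minimal driver for this helper might look like the sketch below; the glob pattern is a placeholder modeled on the paths used in the later examples, and `preprocessing` is the project's own module, so adjust both to the local checkout.

import glob

# Run every .feat file in the test folder through the helper above.
for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.feat'):
    test_all_detections_class(fn)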
Example No. 2
def test_save_to_txt_class(fn):
    out_dir = '//lyta/tomodev/DeepLearning/Playground_stage3/output_pick1mark/score_txt/'
    folder_name = 'test'
    test_detections = preprocessing.all_detections(folder_name)
    view_name = fn.split('/')[-1].split('_feat.txt')[0]
    with open(fn) as f:
        lines = f.readlines()
    test_detections.update(lines, view_name)
    labels = test_detections.get_all_tp_fp()
    postprocessing.pick_one_mark_one_group(test_detections)
    postprocessing.save_to_txt(test_detections, out_dir)
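One portability note: the view name is derived by splitting on '/' here but on '\\' in the glob-based runners below, which only works when the separator matches how each path was built. A sketch of a separator-agnostic variant using the standard library (on Windows, os.path.basename accepts both separators):

import os

view_name = os.path.basename(fn).split('_feat.txt')[0]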
def run_all_test_weighted_logit_save_to_txt():
    out_dir = '//lyta/tomodev/DeepLearning/Playground_stage3/output_pick1mark/score_txt/weighted_logit/'
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        # folder name hard-coded so the program runs end to end
        folder_name = 'test'
        test_detections = preprocessing.all_detections(folder_name)

        view_name = fn.split('\\')[-1].split('_feat.txt')[0]
        with open(fn) as f:
            lines = f.readlines()
        test_detections.update(lines, view_name)
        postprocessing.pick_one_mark_one_group_use_logit(test_detections)
        postprocessing.save_to_txt(test_detections, out_dir)
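pick_one_mark_one_group_use_logit lives in the project's own postprocessing module, so its internals are not shown here. The general idea of keeping a single mark per detection group can be sketched as follows; the groups structure and the logit attribute are illustrative and may not match the real API:

def pick_best_by_logit(groups):
    # Keep only the highest-logit detection in each group and drop the rest.
    return [max(group, key=lambda det: det.logit) for group in groups]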
Example No. 4
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt'):
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('_feat.txt')[0]
            # adding detections from one view to this instance
            train_detections.update(lines, view_name)

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt'):
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('_feat.txt')[0]
            # adding detections from one view to this instance
            test_detections.update(lines, view_name)

    # get train_x and train_y
    train_y = np.array([
        train_detections.groups[i].group_tp_fp
        for i in range(train_detections.get_total_group_count())
    ])
    train_x = lstm_processing.form_lstm_features_v3(train_detections)
    train_x = lstm_processing.repeat_and_pad(train_x, maxlen)
    train_x = sequence.pad_sequences(train_x, maxlen=maxlen, dtype='float')
    train_x = np.array(train_x)

    del train_detections

    # get test_x and test_y
    test_y = np.array([
        test_detections.groups[i].group_tp_fp
        for i in range(test_detections.get_total_group_count())
    ])
    test_x = lstm_processing.form_lstm_features_v3(test_detections)
    test_x = lstm_processing.repeat_and_pad(test_x, maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=maxlen, dtype='float')
    test_x = np.array(test_x)

    del test_detections

    model = Sequential()
    model.add(Bidirectional(LSTM(128), input_shape=(maxlen, 4)))
    model.add(Dropout(0.5))
    # model.add(Bidirectional(LSTM(512, return_sequences=True)))
    # model.add(Dropout(0.5))
    # model.add(Bidirectional(LSTM(128)))
    # model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # try using different optimizers and different optimizer configs
    model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

    print('Train...')
    model.fit(train_x,
              train_y,
              batch_size=batch_size,
              epochs=20,
              validation_data=(test_x, test_y))

    # predictions on train set
    predictions_train = model.predict(train_x)
    loss_train = model.evaluate(train_x, train_y, verbose=1)
    print('This is the training set result: \n')

    # Compute AUC
    fpr, tpr, thresholds = roc_curve(train_y, predictions_train, pos_label=1)
    az_train = auc(fpr, tpr)
    print('AUC for train set is ' + str(az_train))

    # predictions on test set
    predictions_test = model.predict(test_x)
    loss_test = model.evaluate(test_x, test_y, verbose=1)
    print('This is the testing set result: \n')

    # Compute AUC
    fpr, tpr, thresholds = roc_curve(test_y, predictions_test, pos_label=1)
    az_test = auc(fpr, tpr)
    print('AUC for testing set is ' + str(az_test))

    # # record the predictions into the class
    # train_detections.record_group_lstm_scores(predictions_train[:,0])
    # test_detections.record_group_lstm_scores(predictions_test[:,0])

    # save model
    model.save_weights(out_dir + 'model.h5')

    # Now test on each view and dump txt for scoring
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        folder_name = 'test'
        test_detections_one_view = preprocessing.all_detections(folder_name)

        view_name = fn.split('\\')[-1].split('_feat.txt')[0]
        with open(fn) as f:
            lines = f.readlines()
            test_detections_one_view.update(lines, view_name)

        # Get the feature values
        test_x = lstm_processing.form_lstm_features_v3(
            test_detections_one_view)
        test_x = lstm_processing.repeat_and_pad(test_x, maxlen)
        test_x = sequence.pad_sequences(test_x, maxlen=maxlen, dtype='float')
        test_x = np.array(test_x)

        # predict
        predictions_test_one_view = model.predict(test_x)

        # record the predictions
        test_detections_one_view.record_group_lstm_scores(
            predictions_test_one_view[:, 0])
        lstm_processing.save_to_txt_no_pick_one_mark(test_detections_one_view,
                                                     out_dir)
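Because the model is persisted with save_weights, only the parameters are stored; reloading for inference requires rebuilding the same topology first. A minimal sketch, assuming the standard Keras imports and the same maxlen and output directory as the training run (the values below are placeholders):

from keras.models import Sequential
from keras.layers import Bidirectional, LSTM, Dropout, Dense

def build_model(maxlen, n_features=4):
    # Must mirror the training architecture exactly for load_weights to succeed.
    model = Sequential()
    model.add(Bidirectional(LSTM(128), input_shape=(maxlen, n_features)))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
    return model

model = build_model(maxlen=50)   # placeholder; must match the training maxlen
model.load_weights('model.h5')   # saved under out_dir in the example above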
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:300]:  # only the first 300 views, to bound runtime
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('_feat.txt')[0]
            # adding detections from one view to this instance
            train_detections.update(lines, view_name)

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:1]:
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('_feat.txt')[0]
            # adding detections from one view to this instance
            test_detections.update(lines, view_name)

    # get train_x and train_y
    train_y = np.array(train_detections.get_all_tp_fp())
    train_x = np.array(train_detections.get_all_dl_feat_1536())
    del train_detections
    tsne = TSNE(n_components=n_components,
                init='random',
                random_state=0,
                perplexity=perplexity)
    train_x = tsne.fit_transform(train_x)

    # get test_x and test_y
    test_y = np.array(test_detections.get_all_tp_fp())
    test_x = np.array(test_detections.get_all_dl_feat_1536())
    del test_detections
    # NOTE: scikit-learn's TSNE has no transform(); refitting here embeds the
    # test set in a space unrelated to the training embedding above.
    test_x = tsne.fit_transform(test_x)

    # scale the features
    scaler = StandardScaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # start the training
    mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30))
    mlp.fit(train_x, train_y)

    # predictions on train set
    predictions_train = mlp.predict(train_x)
    prob_train = mlp.predict_proba(train_x)[:, 1]
    print('This is the training set result: \n')
    print(confusion_matrix(train_y, predictions_train))
    print(classification_report(train_y, predictions_train))

    # Compute AUC
    fpr, tpr, thresholds = roc_curve(train_y, prob_train, pos_label=1)
    az_train = auc(fpr, tpr)
    print('AUC for train set is ' + str(az_train))

    # predictions on test set
    predictions_test = mlp.predict(test_x)
    prob_test = mlp.predict_proba(test_x)[:, 1]
    print('This is the testing set result: \n')
    print(confusion_matrix(test_y, predictions_test))
    print(classification_report(test_y, predictions_test))

    # Compute AUC
    fpr, tpr, thresholds = roc_curve(test_y, prob_test, pos_label=1)
    az_test = auc(fpr, tpr)
    print('AUC for testing set is ' + str(az_test))

    # record the predictions into the class
    # train_detections.record_scores(prob_train)
    # test_detections.record_scores(prob_test)

    # Now test on each view and dump txt for scoring
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        folder_name = 'test'
        test_detections_one_view = preprocessing.all_detections(folder_name)

        view_name = fn.split('\\')[-1].split('_feat.txt')[0]
        with open(fn) as f:
            lines = f.readlines()
            test_detections_one_view.update(lines, view_name)

        # Get the feature values
        test_x = np.array(test_detections_one_view.get_all_dl_feat_1536())
        test_x = tsne.fit_transform(test_x)  # refit per view; see the TSNE note above
        test_x = scaler.transform(test_x)
        predictions_test_one_view = mlp.predict_proba(test_x)[:, 1]

        # record the predictions
        test_detections_one_view.record_scores(predictions_test_one_view)

        postprocessing.pick_one_mark_one_group_use_prediction(
            test_detections_one_view)
        postprocessing.save_to_txt(test_detections_one_view, out_dir)
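A caveat worth flagging in this example: scikit-learn's TSNE has no transform method, so each fit_transform call (on the test set, and again on every view) produces an embedding in its own arbitrary space rather than the space the scaler and MLP were fitted in. When comparable train/test coordinates are needed, a projection with an out-of-sample transform is the usual substitute; a sketch with PCA as a drop-in replacement for the t-SNE lines above:

from sklearn.decomposition import PCA

pca = PCA(n_components=2, random_state=0)
train_x = pca.fit_transform(train_x)   # fit on the training features only
test_x = pca.transform(test_x)         # project test data into the same space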
Example No. 6
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:10]:
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('_feat.txt')[0]
            # adding detections from one view to this instance
            train_detections.update(lines, view_name)

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:1]:
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('_feat.txt')[0]
            # adding detections from one view to this instance
            test_detections.update(lines, view_name)

    # get train_x and train_y
    y = np.array(train_detections.get_all_tp_fp())
    X = np.array(train_detections.get_all_dl_feat_1536())
    del train_detections

    n_components = 2
    # row 0 shows the raw features; rows 1-3 hold the t-SNE grid
    (fig, subplots) = plt.subplots(4, 5, figsize=(15, 10))
    perplexities = [5, 30, 50, 100, 200]
    n_iters = [500, 1000, 5000]

    red = y == 0
    green = y == 1

    # raw features (first two of the 1536 dimensions), before any embedding
    ax = subplots[0][0]
    ax.scatter(X[red, 0], X[red, 1], c="r")
    ax.scatter(X[green, 0], X[green, 1], c="g")
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')

    for i, n_iter in enumerate(n_iters):
        for j, perplexity in enumerate(perplexities):
            ax = subplots[i + 1][j]  # row 0 is reserved for the raw-data plot

            t0 = time()
            tsne = manifold.TSNE(n_components=n_components,
                                 init='random',
                                 random_state=0,
                                 perplexity=perplexity,
                                 n_iter=n_iter)
            Y = tsne.fit_transform(X)
            t1 = time()
            print("perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
            ax.set_title("Perplexity=%d, iteration =%d" % (perplexity, n_iter))
            ax.scatter(Y[red, 0], Y[red, 1], c="r")
            ax.scatter(Y[green, 0], Y[green, 1], c="g")
            ax.xaxis.set_major_formatter(NullFormatter())
            ax.yaxis.set_major_formatter(NullFormatter())
            ax.axis('tight')

    plt.show()

    print(' ')
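This visualization uses several names without showing their imports; they correspond to the following module-level statements, inferred from the calls above:

import os
import glob
from time import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from sklearn import manifold
import preprocessing  # the project's own module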
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.feat'):
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('.feat')[0]
            # adding detections from one view to this instance
            train_detections.update(lines, view_name)  # , update_tp_group_only=True

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.feat'):
        with open(file) as f:
            lines = f.readlines()
            view_name = file.split('.feat')[0]
            # adding detections from one view to this instance
            test_detections.update(lines, view_name)

    # get train_x and train_y
    train_y = np.array(train_detections.get_all_tp_fp())
    train_x = np.array([
        train_detections.get_all_od(),
        train_detections.get_all_dl(),
        train_detections.get_all_logit(),
        train_detections.get_all_area()
    ]).transpose()


    # get test_x and test_y
    test_y = np.array(test_detections.get_all_tp_fp())
    test_x = np.array([
        test_detections.get_all_od(),
        test_detections.get_all_dl(),
        test_detections.get_all_logit(),
        test_detections.get_all_area()
    ]).transpose()

    # scale the features
    scaler = StandardScaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # start the training
    mlp = MLPClassifier(hidden_layer_sizes=(8, 4))
    mlp.fit(train_x, train_y)

    # save the model
    joblib.dump(mlp, out_dir + 'pick_one_mark_nn.pkl')
    joblib.dump(scaler, out_dir + 'pick_one_mark_scaler.pkl')

    # predictions on train set
    predictions_train = mlp.predict(train_x)
    prob_train = mlp.predict_proba(train_x)[:,1]
    print('This is the training set result: \n')
    print(confusion_matrix(train_y, predictions_train))
    print(classification_report(train_y, predictions_train))

    # Compute AUC
    fpr, tpr, thresholds = roc_curve(train_y, prob_train, pos_label=1)
    az_train = auc(fpr, tpr)
    print('AUC for train set is ' + str(az_train))

    # predictions on test set
    predictions_test = mlp.predict(test_x)
    prob_test = mlp.predict_proba(test_x)[:,1]
    print('This is the testing set result: \n')
    print(confusion_matrix(test_y, predictions_test))
    print(classification_report(test_y, predictions_test))

    # Compute AUC
    fpr, tpr, thresholds = roc_curve(test_y, prob_test, pos_label=1)
    az_test = auc(fpr, tpr)
    print('AUC for testing set is ' + str(az_test))

    # record the predictions into the class
    train_detections.record_scores(prob_train)
    test_detections.record_scores(prob_test)

    # Now test on each view and dump txt for scoring
    for fn in glob.glob(test_dir + '*.feat'):
        folder_name = 'test'
        view_name = fn.split('\\')[-1].split('.feat')[0]

        test_detections_one_view = preprocessing.all_detections_one_view(folder_name, view_name)

        with open(fn) as f:
            lines = f.readlines()
            test_detections_one_view.update(lines, view_name)

        # Get the feature values (must match the four training features
        # od, dl, logit, and area, or scaler.transform raises a shape error)
        test_x = np.array([
            test_detections_one_view.get_all_od(),
            test_detections_one_view.get_all_dl(),
            test_detections_one_view.get_all_logit(),
            test_detections_one_view.get_all_area()
        ]).transpose()
        test_x = scaler.transform(test_x)
        predictions_test_one_view = mlp.predict_proba(test_x)[:, 1]

        # record the predictions
        test_detections_one_view.record_scores(predictions_test_one_view)

        postprocessing.pick_one_mark_one_group_use_prediction(test_detections_one_view)
        postprocessing.save_to_txt(test_detections_one_view, out_dir, view_name)
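Since both the classifier and the scaler are persisted with joblib, scoring can also happen in a separate process by loading the two artifacts; a minimal sketch, where the paths and the four-column feature matrix features are placeholders:

import joblib  # on older scikit-learn versions: from sklearn.externals import joblib

mlp = joblib.load('pick_one_mark_nn.pkl')
scaler = joblib.load('pick_one_mark_scaler.pkl')
scores = mlp.predict_proba(scaler.transform(features))[:, 1]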