Example #1
from sklearn.neighbors import KNeighborsClassifier

from load_dataset import load_dataset
from load_dataset import split_data
from load_dataset import accuracy_metric

import numpy as np

if __name__ == "__main__":

    X_train, Y_train, MPIDs_train, X_valid, Y_valid, MPIDs_valid, X_test, Y_test, MPIDs_test = split_data(
        load_dataset('unit_cell_data_16.csv', 0.2))

    print("Training set information:")
    print("Positive examples: " + str(sum(Y_train)))
    print("Negative examples: " + str(len(Y_train) - sum(Y_train)))

    # k-nearest-neighbours classifier with distance-weighted voting
    clf = KNeighborsClassifier(n_neighbors=2, weights='distance')
    clf.fit(X_train, Y_train)

    Y_valid_predictions = np.zeros(len(Y_valid))
    for i, example in enumerate(X_valid):
        Y_valid_predictions[i] = clf.predict(example.reshape(1, -1))

    accuracy_metric(Y_valid_predictions, Y_valid)
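The per-example loop above works, but it is not required: a minimal alternative sketch (reusing clf, X_valid and Y_valid from the example, and scikit-learn's built-in accuracy_score instead of the project's accuracy_metric helper) scores the whole validation set in a single call:

from sklearn.metrics import accuracy_score

# Predict all validation examples at once; KNeighborsClassifier.predict accepts a 2D array
Y_valid_predictions = clf.predict(X_valid)
print("Validation accuracy: " + str(accuracy_score(Y_valid, Y_valid_predictions)))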
Example #2
from sklearn.ensemble import RandomForestClassifier

from load_dataset import load_dataset
from load_dataset import split_data
from load_dataset import accuracy_metric

import numpy as np

if __name__ == "__main__":

    X_train, Y_train, MPIDs_train, X_valid, Y_valid, MPIDs_valid, X_test, Y_test, MPIDs_test = split_data(
        load_dataset('material_average_data.csv', 0.2))

    print("Training set information:")
    print("Positive examples: " + str(sum(Y_train)))
    print("Negative examples: " + str(len(Y_train) - sum(Y_train)))

    # Random forest with 100 trees
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X_train, Y_train)

    Y_valid_predictions = np.zeros(len(Y_valid))
    for i, example in enumerate(X_valid):
        Y_valid_predictions[i] = clf.predict(example.reshape(1, -1))

    accuracy_metric(Y_valid_predictions, Y_valid)
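As a follow-up sketch (an assumption, reusing the fitted clf from the example above), the random forest's feature_importances_ attribute can show which input columns contribute most to the classification:

import numpy as np

importances = clf.feature_importances_
# Print the ten most influential feature indices, largest first
for idx in np.argsort(importances)[::-1][:10]:
    print("Feature " + str(idx) + " importance: " + str(importances[idx]))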
Example #3
import numpy as np

from load_dataset import load_dataset
from load_dataset import split_data

if __name__ == "__main__":

    # `total` is assumed to be the full dataset as a pandas DataFrame;
    # the loading step is not shown in this fragment.
    total_exps = len(total)
    print("Total Examples: " + str(total_exps))

    # Count how many examples have a non-zero value in each of the 16 element columns
    for i in range(16):
        col_name = "elem" + str(i)
        col_arr = np.array(total[col_name])
        num_elem_i = np.sum(col_arr > 0)
        print("Num elem " + str(i) + ": " + str(num_elem_i))

    # Positive examples: minimum emittance at or below the 0.2 threshold
    Y_arr = np.array(total["min emittance"])
    pos_exps = np.sum(Y_arr <= 0.2)
    print("Num pos exps (<= 0.2): " + str(pos_exps))

    inf_exps = np.sum(Y_arr == float('inf'))
    print("Num inf exps: " + str(inf_exps))

    # Drop rows with infinite emittance; column index 48 is presumably the
    # "min emittance" column in the flattened array
    total_arr = np.array(total)
    print(total_arr[0].shape)
    total_no_inf = np.array([
        total_arr[i] for i in range(total_exps)
        if total_arr[i, 48] != float('inf')
    ])
    print(total_no_inf.shape)

    X_train, Y_train, MPIDs_train, X_valid, Y_valid, MPIDs_valid, X_test, Y_test, MPIDs_test = split_data(
        load_dataset(0.2))

    print("Training set information:")
    print("Positive examples: " + str(sum(Y_train)))
    print("Negative examples: " + str(len(Y_train) - sum(Y_train)))
Example #4
import numpy as np
from sklearn.linear_model import LinearRegression

from load_dataset import load_dataset
from load_dataset import split_data

if __name__ == "__main__":

    # threshold=-1 presumably returns the raw emittance values (a regression
    # target) instead of binary labels
    X_train, Y_train, MPIDs_train, X_valid, Y_valid, MPIDs_valid, X_test, Y_test, MPIDs_test = split_data(
        load_dataset(threshold=-1))

    # Fit an ordinary least-squares model on the training split
    lin_clf = LinearRegression().fit(X_train, Y_train)
    predictions = lin_clf.predict(X_test)
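The example stops after computing predictions on the test split; a minimal sketch (assuming lin_clf, X_test, Y_test and predictions from above) evaluates the fit with standard scikit-learn regression metrics:

from sklearn.metrics import mean_absolute_error, r2_score

# Mean absolute error and coefficient of determination on the test split
print("MAE: " + str(mean_absolute_error(Y_test, predictions)))
print("R^2: " + str(r2_score(Y_test, predictions)))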