Example #1
    def fit(self, train_ops, data_dir='lfe/data', save_dir='lfe'):
        '''
        :param train_ops: list of operation names to train classifiers for
        :param data_dir: directory for training data
        :param save_dir: directory to save models
        :return: None
        '''
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        train_x, train_y = self.load_training_data(data_dir)
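        # train_y is expected to map each operation name to its per-sample labels;
        # it is indexed as train_y[train_op] in the loop below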

        for train_op in train_ops:
            save_path = "lfe_" + self.name_prefix + "_" + train_op
            save_path = os.path.join(save_dir, save_path)
            if train_op not in ('log', 'sqrt', 'square', 'freq', 'round', 'tanh',
                                'sigmoid', 'isoreg', 'zscore', 'norm'):
                raise ValueError("Unexpected operation %s" % train_op)
            # every supported operation currently uses the same network configuration
            clf = MLP(hidden_layer_sizes=(500, ),
                      max_iter=3000,
                      verbose=1,
                      n_iter_no_change=20,
                      tol=1e-5)
            clf.fit(train_x, train_y[train_op])
            from sklearn.metrics import accuracy_score
            print(accuracy_score(clf.predict(train_x), train_y[train_op]))
            with open(save_path, 'wb') as f:
                pkl.dump(clf, f)
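        # To restore a saved model later (sketch, not in the original snippet):
        #     with open(save_path, 'rb') as f:
        #         clf = pkl.load(f)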
Example #2
# printing class distribution of test dataset
print(f'Classes: {np.unique(y_test)}')
print(f'Class distribution for test data: {np.bincount(y_test)}')

# MLP is sensitive to feature scaling, so scale the features first
# Options: MinMaxScaler and StandardScaler
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
from sklearn.preprocessing import StandardScaler as SS
scaler = SS()
X_train_stdsc = scaler.fit_transform(X_train)
X_test_stdsc = scaler.transform(X_test)  # reuse the training-set statistics for the test set

# Setting of hyperparameters of the network
from sklearn.neural_network import MLPClassifier as MLP
mlp = MLP(hidden_layer_sizes=(10, ), learning_rate_init=0.001, max_iter=5000)

# Calculating Training Time : more neurons, more time
from time import time
start = time()
# Train the model using the scaled training sets
mlp.fit(X_train_stdsc, y_train)
end = time()
print(f'Training Time: {(end-start)*1000:.3f}ms')

# Predict the response for test dataset
y_pred = mlp.predict(X_test_stdsc)  # scaled

# Import scikit-learn metrics module for evaluating model performance
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
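
# A minimal continuation (not part of the original snippet), using the metrics imported above:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')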
Example #3
    criterion='gini',
)

from neupy import algorithms

modeloPNN = algorithms.PNN(
    std=5,
    verbose=False,
)

from sklearn.neural_network import MLPClassifier as MLP

modeloMLP = MLP(hidden_layer_sizes=(175, 100, 50, 25),
                max_iter=500,
                random_state=1)

from sklearn import svm

modeloSVM = svm.LinearSVC()

modelos = {
    'LDA': modeloLDA,
    'QDA': modeloQDA,
    'KNN': modeloKNN,
    'FOREST': modelFOREST,
    'SVM': modeloSVM,
    'PNN': modeloPNN,
Example #4
    if x!=103:
        pca = decomposition.PCA(n_components = x)

        
        train_X = pca.fit_transform(Train_X)
        test_X = pca.transform(Test_X)
    else:
        train_X = Train_X
        test_X = Test_X
    print(test_X.shape)
    print(train_X.shape)
    ##print(n_classes)



    mlp = MLP(hidden_layer_sizes=(128, 64),
              batch_size=128,
              learning_rate_init=0.001,
              epsilon=1e-08,
              max_iter=100)
    mlp.fit(train_X,train_Y)

    pred = mlp.predict(test_X)
    pred = pred.argmax(axis=1)
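    # argmax over axis 1 assumes predict() returned one column per class (one-hot / multilabel
    # targets); with plain 1-D class labels this call would raise an axis error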
    
    c_matrix = confusion_matrix(test_Y,pred)
    print(c_matrix)
    accuracy = accuracy_score(test_Y,pred)
    print('Accuracy : ',accuracy)
    a.append(accuracy)
    #precision = true positive / total predicted positive(True positive + False positive)
    #recall = true positive / total actual positive(True positive + False Negative)
    print(classification_report(test_Y,pred))

print(a)
Example #5
def simpleAlgorithm(trainData):
    trainx=trainData[[ 'Pclass',  'Sex',  'Age',  'Fare',  'Embarked',  'familySize']]
    trainy=trainData['Survived'].values.ravel()
    tr_tr_x,tr_te_x,tr_tr_y,tr_te_y=train_test_split(trainx,trainy,test_size=0.3,random_state=0)

    clf_svms=[]
    clf_KNNs = []
    clf_LRs = []
    clf_MNBs = []
    clf_MLPs = []
    clf_rfcs = []
    clf_XGBs = []
    clf_ADAs = []

    # C=np.arange(0.1,2,0.1)
    # gamma=np.arange(0.01,0.2,0.01)
    # for p1 in C:
    #     for p2 in gamma:
    #         clf_svm = SVC(C=p1, gamma=p2, kernel='rbf', random_state=1)
    #         clf_svms.append(clf_svm)
    # tols=[1.0,0.01,0.001,0.0001,0.00001,0.000001]
    # for ele in tols:
    #     clf_svm=SVC(C=4.7,gamma=0.07,kernel='rbf',random_state=0,tol=ele)
    #     clf_svms.append(clf_svm)
    # for p2 in coef0s:
    #     clf_svm = SVC(C=0.9, gamma=0.0999999999, kernel='rbf', random_state=0,probability=True,coef0=p2)
    #     clf_svms.append(clf_svm)
    # parameters={
    #     'C':[0.01,0.1,1,2,5],
    #     'kernel':['rbf','linear','poly']
    # }
    # clf=SVC()
    # gsearch=GridSearchCV(clf,param_grid=parameters,scoring='roc_auc',cv=5)
    # gsearch.fit(trainx,trainy)
    # print(gsearch.best_params_,gsearch.best_score_)


    clf_svm = SVC(C=0.5, gamma=0.19, kernel='rbf', random_state=0,probability=True)
    clf_svms.append(clf_svm)


    # clf = SVC(C=2,kernel='poly')
    # cv=ShuffleSplit(n_splits=100,test_size=0.2,random_state=1)
    # plot_learning_curve(clf,tr_tr_x,tr_tr_y,ylim=(0.7,0.95),cv=cv)

    # n_neighbors=range(1,20)
    # for n_neighbor in n_neighbors:
    #     clf_KNN = KNN(n_neighbors=n_neighbor)
    #     clf_KNNs.append(clf_KNN)
    # sizes=range(20,100,10)
    # for size in sizes:
    #     clf_KNN = KNN(n_neighbors=4,leaf_size=size)
    #     clf_KNNs.append(clf_KNN)
    clf_KNN = KNN(n_neighbors=4,leaf_size=80)
    clf_KNNs.append(clf_KNN)

    C=np.arange(0.018,0.021,0.001)
    for c in C:
        clf_LR = LR(tol=0.0001, random_state=0, solver='lbfgs', penalty='l2', C=c)
        clf_LRs.append(clf_LR)

    clf_MNB=MNB()
    clf_MNBs.append(clf_MNB)
    hidden_layer_sizes=[]
    # for i in range(6,15,6):
    #     tuple=(i,)
    #     hidden_layer_sizes.append(tuple)
    # for  i in range(3,8,1):
    #     for j in range(3, 8, 1):
    #         for k in range(3,8,1):
    #             tuple = (i,j,k)
    #             hidden_layer_sizes.append(tuple)
    # for  i in range(7,15,1):
    #     tuple = (i,i,i)
    #     hidden_layer_sizes.append(tuple)
    # hidden_layer_sizes=[(6,6),(7, 3),(6,6,6),(7,3,7),(7,7,7),(70,70,70)]
    hidden_layer_sizes = [(12,12,12)]
    for hidden_layer_size in hidden_layer_sizes:
        clf_MLP = MLP(hidden_layer_sizes=hidden_layer_size, max_iter=10000,tol=0.00001,solver='lbfgs')
        clf_MLPs.append(clf_MLP)


    # n_estimators=range(10,50)
    # max_depths=range(3,5)
    # max_features=range(1,7)
    # min_samples_splits=range(2,23,1)
    # min_samples_leafs=range(1,101,10)
    n_estimators = np.arange(17,18,1)
    for n_estimator in n_estimators:
        clf_rfc = rfc(n_estimators=n_estimator, max_depth=4, random_state=0,max_features=2,oob_score=False,min_samples_split=18)
        clf_rfcs.append(clf_rfc)
    #     for depth in max_depths:
    #         clf_rfc=rfc(n_estimators=n_estimator,max_depth=depth,random_state=0)
    # for min_samples_leaf in min_samples_leafs:
    #     clf_rfc = rfc(n_estimators=30, max_depth=4, random_state=0,max_features=1,oob_score=True,min_samples_split=18)
    #     clf_rfcs.append(clf_rfc)
    # min_samples_leafs=np.arange(0.1,0.6,0.1)
    # min_samples_leafs.append(1)
    # for leaf in min_samples_leafs:
    clf_rfc = rfc(n_estimators=26, max_depth=4, random_state=0,max_features=2,oob_score=False,min_samples_split=18)
    clf_rfcs.append(clf_rfc)


    # n_estimators=range(62,63,1)
    # max_depths=range(3,4)
    # for n_estimator in n_estimators:
    #     for depth in  max_depths:
    #         clf_XGB=XGB(n_estimators=n_estimator,max_depth=depth)
    #         clf_XGBs.append(clf_XGB)
    # min_child_weights=range(1,101,20)
    # subsamples=np.arange(0.5,1.0,0.2)
    # gammas=np.arange(0,1.0,0.2)
    # for min_child_weight in min_child_weights:
    #     for subsample in subsamples:
    #         for gamma in gammas:
    #             clf_XGB = XGB(n_estimators=62, max_depth=3,min_child_weight=min_child_weight,subsample=subsample,gamma=gamma)
    #             clf_XGBs.append(clf_XGB)

    # n_estimators=np.arange(60,80,1)
    # for n_estimator in n_estimators:
    #     clf_XGB = XGB(random_state=0,n_estimators=n_estimator, max_depth=3, min_child_weight=1, subsample=0.9, gamma=0.2,reg_alpha=0.2,learning_rate=0.30000000000000004,reg_lambda=0.5)
    #     clf_XGBs.append(clf_XGB)

    clf_XGB = XGB(random_state=0, n_estimators=67, max_depth=3, min_child_weight=1, subsample=0.9, gamma=0.2,
                  reg_alpha=0.2, learning_rate=0.30000000000000004, reg_lambda=0.5)
    clf_XGBs.append(clf_XGB)

    # n_estimators=range(1,20,1)
    # learning_rates=np.arange(0.8,1.0,0.01)
    # for learning_rate in learning_rates:
    #     for n_estimator in n_estimators:
    #         clf_ada = ADA(n_estimators=n_estimator,learning_rate=learning_rate)
    #         clf_ADAs.append(clf_ada)
    # n_estimators=np.arange(1,13,1)
    # for n_estimator in n_estimators:
    #     clf_ada = ADA(n_estimators=n_estimator, learning_rate=0.9800000000000002,random_state=0)
    #     clf_ADAs.append(clf_ada)
    clf_ada = ADA(n_estimators=9, learning_rate=0.9800000000000002, random_state=0)
    clf_ADAs.append(clf_ada)

    clfs=[clf_svms,clf_KNNs,clf_LRs,clf_MNBs,clf_MLPs,clf_rfcs,clf_XGBs,clf_ADAs]
    Algorithms = ['svm', 'KNN', 'LR', 'MNB', 'MLP', 'rfc', 'XGB','ADA']
    savemodels = []
    savemodelpaths=[]

    for clf,Algorithm in zip(clfs,Algorithms):
        bigges_tr_score = 0
        best_tr_clf = ''
        bigges_te_score = 0
        best_te_clf = ''
        corr_te_score=0
        corr_tr_score=0
        for ele in clf:
            if Algorithm!='MNB':
                tr_x_scale=scale(tr_tr_x)
                tr_te_x_scale=scale(tr_te_x)
                tr_score,te_score,tr_mean_score = trainModel(ele, Algorithm, tr_x_scale,tr_tr_y,tr_te_x_scale,tr_te_y)
            else:
                tr_score,te_score,tr_mean_score = trainModel(ele, Algorithm, tr_tr_x,tr_tr_y,tr_te_x,tr_te_y)
            if tr_score>bigges_tr_score:
                bigges_tr_score=tr_score
                corr_te_score=te_score
                best_tr_clf=ele
            if te_score > bigges_te_score:
                bigges_te_score = te_score
                corr_tr_score=tr_score
                best_te_clf = ele
        print('Best score on the training portion of the split: %s score is:%.4f    corresponding test-portion score: %s score is:%.4f' % (Algorithm, bigges_tr_score, Algorithm, corr_te_score))
        print('Best score on the test portion of the split: %s score is:%.4f    corresponding training-portion score: %s score is:%.4f' % (Algorithm, bigges_te_score, Algorithm, corr_tr_score))
        print('best classifier by training score:', best_tr_clf)
        print('best classifier by test score:', best_te_clf)
        # cv=ShuffleSplit(n_splits=10,test_size=0.2,random_state=1)
        # plot_learning_curve(best_te_clf,Algorithm,trainx,trainy,ylim=(0.7,0.95),cv=cv)
        savemodels.append(best_te_clf)

    for Algorithm in Algorithms:
        model_path = '../models/' + Algorithm + '_model.m'
        savemodelpaths.append(model_path)

    return savemodels,savemodelpaths
Example #6
labelsTrain = pickle.load(open("labelsTrain.pkl","rb"))
labelsVal = pickle.load(open("labelsVal.pkl","rb"))
"""

# Determine optimal number of hidden nodes
print(1)
runs = 10
nodes = 14
lowestAvgMAE = float('inf')
runPoints = []
avgPoints = []
for i in range(2, nodes + 2):
    errorSum = []
    for j in range(runs):
        ANN = MLP(hidden_layer_sizes=(i, ),
                  max_iter=200,
                  activation='logistic',
                  solver='lbfgs')
        ANN.fit(featTrain, labelsTrain)
        N = ANN.predict(featVal)
        k = len(labelsVal)
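        # Convert the two-component targets/predictions to headings in [0, 360) degrees and
        # use the wrap-around absolute error, so 359 deg vs 1 deg counts as 2 deg, not 358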
        labelDeg = np.rad2deg(np.arctan2(labelsVal[:, 0], labelsVal[:, 1]))
        NDeg = np.rad2deg(np.arctan2(N[:, 0], N[:, 1]))
        labelDeg[labelDeg < 0] = labelDeg[labelDeg < 0] + 360
        NDeg[NDeg < 0] = NDeg[NDeg < 0] + 360
        AE = np.abs(labelDeg - NDeg)
        AE[AE > 180] = 360 - AE[AE > 180]
        MAE = np.mean(AE)
        runPoints.append([i, MAE])
        errorSum.append(MAE)
    avgMAE = np.mean(errorSum)
    avgPoints.append([i, avgMAE])
Example #7
if __name__ == "__main__":
    # disable warning messages
    warnings.filterwarnings("ignore")

    # linear classifiers
    linear = {
        "LDA": LDA(solver="svd", n_components=1),
        "Logit": LogisticClassifier(solver="liblinear")
    }

    # non-linear classifiers
    nonlinear = {
        "MLP":
        MLP(hidden_layer_sizes=(400, ),
            solver="sgd",
            learning_rate="constant",
            max_iter=1000,
            power_t=0.4),
        "QDA":
        QDA(reg_param=1),
        "SVM":
        SVM(kernel="rbf", C=1.41),
        "KNN":
        KNN(metric="manhattan", n_neighbors=24)
    }

    # loading data (original dataset)
    training = pd.read_csv("data/training.csv")
    testing = pd.read_csv("data/testing.csv")
    X_train = training.drop(["Class"], axis=1)
    y_train = training["Class"]
Example #8
# 		pickle.dump(new_data, open('bertheads.pkl'	, 'wb'))

for data in ["original", "heads", "shortened"]:

    print("**************" + data + "*****************")
    if data == "shortened":
        data = shortened
    elif data == "original":
        data = original
    else:
        data = head

    train, test = train_test_split(data,
                                   shuffle=True,
                                   random_state=1,
                                   train_size=int(len(data) * 0.9))

    x_train = [i[1].squeeze() for i in train]
    y_train = [i[0] for i in train]

    x_test = [i[1].squeeze() for i in test]
    y_test = [i[0] for i in test]

    clf = MLP()
    clf.fit(x_train, y_train)

    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred, output_dict=True))

    y_pred = clf.predict(x_train)
    print(classification_report(y_train, y_pred, output_dict=True))
Example #9
ytest = M.item().get('y_test')

#%%
# This is for capturing the best value
start_time = time()

param_grid = {
    'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (8, 8, 8), (8, 10, 8)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}
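# 4 x 2 x 2 x 2 x 2 = 64 parameter combinations; with cv=10 below this means 640
# cross-validation fits (plus the final refit on the best setting)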

clf = GridSearchCV(MLP(max_iter=1000,
                       early_stopping=True,
                       validation_fraction=1.0 / 6),
                   param_grid,
                   cv=10,
                   verbose=1,
                   n_jobs=-1)
clf.fit(Xtrain, ytrain)

elapsed_time = time() - start_time
print("Time final: {}".format(elapsed_time))
dump(clf, 'archivo_grey.joblib')

#%%
nn_acc = clf.score(Xtest, ytest)
print(f"NN: El accuracy encontrado fue {nn_acc * 100.0}%")
Example #10
#MAEValidAngle = []
numNodes = []
node = []
avgMAEs = []
bestMAE = float('inf')
history = np.array([float('inf')] * 10)
itr = -1
priorValidAngles = []
for k in range(4, 12, 2):
    #ANN = MLP(hidden_layer_sizes = (k,),max_iter=30,activation='logistic',solver='lbfgs',warm_start=True)
    MAECurrent = []
    MAETrainCurrent = []
    for l in range(10):
        ANN = MLP(hidden_layer_sizes=(k, ),
                  max_iter=30,
                  activation='logistic',
                  solver='lbfgs',
                  warm_start=True)
        #ANNs.append(ANN.fit(scaleTrainFeat,trainLabel))
        condition = True
        #set almost all lists to be empty here
        MAETrain = []
        MAEValid = []
        MAETrainAngle = []
        MAEValidAngle = []
        epoch = []
        loop = -1

        #itr = -1
        while (condition):
            itr += 1
Example #11
X_train = np.random.rand(2, 2)
y_train = np.random.rand(2, )
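# NOTE: the two random samples above are not the XOR truth table; that would be
# X_train = [[0, 0], [0, 1], [1, 0], [1, 1]], y_train = [0, 1, 1, 0]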

my_hidden_layer_sizes = (4, 4)
XOR_MLP = MLP(activation='tanh',
              alpha=0.99,
              batch_size='auto',
              beta_1=0.9,
              beta_2=0.999,
              early_stopping=False,
              epsilon=1e-08,
              hidden_layer_sizes=my_hidden_layer_sizes,
              learning_rate='constant',
              learning_rate_init=0.1,
              max_iter=5000,
              momentum=0.5,
              nesterovs_momentum=True,
              power_t=0.5,
              random_state=0,
              shuffle=True,
              solver='sgd',
              tol=0.0001,
              validation_fraction=0.1,
              verbose=False,
              warm_start=False)
XOR_MLP.fit(X_train, y_train)

# Read layer weights and bias weights together
weights = XOR_MLP.coefs_
biases_weights = XOR_MLP.intercepts_
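
# Sketch (not in the original snippet): one weight matrix and one bias vector per layer
# transition; with 2 inputs, hidden sizes (4, 4) and a binary target the shapes are
# (2, 4), (4, 4), (4, 1)
for w, b in zip(weights, biases_weights):
    print(w.shape, b.shape)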
Example #12
centered_train_features = train_features.copy()
ncols = features.shape[1]
train_col_means = centered_train_features.mean(axis=0)
for i in range(ncols):
    centered_train_features[:, i] = centered_train_features[:, i] - train_col_means[i]

centered_test_features = test_features.copy()
test_col_means = centered_test_features.mean(axis=0)
for i in range(ncols):
    centered_test_features[:, i] = centered_test_features[:, i] - test_col_means[i]

mlp = MLP(hidden_layer_sizes=(2 * len(centered_train_features)))
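# NB: len() of a 2-D array is its number of rows, so this builds one hidden layer with
# 2 * n_samples units (2 * n_features may have been the intent)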
mlp.fit(centered_train_features, train_outcome)
predicted_outcome = mlp.predict(
    centered_test_features)  #don't forget to center the test features
score = accuracy_score(test_outcome, predicted_outcome)
print(score)
# about 82 % accuracy

pca = PCA(train_features.shape[1])
transformed_train_features = pca.fit_transform(train_features)
transformed_test_features = pca.transform(test_features)

mlp = MLP(hidden_layer_sizes=(2 * len(transformed_train_features)))
mlp.fit(transformed_train_features, train_outcome)
predicted_outcome = mlp.predict(transformed_test_features)
score = accuracy_score(test_outcome, predicted_outcome)
Example #13
for train_index, test_index in folds:
    m = SVC(kernel='linear', class_weight='balanced')
    m.fit(np.concatenate((X1[train_index], X2[train_index]), axis=-1),
          Y[train_index])
    yp = m.predict(np.concatenate((X1[test_index], X2[test_index]), axis=-1))
    rst = precision_recall_fscore_support(Y[test_index], yp)
    print(rst)
    results.append(rst)

print('Linear SVM\n', file=fp)
print(np.mean(results, axis=0), file=fp)
print('\n\n', file=fp)

results = []
for train_index, test_index in folds:
    m = MLP()  # MLPClassifier does not accept class_weight (unlike SVC/LogisticRegression)
    m.fit(np.concatenate((X1[train_index], X2[train_index]), axis=-1),
          Yl[train_index])
    yp = m.predict(np.concatenate((X1[test_index], X2[test_index]), axis=-1))
    rst = precision_recall_fscore_support(Y[test_index], yp)
    print(rst)
    results.append(rst)

print('MLP\n', file=fp)
print(np.mean(results, axis=0), file=fp)
print('\n\n', file=fp)

results = []
for train_index, test_index in folds:
    m = LR(class_weight='balanced')
    m.fit(X1[train_index] - X2[train_index], Y[train_index])
Example #14
def main(argv):
    del argv  # Unused.

    # -- Load data

    # ---- Embedding
    logging.info('Loading embeddings')
    emb0 = Embedding(join(FLAGS.data_root, FLAGS.lang0_emb_file))
    emb1 = Embedding(join(FLAGS.data_root, FLAGS.lang1_emb_file))
    emb = MultiLanguageEmbedding(emb0, emb1)

    logging.info('Loading desc - word pairs')
    # ---- desc, word pairs
    dw_pair_train_01 = DescCorpus.build_dw_pair_from_file(join(
        FLAGS.data_root, FLAGS.lang01_desc_file),
                                                          emb,
                                                          src_lan_id=0,
                                                          tgt_lan_id=1)
    dw_pair_train_10 = DescCorpus.build_dw_pair_from_file(join(
        FLAGS.data_root, FLAGS.lang10_desc_file),
                                                          emb,
                                                          src_lan_id=1,
                                                          tgt_lan_id=0)
    dw_pair_test_01 = DescCorpus.build_dw_pair_from_file(join(
        FLAGS.data_root, FLAGS.lang01_desc_test_file),
                                                         emb,
                                                         src_lan_id=0,
                                                         tgt_lan_id=1)
    dw_pair_test_10 = DescCorpus.build_dw_pair_from_file(join(
        FLAGS.data_root, FLAGS.lang10_desc_test_file),
                                                         emb,
                                                         src_lan_id=1,
                                                         tgt_lan_id=0)

    # ---- build candidate set
    lang0_candidate_set = []  # lang# is the lang_id of *target*
    for dw_pair in chain(dw_pair_train_10, dw_pair_test_10):
        d, w = dw_pair
        lang0_candidate_set.append(w)
    lang0_candidate_set = set(lang0_candidate_set)
    lang1_candidate_set = []  # lang# is the lang_id of *target*
    for dw_pair in chain(dw_pair_train_01, dw_pair_test_01):
        d, w = dw_pair
        lang1_candidate_set.append(w)
    lang1_candidate_set = set(lang1_candidate_set)

    # -- Load model(s)
    logging.info('Loading models.')
    tag = ''
    word2vec_model_infer = keras.models.load_model(
        join(FLAGS.model_root, tag + 'word2vec_model_infer'))
    encoder_model_infer = keras.models.load_model(
        join(FLAGS.model_root, tag + 'encoder_model_infer'))
    # encoder_model_infer.compile(optimizer=Adam(amsgrad=True), loss='mse')
    logging.info("Models are not compiled, but that's fine.")

    # -- Predicting
    logging.info("predicting...")
    # ---- Get embedding matrix
    emb_matrix = word2vec_model_infer.predict(
        np.arange(0, len(emb)), batch_size=FLAGS.word2vec_batch_size)
    logging.info('emb_matrix.shape = %s', emb_matrix.shape)

    # -- Test Task 1

    def run_test_task_1(dw_pair, lang_target_candidate_set):

        desc_iter = DescIterator(
            DescCorpus(dw_pair),
            desc_length=FLAGS.encoder_desc_length,
            batch_size=FLAGS.encoder_batch_size,
            shuffle=False,
            epochs=1,
        )
        desc_embedded = []
        for batch in desc_iter.iter(is_inference=True):
            r = encoder_model_infer.predict_on_batch(batch)
            desc_embedded.append(r)
        desc_embedded = np.concatenate(desc_embedded)
        desc_target = [_[1] for _ in dw_pair]

        def get_knn(embedded, emb_matrix, candidate_set):
            r = []
            for id_ in candidate_set:
                dist = np.linalg.norm(embedded - emb_matrix[id_])
                r.append((dist, id_))
            r.sort()
            return [_[1] for _ in r]

        def get_rank(target, candidates):
            rank = 0
            while candidates[rank] != target and rank + 1 < len(candidates):
                rank += 1
            assert candidates[rank] == target
            return rank + 1  # starting from 1

        hits_key = [1, 10, 100]
        hits_counter = defaultdict(int)
        total = 0
        mrr = []
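        # For every test description: rank all candidate target words by embedding distance,
        # count hits@{1, 10, 100}, and accumulate reciprocal ranks for the MRR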
        for embedeed, target in tqdm(zip(desc_embedded, desc_target),
                                     desc='test sents'):
            total += 1
            assert target in lang_target_candidate_set
            plist = get_knn(
                embedeed,
                emb_matrix,
                lang_target_candidate_set,
            )
            for hits in hits_key:
                hits_counter[hits] += int(target in plist[:hits])
            mrr.append(1.0 / get_rank(target, plist))

        hits_ratio = [1.0 * hits_counter[hits] / total for hits in hits_key]
        return hits_counter, hits_ratio, np.mean(mrr)

    lang_01_result = run_test_task_1(dw_pair_test_01, lang1_candidate_set)
    lang_10_result = run_test_task_1(dw_pair_test_10, lang0_candidate_set)

    print('TASK 1:')
    fout = open(join(FLAGS.model_root, 'test_task_1.txt'), 'w')

    def dual_print(*args):
        print(*args)
        print(*args, file=fout)

    dual_print('LANG 0 sent -> LANG 1 word')
    dual_print(lang_01_result[0])
    dual_print(lang_01_result[1])
    dual_print(lang_01_result[2])
    dual_print('LANG 1 sent -> LANG 0 word')
    dual_print(lang_10_result[0])
    dual_print(lang_10_result[1])
    dual_print(lang_10_result[2])

    fout.close()

    # -- Run test task 2 (paraphrasing)

    def desc_to_ids(desc, lang_id):
        r = emb.encode(
            [_.lower() for _ in desc.split()],
            lang_id=lang_id,
        )
        r = [_ for _ in r if _ != -1]
        return r

    def load_suite(filepath):
        lang0_d, lang1_d, target, n = [], [], [], 0
        for line in tqdm(
                csv.reader(
                    open(filepath, newline=''),
                    delimiter=',',
                    quotechar='"',
                    quoting=csv.QUOTE_MINIMAL,
                ),
                desc=('Loading paraphrase from %s' % filepath),
        ):
            lang0_d.append(desc_to_ids(line[0], lang_id=0))
            lang1_d.append(desc_to_ids(line[1], lang_id=1))
            target.append(float(line[2]))
            n += 1

        desc_length = FLAGS.encoder_desc_length

        def get_embedded_sent(d):
            sent = np.zeros(shape=(n, desc_length), dtype=np.int32)
            mask = np.zeros(shape=(n, desc_length), dtype=np.float32)
            for i in range(n):
                this_d = d[i][:desc_length]
                lth = len(this_d)
                sent[i, :lth] = this_d
                mask[i, :lth] = 1.0
            embedded = encoder_model_infer.predict(
                [sent, mask], batch_size=FLAGS.encoder_batch_size)
            return embedded

        lang0_embedded = get_embedded_sent(lang0_d)
        lang1_embedded = get_embedded_sent(lang1_d)

        result_d, result_w, result_w1d = [], [], []
        for i in range(n):
            result_d.append([lang0_embedded[i], lang1_embedded[i]])
            result_w.append([target[i], 1 - target[i]])
            result_w1d.append(target[i])

        result_d = np.array(result_d)
        result_w = np.array(result_w)
        result_w1d = np.array(result_w1d, dtype='int32')

        # import pdb
        # pdb.set_trace()

        return result_d, result_w, result_w1d

    train_d, train_w, train_w1d = load_suite(
        join(FLAGS.data_root, FLAGS.lang01_paraphrase_train_file))

    test_d, _, test_w = load_suite(
        join(FLAGS.data_root, FLAGS.lang01_paraphrase_test_file))
    '''
    print('train_d.shape', train_d.shape)
    print('train_w.shape', train_w.shape)
    print('train_w1d.shape', train_w1d.shape)
    print('test_d.shape', test_d.shape)
    print('test_w.shape', test_w.shape)
    '''

    encode_train1 = train_d[:, 0]
    encode_train2 = train_d[:, 1]
    assert (len(encode_train1) == len(train_w))

    diff_train = np.array([
        encode_train1[i] - encode_train2[i] for i in range(len(encode_train1))
    ])
    dist_train = np.array([[np.linalg.norm(x)] for x in diff_train])

    encode_test1 = test_d[:, 0]
    encode_test2 = test_d[:, 1]

    diff_test = np.array(
        [encode_test1[i] - encode_test2[i] for i in range(len(encode_test1))])
    dist_test = np.array([[np.linalg.norm(x)] for x in diff_test])
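    # Task-2 baselines: the logistic regression below is fit on the scalar distance between
    # the two sentence embeddings, while the MLP is fit on the full difference vector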

    def match(pred, t):
        #if (len(pred.shape)) > 1:
        #pred = pred[0]
        if (pred > 0.5 and t < 0.5) or (pred < 0.5 and t > 0.5):
            return 0.
        return 1.

    logging.info('Fitting LR')
    logreg = LR()
    logreg.fit(dist_train, train_w1d)

    lr_rst = logreg.predict(dist_test)

    lr_accuracy = 0.
    for i in range(len(lr_rst)):
        lr_accuracy += match(lr_rst[i], test_w[i])
    lr_accuracy /= len(lr_rst)

    logging.info('fitting MLP')
    mlp = MLP(hidden_layer_sizes=[25, 13])

    mlp.fit(diff_train, train_w1d)

    mlp_rst = mlp.predict(diff_test)
    mlp_accuracy = 0.
    for i in range(len(mlp_rst)):
        mlp_accuracy += match(mlp_rst[i], test_w[i])
    mlp_accuracy /= len(mlp_rst)

    print('TASK 2:')
    fout = open(join(FLAGS.model_root, 'test_task_2.txt'), 'w')

    def dual_print(*args):
        print(*args)
        print(*args, file=fout)

    dual_print('LR accuracy', lr_accuracy)
    dual_print('MLP accuracy', mlp_accuracy)
    fout.close()
Example #15
file = open('../data/pima-indians-diabetes-database/diabetes.csv')

all_data = list(csv.reader(file))
data_size = len(all_data) - 1

max_dim = 8  #max number of dimensions plus 1

train_frac = 0.6
val_frac = 0.2
test_frac = 0.2

(x_train, y_train), (x_val, y_val), (x_test,
                                     y_test) = process(all_data, train_frac,
                                                       val_frac, test_frac)

nn = MLP()
print(x_train.shape)
print(y_train.shape)
nn.fit(x_train, y_train)
real_train_acc = accuracy_score(nn.predict(x_train), y_train)
real_test_acc = accuracy_score(nn.predict(x_test), y_test)
train_acc = []
test_acc = []

for dim in range(1, max_dim):
    pca = PCA(n_components=dim)
    pca.fit(x_train)
    reduced_x_train = np.array(pca.transform(x_train))
    reduced_x_test = np.array(pca.transform(x_test))
    nn = MLP()
    nn.fit(reduced_x_train, y_train)
Example #16
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

report_with_auc = class_report(y_true=Y_test, y_pred=preds)

print("Report", report_with_auc)

print("KNN accuracy", accknn)

mlp = MLP(hidden_layer_sizes=(100),
          activation='logistic',
          solver='adam',
          random_state=42,
          verbose=True,
          max_iter=200)
mlp.fit(X_train, Y_train)
preds = mlp.predict(X_test)
accmlp = accuracy_score(preds, Y_test)
for i in Y_test:
    fpr, tpr, _ = roc_curve(Y_test, preds)
    roc_auc = auc(fpr, tpr)

fig = plt.figure()
lw = 2
plt.plot(fpr,
         tpr,
         color='darkorange',
         lw=lw,
Example #17
    winner = (calendar[ii], calendar[ii].winner)
    loser = (calendar[ii], calendar[ii].loser)
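    # 20 difference features between the two players: ranking, Elo (overall and per surface),
    # a numeric surface code, and win / serve / return / ace / double-fault / break-point
    # averages (overall and per surface)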
    pre_predict = [calendar[ii].lrank - calendar[ii].wrank, calendar[ii].welo-calendar[ii].lelo, calendar[ii].welosur-calendar[ii].lelosur, surf_into_num[calendar[ii].surface], \
                   round(wins_percent(*winner) - wins_percent(*loser), 3), round(wins_per_surface(*winner) - wins_per_surface(*loser), 3), \
                   round(av_first_serve(*winner) - av_first_serve(*loser), 3), round(av_first_serve_surface(*winner) - av_first_serve_surface(*loser), 3), \
                   round(av_second_serve(*winner) - av_second_serve(*loser), 3), round(av_second_serve_surface(*winner) - av_second_serve_surface(*loser), 3), \
                   round(av_first_return(*winner) - av_first_return(*loser), 3), round(av_first_return_surface(*winner) - av_first_return_surface(*loser), 3), \
                   round(av_second_return(*winner) - av_second_return(*loser), 3), round(av_second_return_surface(*winner) - av_second_return_surface(*loser), 3), \
                   round(av_aces(*winner) - av_aces(*loser), 3), round(av_aces_surface(*winner) - av_aces_surface(*loser), 3), \
                   round(av_dfs(*winner) - av_dfs(*loser), 3), round(av_dfs_surface(*winner) - av_dfs_surface(*loser), 3), \
                   round(av_bps(*winner) - av_bps(*loser), 3), round(av_bps_surface(*winner) - av_bps_surface(*loser), 3)]
    to_predict = scaler.transform([pre_predict])

    clf = MLP(random_state=7,
              solver='lbfgs',
              hidden_layer_sizes=(12, ),
              activation='logistic',
              alpha=1e-3)
    clf.fit(X, Y)

    #print(round(clf.score(X_test, y_test), 3))

    #predictions = []
    #probas = []
    #for match in X_test:
    #    predictions.append(clf.predict([match])[0])
    #    probas.append(clf.predict_proba([match])[0])
    #predictions = np.array(predictions)
    #probas = np.array(probas)

    #print("roi1: {}%".format(int(round(100*roi_1(predictions), 0))))
Example #18
        
        cur_char = np.array([float(x) for x in list_vals[start:end]])
        data.append(cur_char)
        
        if len(data) >= num_data:
            break
            


# In[ ]:


data_r=(np.array(data).reshape(50,128))
labels_r=np.array(labels).reshape(50,num_orig_labels)
labels_r[0].shape

data_train=data_r[:num_train]
data_test=data_r[num_train:]
labels_train=labels_r[:num_train]
labels_test=labels_r[num_train:]


# In[130]:


from sklearn.neural_network import MLPClassifier as MLP
nn=MLP(hidden_layer_sizes=(128,16,num_orig_labels),max_iter=20000,tol=0.01)
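# Note: hidden_layer_sizes defines hidden layers only; scikit-learn adds the output layer
# itself, so the trailing num_orig_labels entry here is an extra hidden layer, not the output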
nn=nn.fit(data_train,labels_train)
nn.score(data_test,labels_test)

Example #19
def supervised_stratified(x_tr, y_tr, x_ts, y_ts, lea):
    acc = []
    f1 = []
    for i in range(0, len(y_ts)):
        lea.fit(x_tr[i], y_tr[i])
        prec, rec, ff1, sup = precision_recall_fscore_support(y_ts[i], lea.predict(x_ts[i]), average='weighted')
        acc.append(lea.score(x_ts[i], y_ts[i]) * 100)
        f1.append(ff1 * 100)
    #print acc
    return np.mean(acc) , np.std(acc), np.mean(f1) , np.std(f1)

rf_super   = RandomForestClassifier(n_estimators = 100, class_weight = 'balanced', random_state = 23) #'balanced_subsample')
ext_super  = EXT(n_estimators=100, class_weight = 'balanced', random_state = 23)
nb_super   = NB()
knn_super  = KNN()
mlp_super  = MLP(random_state = 23)

rf_upper_bound   = np.mean(cross_val_score(  rf_super, X, y, cv = 3)) * 100
nb_upper_bound   = np.mean(cross_val_score(  nb_super, X, y, cv = 3)) * 100
knn_upper_bound  = np.mean(cross_val_score( knn_super, X, y, cv = 3)) * 100
ext_upper_bound  = np.mean(cross_val_score( ext_super, X, y, cv = 3)) * 100
mlp_upper_bound  = np.mean(cross_val_score( mlp_super, X, y, cv = 3)) * 100

sss = StratifiedShuffleSplit(n_splits = 3, test_size = 0.1, random_state = 23)
x_tr, y_tr, x_ts, y_ts = [], [], [], []
for train_index, test_index in sss.split(X, y):
    
        X_train, X_test = X.loc[train_index], X.loc[test_index]
        y_train, y_test = y.loc[train_index], y.loc[test_index]
        x_tr.append(X_train)
        y_tr.append(y_train)
Example #20
for x in n_features:
    if x != 103:
        pca = decomposition.PCA(n_components=x)

        train_X = pca.fit_transform(Train_X)
        test_X = pca.transform(Test_X)
    else:
        train_X = Train_X
        test_X = Test_X
    print(test_X.shape)
    print(train_X.shape)
    ##print(n_classes)

    mlp = MLP(hidden_layer_sizes=(128, 64),
              batch_size=64,
              alpha=0.001,
              learning_rate_init=0.001,
              epsilon=1e-05,
              max_iter=500)
    mlp.fit(train_X, train_Y)

    pred = mlp.predict(test_X)
    pred = pred.argmax(axis=1)

    c_matrix = confusion_matrix(test_Y, pred)
    print(c_matrix)
    accuracy = accuracy_score(test_Y, pred)
    print('Accuracy : ', accuracy)
    a.append(accuracy)
    #precision = true positive / total predicted positive(True positive + False positive)
    #recall = true positive / total actual positive(True positive + False Negative)
    print(classification_report(test_Y, pred))
Example #21
x = int(input())
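# 1 = decision tree, 2 = MLP, 3 = SVM, 4 = naive Bayes, 5 = logistic regression (branches below)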

scores = ['average_precision', 'recall', 'f1', 'accuracy']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    if (x == 1):
        clf = GridSearchCV(DTC(),
                           DTC_tuned_parameter,
                           cv=5,
                           scoring='%s' % score)
    elif (x == 2):
        clf = GridSearchCV(MLP(),
                           MLP_tunned_parameter,
                           cv=5,
                           scoring='%s' % score)
    elif (x == 3):
        clf = GridSearchCV(SVC(),
                           SVC_tuned_parameters,
                           cv=5,
                           scoring='%s' % score)
    elif (x == 4):
        clf = GridSearchCV(NB(),
                           NB_tuned_parameter,
                           cv=7,
                           scoring='%s' % score)
    elif (x == 5):
        clf = GridSearchCV(LR(),
Example #22
    y = npXy[:, -1]
    # Normalize data
    X = ((X - np.min(X, 0)) / (np.max(X, 0) - np.min(X, 0) + .0001))

    shuffleX, shuffley = shuffle_data(X, y)

    trainX = shuffleX[:int(.8 * len(X)), :]
    trainy = shuffley[:int(.8 * len(y))]

    #validX = trainX[:int(.15 * len(X)), :]
    #validy = trainy[:int(.15 * len(y))]

    testX = shuffleX[int(.8 * len(X)):, :]
    testy = shuffley[int(.8 * len(y)):]

    mMadness = MLP(random_state=0, hidden_layer_sizes=(15, 15), activation='identity',
                   alpha=.0001, learning_rate_init=.2, max_iter=30).fit(trainX, trainy)
    lossCurve = mMadness.loss_curve_
    predictions = mMadness.predict(testX)

    MSE = sum([(int(predictions[i]) - testy[i]) ** 2 for i in range(len(predictions))]) / len(predictions)

    score = mMadness.score(testX, testy)
    MSEs.append(MSE)
    scores.append(score)

    print(f)
    print("MSE: " + str(MSE))
    print("Score: " + str(score))
    print()
    plt.plot(range(len(lossCurve)), lossCurve, label=str(n + 2000))
avgMSE = sum(MSEs)/len(MSEs)
Example #23
 def __init__(self, link, position):
     super().__init__(link, position, MLP())
Example #24
def main():

    # for i in range(100):
    overall_time = time.time()
    # Prepare the labels for TRAF videos.
    # 0 # impatient
    # 1 # threatening
    # 2 # reckless
    # 3 # careful
    # 4 # timid
    # 5 # cautious

    dataset = 'argo'
    nbrs = 4

    if not os.path.exists('laps_and_embs/lap.npy') or not os.path.exists(
            'laps_and_embs/argo_lap.npy'):

        # convert the TRAF file into a list of dicts. The list index corresponds to the frame,
        # and each dict consists of key:value pairs where the keys are the IDs in the frame
        # and the value for each ID is its X-Y position
        video = []
        video_list_output = []

        if dataset == 'traf':
            data_path = 'data/behavior_data/'
            labels_list = generate_labels(data_path)
            np.save('labels', labels_list)
            video_list = [
                'TRAF53_1', 'TRAF53_2', 'TRAF53_5', 'TRAF53_6', 'TRAF53_7',
                'TRAF53_8', 'TRAF53_9', 'TRAF29'
            ]
            for i in range(len(video_list)):
                video_path = 'data/behavior_data_gt/' + video_list[
                    i] + '_gt.txt'
                video = []
                with open(video_path) as file:
                    lines = file.readlines()
                    for line in lines:
                        toks = line.split(',')
                        dict_item = {}
                        for i in range(int(toks[1])):
                            dict_item[int(toks[5 * i + 6])] = [
                                int(toks[5 * i + 2]),
                                int(toks[5 * i + 3])
                            ]
                        video.append(dict_item)
                video_list_output.append(video)
                # List of adjacency matrices corresponding to each TRAF video
            Adjacency_Matrices = computeA(video_list_output, labels_list, nbrs,
                                          dataset)
            # List of lists: Each element of the list corresponds to a list of [L_1,L_2,...,L_T] for each TRAF video
            Laplacian_Matrices = extractLi(Adjacency_Matrices)
            np.save('laps_and_embs/lap', Laplacian_Matrices)

        elif dataset == 'argo':
            video_list = ['ARGO1', 'ARGO2']
            data_argo = np.load('data/argo/argo_data.npy', allow_pickle=True)
            labels_argo = np.load('data/argo/argo_labels.npy',
                                  allow_pickle=True)

            # List of adjacency matrices corresponding to each TRAF video
            Adjacency_Matrices = computeA(data_argo, labels_argo, nbrs,
                                          dataset)
            # List of lists: Each element of the list corresponds to a list of [L_1,L_2,...,L_T] for each TRAF video
            Laplacian_Matrices = extractLi(Adjacency_Matrices)
            np.save('laps_and_embs/argo_lap', Laplacian_Matrices)

    # ===================================MAIN ALGORITHM==================================================
    if not os.path.exists('laps_and_embs/emb.npy') or not os.path.exists(
            'laps_and_embs/argo_emb.npy'):
        # time_start_all = time.time ()

        Laplacian_Matrices = np.load(
            'laps_and_embs/lap.npy',
            allow_pickle=True) if dataset == 'traf' else np.load(
                'laps_and_embs/argo_lap.npy', allow_pickle=True)
        U_Matrices = []
        from scipy import linalg as LA
        for Lis_for_each_video in Laplacian_Matrices:
            time_start_all = time.time()
            # ListofUs = []
            for L_index, L in enumerate(Lis_for_each_video):
                if first_laplacian(L_index):
                    Lambda_prev, U_prev = la.eig(L)  # need top k eigenvectors
                    Lambda_prev = np.diag(np.real(Lambda_prev))
                    # Lambda_prev = Lambda_prev[0:10,0:10]
                    U_prev = np.real(U_prev)
                else:
                    U_curr, Lambda = GraphRQI(U_prev, Lis_for_each_video,
                                              L_index, Lambda_prev)
                    Lambda_prev = Lambda
                    # ListofUs.append(U_curr)
                    U_prev = U_curr[-1]
                # Daeig , Va , X = LA.svd ( L , lapack_driver='gesvd' )
            print("time for computing spectrum for one video: ",
                  (time.time() - time_start_all))
            U_Matrices.append(U_curr[0])
        # embedding = np.hstack ( U_Matrices )
        # embedding = embedding.T
        np.save('laps_and_embs/emb',
                U_Matrices) if dataset == 'traf' else np.save(
                    'laps_and_embs/argo_emb', U_Matrices)

    # =========================================ML==================================================

    all_embedding = np.load(
        'laps_and_embs/emb.npy',
        allow_pickle=True) if dataset == 'traf' else np.load(
            'laps_and_embs/argo_emb.npy', allow_pickle=True)

    # embedding = pad(all_embedding)
    # sheets_label = ['c1.xlsx','c2.xlsx','c8.xlsx','r5.xlsx','r6.xlsx','r7.xlsx','u8.xlsx','u9.xlsx']
    # video_list = [ 'TRAF53_1' , 'TRAF53_2' , 'TRAF53_5' , 'TRAF53_6' , 'TRAF53_7' , 'TRAF53_8' , 'TRAF53_9' , 'TRAF29' ]
    labels_list = np.load('laps_and_embs/labels.npy',
                          allow_pickle=True) if dataset == 'traf' else np.load(
                              'data/argo/argo_labels.npy', allow_pickle=True)
    labels = []
    index = 0
    # for index in [0,1,2,3,4,5,7]:
    if dataset == 'traf':
        for j in range(len(labels_list[index])):
            labels.append(list(labels_list[index][j].values())[0])
    else:
        for j in range(len(labels_list[index])):
            labels.append(labels_list[index][j][1])
    # for i,sheet_label in enumerate(sheets_label):
    #     spath = 'data/behavior_data/'+sheet_label
    #     df = pd.read_excel ( spath )
    #     behavior_gt = df.as_matrix ()
    #     prev_labels = list ( behavior_gt[ : , 2 ] )
    #     # labels= labels + [''] * (max_ID - len(labels))
    #     [ prev_labels.append ( 0 ) for i in range ( max_ID-1 - len ( prev_labels ) ) ]
    #     if i == 0:
    #         curr_labels = np.array ( prev_labels )
    #     else:
    #         curr_labels = np.vstack((curr_labels,np.array(prev_labels)))
    # print(Counter(labels))

    # embedding = preprocessing.scale ( embedding )
    embedding = all_embedding[index]
    [labels.append(0) for i in range(embedding.shape[0] - len(labels))]
    # split features and labels in one call so the rows stay aligned
    Xtrain, Xtest, ytrain, ytest = train_test_split(embedding, labels, test_size=0.1)
    # Xtrain = embedding[0:800,:]
    # ytrain = labels[:,0:800].T
    # Xtest = embedding[800:,:]
    # ytest = labels[:,800:].T

    lr = LogisticRegression(max_iter=1)
    mlp = MLP(hidden_layer_sizes=(10, 50), max_iter=4000)
    clf = svm.SVC(max_iter=100)
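    # Only the MLP is actually trained below; the logistic-regression and SVM branches are commented out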
    #
    iters = 1
    score = 0
    for _ in range(iters):

        #
        # lr.fit(Xtrain, ytrain)
        # y_pred = lr.predict(Xtest)
        # score += lr.score(Xtest, ytest)

        mlp.fit(Xtrain, ytrain)
        y_pred = mlp.predict(Xtest)
        score += mlp.score(Xtest, ytest)

        # clf.fit ( Xtrain , ytrain )
        # y_pred = clf.predict ( Xtest )
        # score += clf.score ( Xtest , ytest )
    print(time.time() - overall_time)
    print(score / iters)
    from sklearn.metrics import multilabel_confusion_matrix
    # cm = confusion_matrix ( ytest , y_pred )
    cm = multilabel_confusion_matrix(ytest, y_pred, labels=[0, 1, 2, 3, 4, 5])
    # print ( cm )

    f = []
    e = embedding[:, 70]
    e = e.tolist()
    for i, el in enumerate(e):
        if i < 3 or i > 14:
            f.append(0)
        else:
            f.append(e[i])
    plt.plot(e, linewidth=16, alpha=0.8)
    plt.plot(range(3, 14), f[3:14], c='black', linewidth=8)
    # plt.plot ( range ( 64 , 70 ) , e[ 64:70 ] , c='black' , linewidth=8 )
    gca().set_xticklabels([''] * len(e))
    gca().set_yticklabels([''] * len(e))
Example #25
#Import required modules
from sklearn.datasets import load_boston
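# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2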
from sklearn.neural_network import MLPRegressor as MLP
from sklearn.metrics import mean_absolute_error,mean_squared_error 
import matplotlib.pyplot as plt

#Load Dataset
data=load_boston()
X=data.data
y=data.target

#Configure Multilayer Perceptron,Train,Predict
mlp = MLP(solver='lbfgs', hidden_layer_sizes=(800,), activation='tanh', max_iter=2000, verbose=True)
mlp.fit(X,y)
p=mlp.predict(X)

#Result
print "\n Mean Squared Error : ",mean_squared_error(p,y)
print "\n Mean Absolute Error : ",mean_absolute_error(p,y)
plt.scatter(y,p)
plt.show()

Example #26
 def train(self, X, Y):
     clf = MLP(hidden_layer_sizes=(100, 50),
               solver='adam',
               alpha=1e-5,
               learning_rate_init=0.01)
     return clf.fit(X, Y)
Example #27
def Find_Optimal_Features(ML_DATA):
    # Creates all combinations of features
    lst = list(itertools.product([0, 1], repeat=8))
    lst.remove((0, 0, 0, 0, 0, 0, 0, 0))
    for i in range(len(lst)):
        lst[i] = list(lst[i])

    outs = []
    for i in range(8):
        outs.append([])

    # Clasifiers put into array for easier access and expandability
    clfs = [
        DTC(),
        KNN(),
        BC(ETC()),
        LSVC(max_iter=10_000),
        RFC(),
        SVC(),
        SGDC(),
        MLP(max_iter=10_000)
    ]
    tot = len(lst)
    y = []

    # Trains all models on all feature sets and gets scores
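    # NOTE: only the first 15 of the 255 possible feature combinations are evaluated below,
    # even though the progress-bar total (tot) counts all 255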
    for i, item in enumerate(lst[:15]):
        X_train, ys_train, ya_train, X_test, ys_test, ya_test = ML_DATA.Make_Set(
            item, 0.25)
        for item in clfs:
            item.fit(X_train, ys_train)

        scores = []

        for j, item in enumerate(clfs):
            scores.append(item.score(X_test, ys_test))
            outs[j].append(scores[j])

        PBR(i + 1, tot, name="Test Set", end="\r")
        y = ys_test
    print()

    temp = Counter(y)
    total = len(y)
    rand_ratio = 0
    for item in temp.values():
        rand_ratio += item / total
    rand_ratio = (rand_ratio / 8) * 100
    print("Random Choice :", rand_ratio)

    final = np.zeros(8)
    final_top = np.zeros(8)
    final_btop = np.zeros(8)

    names = [
        "Decision Tree", "KNN", "Bag Extra Tree", "Linear SVC",
        "Random Forest", "SVC", "SGDC", "MLP"
    ]

    # Prints all results
    for k, name in enumerate(names):
        o = RAS(outs[k], 10)
        final_top += np.asarray(lst[o[0]])
        f = np.zeros(8)
        count = 0
        best = outs[k][0]
        print(name + ":")
        for i, item in enumerate(o):
            if i == 0:
                best = outs[k][item]
            print("\t\t{:2}) {:7.4f}% {}".format(i, outs[k][item] * 100,
                                                 lst[item]))
            if outs[k][item] == best:
                count += 1
                f += np.asarray(lst[item])
            final += np.asarray(lst[item])
        final_btop += f / count
        print()

    print(final)
    print(final_top)
    print(final_btop)
Example #28
interval = [0.1,0.5,1,5,10,30]
pred_results = []
for i in range(6):
    num = int(interval[i]*10)
    
    X = np.array(data[['best_ask_size','best_bid_size','ask_total_size','bid_total_size','ask_weighted_average_price',
                       'bid_weighted_average_price','best_ask_price','best_bid_price']])
    Y = np.array(data['output_%s'%interval[i]])
    sample = len(X)
    train_num = int(sample*0.7)
    X = X[:-num]
    Y = Y[:-num]
    X_train = X[:train_num]
    Y_train = Y[:train_num]
    X_test = X[train_num:]
    Y_test = Y[train_num:]
    XOR_MLP = MLP(activation='relu', alpha=1e-05, batch_size='auto', early_stopping=False,
       epsilon=1e-08, hidden_layer_sizes=(12,8,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)
    XOR_MLP.fit(X_train,Y_train)
    Y_pred = XOR_MLP.predict(X_test)
    pred_results.append(Y_pred)
    
print(pred_results)
# Build the predictions table once, after all six intervals have been predicted
# (inside the loop the column count would not yet match the six interval names)
predictions = pd.DataFrame(pred_results).T
predictions.columns = ['output%s' % i for i in interval]
predictions.to_csv('predictions1.csv', encoding='utf-8', index=False)
Example #29
def build_classifiers(exclude, scale, feature_selection, nCols):
    '''
    Input:
    - exclude: list of names of classifiers to exclude from the analysis
    - scale: True or False. Scale data before fitting classifier
    - feature_selection: True or False. Run feature selection before
    fitting classifier
    - nCols: Number of columns in input dataset to classifiers

    Output:
    Dictionary with classifier name as keys.
    - 'clf': Classifier object
    - 'parameters': Dictionary with parameters of 'clf' as keys
    '''
    classifiers = collections.OrderedDict()

    if 'Multilayer Perceptron' not in exclude:
        classifiers['Multilayer Perceptron'] = {
            'clf': MLP(),
            'parameters': {
                'hidden_layer_sizes': [(100, 50), (50, 25)],
                'max_iter': [500]
            }
        }

    if 'Nearest Neighbors' not in exclude:
        classifiers['Nearest Neighbors'] = {
            'clf': KNeighborsClassifier(),
            'parameters': {
                'n_neighbors': [1, 5, 10, 20]
            }
        }

    if 'SVM' not in exclude:
        classifiers['SVM'] = {
            'clf':
            SVC(C=1,
                probability=True,
                cache_size=10000,
                class_weight='balanced'),
            'parameters': {
                'kernel': ['rbf', 'poly'],
                'C': [0.01, 0.1, 1]
            }
        }

    if 'Linear SVM' not in exclude:
        classifiers['Linear SVM'] = {
            'clf': LinearSVC(dual=False, class_weight='balanced'),
            'parameters': {
                'C': [0.01, 0.1, 1],
                'penalty': ['l1', 'l2']
            }
        }

    if 'Decision Tree' not in exclude:
        classifiers['Decision Tree'] = {
            'clf': DecisionTreeClassifier(max_depth=None, max_features='auto'),
            'parameters': {}
        }

    if 'Random Forest' not in exclude:
        classifiers['Random Forest'] = {
            'clf':
            RandomForestClassifier(max_depth=None,
                                   n_estimators=10,
                                   max_features='auto'),
            'parameters': {
                'n_estimators': list(range(5, 20))
            }
        }

    if 'Logistic Regression' not in exclude:
        classifiers['Logistic Regression'] = {
            'clf':
            LogisticRegression(fit_intercept=True,
                               solver='lbfgs',
                               penalty='l2'),
            'parameters': {
                'C': [0.001, 0.1, 1]
            }
        }

    if 'Naive Bayes' not in exclude:
        classifiers['Naive Bayes'] = {'clf': GaussianNB(), 'parameters': {}}
    # classifiers['Voting'] = {}

    def name(x):
        """
        :param x: The name of the classifier
        :return: The class of the final estimator in lower case form
        """
        return x['clf']._final_estimator.__class__.__name__.lower()

    for key, val in classifiers.items():
        if not scale and not feature_selection:
            break
        steps = []
        if scale:
            steps.append(StandardScaler())
        if feature_selection:
            steps.append(SelectKBest(f_regression, k='all'))
        steps.append(classifiers[key]['clf'])
        classifiers[key]['clf'] = make_pipeline(*steps)
        # Reorganize parameter list for grid search
        new_dict = {}
        for keyp in classifiers[key]['parameters']:
            new_dict[name(classifiers[key]) + '__' +
                     keyp] = classifiers[key]['parameters'][keyp]
        classifiers[key]['parameters'] = new_dict
        if nCols > 5 and feature_selection:
            classifiers[key]['parameters']['selectkbest__k'] = np.linspace(
                np.round(nCols / 5), nCols, 5).astype('int').tolist()

    return classifiers
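
# Usage sketch (not part of the original snippet): grid-search each returned classifier
# over its parameter grid, e.g.
#
#   from sklearn.model_selection import GridSearchCV
#   for clf_name, spec in build_classifiers([], scale=True, feature_selection=False, nCols=10).items():
#       search = GridSearchCV(spec['clf'], spec['parameters'], cv=5)
#       search.fit(X, y)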
Example #30
all_data = list(csv.reader(file))
data_size = len(all_data) - 1

train_frac = 0.6
val_frac = 0.2
test_frac = 0.2

(x_train, y_train), (x_val, y_val), (x_test, y_test) = process(all_data, train_frac, val_frac, test_frac)

n_samples = 200
maxc = 20 #maximum number of clusters plus 1

y_train = y_train.flatten()
print(y_train.shape)

nn = MLP()
nn.fit(x_train, y_train)
real_train_acc = accuracy_score(nn.predict(x_train), y_train)
real_test_acc = accuracy_score(nn.predict(x_test), y_test)

print("GMM")
# print(y_train[0])
# gmm = GaussianMixture(n_components=3)
# gmm.fit(x_train)
# gmm_x_train = gmm.predict_proba(x_train)
# gmm_x_val = gmm.predict_proba(x_val)
# gmm_x_test = gmm.predict_proba(x_test)

# print(gmm_x_train)
# print(y_train)