Example #1
def selectParametersForMLPC(a, b, c, d):
    """http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    http://scikit-learn.org/stable/modules/grid_search.html#grid-search"""
    model = MLPC()
    parameters = {
        'verbose': [False],
        'activation': ['logistic', 'relu'],
        'max_iter': [1000, 2000],
        'learning_rate': ['constant', 'adaptive']
    }
    accuracy_scorer = make_scorer(accuracy_score)
    grid_obj = GridSearchCV(model, parameters, scoring=accuracy_scorer)
    grid_obj = grid_obj.fit(a, b)
    model = grid_obj.best_estimator_
    model.fit(a, b)
    print('Selected Parameters for Multi-Layer Perceptron NN:\n')
    print(model)
    print('')
    #    predictions = model.predict(c)
    #    print(accuracy_score(d, predictions))
    #    print('Multi-Layer Perceptron - Test set accuracy: %s' % accuracy_score(d, predictions))
    kfold = model_selection.KFold(n_splits=10)
    accuracy = model_selection.cross_val_score(model,
                                               a,
                                               b,
                                               cv=kfold,
                                               scoring='accuracy')
    mean = accuracy.mean()
    stdev = accuracy.std()
    print('SKlearn Multi-Layer Perceptron - Training set accuracy: %s (%s)' %
          (mean, stdev))
    print('')
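These excerpts use short aliases rather than full scikit-learn names. A plausible import header for this example (an assumption, since the excerpts omit their imports; the alias MLPC recurs throughout):

from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.model_selection import GridSearchCV
from sklearn import model_selection
from sklearn.metrics import make_scorer, accuracy_score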
Example #2
    def train(self):
        classifiers = [["Rfc",
                        Rfc(criterion="entropy", n_estimators=100)],
                       ["knn", Knn(10, algorithm="auto")],
                       ["svc",
                        SVC(kernel="linear", C=0.025, verbose=True)],
                       [
                           "MLPC",
                           MLPC(activation='identity',
                                learning_rate_init=0.01,
                                hidden_layer_sizes=(3, 2, 2),
                                learning_rate='adaptive',
                                solver='adam',
                                verbose=True,
                                max_iter=100)
                       ]]

        def dump_Data(fileName, model):
            try:
                f = open(PathFile.PREDICRFILE + fileName + ".pkl", "wb")
                pickle.dump(model, f)
                f.close()
                print(fileName, 'Dump_file OK...')
            except IOError as e:
                print(e)

        for name, model in classifiers:
            model.fit(self.X, self.z.values.ravel())
            dump_Data(name, model)
        return model
Example #3
def MLPC_pack(xtrain, xtest, ytrain):
    model = MLPC(hidden_layer_sizes=(5, 2),
                 solver='adam',
                 alpha=1e-5,
                 random_state=11)
    model.fit(xtrain, ytrain)
    ypre = model.predict(xtest)
    return ypre
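A minimal usage sketch for MLPC_pack, assuming a feature matrix X and labels y and a standard train/test split (the split itself is not part of the excerpt):

from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=11)
ypre = MLPC_pack(xtrain, xtest, ytrain)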
Example #4
 def fit_predict(self, dfit, dpre, tournament):
     clf = MLPC(hidden_layer_sizes=self.p['layers'],
                alpha=self.p['alpha'],
                activation=self.p['activation'],
                learning_rate_init=self.p['learn'],
                random_state=self.p['seed'],
                max_iter=200)
     clf.fit(dfit.x, dfit.y[tournament])
     yhat = clf.predict_proba(dpre.x)[:, 1]
     return dpre.ids, yhat
Example #5
def get_models(dataset):
    if dataset in ["mnist12", "mnist28"]:
        classifiers = [(DTC(max_depth=30, class_weight='balanced'),
                        "Decision Tree (max_depth=30)"),
                       (LRC(solver='lbfgs',
                            n_jobs=2,
                            multi_class="auto",
                            class_weight='balanced',
                            max_iter=50), "Logistic Regression"),
                       (MLPC((100, ), max_iter=50), "MLP (100)")]
        return classifiers
    if dataset in ['adult']:
        classifiers = [(DTC(max_depth=15, class_weight='balanced'),
                        "Decision Tree (max_depth=15)"),
                       (ABC(), "Adaboost (estimator=50)"),
                       (LRC(solver='lbfgs',
                            n_jobs=2,
                            class_weight='balanced',
                            max_iter=50), "Logistic Regression"),
                       (MLPC((50, ), max_iter=50), "MLP (50)")]
        return classifiers
    if dataset in ['census', 'credit']:
        classifiers = [
            (DTC(max_depth=30,
                 class_weight='balanced'), "Decision Tree (max_depth=30)"),
            (ABC(), "Adaboost (estimator=50)"),
            (MLPC((100, ), max_iter=50), "MLP (100)"),
        ]
        return classifiers
    if dataset in ['intrusion', 'covtype']:
        classifiers = [
            (DTC(max_depth=30,
                 class_weight='balanced'), "Decision Tree (max_depth=30)"),
            (MLPC((100, ), max_iter=50), "MLP (100)"),
        ]
        return classifiers
    if dataset in ['news']:
        regressors = [(LRR(), "Linear Regression"),
                      (MLPR((100, ), max_iter=50), "MLP (100)")]
        return regressors

    assert 0
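A sketch of how get_models is typically consumed (hypothetical X_train/X_test arrays; this loop is not part of the excerpt):

for clf, name in get_models('adult'):
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))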
Example #6
def MLPClassifier(trainData, trainLabel, testData, testLabel):
    clf = MLPC(solver='adam',
               activation='relu',
               alpha=1e-4,
               random_state=1,
               max_iter=200,
               learning_rate_init=.1)
    clf.fit(trainData, trainLabel)
    with open('./models/MLPClassifier.pkl', 'wb') as f:
        pickle.dump(clf, f)
    predict = clf.predict(testData)
    return calculateScores(testLabel, predict)
Example #7
def runVotingClassifier(a, b, c, d):
    """http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html
    http://scikit-learn.org/stable/modules/ensemble.html#voting-classifier"""
    global votingC, meanC, stdevC  # TODO: replace these globals with a class; for now the submission function reads them
    votingC = VotingClassifier(estimators=[('LSVM',
                                            LinearSVC(C=0.0001,
                                                      class_weight=None,
                                                      dual=True,
                                                      fit_intercept=True,
                                                      intercept_scaling=1,
                                                      loss='squared_hinge',
                                                      max_iter=1000,
                                                      multi_class='ovr',
                                                      penalty='l2',
                                                      random_state=None,
                                                      tol=0.0001,
                                                      verbose=0)),
                                           ('MLPC',
                                            MLPC(activation='logistic',
                                                 alpha=0.0001,
                                                 batch_size='auto',
                                                 beta_1=0.9,
                                                 beta_2=0.999,
                                                 early_stopping=False,
                                                 epsilon=1e-08,
                                                 hidden_layer_sizes=(100, ),
                                                 learning_rate='constant',
                                                 learning_rate_init=0.001,
                                                 max_iter=2000,
                                                 momentum=0.9,
                                                 nesterovs_momentum=True,
                                                 power_t=0.5,
                                                 random_state=None,
                                                 shuffle=True,
                                                 solver='adam',
                                                 tol=0.0001,
                                                 validation_fraction=0.1,
                                                 verbose=False,
                                                 warm_start=False))],
                               voting='hard')
    votingC = votingC.fit(a, b)
    kfold = model_selection.KFold(n_splits=10)
    accuracy = model_selection.cross_val_score(votingC,
                                               a,
                                               b,
                                               cv=kfold,
                                               scoring='accuracy')
    meanC = accuracy.mean()
    stdevC = accuracy.std()
    print('Ensemble Voting Method - Training set accuracy: %s (%s)' %
          (meanC, stdevC))
    print('')
    return votingC, meanC, stdevC
Example #8
def learnData(xData,yData,f_obj,MLtype):
	f_obj.write('Accuracy for {}:\n'.format(MLtype))
	for test in [0.10,0.15,0.20,0.25]:
		xData_train,xData_test,yData_train,yData_test = tts(xData,yData,test_size=test,random_state=42)
		if MLtype == 'LSVC': clf = LSVC()
		elif MLtype == 'LR': clf = LR()
		elif MLtype == 'MNB': clf = MNB()
		else: clf = MLPC()
		clf.fit(xData_train,yData_train)
		score = clf.score(xData_test,yData_test)
		f_obj.write('\ttest partition {} yields {} accuracy\n'.format(test,score))
	f_obj.write('\n')
Example #9
def runMLPC(a, b, c, d):
    classifier = MLPC(activation='relu', max_iter=1000)
    classifier.fit(a, b)
    kfold = model_selection.KFold(n_splits=10)
    accuracy = model_selection.cross_val_score(classifier,
                                               a,
                                               b,
                                               cv=kfold,
                                               scoring='accuracy')
    mean = accuracy.mean()
    stdev = accuracy.std()
    print(
        'SKlearn Multi-layer Perceptron NN - Training set accuracy: %s (%s)' %
        (mean, stdev))
    print('')
Example #10
 def __init__(
         self,
         hidden_layer_sizes=(100, ),
         activation='relu',  # ‘identity’, ‘logistic’, ‘tanh’, ‘relu’
         solver="adam",  # ‘lbfgs’, ‘sgd’, ‘adam’
         alpha=0.0001,  # l2 penalty param
         batch_size="auto",
         learning_rate="constant",  # ‘constant’, ‘invscaling’, ‘adaptive’
         learning_rate_init=0.001,
         power_t=0.5,  # exponent for inverse-scaling learning-rate decay
         max_iter=200,  # max epochs
         shuffle=True,  # shuffle batches
         tol=1e-4,  # tolerance for improvement in loss/score
         momentum=0.9,  # momentum for sgd
         nesterovs_momentum=True,
         early_stopping=False,  # stop when the validation score has not improved for n_iter_no_change epochs
         n_iter_no_change=10,
         validation_fraction=0.1,  # fraction of the training set held out for validation
         beta_1=0.9,  # beta1 for adam
         beta_2=0.999,  # beta2 for adam
         epsilon=1e-8  # adam numerical stability
 ):
     self._clf = MLPC(hidden_layer_sizes=hidden_layer_sizes,
                      activation=activation,
                      solver=solver,
                      alpha=alpha,
                      batch_size=batch_size,
                      learning_rate=learning_rate,
                      learning_rate_init=learning_rate_init,
                      power_t=power_t,
                      max_iter=max_iter,
                      shuffle=shuffle,
                      tol=tol,
                      early_stopping=early_stopping,
                      n_iter_no_change=n_iter_no_change,
                      momentum=momentum,
                      nesterovs_momentum=nesterovs_momentum,
                      validation_fraction=validation_fraction,
                      beta_1=beta_1,
                      beta_2=beta_2,
                      epsilon=epsilon)
     self._last_score = None
     self._model_path = None
Example #11
 def __init__(self,
              hidden_layer_sizes=100,
              activation='relu',
              solver='adam',
              alpha=0.0001,
              batch_size='auto',
              learning_rate='constant',
              learning_rate_init=0.001,
              power_t=0.5,
              max_iter=200,
              shuffle=True,
              random_state=None,
              tol=0.0001,
              warm_start=False,
              momentum=0.9,
              nesterovs_momentum=True,
              early_stopping=False,
              validation_fraction=0.1,
              beta_1=0.9,
              beta_2=0.999,
              epsilon=1e-08,
              n_iter_no_change=10,
              max_fun=15000):
     self.tol = tol
     self.n_iter_no_change = n_iter_no_change
     self.momentum = momentum
     self.nesterovs_momentum = nesterovs_momentum
     self.beta_2 = beta_2
     self.hidden_layer_sizes = hidden_layer_sizes
     self.alpha = alpha
     self.max_fun = max_fun
     self.beta_1 = beta_1
     self.activation = activation
     self.early_stopping = early_stopping
     self.epsilon = epsilon
     self.warm_start = warm_start
     self.learning_rate_init = learning_rate_init
     self.solver = solver
     self.shuffle = shuffle
     self.random_state = random_state
     self.max_iter = max_iter
     self.batch_size = batch_size
     self.validation_fraction = validation_fraction
     self.learning_rate = learning_rate
     self.power_t = power_t
     self.model = MLPC(validation_fraction=self.validation_fraction,
                       activation=self.activation,
                       learning_rate=self.learning_rate,
                       alpha=self.alpha,
                       beta_1=self.beta_1,
                       solver=self.solver,
                       learning_rate_init=self.learning_rate_init,
                       hidden_layer_sizes=self.hidden_layer_sizes,
                       epsilon=self.epsilon,
                       nesterovs_momentum=self.nesterovs_momentum,
                       max_fun=self.max_fun,
                       momentum=self.momentum,
                       shuffle=self.shuffle,
                       n_iter_no_change=self.n_iter_no_change,
                       early_stopping=self.early_stopping,
                       power_t=self.power_t,
                       beta_2=self.beta_2,
                       warm_start=self.warm_start,
                       tol=self.tol,
                       batch_size=self.batch_size,
                       max_iter=self.max_iter,
                       random_state=self.random_state)
Example #12
 def __init__(self, featureset=None, target=None, mode='predict', path=''):
     if (mode == 'train'):
         self.__svm = SVC(C=1.0,
                          cache_size=200,
                          class_weight=None,
                          coef0=0.0,
                          decision_function_shape='ovr',
                          degree=3,
                          gamma='auto',
                          kernel='rbf',
                          max_iter=-1,
                          probability=False,
                          random_state=None,
                          shrinking=True,
                          tol=0.001,
                          verbose=False)
         self.__svr = SVR(C=1.0,
                          cache_size=200,
                          coef0=0.0,
                          degree=3,
                          epsilon=0.1,
                          gamma='auto',
                          kernel='rbf',
                          max_iter=-1,
                          shrinking=True,
                          tol=0.001,
                          verbose=False)
         self.__nusvm = NuSVC(cache_size=200,
                              class_weight=None,
                              coef0=0.0,
                              decision_function_shape='ovr',
                              degree=3,
                              gamma='auto',
                              kernel='rbf',
                              max_iter=-1,
                              nu=0.5,
                              probability=False,
                              random_state=None,
                              shrinking=True,
                              tol=0.001,
                              verbose=False)
         self.__nusvr = NuSVR(C=1.0,
                              cache_size=200,
                              coef0=0.0,
                              degree=3,
                              gamma='auto',
                              kernel='rbf',
                              max_iter=-1,
                              nu=0.5,
                              shrinking=True,
                              tol=0.001,
                              verbose=False)
         self.__linsvm = LinearSVC(C=1.0,
                                   class_weight=None,
                                   dual=True,
                                   fit_intercept=True,
                                   intercept_scaling=1,
                                   loss='squared_hinge',
                                   max_iter=1000,
                                   multi_class='ovr',
                                   penalty='l2',
                                   random_state=None,
                                   tol=0.0001,
                                   verbose=0)
         self.__linsvr = LinearSVR(C=1.0,
                                   dual=True,
                                   epsilon=0.0,
                                   fit_intercept=True,
                                   intercept_scaling=1.0,
                                   loss='epsilon_insensitive',
                                   max_iter=1000,
                                   random_state=None,
                                   tol=0.0001,
                                   verbose=0)
         self.__mlpc = MLPC(activation='relu',
                            alpha=1e-05,
                            batch_size='auto',
                            beta_1=0.9,
                            beta_2=0.999,
                            early_stopping=False,
                            epsilon=1e-08,
                            hidden_layer_sizes=(100, 25),
                            learning_rate='constant',
                            learning_rate_init=0.001,
                            max_iter=200,
                            momentum=0.9,
                            nesterovs_momentum=True,
                            power_t=0.5,
                            random_state=1,
                            shuffle=True,
                            solver='lbfgs',
                            tol=0.0001,
                            validation_fraction=0.1,
                            verbose=False,
                            warm_start=False)
         self.__mlpr = MLPR(activation='relu',
                            alpha=0.0001,
                            batch_size='auto',
                            beta_1=0.9,
                            beta_2=0.999,
                            early_stopping=False,
                            epsilon=1e-08,
                            hidden_layer_sizes=(100, 25),
                            learning_rate='constant',
                            learning_rate_init=0.001,
                            max_iter=200,
                            momentum=0.9,
                            nesterovs_momentum=True,
                            power_t=0.5,
                            random_state=None,
                            shuffle=True,
                            solver='adam',
                            tol=0.0001,
                            validation_fraction=0.1,
                            verbose=False,
                            warm_start=False)
         self.__dtc = DTC(class_weight=None,
                          criterion='gini',
                          max_depth=None,
                          max_features=None,
                          max_leaf_nodes=None,
                          min_impurity_decrease=0.0,
                          min_impurity_split=None,
                          min_samples_leaf=1,
                          min_samples_split=2,
                          min_weight_fraction_leaf=0.0,
                          presort=False,
                          random_state=None,
                          splitter='best')
         self.__dtr = DTR(criterion='mse',
                          max_depth=None,
                          max_features=None,
                          max_leaf_nodes=None,
                          min_impurity_decrease=0.0,
                          min_impurity_split=None,
                          min_samples_leaf=1,
                          min_samples_split=2,
                          min_weight_fraction_leaf=0.0,
                          presort=False,
                          random_state=None,
                          splitter='best')
         self.__rfc = RFC(bootstrap=True,
                          class_weight=None,
                          criterion='gini',
                          max_depth=100,
                          max_features='auto',
                          max_leaf_nodes=None,
                          min_impurity_decrease=0.0,
                          min_impurity_split=None,
                          min_samples_leaf=1,
                          min_samples_split=2,
                          min_weight_fraction_leaf=0.0,
                          n_estimators=50,
                          n_jobs=1,
                          oob_score=False,
                          random_state=None,
                          verbose=0,
                          warm_start=False)
         self.__rfr = RFR(bootstrap=True,
                          criterion='mse',
                          max_depth=None,
                          max_features='auto',
                          max_leaf_nodes=None,
                          min_impurity_decrease=0.0,
                          min_impurity_split=None,
                          min_samples_leaf=1,
                          min_samples_split=2,
                          min_weight_fraction_leaf=0.0,
                          n_estimators=10,
                          n_jobs=1,
                          oob_score=False,
                          random_state=None,
                          verbose=0,
                          warm_start=False)
         (self.__svm, self.__svr, self.__nusvm, self.__nusvr, self.__linsvm,
          self.__linsvr, self.__mlpc, self.__mlpr, self.__dtc, self.__dtr,
          self.__rfc, self.__rfr) = self.__trainAll(X=list(featureset),
                                                    Y=list(target))
         self.__saveModelsToFile(path)
     else:
         self.__svm = joblib.load(path + 'Mel_SVM.pkl')
         self.__svr = joblib.load(path + 'Mel_SVR.pkl')
         self.__nusvm = joblib.load(path + 'Mel_NuSVM.pkl')
         self.__nusvr = joblib.load(path + 'Mel_NuSVR.pkl')
         self.__linsvm = joblib.load(path + 'Mel_LinSVM.pkl')
         self.__linsvr = joblib.load(path + 'Mel_LinSVR.pkl')
         self.__mlpc = joblib.load(path + 'Mel_MLPC.pkl')
         self.__mlpr = joblib.load(path + 'Mel_MLPR.pkl')
         self.__dtc = joblib.load(path + 'Mel_DTC.pkl')
         self.__dtr = joblib.load(path + 'Mel_DTR.pkl')
         self.__rfc = joblib.load(path + 'Mel_RFC.pkl')
         self.__rfr = joblib.load(path + 'Mel_RFR.pkl')
Example #13
"""
#####RFC pipes#####
"""
rfc_norm_pipe = mp(MinMaxScaler(), RFC(random_state=47))
rfc_stand_pipe = mp(StandardScaler(), RFC(random_state=47))
rfc_pca_pipe = mp(PCA(), RFC())
"""
#####SVC pipes#####
"""
svc_norm_pipe = mp(MinMaxScaler(), SVC())
svc_stand_pipe = mp(StandardScaler(), SVC())
svc_pca_pipe = mp(PCA(), SVC())
"""
#####MLPC pipes#####
"""
mlpc_norm_pipe = mp(MinMaxScaler(), MLPC(random_state=47))
mlpc_stand_pipe = mp(StandardScaler(), MLPC(random_state=47))
mlpc_pca_pipe = mp(PCA(), MLPC())
"""
#####kNN grid#####
"""
kNN_param_grid = {
    'kneighborsclassifier__n_neighbors': [1, 2, 3, 4, 5],
    'kneighborsclassifier__weights': ['uniform', 'distance'],
    'kneighborsclassifier__p': [1, 2, 3]
}
"""
Test set score: 0.12
Best parameters: {'kneighborsclassifier__n_neighbors': 1, 'kneighborsclassifier__p': 1, 'kneighborsclassifier__weights': 'uniform'}
"""
kNN_norm_grid = GSCV(knn_norm_pipe, kNN_param_grid, scoring='f1', cv=5)
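The grid search above presumes knn_norm_pipe, which this excerpt never defines. A minimal sketch of a compatible definition, assuming mp aliases make_pipeline and KNN aliases KNeighborsClassifier (consistent with the kneighborsclassifier__ prefixes in the grid):

from sklearn.neighbors import KNeighborsClassifier as KNN

knn_norm_pipe = mp(MinMaxScaler(), KNN())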
Example #14
def fit_model(features, sumstats, train_genes, test_genes, model='logit'):
    """
    Fit classifier to train_genes and calculate RMSE on test_genes
    """

    all_genes = train_genes + test_genes

    # Join sumstats with features for logistic regression, subset to
    # genes of interest, and drop genes with NaN BFDPs
    full_df = sumstats.merge(features,
                             how='left',
                             left_index=True,
                             right_index=True)
    full_df = full_df.loc[full_df.index.isin(all_genes), :].dropna()
    train_df = full_df.loc[full_df.index.isin(train_genes), :].\
                   drop(labels='chrom', axis=1)
    test_df = full_df.loc[full_df.index.isin(test_genes), :].\
                  drop(labels='chrom', axis=1)

    # Instantiate classifier dependent on model
    if model == 'logit':
        grid_params = {
            'C': [10**x for x in range(-2, 3, 1)],
            'l1_ratio': [x / 10 for x in range(0, 11, 1)]
        }
        base_class = logit(solver='saga', penalty='elasticnet')
    elif model == 'svm':
        grid_params = {'C': [10**x for x in range(-2, 2, 1)]}
        base_class = SVC(random_state=0,
                         probability=True,
                         break_ties=True,
                         kernel='rbf')
    elif model == 'randomforest':
        grid_params = {
            'n_estimators': [50, 100, 500],
            'criterion': ['gini', 'entropy']
        }
        base_class = RFC(random_state=0, bootstrap=True, oob_score=True)
    elif model == 'lda':
        grid_params = {
            'shrinkage': [None, 0, 0.5, 1, 'auto'],
            'solver': ['svd', 'lsqr', 'eigen']
        }
        base_class = LDAC()
    elif model == 'naivebayes':
        grid_params = {'var_smoothing': [10**x for x in range(-4, -11, -1)]}
        base_class = GNBC()
    elif model == 'neuralnet':
        grid_params = {
            'hidden_layer_sizes': [(10, 5, 2), (20, 10, 5), (20, 10, 5, 2),
                                   (50, 20, 10), (50, 20, 10, 5),
                                   (50, 20, 10, 5, 2)],
            'alpha': [10**x for x in range(-4, 5, 1)]
        }
        base_class = MLPC(activation='relu',
                          solver='adam',
                          early_stopping=True,
                          random_state=0)
    elif model == 'gbdt':
        grid_params = {'n_estimators': [50, 100], 'subsample': [0.5, 1]}
        base_class = GBDT(random_state=0)
    elif model == 'knn':
        grid_params = {
            'n_neighbors': [10, 50, 100, 500],
            'weights': ['uniform', 'distance'],
            'leaf_size': [5, 10, 25, 50, 100]
        }
        base_class = KNN()

    # Learn best parameters for classifier using cross-validated grid search
    classifier = GridSearchCV(base_class, grid_params, verbose=1, n_jobs=-1)

    # Fit sklearn model & predict on test set
    # (Models parameterized by grid search need to be treated separately)
    if isinstance(classifier, GridSearchCV):
        fitted_model = classifier.fit(train_df.drop(labels='bfdp', axis=1),
                                      np.round(train_df.bfdp)).best_estimator_
    else:
        fitted_model = classifier.fit(train_df.drop(labels='bfdp', axis=1),
                                      np.round(train_df.bfdp))
    test_bfdps = pd.Series(fitted_model.predict_proba(
        test_df.drop(labels='bfdp', axis=1))[:, 1],
                           name='pred',
                           index=test_df.index)

    # Compute RMSE of bfdps for test set
    test_vals = test_df.merge(test_bfdps, left_index=True, right_index=True).\
                    loc[:, 'bfdp pred'.split()]
    test_rmse = rmse(test_vals.to_records(index=False))

    return fitted_model, test_rmse
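The rmse helper is not shown in this excerpt. A minimal sketch of an implementation compatible with the to_records(index=False) call above (its exact contract is an assumption):

import numpy as np

def rmse(pairs):
    # pairs: iterable of (true_bfdp, pred_bfdp) records
    arr = np.asarray([(true, pred) for true, pred in pairs], dtype=float)
    return float(np.sqrt(np.mean((arr[:, 0] - arr[:, 1]) ** 2)))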
Example #15
from sklearn.metrics import log_loss

# grid search cross validation
from sklearn.model_selection import GridSearchCV

# ignore ConvergenceWarning
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

##################################
## 3.1 train and test models using GridSearchCV
models = {
    'DT': DTC(),
    'LR': LR(),
    'MLP': MLPC(),
    'SVC': SVC(),
    'NB': NB(),
    'KNN': KNNC(),
    'Bagging': BaggingC(),
    'RF': RFC(),
    'AdaBoost': AdaBoostC(),
    'GB': GBC(),
    'XGB': XGB(),
}

param_dict = {
    # 0.67 {'max_depth': 1, 'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
    'DT': {
        'max_depth': [1,2,3,None],
        'max_leaf_nodes': [4,6,8,10,None],
Example #16
print(count)

X1 = X1.reshape(int(X1.shape[0] / 50), 50)
X2 = X2.reshape(int(X2.shape[0] / 50), 50)
X = np.concatenate((X1, X2), axis=1)
X = np.nan_to_num(X)

y = train_df['project_is_approved'].to_numpy()  # .as_matrix() was removed in pandas 1.0

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.2,
                                                    random_state=47)

mlpc_norm_pipe = mp(MinMaxScaler(), MLPC(random_state=47))

mlp_param_grid1 = {
    'mlpclassifier__hidden_layer_sizes': [10, 100, (10, 10), (100, 100)],
    'mlpclassifier__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'mlpclassifier__solver': ['lbfgs', 'sgd', 'adam']
}
mlp_param_grid2 = {
    'hidden_layer_sizes': [10, 100, (10, 10), (100, 100)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam']
}

mlp_norm_grid = GSCV(mlpc_norm_pipe, mlp_param_grid1, scoring='f1', cv=5)
mlp_norm_grid.fit(X_train, y_train)
print("Test set score: {:.2f}".format(mlp_norm_grid.score(X_test, y_test)))
Example #17
from os.path import isfile
import pickle

from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

filename = '/usr/src/app/sentiment/models/pickles/MLPC.pickle'

if not isfile(filename):

    train, test = train_test_split(utils.read_data(), test_size=0.2)

    train_embeddings = utils.combined_embeddings(train['text'].tolist())
    test_embeddings = utils.combined_embeddings(test['text'].tolist())

    clf = MLPC(
        hidden_layer_sizes=(256),
        learning_rate='adaptive',
        max_iter=1000
    )
    clf.fit(train_embeddings, train['sentiment'])

    prediction = clf.predict(test_embeddings)
    report = classification_report(test['sentiment'], prediction)
    print(report)

    with open(filename, 'wb') as f:
        pickle.dump(clf, f)

else:
    print('Already Trained!')
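A matching load path for the pickle written above (a sketch, assuming the same filename):

with open(filename, 'rb') as f:
    clf = pickle.load(f)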
Example #18
X = data['data']
y = data['target']

# Split the data set: 20% for testing, 80% for training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

scaler = StandardScaler()

# Standardize the features using statistics fitted on the training set
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build the neural network: 3 hidden layers of 100 neurons each, up to 1000 epochs
clf = MLPC(hidden_layer_sizes=(100, 100, 100), max_iter=1000)

# Train the model
clf.fit(X_train, y_train)

# Classify the test samples
predictions = clf.predict(X_test)

# Compute the mean squared error
mean = mean_squared_error(y_test, predictions)

print(mean)
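For 0/1 class labels, this mean squared error equals the misclassification rate, i.e. 1 - accuracy_score(y_test, predictions).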
# Build a confusion matrix
confusion_m = pd.DataFrame(confusion_matrix(y_test, predictions),
                           columns=['Benigno', 'Maligno'],
                           index=['Benigno', 'Maligno'])
Example #19
for current_para in parameter_set:
    best_classifier = None
    highest_accuracy = 0

    """ get current parameters for the classifier """
    current_alpha = current_para[0]
    current_activation_function = current_para[1]
    current_hidden_layer = current_para[2]
    current_iteration = current_para[3]

    fold_num = 0
    for i in range(0, 5):
        fold_num = fold_num + 1
        """ build up a classifier """
        current_classifier = MLPC(hidden_layer_sizes=current_hidden_layer,
                                  activation=current_activation_function,
                                  alpha=current_alpha,
                                  max_iter=current_iteration)
        """ get training and validation set """
        current_training_feature, current_training_label, current_validation_feature, current_validation_label = five_folds(training_data, i)

        """ training """
        current_classifier.fit(current_training_feature, current_training_label)

        """ validation """
        correct_num = 0
        number_of_instance_in_validation_set = len(current_validation_feature)

        for j in range(number_of_instance_in_validation_set):
            if current_classifier.predict(current_validation_feature[j].reshape(1, -1)) == current_validation_label[j]:
                correct_num = correct_num + 1

        print("     Correct number for this time is ", correct_num)
Example #20
print("Test set score of DTC people: {:.3f}".format(
    dtc_people.score(X_test_people_stand, y_test_people)))

###DONE###
rfc_people = RFC(n_estimators=100,
                 max_depth=25,
                 bootstrap=False,
                 random_state=37).fit(X_train_people_norm, y_train_people)
print("Test set score of RFC people: {:.3f}".format(
    rfc_people.score(X_test_people_norm, y_test_people)))

svc_people = SVC(C=4, kernel='linear',
                 random_state=37).fit(X_train_people_stand, y_train_people)
print("Test set score of SVC people: {:.3f}".format(
    svc_people.score(X_test_people_stand, y_test_people)))

mlpc_people = MLPC(alpha=.1, random_state=37).fit(X_train_people_nmf,
                                                  y_train_people)
print("Test set score of MLPC people: {:.3f}".format(
    mlpc_people.score(X_test_people_nmf, y_test_people)))

print('people\n')
#Mnist
###DONE###
knc_mnist = KNC(weights='distance').fit(X_train_mnist_norm, y_train_mnist)
print("Test set score of kNN mnist: {:.3f}".format(
    knc_mnist.score(X_test_mnist_norm, y_test_mnist)))

###DONE###
dtc_mnist = DTC(criterion='entropy',
                max_depth=15,
                min_samples_split=3,
                random_state=37).fit(X_train_mnist_nmf, y_train_mnist)
Example #21
                 #"DTC",
                 #"GNB",
                 "QDA"]
model_types = [LR,
               RFC,
               #ABC,
               MLPC,
               KNC,
               SVC,
               #DTC,
               #GNB,
               QDA]
models = [LR(),
          RFC(n_estimators=30),
          #ABC(),
          MLPC(),
          KNC(),
          SVC(probability=True),
          #DTC(),
          #GNB(),
          QDA()]
models2 = copy.deepcopy(models)


### experiment bright students math finance
N = 10000 ## 1000 of each group (groups S and T)

minority_percent = 0.3
MIN = int(minority_percent * N)
MAJ = int((1 - minority_percent) * N)
# print(MIN, MAJ)
Example #22
from sklearn.neural_network import MLPClassifier as MLPC

# The MNIST data set
images, labels = load_mnist()
images = images[:1000]
labels = labels[:1000]
# To apply a classifier on this data, we need to flatten the images,
# turning the data into a (samples, features) matrix:

n_samples = len(images)

data = images.reshape((n_samples, -1))
print(data[3])

# Create a classifier: a multi-layer perceptron classifier
classifier = MLPC()
labels = np.ravel(labels)
# We learn the digits on the first half of the data
classifier.fit(data[:n_samples // 2], labels[:n_samples // 2])

# Now predict the value of the digit on the second half:
expected = labels[n_samples // 2:]

predicted = classifier.predict(data[n_samples // 2:])

print("Classification report for classifier %s:\n%s\n" %
      (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))
"""
images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))