示例#1
0
    def add_candidate_feat(self,
                           X_train,
                           X_test,
                           y_train,
                           y_test,
                           constructor_kwargs,
                           scorer=mcc):
        """
        Build, fit, and score a model using a subset of input features plus one candidate feature

        @params:
            X_train            - Required : Pandas dataframe containing training set input data (Dataframe)
            X_test             - Required : Pandas dataframe containing test set input data (Dataframe)
            y_train            - Required : Pandas dataframe containing training set labels (Dataframe)
            y_test             - Required : Pandas dataframe containing test set labels (Dataframe)
            constructor_kwargs - Required : kwargs parameterizing for the model constructor function, except for n_features
            scorer             - Optional : Metric which accepts true and predicted labels as inputs; used to score model

        @returns:
            score - Value returned by scorer(y_test, predictions)
            cm    - Value returned by confusion_matrix(y_test, predictions)
        """

        # Create compatibility-wrapped model with dim(X_train) input features, then fit it
        model = KerasClassifier(build_fn=construct_network,
                                n_features=len(X_train.columns.values),
                                **constructor_kwargs)
        model.fit(X_train, y_train)

        # Run inference once and reuse the predictions for both the score and the
        # confusion matrix instead of predicting on X_test twice.
        y_pred = model.predict(X_test)
        score = scorer(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)

        return score, cm
示例#2
0
def fit_base_learner(X_tra, y_tra, X_val, y_val, test_fold, X_test, y_test,
            keras_model, random_state=None, description=''):
    """Fit a wrapped Keras model on sampled batches and evaluate it on three splits.

    A train batch and a validation batch are drawn with ``get_random_batch``
    using one shared ``RandomUnderSampler``. Inputs are reshaped to
    ``(-1, 9, 9, 1)`` and labels are flattened before being handed to Keras.

    Returns a tuple ``(df_train, df_val, df_test, history, model)`` where the
    three ``df_*`` values are whatever ``evaluate_metrics`` returns for the
    train batch, the validation batch and the full test set.
    """
    sampler = RandomUnderSampler(random_state=random_state)
    X_train_batch, y_train_batch = get_random_batch(X_tra, y_tra, sampler)
    X_val_batch, y_val_batch = get_random_batch(X_val, y_val, sampler)
    model = KerasClassifier(keras_model)

    def as_images(features):
        # Network input layout used throughout this function.
        return features.reshape(-1, 9, 9, 1)

    def evaluate_split(features, targets):
        # Predict labels and probabilities for one split, then score them.
        images = as_images(features)
        labels = model.predict(images, batch_size=1024)
        probabilities = model.predict_proba(images, batch_size=1024)
        return evaluate_metrics(targets.reshape(-1), labels,
                                probabilities[:, 1],
                                'CNN', test_fold, description)

    # Only the best epoch (as judged by the checkpoint callback) is written out.
    best_model_saver = ModelCheckpoint(filepath='_data/model.best.hdf5',
                                       verbose=0, save_best_only=True)
    history = model.fit(
        as_images(X_train_batch),
        y_train_batch.reshape(-1),
        validation_data=(as_images(X_val_batch), y_val_batch.reshape(-1)),
        batch_size=1024, epochs=100, verbose=0,
        callbacks=[best_model_saver], shuffle=True)

    df_train = evaluate_split(X_train_batch, y_train_batch)
    df_val = evaluate_split(X_val_batch, y_val_batch)
    df_test = evaluate_split(X_test, y_test)
    return df_train, df_val, df_test, history, model
示例#3
0
def run_exp_nn(X_train, y_train, X_val, y_val, param_name, param_range, other_params):
    """Train one Keras classifier per value in ``param_range`` and record the results.

    For every value the hyper-parameter ``param_name`` is set to that value,
    merged with ``other_params`` (which wins on a key clash), and passed to a
    ``KerasClassifier``. Train accuracy, validation accuracy and wall-clock
    time are appended to the returned ``defaultdict(list)`` under the keys
    ``'accuracy_m'``, ``'accuracy_val_m'`` and ``'time'``.

    @params:
        X_train, y_train - training inputs / labels (``y_train`` must expose ``.values``)
        X_val, y_val     - validation inputs / labels
        param_name       - name of the hyper-parameter being swept
        param_range      - iterable of values to try for ``param_name``
        other_params     - dict of additional keyword arguments for ``KerasClassifier``
    @returns:
        defaultdict(list) with one entry per swept value under each key
    """
    result = defaultdict(list)

    # Best parameters found in HW1 (these are the builder defaults below):
    #   n1 = 75
    #   n2 = 14
    #   mid_act = 'relu'  # useleakyrelu is enabled...
    #   num_layers = 3
    #   optimizer = 'adam'
    #   activation = 'sigmoid'
    #   epo = 100  # 10
    #   bat = 44  # 18

    num_features = X_train.shape[1]
    y_train_flat = y_train.values.ravel('C')

    # The builder does not depend on the swept value, so define it once.
    def classification_model(n1=75, n2=14, n3=14, num_layers=3,  input_dim=num_features,
                             optimizer='adam', activation='sigmoid', epo=100, bat=44):
        model = Sequential()
        # Honour the input_dim argument (previously hard-coded to 64).
        model.add(Dense(n1, input_dim=input_dim))
        model.add(LeakyReLU())
        model.add(Dense(n2))
        model.add(LeakyReLU())
        for _ in range(num_layers - 2):
            model.add(Dense(n3))
            model.add(LeakyReLU())
        model.add(Dense(4, activation=activation))
        model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    for param in param_range:
        clear_session()
        result['param'].append(param)
        params = {param_name: param}
        params.update(other_params)
        result['params'].append(params)
        result['metrics'].append('accuracy')
        # Motions
        t0 = time.time()

        print('num_features = {}'.format(num_features))

        model = KerasClassifier(build_fn=classification_model, verbose=0, **params)

        model.fit(X_train, y_train_flat)

        y_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)
        # Training predictions must be compared with the training labels
        # (they were previously scored against y_val).
        result['accuracy_m'].append(accuracy_score(y_train_flat, y_pred))
        result['accuracy_val_m'].append(accuracy_score(y_val, y_val_pred))
        print("took {} seconds".format(time.time() - t0))
        result['time'].append(time.time() - t0)

    # matplotlib is clunky in trying to plot bars side by side, BUT
    # NOTE(review): result['param_range'] is never populated in this function,
    # so it is an empty list here (and the lookup adds that key to `result`).
    # Confirm against plot_lines1's signature what was meant to be passed.
    plot_lines1(result['param_range'], result['time'], result['param'], result['param_range'], label='Motions', col='blue')

    return result
示例#4
0
def neural_network_1(X_train, X_test, y_train, y_test):
	"""Train a small dense network, save its accuracy curves and print test accuracy.

	The network is wrapped in a ``KerasClassifier`` and trained for 1000
	epochs with ``(X_test, y_test)`` as validation data. The per-epoch
	training and validation accuracy are plotted to ``history.png``.
	"""
	def create_model():
		DROP_OUT_RATE = 0.1  # not used: no Dropout layer is added below

		network = Sequential()
		for layer in (
			Dense(units=4, activation='relu', input_dim=X_train.shape[1]),
			Dense(units=4, activation='relu'),
			Dense(units=1),
		):
			network.add(layer)

		adam = keras.optimizers.Adam(lr = 0.001)
		network.compile(loss = keras.losses.mean_squared_error, optimizer = adam, metrics = ["accuracy"])

		return network

	estimator = KerasClassifier(build_fn=create_model, epochs=1000, batch_size=128)

	history = estimator.fit(X_train, y_train, validation_data = (X_test, y_test), verbose = 2)

	print(history.history.keys())

	# summarize history for accuracy: training curve first, then validation
	for curve_key in ('acc', 'val_acc'):
		plt.plot(history.history[curve_key])
	plt.title('model accuracy')
	plt.ylabel('accuracy')
	plt.xlabel('epoch')
	plt.legend(['train', 'test'], loc='upper right')
	plt.savefig("history.png")

	predicted = estimator.predict(X_test)
	score = accuracy_score(y_test, predicted)

	print("Accuracy neural network = {}".format(score))
示例#5
0
def start_fit(dataSet):
    """Train the wrapped Keras model on ``dataSet`` and print its test metrics.

    Columns ``0:148`` of ``dataSet`` are used as features and column ``148``
    as the label. The data is split 80/20, standardised with statistics
    fitted on the training part only, and a ``KerasClassifier`` built from
    ``model_init`` is fitted and evaluated with ``calculate_performace``.
    """
    index = list(range(len(dataSet)))
    random.shuffle(index)
    # NOTE(review): the shuffled copy is never used; X and Y below are sliced
    # from the original `dataSet`. train_test_split is what actually performs
    # the split here. Confirm whether slicing from `data` was intended.
    data = dataSet[index]
    X = dataSet[:, 0:148]
    Y = dataSet[:, 148]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    # normalization: fit on the training split only, apply to both splits
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    dbn_model = KerasClassifier(model_init,
                                epochs=500,
                                batch_size=64,
                                verbose=0)
    dbn_model.fit(X_train, y_train)
    y_pred = dbn_model.predict(X_test)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
        len(y_pred), y_pred, y_test)
    # The seventh metric returned above is f1; the previous code tried to
    # print an undefined `roc_auc` name here.
    print(
        'DBN:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, f1))
示例#6
0
def ann_classifier(final_X, final_Y):
    """Grid-search a Keras ANN over a small parameter grid and print its metrics.

    The search uses 10-fold cross-validation on ``(final_X, final_Y)``. The
    best estimator is then used to predict on the same ``final_X`` and the
    accuracy and micro-averaged precision / recall / F-score are printed.
    """
    # Hyper-parameter grid explored by the search.
    param_grid = {
        'epochs': [10],
        'batch_size': [500],
        'optimizer': ['adam', 'rmsprop'],
        'activation': ['relu', 'sigmoid', 'linear'],
        'neurons': [2, 4, 6, 8, 10],
    }

    search = GridSearchCV(estimator=KerasClassifier(build_fn=create_ann_model),
                          param_grid=param_grid,
                          n_jobs=-1,
                          cv=10,
                          verbose=2)
    grid_results = search.fit(final_X, final_Y)
    best_model = search.best_estimator_

    # Best combination of hyper-parameters
    print('Best parameter: ', grid_results.best_score_,
          grid_results.best_params_)

    y_pred = best_model.predict(final_X)
    print('\nAccuracy: ', accuracy_score(final_Y, y_pred) * 100)
    precision, recall, fscore, _support = precision_recall_fscore_support(
        final_Y, y_pred, average='micro')
    print('\nPrecision: ', precision, '\nRecall: ', recall, '\nF-score: ',
          fscore)
示例#7
0
class Keras(BaseEstimator):
    """Estimator that builds a ``KerasClassifier`` sized to the data it is fitted on.

    ``build_function`` is called as ``build_function(n_features, n_classes)``
    when the underlying network is constructed, where ``n_features`` is
    ``X.shape[1]`` and ``n_classes`` is the number of distinct labels when
    ``multi_class`` is true, otherwise 1.
    """

    def __init__(self, build_function, multi_class=False, keras_params = None):
        """Store the configuration.

        Raises:
            ValueError: if ``build_function`` is not callable.
        """
        if not callable(build_function):
            raise ValueError('Model construction function must be callable.')

        self.multi_class = multi_class
        self.build_function = build_function
        if keras_params is None:
            keras_params = {}

        self.keras_params = keras_params

    def fit(self, X, y):
        """Build the wrapped classifier for ``X``/``y`` and fit it.

        Returns:
            self, so that calls can be chained (``est.fit(X, y).predict(X)``).
        """
        if self.multi_class:
            self.n_classes_ = len(set(y))
        else:
            self.n_classes_ = 1

        n_features = X.shape[1]

        def build_callable():
            # Zero-argument builder expected by KerasClassifier.
            return self.build_function(n_features, self.n_classes_)

        # Copy so the user-supplied dict is not modified by adding build_fn.
        keras_params = copy(self.keras_params)
        keras_params['build_fn'] = build_callable

        self.classifier_ = KerasClassifier(**keras_params)
        self.classifier_.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        return self.classifier_.predict(X)
示例#8
0
 def fit(self, model, c_m):
     """Cross-validate and fit a Keras classifier on ``self.X`` / ``self.y``.

     @params:
         model - Ignored: the name is rebound to a freshly built KerasClassifier.
                 Kept so existing callers do not break.
         c_m   - Model-building callable passed to KerasClassifier as build_fn.

     @returns:
         Predictions of the fitted classifier on ``self.X``.
     """
     model = KerasClassifier(build_fn=c_m,
                             epochs=200,
                             batch_size=10,
                             verbose=0)
     kfold = KFold(n_splits=10, shuffle=True, random_state=1234)

     # 10-fold cross-validation score, reported below as the baseline.
     results = cross_val_score(model, self.X, self.y, cv=kfold)

     # Fit on the full data and predict on that same data.
     model.fit(self.X, self.y)
     y_pred = model.predict(self.X, batch_size=128, verbose=1)

     print("Baseline: %.2f%% (%.2f%%)" %
           (results.mean() * 100, results.std() * 100))
     print(y_pred)
     return y_pred
示例#9
0
def MakingKerasModel_Class(dataframe):
    """Train and cross-validate a Keras classifier on ``dataframe``.

    Columns ``1:38`` are used as features and the second-to-last column as
    the label. Labels are integer-encoded and then one-hot encoded.

    Returns:
        (result, results): the value returned by fitting on a 70/30 train
        split, and the 10-fold ``cross_val_score`` results on the full data.
    """
    X = dataframe.iloc[:, 1:38]
    Y = dataframe.iloc[:, -2]

    encoder = LabelEncoder()
    encoded_Y = encoder.fit_transform(Y)
    # convert integers to dummy variables (one hot encoding)
    dummy_y = np_utils.to_categorical(encoded_Y)
    # Call form of print: valid on both Python 2 and Python 3.
    print("=================================\n")
    print(dummy_y)

    estimator = KerasClassifier(build_fn=baseline_model,  batch_size=256)
    # splitting data into training set and test set. If random_state is set to an integer, the split datasets are fixed.
    X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_y, test_size=0.3, random_state=0)
    result = estimator.fit(np.array(X_train), Y_train, nb_epoch=100)

    # make predictions
    pred = estimator.predict(np.array(X_test))

    # map predicted class indices back to the original labels
    # (the decoded labels are not used further in this function)
    init_labels = encoder.inverse_transform(pred)

    # k-fold cross-validate
    seed = 42
    np.random.seed(seed)
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    results = cross_val_score(estimator, np.array(X), np.array(dummy_y), cv=kfold)
    return result, results
示例#10
0
class NeuralNetworkClassificationModelFast(object):
    """ Accepts SingleStockDataSet object as input and
        trains the benchmark model on train set and
        evaluate on test set. """
    def __init__(self, dset, random_state=16):
        """Split ``dset`` 80/10/10 into train/validation/test sets and store them."""
        self.dset = dset
        self.random_state = random_state
        self.trainX, self.valX, self.testX, self.trainY, self.valY, self.testY = self.dset.get_train_val_test_sets(
            0.8, 0.1, 0.1)
        self.predictions = None
        self.num_targets = self.trainY.shape[1]

    def build_nn_arch(self):
        """Build and compile the dense network used by the classifier."""
        input_dim = self.trainX.shape[1]
        num_classes = 6
        model = Sequential()
        model.add(
            Dense(80, input_dim=input_dim, init='normal', activation='relu'))
        model.add(Dense(40, init='normal', activation='sigmoid'))
        model.add(Dense(num_classes, init='normal', activation='sigmoid'))
        sgd = SGD(lr=0.4)
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer=sgd)
        return model

    def build_classifier(self):
        """Create the ``KerasClassifier`` wrapper around ``build_nn_arch``."""
        # fix random seed for reproducibility
        np.random.seed(self.random_state)
        self.classifier = KerasClassifier(build_fn=self.build_nn_arch,
                                          nb_epoch=100,
                                          batch_size=8,
                                          verbose=0)

    def fit(self):
        """Build the classifier and fit it on column 1 of the training targets."""
        self.build_classifier()
        self.classifier.fit(self.trainX, self.trainY[:, 1])

    def predict(self):
        """Predict on the test inputs, store the result and return it."""
        self.predictions = self.classifier.predict(self.testX)
        return self.predictions

    def score(self):
        """Print and return the F1 score of the stored predictions on column 1 of the test targets."""
        scr = f1_score(self.testY[:, 1], self.predictions)
        # Call form of print: valid on both Python 2 and Python 3.
        print('f1 score = %f' % scr)
        return scr

    def evaluate(self):
        """ fits the model, predicts the targets and returns evaluation score """
        self.fit()
        self.predict()
        return self.score()

    def to_categorical(self, y):
        """Return ``y`` one-hot encoded via ``np_utils.to_categorical``."""
        # convert integers to dummy variables (i.e. one hot encoded)
        return np_utils.to_categorical(y)
示例#11
0
def main():
	"""Load the polarity data, train a Keras classifier, cross-validate it and print test accuracy."""
	# list of input file names (negative and positive examples)
	files = ["rt-polaritydata/rt-polarity.neg", "rt-polaritydata/rt-polarity.pos"]
	print('Loading data...')
	phrases, labels = load_data(files)
	print('Preprocessing data...')
	data = data_to_embedding(phrases, sent_len=51)

	# Hold out 20% for testing, then hold out 20% of the remainder for
	# validation (the validation part is not used further below).
	x_first_split, x_test, y_first_split, y_test = train_test_split(
		data, labels, test_size=0.2)
	x_train, x_val, y_train, y_val = train_test_split(
		x_first_split, y_first_split, test_size=0.2)

	# --------------- simple way to make a model, train and test it ------------------
	print('Training the model...')
	model = KerasClassifier(
		build_fn=create_model,
		epochs=4,
		dropout=0.2,
		input_dim=5100,
		verbose=0,
	)
	model.fit(x_train, y_train)

	# -------------- example cross validation -----------------------
	# cross-validate on the training and validation data only (i.e. x_first_split)
	seed = 7
	folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
	results = cross_val_score(model, x_first_split, y_first_split, cv=folds)
	print("average result:{0} , std: {1}".format(results.mean(),results.std()))

	# -------------- finally, produce predictions on test set ------
	test_predictions = model.predict(x_test)
	acc = accuracy_score(y_test, test_predictions)
	print(acc * 100)
 def param_tune(self):
     """
     Exhaustively try every combination of the hyperparameters listed below: build, fit and
     evaluate a model for each one, then write the best combination and its test accuracy
     to self.results.
     """
     epoch_choices = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
     batch_size_choices = [1, 3, 5, 7, 9]
     init_choices = ['glorot_uniform', 'normal', 'uniform']
     optimizer_choices = ['rmsprop', 'adam']

     best_params = ()
     best_acc = 0
     for comb in product(epoch_choices, batch_size_choices,
                         init_choices, optimizer_choices):
         epochs, batch_size, init, optimizer = comb
         auto = KerasClassifier(build_fn=self.model_build,
                                epochs=epochs,
                                batch_size=batch_size,
                                init=init,
                                optimizer=optimizer)
         auto.fit(self.X_train, self.y_train)
         # One-hot encode the predicted classes so they can be compared
         # element-wise with self.y_test.
         predictions = np_utils.to_categorical(auto.predict(self.X_test))
         accu_test = np.sum(self.y_test == predictions) / self.y_test.size
         # Strict comparison: the first combination reaching a score is kept.
         if accu_test > best_acc:
             best_params, best_acc = comb, accu_test

     for line in ("Param Tune Results\n",
                  str(best_params) + "\n",
                  str(best_acc) + "\n"):
         self.results.write(line)
    def random_param_tune(self):
        """
        Try a random subset (at most 250) of the hyperparameter combinations listed below:
        build, fit and evaluate a model for each one, then write the best combination and
        its test accuracy to self.results.
        """
        epoch_choices = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        batch_size_choices = [1, 3, 5, 7, 9]
        init_choices = ['glorot_uniform', 'normal', 'uniform']
        optimizer_choices = ['rmsprop', 'adam']

        all_comb = list(product(epoch_choices, batch_size_choices,
                                init_choices, optimizer_choices))
        # Cap the search at 250 randomly chosen combinations.
        if len(all_comb) > 250:
            all_comb = sample(all_comb, 250)

        best_params = ()
        best_acc = 0
        for comb in all_comb:
            epochs, batch_size, init, optimizer = comb
            auto = KerasClassifier(build_fn=self.model_build,
                                   epochs=epochs,
                                   batch_size=batch_size,
                                   init=init,
                                   optimizer=optimizer)
            auto.fit(self.X_train, self.y_train)
            # One-hot encode the predicted classes so they can be compared
            # element-wise with self.y_test.
            predictions = np_utils.to_categorical(auto.predict(self.X_test))
            accu_test = np.sum(self.y_test == predictions) / self.y_test.size
            # Strict comparison: the first combination reaching a score is kept.
            if accu_test > best_acc:
                best_params, best_acc = comb, accu_test

        for line in ("Random Param Tune Results\n",
                     str(best_params) + "\n",
                     str(best_acc) + "\n"):
            self.results.write(line)
示例#14
0
class Keras(BaseEstimator):
    """Estimator that builds a ``KerasClassifier`` sized to the data it is fitted on.

    ``build_function`` is called as ``build_function(n_features, n_classes)``
    when the underlying network is constructed, where ``n_features`` is
    ``X.shape[1]`` and ``n_classes`` is the number of distinct labels when
    ``multi_class`` is true, otherwise 1.
    """

    def __init__(self, build_function, multi_class=False, keras_params=None):
        """Store the configuration.

        Raises:
            ValueError: if ``build_function`` is not callable.
        """
        if not callable(build_function):
            raise ValueError('Model construction function must be callable.')

        self.multi_class = multi_class
        self.build_function = build_function
        if keras_params is None:
            keras_params = {}

        self.keras_params = keras_params

    def fit(self, X, y):
        """Build the wrapped classifier for ``X``/``y`` and fit it.

        Returns:
            self, so that calls can be chained (``est.fit(X, y).predict(X)``).
        """
        if self.multi_class:
            self.n_classes_ = len(set(y))
        else:
            self.n_classes_ = 1

        n_features = X.shape[1]

        def build_callable():
            # Zero-argument builder expected by KerasClassifier.
            return self.build_function(n_features, self.n_classes_)

        # Copy so the user-supplied dict is not modified by adding build_fn.
        keras_params = copy(self.keras_params)
        keras_params['build_fn'] = build_callable

        self.classifier_ = KerasClassifier(**keras_params)
        self.classifier_.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        return self.classifier_.predict(X)
示例#15
0
def automl_basic(X_train,
                 X_test,
                 y_train,
                 y_test,
                 baseline,
                 min_neurons,
                 max_neurons,
                 max_layers,
                 num_runs=3):
    """Score ``baseline`` networks over a grid of layer and neuron counts.

    Every pair ``(layers, neurons)`` with ``layers`` in ``range(max_layers)``
    and ``neurons`` in ``range(min_neurons, max_neurons)`` is trained
    ``num_runs`` times; the builder is ``partial(baseline, neurons, layers)``.

    Returns a ``defaultdict(list)`` mapping each ``(layers, neurons)`` tuple
    to the list of test accuracies, one per run.
    """
    accuracy_scores = defaultdict(list)
    search_space = itertools.product(range(max_layers),
                                     range(min_neurons, max_neurons))
    for layers_neurons in search_space:
        layers, neurons = layers_neurons
        print("Number of hidden layers", layers)
        for _ in range(num_runs):
            build_network = partial(baseline, neurons, layers)
            estimator = KerasClassifier(build_fn=build_network,
                                        epochs=100,
                                        batch_size=5,
                                        verbose=0)
            estimator.fit(X_train, y_train)
            predicted = estimator.predict(X_test)
            run_accuracy = metrics.accuracy_score(y_test, predicted)
            accuracy_scores[layers_neurons].append(run_accuracy)
    return accuracy_scores
def trainNNmodel(X, Y):
    """Train a dense binary classifier on a 70/30 split and return its test metrics.

    Returns a list:
    ``[accuracy, auc, precision, recall, f1, y_test, y_pred, y_pred_rt, model]``
    where the first five entries are strings, ``y_pred`` is the flattened
    list of predicted labels, ``y_pred_rt`` is column 1 of ``predict_proba``
    and ``model`` is the underlying Keras model of the fitted wrapper.
    """
    # Get the training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

    # Function to create the NN model, required for the wrapper
    def create_keras_model():
        stack = [
            Dense(64,
                  input_dim=X.shape[1],
                  kernel_initializer='glorot_normal',
                  activation='relu'),
            BatchNormalization(),
            Dropout(0.5),
            Dense(128, kernel_initializer='glorot_normal', activation='relu'),
            BatchNormalization(),
            Dropout(0.5),
            Dense(16, kernel_initializer='glorot_normal', activation='relu'),
            Dense(1, activation='sigmoid'),
        ]
        network = Sequential()
        for layer in stack:
            network.add(layer)
        network.compile(optimizer="adam",
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
        return network

    # Stop early when the monitored accuracy has not improved for 50 epochs.
    callbacks_list = [
        callbacks.EarlyStopping(monitor="accuracy", patience=50, mode='max')
    ]

    # The same training options are given to the wrapper and to fit();
    # only the verbosity differs (0 for the wrapper, 1 for this fit call).
    training_options = dict(epochs=200, batch_size=12, callbacks=callbacks_list)
    estimator = KerasClassifier(build_fn=create_keras_model,
                                verbose=0,
                                **training_options)
    estimator.fit(X_train, y_train, verbose=1, **training_options)

    # predict() yields one row per sample; flatten the rows into a plain list.
    y_pred = [label for row in estimator.predict(X_test) for label in row]
    y_pred_rt = estimator.predict_proba(X_test)[:, 1]

    accuracy = str(accuracy_score(y_test, y_pred))
    fpr, tpr, _thresholds = roc_curve(y_test, y_pred_rt)
    auc_value = str(auc(fpr, tpr))
    precision = str(precision_score(y_test, y_pred))
    recall = str(recall_score(y_test, y_pred))
    f1score = str(f1_score(y_test, y_pred, average="weighted"))

    return [
        accuracy, auc_value, precision, recall, f1score, y_test, y_pred,
        y_pred_rt, estimator.model
    ]
示例#17
0
def testing(X_train=[],
            X_test=[],
            V_train=[],
            V_test=[],
            t_train=[],
            t_test=[],
            Y_train=[],
            Y_test=[],
            top_words=9444,
            max_review_length=1000,
            embedding_length=300,
            batch_size=128,
            nb_epoch=100,
            preset={},
            option='lstm'):
    """Train a Keras text classifier (CNN or LSTM) and score it on the test split.

    Args:
        X_train, X_test: sequences; padded to ``max_review_length`` before use.
        V_train, V_test, t_train, t_test: accepted for interface
            compatibility; not used by the supported options.
        Y_train, Y_test: labels for the two splits.
        top_words, max_review_length, embedding_length, batch_size, nb_epoch:
            forwarded to ``KerasClassifier`` (``max_review_length`` as
            ``max_length``).
        preset: extra keyword arguments for ``KerasClassifier``; the values
            set by this function take precedence. The dict is not modified.
        option: ``'cnn'`` or ``'lstm'``.

    Returns:
        dict with keys ``'acc'``, ``'f1'`` and ``'auc'``.

    Raises:
        ValueError: if ``option`` is not ``'cnn'`` or ``'lstm'``.
    """
    X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

    if option == 'cnn':
        build_fn = cnn_train
    elif option == 'lstm':
        build_fn = lstm_train
    else:
        # No model can be built for any other option. Previously this fell
        # through to an UnboundLocalError on `model`; fail explicitly instead.
        # NOTE(review): a 'classic' option (fitting on decay_norm(V_train, ...))
        # was sketched here but never built a model; it needs its own
        # estimator before it can be supported.
        print("ERROR AT TRAINING PHASE OF TESTING.")
        raise ValueError(
            "Unsupported option %r: expected 'cnn' or 'lstm'." % (option,))

    # Work on a copy so neither the shared default dict nor the caller's
    # dict is mutated across calls.
    params = dict(preset)
    params.update({
        'build_fn': build_fn,
        'top_words': top_words,
        'max_length': max_review_length,
        'embedding_length': embedding_length,
        'batch_size': batch_size,
        'nb_epoch': nb_epoch,
        'verbose': 1
    })
    model = KerasClassifier(**params)
    model.fit(X_train, Y_train)

    predict = model.predict(X_test)
    acc = accuracy_score(Y_test, predict)
    f1 = f1_score(Y_test, predict)
    auc = roc_auc_score(Y_test, predict)
    return ({'acc': acc, 'f1': f1, 'auc': auc})
示例#18
0
    def _model_build(self, *arg):
        """Grid-search the wrapped model, retrain a network with the best parameters and analyse it.

        After the search, a fresh three-layer network is built using the best
        ``init`` and ``optimizer``, trained with the best ``nb_epoch`` and
        ``batch_size``, and used to fill ``self.y_pred``, ``self.y_true`` and
        ``self.prob`` before ``self._analyse_result()`` is called.
        """
        self._prepare_test_data()

        # Hyper-parameter grid for the search.
        search_space = {
            'optimizer': ['adam'],
            'nb_epoch': [100, 150],
            'batch_size': [5, 10],
            'init': ['normal', 'uniform'],
        }
        wrapped = KerasClassifier(build_fn=self.create_model, verbose=0)
        searcher = GridSearchCV(estimator=wrapped, param_grid=search_space, cv=5)
        grid_result = searcher.fit(self.x_train, self.y_train)
        best = grid_result.best_params_
        print("Best: %f using %s" % (grid_result.best_score_, best))

        # Training with the best parameters
        final_model = Sequential()
        final_model.add(
            Dense(12, input_dim=8, init=best['init'], activation='relu'))
        final_model.add(Dense(8, init=best['init'], activation='relu'))
        final_model.add(Dense(1, init=best['init'], activation='sigmoid'))
        final_model.compile(loss='binary_crossentropy',
                            optimizer=best['optimizer'],
                            metrics=['accuracy'])
        final_model.fit(self.x_train,
                        self.y_train,
                        nb_epoch=best['nb_epoch'],
                        batch_size=best['batch_size'])

        # Round the raw network outputs to obtain the predicted labels.
        raw_outputs = final_model.predict(self.x_test)
        self.y_pred = [np.round(output) for output in raw_outputs]
        self.y_true = self.y_test
        self.prob = final_model.predict_proba(self.x_test)
        self._analyse_result()
示例#19
0
def find_dropout(d_range):
    """Train one model per dropout rate in ``d_range`` and plot the outcomes.

    For every rate the module-level ``data`` / ``labels`` are split 90/10,
    a ``KerasClassifier`` is trained, and accuracy and loss curves are saved
    under ``result/``. Finally test accuracy versus dropout rate is plotted
    and the best rate is printed.
    """
    # Test accuracy per dropout rate, in the order of d_range. This list was
    # previously never created or filled, so the summary plot below failed.
    res = []
    # NOTE(review): the file is opened (and created if missing) but nothing is
    # written to it; the write below is disabled. Confirm whether it should be
    # restored.
    with open('dump.txt', 'a') as f:
        for d in d_range:
            X_train, X_test, y_train, y_test = train_test_split(data,
                                                                labels,
                                                                train_size=0.9,
                                                                random_state=0)
            model = KerasClassifier(build_fn=lambda: create_model_dropout(d),
                                    epochs=50,
                                    batch_size=8,
                                    validation_split=0.11,
                                    verbose=1)
            history = model.fit(X_train, y_train)

            preds = model.predict(X_test)
            acc = accuracy_score(y_test, preds)
            res.append(acc)

            plt.figure(figsize=(15, 10))
            plt.plot(history.history['acc'])
            plt.plot(history.history['val_acc'])
            plt.title('model accuracy')
            plt.ylabel('accuracy')
            plt.xlabel('epoch')
            plt.legend(['train', 'validation'], loc='upper left')
            plt.savefig('result/drop_' + str(d) + '_accuracy.png')
            # Set after saving, so this title is not part of the saved image.
            plt.title('Test accuracy: {}'.format(acc))

            plt.figure(figsize=(15, 10))
            plt.plot(history.history['loss'])
            plt.plot(history.history['val_loss'])
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(['train', 'validation'], loc='upper left')
            plt.savefig('result/drop_' + str(d) + '_loss.png')
            # Set after saving, so this title is not part of the saved image.
            plt.title('Test accuracy: {}'.format(acc))

            # Drop the model and force a collection before the next run.
            del model
            gc.collect(0)
            gc.collect(1)
            gc.collect(2)

            # f.write('{}:{}\n'.format(d, acc))

    plt.figure(figsize=(15, 10))
    plt.plot(d_range, res)
    plt.ylabel('Accuracy')
    plt.xlabel('droptout rate')
    plt.savefig('result/dropout_accuracy.png')

    best_idx = np.argmax(res)
    # Call form of print: valid on both Python 2 and Python 3.
    print('best accuracy: {}, with dropout: {}'.format(res[best_idx],
                                                       d_range[best_idx]))
示例#20
0
def main():
    """Fit the baseline Keras classifier on the training data and print a dev-set confusion matrix.

    Progress messages and the elapsed wall-clock time are printed along the way.
    """
    print("Done loading the libraries")
    t0 = time.time()
    # fix random seed for reproducibility
    seed = 7
    np.random.seed(seed)

    # load the training and dev splits
    data = massageData.massageData()
    X, Y = data.getTrain()
    X_dev, Y_dev = data.getDev()

    print("Done load dataset")

    # do some more preprocessing
    # encode class values as integers
    encoder = LabelEncoder()
    # NOTE(review): the exact encoding is decided by encode_values, which is
    # defined elsewhere; the print below expects a vector of class numbers.
    dummy_y = encode_values(encoder, Y)
    # encoded dev labels; not used further below
    dummy_y_dev = encode_values(encoder, Y_dev)

    print('Dummy_y (should be one vector if class numbers):', dummy_y)

    print("Done preprocessing dataset")

    # build the model
    # only referenced by the disabled cross-validation call further down
    tensorboard = TensorBoard()
    estimator = KerasClassifier(build_fn=baseline_model,
                                epochs=10,
                                batch_size=500,
                                verbose=1)
    print('Y passing into model: ', dummy_y)
    # fit on the training split, then predict the dev split
    estimator.fit(X, dummy_y)
    dummy_y_pred_dev = estimator.predict(X_dev)

    print("Dummy y predict (should be one long vector of number): {}".format(
        dummy_y_pred_dev))

    # print("predictions ", estimator.predict(X_dev))
    # print("actual output ", Y_dev)

    print("Done building estimator")

    # kfold = KFold(n_splits=2, shuffle=True, random_state=seed)

    # results = cross_val_score(estimator, X, dummy_y, cv=kfold, verbose=1, fit_params={'callbacks': [tensorboard]})
    t1 = time.time()

    # elapsed time covers loading, preprocessing, fitting and dev prediction
    print("Time elapsed: ", t1 - t0)
    # print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

    print("dummy y dev", dummy_y_pred_dev.shape)
    # decode the predictions before comparing them with the raw dev labels
    print("Confusion matrix",
          confusion_matrix(Y_dev, decode_values(encoder, dummy_y_pred_dev)))
def KappaLoss(individual, data, labels, layers, activation, *_):
    """Return ``1 - Cohen's kappa`` for the network described by ``individual``.

    @params:
        individual - Required : sequence read as (epochs, learning rate, dropout)
        data       - Required : mapping with 'train' and 'test' input arrays
        labels     - Required : mapping with 'train' and 'test' label arrays
        layers     - Required : forwarded to CreateNeuralNetwork
        activation - Required : forwarded to CreateNeuralNetwork
        *_         - Optional : extra positional arguments, ignored

    Returns a loss where lower is better (0 means perfect agreement).
    """
    train_labels = labels['train']
    test_labels = labels['test']

    # 1-D labels are treated as a two-class problem; 2-D labels are taken to
    # have one column per class.
    if len(train_labels.shape) < 2:
        output_size = 2
    else:
        output_size = train_labels.shape[1]

    network = KerasClassifier(build_fn=CreateNeuralNetwork,
                              input_size=data['train'].shape[1],
                              output_size=output_size,
                              layers=layers,
                              activation=activation,
                              lr=individual[1],
                              dropout=individual[2],
                              epochs=int(individual[0]),
                              verbose=0)
    network.fit(data['train'], train_labels)
    predictions = network.predict(data['test'])

    # Mirror the 1-D / 2-D handling used for the training labels: only 2-D
    # test labels need collapsing to class indices. Calling argmax(1) on
    # 1-D labels would raise.
    if len(test_labels.shape) < 2:
        expected = test_labels
    else:
        expected = test_labels.argmax(1)

    score = cohen_kappa_score(predictions, expected)
    return 1 - score
示例#22
0
    def model_run(self):
        """Train the network from ``self.model_build`` and log its test accuracy.

        Fits a ``KerasClassifier`` on ``self.X_train`` / ``self.y_train``,
        predicts classes for ``self.X_test`` and writes the fraction of
        correctly classified samples to ``self.results``.

        Returns:
            The fitted ``KerasClassifier``.
        """
        classifier = KerasClassifier(build_fn=self.model_build,
                                     epochs=200,
                                     batch_size=5)
        classifier.fit(self.X_train, self.y_train)
        predicted_classes = np.asarray(
            classifier.predict(self.X_test)).ravel()

        # Compare one class label per sample. Comparing one-hot matrices
        # element-wise counts every matching zero as a hit, which inflates
        # the accuracy, and breaks when the highest class is never predicted.
        true_classes = np.asarray(self.y_test)
        if true_classes.ndim > 1:
            true_classes = np.argmax(true_classes, axis=1)
        accu_test = float(np.mean(predicted_classes == true_classes))

        self.results.write("Model Results\n")
        self.results.write("Model Accuracy:" + str(accu_test) + "\n")
        return classifier
示例#23
0
def cross_validation_classifier(k, training, target):
    """Fit a Keras classifier on one hold-out split and print MSE and accuracy.

    Despite the name, a single train/test split is evaluated: a fraction
    ``1 / k`` of the samples is held out for testing.

    Args:
        k: number of folds the hold-out fraction is derived from (must be
            non-zero).
        training: input samples.
        target: target values aligned with ``training``.
    """
    seed = 7
    np.random.seed(seed)

    # Hold out 1/k of the data. Float division avoids Python 2 truncating
    # the fraction to 0 and the rounding drift of 100 / k / 100.
    test_fraction = 1.0 / k

    # Build the classifier wrapper.
    k_model = KerasClassifier(build_fn=neural_network_classifier,
                              epochs=20000,
                              batch_size=30,
                              verbose=0)

    x_train, x_test, y_train, y_test = train_test_split(
        training, target, test_size=test_fraction, random_state=seed)

    k_model.fit(np.array(x_train), np.array(y_train))
    y_pred = k_model.predict(np.array(x_test))

    mse = mean_squared_error(y_test, y_pred)

    # Accuracy is measured on scores snapped by nearestHalf (defined
    # elsewhere) and compared as two-decimal strings so float noise cannot
    # cause spurious mismatches.
    y_pred_round = ['%.2f' % score for score in nearestHalf(y_pred)]
    y_test_labels = ['%.2f' % value for value in y_test]
    accuracy = accuracy_score(y_test_labels, y_pred_round)

    print('mse: ', mse)
    print('accuracy: ', round(accuracy, 3) * 100, '%')
示例#24
0
def main():
    """Cross-validate a scaled Keras MLP on the leaf data and write a submission.

    Reads the train/test CSV files, imputes missing values, encodes the
    target and any categorical inputs, cross-validates a
    ``StandardScaler`` + ``KerasClassifier`` pipeline, then fits that same
    pipeline on all training data and writes the predicted class
    probabilities for the test set to ``submit.csv``.
    """
    np.random.seed(seed)
    trainfile = r"/Arun/ML/Practice/Leaves/train.csv"
    testfile = r"/Arun/ML/Practice/Leaves/test.csv"  # File for submission
    traindata = pd.read_csv(trainfile)
    testdata = pd.read_csv(testfile)  # Data for prediction and submission
    print("\nDataset has {1} columns and {0} Rows".format(
        traindata.shape[0], traindata.shape[1]))
    print("\nBasic Stats: \n{0}".format(traindata.describe()))

    # ---- Missing value treatment ----
    tr_mv_vars = getMissingVars(traindata)
    if len(tr_mv_vars) > 0:
        replaceMVwithMode(traindata, tr_mv_vars)
    te_mv_vars = getMissingVars(testdata)
    if len(te_mv_vars) > 0:
        replaceMVwithMode(testdata, te_mv_vars)

    # ---- X, y assignment ----
    tr_cat_vars = list(getCatVars(traindata))
    te_cat_vars = list(getCatVars(testdata))
    target_var = 'species'
    X = traindata.values[:, 2:]
    y = traindata.values[:, 1]  # all values of target_var
    X_submit = testdata.values[:, 1:]  # the test file has no target column
    submission_ids = testdata['id']

    # ---- Encoding ----
    # Default to None so the submission frame can still be built (with
    # integer column labels) when the target needs no encoding.
    y_classes = None
    if target_var in tr_cat_vars:
        tr_cat_vars.remove(target_var)
        y, y_classes = encodeY(y)
    if len(tr_cat_vars) > 0:
        encodeCatVariables(X, tr_cat_vars)
    if len(te_cat_vars) > 0:
        encodeCatVariables(X_submit, te_cat_vars)

    # ---- Modeling ----
    # Scaling is done inside the pipeline so it is re-fitted on each fold.
    model = KerasClassifier(build_fn=create_nn_model, verbose=0)
    leaf_pipeline = Pipeline([
        ('normalization', StandardScaler()),
        ('MLP', model),  # multi-layer perceptron
    ])
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
    result = cross_val_score(leaf_pipeline, X, y, cv=kfold, n_jobs=-1)
    print("\nResult: Acc{0}% std({1}%)".format(result.mean() * 100,
                                               result.std() * 100))

    # Fit and predict with the full pipeline so the submitted model matches
    # the one that was cross-validated (the bare model would skip scaling).
    leaf_pipeline.fit(X, y)
    y_prob = leaf_pipeline.predict_proba(X_submit)
    submission = pd.DataFrame(y_prob, index=submission_ids, columns=y_classes)
    submission.to_csv('submit.csv')
示例#25
0
def main():
    """Train a wrapped Keras model on the Otto data, then a bagging ensemble of it.

    Loads and scales the training data, fits a ``KerasClassifier`` wrapper
    on the one-hot labels, prints its training score and the shapes of its
    predictions/probabilities, and finally fits and scores a
    ``BaggingClassifier`` built from the same wrapper.
    """
    code_dir = '/home/john/git/kaggle/OttoGroup/'
    data_dir = '/home/john/data/otto/'
    training_file = 'train.csv'

    # Work from the code directory and fix the NumPy seed.
    os.chdir(code_dir)
    np.random.seed(1337)

    print('Starting script...')

    print('Loading data...')
    X, labels = load_training_data(data_dir, training_file)

    print('Pre-processing...')
    X = apply_scaler(X, create_scaler(X))
    y, y_onehot, encoder = preprocess_labels(labels)
    n_features = X.shape[1]
    n_classes = y_onehot.shape[1]
    print('Features = ' + str(n_features))
    print('Classes = ' + str(n_classes))

    print('Building model...')
    network = define_model(n_features, n_classes)
    print('Complete.')

    print('Training model...')
    wrapper = KerasClassifier(network)
    wrapper.fit(X, y_onehot, nb_epoch=20)
    print('Complete.')

    train_score = wrapper.score(X, y_onehot)
    print('Training score = ' + str(train_score))

    predicted = wrapper.predict(X)
    print('Predictions shape = ' + str(predicted.shape))

    probabilities = wrapper.predict_proba(X)
    print('Probabilities shape = ' + str(probabilities.shape))

    print('Building ensemble...')
    ensemble = BaggingClassifier(wrapper,
                                 n_estimators=3,
                                 max_samples=1.0,
                                 max_features=1.0)
    print('Complete.')

    # The ensemble is trained on the integer labels, not the one-hot matrix.
    print('Training ensemble...')
    ensemble.fit(X, y)
    print('Complete.')

    ensemble_score = ensemble.score(X, y)
    print('Ensemble score = ' + str(ensemble_score))

    print('Script complete.')
示例#26
0
def get_results():
    """Train the classifier, predict the test set and write a timestamped CSV.

    Uses the module-level ``Xtrain``, ``ytrain``, ``config``, ``test``,
    ``le`` and ``ids``. The output file is named after the current Unix
    time and has the columns ``id`` and ``cuisine``.
    """
    print("Firing the laz0rs.")
    fit_options = {
        'epochs': config['epochs'],
        'batch_size': config['batch_size'],
        'verbose': 1,
    }
    classifier = KerasClassifier(build_fn=make_model)
    classifier.fit(Xtrain, ytrain, **fit_options)

    print("Classifying / predicting results.")
    # Cast the predictions to int32 before mapping them back to labels.
    encoded = np.array(classifier.predict(test.values), dtype=np.int32)
    cuisines = le.inverse_transform(encoded)

    frame = pd.DataFrame(data={'id': ids, 'cuisine': cuisines})
    output_name = str(time.time()) + ".csv"
    frame.to_csv(output_name, index=False)
    def model_run(self):
        """
        Runs the CNN model using the above model_build function and fits/predicts the data using it.

        Writes the fraction of correctly classified test samples to
        ``self.results`` and returns the fitted ``KerasClassifier``.
        """
        classifier = KerasClassifier(build_fn=self.model_build,
                                     epochs=200,
                                     batch_size=5,
                                     verbose=0)
        classifier.fit(self.X_train, self.y_train)
        predicted_classes = np.asarray(
            classifier.predict(self.X_test)).ravel()

        # Compare one class label per sample. Comparing one-hot matrices
        # element-wise counts every matching zero as a hit, which inflates
        # the accuracy, and breaks when the highest class is never predicted.
        true_classes = np.asarray(self.y_test)
        if true_classes.ndim > 1:
            true_classes = np.argmax(true_classes, axis=1)
        accu_test = float(np.mean(predicted_classes == true_classes))

        self.results.write("Model Results\n")
        self.results.write("Model Accuracy:" + str(accu_test) + "\n")
        return classifier
    def get_grid(self, x, y):
        """
        Grid-search the weight initialiser and evaluate the best model.

        :param x: training inputs passed to ``GridSearchCV.fit``
        :param y: training targets passed to ``GridSearchCV.fit``
        :return: the refitted best estimator found by the search
        """
        # Only the initialisation mode is searched; epochs is fixed at 20.
        init_mode = ['uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform']
        param_grid = dict(epochs=[20], init_mode=init_mode)

        model = KerasClassifier(build_fn=self.create_model, batch_size=20)
        grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2)

        # NOTE(review): the checkpoint monitors "val_loss" but no validation
        # data is passed here -- confirm the callback actually saves anything.
        checkpoint = ModelCheckpoint("mlp_tanaya_10_gs.hdf5",
                                     monitor="val_loss",
                                     save_best_only=True)
        grid_result = grid.fit(x, y, callbacks=[checkpoint])
        print("grid", grid_result.best_estimator_)
        model = grid_result.best_estimator_

        # Summarize results
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

        # Final test on the held-out arrays stored next to the script.
        y_pred = model.predict(np.load('x_test.npy'))
        print(y_pred)
        print("unique", len(np.unique(y_pred)))

        # The stored test labels are collapsed to class indices via argmax.
        y_test = np.argmax(np.load('y_test.npy'), axis=1)
        print(y_test)

        # Metrics are called with the conventional (y_true, y_pred) order.
        print("Cohen Kappa", cohen_kappa_score(y_test, y_pred))
        print("F1 score",
              f1_score(y_test, y_pred, average='macro'))

        return model
def cross_validation(training_sample_size: int = None):
    """Tune the Keras model with a randomized search and evaluate the winner.

    Prepares and preprocesses the data, optionally sub-samples the training
    set via ``sample_training_data``, runs a 5-iteration
    ``RandomizedSearchCV`` with early stopping, reports the top parameter
    settings and evaluates the refitted best estimator on the test set.
    """
    # Load images data
    X_train, X_test, y_train, y_test = utils.prepare_data()
    X_train, y_train = sample_training_data(X_train, y_train,
                                            training_sample_size)
    X_train, X_test = preprocess.process_data(X_train, X_test)

    base_estimator = KerasClassifier(build_fn=get_model,
                                     input_shape=X_train.shape[1],
                                     num_classes=10,
                                     verbose=0)

    search_space = dict(
        num_units=[10, 20, 30, 40],
        num_layers=[1, 2],
        epochs=[50, 100, 150],
        activation=['relu', 'sigmoid'],
    )

    # Extra keyword arguments forwarded to every fit call during the search.
    fit_kwargs = dict(
        callbacks=[EarlyStopping(monitor='val_loss', patience=1)],
        batch_size=32,
        validation_split=0.2,
    )

    # To quickly test that the code runs, set `n_iter` to a small number, e.g. 1.
    search = RandomizedSearchCV(base_estimator,
                                search_space,
                                n_iter=5,
                                refit=True)
    search.fit(X_train, y_train, **fit_kwargs)
    utils.report_parameter_tuning(search.cv_results_, n_top=5)

    best_model = search.best_estimator_
    utils.evaluate_prediction(predictions=best_model.predict(X_test),
                              y_test=y_test)
示例#30
0
class FinalModelATC(BaseEstimator, TransformerMixin):
    def __init__(self,
                 model,
                 model_name=None,
                 ml_for_analytics=False,
                 type_of_estimator='classifier',
                 output_column=None,
                 name=None,
                 scoring_method=None,
                 training_features=None,
                 column_descriptions=None):

        self.model = model
        self.model_name = model_name
        self.ml_for_analytics = ml_for_analytics
        self.type_of_estimator = type_of_estimator
        self.name = name
        self.training_features = training_features
        self.column_descriptions = column_descriptions

        if self.type_of_estimator == 'classifier':
            self._scorer = scoring_method
        else:
            self._scorer = scoring_method

    def fit(self, X, y):
        """Fit the wrapped model on ``X`` / ``y`` and return ``self``.

        Sparse input is densified for deep-learning models and for the
        listed model names. For deep-learning models the Keras wrapper is
        rebuilt with the number of input columns, and training uses early
        stopping on the training loss. A ``TypeError`` raised while fitting
        triggers one retry on densified input; ``KeyboardInterrupt`` stops
        training without propagating.

        Raises:
            TypeError: if a DeepLearning model is requested but Keras is
                not installed.
        """
        self.model_name = get_name_from_model(self.model)
        is_deep_learning = self.model_name[:12] == 'DeepLearning'
        dense_only_models = [
            'BayesianRidge', 'LassoLars', 'OrthogonalMatchingPursuit',
            'ARDRegression', 'Perceptron', 'PassiveAggressiveClassifier',
            'SGDClassifier', 'RidgeClassifier', 'LogisticRegression'
        ]

        X_fit = X

        if is_deep_learning or self.model_name in dense_only_models:
            if scipy.sparse.issparse(X_fit):
                X_fit = X_fit.todense()

            if is_deep_learning:
                if not keras_installed:
                    print(
                        'WARNING: We did not detect that Keras was available.')
                    raise TypeError(
                        'A DeepLearning model was requested, but Keras was not available to import'
                    )

                # Keras needs to know how many input nodes to expect, which
                # is the number of columns.
                num_cols = X_fit.shape[1]

                # Re-create the wrapper with the existing parameters, minus
                # build_fn which is supplied explicitly below.
                model_params = self.model.get_params()
                del model_params['build_fn']

                if self.type_of_estimator == 'regressor':
                    self.model = KerasRegressor(
                        build_fn=utils_models.make_deep_learning_model,
                        num_cols=num_cols,
                        **model_params)
                elif self.type_of_estimator == 'classifier':
                    self.model = KerasClassifier(
                        build_fn=utils_models.make_deep_learning_classifier,
                        num_cols=num_cols,
                        **model_params)

        try:
            if is_deep_learning:
                print(
                    'Stopping training early if we have not seen an improvement in training accuracy in 25 epochs'
                )
                from keras.callbacks import EarlyStopping
                stopper = EarlyStopping(monitor='loss', patience=25, verbose=1)
                self.model.fit(X_fit, y, callbacks=[stopper])
            else:
                self.model.fit(X_fit, y)

        except TypeError:
            # Retry once on dense input.
            if scipy.sparse.issparse(X_fit):
                X_fit = X_fit.todense()
            self.model.fit(X_fit, y)

        except KeyboardInterrupt:
            # Interrupting training is not treated as an error.
            pass

        return self

    def remove_categorical_values(self, features):
        """Return the set of base feature names, dropping any ``=value`` suffix.

        A one-hot style name such as ``'color=red'`` is reduced to
        ``'color'`` (cut at the first ``=``); names without ``=`` are kept
        unchanged.
        """
        return {feature.partition('=')[0] for feature in features}

    def verify_features(self, X, raw_features_only=False):

        if self.column_descriptions is None:
            print(
                'This feature is not enabled by default. Depending on the shape of the training data, it can add hundreds of KB to the saved file size.'
            )
            print(
                'Please pass in `ml_predictor.train(data, verify_features=True)` when training a model, and we will enable this function, at the cost of a potentially larger file size.'
            )
            warnings.warn(
                'Please pass verify_features=True when invoking .train() on the ml_predictor instance.'
            )
            return None

        print(
            '\n\nNow verifying consistency between training features and prediction features'
        )
        if isinstance(X, dict):
            prediction_features = set(X.keys())
        elif isinstance(X, pd.DataFrame):
            prediction_features = set(X.columns)

        # If the user passed in categorical features, we will effectively one-hot-encode them ourselves here
        # Note that this assumes we're using the "=" as the separater in DictVectorizer/DataFrameVectorizer
        date_col_names = []
        categorical_col_names = []
        for key, value in self.column_descriptions.items():
            if value == 'categorical' and 'day_part' not in key:
                try:
                    # This covers the case that the user passes in a value in column_descriptions that is not present in their prediction data
                    column_vals = X[key].unique()
                    for val in column_vals:
                        prediction_features.add(key + '=' + str(val))

                    categorical_col_names.append(key)
                except:
                    print(
                        '\nFound a column in your column_descriptions that is not present in your prediction data:'
                    )
                    print(key)

            elif 'day_part' in key:
                # We have found a date column. Make sure this date column is in our prediction data
                # It is outside the scope of this function to make sure that the same date parts are available in both our training and testing data
                raw_date_col_name = key[:key.index('day_part') - 1]
                date_col_names.append(raw_date_col_name)

            elif value == 'output':
                try:
                    prediction_features.remove(key)
                except KeyError:
                    pass

        # Now that we've added in all the one-hot-encoded categorical columns (name=val1, name=val2), remove the base name from our prediction data
        prediction_features = prediction_features - set(categorical_col_names)

        # Get only the unique raw_date_col_names
        date_col_names = set(date_col_names)

        training_features = set(self.training_features)

        # Remove all of the transformed date column feature names from our training data
        features_to_remove = []
        for feature in training_features:
            for raw_date_col_name in date_col_names:
                if raw_date_col_name in feature:
                    features_to_remove.append(feature)
        training_features = training_features - set(features_to_remove)

        # Make sure the raw_date_col_name is in our training data after we have removed all the transformed feature names
        training_features = training_features | date_col_names

        # MVP means ignoring text features
        print_nlp_warning = False
        nlp_example = None
        for feature in training_features:
            if 'nlp_' in feature:
                print_nlp_warning = True
                nlp_example = feature
                training_features.remove(feature)

        if print_nlp_warning == True:
            print('\n\nWe found an NLP column in the training data')
            print(
                'verify_features() currently does not support checking all of the values within an NLP column, so if the text of your NLP column has dramatically changed, you will have to check that yourself.'
            )
            print(
                'Here is one example of an NLP feature in the training data:')
            print(nlp_example)

        training_not_prediction = training_features - prediction_features

        if raw_features_only == True:
            training_not_prediction = self.remove_categorical_values(
                training_not_prediction)

        if len(training_not_prediction) > 0:

            print(
                '\n\nHere are the features this model was trained on that were not present in this prediction data:'
            )
            print(sorted(list(training_not_prediction)))
        else:
            print(
                'All of the features this model was trained on are included in the prediction data'
            )

        prediction_not_training = prediction_features - training_features
        if raw_features_only == True:
            prediction_not_training = self.remove_categorical_values(
                prediction_not_training)

        if len(prediction_not_training) > 0:

            # Separate out those values we were told to ignore by column_descriptions
            ignored_features = []
            for feature in prediction_not_training:
                if self.column_descriptions.get(feature, 'False') == 'ignore':
                    ignored_features.append(feature)
            prediction_not_training = prediction_not_training - set(
                ignored_features)

            print(
                '\n\nHere are the features available in the prediction data that were not part of the training data:'
            )
            print(sorted(list(prediction_not_training)))

            if len(ignored_features) > 0:
                print(
                    '\n\nAdditionally, we found features in the prediction data that we were told to ignore in the training data'
                )
                print(sorted(list(ignored_features)))

        else:
            print(
                'All of the features in the prediction data were in this model\'s training data'
            )

        print('\n\n')
        return {
            'training_not_prediction': training_not_prediction,
            'prediction_not_training': prediction_not_training
        }

    def score(self, X, y, verbose=False):
        # At the time of writing this, GradientBoosting does not support sparse matrices for predictions
        if (self.model_name[:16] == 'GradientBoosting' or self.model_name in [
                'BayesianRidge', 'LassoLars', 'OrthogonalMatchingPursuit',
                'ARDRegression'
        ]) and scipy.sparse.issparse(X):
            X = X.todense()

        if self._scorer is not None:
            if self.type_of_estimator == 'regressor':
                return self._scorer.score(self, X, y)
            elif self.type_of_estimator == 'classifier':
                return self._scorer.score(self, X, y)

        else:
            return self.model.score(X, y)

    def predict_proba(self, X, verbose=False):

        # if self.model_name[:3] == 'XGB' and scipy.sparse.issparse(X):
        #     ones = [[1] for x in range(X.shape[0])]
        #     # Trying to force XGBoost to play nice with sparse matrices
        #     X = scipy.sparse.hstack((X, ones))

        if (self.model_name[:16] == 'GradientBoosting' or self.model_name[:12]
                == 'DeepLearning' or self.model_name in [
                    'BayesianRidge', 'LassoLars', 'OrthogonalMatchingPursuit',
                    'ARDRegression'
                ]) and scipy.sparse.issparse(X):
            X = X.todense()

        try:
            predictions = self.model.predict_proba(X)

        except AttributeError as e:
            # print('This model has no predict_proba method. Returning results of .predict instead.')
            try:
                predictions = self.model.predict(X)
            except TypeError as e:
                if scipy.sparse.issparse(X):
                    X = X.todense()
                predictions = self.model.predict(X)

        except TypeError as e:
            if scipy.sparse.issparse(X):
                X = X.todense()
            predictions = self.model.predict_proba(X)

        # If this model does not have predict_proba, and we have fallen back on predict, we want to make sure we give results back in the same format the user would expect for predict_proba, namely each prediction is a list of predicted probabilities for each class.
        # Note that this DOES NOT WORK for multi-label problems, or problems that are not reduced to 0,1
        # If this is not an iterable (ignoring strings, which might be iterable), then we will want to turn our predictions into tupled predictions
        if not (hasattr(predictions[0], '__iter__')
                and not isinstance(predictions[0], str)):
            tupled_predictions = []
            for prediction in predictions:
                if prediction == 1:
                    tupled_predictions.append([0, 1])
                else:
                    tupled_predictions.append([1, 0])
            predictions = tupled_predictions

        # This handles an annoying edge case with libraries like Keras that, for a binary classification problem, with return a single predicted probability in a list, rather than the probability of both classes in a list
        if len(predictions[0]) == 1:
            tupled_predictions = []
            for prediction in predictions:
                tupled_predictions.append([1 - prediction[0], prediction[0]])
            predictions = tupled_predictions

        if X.shape[0] == 1:
            return predictions[0]
        else:
            return predictions

    def predict(self, X, verbose=False):

        # if self.model_name[:3] == 'XGB' and scipy.sparse.issparse(X):
        #     ones = [[1] for x in range(X.shape[0])]
        #     # Trying to force XGBoost to play nice with sparse matrices
        #     X_predict = scipy.sparse.hstack((X, ones))

        if (self.model_name[:16] == 'GradientBoosting' or self.model_name[:12]
                == 'DeepLearning' or self.model_name in [
                    'BayesianRidge', 'LassoLars', 'OrthogonalMatchingPursuit',
                    'ARDRegression'
                ]) and scipy.sparse.issparse(X):
            X_predict = X.todense()

        else:
            X_predict = X

        prediction = self.model.predict(X_predict)
        # Handle cases of getting a prediction for a single item.
        # It makes a cleaner interface just to get just the single prediction back, rather than a list with the prediction hidden inside.
        if len(prediction) == 1:
            return prediction[0]
        else:
            return prediction
示例#31
0
    # plt.plot(c,LRscore,'bx-')
    # plt.xlabel('penalty')
    # plt.ylabel('validation score')
    # plt.title('LR Model selection')
    # plt.show()
    # #logisticModel = LogisticRegression(penalty='l2')
    # #scores[1] = cross_val_score(logisticModel,train_data,label_data,cv=5)
    #
    #test model 3 : Neutral network
    #NNModel = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(5000,100), random_state=1,max_iter=500)
    tbCallback = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
    NNModel = KerasClassifier(build_fn=create_model,epochs=1200, batch_size=150,verbose=0)
    cv = ShuffleSplit(n_splits=1, test_size=0.3, random_state=0)
    #NNscore = cross_val_score(NNModel,train_data,label_data,fit_params={'callbacks': [tbCallback]},cv=cv)
    NNModel.fit(train_data,label_data)
    prediction = NNModel.predict(test_data)
    prediction = np.array(prediction)
    print(prediction)
    np.savetxt("prediction.csv", prediction, delimiter=",")
    #print('MLPClassifier validation score : ',NNscore)


    #test model 4 : SVM
    # c = [1]
    # SVMscore = np.zeros(len(c))
    # j = 0
    # for i in c:
    #     svmModel = SVC(C=i,kernel='linear')
    #     SVMscore[j] = np.mean(cross_val_score(svmModel,train_data,label_data,cv=5))
    #     j = j+1
    # print(c)
示例#32
0
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Load the data (the file handle is closed as soon as the JSON is parsed)
# Use "./data-mini.json" instead for the small sample.
with open("./data.json") as data_file:
    data = json.load(data_file)
x = data["X"]
y = data["Y"]

# Train
x_train, x_test, y_train, y_test = train_test_split(x, y)
y_train = np_utils.to_categorical(y_train, nb_classes)
print(len(x_train), len(y_train))
model = KerasClassifier(build_fn=build_model,
                        epochs=epochs,
                        batch_size=batch_size)
model.fit(x_train, y_train)

# Predict
pre = model.predict(x_test)
ac_score = metrics.accuracy_score(y_test, pre)
# The report must compare the test labels with the predictions `pre`;
# `y` is the full label list and does not even match y_test in length.
cl_report = metrics.classification_report(y_test, pre)
print("정답률=", ac_score)
print("리포트=\n", cl_report)
示例#33
0
        optimizer='adam',
        metrics=['accuracy'])
    return model

# Load the data --- (*2)
# The file handle is closed as soon as the JSON is parsed.
# Use "./newstext/data.json" instead for the full data set.
with open("./newstext/data-mini.json") as data_file:
    data = json.load(data_file)
X = data["X"] # data representing the text
Y = data["Y"] # category data
# Set the maximum number of words
max_words = len(X[0])

# Train --- (*3)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
Y_train = np_utils.to_categorical(Y_train, nb_classes)
print(len(X_train),len(Y_train))
model = KerasClassifier(
    build_fn=build_model, 
    nb_epoch=nb_epoch, 
    batch_size=batch_size)
model.fit(X_train, Y_train)

# Predict --- (*4)
y = model.predict(X_test)
ac_score = metrics.accuracy_score(Y_test, y)
cl_report = metrics.classification_report(Y_test, y)
print("正解率=", ac_score)
print("レポート=\n", cl_report)


the number of epochs and the batch size.
We pass the number of training epochs to the KerasClassifier, again using
reasonable default values. Verbose output is also turned off given that the
model will be created 10 times for the 10-fold cross validation being
performed.
"""
# Wrap the baseline network as a scikit-learn estimator.
# NOTE(review): no rescaling/standardization step is visible in this snippet,
# and verbose=1 means per-epoch output is printed for every fold -- confirm
# both are intended.
estimator =  KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=1)


"""
We are going to use scikit-learn to evaluate the model using stratified k-fold
cross validation. This is a resampling technique that will provide an estimate
of the performance of the model. It does this by splitting the data into
k-parts, training the model on all parts except one which is held out as a test
set to evaluate the performance of the model. This process is repeated k-times
and the average score across all constructed models is used as a robust
estimate of performance. It is stratified, meaning that it will look at the
output values and attempt to balance the number of instances that belong to
each class in the k-splits of the data.
"""
# 10-fold stratified cross-validation on the encoded labels; prints the mean
# and standard deviation of the fold scores as percentages.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Refit on all data and compare predictions with the labels on the training
# set itself (so this is not a held-out estimate).
# NOTE(review): this fit uses Y while the cross-validation above uses
# encoded_Y -- confirm that is deliberate.
estimator.fit(X, Y)
prediction = estimator.predict(X)
print("Real: {}".format(Y))
print("Predicted: {}".format(prediction))
    model.add(Dropout(0.7))   
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# In[43]:

# Wrap the Keras model as a scikit-learn classifier and train it.
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1)
estimator.fit(X_train_mat, y_train_cat)


# In[44]:

# Predict the test set and show both the raw and the decoded labels.
predictions = estimator.predict(X_test_mat)
print(set(predictions))
print(encoder.inverse_transform(predictions))


# In[45]:

# Single-argument print call: same output on Python 2, valid on Python 3,
# and consistent with the other print calls in this notebook.
print('macro f1: {}'.format(f1_score(encoded_Y_test, predictions, average='macro')))


# In[ ]:



def create_model():
    """Build and compile the recurrent network used by the classifier wrapper.

    The layer stack is SimpleRNN -> ReLU -> SimpleRNN(20000) -> ReLU ->
    Dropout(0.5) -> SimpleRNN(nb_classes) -> softmax, compiled with the
    module-level ``loss`` and ``optim``.
    """
    # NOTE(review): stacked SimpleRNN layers normally need
    # return_sequences=True on all but the last one -- confirm this builds.
    n_inputs = X_train.shape[1]
    layer_stack = [
        SimpleRNN(n_inputs, input_dim=n_inputs),
        Activation('relu'),
        SimpleRNN(20000),
        Activation('relu'),
        Dropout(0.5),
        SimpleRNN(nb_classes),
        Activation('softmax'),
    ]

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    network.compile(loss=loss, optimizer=optim, metrics=['accuracy'])
    return network


# Wrap create_model as a scikit-learn classifier, train it and predict the
# test set. The epoch count and batch size are passed both to the wrapper
# and to fit().
classifier = KerasClassifier(build_fn=create_model, nb_epoch=nb_epoch, batch_size=batch_size)
history = classifier.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch)
Y_pred = classifier.predict(X_test, batch_size=batch_size)

# Per-class precision/recall/F1 on the test set.
print(classification_report(y_true=Y_test, y_pred=Y_pred))

# Plot the training accuracy per epoch and save it to data/acc.png.
# NOTE(review): the legend names two curves ('Training', 'Test') but only
# one series is plotted -- confirm whether a validation curve is missing.
plt.figure()
plt.plot(history.history['acc'])
plt.title('Genauigkeit')
plt.ylabel('Genauigkeit')
plt.xlabel('Epoche')
plt.legend(['Training', 'Test'], loc='upper left')
plt.savefig("data/acc.png")

# summarize history for loss
plt.figure()
plt.plot(history.history['loss'])
plt.title('Loss Werte')
model.add(Activation('relu'))
model.add(Dense(50, 10))
model.add(Activation('softmax'))

print('Creating wrapper')
classifier = KerasClassifier(model)

print('Fitting model')
classifier.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch)

print('Testing score function')
score = classifier.score(X_train, Y_train)
print('Score: ', score)

print('Testing predict function')
preds = classifier.predict(X_test)
print('Preds.shape: ', preds.shape)

print('Testing predict proba function')
proba = classifier.predict_proba(X_test)
print('Proba.shape: ', proba.shape)

print('Testing get params')
print(classifier.get_params())

print('Testing set params')
classifier.set_params(optimizer='sgd', loss='mse')
print(classifier.get_params())

print('Testing attributes')
print('Classes')