示例#1
0
def parameter_search(x_train, y_train, x_test, y_test):
    """Tune hyper-parameters with two grid searches, then fit a final model.

    The first search covers the ``lr`` and ``momentum`` arguments of
    ``neural_network_model``; the second covers ``batch_size`` and ``epochs``.
    A fresh network is then built with the best ``lr``/``momentum`` and
    fitted with the best ``batch_size``/``epochs``.

    Args:
        x_train: Training features, used by both searches and the final fit.
        y_train: Training labels.
        x_test: Features passed to the final fit as validation data.
        y_test: Labels passed to the final fit as validation data.

    Returns:
        The model built by ``neural_network_model`` after fitting.
    """
    # The keyword must be ``epochs``: the wrapper only forwards keywords that
    # the build function or the Keras fit/predict methods actually accept.
    model = KerasClassifier(build_fn=neural_network_model,
                            batch_size=64,
                            epochs=50,
                            verbose=0)

    # Search 1: optimizer settings.
    learning_rate = np.arange(0.001, 0.01, 0.001)
    momentum = np.arange(0.4, 0.9, 0.05)
    param_test1 = dict(lr=learning_rate, momentum=momentum)
    grid_search1 = GridSearchCV(estimator=model,
                                param_grid=param_test1,
                                n_jobs=-1,
                                cv=5)  # scoring='accuracy',
    grid_result1 = grid_search1.fit(x_train, y_train)
    print("Best: %f using %s" %
          (grid_result1.best_score_, grid_result1.best_params_))
    means = grid_result1.cv_results_['mean_test_score']
    stds = grid_result1.cv_results_['std_test_score']
    params1 = grid_result1.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params1):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # Search 2: training schedule. The candidate values override the
    # batch_size/epochs given to the wrapper above.
    batch_size = np.arange(10, 100, 10)
    epochs = np.arange(60, 130, 20)
    param_test2 = dict(batch_size=batch_size, epochs=epochs)
    grid_search2 = GridSearchCV(estimator=model,
                                param_grid=param_test2,
                                n_jobs=-1)
    grid_result2 = grid_search2.fit(x_train, y_train)
    # summarize results
    print("Best: %f using %s" %
          (grid_result2.best_score_, grid_result2.best_params_))

    # Final model: combine the winners of both searches.
    nn_model = neural_network_model(
        lr=grid_result1.best_params_['lr'],
        momentum=grid_result1.best_params_['momentum'])
    nn_model.fit(x_train,
                 y_train,
                 batch_size=grid_result2.best_params_['batch_size'],
                 epochs=grid_result2.best_params_['epochs'],
                 validation_data=(x_test, y_test))
    # Evaluate the network that was just fitted; the scikit-learn wrapper
    # held in ``model`` has no ``evaluate`` method.
    train_result = nn_model.evaluate(x_train, y_train, batch_size=10000)
    print(train_result)
    return nn_model
示例#2
0
                          cv=10,
                          scoring='accuracy')
# Report the scores collected by the call that ends just above
# (cv=10, scoring='accuracy').
print results
print results.mean()  # Mean

# Standard deviation: shows how far the individual scores deviate from the mean.
# The larger the deviation, the greater the chance that the network is overfitting, i.e. adapting too closely to the data,
# so that it predicts poorly on a new dataset because it is too adapted to the training data.
# Overfitting: the network loses its ability to generalize; it becomes biased towards the training data.
# Underfitting: the problem is complex and the model underestimates it.
desvio = results.std()
print desvio

# Characteristics: underfitting gives poor results on the training set,
# while overfitting gives great results on the training set and poor ones on the test set.
# Dropout can be used to avoid this: it randomly zeroes the values of a layer, up to a defined percentage,
# to avoid overfitting, i.e. the network becoming biased towards the training data.

predictations = network.predict(previsores_teste)
predictations = (predictations > 0.5
                 )  # Converts the values to True/False (True when greater than 0.5)

# NOTE: despite its name, `precision` holds the result of accuracy_score.
precision = accuracy_score(classes_teste, predictations)
print precision

matrix = confusion_matrix(classes_teste, predictations)
print matrix

# Evaluate the network on the test predictors/classes and print what
# evaluate() returns.
result = network.evaluate(previsores_teste, classes_teste)
print result
示例#3
0
    if title is not None:
        plt.title(title)
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Training data', 'Validation data'], loc=0)
    # plt.show()


def plot_loss(history, title=None):
    """Draw the training and validation loss curves with pyplot.

    Args:
        history: Either a dict holding 'loss' and 'val_loss' sequences, or
            any other object that exposes such a dict as its ``history``
            attribute.
        title: Optional plot title; no title is set when it is None.

    The figure is not displayed here; call ``plt.show()`` afterwards.
    """
    records = history if isinstance(history, dict) else history.history

    # Training curve first, validation curve second (matches the legend).
    for series_name in ('loss', 'val_loss'):
        plt.plot(records[series_name])

    if title is not None:
        plt.title(title)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training data', 'Validation data'], loc=0)


# Show the accuracy curves, then the loss curves, each in its own figure.
# (The titles are Korean runtime strings and are left untouched.)
plot_acc(history, '(a) 학습 경과에 따른 정확도 변화 추이')
plt.show()
plot_loss(history, '(b) 학습 경과에 따른 손실값 변화 추이')
plt.show()

# NOTE(review): x_test is passed as both input and target. That is correct
# for an autoencoder-style model; if this is a classifier the target was
# probably meant to be the test labels — confirm against the model definition.
loss, acc = model.evaluate(x_test, x_test)
print(loss, acc)
示例#4
0
def main():
    """Train and evaluate a single-convolution CNN binary classifier.

    Command-line driven workflow:

    1. Parse the arguments and load the feature (``-x``) and class (``-y``)
       numpy files, optionally down-sampling to balance the classes.
    2. With ``-params gs``, run a randomized hyper-parameter search and
       append its results to ``<save>_GridSearch.txt``.
    3. With ``-run t``, train ``-n_reps`` replicate models, score each on
       train/validation/test splits, optionally compute motif and kernel
       importance, write ``<save>_results.txt`` and append a one-row summary
       to ``RESULTS.txt``.

    Raises:
        ValueError: If the requested optimizer name is not supported.
    """

    ########################
    ### Parse Input Args ###
    ########################
    parser = argparse.ArgumentParser(
        description='CNN classification code implemented using TensorFlow v2.0',
        epilog='https://github.com/azodichr')

    parser.add_argument('-x', help='Feature numpy dataset', required=True)
    parser.add_argument('-y', help='Class/Y numpy dataset', required=True)
    parser.add_argument('-run', help='T/F to run final models', default='t')
    parser.add_argument('-splits',
                        help='Values for train/val/test',
                        default='70,10,20')
    parser.add_argument('-y_name', help='Phenotype Trait')
    parser.add_argument('-f', help='Function: gs, run, full', default='full')
    parser.add_argument('-save', help='Name for Output File', default='test')
    parser.add_argument('-balance',
                        help='t/f to downsample so balance classes',
                        default='t')
    parser.add_argument('-n_channels',
                        help='Number of channels',
                        default=1,
                        type=int)
    parser.add_argument('-cv',
                        help='Number of cross validation folds',
                        type=int,
                        default=5)
    parser.add_argument('-n_jobs',
                        '-p',
                        help='Number of processors for '
                        'parallel computing (max for HPCC = 14)',
                        type=int,
                        default=1)
    parser.add_argument('-save_model',
                        help='T/F if want to save final models',
                        type=str,
                        default='f')
    parser.add_argument('-tag',
                        help='Identifier String to add to RESULTS',
                        type=str,
                        default='cnn')
    parser.add_argument('-save_detailed',
                        help='T/F Save detailed model performance',
                        type=str,
                        default='f')
    parser.add_argument('-original_df',
                        help='DF fed into input_converter.py',
                        type=str,
                        default='')
    parser.add_argument('-imp_m',
                        help='T/F to calculate importance of each motif',
                        type=str,
                        default='f')
    parser.add_argument('-imp_k',
                        help='T/F to calculate importance of each kernel',
                        type=str,
                        default='f')

    # Default Hyperparameters
    parser.add_argument('-params',
                        help='Output from -f gs (i.e. '
                        'SAVE_GridSearch.txt)',
                        default='default')
    parser.add_argument('-actfun',
                        help='Activation function. (relu, sigmoid)',
                        default='relu')
    parser.add_argument('-learn_rate',
                        help='Learning Rate',
                        default=0.01,
                        type=float)
    parser.add_argument('-dropout',
                        help='Dropout rate',
                        default=0.25,
                        type=float)
    parser.add_argument('-l2',
                        help='Shrinkage parameter for L2 regularization',
                        default=0.25,
                        type=float)
    parser.add_argument('-filters',
                        help='Number of Kernels/filters',
                        default=8,
                        type=int)
    parser.add_argument('-optimizer',
                        help='Optimization function to use)',
                        type=str,
                        default='Adam')
    parser.add_argument('-dense',
                        help='Number of nodes in dense layer',
                        type=int,
                        default=16)
    parser.add_argument('-activation',
                        help='Activation function in all but '
                        'last dense layer, which is set to linear',
                        type=str,
                        default='relu')
    parser.add_argument('-n_reps',
                        '-n',
                        help='Number of replicates (unique '
                        'validation set/starting weights for each)',
                        default=100,
                        type=int)
    parser.add_argument('-clip_value',
                        help='Clip Value',
                        type=float,
                        default=0.5)
    parser.add_argument('-patience',
                        help='Patience for Early Stopping',
                        type=int,
                        default=5)
    parser.add_argument('-min_delta',
                        help='Minimum Delta Value for Early '
                        'Stopping',
                        type=float,
                        default=0)

    # Grid Search reps/space
    parser.add_argument('-gs_reps',
                        '-gs_n',
                        help='Number of Grid Search Reps'
                        '(will append results if SAVE_GridSearch.csv exists)',
                        type=int,
                        default=10)
    parser.add_argument('-actfun_gs',
                        help='Activation functions for Grid '
                        'Search',
                        nargs='*',
                        default=['relu', 'selu', 'elu'])
    parser.add_argument('-dropout_gs',
                        help='Dropout rates for Grid Search',
                        nargs='*',
                        type=float,
                        default=[0.0, 0.1, 0.25])
    parser.add_argument('-l2_gs',
                        help='Shrinkage parameters for L2 for Grid '
                        'Search',
                        nargs='*',
                        type=float,
                        default=[0.01, 0.1, 0.25])
    parser.add_argument('-lrate_gs',
                        help='Learning Rate',
                        nargs='*',
                        type=float,
                        default=[0.1, 0.01, 0.001, 0.0001])
    # nargs='*' so the parsed value is a list, like the list default and the
    # other *_gs options.
    parser.add_argument('-kernels_gs',
                        help='Number of Kernels for Grid Search',
                        nargs='*',
                        default=[4, 8, 16, 24],
                        type=int)

    args = parser.parse_args()
    balance_classes = args.balance.lower() in ['t', 'true']

    def downsample(x, y):
        """Return x and y restricted to an equal number of rows per class.

        Every class is reduced, by random sampling without replacement, to
        the size of the smallest class. Classes are emitted smallest first.
        """
        classes, counts = np.unique(y, return_counts=True)
        n_keep = counts.min()
        i_keep = []
        # A stable sort keeps tied classes distinct; looking the smallest and
        # the largest class up separately would pick the same class twice
        # when the counts are already equal.
        for class_i in np.argsort(counts, kind='stable'):
            i_class = np.where(y == classes[class_i])[0]
            if len(i_class) > n_keep:
                i_class = np.random.choice(i_class,
                                           size=n_keep,
                                           replace=False)
            i_keep.extend(i_class)
        return x[i_keep], y[i_keep]

    def make_cnn_model(learn_rate=args.learn_rate,
                       filters=args.filters,
                       dropout=args.dropout,
                       dense=args.dense,
                       l2=args.l2,
                       activation=args.activation,
                       optimizer=args.optimizer,
                       units=1):
        """Build and compile the CNN; return (model, its Conv2D layer).

        The kernel height and the input shape are read from ``k_height``,
        ``n_rows`` and ``n_columns``, which main() sets after loading the
        data, so this must not be called before that point.
        """
        optimizer_name = optimizer.lower()
        if optimizer_name == 'adam':
            opt = tf.keras.optimizers.Adam(learning_rate=learn_rate,
                                           clipvalue=args.clip_value)
        elif optimizer_name == 'nadam':
            opt = tf.keras.optimizers.Nadam(learning_rate=learn_rate,
                                            clipvalue=args.clip_value)
        elif optimizer_name == 'rmsprop':
            opt = tf.keras.optimizers.RMSprop(learning_rate=learn_rate,
                                              clipvalue=args.clip_value)
        elif optimizer_name == 'sgdm':
            opt = tf.keras.optimizers.SGD(learning_rate=learn_rate,
                                          decay=1e-6,
                                          clipvalue=args.clip_value,
                                          momentum=0.9,
                                          nesterov=True)
        else:
            # Fail with a clear message instead of an unbound-variable error.
            raise ValueError('Unsupported optimizer: %r (expected adam, '
                             'nadam, rmsprop or sgdm)' % optimizer)

        conv2d_layer = layers.Conv2D(
            filters=filters,
            kernel_size=tuple([k_height, 1]),
            kernel_regularizer=tf.keras.regularizers.l2(l2),
            activation=activation,
            kernel_initializer='glorot_normal',
            input_shape=(n_rows, n_columns, args.n_channels),
            name='conv2d_layer')
        K.clear_session()
        model = models.Sequential()
        model.add(conv2d_layer)
        model.add(layers.Flatten())
        model.add(layers.Dense(dense, activation=activation))
        model.add(layers.Dropout(dropout))
        model.add(layers.Dense(units=units, activation='sigmoid'))
        # Accuracy is tracked here so callers can use evaluate() without
        # re-compiling, which would replace the optimizer configured above.
        model.compile(optimizer=opt,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        return model, conv2d_layer

    def build_cnn_classifier(learn_rate=args.learn_rate,
                             filters=args.filters,
                             dropout=args.dropout,
                             dense=args.dense,
                             l2=args.l2,
                             activation=args.activation,
                             optimizer=args.optimizer):
        """build_fn for KerasClassifier: return only the compiled model.

        The parameters are spelled out (no **kwargs) because the wrapper
        inspects this signature to decide which search parameters to pass.
        """
        model, _ = make_cnn_model(learn_rate=learn_rate,
                                  filters=filters,
                                  dropout=dropout,
                                  dense=dense,
                                  l2=l2,
                                  activation=activation,
                                  optimizer=optimizer)
        return model

    def auroc_and_f1(fitted_model, x_part, y_part, threshold):
        """Return (auROC, F1 at ``threshold``) of the model on one split."""
        yhat = fitted_model.predict(x_part)
        auroc = roc_auc_score(y_part, yhat)
        yhat[yhat >= threshold] = 1
        yhat[yhat < threshold] = 0
        return auroc, f1_score(y_part, yhat, pos_label=1)

    ##########################
    ### Data preprocessing ###
    ##########################
    x_all = np.load(args.x)
    y_all = np.load(args.y)
    x_all = x_all.reshape(x_all.shape + (args.n_channels, ))

    if balance_classes:
        x, y = downsample(x_all, y_all)

        print('Y shape (down-sampled): %s' % str(y.shape))
        print('X shape (down-sampled): %s' % str(x.shape))
    else:
        y = y_all
        x = x_all

    print("\nSnapshot of feature data for first instance in data set:")
    print(x[0, :, 0:5, 0])
    n_rows = x.shape[1]
    n_columns = x.shape[2]

    # The kernel spans every row of the input and a single column.
    k_height = x.shape[1]
    args.k_len = 1
    print('Kernel dimensions: ', k_height, args.k_len)

    ###################
    ### Grid Search ###
    ###################

    if args.params.lower() == 'gs':
        print('\n***** Starting Random Search with %i reps using %i testing '
              'instances and %i fold cross-validation *****\n' %
              (args.gs_reps, x.shape[0], args.cv))
        scoring = {'acc': 'accuracy', 'f1': 'f1'}
        param_grid = dict(
            learn_rate=[0.1, 0.01, 0.001],
            filters=[8, 16],
            dense=[8, 16, 32],
            l2=[0.1, 0.25],  #, 0.5],
            dropout=[0.1, 0.25],  #, 0.5],
            activation=["relu"],  #, 'selu', 'elu'],
            optimizer=['RMSprop', 'Adam', 'nadam'])
        # The wrapper is a single estimator and its build_fn must return a
        # model only, hence build_cnn_classifier rather than make_cnn_model.
        model = KerasClassifier(build_fn=build_cnn_classifier,
                                batch_size=100,
                                epochs=30,
                                verbose=0)
        rand_search = RandomizedSearchCV(estimator=model,
                                         param_distributions=param_grid,
                                         cv=args.cv,
                                         n_iter=args.gs_reps,
                                         n_jobs=args.n_jobs,
                                         scoring=scoring,
                                         refit='acc',
                                         verbose=0)
        gs_result = rand_search.fit(x, y)
        gs_result_df = pd.DataFrame.from_dict(gs_result.cv_results_)

        print("Saving Grid Search Results....")
        print(gs_result_df.head())
        # Append mode: the header is written only when the file is new/empty.
        with open(args.save + "_GridSearch.txt", 'a') as out_gs:
            gs_result_df.to_csv(out_gs, header=out_gs.tell() == 0, sep='\t')

        # Reported only when a search actually ran and was saved.
        print('\n\n Grid Search results saved to: %s_GridSearch.txt\n' %
              args.save)

    ################
    ### Run final model
    ################

    if args.run.lower() in ['t', 'true']:
        print('####### Running Final Model(s) ###########')

        # Step 1: Define the parameters from the Grid Search or use default
        if args.params.lower() != 'default':
            if args.params.lower() != 'gs':
                gs_result_df = pd.read_csv(args.params, sep='\t')
                gs_result_df.fillna(0, inplace=True)

            # Average replicated parameter combinations; the 'count' of
            # mean_fit_time is the number of replicates per combination.
            gs_mean = gs_result_df.groupby([
                'param_filters', 'param_optimizer', 'param_learn_rate',
                'param_dropout', 'param_l2', 'param_dense', 'param_activation'
            ]).agg({
                'mean_test_acc': 'mean',
                'std_test_acc': 'mean',
                'mean_fit_time': 'count'
            }).reset_index()

            print('Parameter Search Coverage: \nMin: %i\nMean: %3f\nMax:%i' %
                  (gs_mean['mean_fit_time'].min(),
                   gs_mean['mean_fit_time'].mean(),
                   gs_mean['mean_fit_time'].max()))

            if gs_mean['mean_fit_time'].min() == 1:
                print('Dropping parameter combinations with < 2 replicates...')
                gs_mean = gs_mean[gs_mean['mean_fit_time'] >= 2]

            gs_mean = gs_mean.sort_values(by='mean_test_acc', ascending=False)
            print('\nSnapshot of grid search results:')
            print(gs_mean.head())

            # Adopt the best-scoring combination for the final models.
            args.learn_rate = float(gs_mean['param_learn_rate'].iloc[0])
            args.l2 = float(gs_mean['param_l2'].iloc[0])
            args.dropout = float(gs_mean['param_dropout'].iloc[0])
            args.filters = int(gs_mean['param_filters'].iloc[0])
            args.dense = int(gs_mean['param_dense'].iloc[0])
            args.activation = gs_mean['param_activation'].iloc[0]
            args.optimizer = gs_mean['param_optimizer'].iloc[0]

        print('\n***** Running CNN models ******')
        print('Optimizer: %s\nActivation function:'
              ' %s\nLearning Rate: %4f\nNumber of kernels: '
              '%i\nL2: %4f\nDropout: %4f\nDense nodes: %s\n' %
              (args.optimizer, args.activation, args.learn_rate, args.filters,
               args.l2, args.dropout, args.dense))

        calc_motif_imp = args.imp_m.lower() in ['t', 'true']
        calc_kernel_imp = args.imp_k.lower() in ['t', 'true']

        result_rows = []
        motif_imps = pd.DataFrame()
        kern_imp = []

        if calc_motif_imp:
            # The motif names come from the column labels of the original
            # data frame; they do not change between replicates, so the file
            # is read once, and only when motif importance is requested.
            key = pd.read_csv(
                args.original_df,
                sep='\t',
                index_col=0,
            )
            key_index_list = key.columns.str.split('_', expand=True).values
            key.columns = pd.MultiIndex.from_tuples([
                (label[1], label[0]) for label in key_index_list
            ])
            key = key.sort_index(axis=1)
            motifs = key.columns.levels[0].values

        for n in range(args.n_reps):
            print("\nReplicate %i/%i" % (n, args.n_reps))
            # Draw a new balanced sample per replicate, but only when
            # balancing was requested on the command line.
            if balance_classes:
                x, y = downsample(x_all, y_all)
            else:
                x, y = x_all, y_all
            print(x.shape)

            # Step 2: Build the model with the selected optimizer so that the
            # learning rate and clip value are the ones actually used.
            model, conv2d_layer = make_cnn_model(learn_rate=args.learn_rate,
                                                 optimizer=args.optimizer,
                                                 filters=args.filters,
                                                 dense=args.dense,
                                                 l2=args.l2,
                                                 dropout=args.dropout,
                                                 activation=args.activation)
            #print(model.summary())

            # Step 3: Split training into training2 and validation
            # (0.111 of the remaining 90% is about 10% of the full data).
            x_train, x_test, y_train, y_test = train_test_split(x,
                                                                y,
                                                                stratify=y,
                                                                test_size=0.1)
            x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                              y_train,
                                                              stratify=y_train,
                                                              test_size=0.111)
            print('Train on %i, validate on %i, test on %i' %
                  (x_train.shape[0], x_val.shape[0], x_test.shape[0]))

            # Step 4: Define early stopping criteria & train
            earlystop_callback = EarlyStopping(monitor='val_loss',
                                               mode='min',
                                               min_delta=args.min_delta,
                                               patience=args.patience,
                                               restore_best_weights=True,
                                               verbose=0)

            model.fit(x_train,
                      y_train,
                      batch_size=50,
                      epochs=1000,
                      verbose=0,
                      callbacks=[earlystop_callback],
                      validation_data=(x_val, y_val))

            train_loss, train_acc = model.evaluate(x_train, y_train)
            val_loss, val_acc = model.evaluate(x_val, y_val)
            test_loss, test_acc = model.evaluate(x_test, y_test)

            # Choose the decision threshold that maximizes F1 on validation.
            val_yhat = model.predict(x_val)
            max_f1 = 0
            best_thresh = 0
            for thr in np.arange(0.01, 1, 0.01):
                thr_pred = val_yhat.copy()
                thr_pred[thr_pred >= thr] = 1
                thr_pred[thr_pred < thr] = 0
                # Eliminates cases where all predictions are negative and
                # the f1 and auROC are undefined
                if thr_pred.sum() > 1:
                    # Returns F1 for positive class
                    f1 = f1_score(y_val, thr_pred, pos_label=1)
                    if f1 >= max_f1:
                        max_f1 = f1
                        best_thresh = thr
            print('Threshold for F1 measure: %3f' % best_thresh)

            # Calculate AUC-ROC and F-measure from train, val, and test.
            train_auroc, train_f1 = auroc_and_f1(model, x_train, y_train,
                                                 best_thresh)
            val_auroc, val_f1 = auroc_and_f1(model, x_val, y_val, best_thresh)
            test_auroc, test_f1 = auroc_and_f1(model, x_test, y_test,
                                               best_thresh)

            if args.save_model.lower() in ['t', 'true']:
                model.save(args.save + '_model_' + str(n) + '.h5')

            # Rows are collected in a list and turned into a frame once,
            # since DataFrame.append is not available in current pandas.
            result_rows.append({
                'ID': args.save,
                'Tag': args.tag,
                'Rep': n,
                'X_file': args.x,
                'Y_file': args.y,
                'ActFun': args.activation,
                'dropout': args.dropout,
                'L2': args.l2,
                'LearnRate': args.learn_rate,
                'Optimizer': args.optimizer,
                'n_Kernels': args.filters,
                'F1_threshold': best_thresh,
                'n_Dense': args.dense,
                'Acc_train': train_acc,
                'Loss_train': train_loss,
                'auROC_train': train_auroc,
                'F1_train': train_f1,
                'Acc_val': val_acc,
                'Loss_val': val_loss,
                'auROC_val': val_auroc,
                'F1_val': val_f1,
                'Acc_test': test_acc,
                'Loss_test': test_loss,
                'auROC_test': test_auroc,
                'F1_test': test_f1
            })

            ##########################
            ## Model Interpretation ##
            ##########################

            # Calculate Motif importance (zero-out-each-feature)
            if calc_motif_imp:
                motif_imp = np.empty((0, 2))
                # NOTE(review): the last column (index shape[2] - 1) is never
                # zeroed out; confirm that skipping it is intended.
                for mx in range(0, x_test.shape[2] - 1):
                    x_test_tmp = np.copy(x_test)
                    x_test_tmp[:, ..., mx, :] = 0
                    yhat_m_imp = model.predict(x_test_tmp)
                    auroc_m_imp = roc_auc_score(y_test, yhat_m_imp)
                    imp_m_auc = test_auroc - auroc_m_imp
                    motif_imp = np.vstack(
                        (motif_imp, np.array([motifs[mx], imp_m_auc])))
                # One uniquely named column per replicate, so repeated merges
                # never produce clashing column names.
                motif_imp = pd.DataFrame(
                    motif_imp,
                    columns=['motif', 'auROC_test_decrease_%i' % n])
                if n == 0:
                    motif_imps = motif_imp
                else:
                    motif_imps = pd.merge(motif_imps, motif_imp, on='motif')

            # Calculate Kernel Importance (zero-out-weights)
            if calc_kernel_imp:
                all_weights = model.get_weights()
                print('Performing Leave-One-Kernel-Out importance analysis...')
                for kx in range(0, args.filters):
                    orig_weights = all_weights[0][:, :, 0, kx].copy()
                    orig_weights = orig_weights.tolist()
                    orig_weights = [i for l in orig_weights for i in l]
                    # Work on a deep copy so the trained weights stay intact.
                    conv2d_drop = copy.deepcopy(all_weights)
                    conv2d_drop[0][:, :, 0, kx] = 0.0
                    print(conv2d_drop[0][1, :, 0, 0:10])
                    model_LOKO = tf.keras.models.clone_model(model)
                    model_LOKO.set_weights(weights=conv2d_drop)
                    yhat_k_imp = model_LOKO.predict(x_test)
                    auroc_k_imp = roc_auc_score(y_test, yhat_k_imp)
                    imp_k_auc = test_auroc - auroc_k_imp
                    old = roc_auc_score(y_test, model.predict(x_test))
                    print(old, imp_k_auc)
                    kern_imp.append([n, imp_k_auc, orig_weights])

        final_results = pd.DataFrame(result_rows)

        if calc_motif_imp:
            print('Snapshor ot motif importance scores...')
            motif_imps = motif_imps.set_index('motif')
            # vstack stored the scores as strings; convert them back.
            motif_imps = motif_imps.apply(pd.to_numeric, errors='coerce')
            motif_imps['mean_imp'] = motif_imps.mean(axis=1)
            motif_imps = motif_imps.sort_values('mean_imp',
                                                axis=0,
                                                ascending=False)
            print(motif_imps['mean_imp'].head())
            motif_imps['mean_imp'].to_csv(args.save + "_Motif_imp",
                                          sep="\t",
                                          index=True)

        if calc_kernel_imp:
            print('\nSnapshot of kernel importance scores:')
            kern_imp = pd.DataFrame(
                kern_imp, columns=['rep', 'auROC_test_decrease', 'kernel'])
            print(kern_imp.head())
            kern_imp.to_csv(args.save + "_Kernel_imp", sep="\t", index=True)

        final_results.to_csv(args.save + "_results.txt", header=True, sep='\t')

        # Save summary of results to RESULTS.txt
        calc_cols = [
            'F1_threshold', 'Acc_train', 'Acc_val', 'Acc_test', 'Loss_train',
            'Loss_val', 'Loss_test', 'auROC_train', 'auROC_val', 'auROC_test',
            'F1_train', 'F1_val', 'F1_test'
        ]
        final_results = final_results.drop(['Rep'], axis=1)
        std = final_results[calc_cols].std(axis=0, skipna=True)
        std = std.add_suffix('_std')
        mean = final_results[calc_cols].mean(axis=0, skipna=True)
        mean = mean.add_suffix('_mean')
        # The descriptive columns are identical for every replicate, so the
        # first row is representative.
        str_cols = final_results.drop(calc_cols, axis=1).iloc[0]
        reps = pd.Series([args.n_reps], index=['Reps'])
        summary = pd.concat([str_cols, reps, mean, std])

        #summary.set_index('index', inplace=True)
        print('\n### Summary of results on test set ###')
        print(summary.filter(like='test_mean', axis=0))
        with open("RESULTS.txt", 'a') as f:
            summary.to_frame().transpose().to_csv(f,
                                                  header=f.tell() == 0,
                                                  sep='\t')

    print('Done!')
示例#5
0
          (grid_result.best_score_, grid_result.best_params_))
    #test_model = create_model()
    #prediction= model.predict(X_test)
    #auc = roc_auc_score(y_test, prediction)
    #print("%s: %.2f%%; AUC = %.2f%%" % (model.metrics_names[1], scores[1]*100, auc))

if args.cv:
    # 5-fold stratified cross-validation of the [20, 10]-neuron network,
    # reporting accuracy and AUC per fold and their mean/std at the end.
    cvscores = []
    AUC = []
    # shuffle and random_state are keyword-only in current scikit-learn.
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=3456)
    for train, test in kfold.split(input, output):
        # Build a fresh model for every fold: a model carried over from the
        # previous fold has already been trained on this fold's test samples,
        # which would inflate the scores.
        model = create_model(neurons=[20, 10])
        X_train, X_test = input[train], input[test]
        y_train, y_test = output[train], output[test]
        model.fit(X_train, y_train, epochs=7, batch_size=10, verbose=0)
        scores = model.evaluate(X_test, y_test)
        prediction = model.predict(X_test)
        auc = roc_auc_score(y_test, prediction)
        # AUC is a 0-1 score, so it is printed without a percent sign, as in
        # the summary line below.
        print("%s: %.2f%%; AUC = %.2f" %
              (model.metrics_names[1], scores[1] * 100, auc))
        cvscores.append(scores[1] * 100)
        AUC.append(auc)

    print("Accuracy = %.2f%% (+/- %.2f%%); AUC = %.2f (+/- %.2f)" %
          (np.mean(cvscores), np.std(cvscores), np.mean(AUC), np.std(AUC)))

if args.makeBDT:

    model = XGBClassifier()
    cvscores = []
    AUC = []
示例#6
0
# Assemble the model from the input/output tensors and compile it for
# categorical classification with a default-configured Adam optimizer.
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])
model.summary()


# Disabled callback setup (checkpointing and learning-rate reduction):
# checkpoint = ModelCheckpoint(filepath=filepath,
#                              verbose=1,
#                              save_best_only=True)
# lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
#                                cooldown=0,
#                                patience=5,
#                                min_lr=0.5e-6)
# callbacks = [checkpoint, lr_reducer]


# NOTE(review): within this view `callbacks` is only assigned in the
# commented-out block above; unless it is defined elsewhere, the fit() call
# below raises NameError. Also, no branch here handles a truthy
# `data_augmentation`, in which case the model is evaluated without being
# fitted by this code — confirm against the rest of the file.
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=callbacks)
    
    
# Score the model on the test data; with the compile() call above, scores[0]
# is the loss and scores[1] the accuracy metric.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
示例#7
0
"""We have got an accuracy of **97.19** on our training dataset"""

# summarize history for loss
from matplotlib import pyplot as plt

# Plot the training loss, and the validation loss when it was recorded, so
# that every legend entry corresponds to a curve that is actually drawn.
loss_history = model_history.history
plt.plot(loss_history['loss'])
legend_labels = ['train']
if 'val_loss' in loss_history:
    plt.plot(loss_history['val_loss'])
    legend_labels.append('test')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(legend_labels, loc='upper left')
plt.show()

"""We will check for the acuraccy on testing dataset"""

model.evaluate(test_images,to_categorical(test_labels))

"""### Prediction"""

ans=model.predict(test_kaggle)

import numpy as np
# Index of the highest-scoring class for every sample.
ans=np.argmax(ans,axis=1)

ans[:5]

# Reuse the argmax computed above instead of model.predict_classes(), which
# newer Keras releases no longer provide and which would run inference a
# second time. (Assumes one output column per class, as the to_categorical
# targets above suggest.)
predicted_classes = ans
# ImageId is 1-based.
submissions=pd.DataFrame({"ImageId": list(range(1,len(predicted_classes)+1)),
                         "Label": predicted_classes})
submissions.to_csv("subbmision2.csv", index=False, header=True)
示例#8
0
# Exhaustive hyper-parameter search, scored by accuracy with 10-fold
# cross-validation.
grid_search = GridSearchCV(estimator = classifier, param_grid = parameters, scoring = 'accuracy', cv = 10 )
grid_search_result = grid_search.fit(X_train, y_train)
best_parameters = grid_search_result.best_params_
best_score = grid_search_result.best_score_

# Prediction after tuning the ANN.
# NOTE(review): the network below is hard-coded; nothing is read back from
# best_parameters -- confirm the values match the search result.
classifier = Sequential()
classifier.add(Dense(units = 4, activation = 'relu', kernel_initializer = 'uniform', input_dim = 7))
#classifier.add(Dropout(0.1))
classifier.add(Dense(units = 4, activation = 'relu', kernel_initializer = 'uniform'))
#classifier.add(Dropout(0.1))
classifier.add(Dense(units = 4, activation = 'relu', kernel_initializer = 'uniform'))
#classifier.add(Dropout(0.1))
classifier.add(Dense(units = 1, activation = 'sigmoid', kernel_initializer = 'uniform'))
classifier.compile(optimizer = 'rmsprop', loss = 'binary_crossentropy', metrics = ['accuracy'])
classifier.fit(X_train,y_train, batch_size = 32, epochs = 100)
# Evaluate the model on the data it was fitted on. The labels now say "train"
# because these numbers come from X_train/y_train, not from a held-out set
# (the original printed them as "test" loss/accuracy).
score = classifier.evaluate(X_train, y_train)
print('train loss', score[0])
print('train accuracy', score[1])
# Prediction on the test set: sigmoid outputs above 0.4 count as positive.
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.4)







示例#9
0
def main():
    """Cross-validate, train and score the phishing-URL model, then log metrics.

    The function
      1. vectorises the phishing and benign URL files,
      2. runs a stratified 2-fold cross-validation through the scikit-learn
         wrapper and prints the mean score,
      3. fits a plain Keras model on all data and prints its evaluation,
      4. for every epoch budget from 1 to 30 retrains a fresh model on each
         fold and records the fold-averaged training accuracy and the
         validation metrics,
      5. writes the per-epoch averages to the results file.
    """
    file_names = ["dataset/phishing_url.txt", "dataset/cc_1_first_9617_urls"]
    is_phishing = [True, False]
    x, y = convert_urls_to_vector(file_names, is_phishing)
    # Wrap the model builder so scikit-learn utilities can drive it.
    model = KerasClassifier(build_fn=create_model, epochs=1, batch_size=100)
    # Stratified, shuffled 2-fold cross-validation.
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=seed)
    # Train and validate once per fold and report the mean score.
    results = cross_val_score(model, x, y, cv=kfold)
    print(results.mean())
    # The scikit-learn wrapper exposes fit/predict/score but no evaluate(), so
    # the full-data run below needs a plain Keras model.
    model = create_model()
    # Train the model on all data.
    model.fit(x, y, batch_size=128, epochs=2, verbose=2)
    scores = model.evaluate(x, y, verbose=2)
    print("scores:", scores)
    print('Accuracy: %.2f%%' % (scores[1] * 100))
    for epoch in range(1, 31):
        # Per-fold validation accuracy.
        vaccuracy = []
        # Per-fold training accuracy.
        taccuracy = []
        # Per-fold validation precision.
        precision = []
        # Per-fold validation recall.
        recall = []
        # Per-fold validation F1.
        F1 = []
        # `train` and `validation` are the index arrays produced by the split.
        for train, validation in kfold.split(x, y):
            model = create_model()
            print(x[train], y[train])
            history = model.fit(x[train],
                                y[train],
                                epochs=epoch,
                                batch_size=100).history
            # Training accuracy reached in the last epoch of this run.
            tac = history["accuracy"][epoch - 1]
            # verbose=0 silences evaluate(). The result is indexed as
            # [loss, accuracy, precision, recall, F1]; that order presumably
            # mirrors the metrics compiled in create_model -- verify.
            vac = model.evaluate(x[validation], y[validation], verbose=0)
            # Accuracy is a fraction, so scale it to match the '%%' suffix
            # (same convention as the 'Accuracy:' print above).
            print('%s: %.2f%%' % (model.metrics_names[1], vac[1] * 100))
            taccuracy.append(tac)
            vaccuracy.append(vac[1])
            precision.append(vac[2])
            recall.append(vac[3])
            F1.append(vac[4])
        # Fold-averaged training and validation accuracy for this epoch budget.
        print(np.mean(taccuracy), np.mean(vaccuracy))
        # NOTE(review): the *_count lists are not created in this function;
        # presumably they are module-level lists -- verify.
        taccuracy_count.append(np.mean(taccuracy))
        vaccuracy_count.append(np.mean(vaccuracy))
        precision_count.append(np.mean(precision))
        recall_count.append(np.mean(recall))
        F1_count.append(np.mean(F1))
    # The context manager closes the file even if a write fails; write() emits
    # the same text the original produced via writelines() on a string.
    with open(r"E:\daima-sx\test2\result\evaluating_indicator_my", "w+") as f:
        f.write('taccuracy_count' + str(taccuracy_count) + '\n')
        f.write('vaccuracy_count' + str(vaccuracy_count) + '\n')
        f.write('precision_count' + str(precision_count) + '\n')
        f.write('recall_count' + str(recall_count) + '\n')
        f.write('F1_count' + str(F1_count) + '\n')
# Candidate optimizers explored by the grid search.
optimizer = ['Adagrad', 'Adadelta', 'Adam']
param_grid = {'opt': optimizer}

# Run the search over the training data.
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid_result = grid.fit(X_train, y_train)

# Headline result first, then one line per evaluated parameter combination.
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)
means, stds, params = (
    grid_result.cv_results_['mean_test_score'],
    grid_result.cv_results_['std_test_score'],
    grid_result.cv_results_['params'],
)
for mean, stdev, param in zip(means, stds, params):
    row = (mean, stdev, param)
    print("%f (%f) with: %r" % row)

# Fresh model with a single 8-unit layer spec and a dropout rate of 0.2.
model = create_model(lyrs=[8], dr=0.2)

print(model.summary())

# Train silently, holding back 20% of the training data for validation.
fit_options = dict(epochs=50,
                   batch_size=32,
                   validation_split=0.2,
                   verbose=0)
training = model.fit(X_train, y_train, **fit_options)

# Report the first compiled metric on the training data as a percentage.
scores = model.evaluate(X_train, y_train)
metric_report = (model.metrics_names[1], scores[1] * 100)
print("\n%s: %.2f%%" % metric_report)
示例#11
0
# Alias for the project module that provides Build_Model and the
# hyper-parameter space.
ltk_keras = ltk
model = KerasClassifier(build_fn= ltk_keras.Build_Model, verbose=1)
hyperparameters = ltk_keras.create_hyperparameters()
# Randomised search: 10 sampled configurations, 3-fold CV, all cores.
search = RandomizedSearchCV(estimator=model,
                            param_distributions=hyperparameters,
                            n_iter=10, n_jobs=-1, cv=3, verbose=1)
# Best test accuracy seen across all training subsets.
Max_acc = 0

# Light augmentation: small rotations/shifts plus horizontal flips.
data_generator = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.02,
    height_shift_range=0.02,
    horizontal_flip=True
)
for i in range(10):
    # Tune on the i-th training subset, then rebuild a model with the winning
    # settings and train it on augmented batches of the same subset.
    search.fit(X_list_train[i], Y_list_train[i])
    print(search.best_params_)
    model = ltk_keras.Build_Model(search.best_params_["keep_prob"], search.best_params_['optimizer'], search.best_params_['kernel_regularizer_num'])
    # NOTE(review): steps_per_epoch is the number of values in one image, not
    # samples / batch_size -- confirm this is intended.
    history = model.fit_generator(
        data_generator.flow(X_list_train[i], Y_list_train[i], batch_size=search.best_params_["batch_size"]),
        steps_per_epoch= IMG_ROWS * IMG_COLS * IMG_CHANNELS,
        epochs = 10,
        validation_data = (X_test, Y_test),
        verbose=1
    )
    loss, acc = model.evaluate(X_test, Y_test)
    print("정답률: ", acc)
    Max_acc = max(Max_acc, acc)
# Report the best accuracy over all runs. The original printed `acc`, i.e. the
# result of the last iteration only, leaving Max_acc unused.
print("최종정답률: ", Max_acc)
示例#12
0
grid.fit(sequence_matrix, y_train)

# Best cross-validation score and the parameters that produced it.
# A previous run gave: 0.9844460397224216 {'batch_size': 128, 'epochs': 4}
print(grid.best_score_, grid.best_params_)

# Rebuild the network and train it with the best batch size / epoch count.
inputs = Input(name='inputs', shape=[maximum_length])
# Layers are created in the same order as before and then applied in sequence.
layer = inputs
for block in (
        Embedding(maximum_word, 50, input_length=maximum_length),
        LSTM(64),
        Dense(units=256, activation='relu'),
        Dropout(0.5),
        Dense(units=1, activation='sigmoid'),
):
    layer = block(layer)
model = Model(inputs=inputs, outputs=layer)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# Train with batch_size 128 for 4 epochs.
model.fit(sequence_matrix, y_train, batch_size=128, epochs=4)

# Tokenise and pad the held-out texts the same way, then measure performance.
test_sequences = token.texts_to_sequences(x_test)
test_sequences_matrix = sequence.pad_sequences(
    test_sequences, maxlen=maximum_length)

loss_and_accuracy = model.evaluate(test_sequences_matrix, y_test)

print(loss_and_accuracy)
示例#13
0
def main():
    """Train and evaluate a CNN that predicts a numeric trait from features.

    The function parses the command line, loads the feature (-x) and label
    (-y) tables, optionally one-hot encodes the features, holds out the
    instances listed in -test, optionally runs a randomized hyper-parameter
    search, and finally trains ``-n`` replicate models. For every replicate
    the train/validation/test MSE and Pearson correlation are appended as one
    row to ``RESULTS.txt`` in the working directory.

    Raises:
        ValueError: if the requested optimizer or CNN architecture is not one
            of the supported names.
    """
    ########################
    ### Parse Input Args ###
    ########################
    parser = argparse.ArgumentParser(
        description='Predicting Traits Using Convolutional Neural Networks. \
			See README.md for more information about the pipeline usage.\n'
        'Written by: Emily Bolger\nModified by: Christina Azodi',
        epilog='https://github.com/ShiuLab')

    ## Required
    req_group = parser.add_argument_group(title='REQUIRED INPUT')
    req_group.add_argument(
        '-x',
        help='Feature data. Format options available: '
        ' 1) Matrix with one row for every instance. 2) Directory with one '
        'matrix for every instance with file name matching instance name '
        'provided in -y',
        required=True)
    req_group.add_argument('-y',
                           help='Matrix with label to predict',
                           required=True)
    req_group.add_argument('-test',
                           help='List of instances to use as test set',
                           required=True)
    req_group.add_argument('-save', help='Name for Output File', required=True)

    ## Optional
    inp_group = parser.add_argument_group(title='OPTIONAL INPUT')
    inp_group.add_argument('-feat',
                           help="List of column names in -x to use",
                           default=False)
    inp_group.add_argument(
        '-x_sort',
        help='Method to sort feature (-x) data '
        'by column. Options: (False, alpha, file_with_order, cluster)',
        default=False)
    inp_group.add_argument('-shape',
                           help='Dimension of -x (e.g. for input '
                           'with 4 rows and 6 columns: -shape 4,6)',
                           default='default')
    inp_group.add_argument('-onehot',
                           help='T/F to convert 1xX data into matrix'
                           ' by one-hot encoding',
                           default='F')
    inp_group.add_argument('-onehot_order',
                           help='Order for 1-hot y axis. For '
                           'example: -onehot_order 1,0,-1)',
                           default=False)
    inp_group.add_argument('-y_name', help='Col name to predict', default='Y')
    inp_group.add_argument('-sep', help='Deliminator for X & Y', default='\t')

    # How to run CNN
    inp_group.add_argument('-run',
                           help='T/F to run the final model. If F, will'
                           'only run the grid search if -gs T',
                           default='f')
    inp_group.add_argument('-n',
                           help='Num replicates of model... Different'
                           'train/validation split each replicate',
                           type=int,
                           default=10)
    inp_group.add_argument('-n_jobs',
                           '-p',
                           help='Number of processors for '
                           'parallel computing (max for HPCC = 14)',
                           type=int,
                           default=1)
    inp_group.add_argument('-cv',
                           help='Number of cross validation folds',
                           type=int,
                           default=5)

    # Parameter Selection
    inp_group.add_argument(
        '-params',
        help='How to select parameters. Options: '
        'grid search (gs), default, from XXX_GridSearch.txt (provide path).',
        default='default')
    inp_group.add_argument('-gs_reps',
                           help='Number of combinations of '
                           'parameters to test in the grid search',
                           type=int,
                           default=100)

    # Default CNN parameters
    inp_group.add_argument('-cnn_type',
                           help='CNN architecture. Options: '
                           '(simple, DeepGS)',
                           default="simple")
    inp_group.add_argument('-filters',
                           help='Number of kernels/filters in each '
                           'CNN layer',
                           type=int,
                           default=32)
    inp_group.add_argument('-kernel_l',
                           help='Length of kernel (height '
                           'defaults to the full height of the dataset)',
                           type=int,
                           default=16)
    inp_group.add_argument('-stride_len',
                           help='Stride of Convolution kernels '
                           '(width defaults to 1)',
                           type=int,
                           default=1)
    inp_group.add_argument('-activation',
                           help='Activation function in all but '
                           'last dense layer, which is set to linear',
                           type=str,
                           default='relu')
    inp_group.add_argument('-pool_size',
                           help='Size of max pooling layer filter '
                           '(first number only, second defaults to 1)',
                           type=int,
                           default=8)
    inp_group.add_argument('-optimizer',
                           help='Optimization function to use)',
                           type=str,
                           default='Adam')
    inp_group.add_argument('-dropout',
                           help='Value for Dropout Rate',
                           type=float,
                           default=0.5)
    inp_group.add_argument('-l2',
                           help='Value for L2 regularization',
                           type=float,
                           default=0.2)
    inp_group.add_argument('-learn_rate',
                           help='Value for Learning Rate',
                           type=float,
                           default=0.01)
    inp_group.add_argument('-clip_value',
                           help='Clip Value',
                           type=float,
                           default=0.5)
    inp_group.add_argument('-patience',
                           help='Patience for Early Stopping',
                           type=int,
                           default=10)
    inp_group.add_argument('-min_delta',
                           help='Minimum Delta Value for Early '
                           'Stopping',
                           type=float,
                           default=0)
    inp_group.add_argument('-num_epochs',
                           help='Max number of Epochs',
                           type=int,
                           default=1000)
    inp_group.add_argument('-n_channels',
                           help='Num channels',
                           type=int,
                           default=1)

    # Argument parsing
    args = parser.parse_args()

    # Without an explicit -shape, every instance is treated as a single row
    # whose width is the number of feature columns in -x.
    if args.shape == 'default':
        tmp = pd.read_csv(args.x, sep=args.sep, index_col=0)
        shape_r, shape_c = 1, tmp.shape[1]
    else:
        shape_r, shape_c = args.shape.strip().split(',')
        shape_r = int(shape_r)
        shape_c = int(shape_c)

    ########################
    ### Parse Input Data ###
    ########################
    print("\n***** Loading Data ******\n")

    # Step 1: Read in x file, if feat file given only keep those features
    if os.path.isfile(args.x):
        x = pd.read_csv(args.x, sep=args.sep, index_col=0)
        x.index = x.index.astype('str')
        instance_order = list(x.index.values)
        with open(args.test) as test_file:
            test_instances = test_file.read().splitlines()
        test_instances = [str(i) for i in test_instances]
        # Everything that is not a test instance is used for training and
        # validation; positions are kept alongside the labels because x is
        # later indexed as a numpy array.
        train_val_instances = list(set(instance_order) - set(test_instances))
        test_index = [x.index.get_loc(i) for i in test_instances]
        train_val_index = [x.index.get_loc(i) for i in train_val_instances]
        if args.feat:
            with open(args.feat) as f:
                features = f.read().strip().splitlines()
            x = x.loc[:, features]
    elif os.path.isdir(args.x):
        # NOTE(review): this branch never defines test_index, train_val_index,
        # test_instances or train_val_instances, all of which Step 5 needs --
        # confirm how directory input is meant to be split.
        x = ANN.fun.Image2Features(args.x, shape_r, shape_c)
    n_instances = x.shape[0]
    n_feats = x.shape[1]
    print("Total number of instances: %i" % n_instances)
    print("Number of features used: %i" % n_feats)

    # Step 2: Sort x data
    if args.x_sort == 'alpha':
        print('Sorting feature data by column alpha numerically...')
        x = x.reindex(sorted(x.columns), axis=1)
    elif args.x_sort == 'cluster':
        print('Sorting feature data by column using clustering...')
        print('\n\nNOT IMPLEMENTED YET... PROGRESSING WITHOUT SORTING...\n\n')
    else:
        if not args.x_sort:
            print('Using -x in the order provided in -x or in -feat')
        else:
            # Any other value is a path to a file listing the column order.
            with open(args.x_sort) as order:
                order_list = order.read().strip().splitlines()
            x = x.loc[:, order_list]
    print('\nSnapshot of input feature data:')
    print(x.head())

    # Step 3: One-hot-encode X if required
    if args.onehot.lower() in ['t', 'true']:
        x_1hot_list = []
        x = x.round(0)
        labels = pd.unique(x.values.ravel())
        ohe = preprocessing.OneHotEncoder(categories='auto', sparse=False)
        for i in range(len(x)):
            x_row = np.array(x.iloc[i, ]).reshape(n_feats, -1)
            oh_matrix = ohe.fit_transform(x_row)
            if oh_matrix.shape[1] < len(labels):
                # This instance lacks some label values, so its encoding would
                # be narrower than the others. Append the missing values as an
                # extra row, re-encode, then drop the helper row again.
                labels_present = pd.unique(x_row.ravel())
                missing = list(set(labels) - set(labels_present))
                print(
                    "Instance in row %i is has no '%s', so adding by hand..." %
                    (i, missing))
                x_row = np.append(x_row, np.array([missing]), axis=0)
                oh_matrix = ohe.fit_transform(x_row)
                oh_matrix = oh_matrix[:-1, :]
            x_1hot_list.append(oh_matrix)
        # Move the label axis ahead of the feature axis for each instance.
        x = np.swapaxes(np.array(x_1hot_list), 1, 2)

    # NOTE(review): the lines below index a third dimension and call
    # reshape(), which works for the array built by the one-hot branch; with
    # -onehot F and file input x is still a 2-D DataFrame here -- confirm
    # whether that path is supported.
    data_height = x.shape[1]
    data_width = x.shape[2]
    x = x.reshape((n_instances, data_height, data_width, args.n_channels))
    print("\nShape of feature data used for training/validation/testing:")
    print(x.shape)

    print("\nSnapshot of feature data for first instance in data set:")
    print(x[0, :, :, 0])

    # Step 4: Read in Y data and make sure sorted as in -x
    y = pd.read_csv(args.y, sep=args.sep, index_col=0)
    y.index = y.index.astype('str')
    y = y[[args.y_name]]

    print("\nShape of Label Data:")
    print(y.shape)

    # Step 5: Remove testing data
    x_test = x[test_index, :, :, :]
    x_train = x[train_val_index, :, :, :]
    # Label-based selection; .loc replaces .ix, which pandas 1.0 removed.
    y_test = y.loc[test_instances]
    y_train = y.loc[train_val_instances]

    ################################
    ### Define CNN architectures ###
    ################################

    def tfp_pearson(y_true, y_pred):
        """Keras metric: correlation between predictions and true values."""
        return tfp.stats.correlation(y_pred, y_true, event_axis=None)

    def make_cnn_model(cnn_type=args.cnn_type,
                       learn_rate=args.learn_rate,
                       filters=args.filters,
                       pool_size=args.pool_size,
                       kernel_l=args.kernel_l,
                       kernel_h=data_height,
                       activation=args.activation,
                       optimizer=args.optimizer,
                       units=1):
        """Build and compile one of the supported CNN regressors.

        Defaults come from the command line, so the function can also be
        handed to the scikit-learn wrapper as a model builder. The model is
        compiled with a mean-squared-error loss and an optimizer that carries
        ``learn_rate`` and the -clip_value setting.

        Raises:
            ValueError: for an optimizer or cnn_type that is not supported.
        """
        optimizer_name = optimizer.lower()
        if optimizer_name == 'adam':
            opt = tf.keras.optimizers.Adam(lr=learn_rate,
                                           clipvalue=args.clip_value)
        elif optimizer_name == 'nadam':
            opt = tf.keras.optimizers.Nadam(lr=learn_rate,
                                            clipvalue=args.clip_value)
        elif optimizer_name == 'rmsprop':
            opt = tf.keras.optimizers.RMSprop(lr=learn_rate,
                                              clipvalue=args.clip_value)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(
                "Unsupported optimizer '%s' (expected Adam, Nadam or RMSprop)"
                % optimizer)

        architecture = cnn_type.lower()
        if architecture == 'simple':
            K.clear_session()
            model = models.Sequential()
            model.add(
                layers.Conv2D(
                    filters=filters,
                    kernel_size=tuple([kernel_h, kernel_l]),
                    kernel_regularizer=tf.keras.regularizers.l2(args.l2),
                    strides=tuple([args.stride_len, 1]),
                    activation=activation,
                    kernel_initializer='glorot_normal',
                    input_shape=(data_height, data_width, args.n_channels)))
            model.add(layers.MaxPooling2D(pool_size=tuple([1, pool_size])))
            model.add(layers.Flatten())
            model.add(layers.Dropout(args.dropout))
            model.add(layers.Dense(24, activation=activation))
            model.add(layers.BatchNormalization())
            model.add(layers.Dense(units=units, activation='linear'))
            model.compile(optimizer=opt, loss='mean_squared_error')

        elif architecture == 'deepgs':
            K.clear_session()
            model = models.Sequential()
            model.add(
                layers.Conv2D(filters=filters,
                              kernel_size=tuple([kernel_h, kernel_l]),
                              strides=tuple([args.stride_len, 1]),
                              activation=activation,
                              kernel_initializer='glorot_normal',
                              input_shape=(data_height, data_width,
                                           args.n_channels)))
            model.add(layers.MaxPooling2D(pool_size=tuple([1, pool_size])))
            model.add(
                layers.Conv2D(filters=filters,
                              kernel_size=tuple([1, kernel_l]),
                              strides=tuple([args.stride_len, 1]),
                              activation=activation))
            model.add(layers.MaxPooling2D(pool_size=tuple([1, pool_size])))
            model.add(layers.Dropout(args.dropout))
            model.add(layers.Flatten())
            model.add(layers.Dense(units=24, activation=activation))
            model.add(layers.BatchNormalization())
            model.add(layers.Dropout(args.dropout))
            model.add(layers.Dense(units=units, activation='linear'))
            model.compile(optimizer=opt, loss='mean_squared_error')
        else:
            raise ValueError(
                "Unsupported cnn_type '%s' (expected simple or DeepGS)" %
                cnn_type)
        return model

    ####################
    ### Grid Search  ###
    ####################

    if args.params.lower() == 'gs':
        # x_train holds the training/validation instances, so the banner
        # reports them as training (not testing) instances.
        print('\n***** Starting Random Search with %i reps using %i training '
              'instances and %i fold cross-validation *****\n' %
              (args.gs_reps, x_train.shape[0], args.cv))
        scoring = {
            'neg_mse': 'neg_mean_squared_error',
            'exp_var': 'explained_variance'
        }
        param_grid = dict(learn_rate=[1, 0.1, 0.01, 0.001, 0.0001, 0.00001],
                          filters=[8, 16, 32],
                          kernel_l=[8, 16, 32],
                          pool_size=[4, 8, 16],
                          activation=["relu", "selu", "elu"],
                          optimizer=['RMSprop', 'Adam', 'nadam'],
                          cnn_type=['simple', 'deepgs'])
        # NOTE(review): the model is a regressor (linear output, MSE loss) but
        # is wrapped in KerasClassifier -- confirm KerasRegressor was not
        # intended here.
        model = KerasClassifier(build_fn=make_cnn_model,
                                batch_size=100,
                                epochs=50,
                                verbose=1)
        rand_search = RandomizedSearchCV(estimator=model,
                                         param_distributions=param_grid,
                                         cv=args.cv,
                                         n_iter=args.gs_reps,
                                         n_jobs=args.n_jobs,
                                         verbose=1,
                                         scoring=scoring,
                                         refit='neg_mse')
        gs_result = rand_search.fit(x_train, y_train)
        gs_result_df = pd.DataFrame.from_dict(gs_result.cv_results_)

        print("Saving Grid Search Results....")
        print(gs_result_df.head())
        # Append to any existing results; the header is written only when the
        # file is still empty.
        with open(args.save + "_GridSearch.txt", 'a') as out_gs:
            gs_result_df.to_csv(out_gs, header=out_gs.tell() == 0, sep='\t')

    ########################
    ### Run Final Models ###
    ########################

    if args.run.lower() in ['t', 'true']:

        # Step 1: Define the parameters from the Grid Search or use default
        if args.params.lower() != 'default':
            if args.params.lower() != 'gs':
                gs_result_df = pd.read_csv(args.params, sep='\t')
                gs_result_df.fillna(0, inplace=True)

            # With a multi-metric ``scoring`` dict, scikit-learn names the
            # result columns mean_test_<scorer>/std_test_<scorer>; there is no
            # plain mean_test_score. Fall back to the single-metric names for
            # result files that do carry them.
            if 'mean_test_neg_mse' in gs_result_df.columns:
                score_col, std_col = 'mean_test_neg_mse', 'std_test_neg_mse'
            else:
                score_col, std_col = 'mean_test_score', 'std_test_score'

            gs_mean = gs_result_df.groupby([
                'param_filters', 'param_optimizer', 'param_learn_rate',
                'param_kernel_l', 'param_pool_size', 'param_cnn_type',
                'param_activation'
            ]).agg({
                score_col: 'mean',
                std_col: 'mean'
            }).reset_index()

            # Higher (less negative) score is better, so the best row is first.
            gs_mean = gs_mean.sort_values(by=score_col, ascending=False)
            print('\nSnapshot of grid search results:')
            print(gs_mean.head())

            args.cnn_type = gs_mean['param_cnn_type'].iloc[0]
            args.pool_size = int(gs_mean['param_pool_size'].iloc[0])
            args.learn_rate = float(gs_mean['param_learn_rate'].iloc[0])
            args.kernel_l = int(gs_mean['param_kernel_l'].iloc[0])
            args.filters = int(gs_mean['param_filters'].iloc[0])
            args.activation = gs_mean['param_activation'].iloc[0]
            args.optimizer = gs_mean['param_optimizer'].iloc[0]

        print('\n***** Running CNN models ******')
        print('CNN Architecture: %s\nOptimizer: %s\nActivation function:'
              ' %s\nLearning Rate: %f\nNumber of kernels: '
              '%i\nKernel shape: [%i, %i]\nPooling Size: [%i, 1]\n' %
              (args.cnn_type, args.optimizer, args.activation, args.learn_rate,
               args.filters, args.kernel_l, data_height, args.pool_size))

        for i in range(args.n):
            print('Rep %i of %i' % (i, args.n))
            run = True

            # Retrain with a new random split until the validation correlation
            # is positive.
            while run:
                # Step 2: Creating CNN model using Tensorflow
                model = make_cnn_model(cnn_type=args.cnn_type,
                                       learn_rate=args.learn_rate,
                                       optimizer=args.optimizer,
                                       filters=args.filters,
                                       pool_size=args.pool_size,
                                       kernel_l=args.kernel_l,
                                       kernel_h=data_height,
                                       activation=args.activation,
                                       units=1)
                #print(model.summary())

                # Step 3: Split training into training2 and validation
                x_train2, x_val, y_train2, y_val = train_test_split(
                    x_train, y_train, test_size=0.2)
                print('Train on %i, validate on %i, test on %i' %
                      (x_train2.shape[0], x_val.shape[0], x_test.shape[0]))

                # Step 4: Add the correlation metric & early stopping, train.
                # Recompile with the optimizer instance built in
                # make_cnn_model: passing the optimizer *name* here would
                # create a default-configured optimizer and silently drop the
                # requested learning rate and clip value.
                model.compile(optimizer=model.optimizer,
                              loss='mean_squared_error',
                              metrics=[tfp_pearson])

                earlystop_callback = EarlyStopping(monitor='val_loss',
                                                   mode='min',
                                                   min_delta=args.min_delta,
                                                   patience=args.patience,
                                                   restore_best_weights=True,
                                                   verbose=1)

                model.fit(x_train2,
                          y_train2,
                          batch_size=100,
                          epochs=args.num_epochs,
                          verbose=1,
                          callbacks=[earlystop_callback],
                          validation_data=(x_val, y_val))

                # Step 5: Apply best model to train, val, test, and report results
                train_mse, train_pcc = model.evaluate(x_train2, y_train2)
                val_mse, val_pcc = model.evaluate(x_val, y_val)
                if val_pcc > 0:
                    run = False
                else:
                    print('\nPCC was negative on valid data.. retraining...')

            test_mse, test_pcc = model.evaluate(x_test, y_test)
            if np.isnan(test_pcc):
                # Still don't know why this happens, but this fixes it...
                print('Recalculating PCC using Numpy...')
                pred = model.predict(x_test).tolist()
                pred2 = [i for sublist in pred for i in sublist]
                test_pcc = np.corrcoef(pred2, y_test[args.y_name].values)[0, 1]

            print('PCC: train, val, and test: %3f, %3f, %3f' %
                  (train_pcc, val_pcc, test_pcc))

            # Create the results table with its header row on first use.
            if not os.path.isfile('RESULTS.txt'):
                with open('RESULTS.txt', 'w') as out:
                    out.write(
                        'ID\tX\tY\ty_name\ttest_set\t'
                        'CNN_Type\tLearn_Rate\tMin_Delta\tPatience\tActivation\t'
                        'Optimizer\tKernel_num\tKernel_len\tPooling_Size\tDropout'
                        '\tTrain_mse\tTrain_PCC\tVal_mse\tVal_PCC\tTest_mse\t'
                        'Test_PCC\n')

            # One row per replicate; the context manager closes the handle
            # even if formatting or writing fails.
            with open('RESULTS.txt', "a") as out:
                out.write(
                    '%s\t%s\t%s\t%s\t%s\t'
                    '%s\t%f\t%f\t%i\t%s\t%s\t'
                    '%i\t%i\t%i\t%f\t%f\t'
                    '%f\t%f\t%f\t%f\t%f\n' %
                    (args.save, args.x, args.y, args.y_name, args.test,
                     args.cnn_type, args.learn_rate, args.min_delta,
                     args.patience, args.activation, args.optimizer,
                     args.filters, args.kernel_l, args.pool_size, args.dropout,
                     train_mse, train_pcc, val_mse, val_pcc, test_mse,
                     test_pcc))

        print('\nDone!')
    model.compile(loss=l, optimizer=opt, metrics=m)
    return model


# Build the network with ReLU activations, a learning rate of 1e-4 and 16
# neurons (passed through to Build_Model as-is).
model = Build_Model(activation='relu', learning_rate=0.0001, neurons=16)

# Train for 50 epochs, holding back 20% of the training data for validation.
# The callback hook is currently disabled (see the commented-out argument).
history = model.fit(train_data,
                    train_labels,
                    epochs=50,
                    validation_split=0.2,
                    verbose=True
                    # ,callbacks= [monitor_val_loss]
                    )

# Test the network against your testing data set
# NOTE(review): the four unpacked values follow the loss + metrics order set
# when the model was compiled, which is not visible here -- confirm the names
# line up with that order.
test_loss, mae, test_acc, mse = model.evaluate(test_data, test_labels)
# Bare expression: a no-op in a script; presumably left over from a notebook
# cell that displayed the value.
mae


def plot_history(history):
    """Draw training and validation mean absolute error against the epoch.

    ``history`` must support lookup by the keys 'epoch',
    'mean_absolute_error' and 'val_mean_absolute_error'. The curves are added
    to a new figure; the figure is not shown or saved here.
    """
    curves = (
        ('mean_absolute_error', 'Train Mean Absolute Error'),
        ('val_mean_absolute_error', 'Val Mean Absolute Error'),
    )
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Absolute Error')
    for column, caption in curves:
        plt.plot(history['epoch'], history[column], label=caption)
    plt.legend()
    #plt.ylim([0,1])
示例#15
0
# summarize results
#print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# create model
# Uses the first entry of each hyper-parameter list; the input shape is taken
# from a single training sample and the output size from the last axis of
# y_train.
model = build_model(x_train.shape[1:], y_train.shape[-1], activation[0],
                    learn_rate[0], loss[0], optimizer[0], drop_rate[0])

print('built model..')

# Data Augmentation
# Feature-wise centering/scaling, random horizontal and vertical flips and a
# 1/255 rescale.
# NOTE(review): validation_split is configured, but the flow() call below does
# not pass a `subset`, so no validation subset is selected in this fragment --
# confirm.
datagen = ImageDataGenerator(featurewise_center=True,
                             featurewise_std_normalization=True,
                             rotation_range=0.0,
                             fill_mode='nearest',
                             horizontal_flip=True,
                             vertical_flip=True,
                             rescale=1. / 255,
                             preprocessing_function=None,
                             validation_split=0.25)

# Compute the dataset statistics required by the feature-wise options.
datagen.fit(x_train)

model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size[0]),
                    epochs=epochs[0])

# Save model with weights
model.save(data_dir + '/models/yelp_model.h5')

# NOTE(review): x_test is passed to evaluate() directly, without the rescaling
# and feature-wise normalization the generator applies to training batches --
# confirm x_test is preprocessed elsewhere.
score = model.evaluate(x_test, y_test)
print('Model loss:', score)
#print('Model accuracy:', score)
示例#16
0
def main(data_path='/home/af1tang/Desktop/Practice Files/pima-indians-diabetes.csv'):
    """Grid-search, train and cross-validate ``basic_model`` on a CSV dataset.

    The CSV is loaded with ``np.loadtxt``; columns 0-7 are used as features
    and column 8 as the label. Three independent experiments are run and
    their results printed:

    1. a grid search over batch size and number of epochs,
    2. a single fit on the full data followed by accuracy / F1 / AUC,
    3. a 10-fold stratified cross-validation.

    Args:
        data_path: Path of the comma-separated data file. Defaults to the
            location that used to be hard-coded.

    Returns:
        None. All results are printed.
    """
    np.random.seed(7)
    data = np.loadtxt(data_path, delimiter=',')
    X = data[:, 0:8]
    Y = data[:, 8]

    # --- 1. Grid search with cross-validation -------------------------------
    # The scikit-learn wrapper is only needed for GridSearchCV. Other
    # hyperparameters (optimizer, learning rate, momentum, neurons, init mode,
    # activation) can be added to the grid once basic_model accepts them.
    estimator = KerasClassifier(build_fn=basic_model, verbose=0)
    # `epochs` is the Keras 2 keyword; the legacy `nb_epoch` name is rejected
    # by the wrapper's parameter check.
    param_grid = dict(batch_size=[10, 20, 40, 60, 80, 100],
                      epochs=[10, 50, 100])
    grid = GridSearchCV(estimator=estimator, param_grid=param_grid, n_jobs=-1)
    grid_result = grid.fit(X, Y)

    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # --- 2. Plain fit / evaluate on the full data ---------------------------
    # evaluate() and metrics_names exist on the Keras model, not on the
    # KerasClassifier wrapper, so a raw model is built for this part.
    model = basic_model()
    model.fit(X, Y, epochs=150, batch_size=10)
    scores = model.evaluate(X, Y)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

    # Round the first output of every prediction row to a hard 0/1 label.
    predictions = [round(x[0]) for x in model.predict(X)]
    summary = [("Accuracy", accuracy_score(Y, predictions)),
               ("F1 Score", f1_score(Y, predictions)),
               ("AUC Score", roc_auc_score(Y, predictions))]
    for name, value in summary:
        print("%s: %.2f" % (name, value))

    # --- 3. 10-fold stratified cross-validation -----------------------------
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
    cvscores = []
    for train, test in kfold.split(X, Y):
        # A fresh model per fold so that no weights leak between folds.
        model = basic_model()
        model.fit(X[train], Y[train], epochs=150, batch_size=10, verbose=0)
        scores = model.evaluate(X[test], Y[test], verbose=0)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
        cvscores.append(scores[1] * 100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
示例#17
0
def _report_evaluation(confusionMatrix, labelDict, accuracy):
    """Print the confusion matrix, the per-label metrics and the accuracy."""
    print(confusionMatrix)
    for i in range(1, len(labelDict) + 1):
        key = "label" + str(i)
        print(key, labelDict[key])
    print("Accuracy: ", accuracy)


def main(_neurons,
         _activationFunctionHidden,
         _activationFunctionOutput,
         _lossFunction,
         _batchSize,
         _learningRate,
         _numberOfEpochs,
         _writeToCSV=False,
         _hyperparameterTuning=False):
    """Train (or tune) a network on ``ROI_dataset.dat`` and report its quality.

    The data set is shuffled, split 80/20 by rows into training and
    validation parts, and the inputs are passed through a ``Preprocessor``
    built from the training inputs only. Depending on
    ``_hyperparameterTuning`` either a randomized hyperparameter search or a
    single training run is performed; in both cases the validation part is
    scored with ``evaluate_architecture`` and the results are printed.
    Finally ``predict_hidden`` is called on the (shuffled) data set and its
    output printed.

    Args:
        _batchSize: Batch size used for training.
        _numberOfEpochs: Number of training epochs.
        _hyperparameterTuning: When truthy, run ``RandomizedSearchCV`` and
            pickle the best model to ``trained_ROI.pickle``.
        _neurons, _activationFunctionHidden, _activationFunctionOutput,
        _lossFunction, _learningRate, _writeToCSV: Accepted for interface
            compatibility; they are not read by this function (``create_model``
            is called without them).

    Returns:
        None.
    """
    global xValues, yValues

    dataset = np.loadtxt("ROI_dataset.dat")

    input_dim = 3  # CONSTANT: Stated in specification

    # NOTE(review): the shuffle happens before the seed below is set, so the
    # train/validation split differs from run to run -- confirm intent.
    np.random.shuffle(dataset)

    # Separate data columns into x (input features) and y (output).
    x = dataset[:, :input_dim]
    y = dataset[:, input_dim:]

    # First 80% of the rows for training, the rest for validation.
    split_idx = int(0.8 * len(x))
    x_train, y_train = x[:split_idx], y[:split_idx]
    x_val, y_val = x[split_idx:], y[split_idx:]

    # The preprocessor is built from the training inputs only and then
    # applied to both parts; targets are used unchanged.
    x_prep_input = Preprocessor(x_train)
    x_train_pre = x_prep_input.apply(x_train)
    y_train_pre = y_train
    x_val_pre = x_prep_input.apply(x_val)
    y_val_pre = y_val

    seed = 7
    np.random.seed(seed)

    if _hyperparameterTuning:
        # `epochs` is the Keras 2 keyword (the legacy `nb_epoch` name is not
        # accepted by the wrapper's parameter check).
        model = KerasClassifier(build_fn=create_model,
                                epochs=_numberOfEpochs,
                                batch_size=_batchSize)

        # Candidate values are deliberately few: the search is slow.
        batch_size = [16, 32, 128]
        epochs = [10, 50, 250]
        learn_rate = [1e-1, 1e-3, 1e-6]
        neurons = [5, 15, 20, 50]
        hidden_layers = [3, 5, 10, 25]

        param_grid = dict(epochs=epochs,
                          batch_size=batch_size,
                          learn_rate=learn_rate,
                          neurons=neurons,
                          hidden_layers=hidden_layers)

        # Randomized search with 10-fold cross-validation.
        grid = RandomizedSearchCV(estimator=model,
                                  param_distributions=param_grid,
                                  n_jobs=-1,
                                  cv=10)
        grid_result = grid.fit(x_train_pre, y_train_pre)

        print("Best: %f using %s" %
              (grid_result.best_score_, grid_result.best_params_))
        best_model = grid.best_estimator_.model

        # Evaluate the best network on the validation part.
        preds = best_model.predict(x_val_pre)
        accuracy, confusionMatrix, labelDict = evaluate_architecture(
            y_val_pre, preds)
        _report_evaluation(confusionMatrix, labelDict, accuracy)

        # Record x and y values in the module-level lists for later plotting.
        xValues.append(len(neurons) - 1)
        for i in range(len(labelDict)):
            key = "label" + str(i + 1)
            yValues[i].append(labelDict[key]["f1"])
            # NOTE(review): the accuracy is appended once per label because
            # this line sits inside the loop -- confirm that is intended.
            yValues[len(yValues) - 1].append(accuracy)

        # Context manager so the file handle is closed even on error.
        with open('trained_ROI.pickle', 'wb') as model_file:
            pickle.dump(best_model, model_file)

    else:
        model = create_model()
        # Uses the `_numberOfEpochs` argument (the previous spelling without
        # the underscore was an undefined name).
        model.fit(x_train_pre,
                  y_train_pre,
                  batch_size=_batchSize,
                  epochs=_numberOfEpochs,
                  verbose=1,
                  validation_data=(x_val_pre, y_val_pre))

        score = model.evaluate(x_val_pre, y_val_pre, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        # Evaluate the network on the validation part.
        preds = model.predict(x_val_pre)
        accuracy, confusionMatrix, labelDict = evaluate_architecture(
            y_val_pre, preds)
        _report_evaluation(confusionMatrix, labelDict, accuracy)

    # Predict on the data set with the saved model.
    predictions = predict_hidden(dataset)
    print(predictions)
示例#18
0
# Compile with categorical cross-entropy loss, the RMSprop optimizer and
# accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Checkpoint callback writing to 'mnist.model.best.hdf5'; with
# save_best_only=True the file is only overwritten when the monitored
# quantity improves.
checkpointer = ModelCheckpoint(filepath='mnist.model.best.hdf5',verbose=1, save_best_only=True)

# Train for 30 epochs in batches of 150, holding out 33% of the training
# data for validation; the fit() result is kept in `results`.
results = model.fit(X_train_processed_norm, y_train_encoded, batch_size=150, epochs=30,
          validation_split=0.33, callbacks=[checkpointer],
          verbose=0, shuffle=True)


# Test accuracy

# In[17]:


# Restore the checkpointed weights before scoring the test set, so the
# reported figure belongs to the saved model rather than the last epoch.
model.load_weights('mnist.model.best.hdf5')
score = model.evaluate(X_test_processed_norm, y_test_encoded, verbose=0)
# score[1] is the first compiled metric (accuracy); score[0] is the loss.
print('Test accuracy: %f' % score[1])


# ## Convolutional Neural Network

# In[21]:


from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D


# In[22]:


# Append a trailing axis of size 1 to the training inputs; the other three
# dimensions are taken from X_train (presumably the single image channel
# needed by Conv2D -- confirm).
X_train_processed_norm = X_train_processed_norm.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
示例#19
0
# Bare expression: shows the best estimator in a notebook/REPL only.
clf4.best_estimator_

# Rebuild and retrain the network with the best parameters found by clf4.
model_dic = clf4.best_params_
from keras.callbacks import EarlyStopping
# Stop when the *training* loss has not improved for 50 epochs.
# NOTE(review): a validation split is used below but the callback monitors
# "loss", not "val_loss" -- confirm this is intended.
early = EarlyStopping(monitor="loss", patience=50, mode="auto")
model = build_network_cnn(model_dic["model__keep_prob"],
                          model_dic["model__optimizer"])
model.fit(x,
          y,
          batch_size=model_dic["model__batch_size"],
          epochs=500,
          callbacks=[early],
          validation_split=0.2)

# NOTE(review): evaluate() returns the loss (plus any compiled metrics), so
# the value printed after "acc:" may not be an accuracy alone -- confirm.
print("acc:", model.evaluate(x_test, y_test))

# model_dic = clf5.best_params_

# model2.fit(x,y)

# print(model2.score(x_test,y_test))

# Same procedure for the k-nearest-neighbours search (clf6): rebuild the
# classifier from the best parameters, fit it and print its test score.
model_dic = clf6.best_params_
model2 = KNeighborsClassifier(n_neighbors=model_dic["knn__n_neighbors"],
                              weights=model_dic["knn__weights"],
                              leaf_size=model_dic["knn__leaf_size"],
                              algorithm=model_dic["knn__algorithm"])
model2.fit(x, y)

print(model2.score(x_test, y_test))
示例#20
0
# Build the model through utils.create_cnn_model with He-uniform
# initialisation and an L2 activity regulariser of 1e-4.
model = utils.create_cnn_model(init_mode='he_uniform', activity_regularizer = regularizers.l2(1e-4))

#%% Load CNN to LSTM model
# Raw string: the previous literal mixed escaped backslashes with a bare
# "\y", an invalid escape sequence that newer Python versions warn about.
# The resulting path is unchanged.
# NOTE(review): this model is loaded *before* training, so it reflects the
# file as it was at load time, and the checkpoint callback below writes to a
# relative 'best_model.h5' -- confirm both refer to the intended file.
model_best = load_model(r'C:\Projects\yelp_analysis\best_model.h5', compile=False)

#%% Train the model

# Early stopping callback and model checkpoint, which saves the best model dynamically.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=50)
mc = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True)
callback_list = [es, mc]

# 20 epochs in batches of 20, with 20% of the training data held out so that
# 'val_loss' exists for both callbacks.
training_hist = model.fit(x_train, y_train, batch_size=20, epochs=20, verbose=1, validation_split=0.2, callbacks=callback_list)

result = model.evaluate(x_test, y_test, verbose=1)
print(result)

#%% View history

utils.plot_history(training_hist, 'CNN to LSTM')

#%% Check saved model
# The model was loaded with compile=False, so it must be compiled before
# evaluate() can compute a loss and metrics.
model_best.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
result = model_best.evaluate(x_test, y_test, verbose=1)
print(result)
# 90.8% accurate.
#%% Let's compare the vader predictions and my own classifier to the actual
# star scores.

# Set a range of values to test. We'll pull the vader sentiment results and the associated review text together.
示例#21
0
# Small binary classifier: one hidden layer of 20 ReLU units, 50% dropout and
# a single sigmoid output. The input width is the number of columns of
# elo_data minus one.
model.add(Dense(20, activation='relu', input_dim=len(elo_data.columns) - 1))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train for 10 epochs in batches of 50, validating on 20% of the training data.
model.fit(X_train_elo,
          Y_train_elo,
          epochs=10,
          batch_size=50,
          validation_split=0.2,
          verbose=1)
# The return values of the next two calls are discarded; they are only
# visible when run interactively.
model.test_on_batch(X_test_elo, Y_test_elo, sample_weight=None)
model.evaluate(X_test_elo, Y_test_elo, verbose=1)
pred = model.predict_classes(X_test_elo, verbose=1)

# Write a diagram of the architecture to model.png and render it inline.
plot_model(model, to_file='model.png', show_shapes=True)

SVG(model_to_dot(model).create(prog='dot', format='svg'))

print(confusion_matrix(Y_test_elo, pred))
# print() call instead of the Python 2 print statement, which is a
# SyntaxError on Python 3 and inconsistent with the surrounding lines.
print(classification_report(Y_test_elo, pred))
print(accuracy_score(Y_test_elo, pred))
# NOTE(review): the ROC curve is computed from hard class predictions, not
# from predicted probabilities -- confirm this is intended.
fpr_elo, tpr_elo, thresholds_elo = roc_curve(Y_test_elo, pred)

# NOTE(review): this rebinds the name `auc` from the function to its result,
# so the function cannot be called again afterwards. The name is kept
# because code outside this fragment may read it.
auc = auc(fpr_elo, tpr_elo)

plt.figure(1)
# Diagonal reference line of a random classifier.
plt.plot([0, 1], [0, 1], 'k--')
# Grid over batch size and epochs. `verbose` and `validation_data` are passed
# through the grid as well; validation_data is a one-element list, so it adds
# no extra combinations and is simply forwarded to fit().
param_grid = dict(batch_size=batch_size, epochs=epochs, verbose=verbose, validation_data=[(validationData, tuneLabels)])
model = KerasClassifier(build_fn=createModel)
clf = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

print("Performing grid search...")
grid_result = clf.fit(trainData, trainLabels)
print("Best: %f using batch_size = %.1f, epochs = %.1f" % (grid_result.best_score_, grid_result.best_params_["batch_size"], grid_result.best_params_["epochs"]))
bestEpochs = grid_result.best_params_["epochs"]
bestBatchSize = grid_result.best_params_["batch_size"]

# Retrain a fresh, unwrapped model with the best batch size / epoch count.
print("Training model with 'best params': num_epochs = " + str(bestEpochs) + ", batch_size = " + str(bestBatchSize) + ", NUM_UNITS = " + str(NUM_UNITS) + ", dropout = " + str(DROPOUT_RATE))
model = createModel()
model.fit(trainData, trainLabels, batch_size=bestBatchSize, epochs=bestEpochs, validation_data=(validationData, tuneLabels))

print("Evaluating model...")
scores = model.evaluate(testData, testLabels, verbose=0)
predictions = model.predict(testData, verbose=0)
# Round the model outputs to hard 0/1 labels for the sklearn metrics.
predictions = np.round(predictions)
acc = accuracy_score(testLabels, predictions)

print("Using num_epochs = " + str(bestEpochs) + ", batch_size = " + str(bestBatchSize) + ", NUM_UNITS = " + str(NUM_UNITS) + ", dropout = " + str(DROPOUT_RATE))

# Pair every metric name with its value; zip() replaces the Python 2-only
# xrange() index loop and works on both Python 2 and 3.
for metricName, metricValue in zip(model.metrics_names, scores):
    print("Models's " + metricName + " = " + str(metricValue))

target_names = ["credible", "malicious"]

# The heading now matches what is printed: a classification report, not a
# confusion matrix.
print("Classification report:")
print(classification_report(testLabels, predictions, target_names=target_names))
示例#23
0
          kernel_regularizer=regularizers.l2(0.001))(L)
print('Dense layer is:', L)

# Functional-API model mapping sequence_input to the last tensor L.
model = Model(inputs=sequence_input, outputs=L)

# Optimization and compile
opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
print('Begin compiling...')
model.compile(loss='categorical_crossentropy', 
              optimizer=opt, 
              metrics=['accuracy'])
model.summary()

# Begin training
# verbose=2 prints one line per epoch; validation uses the explicit
# (data_val, Y_val) pair rather than a split of the training data.
model.fit(data_train, 
          Y_train, 
          batch_size=batch_size, 
          epochs=epochs, 
          verbose=2,
          validation_data=(data_val, Y_val))
score = model.evaluate(data_test, Y_test, batch_size=batch_size)
print ('The evaluation is: ', score)

# Evaluate testing set
# NOTE(review): `grid`, `X_test` and `y_test` are not defined anywhere in the
# visible fragment (the rest uses `model`, `data_test`, `Y_test`) -- confirm
# these names exist, otherwise this line raises NameError.
test_accuracy = grid.score(X_test, y_test)


# Save model
print ('Saving model...')
model.save('CNN-GRU-Turkish corpus-200d')
def LSTM_start(df):
    """Manually grid-search optimizer and activation for the LSTM model.

    The data frame is prepared with ``get_dataset`` and windowed with
    ``load_data``. For every optimizer/activation pair a new model is built
    and trained on each ``TimeSeriesSplit`` fold of the training data, then
    scored on that fold's held-out part. The lowest RMSE seen on any single
    fold, together with the configuration that produced it, is printed.

    Args:
        df: Input data frame, passed to ``get_dataset``.

    Returns:
        None. Progress and the best configuration are printed.
    """
    df, toMultiply = get_dataset(df)
    janela = 24  # window size handed to load_data and build_model
    print("Dataset Total", df.shape)
    X_train, y_train, X_test, y_test = load_data(df, janela)
    print("X_train", X_train.shape)
    print("y_train", y_train.shape)
    print("X_test", X_test.shape)
    print("y_test", y_test.shape)

    # Hyperparameters. Epochs and batch size are fixed; only the optimizer
    # and the activation function are searched.
    epochs = 50
    batch_size = 24
    optimizers = ['RMSprop', 'adam']
    activation = ['sigmoid', 'relu']

    # Ordered folds: each split trains on an initial stretch of the series
    # and tests on the stretch that follows it.
    tm_split = TimeSeriesSplit(n_splits=3)

    # Infinity instead of a finite sentinel, so a result is always selected
    # even when every RMSE is large.
    best_rmse = float('inf')
    final_value = ""

    for opt in optimizers:
        for act in activation:
            for train_index, test_index in tm_split.split(X_train):
                X_train_s, X_test_s = X_train[train_index], X_train[test_index]
                y_train_s, y_test_s = y_train[train_index], y_train[test_index]

                # A fresh model per fold so that no weights carry over.
                model = build_model(janela, act, opt)
                model.fit(X_train_s,
                          y_train_s,
                          batch_size=batch_size,
                          epochs=epochs,
                          validation_split=0.1,
                          verbose=1)
                fold_score = model.evaluate(X_test_s, y_test_s, verbose=0)

                # The first evaluate() entry is multiplied by toMultiply
                # before reporting.
                # NOTE(review): presumably this undoes the scaling applied in
                # get_dataset -- confirm the factor is right for a squared
                # error.
                mse = fold_score[0] * toMultiply
                rmse = math.sqrt(mse)

                # Despite the label, this is the score on the fold's
                # held-out part, not on its training part.
                print('Train Score in epoch -> %s  (%.5f MSE) (%.5f RMSE)' % (str(epochs), mse, rmse))
                if rmse < best_rmse:
                    best_rmse = rmse
                    final_value = "Best Score: %f - rmse  using %s" % (
                        best_rmse,
                        " ---- epochs -> " + str(epochs) +
                        " --- optimizer -> " + opt +
                        " --- activation function -> " + act)
                    print(final_value)

    print(final_value)
示例#25
0
    print("構築")
    model = Sequential()
    model.add(Dense(6, input_dim=2, activation='relu'))
    model.add(Dense(3, activation='relu'))
    model.summary()
    print(model.get_weights())

    #学習
    print("学習")
    model.compile(loss='binary_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    return model


# Wrap createModel in the scikit-learn adapter; epochs, batch_size and
# verbose are forwarded to the underlying Keras training call.
model = KerasClassifier(createModel, epochs=200, batch_size=64, verbose=0)

hist = model.fit(X, y)

# Inference on the test inputs.
print("推論")
print(model.predict(X_test))
# Show the expected answers next to the predictions.
print("回答")
print("y_test:", y_test)
# Evaluation: the KerasClassifier wrapper offers fit/predict/score but no
# evaluate(), so the fitted Keras model it holds is evaluated directly to
# obtain both the loss and the accuracy.
print("評価")
score = model.model.evaluate(X_test, y_test, verbose=0)
print("loss:", score[0])
print("acc :", score[1])
示例#26
0
# Grid search over param_grid3, scored with the weighted F1 measure and run
# in a single process (n_jobs=1).
grid = GridSearchCV(estimator=model,
                    param_grid=param_grid3,
                    n_jobs=1,
                    scoring='f1_weighted',
                    verbose=1)
grid_result = grid.fit(Xtrain_encoded, Ytrain_integer)
print("Best: %f using %s" %
      (grid_result.best_score_, grid_result.best_params_))
# Mean and standard deviation of the cross-validated score for every
# parameter combination that was tried.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

#final model
# Embedding (vocabulary 20000, 100 dimensions) -> LSTM(100) -> 30% dropout ->
# Dense(16).
# NOTE(review): the last layer uses ReLU while the loss is categorical
# cross-entropy, which is normally paired with a softmax output -- confirm.
model = Sequential()
model.add(Embedding(20000, 100, input_length=MAX_SEQUENCE_LENGTH))
model.add(LSTM(100))
model.add(Dropout(0.3))
model.add(Dense(16, activation='relu'))
model.compile(loss='categorical_crossentropy',
              optimizer='Adam',
              metrics=['accuracy'])
# Trained on Ytrain_encoded, whereas the grid search above used
# Ytrain_integer.
model.fit(Xtrain_encoded,
          Ytrain_encoded,
          validation_split=0.2,
          epochs=20,
          batch_size=16,
          verbose=1)
# NOTE(review): evaluation uses `Ytest` while training used
# `Ytrain_encoded` -- confirm Ytest is encoded the same way.
loss, accuracy = model.evaluate(Xtest_encoded, Ytest, verbose=1)
print('Accuracy: %f' % (accuracy * 100))
示例#27
0
# Inspect the third layer (index 2): get_weights() returns a list whose
# element 0 is read here as the weights and element 1 as the bias.
layer_3_weights = classificador.layers[2].get_weights()
layer_3_weights_layer_2 = classificador.layers[2].get_weights()[0]
layer_3_bias = classificador.layers[2].get_weights()[1]
#%% Network predictions
# Predictions:
previsoes = classificador.predict(X_test)

# If we set the threshold at 0.1:
previsoes_bit = np.array(list(map(lambda x: 0 if x < 0.1 else 1, previsoes)))

# Count the number of Trues (how many predictions equal the true labels).
# Bare expression: the count is only shown in a notebook/REPL.
list(previsoes_bit == np.ndarray.ravel(y_test.values)).count(True)

# Fun for learning how to use map, but there is an easier way:
previsoes_bit = (previsoes > 0.1)
# %% Common metrics: accuracy and confusion matrix
from sklearn.metrics import accuracy_score, confusion_matrix

acuracia = accuracy_score(y_test, previsoes_bit)
matriz_confusao = confusion_matrix(y_test, previsoes_bit)

# %% sklearn metrics report
from sklearn.metrics import classification_report

report_metrics = classification_report(y_test, previsoes_bit)
print(report_metrics)

# %% Model evaluation with evaluate (Keras' own evaluate function)
# Returns the value of the loss function and the accuracy value
resultado = classificador.evaluate(X_test, y_test)
示例#28
0
    # compile the model
    model1.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['acc'])
    return model1

    # summarize the model


# scikit-learn adapter around create_model, needed for GridSearchCV below.
model1 = KerasClassifier(build_fn=create_model, verbose=0)

# fit the model
model1.fit(x_train, y_train, epochs=10)
# The wrapper itself has neither summary() nor evaluate(); both live on the
# Keras model it holds after fit(), so they are called on that model.
model1.model.summary()
# evaluate the model
loss1, accuracy1 = model1.model.evaluate(x_test, y_test)
print('Accuracy: %f' % (accuracy1 * 100))

# Grid search over batch size and number of epochs.
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model1, param_grid=param_grid, n_jobs=-1)
# NOTE(review): the search is fitted on the first 250 *test* samples --
# confirm this is intended rather than a slice of the training data.
grid_result = grid.fit(x_test[:250], y_test[:250])
print("Best: %f using %s" %
      (grid_result.best_score_, grid_result.best_params_))

#########

model = Sequential()
e = Embedding(vocab_size,
              100,
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(2,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

# Encode the 'v1' column of `data` as integers, then one-hot encode them.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['v1'])

y = to_categorical(integer_encoded)
# Hold out 33% of the samples for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

# Baseline run: 5 epochs with batch size 32, then loss and accuracy on the
# test split.
batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 5, batch_size=batch_size, verbose = 2)
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)


# Grid search: wrap createmodel for scikit-learn and try every combination of
# the epoch counts and batch sizes below. `model` and `batch_size` are rebound
# here, replacing the baseline model and the integer batch size above.
from keras.wrappers.scikit_learn import KerasClassifier
model= KerasClassifier(build_fn=createmodel, verbose=0,epochs=2)

epochs=[1,2]
batch_size=[32,64]
param_grid= dict(epochs=epochs, batch_size=batch_size)

from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result=  grid.fit(X_train,Y_train)
示例#30
0
                            n_jobs=-1,
                            cv=3)
results_neurons = neurons_grid.fit(x_train, y_train)


#######################################################################################################################
def final_model():
    """Build and compile the final binary classifier.

    Architecture: a 20-unit softplus layer on 8 input features (Glorot-normal
    initialisation, max-norm constraint of 2), 20% dropout, and a single
    sigmoid output unit. The model is compiled with binary cross-entropy,
    the Nadam optimizer and accuracy as metric.

    Returns:
        The compiled Keras model.
    """
    hidden = layers.Dense(20,
                          input_dim=8,
                          activation='softplus',
                          kernel_initializer='glorot_normal',
                          kernel_constraint=keras.constraints.maxnorm(2))
    dropout = layers.Dropout(0.2)
    output = layers.Dense(1,
                          activation="sigmoid",
                          kernel_initializer='glorot_normal')

    net = keras.Sequential()
    for layer in (hidden, dropout, output):
        net.add(layer)

    net.compile(loss="binary_crossentropy",
                optimizer='Nadam',
                metrics=["accuracy"])
    return net


# Train the final model for 200 epochs in batches of 10.
model = final_model()
model.fit(x_train, y_train, batch_size=10, epochs=200)
# NOTE(review): evaluate() is run on the *training* data, so the figure
# printed below is a training accuracy, not a held-out one -- confirm.
accuracy = model.evaluate(x_train, y_train)
# Predictions for the test inputs; not used further in the visible code.
test = model.predict(x_test)
# accuracy[0] is the loss, accuracy[1] the compiled accuracy metric.
print(f'Accuracy is: {accuracy[1] * 100:.2f} %')