示例#1
0
def do_classification():
    """Train a theanets classifier on ``glass.csv`` and evaluate it.

    The ``GT`` column is recoded with ``recode``, the rows are split 70/30
    into a training and a validation part, and a 9-18-18-6 network is
    trained with SGD.

    Returns:
        The result of ``confusion_matrix`` for the validation labels versus
        the network's predictions on the validation features.
    """
    climate.enable_default_logging()
    df = pd.read_csv("glass.csv")
    # Recode the class variable to go from 0 through 5.  Build a real list:
    # on Python 3 ``map`` returns a lazy iterator, which must not be assigned
    # to a DataFrame column.
    df["GT"] = [recode(label) for label in df["GT"]]
    train, valid = train_test_split(df, test_size=0.3)

    def to_arrays(frame):
        # Positional columns 0-8 are the inputs and column 9 is the target.
        # ``.iloc``/``.values`` replace ``.ix``/``.as_matrix()``, which are
        # no longer available in current pandas.
        inputs = frame.iloc[:, 0:9].values.astype('f')
        target = frame.iloc[:, 9].values.astype('i')
        return inputs, target

    t0 = to_arrays(train)
    t1 = to_arrays(valid)
    valid_X, valid_Y = t1

    exp = theanets.Experiment(theanets.Classifier, layers=(9, 18, 18, 6))
    exp.train(t0, t1, algorithm='sgd',
              learning_rate=1e-4, momentum=0.1,
              hidden_l1=0.001, weight_l2=0.001)

    cm = confusion_matrix(valid_Y, exp.network.predict(valid_X))

    return cm
    def __init__(self,
                 rank=10,
                 initializer=np.random.randn,
                 learning_rate=0.001,
                 patience=5,
                 l1_penalty=0.05,
                 l2_penalty=0.05,
                 min_improvement=0.005,
                 max_gradient_norm=5,
                 optimization_algorithm="adam",
                 min_value=None,
                 max_value=None,
                 verbose=True):
        """Store the solver's hyper-parameters.

        ``min_value`` and ``max_value`` are forwarded to ``Solver.__init__``
        together with ``fill_method="zero"``; every other argument is kept
        unchanged as an instance attribute of the same name.  When
        ``verbose`` is truthy, climate's default logging is enabled.
        """
        Solver.__init__(
            self, fill_method="zero", min_value=min_value, max_value=max_value)

        # Factorisation settings.
        self.rank = rank
        self.initializer = initializer

        # Regularisation settings.
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty

        # Optimiser settings.
        self.optimization_algorithm = optimization_algorithm
        self.learning_rate = learning_rate
        self.max_gradient_norm = max_gradient_norm
        self.patience = patience
        self.min_improvement = min_improvement

        self.verbose = verbose
        if self.verbose:
            climate.enable_default_logging()
示例#3
0
def do_classification():
    """Train a theanets classifier on ``glass.csv`` and evaluate it.

    The ``GT`` column is recoded with ``recode``, the rows are split 70/30
    into a training and a validation part, and a 9-18-18-6 network is
    trained with SGD.

    Returns:
        The result of ``confusion_matrix`` for the validation labels versus
        the network's predictions on the validation features.
    """
    climate.enable_default_logging()
    df = pd.read_csv("glass.csv")
    # Recode the class variable to go from 0 through 5.  A list is built
    # explicitly because ``map`` is lazy on Python 3 and its iterator must
    # not be assigned to a DataFrame column.
    df["GT"] = [recode(label) for label in df["GT"]]
    train, valid = train_test_split(df, test_size=0.3)

    # Positional columns 0-8 are the inputs and column 9 is the target.
    # ``.iloc``/``.values`` replace ``.ix``/``.as_matrix()``, which are no
    # longer available in current pandas.
    train_X = train.iloc[:, 0:9].values.astype('f')
    train_Y = train.iloc[:, 9].values.astype('i')
    valid_X = valid.iloc[:, 0:9].values.astype('f')
    valid_Y = valid.iloc[:, 9].values.astype('i')
    t0 = (train_X, train_Y)
    t1 = (valid_X, valid_Y)

    exp = theanets.Experiment(theanets.Classifier, layers=(9, 18, 18, 6))
    exp.train(t0, t1, algorithm='sgd',
              learning_rate=1e-4, momentum=0.1,
              hidden_l1=0.001, weight_l2=0.001)

    cm = confusion_matrix(valid_Y, exp.network.predict(valid_X))

    return cm
示例#4
0
def do_regression():
    """Fit a 6-6-1 theanets regressor and plot its predictions.

    The data sets come from ``create_datasets``; columns 0-5 are used as
    inputs and column 6 as the target.  After training, the test targets
    (red) and the network's predictions (blue) are drawn as scatter plots
    against the 1-based sample number and the figure is shown.
    """
    climate.enable_default_logging()

    train, test = create_datasets()
    x_train = train[:, 0:6]
    # The training target is passed as a column of shape (n_samples, 1).
    y_train = train[:, 6]
    y_train = np.reshape(y_train, (y_train.shape[0], 1))

    exp = theanets.Experiment(theanets.Regressor, layers=(6, 6, 1))
    exp.train([x_train, y_train])

    # Do the testing.  The test target stays one-dimensional for plotting;
    # the reshaped copy the original built before training was never used.
    x_test = test[:, 0:6]
    y_test = test[:, 6]

    yp = exp.network.predict(x_test)

    xi = list(range(1, x_test.shape[0] + 1))

    pb.scatter(xi, y_test, color="red")
    pb.scatter(xi, yp, color="blue")

    pb.show()
示例#5
0
文件: nn_hf1.py 项目: bi-/nn
def tn_main():
    """Train a classifier on the loaded MATLAB data and report its errors.

    The load type is derived from ``sys.argv[1]``.  The inputs are extended
    to the maximum found over all data, split into training, validation and
    test sets, and used to train a network with two ReLU hidden layers and a
    softmax output.  For each split the number of failures, the accuracy,
    the failing (expected, predicted) pairs and the confusion matrix are
    printed.
    """
    handle_args()
    load_type = get_load_type(sys.argv[1])
    files = myfiles(PATH_PREFIX, load_type)
    # load the matlab files to a dict
    inputs = loadMat(files)
    # determine the max on all data
    maximum = findMax(loadMat(myfiles(PATH_PREFIX, "")))
    # extend the x y z coordinates
    inputs = extendTo(inputs, maximum)
    # split them into train/validation/test datasets (70,20,10)
    Tr, V, Tst = create_dataset(inputs)

    climate.enable_default_logging()
    layer_input = 3 * maximum
    # Floor division keeps the layer sizes integral on Python 3 as well
    # (identical to ``/`` for integers on Python 2).
    layer_hidden = layer_input // 2
    layer_output = layer_input // 10
    exp = theanets.Experiment(
        theanets.Classifier,
        layers=(layer_input,
                (layer_hidden, 'relu'),
                (layer_hidden, 'relu'),
                (layer_output, 'softmax')))

    # Train the model
    exp.train(Tr, V, learning_rate=1e-4, momentum=0.9, min_improvement=0.01)
    # Never summarise printed arrays.  ``sys.maxsize`` is the documented way
    # to do this; the string 'nan' is rejected by current NumPy releases.
    np.set_printoptions(threshold=sys.maxsize)

    # Show confusion matrices on the training/validation/test splits.
    for label, (X, y) in (('Training:', Tr), ('Validation:', V), ('Test:', Tst)):
        predicted = exp.network.predict(X)
        # Pairs of (predicted, expected) class codes rendered as characters.
        result = [(chr(p), chr(t)) for p, t in zip(predicted, y)]
        failures = [(b, a) for a, b in result if a != b]
        print('{} {} {}'.format(label, len(failures), len(y)))
        print((len(result) - len(failures)) / float(len(result)))
        print("Failures (expected,predicted):{}".format(failures))
        print(confusion_matrix(y, predicted))
示例#6
0
    def __init__(
            self,
            rank=10,
            initializer=np.random.randn,
            learning_rate=0.001,
            patience=5,
            l1_penalty=0.05,
            l2_penalty=0.05,
            min_improvement=0.005,
            max_gradient_norm=5,
            optimization_algorithm="adam",
            min_value=None,
            max_value=None,
            verbose=True):
        """Record the solver configuration on the instance.

        ``Solver.__init__`` receives ``fill_method="zero"`` plus the given
        ``min_value`` and ``max_value``.  All remaining arguments are stored
        as attributes with the same names.  A truthy ``verbose`` additionally
        enables climate's default logging.
        """
        Solver.__init__(
            self,
            fill_method="zero",
            min_value=min_value,
            max_value=max_value,
        )

        # Factorisation settings.
        self.rank = rank
        self.initializer = initializer

        # Optimiser settings.
        self.optimization_algorithm = optimization_algorithm
        self.learning_rate = learning_rate
        self.max_gradient_norm = max_gradient_norm

        # Early-stopping settings.
        self.patience = patience
        self.min_improvement = min_improvement

        # Regularisation settings.
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty

        self.verbose = verbose
        if self.verbose:
            climate.enable_default_logging()
示例#7
0
def do_regression():
    """Fit a 6-6-1 theanets regressor and plot its predictions.

    The data sets come from ``create_datasets``; columns 0-5 are used as
    inputs and column 6 as the target.  After training, the test targets
    (red) and the network's predictions (blue) are drawn as scatter plots
    against the 1-based sample number and the figure is shown.
    """
    climate.enable_default_logging()

    train, test = create_datasets()
    x_train = train[:, 0:6]
    # The training target is passed as a column of shape (n_samples, 1).
    y_train = train[:, 6]
    y_train = np.reshape(y_train, (y_train.shape[0], 1))

    exp = theanets.Experiment(theanets.Regressor, layers=(6, 6, 1))
    exp.train([x_train, y_train])

    # Do the testing.  The test target stays one-dimensional for plotting;
    # the reshaped copy the original built before training was never used.
    x_test = test[:, 0:6]
    y_test = test[:, 6]

    yp = exp.network.predict(x_test)

    xi = list(range(1, x_test.shape[0] + 1))

    pb.scatter(xi, y_test, color="red")
    pb.scatter(xi, yp, color="blue")

    pb.show()
示例#8
0
def main(input_file, model_path):
    """Train a single-layer theanets classifier on preprocessed images.

    Args:
        input_file: directory containing ``trainPreproc_32_32.npy`` and
            ``labelsPreproc.npy``.
        model_path: file the trained network is pickled to.

    The data is split 75/25 into training and validation parts, stratified
    on the arg-max of the label rows, flattened to one row per image and
    used to train the network with the 'adadelta' algorithm.
    """
    nb_classes = 62  # A-Z, a-z and 0-9

    # Input image dimensions
    img_rows, img_cols = 32, 32

    # Load the preprocessed data and labels
    X_train_all = np.load(
        input_file + "/trainPreproc_" + str(img_rows) + "_" + str(img_cols) +
        ".npy")
    Y_train_all = np.load(input_file + "/labelsPreproc.npy")

    X_train, X_val, Y_train, Y_val = train_test_split(
        X_train_all,
        Y_train_all,
        test_size=0.25,
        stratify=np.argmax(Y_train_all, axis=1))

    print(X_train.shape)

    labels = convert_(Y_train)
    validation = convert_(Y_val)

    # Flatten each image to a vector of axis-2 * axis-3 values.
    # NOTE(review): this assumes axis 1 has length 1 - confirm.
    X_train = X_train.reshape(
        (X_train.shape[0], X_train.shape[2] * X_train.shape[3]))
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[2] * X_val.shape[3]))

    print('Training...')
    climate.enable_default_logging()
    # Build a classifier with one input per flattened pixel and one output
    # per class; there is no hidden layer.
    net = theanets.Classifier(layers=[X_train.shape[1], nb_classes])

    train = X_train.astype('f'), labels.astype('i')
    valid = X_val.astype('f'), validation.astype('i')

    net.train(train,
              valid,
              algo='adadelta',
              learning_rate=1e-10,
              momentum=0.00001,
              input_noise=0.3,
              hidden_l1=0.1)

    print('saving model paramters to {}'.format(model_path))
    with open(model_path, 'wb') as fid:
        pickle.dump(net, fid)
    print('Done.')
示例#9
0
def main():
    """Train one classifier per optimisation algorithm and save the results.

    For every layer specification and every algorithm name in ``targets`` a
    fresh experiment is built, trained three times in a row, scored on the
    test data with ``multiclass_log_loss``, saved under ``results/`` and
    used to write predictions for the Kaggle test features.
    """
    training_data, validation_data, test_data, std_scale = load_training_data()
    climate.enable_default_logging()

    targets = ['esgd', 'layerwise', 'rmsprop', 'nag', 'rprop', 'sgd', 'sample',
               'adadelta']
    layers = [(93,
               dict(size=512, sparsity=0.2, activation='relu'),
               dict(size=512, sparsity=0.2, activation='relu'),
               dict(size=512, sparsity=0.2, activation='relu'),
               9)]

    for layer_spec in layers:
        for target in targets:
            exp = theanets.Experiment(
                theanets.Classifier,
                layers=layer_spec,
                weighted=True,
                output_activation='softmax'
            )

            # Three consecutive training runs with identical arguments, as in
            # the original triplicated call.
            for _ in range(3):
                exp.train(training_data, validation_data, optimize=target)

            # get a prediction of the accuracy from the test_data
            test_results = exp.network.predict(test_data[0])
            loss = multiclass_log_loss(test_data[1], test_results)

            print('Test multiclass log loss: {}'.format(loss))

            out_file = 'results/' + str(loss) + target
            exp.save(out_file + '.pkl')

            # save the kaggle results
            # NOTE(review): the features are reloaded for every experiment;
            # this looks loop-invariant - confirm before hoisting.
            kaggle_test_features = load_test_data(std_scale)
            results = exp.network.predict(kaggle_test_features)
            save_results(out_file + '.csv', kaggle_test_features, results)
示例#10
0
import climate
import ConfigParser

import theano
import theano.tensor as T
import theano.sandbox.rng_mrg

import lasagne
from lasagne.layers import ReshapeLayer, Layer
from lasagne.init import Normal
from lasagne.regularization import regularize_layer_params_weighted, l2, l1
from lasagne.regularization import regularize_layer_params

# Module-level logger named 'trainer', obtained from climate.
# NOTE(review): the name ``logging`` would shadow the standard-library module
# of the same name if that were ever imported in this file.
logging = climate.get_logger('trainer')

# Enable climate's default logging configuration at import time.
climate.enable_default_logging()


def load_model(filename):
    """Return the object unpickled from *filename*.

    The file is opened in binary mode and closed again even when unpickling
    fails.

    NOTE: unpickling can execute arbitrary code; only load files from a
    trusted source.
    """
    # ``open`` works on Python 2 and 3; the ``file`` builtin exists only on
    # Python 2.
    with open(filename, 'rb') as f:
        return cPickle.load(f)


def save_model(filename, model):
    """Pickle all parameter values of the lasagne *model* to *filename*.

    The values come from ``lasagne.layers.get_all_param_values`` and are
    written with the highest available pickle protocol.  The file is closed
    even when pickling fails.  Returns ``None``.
    """
    params = lasagne.layers.get_all_param_values(model)
    # ``open`` works on Python 2 and 3; the ``file`` builtin exists only on
    # Python 2.
    with open(filename, 'wb') as f:
        cPickle.dump(params, f, protocol=cPickle.HIGHEST_PROTOCOL)
def experiment(
    X_learn, y_learn, X_test, y_test,
    algo='rmsprop',
    learning_rate=0.0001,
    momentum=0,
    neurons=10**2,
    patience=100*1000,
    # Sparse hidden activations
    # have shown much promise in computational neural networks.
    hidden_l1 = 0.01,
    weight_l2 = 0.0001,
    ):
    """Train a two-class theanets classifier and plot its learning curves.

    The learning data is shuffled and split 70/30 into training and
    validation parts.  After every training iteration the training accuracy,
    the validation accuracy and the validation accuracy of class 0 and of
    class 1 are recorded and passed to ``update_learning_plot``.  Finally a
    classification report and a confusion matrix for the test data are
    printed.

    Note that ``X_learn`` and ``y_learn`` are shuffled in place and that the
    global NumPy random state is re-seeded with 2016.

    Args:
        X_learn, y_learn: learning inputs and targets.
        X_test, y_test: held-out inputs and targets for the final report.
        algo, learning_rate, momentum, hidden_l1, weight_l2, patience:
            forwarded unchanged to ``exp.itertrain``.
        neurons: size of the single ReLU hidden layer.
    """

    import climate

    climate.enable_default_logging()

    TRAIN_PART = 0.7
    data_threshold = int(TRAIN_PART * len(X_learn))

    print('Divide sets...')

    # Re-seeding before each shuffle gives both (equally long) arrays the
    # same permutation, so inputs and targets stay aligned.
    np.random.seed(2016)
    np.random.shuffle(X_learn)
    np.random.seed(2016)
    np.random.shuffle(y_learn)
    X_train, y_train = X_learn[:data_threshold], y_learn[:data_threshold]
    X_valid, y_valid = X_learn[data_threshold:], y_learn[data_threshold:]

    layers = (X_learn.shape[1], (neurons, 'relu'), 2)

    import theanets

    exp = theanets.Experiment(
        theanets.Classifier,
        layers=layers,
    )

    train_acc_history = []
    valid_acc_history = []
    valid_0_acc_history = []
    valid_1_acc_history = []

    init_learning_plot()

    from sklearn.metrics import accuracy_score

    print('Start learning...')

    # The per-class validation subsets do not change during training, so
    # they are selected once instead of in every iteration.
    X_valid_0, y_valid_0 = X_valid[y_valid == 0], y_valid[y_valid == 0]
    X_valid_1, y_valid_1 = X_valid[y_valid == 1], y_valid[y_valid == 1]

    training_steps = exp.itertrain(
        (X_train, y_train),
        (X_valid, y_valid),
        algo=algo,
        learning_rate=learning_rate,
        momentum=momentum,
        hidden_l1=hidden_l1,
        weight_l2=weight_l2,
        patience=patience,
    )

    for iteration, (tm, vm) in enumerate(training_steps, 1):
        # Validate every class separately
        y_pred_0 = exp.network.classify(X_valid_0)
        y_pred_1 = exp.network.classify(X_valid_1)
        valid_0_acc_history.append(accuracy_score(y_valid_0, y_pred_0) * 100)
        valid_1_acc_history.append(accuracy_score(y_valid_1, y_pred_1) * 100)

        train_acc_history.append(tm['acc'] * 100)
        valid_acc_history.append(vm['acc'] * 100)

        # FIXME: First validation ACC is 1.0
        # The first ten recorded values are left out of the plot.
        update_learning_plot(
            train_acc_history[10:],
            valid_acc_history[10:],
            valid_0_acc_history[10:],
            valid_1_acc_history[10:])

        if iteration == 490:
            save_learning_plot()

    save_learning_plot('current_learning_end.png')

    from sklearn.metrics import classification_report, confusion_matrix

    y_pred = exp.network.classify(X_test)

    print('classification_report:\n{}'.format(
        classification_report(y_test, y_pred)))
    print('confusion_matrix:\n{}'.format(confusion_matrix(y_test, y_pred)))
示例#12
0
# NOTE(review): these are plain Python variables and the second assignment
# overwrites the first.  Nothing visible here exports the value to the
# process environment - confirm whether the flags are meant to be set via
# os.environ before theano is imported.
THEANO_FLAGS = 'device=cpu,device=gpu0'
THEANO_FLAGS = 'floatX=float32,device=gpu0,lib.cnmem=1'

import theanets
import climate
import numpy as np
import pandas as pd
import math
from sklearn import preprocessing
from datetime import datetime

climate.enable_default_logging()  # to print downhill's iteration result

# Directory holding the input CSV files.
path = 'C:/Users/Administrator/Desktop/Data'

# SNR levels that appear in the input file names.
SNR2 = [0, 5, 10]
# Length of the placeholder row used to seed X_train below.
MFCC = 22

# Seed row for the stacking loop.
# NOTE(review): presumably this becomes a row of MFCC NaN values that stays
# at index 0 after the np.vstack calls - confirm that later code drops it.
X_train = [None] * MFCC
X_train = np.array(np.float64(X_train))
# print X_train.shape

# Start time of the loading phase.
start0 = datetime.now()

print('loading MAG_y_train...')
# Read one CSV file per (noise index 1..5, SNR level) pair and append its
# rows to X_train.
for n in range(1, 6):
    for level in SNR2:
        X_train_0 = np.genfromtxt(path + '/MFCCs_train' + '_SNR_' +
                                  str(level) + '_noise_' + str(n) + '.csv',
                                  delimiter=',')
        X_train = np.vstack((X_train, X_train_0))
示例#13
0
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division, print_function, absolute_import
from sklearn.preprocessing.data import StandardScaler
from rep.test.test_estimators import check_classifier, check_regression, check_params, \
    check_classification_reproducibility
from rep.test.test_estimators import generate_classification_data
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import BaggingClassifier
from rep.estimators.sklearn import SklearnClassifier
from rep.estimators.theanets import TheanetsClassifier, TheanetsRegressor
import climate
from tests import known_failure, retry_if_fails

# Enable climate's default logging with a default level of 'ERROR'.
climate.enable_default_logging(default_level='ERROR')

__author__ = 'Lisa Ignatyeva, Tatiana Likhomanenko, Alex Rogozhnikov'

# Flag dictionaries for the classifier and the regressor.
# NOTE(review): presumably passed to the check_classifier / check_regression
# helpers imported above - confirm against the tests that use them.
classifier_params = dict(
    has_staged_pp=False,
    has_importances=False,
    supports_weight=True,
)

regressor_params = dict(
    has_staged_predictions=False,
    has_importances=False,
    supports_weight=True,
)
示例#14
0
def experiment(
    X_learn,
    y_learn,
    X_test,
    y_test,
    neurons,
    theanets_kwargs,
    column_names=None,
    name='my_exp',
    max_iters=float('inf'),
    gather_metrics=[
        'accuracy_score',
        'matthews_corrcoef',
        'roc_auc_score',
        'f1_score',
    ],
    plot_every=50,
    regression=False,
):
    """Train a theanets network while recording metrics for plotting.

    The learning data is shuffled and split 70/30 into training and
    validation parts.  On every ``plot_every``-th training iteration the
    requested metrics are evaluated, printed and handed to
    ``update_learning_plot``.

    Note that ``X_learn`` and ``y_learn`` are shuffled in place and that the
    global NumPy random state is re-seeded with 2016.

    Args:
        X_learn, y_learn: learning inputs and targets.
        X_test, y_test: held-out inputs and targets.
        neurons: tuple of hidden-layer specifications, placed between the
            input size and the output size of the network.
        theanets_kwargs: dict of keyword arguments forwarded to
            ``exp.itertrain``.
        column_names: optional sequence with one name per input column; when
            given, a ``'hid1_w_<name>'`` inspection metric is registered for
            every column.
        name: label used in the file names of the milestone plots.
        max_iters: training stops once the iteration count exceeds this.
        gather_metrics: names of the metrics to record.  Recognised names
            are 'accuracy_score', 'matthews_corrcoef', 'roc_auc_score',
            'f1_score', 'mse' and 'r2_score'.  The default list is never
            modified.
        plot_every: metrics are evaluated on iterations divisible by this.
        regression: build a ``theanets.Regressor`` with one output instead
            of a ``theanets.Classifier`` with two outputs.

    Returns:
        dict mapping each metric name to a dict that maps a data-set name to
        the list of recorded values.  Each inner dict also has a
        ``'test_max'`` entry: the largest recorded ``'test'`` value, or
        ``None`` when nothing was recorded for that metric.
    """

    import climate

    climate.enable_default_logging()

    TRAIN_PART = 0.7
    data_threshold = int(TRAIN_PART * len(X_learn))

    print('Divide sets...')

    # Re-seeding before each shuffle gives both (equally long) arrays the
    # same permutation, so inputs and targets stay aligned.
    np.random.seed(2016)
    np.random.shuffle(X_learn)
    np.random.seed(2016)
    np.random.shuffle(y_learn)
    X_train, y_train = X_learn[:data_threshold], y_learn[:data_threshold]
    X_valid, y_valid = X_learn[data_threshold:], y_learn[data_threshold:]

    datasets = {
        'training': (X_train, y_train),
        'validation': (X_valid, y_valid),
        '0 validation': (X_valid[y_valid == 0], y_valid[y_valid == 0]),
        '1 validation': (X_valid[y_valid == 1], y_valid[y_valid == 1]),
        '0 test': (X_test[y_test == 0], y_test[y_test == 0]),
        '1 test': (X_test[y_test == 1], y_test[y_test == 1]),
        'test': (X_test, y_test)
    }

    n_outputs = 1 if regression else 2
    layers = (X_learn.shape[1], ) + neurons + (n_outputs, )

    import theanets

    # NOTE(review): as in the original, the classifier experiment is built
    # first even in regression mode.  Dropping it could change the random
    # initialisation of the regressor if theanets draws from the global
    # NumPy random state - confirm before simplifying.
    exp = theanets.Experiment(
        theanets.Classifier,
        layers=layers,
    )

    if regression:
        exp = theanets.Experiment(theanets.Regressor, layers=layers)

    network = exp.network

    print('Start learning...')

    iteration = 0

    # Metrics that we want to measure while observing results, as
    # (metric name, name of the function in sklearn.metrics), in plot order.
    metric_functions = (
        ('accuracy_score', 'accuracy_score'),        # ACC
        ('matthews_corrcoef', 'matthews_corrcoef'),  # MCC
        ('roc_auc_score', 'roc_auc_score'),          # AUC
        ('f1_score', 'f1_score'),                    # F1
        ('mse', 'mean_squared_error'),               # regression: MSE
        ('r2_score', 'r2_score'),                    # regression: R^2
    )
    metrics = [
        {'name': metric_name,
         'function': getattr(sklearn.metrics, function_name)}
        for metric_name, function_name in metric_functions
        if metric_name in gather_metrics
    ]

    if column_names is not None:
        for w_i in range(X_learn.shape[1]):
            metrics.append({
                'name': 'hid1_w_' + column_names[w_i],
                'inspect': ('hid1', 'w', w_i)
            })

    # Sets that we want to test
    plot_sets = ['training', 'validation', '0 test', '1 test', 'test']

    # These metrics are not recorded on the per-class sets, whose names
    # start with '0' or '1'.
    not_per_class = ('roc_auc_score', 'f1_score', 'matthews_corrcoef')

    def is_skipped(metric_name, plot_set):
        """Tell whether *metric_name* is left out for *plot_set*."""
        return metric_name in not_per_class and plot_set[0] in ('0', '1')

    init_learning_plot()

    # Init histories
    plot_history = dict()
    for metric in metrics:
        plot_history[metric['name']] = {
            plot_set: []
            for plot_set in plot_sets
            if not is_skipped(metric['name'], plot_set)
        }
    iters = []

    print('Collecting following metrics:')
    print(gather_metrics)

    for tm, vm in exp.itertrain(datasets['training'], datasets['validation'],
                                **theanets_kwargs):
        iteration += 1

        if iteration > max_iters:
            break

        # Only evaluate on iterations divisible by plot_every.
        if iteration % plot_every:
            continue

        for metric in metrics:
            metric_name = metric['name']

            # NOTE(review): the 'hid1_w_*' inspection metrics are never
            # listed in gather_metrics by this function, so this filter
            # skips them unless the caller names them explicitly - confirm
            # whether that is intended.
            if metric_name not in gather_metrics:
                continue

            history = plot_history[metric_name]

            for plot_set in plot_sets:
                if is_skipped(metric_name, plot_set):
                    continue

                if metric_name == 'accuracy_score' \
                        and plot_set == 'validation':
                    # Get it directly from theanets
                    history[plot_set].append(vm['acc'] * 100)
                    continue

                if metric_name == 'accuracy_score' \
                        and plot_set == 'training':
                    # Get it directly from theanets
                    history[plot_set].append(tm['acc'] * 100)
                    continue

                if 'function' not in metric:
                    # Inspection metrics need no predictions.
                    continue

                X_set, y_set = datasets[plot_set]
                if not regression:
                    y_pred = exp.network.classify(X_set)
                else:
                    y_pred = exp.network.predict(X_set)

                # Scores are stored as percentages clipped at zero, except
                # for the regression metrics, which are stored unchanged.
                scaling, floor = 100., 0.
                if metric_name in ('mse', 'r2_score'):
                    scaling, floor = 1., float('-inf')
                score = metric['function'](y_set, y_pred) * scaling
                history[plot_set].append(max(floor, score))

            if 'inspect' in metric:
                layer, param_name, param_x = metric['inspect']
                param = network.find(layer, param_name)
                values = param.get_value()
                mean_value = np.mean(values[param_x]) * 100.
                # As in the original, the value is stored under the last
                # plot set ('test').
                history[plot_sets[-1]].append(mean_value)

        iters.append(iteration)

        latest = {
            m + '_' + s: plot_history[m][s][-1:]
            for m in plot_history for s in plot_history[m]
        }
        print('iteration {} {}'.format(iteration, latest))
        update_learning_plot(iters, plot_history)

        if iteration in [500, 1000, 2000, 5000]:
            save_learning_plot('current_learning_' + str(iteration) + '_' +
                               name + '.png')

    save_learning_plot('current_learning_end.png')

    from sklearn.metrics import classification_report, confusion_matrix

    if not regression:
        y_pred = exp.network.classify(X_test)
        print('classification_report:\n{}'.format(
            classification_report(y_test, y_pred)))
        print('confusion_matrix:\n{}'.format(
            confusion_matrix(y_test, y_pred)))

    for metric in metrics:
        history = plot_history[metric['name']]
        test_values = history['test']
        # max() raises ValueError on an empty list, which happens when no
        # iteration was evaluated or the metric was never recorded.
        history['test_max'] = max(test_values) if test_values else None

    return plot_history
示例#15
0
def run(path_project,
        path_analysis,
        cfg_project,
        cfg_ann,
        sgls_all,
        plots=False,
        debug=False):
    '''
    Compile subglide data, tune network architecture and test dataset size

    Args
    ----
    path_project: str
        Project directory; the output directory is created beneath it
    path_analysis: str
        Name of the analysis output directory
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
        (not read by this function)
    cfg_ann: OrderedDict
        Dictionary of configuration parameters for the ANN. It is modified
        in place: the bin values are stored under `net_all`/`targets` and,
        in debug mode, every `net_tuning` list is cut to its first value
    sgls_all: pandas.DataFrame
        Compiled subglide data (presumably a DataFrame); rows with missing
        values are dropped before use
    plots: bool
        Switch for generating diagnostic plots after each network training
    debug: bool
        Switch for running single network configuration

    Returns
    -------
    cfg_ann: OrderedDict
        Dictionary of network configuration parameters used
    data: tuple
        Tuple collecting training, validation, and test sets
    results: tuple
        Tuple collecting results dataframes and confusion matrices

    Note
    ----
    The validation set is split into `validation` and `test` sets, the
    first used for initial comparisons of various net configuration
    accuracies and the second for a clean test set to get a true accuracy,
    as reusing the `validation` set can cause the routine to overfit to the
    validation set.
    '''

    import climate
    import os
    import theano
    import yamlord

    from .utils_ann import ppickle
    from ..config import paths, fnames

    # Environment settings - logging, Theano, load configuration, set paths
    #---------------------------------------------------------------------------
    climate.enable_default_logging()
    theano.config.compute_test_value = 'ignore'

    # Configuration settings: in debug mode keep only the first candidate of
    # every tuning parameter so a single configuration is trained
    if debug is True:
        tuning = cfg_ann['net_tuning']
        for key in list(tuning.keys()):
            tuning[key] = [tuning[key][0]]

    # Drop fields missing values
    sgls_nonan = sgls_all.dropna()

    print('\nSplit and normalize input/output data')
    features = cfg_ann['net_all']['features']
    target = cfg_ann['net_all']['target']
    n_targets = cfg_ann['net_all']['n_targets']
    valid_frac = cfg_ann['net_all']['valid_frac']

    # Normalize input (features) and output (target)
    nsgls, bins = _normalize_data(sgls_nonan, features, target, n_targets)

    # Get indices of train, validation and test datasets
    ind_train, ind_valid, ind_test = _split_indices(nsgls, valid_frac)

    # Split dataframes into train, validation and test  (features, targets) tuples
    train, valid, test = _create_datasets(nsgls, ind_train, ind_valid,
                                          ind_test, features, target)
    print('train', len(train[0]), len(train[1]))
    print('valid', len(valid[0]), len(valid[1]))
    print('test', len(test[0]), len(test[1]))

    # Save information on input data to config
    cfg_ann['net_all']['targets'] = [float(b) for b in bins]

    # Tuning - find optimal network architecture
    #---------------------------------------------------------------------------
    print('\nTune netork configuration')

    # Get all dict of all configuration permutations of params in `tune_params`
    configs = _get_configs(cfg_ann['net_tuning'])

    # Cycle through configurations storing configuration, net in `results_tune`
    n_features = len(features)

    print('\nNumber of features: {}'.format(n_features))
    print('Number of targets: {}\n'.format(n_targets))

    results_tune, tune_accuracy, cms_tune = _tune_net(
        train,
        valid,
        test,
        bins,
        configs,
        n_features,
        n_targets,
        plots,
    )

    # Get neural net configuration with best accuracy
    best_config = get_best(results_tune, 'config')

    # Test effect of dataset size
    #---------------------------------------------------------------------------
    print('\nRun percentage of datasize tests')

    # Get randomly sorted and subsetted datasets to test effect of dataset_size
    # i.e. - a dataset with the first `subset_fraction` of samples.
    results_dataset, data_accuracy, cms_data = _test_dataset_size(
        best_config, train, valid, test, bins, n_features, n_targets, plots,
        debug)

    print('\nTest data accuracy (Configuration tuning): {}'.format(
        tune_accuracy))
    print(
        'Test data accuracy (Datasize test):        {}'.format(data_accuracy))

    # Save results and configuration to output directory
    #---------------------------------------------------------------------------

    # Create output directory if it does not exist
    path_output = os.path.join(path_project, paths['ann'], path_analysis)
    os.makedirs(path_output, exist_ok=True)

    # Save updated `cfg_ann` to output directory. The path is joined with
    # `path_output` exactly once; joining it a second time produced a nested
    # `path_output/path_output/...` path whenever `path_output` was relative
    file_cfg_ann = os.path.join(path_output, fnames['cfg']['ann'])
    yamlord.write_yaml(cfg_ann, file_cfg_ann)

    # Compiled SGLs before NaN drop and normalization
    ppickle(sgls_all, os.path.join(path_output, fnames['ann']['sgls']))

    # Compiled SGLs after NaN drop and normalization
    ppickle(nsgls, os.path.join(path_output, fnames['ann']['sgls_norm']))

    # Save output data to analysis output directory
    tune_fname = fnames['ann']['tune']
    datasize_fname = fnames['ann']['dataset']
    ppickle(results_tune, os.path.join(path_output, tune_fname))
    ppickle(results_dataset, os.path.join(path_output, datasize_fname))

    # Save train, validation, test datasets
    ppickle(train, os.path.join(path_output, fnames['ann']['train']))
    ppickle(valid, os.path.join(path_output, fnames['ann']['valid']))
    ppickle(test, os.path.join(path_output, fnames['ann']['test']))

    # Save confusion matrices of both test series
    ppickle(cms_tune, os.path.join(path_output, fnames['ann']['cms_tune']))
    ppickle(cms_data, os.path.join(path_output, fnames['ann']['cms_data']))

    return cfg_ann, (train, valid, test), (results_tune, results_dataset,
                                           cms_tune, cms_data)
#!/usr/bin/env python

import climate
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rng
import theanets

# Enable climate's default logging configuration.
climate.enable_default_logging()

# Sample points: 256 evenly spaced values covering [0, 4*pi].
S = np.linspace(0, 4 * np.pi, 256)

def wave(i=0):
    """Return 0.4*sin(S) + 0.3*sin(i*S/2) with shape (len(S), 1, 1)."""
    base = 0.4 * np.sin(S)
    overtone = 0.3 * np.sin(i * S / 2)
    signal = base + overtone
    # Append two singleton axes to the one-dimensional signal.
    return signal[:, None, None]

def waves(n=64):
    """Return *n* random waves joined along axis 1 as a float32 array.

    Each wave uses an integer ``i`` drawn from ``rng.randint(15, 30)``.
    """
    columns = []
    for _ in range(n):
        columns.append(wave(rng.randint(15, 30)))
    stacked = np.concatenate(columns, axis=1)
    return stacked.astype('f')

# Set up a network and train it using some sinusoidal data.
# A recurrent regressor with layer sizes (2, 10, 1); sum_waves below builds
# inputs with two channels and targets with one channel.

e = theanets.Experiment(
    theanets.recurrent.Regressor,
    layers=(2, 10, 1),
    train_batches=16)

def sum_waves():
    """Return ``[inputs, targets]`` for the wave-summing task.

    Two independent batches from ``waves()`` are joined along the last axis
    to form the inputs; the targets are their element-wise sum.
    """
    first = waves()
    second = waves()
    inputs = np.concatenate((first, second), axis=2)
    targets = first + second
    return [inputs, targets]

# Run the experiment with sum_waves as both data sources.
# NOTE(review): presumably the first argument supplies training batches and
# the second validation batches - confirm against the theanets API.
e.run(sum_waves, sum_waves)
示例#17
0
# limitations under the License.


from __future__ import division, print_function, absolute_import
from sklearn.preprocessing.data import StandardScaler
from rep.test.test_estimators import check_classifier, check_regression, check_params, \
    check_classification_reproducibility
from rep.test.test_estimators import generate_classification_data
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import BaggingClassifier
from rep.estimators.sklearn import SklearnClassifier
from rep.estimators.theanets import TheanetsClassifier, TheanetsRegressor
import climate
from tests import known_failure, retry_if_fails

# Enable climate's default logging with a default level of 'ERROR'.
climate.enable_default_logging(default_level='ERROR')

__author__ = 'Lisa Ignatyeva, Tatiana Likhomanenko, Alex Rogozhnikov'

# Flag dictionaries for the classifier and the regressor.
# NOTE(review): presumably passed to the check_classifier / check_regression
# helpers imported above - confirm against the tests that use them.
classifier_params = dict(
    has_staged_pp=False,
    has_importances=False,
    supports_weight=True,
)

regressor_params = dict(
    has_staged_predictions=False,
    has_importances=False,
    supports_weight=True,
)