Example #1
def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        update=adam,
        update_learning_rate=theano.shared(util.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        eval_size=eval_size,
        custom_score=('kappa', util.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(layers, **args)
    net.set_split(files, labels)
    return net
Example #2
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True, 
            batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
        SaveBestWeights(weights_file=config.weights_file,
                        loss='F1', greater_is_better=True),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 100,
        'verbose': 2,
        'update_learning_rate': theano.shared(
            util.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('F1', util.F1Score),

    }
    args.update(kwargs)
    net = Net(**args)
    return net
Example #3
def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'),
                      'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        layers=layers,
        update=adam,
        update_learning_rate=theano.shared(util.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        #eval_size=eval_size,
        custom_score=('kappa', util.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(eval_size=eval_size, **args)
    net.set_split(files, labels)
    return net
Example #4
File: nn.py  Project: bmelab513/teamOo-CPU
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True, 
            batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
            SaveBestWeights(weights_file=config.weights_file,
                            loss='kappa', greater_is_better=True),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 200,
        'verbose': 1,
        'update_learning_rate': theano.shared(
            util.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('kappa', util.kappa),

    }
    args.update(kwargs)
    net = Net(**args)
    return net
class AdjustVariable(object):
    # Scaffolding reconstructed around the original __call__ (only the method
    # appeared in this snippet); the class name and start/stop defaults are
    # assumed from the AdjustVariable(...) calls used elsewhere on this page.
    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name
        self.start, self.stop = start, stop
        self.ls = None  # linear schedule, built lazily over max_epochs

    def __call__(self, nn, train_history):
        if self.ls is None:
            self.ls = numpy.linspace(self.start, self.stop, nn.max_epochs)
        epoch = train_history[-1]["epoch"]
        new_value = float32(self.ls[epoch - 1])
        getattr(nn, self.name).set_value(new_value)
Example #7
class Schedule(object):
    # Scaffolding reconstructed to match the Schedule(...) calls above; only
    # __call__ appeared in the original snippet. `schedule` maps an epoch to a
    # new value for the shared variable `name`, or to 'stop' to end training.
    def __init__(self, name, schedule, weights_file=None):
        self.name = name
        self.schedule = schedule
        self.weights_file = weights_file

    def __call__(self, nn, train_history):
        epoch = train_history[-1]['epoch']
        if epoch in self.schedule:
            new_value = self.schedule[epoch]
            if new_value == 'stop':
                if self.weights_file is not None:
                    nn.save_params_to(self.weights_file)
                raise StopIteration  # nolearn treats this as "stop training"
            getattr(nn, self.name).set_value(util.float32(new_value))
def define_net():
    define_net_specific_parameters()
    io = ImageIO()

    # Read pandas csv labels
    y = util.load_labels()

    if params.SUBSET != 0:
        y = y[:params.SUBSET]

    X = np.arange(y.shape[0])

    mean, std = io.load_mean_std(circularized=params.CIRCULARIZED_MEAN_STD)
    keys = y.index.values

    if params.AUGMENT:
        train_iterator = AugmentingParallelBatchIterator(keys, params.BATCH_SIZE, std, mean, y_all=y)
    else:
        train_iterator = ParallelBatchIterator(keys, params.BATCH_SIZE, std, mean, y_all=y)

    test_iterator = ParallelBatchIterator(keys, params.BATCH_SIZE, std, mean, y_all=y)

    if params.REGRESSION:
        y = util.float32(y)
        y = y[:, np.newaxis]

    if 'gpu' in theano.config.device:
        # Half of coma does not support cuDNN, check whether we can use it on this node
        # If not, use cuda_convnet bindings
        from theano.sandbox.cuda.dnn import dnn_available
        if dnn_available():
            from lasagne.layers import dnn
            Conv2DLayer = dnn.Conv2DDNNLayer
            MaxPool2DLayer = dnn.MaxPool2DDNNLayer
        else:
            from lasagne.layers import cuda_convnet
            Conv2DLayer = cuda_convnet.Conv2DCCLayer
            MaxPool2DLayer = cuda_convnet.MaxPool2DCCLayer
    else:
        Conv2DLayer = layers.Conv2DLayer
        MaxPool2DLayer = layers.MaxPool2DLayer

    Maxout = layers.pool.FeaturePoolLayer

    net = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv0', Conv2DLayer),
            ('pool0', MaxPool2DLayer),
            ('conv1', Conv2DLayer),
            ('pool1', MaxPool2DLayer),
            ('conv2', Conv2DLayer),
            ('pool2', MaxPool2DLayer),
            ('conv3', Conv2DLayer),
            ('pool3', MaxPool2DLayer),
            ('conv4', Conv2DLayer),
            ('pool4', MaxPool2DLayer),
            ('dropouthidden1', layers.DropoutLayer),
            ('hidden1', layers.DenseLayer),
            ('maxout1', Maxout),
            ('dropouthidden2', layers.DropoutLayer),
            ('hidden2', layers.DenseLayer),
            ('maxout2', Maxout),
            ('dropouthidden3', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],

        input_shape=(None, params.CHANNELS, params.PIXELS, params.PIXELS),

        conv0_num_filters=32, conv0_filter_size=(5, 5), conv0_stride=(2, 2), pool0_pool_size=(2, 2), pool0_stride=(2, 2),
        conv1_num_filters=64, conv1_filter_size=(5, 5), conv1_border_mode = 'same', pool1_pool_size=(2, 2), pool1_stride=(2, 2),
        conv2_num_filters=128, conv2_filter_size=(3, 3), conv2_border_mode = 'same', pool2_pool_size=(2, 2), pool2_stride=(2, 2),
        conv3_num_filters=192, conv3_filter_size=(3, 3), conv3_border_mode = 'same', pool3_pool_size=(2, 2), pool3_stride=(2, 2),
        conv4_num_filters=256, conv4_filter_size=(3, 3), conv4_border_mode = 'same', pool4_pool_size=(2, 2), pool4_stride=(2, 2),

        hidden1_num_units=1024,
        hidden2_num_units=1024,

        dropouthidden1_p=0.5,
        dropouthidden2_p=0.5,
        dropouthidden3_p=0.5,

        maxout1_pool_size=2,
        maxout2_pool_size=2,

        output_num_units=1 if params.REGRESSION else 5,
        output_nonlinearity=None if params.REGRESSION else nonlinearities.softmax,

        update_learning_rate=theano.shared(util.float32(params.START_LEARNING_RATE)),
        update_momentum=theano.shared(util.float32(params.MOMENTUM)),
        custom_score=('kappa', quadratic_kappa),

        regression=params.REGRESSION,
        batch_iterator_train=train_iterator,
        batch_iterator_test=test_iterator,
        on_epoch_finished=[
            AdjustVariable('update_learning_rate', start=params.START_LEARNING_RATE),
            stats.Stat(),
            ModelSaver()
        ],
        max_epochs=500,
        verbose=1,

        # Only relevant when create_validation_split = True
        eval_size=0.1,

        # Need to specify splits manually like indicated below!
        create_validation_split=params.SUBSET > 0,
    )

    # It is recommended to use the same training/validation split for every model when ensembling and optimizing thresholds
    #
    # To set specific training/validation split:
    net.X_train = np.load(params.IMAGE_SOURCE + "/X_train.npy")
    net.X_valid = np.load(params.IMAGE_SOURCE + "/X_valid.npy")
    net.y_train = np.load(params.IMAGE_SOURCE + "/y_train.npy")
    net.y_valid = np.load(params.IMAGE_SOURCE + "/y_valid.npy")

    return net, X, y
def train_net0(X, y, num_classes, num_features):
    # NOTE: this snippet is truncated at the top; the lines below are the tail
    # of a nolearn NeuralNet(...) definition. The enclosing function, layer
    # list, and dense1 sizing here are a hypothetical reconstruction inferred
    # from the keyword arguments that follow.
    net0 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('dense1', layers.DenseLayer),
            ('dropout1', layers.DropoutLayer),
            ('dense2', layers.DenseLayer),
            ('dropout2', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],
        input_shape=(None, num_features),  # assumed parameter
        dense1_num_units=300,              # assumed, mirrors dense2
        dense1_nonlinearity=leaky_rectify,
        # dense1_W=lg.init.Uniform(),
        dropout1_p=0.25,
        dense2_num_units=300,
        dense2_nonlinearity=leaky_rectify,
        # dense2_W=lg.init.Uniform(),
        dropout2_p=0.25,
        # dense3_num_units=100,
        # dense3_nonlinearity=rectify,
        # dense3_W=lg.init.Uniform(),
        # dropout3_p=0.25,
        output_num_units=num_classes,
        output_nonlinearity=softmax,
        # output_W=lg.init.Uniform(),
        # update=nesterov_momentum,
        update_learning_rate=theano.shared(float32(0.01)),
        update_momentum=theano.shared(float32(0.9)),
        # update=adagrad,
        # update_learning_rate=theano.shared(float32(0.01)),
        # update_epsilon=1e-06,
        on_epoch_finished=[
            AdjustVariable("update_learning_rate", start=0.015, stop=0.0001),
            AdjustVariable("update_momentum", start=0.9, stop=0.999),
            EarlyStopping(patience=20),
        ],
        eval_size=0.2,
        verbose=1,
        max_epochs=10000,
    )

    net0.fit(X, y)
def define_net():
    define_net_specific_parameters()
    io = ImageIO()

    # Read pandas csv labels
    y = util.load_labels()

    if params.SUBSET != 0:
        y = y[:params.SUBSET]

    X = np.arange(y.shape[0])

    mean, std = io.load_mean_std(circularized=params.CIRCULARIZED_MEAN_STD)
    keys = y.index.values

    if params.AUGMENT:
        train_iterator = AugmentingParallelBatchIterator(keys,
                                                         params.BATCH_SIZE,
                                                         std,
                                                         mean,
                                                         y_all=y)
    else:
        train_iterator = ParallelBatchIterator(keys,
                                               params.BATCH_SIZE,
                                               std,
                                               mean,
                                               y_all=y)

    test_iterator = ParallelBatchIterator(keys,
                                          params.BATCH_SIZE,
                                          std,
                                          mean,
                                          y_all=y)

    if params.REGRESSION:
        y = util.float32(y)
        y = y[:, np.newaxis]

    if 'gpu' in theano.config.device:
        # Half of coma does not support cuDNN, check whether we can use it on this node
        # If not, use cuda_convnet bindings
        from theano.sandbox.cuda.dnn import dnn_available
        if dnn_available() and not params.DISABLE_CUDNN:
            from lasagne.layers import dnn
            Conv2DLayer = dnn.Conv2DDNNLayer
            MaxPool2DLayer = dnn.MaxPool2DDNNLayer
        else:
            from lasagne.layers import cuda_convnet
            Conv2DLayer = cuda_convnet.Conv2DCCLayer
            MaxPool2DLayer = cuda_convnet.MaxPool2DCCLayer
    else:
        Conv2DLayer = layers.Conv2DLayer
        MaxPool2DLayer = layers.MaxPool2DLayer

    Maxout = layers.pool.FeaturePoolLayer

    net = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv0', Conv2DLayer),
            ('pool0', MaxPool2DLayer),
            ('conv1', Conv2DLayer),
            ('pool1', MaxPool2DLayer),
            ('conv2', Conv2DLayer),
            ('pool2', MaxPool2DLayer),
            ('conv3', Conv2DLayer),
            ('pool3', MaxPool2DLayer),
            ('conv4', Conv2DLayer),
            ('pool4', MaxPool2DLayer),
            ('dropouthidden1', layers.DropoutLayer),
            ('hidden1', layers.DenseLayer),
            ('maxout1', Maxout),
            ('dropouthidden2', layers.DropoutLayer),
            ('hidden2', layers.DenseLayer),
            ('maxout2', Maxout),
            ('dropouthidden3', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],
        input_shape=(None, params.CHANNELS, params.PIXELS, params.PIXELS),
        conv0_num_filters=32,
        conv0_filter_size=(5, 5),
        conv0_stride=(2, 2),
        pool0_pool_size=(2, 2),
        pool0_stride=(2, 2),
        conv1_num_filters=64,
        conv1_filter_size=(3, 3),
        conv1_border_mode='same',
        pool1_pool_size=(2, 2),
        pool1_stride=(2, 2),
        conv2_num_filters=128,
        conv2_filter_size=(3, 3),
        conv2_border_mode='same',
        pool2_pool_size=(2, 2),
        pool2_stride=(2, 2),
        conv3_num_filters=192,
        conv3_filter_size=(3, 3),
        conv3_border_mode='same',
        pool3_pool_size=(2, 2),
        pool3_stride=(2, 2),
        conv4_num_filters=256,
        conv4_filter_size=(3, 3),
        conv4_border_mode='same',
        pool4_pool_size=(2, 2),
        pool4_stride=(2, 2),
        hidden1_num_units=1024,
        hidden2_num_units=1024,
        dropouthidden1_p=0.5,
        dropouthidden2_p=0.5,
        dropouthidden3_p=0.5,
        maxout1_pool_size=2,
        maxout2_pool_size=2,
        output_num_units=1 if params.REGRESSION else 5,
        output_nonlinearity=None
        if params.REGRESSION else nonlinearities.softmax,
        update_learning_rate=theano.shared(
            util.float32(params.START_LEARNING_RATE)),
        update_momentum=theano.shared(util.float32(params.MOMENTUM)),
        custom_score=('kappa', quadratic_kappa),
        regression=params.REGRESSION,
        batch_iterator_train=train_iterator,
        batch_iterator_test=test_iterator,
        on_epoch_finished=[
            AdjustVariable('update_learning_rate',
                           start=params.START_LEARNING_RATE),
            stats.Stat(),
            ModelSaver()
        ],
        max_epochs=500,
        verbose=1,

        # Only relevant when create_validation_split = True
        eval_size=0.1,

        # Need to specify splits manually like indicated below!
        create_validation_split=params.SUBSET > 0,
    )

    # It is recommended to use the same training/validation split for every model when ensembling and optimizing thresholds
    #
    # To set specific training/validation split:
    net.X_train = np.load(params.IMAGE_SOURCE + "/X_train.npy")
    net.X_valid = np.load(params.IMAGE_SOURCE + "/X_valid.npy")
    net.y_train = np.load(params.IMAGE_SOURCE + "/y_train.npy")
    net.y_valid = np.load(params.IMAGE_SOURCE + "/y_valid.npy")

    return net, X, y
Example #11
def estimator(protocol,
              classifier,
              n_features,
              files,
              X,
              labels,
              run,
              fold,
              eval_size=0.1):

    final_weights = 'weights/final_%s_%s_fold_%s.pkl' % (classifier, run, fold)

    if classifier == "SVM":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)

        else:
            svm = SVC(kernel='linear',
                      class_weight='balanced',
                      cache_size=5500,
                      probability=True)
            if protocol != 'protocol3':
                svm_model = svm
                param_grid = {"C": [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]}
                cv = StratifiedShuffleSplit(labels.reshape(
                    (labels.shape[0], )),
                                            n_iter=10,
                                            test_size=0.1,
                                            random_state=0)
                est = GridSearchCV(svm_model,
                                   param_grid=param_grid,
                                   scoring='roc_auc',
                                   n_jobs=15,
                                   cv=cv,
                                   verbose=2)
                est.fit(X, labels.reshape((labels.shape[0], )))
            else:
                param_grid = {
                    "estimator__C":
                    [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
                }
                binarized_labels = label_binarize(np.squeeze(labels),
                                                  classes=[0, 1, 2])
                svm_model = OneVsRestClassifier(svm)
                cv = StratifiedShuffleSplit(binarized_labels,
                                            n_iter=10,
                                            test_size=0.1,
                                            random_state=0)
                est = GridSearchCV(svm_model,
                                   param_grid=param_grid,
                                   scoring='roc_auc',
                                   n_jobs=15,
                                   cv=cv,
                                   verbose=2)
                est.fit(X, binarized_labels)

            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " %
                  (classifier))
            print(est)

            # Persistence
            #joblib.dump(est, final_weights)

    elif classifier == "RF":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)

        else:
            # Previously explored grid (kept for reference):
            # for criterion in ["gini", "entropy"]:
            #     for n_estimators in [10, 50, 100, 200]:  # also 250, 500, 750, 1000
            #         for max_features in [None]:  # "auto", "sqrt", "log2"
            #             # Not using class_weight='auto': it errors in sklearn

            param_grid = {
                'criterion': ['gini', 'entropy'],
                'n_estimators': [50, 100, 200, 300, 10, 250, 500, 750]
            }
            est = GridSearchCV(RandomForestClassifier(max_features="auto"),
                               param_grid=param_grid,
                               n_jobs=-1,
                               verbose=2)
            print(X[:3])
            est.fit(X, labels.reshape((labels.shape[0], )))

            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " %
                  (classifier))
            print(est)

            # Persistence
            joblib.dump(est, final_weights)

    else:
        layers = [
            (InputLayer, {
                'shape': (None, n_features)
            }),
            (DenseLayer, {
                'num_units': N_HIDDEN_1,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01)
            }),
            (FeaturePoolLayer, {
                'pool_size': 2
            }),
            (DenseLayer, {
                'num_units': N_HIDDEN_2,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01)
            }),
            (FeaturePoolLayer, {
                'pool_size': 2
            }),
            (DenseLayer, {
                'num_units': 2,
                'nonlinearity': softmax
            }),
        ]
        args = dict(
            update=adam,
            update_learning_rate=theano.shared(util.float32(START_LR)),
            batch_iterator_train=ResampleIterator(BATCH_SIZE),
            batch_iterator_test=BatchIterator(BATCH_SIZE),
            objective=nn.get_objective(l1=L1, l2=L2),
            eval_size=eval_size,
            custom_scores=[('kappa',
                            metrics.kappa)] if eval_size > 0.0 else None,
            on_epoch_finished=[
                nn.Schedule('update_learning_rate', SCHEDULE),
            ],
            regression=False,
            max_epochs=N_ITER,
            verbose=1,
        )
        est = BlendNet(layers, **args)
        if os.path.exists(final_weights):
            est.load_params_from(str(final_weights))
            print("loaded weights from {}".format(final_weights))

        else:
            est.set_split(files, labels)
            est.fit(X, labels)

            #Persistence
            #est.save_params_to(final_weights)

    return est