def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'), 'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'), 'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        update=adam,
        update_learning_rate=theano.shared(util.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        eval_size=eval_size,
        custom_score=('kappa', util.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(layers, **args)
    net.set_split(files, labels)
    return net
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True,
            batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
            SaveBestWeights(weights_file=config.weights_file,
                            loss='F1', greater_is_better=True),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 100,
        'verbose': 2,
        'update_learning_rate': theano.shared(
            util.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('F1', util.F1Score),
    }
    args.update(kwargs)
    net = Net(**args)
    return net
def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'), 'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'), 'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        layers=layers,
        update=adam,
        update_learning_rate=theano.shared(util.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        # eval_size=eval_size,
        custom_score=('kappa', util.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(eval_size=eval_size, **args)
    net.set_split(files, labels)
    return net
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True,
            batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
            SaveBestWeights(weights_file=config.weights_file,
                            loss='kappa', greater_is_better=True),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 200,
        'verbose': 1,
        'update_learning_rate': theano.shared(
            util.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('kappa', util.kappa),
    }
    args.update(kwargs)
    net = Net(**args)
    return net
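# A hypothetical call site for create_net above (the config object, the
# override values, and the training inputs are illustrative assumptions, not
# from the source). Because of args.update(kwargs), any keyword argument
# passed by the caller overrides the corresponding default in args.
net = create_net(config, max_epochs=300, verbose=2)
net.fit(files, labels)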
def __call__(self, nn, train_history):
    # Build the linear interpolation from start to stop lazily, once the
    # total number of epochs is known from the net.
    if self.ls is None:
        self.ls = numpy.linspace(self.start, self.stop, nn.max_epochs)
    # Set the shared variable (e.g. the learning rate) to this epoch's value.
    epoch = train_history[-1]["epoch"]
    new_value = float32(self.ls[epoch - 1])
    getattr(nn, self.name).set_value(new_value)
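# For context: __call__ above is the body of a linearly interpolating
# on_epoch_finished callback. A common full class definition, following the
# widely used nolearn tutorial pattern (the __init__ shown here is an
# assumption; only __call__ appears in the source):
class AdjustVariable(object):
    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name   # e.g. 'update_learning_rate'
        self.start = start
        self.stop = stop
        self.ls = None     # linspace schedule, built lazily on first call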
def __call__(self, nn, train_history):
    epoch = train_history[-1]['epoch']
    if epoch in self.schedule:
        new_value = self.schedule[epoch]
        # A 'stop' entry ends training early, optionally saving the weights.
        if new_value == 'stop':
            if self.weights_file is not None:
                nn.save_params_to(self.weights_file)
            raise StopIteration
        getattr(nn, self.name).set_value(util.float32(new_value))
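# A minimal sketch of how the epoch-keyed Schedule above is typically wired
# (the schedule values and file name are illustrative assumptions): the
# callback fires after every epoch, resets the shared learning-rate variable
# at the listed epochs, and a 'stop' entry saves the weights and halts
# training. Entry 0 doubles as the initial learning rate, matching the
# config.get('schedule')[0] usage in create_net above.
schedule = {
    0: 0.003,      # initial learning rate
    150: 0.0003,   # decay late in training
    201: 'stop',   # save final weights, then raise StopIteration
}
net = Net(
    layers=layers,
    update_learning_rate=theano.shared(util.float32(schedule[0])),
    on_epoch_finished=[
        Schedule('update_learning_rate', schedule,
                 weights_file='weights/final.pkl'),
    ],
    max_epochs=250,
)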
def define_net():
    define_net_specific_parameters()
    io = ImageIO()

    # Read pandas csv labels
    y = util.load_labels()
    if params.SUBSET != 0:
        y = y[:params.SUBSET]

    X = np.arange(y.shape[0])
    mean, std = io.load_mean_std(circularized=params.CIRCULARIZED_MEAN_STD)
    keys = y.index.values

    if params.AUGMENT:
        train_iterator = AugmentingParallelBatchIterator(
            keys, params.BATCH_SIZE, std, mean, y_all=y)
    else:
        train_iterator = ParallelBatchIterator(
            keys, params.BATCH_SIZE, std, mean, y_all=y)
    test_iterator = ParallelBatchIterator(
        keys, params.BATCH_SIZE, std, mean, y_all=y)

    if params.REGRESSION:
        y = util.float32(y)
        y = y[:, np.newaxis]

    if 'gpu' in theano.config.device:
        # Half of coma does not support cuDNN; check whether we can use it
        # on this node. If not, use the cuda_convnet bindings.
        from theano.sandbox.cuda.dnn import dnn_available
        if dnn_available():
            from lasagne.layers import dnn
            Conv2DLayer = dnn.Conv2DDNNLayer
            MaxPool2DLayer = dnn.MaxPool2DDNNLayer
        else:
            from lasagne.layers import cuda_convnet
            Conv2DLayer = cuda_convnet.Conv2DCCLayer
            MaxPool2DLayer = cuda_convnet.MaxPool2DCCLayer
    else:
        Conv2DLayer = layers.Conv2DLayer
        MaxPool2DLayer = layers.MaxPool2DLayer

    Maxout = layers.pool.FeaturePoolLayer

    net = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv0', Conv2DLayer),
            ('pool0', MaxPool2DLayer),
            ('conv1', Conv2DLayer),
            ('pool1', MaxPool2DLayer),
            ('conv2', Conv2DLayer),
            ('pool2', MaxPool2DLayer),
            ('conv3', Conv2DLayer),
            ('pool3', MaxPool2DLayer),
            ('conv4', Conv2DLayer),
            ('pool4', MaxPool2DLayer),
            ('dropouthidden1', layers.DropoutLayer),
            ('hidden1', layers.DenseLayer),
            ('maxout1', Maxout),
            ('dropouthidden2', layers.DropoutLayer),
            ('hidden2', layers.DenseLayer),
            ('maxout2', Maxout),
            ('dropouthidden3', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],

        input_shape=(None, params.CHANNELS, params.PIXELS, params.PIXELS),

        conv0_num_filters=32, conv0_filter_size=(5, 5), conv0_stride=(2, 2),
        pool0_pool_size=(2, 2), pool0_stride=(2, 2),

        conv1_num_filters=64, conv1_filter_size=(5, 5), conv1_border_mode='same',
        pool1_pool_size=(2, 2), pool1_stride=(2, 2),

        conv2_num_filters=128, conv2_filter_size=(3, 3), conv2_border_mode='same',
        pool2_pool_size=(2, 2), pool2_stride=(2, 2),

        conv3_num_filters=192, conv3_filter_size=(3, 3), conv3_border_mode='same',
        pool3_pool_size=(2, 2), pool3_stride=(2, 2),

        conv4_num_filters=256, conv4_filter_size=(3, 3), conv4_border_mode='same',
        pool4_pool_size=(2, 2), pool4_stride=(2, 2),

        hidden1_num_units=1024,
        hidden2_num_units=1024,

        dropouthidden1_p=0.5,
        dropouthidden2_p=0.5,
        dropouthidden3_p=0.5,

        maxout1_pool_size=2,
        maxout2_pool_size=2,

        output_num_units=1 if params.REGRESSION else 5,
        output_nonlinearity=None if params.REGRESSION else nonlinearities.softmax,

        update_learning_rate=theano.shared(
            util.float32(params.START_LEARNING_RATE)),
        update_momentum=theano.shared(util.float32(params.MOMENTUM)),

        custom_score=('kappa', quadratic_kappa),
        regression=params.REGRESSION,

        batch_iterator_train=train_iterator,
        batch_iterator_test=test_iterator,

        on_epoch_finished=[
            AdjustVariable('update_learning_rate',
                           start=params.START_LEARNING_RATE),
            stats.Stat(),
            ModelSaver()
        ],

        max_epochs=500,
        verbose=1,

        # Only relevant when create_validation_split = True
        eval_size=0.1,

        # Need to specify splits manually as indicated below!
        create_validation_split=params.SUBSET > 0,
    )

    # It is recommended to use the same training/validation split for every
    # model, for ensembling and threshold optimization.
    #
    # To set a specific training/validation split:
    net.X_train = np.load(params.IMAGE_SOURCE + "/X_train.npy")
    net.X_valid = np.load(params.IMAGE_SOURCE + "/X_valid.npy")
    net.y_train = np.load(params.IMAGE_SOURCE + "/y_train.npy")
    net.y_valid = np.load(params.IMAGE_SOURCE + "/y_valid.npy")

    return net, X, y
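# Hypothetical usage of define_net above (the training call is an assumption):
# X holds row indices into the label frame, and the batch iterators map those
# indices to the actual images, so fit is called on the indices themselves.
net, X, y = define_net()
net.fit(X, y)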
    dense1_nonlinearity=leaky_rectify,
    # dense1_W=lg.init.Uniform(),
    dropout1_p=0.25,

    dense2_num_units=300,
    dense2_nonlinearity=leaky_rectify,
    # dense2_W=lg.init.Uniform(),
    dropout2_p=0.25,

    # dense3_num_units=100,
    # dense3_nonlinearity=rectify,
    # dense3_W=lg.init.Uniform(),
    # dropout3_p=0.25,

    output_num_units=num_classes,
    output_nonlinearity=softmax,
    # output_W=lg.init.Uniform(),

    # update=nesterov_momentum,
    update_learning_rate=theano.shared(float32(0.01)),
    update_momentum=theano.shared(float32(0.9)),
    # update=adagrad,
    # update_learning_rate=theano.shared(float32(0.01)),
    # update_epsilon=1e-06,

    on_epoch_finished=[
        AdjustVariable("update_learning_rate", start=0.015, stop=0.0001),
        AdjustVariable("update_momentum", start=0.9, stop=0.999),
        EarlyStopping(patience=20),
    ],

    eval_size=0.2,
    verbose=1,
    max_epochs=10000,
)
net0.fit(X, y)
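# EarlyStopping is referenced above but not defined in this snippet. A common
# implementation, following the widely used nolearn tutorial pattern (this
# class body is an assumption, not from the source): it remembers the weights
# at the best validation loss and stops after `patience` epochs without
# improvement.
class EarlyStopping(object):
    def __init__(self, patience=100):
        self.patience = patience
        self.best_valid = np.inf
        self.best_valid_epoch = 0
        self.best_weights = None

    def __call__(self, nn, train_history):
        current_valid = train_history[-1]['valid_loss']
        current_epoch = train_history[-1]['epoch']
        if current_valid < self.best_valid:
            # New best validation loss: remember these weights.
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = nn.get_all_params_values()
        elif self.best_valid_epoch + self.patience < current_epoch:
            # No improvement for `patience` epochs: restore best and stop.
            print("Early stopping.")
            print("Best valid loss was {:.6f} at epoch {}.".format(
                self.best_valid, self.best_valid_epoch))
            nn.load_params_from(self.best_weights)
            raise StopIteration()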
def define_net():
    define_net_specific_parameters()
    io = ImageIO()

    # Read pandas csv labels
    y = util.load_labels()
    if params.SUBSET != 0:
        y = y[:params.SUBSET]

    X = np.arange(y.shape[0])
    mean, std = io.load_mean_std(circularized=params.CIRCULARIZED_MEAN_STD)
    keys = y.index.values

    if params.AUGMENT:
        train_iterator = AugmentingParallelBatchIterator(
            keys, params.BATCH_SIZE, std, mean, y_all=y)
    else:
        train_iterator = ParallelBatchIterator(
            keys, params.BATCH_SIZE, std, mean, y_all=y)
    test_iterator = ParallelBatchIterator(
        keys, params.BATCH_SIZE, std, mean, y_all=y)

    if params.REGRESSION:
        y = util.float32(y)
        y = y[:, np.newaxis]

    if 'gpu' in theano.config.device:
        # Half of coma does not support cuDNN; check whether we can use it
        # on this node. If not, use the cuda_convnet bindings.
        from theano.sandbox.cuda.dnn import dnn_available
        if dnn_available() and not params.DISABLE_CUDNN:
            from lasagne.layers import dnn
            Conv2DLayer = dnn.Conv2DDNNLayer
            MaxPool2DLayer = dnn.MaxPool2DDNNLayer
        else:
            from lasagne.layers import cuda_convnet
            Conv2DLayer = cuda_convnet.Conv2DCCLayer
            MaxPool2DLayer = cuda_convnet.MaxPool2DCCLayer
    else:
        Conv2DLayer = layers.Conv2DLayer
        MaxPool2DLayer = layers.MaxPool2DLayer

    Maxout = layers.pool.FeaturePoolLayer

    net = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv0', Conv2DLayer),
            ('pool0', MaxPool2DLayer),
            ('conv1', Conv2DLayer),
            ('pool1', MaxPool2DLayer),
            ('conv2', Conv2DLayer),
            ('pool2', MaxPool2DLayer),
            ('conv3', Conv2DLayer),
            ('pool3', MaxPool2DLayer),
            ('conv4', Conv2DLayer),
            ('pool4', MaxPool2DLayer),
            ('dropouthidden1', layers.DropoutLayer),
            ('hidden1', layers.DenseLayer),
            ('maxout1', Maxout),
            ('dropouthidden2', layers.DropoutLayer),
            ('hidden2', layers.DenseLayer),
            ('maxout2', Maxout),
            ('dropouthidden3', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],

        input_shape=(None, params.CHANNELS, params.PIXELS, params.PIXELS),

        conv0_num_filters=32, conv0_filter_size=(5, 5), conv0_stride=(2, 2),
        pool0_pool_size=(2, 2), pool0_stride=(2, 2),

        conv1_num_filters=64, conv1_filter_size=(3, 3), conv1_border_mode='same',
        pool1_pool_size=(2, 2), pool1_stride=(2, 2),

        conv2_num_filters=128, conv2_filter_size=(3, 3), conv2_border_mode='same',
        pool2_pool_size=(2, 2), pool2_stride=(2, 2),

        conv3_num_filters=192, conv3_filter_size=(3, 3), conv3_border_mode='same',
        pool3_pool_size=(2, 2), pool3_stride=(2, 2),

        conv4_num_filters=256, conv4_filter_size=(3, 3), conv4_border_mode='same',
        pool4_pool_size=(2, 2), pool4_stride=(2, 2),

        hidden1_num_units=1024,
        hidden2_num_units=1024,

        dropouthidden1_p=0.5,
        dropouthidden2_p=0.5,
        dropouthidden3_p=0.5,

        maxout1_pool_size=2,
        maxout2_pool_size=2,

        output_num_units=1 if params.REGRESSION else 5,
        output_nonlinearity=None if params.REGRESSION else nonlinearities.softmax,

        update_learning_rate=theano.shared(
            util.float32(params.START_LEARNING_RATE)),
        update_momentum=theano.shared(util.float32(params.MOMENTUM)),

        custom_score=('kappa', quadratic_kappa),
        regression=params.REGRESSION,

        batch_iterator_train=train_iterator,
        batch_iterator_test=test_iterator,

        on_epoch_finished=[
            AdjustVariable('update_learning_rate',
                           start=params.START_LEARNING_RATE),
            stats.Stat(),
            ModelSaver()
        ],

        max_epochs=500,
        verbose=1,

        # Only relevant when create_validation_split = True
        eval_size=0.1,

        # Need to specify splits manually as indicated below!
        create_validation_split=params.SUBSET > 0,
    )

    # It is recommended to use the same training/validation split for every
    # model, for ensembling and threshold optimization.
    #
    # To set a specific training/validation split:
    net.X_train = np.load(params.IMAGE_SOURCE + "/X_train.npy")
    net.X_valid = np.load(params.IMAGE_SOURCE + "/X_valid.npy")
    net.y_train = np.load(params.IMAGE_SOURCE + "/y_train.npy")
    net.y_valid = np.load(params.IMAGE_SOURCE + "/y_valid.npy")

    return net, X, y
def estimator(protocol, classifier, n_features, files, X, labels, run, fold,
              eval_size=0.1):
    final_weights = 'weights/final_%s_%s_fold_%s.pkl' % (classifier, run, fold)

    if classifier == "SVM":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)
        else:
            svm = SVC(kernel='linear', class_weight='balanced',
                      cache_size=5500, probability=True)
            if protocol != 'protocol3':
                svm_model = svm
                param_grid = {"C": [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]}
                cv = StratifiedShuffleSplit(
                    labels.reshape((labels.shape[0], )),
                    n_iter=10, test_size=0.1, random_state=0)
                est = GridSearchCV(svm_model, param_grid=param_grid,
                                   scoring='roc_auc', n_jobs=15, cv=cv,
                                   verbose=2)
                est.fit(X, labels.reshape((labels.shape[0], )))
            else:
                param_grid = {
                    "estimator__C": [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
                }
                binarized_labels = label_binarize(np.squeeze(labels),
                                                  classes=[0, 1, 2])
                svm_model = OneVsRestClassifier(svm)
                cv = StratifiedShuffleSplit(binarized_labels, n_iter=10,
                                            test_size=0.1, random_state=0)
                est = GridSearchCV(svm_model, param_grid=param_grid,
                                   scoring='roc_auc', n_jobs=15, cv=cv,
                                   verbose=2)
                est.fit(X, binarized_labels)
            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " % (classifier))
            print(est)
            # Persistence
            # joblib.dump(est, final_weights)
    elif classifier == "RF":
        if os.path.exists(final_weights):
            est = joblib.load(final_weights)
        else:
            # for criterion in ["gini", "entropy"]:
            #     for n_estimators in [10, 50, 100, 200]:  # , 200, 250, 500, 750, 1000]:
            #         for max_features in [None]:  # "auto", "sqrt", "log2",
            # We are not using class_weight='auto'. Error in sklearn.
            param_grid = {
                'criterion': ['gini', 'entropy'],
                'n_estimators': [50, 100, 200, 300, 10, 250, 500, 750]
            }
            est = GridSearchCV(RandomForestClassifier(max_features="auto"),
                               param_grid=param_grid, n_jobs=-1, verbose=2)
            print(X[:3])
            est.fit(X, labels.reshape((labels.shape[0], )))
            est = est.best_estimator_
            print("Best estimator found by grid search for %s: " % (classifier))
            print(est)
            # Persistence
            joblib.dump(est, final_weights)
    else:
        layers = [
            (InputLayer, {'shape': (None, n_features)}),
            (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                          'W': init.Orthogonal('relu'),
                          'b': init.Constant(0.01)}),
            (FeaturePoolLayer, {'pool_size': 2}),
            (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                          'W': init.Orthogonal('relu'),
                          'b': init.Constant(0.01)}),
            (FeaturePoolLayer, {'pool_size': 2}),
            (DenseLayer, {'num_units': 2, 'nonlinearity': softmax}),
        ]
        args = dict(
            update=adam,
            update_learning_rate=theano.shared(util.float32(START_LR)),
            batch_iterator_train=ResampleIterator(BATCH_SIZE),
            batch_iterator_test=BatchIterator(BATCH_SIZE),
            objective=nn.get_objective(l1=L1, l2=L2),
            eval_size=eval_size,
            custom_scores=[('kappa', metrics.kappa)] if eval_size > 0.0 else None,
            on_epoch_finished=[
                nn.Schedule('update_learning_rate', SCHEDULE),
            ],
            regression=False,
            max_epochs=N_ITER,
            verbose=1,
        )
        est = BlendNet(layers, **args)
        if os.path.exists(final_weights):
            est.load_params_from(str(final_weights))
            print("loaded weights from {}".format(final_weights))
        else:
            est.set_split(files, labels)
            est.fit(X, labels)
            # Persistence
            # est.save_params_to(final_weights)

    return est
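# Hypothetical call into the dispatcher above (the argument values and test
# data are illustrative assumptions): every branch returns an object exposing
# the sklearn-style predict/predict_proba interface, so downstream code does
# not need to know which classifier was selected.
est = estimator('protocol1', 'SVM', n_features=X.shape[1], files=files,
                X=X, labels=labels, run='run0', fold=0)
probabilities = est.predict_proba(X_test)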