def train_eval(train_set, valid_set, args, hidden_size=100,
               clip_gradients=True, gradient_limit=5):
    # weight initialization
    init = Uniform(low=-0.08, high=0.08)

    # model initialization
    layers = [
        LSTM(hidden_size, init, Logistic(), Tanh()),
        LSTM(hidden_size, init, Logistic(), Tanh()),
        Affine(2, init, bias=init, activation=Softmax())
    ]

    cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
    model = Model(layers=layers)
    optimizer = RMSProp(clip_gradients=clip_gradients,
                        gradient_limit=gradient_limit,
                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(model, train_set, progress_bar=args.progress_bar)

    # train model
    model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)

    pred = model.get_outputs(valid_set)
    pred_neg_rate = model.eval(valid_set, metric=Misclassification())
    return (pred[:, 1], pred_neg_rate)
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128), init=init, activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=common['nclasses'], init=init, activation=Softmax())
    ]
    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)
    return model
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100, init=Gaussian(loc=0.0, scale=0.01), activation=Rectlin()),
        Affine(nout=10, init=Gaussian(loc=0.0, scale=0.01),
               activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=dataset.valid_iter, **args.callback_args)

    # run fit with a cross-entropy cost
    mlp.fit(dataset.train_iter, optimizer=optimizer, num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()),
            callbacks=callbacks)

    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer, activation=activation))
        if args.pool:
            layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
        if args.drop:
            layers.append(Dropout(keep=(1 - args.drop)))
    layers.append(Affine(nout=1, init=initializer,
                         activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples,
                                lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples,
                              lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier init: uniform in +/- sqrt(3 / fan_in)
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernels, 128 feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # max-pooling: keep the highest value in each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # keep each activation with probability 0.5, zero the rest
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
                         activation=relu))  # fully connected layer over the flattened conv output
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    # out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
            cost=cost, callbacks=callbacks)
    return cnn
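The snippet below is a minimal, hypothetical driver for fit_model above: the backend settings, image shape (3x64x64), sample counts, and random data are illustrative assumptions standing in for a real dataset, and the snippet's own imports are assumed to be in scope.

# Hypothetical usage of fit_model; the data here is random and only shows the expected shapes.
import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator

gen_backend(backend='cpu', batch_size=32)

# fake 3x64x64 images with binary labels -- replace with real data
X_train = np.random.rand(320, 3 * 64 * 64).astype('float32')
y_train = np.random.randint(2, size=320)
X_val = np.random.rand(64, 3 * 64 * 64).astype('float32')
y_val = np.random.randint(2, size=64)

train_set = ArrayIterator(X_train, y_train, nclass=2, lshape=(3, 64, 64))
val_set = ArrayIterator(X_val, y_val, nclass=2, lshape=(3, 64, 64))

cnn = fit_model(train_set, val_set, num_epochs=2)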
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost, callbacks=callbacks)
    return clf
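A hedged sketch of calling train_regressor above; the synthetic vectors and vocabulary only illustrate the expected inputs (300-d word2vec rows in w2v_W, 620-d target embeddings in orig_wordvecs), not real data.

# Hypothetical call with synthetic data, sized so the batch size divides the sample count.
import numpy as np
from collections import OrderedDict
from neon.backends import gen_backend

gen_backend(backend='cpu', batch_size=16)

words = ['w%d' % i for i in range(66)]  # the function drops the last two keys
orig_wordvecs = OrderedDict((w, np.random.rand(620).astype('float32')) for w in words)
w2v_vocab = {w: i for i, w in enumerate(words)}   # word -> row index into w2v_W
w2v_W = np.random.rand(len(words), 300).astype('float32')

clf = train_regressor(orig_wordvecs, w2v_W, w2v_vocab)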
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
              Affine(nout=10, init=init_uni, activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)
    mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
            callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification()) * 100
    print('Misclassification error = %.2f%%' % err)
    return err
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
              Affine(nout=10, init=init_uni, activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)
    mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
            callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification()) * 100
    print('Misclassification error = %.2f%%' % err)
    return err
class MostCommonWordSense:

    def __init__(self, rounding, callback_args, epochs):
        # setup weight initialization function
        self.init = Gaussian(loc=0.0, scale=0.01)
        # setup optimizer
        self.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                                                 stochastic_round=rounding)
        # setup cost function (sum of squared errors)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.epochs = epochs
        self.model = None
        self.callback_args = callback_args

    def build(self):
        # setup model layers
        layers = [Affine(nout=100, init=self.init, bias=self.init, activation=Rectlin()),
                  Affine(nout=2, init=self.init, bias=self.init, activation=Softmax())]
        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, valid_set, train_set):
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=valid_set, **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs,
                       cost=self.cost, callbacks=callbacks)

    def save(self, save_path):
        self.model.save_params(save_path)

    def load(self, model_path):
        self.model = Model(model_path)

    def eval(self, valid_set):
        eval_rate = self.model.eval(valid_set, metric=Misclassification())
        return eval_rate

    def get_outputs(self, valid_set):
        return self.model.get_outputs(valid_set)
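A hedged usage sketch for MostCommonWordSense; the 50-d features, two-column targets, and callback arguments are assumptions for illustration, and the class's own imports are taken from the snippet above.

# Hypothetical usage with synthetic features; real feature extraction lives elsewhere.
import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator

gen_backend(backend='cpu', batch_size=64)

X = np.random.rand(640, 50).astype('float32')                     # assumed 50-d inputs
y = np.eye(2)[np.random.randint(2, size=640)].astype('float32')   # 2-column targets for SumSquared

train_set = ArrayIterator(X[:512], y[:512], make_onehot=False)
valid_set = ArrayIterator(X[512:], y[512:], make_onehot=False)

wsd = MostCommonWordSense(rounding=False, callback_args={'eval_freq': 1}, epochs=3)
wsd.build()
wsd.fit(valid_set, train_set)
print(wsd.eval(valid_set))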
def DeepCascadeLearning(modelLayers, X_train, Y_train, callbacks,
                        init_uni=Uniform(low=-0.1, high=0.1),
                        testIterator=None, epochs=2,
                        cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
                        opt_gdm=GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)):
    importantLayersIndexes = list()
    i = 0
    outputLayer = Affine(nout=10, init=init_uni, activation=Softmax())
    modelToPredict = None

    for currentLayer in modelLayers:
        if np.shape(currentLayer):
            currentLayer = currentLayer[0]
        if (currentLayer.classnm == 'Convolution') or (currentLayer.classnm == 'Affine'):
            importantLayersIndexes.append(i)
        i += 1

    for i in importantLayersIndexes:
        modelToTrain = list()
        for currentLayer in modelLayers[i:importantLayersIndexes[i + 1]]:
            modelToTrain.append(currentLayer)
        modelToTrain.append(outputLayer)
        modelToTrain = Model(modelToTrain)

        if modelToPredict is None:
            trainIterator = ArrayIterator(X_train, Y_train, nclass=10, lshape=(3, 32, 32))
            x = trainIterator.__iter__()
            callbacks = Callbacks(modelToTrain)
            modelToTrain.fit(trainIterator, optimizer=opt_gdm, num_epochs=epochs,
                             cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
                             callbacks=callbacks)
        else:
            tmpIterator = ArrayIterator(X_train, lshape=(3, 32, 32))
            tmpTrain = modelToPredict.get_outputs(tmpIterator)
            tmpIterator = ArrayIterator(tmpTrain[0:20], Y_train[0:20], nclass=10,
                                        lshape=(32, 30, 30))
            modelToTrain.fit(tmpIterator, optimizer=opt_gdm, num_epochs=epochs, cost=cost)

        if modelToPredict is None:
            modelToPredict = list()
        else:
            modelToPredict = modelToPredict.layers.layers
        for currentLayer in modelToTrain.layers.layers[0:-2]:
            modelToPredict.append(currentLayer)
        modelToPredict = Model(modelToPredict)

    return modelToPredict
class NpSemanticSegClassifier:
    """
    NP Semantic Segmentation classifier model (based on Neon framework).

    Args:
        num_epochs(int): number of epochs to train the model
        **callback_args (dict): callback args keyword arguments to init a Callback for the model
        cost: the model's cost function. Default is 'neon.transforms.CrossEntropyBinary' cost
        optimizer (:obj:`neon.optimizers`): the model's optimizer. Default is
            'neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)'
    """

    def __init__(self, num_epochs, callback_args,
                 optimizer=GradientDescentMomentum(0.07, momentum_coef=0.9)):
        """
        Args:
            num_epochs(int): number of epochs to train the model
            **callback_args (dict): callback args keyword arguments to init Callback for the model
            cost: the model's cost function. Default is 'neon.transforms.CrossEntropyBinary' cost
            optimizer (:obj:`neon.optimizers`): the model's optimizer. Default is
                `neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)`
        """
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args

    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [Affine(nout=first_layer_dens, init=init_norm, activation=Rectlin()),
                  Affine(nout=second_layer_dens, init=init_norm, activation=Rectlin()),
                  Affine(nout=output_layer_dens, init=init_norm,
                         activation=Logistic(shortcut=True))]
        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, test_set, train_set):
        """
        Train and fit the model on the datasets

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set
            train_set (:obj:`neon.data.ArrayIterators`): The train set
            args: callback_args and epochs from ArgParser input
        """
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=test_set, **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs,
                       cost=self.cost, callbacks=callbacks)

    def save(self, model_path):
        """
        Save the model's prm file in model_path location

        Args:
            model_path(str): local path for saving the model
        """
        self.model.save_params(model_path)

    def load(self, model_path):
        """
        Load pre-trained model's .prm file to NpSemanticSegClassifier object

        Args:
            model_path(str): local path for loading the model
        """
        self.model = Model(model_path)

    def eval(self, test_set):
        """
        Evaluate the model's test_set on error_rate, test_accuracy_rate and precision_recall_rate

        Args:
            test_set (ArrayIterator): The test set

        Returns:
            tuple(int): error_rate, test_accuracy_rate and precision_recall_rate
        """
        error_rate = self.model.eval(test_set, metric=Misclassification())
        test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
        precision_recall_rate = self.model.eval(test_set, metric=PrecisionRecall(2))
        return error_rate, test_accuracy_rate, precision_recall_rate

    def get_outputs(self, test_set):
        """
        Classify the dataset on the model

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set

        Returns:
            list(float): model's predictions
        """
        return self.model.get_outputs(test_set)
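A hedged end-to-end sketch for NpSemanticSegClassifier; the 600-d feature width, sample counts, and callback arguments are illustrative assumptions rather than the project's real data pipeline.

# Hypothetical usage with synthetic candidate-NP feature vectors.
import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator

gen_backend(backend='cpu', batch_size=64)

X = np.random.rand(640, 600).astype('float32')
y = np.random.randint(2, size=640)
train_set = ArrayIterator(X[:512], y[:512], nclass=2)
test_set = ArrayIterator(X[512:], y[512:], nclass=2)

clf = NpSemanticSegClassifier(num_epochs=5, callback_args={'eval_freq': 1})
clf.build()
clf.fit(test_set, train_set)
print(clf.eval(test_set))
clf.save('np_semantic_seg.prm')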
    Conv((1, 1, 16), **conv),
    Pooling(8, op="avg"),
    Activation(Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp)

def do_nothing(_):
    pass

callbacks.callbacks = []
callbacks.on_train_begin = do_nothing
callbacks.on_epoch_end = do_nothing

mlp.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost,
        callbacks=callbacks)

opt_metric = 1.0 - mlp.eval(valid_set, metric=Misclassification())
print('Metric = {}'.format(opt_metric))

conn.experiments(experiment.id).observations().create(
    suggestion=suggestion.id,
    value=float(opt_metric[0]),
)
def main():
    # Get command-line parameters
    parser = get_p1b1_parser()
    args = parser.parse_args()
    # print('Args:', args)

    # Get parameters from configuration file
    fileParameters = p1b1.read_config_file(args.config_file)
    # print('Params:', fileParameters)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Construct extension to save model
    ext = p1b1.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    X_train, X_val, X_test = p1b1.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ", np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))

    input_dim = X_train.shape[1]
    output_dim = input_dim

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Set input and target to X_train
    train = ArrayIterator(X_train)
    val = ArrayIterator(X_val)
    test = ArrayIterator(X_test)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define Autoencoder architecture
    layers = []
    reshape = None

    # Autoencoder
    layers_params = gParameters['dense']

    if layers_params is not None:
        if not isinstance(layers_params, list):
            layers_params = list(layers_params)
        # Encoder Part
        for i, l in enumerate(layers_params):
            layers.append(Affine(nout=l, init=initializer_weights,
                                 bias=initializer_bias, activation=activation))
        # Decoder Part
        for i, l in reversed(list(enumerate(layers_params))):
            if i < len(layers) - 1:
                layers.append(Affine(nout=l, init=initializer_weights,
                                     bias=initializer_bias, activation=activation))

    layers.append(Affine(nout=output_dim, init=initializer_weights,
                         bias=initializer_bias, activation=activation))

    # Build Autoencoder model
    ae = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(ae, eval_set=val, eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    ae.fit(train, optimizer=optimizer, num_epochs=gParameters['epochs'],
           cost=cost, callbacks=callbacks)

    # model save
    # save_fname = "model_ae_W" + ext
    # ae.save_params(save_fname)

    # Compute errors
    X_pred = ae.get_outputs(test)
    scores = p1b1.evaluate_autoencoder(X_pred, X_test)
    print('Evaluation on test data:', scores)

    diff = X_pred - X_test
    # Plot histogram of errors comparing input and output of autoencoder
    plt.hist(diff.ravel(), bins='auto')
    plt.title("Histogram of Errors with 'auto' bins")
    plt.savefig('histogram_neon.png')
def train_mlp():
    """
    Train data and save scaling and network weights and biases to file
    to be used by forward prop phase on test data
    """
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()
    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # hyperparameters
    num_epochs = args.epochs

    # preprocessor
    std_scale = preprocessing.StandardScaler(with_mean=True, with_std=True)
    # std_scale = feature_scaler(type='Standardizer', with_mean=True, with_std=True)

    # number of non one-hot encoded features, including ground truth
    num_feat = 4

    # load data from csv-files and rescale
    # training
    traindf = pd.DataFrame.from_csv('data/train.csv')
    ncols = traindf.shape[1]
    # tmpmat = std_scale.fit_transform(traindf.as_matrix())
    # print(std_scale.scale_)
    # print(std_scale.mean_)
    tmpmat = traindf.as_matrix()
    # print(tmpmat[:, 1:num_feat])
    tmpmat[:, :num_feat] = std_scale.fit_transform(tmpmat[:, :num_feat])
    X_train = tmpmat[:, 1:]
    y_train = np.reshape(tmpmat[:, 0], (tmpmat[:, 0].shape[0], 1))

    # validation
    validdf = pd.DataFrame.from_csv('data/validate.csv')
    ncols = validdf.shape[1]
    tmpmat = validdf.as_matrix()
    tmpmat[:, :num_feat] = std_scale.transform(tmpmat[:, :num_feat])
    X_valid = tmpmat[:, 1:]
    y_valid = np.reshape(tmpmat[:, 0], (tmpmat[:, 0].shape[0], 1))

    # test
    testdf = pd.DataFrame.from_csv('data/test.csv')
    ncols = testdf.shape[1]
    tmpmat = testdf.as_matrix()
    tmpmat[:, :num_feat] = std_scale.transform(tmpmat[:, :num_feat])
    X_test = tmpmat[:, 1:]
    y_test = np.reshape(tmpmat[:, 0], (tmpmat[:, 0].shape[0], 1))

    # setup a training set iterator
    train_set = CustomDataIterator(X_train, lshape=(X_train.shape[1]), y_c=y_train)
    # setup a validation data set iterator
    valid_set = CustomDataIterator(X_valid, lshape=(X_valid.shape[1]), y_c=y_valid)
    # setup a test data set iterator
    test_set = CustomDataIterator(X_test, lshape=(X_test.shape[1]), y_c=y_test)

    # setup weight initialization function
    init_norm = Xavier()

    # setup model layers
    layers = [Affine(nout=X_train.shape[1], init=init_norm, activation=Rectlin()),
              Dropout(keep=0.5),
              Affine(nout=X_train.shape[1] // 2, init=init_norm, activation=Rectlin()),
              Linear(nout=1, init=init_norm)]

    # setup cost function as smooth L1 loss
    cost = GeneralizedCost(costfunc=SmoothL1Loss())

    # setup optimizer
    # schedule = ExpSchedule(decay=0.3)
    # optimizer = GradientDescentMomentum(0.0001, momentum_coef=0.9,
    #                                     stochastic_round=args.rounding, schedule=schedule)
    optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1.e-8)

    # initialize model object
    mlp = Model(layers=layers)

    # configure callbacks
    if args.callback_args['eval_freq'] is None:
        args.callback_args['eval_freq'] = 1
    callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)
    callbacks.add_early_stop_callback(stop_func)
    callbacks.add_save_best_state_callback(os.path.join(args.data_dir,
                                                        "early_stop-best_state.pkl"))

    # run fit
    mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
            cost=cost, callbacks=callbacks)

    # evaluate model
    print('Evaluation Error = %.4f' % (mlp.eval(valid_set, metric=SmoothL1Metric())))
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # Saving the model
    print('Saving model parameters!')
    mlp.save_params("model/homeapp_model.prm")

    # Reloading saved model
    # This should go in run.py
    mlp = Model("model/homeapp_model.prm")
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # save the preprocessor vectors:
    np.savez("model/homeapp_preproc", mean=std_scale.mean_, std=std_scale.scale_)

    return 1
args = parser.parse_args()

train_idx, val_idx = create_index_files(args.data_dir)

common_params = dict(sampling_freq=22050, clip_duration=31000, frame_duration=20)
train_params = AudioParams(random_scale_percent=5, **common_params)
val_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)

train = DataLoader(set_name='genres-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
val = DataLoader(set_name='genres-val', media_params=val_params,
                 index_file=val_idx, shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [Conv((5, 5, 64), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((5, 5, 64), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          BiRNN(256, init=init, activation=Rectlin(), reset_cells=True),
          RecurrentMean(),
          Affine(128, init=init, batch_norm=True, activation=Rectlin()),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adadelta()
metric = Misclassification()
callbacks = Callbacks(model, eval_set=val, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
print('Misclassification error = %.1f%%' % (model.eval(val, metric=metric) * 100))
# scale LR by 0.1 every 20 epochs (this assumes batch_size = 256)
weight_sched = Schedule(20, 0.1)
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)

if args.model_file is not None:
    model.load_params(args.model_file)

if not args.test_only:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)

mets = model.eval(test, metric=valmetric)
print('Validation set metrics:')
print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' % (
    mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
                                  stochastic_round=args.rounding)

bn = True
layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling((2, 2)),
          Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling((2, 2)),
          Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
          Affine(nout=10, init=init_uni, activation=Softmax())]

if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

model = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)

model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)
error_rate = model.eval(test, metric=Misclassification())
neon_logger.display('Misclassification error = %.1f%%' % (error_rate * 100))
# define layers
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb,
                pad_idx=0, update=embedding_update),
    LSTM(hidden_size, init_glorot, activation=Tanh(), gate_activation=Logistic(),
         reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax())
]

# set the cost, metrics, optimizer
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
metric = Accuracy()
model = Model(layers=layers)
optimizer = Adagrad(learning_rate=0.01)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)

# eval model
print("\nTrain Accuracy -", 100 * model.eval(train_set, metric=metric))
print("Test Accuracy -", 100 * model.eval(valid_set, metric=metric))
        Dropout(0.4),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Dropout(0.2),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=5, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())
    ]
}[subj]

model = Model(layers=layers)
opt = Adagrad(learning_rate=rate)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)

if args.validate_mode:
    evaluator = Evaluator(subj, data_dir, test)
    callbacks.add_callback(evaluator)
    preds_name = 'eval.'
else:
    preds_name = 'test.'

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
model.fit(train, optimizer=opt, num_epochs=nepochs, cost=cost, callbacks=callbacks)

preds = model.get_outputs(test)[:, 1]
preds_file = preds_name + str(subj) + '.npy'
np.save(os.path.join(out_dir, preds_file), preds)
class SequenceChunker(object):
    """
    Sequence chunker model (Neon based)

    Args:
        sentence_length (str): max sentence length
        token_vocab_size (int): word vocabulary size
        pos_vocab_size (int, optional): POS vocabulary size
        char_vocab_size (int, optional): characters vocabulary size
        max_char_word_length (int, optional): max word length in characters
        token_embedding_size (int, optional): word embedding dims
        pos_embedding_size (int, optional): POS embedding dims
        char_embedding_size (int, optional): character embedding dims
        num_labels (int, optional): number of output labels possible per token
        lstm_hidden_size (int, optional): LSTM hidden size
        num_lstm_layers (int, optional): number of LSTM layers
        use_external_embedding (bool, optional): input is provided as external word embedding
        dropout (float, optional): dropout rate
    """

    def __init__(self, sentence_length, token_vocab_size,
                 pos_vocab_size=None,
                 char_vocab_size=None,
                 max_char_word_length=20,
                 token_embedding_size=None,
                 pos_embedding_size=None,
                 char_embedding_size=None,
                 num_labels=None,
                 lstm_hidden_size=100,
                 num_lstm_layers=1,
                 use_external_embedding=None,
                 dropout=0.5):

        init = GlorotUniform()
        tokens = []
        if use_external_embedding is None:
            tokens.append(LookupTable(vocab_size=token_vocab_size,
                                      embedding_dim=token_embedding_size,
                                      init=init,
                                      pad_idx=0))
        else:
            tokens.append(DataInput())
        tokens.append(Reshape((-1, sentence_length)))
        f_layers = [tokens]

        # add POS tag input
        if pos_vocab_size is not None and pos_embedding_size is not None:
            f_layers.append([
                LookupTable(vocab_size=pos_vocab_size,
                            embedding_dim=pos_embedding_size,
                            init=init,
                            pad_idx=0),
                Reshape((-1, sentence_length))
            ])

        # add Character RNN input
        if char_vocab_size is not None and char_embedding_size is not None:
            char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                         embedding_dim=char_embedding_size,
                                         init=init,
                                         pad_idx=0)
            char_nn = [char_lut_layer,
                       TimeDistBiLSTM(char_embedding_size, init,
                                      activation=Logistic(),
                                      gate_activation=Tanh(),
                                      reset_cells=True,
                                      reset_freq=max_char_word_length),
                       TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                       Reshape((-1, sentence_length))]
            f_layers.append(char_nn)

        layers = []
        if len(f_layers) == 1:
            layers.append(f_layers[0][0])
        else:
            layers.append(MergeMultistream(layers=f_layers, merge="stack"))
            layers.append(Reshape((-1, sentence_length)))

        layers += [DeepBiLSTM(lstm_hidden_size, init,
                              activation=Logistic(),
                              gate_activation=Tanh(),
                              reset_cells=True,
                              depth=num_lstm_layers),
                   Dropout(keep=dropout),
                   Affine(num_labels, init, bias=init, activation=Softmax())]

        self._model = Model(layers=layers)

    def fit(self, dataset, optimizer, cost, callbacks, epochs=10):
        """
        fit a model

        Args:
            dataset: train/test set of CONLL2000 dataset
            optimizer: optimizer (Neon based)
            cost: cost function (Neon based)
            callbacks: callbacks (Neon based)
            epochs (int, optional): number of epochs to train
        """
        self._model.fit(dataset, optimizer=optimizer, num_epochs=epochs,
                        cost=cost, callbacks=callbacks)

    def predict(self, dataset):
        """
        predict output of given dataset

        Args:
            dataset: Neon based iterator

        Returns:
            prediction on given dataset
        """
        return self._model.get_outputs(dataset)

    def save(self, path):
        """
        Save model weights to path

        Args:
            path (str): path to weights file
        """
        self._model.save_params(path)

    def get_model(self):
        """
        Get model

        Returns:
            Neon model object
        """
        return self._model
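A hedged sketch of training the SequenceChunker above; the vocabulary size, label count, optimizer choice, and the train_set/test_set iterators are assumptions (the real CONLL2000 pipeline is defined elsewhere).

# Hypothetical training driver; train_set and test_set iterators are assumed to exist.
from neon.callbacks.callbacks import Callbacks
from neon.layers import GeneralizedCost
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyMulti

chunker = SequenceChunker(sentence_length=50,
                          token_vocab_size=20000,
                          token_embedding_size=100,
                          num_labels=23)  # 23 chunk tags assumed

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9)
callbacks = Callbacks(chunker.get_model(), eval_set=test_set)

chunker.fit(train_set, optimizer, cost, callbacks, epochs=10)
chunker.save('chunker.prm')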
                     init=init_uni, bias=init_cst, padding=0, activation=Rectlin()))
layers.append(Pooling(fshape=2, strides=2))
layers.append(Affine(nout=2, init=init_uni, activation=Softmax()))

from neon.models import Model
model = Model(layers)

from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyBinary
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

from neon.optimizers import GradientDescentMomentum
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9)

from neon.callbacks.callbacks import Callbacks
callbacks = Callbacks(model, train_set)

model.fit(dataset=train_set, cost=cost, optimizer=optimizer, num_epochs=10,
          callbacks=callbacks)

from neon.transforms import Misclassification
error_pct = 100 * model.eval(test_set, metric=Misclassification())
accuracy_fp = 100 - error_pct
print('Model Accuracy : %.1f%%' % accuracy_fp)
class WordseqRegressor:
    def __init__(self, pickle_model="", datadir=None):
        self.maxlen = 100
        self.n_words = 100000
        parser = NeonArgparser(__doc__)
        self.args = parser.parse_args()
        self.args.batch_size = self.batch_size = 2048
        # self.args.deterministic = None
        self.args.rng_seed = 0
        print(extract_valid_args(self.args, gen_backend))
        self.be = gen_backend(**extract_valid_args(self.args, gen_backend))

        embedding_dim = 100
        init_emb = Uniform(-0.1 / embedding_dim, 0.1 / embedding_dim)
        init_glorot = GlorotUniform()
        self.layers = [
            LookupTable(vocab_size=self.n_words, embedding_dim=embedding_dim,
                        init=init_emb, pad_idx=0, update=True, name="LookupTable"),
            Dropout(keep=0.5),
            BiLSTM(100, init=init_glorot, activation=Tanh(), gate_activation=Logistic(),
                   reset_cells=True, split_inputs=False, name="BiLSTM"),
            RecurrentMean(),
            Affine(1, init_glorot, bias=init_glorot, activation=Identity(), name="Affine")
        ]

        self.wordbatch = wordbatch.WordBatch(normalize_text, n_words=self.n_words,
                                             extractors=[(wordbatch.WordSeq,
                                                          {"seq_maxlen": self.maxlen})])

        if datadir is None:
            self.model = Model(self.layers)
            self.model.load_params(pickle_model)
            self.wordbatch = pkl.load(gzip.open(pickle_model + ".wb", 'rb'))
        else:
            self.train(datadir, pickle_model)

    def remove_unks(self, x):
        return [[self.n_words if w >= self.n_words else w for w in sen] for sen in x]

    def format_texts(self, texts):
        return self.remove_unks(self.wordbatch.transform(texts))

    class ThreadWithReturnValue(Thread):
        def __init__(self, group=None, target=None, name=None, args=(), kwargs={},
                     Verbose=None):
            Thread.__init__(self, group, target, name, args, kwargs, Verbose)
            self._return = None

        def run(self):
            if self._Thread__target is not None:
                self._return = self._Thread__target(*self._Thread__args,
                                                    **self._Thread__kwargs)

        def join(self):
            Thread.join(self)
            return self._return

    def train(self, datadir, pickle_model=""):
        texts = []
        labels = []
        training_data = os.listdir(datadir)
        rcount = 0
        texts2 = []
        batchsize = 100000

        t = None
        for jsonfile in training_data:
            with open(datadir + "/" + jsonfile, u'r') as inputfile:
                for line in inputfile:
                    # if rcount > 1000000: break
                    try:
                        line = json.loads(line.strip())
                    except:
                        continue
                    for review in line["Reviews"]:
                        rcount += 1
                        if rcount % 100000 == 0:
                            print(rcount)
                        if rcount % 8 != 0:
                            continue
                        if "Overall" not in review["Ratings"]:
                            continue
                        texts.append(review["Content"])
                        labels.append((float(review["Ratings"]["Overall"]) - 3) * 0.5)
                        if len(texts) % batchsize == 0:
                            if t is not None:
                                texts2.append(t.join())
                            t = self.ThreadWithReturnValue(target=self.wordbatch.transform,
                                                           args=(texts,))
                            t.start()
                            texts = []
        texts2.append(t.join())
        texts2.append(self.wordbatch.transform(texts))
        del texts
        texts = sp.vstack(texts2)

        self.wordbatch.dictionary_freeze = True

        train = [np.asarray(texts, dtype='int32'),
                 np.asanyarray(labels, dtype='float32')]
        train[1].shape = (train[1].shape[0], 1)

        num_epochs = 10
        cost = GeneralizedCost(costfunc=SumSquared())
        self.model = Model(layers=self.layers)
        optimizer = Adam(learning_rate=0.01)

        index_shuf = list(range(len(train[0])))
        random.shuffle(index_shuf)
        train[0] = np.asarray([train[0][x] for x in index_shuf], dtype='int32')
        train[1] = np.asarray([train[1][x] for x in index_shuf], dtype='float32')
        train_iter = ArrayIterator(train[0], train[1], nclass=1, make_onehot=False)

        self.model.fit(train_iter, optimizer=optimizer, num_epochs=num_epochs,
                       cost=cost,
                       callbacks=Callbacks(self.model, **self.args.callback_args))

        if pickle_model != "":
            self.model.save_params(pickle_model)
            with gzip.open(pickle_model + ".wb", 'wb') as model_file:
                pkl.dump(self.wordbatch, model_file, protocol=2)

    def predict_batch(self, texts):
        input = np.array(self.format_texts(texts))
        output = np.zeros((texts.shape[0], 1))
        test = ArrayIterator(input, output, nclass=1, make_onehot=False)
        results = [row[0] for row in self.model.get_outputs(test)]
        return results
def main():
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    # mat_data = sio.loadmat('../data/timeseries/02_timeseries.mat')
    # ts = V1TimeSeries(mat_data['timeseries'], mat_data['stim'], binning=10)

    seq_len = 30
    hidden = 20

    be = gen_backend(**extract_valid_args(args, gen_backend))

    kohn = KohnV1Dataset(path='../tmp/')
    kohn.gen_iterators(seq_len)
    import pdb; pdb.set_trace()

    train_set = V1IteratorSequence(ts.train, seq_len, return_sequences=False)
    valid_set = V1IteratorSequence(ts.test, seq_len, return_sequences=False)

    init = GlorotUniform()

    # dataset = MNIST(path=args.data_dir)
    # (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    # train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
    # valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

    # # weight initialization
    # init_norm = Gaussian(loc=0.0, scale=0.01)

    # # initialize model
    # path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])
    # path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])
    # layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
    #           Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    spike_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(),
             reset_cells=False),
        Dropout(keep=0.5),
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(),
             reset_cells=False),
        # Dropout(keep=0.85),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(),
               name='spike_in')])

    stim_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(),
             reset_cells=False),
        Dropout(keep=0.5),
        RecurrentLast(),
        Affine(1, init, bias=init, activation=Identity(), name='stim')])

    layers = [
        MergeMultistream(layers=[spike_rnn_path, stim_rnn_path], merge="stack"),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(),
               name='spike_out'),
        Round()
    ]

    model = Model(layers=layers)
    sched = ExpSchedule(decay=0.7)

    # cost = GeneralizedCost(SumSquared())
    cost = GeneralizedCost(MeanSquared())

    optimizer_two = RMSProp(stochastic_round=args.rounding)
    optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                                            schedule=sched)
    opt = MultiOptimizer({'default': optimizer_one,
                          'Bias': optimizer_two,
                          'special_linear': optimizer_two})

    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
    callbacks.add_hist_callback(filter_key=['W'])
    # callbacks.add_callback(MetricCallback(eval_set=valid_set,
    #                                       metric=FractionExplainedVariance(),
    #                                       epoch_freq=args.eval_freq))
    # callbacks.add_callback(MetricCallback(eval_set=valid_set, metric=Accuracy(),
    #                                       epoch_freq=args.eval_freq))

    model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)

    train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
    valid_output = model.get_outputs(valid_set).reshape(-1, valid_set.nfeatures)
    train_target = train_set.y_series
    valid_target = valid_set.y_series

    tfev = fev(train_output, train_target, train_set.mean)
    vfev = fev(valid_output, valid_target, valid_set.mean)

    neon_logger.display('Train FEV: %g, Valid FEV: %g' % (tfev, vfev))
    # neon_logger.display('Train Mean: %g, Valid Mean: %g' % (train_set.mean, valid_set.mean))

    plt.figure()
    plt.plot(train_output[:, 0], train_output[:, 1], 'bo', label='prediction')
    plt.plot(train_target[:, 0], train_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on training set')
    plt.savefig('neon_series_training_output.png')

    plt.figure()
    plt.plot(valid_output[:, 0], valid_output[:, 1], 'bo', label='prediction')
    plt.plot(valid_target[:, 0], valid_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on validation set')
    plt.savefig('neon_series_validation_output.png')
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))  # not used after this layer

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())

# fit and validate
optimizer = Adam(learning_rate=0.001)

# configure callbacks
# callbacks = Callbacks(model, eval_set=eval_set)
callbacks = Callbacks(model, train_set=train)

model.fit(train, cost=cost, optimizer=optimizer, num_epochs=10, callbacks=callbacks)
def main():
    # larger batch sizes may not fit on GPU
    parser = NeonArgparser(__doc__, default_overrides={'batch_size': 4})
    parser.add_argument("--bench", action="store_true",
                        help="run benchmark instead of training")
    parser.add_argument("--num_classes", type=int, default=12,
                        help="number of classes in the annotation")
    parser.add_argument("--height", type=int, default=256, help="image height")
    parser.add_argument("--width", type=int, default=512, help="image width")
    args = parser.parse_args(gen_be=False)

    # check that image dimensions are powers of 2
    if (args.height & (args.height - 1)) != 0:
        raise TypeError("Height must be a power of 2.")
    if (args.width & (args.width - 1)) != 0:
        raise TypeError("Width must be a power of 2.")

    (c, h, w) = (args.num_classes, args.height, args.width)

    # need to use the backend with the new upsampling layer implementation
    be = NervanaGPU_Upsample(rng_seed=args.rng_seed, device_id=args.device_id)
    # set batch size
    be.bsz = args.batch_size
    # couple backend to global neon object
    NervanaObject.be = be

    shape = dict(channel_count=3, height=h, width=w, subtract_mean=False)
    train_params = ImageParams(center=True, flip=False, scale_min=min(h, w),
                               scale_max=min(h, w), aspect_ratio=0, **shape)
    test_params = ImageParams(center=True, flip=False, scale_min=min(h, w),
                              scale_max=min(h, w), aspect_ratio=0, **shape)
    common = dict(target_size=h * w, target_conversion='read_contents', onehot=False,
                  target_dtype=np.uint8, nclasses=args.num_classes)

    train_set = PixelWiseImageLoader(set_name='train', repo_dir=args.data_dir,
                                     media_params=train_params, shuffle=False,
                                     subset_percent=100,
                                     index_file=os.path.join(args.data_dir, 'train_images.csv'),
                                     **common)
    val_set = PixelWiseImageLoader(set_name='val', repo_dir=args.data_dir,
                                   media_params=test_params,
                                   index_file=os.path.join(args.data_dir, 'val_images.csv'),
                                   **common)

    # initialize model object
    layers = gen_model(c, h, w)
    segnet_model = Model(layers=layers)

    # configure callbacks
    callbacks = Callbacks(segnet_model, eval_set=val_set, **args.callback_args)

    opt_gdm = GradientDescentMomentum(1.0e-6, 0.9, wdecay=0.0005, schedule=Schedule())
    opt_biases = GradientDescentMomentum(2.0e-6, 0.9, schedule=Schedule())
    opt_bn = GradientDescentMomentum(1.0e-6, 0.9, schedule=Schedule())
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'BatchNorm': opt_bn})

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if args.bench:
        segnet_model.initialize(train_set, cost=cost)
        segnet_model.benchmark(train_set, cost=cost, optimizer=opt)
        sys.exit(0)
    else:
        segnet_model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost,
                         callbacks=callbacks)

    # get the trained segnet model outputs for the validation set
    outs_val = segnet_model.get_outputs(val_set)

    with open('outputs.pkl', 'wb') as fid:
        pickle.dump(outs_val, fid, -1)
p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]

# setup cost function as CrossEntropy
cost = Multicost(
    costs=[
        GeneralizedCost(costfunc=CrossEntropyMulti()),
        GeneralizedCost(costfunc=CrossEntropyBinary()),
        GeneralizedCost(costfunc=CrossEntropyBinary()),
    ],
    weights=[1, 0.0, 0.0],
)

# setup optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                    stochastic_round=args.rounding)

# initialize model object
alphas = [1, 0.25, 0.25]
mlp = Model(layers=Tree([p1, p2, p3], alphas=alphas))

# setup standard fit callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

# run fit
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)

logging.getLogger("neon").info(
    "Misclassification error = %.1f%%",
    (mlp.eval(valid_set, metric=Misclassification()) * 100)
)
print("Misclassification error = %.1f%%"
      % (mlp.eval(valid_set, metric=Misclassification()) * 100))
layers = [
    Affine(nout=50, init=w, bias=b, activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=50, init=w, bias=b, activation=Rectlin()),
    Dropout(keep=0.4),
    Affine(nout=3, init=w, bias=b, activation=Softmax()),
    Dropout(keep=0.3)
]

# Optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                    stochastic_round=args.rounding)

# Cost
cost = GeneralizedCost(costfunc=MeanSquared())

model = Model(layers=layers)
callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

# Training
model.fit(train_iter, optimizer=optimizer, num_epochs=1, cost=cost, callbacks=callbacks)

# Evaluate
evaluate(model, val_iter, Metric=Misclassification())
if args.datatype in [np.float32, np.float64]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9,
                                      stochastic_round=args.rounding)
elif args.datatype in [np.float16]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale, momentum_coef=0.9,
                                      stochastic_round=args.rounding)

layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
          Pooling((2, 2)),
          Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
          Pooling((2, 2)),
          Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
          Affine(nout=10, init=init_uni, activation=Softmax())]

if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

model = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
# callbacks = Callbacks.load_callbacks(callbacks.get_description(), model, data=[train, test])

model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

print('Misclassification error = %.1f%%'
      % (model.eval(test, metric=Misclassification()) * 100))
# Affine(nout=500, init=init_uni, activation=Rectlin()),
# Affine(nout=10, init=init_uni, activation=Softmax())]
# learning_rate = 0.005
# momentum = 0.9

cnn = Model(layers=layers)

# - cost function
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# - learning rule
optimizer = GradientDescentMomentum(learning_rate, momentum_coef=momentum)

# default callbacks report training progress (one progress bar per epoch)
callbacks = Callbacks(cnn, eval_set=test_set, **args.callback_args)

# put everything together!
cnn.fit(train_set, optimizer=optimizer, num_epochs=epochs, cost=cost, callbacks=callbacks)

# # Calculate test set results
# results = cnn.get_outputs(test_set)
# dump(cnn, "cnn_0_005.jbl")

# # work out the performance!
# error = cnn.eval(test_set, metric=Misclassification())
def main():
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    # print('Args:', args)

    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    # print('Params:', fileParameters)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(seed=seed,
                             dtype=gParameters['datatype'],
                             val_split=gParameters['validation_split'],
                             test_cell_split=gParameters['test_cell_split'],
                             cell_features=gParameters['cell_features'],
                             drug_features=gParameters['drug_features'],
                             feature_subsample=gParameters['feature_subsample'],
                             scaling=gParameters['scaling'],
                             scramble=gParameters['scramble'],
                             min_logconc=gParameters['min_logconc'],
                             max_logconc=gParameters['max_logconc'],
                             subsample=gParameters['subsample'],
                             category_cutoffs=gParameters['category_cutoffs'])

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:
        # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                layers.append(Affine(nout=layer, init=initializer_weights,
                                     bias=initializer_bias, activation=activation))
            if gParameters['drop']:
                layers.append(Dropout(keep=(1 - gParameters['drop'])))
    else:
        # Build convolutional layers
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer_weights,
                               activation=activation))
        if gParameters['pool']:
            layers.append(Pooling((1, gParameters['pool'])))

    layers.append(Affine(nout=1, init=initializer_weights,
                         bias=initializer_bias,
                         activation=neon.transforms.Identity()))

    # Build model
    model = Model(layers=layers)

    # Define neon data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)
    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader, ndata=train_samples,
                                lshape=reshape, datatype=gParameters['datatype'])
    val_iter = ConcatDataIter(loader, partition='val', ndata=val_samples,
                              lshape=reshape, datatype=gParameters['datatype'])

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(model, eval_set=val_iter, eval_freq=1)  # **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=gParameters['epochs'],
              cost=cost, callbacks=callbacks)
relu = Rectlin()
layers = []
layers.append(Dropout(keep=.8))
layers.append(Conv((3, 3, 96), init=init_uni, batch_norm=True, activation=relu))
layers.append(Conv((3, 3, 96), init=init_uni, batch_norm=True, activation=relu, pad=1))
layers.append(Conv((3, 3, 96), init=init_uni, batch_norm=True, activation=relu, pad=1, strides=2))
layers.append(Dropout(keep=.5))
layers.append(Conv((3, 3, 192), init=init_uni, batch_norm=True, activation=relu, pad=1))
layers.append(Conv((3, 3, 192), init=init_uni, batch_norm=True, activation=relu, pad=1))
layers.append(Conv((3, 3, 192), init=init_uni, batch_norm=True, activation=relu, pad=1, strides=2))
layers.append(Dropout(keep=.5))
layers.append(Conv((3, 3, 192), init=init_uni, batch_norm=True, activation=relu))
layers.append(Conv((1, 1, 192), init=init_uni, batch_norm=True, activation=relu))
layers.append(Conv((1, 1, 16), init=init_uni, activation=relu))
layers.append(Pooling(6, op="avg"))
layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, train_set,
                      output_file=args.output_file,
                      valid_set=valid_set,
                      valid_freq=args.validation_freq,
                      progress_bar=args.progress_bar)

mlp.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

print('Misclassification error = %.1f%%' %
      (mlp.eval(valid_set, metric=Misclassification()) * 100))
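The block above assumes init_uni, opt_gdm, num_epochs and the data iterators are defined earlier in the script. A minimal sketch of plausible definitions (the specific values are assumptions, not taken from the original):

# Assumed setup for the names referenced above (illustrative values only).
from neon.initializers import Uniform
from neon.optimizers import GradientDescentMomentum

init_uni = Uniform(low=-0.1, high=0.1)                      # weight initializer for the Conv layers
opt_gdm = GradientDescentMomentum(0.01, momentum_coef=0.9)  # plain SGD with momentum
num_epochs = args.epochs                                    # from the standard neon arg parser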
class NpSemanticSegClassifier:
    """
    NP Semantic Segmentation classifier model (based on the Neon framework).

    The cost function is fixed to `neon.transforms.CrossEntropyBinary`.

    Args:
        num_epochs (int): number of epochs to train the model
        callback_args (dict): keyword arguments used to initialize the model's Callbacks
        optimizer (:obj:`neon.optimizers`): the model's optimizer. Default is
            `neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)`
    """

    def __init__(self, num_epochs, callback_args,
                 optimizer=GradientDescentMomentum(0.07, momentum_coef=0.9)):
        """
        Args:
            num_epochs (int): number of epochs to train the model
            callback_args (dict): keyword arguments used to initialize the model's Callbacks
            optimizer (:obj:`neon.optimizers`): the model's optimizer. Default is
                `neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)`
        """
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args

    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [Affine(nout=first_layer_dens, init=init_norm, activation=Rectlin()),
                  Affine(nout=second_layer_dens, init=init_norm, activation=Rectlin()),
                  Affine(nout=output_layer_dens, init=init_norm,
                         activation=Logistic(shortcut=True))]
        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, test_set, train_set):
        """
        Train and fit the model on the datasets

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set
            train_set (:obj:`neon.data.ArrayIterators`): The train set
        """
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=test_set, **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs,
                       cost=self.cost, callbacks=callbacks)

    def save(self, model_path):
        """
        Save the model's .prm file to the model_path location

        Args:
            model_path (str): local path for saving the model
        """
        self.model.save_params(model_path)

    def load(self, model_path):
        """
        Load a pre-trained model's .prm file into the NpSemanticSegClassifier object

        Args:
            model_path (str): local path for loading the model
        """
        self.model = Model(model_path)

    def eval(self, test_set):
        """
        Evaluate the model on test_set: error rate, accuracy and precision/recall

        Args:
            test_set (ArrayIterator): The test set

        Returns:
            tuple: error_rate, test_accuracy_rate and precision_recall_rate
        """
        error_rate = self.model.eval(test_set, metric=Misclassification())
        test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
        precision_recall_rate = self.model.eval(test_set, metric=PrecisionRecall(2))
        return error_rate, test_accuracy_rate, precision_recall_rate

    def get_outputs(self, test_set):
        """
        Classify the dataset with the model

        Args:
            test_set (:obj:`neon.data.ArrayIterators`): The test set

        Returns:
            list(float): model's predictions
        """
        return self.model.get_outputs(test_set)
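A short usage sketch for the class above. The epoch count, callback arguments and file name are placeholders, and train_set/test_set are neon ArrayIterator objects built elsewhere:

# Example usage sketch for NpSemanticSegClassifier (placeholder values).
classifier = NpSemanticSegClassifier(num_epochs=200, callback_args={'eval_freq': 1})
classifier.build()
classifier.fit(test_set, train_set)                 # note the (test, train) argument order
error_rate, accuracy, precision_recall = classifier.eval(test_set)
classifier.save('np_semantic_seg.prm')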
# Model definition: a small LeNet-style CNN.
layers = [Conv(fshape=(5, 5, 16), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(5, 5, 32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Affine(nout=500, init=init_uni, activation=Rectlin()),
          Affine(nout=10, init=init_uni, activation=Softmax())]

# Hyperparameters for the learning rule.
learning_rate = 0.005
momentum = 0.9

cnn = Model(layers=layers)

# Cost function
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# Learning rule
optimizer = GradientDescentMomentum(learning_rate, momentum_coef=momentum)

# Standard callbacks: progress bar per epoch, evaluation on the test set.
callbacks = Callbacks(cnn, eval_set=test_set, **args.callback_args)

# Put everything together and train.
cnn.fit(train_set, optimizer=optimizer, num_epochs=epochs, cost=cost, callbacks=callbacks)

# Calculate test set results:
# results = cnn.get_outputs(test_set)
# dump(cnn, "cnn_0_005.jbl")
# error = cnn.eval(test_set, metric=Misclassification())
def main():
    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()
    # print('Args:', args)

    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)
    # print('Params:', fileParameters)

    # Correct for arguments set by default by the neon parser
    # (i.e. fall back to the config file instead of the neon parser default,
    # but if the option was actually given on the command line, keep the command-line value).
    # This applies to the conflicting parameters: batch_size, epochs and rng_seed.
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration.
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    # (X_train, y_train), (X_test, y_test) = p1b2.load_data(gParameters, seed)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = p1b2.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)
    print("Shape y_train: ", y_train.shape)
    print("Shape y_val: ", y_val.shape)
    print("Shape y_test: ", y_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ", np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_train --> Min: ", np.min(y_train), ", max: ", np.max(y_train))
    print("Range y_val --> Min: ", np.min(y_val), ", max: ", np.max(y_val))
    print("Range y_test --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    input_dim = X_train.shape[1]
    num_classes = int(np.max(y_train)) + 1
    output_dim = num_classes
    # The backend represents the classes with a one-hot encoding
    # (but it requires integer class labels as input!)

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['data_type'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    train = ArrayIterator(X=X_train, y=y_train, nclass=num_classes)
    val = ArrayIterator(X=X_val, y=y_val, nclass=num_classes)
    test = ArrayIterator(X=X_test, y=y_test, nclass=num_classes)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define MLP architecture
    layers = []

    for layer in gParameters['dense']:
        if layer:
            layers.append(Affine(nout=layer,
                                 init=initializer_weights,
                                 bias=initializer_bias,
                                 activation=activation))
        if gParameters['dropout']:
            layers.append(Dropout(keep=(1 - gParameters['dropout'])))

    layers.append(Affine(nout=output_dim,
                         init=initializer_weights,
                         bias=initializer_bias,
                         activation=activation))

    # Build MLP model
    mlp = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(mlp, eval_set=val, metric=Accuracy(), eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    mlp.fit(train, optimizer=optimizer, num_epochs=gParameters['epochs'],
            cost=cost, callbacks=callbacks)

    # Save the model
    # save_fname = "model_mlp_W_" + ext
    # mlp.save_params(save_fname)

    # Evaluate the model on the test set
    print('Model evaluation by neon: ', mlp.eval(test, metric=Accuracy()))

    y_pred = mlp.get_outputs(test)
    # print("Shape y_pred: ", y_pred.shape)
    scores = p1b2.evaluate_accuracy(p1_common.convert_to_class(y_pred), y_test)
    print('Evaluation on test data:', scores)
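The predictions returned by mlp.get_outputs(test) are per-class scores, one row per sample; p1_common.convert_to_class presumably reduces them to integer labels before scoring. A plain numpy equivalent (an assumption for illustration, not the CANDLE implementation) would be:

import numpy as np

# One predicted class index per sample (assumed equivalent of convert_to_class).
y_pred_labels = np.argmax(y_pred, axis=1)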
# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])
path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])
layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)
layers = [Conv((3, 3, 32), init=init_uni, activation=Rectlin(), batch_norm=False),
          Conv((3, 3, 32), init=init_uni, activation=Rectlin(), batch_norm=False),
          Pooling((2, 2)),
          Dropout(keep=0.75),
          Conv((3, 3, 64), init=init_uni, activation=Rectlin(), batch_norm=False),
          Conv((3, 3, 64), init=init_uni, activation=Rectlin(), batch_norm=False),
          Pooling((2, 2)),
          Dropout(keep=0.75),
          Affine(nout=512, init=init_uni, activation=Rectlin(), batch_norm=False),
          Dropout(keep=0.5),
          Affine(nout=10, init=init_uni, activation=Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

pretrainedModel = DeepCascadeLearning(layers, X_train, y_train, callbacks)

mlp.fit(train, optimizer=opt_gdm, num_epochs=5, cost=cost, callbacks=callbacks)

# rebuild a model from the trained layers and train it again
newLayers = list()
for i in mlp.layers.layers:
    newLayers.append(i)
newLayers = Model(newLayers)

callbacks = Callbacks(newLayers, eval_set=test, **args.callback_args)
newLayers.fit(train, optimizer=opt_gdm, num_epochs=5, cost=cost, callbacks=callbacks)

print('Misclassification error = %.1f%%' %
      (mlp.eval(test, metric=Misclassification()) * 100))
      Affine(nout=16, linear_name="b1_l1", **normrelu),
      Affine(nout=10, linear_name="b1_l2", **normsigm)]

p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]

# setup cost function as CrossEntropy
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyBinary()),
                        GeneralizedCost(costfunc=CrossEntropyBinary())],
                 weights=[1, 0., 0.])

# setup optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9, stochastic_round=args.rounding)

# initialize model object
alphas = [1, 0.25, 0.25]
mlp = Model(layers=Tree([p1, p2, p3], alphas=alphas))

# setup standard fit callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

# run fit
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)

logging.getLogger('neon').info("Misclassification error = %.1f%%",
                               (mlp.eval(valid_set, metric=Misclassification()) * 100))
print('Misclassification error = %.1f%%' %
      (mlp.eval(valid_set, metric=Misclassification()) * 100))
# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
          Affine(babi.vocab_size, init=GlorotUniform(), activation=Softmax())]

model = Model(layers=layers)

# setup callbacks
callbacks = Callbacks(model, train_set, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=Adam(),
          num_epochs=args.epochs,
          cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
          callbacks=callbacks)

# output accuracies
print('Train Accuracy = %.1f%%' % (model.eval(train_set, metric=Accuracy()) * 100))
print('Test Accuracy = %.1f%%' % (model.eval(valid_set, metric=Accuracy()) * 100))
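lookup_params, rlayer_obj and rlayer_params are configured elsewhere in the script. One hypothetical configuration for the two input paths, with the embedding size, hidden size and the choice of GRU all being illustrative assumptions rather than values from the original:

# Hypothetical configuration for the story/query paths (values are assumptions).
from neon.initializers import GlorotUniform, Uniform
from neon.layers import GRU
from neon.transforms import Tanh, Logistic

lookup_params = dict(vocab_size=babi.vocab_size, embedding_dim=50,
                     init=Uniform(-0.08, 0.08))
rlayer_obj = GRU
rlayer_params = dict(output_size=100, init=GlorotUniform(),
                     activation=Tanh(), gate_activation=Logistic(),
                     reset_cells=True)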
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1))  # 12->6
layers.append(Dropout(keep=0.5))
layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))
layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)
if args.deconv:
    callbacks.add_deconv_callback(train, test)

mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

test.exit_batch_provider()
train.exit_batch_provider()
from neon.optimizers import RMSProp, GradientDescentMomentum

optimizer = RMSProp()

# callbacks: progress bar, periodic evaluation and early stopping
from neon.callbacks.callbacks import (Callbacks, MetricCallback, EarlyStopCallback,
                                      SerializeModelCallback, DummyClass,
                                      CollectWeightsCallback)

# callbacks = Callbacks(model=mlp, eval_set=train, eval_freq=1,
#                       output_file="model_stats/some_data_1")
callbacks = Callbacks(mlp, eval_set=TRAIN, eval_freq=1)
# callbacks.add_callback(SerializeModelCallback('right_here.data', 1, 10))
# callbacks.add_callback(CollectWeightsCallback('weight_history'))
callbacks.add_early_stop_callback(slow_change)
# callbacks.add_callback(MetricCallback(eval_set=train, metric=LogLoss(), epoch_freq=1))
# callbacks.add_callback(LossCallback(eval_set=test, epoch_freq=1))

# fit the model; the very large epoch count relies on the early-stop callback to end training
mlp.fit(TRAIN, optimizer=optimizer, num_epochs=100000000, cost=cost, callbacks=callbacks)

# get the model outputs on the training set
results = mlp.get_outputs(TRAIN)

# evaluate the model on the training set using the log loss metric
train_error = mlp.eval(TRAIN, metric=LogLoss())
average_train_cost += train_error
# test_error = mlp.eval(test, metric=LogLoss())
# average_test_cost += test_error
all_train_costs.append(train_error)
# all_test_costs.append(test_error)

print('Train Log Loss = %f' % train_error)
# print('Test Log Loss = %f' % test_error)
print('Average Train Log Loss = %f' % average_train_cost)
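slow_change is the user-supplied stopping criterion passed to add_early_stop_callback. A minimal sketch, assuming neon's convention that the function receives (state, current_cost) and returns (new_state, should_stop); the plateau threshold is an arbitrary illustrative value:

# Minimal sketch of a plateau-based stopping criterion (threshold is arbitrary).
def slow_change(state, cost):
    if state is None:                    # first call: just remember the cost
        return (cost, False)
    stop = abs(state - cost) < 1e-6      # stop once the cost has stopped improving
    return (cost, stop)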