def test_fw_bw_no_cost_or_optimizer():
    """Timing a model that was initialized without a cost must raise RuntimeError."""
    net = Model(test_layers)
    # Only the dataset is handed to initialize(); no cost is supplied.
    net.initialize(test_dataset)

    benchmark = Benchmark(model=net)
    with pytest.raises(RuntimeError):
        benchmark.time(test_dataset, niterations=1)
def __init__(self, num_actions, args):
    """Create the backend, the Q-network with its cost/optimizer and the target network.

    Arguments:
        num_actions: number of network outputs; also the first dimension of
            the preallocated ``targets`` tensor.
        args: options object. Attributes read here: batch_size, discount_rate,
            history_length, screen_height, screen_width, clip_error,
            min_reward, max_reward, batch_norm, backend, random_seed,
            device_id, datatype, stochastic_round, optimizer, target_steps,
            plus learning_rate / decay_rate depending on the chosen optimizer
            and save_weights_prefix when target_steps is truthy.

    Raises:
        ValueError: if ``args.optimizer`` is not 'rmsprop', 'adam' or
            'adadelta'.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length, ) + self.screen_dim + (self.batch_size, )
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # Bug fix (kept from the original): switch parallelism off on every layer
    # before the model is initialized.
    for l in self.model.layers.layers:
        l.parallelism = 'Disabled'
    # the batch dimension (last entry of input_shape) is not passed to initialize()
    self.model.initialize(self.input_shape[:-1], self.cost)

    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        # The original `assert false, ...` raised NameError (`false` is not a
        # Python name) and would have been stripped under -O anyway.
        raise ValueError("Unknown optimizer: {}".format(args.optimizer))

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers=self._createLayers(num_actions))
        # Bug fix (kept from the original): same parallelism switch as above
        for l in self.target_model.layers.layers:
            l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        # without target_steps the "target" network is the online network itself
        self.target_model = self.model

    self.callback = None
be = gen_backend(**extract_valid_args(args, gen_backend)) # Setup dataloader eval_manifest = args.manifest['val'] if not os.path.exists(eval_manifest): raise IOError("Manifest file {} not found".format(eval_manifest)) # Setup required dataloader parameters nbands = 13 max_utt_len = 30 max_tscrpt_len = 1300 eval_set = make_loader(eval_manifest, alphabet, nbands, max_tscrpt_len, max_utt_len, backend_obj=be) # Load the model model = Model(args.model_file) # Process data and compute stats wer, sample_size, results = get_wer(model, be, eval_set, argmax_decoder, nout, use_wer=args.use_wer, print_examples=args.print_examples) print("\n" + "-" * 80) if args.use_wer: print("wer = {}".format(wer)) else: print("cer = {}".format(wer)) print("-" * 80 + "\n") if args.inference_file: # Save results in args.inference_file with open(args.inference_file, 'wb') as f:
if (args.rng_seed is None): args.rng_seed = 16 print('Batch size = {}'.format(args.batch_size)) # setup backend be = gen_backend(**extract_valid_args(args, gen_backend)) test_set = HDF5Iterator(testFileName) model_filename = 'LUNA16_CADIMI_subset{}.prm'.format(subset) print('Using model: {}'.format(model_filename)) lunaModel = Model(model_filename) prob, target = lunaModel.get_outputs(test_set, return_targets=True) np.set_printoptions(precision=3, suppress=True) from sklearn.metrics import classification_report, roc_auc_score, average_precision_score from sklearn.metrics import precision_recall_curve, log_loss, confusion_matrix # precision, recall, thresholds = precision_recall_curve(target, prob) print('Average precision = {}'.format( average_precision_score(target, prob[:, 1], average='weighted'))) print( classification_report(target,
def main():
    """Build, train and plot a two-path LSTM model (a "spike" path and a "stim" path).

    NOTE(review): this function looks like work in progress. It stops at a
    live ``pdb.set_trace()`` breakpoint and afterwards reads ``ts``,
    ``train_set`` and ``valid_set``, none of which is assigned inside this
    function (the only assignment to ``ts`` is commented out). Confirm these
    are module-level names, or restore their definitions, before running
    this unattended.
    """
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    #mat_data = sio.loadmat('../data/timeseries/02_timeseries.mat')
    #ts = V1TimeSeries(mat_data['timeseries'], mat_data['stim'], binning=10)

    seq_len = 30
    hidden = 20

    # backend is generated here because parse_args() was called with gen_be=False
    be = gen_backend(**extract_valid_args(args, gen_backend))

    kohn = KohnV1Dataset(path='../tmp/')
    kohn.gen_iterators(seq_len)
    # NOTE(review): debugger breakpoint left in the code path
    import pdb; pdb.set_trace()
    # NOTE(review): `ts` is not defined in this function (see commented lines above)
    train_spike_set = V1IteratorSequence(ts.train, seq_len, return_sequences=False)
    valid_spike_set = V1IteratorSequence(ts.test, seq_len, return_sequences=False)

    init = GlorotUniform()

    # dataset = MNIST(path=args.data_dir)
    # (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    # train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
    # valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

    # # weight initialization
    # init_norm = Gaussian(loc=0.0, scale=0.01)

    # # initialize model
    # path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])

    # path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])

    # layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
    #           Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    # first path: two LSTMs with dropout in between, reduced by RecurrentLast
    # NOTE(review): `train_set` is not assigned in this function
    spike_rnn_path = Sequential( layers = [
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(), reset_cells=False),
        #Dropout(keep=0.85),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(), name='spike_in')])

    # second path: a single LSTM reduced to one output unit
    stim_rnn_path = Sequential( layers = [
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        RecurrentLast(),
        Affine(1, init, bias=init, activation=Identity(), name='stim')])

    # NOTE(review): the commented-out code above spells this class
    # `MergeMultistream`; confirm `MergeMultiStream` is really importable.
    layers = [
        MergeMultiStream( layers = [ spike_rnn_path, stim_rnn_path], merge="stack"),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(), name='spike_out'),
        Round()
    ]

    model = Model(layers=layers)

    sched = ExpSchedule(decay=0.7)

    # cost = GeneralizedCost(SumSquared())
    cost = GeneralizedCost(MeanSquared())

    # per-key optimizers: entries 'Bias' and 'special_linear' use RMSProp,
    # everything else falls back to momentum SGD with the exponential schedule
    optimizer_two = RMSProp(stochastic_round=args.rounding)
    optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9, schedule=sched)
    opt = MultiOptimizer({'default': optimizer_one,
                          'Bias': optimizer_two,
                          'special_linear': optimizer_two})

    # NOTE(review): `valid_set` is not assigned in this function
    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
    callbacks.add_hist_callback(filter_key = ['W'])
    #callbacks.add_callback(MetricCallback(eval_set=valid_set, metric=FractionExplainedVariance(), epoch_freq=args.eval_freq))
    #callbacks.add_callback(MetricCallback(eval_set=valid_set,metric=Accuracy(), epoch_freq=args.eval_freq))

    model.fit(train_set,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)

    # flatten the model outputs to one row per sample, nfeatures columns
    train_output = model.get_outputs(
        train_set).reshape(-1, train_set.nfeatures)
    valid_output = model.get_outputs(
        valid_set).reshape(-1, valid_set.nfeatures)
    train_target = train_set.y_series
    valid_target = valid_set.y_series

    # `fev` is defined outside this function; presumably "fraction of
    # explained variance" given the log label below -- TODO confirm
    tfev = fev(train_output, train_target, train_set.mean)
    vfev = fev(valid_output, valid_target, valid_set.mean)

    neon_logger.display('Train FEV: %g, Valid FEV: %g' % (tfev, vfev))
    # neon_logger.display('Train Mean: %g, Valid Mean: %g' % (train_set.mean, valid_set.mean))

    # scatter column 0 against column 1 for predictions and targets (training set)
    plt.figure()
    plt.plot(train_output[:, 0], train_output[
             :, 1], 'bo', label='prediction')
    plt.plot(train_target[:, 0], train_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on training set')
    plt.savefig('neon_series_training_output.png')

    # same plot for the validation set
    plt.figure()
    plt.plot(valid_output[:, 0], valid_output[
             :, 1], 'bo', label='prediction')
    plt.plot(valid_target[:, 0], valid_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on validation set')
    plt.savefig('neon_series_validation_output.png')
branch3 = [Conv((1, 1, p3[0]), **common), Conv((5, 5, p3[1]), **commonp2)] branch4 = [Pooling(op="max", **pool3s1p1), Conv((1, 1, p4[0]), **common)] return MergeBroadcast(layers=[branch1, branch2, branch3, branch4], merge="depth") model = Model(layers=[ Conv((7, 7, 64), padding=3, strides=2, **common), Pooling(**pool3s2p1), Conv((1, 1, 64), **common), Conv((3, 3, 192), **commonp1), Pooling(**pool3s2p1), inception([(64, ), (96, 128), (16, 32), (32, )]), inception([(128, ), (128, 192), (32, 96), (64, )]), Pooling(**pool3s2p1), inception([(192, ), (96, 208), (16, 48), (64, )]), inception([(160, ), (112, 224), (24, 64), (64, )]), inception([(128, ), (128, 256), (24, 64), (64, )]), inception([(112, ), (144, 288), (32, 64), (64, )]), inception([(256, ), (160, 320), (32, 128), (128, )]), Pooling(**pool3s2p1), inception([(256, ), (160, 320), (32, 128), (128, )]), inception([(384, ), (192, 384), (48, 128), (128, )]), Pooling(fshape=7, strides=1, op="avg"), Affine(nout=1000, init=init1) ]) weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.)) opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
GeneralizedCost(costfunc=CrossEntropyMulti()), GeneralizedCost(costfunc=CrossEntropyMulti()), GeneralizedCost(costfunc=CrossEntropyMulti()) ], weights=[1, 0., 0.]) # We only want to consider the CE of the main path if not args.resume: # build the model from scratch and run it # Now construct the model branch_nodes = [BranchNode(name='branch' + str(i)) for i in range(2)] main1 = main_branch(branch_nodes) aux1 = aux_branch(branch_nodes[0], ind=1) aux2 = aux_branch(branch_nodes[1], ind=2) model = Model(layers=Tree([main1, aux1, aux2], alphas=[1.0, 0.3, 0.3])) else: # load up the save model model = Model('serialize_test_2.pkl') model.initialize(train, cost=cost) # configure callbacks callbacks = Callbacks(model, progress_bar=True, output_file='temp1.h5', serialize=1, history=3, save_path='serialize_test.pkl') lr_sched = PolySchedule(total_epochs=10, power=0.5)
] # setup cost function as Square Hinge Loss cost = GeneralizedCost(costfunc=SquareHingeLoss()) # setup optimizer LR_start = 1.65e-2 def ShiftAdaMax_with_Scale(LR=1): return ShiftAdaMax(learning_rate=LR_start * LR, schedule=ShiftSchedule(2, shift_size=1)) optimizer = MultiOptimizer({ 'default': ShiftAdaMax_with_Scale(), 'BinaryLinear_0': ShiftAdaMax_with_Scale(57.038), 'BinaryLinear_1': ShiftAdaMax_with_Scale(73.9008), 'BinaryLinear_2': ShiftAdaMax_with_Scale(73.9008), 'BinaryLinear_3': ShiftAdaMax_with_Scale(52.3195) }) # initialize model object bnn = Model(layers=layers) # configure callbacks callbacks = Callbacks(bnn, eval_set=valid_set, **args.callback_args) # run fit bnn.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks) print('Misclassification error = %.1f%%' % (bnn.eval(valid_set, metric=Misclassification())*100))
be = gen_backend(**extract_valid_args(args, gen_backend))

# Set up the testset to load via aeon
image_config = dict(height=64, width=64, channels=3)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename=testFileName,
              minibatch_size=args.batch_size,
              subset_fraction=1,
              cache_directory='')
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float

lunaModel = Model('LUNA16_VGG_model_no_batch_sigmoid_pretrained.prm')


def round(arr, threshold=0.5):
    '''
    Round to an arbitrary threshold.
    Above threshold goes to 1. Below (or equal) goes 0.

    Arguments:
        arr: array-like of values to threshold.
        threshold: cut-off; values strictly greater map to 1.

    Returns:
        float64 ndarray with the same shape as ``arr`` holding 0.0 / 1.0.
    '''
    # Elementwise comparison. The previous np.where(...)[0] indexing used only
    # first-axis indices, so multi-column input was set row by row and 0-d
    # input failed; 1-D and (N, 1) input give the same result as before.
    return (np.asarray(arr) > threshold).astype(np.float64)
Conv((3, 3, 192), **convp1), Conv((1, 1, 192), **conv), Conv((1, 1, 16), **conv), Pooling(8, op="avg"), Affine(nout=1024, init=init_uni, activation=relu), Affine(nout=512, init=init_uni, activation=relu), Dropout(keep=.5), Affine(nout=128, init=init_uni, activation=relu), Dropout(keep=.4), Affine(nout=64, init=init_uni, activation=relu), Affine(nout=2, init=init_uni, activation=Softmax()) ] cost = GeneralizedCost(costfunc=CrossEntropyMulti()) lunaModel = Model(layers=layers) if args.model_file: import os assert os.path.exists(args.model_file), '%s not found' % args.model_file lunaModel.load_params(args.model_file) # configure callbacks #callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args) callbacks = Callbacks(lunaModel, eval_set=valid_set, metric=Misclassification(), **args.callback_args) if args.deconv: callbacks.add_deconv_callback(train_set, valid_set)
# Layers for the stacked autoencoders. Decoder k emits the size of the input
# to encoder k: decoder1 -> image_size, decoder2 -> encoder_size[0],
# decoder3 -> encoder_size[1].
decoder1 = Affine(nout=image_size, init=init_norm, activation=Logistic(), name='decoder1')
encoder2 = Affine(nout=config.encoder_size[1], init=init_norm, activation=Logistic(), name='encoder2')
decoder2 = Affine(nout=config.encoder_size[0], init=init_norm, activation=Logistic(), name='decoder2')
encoder3 = Affine(nout=config.encoder_size[2], init=init_norm, activation=Logistic(), name='encoder3')
decoder3 = Affine(nout=config.encoder_size[1], init=init_norm, activation=Logistic(), name='decoder3')
classifier = Affine(nout=config.ydim, init=init_norm, activation=Softmax())

# Sum-squared cost for reconstruction, multiclass cross-entropy for classification
cost_reconst = GeneralizedCost(costfunc=SumSquared())
cost_classification = GeneralizedCost(costfunc=CrossEntropyMulti())

# Setting model layers for AE1
# (encoder1 is created before this fragment)
AE1 = Model([encoder1, decoder1])
AE1.cost = cost_reconst
AE1.initialize(data, cost_reconst)
# AE1.optimizer = optimizer_default
measure_time(data, AE1, config, 'AE1')

# Setting model layers for AE2
# It has an extra encoder layer compared to what AE should really be. This is
# done to avoid saving the outputs for each AE.
AE2_mimic = Model([encoder1, encoder2, decoder2])
AE2_mimic.cost = cost_reconst
AE2_mimic.initialize(data, cost_reconst)
# Learning rates for extra layers that should not be updated are set to zero.
# opt = MultiOptimizer({'default': optimizer_default,
#                       'encoder1': optimizer_helper})
# AE2_mimic.optimizer = opt
X_val = np.hstack([X_val_info, hit_flat])

# Standardize every column with the training-set mean and std; the validation
# set is scaled with the same training statistics.
mu = np.mean(X_train, axis=0).reshape(1, -1)
s = np.std(X_train, axis=0).reshape(1, -1)
s[s == 0] = 1  # constant columns: avoid dividing by zero
X_train = (X_train - mu) / s
X_val = (X_val - mu) / s

# X = np.random.rand(10000, 100)
# y = np.sum(X, axis=1).reshape(-1, 1)
# y = np.hstack([y, y])

train_set = ArrayIterator(X=X_train, y=y_train, make_onehot=False)
val_set = ArrayIterator(X=X_val, y=y_val, make_onehot=False)

print("!!! TEST STARTED !!!")

# Measure inference throughput for a range of batch sizes. For each size a
# new GPU backend is generated and the saved model is reloaded.
# NOTE(review): train_set was created before these backends are generated --
# confirm the iterator picks up the new batch size.
for bs in [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
    be = gen_backend('gpu', batch_size=bs)
    bnn = Model("bin_model/final_model.prm")
    tries = 3
    t_start = time.time()
    # time `tries` full passes over the training set
    for i in range(tries):
        out = bnn.get_outputs(train_set)
    t_end = time.time()
    tot_time = t_end - t_start
    n = X_train.shape[0]
    # samples processed per second over all passes
    fps = tries * n / tot_time
    print("fps {}; batch size {}; time elapsed: {}".format(fps, bs, tot_time))
def create_objects(root_yaml, be_type='gpu', batch_size=128, rng_seed=None,
                   device_id=0, default_dtype=np.float32, stochastic_rounding=False):
    """
    Instantiate objects as per the given specifications.

    A backend must already exist (``NervanaObject.be``) when this is called.
    The backend-related arguments below are accepted for compatibility but are
    not used by the body of this function.

    Arguments:
        root_yaml (dict or str): Model definition dictionary parsed from a
                                 YAML file, or the path of the YAML file
        be_type (str): backend either 'gpu', 'mgpu' or 'cpu'
        batch_size (int): Batch size.
        rng_seed (None or int): random number generator seed
        device_id (int): for GPU backends id of device to use
        default_dtype (type): numpy data format for default data types,
        stochastic_rounding (bool or int): number of bits for stochastic rounding
                                           use False for no rounding

    Returns:
        tuple: Contains model, cost and optimizer objects. The optimizer is
               None when the definition has no 'optimizer' entry.
    """
    assert NervanaObject.be is not None, 'Must generate a backend before running this function'

    # can give filename or parse dictionary
    # (isinstance rather than an exact type test, so str subclasses work too)
    if isinstance(root_yaml, str):
        with open(root_yaml, 'r') as fid:
            root_yaml = yaml.safe_load(fid.read())

    # in case references were used
    root_yaml = deepcopy(root_yaml)

    # initialize layers
    yaml_layers = root_yaml['layers']

    # currently only support sequential in yaml
    layer_dict = {'layers': yaml_layers}
    layers = Sequential.gen_class(layer_dict)

    # initialize model
    model = Model(layers=layers)

    # cost function, built from the type name stored under the 'cost' key
    cost_name = root_yaml['cost']
    cost = GeneralizedCost.gen_class({'costfunc': {'type': cost_name}})

    # create optimizer (optional section)
    opt = None
    if 'optimizer' in root_yaml:
        yaml_opt = root_yaml['optimizer']
        typ = yaml_opt['type']
        opt = getattr(neon.optimizers, typ).gen_class(yaml_opt['config'])

    return model, cost, opt
window_size = 64

# Set up the testset to load via aeon
image_config = dict(height=window_size, width=window_size, channels=3)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename=testFileName,
              minibatch_size=args.batch_size,
              subset_fraction=1,
              cache_directory='')
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float

lunaModel = Model('LUNA16_resnet.prm')


def round(arr, threshold=0.5):
    '''
    Round to an arbitrary threshold.
    Above threshold goes to 1. Below (or equal) goes 0.

    Arguments:
        arr: array-like of values to threshold.
        threshold: cut-off; values strictly greater map to 1.

    Returns:
        float64 ndarray with the same shape as ``arr`` holding 0.0 / 1.0.
    '''
    # Elementwise comparison. The previous np.where(...)[0] indexing used only
    # first-axis indices, so multi-column input was set row by row and 0-d
    # input failed; 1-D and (N, 1) input give the same result as before.
    return (np.asarray(arr) > threshold).astype(np.float64)
init=init_emb, pad_idx=0, update=True), LSTM(hidden_size, init_glorot, activation=Tanh(), gate_activation=Logistic(), reset_cells=True), RecurrentSum(), Dropout(keep=0.5), Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax()) ] # load the weights print("Initialized the models - ") model_new = Model(layers=layers) print("Loading the weights from {0}".format(args.model_weights)) model_new.load_params(args.model_weights) model_new.initialize(dataset=(sentence_length, batch_size)) # setup buffers before accepting reviews xdev = be.zeros((sentence_length, 1), dtype=np.int32) # bsz is 1, feature size xbuf = np.zeros((1, sentence_length), dtype=np.int32) oov = 2 start = 1 index_from = 3 pad_char = 0 vocab, rev_vocab = pickle.load(open(args.vocab_file, 'rb')) while True:
bb_layers = [ b1, bbox_pred, ] # setup optimizer opt_w = GradientDescentMomentum(0.001 * learning_rate_scale, 0.9, wdecay=0.0005) opt_b = GradientDescentMomentum(0.002 * learning_rate_scale, 0.9) optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b}) # setup model model = Model(layers=Tree([frcn_layers, bb_layers])) # if training a new model, seed the Alexnet conv layers with pre-trained weights # otherwise, just load the model file if args.model_file is None: load_imagenet_weights(model, args.data_dir) cost = Multicost(costs=[ GeneralizedCost(costfunc=CrossEntropyMulti()), GeneralizedCostMask(costfunc=SmoothL1Loss()) ], weights=[1, 1]) callbacks = Callbacks(model, **args.callback_args) model.fit(train_set,
def _option_given(argv, long_opt, short_opt):
    """Return True if ``long_opt`` or ``short_opt`` was explicitly passed in ``argv``."""
    for arg in argv:
        if arg.startswith("--"):
            # accept `--opt`, `--opt=value` and argparse-style abbreviations
            name = arg.split("=", 1)[0]
            if len(name) > 2 and long_opt.startswith(name):
                return True
        elif arg.startswith(short_opt):
            # `-z` or `-z128` (value attached to the short option)
            return True
    return False


def main():
    """Train a dense autoencoder on the P1B1 data with neon and plot its error histogram.

    Command-line values override the configuration file. For batch_size,
    epochs and rng_seed the parser always supplies a default, so the
    configuration-file value is used unless the option really appears on the
    command line.
    """
    # Get command-line parameters
    parser = get_p1b1_parser()
    args = parser.parse_args()

    # Get parameters from configuration file
    fileParameters = p1b1.read_config_file(args.config_file)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    # Options are matched properly rather than by substring: the old test
    # `"-r" in ag` also fired for unrelated arguments such as `--rounding`.
    cli_args = sys.argv[1:]
    if not _option_given(cli_args, "--batch_size", "-z"):
        args.batch_size = fileParameters['batch_size']
    if not _option_given(cli_args, "--epochs", "-e"):
        args.epochs = fileParameters['epochs']
    if not _option_given(cli_args, "--rng_seed", "-r"):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b1.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    X_train, X_val, X_test = p1b1.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ", np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))

    # the autoencoder reconstructs its input, so output width == input width
    input_dim = X_train.shape[1]
    output_dim = input_dim

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Set input and target to X_train
    train = ArrayIterator(X_train)
    val = ArrayIterator(X_val)
    test = ArrayIterator(X_test)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define Autoencoder architecture
    layers = []

    # Autoencoder
    layers_params = gParameters['dense']

    if layers_params is not None:
        if not isinstance(layers_params, list):
            try:
                layers_params = list(layers_params)
            except TypeError:
                # a single layer size (e.g. an int) is not iterable
                layers_params = [layers_params]
        # Encoder Part
        for l in layers_params:
            layers.append(
                Affine(nout=l, init=initializer_weights,
                       bias=initializer_bias, activation=activation))
        # Decoder Part: mirror the encoder, skipping the innermost layer so
        # the bottleneck is not duplicated
        for l in reversed(layers_params[:-1]):
            layers.append(
                Affine(nout=l, init=initializer_weights,
                       bias=initializer_bias, activation=activation))

    # final layer maps back to the input width
    layers.append(
        Affine(nout=output_dim, init=initializer_weights,
               bias=initializer_bias, activation=activation))

    # Build Autoencoder model
    ae = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(ae, eval_set=val, eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    ae.fit(train, optimizer=optimizer, num_epochs=gParameters['epochs'],
           cost=cost, callbacks=callbacks)

    # model save
    #save_fname = "model_ae_W" + ext
    #ae.save_params(save_fname)

    # Compute errors
    X_pred = ae.get_outputs(test)
    scores = p1b1.evaluate_autoencoder(X_pred, X_test)
    print('Evaluation on test data:', scores)

    diff = X_pred - X_test
    # Plot histogram of errors comparing input and output of autoencoder
    plt.hist(diff.ravel(), bins='auto')
    plt.title("Histogram of Errors with 'auto' bins")
    plt.savefig('histogram_neon.png')
text_name='report_valid', nwords=vocab_size_layer, max_len=args.max_len_w, index_from=index_from) skip = SkipThought(vocab_size_layer, embed_dim, init_embed_dev, nhidden, rec_layer=GRU, init_rec=Orthonormal(), activ_rec=Tanh(), activ_rec_gate=Logistic(), init_ff=Uniform(low=-0.1, high=0.1), init_const=Constant(0.0)) model = Model(skip) if args.model_file and os.path.isfile(args.model_file): neon_logger.display("Loading saved weights from: {}".format( args.model_file)) model_dict = load_obj(args.model_file) model.deserialize(model_dict, load_states=True) elif args.model_file: neon_logger.display( "Unable to find model file {}, restarting training.".format( args.model_file)) cost = Multicost(costs=[ GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)) ],
def create_model(args, hyper_params):
    """Build the classification network and its optimizer.

    Arguments:
        args: options object; ``args.model_file``, when truthy, is handed to
            ``Model`` as ``layers`` in place of the layer list built here.
        hyper_params: object providing ``learning_rate_scale``, ``momentum``
            and ``learning_rate_sched`` for the optimizer.

    Returns:
        tuple: (model, opt)
    """
    def conv(fshape, scale, bias, **kwargs):
        # rectified conv layer with Gaussian weights and constant bias
        return Conv(fshape, init=Gaussian(scale=scale), bias=Constant(bias),
                    activation=Rectlin(), **kwargs)

    def dense(nout, scale, bias, activation_cls):
        # fully connected layer; the activation is instantiated last, as in
        # a direct Affine(...) call
        return Affine(nout=nout, init=Gaussian(scale=scale), bias=Constant(bias),
                      activation=activation_cls())

    # setup layers
    imagenet_layers = [
        conv((11, 11, 64), 0.01, 0, padding=3, strides=4),
        Pooling(3, strides=2),
        conv((5, 5, 192), 0.01, 1, padding=2),
        Pooling(3, strides=2),
        conv((3, 3, 384), 0.03, 0, padding=1),
        conv((3, 3, 256), 0.03, 1, padding=1),
        conv((3, 3, 256), 0.03, 1, padding=1),
        Pooling(3, strides=2),
        dense(4096, 0.01, 1, Rectlin),
        Dropout(keep=0.5),
        dense(4096, 0.01, 1, Rectlin),
        # The following layers are used in Alexnet, but are not used in the new model
        Dropout(keep=0.5),
        # Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax())
    ]

    # new head stacked on the Alexnet-style trunk: 21 softmax outputs
    head = [
        dense(4096, 0.005, .1, Rectlin),
        Dropout(keep=0.5),
        dense(21, 0.01, 0, Softmax),
    ]
    target_layers = imagenet_layers + head

    # setup optimizer
    opt = GradientDescentMomentum(hyper_params.learning_rate_scale,
                                  hyper_params.momentum,
                                  wdecay=0.0005,
                                  schedule=hyper_params.learning_rate_sched)

    # setup model: a given model file takes precedence over the fresh layers
    model_source = args.model_file if args.model_file else target_layers
    model = Model(layers=model_source)

    return model, opt
def __init__(self, sentence_length, token_vocab_size, pos_vocab_size=None,
             char_vocab_size=None, max_char_word_length=20,
             token_embedding_size=None, pos_embedding_size=None,
             char_embedding_size=None, num_labels=None, lstm_hidden_size=100,
             num_lstm_layers=1, use_external_embedding=None, dropout=0.5):
    """Assemble the tagging network and store it in ``self._model``.

    One to three input streams are built (tokens always; POS tags and
    characters only when both their vocabulary size and embedding size are
    given). Several streams are merged by stacking; a single stream is used
    directly. The result feeds a deep bi-LSTM, dropout and a softmax layer
    with ``num_labels`` outputs.
    """
    weight_init = GlorotUniform()
    seq_shape = (-1, sentence_length)

    # token stream: learned lookup table, or raw input when embeddings are external
    if use_external_embedding is None:
        token_input = LookupTable(vocab_size=token_vocab_size,
                                  embedding_dim=token_embedding_size,
                                  init=weight_init,
                                  pad_idx=0)
    else:
        token_input = DataInput()
    streams = [[token_input, Reshape(seq_shape)]]

    # add POS tag input
    has_pos = pos_vocab_size is not None and pos_embedding_size is not None
    if has_pos:
        streams.append([
            LookupTable(vocab_size=pos_vocab_size,
                        embedding_dim=pos_embedding_size,
                        init=weight_init,
                        pad_idx=0),
            Reshape(seq_shape),
        ])

    # add Character RNN input
    has_chars = char_vocab_size is not None and char_embedding_size is not None
    if has_chars:
        streams.append([
            LookupTable(vocab_size=char_vocab_size,
                        embedding_dim=char_embedding_size,
                        init=weight_init,
                        pad_idx=0),
            TimeDistBiLSTM(char_embedding_size, weight_init,
                           activation=Logistic(),
                           gate_activation=Tanh(),
                           reset_cells=True,
                           reset_freq=max_char_word_length),
            TimeDistributedRecurrentLast(timesteps=max_char_word_length),
            Reshape(seq_shape),
        ])

    if len(streams) > 1:
        network = [MergeMultistream(layers=streams, merge="stack"),
                   Reshape(seq_shape)]
    else:
        # single stream: only its first layer is used, without the reshape
        network = [streams[0][0]]

    network.extend([
        DeepBiLSTM(lstm_hidden_size, weight_init,
                   activation=Logistic(),
                   gate_activation=Tanh(),
                   reset_cells=True,
                   depth=num_lstm_layers),
        Dropout(keep=dropout),
        Affine(num_labels, weight_init, bias=weight_init, activation=Softmax()),
    ])
    self._model = Model(layers=network)
opt_vgg = GradientDescentMomentum(0.001, 0.9, wdecay=0.0005)
opt_class_layer = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005)

# also define optimizers for the bias layers, which have a different learning rate
# and not weight decay.
opt_bias = GradientDescentMomentum(0.002, 0.9)
opt_bias_class = GradientDescentMomentum(0.02, 0.9)

# set up the mapping of layers to optimizers
opt = MultiOptimizer({'default': opt_vgg,
                      'Bias': opt_bias,
                      'class_layer': opt_class_layer,
                      'class_layer_bias': opt_bias_class})

# use cross-entropy cost to train the network
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

lunaModel = Model(layers=vgg_layers)

# optionally start from previously saved parameters
if args.model_file:
    import os
    # Raise explicitly: an `assert` would be stripped under `python -O` and
    # the missing file would only surface inside load_params().
    if not os.path.exists(args.model_file):
        raise IOError('%s not found' % args.model_file)
    lunaModel.load_params(args.model_file)

# configure callbacks
#callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
callbacks = Callbacks(lunaModel, eval_set=valid_set,
                      metric=Misclassification(), **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

lunaModel.fit(train_set, optimizer=opt, num_epochs=num_epochs,
              cost=cost, callbacks=callbacks)
def test_conv_rnn(backend_default):
    """Run fprop and bprop through a conv stack followed by each recurrent layer type.

    There are no value assertions; the test passes when every combination
    initializes and runs forward and backward without raising.
    """
    train_shape = (1, 17, 142)

    be = NervanaObject.be
    # random input of the flattened training shape and random deltas for the 10 outputs
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    recurrent_variants = [
        DeepBiLSTM(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(),
                   depth=1, reset_cells=True),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=False),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=2, reset_cells=True, batch_norm=False),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=True),
        DeepBiRNN(128, init_norm, activation=Rectlin(),
                  depth=1, reset_cells=True, batch_norm=False, bi_sum=True),
        Recurrent(128, init=init_norm, activation=Rectlin(), reset_cells=True),
        LSTM(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(),
             reset_cells=True),
        GRU(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(),
            reset_cells=True),
    ]

    for recurrent in recurrent_variants:
        # fresh conv/pool/affine layers for every recurrent variant
        net = Model(layers=[
            Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4), init=init_norm, batch_norm=True, activation=Rectlin(),
                 strides=dict(str_h=1, str_w=2)),
            recurrent,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ])
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())

        net.initialize(train_shape, cost)
        net.fprop(inp)
        net.bprop(delta)
padding=1), Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1), Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1), Pooling(3, strides=2), Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()), Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()), Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax()) ] weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.)) opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched) opt = MultiOptimizer({'default': opt_gdm}) model = Model(layers=layers, optimizer=opt) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) model.initialize(train, cost=cost) b = Benchmark(model) res = b.time(train, niterations=5) b.print_stats(res, nskip=2)
def test_model_serialize(backend_default, data):
    """Check that a model round-trips through save_params / Model(<file>).

    A two-stream model is trained for a few batches, its inference outputs and
    serialized layer parameters/states are recorded, the model is saved to a
    temporary pickle, reloaded into a new Model, and the reloaded outputs,
    states and params are compared against the recorded ones.

    The temporary file is removed in a ``finally`` clause so that a failing
    assertion does not leave it behind in the working directory.

    Arguments:
        backend_default: backend fixture (not referenced directly in the body)
        data: path handed to load_mnist
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    # The same images feed both streams of the merged model
    train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)

    n_test = 3
    num_epochs = 3

    # Train for num_epochs, stopping each epoch once the batch index exceeds n_test
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of the first batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)
        mlp.initialize(train_set)

        outputs = []
        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)

            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) is type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert np.allclose(p, p_e)
                else:
                    assert p == p_e
    finally:
        # Always clean up, even when a comparison above fails; guard against
        # the case where saving itself failed before the file was created.
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
# training and validation iterators
train_set = DataIterator(X_train, y_train, nclass=nclass)
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model: one hidden layer plus an output layer whose linear part
# carries the name 'special_linear' so the optimizer mapping below can key on it
layers = [
    Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
    Affine(nout=10, init=init_norm, bias=Constant(0),
           activation=Logistic(shortcut=True),
           linear_name='special_linear'),
]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

mlp = Model(layers=layers)

# fit and validate
optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
optimizer_two = RMSProp()

# all bias layers and the last linear layer will use
# optimizer_two. all other layers will use optimizer_one.
opt = MultiOptimizer({
    'default': optimizer_one,
    'Bias': optimizer_two,
    'special_linear': optimizer_two,
})

# configure callbacks
callbacks = Callbacks(mlp, train_set, args, eval_set=valid_set)

mlp.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
        callbacks=callbacks)
def _cli_flag_given(argv, long_flag, short_flag):
    """Return True if the option was passed explicitly in ``argv``.

    Recognised spellings are ``--long``, ``--long=value``, ``-s`` and
    ``-svalue``. Matching is done on whole option names, so an unrelated
    option that merely contains the short flag as a substring (for example
    ``--eval_freq`` versus ``-e``) is not mistaken for it.

    Arguments:
        argv: command-line arguments, without the program name
        long_flag: long option name including the leading ``--``
        short_flag: short option name including the leading ``-``
    """
    for arg in argv:
        if arg == long_flag or arg.startswith(long_flag + "="):
            return True
        # Short form, optionally with the value attached; never a long option
        if arg.startswith(short_flag) and not arg.startswith("--"):
            return True
    return False


def main():
    """Train an MLP classifier with neon and report its accuracy on the test set.

    Parameters come from the command line and from the configuration file named
    by ``args.config_file``; the two are consolidated with command-line values
    taking precedence. The model is a stack of Affine (and optional Dropout)
    layers described by the consolidated parameters.
    """
    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()

    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    cli_args = sys.argv[1:]  # skip the program name, it is not an option
    if not _cli_flag_given(cli_args, "--batch_size", "-z"):
        args.batch_size = fileParameters['batch_size']
    if not _cli_flag_given(cli_args, "--epochs", "-e"):
        args.epochs = fileParameters['epochs']
    if not _cli_flag_given(cli_args, "--rng_seed", "-r"):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = p1b2.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)
    print("Shape y_train: ", y_train.shape)
    print("Shape y_val: ", y_val.shape)
    print("Shape y_test: ", y_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ", np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_train --> Min: ", np.min(y_train), ", max: ", np.max(y_train))
    print("Range y_val --> Min: ", np.min(y_val), ", max: ", np.max(y_val))
    print("Range y_test --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    input_dim = X_train.shape[1]
    num_classes = int(np.max(y_train)) + 1
    output_dim = num_classes  # The backend will represent the classes using one-hot representation (but requires an integer class as input !)

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['data_type'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    train = ArrayIterator(X=X_train, y=y_train, nclass=num_classes)
    val = ArrayIterator(X=X_val, y=y_val, nclass=num_classes)
    test = ArrayIterator(X=X_test, y=y_test, nclass=num_classes)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define MLP architecture
    layers = []

    for layer in gParameters['dense']:
        if layer:
            layers.append(
                Affine(nout=layer,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))
        if gParameters['dropout']:
            layers.append(Dropout(keep=(1 - gParameters['dropout'])))

    # Output layer: one unit per class
    layers.append(
        Affine(nout=output_dim,
               init=initializer_weights,
               bias=initializer_bias,
               activation=activation))

    # Build MLP model
    mlp = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(mlp, eval_set=val, metric=Accuracy(), eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    mlp.fit(train,
            optimizer=optimizer,
            num_epochs=gParameters['epochs'],
            cost=cost,
            callbacks=callbacks)

    # Evalute model on test set
    print('Model evaluation by neon: ', mlp.eval(test, metric=Accuracy()))
    y_pred = mlp.get_outputs(test)
    scores = p1b2.evaluate_accuracy(p1_common.convert_to_class(y_pred), y_test)
    print('Evaluation on test data:', scores)
def load_sent_encoder(model_dict, expand_vocab=False, orig_vocab=None,
                      w2v_vocab=None, w2v_path=None, use_recur_last=False):
    """
    Rebuild a sentence-encoding model from a saved skip-thought training model.

    The returned model consists of a lookup table, the trained 'encoder' layer
    and a final pooling layer. With expand_vocab the lookup table is replaced by
    a new table covering the w2v vocabulary, initialized from the result of
    compute_vocab_expansion; otherwise the trained 'lookupTable' layer is reused.

    Arguments:
        model_dict: saved s2v model dict
        expand_vocab: Bool to indicate if w2v vocab expansion should be attempted
        orig_vocab: If using expand_vocab, original vocabulary dict is needed for expansion
        w2v_vocab: If using expand_vocab, w2v vocab dict
        w2v_path: Path to trained w2v binary (GoogleNews)
        use_recur_last: If True a RecurrentLast layer is used as the final layer, if False
                        a RecurrentSum layer is used as the last layer of the returned model.

    Returns:
        Model made of [lookup table, encoder, final recurrent pooling layer]
    """
    embed_dim = model_dict['model']['config']['embed_dim']
    model_train = Model(model_dict)

    # RecurrentLast should be used for semantic similarity evaluation
    last_layer = RecurrentLast() if use_recur_last else RecurrentSum()

    trained_layers = model_train.layers.layer_dict

    if not expand_vocab:
        table = trained_layers['lookupTable']
    else:
        assert orig_vocab and w2v_vocab, \
            "All vocabs and w2v_path need to be specified when using expand_vocab"

        neon_logger.display("Computing vocab expansion regression...")

        # Build the index -> word dictionary from the original vocabulary.
        # Indices are shifted by 2 to allow for padding and oov tokens as 0 and 1.
        word_idict = {idx + 2: word for word, idx in orig_vocab.items()}
        word_idict[0] = ''
        word_idict[1] = 'UNK'

        # Create dictionary of word -> vec
        orig_word_vecs = get_embeddings(trained_layers['lookupTable'], word_idict)

        # Load GoogleNews w2v weights
        w2v_W, w2v_dim, _ = get_google_word2vec_W(w2v_path, w2v_vocab)

        # Compute the expanded vocab lookup table from a linear mapping of
        # words2vec into RNN word space
        init_embed = compute_vocab_expansion(orig_word_vecs, w2v_W, w2v_vocab, word_idict)
        init_embed_dev = model_train.be.array(init_embed)

        w2v_vocab_size = len(w2v_vocab)
        table = LookupTable(vocab_size=w2v_vocab_size, embedding_dim=embed_dim,
                            init=init_embed_dev, pad_idx=0)

    model = Model(layers=[table, trained_layers['encoder'], last_layer])
    return model
# start timer start_time = time.time() # INPUT: load the model model_name = raw_input('Specify file name and extension--> ') if model_name == '': model_name = 'imdb.p' # INPIT: how many to validate how_many_inputs = int(input('How many reviews to validate? (0 for all)--> ')) print '' # Load model including layers, parameters, and weights model = Model('[1]model/' + model_name) # initialize model model.initialize(dataset=(sentence_length, 1)) # CPU-only buffer input_numpy = np.zeros((sentence_length, 1), dtype=np.int32) correct_predictions = 0 neg_files = 0 total_filecount = 0 neg_predictions = 0 pos_predictions = 0 for file in os.listdir('[0]data/test/neg'): if file.endswith('.txt'):
Deconv(fshape=(4, 4, 8), init=init_uni, activation=Rectlin(), batch_norm=bn), Deconv(fshape=(3, 3, 8), init=init_uni, activation=Rectlin(), strides=2, batch_norm=bn), Deconv(fshape=(2, 2, 1), init=init_uni, strides=2, padding=1) ] # Define the cost cost = GeneralizedCost(costfunc=SumSquared()) model = Model(layers=layers) # configure callbacks callbacks = Callbacks(model, **args.callback_args) # Fit the model model.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks) # Plot the reconstructed digits try: from matplotlib import pyplot, cm fi = 0
def test_empty_dataset():
    """Benchmark.time is expected to raise ValueError for an empty dataset."""
    benchmark = Benchmark(model=Model(test_layers))

    with pytest.raises(ValueError):
        benchmark.time([], niterations=5, inference=True)