def get_dnn_cfg(dnn_fname):
    """
    Build the smallest usable NetworkConfig from a saved model file.

    The file is read with ``io.json_load``. Layers are discovered by
    probing the keys 'W0', 'W1', ... until one is missing; the shape of
    each weight entry supplies the layer sizes. The last weight matrix
    found gives ``n_outs``, every earlier one contributes an entry to
    ``hidden_layers_sizes``.

    If the first layer also has 'side_W0_0', a FactoredConfig is built from
    the 'side_b0_<j>' / 'side_W0_<j>' entries and attached to the result as
    ``factored_cfg``.

    :param dnn_fname: path of the model file to inspect
    :return: the populated NetworkConfig
    """
    params = io.json_load(dnn_fname)

    cfg = NetworkConfig()
    cfg.hidden_layers_sizes = []

    layer = 0
    while 'W{}'.format(layer) in params:
        weight_shape = string_2_array(params['W{}'.format(layer)]).shape

        # Currently factored layer can only be the first hidden layer
        # TODO: change this!
        if layer == 0:
            if 'side_W{}_0'.format(layer) not in params:
                # Plain (non-factored) input layer.
                cfg.n_ins = weight_shape[0]
            else:
                factored = FactoredConfig()
                side = 0
                while True:
                    # The side stack is enumerated by its bias entries.
                    if 'side_b{}_{}'.format(layer, side) not in params:
                        break
                    side_key = 'side_W{}_{}'.format(layer, side)
                    assert side_key in params
                    side_shape = string_2_array(params[side_key]).shape
                    if side == 0:
                        factored.n_in_main = weight_shape[0]
                        factored.n_in_side = side_shape[0]
                        # NOTE: this assumes that main and secondary features
                        # are disjoint, but this is not required by the model.
                        # TODO: find a way to relax this assumption.
                        cfg.n_ins = weight_shape[0] + side_shape[0]
                    factored.side_layers.append(side_shape[1])
                    side += 1
                cfg.factored_cfg = factored

        # A following 'W' entry means this layer is hidden; otherwise it is
        # the output layer.
        is_output_layer = 'W{}'.format(layer + 1) not in params
        if is_output_layer:
            cfg.n_outs = weight_shape[1]
        else:
            cfg.hidden_layers_sizes.append(weight_shape[1])
        layer += 1

    return cfg
def init_srbm(rbm_cfg, numpy_rng=None):
    """
    Create an SRBM from an RBM config.

    :param rbm_cfg: RBM configuration; its ``n_ins``, ``hidden_layers_sizes``
        and ``n_outs`` are copied into the config of the companion DNN
    :param numpy_rng: random state to use; when None a fresh
        ``numpy.random.RandomState()`` is created
    :return: the constructed SRBM
    """
    rng = numpy.random.RandomState() if numpy_rng is None else numpy_rng
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Following pdnn, initialize a parallel DNN and use it to initialize SRBM
    companion_cfg = NetworkConfig()
    for field in ('n_ins', 'hidden_layers_sizes', 'n_outs'):
        setattr(companion_cfg, field, getattr(rbm_cfg, field))

    companion_dnn = DNN(rng, theano_rng=theano_rng, cfg=companion_cfg)
    return SRBM(rng, theano_rng=theano_rng, cfg=rbm_cfg, dnn=companion_dnn)
# numpy random generator numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) log('> ... initializing the model') # parse network configuration from arguments, and initialize data reading cfg = RBMConfig() cfg.parse_config_common(arguments) cfg.init_data_reading(train_data_spec) # we also need to set up a DNN model, whose parameters are shared with RBM, for 2 reasons: # first, we can use DNN's model reading and writing functions, instead of designing these functions for RBM specifically # second, DNN generates cfg_dnn = NetworkConfig() cfg_dnn.n_ins = cfg.n_ins; cfg_dnn.hidden_layers_sizes = cfg.hidden_layers_sizes; cfg_dnn.n_outs = cfg.n_outs dnn = DNN(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg_dnn) # now set up the RBM model with dnn as an argument srbm = SRBM(numpy_rng=numpy_rng, theano_rng = theano_rng, cfg = cfg, dnn = dnn) # get the pre-training function log('> ... getting the pre-training functions') pretraining_fns = srbm.pretraining_functions(train_set_x=cfg.train_x, batch_size=cfg.batch_size, k = 1, weight_cost = 0.0002) start_layer_index = 0 start_epoch_index = 0 if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'): start_layer_index, start_epoch_index = read_two_integers(wdir + '/training_state.tmp') log('> ... found nnet.tmp and training_state.tmp, now resume training from layer #' + str(start_layer_index) + ' epoch #' + str(start_epoch_index)) _file2nnet(dnn.layers, filename = wdir + '/nnet.tmp')
# numpy random generator numpy_rng = numpy.random.RandomState(89677) theano_rng = RandomStreams(numpy_rng.randint(2**30)) log('> ... initializing the model') # parse network configuration from arguments, and initialize data reading cfg = RBMConfig() cfg.parse_config_common(arguments) cfg.init_data_reading(train_data_spec) # we also need to set up a DNN model, whose parameters are shared with RBM, for 2 reasons: # first, we can use DNN's model reading and writing functions, instead of designing these functions for RBM specifically # second, DNN generates cfg_dnn = NetworkConfig() cfg_dnn.n_ins = cfg.n_ins cfg_dnn.hidden_layers_sizes = cfg.hidden_layers_sizes cfg_dnn.n_outs = cfg.n_outs dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn) # now set up the RBM model with dnn as an argument srbm = SRBM(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg, dnn=dnn) # get the pre-training function log('> ... getting the pre-training functions') pretraining_fns = srbm.pretraining_functions(train_set_x=cfg.train_x, batch_size=cfg.batch_size, k=1, weight_cost=0.0002) start_layer_index = 0 start_epoch_index = 0