def load_river_network(nnet_param='neural_network/river_network_params',
                       nnet_cfg='neural_network/river_network_cfg'):
    """Rebuild the saved network and return its feature-extraction function.

    Args:
        nnet_param: path of the saved layer parameters, handed to
            ``_file2nnet``.
        nnet_cfg: path of the pickled network configuration object.

    Returns:
        The callable produced by ``model.build_extract_feat_function(-1)``.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    cfg_file = smart_open(nnet_cfg, 'r')
    try:
        cfg = cPickle.load(cfg_file)
    finally:
        # Release the handle as soon as the configuration has been read.
        cfg_file.close()
    cfg.init_activation()

    # numpy_rng is not defined in this function; it is read from the
    # enclosing (module) scope.
    model = DNN(numpy_rng=numpy_rng, cfg=cfg)
    _file2nnet(model.layers, filename=nnet_param)

    # -1 is the layer index passed to the extractor
    # (presumably the last layer -- TODO confirm).
    get_river_probs = model.build_extract_feat_function(-1)
    return get_river_probs
# Set up the DNN from its pickled configuration and saved parameters, then
# run the test data through it mini-batch by mini-batch.
log('> ... setting up the model and loading parameters')
numpy_rng = np.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2**30))
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(filename, 'r') as _cfg_handle:
    cfg_dnn = cPickle.load(_cfg_handle)
cfg_dnn.init_activation()
model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn)

# load model parameters
_file2nnet(model.layers, filename=wdir + '/rbm.param')

# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while (not cfg_dnn.test_sets.is_finish()):  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
    # Force float division: with two ints, Python 2's "/" truncates before
    # ceil() runs, which would silently drop the final partial mini-batch.
    batch_num = int(math.ceil(1.0 * cfg_dnn.test_sets.cur_frame_num / batch_size))
    # get_value() copies the whole partition by default, so fetch it once
    # per partition rather than once per mini-batch.
    _partition = cfg_dnn.test_x.get_value()
    for batch_index in xrange(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        # the residue may be smaller than a mini-batch
        end_index = min((batch_index + 1) * batch_size,
                        cfg_dnn.test_sets.cur_frame_num)
        output = extract_func(_partition[start_index:end_index])
        if output_mat is None:
            # NOTE(review): no else branch is visible at this point, so as
            # written only the first mini-batch is retained -- confirm
            # against the full file.
            output_mat = output
# Instantiate the network class named by the configuration.
log('> ... model type: %s' % cfg.model_type)
if cfg.model_type == 'DNN':
    model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
elif cfg.model_type == 'CNN':
    model = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
elif cfg.model_type == 'DNNV':
    model = DNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
elif cfg.model_type == 'CNNV':
    model = CNNV(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

# Restore the trained weights into the freshly built layers.
_file2nnet(model.layers, filename=nnet_param)

# Compile the function that maps an input matrix to the chosen layer's output.
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(layer_index)

kaldiread = KaldiReadIn(in_scp_file)
kaldiwrite = KaldiWriteOut(out_ark_file)

log('> ... processing the data')
utt_number = 0
# Priming read; an empty utterance id is the reader's end-of-input marker.
uttid, in_matrix = kaldiread.read_next_utt()
while uttid != '':
    # in_matrix = numpy.reshape(in_matrix, (in_matrix.shape[0],) + input_shape_1)
    out_matrix = extract_func(in_matrix)
    kaldiwrite.write_kaldi_mat(uttid, out_matrix)
    utt_number += 1
    # Progress report every 100 utterances.
    if not utt_number % 100:
        log('> ... processed %d utterances' % (utt_number))
    uttid, in_matrix = kaldiread.read_next_utt()
# Build the model named by the configuration, load its weights, and collect
# the chosen layer's output for every mini-batch of the test data.
cfg.init_activation()
if cfg.model_type == 'DNN':
    model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
elif cfg.model_type == 'CNN':
    model = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg, testing=True)
else:
    # Fail here with a clear message instead of an AttributeError on
    # ``None.layers`` a few lines further down.
    raise ValueError('unsupported model type: %r' % (cfg.model_type,))

# load model parameters
_file2nnet(model.layers, filename=nnet_param)

# initialize data reading
cfg.init_data_reading_test(data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(layer_index)

# store the features for all the data in memory.
# TODO: output the features in a streaming mode
output_mats = []
log('> ... generating features from the specified layer')
while (not cfg.test_sets.is_finish()):  # loop over the data
    cfg.test_sets.load_next_partition(cfg.test_xy)
    # "1.0 *" forces float division so ceil() counts the partial last batch.
    batch_num = int(math.ceil(1.0 * cfg.test_sets.cur_frame_num / batch_size))
    # get_value() copies the whole partition by default, so fetch it once
    # per partition rather than once per mini-batch.
    _partition = cfg.test_x.get_value()
    for batch_index in xrange(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        # the residue may be smaller than a mini-batch
        end_index = min((batch_index + 1) * batch_size, cfg.test_sets.cur_frame_num)
        output = extract_func(_partition[start_index:end_index])
        output_mats.append(output)

# Single concatenation at the end.
output_mat = numpy.concatenate(output_mats)
# Set up the DNN from its pickled configuration and saved parameters, then
# run the test data through it and accumulate the outputs in output_mat.
log('> ... setting up the model and loading parameters')
numpy_rng = np.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(filename, 'r') as _cfg_handle:
    cfg_dnn = cPickle.load(_cfg_handle)
cfg_dnn.init_activation()
model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg_dnn)

# load model parameters
_file2nnet(model.layers, filename=wdir + '/rbm.param')

# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while (not cfg_dnn.test_sets.is_finish()):  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
    # Force float division: with two ints, Python 2's "/" truncates before
    # ceil() runs, which would silently drop the final partial mini-batch.
    batch_num = int(math.ceil(1.0 * cfg_dnn.test_sets.cur_frame_num / batch_size))
    # get_value() copies the whole partition by default, so fetch it once
    # per partition rather than once per mini-batch.
    _partition = cfg_dnn.test_x.get_value()
    for batch_index in xrange(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        # the residue may be smaller than a mini-batch
        end_index = min((batch_index + 1) * batch_size,
                        cfg_dnn.test_sets.cur_frame_num)
        output = extract_func(_partition[start_index:end_index])
        if output_mat is None:
            output_mat = output
        else:
            # this is not efficient: every call re-copies all rows so far
            output_mat = np.concatenate((output_mat, output))
def main(arg_elements):
    """Run data through a saved network and pickle one layer's output.

    Args:
        arg_elements: raw argument elements, handed to ``parse_arguments``.
            The parsed result must contain 'data', 'nnet_param', 'nnet_cfg',
            'output_file', 'layer_index' and 'batch_size'. An optional
            'argmax' entry, when it parses as true, replaces each output row
            by the index of its maximum.

    Raises:
        ValueError: if the configuration's model type is neither 'DNN' nor
            'CNN'.

    Exits with status 1 when a required argument is missing.
    """
    # check the arguments
    arguments = parse_arguments(arg_elements)
    required_arguments = [
        'data', 'nnet_param', 'nnet_cfg', 'output_file', 'layer_index',
        'batch_size'
    ]
    for arg in required_arguments:
        if arg not in arguments:
            print("Error: the argument %s has to be specified" % (arg))
            exit(1)

    # mandatory arguments
    data_spec = arguments['data']
    nnet_param = arguments['nnet_param']
    nnet_cfg = arguments['nnet_cfg']
    output_file = arguments['output_file']
    layer_index = int(arguments['layer_index'])
    batch_size = int(arguments['batch_size'])
    # optional argument
    argmax = 'argmax' in arguments and string2bool(arguments['argmax'])

    # load network configuration and set up the model
    log('> ... setting up the model and loading parameters')
    numpy_rng = numpy.random.RandomState(89677)
    theano_rng = RandomStreams(numpy_rng.randint(2**30))
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    cfg_file = smart_open(nnet_cfg, 'r')
    try:
        cfg = cPickle.load(cfg_file)
    finally:
        cfg_file.close()
    cfg.init_activation()
    if cfg.model_type == 'DNN':
        model = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
    elif cfg.model_type == 'CNN':
        model = CNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg,
                    testing=True)
    else:
        # Fail here with a clear message instead of an AttributeError on
        # ``None.layers`` below.
        raise ValueError('unsupported model type: %r' % (cfg.model_type,))

    # load model parameters
    _file2nnet(model.layers, filename=nnet_param)

    # initialize data reading
    cfg.init_data_reading_test(data_spec)

    # get the function for feature extraction
    log('> ... getting the feat-extraction function')
    extract_func = model.build_extract_feat_function(layer_index)

    # store the features for all the data in memory.
    # TODO: output the features in a streaming mode
    output_mats = []
    log('> ... generating features from the specified layer')
    while (not cfg.test_sets.is_finish()):  # loop over the data
        cfg.test_sets.load_next_partition(cfg.test_xy)
        # "1.0 *" forces float division so ceil() counts the partial batch.
        batch_num = int(
            math.ceil(1.0 * cfg.test_sets.cur_frame_num / batch_size))
        # get_value() copies the whole partition by default, so fetch it
        # once per partition rather than once per mini-batch.
        partition = cfg.test_x.get_value()
        for batch_index in xrange(batch_num):  # loop over mini-batches
            start_index = batch_index * batch_size
            # the residue may be smaller than a mini-batch
            end_index = min((batch_index + 1) * batch_size,
                            cfg.test_sets.cur_frame_num)
            output_mats.append(extract_func(partition[start_index:end_index]))

    output_mat = numpy.concatenate(output_mats)
    if argmax:
        output_mat = output_mat.argmax(axis=1)

    # output the feature representations using pickle
    f = smart_open(output_file, 'wb')
    try:
        cPickle.dump(output_mat, f, cPickle.HIGHEST_PROTOCOL)
    finally:
        # Closing flushes buffered (possibly compressed) data to disk.
        f.close()

    log('> ... the features are stored in ' + output_file)