def test_selector():
    """Check brick-path and parameter-path resolution of ``Selector``.

    Two top bricks share one bottom brick (``b2``); paths must resolve to
    exactly one brick, and ``get_parameters`` must list the parameters of
    ``t1``'s children under their full paths, in order.
    """
    b1 = MockBrickBottom(name="b1")
    b2 = MockBrickBottom(name="b2")
    b3 = MockBrickBottom(name="b3")
    t1 = MockBrickTop([b1, b2], name="t1")
    t2 = MockBrickTop([b2, b3], name="t2")

    # Selection rooted at a single top brick.
    single_root = Selector([t1])
    child_hit = single_root.select("/t1/b1")
    assert child_hit.bricks[0] == b1
    assert len(child_hit.bricks) == 1
    root_hit = single_root.select("/t1")
    assert root_hit.bricks[0] == t1
    assert len(root_hit.bricks) == 1

    # Selection rooted at both top bricks; b2 is reachable through each.
    double_root = Selector([t1, t2])
    shared_hit = double_root.select("/t2/b2")
    assert shared_hit.bricks[0] == b2
    assert len(shared_hit.bricks) == 1
    assert double_root.select("/t2/b2.V")[0] == b2.parameters[0]

    # Parameter listing: (path, variable) pairs in hierarchy order.
    parameters = list(single_root.get_parameters().items())
    expected = [
        ("/t1/b1.V", b1.parameters[0]),
        ("/t1/b1.W", b1.parameters[1]),
        ("/t1/b2.V", b2.parameters[0]),
        ("/t1/b2.W", b2.parameters[1]),
    ]
    for position, (path, variable) in enumerate(expected):
        assert parameters[position][0] == path
        assert parameters[position][1] == variable
def test_selector():
    """Check brick-path and parameter-path resolution of ``Selector``.

    Two top bricks share one bottom brick (``b2``); paths must resolve to
    exactly one brick, and ``get_parameters`` must list the parameters of
    ``t1``'s children under their full paths, in order.
    """
    b1 = MockBrickBottom(name="b1")
    b2 = MockBrickBottom(name="b2")
    b3 = MockBrickBottom(name="b3")
    t1 = MockBrickTop([b1, b2], name="t1")
    t2 = MockBrickTop([b2, b3], name="t2")

    # Selection rooted at a single top brick.
    single_root = Selector([t1])
    child_hit = single_root.select("/t1/b1")
    assert child_hit.bricks[0] == b1
    assert len(child_hit.bricks) == 1
    root_hit = single_root.select("/t1")
    assert root_hit.bricks[0] == t1
    assert len(root_hit.bricks) == 1

    # Selection rooted at both top bricks; b2 is reachable through each.
    double_root = Selector([t1, t2])
    shared_hit = double_root.select("/t2/b2")
    assert shared_hit.bricks[0] == b2
    assert len(shared_hit.bricks) == 1
    assert double_root.select("/t2/b2.V")[0] == b2.parameters[0]

    # Parameter listing: (path, variable) pairs in hierarchy order.
    parameters = list(single_root.get_parameters().items())
    expected = [
        ("/t1/b1.V", b1.parameters[0]),
        ("/t1/b1.W", b1.parameters[1]),
        ("/t1/b2.V", b2.parameters[0]),
        ("/t1/b2.W", b2.parameters[1]),
    ]
    for position, (path, variable) in enumerate(expected):
        assert parameters[position][0] == path
        assert parameters[position][1] == variable
def make_sampling_computation_graph(model_path, num_samples):
    """Build a sampling graph from a pickled model with three decoder networks.

    Parameters
    ----------
    model_path : str
        Path of the pickled model; the unpickled object must expose
        ``top_bricks`` containing ``/decoder_network1`` ..
        ``/decoder_network3``.
    num_samples : int
        Number of samples (rows) to generate.

    Returns
    -------
    ComputationGraph
        Graph whose single output is the last decoder's output reshaped to
        ``(num_samples, 28, 28)``.

    """
    # The context manager closes the file even if unpickling raises.
    # NOTE(review): unpickling runs arbitrary code -- only load trusted files.
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp3, = selector.select('/decoder_network3').bricks
    theano_rng = Random().theano_rng

    z1 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)

    # First stage: only the first 40 output columns are kept and no noise
    # is added (the noise term was disabled in the original code).
    z2 = decoder_mlp1.apply(z1)
    z2 = z2[:, :40]

    # Second stage: first 100 columns plus exp(0.5 * remaining columns)
    # times standard normal noise.
    # presumably a (mean, log-variance) parameterisation -- TODO confirm
    z3 = decoder_mlp2.apply(z2)
    z3 = z3[:, :100] + theano.tensor.exp(0.5 * z3[:, 100:]) * theano_rng.normal(
        size=(num_samples, 100), dtype=theano.config.floatX)

    p = decoder_mlp3.apply(z3).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
def make_sampling_computation_graph(model_path, num_samples):
    """Build a sampling graph from a pickled model with three decoder networks.

    The second network receives the first stage's sample concatenated with
    fresh 10-column noise; the third receives both hidden samples
    concatenated.

    Parameters
    ----------
    model_path : str
        Path of the pickled model; the unpickled object must expose
        ``top_bricks`` containing ``/decoder_network1`` ..
        ``/decoder_network3``.
    num_samples : int
        Number of samples (rows) to generate.

    Returns
    -------
    ComputationGraph
        Graph whose single output is the last decoder's output reshaped to
        ``(num_samples, 28, 28)``.

    """
    # The context manager closes the file even if unpickling raises.
    # NOTE(review): unpickling runs arbitrary code -- only load trusted files.
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp3, = selector.select('/decoder_network3').bricks
    theano_rng = Random().theano_rng

    z2 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)

    # First 50 columns plus exp(0.5 * remaining columns) times standard
    # normal noise.
    # presumably a (mean, log-variance) parameterisation -- TODO confirm
    h2 = decoder_mlp1.apply(z2)
    h2 = h2[:, :50] + theano.tensor.exp(0.5 * h2[:, 50:]) * theano_rng.normal(
        size=(num_samples, 50), dtype=theano.config.floatX)

    z1 = theano_rng.normal(size=(num_samples, 10),
                           dtype=theano.config.floatX)

    h1 = decoder_mlp2.apply(theano.tensor.concatenate([h2, z1], axis=1))
    h1 = h1[:, :50] + theano.tensor.exp(0.5 * h1[:, 50:]) * theano_rng.normal(
        size=(num_samples, 50), dtype=theano.config.floatX)

    p = decoder_mlp3.apply(
        theano.tensor.concatenate([h1, h2], axis=1)).reshape(
            (num_samples, 28, 28))

    return ComputationGraph([p])
def get_decoder_function(model):
    """Compile a Theano function that runs the decoder of ``model``.

    The ``/decoder_mlp`` output is reshaped to the ``input_`` dimensions of
    ``/decoder_convnet`` and passed through it. The compiled function takes
    the graph's inputs and returns the outputs of a graph built from
    ``[z, mu_theta]``.
    """
    top_level = Selector(model.top_bricks)
    (mlp,) = top_level.select("/decoder_mlp").bricks
    (convnet,) = top_level.select("/decoder_convnet").bricks

    print("Building computation graph...")
    z = tensor.matrix()
    hidden = mlp.apply(z)
    convnet_input_shape = (-1,) + convnet.get_dim("input_")
    mu_theta = convnet.apply(hidden.reshape(convnet_input_shape))
    graph = ComputationGraph([z, mu_theta])

    print("Compiling sampling function...")
    return theano.function(graph.inputs, graph.outputs)
def inject_parameter_values(bricks, param_values):
    """Inject parameter values into a bricks hierarchy.

    Parameters
    ----------
    bricks : :class:`.Brick` or :class:`.Selector` or list of :class:`.Brick`
        The top bricks.
    param_values : dict of (parameter name, :class:`~numpy.ndarray`) pairs
        The parameter values.

    Raises
    ------
    ValueError
        If a name in `param_values` does not select exactly one parameter.

    Notes
    -----
    Parameters present in the hierarchy but missing from `param_values`
    are only reported through the logger; they keep their current values.

    """
    if isinstance(bricks, Brick):
        bricks = Selector([bricks])
    if not isinstance(bricks, Selector):
        bricks = Selector(bricks)

    for name, value in param_values.items():
        selected = bricks.select(name)
        if len(selected) == 0:
            logger.error("Unknown parameter {}".format(name))
        if len(selected) != 1:
            # Say which name failed and why, instead of a bare ValueError.
            raise ValueError(
                "Expected exactly one parameter for {}, found {}".format(
                    name, len(selected)))
        selected = selected[0]

        # The stored value and the new value must agree in shape.
        assert selected.get_value(
            borrow=True, return_internal_type=True).shape == value.shape
        selected.set_value(value)

    params = bricks.get_params()
    for name in params.keys():
        if name not in param_values:
            logger.error(
                "No value is provided for the parameter {}".format(name))
def create_running_graphs(classifier):
    """Build the classifier's inference graph from a saved main loop.

    ``classifier`` is first handed directly to ``load``; if that raises
    ``AttributeError`` it is treated as a path, opened in binary mode, and
    the file object is loaded instead (newer version of blocks).

    Returns a ``ComputationGraph`` whose only output is the ``/mlp`` brick
    applied to the flattened output of the ``/convnet`` brick.
    """

    def _model_from(source):
        # Wrap the cost of the loaded main loop's algorithm in a Model.
        return Model(load(source).algorithm.cost)

    try:
        classifier_model = _model_from(classifier)
    except AttributeError:
        # newer version of blocks
        with open(classifier, 'rb') as src:
            classifier_model = _model_from(src)

    top_level = Selector(classifier_model.top_bricks)
    (convnet,) = top_level.select('/convnet').bricks
    (mlp,) = top_level.select('/mlp').bricks

    x = tensor.tensor4('features')
    flat_features = convnet.apply(x).flatten(ndim=2)
    y_hat = mlp.apply(flat_features)
    return ComputationGraph([y_hat])
def sample_at(self, z):
    """Decode the given latent values ``z`` with the model's decoder.

    ``z`` is wrapped in a shared variable, passed through ``/decoder_mlp``,
    reshaped to the ``input_`` dimensions of ``/decoder_convnet`` and
    passed through it. Returns the result of calling the compiled function.
    """
    top_level = Selector(self.model.top_bricks)
    (mlp,) = top_level.select("/decoder_mlp").bricks
    (convnet,) = top_level.select("/decoder_convnet").bricks

    print("Building computation graph...")
    sz = shared_floatx(z)
    hidden = mlp.apply(sz)
    convnet_input_shape = (-1,) + convnet.get_dim("input_")
    mu_theta = convnet.apply(hidden.reshape(convnet_input_shape))
    graph = ComputationGraph([mu_theta])

    print("Compiling sampling function...")
    sampler = theano.function(graph.inputs, graph.outputs[0])

    print("Sampling...")
    return sampler()
def create_running_graphs(classifier):
    """Build the classifier's inference graph from a saved main loop.

    ``classifier`` is first handed directly to ``load``; if that raises
    ``AttributeError`` it is treated as a path, opened in binary mode, and
    the file object is loaded instead (newer version of blocks).

    Returns a ``ComputationGraph`` whose only output is the ``/mlp`` brick
    applied to the flattened output of the ``/convnet`` brick.
    """

    def _model_from(source):
        # Wrap the cost of the loaded main loop's algorithm in a Model.
        return Model(load(source).algorithm.cost)

    try:
        classifier_model = _model_from(classifier)
    except AttributeError:
        # newer version of blocks
        with open(classifier, 'rb') as src:
            classifier_model = _model_from(src)

    top_level = Selector(classifier_model.top_bricks)
    (convnet,) = top_level.select('/convnet').bricks
    (mlp,) = top_level.select('/mlp').bricks

    x = tensor.tensor4('features')
    flat_features = convnet.apply(x).flatten(ndim=2)
    y_hat = mlp.apply(flat_features)
    return ComputationGraph([y_hat])
def get_decoder_function(model):
    """Compile a Theano function that runs the decoder of ``model``.

    The ``/decoder_mlp`` output is reshaped to the ``input_`` dimensions of
    ``/decoder_convnet`` and passed through it. The compiled function takes
    the graph's inputs and returns the outputs of a graph built from
    ``[z, mu_theta]``.
    """
    top_level = Selector(model.top_bricks)
    (mlp,) = top_level.select('/decoder_mlp').bricks
    (convnet,) = top_level.select('/decoder_convnet').bricks

    print('Building computation graph...')
    z = tensor.matrix()
    hidden = mlp.apply(z)
    convnet_input_shape = (-1,) + convnet.get_dim('input_')
    mu_theta = convnet.apply(hidden.reshape(convnet_input_shape))
    graph = ComputationGraph([z, mu_theta])

    print('Compiling sampling function...')
    return theano.function(graph.inputs, graph.outputs)
def get_image_encoder_function(model):
    """Compile a Theano function that encodes images into sampled codes.

    The ``/encoder_convnet`` output is flattened and fed to
    ``/encoder_mlp``; the result is split column-wise in half into
    ``mu_phi`` and ``log_sigma_phi``, and
    ``z = mu_phi + epsilon * exp(log_sigma_phi)`` is formed with normal
    noise ``epsilon``. The compiled function takes the graph's inputs and
    returns the outputs of a graph built from ``[x, z]``.
    """
    top_level = Selector(model.top_bricks)
    (convnet,) = top_level.select("/encoder_convnet").bricks
    (mlp,) = top_level.select("/encoder_mlp").bricks

    print("Building computation graph...")
    x = tensor.tensor4("features")
    flat_features = convnet.apply(x).flatten(ndim=2)
    phi = mlp.apply(flat_features)

    # Split the MLP output into its two halves.
    nlat = mlp.output_dim // 2
    mu_phi, log_sigma_phi = phi[:, :nlat], phi[:, nlat:]

    noise = Random().theano_rng.normal(size=mu_phi.shape, dtype=mu_phi.dtype)
    z = mu_phi + noise * tensor.exp(log_sigma_phi)
    graph = ComputationGraph([x, z])

    print("Compiling reconstruction function...")
    return theano.function(graph.inputs, graph.outputs)
def test_selector():
    """Check brick-path and parameter-path resolution of ``Selector``.

    Uses two local mock bricks: a leaf brick holding shared variables
    named ``V`` and ``W``, and a parent brick that only holds children.
    """

    class MockBrickBottom(Brick):
        """Leaf brick owning two shared variables named V and W."""

        def __init__(self, **kwargs):
            super(MockBrickBottom, self).__init__(**kwargs)
            self.params = [theano.shared(0, "V"), theano.shared(0, "W")]

    class MockBrickTop(Brick):
        """Parent brick without parameters of its own."""

        def __init__(self, children, **kwargs):
            super(MockBrickTop, self).__init__(**kwargs)
            self.children = children
            self.params = []

    b1 = MockBrickBottom(name="b1")
    b2 = MockBrickBottom(name="b2")
    b3 = MockBrickBottom(name="b3")
    t1 = MockBrickTop([b1, b2], name="t1")
    t2 = MockBrickTop([b2, b3], name="t2")

    # Selection rooted at a single top brick.
    single_root = Selector([t1])
    child_hit = single_root.select("/t1/b1")
    assert child_hit.bricks[0] == b1
    assert len(child_hit.bricks) == 1
    root_hit = single_root.select("/t1")
    assert root_hit.bricks[0] == t1
    assert len(root_hit.bricks) == 1

    # Selection rooted at both top bricks; b2 is reachable through each.
    double_root = Selector([t1, t2])
    shared_hit = double_root.select("/t2/b2")
    assert shared_hit.bricks[0] == b2
    assert len(shared_hit.bricks) == 1
    assert double_root.select("/t2/b2.V")[0] == b2.params[0]

    # Parameter listing: (path, variable) pairs in hierarchy order.
    params = list(single_root.get_params().items())
    expected = [
        ("/t1/b1.V", b1.params[0]),
        ("/t1/b1.W", b1.params[1]),
        ("/t1/b2.V", b2.params[0]),
        ("/t1/b2.W", b2.params[1]),
    ]
    for position, (path, variable) in enumerate(expected):
        assert params[position][0] == path
        assert params[position][1] == variable
def sample_at(self, z):
    """Decode the given latent values ``z`` with the model's decoder.

    ``z`` is wrapped in a shared variable, passed through ``/decoder_mlp``,
    reshaped to the ``input_`` dimensions of ``/decoder_convnet`` and
    passed through it. Returns the result of calling the compiled function.
    """
    top_level = Selector(self.model.top_bricks)
    (mlp,) = top_level.select('/decoder_mlp').bricks
    (convnet,) = top_level.select('/decoder_convnet').bricks

    print('Building computation graph...')
    sz = shared_floatx(z)
    hidden = mlp.apply(sz)
    convnet_input_shape = (-1,) + convnet.get_dim('input_')
    mu_theta = convnet.apply(hidden.reshape(convnet_input_shape))
    graph = ComputationGraph([mu_theta])

    print('Compiling sampling function...')
    sampler = theano.function(graph.inputs, graph.outputs[0])

    print('Sampling...')
    return sampler()
def get_image_encoder_function(model):
    """Compile a Theano function that encodes images into sampled codes.

    The ``/encoder_convnet`` output is flattened and fed to
    ``/encoder_mlp``; the result is split column-wise in half into
    ``mu_phi`` and ``log_sigma_phi``, and
    ``z = mu_phi + epsilon * exp(log_sigma_phi)`` is formed with normal
    noise ``epsilon``. The compiled function takes the graph's inputs and
    returns the outputs of a graph built from ``[x, z]``.
    """
    top_level = Selector(model.top_bricks)
    (convnet,) = top_level.select('/encoder_convnet').bricks
    (mlp,) = top_level.select('/encoder_mlp').bricks

    print('Building computation graph...')
    x = tensor.tensor4('features')
    flat_features = convnet.apply(x).flatten(ndim=2)
    phi = mlp.apply(flat_features)

    # Split the MLP output into its two halves.
    nlat = mlp.output_dim // 2
    mu_phi, log_sigma_phi = phi[:, :nlat], phi[:, nlat:]

    noise = Random().theano_rng.normal(size=mu_phi.shape, dtype=mu_phi.dtype)
    z = mu_phi + noise * tensor.exp(log_sigma_phi)
    graph = ComputationGraph([x, z])

    print('Compiling reconstruction function...')
    return theano.function(graph.inputs, graph.outputs)
def test_selector():
    """Check brick-path and parameter-path resolution of ``Selector``.

    Uses two local mock bricks: a leaf brick holding shared variables
    named ``V`` and ``W``, and a parent brick that only holds children.
    """

    class MockBrickBottom(Brick):
        """Leaf brick owning two shared variables named V and W."""

        def __init__(self, **kwargs):
            super(MockBrickBottom, self).__init__(**kwargs)
            self.params = [theano.shared(0, "V"), theano.shared(0, "W")]

    class MockBrickTop(Brick):
        """Parent brick without parameters of its own."""

        def __init__(self, children, **kwargs):
            super(MockBrickTop, self).__init__(**kwargs)
            self.children = children
            self.params = []

    b1 = MockBrickBottom(name="b1")
    b2 = MockBrickBottom(name="b2")
    b3 = MockBrickBottom(name="b3")
    t1 = MockBrickTop([b1, b2], name="t1")
    t2 = MockBrickTop([b2, b3], name="t2")

    # Selection rooted at a single top brick.
    single_root = Selector([t1])
    child_hit = single_root.select("/t1/b1")
    assert child_hit.bricks[0] == b1
    assert len(child_hit.bricks) == 1
    root_hit = single_root.select("/t1")
    assert root_hit.bricks[0] == t1
    assert len(root_hit.bricks) == 1

    # Selection rooted at both top bricks; b2 is reachable through each.
    double_root = Selector([t1, t2])
    shared_hit = double_root.select("/t2/b2")
    assert shared_hit.bricks[0] == b2
    assert len(shared_hit.bricks) == 1
    assert double_root.select("/t2/b2.V")[0] == b2.params[0]

    # Parameter listing: (path, variable) pairs in hierarchy order.
    params = list(single_root.get_params().items())
    expected = [
        ("/t1/b1.V", b1.params[0]),
        ("/t1/b1.W", b1.params[1]),
        ("/t1/b2.V", b2.params[0]),
        ("/t1/b2.W", b2.params[1]),
    ]
    for position, (path, variable) in enumerate(expected):
        assert params[position][0] == path
        assert params[position][1] == variable
def make_sampling_computation_graph(model_path, num_samples):
    """Build a sampling graph from a pickled model with upsampling networks.

    Two independent noise inputs are decoded by ``/decoder_network2`` and
    ``/decoder_network1``; each decoder output is split column-wise in half
    and combined as ``first_half + exp(0.5 * second_half) * noise``. The
    first result is passed through ``/upsample_network1``, added to the
    second, and the sum is passed through ``/upsample_network2``.

    Parameters
    ----------
    model_path : str
        Path of the pickled model; the unpickled object must expose
        ``top_bricks`` containing the four networks named above.
    num_samples : int
        Number of samples (rows) to generate.

    Returns
    -------
    ComputationGraph
        Graph whose single output is reshaped to ``(num_samples, 28, 28)``.

    """
    # The context manager closes the file even if unpickling raises.
    # NOTE(review): unpickling runs arbitrary code -- only load trusted files.
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    upsample_mlp2, = selector.select('/upsample_network2').bricks
    upsample_mlp1, = selector.select('/upsample_network1').bricks
    theano_rng = Random().theano_rng

    z2 = theano_rng.normal(size=(num_samples, decoder_mlp2.input_dim),
                           dtype=theano.config.floatX)
    h2_params = decoder_mlp2.apply(z2)
    # The symbolic output is evaluated once just to read its width.
    length = h2_params.eval().shape[1] // 2
    h2_mu = h2_params[:, :length]
    h2_lognu = h2_params[:, length:]
    h2 = h2_mu + theano.tensor.exp(0.5 * h2_lognu) * theano_rng.normal(
        size=h2_mu.shape, dtype=h2_mu.dtype)

    z1 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)
    h1_tilde_params = decoder_mlp1.apply(z1)
    length = h1_tilde_params.eval().shape[1] // 2
    h1_tilde_mu = h1_tilde_params[:, :length]
    h1_tilde_lognu = h1_tilde_params[:, length:]
    h1_tilde = h1_tilde_mu + theano.tensor.exp(
        0.5 * h1_tilde_lognu) * theano_rng.normal(
            size=h1_tilde_mu.shape, dtype=h1_tilde_mu.dtype)

    # A leftover ``pdb.set_trace()`` breakpoint used to halt execution here.
    h1 = upsample_mlp1.apply(h2) + h1_tilde

    p = upsample_mlp2.apply(h1).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
def make_sampling_computation_graph(model_path, num_samples):
    """Build a sampling graph from a pickled single-decoder model.

    Parameters
    ----------
    model_path : str
        Path of the pickled model; the unpickled object must expose
        ``top_bricks`` containing ``/decoder_network``.
    num_samples : int
        Number of samples (rows) to generate.

    Returns
    -------
    ComputationGraph
        Graph whose single output is the decoder applied to normal noise,
        reshaped to ``(num_samples, 28, 28)``.

    """
    # The context manager closes the file even if unpickling raises.
    # NOTE(review): unpickling runs arbitrary code -- only load trusted files.
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp, = selector.select('/decoder_network').bricks
    theano_rng = Random().theano_rng

    z = theano_rng.normal(size=(num_samples, decoder_mlp.input_dim),
                          dtype=theano.config.floatX)
    p = decoder_mlp.apply(z).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
def load_params(bricks, path):
    """Load brick parameters from an ``.npz`` archive.

    Archive entries are keyed by parameter path with ``-`` standing in for
    ``/``; each entry is written into the parameter it selects.

    Parameters
    ----------
    bricks : Brick or Selector
        The bricks whose parameters are set.
    path : str or file
        Source handed to ``numpy.load``.

    """
    if isinstance(bricks, Brick):
        bricks = Selector([bricks])
    assert isinstance(bricks, Selector)

    # Translate archive keys back to parameter paths.
    param_values = {}
    for stored_name, stored_value in numpy.load(path).items():
        param_values[stored_name.replace("-", "/")] = stored_value

    for name, value in param_values.items():
        matches = bricks.select(name)
        if len(matches) == 0:
            logger.error("Unknown parameter {}".format(name))
        assert len(matches) == 1
        target = matches[0]

        current = target.get_value(borrow=True, return_internal_type=True)
        assert current.shape == value.shape
        target.set_value(value)

    # Report parameters of the hierarchy that the archive did not cover.
    for name in bricks.get_params().keys():
        if name not in param_values:
            logger.error(
                "No value is provided for the parameter {}".format(name))
def get_zdim(self):
    """Return the input dimension of the model's ``/decoder_mlp`` brick."""
    matches = Selector(self.model.top_bricks).select('/decoder_mlp').bricks
    # Exactly one brick must match; unpacking fails otherwise.
    (decoder_mlp,) = matches
    return decoder_mlp.input_dim
def create_training_computation_graphs(z_dim, image_size, net_depth,
                                       discriminative_regularization,
                                       classifer, vintage,
                                       reconstruction_factor, kl_factor,
                                       discriminative_factor, disc_weights):
    """Build the training computation graphs, with and without batch norm.

    Parameters
    ----------
    z_dim, image_size, net_depth
        Forwarded to ``create_model_bricks``; ``image_size`` also sets the
        shape ``(3, image_size, image_size)`` of ``log_sigma_theta``.
    discriminative_regularization : bool
        Whether to add per-layer classifier-activation terms to the cost.
    classifer
        Saved classifier main loop; passed straight to ``load`` when
        ``vintage`` is true, otherwise opened as a binary file first.
    vintage : bool
        Selects how ``classifer`` is loaded (see above).
    reconstruction_factor, kl_factor, discriminative_factor
        Multipliers applied to the respective cost terms.
    disc_weights
        Indexable per-layer weights for the discriminative terms.

    Returns
    -------
    tuple
        ``(cg, bn_cg, variance_parameters)``: the graph built outside and
        the graph built inside the ``batch_normalization`` context, and
        the list of log-sigma shared variables.
    """
    x = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    bricks = create_model_bricks(z_dim=z_dim,
                                 image_size=image_size,
                                 depth=net_depth)
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = bricks
    if discriminative_regularization:
        if vintage:
            classifier_model = Model(load(classifer).algorithm.cost)
        else:
            with open(classifer, 'rb') as src:
                classifier_model = Model(load(src).algorithm.cost)
        selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = selector.select('/convnet').bricks
        classifier_mlp, = selector.select('/mlp').bricks

    random_brick = Random()

    # Initialize conditional variances
    log_sigma_theta = shared_floatx(numpy.zeros((3, image_size, image_size)),
                                    name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    num_disc_layers = 0
    if discriminative_regularization:
        # We add discriminative regularization for the batch-normalized output
        # of the strided layers of the classifier.
        for layer in classifier_convnet.layers[1::3]:
            log_sigma = shared_floatx(numpy.zeros(layer.get_dim('output')),
                                      name='{}_log_sigma'.format(layer.name))
            add_role(log_sigma, PARAMETER)
            variance_parameters.append(log_sigma)

        # include mlp
        # DISABLED
        # log_sigma = shared_floatx(
        #     numpy.zeros([classifier_mlp.output_dim]),
        #     name='{}_log_sigma'.format("MLP"))
        # add_role(log_sigma, PARAMETER)
        # variance_parameters.append(log_sigma)
        # diagnostic
        # The first entry is log_sigma_theta, hence the "- 1".
        num_disc_layers = len(variance_parameters) - 1
        print("Applying discriminative regularization on {} layers".format(
            num_disc_layers))

    # Computation graph creation is encapsulated within this function in order
    # to allow selecting which parts of the graph will use batch statistics for
    # batch normalization and which parts will use population statistics.
    # Specifically, we'd like to use population statistics for the classifier
    # even in the training graph.
    def create_computation_graph():
        """Build one graph of the cost and its individual terms.

        Outputs, in order: cost, KL term, reconstruction term, summed
        discriminative term, then one discriminative term per layer.
        """
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]

        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape,
                                                 dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)

        # Decode
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(z).reshape((-1, ) +
                                         decoder_convnet.get_dim('input_')))
        log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (tensor.exp(2 * log_sigma_phi) + mu_phi**2 -
                         2 * log_sigma_phi - 1).sum(axis=1)

        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (x - mu_theta)**2 / tensor.exp(2 * log_sigma)).sum(axis=[1, 2, 3])

        # Start every per-layer term (and their sum) at zero so the graph
        # outputs have the same structure when regularization is disabled.
        discriminative_layer_terms = [None] * num_disc_layers
        for i in range(num_disc_layers):
            discriminative_layer_terms[i] = tensor.zeros_like(kl_term)
        discriminative_term = tensor.zeros_like(kl_term)
        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the classifier
            acts_cg = ComputationGraph([
                classifier_mlp.apply(
                    classifier_convnet.apply(x).flatten(ndim=2))
            ])
            acts_hat_cg = ComputationGraph([
                classifier_mlp.apply(
                    classifier_convnet.apply(mu_theta).flatten(ndim=2))
            ])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            cur_layer = 0
            # CLASSIFIER MLP DISABLED
            # for i, zip_pair in enumerate(zip(classifier_convnet.layers[1::3] + [classifier_mlp],
            for i, zip_pair in enumerate(
                    zip(classifier_convnet.layers[1::3],
                        variance_parameters[1:])):

                layer, log_sigma = zip_pair
                variable_filter = VariableFilter(roles=[OUTPUT],
                                                 bricks=[layer])

                # Exactly one OUTPUT variable per layer is expected in each
                # graph; unpacking fails otherwise.
                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)

                # TODO: this conditional could be less brittle
                if "mlp" in layer.name.lower():
                    log_sigma = log_sigma.dimshuffle('x', 0)
                    sumaxis = [1]
                else:
                    log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)
                    sumaxis = [1, 2, 3]

                discriminative_layer_term_unweighted = -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma +
                    (d - d_hat)**2 / tensor.exp(2 * log_sigma)).sum(
                        axis=sumaxis)

                discriminative_layer_terms[
                    i] = discriminative_factor * disc_weights[
                        cur_layer] * discriminative_layer_term_unweighted
                discriminative_term = discriminative_term + discriminative_layer_terms[
                    i]

                cur_layer = cur_layer + 1

        # scale terms (disc is prescaled by layer)
        reconstruction_term = reconstruction_factor * reconstruction_term
        kl_term = kl_factor * kl_term

        # total_reconstruction_term is reconstruction + discriminative
        total_reconstruction_term = reconstruction_term + discriminative_term

        # cost is mean(kl - total reconstruction)
        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph(
            [cost, kl_term, reconstruction_term, discriminative_term] +
            discriminative_layer_terms)

    # First graph is built outside the batch_normalization context, the
    # second inside it for the encoder/decoder bricks only.
    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp, decoder_convnet,
                             decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
class MultilangDependencyRecognizer(Initializable):
    """Brick that holds one ``DependencyRecognizer`` child per language.

    Costs and generated outputs of the children are summed, and parameters
    can be shared ("unified") between children whose relative parameter
    paths match the configured include/exclude regular expressions.

    Parameters
    ----------
    langs : list of str
        Language identifiers; one child recognizer is created for each.
    info_data
        Object providing ``num_features``, ``sources_map``, ``eos_label``,
        ``num_characters`` and ``char2num``.
    postfix_manager
        Object whose ``get_lang_postfix(lang)`` returns the name postfix
        used for the sources of that language.
    parameter_unifications_include : list of str
        Regular expressions selecting parameters to unify.
    parameter_unifications_exclude : list of str
        Regular expressions vetoing parameters matched by the include list.
    **net_config
        Configuration copied and forwarded to every child recognizer.

    """

    def __init__(self, langs, info_data, postfix_manager,
                 parameter_unifications_include,
                 parameter_unifications_exclude, **net_config):
        super(MultilangDependencyRecognizer, self).__init__(name='recognizer')
        self.langs = langs
        self.info_data = info_data
        self.postfix_manager = postfix_manager
        self.parameter_unifications_include = [
            re.compile(unification)
            for unification in parameter_unifications_include
        ]
        self.parameter_unifications_exclude = [
            re.compile(unification)
            for unification in parameter_unifications_exclude
        ]
        self.init_recognizers(**net_config)
        self.selector = Selector(self)
        # One pattern per child: the child's postfix followed by either the
        # end of the name or an underscore.
        self.child_postfix_regexp = [
            re.compile('.*' + chld.names_postfix + '($|_.*)')
            for chld in self.children
        ]

    def init_recognizers(self, **orig_net_config):
        """Create one child recognizer per language from a config copy."""
        for lang in self.langs:
            net_config = dictionaryCopy(orig_net_config)
            orig_lang = lang
            lang = self.postfix_manager.get_lang_postfix(lang)
            # 'additional_sources' is optional in the configuration.
            configured_sources = net_config.get('additional_sources', [])
            addidional_sources = ['labels'] + list(configured_sources)

            net_config['bottom']['lang_postfix'] = lang

            net_config['input_sources_dims'] = {}
            for src in net_config['input_sources']:
                net_config['input_sources_dims'][
                    src + lang] = self.info_data.num_features(src)

            net_config['additional_sources_dims'] = {}
            for src in addidional_sources:
                net_config['additional_sources_dims'][
                    src + lang] = self.info_data.num_features(
                        self.info_data.sources_map[src])

            net_config['input_sources'] = [
                source + lang for source in net_config['input_sources']
            ]
            # Previously a plain lookup, which raised KeyError when the
            # optional key was absent.
            net_config['additional_sources'] = [
                source + lang for source in configured_sources
            ]

            recognizer = DependencyRecognizer(
                eos_label=self.info_data.eos_label,
                num_phonemes=self.info_data.num_characters,
                name='recognizer_' + orig_lang,
                character_map=self.info_data.char2num,
                names_postfix=lang,
                **net_config)

            self.children += [recognizer]

    def child_id_from_postfix(self, name):
        """Return the index of the child whose postfix matches ``name``.

        Falls back to the index of the child with an empty postfix (or
        ``None`` if there is none) when no postfix pattern matches.

        Raises
        ------
        ValueError
            If several children have an empty postfix, or several postfix
            patterns match ``name``.

        """
        empty_postfix = None
        found_chld = -1
        for i, chld in enumerate(self.children):
            if chld.names_postfix == '':
                if empty_postfix is not None:
                    raise ValueError('Only one child can have empty postfix')
                empty_postfix = i
                continue
            if self.child_postfix_regexp[i].match(name):
                if found_chld != -1:
                    raise ValueError('Ambigious postfix in ' + name)
                found_chld = i
        if found_chld == -1:
            return empty_postfix
        return found_chld

    def activate_masks(self, mask_dict):
        """Assign ``mask_dict`` to every child recognizer."""
        for child in self.children:
            child.mask_dict = mask_dict

    @application
    def cost(self, **kwargs):
        """Return the sum of the children's costs.

        Keyword arguments ending in a child's non-empty postfix are routed
        to that child; a child without routed arguments receives what is
        left of ``kwargs``.
        """
        cost_matrix = 0
        split_kwargs = self.pop_dict_by_postfix(kwargs, [
            chld.names_postfix
            for chld in self.children if len(chld.names_postfix) > 0
        ])

        for chld in self.children:
            if chld.names_postfix in split_kwargs:
                chldkwargs = split_kwargs[chld.names_postfix]
            else:
                chldkwargs = kwargs
            cost_matrix += chld.cost(**chldkwargs)
        return cost_matrix

    def pop_dict_by_postfix(self, dictionary, postfixes):
        """Move entries of ``dictionary`` into per-postfix dictionaries.

        Every key ending in one of ``postfixes`` is removed from
        ``dictionary`` (which is modified in place) and stored under that
        postfix in the returned dictionary.
        """
        output = {}
        for postfix in postfixes:
            output[postfix] = {}
            # Iterate over a snapshot: entries are popped inside the loop.
            for k in list(dictionary.keys()):
                if k.endswith(postfix):
                    output[postfix][k] = dictionary.pop(k)
        return output

    @application
    def generate(self, application_call, **kwargs):
        """Run ``generate`` on every child and sum the results key-wise.

        ``kwargs['bottom_inputs']`` and, if present,
        ``kwargs['inputs_mask']`` are indexed per language. Every summed
        entry is also registered as an auxiliary variable under its key.
        """
        main = None
        for i in range(len(self.langs)):
            args = dictionaryCopy(kwargs)
            if 'inputs_mask' in args:
                args['inputs_mask'] = args['inputs_mask'][i]
            bottom_input = args['bottom_inputs'][i]
            del args['bottom_inputs']
            args = dict_union(args, bottom_input)
            args['generate_pos'] = False
            gen = self.children[i].generate(**args)
            if i == 0:
                main = gen
            else:
                for k in main.keys():
                    main[k] = main[k] + gen[k]
        if i == 0:
            # Single language: nothing was summed, so add zero.
            # NOTE(review): presumably to obtain fresh variables distinct
            # from the child's own outputs -- confirm.
            for k in main.keys():
                main[k] = main[k] + 0
        for k in main.keys():
            application_call.add_auxiliary_variable(main[k], name=k)
        return main

    def load_params(self, path):
        """Load parameter values from ``path`` into the cost and generate graphs."""
        graphs = [
            self.get_cost_graph().outputs[0],
            ComputationGraph(self.get_generate_graph()['outputs'])
        ]
        param_values = load_parameter_values(path)
        for graph in graphs:
            SpeechModel(graph).set_parameter_values(param_values)

    def get_generate_graph(self, use_mask=True, n_steps=None):
        """Call ``generate`` on the children's own input variables."""
        inputs_mask = None
        if use_mask:
            inputs_mask = [chld.inputs_mask for chld in self.children]
        bottom_inputs = [chld.inputs for chld in self.children]
        return self.generate(n_steps=n_steps,
                             inputs_mask=inputs_mask,
                             bottom_inputs=bottom_inputs)

    def get_cost_graph(self, batch=True):
        """Return the computation graph of the summed cost.

        With ``batch`` false, each child's single-example inputs are
        converted to batch form and no label mask is used.
        """
        params_dict = {}
        for chld in self.children:
            if batch:
                inputs = chld.inputs
                inputs_mask = chld.inputs_mask
                labels = chld.labels
                labels_mask = chld.labels_mask
            else:
                inputs, inputs_mask = chld.bottom.single_to_batch_inputs(
                    chld.single_inputs)
                labels = chld.single_labels[:, None]
                labels_mask = None
            params_dict = dict_union(params_dict, inputs)
            params_dict['additional_sources' + chld.names_postfix] = dict(
                chld.additional_sources)
            params_dict['inputs_mask' + chld.names_postfix] = inputs_mask
            params_dict['labels' + chld.names_postfix] = labels
            params_dict['labels_mask' + chld.names_postfix] = labels_mask

        cost = self.cost(**params_dict)
        cost_cg = ComputationGraph(cost)
        return cost_cg

    def get_top_brick(self, param):
        """Walk up first parents from ``param``'s brick.

        Stops at the first ``DependencyRecognizer`` or at a brick without
        parents.
        """
        brick = get_brick(param)
        while len(brick.parents) > 0 and not isinstance(
                brick, DependencyRecognizer):
            brick = brick.parents[0]
        return brick

    def replace_parameter(self, path, value):
        """Replace the parameter at ``'<brick path>.<name>'`` by a copy of ``value``.

        The copy takes over the name, annotations and roles of the
        parameter it replaces.

        Raises
        ------
        ValueError
            If the brick path does not select exactly one brick.

        """
        path = path.split('.')
        param_name = path[1]
        path = path[0]
        brick = self.selector.select(path).bricks
        if len(brick) != 1:
            # Implicit literal concatenation keeps source indentation out
            # of the message (the old backslash continuation embedded it).
            raise ValueError('Cannot replace parameter from path {}. '
                             'Wrong number of bricks ({})'.format(
                                 path, len(brick)))
        brick = brick[0]

        for i, orig_val in enumerate(brick.parameters):
            if orig_val.name == param_name:
                brick.parameters[i] = value.copy(name=param_name)
                brick.parameters[i].tag.annotations = orig_val.tag.annotations
                brick.parameters[i].tag.roles = orig_val.tag.roles

    def unify_parameters(self, source_id, dest_id):
        """Share matching parameters of one child with another.

        Parameters of child ``source_id`` whose relative path matches an
        include pattern, and no exclude pattern, replace the parameter at
        the same relative path in child ``dest_id``. The bricks involved
        are stored in ``self.unified_parameters``.

        Returns
        -------
        list of str
            Full paths of the replaced destination parameters.

        """
        source = self.children[source_id]
        source_prefix = '/' + source.name + '/'
        dest_name = self.children[dest_id].name
        dest_prefix = '/' + self.name + '/' + dest_name + '/'

        source_params = Selector(source).get_parameters()

        replaced = []

        self.unified_parameters = []

        for param, var in source_params.items():
            if not param.startswith(source_prefix):
                continue
            source_param = '/' + self.name + param
            param = param[len(source_prefix):]
            for unification in self.parameter_unifications_include:
                if not unification.match(param):
                    continue
                if any(ex_unification.match(param) for ex_unification
                       in self.parameter_unifications_exclude):
                    continue
                self.replace_parameter(dest_prefix + param, var)
                replaced += [dest_prefix + param]
                self.unified_parameters += [source_param]
        self.unified_parameters = self.convert_names_to_bricks(
            set(self.unified_parameters) | set(replaced))
        return replaced

    def convert_names_to_bricks(self, names):
        """Return the bricks selected by ``names``, ignoring any ``.param`` suffix."""
        bricks = []
        for name in names:
            if '.' in name:
                name = name[:name.rindex('.')]
            bricks += self.selector.select(name).bricks
        return bricks

    def find_params(self, brick, path):
        """Recursively print each brick path under ``brick`` with its parameters."""
        path = path + '/' + brick.name
        params = ", ".join([param.__str__() for param in brick.parameters])
        # Same output as the Python 2 print statement, valid on 2 and 3.
        print('%s -> %s' % (path, params))
        for chld in brick.children:
            self.find_params(chld, path)

    def get_bricks_children(self, cg):
        """Return the bricks of the graph's variables and the set of their children."""
        bricks = [
            get_brick(var) for var in cg.variables + cg.scan_variables
            if get_brick(var)
        ]
        children = set(chain(*(brick.children for brick in bricks)))
        return bricks, children

    def init_beam_search(self, lang_id, beam_size):
        """Initialise beam search on the child at index ``lang_id``."""
        self.children[lang_id].init_beam_search(beam_size)

    def beam_search(self, lang_id, *args, **kwargs):
        """Delegate beam search to the child at index ``lang_id``."""
        return self.children[lang_id].beam_search(*args, **kwargs)

    def all_children(self):
        """Return a ``MultiGet`` wrapper around all children."""
        return MultiGet(self.children)

    def __getstate__(self):
        """Return the instance dict without ``_analyze`` and ``_beam_search``."""
        state = dict(self.__dict__)
        for attr in ['_analyze', '_beam_search']:
            state.pop(attr, None)
        return state
def create_training_computation_graphs(discriminative_regularization):
    """Build the training computation graphs, with and without batch norm.

    Parameters
    ----------
    discriminative_regularization : bool
        Whether to load the classifier from ``'celeba_classifier.zip'``
        and add per-layer classifier-activation terms to the cost.

    Returns
    -------
    tuple
        ``(cg, bn_cg, variance_parameters)``: the graph built outside and
        the graph built inside the ``batch_normalization`` context, and
        the list of log-sigma shared variables.
    """
    x = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    bricks = create_model_bricks()
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = bricks
    if discriminative_regularization:
        classifier_model = Model(load('celeba_classifier.zip').algorithm.cost)
        selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = selector.select('/convnet').bricks
    random_brick = Random()

    # Initialize conditional variances
    log_sigma_theta = shared_floatx(
        numpy.zeros((3, 64, 64)), name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    if discriminative_regularization:
        # We add discriminative regularization for the batch-normalized output
        # of the strided layers of the classifier.
        for layer in classifier_convnet.layers[4::6]:
            log_sigma = shared_floatx(
                numpy.zeros(layer.get_dim('output')),
                name='{}_log_sigma'.format(layer.name))
            add_role(log_sigma, PARAMETER)
            variance_parameters.append(log_sigma)

    # Computation graph creation is encapsulated within this function in order
    # to allow selecting which parts of the graph will use batch statistics for
    # batch normalization and which parts will use population statistics.
    # Specifically, we'd like to use population statistics for the classifier
    # even in the training graph.
    def create_computation_graph():
        """Build one graph with outputs cost, KL term, reconstruction term."""
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]
        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(
            size=mu_phi.shape, dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
        # Decode
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(z).reshape(
                (-1,) + decoder_convnet.get_dim('input_')))
        log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (
            tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 - 2 * log_sigma_phi - 1
        ).sum(axis=1)
        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (x - mu_theta) ** 2 / tensor.exp(2 * log_sigma)
        ).sum(axis=[1, 2, 3])
        # The pixel-level term is returned separately below; the total
        # additionally accumulates the per-layer classifier terms.
        total_reconstruction_term = reconstruction_term

        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the
            # classifier
            acts_cg = ComputationGraph([classifier_convnet.apply(x)])
            acts_hat_cg = ComputationGraph(
                [classifier_convnet.apply(mu_theta)])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            for layer, log_sigma in zip(classifier_convnet.layers[4::6],
                                        variance_parameters[1:]):
                variable_filter = VariableFilter(roles=[OUTPUT],
                                                 bricks=[layer])

                # Exactly one OUTPUT variable per layer is expected in each
                # graph; unpacking fails otherwise.
                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)
                log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)

                total_reconstruction_term += -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma +
                    (d - d_hat) ** 2 / tensor.exp(2 * log_sigma)
                ).sum(axis=[1, 2, 3])

        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph([cost, kl_term, reconstruction_term])

    # First graph is built outside the batch_normalization context, the
    # second inside it for the encoder/decoder bricks only.
    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp,
                             decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
def get_zdim(self):
    """Return the input dimension of the model's ``/decoder_mlp`` brick."""
    matches = Selector(self.model.top_bricks).select("/decoder_mlp").bricks
    # Exactly one brick must match; unpacking fails otherwise.
    (decoder_mlp,) = matches
    return decoder_mlp.input_dim
def create_training_computation_graphs(
    z_dim,
    image_size,
    net_depth,
    discriminative_regularization,
    classifer,
    vintage,
    reconstruction_factor,
    kl_factor,
    discriminative_factor,
    disc_weights,
):
    """Build the training cost graph twice: plain and batch-normalized.

    Parameters
    ----------
    z_dim, image_size, net_depth
        Forwarded to ``create_model_bricks`` as ``z_dim``, ``image_size``
        and ``depth``.  ``image_size`` also fixes the
        ``(3, image_size, image_size)`` shape of ``log_sigma_theta``.
    discriminative_regularization
        When truthy, a classifier is loaded and per-layer terms comparing
        its activations on the input and on the reconstruction are added.
    classifer
        Location of the serialized classifier (spelling kept for callers).
        Passed straight to ``load`` when ``vintage`` is truthy; otherwise
        it is opened in binary mode and the file object is passed instead.
    vintage
        Selects which of the two loading conventions above is used.
    reconstruction_factor, kl_factor
        Multipliers applied to the reconstruction and KL terms.
    discriminative_factor
        Multiplier applied to every discriminative layer term.
    disc_weights
        Indexable collection; entry ``i`` additionally scales the term of
        the ``i``-th regularized classifier layer.

    Returns
    -------
    tuple
        ``(cg, bn_cg, variance_parameters)`` where ``cg`` is built outside
        and ``bn_cg`` inside the ``batch_normalization`` context for the
        encoder/decoder bricks, and ``variance_parameters`` is the list of
        log-sigma shared variables (``log_sigma_theta`` first).  Each graph
        outputs ``[cost, kl_term, reconstruction_term, discriminative_term]``
        followed by the individual discriminative layer terms.
    """
    features = tensor.tensor4("features")
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = create_model_bricks(
        z_dim=z_dim, image_size=image_size, depth=net_depth
    )

    if discriminative_regularization:
        if vintage:
            classifier_model = Model(load(classifer).algorithm.cost)
        else:
            with open(classifer, "rb") as classifier_file:
                loaded = load(classifier_file)
                classifier_model = Model(loaded.algorithm.cost)
        classifier_bricks = Selector(classifier_model.top_bricks)
        (classifier_convnet,) = classifier_bricks.select("/convnet").bricks
        (classifier_mlp,) = classifier_bricks.select("/mlp").bricks

    random_brick = Random()

    # Conditional variances: the pixel-level one always comes first.
    log_sigma_theta = shared_floatx(
        numpy.zeros((3, image_size, image_size)), name="log_sigma_theta"
    )
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]

    num_disc_layers = 0
    if discriminative_regularization:
        # One log-sigma per regularized classifier layer (every third layer
        # starting at index 1).  The classifier MLP is currently not part
        # of the regularized set.
        for layer in classifier_convnet.layers[1::3]:
            layer_log_sigma = shared_floatx(
                numpy.zeros(layer.get_dim("output")),
                name="{}_log_sigma".format(layer.name),
            )
            add_role(layer_log_sigma, PARAMETER)
            variance_parameters.append(layer_log_sigma)

        # Diagnostic: everything after log_sigma_theta is a layer parameter.
        num_disc_layers = len(variance_parameters) - 1
        print("Applying discriminative regularization on {} layers".format(num_disc_layers))

    def gaussian_log_likelihood(target, mean, log_sigma, axes):
        # Diagonal-Gaussian log-density of `target`, summed over `axes`.
        return -0.5 * (
            tensor.log(2 * pi)
            + 2 * log_sigma
            + (target - mean) ** 2 / tensor.exp(2 * log_sigma)
        ).sum(axis=axes)

    # The graph is built by a nested function so it can be created once
    # outside and once inside the batch_normalization context below; the
    # classifier bricks are not listed in that context in either case.
    def create_computation_graph():
        # Encode: the encoder output is split into mean and log-sigma halves.
        phi = encoder_mlp.apply(encoder_convnet.apply(features).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi, log_sigma_phi = phi[:, :nlat], phi[:, nlat:]

        # Sample from the approximate posterior.
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape, dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)

        # Decode.
        decoder_input_shape = (-1,) + decoder_convnet.get_dim("input_")
        mu_theta = decoder_convnet.apply(decoder_mlp.apply(z).reshape(decoder_input_shape))
        pixel_log_sigma = log_sigma_theta.dimshuffle("x", 0, 1, 2)

        # KL and pixel-level reconstruction terms.
        kl_term = 0.5 * (
            tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 - 2 * log_sigma_phi - 1
        ).sum(axis=1)
        reconstruction_term = gaussian_log_likelihood(
            features, mu_theta, pixel_log_sigma, [1, 2, 3]
        )

        # Zero placeholders, overwritten below when regularization is on.
        discriminative_layer_terms = [
            tensor.zeros_like(kl_term) for _ in range(num_disc_layers)
        ]
        discriminative_term = tensor.zeros_like(kl_term)

        if discriminative_regularization:
            # Run both the input and its reconstruction through the classifier.
            acts_cg = ComputationGraph(
                [classifier_mlp.apply(classifier_convnet.apply(features).flatten(ndim=2))]
            )
            acts_hat_cg = ComputationGraph(
                [classifier_mlp.apply(classifier_convnet.apply(mu_theta).flatten(ndim=2))]
            )

            regularized = zip(classifier_convnet.layers[1::3], variance_parameters[1:])
            for index, (layer, layer_log_sigma) in enumerate(regularized):
                select_outputs = VariableFilter(roles=[OUTPUT], bricks=[layer])
                (d,) = select_outputs(acts_cg)
                (d_hat,) = select_outputs(acts_hat_cg)

                # TODO: this name-based test could be less brittle.
                is_mlp = "mlp" in layer.name.lower()
                pattern = ("x", 0) if is_mlp else ("x", 0, 1, 2)
                sum_axes = [1] if is_mlp else [1, 2, 3]

                unweighted_term = gaussian_log_likelihood(
                    d, d_hat, layer_log_sigma.dimshuffle(*pattern), sum_axes
                )
                discriminative_layer_terms[index] = (
                    discriminative_factor * disc_weights[index] * unweighted_term
                )
                discriminative_term = discriminative_term + discriminative_layer_terms[index]

        # Scale the remaining terms (layer terms were scaled as they were built).
        reconstruction_term = reconstruction_factor * reconstruction_term
        kl_term = kl_factor * kl_term

        # cost = mean(KL - (reconstruction + discriminative))
        total_reconstruction_term = reconstruction_term + discriminative_term
        cost = (kl_term - total_reconstruction_term).mean()

        outputs = [cost, kl_term, reconstruction_term, discriminative_term]
        return ComputationGraph(outputs + discriminative_layer_terms)

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
def create_training_computation_graphs(discriminative_regularization):
    """Build the training cost graph twice: plain and batch-normalized.

    Parameters
    ----------
    discriminative_regularization
        When truthy, the classifier stored in ``celeba_classifier.zip`` is
        loaded and terms comparing its activations on the input and on the
        reconstruction are added to the reconstruction term.

    Returns
    -------
    tuple
        ``(cg, bn_cg, variance_parameters)`` where ``cg`` is built outside
        and ``bn_cg`` inside the ``batch_normalization`` context for the
        encoder/decoder bricks, and ``variance_parameters`` is the list of
        log-sigma shared variables (``log_sigma_theta`` first).  Each graph
        outputs ``[cost, kl_term, reconstruction_term]``.
    """
    features = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = create_model_bricks()

    if discriminative_regularization:
        classifier_model = Model(load('celeba_classifier.zip').algorithm.cost)
        classifier_bricks = Selector(classifier_model.top_bricks)
        (classifier_convnet,) = classifier_bricks.select('/convnet').bricks

    random_brick = Random()

    # Conditional variances: the pixel-level one always comes first.
    log_sigma_theta = shared_floatx(numpy.zeros((3, 64, 64)), name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]

    if discriminative_regularization:
        # One log-sigma per regularized classifier layer (every sixth layer
        # starting at index 4).
        for layer in classifier_convnet.layers[4::6]:
            layer_log_sigma = shared_floatx(
                numpy.zeros(layer.get_dim('output')),
                name='{}_log_sigma'.format(layer.name),
            )
            add_role(layer_log_sigma, PARAMETER)
            variance_parameters.append(layer_log_sigma)

    def gaussian_log_likelihood(target, mean, log_sigma):
        # Diagonal-Gaussian log-density of `target`, summed over axes 1-3.
        return -0.5 * (
            tensor.log(2 * pi)
            + 2 * log_sigma
            + (target - mean) ** 2 / tensor.exp(2 * log_sigma)
        ).sum(axis=[1, 2, 3])

    # The graph is built by a nested function so it can be created once
    # outside and once inside the batch_normalization context below; the
    # classifier brick is not listed in that context in either case.
    def create_computation_graph():
        # Encode: the encoder output is split into mean and log-sigma halves.
        phi = encoder_mlp.apply(encoder_convnet.apply(features).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi, log_sigma_phi = phi[:, :nlat], phi[:, nlat:]

        # Sample from the approximate posterior.
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape, dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)

        # Decode.
        decoder_input_shape = (-1,) + decoder_convnet.get_dim('input_')
        mu_theta = decoder_convnet.apply(decoder_mlp.apply(z).reshape(decoder_input_shape))
        pixel_log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # KL and pixel-level reconstruction terms.
        kl_term = 0.5 * (
            tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 - 2 * log_sigma_phi - 1
        ).sum(axis=1)
        reconstruction_term = gaussian_log_likelihood(features, mu_theta, pixel_log_sigma)

        total_reconstruction_term = reconstruction_term
        if discriminative_regularization:
            # Run both the input and its reconstruction through the classifier.
            acts_cg = ComputationGraph([classifier_convnet.apply(features)])
            acts_hat_cg = ComputationGraph([classifier_convnet.apply(mu_theta)])

            # Add one term per regularized layer, paired with its log-sigma.
            regularized = zip(classifier_convnet.layers[4::6], variance_parameters[1:])
            for layer, layer_log_sigma in regularized:
                select_outputs = VariableFilter(roles=[OUTPUT], bricks=[layer])
                (d,) = select_outputs(acts_cg)
                (d_hat,) = select_outputs(acts_hat_cg)
                total_reconstruction_term += gaussian_log_likelihood(
                    d, d_hat, layer_log_sigma.dimshuffle('x', 0, 1, 2)
                )

        cost = (kl_term - total_reconstruction_term).mean()
        return ComputationGraph([cost, kl_term, reconstruction_term])

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters