Пример #1
0
def test_selector():
    """Check path-based brick/parameter selection and parameter ordering.

    Builds two top bricks that share the child ``b2`` and verifies that
    ``Selector.select`` resolves brick paths and parameter paths, and that
    ``get_parameters`` lists ``(path, variable)`` pairs in the expected order.
    """
    bottom1 = MockBrickBottom(name="b1")
    bottom2 = MockBrickBottom(name="b2")
    bottom3 = MockBrickBottom(name="b3")
    top1 = MockBrickTop([bottom1, bottom2], name="t1")
    top2 = MockBrickTop([bottom2, bottom3], name="t2")

    single_root = Selector([top1])
    # Every brick path must resolve to exactly the expected brick.
    for brick_path, wanted in (("/t1/b1", bottom1), ("/t1", top1)):
        matched = single_root.select(brick_path)
        assert matched.bricks[0] == wanted
        assert len(matched.bricks) == 1

    two_roots = Selector([top1, top2])
    matched = two_roots.select("/t2/b2")
    assert matched.bricks[0] == bottom2
    assert len(matched.bricks) == 1

    # A trailing ".V" selects the parameter rather than the brick.
    assert two_roots.select("/t2/b2.V")[0] == bottom2.parameters[0]

    listed = list(single_root.get_parameters().items())
    wanted_pairs = [
        ("/t1/b1.V", bottom1.parameters[0]),
        ("/t1/b1.W", bottom1.parameters[1]),
        ("/t1/b2.V", bottom2.parameters[0]),
        ("/t1/b2.W", bottom2.parameters[1]),
    ]
    for position, (param_path, variable) in enumerate(wanted_pairs):
        assert listed[position][0] == param_path
        assert listed[position][1] == variable
Пример #2
0
def test_selector():
    """Exercise ``Selector`` lookups on a small two-level brick hierarchy.

    ``t1`` owns ``b1`` and ``b2``; ``t2`` owns ``b2`` and ``b3``. The test
    checks brick paths, a parameter path, and the ordering of the mapping
    returned by ``get_parameters``.
    """
    leaf_b1 = MockBrickBottom(name="b1")
    leaf_b2 = MockBrickBottom(name="b2")
    leaf_b3 = MockBrickBottom(name="b3")
    root_t1 = MockBrickTop([leaf_b1, leaf_b2], name="t1")
    root_t2 = MockBrickTop([leaf_b2, leaf_b3], name="t2")

    only_t1 = Selector([root_t1])

    child_hit = only_t1.select("/t1/b1")
    assert child_hit.bricks[0] == leaf_b1
    assert len(child_hit.bricks) == 1

    root_hit = only_t1.select("/t1")
    assert root_hit.bricks[0] == root_t1
    assert len(root_hit.bricks) == 1

    t1_and_t2 = Selector([root_t1, root_t2])
    shared_hit = t1_and_t2.select("/t2/b2")
    assert shared_hit.bricks[0] == leaf_b2
    assert len(shared_hit.bricks) == 1

    # Parameter paths use "<brick path>.<parameter name>".
    assert t1_and_t2.select("/t2/b2.V")[0] == leaf_b2.parameters[0]

    pairs = list(only_t1.get_parameters().items())
    position = 0
    for leaf_name, leaf in (("b1", leaf_b1), ("b2", leaf_b2)):
        for slot, param_name in enumerate(("V", "W")):
            assert pairs[position][0] == "/t1/" + leaf_name + "." + param_name
            assert pairs[position][1] == leaf.parameters[slot]
            position += 1
Пример #3
0
def make_sampling_computation_graph(model_path, num_samples):
    """Build a graph that draws samples from a pickled three-stage decoder.

    Parameters
    ----------
    model_path : str
        Path to a pickled model exposing ``top_bricks`` that contain the
        bricks ``/decoder_network1``, ``/decoder_network2`` and
        ``/decoder_network3``.
    num_samples : int
        Number of rows to sample.

    Returns
    -------
    ComputationGraph
        Graph whose single output has shape ``(num_samples, 28, 28)``.

    """
    # NOTE(review): unpickling executes arbitrary code; only load model files
    # from a trusted source.
    # The context manager guarantees the file is closed even if unpickling
    # raises (the previous manual close() was skipped on error).
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp3, = selector.select('/decoder_network3').bricks
    theano_rng = Random().theano_rng

    z1 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)

    # Only the first 40 columns of the first decoder output are used; no
    # noise is added at this stage.
    z2 = decoder_mlp1.apply(z1)[:, :40]

    # Second stage: first 100 columns plus exp(0.5 * remaining columns)
    # scaled Gaussian noise (presumably mean / log-variance halves --
    # TODO confirm against the training code).
    z3_params = decoder_mlp2.apply(z2)
    z3_noise = theano_rng.normal(size=(num_samples, 100),
                                 dtype=theano.config.floatX)
    z3 = (z3_params[:, :100] +
          theano.tensor.exp(0.5 * z3_params[:, 100:]) * z3_noise)

    p = decoder_mlp3.apply(z3).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
Пример #4
0
def make_sampling_computation_graph(model_path, num_samples):
    """Build a graph that samples from a pickled decoder with skip inputs.

    Parameters
    ----------
    model_path : str
        Path to a pickled model exposing ``top_bricks`` that contain the
        bricks ``/decoder_network1``, ``/decoder_network2`` and
        ``/decoder_network3``.
    num_samples : int
        Number of rows to sample.

    Returns
    -------
    ComputationGraph
        Graph whose single output has shape ``(num_samples, 28, 28)``.

    """
    # NOTE(review): unpickling executes arbitrary code; only load model files
    # from a trusted source.
    # The context manager guarantees the file is closed even if unpickling
    # raises (the previous manual close() was skipped on error).
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp3, = selector.select('/decoder_network3').bricks
    theano_rng = Random().theano_rng
    floatX = theano.config.floatX

    z2 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=floatX)

    # First 50 columns plus exp(0.5 * remaining columns) scaled noise
    # (presumably mean / log-variance halves -- TODO confirm).
    h2_params = decoder_mlp1.apply(z2)
    h2_noise = theano_rng.normal(size=(num_samples, 50), dtype=floatX)
    h2 = (h2_params[:, :50] +
          theano.tensor.exp(0.5 * h2_params[:, 50:]) * h2_noise)

    z1 = theano_rng.normal(size=(num_samples, 10), dtype=floatX)

    # The second decoder sees h2 concatenated with the fresh 10-column noise.
    h1_params = decoder_mlp2.apply(
        theano.tensor.concatenate([h2, z1], axis=1))
    h1_noise = theano_rng.normal(size=(num_samples, 50), dtype=floatX)
    h1 = (h1_params[:, :50] +
          theano.tensor.exp(0.5 * h1_params[:, 50:]) * h1_noise)

    # The last decoder receives both hidden samples side by side.
    p = decoder_mlp3.apply(
        theano.tensor.concatenate([h1, h2], axis=1)
    ).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
Пример #5
0
def get_decoder_function(model):
    """Compile a Theano function that runs the model's decoder.

    The ``/decoder_mlp`` and ``/decoder_convnet`` bricks are looked up in
    ``model.top_bricks``; the MLP output is reshaped to the convnet's
    ``input_`` dimensions before being passed through the convnet. The
    compiled function is built from the inputs and outputs of a computation
    graph over the latent matrix and the decoder output.
    """
    top_level = Selector(model.top_bricks)
    (mlp,) = top_level.select("/decoder_mlp").bricks
    (convnet,) = top_level.select("/decoder_convnet").bricks

    print("Building computation graph...")
    latent = tensor.matrix()
    hidden = mlp.apply(latent)
    # Leading -1 lets the batch dimension be inferred by reshape.
    convnet_input_shape = (-1,) + convnet.get_dim("input_")
    decoded = convnet.apply(hidden.reshape(convnet_input_shape))
    graph = ComputationGraph([latent, decoded])

    print("Compiling sampling function...")
    return theano.function(graph.inputs, graph.outputs)
Пример #6
0
def inject_parameter_values(bricks, param_values):
    """Inject parameter values into a bricks hierarchy.

    Parameters
    ----------
    bricks : :class:`.Brick` or :class:`.Selector` or list of :class:`Brick`
        The top bricks.
    param_values : dict of (parameter name, :class:`~numpy.ndarray`) pairs
        The parameter values.

    Raises
    ------
    ValueError
        If a name in `param_values` does not select exactly one parameter.
    AssertionError
        If a value's shape differs from the shape of the selected parameter.

    Notes
    -----
    Parameters of the hierarchy that receive no value are reported through
    the logger but do not raise.

    """
    if isinstance(bricks, Brick):
        bricks = Selector([bricks])
    if not isinstance(bricks, Selector):
        bricks = Selector(bricks)

    for name, value in param_values.items():
        selected = bricks.select(name)
        if len(selected) == 0:
            logger.error("Unknown parameter {}".format(name))
        if len(selected) != 1:
            # Say which path failed and how many matches there were instead
            # of raising a bare, message-less ValueError.
            raise ValueError(
                "Expected exactly one parameter for {}, found {}".format(
                    name, len(selected)))
        parameter = selected[0]

        current_shape = parameter.get_value(
            borrow=True, return_internal_type=True).shape
        assert current_shape == value.shape, (
            "Shape mismatch for {}: parameter has {}, value has {}".format(
                name, current_shape, value.shape))
        parameter.set_value(value)

    # Report parameters of the hierarchy that were left untouched.
    for name in bricks.get_params():
        if name not in param_values:
            logger.error(
                "No value is provided for the parameter {}".format(name))
Пример #7
0
def create_running_graphs(classifier):
    """Build the classifier's prediction graph from a saved main loop.

    ``classifier`` is first handed to ``load`` directly; if that raises
    ``AttributeError`` it is opened as a binary file and the file object is
    loaded instead (per the original note, newer Blocks versions need this).
    The returned ``ComputationGraph`` has a single output: the ``/mlp`` brick
    applied to the flattened output of the ``/convnet`` brick on a 4D
    ``features`` tensor.
    """
    try:
        main_loop = load(classifier)
        classifier_model = Model(main_loop.algorithm.cost)
    except AttributeError:
        # newer version of blocks
        with open(classifier, 'rb') as src:
            main_loop = load(src)
            classifier_model = Model(main_loop.algorithm.cost)

    top_level = Selector(classifier_model.top_bricks)
    (convnet,) = top_level.select('/convnet').bricks
    (mlp,) = top_level.select('/mlp').bricks

    features = tensor.tensor4('features')
    flat_features = convnet.apply(features).flatten(ndim=2)
    prediction = mlp.apply(flat_features)
    return ComputationGraph([prediction])
Пример #8
0
    def sample_at(self, z):
        """Decode the latent value ``z`` and return the decoder output.

        ``z`` is wrapped in a shared variable, passed through the
        ``/decoder_mlp`` brick, reshaped to the ``input_`` dimensions of the
        ``/decoder_convnet`` brick and passed through that brick. The
        compiled function is called without arguments.
        """
        top_level = Selector(self.model.top_bricks)
        (mlp,) = top_level.select("/decoder_mlp").bricks
        (convnet,) = top_level.select("/decoder_convnet").bricks

        print("Building computation graph...")
        shared_z = shared_floatx(z)
        hidden = mlp.apply(shared_z)
        # Leading -1 lets the batch dimension be inferred by reshape.
        convnet_input_shape = (-1,) + convnet.get_dim("input_")
        decoded = convnet.apply(hidden.reshape(convnet_input_shape))
        graph = ComputationGraph([decoded])

        print("Compiling sampling function...")
        run_decoder = theano.function(graph.inputs, graph.outputs[0])

        print("Sampling...")
        return run_decoder()
Пример #9
0
def create_running_graphs(classifier):
    """Return a computation graph of the saved classifier's predictions.

    The main loop is loaded from ``classifier``; when the direct ``load``
    call raises ``AttributeError`` the argument is opened in binary mode and
    the file object is loaded instead. The ``/convnet`` and ``/mlp`` top
    bricks are then applied in sequence to a fresh 4D ``features`` tensor.
    """
    try:
        cost = load(classifier).algorithm.cost
        classifier_model = Model(cost)
    except AttributeError:
        # newer version of blocks
        with open(classifier, 'rb') as src:
            cost = load(src).algorithm.cost
            classifier_model = Model(cost)

    brick_selector = Selector(classifier_model.top_bricks)
    [convnet] = brick_selector.select('/convnet').bricks
    [mlp] = brick_selector.select('/mlp').bricks

    images = tensor.tensor4('features')
    # The convnet output is flattened to 2D before entering the MLP.
    logits = mlp.apply(convnet.apply(images).flatten(ndim=2))
    return ComputationGraph([logits])
Пример #10
0
def get_decoder_function(model):
    """Build and compile the decoder of ``model`` as a Theano function.

    Looks up ``/decoder_mlp`` and ``/decoder_convnet`` among the model's top
    bricks, feeds a symbolic matrix through the MLP, reshapes the result to
    the convnet's ``input_`` dimensions and applies the convnet. The
    function is compiled from the inputs and outputs of the resulting
    computation graph.
    """
    brick_selector = Selector(model.top_bricks)
    [mlp] = brick_selector.select('/decoder_mlp').bricks
    [convnet] = brick_selector.select('/decoder_convnet').bricks

    print('Building computation graph...')
    codes = tensor.matrix()
    mlp_out = mlp.apply(codes)
    target_shape = (-1, ) + convnet.get_dim('input_')
    reconstruction = convnet.apply(mlp_out.reshape(target_shape))
    graph = ComputationGraph([codes, reconstruction])

    print('Compiling sampling function...')
    compiled = theano.function(graph.inputs, graph.outputs)

    return compiled
Пример #11
0
def get_image_encoder_function(model):
    """Compile a Theano function that encodes images into sampled latents.

    The ``/encoder_convnet`` and ``/encoder_mlp`` top bricks are applied to a
    4D ``features`` tensor. The MLP output is split in half along axis 1;
    the latent is the first half plus Gaussian noise multiplied by the
    exponential of the second half.
    """
    top_level = Selector(model.top_bricks)
    (convnet,) = top_level.select("/encoder_convnet").bricks
    (mlp,) = top_level.select("/encoder_mlp").bricks

    print("Building computation graph...")
    images = tensor.tensor4("features")
    flat_features = convnet.apply(images).flatten(ndim=2)
    encoded = mlp.apply(flat_features)

    half = mlp.output_dim // 2
    mean = encoded[:, :half]
    log_sigma = encoded[:, half:]
    noise = Random().theano_rng.normal(size=mean.shape, dtype=mean.dtype)
    latent = mean + noise * tensor.exp(log_sigma)
    graph = ComputationGraph([images, latent])

    print("Compiling reconstruction function...")
    return theano.function(graph.inputs, graph.outputs)
Пример #12
0
def test_selector():
    """Check path-based selection on mock bricks defined inside the test.

    ``MockBrickTop`` only holds children; ``MockBrickBottom`` owns two shared
    variables named ``V`` and ``W``. The test verifies brick paths, one
    parameter path and the ordering of ``get_params``.
    """
    class MockBrickTop(Brick):
        """Parent brick with the given children and no own parameters."""

        def __init__(self, children, **kwargs):
            super(MockBrickTop, self).__init__(**kwargs)
            self.children = children
            self.params = []

    class MockBrickBottom(Brick):
        """Leaf brick carrying the shared variables ``V`` and ``W``."""

        def __init__(self, **kwargs):
            super(MockBrickBottom, self).__init__(**kwargs)
            self.params = [theano.shared(0, "V"), theano.shared(0, "W")]

    bottom1 = MockBrickBottom(name="b1")
    bottom2 = MockBrickBottom(name="b2")
    bottom3 = MockBrickBottom(name="b3")
    top1 = MockBrickTop([bottom1, bottom2], name="t1")
    top2 = MockBrickTop([bottom2, bottom3], name="t2")

    single_root = Selector([top1])
    # Every brick path must resolve to exactly the expected brick.
    for brick_path, wanted in (("/t1/b1", bottom1), ("/t1", top1)):
        matched = single_root.select(brick_path)
        assert matched.bricks[0] == wanted
        assert len(matched.bricks) == 1

    two_roots = Selector([top1, top2])
    matched = two_roots.select("/t2/b2")
    assert matched.bricks[0] == bottom2
    assert len(matched.bricks) == 1

    # A trailing ".V" selects the parameter rather than the brick.
    assert two_roots.select("/t2/b2.V")[0] == bottom2.params[0]

    listed = list(single_root.get_params().items())
    wanted_pairs = [
        ("/t1/b1.V", bottom1.params[0]),
        ("/t1/b1.W", bottom1.params[1]),
        ("/t1/b2.V", bottom2.params[0]),
        ("/t1/b2.W", bottom2.params[1]),
    ]
    for position, (param_path, variable) in enumerate(wanted_pairs):
        assert listed[position][0] == param_path
        assert listed[position][1] == variable
Пример #13
0
    def sample_at(self, z):
        """Run the decoder on the latent value ``z`` and return its output.

        The value is stored in a shared variable, so the compiled function
        is called without arguments. The ``/decoder_mlp`` output is reshaped
        to the ``input_`` dimensions of ``/decoder_convnet`` before the
        convnet is applied.
        """
        brick_selector = Selector(self.model.top_bricks)
        [mlp] = brick_selector.select('/decoder_mlp').bricks
        [convnet] = brick_selector.select('/decoder_convnet').bricks

        print('Building computation graph...')
        latent = shared_floatx(z)
        mlp_out = mlp.apply(latent)
        target_shape = (-1,) + convnet.get_dim('input_')
        reconstruction = convnet.apply(mlp_out.reshape(target_shape))
        graph = ComputationGraph([reconstruction])

        print('Compiling sampling function...')
        compiled = theano.function(graph.inputs, graph.outputs[0])

        print('Sampling...')
        result = compiled()
        return result
Пример #14
0
def get_image_encoder_function(model):
    """Build and compile the image encoder of ``model``.

    A 4D ``features`` tensor is passed through ``/encoder_convnet`` and,
    flattened to 2D, through ``/encoder_mlp``. The first half of the MLP
    output columns is perturbed by Gaussian noise scaled with the
    exponential of the second half; the function is compiled from the
    inputs and outputs of the resulting computation graph.
    """
    brick_selector = Selector(model.top_bricks)
    [convnet] = brick_selector.select('/encoder_convnet').bricks
    [mlp] = brick_selector.select('/encoder_mlp').bricks

    print('Building computation graph...')
    images = tensor.tensor4('features')
    statistics = mlp.apply(convnet.apply(images).flatten(ndim=2))

    # The MLP output holds two equally sized column blocks.
    split_at = mlp.output_dim // 2
    centre, log_scale = statistics[:, :split_at], statistics[:, split_at:]
    rng = Random().theano_rng
    draw = rng.normal(size=centre.shape, dtype=centre.dtype)
    codes = centre + draw * tensor.exp(log_scale)
    graph = ComputationGraph([images, codes])

    print('Compiling reconstruction function...')
    compiled = theano.function(graph.inputs, graph.outputs)
    return compiled
Пример #15
0
def test_selector():
    """Exercise ``Selector`` on a two-level hierarchy of local mock bricks.

    ``t1`` owns ``b1`` and ``b2``; ``t2`` owns ``b2`` and ``b3``. Each bottom
    brick has the shared variables ``V`` and ``W`` as parameters.
    """
    class MockBrickTop(Brick):
        """Brick that only groups the children it is given."""
        def __init__(self, children, **kwargs):
            super(MockBrickTop, self).__init__(**kwargs)
            self.children = children
            self.params = []

    class MockBrickBottom(Brick):
        """Brick with two shared-variable parameters, ``V`` and ``W``."""
        def __init__(self, **kwargs):
            super(MockBrickBottom, self).__init__(**kwargs)
            self.params = [theano.shared(0, "V"), theano.shared(0, "W")]

    leaf_b1 = MockBrickBottom(name="b1")
    leaf_b2 = MockBrickBottom(name="b2")
    leaf_b3 = MockBrickBottom(name="b3")
    root_t1 = MockBrickTop([leaf_b1, leaf_b2], name="t1")
    root_t2 = MockBrickTop([leaf_b2, leaf_b3], name="t2")

    only_t1 = Selector([root_t1])

    child_hit = only_t1.select("/t1/b1")
    assert child_hit.bricks[0] == leaf_b1
    assert len(child_hit.bricks) == 1

    root_hit = only_t1.select("/t1")
    assert root_hit.bricks[0] == root_t1
    assert len(root_hit.bricks) == 1

    t1_and_t2 = Selector([root_t1, root_t2])
    shared_hit = t1_and_t2.select("/t2/b2")
    assert shared_hit.bricks[0] == leaf_b2
    assert len(shared_hit.bricks) == 1

    # Parameter paths use "<brick path>.<parameter name>".
    assert t1_and_t2.select("/t2/b2.V")[0] == leaf_b2.params[0]

    pairs = list(only_t1.get_params().items())
    position = 0
    for leaf_name, leaf in (("b1", leaf_b1), ("b2", leaf_b2)):
        for slot, param_name in enumerate(("V", "W")):
            assert pairs[position][0] == "/t1/" + leaf_name + "." + param_name
            assert pairs[position][1] == leaf.params[slot]
            position += 1
Пример #16
0
def make_sampling_computation_graph(model_path, num_samples):
    """Build a graph that samples from a pickled two-level decoder.

    Parameters
    ----------
    model_path : str
        Path to a pickled model exposing ``top_bricks`` that contain the
        bricks ``/decoder_network1``, ``/decoder_network2``,
        ``/upsample_network1`` and ``/upsample_network2``.
    num_samples : int
        Number of rows to sample.

    Returns
    -------
    ComputationGraph
        Graph whose single output has shape ``(num_samples, 28, 28)``.

    Notes
    -----
    The width of each decoder output is obtained by evaluating the symbolic
    output once (``.eval()``), so building the graph already runs the
    decoders.

    """
    # NOTE(review): unpickling executes arbitrary code; only load model files
    # from a trusted source.
    # The context manager guarantees the file is closed even if unpickling
    # raises (the previous manual close() was skipped on error).
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    upsample_mlp2, = selector.select('/upsample_network2').bricks
    upsample_mlp1, = selector.select('/upsample_network1').bricks
    theano_rng = Random().theano_rng

    z2 = theano_rng.normal(size=(num_samples, decoder_mlp2.input_dim),
                           dtype=theano.config.floatX)

    # Split the decoder output into two equal column blocks and sample:
    # first block + exp(0.5 * second block) * noise.
    h2_params = decoder_mlp2.apply(z2)
    length = h2_params.eval().shape[1] // 2
    h2_mu = h2_params[:, :length]
    h2_lognu = h2_params[:, length:]
    h2 = h2_mu + theano.tensor.exp(0.5 * h2_lognu) * theano_rng.normal(
        size=h2_mu.shape, dtype=h2_mu.dtype)

    z1 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)

    h1_tilde_params = decoder_mlp1.apply(z1)
    length = h1_tilde_params.eval().shape[1] // 2
    h1_tilde_mu = h1_tilde_params[:, :length]
    h1_tilde_lognu = h1_tilde_params[:, length:]
    h1_tilde = (h1_tilde_mu +
                theano.tensor.exp(0.5 * h1_tilde_lognu) * theano_rng.normal(
                    size=h1_tilde_mu.shape, dtype=h1_tilde_mu.dtype))

    # The leftover pdb.set_trace() breakpoint that used to sit here halted
    # every call; it has been removed.
    h1 = upsample_mlp1.apply(h2) + h1_tilde

    p = upsample_mlp2.apply(h1).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
Пример #17
0
def make_sampling_computation_graph(model_path, num_samples):
    """Build a graph that samples from a pickled single-stage decoder.

    Parameters
    ----------
    model_path : str
        Path to a pickled model exposing ``top_bricks`` that contain the
        brick ``/decoder_network``.
    num_samples : int
        Number of rows to sample.

    Returns
    -------
    ComputationGraph
        Graph whose single output has shape ``(num_samples, 28, 28)``.

    """
    # NOTE(review): unpickling executes arbitrary code; only load model files
    # from a trusted source.
    # The context manager guarantees the file is closed even if unpickling
    # raises (the previous manual close() was skipped on error).
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)

    selector = Selector(model.top_bricks)
    decoder_mlp, = selector.select('/decoder_network').bricks
    theano_rng = Random().theano_rng

    # Gaussian noise with one column per decoder input unit.
    z = theano_rng.normal(size=(num_samples, decoder_mlp.input_dim),
                          dtype=theano.config.floatX)
    p = decoder_mlp.apply(z).reshape((num_samples, 28, 28))

    return ComputationGraph([p])
Пример #18
0
def load_params(bricks, path):
    """Load brick parameters.

    Loads parameters from a .npz file where they are saved under their
    paths, with ``/`` replaced by ``-`` in the archive entry names.

    Parameters
    ----------
    bricks : Brick or Selector
        The bricks.
    path : str or file
        Source for loading.

    Raises
    ------
    AssertionError
        If `bricks` is neither a Brick nor a Selector, if a stored name does
        not select exactly one parameter, or if a stored value's shape
        differs from the shape of the selected parameter.

    Notes
    -----
    Parameters of the hierarchy for which the archive holds no value are
    reported through the logger but do not raise.

    """
    if isinstance(bricks, Brick):
        bricks = Selector([bricks])
    assert isinstance(bricks, Selector)

    # Read every array while the archive is open, then close it; the handle
    # returned by numpy.load was previously never released.
    with numpy.load(path) as archive:
        param_values = {
            name.replace("-", "/"): value
            for name, value in archive.items()
        }

    for name, value in param_values.items():
        selected = bricks.select(name)
        if len(selected) == 0:
            logger.error("Unknown parameter {}".format(name))
        assert len(selected) == 1, (
            "Expected exactly one parameter for {}, found {}".format(
                name, len(selected)))
        parameter = selected[0]

        current_shape = parameter.get_value(
            borrow=True, return_internal_type=True).shape
        assert current_shape == value.shape, (
            "Shape mismatch for {}: parameter has {}, value has {}".format(
                name, current_shape, value.shape))
        parameter.set_value(value)

    # Report parameters of the hierarchy that were left untouched.
    for name in bricks.get_params():
        if name not in param_values:
            logger.error(
                "No value is provided for the parameter {}".format(name))
Пример #19
0
 def get_zdim(self):
     """Return ``input_dim`` of the model's ``/decoder_mlp`` top brick."""
     top_level = Selector(self.model.top_bricks)
     # Unpacking requires the path to match exactly one brick.
     (mlp,) = top_level.select('/decoder_mlp').bricks
     return mlp.input_dim
Пример #20
0
def create_training_computation_graphs(z_dim, image_size, net_depth,
                                       discriminative_regularization,
                                       classifer, vintage,
                                       reconstruction_factor, kl_factor,
                                       discriminative_factor, disc_weights):
    """Build the training graphs with and without batch normalization.

    Parameters
    ----------
    z_dim, image_size, net_depth
        Forwarded to ``create_model_bricks`` as ``z_dim``, ``image_size``
        and ``depth``. ``image_size`` also fixes the ``(3, image_size,
        image_size)`` shape of the ``log_sigma_theta`` parameter.
    discriminative_regularization : bool
        When true, a saved classifier is loaded and reconstruction terms on
        its intermediate activations are added to the cost.
    classifer
        Location of the saved classifier main loop. Only used when
        `discriminative_regularization` is true.
    vintage : bool
        When true, `classifer` is handed to ``load`` directly; otherwise it
        is opened in binary mode and the file object is loaded.
    reconstruction_factor, kl_factor, discriminative_factor
        Multipliers applied to the reconstruction, KL and discriminative
        terms respectively.
    disc_weights
        Indexable per-layer weights for the discriminative terms.

    Returns
    -------
    cg : ComputationGraph
        Graph built outside the ``batch_normalization`` context.
    bn_cg : ComputationGraph
        The same graph built inside ``batch_normalization`` applied to the
        encoder and decoder bricks (the classifier bricks are not included).
    variance_parameters : list
        ``log_sigma_theta`` followed by one ``log_sigma`` shared variable
        per regularized classifier layer.

    Notes
    -----
    Both graphs have the outputs ``[cost, kl_term, reconstruction_term,
    discriminative_term]`` followed by the per-layer discriminative terms.

    """
    x = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    bricks = create_model_bricks(z_dim=z_dim,
                                 image_size=image_size,
                                 depth=net_depth)
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = bricks
    if discriminative_regularization:
        # Load the saved classifier; the calling convention of `load`
        # depends on `vintage` (path vs. open file object).
        if vintage:
            classifier_model = Model(load(classifer).algorithm.cost)
        else:
            with open(classifer, 'rb') as src:
                classifier_model = Model(load(src).algorithm.cost)
        selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = selector.select('/convnet').bricks
        classifier_mlp, = selector.select('/mlp').bricks

    random_brick = Random()

    # Initialize conditional variances
    log_sigma_theta = shared_floatx(numpy.zeros((3, image_size, image_size)),
                                    name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    num_disc_layers = 0
    if discriminative_regularization:
        # We add discriminative regularization for the batch-normalized output
        # of the strided layers of the classifier.
        # NOTE(review): the [1::3] stride assumes a fixed layer layout in the
        # classifier convnet -- confirm against the classifier definition.
        for layer in classifier_convnet.layers[1::3]:
            log_sigma = shared_floatx(numpy.zeros(layer.get_dim('output')),
                                      name='{}_log_sigma'.format(layer.name))
            add_role(log_sigma, PARAMETER)
            variance_parameters.append(log_sigma)
        # include mlp
        # DISABLED
        # log_sigma = shared_floatx(
        #     numpy.zeros([classifier_mlp.output_dim]),
        #     name='{}_log_sigma'.format("MLP"))
        # add_role(log_sigma, PARAMETER)
        # variance_parameters.append(log_sigma)
        # diagnostic
        # The first entry is log_sigma_theta, hence the -1.
        num_disc_layers = len(variance_parameters) - 1
        print("Applying discriminative regularization on {} layers".format(
            num_disc_layers))

    # Computation graph creation is encapsulated within this function in order
    # to allow selecting which parts of the graph will use batch statistics for
    # batch normalization and which parts will use population statistics.
    # Specifically, we'd like to use population statistics for the classifier
    # even in the training graph.
    def create_computation_graph():
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]
        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape,
                                                 dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
        # Decode
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(z).reshape((-1, ) +
                                         decoder_convnet.get_dim('input_')))
        # Add a broadcastable leading axis so log_sigma_theta lines up with
        # the batch of images.
        log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (tensor.exp(2 * log_sigma_phi) + mu_phi**2 -
                         2 * log_sigma_phi - 1).sum(axis=1)

        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (x - mu_theta)**2 / tensor.exp(2 * log_sigma)).sum(axis=[1, 2, 3])

        # Start every per-layer term (and their sum) at zero so the graph
        # outputs exist even when discriminative regularization is off.
        discriminative_layer_terms = [None] * num_disc_layers
        for i in range(num_disc_layers):
            discriminative_layer_terms[i] = tensor.zeros_like(kl_term)
        discriminative_term = tensor.zeros_like(kl_term)
        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the classifier
            acts_cg = ComputationGraph([
                classifier_mlp.apply(
                    classifier_convnet.apply(x).flatten(ndim=2))
            ])
            acts_hat_cg = ComputationGraph([
                classifier_mlp.apply(
                    classifier_convnet.apply(mu_theta).flatten(ndim=2))
            ])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            cur_layer = 0
            # CLASSIFIER MLP DISABLED
            # for i, zip_pair in enumerate(zip(classifier_convnet.layers[1::3] + [classifier_mlp],
            for i, zip_pair in enumerate(
                    zip(classifier_convnet.layers[1::3],
                        variance_parameters[1:])):

                layer, log_sigma = zip_pair
                variable_filter = VariableFilter(roles=[OUTPUT],
                                                 bricks=[layer])

                # d: activation on the input; d_hat: activation on the
                # reconstruction. Unpacking requires exactly one match each.
                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)

                # TODO: this conditional could be less brittle
                if "mlp" in layer.name.lower():
                    log_sigma = log_sigma.dimshuffle('x', 0)
                    sumaxis = [1]
                else:
                    log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)
                    sumaxis = [1, 2, 3]

                discriminative_layer_term_unweighted = -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma +
                    (d - d_hat)**2 / tensor.exp(2 * log_sigma)).sum(
                        axis=sumaxis)

                # Weight the layer term by the global factor and the
                # per-layer weight, then accumulate it into the total.
                discriminative_layer_terms[
                    i] = discriminative_factor * disc_weights[
                        cur_layer] * discriminative_layer_term_unweighted
                discriminative_term = discriminative_term + discriminative_layer_terms[
                    i]

                cur_layer = cur_layer + 1

        # scale terms (disc is prescaled by layer)
        reconstruction_term = reconstruction_factor * reconstruction_term
        kl_term = kl_factor * kl_term

        # total_reconstruction_term is reconstruction + discriminative
        total_reconstruction_term = reconstruction_term + discriminative_term

        # cost is mean(kl - total reconstruction)
        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph(
            [cost, kl_term, reconstruction_term, discriminative_term] +
            discriminative_layer_terms)

    # Build the graph twice: once as-is and once with batch normalization
    # enabled on the encoder and decoder bricks only.
    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp, decoder_convnet,
                             decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
Пример #21
0
class MultilangDependencyRecognizer(Initializable):
    """Brick that owns one ``DependencyRecognizer`` child per language.

    Each child is created with a language-specific name postfix (obtained
    from ``postfix_manager``) appended to its source names. The brick sums
    the children's costs and generation outputs and can share ("unify")
    parameters between children according to include/exclude regular
    expressions.

    Parameters
    ----------
    langs : list
        Language identifiers; one child recognizer is built for each.
    info_data
        Dataset description; the code reads ``num_features``,
        ``sources_map``, ``eos_label``, ``num_characters`` and ``char2num``.
    postfix_manager
        Object providing ``get_lang_postfix(lang)``.
    parameter_unifications_include : list of str
        Regular expressions; a parameter path matching any of them is a
        candidate for unification.
    parameter_unifications_exclude : list of str
        Regular expressions; a matching parameter path is never unified.
    **net_config
        Configuration forwarded (after per-language renaming of sources) to
        every ``DependencyRecognizer``.

    """

    def __init__(self, langs, info_data, postfix_manager,
                 parameter_unifications_include,
                 parameter_unifications_exclude, **net_config):
        super(MultilangDependencyRecognizer, self).__init__(name='recognizer')
        self.langs = langs
        self.info_data = info_data
        self.postfix_manager = postfix_manager
        self.parameter_unifications_include = [
            re.compile(unification)
            for unification in parameter_unifications_include
        ]
        self.parameter_unifications_exclude = [
            re.compile(unification)
            for unification in parameter_unifications_exclude
        ]
        self.init_recognizers(**net_config)
        self.selector = Selector(self)
        # One pattern per child: a name belongs to the child when its
        # postfix is followed by the end of the name or by an underscore.
        self.child_postfix_regexp = [
            re.compile('.*' + chld.names_postfix + '($|_.*)')
            for chld in self.children
        ]

    def init_recognizers(self, **orig_net_config):
        """Create one child recognizer per language from the shared config."""
        for lang in self.langs:
            net_config = dictionaryCopy(orig_net_config)
            orig_lang = lang
            lang = self.postfix_manager.get_lang_postfix(lang)

            # 'additional_sources' is optional in the configuration; read it
            # once with a default so its absence cannot raise KeyError
            # further down.
            configured_sources = net_config.get('additional_sources', [])
            # 'labels' always gets a dimension entry, but is not added to
            # the list of additional sources handed to the child.
            additional_sources = ['labels'] + list(configured_sources)

            net_config['bottom']['lang_postfix'] = lang
            net_config['input_sources_dims'] = {}
            for src in net_config['input_sources']:
                net_config['input_sources_dims'][
                    src + lang] = self.info_data.num_features(src)
            net_config['additional_sources_dims'] = {}
            for src in additional_sources:
                net_config['additional_sources_dims'][
                    src + lang] = self.info_data.num_features(
                        self.info_data.sources_map[src])

            net_config['input_sources'] = [
                source + lang for source in net_config['input_sources']
            ]
            net_config['additional_sources'] = [
                source + lang for source in configured_sources
            ]
            recognizer = DependencyRecognizer(
                eos_label=self.info_data.eos_label,
                num_phonemes=self.info_data.num_characters,
                name='recognizer_' + orig_lang,
                character_map=self.info_data.char2num,
                names_postfix=lang,
                **net_config)
            self.children += [recognizer]

    def child_id_from_postfix(self, name):
        """Return the index of the child whose postfix matches ``name``.

        Falls back to the child with an empty postfix (or ``None`` when
        there is none) if no postfix matches.

        Raises
        ------
        ValueError
            If more than one child has an empty postfix, or if ``name``
            matches the postfix of more than one child.

        """
        empty_postfix = None
        found_chld = -1
        for i, child in enumerate(self.children):
            if child.names_postfix == '':
                if empty_postfix is not None:
                    raise ValueError('Only one child can have empty postfix')
                empty_postfix = i
                continue
            if self.child_postfix_regexp[i].match(name):
                if found_chld != -1:
                    raise ValueError('Ambigious postfix in ' + name)
                found_chld = i
        if found_chld == -1:
            return empty_postfix
        return found_chld

    def activate_masks(self, mask_dict):
        """Assign ``mask_dict`` to every child."""
        for child in self.children:
            child.mask_dict = mask_dict

    @application
    def cost(self, **kwargs):
        """Return the sum of the children's costs.

        Keyword arguments ending in a child's non-empty postfix are routed
        to that child only; a child without a dedicated group receives the
        remaining keyword arguments.
        """
        cost_matrix = 0
        split_kwargs = self.pop_dict_by_postfix(kwargs, [
            chld.names_postfix
            for chld in self.children if len(chld.names_postfix) > 0
        ])
        for chld in self.children:
            if chld.names_postfix in split_kwargs:
                chldkwargs = split_kwargs[chld.names_postfix]
            else:
                chldkwargs = kwargs
            cost_matrix += chld.cost(**chldkwargs)
        return cost_matrix

    def pop_dict_by_postfix(self, dictionary, postfixes):
        """Move entries of ``dictionary`` into per-postfix dictionaries.

        Every key ending in one of ``postfixes`` is removed from
        ``dictionary`` (it is mutated in place) and stored, under its
        original key, in the sub-dictionary of the first matching postfix.

        Returns
        -------
        dict
            Maps each postfix to the dictionary of entries that ended in it.

        """
        output = {}
        for postfix in postfixes:
            output[postfix] = {}
            # Iterate over a snapshot of the keys: entries are popped from
            # the dictionary inside the loop.
            for k in list(dictionary):
                if k.endswith(postfix):
                    output[postfix][k] = dictionary.pop(k)
        return output

    @application
    def generate(self, application_call, **kwargs):
        """Run ``generate`` on every child and sum the results key by key.

        ``kwargs['bottom_inputs']`` (and ``kwargs['inputs_mask']`` when
        present) are indexed by language position. The summed outputs are
        also registered as auxiliary variables on ``application_call``.

        Raises
        ------
        ValueError
            If no languages are configured.

        """
        main = None
        num_langs = len(self.langs)
        for i in range(num_langs):
            args = dictionaryCopy(kwargs)
            if 'inputs_mask' in args:
                args['inputs_mask'] = args['inputs_mask'][i]
            bottom_input = args['bottom_inputs'][i]
            del args['bottom_inputs']
            args = dict_union(args, bottom_input)
            args['generate_pos'] = False
            gen = self.children[i].generate(**args)
            if i == 0:
                main = gen
            else:
                for k in main.keys():
                    main[k] = main[k] + gen[k]
        if main is None:
            raise ValueError('Cannot generate without any language')
        if num_langs == 1:
            # With a single language nothing was summed; adding 0 wraps each
            # output in a new expression (presumably so the auxiliary
            # variables below are distinct from the child's own outputs --
            # TODO confirm).
            for k in main.keys():
                main[k] = main[k] + 0
        for k in main.keys():
            application_call.add_auxiliary_variable(main[k], name=k)
        return main

    def load_params(self, path):
        """Load parameter values from ``path`` into the cost/generate graphs."""
        graphs = [
            self.get_cost_graph().outputs[0],
            ComputationGraph(self.get_generate_graph()['outputs'])
        ]
        param_values = load_parameter_values(path)
        for graph in graphs:
            SpeechModel(graph).set_parameter_values(param_values)

    def get_generate_graph(self, use_mask=True, n_steps=None):
        """Call ``generate`` on the children's own input variables."""
        inputs_mask = None
        if use_mask:
            inputs_mask = [chld.inputs_mask for chld in self.children]
        bottom_inputs = [chld.inputs for chld in self.children]
        return self.generate(n_steps=n_steps,
                             inputs_mask=inputs_mask,
                             bottom_inputs=bottom_inputs)

    def get_cost_graph(self, batch=True):
        """Return the computation graph of the summed cost.

        With ``batch`` true the children's batch variables are used;
        otherwise their single-example variables are converted with
        ``single_to_batch_inputs`` and no label mask is passed.
        """
        params_dict = {}
        for chld in self.children:
            if batch:
                inputs = chld.inputs
                inputs_mask = chld.inputs_mask
                labels = chld.labels
                labels_mask = chld.labels_mask
            else:
                inputs, inputs_mask = chld.bottom.single_to_batch_inputs(
                    chld.single_inputs)
                labels = chld.single_labels[:, None]
                labels_mask = None
            params_dict = dict_union(params_dict, inputs)
            params_dict['additional_sources' + chld.names_postfix] = dict(
                chld.additional_sources)
            params_dict['inputs_mask' + chld.names_postfix] = inputs_mask
            params_dict['labels' + chld.names_postfix] = labels
            params_dict['labels_mask' + chld.names_postfix] = labels_mask

        cost = self.cost(**params_dict)
        cost_cg = ComputationGraph(cost)

        return cost_cg

    def get_top_brick(self, param):
        """Walk up first parents until a ``DependencyRecognizer`` or a root."""
        brick = get_brick(param)
        while len(brick.parents) > 0 and not isinstance(
                brick, DependencyRecognizer):
            brick = brick.parents[0]
        return brick

    def replace_parameter(self, path, value):
        """Replace the parameter at ``'<brick path>.<name>'`` with ``value``.

        The replacement is a copy of ``value`` renamed to the parameter name
        that inherits the original's ``annotations`` and ``roles`` tags.

        Raises
        ------
        ValueError
            If the brick path does not select exactly one brick.

        """
        path = path.split('.')
        param_name = path[1]
        path = path[0]

        brick = self.selector.select(path).bricks

        if len(brick) != 1:
            # Built from adjacent literals so the message no longer carries
            # the run of indentation the old backslash continuation embedded.
            raise ValueError('Cannot replace parameter from path {}. '
                             'Wrong number of bricks ({})'.format(
                                 path, len(brick)))

        brick = brick[0]
        for i, orig_val in enumerate(brick.parameters):
            if orig_val.name == param_name:
                brick.parameters[i] = value.copy(name=param_name)
                brick.parameters[i].tag.annotations = orig_val.tag.annotations
                brick.parameters[i].tag.roles = orig_val.tag.roles

    def unify_parameters(self, source_id, dest_id):
        """Share the source child's matching parameters with another child.

        Every parameter of child ``source_id`` whose relative path matches
        an include pattern and no exclude pattern replaces the parameter at
        the same relative path in child ``dest_id``. The bricks involved are
        stored in ``self.unified_parameters``.

        Returns
        -------
        list of str
            Destination paths of the replaced parameters.

        """
        source = self.children[source_id]
        source_name = self.children[source_id].name
        source_prefix = '/' + source_name + '/'
        dest_name = self.children[dest_id].name
        dest_prefix = '/' + self.name + '/' + dest_name + '/'

        source_params = Selector(source).get_parameters()

        replaced = []

        self.unified_parameters = []

        for param, var in source_params.items():
            if not param.startswith(source_prefix):
                continue
            source_param = '/' + self.name + param
            # Patterns are matched against the path relative to the child.
            param = param[len(source_prefix):]
            for unification in self.parameter_unifications_include:
                if unification.match(param):
                    exclude = False
                    for ex_unification in self.parameter_unifications_exclude:
                        if ex_unification.match(param):
                            exclude = True
                            break
                    if exclude:
                        continue
                    self.replace_parameter(dest_prefix + param, var)
                    replaced += [dest_prefix + param]
                    self.unified_parameters += [source_param]
        self.unified_parameters = self.convert_names_to_bricks(
            set(self.unified_parameters) | set(replaced))
        return replaced

    def convert_names_to_bricks(self, names):
        """Return the bricks selected by ``names``.

        Anything after the last ``.`` in a name (the parameter name) is
        dropped before the lookup.
        """
        bricks = []
        for name in names:
            if '.' in name:
                name = name[:name.rindex('.')]
            bricks += self.selector.select(name).bricks
        return bricks

    def find_params(self, brick, path):
        """Print the parameters of ``brick`` and, recursively, its children."""
        path = path + '/' + brick.name
        params = ", ".join([param.__str__() for param in brick.parameters])
        # Single pre-formatted argument: prints the same text whether print
        # is a statement or a function.
        print('{} -> {}'.format(path, params))
        for chld in brick.children:
            self.find_params(chld, path)

    def get_bricks_children(self, cg):
        """Return the bricks of the graph's variables and their children."""
        bricks = [
            get_brick(var) for var in cg.variables + cg.scan_variables
            if get_brick(var)
        ]
        children = set(chain(*(brick.children for brick in bricks)))
        return bricks, children

    def init_beam_search(self, lang_id, beam_size):
        """Initialize beam search on the child at index ``lang_id``."""
        self.children[lang_id].init_beam_search(beam_size)

    def beam_search(self, lang_id, *args, **kwargs):
        """Delegate beam search to the child at index ``lang_id``."""
        return self.children[lang_id].beam_search(*args, **kwargs)

    def all_children(self):
        """Return a ``MultiGet`` wrapper around the children."""
        return MultiGet(self.children)

    def __getstate__(self):
        """Return the instance state without ``_analyze``/``_beam_search``."""
        state = dict(self.__dict__)
        for attr in ['_analyze', '_beam_search']:
            state.pop(attr, None)
        return state
Пример #22
0
def create_training_computation_graphs(discriminative_regularization):
    """Build the training cost graphs for the encoder/decoder model.

    Parameters
    ----------
    discriminative_regularization : bool
        When truthy, the classifier stored in ``celeba_classifier.zip`` is
        loaded and Gaussian terms comparing its activations on the input and
        on the reconstruction are added to the reconstruction term.

    Returns
    -------
    cg : ComputationGraph
        Graph with outputs ``[cost, kl_term, reconstruction_term]`` built
        outside the ``batch_normalization`` context.
    bn_cg : ComputationGraph
        The same construction repeated inside ``batch_normalization`` applied
        to the encoder and decoder bricks.
    variance_parameters : list
        ``log_sigma_theta`` followed, when regularization is enabled, by one
        log-sigma shared variable per selected classifier layer.
    """
    features = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    (encoder_convnet, encoder_mlp,
     decoder_convnet, decoder_mlp) = create_model_bricks()
    if discriminative_regularization:
        classifier_cost = load('celeba_classifier.zip').algorithm.cost
        top_bricks = Model(classifier_cost).top_bricks
        classifier_convnet, = Selector(top_bricks).select('/convnet').bricks
    random_brick = Random()

    # Conditional variances: one for the pixels, then one per regularized
    # classifier layer.
    log_sigma_theta = shared_floatx(numpy.zeros((3, 64, 64)),
                                    name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    if discriminative_regularization:
        # Discriminative regularization is applied to the batch-normalized
        # output of the strided layers of the classifier.
        for layer in classifier_convnet.layers[4::6]:
            layer_log_sigma = shared_floatx(
                numpy.zeros(layer.get_dim('output')),
                name='{}_log_sigma'.format(layer.name))
            add_role(layer_log_sigma, PARAMETER)
            variance_parameters.append(layer_log_sigma)

    def gaussian_log_likelihood(target, mean, log_sigma):
        # Diagonal-Gaussian log-density summed over axes 1, 2 and 3.
        return -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (target - mean) ** 2 / tensor.exp(2 * log_sigma)
        ).sum(axis=[1, 2, 3])

    # The graph is built by a function so that it can be created twice: once
    # as-is and once inside the batch_normalization context below.  This lets
    # the classifier keep population statistics even in the training graph
    # while the encoder/decoder use batch statistics.
    def create_computation_graph():
        # Encoder: the first half of phi is the mean, the second half the
        # log standard deviation.
        encoded = encoder_convnet.apply(features).flatten(ndim=2)
        phi = encoder_mlp.apply(encoded)
        nlat = encoder_mlp.output_dim // 2
        mu_phi, log_sigma_phi = phi[:, :nlat], phi[:, nlat:]

        # Draw a sample from the approximate posterior.
        epsilon = random_brick.theano_rng.normal(
            size=mu_phi.shape, dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)

        # Decoder.
        decoder_input = decoder_mlp.apply(z).reshape(
            (-1,) + decoder_convnet.get_dim('input_'))
        mu_theta = decoder_convnet.apply(decoder_input)

        # KL and pixel-space reconstruction terms.
        kl_term = 0.5 * (
            tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 - 2 * log_sigma_phi - 1
        ).sum(axis=1)
        reconstruction_term = gaussian_log_likelihood(
            features, mu_theta, log_sigma_theta.dimshuffle('x', 0, 1, 2))
        total_reconstruction_term = reconstruction_term

        if discriminative_regularization:
            # Run the classifier on both the input and its reconstruction.
            acts_cg = ComputationGraph([classifier_convnet.apply(features)])
            acts_hat_cg = ComputationGraph(
                [classifier_convnet.apply(mu_theta)])

            # Compare the selected activations pairwise.
            strided_layers = classifier_convnet.layers[4::6]
            for layer, layer_log_sigma in zip(strided_layers,
                                              variance_parameters[1:]):
                output_filter = VariableFilter(roles=[OUTPUT], bricks=[layer])
                d, = output_filter(acts_cg)
                d_hat, = output_filter(acts_hat_cg)
                total_reconstruction_term += gaussian_log_likelihood(
                    d, d_hat, layer_log_sigma.dimshuffle('x', 0, 1, 2))

        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph([cost, kl_term, reconstruction_term])

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp,
                             decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
Пример #23
0
 def get_zdim(self):
     """Return ``input_dim`` of the model's ``/decoder_mlp`` brick.

     The brick is looked up among ``self.model.top_bricks``; the tuple
     unpacking raises ``ValueError`` unless exactly one brick is selected.
     """
     selection = Selector(self.model.top_bricks).select("/decoder_mlp")
     decoder_mlp, = selection.bricks
     return decoder_mlp.input_dim
Пример #24
0
def create_training_computation_graphs(
    z_dim,
    image_size,
    net_depth,
    discriminative_regularization,
    classifer,
    vintage,
    reconstruction_factor,
    kl_factor,
    discriminative_factor,
    disc_weights,
):
    """Build the weighted training cost graphs for the encoder/decoder model.

    Parameters
    ----------
    z_dim, image_size, net_depth
        Forwarded to ``create_model_bricks`` (as ``z_dim``, ``image_size``
        and ``depth``).  ``image_size`` also fixes the shape
        ``(3, image_size, image_size)`` of ``log_sigma_theta``.
    discriminative_regularization : bool
        When truthy, a saved classifier is loaded and per-layer Gaussian
        terms comparing its activations on the input and on the
        reconstruction are added to the cost.
    classifer : str
        Location of the saved classifier (parameter name kept as-is for
        backward compatibility).
    vintage : bool
        When truthy ``classifer`` is handed to ``load`` directly; otherwise
        it is opened in binary mode and the file object is handed to
        ``load``.
    reconstruction_factor, kl_factor, discriminative_factor
        Multipliers applied to the reconstruction term, the KL term and
        every per-layer discriminative term respectively.
    disc_weights
        Indexable collection of per-layer weights; entry ``i`` scales the
        term of the ``i``-th regularized classifier layer.

    Returns
    -------
    cg : ComputationGraph
        Outputs ``[cost, kl_term, reconstruction_term, discriminative_term]``
        followed by the per-layer discriminative terms, built outside the
        ``batch_normalization`` context.
    bn_cg : ComputationGraph
        The same construction repeated inside ``batch_normalization``
        applied to the encoder and decoder bricks.
    variance_parameters : list
        ``log_sigma_theta`` followed by one log-sigma shared variable per
        regularized classifier layer.
    """
    x = tensor.tensor4("features")
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    bricks = create_model_bricks(z_dim=z_dim, image_size=image_size, depth=net_depth)
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = bricks
    if discriminative_regularization:
        if vintage:
            classifier_model = Model(load(classifer).algorithm.cost)
        else:
            with open(classifer, "rb") as src:
                classifier_model = Model(load(src).algorithm.cost)
        selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = selector.select("/convnet").bricks
        classifier_mlp, = selector.select("/mlp").bricks

    random_brick = Random()

    # Initialize conditional variances
    log_sigma_theta = shared_floatx(numpy.zeros((3, image_size, image_size)), name="log_sigma_theta")
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    num_disc_layers = 0
    if discriminative_regularization:
        # We add discriminative regularization for the batch-normalized output
        # of the strided layers of the classifier.
        for layer in classifier_convnet.layers[1::3]:
            log_sigma = shared_floatx(numpy.zeros(layer.get_dim("output")), name="{}_log_sigma".format(layer.name))
            add_role(log_sigma, PARAMETER)
            variance_parameters.append(log_sigma)
        # NOTE: regularizing on the classifier MLP output is currently
        # disabled.  Re-enabling it needs an extra log-sigma of shape
        # [classifier_mlp.output_dim] appended here and classifier_mlp added
        # to the layers iterated over in create_computation_graph.
        num_disc_layers = len(variance_parameters) - 1
        print("Applying discriminative regularization on {} layers".format(num_disc_layers))

    # Computation graph creation is encapsulated within this function in order
    # to allow selecting which parts of the graph will use batch statistics for
    # batch normalization and which parts will use population statistics.
    # Specifically, we'd like to use population statistics for the classifier
    # even in the training graph.
    def create_computation_graph():
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]
        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape, dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
        # Decode
        mu_theta = decoder_convnet.apply(decoder_mlp.apply(z).reshape((-1,) + decoder_convnet.get_dim("input_")))
        log_sigma = log_sigma_theta.dimshuffle("x", 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 - 2 * log_sigma_phi - 1).sum(axis=1)

        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma + (x - mu_theta) ** 2 / tensor.exp(2 * log_sigma)
        ).sum(axis=[1, 2, 3])

        # Zero placeholders keep the number of graph outputs fixed; every
        # slot is overwritten in the loop below.
        discriminative_layer_terms = [tensor.zeros_like(kl_term) for _ in range(num_disc_layers)]
        discriminative_term = tensor.zeros_like(kl_term)
        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the classifier
            acts_cg = ComputationGraph([classifier_mlp.apply(classifier_convnet.apply(x).flatten(ndim=2))])
            acts_hat_cg = ComputationGraph([classifier_mlp.apply(classifier_convnet.apply(mu_theta).flatten(ndim=2))])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            regularized_layers = classifier_convnet.layers[1::3]
            for i, (layer, log_sigma) in enumerate(zip(regularized_layers, variance_parameters[1:])):
                variable_filter = VariableFilter(roles=[OUTPUT], bricks=[layer])

                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)

                # TODO: this conditional could be less brittle
                if "mlp" in layer.name.lower():
                    log_sigma = log_sigma.dimshuffle("x", 0)
                    sumaxis = [1]
                else:
                    log_sigma = log_sigma.dimshuffle("x", 0, 1, 2)
                    sumaxis = [1, 2, 3]

                discriminative_layer_term_unweighted = -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma + (d - d_hat) ** 2 / tensor.exp(2 * log_sigma)
                ).sum(axis=sumaxis)

                # The enumerate index doubles as the position in disc_weights.
                discriminative_layer_terms[i] = (
                    discriminative_factor * disc_weights[i] * discriminative_layer_term_unweighted
                )
                discriminative_term = discriminative_term + discriminative_layer_terms[i]

        # scale terms (disc is prescaled by layer)
        reconstruction_term = reconstruction_factor * reconstruction_term
        kl_term = kl_factor * kl_term

        # total_reconstruction_term is reconstruction + discriminative
        total_reconstruction_term = reconstruction_term + discriminative_term

        # cost is mean(kl - total reconstruction)
        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph([cost, kl_term, reconstruction_term, discriminative_term] + discriminative_layer_terms)

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
Пример #25
0
def create_training_computation_graphs(discriminative_regularization):
    """Create the plain and batch-normalized training cost graphs.

    When ``discriminative_regularization`` is truthy, the classifier saved in
    ``celeba_classifier.zip`` is loaded and used to add activation-matching
    terms to the reconstruction term.

    Returns a ``(cg, bn_cg, variance_parameters)`` tuple.  Both graphs have
    the outputs ``[cost, kl_term, reconstruction_term]``; ``bn_cg`` is built
    inside the ``batch_normalization`` context of the encoder/decoder bricks
    and ``cg`` outside of it.  ``variance_parameters`` lists the log-sigma
    shared variables, ``log_sigma_theta`` first.
    """
    inputs = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    model_bricks = create_model_bricks()
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = model_bricks
    if discriminative_regularization:
        saved_classifier = load('celeba_classifier.zip')
        classifier_model = Model(saved_classifier.algorithm.cost)
        classifier_selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = classifier_selector.select('/convnet').bricks
    random_brick = Random()

    # Conditional variances, starting with the pixel-space one.
    pixel_shape = (3, 64, 64)
    log_sigma_theta = shared_floatx(numpy.zeros(pixel_shape),
                                    name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    if discriminative_regularization:
        # One extra variance per regularized classifier layer (the
        # batch-normalized output of its strided layers).
        for classifier_layer in classifier_convnet.layers[4::6]:
            layer_shape = classifier_layer.get_dim('output')
            layer_name = '{}_log_sigma'.format(classifier_layer.name)
            layer_log_sigma = shared_floatx(numpy.zeros(layer_shape),
                                            name=layer_name)
            add_role(layer_log_sigma, PARAMETER)
            variance_parameters.append(layer_log_sigma)

    # Graph creation lives in a function so it can run both outside and
    # inside the batch_normalization context: the classifier should keep
    # using population statistics even in the training graph.
    def create_computation_graph():
        # Encode
        conv_features = encoder_convnet.apply(inputs)
        phi = encoder_mlp.apply(conv_features.flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]

        # Sample from the approximate posterior
        noise = random_brick.theano_rng.normal(size=mu_phi.shape,
                                               dtype=mu_phi.dtype)
        latent = mu_phi + noise * tensor.exp(log_sigma_phi)

        # Decode
        decoder_shape = (-1, ) + decoder_convnet.get_dim('input_')
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(latent).reshape(decoder_shape))
        pixel_log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # KL term
        kl_summand = (tensor.exp(2 * log_sigma_phi) + mu_phi**2 -
                      2 * log_sigma_phi - 1)
        kl_term = 0.5 * kl_summand.sum(axis=1)

        # Pixel-space reconstruction term
        pixel_summand = (tensor.log(2 * pi) + 2 * pixel_log_sigma +
                         (inputs - mu_theta)**2 /
                         tensor.exp(2 * pixel_log_sigma))
        reconstruction_term = -0.5 * pixel_summand.sum(axis=[1, 2, 3])
        total_reconstruction_term = reconstruction_term

        if discriminative_regularization:
            # Classifier activations for the input and the reconstruction
            acts_cg = ComputationGraph([classifier_convnet.apply(inputs)])
            acts_hat_cg = ComputationGraph(
                [classifier_convnet.apply(mu_theta)])

            layer_pairs = zip(classifier_convnet.layers[4::6],
                              variance_parameters[1:])
            for classifier_layer, layer_log_sigma in layer_pairs:
                select_output = VariableFilter(roles=[OUTPUT],
                                               bricks=[classifier_layer])
                d, = select_output(acts_cg)
                d_hat, = select_output(acts_hat_cg)
                layer_log_sigma = layer_log_sigma.dimshuffle('x', 0, 1, 2)

                layer_summand = (tensor.log(2 * pi) + 2 * layer_log_sigma +
                                 (d - d_hat)**2 /
                                 tensor.exp(2 * layer_log_sigma))
                total_reconstruction_term += -0.5 * layer_summand.sum(
                    axis=[1, 2, 3])

        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph([cost, kl_term, reconstruction_term])

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp, decoder_convnet,
                             decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters