Example #1
    def _build_model(self):
        """
        implementation of the flow model
        """
        with tf.variable_scope(self.name):
            # adds placeholders, data normalization and data noise to the graph if desired; also sets up
            # a placeholder for the dropout probability
            self.layer_in_x, self.layer_in_y = self._build_input_layers()
            self.y_input = L.get_output(self.layer_in_y)

            flow_classes = [FLOWS[flow_name] for flow_name in self.flows_type]
            # get the individual parameter sizes for each flow
            param_split_sizes = [flow.get_param_size(self.ndim_y) for flow in flow_classes]
            mlp_output_dim = sum(param_split_sizes)
            core_network = MLP(
                name="core_network",
                input_layer=self.layer_in_x,
                output_dim=mlp_output_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=None,
                weight_normalization=self.weight_normalization,
                dropout_ph=self.dropout_ph if self.dropout else None
            )
            outputs = L.get_output(core_network.output_layer)
            flow_params = tf.split(value=outputs, num_or_size_splits=param_split_sizes, axis=1)

            # instantiate the flows with their parameters
            flows = [flow(params, self.ndim_y) for flow, params in zip(flow_classes, flow_params)]

            # build up the base distribution that will be transformed by the flows
            if self.ndim_y == 1:
                # this is faster for 1-D than the multivariate version
                # it also supports a cdf, which isn't implemented for Multivariate
                base_dist = tf.distributions.Normal(loc=0., scale=1.)
            else:
                base_dist = tf.contrib.distributions.MultivariateNormalDiag(loc=[0.] * self.ndim_y,
                                                                            scale_diag=[1.] * self.ndim_y)

            # chain the flows together and build the transformed distribution using the base_dist + flows
            # Chaining applies the flows in reverse, Chain([a,b]).forward(x) being a.forward(b.forward(x))
            # We reverse them so the flows are stacked on top of the base distribution in the original order
            # (see the sketch after this method)
            flows.reverse()
            chain = tf.contrib.distributions.bijectors.Chain(flows)
            target_dist = tf.contrib.distributions.TransformedDistribution(distribution=base_dist, bijector=chain)

            # since we operate with matrices not vectors, the output would have dimension (?, 1)
            # and therefore has to be reduced first to have shape (?,)
            if self.ndim_y == 1:
                # for x shape (batch_size, 1) normal_distribution.pdf(x) outputs shape (batch_size, 1) -> squeeze
                self.pdf_ = tf.squeeze(target_dist.prob(self.y_input), axis=1)
                self.log_pdf_ = tf.squeeze(target_dist.log_prob(self.y_input), axis=1)
                self.cdf_ = tf.squeeze(target_dist.cdf(self.y_input), axis=1)
            else:
                # no squeezing necessary for multivariate_normal, but we don't have a cdf
                self.pdf_ = target_dist.prob(self.y_input)
                self.log_pdf_ = target_dist.log_prob(self.y_input)


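            # if the targets were normalized as z = (y - mean) / std, the density of the original y
            # follows from the change-of-variables formula: divide the pdf by prod(std) and
            # subtract sum(log(std)) from the log pdf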
            if self.data_normalization:
                self.pdf_ = self.pdf_ / tf.reduce_prod(self.std_y_sym)
                self.log_pdf_ = self.log_pdf_ - tf.reduce_sum(tf.log(self.std_y_sym))
                # cdf is only implemented for 1-D
                if self.ndim_y == 1:
                    self.cdf_ = self.cdf_ / tf.reduce_prod(self.std_y_sym)

            # regularization
            self._add_l1_l2_regularization(core_network)

            self.loss = -tf.reduce_prod(self.pdf_)
            self.reg_loss = tf.reduce_sum(tf.losses.get_regularization_losses(scope=self.name))  # regularization losses
            self.log_loss = -tf.reduce_sum(self.log_pdf_) + self.reg_loss

            optimizer = AdamWOptimizer(self.weight_decay) if self.weight_decay else tf.train.AdamOptimizer()

            if self.gradient_clipping:
                gradients, variables = zip(*optimizer.compute_gradients(self.log_loss))
                gradients, _ = tf.clip_by_global_norm(gradients, 3e5)
                self.train_step = optimizer.apply_gradients(zip(gradients, variables))
            else:
                self.train_step = optimizer.minimize(self.log_loss)

        # initialize LayersPowered -> provides functions for serializing tf models
        LayersPowered.__init__(self, [self.layer_in_y, core_network.output_layer])
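
The reversal above exploits how Chain composes bijectors: Chain([a, b]).forward(x) equals a.forward(b.forward(x)), i.e. the last bijector in the list is applied first. A minimal sketch illustrating this ordering, assuming a TF 1.x environment where tf.contrib.distributions.bijectors is available (the bijectors chosen here are arbitrary):

import tensorflow as tf

bijectors = tf.contrib.distributions.bijectors
a, b = bijectors.Exp(), bijectors.Softplus()
chain = bijectors.Chain([a, b])

x = tf.constant([0.5])
with tf.Session() as sess:
    lhs, rhs = sess.run([chain.forward(x), a.forward(b.forward(x))])
    # both evaluate to exp(softplus(0.5)); the last bijector in the list runs first
    assert abs(lhs[0] - rhs[0]) < 1e-6
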
    def __setstate__(self, state):
        LayersPowered.__setstate__(self, state)
        self.fitted = state['fitted']
        self.sess = tf.get_default_session()

    def __getstate__(self):
        state = LayersPowered.__getstate__(self)
        state['fitted'] = self.fitted
        return state
    def __init__(self,
                 name,
                 output_dim,
                 hidden_sizes,
                 hidden_nonlinearity,
                 output_nonlinearity,
                 hidden_W_init=L.XavierUniformInitializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_W_init=L.XavierUniformInitializer(),
                 output_b_init=tf.zeros_initializer(),
                 input_var=None,
                 input_layer=None,
                 input_shape=None,
                 batch_normalization=False,
                 weight_normalization=False,
                 dropout_ph=None):
        """
        :param dropout_ph: None if no dropout should be used. Else a scalar placeholder that determines the prob of dropping a node.
        Remember to set placeholder to Zero during test / eval
        """

        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, ) + input_shape,
                                    input_var=input_var,
                                    name="input")
            else:
                l_in = input_layer
            self._layers = [l_in]
            l_hid = l_in
            if batch_normalization:
                l_hid = L.batch_norm(l_hid)
            for idx, hidden_size in enumerate(hidden_sizes):
                l_hid = L.DenseLayer(l_hid,
                                     num_units=hidden_size,
                                     nonlinearity=hidden_nonlinearity,
                                     name="hidden_%d" % idx,
                                     W=hidden_W_init,
                                     b=hidden_b_init,
                                     weight_normalization=weight_normalization)
                if dropout_ph is not None:
                    l_hid = L.DropoutLayer(l_hid, dropout_ph, rescale=False)
                if batch_normalization:
                    l_hid = L.batch_norm(l_hid)
                self._layers.append(l_hid)
            l_out = L.DenseLayer(l_hid,
                                 num_units=output_dim,
                                 nonlinearity=output_nonlinearity,
                                 name="output",
                                 W=output_W_init,
                                 b=output_b_init,
                                 weight_normalization=weight_normalization)
            if batch_normalization:
                l_out = L.batch_norm(l_out)
            self._layers.append(l_out)
            self._l_in = l_in
            self._l_out = l_out
            # self._input_var = l_in.input_var
            self._output = L.get_output(l_out)

            LayersPowered.__init__(self, l_out)
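
A hypothetical usage sketch of the MLP above (argument values are illustrative only; MLP and the L layers alias are the ones used throughout this listing): the dropout placeholder defaults to 0.0 so that evaluation runs without dropout, and a positive probability is fed only during training.

import tensorflow as tf

dropout_ph = tf.placeholder_with_default(0.0, shape=(), name="dropout_prob")
mlp = MLP(name="demo_mlp",
          input_shape=(4,),
          output_dim=10,
          hidden_sizes=(32, 32),
          hidden_nonlinearity=tf.nn.tanh,
          output_nonlinearity=None,
          dropout_ph=dropout_ph)
output = L.get_output(mlp.output_layer)  # feed a dropout probability > 0 only while training
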
Example #5
    def _build_model(self):
        """
    implementation of the KMN
    """
        with tf.variable_scope(self.name):
            # adds placeholders, data_normalization and data_noise if desired; also sets up the placeholder
            # for the dropout probability
            self.layer_in_x, self.layer_in_y = self._build_input_layers()

            self.X_in = L.get_output(self.layer_in_x)
            self.Y_in = L.get_output(self.layer_in_y)

            # get batch size
            self.batch_size = tf.shape(self.X_ph)[0]

            # create core multi-layer perceptron
            core_network = MLP(
                name="core_network",
                input_layer=self.layer_in_x,
                output_dim=self.n_centers * self.n_scales,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=None,
                dropout_ph=self.dropout_ph if self.dropout else None)

            self.core_output_layer = core_network.output_layer

            # weights of the mixture components
            self.logits = L.get_output(self.core_output_layer)
            self.softmax_layer_weights = L.NonlinearityLayer(
                self.core_output_layer, nonlinearity=tf.nn.softmax)
            self.weights = L.get_output(self.softmax_layer_weights)

            # locations of the kernel functions
            self.locs = tf.Variable(
                np.zeros((self.n_centers, self.ndim_y)),
                name="locs",
                trainable=False,
                dtype=tf.float32)  # assign sampled locs when fitting
            self.locs_layer = L.VariableLayer(core_network.input_layer,
                                              (self.n_centers, self.ndim_y),
                                              variable=self.locs,
                                              name="locs",
                                              trainable=False)

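            # broadcast the fixed (n_centers, ndim_y) locs over the batch, then transpose and unstack
            # so that locs_array holds one tensor of shape (batch_size, ndim_y) per center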
            self.locs_array = tf.unstack(
                tf.transpose(tf.multiply(
                    tf.ones((self.batch_size, self.n_centers, self.ndim_y)),
                    self.locs),
                             perm=[1, 0, 2]))
            assert len(self.locs_array) == self.n_centers

            # scales of the gaussian kernels
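            # the variable stores the scales in pre-softplus space; the softplus below keeps the
            # effective kernel bandwidths positive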
            log_scales_layer = L.VariableLayer(
                core_network.input_layer, (self.n_scales, ),
                variable=tf.Variable(self.init_scales_softplus,
                                     dtype=tf.float32,
                                     trainable=self.train_scales),
                name="log_scales",
                trainable=self.train_scales)

            self.scales_layer = L.NonlinearityLayer(
                log_scales_layer, nonlinearity=tf.nn.softplus)
            self.scales = L.get_output(self.scales_layer)
            self.scales_array = scales_array = tf.unstack(
                tf.transpose(tf.multiply(
                    tf.ones((self.batch_size, self.ndim_y, self.n_scales)),
                    self.scales),
                             perm=[2, 0, 1]))
            assert len(self.scales_array) == self.n_scales

            # put mixture components together
            self.y_input = L.get_output(self.layer_in_y)
            self.cat = cat = Categorical(logits=self.logits)
            self.components = components = [
                MultivariateNormalDiag(loc=loc, scale_diag=scale)
                for loc in self.locs_array for scale in scales_array
            ]
            self.mixture = mixture = Mixture(cat=cat, components=components)

            # regularization
            self._add_softmax_entropy_regularization()
            self._add_l1_l2_regularization(core_network)

            # tensor to compute probabilities
            if self.data_normalization:
                self.pdf_ = mixture.prob(self.y_input) / tf.reduce_prod(
                    self.std_y_sym)
                self.log_pdf_ = mixture.log_prob(self.y_input) - tf.reduce_sum(
                    tf.log(self.std_y_sym))
            else:
                self.pdf_ = mixture.prob(self.y_input)
                self.log_pdf_ = mixture.log_prob(self.y_input)

            # symbolic tensors for getting the unnormalized mixture components
            if self.data_normalization:
                self.scales_unnormalized = tf.transpose(
                    tf.multiply(tf.ones(
                        (self.ndim_y, self.n_scales)), self.scales)
                ) * self.std_y_sym  # shape = (n_scales, ndim_y)
                self.locs_unnormalized = self.locs * self.std_y_sym + self.mean_y_sym
            else:
                self.scales_unnormalized = tf.transpose(
                    tf.multiply(tf.ones((self.ndim_y, self.n_scales)),
                                self.scales))  # shape = (n_scales, ndim_y)
                self.locs_unnormalized = self.locs

        # initialize LayersPowered --> provides functions for serializing tf models
        LayersPowered.__init__(self, [
            self.core_output_layer, self.locs_layer, self.scales_layer,
            self.layer_in_y
        ])
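
A minimal sketch of how the kernel mixture above is assembled (TF 1.x API assumed, toy values chosen for illustration): every combination of a fixed center and a fixed scale becomes one Gaussian component, so the mixture has n_centers * n_scales components, with weights given by the softmax over the network logits (uniform here for brevity).

import tensorflow as tf

tfd = tf.contrib.distributions

centers = [[-1.0], [1.0]]              # n_centers = 2, ndim_y = 1
scales = [0.3, 1.0]                    # n_scales = 2
components = [tfd.MultivariateNormalDiag(loc=c, scale_diag=[s])
              for c in centers for s in scales]            # 2 * 2 = 4 components
cat = tfd.Categorical(logits=tf.zeros(len(components)))    # uniform weights for the sketch
mixture = tfd.Mixture(cat=cat, components=components)

with tf.Session() as sess:
    print(sess.run(mixture.prob([[0.0]])))                 # mixture density at y = 0
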
    def _build_model(self):
        """
    implementation of the MDN
    """

        with tf.variable_scope(self.name):
            # adds placeholders, data_normalization and data_noise if desired. Also adds a placeholder for dropout probability
            self.layer_in_x, self.layer_in_y = self._build_input_layers()

            # create core multi-layer perceptron
            mlp_output_dim = 2 * self.ndim_y * self.n_centers + self.n_centers
            core_network = MLP(
                name="core_network",
                input_layer=self.layer_in_x,
                output_dim=mlp_output_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=None,
                weight_normalization=self.weight_normalization,
                dropout_ph=self.dropout_ph if self.dropout else None)

            core_output_layer = core_network.output_layer

            # slice the MLP output into three parts for the locs, scales and mixture weights
            slice_layer_locs = L.SliceLayer(core_output_layer,
                                            indices=slice(
                                                0,
                                                self.ndim_y * self.n_centers),
                                            axis=-1)
            slice_layer_scales = L.SliceLayer(
                core_output_layer,
                indices=slice(self.ndim_y * self.n_centers,
                              2 * self.ndim_y * self.n_centers),
                axis=-1)
            slice_layer_weights = L.SliceLayer(
                core_output_layer,
                indices=slice(2 * self.ndim_y * self.n_centers,
                              mlp_output_dim),
                axis=-1)

            # locations of the mixture components
            self.reshape_layer_locs = L.ReshapeLayer(
                slice_layer_locs, (-1, self.n_centers, self.ndim_y))
            self.locs = L.get_output(self.reshape_layer_locs)

            # scales of the mixture components
            reshape_layer_scales = L.ReshapeLayer(
                slice_layer_scales, (-1, self.n_centers, self.ndim_y))
            self.softplus_layer_scales = L.NonlinearityLayer(
                reshape_layer_scales, nonlinearity=tf.nn.softplus)
            self.scales = L.get_output(self.softplus_layer_scales)

            # weights of the mixture components
            self.logits = L.get_output(slice_layer_weights)
            self.softmax_layer_weights = L.NonlinearityLayer(
                slice_layer_weights, nonlinearity=tf.nn.softmax)
            self.weights = L.get_output(self.softmax_layer_weights)

            # put mixture components together
            self.y_input = L.get_output(self.layer_in_y)
            self.cat = cat = Categorical(logits=self.logits)
            self.components = components = [
                MultivariateNormalDiag(loc=loc, scale_diag=scale)
                for loc, scale in zip(tf.unstack(self.locs, axis=1),
                                      tf.unstack(self.scales, axis=1))
            ]
            self.mixture = mixture = Mixture(cat=cat,
                                             components=components,
                                             value=tf.zeros_like(self.y_input))

            # regularization
            self._add_softmax_entropy_regularization()
            self._add_l1_l2_regularization(core_network)

            # tensor to store samples
            self.samples = mixture.sample()  #TODO either use it or remove it

            # tensor to compute probabilities
            if self.data_normalization:
                self.pdf_ = mixture.prob(self.y_input) / tf.reduce_prod(
                    self.std_y_sym)
                self.log_pdf_ = mixture.log_prob(self.y_input) - tf.reduce_sum(
                    tf.log(self.std_y_sym))
            else:
                self.pdf_ = mixture.prob(self.y_input)
                self.log_pdf_ = mixture.log_prob(self.y_input)

            # symbolic tensors for getting the unnormalized mixture components
            if self.data_normalization:
                self.scales_unnormalized = self.scales * self.std_y_sym
                self.locs_unnormalized = self.locs * self.std_y_sym + self.mean_y_sym
            else:
                self.scales_unnormalized = self.scales
                self.locs_unnormalized = self.locs

        # initialize LayersPowered --> provides functions for serializing tf models
        LayersPowered.__init__(self, [
            self.softmax_layer_weights, self.softplus_layer_scales,
            self.reshape_layer_locs, self.layer_in_y
        ])
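
For reference, a small numpy sketch of the partitioning performed by the SliceLayers above (shapes follow the code; values are random dummies): the flat MLP output of length 2 * ndim_y * n_centers + n_centers is split into component means, softplus-transformed scales and softmax-normalized mixture weights.

import numpy as np

ndim_y, n_centers = 2, 3
mlp_out = np.random.randn(2 * ndim_y * n_centers + n_centers).astype(np.float32)  # dummy MLP output

locs = mlp_out[:ndim_y * n_centers].reshape(n_centers, ndim_y)
raw_scales = mlp_out[ndim_y * n_centers:2 * ndim_y * n_centers].reshape(n_centers, ndim_y)
scales = np.log1p(np.exp(raw_scales))            # softplus, mirrors tf.nn.softplus
logits = mlp_out[2 * ndim_y * n_centers:]        # one mixture weight per center
weights = np.exp(logits) / np.exp(logits).sum()  # softmax over the centers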