def _build_model(self): """ implementation of the flow model """ with tf.variable_scope(self.name): # adds placeholders, data normalization and data noise to graph as desired. Also sets up a placeholder # for dropout self.layer_in_x, self.layer_in_y = self._build_input_layers() self.y_input = L.get_output(self.layer_in_y) flow_classes = [FLOWS[flow_name] for flow_name in self.flows_type] # get the individual parameter sizes for each flow param_split_sizes = [flow.get_param_size(self.ndim_y) for flow in flow_classes] mlp_output_dim = sum(param_split_sizes) core_network = MLP( name="core_network", input_layer=self.layer_in_x, output_dim=mlp_output_dim, hidden_sizes=self.hidden_sizes, hidden_nonlinearity=self.hidden_nonlinearity, output_nonlinearity=None, weight_normalization=self.weight_normalization, dropout_ph=self.dropout_ph if self.dropout else None ) outputs = L.get_output(core_network.output_layer) flow_params = tf.split(value=outputs, num_or_size_splits=param_split_sizes, axis=1) # instanciate the flows with their parameters flows = [flow(params, self.ndim_y) for flow, params in zip(flow_classes, flow_params)] # build up the base distribution that will be transformed by the flows if self.ndim_y == 1: # this is faster for 1-D than the multivariate version # it also supports a cdf, which isn't implemented for Multivariate base_dist = tf.distributions.Normal(loc=0., scale=1.) else: base_dist = tf.contrib.distributions.MultivariateNormalDiag(loc=[0.] * self.ndim_y, scale_diag=[1.] * self.ndim_y) # chain the flows together and build the transformed distribution using the base_dist + flows # Chaining applies the flows in reverse, Chain([a,b]).forward(x) being a.forward(b.forward(x)) # We reverse them so the flows are stacked ontop of the base distribution in the original order flows.reverse() chain = tf.contrib.distributions.bijectors.Chain(flows) target_dist = tf.contrib.distributions.TransformedDistribution(distribution=base_dist, bijector=chain) # since we operate with matrices not vectors, the output would have dimension (?,1) # and therefor has to be reduce first to have shape (?,) if self.ndim_y == 1: # for x shape (batch_size, 1) normal_distribution.pdf(x) outputs shape (batch_size, 1) -> squeeze self.pdf_ = tf.squeeze(target_dist.prob(self.y_input), axis=1) self.log_pdf_ = tf.squeeze(target_dist.log_prob(self.y_input), axis=1) self.cdf_ = tf.squeeze(target_dist.cdf(self.y_input), axis=1) else: # no squeezing necessary for multivariate_normal, but we don't have a cdf self.pdf_ = target_dist.prob(self.y_input) self.log_pdf_ = target_dist.log_prob(self.y_input) if self.data_normalization: self.pdf_ = self.pdf_ / tf.reduce_prod(self.std_y_sym) self.log_pdf_ = self.log_pdf_ - tf.reduce_sum(tf.log(self.std_y_sym)) # cdf is only implemented for 1-D if self.ndim_y == 1: self.cdf_ = self.cdf_ / tf.reduce_prod(self.std_y_sym) # regularization self._add_l1_l2_regularization(core_network) self.loss = -tf.reduce_prod(self.pdf_) self.reg_loss = tf.reduce_sum(tf.losses.get_regularization_losses(scope=self.name)) #r egularization losses self.log_loss = -tf.reduce_sum(self.log_pdf_) + self.reg_loss optimizer = AdamWOptimizer(self.weight_decay) if self.weight_decay else tf.train.AdamOptimizer() if self.gradient_clipping: gradients, variables = zip(*optimizer.compute_gradients(self.log_loss)) gradients, _ = tf.clip_by_global_norm(gradients, 3e5) self.train_step = optimizer.apply_gradients(zip(gradients, variables)) else: self.train_step = optimizer.minimize(self.log_loss) # initialize LayersPowered -> 
provides functions for serializing tf models LayersPowered.__init__(self, [self.layer_in_y, core_network.output_layer])
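# A minimal standalone sketch (not part of the estimator) illustrating the Chain ordering
# noted in the comment above, assuming a TF 1.x environment with tf.contrib.distributions:
# Chain([a, b]).forward(x) evaluates a.forward(b.forward(x)), i.e. the last bijector in the
# list is applied first, which is why the flows are reversed before chaining.
import tensorflow as tf

bijectors = tf.contrib.distributions.bijectors
a, b = bijectors.Exp(), bijectors.Softplus()
chain = bijectors.Chain([a, b])

x = tf.constant([0.5])
with tf.Session() as sess:
    lhs, rhs = sess.run([chain.forward(x), a.forward(b.forward(x))])
    assert abs(lhs[0] - rhs[0]) < 1e-6  # both compute exp(softplus(0.5))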
def __setstate__(self, state):
    LayersPowered.__setstate__(self, state)
    self.fitted = state['fitted']
    self.sess = tf.get_default_session()
def __getstate__(self):
    state = LayersPowered.__getstate__(self)
    state['fitted'] = self.fitted
    return state
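# Minimal sketch of the (de)serialization round trip the two methods above enable, assuming
# `model` is a fitted estimator instance mixing in LayersPowered/Serializable and a default
# TF session is active (the names here are illustrative only, not part of the package API):
import pickle
import tensorflow as tf

with tf.Session().as_default():
    dump = pickle.dumps(model)       # __getstate__: network weights via LayersPowered + the 'fitted' flag
    restored = pickle.loads(dump)    # __setstate__: restores weights, re-binds tf.get_default_session()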
def __init__(self, name, output_dim, hidden_sizes, hidden_nonlinearity, output_nonlinearity,
             hidden_W_init=L.XavierUniformInitializer(), hidden_b_init=tf.zeros_initializer(),
             output_W_init=L.XavierUniformInitializer(), output_b_init=tf.zeros_initializer(),
             input_var=None, input_layer=None, input_shape=None, batch_normalization=False,
             weight_normalization=False, dropout_ph=None):
    """
    :param dropout_ph: None if no dropout should be used. Else a scalar placeholder that determines
                       the probability of dropping a node. Remember to set the placeholder to zero
                       during test / eval.
    """
    Serializable.quick_init(self, locals())

    with tf.variable_scope(name):
        if input_layer is None:
            l_in = L.InputLayer(shape=(None,) + input_shape, input_var=input_var, name="input")
        else:
            l_in = input_layer
        self._layers = [l_in]
        l_hid = l_in
        if batch_normalization:
            l_hid = L.batch_norm(l_hid)
        for idx, hidden_size in enumerate(hidden_sizes):
            l_hid = L.DenseLayer(l_hid, num_units=hidden_size, nonlinearity=hidden_nonlinearity,
                                 name="hidden_%d" % idx, W=hidden_W_init, b=hidden_b_init,
                                 weight_normalization=weight_normalization)
            if dropout_ph is not None:
                l_hid = L.DropoutLayer(l_hid, dropout_ph, rescale=False)
            if batch_normalization:
                l_hid = L.batch_norm(l_hid)
            self._layers.append(l_hid)
        l_out = L.DenseLayer(l_hid, num_units=output_dim, nonlinearity=output_nonlinearity, name="output",
                             W=output_W_init, b=output_b_init, weight_normalization=weight_normalization)
        if batch_normalization:
            l_out = L.batch_norm(l_out)
        self._layers.append(l_out)

        self._l_in = l_in
        self._l_out = l_out
        # self._input_var = l_in.input_var
        self._output = L.get_output(l_out)

        LayersPowered.__init__(self, l_out)
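# Hypothetical standalone usage of the MLP wrapper above (a sketch; it assumes `L` is the
# repo's layers module and that the class exposes an `output_layer` property for self._l_out,
# as the estimators below rely on via core_network.output_layer):
x_ph = tf.placeholder(tf.float32, shape=(None, 3), name="x")
mlp = MLP(name="toy_mlp", output_dim=4, hidden_sizes=(32, 32),
          hidden_nonlinearity=tf.nn.tanh, output_nonlinearity=None,
          input_var=x_ph, input_shape=(3,))
out = L.get_output(mlp.output_layer)  # symbolic tensor of shape (None, 4)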
def _build_model(self): """ implementation of the KMN """ with tf.variable_scope(self.name): # add placeholders, data_normalization and data_noise if desired. Also sets up the placeholder for dropout prob self.layer_in_x, self.layer_in_y = self._build_input_layers() self.X_in = L.get_output(self.layer_in_x) self.Y_in = L.get_output(self.layer_in_y) # get batch size self.batch_size = tf.shape(self.X_ph)[0] # create core multi-layer perceptron core_network = MLP( name="core_network", input_layer=self.layer_in_x, output_dim=self.n_centers * self.n_scales, hidden_sizes=self.hidden_sizes, hidden_nonlinearity=self.hidden_nonlinearity, output_nonlinearity=None, dropout_ph=self.dropout_ph if self.dropout else None) self.core_output_layer = core_network.output_layer # weights of the mixture components self.logits = L.get_output(self.core_output_layer) self.softmax_layer_weights = L.NonlinearityLayer( self.core_output_layer, nonlinearity=tf.nn.softmax) self.weights = L.get_output(self.softmax_layer_weights) # locations of the kernelfunctions self.locs = tf.Variable( np.zeros((self.n_centers, self.ndim_y)), name="locs", trainable=False, dtype=tf.float32) # assign sampled locs when fitting self.locs_layer = L.VariableLayer(core_network.input_layer, (self.n_centers, self.ndim_y), variable=self.locs, name="locs", trainable=False) self.locs_array = tf.unstack( tf.transpose(tf.multiply( tf.ones((self.batch_size, self.n_centers, self.ndim_y)), self.locs), perm=[1, 0, 2])) assert len(self.locs_array) == self.n_centers # scales of the gaussian kernels log_scales_layer = L.VariableLayer( core_network.input_layer, (self.n_scales, ), variable=tf.Variable(self.init_scales_softplus, dtype=tf.float32, trainable=self.train_scales), name="log_scales", trainable=self.train_scales) self.scales_layer = L.NonlinearityLayer( log_scales_layer, nonlinearity=tf.nn.softplus) self.scales = L.get_output(self.scales_layer) self.scales_array = scales_array = tf.unstack( tf.transpose(tf.multiply( tf.ones((self.batch_size, self.ndim_y, self.n_scales)), self.scales), perm=[2, 0, 1])) assert len(self.scales_array) == self.n_scales # put mixture components together self.y_input = L.get_output(self.layer_in_y) self.cat = cat = Categorical(logits=self.logits) self.components = components = [ MultivariateNormalDiag(loc=loc, scale_diag=scale) for loc in self.locs_array for scale in scales_array ] self.mixture = mixture = Mixture(cat=cat, components=components) # regularization self._add_softmax_entropy_regularization() self._add_l1_l2_regularization(core_network) # tensor to compute probabilities if self.data_normalization: self.pdf_ = mixture.prob(self.y_input) / tf.reduce_prod( self.std_y_sym) self.log_pdf_ = mixture.log_prob(self.y_input) - tf.reduce_sum( tf.log(self.std_y_sym)) else: self.pdf_ = mixture.prob(self.y_input) self.log_pdf_ = mixture.log_prob(self.y_input) # symbolic tensors for getting the unnormalized mixture components if self.data_normalization: self.scales_unnormalized = tf.transpose( tf.multiply(tf.ones( (self.ndim_y, self.n_scales)), self.scales) ) * self.std_y_sym # shape = (n_scales, ndim_y) self.locs_unnormalized = self.locs * self.std_y_sym + self.mean_y_sym else: self.scales_unnormalized = tf.transpose( tf.multiply(tf.ones((self.ndim_y, self.n_scales)), self.scales)) # shape = (n_scales, ndim_y) self.locs_unnormalized = self.locs # initialize LayersPowered --> provides functions for serializing tf models LayersPowered.__init__(self, [ self.core_output_layer, self.locs_layer, self.scales_layer, self.layer_in_y 
])
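# Worked example (a sketch with illustrative numbers) of how the KMN mixture above is sized:
# the core MLP emits one logit per (center, scale) pair, and the nested comprehension builds
# one Gaussian per pair, so both counts equal n_centers * n_scales.
n_centers, n_scales, ndim_y = 50, 2, 1
mlp_output_dim = n_centers * n_scales   # = 100 logits -> softmax mixture weights
n_components = n_centers * n_scales     # = 100 MultivariateNormalDiag components
assert mlp_output_dim == n_components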
def _build_model(self): """ implementation of the MDN """ with tf.variable_scope(self.name): # adds placeholders, data_normalization and data_noise if desired. Also adds a placeholder for dropout probability self.layer_in_x, self.layer_in_y = self._build_input_layers() # create core multi-layer perceptron mlp_output_dim = 2 * self.ndim_y * self.n_centers + self.n_centers core_network = MLP( name="core_network", input_layer=self.layer_in_x, output_dim=mlp_output_dim, hidden_sizes=self.hidden_sizes, hidden_nonlinearity=self.hidden_nonlinearity, output_nonlinearity=None, weight_normalization=self.weight_normalization, dropout_ph=self.dropout_ph if self.dropout else None) core_output_layer = core_network.output_layer # slice output of MLP into three equally sized parts for loc, scale and mixture weights slice_layer_locs = L.SliceLayer(core_output_layer, indices=slice( 0, self.ndim_y * self.n_centers), axis=-1) slice_layer_scales = L.SliceLayer( core_output_layer, indices=slice(self.ndim_y * self.n_centers, 2 * self.ndim_y * self.n_centers), axis=-1) slice_layer_weights = L.SliceLayer( core_output_layer, indices=slice(2 * self.ndim_y * self.n_centers, mlp_output_dim), axis=-1) # locations mixture components self.reshape_layer_locs = L.ReshapeLayer( slice_layer_locs, (-1, self.n_centers, self.ndim_y)) self.locs = L.get_output(self.reshape_layer_locs) # scales of the mixture components reshape_layer_scales = L.ReshapeLayer( slice_layer_scales, (-1, self.n_centers, self.ndim_y)) self.softplus_layer_scales = L.NonlinearityLayer( reshape_layer_scales, nonlinearity=tf.nn.softplus) self.scales = L.get_output(self.softplus_layer_scales) # weights of the mixture components self.logits = L.get_output(slice_layer_weights) self.softmax_layer_weights = L.NonlinearityLayer( slice_layer_weights, nonlinearity=tf.nn.softmax) self.weights = L.get_output(self.softmax_layer_weights) # # put mixture components together self.y_input = L.get_output(self.layer_in_y) self.cat = cat = Categorical(logits=self.logits) self.components = components = [ MultivariateNormalDiag(loc=loc, scale_diag=scale) for loc, scale in zip(tf.unstack(self.locs, axis=1), tf.unstack(self.scales, axis=1)) ] self.mixture = mixture = Mixture(cat=cat, components=components, value=tf.zeros_like(self.y_input)) # regularization self._add_softmax_entropy_regularization() self._add_l1_l2_regularization(core_network) # tensor to store samples self.samples = mixture.sample() #TODO either use it or remove it # tensor to compute probabilities if self.data_normalization: self.pdf_ = mixture.prob(self.y_input) / tf.reduce_prod( self.std_y_sym) self.log_pdf_ = mixture.log_prob(self.y_input) - tf.reduce_sum( tf.log(self.std_y_sym)) else: self.pdf_ = mixture.prob(self.y_input) self.log_pdf_ = mixture.log_prob(self.y_input) # symbolic tensors for getting the unnormalized mixture components if self.data_normalization: self.scales_unnormalized = self.scales * self.std_y_sym self.locs_unnormalized = self.locs * self.std_y_sym + self.mean_y_sym else: self.scales_unnormalized = self.scales self.locs_unnormalized = self.locs # initialize LayersPowered --> provides functions for serializing tf models LayersPowered.__init__(self, [ self.softmax_layer_weights, self.softplus_layer_scales, self.reshape_layer_locs, self.layer_in_y ])