def dense(self, input, units, name, reuse, kernel_regularizer, initZero=False):
    """Apply a fully connected transform whose kernel is rescaled at run time.

    Inside the variable scope `name`, a `kernel` of shape
    `[input_dim, units]` and a zero-initialised `bias` of shape `[units]` are
    created (or reused). The kernel is multiplied by the value returned from
    `variance_scaling_lr([input_dim, units], 'FAN_AVG')` before the matmul.

    Args:
        input: Tensor; its static last dimension sizes the kernel.
        units: Number of output units.
        name: Name of the variable scope holding the variables.
        reuse: Forwarded to `tf.variable_scope`.
        kernel_regularizer: Regularizer attached to the kernel variable.
        initZero: When true the kernel starts at constant 0.0, otherwise it
            is drawn from a truncated normal with stddev 1.0.

    Returns:
        `matmul(input, kernel * scale)` with the bias added.
    """
    fan_in = input.get_shape().as_list()[-1]

    with tf.variable_scope(name, reuse=reuse):
        if initZero:
            kernel_init = tf.constant_initializer(0.0)
        else:
            kernel_init = tf.truncated_normal_initializer(stddev=1.0)

        raw_kernel = tf.get_variable(
            'kernel',
            shape=[fan_in, units],
            initializer=kernel_init,
            regularizer=kernel_regularizer)
        bias = tf.get_variable(
            'bias', shape=[units], initializer=tf.constant_initializer(0.0))

        # The stored weights are rescaled on every forward pass rather than
        # at initialisation time.
        scaled_kernel = raw_kernel * variance_scaling_lr([fan_in, units],
                                                         'FAN_AVG')
        projected = standard_ops.matmul(input, scaled_kernel)
        projected = tf.nn.bias_add(projected, bias)

    return projected
def call(self, inputs):
    """Apply the dense transform using `self.b_kernel`.

    Binarization is currently disabled: `self.b_kernel` is set to the
    unmodified `self.kernel` on every call (the
    `binarization(self.kernel, self.H)` call was commented out upstream).

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
        `inputs . b_kernel`, plus `self.bias` when `self.use_bias` is set,
        passed through `self.activation` when one is configured.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()

    # Side effect kept from the original: the kernel actually used for the
    # product is exposed as an attribute.
    self.b_kernel = self.kernel

    # NOTE: a stray debug `print("shape: ", len(shape))` used to run here on
    # every call; it has been removed.
    if len(shape) > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, self.b_kernel,
                                         [[len(shape) - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if context.in_graph_mode():
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        outputs = standard_ops.matmul(inputs, self.b_kernel)

    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Apply the factored kernel `U . diag(S) . V` to rank-2 inputs.

    The three factors are read from `self.manifold_args[0..2]` and applied
    one after another, so the full kernel is never materialised here.

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
        The projected tensor, with bias and activation applied when
        configured.

    Raises:
        ValueError: If the output would have more than two dimensions.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    static_dims = inputs.get_shape().as_list()
    result_dims = static_dims[:-1] + [self.units]

    # Guard clause: only plain matrices are handled by the factored product.
    if len(result_dims) > 2:
        raise ValueError('len(output_shape) > 2 is not supported')

    factors = self.manifold_args
    projected = standard_ops.matmul(inputs, factors[0])
    projected = standard_ops.matmul(projected, standard_ops.diag(factors[1]))
    projected = standard_ops.matmul(projected, factors[2])

    if self.use_bias:
        projected = nn.bias_add(projected, self.bias)
    if self.activation is None:
        return projected
    return self.activation(projected)  # pylint: disable=not-callable
def while_loop_body(iteration, eigenvector, old_eigenvector):
    """Advance the power method by one step.

    Multiplies the enclosing-scope `matrix` by the current estimate and
    renormalises the result.

    Returns:
        Tuple `(iteration + 1, new_estimate, previous_estimate)`.
    """
    # The loop condition consumes this argument; the body has no use for it.
    del old_eigenvector

    next_iteration = iteration + 1

    # matmul + expand_dims is used rather than tensordot because the former
    # infers the static result shape, which tf.while_loop() requires.
    as_column = standard_ops.expand_dims(eigenvector, 1)
    product = standard_ops.matmul(matrix, as_column)[:, 0]
    normalized = product / standard_ops.norm(product)

    return (next_iteration, normalized, eigenvector)
def while_loop_body(iteration, eigenvector, old_eigenvector):
    """Run a single power-method update.

    The current estimate is multiplied by the `matrix` captured from the
    enclosing scope and scaled back to unit norm.

    Returns:
        Tuple of the incremented counter, the new estimate, and the estimate
        that was passed in (which becomes the "old" one for the condition).
    """
    # Only the loop condition needs the previous estimate.
    del old_eigenvector

    step = iteration + 1

    # tf.while_loop() needs static shapes: matmul() on an expanded column
    # provides them, tensordot() would not.
    column = standard_ops.expand_dims(eigenvector, 1)
    updated = standard_ops.matmul(matrix, column)[:, 0]
    updated = updated / standard_ops.norm(updated)

    return (step, updated, eigenvector)
def fprop(self, x):
    """Forward pass: project `x` through `self.kernel` and add `self.bias`.

    No activation is applied here (the activation call is disabled in this
    layer).

    Args:
        x: Input tensor.

    Returns:
        The biased projection of `x`.
    """
    if len(self.output_shape) <= 2:
        projected = standard_ops.matmul(x, self.kernel)
    else:
        # Contract the last input axis with the kernel's first axis so the
        # leading axes are carried through unchanged.
        contraction = [[len(self.input_shape) - 1], [0]]
        projected = standard_ops.tensordot(x, self.kernel, contraction)
        # tensordot loses the static shape; restore it.
        projected.set_shape(self.output_shape)

    return nn.bias_add(projected, self.bias)
def call(self, inputs):
    """Compute `inputs . W`, then optional bias, normalizer and activation.

    Args:
        inputs: Input tensor for the matrix product.

    Returns:
        The transformed tensor.
    """
    result = standard_ops.matmul(inputs, self.W)
    if self.use_bias:
        result = nn.bias_add(result, self.b)

    # Post-processing hooks run in a fixed order: normalizer first, then the
    # activation. Each attribute is looked up only when its turn comes.
    for hook_name in ('normalizer_fn', 'activation_fn'):
        hook = getattr(self, hook_name)
        if hook is not None:
            result = hook(result)  # pylint: disable=not-callable
    return result
def call(self, inputs):
    """Apply the dense transform `activation(inputs . kernel + bias)`.

    Args:
        inputs: Tensor with a known static rank.

    Returns:
        The projected tensor; bias and activation are applied only when
        configured on the layer.
    """
    input_dims = inputs.get_shape().as_list()
    result_dims = input_dims[:-1] + [self.units]

    if len(result_dims) <= 2:
        projected = standard_ops.matmul(inputs, self.kernel)
    else:
        # Higher-rank inputs: contract the last axis against the kernel's
        # first axis, then restore the static shape tensordot dropped.
        last_axis = len(input_dims) - 1
        projected = standard_ops.tensordot(inputs, self.kernel,
                                           [[last_axis], [0]])
        projected.set_shape(result_dims)

    if self.use_bias:
        projected = nn.bias_add(projected, self.bias)
    if self.activation is None:
        return projected
    return self.activation(projected)  # pylint: disable=not-callable
def call(self, inputs):
    """Project `inputs` through `self.kernel` and always add `self.bias`.

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
        The biased projection; no activation is applied.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    dims = inputs.get_shape().as_list()
    rank = len(dims)

    if rank <= 2:
        projected = standard_ops.matmul(inputs, self.kernel)
    else:
        # Broadcasting over the leading axes requires tensordot.
        projected = standard_ops.tensordot(inputs, self.kernel,
                                           [[rank - 1], [0]])
        # The static shape can only be re-attached in graph mode.
        if context.in_graph_mode():
            projected.set_shape(dims[:-1] + [self.units])

    return nn.bias_add(projected, self.bias)
def call(self, inputs):
    """Run the layer: matrix product, optional bias, optional activation.

    Args:
        inputs: Tensor with a known static rank.

    Returns:
        The layer output tensor.
    """
    in_dims = inputs.get_shape().as_list()
    out_dims = in_dims[:-1] + [self.units]
    needs_broadcast = len(out_dims) > 2

    if needs_broadcast:
        # Contract the trailing input axis with the kernel's leading axis.
        axes = [[len(in_dims) - 1], [0]]
        result = standard_ops.tensordot(inputs, self.kernel, axes)
        # Re-attach the static shape that tensordot does not infer.
        result.set_shape(out_dims)
    else:
        result = standard_ops.matmul(inputs, self.kernel)

    if self.use_bias:
        result = nn.bias_add(result, self.bias)

    activation = self.activation
    if activation is not None:
        return activation(result)  # pylint: disable=not-callable
    return result
def dense(inputs, kernel, bias=None, activation=None):
    """Functional dense layer: `activation(inputs . kernel + bias)`.

    Args:
        inputs: Tensor with a known static rank. It is used as given; no
            dtype conversion is performed.
        kernel: Weight tensor; its static last dimension is the output width.
        bias: Optional bias added along the last axis.
        activation: Optional callable applied to the result.

    Returns:
        The transformed tensor.
    """
    in_dims = inputs.get_shape().as_list()
    units = kernel.get_shape().as_list()[-1]
    out_dims = in_dims[:-1] + [units]

    if len(out_dims) <= 2:
        projected = standard_ops.matmul(inputs, kernel)
    else:
        # Broadcasting over the leading axes requires tensordot; afterwards
        # the static shape has to be restored by hand.
        projected = standard_ops.tensordot(inputs, kernel,
                                           [[len(in_dims) - 1], [0]])
        projected.set_shape(out_dims)

    if bias is not None:
        projected = nn.bias_add(projected, bias)
    if activation is None:
        return projected
    return activation(projected)  # pylint: disable=not-callable
def call(self, inputs, **kwargs):
    """Evaluate all wrapped dense layers with one fused matrix product.

    The kernels in `self.dense_kernels` are concatenated along axis 1, the
    input is multiplied once by the combined kernel, and the result is split
    back into one slice per entry of `self.dense_layers`.

    Bias addition is fused when every layer uses a bias; the activation is
    fused when all layers share one non-None activation. Otherwise each is
    applied per slice. The decisions are recorded in `self.bias_combined`
    and `self.activations_combined`.

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`; its
            last dimension must be statically known.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        List with one output tensor per layer, in `self.dense_layers` order.

    Raises:
        ValueError: If the last input dimension is undefined.
    """
    combined_kernel = tf.concat(
        axis=1, values=self.dense_kernels, name='combined_kernel')

    input_shape = tensor_shape.TensorShape(inputs.shape)
    if input_shape[-1].value is None:
        raise ValueError('The last dimension of the inputs to `Dense` '
                         'should be defined. Found `None`.')
    self.input_spec = base.InputSpec(
        min_ndim=2, axes={-1: input_shape[-1].value})

    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    if len(shape) > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, combined_kernel,
                                         [[len(shape) - 1], [0]])
    else:
        outputs = standard_ops.matmul(inputs, combined_kernel)

    # Decide with local flags so the per-layer loop never depends on an
    # attribute left over from an earlier call (or never initialised).
    bias_combined = all(d.use_bias for d in self.dense_layers)
    if bias_combined:
        combined_bias = tf.concat(
            axis=0, values=[d.bias for d in self.dense_layers])
        outputs = nn.bias_add(outputs, combined_bias)
    self.bias_combined = bias_combined

    activations = {d.activation for d in self.dense_layers}
    activations_combined = (None not in activations
                            and len(activations) == 1)
    if activations_combined:
        outputs = next(iter(activations))(outputs)
    self.activations_combined = activations_combined

    output_ops = []
    offset = 0
    for d in self.dense_layers:
        # Slice the last axis (not axis 1) so inputs of rank > 2 are split
        # correctly, and advance the offset cumulatively so the third and
        # later layers read their own columns.
        layer_output = outputs[..., offset:offset + d.units]
        offset += d.units

        if d.use_bias and not bias_combined:
            layer_output = nn.bias_add(layer_output, d.bias)
        if d.activation is not None and not activations_combined:
            layer_output = d.activation(layer_output)
        output_ops.append(layer_output)
    return output_ops
def call(self, inputs):
    """Dense forward pass with optional weight and intrinsic quantization.

    When `self.weight_quantizer` is set, kernel and bias are quantized with
    it (inside the `quant_weights` / `quant_biases` variable scopes). When
    `self.quantizer` is set, kernel and bias are additionally passed through
    it, the product is computed with `qmatmul`, and the result is
    re-quantized after the bias add and after the activation.

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
        The (possibly quantized) layer output.

    Raises:
        ValueError: If the output would have more than two dimensions.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    output_shape = shape[:-1] + [self.units]

    # Quantize the weights, if there is a weight quantizer.
    if self.weight_quantizer is not None:
        with tf.variable_scope("quant_weights"):
            used_kernel = self.weight_quantizer.quantize(self.kernel)
        with tf.variable_scope("quant_biases"):
            used_bias = self.weight_quantizer.quantize(self.bias)
    else:
        used_kernel = self.kernel
        used_bias = self.bias

    # With intrinsic quantization, the weights are quantized by it as well.
    if self.quantizer is not None:
        used_kernel = self.quantizer.quantize(used_kernel)
        used_bias = self.quantizer.quantize(used_bias)

    if len(output_shape) > 2:
        # The broadcasting (tensordot) path is not implemented for the
        # quantized operation. The message previously used `$d`, which made
        # the `%` formatting itself fail with a TypeError.
        raise ValueError(
            'output_shape > 2 not supported for quantized operation, '
            'tried %d.' % (len(output_shape)))

    if self.quantizer is None:
        outputs = standard_ops.matmul(inputs, used_kernel)
    else:
        # With quantization.
        outputs = qmatmul(inputs, used_kernel, self.quantizer)

    # TODO: quantize after bias and activation
    if self.use_bias:
        outputs = nn.bias_add(outputs, used_bias)
        if self.quantizer is not None:
            outputs = self.quantizer.quantize(outputs)
    if self.activation is not None:
        # Never called, since the activation is performed in the upper
        # hierarchy.
        outputs = self.activation(outputs)  # pylint: disable=not-callable
        if self.quantizer is not None:
            outputs = self.quantizer.quantize(outputs)
    return outputs
def call(self, inputs):
    """Dense forward pass using the binarized kernel.

    When the module-level `binarize_enabled` flag is set, `self.kernel` is
    first replaced by its value clipped to [-1, 1]. The product is always
    computed with `binarize(self.kernel)`.

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
        The projected tensor with optional bias and activation applied.
    """
    if binarize_enabled:
        # Note: this rebinds the attribute, so later calls clip the already
        # clipped tensor.
        self.kernel = clip_by_value(self.kernel, -1, 1)

    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    dims = inputs.get_shape().as_list()
    rank = len(dims)
    binary_kernel = binarize(self.kernel)

    if rank <= 2:
        projected = standard_ops.matmul(inputs, binary_kernel)
    else:
        # Broadcasting is required for the inputs.
        projected = standard_ops.tensordot(inputs, binary_kernel,
                                           [[rank - 1], [0]])
        # Restore the static shape; only possible in graph mode.
        if context.in_graph_mode():
            projected.set_shape(dims[:-1] + [self.units])

    if self.use_bias:
        projected = nn.bias_add(projected, self.bias)
    if self.activation is None:
        return projected
    return self.activation(projected)  # pylint: disable=not-callable
def call(self, inputs):
    """Dense forward pass that flattens higher-rank inputs to 2-D.

    Inputs of rank > 2 are reshaped to `[-1, input_dim]`, multiplied by
    `self.w`, and the result is reshaped back so that only the last
    dimension changes (to `self.units`).

    Args:
        inputs: Tensor with a known static rank.

    Returns:
        The projected tensor with optional bias and activation applied.
    """
    static_dims = inputs.get_shape().as_list()
    last_dim = static_dims[-1]
    result_dims = static_dims[:-1] + [self.units]
    is_high_rank = len(result_dims) > 2

    if is_high_rank:
        # Capture the dynamic output shape before the input is flattened.
        dynamic_dims = array_ops.unpack(array_ops.shape(inputs))
        dynamic_dims[-1] = self.units
        dynamic_shape = array_ops.pack(dynamic_dims)
        inputs = array_ops.reshape(inputs, [-1, last_dim])

    projected = standard_ops.matmul(inputs, self.w)
    if self.use_bias:
        projected = nn.bias_add(projected, self.bias)

    if is_high_rank:
        # Undo the flattening and re-attach the static shape.
        projected = array_ops.reshape(projected, dynamic_shape)
        projected.set_shape(result_dims)

    if self.activation is None:
        return projected
    return self.activation(projected)  # pylint: disable=not-callable
def call(self, inputs):
    """Apply `self.w` (plus optional bias/activation) along the last axis.

    Rank > 2 inputs are collapsed to a matrix for the multiply and expanded
    again afterwards, keeping every leading dimension intact.

    Args:
        inputs: Tensor with a known static rank.

    Returns:
        The layer output tensor.
    """
    in_dims = inputs.get_shape().as_list()
    feature_dim = in_dims[-1]
    out_dims = in_dims[:-1] + [self.units]
    flatten = len(out_dims) > 2

    if flatten:
        # The run-time shape is recorded first, with the trailing entry
        # swapped for the layer width, so the result can be restored later.
        runtime_dims = array_ops.unpack(array_ops.shape(inputs))
        runtime_dims[-1] = self.units
        restored_shape = array_ops.pack(runtime_dims)
        inputs = array_ops.reshape(inputs, [-1, feature_dim])

    result = standard_ops.matmul(inputs, self.w)
    if self.use_bias:
        result = nn.bias_add(result, self.bias)

    if flatten:
        result = array_ops.reshape(result, restored_shape)
        result.set_shape(out_dims)

    activation = self.activation
    if activation is not None:
        return activation(result)  # pylint: disable=not-callable
    return result
def call(self, inputs):
    """Dense forward pass with the output rescaled by the kernel norm.

    After the product with `self.kernel`, the result is multiplied by
    `self.scale / sqrt(sum(kernel ** 2, axis 0))`.

    Args:
        inputs: Value convertible to a tensor of dtype `self.dtype`.

    Returns:
        The rescaled projection with optional bias and activation applied.
    """
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    dims = inputs.get_shape().as_list()
    rank = len(dims)

    if rank <= 2:
        projected = standard_ops.matmul(inputs, self.kernel)
    else:
        # Broadcasting is required for the inputs.
        projected = standard_ops.tensordot(inputs, self.kernel,
                                           [[rank - 1], [0]])
        # Restore the static shape; only possible in graph mode.
        if context.in_graph_mode():
            projected.set_shape(dims[:-1] + [self.units])

    # Scaling the output is used instead of normalising the kernel itself.
    kernel_norms = tf.sqrt(tf.reduce_sum(tf.square(self.kernel), [0]))
    projected = (self.scale / kernel_norms) * projected

    if self.use_bias:
        projected = nn.bias_add(projected, self.bias)
    if self.activation is None:
        return projected
    return self.activation(projected)  # pylint: disable=not-callable
def _minimize_constrained(self,
                          minimization_problem,
                          global_step=None,
                          var_list=None,
                          gate_gradients=train_optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          name=None,
                          grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    `self.optimizer` is used to update the model parameters, while the
    constraint/objective weight matrix (the analogue of Lagrange multipliers)
    is updated using `self.constraint_optimizer` (if it is not None) or
    `self.optimizer` (if it is). Whether the matrix updates are additive or
    multiplicative depends on the derived class.

    The returned op applies the parameter and state updates, then projects
    the state via `self._projection_op`, and finally increments
    `global_step` when one is given.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # Without explicit proxies, the constraints act as their own proxies.
    if proxy_constraints is None:
        proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
            objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
        raise ValueError("objective, constraints and proxy_constraints must "
                         "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # Both vectors have num_constraints + 1 entries: index 0 stands for the
    # objective (zero / the objective value), the rest for the constraints.
    zero_and_constraints = standard_ops.concat((standard_ops.zeros(
        (1,), dtype=constraints.dtype), constraints),
                                               axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    # The loss minimized w.r.t. the model parameters: the proxy values
    # weighted by the current distribution.
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    # Outer product of (zero, constraints) as a column with the distribution
    # as a row; this is handed to `_constraint_grad_and_var` for the state.
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
    if self.constraint_optimizer is None:
        # If we don't have a separate constraint_optimizer, then we use
        # self.optimizer for both the update of the model parameters, and that
        # of the internal state.
        grads_and_vars = self.optimizer.compute_gradients(
            loss,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)
        grads_and_vars.append(
            self._constraint_grad_and_var(state, matrix_gradient))
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
        # If we have a separate constraint_optimizer, then we use
        # self.optimizer for the update of the model parameters, and
        # self.constraint_optimizer for that of the internal state.
        grads_and_vars = self.optimizer.compute_gradients(
            loss,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)
        matrix_grads_and_vars = [
            self._constraint_grad_and_var(state, matrix_gradient)
        ]

        # Every gradient is made a control dependency of both apply ops, so
        # neither update can run before all gradients have been computed.
        gradients = [
            gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
            if gradient is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops.append(
                self.optimizer.apply_gradients(grads_and_vars, name="update"))
            update_ops.append(
                self.constraint_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update"))

    # The projection (and the step increment) run only after the updates.
    with ops.control_dependencies(update_ops):
        if global_step is None:
            # If we don't have a global step, just project, and we're done.
            return self._projection_op(state, name=name)
        else:
            # If we have a global step, then we need to increment it in
            # addition to projecting.
            projection_op = self._projection_op(state, name="project")
            with ops.colocate_with(global_step):
                global_step_op = state_ops.assign_add(
                    global_step, 1, name="global_step_increment")
            return control_flow_ops.group(projection_op, global_step_op, name=name)
# Fragment of a QFullyConnect check: compares `QFC.qmatmul` under a
# `NoQuantizer` against the plain matmul on the same integer-valued inputs.
# NOTE(review): `input_width`, `input_height`, `input_channels`,
# `output_channels` and `batch_size` are defined outside this fragment.
fixed_size = 8
fixed_prec = 4
# Inputs are laid out as [batch_size, width * height * channels].
inputs_vals = np.arange(
    input_width * input_height * input_channels * batch_size).reshape(
        batch_size, input_width * input_height * input_channels)
# Filters are laid out as [channels * width * height, output_channels].
filters_vals = np.arange(
    input_channels * input_width * input_height * output_channels).reshape(
        input_channels * input_width * input_height, output_channels)
inputs = tf.constant(inputs_vals, dtype=tf.float64)
filters = tf.constant(filters_vals, dtype=tf.float64)
quantizer = Quantizers.NoQuantizer()
output = QFC.qmatmul(inputs, filters, quantizer)
# Reference result from the unquantized op.
gold_output = standard_ops.matmul(inputs, filters)
with tf.Session() as sess:
    gold_result = gold_output.eval().flatten()
    result = output.eval().flatten()
    #print(sess.run(gold_output))
    #print(sess.run(filters))
    pass
# Element-wise exact comparison; stops at the first mismatch.
failed = False
for i in range(len(result)):
    if result[i] != gold_result[i]:
        failed = True
        break
print('QFullyConnect test:')
def _matmul(self, inputs, kernel):
    """Multiply `inputs` by `kernel` along the last axis of `inputs`.

    Args:
        inputs: Tensor whose static rank (`inputs.shape.ndims`) is known.
        kernel: Weight tensor.

    Returns:
        `matmul(inputs, kernel)` for rank <= 2, otherwise the tensordot that
        contracts the last axis of `inputs` with the first axis of `kernel`.
    """
    if inputs.shape.ndims > 2:
        # To handle broadcasting over leading axes, `tensordot` is required.
        return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
    return standard_ops.matmul(inputs, kernel)
def _minimize_constrained(self,
                          minimization_problem,
                          global_step=None,
                          var_list=None,
                          gate_gradients=train_optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          name=None,
                          grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    `self.optimizer` is used to update the model parameters, while the
    constraint/objective weight matrix (the analogue of Lagrange multipliers)
    is updated using `self.constraint_optimizer` (if it is not None) or
    `self.optimizer` (if it is). Whether the matrix updates are additive or
    multiplicative depends on the derived class.

    The returned op applies the parameter and state updates, then projects
    the state via `self._projection_op`, and finally increments
    `global_step` when one is given.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # Without explicit proxies, the constraints act as their own proxies.
    if proxy_constraints is None:
        proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
            objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
        raise ValueError("objective, constraints and proxy_constraints must "
                         "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # Both vectors have num_constraints + 1 entries: index 0 stands for the
    # objective (zero / the objective value), the rest for the constraints.
    zero_and_constraints = standard_ops.concat(
        (standard_ops.zeros((1,), dtype=constraints.dtype), constraints),
        axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    distribution = self._distribution(state)
    # The loss minimized w.r.t. the model parameters: the proxy values
    # weighted by the current distribution.
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    # Outer product of (zero, constraints) as a column with the distribution
    # as a row; this is handed to `_constraint_grad_and_var` for the state.
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
    if self.constraint_optimizer is None:
        # If we don't have a separate constraint_optimizer, then we use
        # self.optimizer for both the update of the model parameters, and that
        # of the internal state.
        grads_and_vars = self.optimizer.compute_gradients(
            loss,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)
        grads_and_vars.append(
            self._constraint_grad_and_var(state, matrix_gradient))
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
        # If we have a separate constraint_optimizer, then we use
        # self.optimizer for the update of the model parameters, and
        # self.constraint_optimizer for that of the internal state.
        grads_and_vars = self.optimizer.compute_gradients(
            loss,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)
        matrix_grads_and_vars = [
            self._constraint_grad_and_var(state, matrix_gradient)
        ]

        # Every gradient is made a control dependency of both apply ops, so
        # neither update can run before all gradients have been computed.
        gradients = [
            gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
            if gradient is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops.append(
                self.optimizer.apply_gradients(grads_and_vars, name="update"))
            update_ops.append(
                self.constraint_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update"))

    # The projection (and the step increment) run only after the updates.
    with ops.control_dependencies(update_ops):
        if global_step is None:
            # If we don't have a global step, just project, and we're done.
            return self._projection_op(state, name=name)
        else:
            # If we have a global step, then we need to increment it in
            # addition to projecting.
            projection_op = self._projection_op(state, name="project")
            with ops.colocate_with(global_step):
                global_step_op = state_ops.assign_add(
                    global_step, 1, name="global_step_increment")
            return control_flow_ops.group(projection_op, global_step_op, name=name)
def dnn_sampled_softmax_classifier_model_fn(features, target_indices, mode, params):
    """model_fn that uses candidate sampling.

    Training uses a sampled softmax loss over candidates drawn by a learned
    unigram sampler; evaluation and inference compute the full softmax over
    all `n_classes`.

    Args:
      features: Single Tensor or dict of Tensor (depends on data passed to
        `fit`). NOTE(review): the body calls `features.values()`, so a
        mapping appears to be required - confirm against callers.
      target_indices: A single Tensor of shape [batch_size, n_labels]
        containing the target indices.
      mode: Represents whether this is training, evaluation or prediction.
        See `ModeKeys`.
      params: A dict of hyperparameters that are listed below.
        hidden_units- List of hidden units per layer. All layers are fully
          connected. Ex. `[64, 32]` means first layer has 64 nodes and second
          one has 32.
        feature_columns- An iterable containing all the feature columns used
          by the model. All items in the set should be instances of classes
          derived from `FeatureColumn`.
        n_classes- number of target classes. It must be greater than 2.
        n_samples- number of sample target classes. Needs to be tuned - A good
          starting point could be 2% of n_classes.
        n_labels- number of labels in each example.
        top_k- The number of classes to predict.
        optimizer- An instance of `tf.Optimizer` used to train the model. If
          `None`, will use an Adagrad optimizer.
        dropout- When not `None`, the probability we will drop out a given
          coordinate.
        gradient_clip_norm- A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio. See
          tf.clip_by_global_norm for more details.
        num_ps_replicas- The number of parameter server replicas.

    Returns:
      A `(predictions, loss, train_op)` tuple whose populated entries depend
      on `mode`:
        TRAIN: `(None, loss, train_op)`.
        EVAL: `(predictions, loss, None)`.
        INFER: `(predictions, None, None)`.
      predictions: A dict of Tensors keyed by `_PROBABILITIES`, `_CLASSES`
        and `_TOP_K`.
      loss: A scalar containing the loss of the step.
      train_op: The op for training.
      For any other `mode` the function falls through and returns `None`.
    """

    # Every hyperparameter key is required; a missing one raises KeyError.
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    n_samples = params["n_samples"]
    n_labels = params["n_labels"]
    top_k = params["top_k"]
    optimizer = params["optimizer"]
    dropout = params["dropout"]
    gradient_clip_norm = params["gradient_clip_norm"]
    num_ps_replicas = params["num_ps_replicas"]

    # Used both as the variable scope prefix and as the collection name for
    # all variables created below.
    parent_scope = "dnn_ss"

    # Setup the input layer partitioner.
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    # Create the input layer.
    with variable_scope.variable_scope(
            parent_scope + "/input_from_feature_columns",
            features.values(),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            features,
            feature_columns,
            weight_collections=[parent_scope],
            scope=scope)

    # Setup the hidden layer partitioner.
    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))

    # Stays None when `hidden_units` is empty.
    final_hidden_layer_dim = None
    # Create hidden layers using fully_connected.
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                parent_scope + "/hiddenlayer_%d" % layer_id, [net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(net,
                                         num_hidden_units,
                                         variables_collections=[parent_scope],
                                         scope=scope)
        final_hidden_layer_dim = num_hidden_units
        # Add dropout if it is enabled.
        if dropout is not None and mode == estimator.ModeKeys.TRAIN:
            net = layers.dropout(net, keep_prob=(1.0 - dropout))

    # Create the weights and biases for the logit layer.
    with variable_scope.variable_scope(
            parent_scope + "/logits", [net],
            partitioner=hidden_layer_partitioner) as scope:
        dtype = net.dtype.base_dtype
        # Stored as [n_classes, dim]: passed directly to sampled_softmax_loss
        # and transposed for the full-softmax matmul below.
        weights_shape = [n_classes, final_hidden_layer_dim]
        weights = variables.model_variable(
            "weights",
            shape=weights_shape,
            dtype=dtype,
            initializer=initializers.xavier_initializer(),
            trainable=True,
            collections=[parent_scope])
        biases = variables.model_variable(
            "biases",
            shape=[n_classes,],
            dtype=dtype,
            initializer=init_ops.zeros_initializer,
            trainable=True,
            collections=[parent_scope])

    if mode == estimator.ModeKeys.TRAIN:
        # Call the candidate sampling APIs and calculate the loss.
        sampled_values = nn.learned_unigram_candidate_sampler(
            true_classes=math_ops.to_int64(target_indices),
            num_true=n_labels,
            num_sampled=n_samples,
            unique=True,
            range_max=n_classes)

        sampled_softmax_loss = nn.sampled_softmax_loss(
            weights=weights,
            biases=biases,
            inputs=net,
            labels=math_ops.to_int64(target_indices),
            num_sampled=n_samples,
            num_classes=n_classes,
            num_true=n_labels,
            sampled_values=sampled_values)

        loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

        train_op = optimizers.optimize_loss(
            loss=loss, global_step=contrib_framework.get_global_step(),
            learning_rate=_DEFAULT_LEARNING_RATE,
            optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
            name=parent_scope)
        return None, loss, train_op

    elif mode == estimator.ModeKeys.EVAL:
        # Full softmax over all classes.
        logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                             biases)
        predictions = {}
        predictions[_PROBABILITIES] = nn.softmax(logits)
        predictions[_CLASSES] = math_ops.argmax(logits, 1)
        _, predictions[_TOP_K] = nn.top_k(logits, top_k)

        # Since the targets have multiple labels, setup the target probabilities
        # as 1.0/n_labels for each of the labels.
        target_one_hot = array_ops.one_hot(
            indices=target_indices,
            depth=n_classes,
            on_value=1.0 / n_labels)
        # Sum the per-label one-hot rows into one target row per example.
        target_one_hot = math_ops.reduce_sum(
            input_tensor=target_one_hot,
            reduction_indices=[1])

        loss = math_ops.reduce_mean(
            nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

        return predictions, loss, None

    elif mode == estimator.ModeKeys.INFER:
        # Same prediction ops as EVAL, without a loss.
        logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                             biases)
        predictions = {}
        predictions[_PROBABILITIES] = nn.softmax(logits)
        predictions[_CLASSES] = math_ops.argmax(logits, 1)
        _, predictions[_TOP_K] = nn.top_k(logits, top_k)

        return predictions, None, None
def legacy_fully_connected(x,
                           num_output_units,
                           activation_fn=None,
                           weight_init=initializers.xavier_initializer(),
                           bias_init=init_ops.zeros_initializer,
                           name=None,
                           weight_collections=(ops.GraphKeys.WEIGHTS,),
                           bias_collections=(ops.GraphKeys.BIASES,),
                           output_collections=(ops.GraphKeys.ACTIVATIONS,),
                           trainable=True,
                           weight_regularizer=None,
                           bias_regularizer=None):
    # pylint: disable=anomalous-backslash-in-string
    r"""Adds the parameters for a fully connected layer and returns the output.

    A fully connected layer is generally defined as a matrix multiply:
    `y = f(w * x + b)` where `f` is given by `activation_fn`. If
    `activation_fn` is `None`, the result of `y = w * x + b` is
    returned. (In this implementation the product is computed as
    `matmul(x, w)` with `w` of shape [input units, `num_output_units`].)

    If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
    with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
    multiply along the first dimensions. The result r is a tensor of shape
    [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
    where \\\( r_{i_0, ..., i_{n-1}, k} =
    \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
    This is accomplished by reshaping `x` to 2-D
    [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
    before the matrix multiply and afterwards reshaping it to
    [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].

    This op creates `w` and optionally `b`. Bias (`b`) can be disabled by
    setting `bias_init` to `None`.

    The variable creation is compatible with `tf.variable_scope` and so can be
    reused with `tf.variable_scope` or `tf.make_template`.

    Most of the details of variable creation can be controlled by specifying
    the initializers (`weight_init` and `bias_init`) and in which collections
    to place the created variables (`weight_collections` and
    `bias_collections`; note that the variables are always added to the
    `VARIABLES` collection). The output of the layer can be placed in custom
    collections using `output_collections`. The collections arguments default
    to `WEIGHTS`, `BIASES` and `ACTIVATIONS`, respectively.

    A per layer regularization can be specified by setting `weight_regularizer`
    and `bias_regularizer`, which are applied to the weights and biases
    respectively, and whose output is added to the `REGULARIZATION_LOSSES`
    collection.

    Args:
      x: The input `Tensor`.
      num_output_units: The size of the output.
      activation_fn: A function that requires a single Tensor that is applied
        as a non-linearity. If None is used, do not apply any activation.
      weight_init: An optional weight initialization, defaults to
        `xavier_initializer`.
      bias_init: An initializer for the bias, defaults to 0. Set to `None` in
        order to disable bias.
      name: The name for this operation is used to name operations and to find
        variables. If specified it must be unique for this scope, otherwise a
        unique name starting with "fully_connected" will be created.  See
        `tf.variable_op_scope` for details.
      weight_collections: List of graph collections to which weights are added.
      bias_collections: List of graph collections to which biases are added.
      output_collections: List of graph collections to which outputs are added.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      weight_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for weights.
      bias_regularizer: A regularizer like the result of
        `l1_regularizer` or `l2_regularizer`. Used for biases.

    Returns:
      The output of the fully connected layer.

    Raises:
      ValueError: if the rank of x is unknown, if x has rank less than 2, or
        if its last dimension is not set.
    """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        # Validate the static shape: the rank and the last dimension must be
        # known to size the weight matrix.
        dims = x.get_shape().dims
        if dims is None:
            raise ValueError('dims of x must be known but is None')
        if len(dims) < 2:
            raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
        num_input_units = dims[-1].value
        if num_input_units is None:
            raise ValueError('last dimension of x must be known but is None')
        dtype = x.dtype.base_dtype

        # The variables are always added to VARIABLES, whatever the caller
        # passed (including None or an empty collection list).
        weight_collections = set(list(weight_collections or []) +
                                 [ops.GraphKeys.VARIABLES])
        w = variable_scope.get_variable('weights',
                                        shape=[num_input_units, num_output_units],
                                        dtype=dtype,
                                        initializer=weight_init,
                                        collections=weight_collections,
                                        regularizer=weight_regularizer,
                                        trainable=trainable)
        # Collapse all leading dimensions so a single 2-D matmul suffices.
        x_2_dim = x if len(dims) <= 2 else array_ops.reshape(x,
                                                             [-1, num_input_units])
        y = standard_ops.matmul(x_2_dim, w)

        if bias_init is not None:
            bias_collections = set(list(bias_collections or []) +
                                   [ops.GraphKeys.VARIABLES])
            b = variable_scope.get_variable('bias',
                                            shape=[num_output_units],
                                            dtype=dtype,
                                            initializer=bias_init,
                                            collections=bias_collections,
                                            regularizer=bias_regularizer,
                                            trainable=trainable)

            y = nn.bias_add(y, b)

        if len(dims) > 2:
            # Restore the leading dimensions using the dynamic shape of x ...
            out_shape = array_ops.unpack(array_ops.shape(x))
            out_shape[-1] = num_output_units

            y = array_ops.reshape(y, array_ops.pack(out_shape))

            # ... and re-attach the static shape information lost by the
            # dynamic reshape.
            static_shape = x.get_shape().as_list()
            static_shape[-1] = num_output_units
            y.set_shape(static_shape)

        return _apply_activation(y, activation_fn, output_collections)
def build(self, input_shape):
    """Create the factor variables ``U``, ``S``, ``V`` and the optional bias.

    A ``[m, n]`` matrix is drawn uniformly within the Glorot limit
    ``sqrt(6 / (m + n))``, decomposed with SVD and truncated to
    ``self.rank`` components.  The truncated factors are used as the
    initial values of three trainable variables, and ``self.kernel`` is set
    to the product ``U * diag(S) * V``.

    ``self.manifold`` is set to ``manifolds.FixedRankEmbedded(m, n, k)``;
    when ``self.use_bias`` is true it is wrapped in a ``manifolds.Product``
    together with ``manifolds.Euclidean(n)`` and the bias variable is
    appended to ``self.manifold_args``.

    Args:
      input_shape: Anything accepted by `tensor_shape.TensorShape`.

    Raises:
      ValueError: If the rank is unknown, the rank is below 2, or the last
        dimension is undefined.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape.ndims is None:
        raise ValueError('Inputs to `Dense` should have known rank.')
    if len(input_shape) < 2:
        raise ValueError('Inputs to `Dense` should have rank >= 2.')
    last_dim = input_shape[-1].value
    if last_dim is None:
        raise ValueError('The last dimension of the inputs to `Dense` '
                         'should be defined. Found `None`.')

    fan_in = last_dim
    rank = self.rank
    fan_out = self.units

    # Glorot-uniform sample, used only to seed the truncated SVD factors.
    bound = (6.0 / (fan_in + fan_out))**0.5
    seed_kernel = random_ops.random_uniform(
        [fan_in, fan_out], minval=-bound, maxval=bound)
    sing_vals, left_vecs, right_vecs = linalg_ops.svd(
        seed_kernel, full_matrices=False)
    right_vecs = array_ops.transpose(right_vecs)
    # Keep only the leading `rank` components of each factor.
    left_vecs = array_ops.slice(left_vecs, [0, 0], [fan_in, rank])
    sing_vals = array_ops.slice(sing_vals, [0], [rank])
    right_vecs = array_ops.slice(right_vecs, [0, 0], [rank, fan_out])

    # The variables are created with `trainable=True` because they are the
    # trainable weights of the layer.  If the layer itself is not trainable
    # (self.trainable = False), they will not be added to
    # tf.trainable_variables() and self.trainable_weights will be empty.
    factor_u = vs.get_variable(
        'U',
        initializer=left_vecs,
        regularizer=self.kernel_regularizer,
        dtype=self.dtype,
        trainable=True)
    factor_s = vs.get_variable(
        'S',
        initializer=sing_vals,
        regularizer=self.kernel_regularizer,
        dtype=self.dtype,
        trainable=True)
    factor_v = vs.get_variable(
        'V',
        initializer=right_vecs,
        regularizer=self.kernel_regularizer,
        dtype=self.dtype,
        trainable=True)
    self.manifold_args = [factor_u, factor_s, factor_v]

    # Dense kernel rebuilt from the factors: U * diag(S) * V.
    scaled_u = standard_ops.matmul(factor_u, standard_ops.diag(factor_s))
    self.kernel = standard_ops.matmul(scaled_u, factor_v)

    if 'norm' in self.summaries:
        # NOTE(review): the tag is spelled 'krenel-norm' (sic); it is kept
        # unchanged so existing summaries keep their name.
        summary.scalar('krenel-norm', linalg_ops.norm(self.kernel))
    if 'histogram' in self.summaries:
        summary.histogram('kernel-histogram', self.kernel)

    fixed_rank = manifolds.FixedRankEmbedded(fan_in, fan_out, rank)
    if not self.use_bias:
        self.bias = None
        self.manifold = fixed_rank
        return

    self.bias = vs.get_variable(
        'bias',
        shape=[fan_out],
        initializer=self.bias_initializer,
        regularizer=self.bias_regularizer,
        dtype=self.dtype,
        trainable=True)
    self.manifold_args.append(self.bias)
    self.manifold = manifolds.Product(
        [fixed_rank, manifolds.Euclidean(fan_out)])
def dense_layer_ot(x,
                   in_size,
                   out_size,
                   sequence_length,
                   scope_name,
                   activation_fn=tf.nn.elu,
                   batch_norm=fu.create_BNParams()):
    '''
    Apply the same dense layer to every time step of a sequence.

    One weight matrix (and, when batch normalization is off, one bias) is
    created and applied to ``x[:, t, :]`` for each ``t``; the per-step
    results are concatenated back along axis 1.

    :param x: input data, indexed as ``x[:, t, :]``
    :param in_size: input size or number of features
    :param out_size: output size
    :param sequence_length: number of time steps to iterate over
    :param scope_name: variable scope name of this transformation
    :param activation_fn: activation function; a falsy value disables it
    :param batch_norm: object with the attributes ``apply``, ``center``,
        ``scale`` and ``phase``. When ``apply`` is true, batch normalization
        replaces the bias and ``phase`` is passed as ``is_training``.
    :return: the per-step outputs concatenated along axis 1
    '''
    layers_output = []
    use_batch_norm = batch_norm.apply
    with tf.variable_scope(scope_name) as vs:
        W = tf.get_variable(
            'weight_filter',
            shape=[in_size, out_size],
            initializer=tf.contrib.layers.xavier_initializer(),
            collections=[GraphKeys.WEIGHTS, GraphKeys.GLOBAL_VARIABLES],
            trainable=True)
        if not use_batch_norm:
            b = tf.get_variable(
                'bias_filter',
                shape=[out_size],
                initializer=tf.constant_initializer(0.),
                collections=[GraphKeys.BIASES, GraphKeys.GLOBAL_VARIABLES],
                trainable=True)

        for t in range(sequence_length):
            layer_output = standard_ops.matmul(x[:, t, :], W)
            if use_batch_norm:
                # Every step uses the same batch-norm scope.  The variables
                # are created at the first step and must be reused from then
                # on, otherwise the second step fails because they already
                # exist.  `None` at t == 0 keeps the caller's reuse setting.
                layer_output = tf.contrib.layers.batch_norm(
                    layer_output,
                    center=batch_norm.center,
                    scale=batch_norm.scale,
                    is_training=batch_norm.phase,
                    reuse=True if t > 0 else None,
                    scope=vs.name + '_bn')
            else:
                # No batch norm: add the shared bias instead.
                layer_output = standard_ops.add(layer_output, b)
            if activation_fn:
                layer_output = activation_fn(layer_output)
            # Add the time-step dimension back to allow concatenation.
            layers_output.append(tf.expand_dims(layer_output, 1))

        # The weights are the same at every step, so summarizing the last
        # step is enough.
        s.add_hidden_layer_summary(layers_output[-1], vs.name, weight=W)
        if not use_batch_norm:
            tf.summary.histogram(vs.name + '_bias', b)
    return tf.concat(layers_output, axis=1)
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    scope=None):
    """Adds a fully connected layer.

    A `weights` variable of shape `[last_dim(inputs), num_outputs]` is created
    and multiplied with `inputs`.  When `normalizer_fn` is given it is applied
    to the product; otherwise, when `biases_initializer` is not None, a
    `biases` variable is created and added.  Finally `activation_fn`, if set,
    is applied.

    Inputs of rank greater than 2 are flattened to 2-D before the matrix
    multiply and the result is reshaped back afterwards.

    Args:
      inputs: A tensor of at least rank 2 with a known last dimension, e.g.
        `[batch_size, depth]` or `[None, None, None, channels]`.
      num_outputs: Integer, the number of output units in the layer.
      activation_fn: Activation function; a falsy value skips it.
      normalizer_fn: Normalization function used instead of `biases`.  When
        provided, `biases_initializer` and `biases_regularizer` are ignored
        and no `biases` variable is created.
      normalizer_params: Keyword arguments for `normalizer_fn`.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases.  If None, skip
        biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: Whether the layer and its variables should be reused.  To reuse
        the layer, `scope` must be given.
      variables_collections: Optional list of collections for all the
        variables, or a dictionary with a list of collections per variable.
      outputs_collections: Collection to add the outputs to.
      scope: Optional scope for variable_op_scope.

    Returns:
      The tensor representing the result of the series of operations.

    Raises:
      ValueError: If `inputs` has rank less than 2 or its last dimension is
        not set.
    """
    with variable_scope.variable_op_scope(
            [inputs], scope, 'fully_connected', reuse=reuse) as layer_scope:
        base_dtype = inputs.dtype.base_dtype
        input_shape = inputs.get_shape()
        depth_in = utils.last_dimension(input_shape, min_rank=2)

        # Static and dynamic shape of the result: the input shape with its
        # last dimension replaced by `num_outputs`.
        result_static_shape = input_shape.as_list()
        result_static_shape[-1] = num_outputs
        result_dynamic_shape = array_ops.unpack(array_ops.shape(inputs))
        result_dynamic_shape[-1] = num_outputs
        is_high_rank = len(result_static_shape) > 2

        kernel = variables.model_variable(
            'weights',
            shape=[depth_in, num_outputs],
            dtype=base_dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            collections=utils.get_variable_collections(
                variables_collections, 'weights'))

        # Collapse all leading dimensions so a plain 2-D matmul can be used.
        flat_inputs = (array_ops.reshape(inputs, [-1, depth_in])
                       if is_high_rank else inputs)
        outputs = standard_ops.matmul(flat_inputs, kernel)

        if normalizer_fn:
            outputs = normalizer_fn(outputs, **(normalizer_params or {}))
        elif biases_initializer is not None:
            offsets = variables.model_variable(
                'biases',
                shape=[num_outputs],
                dtype=base_dtype,
                initializer=biases_initializer,
                regularizer=biases_regularizer,
                collections=utils.get_variable_collections(
                    variables_collections, 'biases'))
            outputs = nn.bias_add(outputs, offsets)

        if is_high_rank:
            # Restore the leading dimensions removed above.
            outputs = array_ops.reshape(
                outputs, array_ops.pack(result_dynamic_shape))
            outputs.set_shape(result_static_shape)

        if activation_fn:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(
            outputs_collections, layer_scope.name, outputs)
def fully_connected(x,
                    num_output_nodes,
                    activation_fn=None,
                    weight_init=None,
                    bias_init=standard_ops.constant_initializer(0.),
                    num_input_nodes=None,
                    name=None,
                    weight_collections=None,
                    bias_collections=None,
                    weight_regularizer=None,
                    create_summaries=True):
    """Adds the parameters for a fully connected layer and returns the output.

    Computes `f(x * w + b)` where `f` is `activation_fn`.  The op creates `w`
    and, unless `bias_init` is None, `b`.  When `create_summaries` is true the
    variables and the activation are created through the summary-producing
    helpers.

    Variable creation is compatible with `tf.variable_scope`, so the layer can
    be reused with `tf.variable_scope` or `tf.make_template`.

    The number of input nodes is normally read from the second dimension of
    `x`; pass `num_input_nodes` when that dimension is unknown or when an
    additional size check is wanted.

    Args:
      x: The input `Tensor`.  Must be 2D.
      num_output_nodes: The size of the output.
      activation_fn: A function taking a single Tensor, applied as a
        non-linearity.  If None, no activation is applied.
      weight_init: An optional weight initializer, forwarded to the weight
        helper.
      bias_init: An initializer for the bias, defaults to 0.  Set to None to
        disable the bias.
      num_input_nodes: The number of input nodes.
      name: Name for this operation, used to name operations and to find
        variables.  Defaults to a unique name starting with
        "fully_connected".
      weight_collections: List of graph collections for just weights.
      bias_collections: List of graph collections for just bias.
      weight_regularizer: A regularizer applied to the weights only.
      create_summaries: Set to false to disable summaries.

    Returns:
      The result of applying a fully connected layer.

    Raises:
      ValueError: If `x` is not rank 2, or `x`'s second dimension is not
        known and `num_input_nodes` is not specified.
    """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        # Check rank and, if num_input_nodes is specified, that it matches.
        # TODO(wicke): This does not work with scalar inputs (shape [batch_size,])
        # TODO(wicke): We'd have to encode the broadcasting rules here to be safe.
        x.get_shape().assert_is_compatible_with([None, num_input_nodes])

        if not num_input_nodes:
            known_dims = x.get_shape().dims
            if known_dims is None or known_dims[1].value is None:
                raise ValueError(
                    'If x has an unknown second dimension then num_input_nodes '
                    'must be specified; shape: %s num_input_nodes: %s' %
                    (x.get_shape(), num_input_nodes))
            num_input_nodes = known_dims[1].value

        value_dtype = x.dtype.base_dtype
        # Regularization is only applied to the weights and not bias.
        weights = _weight_variable_2d(num_input_nodes,
                                      num_output_nodes,
                                      dtype=value_dtype,
                                      init=weight_init,
                                      collections=weight_collections,
                                      regularizer=weight_regularizer,
                                      create_summaries=create_summaries)
        pre_activation = standard_ops.matmul(x, weights)

        if bias_init is not None:
            bias = _bias_variable(num_output_nodes,
                                  dtype=value_dtype,
                                  init=bias_init,
                                  collections=bias_collections,
                                  create_summaries=create_summaries)
            pre_activation = nn.bias_add(pre_activation, bias)

        if create_summaries:
            return _apply_activation_with_summaries(pre_activation,
                                                    activation_fn)
        return activation_fn(pre_activation) if activation_fn else pre_activation
def fully_connected(x,
                    num_output_units,
                    activation_fn=None,
                    weight_init=initializers.xavier_initializer(),
                    bias_init=standard_ops.constant_initializer(0.),
                    name=None,
                    weight_collections=(ops.GraphKeys.WEIGHTS,),
                    bias_collections=(ops.GraphKeys.BIASES,),
                    output_collections=(ops.GraphKeys.ACTIVATIONS,),
                    weight_regularizer=None,
                    bias_regularizer=None):
    """Adds the parameters for a fully connected layer and returns the output.

    A fully connected layer is generally defined as a matrix multiply:
    `y = f(w * x + b)` where `f` is given by `activation_fn`.  If
    `activation_fn` is `None`, the result of `y = w * x + b` is returned.

    This op creates `w` and optionally `b`.  Bias (`b`) can be disabled by
    setting `bias_init` to `None`.

    The variable creation is compatible with `tf.variable_scope` and so can
    be reused with `tf.variable_scope` or `tf.make_template`.

    The initializers (`weight_init`, `bias_init`) and the collections the
    variables are placed in (`weight_collections`, `bias_collections`) can be
    customized; the output is placed in `output_collections`.  The collection
    arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`.

    `weight_regularizer` and `bias_regularizer` are applied to the weights
    and biases respectively.

    Args:
      x: The input `Tensor`.  The number of input units is read from its
        second dimension, which must be statically known.
      num_output_units: The size of the output.
      activation_fn: A function that requires a single Tensor that is applied
        as a non-linearity.  If None is used, do not apply any activation.
      weight_init: An optional weight initialization, defaults to
        `xavier_initializer`.
      bias_init: An initializer for the bias, defaults to 0.  Set to `None`
        in order to disable bias.
      name: The name for this operation is used to name operations and to
        find variables.  If specified it must be unique for this scope,
        otherwise a unique name starting with "fully_connected" will be
        created.  See `tf.variable_op_scope` for details.
      weight_collections: List of graph collections to which weights are
        added.
      bias_collections: List of graph collections to which biases are added.
      output_collections: List of graph collections to which outputs are
        added.
      weight_regularizer: A regularizer like the result of `l1_regularizer`
        or `l2_regularizer`.  Used for weights.
      bias_regularizer: A regularizer like the result of `l1_regularizer` or
        `l2_regularizer`.  Used for biases.

    Returns:
      The output of the fully connected layer.

    Raises:
      ValueError: If the rank of `x` is unknown or less than 2, or if its
        second dimension is not set.
    """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        # Validate the static shape up front so callers get a clear error
        # instead of a TypeError/IndexError from indexing `dims`.
        dims = x.get_shape().dims
        if dims is None:
            raise ValueError('dims of x must be known but is None')
        if len(dims) < 2:
            raise ValueError(
                'rank of x must be at least 2 not: %d' % len(dims))
        num_input_units = dims[1].value
        if num_input_units is None:
            raise ValueError(
                'second dimension of x must be known but is None')

        dtype = x.dtype.base_dtype

        w = _weight_variable(shape=[num_input_units, num_output_units],
                             dtype=dtype,
                             initializer=weight_init,
                             collections=weight_collections,
                             regularizer=weight_regularizer)

        y = standard_ops.matmul(x, w)

        if bias_init is not None:
            b = _bias_variable(shape=[num_output_units],
                               dtype=dtype,
                               initializer=bias_init,
                               collections=bias_collections,
                               regularizer=bias_regularizer)
            y = nn.bias_add(y, b)

        return _apply_activation(y, activation_fn, output_collections)
def fully_connected(x,
                    num_output_units,
                    activation_fn=None,
                    weight_init=initializers.xavier_initializer(),
                    bias_init=standard_ops.constant_initializer(0.),
                    name=None,
                    weight_collections=(ops.GraphKeys.WEIGHTS, ),
                    bias_collections=(ops.GraphKeys.BIASES, ),
                    output_collections=(ops.GraphKeys.ACTIVATIONS, ),
                    weight_regularizer=None,
                    bias_regularizer=None):
    """Adds the parameters for a fully connected layer and returns the output.

    A fully connected layer is generally defined as a matrix multiply:
    `y = f(w * x + b)` where `f` is given by `activation_fn`.  If
    `activation_fn` is `None`, the result of `y = w * x + b` is returned.

    This op creates `w` and optionally `b`.  Bias (`b`) can be disabled by
    setting `bias_init` to `None`.

    The variable creation is compatible with `tf.variable_scope` and so can
    be reused with `tf.variable_scope` or `tf.make_template`.

    The initializers (`weight_init`, `bias_init`) and the collections the
    variables are placed in (`weight_collections`, `bias_collections`) can be
    customized; the output is placed in `output_collections`.  The collection
    arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`.

    `weight_regularizer` and `bias_regularizer` are applied to the weights
    and biases respectively.

    Args:
      x: The input `Tensor`.  The number of input units is read from its
        second dimension, which must be statically known.
      num_output_units: The size of the output.
      activation_fn: A function that requires a single Tensor that is applied
        as a non-linearity.  If None is used, do not apply any activation.
      weight_init: An optional weight initialization, defaults to
        `xavier_initializer`.
      bias_init: An initializer for the bias, defaults to 0.  Set to `None`
        in order to disable bias.
      name: The name for this operation is used to name operations and to
        find variables.  If specified it must be unique for this scope,
        otherwise a unique name starting with "fully_connected" will be
        created.  See `tf.variable_op_scope` for details.
      weight_collections: List of graph collections to which weights are
        added.
      bias_collections: List of graph collections to which biases are added.
      output_collections: List of graph collections to which outputs are
        added.
      weight_regularizer: A regularizer like the result of `l1_regularizer`
        or `l2_regularizer`.  Used for weights.
      bias_regularizer: A regularizer like the result of `l1_regularizer` or
        `l2_regularizer`.  Used for biases.

    Returns:
      The output of the fully connected layer.

    Raises:
      ValueError: If the rank of `x` is unknown or less than 2, or if its
        second dimension is not set.
    """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        # Fail early with a descriptive error rather than letting the
        # `dims[1]` lookup raise TypeError/IndexError on a bad shape.
        dims = x.get_shape().dims
        if dims is None:
            raise ValueError('dims of x must be known but is None')
        if len(dims) < 2:
            raise ValueError(
                'rank of x must be at least 2 not: %d' % len(dims))
        num_input_units = dims[1].value
        if num_input_units is None:
            raise ValueError(
                'second dimension of x must be known but is None')

        dtype = x.dtype.base_dtype

        w = _weight_variable(shape=[num_input_units, num_output_units],
                             dtype=dtype,
                             initializer=weight_init,
                             collections=weight_collections,
                             regularizer=weight_regularizer)

        y = standard_ops.matmul(x, w)

        if bias_init is not None:
            b = _bias_variable(shape=[num_output_units],
                               dtype=dtype,
                               initializer=bias_init,
                               collections=bias_collections,
                               regularizer=bias_regularizer)
            y = nn.bias_add(y, b)

        return _apply_activation(y, activation_fn, output_collections)
def fully_connected(x,
                    num_output_nodes,
                    activation_fn=None,
                    weight_init=None,
                    bias_init=standard_ops.constant_initializer(0.),
                    num_input_nodes=None,
                    name=None,
                    weight_collections=None,
                    bias_collections=None,
                    weight_regularizer=None,
                    create_summaries=True):
    """Builds a fully connected layer and returns its output.

    The layer computes `f(x * w + b)`, with `f` given by `activation_fn`.
    `w` is always created; `b` is created unless `bias_init` is None.  With
    `create_summaries` enabled, the variable helpers and the activation
    helper are asked to emit summaries.

    Variables are created in a way that is compatible with
    `tf.variable_scope`, so the layer can be shared through
    `tf.variable_scope` or `tf.make_template`.

    `num_input_nodes` is usually inferred from the second dimension of `x`.
    It has to be passed when that dimension is unknown and may be passed to
    get an extra shape check.

    Args:
      x: The input `Tensor`.  Must be 2D.
      num_output_nodes: The size of the output.
      activation_fn: Single-Tensor function used as the non-linearity, or
        None for no activation.
      weight_init: Optional weight initializer handed to the weight helper.
      bias_init: Bias initializer, 0 by default; None disables the bias.
      num_input_nodes: The number of input nodes.
      name: Operation name, also used to find variables; a unique name
        starting with "fully_connected" is generated when omitted.
      weight_collections: List of graph collections for just weights.
      bias_collections: List of graph collections for just bias.
      weight_regularizer: Regularizer for the weights (never the bias).
      create_summaries: Set to false to disable summaries.

    Returns:
      The result of applying a fully connected layer.

    Raises:
      ValueError: If `x` is not rank 2, or `x`'s second dimension is not
        known and `num_input_nodes` is not specified.
    """
    with variable_scope.variable_op_scope([x], name, 'fully_connected'):
        # Check rank and if num_input_nodes is specified, make sure it matches.
        # TODO(wicke): This does not work with scalar inputs (shape [batch_size,])
        # TODO(wicke): We'd have to encode the broadcasting rules here to be safe.
        x.get_shape().assert_is_compatible_with([None, num_input_nodes])

        if not num_input_nodes:
            shape_dims = x.get_shape().dims
            second_dim_known = (shape_dims is not None and
                                shape_dims[1].value is not None)
            if not second_dim_known:
                raise ValueError(
                    'If x has an unknown second dimension then num_input_nodes '
                    'must be specified; shape: %s num_input_nodes: %s' %
                    (x.get_shape(), num_input_nodes))
            num_input_nodes = shape_dims[1].value

        element_dtype = x.dtype.base_dtype

        # Regularization is only applied to the weights and not bias.
        kernel = _weight_variable_2d(
            num_input_nodes,
            num_output_nodes,
            dtype=element_dtype,
            init=weight_init,
            collections=weight_collections,
            regularizer=weight_regularizer,
            create_summaries=create_summaries)
        result = standard_ops.matmul(x, kernel)

        if bias_init is not None:
            result = nn.bias_add(
                result,
                _bias_variable(
                    num_output_nodes,
                    dtype=element_dtype,
                    init=bias_init,
                    collections=bias_collections,
                    create_summaries=create_summaries))

        if create_summaries:
            return _apply_activation_with_summaries(result, activation_fn)
        if not activation_fn:
            return result
        return activation_fn(result)
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    """Adds a fully connected layer.

    `fully_connected` creates a variable called `weights`, representing a
    fully connected weight matrix, which is multiplied by the `inputs` to
    produce a `Tensor` of hidden units.  If a `normalizer_fn` is provided
    (such as `batch_norm`), it is then applied.  Otherwise, if
    `normalizer_fn` is None and a `biases_initializer` is provided then a
    `biases` variable would be created and added the hidden units.  Finally,
    if `activation_fn` is not `None`, it is applied to the hidden units as
    well.

    Note: that if `inputs` have a rank greater than 2, then `inputs` is
    flattened prior to the initial matrix multiply by `weights`.

    Args:
      inputs: A tensor of with at least rank 2 and value for the last
        dimension, i.e. `[batch_size, depth]`,
        `[None, None, None, channels]`.
      num_outputs: Integer, the number of output units in the layer.
      activation_fn: activation function.
      normalizer_fn: normalization function to use instead of `biases`.  If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are not created nor
        added.
      normalizer_params: normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases.  If None skip
        biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused.  To
        be able to reuse the layer scope must be given.
      variables_collections: Optional list of collections for all the
        variables or a dictionary containing a different list of collections
        per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_op_scope.

    Returns:
      the tensor variable representing the result of the series of
      operations.

    Raises:
      ValueError: if `num_outputs` is not an integer, if `inputs` has rank
        less than 2 or if its last dimension is not set.
    """
    if not isinstance(num_outputs, int):
        # Format the message with `%`; passing the value as a second
        # exception argument would leave the '%s' placeholder unfilled.
        raise ValueError(
            'num_outputs should be integer, got %s.' % (num_outputs,))
    with variable_scope.variable_op_scope([inputs],
                                          scope,
                                          'fully_connected',
                                          reuse=reuse) as sc:
        dtype = inputs.dtype.base_dtype
        num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

        # Static and dynamic output shapes: the input shape with the last
        # dimension replaced by `num_outputs`.
        static_shape = inputs.get_shape().as_list()
        static_shape[-1] = num_outputs

        out_shape = array_ops.unpack(array_ops.shape(inputs))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[num_outputs,],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        if activation_fn:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
def dnn_sampled_softmax_classifier_model_fn(features, target_indices, mode,
                                            params):
    """model_fn that uses candidate sampling.

    Builds an input layer from feature columns, a stack of fully connected
    hidden layers and a logit layer.  In TRAIN mode the loss is a sampled
    softmax loss; in EVAL and INFER modes the full logits are computed to
    produce predictions (and, for EVAL, a softmax cross-entropy loss).

    Args:
      features: Single Tensor or dict of Tensor (depends on data passed to
        `fit`)
      target_indices: A single Tensor of shape [batch_size, n_labels]
        containing the target indices.
      mode: Represents if this training, evaluation or prediction.  See
        `ModeKeys`.
      params: A dict of hyperparameters that are listed below.
        hidden_units- List of hidden units per layer.  All layers are fully
          connected.  Ex. `[64, 32]` means first layer has 64 nodes and
          second one has 32.
        feature_columns- An iterable containing all the feature columns used
          by the model.  All items in the set should be instances of classes
          derived from `FeatureColumn`.
        n_classes- number of target classes.  It must be greater than 2.
        n_samples- number of sample target classes.  Needs to be tuned - A
          good starting point could be 2% of n_classes.
        n_labels- number of labels in each example.
        top_k- The number of classes to predict.
        optimizer- An instance of `tf.Optimizer` used to train the model.  If
          `None`, will use an Adagrad optimizer.
        dropout- When not `None`, the probability we will drop out a given
          coordinate.
        gradient_clip_norm- A float > 0.  If provided, gradients are clipped
          to their global norm with this clipping ratio.  See
          tf.clip_by_global_norm for more details.
        num_ps_replicas- The number of parameter server replicas.

    Returns:
      predictions: A single Tensor or a dict of Tensors (None in TRAIN mode).
      loss: A scalar containing the loss of the step (None in INFER mode).
      train_op: The op for training (None outside TRAIN mode).

    Raises:
      ValueError: If `mode` is not one of TRAIN, EVAL or INFER.
    """
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    n_samples = params["n_samples"]
    n_labels = params["n_labels"]
    top_k = params["top_k"]
    optimizer = params["optimizer"]
    dropout = params["dropout"]
    gradient_clip_norm = params["gradient_clip_norm"]
    num_ps_replicas = params["num_ps_replicas"]

    parent_scope = "dnn_ss"

    # Setup the input layer partitioner.
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    # Create the input layer.
    with variable_scope.variable_scope(
            parent_scope + "/input_from_feature_columns",
            features.values(),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            features,
            feature_columns,
            weight_collections=[parent_scope],
            scope=scope)

    # Setup the hidden layer partitioner.
    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))

    final_hidden_layer_dim = None
    # Create hidden layers using fully_connected.
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                parent_scope + "/hiddenlayer_%d" % layer_id, [net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(net,
                                         num_hidden_units,
                                         variables_collections=[parent_scope],
                                         scope=scope)
            final_hidden_layer_dim = num_hidden_units
            # Add dropout if it is enabled.
            if dropout is not None and mode == estimator.ModeKeys.TRAIN:
                net = layers.dropout(net, keep_prob=(1.0 - dropout))

    # Create the weights and biases for the logit layer.
    with variable_scope.variable_scope(
            parent_scope + "/logits", [net],
            partitioner=hidden_layer_partitioner) as scope:
        dtype = net.dtype.base_dtype
        weights_shape = [n_classes, final_hidden_layer_dim]
        weights = variables.model_variable(
            "weights",
            shape=weights_shape,
            dtype=dtype,
            initializer=initializers.xavier_initializer(),
            trainable=True,
            collections=[parent_scope])
        biases = variables.model_variable(
            "biases",
            shape=[n_classes,],
            dtype=dtype,
            initializer=init_ops.zeros_initializer,
            trainable=True,
            collections=[parent_scope])

    if mode == estimator.ModeKeys.TRAIN:
        # Call the candidate sampling APIs and calculate the loss.
        sampled_values = nn.learned_unigram_candidate_sampler(
            true_classes=math_ops.to_int64(target_indices),
            num_true=n_labels,
            num_sampled=n_samples,
            unique=True,
            range_max=n_classes)

        sampled_softmax_loss = nn.sampled_softmax_loss(
            weights=weights,
            biases=biases,
            inputs=net,
            labels=math_ops.to_int64(target_indices),
            num_sampled=n_samples,
            num_classes=n_classes,
            num_true=n_labels,
            sampled_values=sampled_values)

        loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

        train_op = optimizers.optimize_loss(
            loss=loss,
            global_step=contrib_framework.get_global_step(),
            learning_rate=_DEFAULT_LEARNING_RATE,
            optimizer=_get_optimizer(optimizer),
            clip_gradients=gradient_clip_norm,
            name=parent_scope)
        return None, loss, train_op

    if mode not in (estimator.ModeKeys.EVAL, estimator.ModeKeys.INFER):
        # Fail loudly instead of silently returning None, which callers
        # cannot unpack into (predictions, loss, train_op).
        raise ValueError("Unsupported mode: %s" % (mode,))

    # EVAL and INFER share the full (non-sampled) logits and predictions.
    logits = nn.bias_add(
        standard_ops.matmul(net, array_ops.transpose(weights)), biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    if mode == estimator.ModeKeys.INFER:
        return predictions, None, None

    # EVAL: since the targets have multiple labels, setup the target
    # probabilities as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_normalizer_fn=None,
                    weights_normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    """Adds a fully connected layer with optional weight normalization.

    Copied and modified from tensorflow-0.12.0 contrib.layers.fully_connected;
    the `weights_normalizer_*` options are the addition.

    `fully_connected` creates a variable called `weights`, representing a
    fully connected weight matrix, which is multiplied by the `inputs` to
    produce a `Tensor` of hidden units. If a `weights_normalizer_fn` is
    provided, it is applied to `weights` before the multiplication. If a
    `normalizer_fn` is provided (such as `batch_norm`), it is then applied to
    the product. Otherwise, if `normalizer_fn` is None and a
    `biases_initializer` is provided, then a `biases` variable is created and
    added to the hidden units. Finally, if `activation_fn` is not `None`, it
    is applied to the hidden units as well.

    Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
    to rank 2 prior to the matrix multiply by `weights`, and the result is
    reshaped back so that only the last dimension changes.

    Args:
      inputs: A tensor of at least rank 2 with a known value for the last
        dimension, e.g. `[batch_size, depth]`,
        `[None, None, None, channels]`.
      num_outputs: Integer or long, the number of output units in the layer.
      activation_fn: Activation function; set to None to skip it and keep a
        linear activation.
      normalizer_fn: Normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are neither created nor
        added. Defaults to None for no normalizer function.
      normalizer_params: Dict of keyword arguments for `normalizer_fn`.
      weights_normalizer_fn: Function applied to the `weights` variable
        before the matrix multiply; called as
        `weights_normalizer_fn(weights, **weights_normalizer_params)`.
      weights_normalizer_params: Dict of keyword arguments for
        `weights_normalizer_fn`.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: Whether or not the layer and its variables should be reused. To
        be able to reuse the layer, `scope` must be given.
      variables_collections: Optional list of collections for all the
        variables, or a dictionary containing a different list of
        collections per variable.
      outputs_collections: Collection to add the outputs to.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
      The tensor variable representing the result of the series of
      operations.

    Raises:
      ValueError: If `num_outputs` is not an integer, if `inputs` has rank
        less than 2, or if its last dimension is not set.
    """
    if not isinstance(num_outputs, six.integer_types):
        # Interpolate the value into the message; passing it as a second
        # positional argument would leave the '%s' placeholder unformatted.
        raise ValueError(
            'num_outputs should be int or long, got %s.' % (num_outputs,))

    with variable_scope.variable_scope(
            scope, 'fully_connected', [inputs], reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        inputs_shape = inputs.get_shape()
        num_input_units = utils.last_dimension(inputs_shape, min_rank=2)

        # Static (graph-construction time) output shape: same as the input
        # except for the last dimension.
        static_shape = inputs_shape.as_list()
        static_shape[-1] = num_outputs

        # Dynamic (run time) output shape, used to undo the flattening below
        # when some leading dimensions are unknown statically.
        out_shape = array_ops.unpack(
            array_ops.shape(inputs), len(static_shape))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable(
            'weights',
            shape=weights_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            collections=weights_collections,
            trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(
                weights, **weights_normalizer_params)

        if len(static_shape) > 2:
            # Flatten all leading dimensions so a plain matmul can be used.
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)

        if normalizer_fn is not None:
            # A normalizer replaces the bias term entirely.
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        elif biases_initializer is not None:
            biases_collections = utils.get_variable_collections(
                variables_collections, 'biases')
            biases = variables.model_variable(
                'biases',
                shape=[num_outputs],
                dtype=dtype,
                initializer=biases_initializer,
                regularizer=biases_regularizer,
                collections=biases_collections,
                trainable=trainable)
            outputs = nn.bias_add(outputs, biases)

        if activation_fn is not None:
            outputs = activation_fn(outputs)

        if len(static_shape) > 2:
            # Restore the leading dimensions removed by the flattening above.
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)

        return utils.collect_named_outputs(
            outputs_collections, sc.original_name_scope, outputs)
def _matmul(self, inputs, kernel):
    """Contract the last axis of `inputs` with the first axis of `kernel`.

    Inputs whose static rank is at most 2 go through `matmul`; inputs of
    higher rank go through `tensordot` over axes `[[-1], [0]]`.

    Args:
      inputs: Tensor exposing its static rank as `inputs.shape.ndims`.
      kernel: Tensor to multiply `inputs` by.

    Returns:
      The result of the `matmul` or `tensordot` call.
    """
    rank = inputs.shape.ndims
    if rank > 2:
        # matmul would not contract only the last axis here, so use
        # tensordot for the higher-rank case.
        return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
    return standard_ops.matmul(inputs, kernel)