def test_readme_example(self):
    """Round-trip random symbols through range_encode / range_decode.

    Draws a 128x128 tensor of integers in [0, 10), builds a CDF from the
    empirical histogram of those symbols, encodes the symbols with that CDF
    and checks that decoding reproduces them exactly.
    """
    symbols = tf.random.uniform((128, 128), 0, 10, dtype=tf.int32)

    # Empirical CDF: inclusive running sum of the 10 bin counts, with a
    # leading zero prepended, reshaped to [1, 1, 11].
    counts = tf.bincount(symbols, minlength=10, maxlength=10)
    cumulative = tf.cumsum(counts, exclusive=False)
    cumulative = tf.pad(cumulative, [[1, 0]])
    cumulative = tf.reshape(cumulative, [1, 1, -1])

    # The coder is fed int16 symbols.
    symbols = tf.cast(symbols, tf.int16)
    bitstring = range_coding_ops.range_encode(
        symbols, cumulative, precision=14)
    symbols_shape = tf.shape(symbols)
    restored = range_coding_ops.range_decode(
        bitstring, symbols_shape, cumulative, precision=14)

    with self.cached_session() as sess:
        # Fetch both tensors in a single run so they come from the same draw.
        expected, actual = sess.run((symbols, restored))
        self.assertAllEqual(expected, actual)
Пример #2
0
    def _compute_w_per_class_vector_for_xentr(self, num_classes, y_gt, eps = 1e-6):
        # Re-weights samples in the cost function on a per-class basis.
        # E.g. to exclude a class, or counter class imbalance.
        # From first to given epoch, start from weighting classes equally to natural frequency, decreasing weighting linearly.
        # Return value: a function of epochs_trained_tfv
        
        if self._reweight_classes_in_cost is None or self._reweight_classes_in_cost["type"] is None: # No re-weighting.
            w_per_cl_vec = tf.ones( shape=[num_classes], dtype='float32' )
            
        else: # A type of reweighting has been specified
            
            if self._reweight_classes_in_cost["type"] == "freq":
                # Frequency re-weighting
                num_lbls_in_ygt = tf.cast( tf.reduce_prod(tf.shape(y_gt)), dtype="float32" )
                num_lbls_in_ygt_per_c = tf.bincount( arr = y_gt, minlength=num_classes, maxlength=num_classes, dtype="float32" ) # without the min/max, length of vector can change.
                y1 = (1./(num_lbls_in_ygt_per_c + eps)) * (num_lbls_in_ygt / num_classes)
                
            elif self._reweight_classes_in_cost["type"] == "per_c":
                # self._reweight_classes_in_cost["prms"] should be a list, with one float per class
                assert len(self._reweight_classes_in_cost["prms"]) == num_classes
                y1 = tf.constant(self._reweight_classes_in_cost["prms"], dtype="float32")
            
            # Linear schedule:
            lin_schedule_min_max_epoch = self._reweight_classes_in_cost["schedule"]
            assert lin_schedule_min_max_epoch[0] < lin_schedule_min_max_epoch[1]
            
            # yx - y1 = (x - x1) * (y2 - y1)/(x2 - x1)
            # yx = the multiplier I currently want, y1 = the multiplier at the beginning, y2 = the multiplier at the end
            # x = current epoch, x1 = epoch where linear decrease starts, x2 = epoch where linear decrease ends
            y2 = 1. # Where weight should be after end of schedule.
            
            x1 = tf.cast(lin_schedule_min_max_epoch[0], dtype="float32")
            x2 = tf.cast(lin_schedule_min_max_epoch[1], dtype="float32")
            x = tf.cast(self._num_epochs_trained_tfv, dtype="float32")
            # To handle the piecewise linear behaviour of x being before x1 and after x2 giving the same y as if =x1 or =x2 :
            x = tf.maximum(x1, x)
            x = tf.minimum(x, x2)
            yx = (x - x1) * (y2 - y1)/(x2 - x1) + y1
            w_per_cl_vec = yx

        return w_per_cl_vec
Пример #3
0
    def model_fun(features, labels, mode, params):
        """Estimator model function summing per-atom-type network outputs.

        For every atom type in `params['atom_types']` a `BPAtomicNN` is built
        inside its own '<type>_ANN' variable scope from `features['<type>_input']`.
        The flattened outputs of all networks are scattered, using
        `features['<type>_indices']`, into a tensor shaped like `labels`.

        Args:
            features: Dict holding '<type>_input' and '<type>_indices' entries
                for every atom type.
            labels: Target tensor; its shape defines the prediction shape.
            mode: One of the `tf.estimator.ModeKeys` values.
            params: Dict with 'atom_types', 'layers', 'offsets' and 'act_funs',
                iterated in lockstep (one entry per atom type).

        Returns:
            A `tf.estimator.EstimatorSpec` matching `mode`.
        """
        mode_keys = _tf.estimator.ModeKeys
        species = params['atom_types']

        # One network per atom type, each in its own (reusable) scope.
        networks = {}
        per_type_settings = zip(
            species, params['layers'], params['offsets'], params['act_funs'])
        for kind, layer_sizes, offsets, activations in per_type_settings:
            scope_name = '{}_ANN'.format(kind)
            with _tf.variable_scope(scope_name, reuse = _tf.AUTO_REUSE):
                networks[kind] = BPAtomicNN(
                    features['%s_input'%kind], layer_sizes, offsets, activations)

        # Scatter all atomic outputs to the positions given by the indices.
        scatter_indices = _tf.concat(
            [features['%s_indices'%kind] for kind in species], 0)
        flat_outputs = [
            _tf.reshape(networks[kind].output, [-1]) for kind in species]
        scatter_updates = _tf.concat(flat_outputs, 0)
        predicted_energies = _tf.scatter_nd(
            scatter_indices, scatter_updates, _tf.shape(labels),
            name = 'E_prediction')

        if mode == mode_keys.PREDICT:
            return _tf.estimator.EstimatorSpec(
                mode, predictions={'energies': predicted_energies})

        # Not consumed below; kept because it adds the named graph op
        # 'NumberOfAtoms'.
        index_counts = [
            _tf.bincount(features['%s_indices'%kind]) for kind in species]
        num_atoms = _tf.reduce_sum(
            index_counts, axis = 0, name = 'NumberOfAtoms')

        # Loss and metrics.
        loss = _tf.losses.mean_squared_error(
            labels=labels, predictions=predicted_energies)
        rmse = _tf.metrics.root_mean_squared_error(labels, predicted_energies)
        # rmse is a (value, update_op) pair; the summary tracks the update op.
        _tf.summary.scalar('rmse', rmse[1])

        if mode == mode_keys.EVAL:
            return _tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops={'rmse': rmse})

        assert mode == mode_keys.TRAIN
        optimizer = _tf.train.AdagradOptimizer(learning_rate=0.1)
        step = _tf.train.get_global_step()
        train_op = optimizer.minimize(loss, global_step=step)
        return _tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
Пример #4
0
def _bincount_2d(values, num_values):
  """Computes a separate bincount for every row of `values`.

  Args:
    values: 2D integer tensor whose rows are counted independently.
    num_values: Number of bins (output columns) per row. Entries of `values`
        that are negative or `>= num_values` are dropped before counting.

  Returns:
    Tensor of shape `(values.shape[0], num_values)` whose `i`th row holds the
        counts of the in-range entries of `values[i, :]`.
  """
  row_count = tf.shape(values)[0]
  # Shift row i by i * num_values so that every row owns a disjoint id range
  # and a single flat bincount can serve all rows at once.
  row_offsets = tf.range(row_count)[:, None] * num_values
  shifted = values + row_offsets
  # Out-of-range entries would land in another row's id range; drop them.
  in_range = (0 <= values) & (values < num_values)
  kept = tf.boolean_mask(shifted, in_range)
  total_bins = row_count * num_values
  flat_counts = tf.bincount(kept, minlength=total_bins, maxlength=total_bins)
  return tf.reshape(flat_counts, [row_count, num_values])
Пример #5
0
    def _compute_w_per_class_vector_for_xentr(self, num_classes, y_gt):
        """Return per-class cross-entropy weights that counter class imbalance.

        `self._weight_c_in_xentr_and_release_between_eps` holds two epochs
        (start, end). When start >= 0 and end > 0, classes start out weighted
        by inverse label frequency and the weights move linearly to 1 between
        those two epochs, as a function of `self._num_epochs_trained_tfv`.
        Otherwise every class gets weight 1.

        Args:
            num_classes: Number of classes (length of the returned vector).
            y_gt: Integer tensor with the ground-truth labels.

        Returns:
            A float32 tensor with one weight per class.
        """
        release_eps = self._weight_c_in_xentr_and_release_between_eps
        reweighting_enabled = release_eps[0] >= 0 and release_eps[1] > 0
        if not reweighting_enabled:
            # Negative value given: no re-weighting.
            return tf.ones(shape=[num_classes], dtype='float32')

        assert release_eps[0] < release_eps[1]
        tiny = 1e-6  # Guards against division by zero for absent classes.

        total_labels = tf.cast(tf.reduce_prod(tf.shape(y_gt)), dtype="float32")
        # Fixing both min and max length keeps the vector at num_classes.
        labels_per_class = tf.bincount(arr=y_gt,
                                       minlength=num_classes,
                                       maxlength=num_classes,
                                       dtype="float32")

        # Linear interpolation between (ramp_start, start_weight) and
        # (ramp_end, end_weight), evaluated at the current epoch.
        inverse_freq = 1. / (labels_per_class + tiny)
        start_weight = inverse_freq * (total_labels / num_classes)
        end_weight = 1.

        ramp_start = tf.cast(release_eps[0], dtype="float32")
        ramp_end = tf.cast(release_eps[1], dtype="float32")
        epoch = tf.cast(self._num_epochs_trained_tfv, dtype="float32")
        # Clamp the epoch into [ramp_start, ramp_end] so that the weights stay
        # constant outside the ramp.
        epoch = tf.maximum(ramp_start, epoch)
        epoch = tf.minimum(epoch, ramp_end)

        scaled_gap = (epoch - ramp_start) * (end_weight - start_weight)
        return scaled_gap / (ramp_end - ramp_start) + start_weight
Пример #6
0
def sparse_balanced_crossentropy(logits, labels):
    """
    Computes a cross-entropy loss from sparse labels in which every class is
    weighted by the inverse of its frequency in `labels`.

    Args:
        logits (tf.Tensor): logits prediction; the last axis indexes classes
        labels (tf.Tensor): sparse (integer) labels

    Returns:
        tf.Tensor: Tensor scalar representing the mean loss
    """
    tiny = tf.constant(np.finfo(np.float32).tiny)
    n_classes = tf.cast(tf.shape(logits)[-1], tf.int32)

    # Negative log-probabilities; probabilities below `tiny` are bumped by
    # `tiny` so the log stays finite.
    softmaxed = tf.nn.softmax(logits)
    below_tiny = tf.cast(tf.less(softmaxed, tiny), tf.float32)
    softmaxed = softmaxed + below_tiny * tiny
    neg_log_probs = -1. * tf.log(softmaxed)

    targets = tf.one_hot(labels, n_classes)

    # Per-class label counts; no gradient flows through the weights.
    counts = tf.bincount(labels, minlength=n_classes, dtype=tf.float32)
    counts = tf.stop_gradient(counts)

    # Inverse frequency, rescaled by (number of labels / number of classes).
    inverse_counts = 1. / (counts + tf.constant(1e-8))
    n_labels = tf.cast(tf.reduce_prod(tf.shape(labels)), tf.float32)
    balanced = inverse_counts * (n_labels / tf.cast(n_classes, tf.float32))

    # Reshape to [1, ..., 1, classes] so the weights broadcast over `targets`.
    label_rank = len(labels.get_shape().as_list())
    static_classes = logits.get_shape().as_list()[-1]
    broadcast_shape = [1] * label_rank + [static_classes]
    balanced = tf.reshape(balanced, broadcast_shape)

    per_example = tf.reduce_sum(targets * neg_log_probs * balanced, axis=-1)
    return tf.reduce_mean(per_example)
Пример #7
0
    def build_stochastic_layer(self, layer):
        """Build the two-level stochastic layer on top of `layer`.

        Stick-breaking weights over `cfg["L2_truncation_level"]` components are
        sampled from two dense heads (`self.a`, `self.b`), each component is
        given a distribution over `self.topic_dim` entries (`self.phi_prob`),
        and the two are combined into `self.z_3d`. Also sets
        `self.average_used_dims` / `self.effective_dims` according to
        `cfg["effective_indicator"]`.

        Args:
            layer: Input tensor fed to the dense heads.
                NOTE(review): presumably (batch, features) with the same
                leading dimension as `self.x` -- confirm against the caller.

        Returns:
            `self.z_3d` reshaped to `[-1, self.topic_dim]`.
        """
        # Two dense heads producing the `a` and `b` parameters, one pair per
        # stick break (truncation level - 1 breaks).
        self.a = tf.layers.dense(layer, self.cfg["L2_truncation_level"]-1, activation=self.cfg["dirichlet_ab_fct"],
                                 use_bias=self.cfg["dirichlet_ab_use_bias"],
                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 bias_initializer=tf.zeros_initializer(),
                                 name="posterior_a_output")
        self.b = tf.layers.dense(layer, self.cfg["L2_truncation_level"]-1, activation=self.cfg["dirichlet_ab_fct"],
                                 use_bias=self.cfg["dirichlet_ab_use_bias"],
                                 kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 bias_initializer=tf.constant_initializer(self.cfg["b_init"]),
                                 name="posterior_b_output")
        # Uniform noise is drawn from [0.01, 0.99) rather than [0, 1).
        # NOTE(review): `tf.floatX` is not a stock TensorFlow attribute; it is
        # presumably attached to the module elsewhere -- confirm.
        uniform_samples = tf.random_uniform((self.cfg["MC_samples"], tf.shape(self.x)[0], self.cfg["L2_truncation_level"]-1), minval=0.01, maxval=0.99, dtype=tf.floatX)
        # Small offset so the divisions 1 / a and 1 / b below never hit zero.
        self.a = self.a + 1e-5
        self.b = self.b + 1e-5
        # Transform the uniform noise into stick-break fractions; the formula
        # has the form of inverse-CDF sampling from a Kumaraswamy(a, b).
        self.vs = (1 - uniform_samples ** (1 / self.b)) ** (1 / self.a)

        # Stick-breaking: segment i takes the fraction vs[..., i] of whatever
        # is left of the stick; the final remainder becomes the last segment.
        stick_segments_lst = []
        remaining_sticks = tf.ones((self.cfg["MC_samples"], tf.shape(self.x)[0]), dtype=tf.floatX)
        for i in range(self.cfg["L2_truncation_level"] - 1):
            stick_segments_lst.append(remaining_sticks * self.vs[:, :, i])
            remaining_sticks = remaining_sticks * (1 - self.vs[:, :, i])
        stick_segments = tf.stack(stick_segments_lst) # (self.cfg["L2_truncation_level"] - 1) x (MC samples) x (batch size)
        # After the transpose: (MC samples) x (batch size) x (L2 truncation level)
        self.L2_z_3d = tf.transpose(tf.concat((stick_segments, tf.expand_dims(remaining_sticks, axis=0)), axis=0), (1, 2, 0))

        if not self.cfg["posterior_one_c"]:
            # multinomial logits: a separate softmax over topic_dim for every
            # L2 component
            self.phi_logits = tf.layers.dense(layer, self.cfg["L2_truncation_level"] * self.topic_dim, activation=None,
                                              use_bias=self.cfg["dirichlet_phi_use_bias"],
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              bias_initializer=tf.zeros_initializer(),
                                              name="posterior_phi_output")
            self.phi_prob = tf.nn.softmax(tf.reshape(self.phi_logits,
                                                     [-1, self.cfg["L2_truncation_level"], self.topic_dim]))
        else:
            # A single softmax over topic_dim, tiled so that every L2 component
            # shares the same distribution.
            self.phi_logits = tf.layers.dense(layer, self.topic_dim, activation=None,
                                              use_bias=self.cfg["dirichlet_phi_use_bias"],
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              bias_initializer=tf.zeros_initializer(),
                                              name="posterior_phi_output")
            self.phi_prob = tf.tile(tf.expand_dims(tf.nn.softmax(tf.reshape(self.phi_logits,
                                                                            [-1, self.topic_dim])),
                                                   axis=1), [1, self.cfg["L2_truncation_level"], 1])
        # c_4d: (MC samples) x (batch size) x (L2 truncation level) x (L1 truncation level/topic dim)
        # soft_z_3d mixes the stick weights with the soft probabilities;
        # z_3d does the same with the Gumbel-softmax samples c_4d.
        self.soft_z_3d = tf.reduce_sum(tf.expand_dims(self.L2_z_3d, axis=-1) * self.phi_prob, axis=2)
        self.c_4d, self.gumbel_tau = gumbel_softmax(self.phi_prob, self.training_placeholder,
                                                    tau_init=self.cfg["gumbel_tau_init"], tau_trainable=self.cfg["gumbel_tau_trainable"],
                                                    MC_samples=self.cfg["MC_samples"], straight_through=True)
        self.z_3d = tf.reduce_sum(tf.expand_dims(self.L2_z_3d, axis=-1) * self.c_4d, axis=2)
        
        # Change
        # Decide which topic dimensions count as "effective". Any other value
        # of cfg["effective_indicator"] leaves these attributes unset.
        if self.cfg["effective_indicator"] == "average":
            # Mean weight per topic (over MC samples and batch) scaled by the
            # batch size, compared against a fixed threshold.
            self.average_of_every_topic = tf.reduce_mean(self.z_3d, axis=(0, 1)) * tf.cast(tf.shape(self.x)[0], tf.floatX)
            effective_dims = self.average_of_every_topic > self.cfg["effective_threshold"]
            self.average_used_dims = tf.reduce_sum(tf.cast(effective_dims, tf.floatX))
            self.effective_dims = tf.squeeze(tf.where(effective_dims))
        elif self.cfg["effective_indicator"] == "assignment" or self.cfg["effective_indicator"] == "ratio":
            # Count how often each topic is the argmax, and keep the topics
            # whose count exceeds a fraction of (batch size * MC samples).
            self.assignment_of_every_topic = tf.bincount(tf.cast(tf.argmax(self.z_3d, axis=-1), tf.int32), minlength=self.topic_dim)
            effective_dims_bool = tf.cast(self.assignment_of_every_topic, tf.floatX) > self.cfg["assignment_threshold"] * tf.cast(tf.shape(self.x)[0], tf.floatX) * self.cfg["MC_samples"]
            # FIXME: for now, if MC_sample is not 1. This is not correct.
            self.average_used_dims = tf.reduce_sum(tf.cast(effective_dims_bool, tf.floatX))
            self.effective_dims = tf.squeeze(tf.where(effective_dims_bool))

        # self.average_used_dims = tf.Print(self.average_used_dims, [tf.transpose(remaining_sticks, (1, 2, 0))], "print_remaining", summarize=100, first_n=3)
        # self.z = tf.Print(self.z, [self.z], "print_z", summarize=50)
        # self.z = tf.Print(self.z, [tf.reduce_sum(self.z, axis=-1)], "print_z_sum")
        # Merge the MC-sample and batch axes into one leading axis.
        z = tf.reshape(self.z_3d, [-1, self.topic_dim])
        return z
Пример #8
0
    def build_stochastic_layer(self, layer):
        """Build the stick-breaking stochastic layer on top of `layer`.

        Two dense heads produce the parameters `self.a` and `self.b`, which
        turn uniform noise into stick-break fractions `self.vs`. Breaking a
        unit stick `topic_dim - 1` times yields `self.z_3d`, whose last axis
        has `self.topic_dim` entries. Also sets `self.average_used_dims` /
        `self.effective_dims` according to `cfg["effective_indicator"]`.

        Args:
            layer: Input tensor fed to the dense heads.
                NOTE(review): presumably (batch, features) with the same
                leading dimension as `self.x` -- confirm against the caller.

        Returns:
            `self.z_3d` reshaped to `[-1, self.topic_dim]`.
        """
        # Dense heads for the `a` and `b` parameters, one pair per stick break.
        self.a = tf.layers.dense(
            layer,
            self.topic_dim - 1,
            activation=self.cfg["dirichlet_ab_fct"],
            use_bias=self.cfg["dirichlet_ab_use_bias"],
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.zeros_initializer(),
            name="posterior_a_output")
        self.b = tf.layers.dense(
            layer,
            self.topic_dim - 1,
            activation=self.cfg["dirichlet_ab_fct"],
            use_bias=self.cfg["dirichlet_ab_use_bias"],
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.constant_initializer(self.cfg["b_init"]),
            name="posterior_b_output")
        # Uniform noise is drawn from [0.01, 0.99) rather than [0, 1).
        # NOTE(review): `tf.floatX` / `np.floatX` are not stock attributes of
        # those modules; presumably attached elsewhere -- confirm.
        uniform_samples = tf.random_uniform(
            (self.cfg["MC_samples"], tf.shape(self.x)[0], self.topic_dim - 1),
            minval=0.01,
            maxval=0.99,
            dtype=tf.floatX)
        if self.cfg.get("bias_on_prior", False):
            # Offset the network outputs by the prior parameters.
            self.prior_a = np.floatX(self.cfg["prior_alpha"])
            self.prior_b = np.floatX(self.cfg["prior_beta"])
            if self.cfg["pitman_yor"]:
                # Per-break prior_b: index * (1 - prior_a) + prior_b.
                self.prior_b = np.floatX(
                    np.arange(self.topic_dim - 1) * (1 - self.prior_a) +
                    self.prior_b)
            self.b = self.b + self.prior_b
            self.a = self.a + self.prior_a
        else:
            # Small offset so the divisions 1 / a and 1 / b never hit zero.
            self.a = self.a + 1e-5
            self.b = self.b + 1e-5
        # Transform the uniform noise into stick-break fractions; the formula
        # has the form of inverse-CDF sampling from a Kumaraswamy(a, b).
        self.vs = (1 - uniform_samples**(1 / self.b))**(1 / self.a)

        # self.vs = tf.Print(self.vs, [tf.reduce_mean(self.vs), tf.reduce_max(self.vs), self.vs[:, 37, :]], summarize=200, message="print_vs: ")
        # Construct topic vector by stick-breaking process
        # stick_segment = tf.zeros((self.cfg["MC_samples"], tf.shape(self.x)[0]))
        # remaining_stick = tf.ones((self.cfg["MC_samples"], tf.shape(self.x)[0]))
        # def stick_breaking(s, elem):
        #     stick = s[1] * self.vs[:, :, elem]
        #     remain = s[1] * (1 - self.vs[:, :, elem])
        #     return (stick, remain)
        # stick_segments, remaining_sticks = tf.scan(fn=stick_breaking, elems=tf.range(self.topic_dim - 1),
        #                                            initializer=(stick_segment, remaining_stick))
        # self.z = tf.transpose(tf.concat((stick_segments, tf.expand_dims(remaining_sticks[-1, :, :], axis=0)), axis=0), (1, 2, 0))
        # # 0.01 -> 99% stick
        # self.average_used_dims = tf.reduce_mean(tf.reduce_sum(tf.cast(remaining_sticks > self.cfg["stick_epsilon"], tf.floatX), axis=0))

        # Stick-breaking (unrolled Python loop): segment i takes the fraction
        # vs[..., i] of whatever is left; the final remainder is the last
        # segment.
        stick_segments_lst = []
        remaining_sticks = tf.ones(
            (self.cfg["MC_samples"], tf.shape(self.x)[0]), dtype=tf.floatX)
        for i in range(self.topic_dim - 1):
            stick_segments_lst.append(remaining_sticks * self.vs[:, :, i])
            remaining_sticks = remaining_sticks * (1 - self.vs[:, :, i])
        stick_segments = tf.stack(
            stick_segments_lst
        )  # (topic_dim - 1) x (MC samples) x (batch size)
        # After the transpose: (MC samples) x (batch size) x (topic_dim)
        self.z_3d = tf.transpose(
            tf.concat(
                (stick_segments, tf.expand_dims(remaining_sticks, axis=0)),
                axis=0), (1, 2, 0))
        # Change
        # Decide which topic dimensions count as "effective". Any other value
        # of cfg["effective_indicator"] leaves these attributes unset.
        if self.cfg["effective_indicator"] == "average":
            # Mean weight per topic (over MC samples and batch) scaled by the
            # batch size, compared against a fixed threshold.
            self.average_of_every_topic = tf.reduce_mean(
                self.z_3d, axis=(0, 1)) * tf.cast(
                    tf.shape(self.x)[0], tf.floatX)
            effective_dims = self.average_of_every_topic > self.cfg[
                "effective_threshold"]
            self.average_used_dims = tf.reduce_sum(
                tf.cast(effective_dims, tf.floatX))
            self.effective_dims = tf.squeeze(tf.where(effective_dims))
        elif self.cfg["effective_indicator"] == "assignment" or self.cfg[
                "effective_indicator"] == "ratio":
            # Count how often each topic is the argmax, and keep the topics
            # whose count exceeds a fraction of (batch size * MC samples).
            self.assignment_of_every_topic = tf.bincount(
                tf.cast(tf.argmax(self.z_3d, axis=-1), tf.int32),
                minlength=self.topic_dim)
            effective_dims_bool = tf.cast(
                self.assignment_of_every_topic,
                tf.floatX) > self.cfg["assignment_threshold"] * tf.cast(
                    tf.shape(self.x)[0], tf.floatX) * self.cfg["MC_samples"]
            # FIXME: for now, if MC_sample is not 1. This is not correct.
            self.average_used_dims = tf.reduce_sum(
                tf.cast(effective_dims_bool, tf.floatX))
            self.effective_dims = tf.squeeze(tf.where(effective_dims_bool))

        # self.average_used_dims = tf.Print(self.average_used_dims, [tf.transpose(remaining_sticks, (1, 2, 0))], "print_remaining", summarize=100, first_n=3)
        # self.z = tf.Print(self.z, [self.z], "print_z", summarize=50)
        # self.z = tf.Print(self.z, [tf.reduce_sum(self.z, axis=-1)], "print_z_sum")
        # Merge the MC-sample and batch axes into one leading axis.
        z = tf.reshape(self.z_3d, [-1, self.topic_dim])
        return z
Пример #9
0
def tensor_operations(num_classes):
    """Create the tensor operations to be used in training and testing.

    Builds six models (indices 0-5; the first three via `conv_model`, the rest
    via `model`), each with its own loss, train op, predictions and accuracy,
    and then combines their probabilities into an ensemble prediction
    according to the module-level `AGGREGATE_METHOD`.

    Args:
        num_classes: Number of output classes passed to every model.

    Returns:
        Dict with the placeholders ('ph'), the per-model lists ('loss_arr',
        'train_op_arr', 'predictions_arr', 'accuracy_arr'), the optional
        per-model 'weights', and the ensemble tensors ('all_preds',
        'model_preds', 'model_top_10_values', 'model_top_10_indices',
        'test_prob', 'test_class', 'test_correct_pred'). 'logits' holds the
        logits of the LAST model only.

    Raises:
        SystemExit: If `AGGREGATE_METHOD` is 'most_common' (not implemented).
        ValueError: If `AGGREGATE_METHOD` is any other unsupported value.
    """
    # Placeholders
    ph = {
        # shape=(None) is just None, i.e. the label shape is unconstrained.
        "y": tf.placeholder(tf.int32, shape=(None)),
        "train": tf.placeholder(tf.bool)
    }

    for c3d_depth in range(6):
        ph["x_" + str(c3d_depth)] = tf.placeholder(
            tf.float32,
            shape=(None, num_features[c3d_depth], window_size[c3d_depth]))

    # Tensor operations
    loss_arr = []
    train_op_arr = []
    predictions_arr = []
    accuracy_arr = []
    weights = {}

    # for each model generate the tensor ops
    for c3d_depth in range(6):
        # logits
        if (c3d_depth < 3):
            logits = conv_model(ph, c3d_depth, num_classes)
        else:
            logits = model(ph, c3d_depth, num_classes)

        # probabilities and associated weights
        probabilities = tf.nn.softmax(logits, name="softmax_tensor")
        if USE_WEIGHTS:
            # One scalar weight per model, initialised to 1, that scales the
            # model's probabilities inside the ensemble.
            weights[c3d_depth] = tf.get_variable(
                "weight_%s" % c3d_depth,
                shape=[1],
                initializer=tf.ones_initializer())
            probabilities = tf.multiply(probabilities, weights[c3d_depth],
                                        "probability_weight")

        # functions for predicting class
        predictions = {
            "classes": tf.argmax(input=logits, axis=1, output_type=tf.int32),
            "probabilities": probabilities
        }
        predictions_arr.append(predictions)

        # functions for training/optimizing the network
        loss = tf.losses.sparse_softmax_cross_entropy(labels=ph["y"],
                                                      logits=logits)
        optimizer = tf.train.AdamOptimizer(learning_rate=ALPHA)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        loss_arr.append(loss)
        train_op_arr.append(train_op)

        # functions for evaluating the network
        correct_pred = tf.equal(predictions["classes"], ph["y"])
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        accuracy_arr.append(accuracy)

    # combine all of the models together for the ensemble
    # Stacking puts the model axis first; the transpose moves it last.
    # NOTE(review): assumes every model emits (batch, classes) logits, giving
    # all_preds the layout (batch, classes, models) -- confirm.
    all_preds = tf.stack([x["probabilities"] for x in predictions_arr])
    all_preds = tf.transpose(all_preds, [1, 2, 0])

    # Per-model view with the last two axes swapped, used for the per-model
    # top-10 values/indices and the per-model argmax class.
    model_preds = tf.transpose(all_preds, [0, 2, 1])
    model_top_10_values, model_top_10_indices = tf.nn.top_k(model_preds, k=10)
    model_preds = tf.argmax(model_preds, axis=2, output_type=tf.int32)

    if AGGREGATE_METHOD == 'average':
        # average over softmaxes
        test_prob = tf.reduce_mean(all_preds, axis=2)
        test_class = tf.argmax(test_prob, axis=1, output_type=tf.int32)
    elif AGGREGATE_METHOD == 'most_common':
        # The former draft implementation after this exit was unreachable and
        # referenced an undefined name, so it has been removed.
        print("Aggregate method most_common not implemented")
        sys.exit(1)
    else:
        # Previously an unknown method fell through and failed below with a
        # NameError on `test_class`.
        raise ValueError(
            "Unsupported AGGREGATE_METHOD: %r" % (AGGREGATE_METHOD,))

    # verify if prediction is correct
    test_correct_pred = tf.equal(test_class, ph["y"])

    operations = {
        'ph': ph,
        'loss_arr': loss_arr,
        'train_op_arr': train_op_arr,
        'predictions_arr': predictions_arr,
        'accuracy_arr': accuracy_arr,
        'weights': weights,
        # Logits of the last model built in the loop above.
        'logits': logits,
        'all_preds': all_preds,
        'model_preds': model_preds,
        'model_top_10_values': model_top_10_values,
        'model_top_10_indices': model_top_10_indices,
        'test_prob': test_prob,
        'test_class': test_class,
        'test_correct_pred': test_correct_pred,
    }

    return operations
Пример #10
0
def domi(rv):
    """Return the index of the largest bin count of `rv`.

    `rv` is bin-counted with `tf.bincount` and the position of the highest
    count is returned, i.e. the most frequent value in `rv`.
    """
    occurrences = tf.bincount(rv)
    return tf.argmax(occurrences)
Пример #11
0
def crnn_fn(features, labels, mode, params):
    """
    Estimator model function: CNN + bidirectional LSTM trained with CTC loss.

    :param features: dict {
                            'images'
                            'images_widths'
                            'filenames'  (optional, forwarded to predictions)
                            }
    :param labels: string tensor with one label per example; each string is
                   split into characters which are mapped to integer codes
                   through a lookup table (unused in PREDICT mode)
    :param mode: one of the tf.estimator.ModeKeys values
    :param params: dict {
                            'Params'  (a `Params` instance)
                        }
    :return: tf.estimator.EstimatorSpec for the given mode
    :raises ValueError: if `parameters.optimizer` is not one of
                        'ada', 'adam' or 'rms'
    """

    parameters = params.get('Params')
    assert isinstance(parameters, Params)

    # Dropout is only active while training.
    if mode == tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 0.7
    else:
        parameters.keep_prob_dropout = 1.0

    conv = deep_cnn(features['images'], (mode == tf.estimator.ModeKeys.TRAIN),
                    summaries=False)
    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                params=parameters,
                                                summaries=False)

    # Compute seq_len from image width
    n_pools = CONST.DIMENSION_REDUCTION_W_POOLING  # 2x2 pooling in dimension W on layer 1 and 2
    seq_len_inputs = tf.divide(
        features['images_widths'], n_pools, name='seq_len_input_op') - 1

    predictions_dict = {
        'prob': logprob,
        'raw_predictions': raw_pred,
    }
    # Filenames are optional; forward them only when provided.
    try:
        predictions_dict['filenames'] = features['filenames']
    except KeyError:
        pass

    if not mode == tf.estimator.ModeKeys.PREDICT:
        # Alphabet and codes
        keys = [c for c in parameters.alphabet]
        values = parameters.alphabet_codes

        # Convert string label to code label
        with tf.name_scope('str2code_conversion'):
            # Characters missing from the alphabet map to -1.
            table_str2int = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
            splited = tf.string_split(
                labels, delimiter=''
            )  # TODO change string split to utf8 split in next tf version
            codes = table_str2int.lookup(splited.values)
            sparse_code_target = tf.SparseTensor(splited.indices, codes,
                                                 splited.dense_shape)

        # Number of characters per label: count how often each row index
        # occurs in the sparse target. minlength is dimension 1 of 'prob',
        # which is the batch axis given time_major=True below.
        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0], tf.int32),
            minlength=tf.shape(predictions_dict['prob'])[1])

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error
        # NOTE(review): a plain boolean tensor used as a control dependency is
        # only evaluated, it does not raise when False; an explicit assert op
        # may have been intended -- confirm before changing, since
        # ignore_longer_outputs_than_inputs=True already tolerates that case.
        with tf.control_dependencies([
                tf.less_equal(sparse_code_target.dense_shape[1],
                              tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=
                True,  # returns zero gradient in case it happens -> ema loss = NaN
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)
            loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

        global_step = tf.train.get_or_create_global_step()
        # # Create an ExponentialMovingAverage object
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        # Create the shadow variables, and add op to maintain moving averages
        maintain_averages_op = ema.apply([loss_ctc])
        loss_ema = ema.average(loss_ctc)

        # Train op
        # --------
        learning_rate = tf.train.exponential_decay(
            parameters.learning_rate,
            global_step,
            parameters.learning_decay_steps,
            parameters.learning_decay_rate,
            staircase=True)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            # Previously an unknown name left `optimizer` unbound and failed
            # at `minimize` with a NameError.
            raise ValueError(
                "Unknown optimizer %r (expected 'ada', 'adam' or 'rms')" %
                (parameters.optimizer,))

        # The train op runs the collected update ops and the optimisation
        # step, then refreshes the moving average of the loss.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc, global_step=global_step)
        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # Summaries
        # ---------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            # Codes missing from the decoding alphabet map to '?'.
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

            # Decode the two best paths so that a confidence score can be
            # derived from the gap between their log-probabilities.
            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=2)
            # Score
            predictions_dict['score'] = tf.subtract(log_probability[:, 0],
                                                    log_probability[:, 1])
            # around 10.0 -> seems pretty sure, less than 5.0 bit unsure, some errors/challenging images
            sparse_code_pred = sparse_code_pred[0]

            # Number of predicted characters per example (same counting trick
            # as for the labels above).
            sequence_lengths_pred = tf.bincount(
                tf.cast(sparse_code_pred.indices[:, 0], tf.int32),
                minlength=tf.shape(predictions_dict['prob'])[1])

            pred_chars = table_int2str.lookup(sparse_code_pred)
            predictions_dict['words'] = get_words_from_chars(
                pred_chars.values, sequence_lengths=sequence_lengths_pred)

            tf.summary.text('predicted_words', predictions_dict['words'][:10])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            # Mean edit distance between predicted and target code sequences.
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred, tf.cast(sparse_code_target, dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to the decoding alphabet to compare predicted and ground-truth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            # NOTE(review): these Print ops are created after eval_metric_ops
            # was assembled and their results are not used afterwards.
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy], message='-- Accuracy : ')

    else:
        eval_metric_ops = None

    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss_ctc,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=tf.train.Scaffold()
        # scaffold=tf.train.Scaffold(init_fn=None)  # Specify init_fn to restore from previous model
    )
Пример #12
0
def distribuited_k_means(data_batch, K, GPU_names, n_max_iters):
    """Run a fixed number of k-means updates with the data sharded over devices.

    The rows of `data_batch` are split into one shard per entry of `GPU_names`.
    Every device computes the squared distances of its rows to the shared
    centroids and the mean of the rows assigned to each centroid. The CPU then
    merges the per-device means, weighted by the per-device cluster counts,
    into the new global centroids. No convergence test is performed: exactly
    `n_max_iters` updates are run.

    Args:
        data_batch: 2-D array (num_points x num_dims). Its `dtype` and `shape`
            define the input placeholder.
            NOTE(review): the cluster counts are cast to tf.float64 before
            being multiplied with the centroids, so the data presumably has
            to be float64 as well -- confirm.
        K: number of centroids.
        GPU_names: sequence of device strings; one data shard is placed on
            each of them.
        n_max_iters: number of centroid updates to run; must be >= 1.

    Returns:
        A dict with the keys
        'end_center'          : global centroids fetched during the last step.
                                NOTE(review): fetched in the same `run` call as
                                the assignment, so whether this is the value
                                before or after the last update is presumably
                                unspecified -- confirm.
        'cluster_idx'         : index of the closest centroid for every row,
                                evaluated after the last update.
        'centroids'           : value of `best_centroids`, which refers to the
                                tensor built for the LAST device only.
        'init_center'         : the initial centroids.
        'setup_time'          : seconds spent building the graph.
        'initialization_time' : seconds spent initializing the variables.
        'computation_time'    : seconds spent in the update steps.
        'n_iter'              : number of update steps that were run.

    Raises:
        ValueError: if `n_max_iters` is smaller than 1.
    """
    if n_max_iters < 1:
        # Without at least one step none of the reported results would exist.
        raise ValueError('n_max_iters must be >= 1, got %r' % (n_max_iters,))

    setup_ts = time.time()

    # Number of rows handed to each device.
    sizes = [len(arg) for arg in np.array_split( data_batch, len(GPU_names))]
    result_matrix = [[] for _ in GPU_names]

    # Per-device cluster counts and count-weighted centroids.
    partial_directions = []
    partial_values = []

    initial_centers = k_means_._init_centroids(data_batch, K, init='k-means++')

    tf.reset_default_graph()
    with tf.name_scope('global'):
        with tf.device('/cpu:0'):
            all_data = tf.placeholder(data_batch.dtype, shape=(data_batch.shape), name='all_data')
            parts = tf.split(all_data, sizes, 0)

            global_centroids = tf.Variable(initial_centers)

    for GPU_num, GPU_name in enumerate(GPU_names):
        X_mat = parts[GPU_num]
        (N, M) = X_mat.get_shape().as_list()

        with tf.name_scope('scope_' + str(GPU_num)):
            with tf.device(GPU_name) :
                ####
                # In the comments we denote :
                # => N = Number of Observations
                # => M = Number of Dimensions
                # => K = Number of Centers
                ####

                # Data for device GPU_num to cluster. It is a variable, so the
                # placeholder only has to be fed when variables are initialized.
                X = tf.Variable(X_mat)

                # Reshapes rep_centroids and rep_points to format N x K x M so that
                # the 2 matrices have the same size
                rep_centroids = tf.reshape(tf.tile(global_centroids, [N, 1]), [N, K, M])
                rep_points = tf.reshape(tf.tile(X, [1, K]), [N, K, M])

                # Calculates sum_squares, a matrix of size N x K
                # This matrix is not sqrt((X-Y)^2), it is just (X-Y)^2
                # Since we only need argmin(sqrt((X-Y)^2)), which is equal to
                # argmin((X-Y)^2), the sqrt would be a waste of computation
                subtraction = tf.subtract(rep_points, rep_centroids)
                square = tf.square(subtraction)
                sum_squares = tf.reduce_sum(square, axis = 2)

                # Use argmin to select the closest centroid of every point
                # (one index per row of X)
                best_centroids = tf.argmin(sum_squares, axis = 1)
                result_matrix[GPU_num] = sum_squares

                # Mean of the points assigned to each centroid.
                # NOTE(review): a centroid without assigned points averages
                # over an empty set -- presumably NaN; confirm how callers
                # are expected to deal with that.
                means = []
                for c in range(K):
                    aux_points = tf.gather(X, tf.reshape(tf.where(tf.equal(best_centroids, c)), [1,-1]))
                    means.append(tf.reduce_mean(aux_points, axis=[1]))

                new_centroids = tf.concat(means, 0)

            with tf.device('/cpu:0'):
                # Number of points of this shard assigned to each centroid.
                y_count = tf.cast(
                    tf.bincount(tf.to_int32(best_centroids), maxlength = K, minlength = K), dtype = tf.float64)

                # M x K matrix: each centroid scaled by its point count, so
                # that the shards can be merged as a weighted average.
                partial_mu =  tf.multiply( tf.transpose(new_centroids), y_count )

                partial_directions.append( y_count )
                partial_values.append( partial_mu )

    with tf.name_scope('global') :
        with tf.device('/cpu:0') :
            # Closest centroid for every row of the complete data set.
            result_matrix = tf.argmin(tf.concat(result_matrix, 0), axis = 1)

            sum_direction = tf.add_n( partial_directions )
            sum_mu = tf.add_n( partial_values )

            # Repeat the total counts M times so they match sum_mu (M x K).
            # M is the value left over from the last shard.
            rep_sum_direction = tf.reshape(tf.tile(sum_direction, [M]), [M, K])
            new_centers = tf.transpose( tf.div(sum_mu, rep_sum_direction) )

            update_centroid = tf.group( global_centroids.assign(new_centers) )

    setup_time = float( time.time() - setup_ts )

    config = tf.ConfigProto( allow_soft_placement = True )
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'

    with tf.Session( config = config ) as sess:
        initialization_ts = time.time()
        sess.run(tf.global_variables_initializer(), feed_dict={all_data: data_batch})
        initialization_time = float( time.time() - initialization_ts )

        computation_time = 0.0
        for i in range(n_max_iters):
            aux_ts = time.time()
            [result, centroids, _] = sess.run([global_centroids, best_centroids, update_centroid])
            computation_time += float(time.time() - aux_ts)

        # Only the assignment after the final update is reported, so it is
        # evaluated once here instead of once per iteration.
        cluster_idx = sess.run(result_matrix, feed_dict={all_data: data_batch})

    end_result = {  'end_center'          : result             ,
                    'cluster_idx'         : cluster_idx        ,
                    'centroids'           : centroids          ,
                    'init_center'         : initial_centers    ,
                    'setup_time'          : setup_time         ,
                    'initialization_time' : initialization_time,
                    'computation_time'    : computation_time   ,
                    'n_iter'              : i+1
                }

    return end_result
Пример #13
0
def approx_kl_divergence(p,
                         logits,
                         partitions,
                         partitions_dist,
                         scope=None,
                         partial_loss=True,
                         partitions_dist_scale=1.0,
                         skip_normalization=False):
    '''Per-element loss terms of a partition-weighted approximate KL divergence.

    Both the target `p` and the prediction `exp(logits)` are normalized with a
    weighted sum over the last axis. The weight of an element is the relative
    size of its partition divided by the number of sampled elements that fall
    into that partition.

    Args:
      p: tensor with shape [..., N] in which elements are samples from the
        (unnormalized) target distribution.
      logits: tensor with the same type and shape as p in which elements are
        samples from the predicted logits.
      partitions: integer tensor in which each element is the index of the
        partition that the corresponding element of p and logits comes from.
        All elements of one partition are assumed to have the same value.
        max(partitions) < M.
      partitions_dist: a tensor (or value convertible to one) with shape [M]
        holding the relative size of each of the M partitions.
      scope: optional variable scope name; defaults to 'approx_kl_divergence'.
      partial_loss: if True the result is scaled by `partitions_dist_scale`;
        otherwise the scale is 1 and `p` is multiplied by the weights.
      partitions_dist_scale: scalar whose log is added to the normalized
        log-prediction. Forced to 1.0 when `skip_normalization` is set.
      skip_normalization: if True all weights are replaced by ones.

    Returns:
      A tensor with one loss term per element of the flattened inputs:
      `-loss_scale * p * (log_q + log(partitions_dist_scale))`. The terms are
      not summed here.
    '''
    # `values` lists every tensor input of this op. The original passed
    # `partitions` twice and left `partitions_dist` out.
    with tf.variable_scope(scope,
                           'approx_kl_divergence',
                           values=[p, logits, partitions, partitions_dist]):

        util.add_extra_tensor('logits', logits)
        util.add_extra_tensor('partitions', partitions)
        util.add_extra_tensor('partitions_dist', partitions_dist)

        partitions_dist = tf.convert_to_tensor(partitions_dist)
        # presumably collapses the leading dimensions into one batch axis,
        # giving [B, N] tensors -- verify against _flatt_bach
        p, logits, partitions = [
            _flatt_bach(t) for t in [p, logits, partitions]
        ]
        m = partitions_dist.shape[0].value

        ## Count the number of elements in each partition
        count = tf.map_fn(
            lambda arr: tf.bincount(arr, minlength=m, maxlength=m), partitions)
        ## count shape = [B, M]
        count.set_shape([partitions.shape[0].value, m])

        ## Adjust the weights based on the counts.
        ## A partition with a zero count produces inf here, but such an entry
        ## is presumably never gathered because no element refers to it.
        partitions_dist2 = tf.truediv(partitions_dist[tf.newaxis],
                                      tf.cast(count, tf.float32))
        weights = util.batched_gather(partitions, partitions_dist2)

        if skip_normalization:
            weights = tf.ones_like(weights)
            partitions_dist_scale = 1.0
        ## Subtract the (finite) row maximum before exponentiating for
        ## numerical stability; see the tf.reduce_logsumexp implementation.
        raw_max = tf.reduce_max(logits, axis=-1)
        my_max = tf.stop_gradient(
            tf.where(tf.is_finite(raw_max), raw_max, tf.zeros_like(raw_max)))

        logits = logits - my_max[..., tf.newaxis]
        q_normalizer = tf.reduce_sum(weights * tf.exp(logits),
                                     keep_dims=True,
                                     axis=-1)

        p_normalizer = tf.reduce_sum(weights * p, keep_dims=True, axis=-1)

        p = tf.truediv(p, p_normalizer)

        if partial_loss:
            loss_scale = partitions_dist_scale
        else:
            loss_scale = 1.0
            p = p * weights

        return -loss_scale * p * (logits - tf.log(q_normalizer) +
                                  tf.log(partitions_dist_scale))
def _compute_word_overlap(context_ids, context_len, question_ids, question_len,
                          reduce_type, weighted, vocab_df):
    """Score how many question words occur in each context.

    Args:
      context_ids: <int32> [batch_size, num_contexts, max_context_len]
      context_len: <int32> [batch_size, num_contexts]
      question_ids: <int32> [batch_size, max_question_len]
      question_len: <int32> [batch_size]
      reduce_type: "max" counts at most one match per question word; "sum"
        adds up every match of every question word.
      weighted: Boolean; if set, each match is divided by the frequency of
        the question word.
      vocab_df: Tensor of shape [vocab_size] with word frequencies. When None
        (and `weighted` is set) the frequencies are counted over the context
        ids of the current batch.

    Returns:
      overlap: <float32> [batch_size, num_contexts]

    Raises:
      Exception: If invalid reduce_type is provided.
    """
    # <float> [batch_size, num_contexts, question_len, context_len]
    match_matrix = _word_overlap_helper(question_ids=question_ids,
                                        context_ids=context_ids)
    overlap = tf.to_float(match_matrix)

    # <float> [batch_size, question_len]; zero on question padding.
    max_question_len = tf.shape(question_ids)[1]
    question_mask = tf.sequence_mask(question_len,
                                     max_question_len,
                                     dtype=tf.float32)

    # <float> [batch_size, num_contexts, context_len]; zero on context padding.
    max_context_len = tf.shape(context_ids)[2]
    context_mask = tf.sequence_mask(context_len,
                                    max_context_len,
                                    dtype=tf.float32)

    # Broadcast both masks over the match matrix to drop padded positions.
    question_mask_4d = tf.expand_dims(tf.expand_dims(question_mask, 1), -1)
    overlap = overlap * question_mask_4d
    context_mask_4d = tf.expand_dims(context_mask, 2)
    overlap = overlap * context_mask_4d

    if weighted:
        if vocab_df is None:
            # Count word frequencies over the context ids of this batch.
            flat_context_ids = tf.to_int32(tf.reshape(context_ids, [-1]))

            # minlength makes every question id a valid index into the counts.
            min_vocab_size = tf.reduce_max(question_ids) + 1
            batch_df = tf.bincount(flat_context_ids,
                                   minlength=min_vocab_size,
                                   dtype=tf.float32)

            # Words that never occur get a count of one to avoid dividing by 0.
            is_unseen = tf.equal(batch_df, 0)
            vocab_df = tf.where(is_unseen,
                                x=tf.ones_like(batch_df),
                                y=batch_df)

        # <float> [batch_size, question_len] expanded to
        # <float> [batch_size, 1, question_len, 1]
        question_df = tf.gather(vocab_df, question_ids)
        question_df = tf.expand_dims(question_df, 1)
        question_df = tf.expand_dims(question_df, -1)

        # <float> [batch_size, num_contexts, question_len, context_len]
        overlap = tf.divide(tf.to_float(overlap), question_df)

    # Both reductions yield <float> [batch_size, num_contexts].
    if reduce_type == "sum":
        return tf.reduce_sum(overlap, axis=[2, 3])
    if reduce_type == "max":
        best_per_question_word = tf.reduce_max(overlap, axis=[3])
        return tf.reduce_sum(best_per_question_word, axis=[2])
    raise Exception("Reduce type %s is invalid." % reduce_type)
Пример #15
0
def expected_calibration_error(y_true, y_pred, nbins=20):
  """Streaming metric for the Expected Calibration Error (ECE).

  The confidence range [0, 1] is cut into `nbins` equally wide bins. For each
  bin three running totals are kept in local metric variables: the number of
  predictions (`bin_counts`), the sum of the ground-truth values
  (`true_sum`) and the sum of the confidences (`preds_sum`). The ECE itself
  is derived from these totals by `_ece_from_bins`.

  Reference:
    Guo, et. al, "On Calibration of Modern Neural Networks"
    Page 2, Expected Calibration Error (ECE).
    https://arxiv.org/pdf/1706.04599.pdf

  Args:
    y_true: 1-D tf.int64 Tensor of binarized ground truth, one entry per
      prediction in y_pred.
    y_pred: 1-D tf.float32 tensor of model confidence scores in range
      [0.0, 1.0].
    nbins: int, number of uniformly-spaced bins used to bucket y_pred.

  Returns:
    value_op: op computing the ECE from the accumulated totals.
    update_op: op that adds the current batch to the totals and computes the
      ECE from the updated totals.

  Raises:
    InvalidArgumentError: if y_pred is not in [0.0, 1.0].
  """
  bin_shape = [nbins]
  bin_counts = metrics_impl.metric_variable(
      bin_shape, tf.float32, name='bin_counts')
  bin_true_sum = metrics_impl.metric_variable(
      bin_shape, tf.float32, name='true_sum')
  bin_preds_sum = metrics_impl.metric_variable(
      bin_shape, tf.float32, name='preds_sum')

  # Binning only runs once the confidences are known to lie in [0, 1].
  range_checks = [
      tf.assert_greater_equal(y_pred, 0.0),
      tf.assert_less_equal(y_pred, 1.0),
  ]
  with tf.control_dependencies(range_checks):
    bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0], nbins=nbins)

  with tf.control_dependencies([bin_ids]):
    # Per-bin totals of the current batch, added onto the running totals.
    batch_counts = tf.to_float(tf.bincount(bin_ids, minlength=nbins))
    update_bin_counts_op = tf.assign_add(bin_counts, batch_counts)

    batch_true_sum = tf.to_float(
        tf.bincount(bin_ids, weights=y_true, minlength=nbins))
    update_bin_true_sum_op = tf.assign_add(bin_true_sum, batch_true_sum)

    batch_preds_sum = tf.to_float(
        tf.bincount(bin_ids, weights=y_pred, minlength=nbins))
    update_bin_preds_sum_op = tf.assign_add(bin_preds_sum, batch_preds_sum)

  ece_update_op = _ece_from_bins(
      update_bin_counts_op,
      update_bin_true_sum_op,
      update_bin_preds_sum_op,
      name='update_op')
  ece = _ece_from_bins(bin_counts, bin_true_sum, bin_preds_sum, name='value')
  return ece, ece_update_op
Пример #16
0
  def __init__(self, corpus_file, staffline_extractor, **kwargs):
    """Build a nearest-neighbor glyph classifier from labeled patches.

    Every patch cut from the extracted stafflines is compared with all corpus
    patches. The labels of the `K_NEAREST_VALUE` closest corpus patches are
    counted and the most frequent label becomes the prediction. The result is
    stored in `self.output`, padded with NONE along the last axis to the
    staffline width.

    Args:
      corpus_file: Path to the TFRecords of Examples with patch (cluster) values
        in the "patch" feature, and the glyph label in the "label" feature.
      staffline_extractor: The staffline extractor.
      **kwargs: Passed through to `Convolutional1DGlyphClassifier`.
    """
    super(NearestNeighborGlyphClassifier, self).__init__(**kwargs)

    patch_height, patch_width = corpus.get_patch_shape(corpus_file)
    centroids, labels = corpus.parse_corpus(corpus_file, patch_height,
                                            patch_width)
    # Flatten every corpus patch into a single row vector.
    centroids_shape = tf.shape(centroids)
    flattened_centroids = tf.reshape(
        centroids,
        [centroids_shape[0], centroids_shape[1] * centroids_shape[2]])
    self.staffline_extractor = staffline_extractor
    stafflines = staffline_extractor.extract_staves()
    # Width of the extracted stafflines; the output is padded back to it.
    width = tf.shape(stafflines)[-1]
    # Shape (num_staves, num_stafflines, num_patches, height, patch_width).
    staffline_patches = patches.patches_1d(stafflines, patch_width)
    staffline_patches_shape = tf.shape(staffline_patches)
    # One row per patch: the first three axes are merged, and so are the
    # two pixel axes.
    flattened_patches = tf.reshape(staffline_patches, [
        staffline_patches_shape[0] * staffline_patches_shape[1] *
        staffline_patches_shape[2],
        staffline_patches_shape[3] * staffline_patches_shape[4]
    ])
    distance_matrix = _squared_euclidean_distance_matrix(
        flattened_patches, flattened_centroids)

    # Take the k centroids with the lowest distance to each patch. Wrap the k
    # constant in a tf.identity, which tests can use to feed in another value.
    k_value = tf.identity(tf.constant(K_NEAREST_VALUE), name='k_nearest_value')
    nearest_centroid_inds = tf.nn.top_k(-distance_matrix, k=k_value)[1]
    # Get the label corresponding to each nearby centroids, and reshape the
    # labels back to the original shape.
    nearest_labels = tf.reshape(
        tf.gather(labels, tf.reshape(nearest_centroid_inds, [-1])),
        tf.shape(nearest_centroid_inds))
    # Make a histogram of counts for each glyph type in the nearest centroids,
    # for each row (patch). minlength and maxlength together pin the histogram
    # to exactly NUM_GLYPHS entries, so every row handled by map_fn has the
    # same length regardless of which labels occur in it.
    bins = tf.map_fn(
        lambda row: tf.bincount(
            row, minlength=NUM_GLYPHS, maxlength=NUM_GLYPHS),
        tf.to_int32(nearest_labels))
    # Take the argmax of the histogram to get the top prediction. Discard glyph
    # type 1 (NONE) for now. The + 2 turns the index within the sliced
    # histogram back into a glyph type (the slice starts right after NONE).
    mode_out_of_k = tf.argmax(
        bins[:, musicscore_pb2.Glyph.NONE + 1:], axis=1) + 2
    # Force predictions to NONE only if all k nearby centroids were NONE.
    # Otherwise, the non-NONE nearby centroids will contribute to the
    # prediction.
    mode_out_of_k = tf.where(
        tf.equal(bins[:, musicscore_pb2.Glyph.NONE], k_value),
        tf.fill(
            tf.shape(mode_out_of_k), tf.to_int64(musicscore_pb2.Glyph.NONE)),
        mode_out_of_k)
    predictions = tf.reshape(mode_out_of_k, staffline_patches_shape[:3])

    # Pad the output along the last axis so it is as wide as the stafflines.
    # The missing columns are split between both sides.
    predictions_width = tf.shape(predictions)[-1]
    pad_before = (width - predictions_width) // 2
    pad_shape_before = tf.concat([staffline_patches_shape[:2], [pad_before]],
                                 axis=0)
    pad_shape_after = tf.concat(
        [staffline_patches_shape[:2], [width - predictions_width - pad_before]],
        axis=0)
    self.output = tf.concat(
        [
            # NONE has value 1.
            tf.ones(pad_shape_before, tf.int64),
            predictions,
            tf.ones(pad_shape_after, tf.int64),
        ],
        axis=-1)
Пример #17
0
def knn_kmeans_model(centroids, labels, patches=None):
    """Build the graph of the k-nearest-neighbor classifier over k-means centroids.

    Each input patch is compared with every centroid. The labels of the
    `K_NEAREST_VALUE` closest centroids are counted and the most frequent one
    is the prediction for that patch.

    Args:
      centroids: The k-means centroids NumPy array. Shape `(num_centroids,
        patch_height, patch_width)`.
      labels: The centroid labels NumPy array. Vector with length
        `num_centroids`.
      patches: Optional input tensor for the patches. If None, a float32
        placeholder named 'patches' is created.

    Returns:
      The predictions (class ids) tensor determined from the input patches,
      one entry per patch.
    """
    with tf.name_scope('knn_model'):
        centroids = tf.identity(_to_float(tf.constant(_to_uint8(centroids))),
                                name='centroids')
        labels = tf.constant(labels, name='labels')

        # Flatten every centroid into one row vector.
        dynamic_shape = tf.shape(centroids)
        num_centroids = dynamic_shape[0]
        patch_height = dynamic_shape[1]
        patch_width = dynamic_shape[2]
        flat_centroid_shape = [num_centroids, patch_height * patch_width]
        flattened_centroids = tf.reshape(centroids,
                                         flat_centroid_shape,
                                         name='flattened_centroids')

        if patches is None:
            # The patch size is fixed by the centroids; the count is free.
            placeholder_shape = (None, centroids.shape[1], centroids.shape[2])
            patches = tf.placeholder(tf.float32,
                                     placeholder_shape,
                                     name='patches')

        # Flatten the patches the same way as the centroids.
        patches_shape = tf.shape(patches)
        flat_patch_shape = [
            patches_shape[0], patches_shape[1] * patches_shape[2]
        ]
        flattened_patches = tf.reshape(patches,
                                       flat_patch_shape,
                                       name='flattened_patches')

        with tf.name_scope('distance_matrix'):
            distance_matrix = _squared_euclidean_distance_matrix(
                flattened_patches, flattened_centroids)

        # k is wrapped in a tf.identity so that tests can feed another value.
        k_value = tf.identity(tf.constant(K_NEAREST_VALUE),
                              name='k_nearest_value')
        # The largest negated distances are the smallest distances.
        _, nearest_inds = tf.nn.top_k(-distance_matrix, k=k_value)

        # Look up the label of every nearby centroid, keeping the shape of
        # the index matrix.
        flat_inds = tf.reshape(nearest_inds, [-1])
        flat_labels = tf.gather(labels, flat_inds)
        nearest_labels = tf.reshape(flat_labels,
                                    tf.shape(nearest_inds),
                                    name='nearest_labels')

        # Per patch (row), count how often each glyph type occurs among the
        # nearest centroids.
        length = NUM_GLYPHS

        def _label_histogram(row):
            return tf.bincount(row, minlength=length, maxlength=length)

        bins = tf.map_fn(_label_histogram,
                         tf.to_int32(nearest_labels),
                         name='bins')

        with tf.name_scope('mode_out_of_k'):
            none_id = musicscore_pb2.Glyph.NONE
            # Most frequent glyph type, ignoring glyph type 1 (NONE) for now.
            counts_after_none = bins[:, none_id + 1:]
            mode_out_of_k = tf.argmax(counts_after_none, axis=1) + 2
            # Predict NONE only when all k nearby centroids were NONE;
            # otherwise the non-NONE centroids decide the prediction.
            all_neighbors_none = tf.equal(bins[:, none_id], k_value)
            none_predictions = tf.fill(tf.shape(mode_out_of_k),
                                       tf.to_int64(none_id))
            mode_out_of_k = tf.where(all_neighbors_none, none_predictions,
                                     mode_out_of_k)
        return tf.identity(mode_out_of_k, name='predictions')
Пример #18
0
# Combine the predictions of all models into one ensemble prediction.
# Stacking puts the model axis first; the transpose moves it to the end.
# assumes each x["probabilities"] is [batch, num_classes], giving
# all_preds = [batch, num_classes, num_models] -- TODO confirm
all_preds = tf.stack([x["probabilities"] for x in predictions_arr])
all_preds = tf.transpose(all_preds, [1,2,0])

# Per-model view: swap the last two axes, then take each model's 10 highest
# values and its single best index along the last axis.
model_preds = tf.transpose(all_preds, [0, 2, 1])
model_top_10_values, model_top_10_indices = tf.nn.top_k(model_preds, k=10)
model_preds = tf.argmax(model_preds, axis=2, output_type=tf.int32)

# NOTE(review): test_class is only assigned for the two aggregate_method
# values handled below; any other value leaves it undefined for the
# comparison that follows.
if aggregate_method == 'average':
  # average over softmaxes
  test_prob = tf.reduce_mean(all_preds, axis = 2)
  test_class = tf.argmax(test_prob, axis=1, output_type=tf.int32)

elif aggregate_method == 'most_common':
  # Vote: take every model's argmax, then pick the most frequent value.
  # NOTE(review): `test_prob_max` is not defined in this fragment, while
  # `test_prob` from the previous line is not used in this branch -- confirm
  # which name is intended.
  test_prob = tf.argmax(all_preds, axis=1, output_type=tf.int32)
  test_class = tf.argmax(tf.bincount(test_prob_max), output_type=tf.int32)

# verify if prediction is correct
test_correct_pred = tf.equal(test_class, ph["y"])


##############################################
# File IO
##############################################


def read_file(filename_list):
  # Reads the "data" and "label" entries of every file in filename_list
  # with np.load.
  # NOTE(review): the body appears to be cut off in this fragment -- the two
  # accumulator lists are never filled and nothing is returned here.
  all_data, all_labels = [], []
  for file in filename_list:
    infile = np.load(file)
    data, labels = infile["data"], infile["label"]
Пример #19
0
def set_metrics(label, predicted):
    """Count the confusion-matrix cells of a binary classification.

    Every (label, prediction) pair is encoded as `label + 2 * predicted` and
    the codes are counted. Assuming both inputs only contain 0 and 1 (TODO
    confirm against callers), the codes are
    0 = true negative, 1 = false negative, 2 = false positive,
    3 = true positive, matching the order in the name scope.

    Args:
        label: tensor of ground-truth values, cast to int32.
        predicted: tensor of predicted values with the same shape as `label`,
            cast to int32.

    Returns:
        1-D int32 tensor of counts indexed by the code above. It always has at
        least four entries, so all four cells can be read even when the
        highest codes do not occur in the inputs.
    """
    with tf.name_scope("metrics_tn_fn_fp_tp"):
        label = tf.cast(label, tf.int32)
        predicted = tf.cast(predicted, tf.int32)
        # Without minlength the result would end at the highest code present.
        return tf.bincount(label + 2 * predicted, minlength=4)
Пример #20
0
def main(_):
    """Train the selected network, validate it periodically and test it per track.

    Builds the graph for the network chosen by FLAGS.net_depth, trains it for
    FLAGS.max_steps passes over the training batches, logs loss and
    validation accuracy as summaries, then evaluates every test track with
    the raw, max-probability and majority-vote predictions. The confusion
    matrix of the max-probability predictions is written to "confusion.csv".

    Returns early (after printing an error) if FLAGS.net_depth is neither
    'shallow' nor 'deep'.
    """
    tf.reset_default_graph()

    # Import data
    gztan = GZTan(FLAGS.num_batches, mel=(FLAGS.repr_func == 'mel'))

    # print('num train tracks: {}'.format(gztan.nTrainTracks))

    with tf.variable_scope('inputs'):
        # Create the model
        x = tf.placeholder(tf.float32, [None, 80, 80, 1])
        # Define loss and optimizer
        y_ = tf.placeholder(tf.float32, [None, FLAGS.num_classes])
        # Single-element boolean that is fed [True] for training and [False]
        # otherwise.
        train_flag = tf.placeholder(tf.bool, [1])
        # Label vector of a single track (one entry per class).
        label = tf.placeholder(tf.int32, [FLAGS.num_classes])

    # Build the graph for the deep net
    if FLAGS.net_depth == 'shallow':
        print('SHALLOW')
        y_conv = shallownn(x, train_flag)
    elif FLAGS.net_depth == 'deep':
        print('DEEP')
        y_conv = deepnn(x, train_flag)
    else:
        print("Error: Unrecognised depth.")
        return

    # Define loss function - softmax_cross_entropy + L1 regularisation
    with tf.name_scope("regularized_loss"):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.0001)
        # The L1 penalty is applied to every trainable variable.
        weights = tf.trainable_variables()
        regularization_penalty = tf.contrib.layers.apply_regularization(l1_regularizer, weights)
        regularized_cross_entropy = tf.add(cross_entropy, regularization_penalty, name='reg_loss')

    # Define AdamOptimiser, using FLAGS.learning_rate to minimize the loss function
    if FLAGS.decay == 'const':
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(regularized_cross_entropy)
    else:
        # batch_number is passed as global_step to minimize() and drives the
        # exponential decay (factor 0.9, decay_steps 3000).
        batch_number = tf.Variable(0, trainable=False)
        our_learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, batch_number, 3000, 0.9)
        optimizer = tf.train.AdamOptimizer(our_learning_rate).minimize(regularized_cross_entropy, global_step=batch_number)

    # Calculate the prediction and the accuracy
    raw_prediction = tf.argmax(y_conv, 1)
    raw_prediction_correct = tf.cast(tf.equal(raw_prediction, tf.argmax(y_, 1)), tf.float32)
    raw_accuracy = tf.reduce_mean(raw_prediction_correct)

    # Max-probability vote: sum the network outputs over all fed samples and
    # take the best class. y_conv is what the loss above consumes as logits.
    # The *_correct tensors and av_confidence below are built but not fetched
    # anywhere in this function.
    max_prob_prediction = tf.argmax(tf.reduce_sum(y_conv, 0), 0)
    max_prob_prediction_correct = tf.cast(tf.equal(max_prob_prediction, tf.argmax(label)), tf.int32)

    # Majority vote: the class predicted for the most samples.
    vote_count = tf.bincount(tf.cast(raw_prediction, tf.int32))
    maj_vote_prediction = tf.argmax(vote_count)
    maj_vote_prediction_correct = tf.cast(tf.equal(maj_vote_prediction, tf.argmax(label)), tf.int32)

    av_confidence = tf.reduce_mean(y_conv, 0)

    # saver for checkpoints (only referenced by the commented-out code below)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir + '/_train', sess.graph)
        summary_writer_validation = tf.summary.FileWriter(FLAGS.log_dir + '/_validate', sess.graph, flush_secs=5)

        sess.run(tf.global_variables_initializer())

        # Training and validation
        for step in range(FLAGS.max_steps):
            # Training: Backpropagation using train set
            # total_loss is the sum (not the mean) of the batch losses.
            total_loss = 0
            for batchNum in range(FLAGS.num_batches):
                (train_samples, train_labels) = gztan.getTrainBatch(batchNum)
                _, batch_loss = sess.run([optimizer, regularized_cross_entropy], feed_dict={x: train_samples, train_flag: [True], y_: train_labels})
                total_loss += batch_loss

            # NOTE(review): the loss is logged every log_frequency + 1 steps
            # while validation below uses log_frequency -- confirm the
            # difference is intended.
            if step % (FLAGS.log_frequency + 1) == 0:
                loss_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Regularized_Loss", simple_value=total_loss), 
                ])
                summary_writer.add_summary(loss_summary, step)

            # Validation: Monitoring accuracy using validation set
            if step % FLAGS.log_frequency == 0:
                total_accuracy = 0.0
                for batchNum in range(FLAGS.num_batches):
                    (test_samples, test_labels) = gztan.getTestBatch(batchNum)
                    validation_accuracy = sess.run(raw_accuracy, feed_dict={x: test_samples, train_flag: [False], y_: test_labels})
                    total_accuracy += validation_accuracy

                # Unweighted mean of the per-batch accuracies.
                total_accuracy = total_accuracy / FLAGS.num_batches
                print('step %d, accuracy on validation batch: %g' % (step, total_accuracy))

                tot_acc_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Total_Raw_Accuracy", simple_value=total_accuracy), 
                ])
                summary_writer_validation.add_summary(tot_acc_summary, step)

            # # Save the model checkpoint periodically.
            # if step % FLAGS.save_model == 0 or (step + 1) == FLAGS.max_steps:
            #     checkpoint_path = FLAGS.log_dir + '/_train' + '/model.ckpt'
            #     saver.save(sess, checkpoint_path, global_step=step)

            gztan.shuffle()

        # Testing
        mp_pred_correct = []
        mv_pred_correct = []
        raw_pred_acc = []
        # Set once misclassified samples of one track have been dumped, so
        # that no further tracks are dumped.
        done = False

        print('num test tracks: {}'.format(gztan.nTracks))
        # Rows are true classes, columns are max-probability predictions.
        confusion_matrix = np.zeros((FLAGS.num_classes, FLAGS.num_classes), dtype=np.int32)
        for track_id in range(gztan.nTracks):
            (track_samples, track_labels) = gztan.getTrackSamples(track_id)
            # The first sample's label is used as the label of the whole track.
            track_label = track_labels[0]
            test_raw_acc = sess.run(raw_accuracy, feed_dict={x: track_samples, train_flag: [False], y_: track_labels})

            test_mp_prediction = sess.run(max_prob_prediction, feed_dict={x: track_samples, train_flag: [False]})
            test_mp_prediction_correct = (test_mp_prediction == np.argmax(track_label))
            # maj_vote_prediction is built from y_conv only; `label` is fed
            # here although the fetched tensor does not use it.
            test_mv_prediction = sess.run(maj_vote_prediction, feed_dict={x: track_samples, train_flag: [False], label: track_label})
            test_mv_prediction_correct = (test_mv_prediction == np.argmax(track_label))
            
            confusion_matrix[int(np.argmax(track_label)), int(test_mp_prediction)] += 1

            mp_pred_correct.append(test_mp_prediction_correct)
            mv_pred_correct.append(test_mv_prediction_correct)
            raw_pred_acc.append(test_raw_acc)

            # Find interesting examples and output them: tracks that both
            # voting schemes got wrong.
            if not test_mv_prediction_correct and not test_mp_prediction_correct and not done:
                test_raw_confidences = sess.run(y_conv, feed_dict={x: track_samples, train_flag: [False]})
                test_raw_predictions = np.argmax(test_raw_confidences, axis=1)

                # Samples whose output for the true class is below that
                # sample's mean output over all classes.
                # np.where outputs a 1-tuple so do [0] on this to get actual result
                test_av_conf_vals = np.mean(test_raw_confidences, axis=1)
                low_correct_confidences = np.where(test_raw_confidences[:,np.argmax(track_label)] < test_av_conf_vals)[0]

                if len(low_correct_confidences) > 0:
                    done = True
                    
                print('test_mp_prediction: {} test_mv_prediction: {} true label: {}'.format(test_mp_prediction, test_mv_prediction, np.argmax(track_label)))
                # Computed but not used afterwards in this function.
                incorrect_pred_idxs = np.where(test_raw_predictions != np.argmax(track_label))[0]
                print('found at track_id: {}!'.format(track_id))
                for idx in low_correct_confidences:
                    print('Incorrectly classified sample {} with as {} with confidences {}. Should be {}.'.format(idx, test_raw_predictions[idx], test_raw_confidences[idx], np.argmax(track_label)))
                    gztan.outputSample(track_id, idx)
                    
                    # Save the sample's 80x80 spectrogram as an image.
                    sample_spec = track_samples[idx]
                    specshow(sample_spec.reshape([80, 80]), y_axis=FLAGS.repr_func)

                    pylab.savefig('incorrect_{r}_track{t}_example{e}.png'.format(r=FLAGS.repr_func, t=track_id, e=idx), bbox_inches=None, pad_inches=0)
                    pylab.close()
        
        # NOTE(review): these divisions fail when there are no test tracks.
        test_mp_accuracy = sum(mp_pred_correct) / len(mp_pred_correct)
        test_mv_accuracy = sum(mv_pred_correct) / len(mv_pred_correct)
        test_raw_accuracy = sum(raw_pred_acc) / len(raw_pred_acc)
        print('test set: raw accuracy on test set: %0.3f' % test_raw_accuracy)
        print('test set: max prob accuracy on test set: %0.3f' % test_mp_accuracy)
        print('test set: maj vote accuracy on test set: %0.3f' % test_mv_accuracy)

        np.savetxt("confusion.csv", confusion_matrix, delimiter=",")
Пример #21
0
# Notebook-style fragment: picks the most frequent label among the gathered
# neighbors and sets up the evaluation loop parameters.
# The commented-out lines below are earlier experiments kept for reference.
# shape_top_k_xvals = tf.shape(top_k_xvals)
# shape_top_k_indices = tf.shape(top_k_indices)

# x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1)
# x_sums_repeated = tf.matmul(x_sums, tf.ones([1, k], tf.float32))  # shape = [k,k]
# x_val_weights = tf.expand_dims(tf.div(top_k_xvals, x_sums_repeated), 1)  #
#
# count = tf.unique_with_counts(tf.cast(top_k_yvals, dtype=tf.int32))

# pre = tf.argmax(top_k_xvals, axis=1)

# In[167]:

# Labels of the selected neighbors.
top_k_yvals = tf.gather(y_input, top_k_indices)

# Number of occurrences of every label value among those neighbors.
count = tf.bincount(tf.cast(top_k_yvals, dtype=tf.int32))

# The predicted label is the one that occurs most often.
pre_y = tf.argmax(count)

# In[169]:

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
sess.run(init)
batch_size = 1
target_size = 5000
# NOTE(review): the value computed from the test set size is overwritten by
# the constant on the next line -- confirm the override is intended.
num_loops = int(np.ceil(len(x_vals_test) / batch_size))
num_loops = 1000
# pre_dic = []
# target_dict=[]
acc = []
    def compute_loss(self, binary_seg_logits, binary_label,
                     instance_seg_logits, instance_label, name, reuse):
        """
        Build the training loss: a class-weighted binary segmentation term,
        a discriminative instance-embedding term and an L2 weight penalty.

        :param binary_seg_logits: logits handed to the weighted segmentation loss
        :param binary_label: segmentation labels; its first four static
            dimensions are read (and multiplied) at graph-construction time,
            so they must all be known
        :param instance_seg_logits: tensor fed through BN -> ReLU -> 1x1 conv
            to produce the pixel embedding
        :param instance_label: labels forwarded to the discriminative loss
        :param name: variable scope under which all ops are created
        :param reuse: ``reuse`` flag forwarded to that variable scope
        :return: dict with keys 'total_loss', 'binary_seg_logits',
            'instance_seg_logits' (the pixel embedding), 'binary_seg_loss'
            and 'discriminative_loss'
        :raises NotImplementedError: if ``self._binary_loss_type`` is neither
            'cross_entropy' nor 'focal'
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # ---- class-weighted binary segmentation loss ----
            with tf.variable_scope(name_or_scope='binary_seg'):
                label_dims = binary_label.get_shape().as_list()

                # Reshape the integer labels to their first three dimensions
                # and one-hot encode them along a new trailing axis.
                labels_as_int = tf.cast(binary_label, tf.int32)
                labels_3d = tf.reshape(
                    labels_as_int,
                    shape=[label_dims[0], label_dims[1], label_dims[2]])
                onehot_labels = tf.one_hot(labels_3d,
                                           depth=self._class_nums,
                                           axis=-1)

                # Flatten every label into one vector for counting.
                flat_labels = tf.reshape(
                    binary_label,
                    shape=[label_dims[0] * label_dims[1] *
                           label_dims[2] * label_dims[3]])
                flat_labels = tf.cast(flat_labels, tf.int32)

                # Occurrences per label value; zero counts are replaced by 1.
                # NOTE(review): minlength is the literal 6 while the one-hot
                # depth is self._class_nums -- confirm these are meant to agree.
                class_counts = tf.bincount(flat_labels, minlength=6)
                is_empty = tf.equal(class_counts, 0)
                class_counts = tf.where(is_empty,
                                        tf.ones_like(class_counts),
                                        class_counts)
                class_counts = tf.cast(class_counts, tf.float32)

                # weight = 1 / log(count / sum(counts) + 1.02)
                class_freq = tf.divide(class_counts,
                                       tf.reduce_sum(class_counts))
                inverse_weights = tf.divide(
                    1.0, tf.log(tf.add(class_freq, tf.constant(1.02))))

                loss_type = self._binary_loss_type
                if loss_type == 'cross_entropy':
                    weighted_loss_fn = \
                        self._compute_class_weighted_cross_entropy_loss
                elif loss_type == 'focal':
                    weighted_loss_fn = self._multi_category_focal_loss
                else:
                    raise NotImplementedError
                binary_seg_loss = weighted_loss_fn(
                    onehot_labels=onehot_labels,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)

            # ---- discriminative instance segmentation loss ----
            with tf.variable_scope(name_or_scope='instance_seg'):
                normalized = self.layerbn(inputdata=instance_seg_logits,
                                          is_training=self._is_training,
                                          name='pix_bn')
                activated = self.relu(inputdata=normalized, name='pix_relu')
                pix_embedding = self.conv2d(inputdata=activated,
                                            out_channel=self._embedding_dims,
                                            kernel_size=1,
                                            use_bias=False,
                                            name='pix_embedding_conv')
                embedding_dims = pix_embedding.get_shape().as_list()
                pix_image_shape = (embedding_dims[1], embedding_dims[2])
                # Only the first returned value is used below.
                instance_seg_loss, _l_var, _l_dist, _l_reg = \
                    lanenet_discriminative_loss.discriminative_loss(
                        pix_embedding, instance_label, self._embedding_dims,
                        pix_image_shape, 0.4, 3.0, 1.0, 1.0, 0.001
                    )

            # ---- L2 penalty over trainable variables, skipping any whose
            # name contains 'bn' or 'gn' ----
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for variable in tf.trainable_variables():
                if 'bn' not in variable.name and 'gn' not in variable.name:
                    l2_reg_loss = tf.add(l2_reg_loss,
                                         tf.nn.l2_loss(variable))
            l2_reg_loss *= 0.001
            total_loss = binary_seg_loss + instance_seg_loss + l2_reg_loss

            losses = {
                'total_loss': total_loss,
                'binary_seg_logits': binary_seg_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_seg_loss,
                'discriminative_loss': instance_seg_loss
            }

        return losses
Пример #23
0
    def lstm_layers(self, feature_maps, features):
        """Run the bidirectional LSTM head and decode it with CTC beam search.

        Args:
            feature_maps: tensor passed as the first argument to
                ``deep_bidirectional_lstm``.
            features: dict; the keys 'corpus' and 'image_width' are read.

        Returns:
            A ``(predictions_dict, eval_metric_ops)`` tuple.
            ``predictions_dict`` holds 'prob', 'raw_predictions' and
            'seq_len_inputs', plus 'score' and 'words' when the decoding
            branch runs. ``eval_metric_ops`` is a dict with 'eval/accuracy'
            and 'eval/CER' in EVAL mode, otherwise ``None``.
        """
        parameters = self.parameters
        # NOTE(review): `mode` is read from an attribute named `_is_training`
        # but is compared against tf.estimator.ModeKeys values below --
        # confirm what this attribute actually holds.
        mode = self.detection_model._is_training
        logprob, raw_pred = deep_bidirectional_lstm(feature_maps,
                                                    features['corpus'],
                                                    params=parameters,
                                                    summaries=False)
        # Compute seq_len from image width
        # n_pools = CONST.DIMENSION_REDUCTION_W_POOLING  # 2x2 pooling in dimension W on layer 1 and 2
        # seq_len_inputs = tf.divide(features['image_width'], n_pools, name='seq_len_input_op') - 1
        seq_len_inputs = features['image_width']
        # Second static dimension of logprob; used below as the number of
        # sequences (presumably the batch size of a time-major tensor).
        batch_size = logprob.shape[1]
        predictions_dict = {
            'prob': logprob,
            'raw_predictions': raw_pred,
            'seq_len_inputs': seq_len_inputs
        }

        if mode in [
                tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
                tf.estimator.ModeKeys.TRAIN
        ]:
            with tf.name_scope('code2str_conversion'):
                # Lookup table from integer codes to characters; unknown
                # codes map to '?'.
                keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
                values = [c for c in parameters.alphabet_decoding]
                table_int2str = tf.contrib.lookup.HashTable(
                    tf.contrib.lookup.KeyValueTensorInitializer(keys, values),
                    '?')

                # The same sequence length is repeated for every sequence.
                # NOTE(review): the decoder is asked for
                # `parameters.nb_logprob` paths while the loops below iterate
                # over `parameters.top_paths` -- confirm these agree.
                sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                    predictions_dict['prob'],
                    sequence_length=tf.cast([seq_len_inputs] * batch_size,
                                            tf.int32),
                    merge_repeated=False,
                    beam_width=100,
                    top_paths=parameters.nb_logprob)
                # confidence value

                predictions_dict['score'] = log_probability

                # For each decoded path, count the sparse entries per value
                # of the first index column (presumably the batch index),
                # giving the decoded length of every sequence.
                sequence_lengths_pred = [
                    tf.bincount(
                        tf.cast(sparse_code_pred[i].indices[:, 0], tf.int32),
                        minlength=tf.shape(predictions_dict['prob'])[1])
                    for i in range(parameters.top_paths)
                ]

                # Map the decoded integer codes to characters.
                pred_chars = [
                    table_int2str.lookup(sparse_code_pred[i])
                    for i in range(parameters.top_paths)
                ]

                # Assemble the characters into words, one result per path.
                list_preds = [
                    get_words_from_chars(
                        pred_chars[i].values,
                        sequence_lengths=sequence_lengths_pred[i])
                    for i in range(parameters.top_paths)
                ]

                predictions_dict['words'] = tf.stack(list_preds)

                # Log up to ten words of the first path as a text summary.
                tf.summary.text('predicted_words',
                                predictions_dict['words'][0][:10])

        # Evaluation ops
        # --------------
        if mode == tf.estimator.ModeKeys.EVAL:
            with tf.name_scope('evaluation'):
                # NOTE(review): `sparse_code_target` and `seq_lengths_labels`
                # are not defined anywhere in this method; unless they exist
                # in an enclosing scope this branch raises NameError.
                CER = tf.metrics.mean(tf.edit_distance(
                    sparse_code_pred[0],
                    tf.cast(sparse_code_target, dtype=tf.int64)),
                                      name='CER')

                # Convert label codes to decoding alphabet to compare predicted and groundtrouth words
                target_chars = table_int2str.lookup(
                    tf.cast(sparse_code_target, tf.int64))
                target_words = get_words_from_chars(target_chars.values,
                                                    seq_lengths_labels)
                accuracy = tf.metrics.accuracy(target_words,
                                               predictions_dict['words'][0],
                                               name='accuracy')

                eval_metric_ops = {
                    'eval/accuracy': accuracy,
                    'eval/CER': CER,
                }
                # NOTE(review): the tf.Print results below are assigned after
                # eval_metric_ops was built and are not used afterwards.
                CER = tf.Print(CER, [CER], message='-- CER : ')
                accuracy = tf.Print(accuracy, [accuracy],
                                    message='-- Accuracy : ')
        else:
            eval_metric_ops = None

        return predictions_dict, eval_metric_ops
Пример #24
0
def _tf_bincount_histogram(image, source_range, sess=None, as_tensor=False):
    """
    Efficient histogram calculation for an image of integers.

    This function is significantly more efficient than
    tf.histogram_fixed_width but works only on images of integers.
    It is based on tf.bincount.

    Args
    ---------------
    image : tensor
        The image whose values are counted.
    source_range : string
        'image' determines the range from the input image.
        'dtype' determines the range from the expected range of the images
        of that data type.
    sess : tensorflow session, optional
        Session used to evaluate tensors. When omitted, a temporary
        tf.InteractiveSession is opened and closed again before returning.
    as_tensor : bool, optional
        If True the results are returned as tensors, otherwise as evaluated
        values. Default is False.

    Returns
    ---------------
    hist : array/tensor
        The values of the histogram.
    bin_centers : array/tensor
        The values at the center of the bins.

    Raises
    ---------------
    ValueError
        If `source_range` is neither 'image' nor 'dtype'.
    """
    # Validate before opening a session so a bad argument cannot leak one.
    if source_range not in ['image', 'dtype']:
        raise ValueError(
            'Incorrect value for `source_range` argument: {}'.format(
                source_range))

    # Open a session of our own only when the caller did not supply one.
    my_sess = sess is None
    if my_sess:
        sess = tf.InteractiveSession()

    try:
        tf_image = image
        if source_range == 'image':
            # The value range is evaluated eagerly because it is needed as
            # concrete numbers for `minlength` and the bin centers.
            image_min = tf.cast(tf.math.reduce_min(tf_image),
                                tf.int64).eval(session=sess)
            image_max = tf.cast(tf.math.reduce_max(tf_image),
                                tf.int64).eval(session=sess)
        else:
            # source_range == 'dtype': take the range from the data type.
            image_min, image_max = tf_dtype_limits(tf_image,
                                                   clip_negative=False)

        # NOTE(review): the offset image returned here is never used below;
        # the histogram is computed from the un-offset `tf_image`. Kept as-is
        # because the helper's exact semantics are not visible here --
        # confirm whether the bincount should consume this result instead.
        image, offset = _tf_offset_array(array=image,
                                         low_boundary=image_min,
                                         high_boundary=image_max,
                                         sess=sess,
                                         as_tensor=True)

        # Flatten the image and count occurrences of each integer value.
        tf_image = tf.cast(tf.reshape(tensor=tf_image, shape=[-1]),
                           dtype=tf.int32)
        hist = tf.bincount(arr=tf_image,
                           minlength=image_max - image_min + 1,
                           dtype=tf.int32)
        bin_centers = tf.range(start=image_min, limit=image_max + 1)

        if source_range == 'image':
            # Drop the bins below the smallest value present in the image.
            idx = tf.maximum(image_min, 0)
            hist = hist[idx:]

        if not as_tensor:
            # Evaluate with the session we were given (or created) rather
            # than relying on whichever session happens to be the default.
            hist = hist.eval(session=sess)
            bin_centers = bin_centers.eval(session=sess)
        return hist, bin_centers
    finally:
        # Close only a session this function opened, also on errors.
        if my_sess:
            sess.close()
Пример #25
0
    def postprocess(self, prediction_dict):
        """Turn raw predictions into final detections.

        Boxes are decoded against the anchors, class scores are converted and
        stripped of the background column, and batch non-max suppression is
        applied. Several diagnostic tensors are stored in
        ``self.tensors_to_log`` along the way.

        Args:
            prediction_dict: dict that must contain 'box_encodings' and
                'class_predictions_with_background'; 'preprocessed_inputs',
                'anchors', 'feature_maps' and 'feature_masks' are read too.

        Returns:
            dict with 'detection_boxes', 'detection_scores',
            'detection_classes' and 'num_detections' (cast to float).

        Raises:
            ValueError: if either of the two required entries is missing.
        """
        has_required_entries = (
            'box_encodings' in prediction_dict
            and 'class_predictions_with_background' in prediction_dict)
        if not has_required_entries:
            raise ValueError(
                'prediction_dict does not contain expected entries.')

        with tf.name_scope('Postprocessor'):
            # 'preprocessed_inputs' is looked up but not used further.
            preprocessed_images = prediction_dict['preprocessed_inputs']
            anchors = prediction_dict['anchors']
            box_encodings = prediction_dict['box_encodings']
            class_predictions = prediction_dict[
                'class_predictions_with_background']
            feature_maps = prediction_dict['feature_maps']
            feature_masks = prediction_dict['feature_masks']

            # Decode box encodings relative to the anchors.
            detection_boxes = self.batch_decode(anchors, box_encodings)
            tf.logging.info('detection_boxes: %s', detection_boxes)

            # Convert scores, then drop index 0 of the last axis (background).
            scores_with_background = self._tf_score_converter_fn(
                class_predictions, name='convert_scores')
            detection_scores = tf.slice(scores_with_background,
                                        [0, 0, 1], [-1, -1, -1])
            tf.logging.info('detection_scores: %s', detection_scores)
            # Debug slice; only referenced by the disabled logging line below.
            debug_detection_scores = tf.slice(detection_scores, [0, 0, 0],
                                              [-1, 1, 1])
            #self.tensors_to_log[debug_detection_scores.op.name] = debug_detection_scores

            # Non-max suppression.
            nms_outputs = self.batch_non_max_suppression(
                detection_boxes, detection_scores)
            (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_indices,
             num_detections) = nms_outputs
            tf.logging.info('nmsed boxes: %s', nmsed_boxes)
            tf.logging.info('nmsed scores: %s', nmsed_scores)
            tf.logging.info('nmsed classes: %s', nmsed_classes)
            tf.logging.info('nmsed indices: %s', nmsed_indices)

            # How many kept detections carry each feature-map id.
            nmsed_masks = tf.gather(feature_masks, nmsed_indices)
            per_map_counts = [
                tf.reduce_sum(tf.to_int32(tf.equal(nmsed_masks, map_idx)))
                for map_idx in range(len(feature_maps))
            ]
            self.tensors_to_log['nmsed_feature'] = tf.to_int32(per_map_counts)

            # Histogram of the kept class ids.
            #y, _, count = tf.unique_with_counts(tf.reshape(tf.to_int32(nmsed_classes), [-1]))
            #classes_distribute = tf.stack([y, count])
            flat_classes = tf.reshape(tf.to_int32(nmsed_classes), [-1])
            self.tensors_to_log['top_classes_count'] = tf.bincount(
                flat_classes)

            # First column of scores / classes, logged under their op names.
            max_scores = tf.squeeze(tf.slice(nmsed_scores, [0, 0], [-1, 1]),
                                    name='max_scores')
            #max_scores = tf.slice(nmsed_scores,
            #        [0, 0], [-1, 5], name='max_scores')
            max_classes = tf.squeeze(tf.slice(nmsed_classes, [0, 0], [-1, 1]),
                                     name='max_classes')
            #max_classes = tf.slice(nmsed_classes,
            #        [0, 0], [-1, 5], name='max_classes')
            self.tensors_to_log[max_scores.op.name] = max_scores
            self.tensors_to_log[max_classes.op.name] = max_classes

            detection_dict = {
                'detection_boxes': nmsed_boxes,
                'detection_scores': nmsed_scores,
                'detection_classes': nmsed_classes,
                'num_detections': tf.to_float(num_detections)
            }
            for key, tensor in detection_dict.items():
                tf.logging.info('[detection] %s: %s', key, tensor)
            return detection_dict
Пример #26
0
    def loss(self, prediction_dict, labels, scope=None):
        """Compute the normalized localization and classification losses.

        Targets are assigned per image, the per-anchor losses are evaluated
        (optionally passed through hard example mining), and both totals are
        scaled by their loss weight divided by a normalizer, which is the sum
        of the regression weights clamped to at least 1.0. Per-feature-map and
        per-class diagnostics are stored in ``self.tensors_to_log`` and a
        'normalizer' scalar summary is added.

        Args:
            prediction_dict: dict; the keys 'anchors', 'box_encodings',
                'class_predictions_with_background', 'feature_maps' and
                'feature_masks' are read.
            labels: dict; the keys 'groundtruth_boxes', 'groundtruth_classes'
                and 'groundtruth_labels' are read, each iterated per element.
            scope: optional name passed to ``tf.name_scope``.

        Returns:
            dict mapping the op names of the localization and classification
            loss tensors to those tensors.
        """

        def _masked_sum(mask, values, axis=None):
            # Sum `values` where `mask` is true; other entries count as zero.
            return tf.reduce_sum(
                tf.where(mask, values, tf.zeros_like(values)), axis=axis)

        with tf.name_scope(scope, 'Loss', prediction_dict.values()):
            gt_boxes_per_image = labels['groundtruth_boxes']
            gt_classes_per_image = labels['groundtruth_classes']
            gt_labels_per_image = labels['groundtruth_labels']
            anchors = prediction_dict['anchors']
            box_encodings = prediction_dict['box_encodings']
            class_predictions = prediction_dict[
                'class_predictions_with_background']
            feature_maps = prediction_dict['feature_maps']
            feature_masks = prediction_dict['feature_masks']

            # Prepend one zero column to every class encoding.
            gt_classes_with_background = [
                tf.pad(encoding, [[0, 0], [1, 0]], mode='CONSTANT')
                for encoding in gt_classes_per_image
            ]

            # Assign targets image by image and collect each output.
            cls_targets_list = []
            cls_weights_list = []
            reg_targets_list = []
            reg_weights_list = []
            cls_labels_list = []
            matches_list = []
            per_image_groundtruth = zip(gt_boxes_per_image,
                                        gt_classes_with_background,
                                        gt_labels_per_image)
            for image_boxes, image_classes, image_labels in \
                    per_image_groundtruth:
                (image_cls_targets, image_cls_weights, image_reg_targets,
                 image_reg_weights, image_cls_labels,
                 image_matches) = self.assign_targets(
                     anchors, image_boxes, image_classes, image_labels)
                cls_targets_list.append(image_cls_targets)
                cls_weights_list.append(image_cls_weights)
                reg_targets_list.append(image_reg_targets)
                reg_weights_list.append(image_reg_weights)
                cls_labels_list.append(image_cls_labels)
                matches_list.append(image_matches)

            batch_cls_targets = tf.stack(cls_targets_list)
            batch_cls_weights = tf.stack(cls_weights_list)
            batch_reg_targets = tf.stack(reg_targets_list)
            batch_reg_weights = tf.stack(reg_weights_list)
            batch_cls_labels = tf.stack(cls_labels_list)

            tf.logging.info('batch_cls_targets: %s', batch_cls_targets)
            tf.logging.info('batch_cls_weights: %s', batch_cls_weights)
            tf.logging.info('batch_reg_targets: %s', batch_reg_targets)
            tf.logging.info('batch_reg_weights: %s', batch_reg_weights)
            tf.logging.info('batch_cls_labels: %s', batch_cls_labels)
            self._summarize_target_assignment(gt_boxes_per_image,
                                              matches_list)

            # Per-anchor losses.
            location_losses = self._localization_loss(
                box_encodings,
                batch_reg_targets,
                ignore_nan_targets=True,
                weights=batch_reg_weights)
            raw_cls_losses = self._classification_loss(
                class_predictions,
                batch_cls_targets,
                weights=batch_cls_weights)
            cls_losses = ops.reduce_sum_trailing_dimensions(raw_cls_losses,
                                                            ndims=2)
            tf.logging.info('location_losses: %s', location_losses)
            tf.logging.info('cls_losses: %s', cls_losses)

            if self._hard_example_miner:
                (localization_loss, classification_loss,
                 selected_masks) = self.apply_hard_mining(
                     location_losses, cls_losses, prediction_dict,
                     matches_list)
                self._hard_example_miner.summarize()
                self.tensors_to_log.update(
                    self._hard_example_miner.tensors_to_log())
            else:
                # Without mining every entry is selected.
                selected_masks = tf.ones_like(location_losses, dtype=bool)
                localization_loss = tf.reduce_sum(location_losses)
                classification_loss = tf.reduce_sum(cls_losses)
            normalizer = tf.maximum(
                tf.to_float(tf.reduce_sum(batch_reg_weights)), 1.0)

            # ---- diagnostics grouped by feature map ----
            feature_loc_losses = _masked_sum(selected_masks, location_losses,
                                             axis=0)
            feature_cls_losses = _masked_sum(selected_masks, cls_losses,
                                             axis=0)
            match_counts = tf.reduce_sum(batch_reg_weights, axis=0)
            selected_counts = tf.reduce_sum(tf.to_int32(selected_masks),
                                            axis=0)
            feature_loc_losses_list = []
            feature_cls_losses_list = []
            match_counts_list = []
            selected_counts_list = []
            for map_idx, _ in enumerate(feature_maps):
                in_this_map = tf.equal(feature_masks, map_idx)

                map_selected = _masked_sum(in_this_map, selected_counts)
                selected_counts_list.append(map_selected)

                # Mean loss per selected entry; not finite when none of the
                # entries of this feature map is selected.
                feature_loc_losses_list.append(
                    _masked_sum(in_this_map, feature_loc_losses) /
                    tf.to_float(map_selected))
                feature_cls_losses_list.append(
                    _masked_sum(in_this_map, feature_cls_losses) /
                    tf.to_float(map_selected))

                match_counts_list.append(
                    _masked_sum(in_this_map, match_counts))
            feature_losses = tf.stack(
                [tf.to_float(feature_loc_losses_list),
                 tf.to_float(feature_cls_losses_list)],
                name='feature_losses')
            counts = tf.stack(
                [tf.to_int32(match_counts_list),
                 tf.to_int32(selected_counts_list)],
                name='counts')
            self.tensors_to_log[feature_losses.op.name] = feature_losses
            self.tensors_to_log[counts.op.name] = counts

            # ---- diagnostics grouped by class label ----
            class_loc_losses_list = []
            class_cls_losses_list = []
            class_counts_list = []
            for class_idx in range(self.num_classes + 1):
                is_selected_class = tf.logical_and(
                    selected_masks, tf.equal(batch_cls_labels, class_idx))
                class_selected = tf.reduce_sum(
                    tf.to_int32(is_selected_class))
                class_counts_list.append(class_selected)

                # Same caveat: not finite when the class has no selection.
                class_loc_losses_list.append(
                    _masked_sum(is_selected_class, location_losses) /
                    tf.to_float(class_selected))
                class_cls_losses_list.append(
                    _masked_sum(is_selected_class, cls_losses) /
                    tf.to_float(class_selected))
            label_count_loss = tf.stack(
                [tf.to_float(class_counts_list),
                 tf.to_float(class_loc_losses_list),
                 tf.to_float(class_cls_losses_list)],
                name='label_count_loss')
            self.tensors_to_log[label_count_loss.op.name] = label_count_loss

            # Histogram of all ground-truth labels in the batch.
            all_gt_labels = tf.reshape(
                tf.to_int32(tf.concat(gt_labels_per_image, 0)), [-1])
            self.tensors_to_log['gt_classes_count'] = tf.bincount(
                all_gt_labels)

            #sigma * 1/N
            tf.summary.scalar('normalizer', normalizer)

            # Both losses share the same normalizer.
            localization_loss = tf.multiply(
                (self._localization_loss_weight / normalizer),
                localization_loss,
                name='localization_loss')
            classification_loss = tf.multiply(
                (self._classification_loss_weight / normalizer),
                classification_loss,
                name='classification_loss')

            loss_dict = {
                str(localization_loss.op.name): localization_loss,
                str(classification_loss.op.name): classification_loss
            }
        return loss_dict
def crnn_fn(features, labels, mode, params):
    """
    Estimator model function: CNN -> bidirectional LSTM -> CTC.

    :param features: dict {
                            'images'
                            'images_widths'
                            'filenames'
                            }
    :param labels: labels. flattend (1D) array with encoded label (one code per character)
    :param mode: one of the tf.estimator.ModeKeys values
    :param params: dict {
                            'Params'
                        }
    :return: a tf.estimator.EstimatorSpec carrying predictions, the CTC loss,
             the train op, the evaluation metrics and the export outputs
    :raises ValueError: if ``parameters.optimizer`` is not one of
             'ada', 'adam' or 'rms' (only checked outside PREDICT mode)
    """

    parameters = params.get('Params')
    # Fail early if this is not a Params instance.
    assert isinstance(parameters, Params)
    # Dropout keep probability: 0.7 while training, disabled otherwise.
    if mode == tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 0.7
    else:
        parameters.keep_prob_dropout = 1.0
    # CNN stage (original author's shape note: 128*32*304*3 -> 128*75*512).
    conv = resnet(features['images'], (mode == tf.estimator.ModeKeys.TRAIN),
                  summaries=False)
    # RNN stage (original author's shape note: 128*75*512 -> 75*128*3851).
    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                params=parameters,
                                                summaries=False)

    # Derive the sequence length from the image width.
    n_pools = CONST.DIMENSION_REDUCTION_W_POOLING  # 2x2 pooling in dimension W on layer 1 and 2
    # seq_len_inputs is the length of the sequence fed to the RNN:
    # the image width divided by the pooling factor, minus one.
    seq_len_inputs = tf.divide(features['images_widths'], n_pools) - 1
    # Build the prediction dictionary.
    predictions_dict = {
        'prob': logprob,
        'raw_predictions': raw_pred,
    }
    # File names are optional; pass them through when present.
    try:
        predictions_dict['filenames'] = features['filenames']
    except KeyError:
        pass

    if mode != tf.estimator.ModeKeys.PREDICT:
        # Convert string labels to code labels, i.e. the index of every
        # character in the alphabet.

        #  ************************************* start  *************************************
        # string_split in the current TensorFlow version does not support
        # UTF-8 characters. As a workaround the labels are stored as indices
        # separated by '$'. Re-enable the code below once that is supported.
        # keys = [c for c in parameters.alphabet]
        # values = parameters.alphabet_codes
        #
        # # Convert string label to code label
        # with tf.name_scope('str2code_conversion'):
        #     table_str2int = tf.contrib.lookup.HashTable(tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
        #     splited = tf.string_split(labels, delimiter='')  # TODO change string split to utf8 split in next tf version
        #     codes = table_str2int.lookup(splited.values)
        #     sparse_code_target = tf.SparseTensor(splited.indices, codes, splited.dense_shape)
        # ************************************* end  ****************************************

        # ************ temporary workaround - start ************
        with tf.name_scope('str2code_conversion'):
            splited = tf.string_split(labels, delimiter='$')
            sparse_code_target = tf.SparseTensor(
                splited.indices,
                tf.cast(tf.string_to_number(splited.values), tf.int32),
                splited.dense_shape)

        # Number of label codes per sample: count the sparse entries per
        # value of the first index column.
        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0], tf.int32),
            minlength=tf.shape(predictions_dict['prob'])[1])
        # ************ temporary workaround - end ************

        # Compute the loss
        # ----
        # >>> Cannot have longer labels than predictions -> error
        # NOTE(review): the dependency below is a plain comparison tensor,
        # not an assertion op, so it looks like it does not raise by itself;
        # confirm whether tf.assert_less_equal was intended.
        with tf.control_dependencies([
                tf.less_equal(sparse_code_target.dense_shape[1],
                              tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=True,
                # returns zero gradient in case it happens -> ema loss = NaN
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)

        global_step = tf.train.get_or_create_global_step()
        # Exponential moving average, applied to the CTC loss below.
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        # Create the shadow variables, and add op to maintain moving averages
        maintain_averages_op = ema.apply([loss_ctc])
        loss_ema = ema.average(loss_ctc)

        # Create the train op and choose the optimization strategy
        # --------
        learning_rate = tf.train.exponential_decay(
            parameters.learning_rate,
            global_step,
            parameters.learning_decay_steps,
            parameters.learning_decay_rate,
            staircase=True)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            # Previously an unknown name left `optimizer` unbound and failed
            # later with an UnboundLocalError; fail here with a clear message.
            raise ValueError(
                'Unsupported optimizer: {!r}'.format(parameters.optimizer))

        # Run the pending update ops and the optimizer step before updating
        # the moving average.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc, global_step=global_step)
        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # TensorBoard summaries
        # ---------

        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        # Convert the predicted codes to characters.
        with tf.name_scope('code2str_conversion'):
            # Lookup table from integer code to character; unknown -> '?'.
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=2)
            # Score: gap between the log-probabilities of the two best paths.
            predictions_dict['score'] = tf.subtract(log_probability[:, 0],
                                                    log_probability[:, 1])
            # around 10.0 -> seems pretty sure, less than 5.0 bit unsure, some errors/challenging images
            sparse_code_pred = sparse_code_pred[0]

            # Decoded length per sample, computed like seq_lengths_labels.
            sequence_lengths_pred = tf.bincount(
                tf.cast(sparse_code_pred.indices[:, 0], tf.int32),
                minlength=tf.shape(predictions_dict['prob'])[1])

            pred_chars = table_int2str.lookup(sparse_code_pred)
            predictions_dict['words'] = get_words_from_chars(
                pred_chars.values, sequence_lengths=sequence_lengths_pred)

            tf.summary.text('predicted_words', predictions_dict['words'][:10])
    # Training accuracy
    if mode == tf.estimator.ModeKeys.TRAIN:
        CER = tf.metrics.mean(tf.edit_distance(
            sparse_code_pred, tf.cast(sparse_code_target, dtype=tf.int64)),
                              name='CER')
        # Convert label codes to decoding alphabet to compare predicted and groundtrouth words
        target_chars = table_int2str.lookup(
            tf.cast(sparse_code_target, tf.int64))
        target_words = get_words_from_chars(target_chars.values,
                                            seq_lengths_labels)
        accuracy = tf.metrics.accuracy(target_words,
                                       predictions_dict['words'],
                                       name='accuracy')

        # accuracy[1] is the metric's update op.
        tf.identity(accuracy[1], name='train_accuracy')
        tf.summary.scalar('train_accuracy', accuracy[1])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred, tf.cast(sparse_code_target, dtype=tf.int64)),
                                  name='CER')
            # Convert label codes to decoding alphabet to compare predicted and groundtrouth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'],
                                           name='accuracy')
            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
    else:
        eval_metric_ops = None

    # Outputs to export
    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss_ctc,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=tf.train.Scaffold()
        # scaffold=tf.train.Scaffold(init_fn=None)  # Specify init_fn to restore from previous model
    )
Пример #28
0
def crnn_fn(features, labels, mode, params):
    """
    Estimator model function: CNN -> bidirectional LSTM -> CTC loss and beam-search decoding.

    :param features: dict, read with the keys {
                            'image'
                            'image_width'
                            'corpus'
                            }
    :param labels: label strings; they are split into single characters and mapped
                   to integer codes (not read when mode is PREDICT)
    :param mode: a tf.estimator.ModeKeys value
    :param params: dict {
                            'Params'
                        }
    :return: tf.estimator.EstimatorSpec
    :raises ValueError: when mode is not PREDICT and parameters.optimizer is not one of
                        'ada', 'momentum', 'adam', 'rms'
    """

    parameters = params.get('Params')
    assert isinstance(parameters, Params)

    # Load pre-trained cnn model
    # NOTE(review): this runs before deep_cnn / deep_bidirectional_lstm are called
    # below -- confirm the variables to restore already exist at this point.
    if parameters.cnn_pretained_ckpt_path:
        exclude = ['deep_bidirectional_lstm']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(
            exclude=exclude)
        tf.train.init_from_checkpoint(
            parameters.cnn_pretained_ckpt_path,
            {v.name.split(':')[0]: v
             for v in variables_to_restore})

    # Dropout is disabled outside of training (this mutates the Params object in place).
    if mode != tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 1.0

    conv = deep_cnn(features['image'], (mode == tf.estimator.ModeKeys.TRAIN),
                    parameters.cnn_model,
                    summaries=False)

    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                features['corpus'],
                                                params=parameters,
                                                summaries=False)

    # Compute seq_len from image width
    n_pools = parameters.width_down_sampling

    seq_len_inputs = tf.divide(
        features['image_width'], n_pools, name='seq_len_input_op') - 1

    predictions_dict = {'prob': logprob, 'raw_predictions': raw_pred}

    if not mode == tf.estimator.ModeKeys.PREDICT:
        # Alphabet and codes
        keys = [c for c in parameters.alphabet.encode('latin1')]
        values = parameters.alphabet_codes

        # Convert string label to code label
        with tf.name_scope('str2code_conversion'):
            # Characters missing from the alphabet map to -1.
            table_str2int = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(
                    keys, values, key_dtype=tf.int64, value_dtype=tf.int64),
                -1)
            splitted = tf.string_split(labels, delimiter='')
            values_int = tf.cast(
                tf.squeeze(tf.decode_raw(splitted.values, tf.uint8)), tf.int64)
            codes = table_str2int.lookup(values_int)
            codes = tf.cast(codes, tf.int32)
            sparse_code_target = tf.SparseTensor(splitted.indices, codes,
                                                 splitted.dense_shape)

        # Number of label characters per sample, counted from the sparse row indices.
        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0],
                    tf.int32),  #array of labels length
            minlength=tf.shape(predictions_dict['prob'])[1])

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error
        # NOTE(review): tf.less_equal only yields a boolean tensor; as a control
        # dependency it is evaluated but does not raise when the condition is false.

        with tf.control_dependencies([
                tf.less_equal(sparse_code_target.dense_shape[1],
                              tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=
                True,  # returns zero gradient in case it happens -> ema loss = NaN
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)
            loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

        global_step = tf.train.get_or_create_global_step()
        # # Create an ExponentialMovingAverage object
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        # Create the shadow variables, and add op to maintain moving averages
        maintain_averages_op = ema.apply([loss_ctc])
        loss_ema = ema.average(loss_ctc)

        # Train op
        # --------
        if parameters.learning_rate_decay:
            learning_rate = tf.train.exponential_decay(
                parameters.learning_rate,
                global_step,
                parameters.learning_rate_steps,
                parameters.learning_rate_decay,
                staircase=True)
        else:
            learning_rate = tf.constant(parameters.learning_rate)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate, beta1=0.5,
                epsilon=1e-07)  # at 1e-08 sometimes exploding gradient
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            # Fail here with a clear message instead of a NameError on
            # `optimizer.minimize` further down.
            raise ValueError('Unknown optimizer: {!r}'.format(
                parameters.optimizer))

        if not parameters.train_cnn:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          'deep_bidirectional_lstm')
            print('Training LSTM only')
        else:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc,
                                    global_step=global_step,
                                    var_list=trainable)

        # The train op refreshes the loss moving average only after the collected
        # update ops and the optimizer step have run.
        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # Summaries
        # ---------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    # This membership test lists all three mode keys, so decoding is built for every mode.
    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            # Codes missing from the decoding alphabet are rendered as '?'.
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

            # NOTE(review): the decoder is asked for `nb_logprob` paths while the
            # comprehensions below index `top_paths` entries -- presumably
            # top_paths <= nb_logprob; confirm.
            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=parameters.nb_logprob)

            # likelihoood. For future rename it as confidence and take softmax of log_probability
            predictions_dict['score'] = log_probability

            # Decoded characters per sample, for each decoded path.
            sequence_lengths_pred = [
                tf.bincount(tf.cast(sparse_code_pred[i].indices[:, 0],
                                    tf.int32),
                            minlength=tf.shape(predictions_dict['prob'])[1])
                for i in range(parameters.top_paths)
            ]

            pred_chars = [
                table_int2str.lookup(sparse_code_pred[i])
                for i in range(parameters.top_paths)
            ]

            list_preds = [
                get_words_from_chars(pred_chars[i].values,
                                     sequence_lengths=sequence_lengths_pred[i])
                for i in range(parameters.top_paths)
            ]

            predictions_dict['words'] = tf.stack(list_preds)

            tf.summary.text('predicted_words',
                            predictions_dict['words'][0][:10])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            # Both metrics are computed against the first decoded path only.
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred[0], tf.cast(sparse_code_target,
                                             dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to decoding alphabet to compare predicted and groundtrouth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'][0],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            # These print ops are created after eval_metric_ops was filled and their
            # results are not referenced afterwards in this function.
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy], message='-- Accuracy : ')

    else:
        eval_metric_ops = None

    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions_dict,
                                      loss=loss_ctc,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops,
                                      export_outputs=export_outputs,
                                      scaffold=tf.train.Scaffold())
Пример #29
0
import tensorflow as tf
import os
# Restrict the CUDA devices visible to this process to device index 1.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Sample values to histogram.
a = [0, 1, 1, 2, 2, 10]

# Use the session as a context manager so it is closed once the counts are printed.
with tf.Session() as sess:
    print(sess.run(tf.bincount(a)))
Пример #30
0
    def _shadownet_fun(features, labels, mode, params):
        """Build the multi-tower training graph and wrap it in an EstimatorSpec.

        One tower is created per index in ``range(num_gpus)``; tower ``i`` is fed
        ``features[i]`` and ``labels[i]``. Gradients are averaged per variable
        across towers and applied in a single Adadelta step.

        Args:
            features: indexable collection with one entry per tower.
            labels: indexable collection with one entry per tower; entry 0 is
                read through ``.indices`` (presumably a SparseTensor -- confirm).
            mode: a ``tf.estimator.ModeKeys`` value.
            params: object providing ``batch_size``, ``l_size``,
                ``learning_rate``, ``decay_steps``, ``decay_rate``, ``sync``
                and, when ``sync`` is truthy, ``is_chief``.

        Returns:
            A ``tf.estimator.EstimatorSpec`` with the mean tower loss, the
            grouped train op and the training hooks.
        """
        training = (mode == tf.estimator.ModeKeys.TRAIN)

        losses_per_tower = []
        gradvars_per_tower = []
        preds_per_tower = []
        tensor_dicts_per_tower = []
        seq_lens_per_tower = []

        n_towers = num_gpus
        device_kind = 'gpu'
        tower_batch_size = int(params.batch_size / n_towers)

        for tower_idx in range(n_towers):
            setter = local_device_setter(
                worker_device='/{}:{}'.format(device_kind, tower_idx))

            # Variables are created by tower 0 and reused by every later tower.
            with tf.variable_scope('shadownet', reuse=(tower_idx != 0)), \
                    tf.name_scope('tower_%d' % tower_idx) as name_scope, \
                    tf.device(setter):
                (t_loss, t_gradvars, t_preds, t_tensor_dict,
                 t_seq_len) = _tower_fn(training, features[tower_idx],
                                        labels[tower_idx], tower_batch_size,
                                        params.l_size)
                losses_per_tower.append(t_loss)
                gradvars_per_tower.append(t_gradvars)
                preds_per_tower.append(t_preds)
                tensor_dicts_per_tower.append(t_tensor_dict)
                seq_lens_per_tower.append(t_seq_len)

                if tower_idx == 0:
                    # Batch-norm moving statistics are updated from the first
                    # tower only; the other towers' updates are deliberately
                    # ignored because these statistics accumulate very quickly.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                                   name_scope)

        # Combine the per-tower gradients into one (gradient, variable) list.
        gradvars = []
        with tf.name_scope('gradient_averaging'):
            grads_by_var = {}
            for grad, var in itertools.chain(*gradvars_per_tower):
                if grad is None:
                    continue
                grads_by_var.setdefault(var, []).append(grad)
            for var, grads in six.iteritems(grads_by_var):
                # The averaging op is placed on the variable's own device.
                with tf.device(var.device):
                    if len(grads) == 1:
                        avg_grad = grads[0]
                    else:
                        avg_grad = tf.multiply(tf.add_n(grads),
                                               1. / len(grads))
                gradvars.append((avg_grad, var))

        # Device on which the global update ops are built.
        if variable_strategy == 'GPU':
            consolidation_device = '/gpu:0'
        else:
            consolidation_device = '/cpu:0'

        with tf.device(consolidation_device):
            global_step = tf.train.get_global_step()
            learning_rate = tf.train.exponential_decay(params.learning_rate,
                                                       global_step,
                                                       params.decay_steps,
                                                       params.decay_rate,
                                                       staircase=True)
            loss = tf.reduce_mean(losses_per_tower, name='loss')

            # Decoding and the diagnostics below use the first tower only.
            decoded, log_prob = tf.nn.ctc_beam_search_decoder(
                preds_per_tower[0],
                seq_lens_per_tower[0] * np.ones(tower_batch_size),
                merge_repeated=False)
            sequence_dist = tf.reduce_mean(
                tf.edit_distance(tf.cast(decoded[0], tf.int32), labels[0]))

            # Entries per row, counted from the sparse row indices.
            n_bins = tf.shape(labels[0])[1]
            sequence_lengths_pred = tf.bincount(
                tf.cast(decoded[0].indices[:, 0], tf.int32),
                minlength=n_bins)
            label_lengths_pred = tf.bincount(
                tf.cast(labels[0].indices[:, 0], tf.int32),
                minlength=tf.shape(labels[0])[1])

            tensors_to_log = {
                'global_step': global_step,
                'learning_rate': learning_rate,
                'loss': loss
            }
            dist_to_log = dict(tensors_to_log)
            dist_to_log['train_seq_dist'] = sequence_dist
            dist_to_log['sequence_lengths_pred'] = sequence_lengths_pred
            dist_to_log['label_lengths_pred'] = label_lengths_pred

            train_hooks = [
                tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                           every_n_iter=10),
                tf.train.LoggingTensorHook(tensors=dist_to_log,
                                           every_n_iter=1000),
            ]

            summaries = [
                tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist),
                tf.summary.scalar(name='Learning_rate', tensor=learning_rate),
            ]

            # This hook is constructed but is not appended to train_hooks.
            summary_hook = tf.train.SummarySaverHook(
                save_steps=1000,
                output_dir='/data/output/',
                summary_op=summaries)

            optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
            if params.sync:
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer, replicas_to_aggregate=num_workers)
                train_hooks.append(
                    optimizer.make_session_run_hook(params.is_chief))

            # Single grouped train op: the gradient step plus the update ops.
            apply_op = optimizer.apply_gradients(
                gradvars, global_step=tf.train.get_global_step())
            train_op = tf.group(apply_op, *update_ops)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=train_hooks)
    def should_continue_translating(self, model, stack):
        """
        Builds a boolean mask over all hypotheses: True - the hypothesis is kept, False - it is dropped.
        A hypothesis is dropped when it is finished, exceeds max_len, or is pruned by
        beam_spread, beam_spread_raw or beam_size.
        Note: hypotheses of each input sample are assumed to be sorted by score (best first)!!!
        """

        # a hypothesis is finished once EOS appears anywhere along its last axis
        has_eos = tf.reduce_any(tf.equal(stack.out, model.out_voc.eos), axis=-1)
        keep_mask = tf.logical_not(has_eos)

        n_hypos = tf.shape(stack.out)[0]
        batch_size = tf.shape(stack.best_out)[0]
        batch_indices = hypo_to_batch_index(n_hypos, stack.slices)

        # prune by length
        if self.max_len is not None:
            length_ok = tf.less_equal(stack.out_len, self.max_len)

            # max_len given per input sentence: pick the entry of each hypothesis' sample
            # NOTE(review): the comparison above happens before this gather - confirm that
            # stack.out_len and a per-sample max_len have compatible shapes at that point.
            if not is_scalar(self.max_len):
                length_ok = tf.gather(length_ok, batch_indices)

            keep_mask = tf.logical_and(keep_mask, length_ok)

        # prune by beam spread: drop when score + spread is below the sample's best score
        if self.beam_spread is not None:
            best_per_hypo = tf.gather(stack.best_scores, batch_indices)
            too_far = tf.less(stack.scores + self.beam_spread, best_per_hypo)
            keep_mask = tf.logical_and(keep_mask, tf.logical_not(too_far))

        # same rule on raw scores (this one is guarded by truthiness, not by `is not None`)
        if self.beam_spread_raw:
            best_raw_per_hypo = tf.gather(stack.best_raw_scores, batch_indices)
            too_far_raw = tf.less(stack.raw_scores + self.beam_spread_raw, best_raw_per_hypo)
            keep_mask = tf.logical_and(keep_mask, tf.logical_not(too_far_raw))

        # prune everything beyond beam_size
        if self.beam_size is not None:
            # Toy example used in the comments below: slices=[0,2,5,5,8], n_hypos=10, beam_size=2
            # keep_mask = [1,1,1,0,1,1,1,1,0,1] (two hypotheses have been pruned/finished)
            alive = tf.cast(keep_mask, 'int32')

            # 1. global running index of each surviving hypothesis,  [0,1,2,3,3,4,5,6,7,7]
            global_rank = tf.cumsum(alive, exclusive=True)
            # 2. number of surviving hypotheses per input sample, [2,2,3,1]
            alive_per_input = tf.bincount(batch_indices, weights=tf.cast(keep_mask, 'int32'),
                                          minlength=batch_size, maxlength=batch_size)
            # 3. the equivalent of slices once pruned hypotheses are excluded: [0,2,4,4,7]
            offsets = tf.cumsum(alive_per_input, exclusive=True)
            # 4. index of each surviving hypothesis within its own sample
            # index of input sentence in batch:       inp0  /inp_1\  /inp_2\, /inp_3\
            # index of hypothesis within input:      [0, 1, 0, 1, 1, 0, 1, 2, 0, 0, 1]
            # 'e' = pruned earlier, 'x' - pruned now:         'e'         'x'   'e'
            rank_in_sample = global_rank - tf.gather(offsets, batch_indices)

            # 5. drop hypotheses whose in-sample index reaches beam_size
            over_beam = tf.greater_equal(rank_in_sample, self.beam_size)
            keep_mask = tf.logical_and(keep_mask, tf.logical_not(over_beam))

        return keep_mask
Пример #32
0
def expected_calibration_error(y_true, y_pred, nbins=20):
    """Streaming Expected Calibration Error (ECE) metric.

    ECE summarises calibration error as one scalar: the sample-weighted
    average gap between predicted and true probability of a positive
    detection, taken over uniformly spaced confidence bins on [0, 1].

    Reference:
      Guo, et. al, "On Calibration of Modern Neural Networks"
      Page 2, Expected Calibration Error (ECE).
      https://arxiv.org/pdf/1706.04599.pdf

    Three metric variables of length `nbins` are created (named `bin_counts`,
    `true_sum` and `preds_sum`). The returned `update_op` adds the current
    batch to them and evaluates ECE from the updated totals.

    Args:
      y_true: 1-D tf.int64 Tensor of binarized ground truth, one entry per
        prediction in y_pred.
      y_pred: 1-D tf.float32 tensor of model confidence scores in range
        [0.0, 1.0].
      nbins: int, number of uniformly-spaced bins y_pred is bucketed into.

    Returns:
      value_op: A value metric op that returns ece.
      update_op: An operation that increments the three bin variables and
        whose value matches `ece`.

    Raises:
      InvalidArgumentError: if y_pred is not in [0.0, 1.0].
    """
    bin_counts = metrics_impl.metric_variable([nbins], tf.float32,
                                              name='bin_counts')
    bin_true_sum = metrics_impl.metric_variable([nbins], tf.float32,
                                                name='true_sum')
    bin_preds_sum = metrics_impl.metric_variable([nbins], tf.float32,
                                                 name='preds_sum')

    # Bucketing only happens after both range assertions on y_pred.
    range_checks = [
        tf.assert_greater_equal(y_pred, 0.0),
        tf.assert_less_equal(y_pred, 1.0),
    ]
    with tf.control_dependencies(range_checks):
        bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0],
                                                nbins=nbins)

    with tf.control_dependencies([bin_ids]):
        # Number of samples that landed in each bin.
        batch_counts = tf.cast(tf.bincount(bin_ids, minlength=nbins),
                               dtype=tf.float32)
        update_bin_counts_op = tf.assign_add(bin_counts, batch_counts)

        # Per-bin sum of the ground-truth values.
        batch_true_sum = tf.cast(
            tf.bincount(bin_ids, weights=y_true, minlength=nbins),
            dtype=tf.float32)
        update_bin_true_sum_op = tf.assign_add(bin_true_sum, batch_true_sum)

        # Per-bin sum of the predicted confidences.
        batch_preds_sum = tf.cast(
            tf.bincount(bin_ids, weights=y_pred, minlength=nbins),
            dtype=tf.float32)
        update_bin_preds_sum_op = tf.assign_add(bin_preds_sum,
                                                batch_preds_sum)

    ece_update_op = _ece_from_bins(update_bin_counts_op,
                                   update_bin_true_sum_op,
                                   update_bin_preds_sum_op,
                                   name='update_op')
    ece = _ece_from_bins(bin_counts, bin_true_sum, bin_preds_sum, name='value')
    return ece, ece_update_op
Пример #33
0
    def __init__(self,
                 preds,
                 labels,
                 model,
                 num_nodes,
                 pos_weight,
                 norm,
                 target_list,
                 global_step,
                 new_learning_rate,
                 if_drop_edge=True):
        """
        Build the generator loss, the community-detection (min-cut) losses and their optimizer ops.
        :param preds: not used
        :param labels: not used
        :param model: the model; read through vaeD_tilde, realD_tilde, adj_ori_dense and
                      (when if_drop_edge is True) x_tilde_deleted
        :param num_nodes: the number of the nodes
        :param pos_weight: not used
        :param norm: not used
        :param target_list: groups of target node indices (the core members), one list per group
        :param global_step: the global learning step, passed to every minimize call
        :param new_learning_rate: the learning rate of both RMSProp optimizers
        :param if_drop_edge: if True, build the edge-dropping generator loss and train the
                             community model on model.vaeD_tilde; otherwise only the clean
                             community loss on model.realD_tilde is built
        """
        self.opt_op = 0  # this is the minimize function
        self.cost = 0  # this is the loss
        self.accuracy = 0  # this is the accuracy
        self.G_comm_loss = 0
        self.G_comm_loss_KL = 0
        self.num_nodes = num_nodes
        self.if_drop_edge = if_drop_edge
        # this is for vae, it contains two parts of losses:
        self.generate_optimizer = tf.train.RMSPropOptimizer(
            learning_rate=new_learning_rate)
        self.community_optimizer = tf.train.RMSPropOptimizer(
            learning_rate=new_learning_rate)
        generate_varlist = [
            var for var in tf.trainable_variables()
            if ('generate' in var.name) or ('encoder' in var.name)
        ]  # the first part is generator and the second part is community detection
        community_varlist = [
            var for var in tf.trainable_variables() if 'community' in var.name
        ]
        #################### the new G_comm_loss
        # NOTE(review): the KL term is re-initialised at the first pair of every
        # target group (and self.G_target_pred is overwritten per group), so only
        # the last group contributes -- confirm this is intended; mu below accumulates.
        for targets in target_list:
            targets_indices = [[x] for x in targets]
            self.G_target_pred = tf.gather_nd(model.vaeD_tilde,
                                              targets_indices)
            ## calculate the KL divergence
            for i in range(len(targets)):
                for j in range(i + 1, len(targets)):
                    if (i == 0) and (j == 1):
                        self.G_comm_loss_KL = -1 * tf.reduce_sum(
                            (self.G_target_pred[i] *
                             tf.log(self.G_target_pred[i] /
                                    self.G_target_pred[j])))
                    else:
                        self.G_comm_loss_KL += -1 * tf.reduce_sum(
                            (self.G_target_pred[i] *
                             tf.log(self.G_target_pred[i] /
                                    self.G_target_pred[j])))
                    # to maximize the KL is to minimize the neg KL
        ######################################################
        ######################################################
        if if_drop_edge == True:
            self.mu = 0
            ## the new G_comm_loss
            for idx, targets in enumerate(target_list):
                target_pred = tf.gather(model.vaeD_tilde, targets)
                max_index = tf.argmax(target_pred, axis=1)
                max_index = tf.cast(max_index, tf.int32)
                # tf.unique returns a (y, idx) pair, so len() of it is always 2.
                # Count the distinct assigned clusters from `y` instead.
                n_distinct = tf.size(tf.unique(max_index)[0])
                mu_term = ((n_distinct - 1) /
                           (np.max([FLAGS.n_clusters - 1, 1]) *
                            (tf.reduce_max(tf.bincount(max_index)))))
                if idx == 0:
                    self.mu = mu_term
                else:
                    self.mu += mu_term
            self.mu = tf.cast(self.mu, tf.float32)
            # Sum of the logs of the strictly positive entries of x_tilde_deleted.
            eij = tf.gather_nd(model.x_tilde_deleted,
                               tf.where(model.x_tilde_deleted > 0))
            eij = tf.reduce_sum(tf.log(eij))
            self.G_comm_loss = (
                -1) * self.mu * eij + FLAGS.G_KL_r * self.G_comm_loss_KL
            ######################################################
            # because the generate part is only inner product , there is no variable to optimize, we should change the format and try again
            self.G_min_op = self.generate_optimizer.minimize(
                self.G_comm_loss,
                global_step=global_step,
                var_list=generate_varlist)
        #######################################################
        ## the cutminloss for community detection
        # if it is the modified model
        if if_drop_edge == True:
            # Min-cut term on the generated assignments (vaeD_tilde).
            A_pool = tf.matmul(
                tf.transpose(tf.matmul(model.adj_ori_dense, model.vaeD_tilde)),
                model.vaeD_tilde)
            num = tf.diag_part(A_pool)

            D = tf.reduce_sum(model.adj_ori_dense, axis=-1)
            D = tf.matrix_diag(D)
            D_pooled = tf.matmul(tf.transpose(tf.matmul(D, model.vaeD_tilde)),
                                 model.vaeD_tilde)
            den = tf.diag_part(D_pooled)
            D_mincut_loss = -(1 / FLAGS.n_clusters) * (num / den)
            D_mincut_loss = tf.reduce_sum(D_mincut_loss)
            ## the orthogonal part loss
            St_S = (FLAGS.n_clusters / self.num_nodes) * tf.matmul(
                tf.transpose(model.vaeD_tilde), model.vaeD_tilde)
            I_S = tf.eye(FLAGS.n_clusters)  # here is I_k
            ortho_loss = tf.square(tf.norm(St_S - I_S))
            ## the overall cutmin_loss
            self.D_mincut_loss = D_mincut_loss + FLAGS.mincut_r * ortho_loss
            ###### at first we need to train the community detection with clean one
            # Same loss on the clean assignments (realD_tilde).
            A_pool_clean = tf.matmul(
                tf.transpose(tf.matmul(model.adj_ori_dense,
                                       model.realD_tilde)), model.realD_tilde)
            num_clean = tf.diag_part(A_pool_clean)

            D_clean = tf.reduce_sum(model.adj_ori_dense, axis=-1)
            D_clean = tf.matrix_diag(D_clean)
            D_pooled_clean = tf.matmul(
                tf.transpose(tf.matmul(D_clean, model.realD_tilde)),
                model.realD_tilde)
            den_clean = tf.diag_part(D_pooled_clean)
            D_mincut_loss_clean = -(1 / FLAGS.n_clusters) * (num_clean /
                                                             den_clean)
            D_mincut_loss_clean = tf.reduce_sum(D_mincut_loss_clean)
            ## the orthogonal part loss
            St_S_clean = (FLAGS.n_clusters / self.num_nodes) * tf.matmul(
                tf.transpose(model.realD_tilde), model.realD_tilde)
            I_S_clean = tf.eye(FLAGS.n_clusters)
            ortho_loss_clean = tf.square(tf.norm(St_S_clean - I_S_clean))
            self.D_mincut_loss_clean = D_mincut_loss_clean + FLAGS.mincut_r * ortho_loss_clean
            ########
            self.D_min_op_clean = self.community_optimizer.minimize(
                self.D_mincut_loss_clean,
                global_step=global_step,
                var_list=community_varlist)
        ###################################### the clean community detection model loss ##################
        else:
            A_pool = tf.matmul(
                tf.transpose(tf.matmul(model.adj_ori_dense,
                                       model.realD_tilde)), model.realD_tilde)
            num = tf.diag_part(A_pool)

            D = tf.reduce_sum(model.adj_ori_dense, axis=-1)
            D = tf.matrix_diag(D)
            D_pooled = tf.matmul(tf.transpose(tf.matmul(D, model.realD_tilde)),
                                 model.realD_tilde)
            den = tf.diag_part(D_pooled)
            D_mincut_loss = -(1 / FLAGS.n_clusters) * (num / den)
            D_mincut_loss = tf.reduce_sum(D_mincut_loss)
            ## the orthogonal part loss
            St_S = (FLAGS.n_clusters / self.num_nodes) * tf.matmul(
                tf.transpose(model.realD_tilde), model.realD_tilde)
            I_S = tf.eye(FLAGS.n_clusters)
            ortho_loss = tf.square(tf.norm(St_S - I_S))

            ## the overall cutmin_loss
            self.D_mincut_loss_test = D_mincut_loss + FLAGS.mincut_r * ortho_loss
        ########
        # D_min_op minimises whichever community loss was built above.
        if self.if_drop_edge == False:
            self.D_min_op = self.community_optimizer.minimize(
                self.D_mincut_loss_test,
                global_step=global_step,
                var_list=community_varlist)
        else:
            self.D_min_op = self.community_optimizer.minimize(
                self.D_mincut_loss,
                global_step=global_step,
                var_list=community_varlist)
        ## this part is not correct now
        self.correct_prediction = tf.equal(
            tf.cast(tf.greater_equal(tf.sigmoid(model.realD_tilde), 0.5),
                    tf.int32),
            tf.cast(tf.ones_like(model.realD_tilde), tf.int32))
        self.D_accuracy = tf.reduce_mean(
            tf.cast(self.correct_prediction, tf.float32))
Пример #34
0
def ctc_loss(prob,
             labels,
             input_shape,
             alphabet,
             alphabet_codes,
             batch_size,
             n_pools=2 * 2,
             decode=True):
    """Build the CTC loss and, optionally, decoding and evaluation ops.

    Args:
        prob: Network output passed as ``inputs`` to ``tf.nn.ctc_loss`` and to
            the beam-search decoder. It is consumed with ``time_major=True``,
            so axis 1 is used as the batch axis.
        labels: String tensor of target transcriptions; each string is split
            into single characters.
        input_shape: Indexable whose element ``[1]`` is used as the image
            width from which the sequence length is derived.
        alphabet: Iterable of the characters making up the alphabet.
        alphabet_codes: Integer codes, one per character of ``alphabet``.
        batch_size: Number of samples per batch. It is used to repeat a Python
            list, so it is assumed to be a Python int -- TODO confirm.
        n_pools: Divisor applied to the width to obtain the sequence length
            (total pooling factor along the width).
        decode: When true, also build the beam-search decoding ops and the
            CER / accuracy evaluation ops.

    Returns:
        A tuple ``(loss_ctc, words, pred_score, CER, accuracy)``.
        ``loss_ctc`` is the batch mean of the CTC loss. When ``decode`` is
        false the four remaining entries are ``None``; otherwise ``words`` is
        the result of ``get_words_from_chars`` on the best decoded path,
        ``pred_score`` is the log-probability gap between the two best paths,
        ``CER`` is the mean edit distance between prediction and target, and
        ``accuracy`` is whatever ``tf.metrics.accuracy`` returns.
    """
    # Compute seq_len from image width
    # 2x2 pooling in dimension W on layer 1 and 2 -> n-pools = 2*2
    seq_len_inputs = tf.divide(
        [input_shape[1]] * batch_size, n_pools, name='seq_len_input_op') - 1

    # Get keys (letters) and values (integer stand-ins for letters)
    keys = list(alphabet)  # the letters themselves
    values = alphabet_codes  # integer representations

    # Convert string labels to sparse integer-code labels
    with tf.name_scope('str2code_conversion'):
        table_str2int = tf.contrib.lookup.HashTable(
            tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
        splited = tf.string_split(
            labels, delimiter=''
        )  # TODO change string split to utf8 split in next tf version
        codes = table_str2int.lookup(splited.values)
        sparse_code_target = tf.SparseTensor(splited.indices, codes,
                                             splited.dense_shape)

    # Number of label characters per batch row: count how often each row
    # index occurs among the sparse indices.
    seq_lengths_labels = tf.bincount(tf.cast(sparse_code_target.indices[:, 0],
                                             tf.int32),
                                     minlength=tf.shape(prob)[1])

    # Use ctc loss on probabilities from lstm output
    # NOTE(review): the original comment says labels longer than predictions
    # are an error, but tf.less_equal only yields a boolean tensor; the
    # control dependency forces its evaluation without raising. Kept as is
    # because ignore_longer_outputs_than_inputs=True already tolerates that
    # case -- confirm whether a hard tf.Assert is wanted.
    with tf.control_dependencies([
            tf.less_equal(sparse_code_target.dense_shape[1],
                          tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
    ]):
        loss_ctc = tf.nn.ctc_loss(
            labels=sparse_code_target,
            inputs=prob,
            sequence_length=tf.cast(seq_len_inputs, tf.int32),
            preprocess_collapse_repeated=False,
            ctc_merge_repeated=True,
            ignore_longer_outputs_than_inputs=
            True,  # returns zero gradient in case it happens -> ema loss = NaN
            time_major=True)
        loss_ctc = tf.reduce_mean(loss_ctc)
        # loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

    if not decode:
        # Nothing beyond the loss was requested. Every decode-only output is
        # set explicitly so the return statement below never hits an unbound
        # local.
        words = None
        pred_score = None
        CER = None
        accuracy = None
        return loss_ctc, words, pred_score, CER, accuracy

    with tf.name_scope('code2str_conversion'):
        keys = tf.cast(alphabet_codes, tf.int64)
        values = list(alphabet)

        table_int2str = tf.contrib.lookup.HashTable(
            tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

        sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
            prob,
            sequence_length=tf.cast(seq_len_inputs, tf.int32),
            merge_repeated=False,
            beam_width=100,
            top_paths=2)
        # Score: gap between the log-probabilities of the two returned paths
        pred_score = tf.subtract(log_probability[:, 0],
                                 log_probability[:, 1])

        # Keep only the first decoded path for the transcription
        sparse_code_pred = sparse_code_pred[0]

        sequence_lengths_pred = tf.bincount(
            tf.cast(sparse_code_pred.indices[:, 0], tf.int32),
            minlength=tf.shape(prob)[1])

        pred_chars = table_int2str.lookup(sparse_code_pred)
        words = get_words_from_chars(
            pred_chars.values, sequence_lengths=sequence_lengths_pred)

        # tf.summary.text('predicted_words', words[:10])

    with tf.name_scope('evaluation'):
        # A streaming tf.metrics.mean used to be built here and was then
        # immediately overwritten, so its update op could never run; only
        # the plain batch mean is kept.
        CER = tf.reduce_mean(tf.edit_distance(
            sparse_code_pred, tf.cast(sparse_code_target, dtype=tf.int64)),
                             name='CER')

        # Convert label codes to the decoding alphabet to compare predicted
        # and ground-truth words
        target_chars = table_int2str.lookup(
            tf.cast(sparse_code_target, tf.int64))
        target_words = get_words_from_chars(target_chars.values,
                                            seq_lengths_labels)
        accuracy = tf.metrics.accuracy(target_words,
                                       words,
                                       name='accuracy')

        # CER = tf.Print(CER, [CER], message='-- CER : ')
        # accuracy = tf.Print(accuracy, [accuracy], message='-- Accuracy : ')

    return loss_ctc, words, pred_score, CER, accuracy