Example #1
    def _fd_conditional(y_true, y_pred):
        # If there are no mask annotations, return 0; otherwise compute the fdl loss.
        return tf.cond(
            K.any(K.equal(K.shape(y_true), 0)),
            lambda: K.cast_to_floatx(0.0), lambda: _fd_batch(
                y_true,
                y_pred,
                iou_threshold=self.fdl_iou_threshold,
                parallel_iterations=self.parallel_iterations))
Example #2
    def __init__(self,
                 cost_mat=None,
                 name='WeightedCategoricalCrossentropy',
                 **kwargs):
        assert cost_mat is not None, 'cost_mat is required'
        assert cost_mat.ndim == 2
        assert cost_mat.shape[0] == cost_mat.shape[1]

        super().__init__(name=name, **kwargs)
        self.cost_mat = K.cast_to_floatx(cost_mat)
Example #3
def regularization(x):
    l_units = loss_units(x)  # helper defined elsewhere in the original module
    t = x / K.max(K.abs(x))
    p = K.switch(K.less(t, K.epsilon()), K.zeros_like(x), x)
    cost = K.cast_to_floatx(0.)
    cost += K.sum(p) - K.sum(K.square(p)) + 2. * l_units
    # cost += K.sum(p * (1. - p)) + l_units
    # cost += K.sum(K.relu(x - 1.))
    return cost
Example #4
def iou_loss_core(y_true, y_pred, smooth=1):
    y_true = K.cast_to_floatx(y_true)
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    union = K.sum(y_true,-1) + K.sum(y_pred,-1) - intersection
    iou = (intersection + smooth) / (union + smooth)

    return iou
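The IoU score above is usually turned into a minimizable loss as 1 - iou. A small smoke test in eager TensorFlow 2.x (the tensors here are made up):

import tensorflow as tf

y_true = tf.constant([[0., 1., 1., 0.]])
y_pred = tf.constant([[0.1, 0.9, 0.8, 0.2]])
loss = 1.0 - iou_loss_core(y_true, y_pred)
print(float(loss))  # close to 0 when prediction and mask overlap well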
Example #5
    def __init__(self,
                 cost_mat,
                 name='weighted_categorical_crossentropy',
                 **kwargs):
        assert (cost_mat.ndim == 2)
        assert (cost_mat.shape[0] == cost_mat.shape[1])

        super().__init__(name=name, **kwargs)
        self.cost_mat = K.cast_to_floatx(cost_mat)
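Only the constructor appears in this snippet; the class's call() is not shown. As an illustrative sketch (not the original implementation), a cost matrix like this is commonly applied by weighting each sample's crossentropy by the entry for its (true, predicted) class pair; weighted_cce below is a hypothetical helper:

import tensorflow as tf
from tensorflow.keras import backend as K

def weighted_cce(y_true, y_pred, cost_mat):
    # Hypothetical sketch: weight each sample by cost_mat[true, predicted].
    true_idx = K.argmax(y_true, axis=-1)
    pred_idx = K.argmax(y_pred, axis=-1)
    weights = tf.gather_nd(cost_mat, tf.stack([true_idx, pred_idx], axis=-1))
    return weights * K.categorical_crossentropy(y_true, y_pred)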
Example #6
    def cls_loss(y_true, y_pred):
        """
        Compute the RPN objectness loss. Plain crossentropy would work, but
        here the object and background terms are computed and normalized
        separately.
        :param y_true: ground truth [batch_size, num_anchor, 1]
        :param y_pred: predictions [batch_size, num_anchor, 1]
        :return: rpn cls_loss
        """

        label_true = y_true[:, :, -1]       # ground-truth label: 1 = object, 0 = background, -1 = ignore
        label_pred = y_pred

        # Find the anchors that contain an object (label 1).
        indices_for_object = tf.where(backend.equal(label_true, 1))     # with x and y omitted, tf.where returns the coordinates where the condition holds
        labels_for_object = tf.gather_nd(y_true, indices_for_object)    # gather the ground-truth labels at those indices
        classification_for_object = tf.gather_nd(label_pred, indices_for_object)    # gather the predicted labels

        cls_loss_for_object = backend.binary_crossentropy(labels_for_object, classification_for_object)

        # Find the anchors that are actually background (label 0).
        indices_for_back = tf.where(backend.equal(label_true, 0))
        labels_for_back = tf.gather_nd(y_true, indices_for_back)
        classification_for_back = tf.gather_nd(label_pred, indices_for_back)

        # Binary crossentropy for the background anchors.
        cls_loss_for_back = backend.binary_crossentropy(labels_for_back, classification_for_back)

        # Normalization: count the positive samples.
        normalizer_pos = tf.where(backend.equal(label_true, 1))
        normalizer_pos = backend.cast(backend.shape(normalizer_pos)[0], 'float32')
        normalizer_pos = backend.maximum(backend.cast_to_floatx(1.0), normalizer_pos)

        # Count the negative samples.
        normalizer_neg = tf.where(backend.equal(label_true, 0))
        normalizer_neg = backend.cast(backend.shape(normalizer_neg)[0], 'float32')
        normalizer_neg = backend.maximum(backend.cast_to_floatx(1.0), normalizer_neg)

        # Divide each loss by its sample count; `ratio` weights the background
        # term and is bound in the enclosing scope in the original code.
        cls_loss_for_object = backend.sum(cls_loss_for_object) / normalizer_pos         # object loss
        cls_loss_for_back = ratio * backend.sum(cls_loss_for_back) / normalizer_neg     # background loss

        # Total loss.
        loss = cls_loss_for_object + cls_loss_for_back

        return loss
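A quick smoke test with made-up tensors, assuming cls_loss has been lifted to module scope; `ratio`, normally bound by the enclosing factory, is defined explicitly here:

import tensorflow as tf
from tensorflow.keras import backend

ratio = 1.0  # background weight; comes from the enclosing scope in the original
y_true = tf.constant([[[1.], [0.], [-1.]]])   # object, background, ignored
y_pred = tf.constant([[[0.9], [0.2], [0.5]]])
print(float(cls_loss(y_true, y_pred)))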
Example #7
def test_quantized_sigmoid(bits, sigmoid_type, use_real_sigmoid, test_values,
                           expected_values):
    """Test quantized_sigmoid function with three different sigmoid variants."""
    # store previous sigmoid type
    if quantized_sigmoid(4)(K.cast_to_floatx([1.0])).numpy()[0] == 1.0:
        previous_sigmoid = "hard"
    elif quantized_sigmoid(4)(K.cast_to_floatx([2.5])).numpy()[0] == 1.0:
        previous_sigmoid = "smooth"
    else:
        previous_sigmoid = "real"

    set_internal_sigmoid(sigmoid_type)
    x = K.placeholder(ndim=2)
    f = K.function(
        [x], [quantized_sigmoid(bits, use_real_sigmoid=use_real_sigmoid)(x)])
    set_internal_sigmoid(previous_sigmoid)

    result = f([test_values])[0]
    assert_allclose(result, expected_values, rtol=1e-05)
Example #8
    def multi_track_prf_fn(y_true, y_probs):
        flat_y_true, flat_y_predictions = (
            _convert_to_multi_instrument_predictions(
                y_true, y_probs, threshold, multiple_instruments_threshold,
                hparams))

        flat_y_predictions = K.cast_to_floatx(flat_y_predictions)
        flat_y_true = K.cast_to_floatx(flat_y_true)
        ignoring_melodic = (flat_y_predictions *
                            K.expand_dims(K.flatten(get_last_channel(y_true))))
        individual_sums = K.sum(K.cast(ignoring_melodic, 'int32'), 0)

        print(
            f'num_agnostic: '
            f'{K.sum(K.cast_to_floatx(get_last_channel(y_probs) > threshold))}'
        )
        print(
            f'true_num_agnostic: {K.sum(K.cast_to_floatx(get_last_channel(y_true) > 0))}'
        )
        print(
            f'both: '
            f"""{K.sum(K.cast_to_floatx(get_last_channel(y_probs) > threshold)
                     * K.cast_to_floatx(get_last_channel(y_true) > 0))}""")
        print(f'total predicted {K.sum(individual_sums)}')
        if print_report:
            print(
                classification_report(flat_y_true,
                                      ignoring_melodic,
                                      digits=4,
                                      zero_division=0))
            print([f'{i}:{x}' for i, x in enumerate(individual_sums)])

        # Definitely don't use macro accuracy here
        # because some instruments won't be present.
        precision, recall, f1, _ = (precision_recall_fscore_support(
            flat_y_true, ignoring_melodic, average='weighted',
            zero_division=0))
        scores = {
            'precision': K.constant(precision),
            'recall': K.constant(recall),
            'f1_score': K.constant(f1)
        }
        return scores
Example #9
def load_hits_padded(n_samples_by_class=12500 * 2):
  data_path = os.path.join(PROJECT_PATH, '..', 'datasets',
                           'HiTS2013_300k_samples.pkl')
  params = {
    param_keys.DATA_PATH_TRAIN: data_path,
    param_keys.BATCH_SIZE: 50
  }
  hits_loader = HiTSLoader(params, label_value=-1,
                           first_n_samples_by_class=n_samples_by_class)

  (X_train, y_train), (X_test, y_test) = hits_loader.load_data()

  X_train = normalize_hits_minus1_1(
      cast_to_floatx(
          np.pad(X_train, ((0, 0), (6, 5), (6, 5), (0, 0)), 'constant')))
  X_test = normalize_hits_minus1_1(
      cast_to_floatx(
          np.pad(X_test, ((0, 0), (6, 5), (6, 5), (0, 0)), 'constant')))
  return (X_train, y_train), (X_test, y_test)
Example #10
def CalcPed(inputs, vmin=-30.0, vmax=100.0):

    # Calculate mask of values in the inputs that fall between the minimum and maximum
    min_mask = vmin * K.ones_like(inputs)
    min_mask = K.cast_to_floatx(K.greater(inputs, min_mask))
    max_mask = vmax * K.ones_like(inputs)
    max_mask = K.cast_to_floatx(K.less(inputs, max_mask))

    # Combine upper and lower limit masks into single mask of values to average and apply
    mask = min_mask * max_mask
    sparse_waveforms = inputs * mask

    # Find average of elements not filtered by mask
    Nvals = K.sum(mask, axis=1)
    Sum = K.sum(sparse_waveforms, axis=1)

    avg = tf.math.divide_no_nan(Sum, Nvals)
    ped = K.expand_dims(avg)

    return -ped  # minus sign is because the waveform is sign-inverted
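A small eager check with a made-up waveform; samples outside [vmin, vmax] are excluded from the pedestal average:

import tensorflow as tf
from tensorflow.keras import backend as K

wave = tf.constant([[5.0, -50.0, 10.0, 200.0]])  # -50 and 200 fall outside [-30, 100]
print(CalcPed(wave).numpy())  # mean of {5.0, 10.0} is 7.5, sign-inverted: [[-7.5]]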
Example #11
def WAVCategoricalCrossentropy(l, embedding_matrix):
    emb_matrix = K.cast_to_floatx(embedding_matrix)
    emb_matrix = K.expand_dims(emb_matrix, 0)

    def loss(y_true, y_pred):
        t_vector = K.dot(y_true, emb_matrix)
        p_vector = K.dot(y_pred, emb_matrix)
        wav_term = l * tf.norm(p_vector - t_vector)
        return K.categorical_crossentropy(y_true, y_pred) + wav_term

    return loss
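A minimal sketch of wiring this loss factory into a model, using a made-up embedding matrix (shape vocab_size x embedding_dim):

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

embedding_matrix = np.random.rand(10, 8)  # made-up: 10 classes, 8-dim vectors
loss_fn = WAVCategoricalCrossentropy(l=0.1, embedding_matrix=embedding_matrix)
# model.compile(optimizer='adam', loss=loss_fn)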
Example #12
    def _get_update_list(self, kernel):
        super(E2EFSRanking, self)._get_update_list(kernel)
        self.moving_factor.assign(
            K.switch(
                K.less(self.moving_T, self.warmup_T), self.start_alpha,
                K.minimum(
                    self.alpha_M, self.start_alpha + (1. - self.start_alpha) *
                    (self.moving_T - self.warmup_T) / self.T)))
        self.moving_T.assign_add(1.)
        self.moving_units.assign(
            K.switch(
                K.less_equal(self.moving_T, self.warmup_T),
                K.cast_to_floatx(
                    (1. - self.start_alpha) * np.prod(K.int_shape(kernel))),
                K.maximum(
                    self.alpha_M,
                    np.prod(K.int_shape(kernel)) * K.pow(
                        K.cast_to_floatx(1. / np.prod(K.int_shape(kernel))),
                        self.speedup *
                        (self.moving_T - self.warmup_T) / self.T))))
Example #13
    def timbre_loss_fn(y_true, y_probs):
        # Permute to: num_instruments, batch.
        permuted_y_true = K.transpose(
            K.reshape(K.cast_to_floatx(y_true), (-1, y_true.shape[-1])))
        permuted_y_probs = K.transpose(
            K.reshape(y_probs, (-1, y_probs.shape[-1])))
        permuted_y_probs = (
            permuted_y_probs *
            K.expand_dims(K.cast_to_floatx(K.sum(permuted_y_true, 0) > 0), 0))
        loss_list = []

        for instrument_idx in range(hparams.timbre_num_classes):
            loss_list.append(
                _get_instrument_loss(permuted_y_true,
                                     permuted_y_probs[instrument_idx],
                                     instrument_idx,
                                     hparams=hparams,
                                     recall_weighing=recall_weighing))

        return tf.reduce_mean(loss_list)
Example #14
    def __init__(self, name='WeightedCategoricalCrossentropy', **kwargs):
        # assert cost_mat.ndim == 2
        # assert cost_mat.shape[0] == cost_mat.shape[1]

        self.cost_mat = np.ones((3, 3))
        minor_cost = 1.5
        self.cost_mat[0, 1] = minor_cost
        self.cost_mat[0, 2] = minor_cost
        self.cost_mat[1, 0] = minor_cost
        self.cost_mat[2, 0] = minor_cost

        super().__init__(name=name, **kwargs)
        self.cost_mat = K.cast_to_floatx(self.cost_mat)
Example #15
    def _get_update_list(self, kernel):
        update_list = super(E2EFSRanking, self)._get_update_list(kernel)
        update_list += [
            (self.moving_factor, K.switch(K.less_equal(self.moving_T, self.warmup_T),
                                          self.start_alpha,
                                          K.minimum(self.alpha_M, self.start_alpha + (1. - self.start_alpha) * (self.moving_T - self.warmup_T) / self.T))),
            (self.moving_T, self.moving_T + 1),
            (self.moving_units, K.switch(K.less_equal(self.moving_T, self.warmup_T),
                                         K.cast_to_floatx((1. - self.start_alpha) * np.prod(K.int_shape(kernel))),
                                         K.maximum(self.alpha_M, np.prod(K.int_shape(kernel)) * K.pow(K.cast_to_floatx(1. / np.prod(K.int_shape(kernel))), self.speedup * (self.moving_T - self.warmup_T) / self.T)))),
                                         # K.maximum(1., (self.T - self.start_alpha - self.speedup * (self.moving_T - self.warmup_T)) * np.prod(K.int_shape(kernel)) / self.T))),
        ]
        return update_list
Example #16
def load_hits1c(n_samples_by_class=10000, test_size=0.20, val_size=0.10,
    return_val=False, channels_to_get=[2]):
  data_path = os.path.join(PROJECT_PATH, '..', 'datasets',
                           'HiTS2013_300k_samples.pkl')
  params = {
    param_keys.DATA_PATH_TRAIN: data_path,
    param_keys.BATCH_SIZE: 50
  }
  hits_loader = HiTSLoader(params, label_value=-1,
                           first_n_samples_by_class=n_samples_by_class,
                           test_size=test_size, validation_size=val_size,
                           channels_to_get=channels_to_get)

  (X_train, y_train), (X_val, y_val), (X_test, y_test) = hits_loader.load_data()

  X_train = normalize_hits_minus1_1(cast_to_floatx(X_train))
  X_val = normalize_hits_minus1_1(cast_to_floatx(X_val))
  X_test = normalize_hits_minus1_1(cast_to_floatx(X_test))

  if return_val:
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)
  return (X_train, y_train), (X_test, y_test)
Example #17
  def call(self, inputs):
    """Performs quantized AveragePooling followed by QActivation.

    Since there is no specific parameter for averaging op, we couldn't apply
    averaging quantizer to the averaging op. We have two options:
    1. we perform our own average as sum first then multiply with the
       inversion
       of the division factor: sum(x) * quantize(1/pool_area)
    2. first, we call keras version of averaging first: y1 = keras_average(x)
       then multiply it with pool_size^2: y2 = y1 * pool_area
       Last, y3 = y2 * quantize(1/ pool_area)
    3. Improved based on #2, but multiply x with pool_area before averaging
       so that we don't lose precision during averaging. The order now becomes:
       first, multiply x with pool_area: y1 = x * pool_area
       then we call keras version of averaging: y2 = keras_average(y1)
       Last, y3 = y2 * quantize(1/ pool_area)
    4. Since there is sum_pooling operation, another solution is to use
       depthwise_conv2d with kernel weights = 1 to get the pooling sum. In this
       case we don't lose precision due to averaging. However, this solution
       will introduce extra weights to the layer, which might break our code
       elsewhere.

    Since we need to match software and hardware inference numerics, we are now
    using #3 in the implementation.
    """

    if self.average_quantizer:
      # Calculates the pool area
      if isinstance(self.pool_size, int):
        pool_area = self.pool_size * self.pool_size
      else:
        pool_area = np.prod(self.pool_size)

      # Calculates the pooling average of x*pool_area
      x = super(QAveragePooling2D, self).call(inputs*pool_area)

      # Quantizes the multiplication factor.
      mult_factor = 1.0 / pool_area
      q_mult_factor = self.average_quantizer_internal(mult_factor)
      q_mult_factor = K.cast_to_floatx(q_mult_factor)

      # Computes pooling average.
      x = x * q_mult_factor

    else:
      # Since no quantizer is available, we directly call the keras layer
      x = super(QAveragePooling2D, self).call(inputs)

    if self.activation is not None:
      return self.activation(x)
    return x
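The precision argument behind option #3 can be checked numerically: averaging x * pool_area recovers the exact window sum, which is then scaled once by the (quantized) reciprocal. A small sketch, with an exact reciprocal standing in for the average_quantizer:

import numpy as np

x = np.arange(16.0).reshape(4, 4)            # made-up activations
pool_area = 4                                # 2x2 pooling window
window = x[:2, :2]                           # one pooling window
y1 = (window * pool_area).mean()             # keras_average(x * pool_area) == exact window sum
q_mult_factor = 1.0 / pool_area              # stand-in for average_quantizer(1/pool_area)
print(y1 * q_mult_factor == window.mean())   # True: the average loses no precision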
Example #18
def get_croppings_for_single_image(conv_output, note_croppings,
                                   hparams=None, temporal_scale=1.0):
    """Separate the note regions for an individual spectrogram.
    A high-pass filter removes the effect of values below
    the fundamental frequency.
    """
    num_notes = K.int_shape(note_croppings)[0]
    pitch_idx_fn = functools.partial(
        get_cqt_index if hparams.timbre_spec_type == 'cqt' else get_mel_index,
        hparams=hparams)
    pitch_to_spec_index = tf.map_fn(
        pitch_idx_fn,
        tf.gather(note_croppings, indices=0, axis=1))
    gathered_pitches = (K.cast_to_floatx(pitch_to_spec_index)
                        * K.int_shape(conv_output)[1]
                        / constants.TIMBRE_SPEC_BANDS)
    pitch_mask = K.expand_dims(
        K.cast(tf.where(tf.sequence_mask(
            K.cast(
                gathered_pitches, dtype='int32'
            ), K.int_shape(conv_output)[1]
            # Don't lose gradient completely so multiply by 2e-3.
        ), 2e-3, 1), tf.float32), -1)

    trimmed_list = []
    start_idx = K.cast(
        tf.gather(note_croppings, indices=1, axis=1)
        / hparams.timbre_hop_length
        / temporal_scale, dtype='int32'
    )
    end_idx = K.cast(
        tf.gather(note_croppings, indices=2, axis=1)
        / hparams.timbre_hop_length
        / temporal_scale, dtype='int32'
    )
    for i in range(num_notes):
        if end_idx[i] < 0:
            # This must be a padded value note.
            trimmed_list.append(
                np.zeros(shape=(1, K.int_shape(conv_output)[1], K.int_shape(conv_output)[2]),
                         dtype=K.floatx()))
        else:
            trimmed_spec = conv_output[min(start_idx[i], K.int_shape(conv_output)[0] - 1)
                                       :max(end_idx[i], start_idx[i] + 1)]
            max_pool = K.max(trimmed_spec, 0)
            trimmed_list.append(K.expand_dims(max_pool, 0))

    broadcasted_spec = K.concatenate(trimmed_list, axis=0)

    mask = broadcasted_spec * pitch_mask
    return normalize_and_weigh(mask, num_notes, gathered_pitches, hparams)
Example #19
def gdice(y_true, y_pred):
    '''
    Args:
        y_true: label map of size B x H x W x 1
        y_pred: feature map of size B x H x W x C, 'softmax' activated
    '''
    y_true_onehot = tf.cast(tf.squeeze(y_true, axis=-1), tf.int32)
    y_true_onehot = K.cast_to_floatx(K.one_hot(y_true_onehot, y_pred.shape[-1]))
    y_pred = K.cast_to_floatx(y_pred)

    w = tf.reduce_sum(y_true_onehot, axis=[1, 2])
    # w = 1 / (w + 1)
    w = 1 / (w ** 2 + K.epsilon())
    w = tf.stop_gradient(w)

    numerator = tf.reduce_sum(y_true_onehot * y_pred, axis=[1, 2])
    numerator = w * numerator

    denominator = tf.reduce_sum(y_true_onehot + y_pred, axis=[1, 2])
    denominator = w * denominator

    dice_loss = 1 - (2 * tf.reduce_sum(numerator, axis=1) + 1) / (tf.reduce_sum(denominator, axis=1) + 1)
    return tf.reduce_mean(dice_loss)
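A smoke test with random tensors (B=2, H=W=8, C=3; shapes are made up), assuming the imports used above (tf and K) are in scope:

import tensorflow as tf

y_true = tf.cast(tf.random.uniform((2, 8, 8, 1), maxval=3, dtype=tf.int32), tf.float32)
y_pred = tf.nn.softmax(tf.random.normal((2, 8, 8, 3)), axis=-1)
print(float(gdice(y_true, y_pred)))  # scalar, roughly in [0, 1)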
Example #20
def normalize_and_weigh(inputs, num_notes, pitches, hparams):
    """Decrease the values higher above the fundamental frequency."""
    gradient_pitch_mask = 1 + K.int_shape(inputs)[-2] - K.arange(K.int_shape(inputs)[-2])
    gradient_pitch_mask = gradient_pitch_mask / K.max(gradient_pitch_mask)
    gradient_pitch_mask = K.expand_dims(K.cast_to_floatx(gradient_pitch_mask), 0)
    gradient_pitch_mask = tf.repeat(gradient_pitch_mask, axis=0, repeats=num_notes)
    gradient_pitch_mask = (gradient_pitch_mask
                           + K.expand_dims(pitches / K.int_shape(inputs)[-2], -1))
    exp = (math.log(hparams.timbre_gradient_exp) if hparams.timbre_spec_log_amplitude
           else hparams.timbre_gradient_exp)
    gradient_pitch_mask = tf.minimum(gradient_pitch_mask ** exp, 1.0)
    gradient_pitch_mask = K.expand_dims(gradient_pitch_mask, -1)
    gradient_product = inputs * gradient_pitch_mask
    return gradient_product
Example #21
def WAVSparseCategoricalCrossentropy(l, embedding_matrix):
    emb_matrix = K.cast_to_floatx(embedding_matrix)
    emb_matrix = K.expand_dims(emb_matrix, 0)

    def loss(y_true, y_pred):
        y_true = K.cast(
            y_true, dtype='int32'
        )  # cast needed for some reason, even though the output of the dataset is int32
        t_vector = tf.gather_nd(emb_matrix, y_true)
        p_vector = K.dot(y_pred, emb_matrix)
        wav_term = l * tf.norm(p_vector - t_vector)
        return K.sparse_categorical_crossentropy(y_true, y_pred) + wav_term

    return loss
Example #22
    def get_constants(self, inputs, training=None):
        constants = []
        if self.implementation != 0 and 0 < self.dropout < 1:
            input_shape = K.int_shape(inputs)
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, int(input_dim)))

            def dropped_inputs():
                return K.dropout(ones, self.dropout)

            dp_mask = [
                K.in_train_phase(dropped_inputs, ones, training=training)
                for _ in range(4)
            ]
            constants.append(dp_mask)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])

        if 0 < self.recurrent_dropout < 1:
            ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, self.units))

            def dropped_inputs():
                return K.dropout(ones, self.recurrent_dropout)

            rec_dp_mask = [
                K.in_train_phase(dropped_inputs, ones, training=training)
                for _ in range(4)
            ]
            constants.append(rec_dp_mask)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])

        # append the input as well for use later
        constants.append(inputs)
        return constants
Example #23
def compute_fd_loss(boxes, scores, annotations, iou_threshold=0.75):
    """compute the overlap of boxes with annotations"""
    iou = overlap(boxes, annotations)

    max_iou = K.max(iou, axis=1, keepdims=True)
    targets = K.cast(K.greater_equal(max_iou, iou_threshold), K.floatx())

    # compute the loss
    loss = focal(targets, scores)  # alpha=self.alpha, gamma=self.gamma)

    # compute the normalizer: the number of annotations present in the image
    normalizer = K.cast(K.shape(annotations)[0], K.floatx())
    normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)

    return K.sum(loss) / normalizer
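The K.maximum(K.cast_to_floatx(1.0), ...) clamp above guards against images with zero annotations; a quick check:

import tensorflow as tf
from tensorflow.keras import backend as K

annotations = tf.zeros((0, 4))  # an image with no annotations
normalizer = K.cast(K.shape(annotations)[0], K.floatx())
normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)
print(float(normalizer))  # 1.0, so K.sum(loss) / normalizer never divides by zero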
Example #24
    def PretrainedEmbedding(self):

        inputs = Input(shape=(None, ), dtype='int32')
        embeddings = KeyedVectors.load_word2vec_format(
            self.word_embedding_path, binary=False)
        word_embeddings_weights = K.cast_to_floatx(
            np.concatenate((np.zeros((1, embeddings.syn0.shape[-1]),
                                     dtype=np.float32), embeddings.syn0),
                           axis=0))
        embeds = Embedding(len(word_embeddings_weights),
                           word_embeddings_weights.shape[-1],
                           weights=[word_embeddings_weights],
                           trainable=False)(inputs)

        return Model(inputs=inputs, outputs=embeds, name='embedding')
Example #25
    def _split_and_predict(self, melodic_spec, timbre_spec,
                           present_instruments, duration=16):
        samples_length = duration * self.hparams.sample_rate
        frames, onsets, offsets = None, None, None
        melodic_spec_len = K.int_shape(melodic_spec)[1]
        timbre_spec_len = K.int_shape(timbre_spec)[1]
        # Remove edge predictions when splitting because they
        # are less likely to be correct than predictions in the middle.
        #
        # We pad each split and take the middle predictions to allow
        # our conv kernels to work properly.
        edge_spacing = 16
        for i in range(0, (K.int_shape(melodic_spec)[1]
                           * self.hparams.spec_hop_length),
                       samples_length):
            m_start = int(i / self.hparams.spec_hop_length)
            m_end = min(melodic_spec_len,
                        int(edge_spacing * 2 + (i + samples_length)
                            / self.hparams.spec_hop_length))
            # Timbre hop-length is 256, as opposed to 512, so the
            # space must be doubled.
            t_start = int(i / self.hparams.timbre_hop_length)
            t_end = min(timbre_spec_len,
                        int(edge_spacing * 4 + (i + samples_length)
                            / self.hparams.timbre_hop_length))
            split_pred = self.model.call([
                K.expand_dims(melodic_spec[0, m_start:m_end], 0),
                K.expand_dims(timbre_spec[0, t_start:t_end], 0),
                K.cast_to_floatx(present_instruments)],
                training=False)
            if i == 0:
                frames = split_pred[0][0][:-edge_spacing]
                onsets = split_pred[1][0][:-edge_spacing]
                offsets = split_pred[2][0][:-edge_spacing]
            else:
                # Ignore the edge temporal info.
                frames = np.concatenate(
                    [frames, split_pred[0][0][edge_spacing:-edge_spacing]],
                    axis=0)
                onsets = np.concatenate(
                    [onsets, split_pred[1][0][edge_spacing:-edge_spacing]],
                    axis=0)
                offsets = np.concatenate(
                    [offsets, split_pred[2][0][edge_spacing:-edge_spacing]],
                    axis=0)
        return [np.expand_dims(frames, 0),
                np.expand_dims(onsets, 0),
                np.expand_dims(offsets, 0)]
Example #26
    def preprocess_group_entry(self, image, annotations):
        """ Preprocess image and its annotations.
        """
        # preprocess the image
        image = self.preprocess_image(image)

        # resize image
        image, image_scale = self.resize_image(image)

        # apply resizing to annotations too
        annotations['bboxes'] *= image_scale

        # convert to the wanted keras floatx
        image = K.cast_to_floatx(image)

        return image, annotations
Example #27
    def predict(self, image, keep_size=True):

        sz = image.shape
        # model inference
        img = np.squeeze(image)
        img = image_resize_np([img], (self.config.H, self.config.W))
        img = K.cast_to_floatx(img)
        raw = self.model(img)
        raw = {m: o for m, o in zip(self.config.modules, raw)}
        # post processing
        instances = self.postprocess(raw)
        # resize to original resolution
        if keep_size:
            instances = cv2.resize(instances, (sz[1], sz[0]),
                                   interpolation=cv2.INTER_NEAREST)

        return instances, raw
Example #28
def read_data(data_path, placeholder, max_input_length=500):
    logger.info("Logging data {}...".format(placeholder))

    sf = SingleFile(data_path)
    data = []
    all_point_number = 500
    # all_point_number = sf.point_number
    with tqdm(total=all_point_number) as pbar:
        pbar.set_description("Reading {}".format(placeholder))
        for i in range(all_point_number):
            point = sf.get_one_point()
            x_data, y_data = point.get_data()
            output = data_handle(x_data, y_data, max_input_length)
            data.append(output.tolist())
            pbar.update(1)

    return K.cast_to_floatx(np.array(data))
Example #29
    def predict_multi_sequence(self, melodic_spec, timbre_spec,
                               present_instruments=None, qpm=None):
        if present_instruments is None:
            present_instruments = K.expand_dims(np.ones(self.hparams.timbre_num_classes), 0)
        y_pred = self._split_and_predict(melodic_spec, timbre_spec, present_instruments)
        multi_track_prf_wrapper(
            threshold=self.hparams.predict_frame_threshold,
            multiple_instruments_threshold=self.hparams.multiple_instruments_threshold,
            hparams=self.hparams, print_report=True, only_f1=False)(
            K.cast_to_floatx(y_pred[1] > self.hparams.predict_frame_threshold), y_pred[1])
        permuted_y_probs = K.permute_dimensions(y_pred[1][0], (2, 0, 1))
        print(f'total mean: '
              f'{[f"{i}:{K.max(permuted_y_probs[i])}" for i, x in enumerate(permuted_y_probs)]}')

        frame_predictions = convert_multi_instrument_probs_to_predictions(
            y_pred[0],
            self.hparams.predict_frame_threshold,
            self.hparams.multiple_instruments_threshold)[0]
        onset_predictions = convert_multi_instrument_probs_to_predictions(
            y_pred[1],
            self.hparams.predict_onset_threshold,
            self.hparams.multiple_instruments_threshold)[0]
        offset_predictions = convert_multi_instrument_probs_to_predictions(
            y_pred[2],
            self.hparams.predict_offset_threshold,
            self.hparams.multiple_instruments_threshold)[0]
        active_onsets = convert_multi_instrument_probs_to_predictions(
            y_pred[1],
            self.hparams.active_onset_threshold,
            self.hparams.multiple_instruments_threshold)[0]

        if self.hparams.use_all_instruments:
            # Mute the instruments we don't want here.
            # If this happens we are trying to isolate certain
            # instruments, knowing there may be others.
            frame_predictions = tf.logical_and(frame_predictions, present_instruments > 0)
            onset_predictions = tf.logical_and(onset_predictions, present_instruments > 0)
            offset_predictions = tf.logical_and(offset_predictions, present_instruments > 0)
            active_onsets = tf.logical_and(active_onsets, present_instruments > 0)

        return sequence_prediction_util.predict_multi_sequence(
            frame_predictions, onset_predictions,
            offset_predictions, active_onsets,
            qpm=qpm, hparams=self.hparams,
            min_pitch=constants.MIN_MIDI_PITCH)
Example #30
    def _cls_loss(y_true, y_pred):
        #   y_true, y_pred shape=[batch_size, num_anchor, 1]
        #   Keep all samples that are not ignored: -1 = ignore, 0 = background, 1 = object.
        no_ignore_mask = tf.where(K.not_equal(y_true, -1))
        true_label = tf.gather_nd(y_true, no_ignore_mask)
        predict_label = tf.gather_nd(y_pred, no_ignore_mask)

        cls_loss = K.binary_crossentropy(true_label, predict_label)
        cls_loss = K.sum(cls_loss)

        #   Normalize by the number of non-ignored samples.
        normalizer_no_ignore = K.cast(K.shape(no_ignore_mask)[0], K.floatx())
        normalizer_no_ignore = K.maximum(K.cast_to_floatx(1.0),
                                         normalizer_no_ignore)

        # Total loss.
        loss = cls_loss / normalizer_no_ignore
        return loss
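A smoke test mirroring the one for Example #6, assuming _cls_loss has been lifted to module scope, with one object, one background, and one ignored anchor (tensors made up):

import tensorflow as tf
from tensorflow.keras import backend as K

y_true = tf.constant([[[1.], [0.], [-1.]]])
y_pred = tf.constant([[[0.9], [0.2], [0.5]]])
print(float(_cls_loss(y_true, y_pred)))  # averaged over the 2 non-ignored anchors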