Пример #1
0
    def attention(query, key, value):
        """Scaled dot-product attention of ``query`` over ``key``/``value``.

        Every element of the ``query`` sequence is scored against every
        element of the ``key`` sequence; the softmax over those scores is used
        to weight the elements of ``value``.  ``obey_sequence_order`` and
        ``max_seq_len`` are read from the enclosing scope.

        Args:
            query: sequence tensor, [#, *] [q_dim].
            key: sequence tensor with feature size key_dim (q_dim == key_dim).
            value: sequence tensor with feature size value_dim.

        Returns:
            The attended values on the dynamic axes of ``query``,
            [#, *] [value_dim].

        Raises:
            ValueError: if ``obey_sequence_order`` is set but ``max_seq_len``
                is not.
        """
        # Fail fast, before any graph node is created.
        if obey_sequence_order and not max_seq_len:
            raise ValueError("max_seq_len must be defined when obey_sequence_order is True")

        # Scores are scaled by sqrt(d_k), not d_k, so that their variance stays
        # independent of the query dimension.
        # The dimension is obtained by summing ones_like(query):
        # cannot use sequence.last, will conflict with recurrence
        dk = C.sqrt(C.reduce_sum(C.ones_like(query)))
        # dk: [#, *] [1, ] and value = sqrt(dim_of_query)

        unpacked_key = C.sequence.unpack(key, padding_value=0, no_mask_output=True)  # [#] [-3, key_dim]
        unpacked_value = C.sequence.unpack(value, padding_value=0, no_mask_output=True)  # [#] [-3, value_dim]

        broadcasted_key = C.sequence.broadcast_as(unpacked_key, query)  # [#, *] [-3, key_dim]
        scaled = C.times_transpose(query, broadcasted_key) / dk
        # [#, *] [q_dim] @ [#, *] [key_dim, -3], assert q_dim == key_dim
        # scaled: [#, *] [-3, ] => for every key seq element, there is a corresponding score

        # mask out invalid temporal connections to obey the sequence order
        if obey_sequence_order:
            unpacked_scaled = C.sequence.unpack(scaled, padding_value=0).outputs[0]
            # unpacked_scaled: [#] [-3, -3]  <== matrix will be top right diagonally zero-ed

            minus_inf = C.constant(-1e+30)
            # Lower-triangular matrix of ones: position i may only attend to positions <= i.
            valid_connections = C.Constant(np.tril(np.ones((max_seq_len, max_seq_len)), k=0))  # [] [max_seq, max_seq]
            valid_connections = C.reconcile_dynamic_axes(valid_connections, unpacked_scaled)  # [#] [max_seq, max_seq]
            # Crop the max_seq_len-sized mask down to the actual unpacked size.
            valid_connections = C.crop_manual(valid_connections, unpacked_scaled, 0, 0)  # [#] [-3, -3]
            # Disallowed positions get a very negative score so softmax gives them ~0 weight.
            unpacked_scaled = C.element_select(valid_connections, unpacked_scaled, minus_inf)  # [#] [-3, -3]
            scaled = C.to_sequence_like(unpacked_scaled, query)  # [#, *] [-3]

        attended = C.times(C.softmax(scaled, axis=-1), C.sequence.broadcast_as(unpacked_value, query))  # [#, *] [value_dim,]
        return attended
Пример #2
0
def test_auto_broadcast_reconcile_issue():
    """Check the direct inputs of a ``reconcile_dynamic_axes`` node.

    A batch-only input 'y' is reconciled with a sequence input 'x'.  The
    resulting node must have exactly those two variables as its inputs, in
    that order (i.e. nothing extra, such as an automatic broadcast, was
    inserted in between).
    """
    seq_operand = C.sequence.input((3, ), name='x')
    batch_operand = C.input((3, ), name='y')
    reconciled = C.reconcile_dynamic_axes(batch_operand, seq_operand)

    owner_inputs = reconciled.owner.inputs
    assert len(owner_inputs) == 2
    first, second = owner_inputs[0], owner_inputs[1]
    assert first.name == 'y' and second.name == 'x'
Пример #3
0
def test_auto_broadcast_reconcile_issue():
    """Ensure reconciling 'y' with 'x' leaves both as the node's only inputs.

    The owner of the reconciled output must list exactly two inputs, named
    'y' and 'x' in that order.
    """
    with_sequence_axis = C.sequence.input((3,), name='x')
    batch_only = C.input((3,), name='y')

    owner = C.reconcile_dynamic_axes(batch_only, with_sequence_axis).owner
    operands = owner.inputs

    # Two operands only: the value being reconciled and the axes donor.
    assert len(operands) == 2
    assert operands[0].name == 'y' and operands[1].name == 'x'
Пример #4
0
def broadcast_xy(input_vec, h, w):
    """Broadcast an input vector of length d to a tensor of size (d x h x w).

    Args:
        input_vec: vector whose ``shape[0]`` gives d.
        h: target height, must be positive.
        w: target width, must be positive.

    Returns:
        The sum of a (d x h x w) zero constant and the vector reshaped to
        (d x 1 x 1).
    """
    assert h > 0 and w > 0
    depth = input_vec.shape[0]

    # View the vector as (d x 1 x 1) so it can be added against (d x h x w).
    column = C.reshape(input_vec, (depth, 1, 1))

    # Zero constant of the target size, given the dynamic axes of the input.
    zeros = C.constant(np.zeros((depth, h, w), dtype=np.float32))
    anchored_zeros = C.reconcile_dynamic_axes(zeros, column)

    return anchored_zeros + column
Пример #5
0
def test_to_sequence_backprop(device_id):
    """Compare gradients of a sequence model with a ``to_sequence`` clone.

    An Embedding -> LSTM recurrence -> Dense model is differentiated twice:
    once fed through a sparse sequence input, and once through a clone whose
    input is a padded non-sequence tensor converted with ``C.to_sequence``.
    The losses and all non-sparse parameter gradients must be exactly equal.
    """
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = hidden_dim = num_labels = 2

    features = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        embedded = C.layers.Embedding(emb_dim, name='embed')(features)
        encoded = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(embedded)
        z = C.layers.Dense(num_labels, name='classify')(encoded)

    labels = C.sequence.input_variable(num_labels, is_sparse=True, name='labels')
    ce = C.cross_entropy_with_softmax(z, labels)

    # Two sequences of lengths 3 and 2.
    first_seq = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    second_seq = [[0, 0, 1], [0, 1, 1]]
    first_seq_labels = [[0, 1], [0, 1], [1, 0]]
    second_seq_labels = [[1, 0], [0, 1]]
    label_batch = [_to_csr(first_seq_labels), _to_csr(second_seq_labels)]
    feature_batch = [_to_csr(first_seq), _to_csr(second_seq)]

    reference_feed = {features: feature_batch, labels: label_batch}
    reference_grads, reference_loss = ce.grad(reference_feed, wrt=ce.parameters,
                                              outputs=[ce], as_numpy=False)

    # Clone of the model fed by a non-sequence input that is turned into a
    # sequence with to_sequence.
    flat_features = C.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='non_seq_features')
    seq_lengths = C.input_variable((), name='sequence_lengths')
    as_sequence = C.to_sequence(flat_features, seq_lengths)
    identity = np.eye(input_vocab_size, dtype=np.float32)
    as_sequence = C.reconcile_dynamic_axes(C.times(as_sequence, identity), labels)
    ce_clone = ce.clone('share', {features: as_sequence})

    # The shorter sequence is padded with a zero row to the common length 3.
    padded_batch = [first_seq, second_seq + [[0, 0, 0]]]
    flat_features_value = C.NDArrayView.from_csr(_to_csr(padded_batch), shape=(2, 3, 3))
    seq_lengths_value = np.asarray([3, 2], dtype=np.float32)

    def clone_argument(wanted_name):
        """Return the first argument of the clone carrying the given name."""
        return next(arg for arg in ce_clone.arguments if arg.name == wanted_name)

    clone_feed = {
        clone_argument('non_seq_features'): flat_features_value,
        clone_argument('sequence_lengths'): seq_lengths_value,
        clone_argument('labels'): label_batch,
    }
    clone_grads, clone_loss = ce_clone.grad(clone_feed, wrt=ce_clone.parameters,
                                            outputs=[ce_clone], as_numpy=False)

    for seq_index in (0, 1):
        assert np.array_equal(reference_loss.as_sequences()[seq_index],
                              clone_loss.as_sequences()[seq_index])

    for param in reference_grads:
        reference_grad = reference_grads[param]
        if reference_grad.is_sparse:
            continue
        assert np.array_equal(reference_grad.asarray(), clone_grads[param].asarray())
Пример #6
0
def test_to_sequence_backprop(device_id):
    """Check that ``C.to_sequence`` back-propagates like a native sequence input.

    The same Embedding -> LSTM recurrence -> Dense classifier is evaluated
    with a sparse sequence input and with a clone whose input is a padded
    non-sequence tensor plus per-sequence lengths.  Losses and all non-sparse
    parameter gradients of the two runs must match exactly.
    """
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = hidden_dim = num_labels = 2

    seq_features = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        net = C.layers.Embedding(emb_dim, name='embed')(seq_features)
        net = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(net)
        net = C.layers.Dense(num_labels, name='classify')(net)

    seq_labels = C.sequence.input_variable(num_labels, is_sparse=True, name='labels')
    ce = C.cross_entropy_with_softmax(net, seq_labels)

    rows_a = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    rows_b = [[0, 0, 1], [0, 1, 1]]
    labels_a = [[0, 1], [0, 1], [1, 0]]
    labels_b = [[1, 0], [0, 1]]
    labels_value = [_to_csr(labels_a), _to_csr(labels_b)]

    grads_ref, loss_ref = ce.grad(
        {seq_features: [_to_csr(rows_a), _to_csr(rows_b)], seq_labels: labels_value},
        wrt=ce.parameters, outputs=[ce], as_numpy=False)

    # Replace the sequence input by a non-sequence input converted with
    # to_sequence; parameters are shared with the reference model.
    packed_input = C.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='non_seq_features')
    lengths_input = C.input_variable((), name='sequence_lengths')
    converted = C.to_sequence(packed_input, lengths_input)
    converted = C.reconcile_dynamic_axes(
        C.times(converted, np.eye(input_vocab_size, dtype=np.float32)), seq_labels)
    ce_clone = ce.clone('share', {seq_features: converted})

    # rows_b is one step shorter, so it is padded with a row of zeros.
    packed_value = C.NDArrayView.from_csr(_to_csr([rows_a, rows_b + [[0, 0, 0]]]), shape=(2, 3, 3))
    lengths_value = np.asarray([3, 2], dtype=np.float32)

    def by_name(target):
        """Look up the first argument of the cloned function with this name."""
        return next(candidate for candidate in ce_clone.arguments if candidate.name == target)

    packed_arg = by_name('non_seq_features')
    labels_arg = by_name('labels')
    lengths_arg = by_name('sequence_lengths')
    grads_clone, loss_clone = ce_clone.grad(
        {packed_arg: packed_value, lengths_arg: lengths_value, labels_arg: labels_value},
        wrt=ce_clone.parameters, outputs=[ce_clone], as_numpy=False)

    assert np.array_equal(loss_ref.as_sequences()[0], loss_clone.as_sequences()[0])
    assert np.array_equal(loss_ref.as_sequences()[1], loss_clone.as_sequences()[1])

    for param in grads_ref:
        if grads_ref[param].is_sparse:
            continue
        expected = grads_ref[param].asarray()
        actual = grads_clone[param].asarray()
        assert np.array_equal(expected, actual)
def create_train_model(s2smodel, embed_layer):
    '''
    Build the training graph: input variables, logits and loss/error criterion.

    The question inputs ('qwk', 'qwn') share one dynamic sequence axis and the
    answer inputs ('awk', 'awn') share another.  Their feature sizes are read
    from the module-level ``myConfig`` ('wg_dim' and 'wn_dim').

    Args:
        s2smodel: callable applied as ``s2smodel(a_processed, q_processed)``;
            its result is used as the logits sequence.
        embed_layer: callable applied to each pair of inputs
            (``embed_layer(awk, awn)`` and ``embed_layer(qwk, qwn)``).

    Returns:
        A tuple ``(input_ph, logits, criterion)``: ``input_ph`` maps the input
        names to their input variables, ``logits`` are the model outputs
        reconciled with the labels' dynamic axes, and ``criterion`` combines
        the cross-entropy loss and the classification error.
    '''
    q = C.Axis.new_unique_dynamic_axis('q')
    a = C.Axis.new_unique_dynamic_axis('a')

    def dense_sequence_input(dim_key, axis, name):
        """Create a dense sequence input whose size is myConfig[dim_key]."""
        return C.sequence.input_variable(myConfig[dim_key],
                                         sequence_axis=axis,
                                         is_sparse=False,
                                         name=name)

    qwk = dense_sequence_input('wg_dim', q, 'qwk')
    qwn = dense_sequence_input('wn_dim', q, 'qwn')
    awk = dense_sequence_input('wg_dim', a, 'awk')
    awn = dense_sequence_input('wn_dim', a, 'awn')

    input_ph = {'qwk': qwk, 'qwn': qwn, 'awk': awk, 'awn': awn}

    a_processed = embed_layer(awk, awn)
    q_processed = embed_layer(qwk, qwn)
    a_onehot = C.splice(awk, awn)
    print("a_onehot shape:{}".format(a_onehot.output))

    # query generate answer
    logits = s2smodel(a_processed, q_processed)
    # Drop the last step: there is no label after the final answer element.
    logits = C.sequence.slice(logits, 0, -1)
    print('logits shape:{}'.format(logits.output))

    # Drop the first step so labels are the inputs shifted by one.
    labels = C.sequence.slice(a_onehot, 1, 0)  # <s> a b c </s> -> a b c </s>
    print('labels shape:{}'.format(labels.output))

    # Both slices have the same length but distinct dynamic axes; align them.
    logits = C.reconcile_dynamic_axes(logits, labels)
    loss = C.cross_entropy_with_softmax(logits, labels)
    errs = C.classification_error(logits, labels)
    return input_ph, logits, C.combine(loss, errs)
Пример #8
0
 def inner(a):
     """Apply ``C.sequence.where`` to a sequence of ones laid out like ``a``.

     The result is reconciled with the dynamic axes of ``a`` and given an
     extra trailing static axis.
     """
     ones_along_a = C.sequence.broadcast_as(1, a)
     where_ones = C.sequence.where(ones_along_a)
     # reconcile_dynamic_axes is necessary to avoid subtle bugs e.g. sequence.where and one_hot
     aligned = C.reconcile_dynamic_axes(where_ones, a)
     return C.expand_dims(aligned, axis=-1)
Пример #9
0
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two layers hierarchical softmax function with sequence axis input:

    The outputs are grouped into ``ceil(sqrt(num_output_classes))`` classes of
    equally many outputs each. The probability of an output is the
    probability of its class times the probability of the output within that
    class.

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int (not used by this function; kept for interface compatibility)
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: class:`~cntk.ops.functions.Function`, probability of the target output
        class_probs: class:`~cntk.ops.functions.Function`, probabilities of the classes
        all_probs: a list of class:`~cntk.ops.functions.Function`, one entry per class
    '''
    input_dim = input_var.shape[0]

    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    # First level: distribution over the classes.
    class_probs = C.softmax(b1 + C.times(input_var, w1))

    # Second level: pick the weights and bias belonging to the target class.
    w2_temp = C.gather(w2s, target_class)
    w2 = C.reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = C.reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = C.times(input_var, w2)
    probs_in_class = C.softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    # Dot product with the one-hot target selects the target's probability.
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + C.times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        # n_outputs_per_class equals n_classes here, so this one-hot matches class_probs.
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
Пример #10
0
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two layers hierarchical softmax function with sequence axis input:

    The outputs are grouped into ``ceil(sqrt(num_output_classes))`` classes of
    equally many outputs each. The probability of an output is the
    probability of its class times the probability of the output within that
    class.

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int (not used by this function; kept for interface compatibility)
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: class:`~cntk.ops.functions.Function`, probability of the target output
        class_probs: class:`~cntk.ops.functions.Function`, probabilities of the classes
        all_probs: a list of class:`~cntk.ops.functions.Function`, one entry per class
    '''
    input_dim = input_var.shape[0]

    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    # First level: distribution over the classes.
    class_probs = C.softmax(b1 + C.times(input_var, w1))

    # Second level: pick the weights and bias belonging to the target class.
    w2_temp = C.gather(w2s, target_class)
    w2 = C.reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = C.reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = C.times(input_var, w2)
    probs_in_class = C.softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    # Dot product with the one-hot target selects the target's probability.
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + C.times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        # n_outputs_per_class equals n_classes here, so this one-hot matches class_probs.
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
Пример #11
0
 def inner(a):
     """Apply ``C.sequence.where`` to ones shaped like ``Cx.scalar(a)``.

     The result is reconciled with the dynamic axes of ``a``.
     """
     ones = C.ones_like(Cx.scalar(a))
     where_ones = C.sequence.where(ones)
     # reconcile_dynamic_axes is necessary to avoid subtle bugs e.g. sequence.where and one_hot
     return C.reconcile_dynamic_axes(where_ones, a)
Пример #12
0
    def call(self, x, mask=None):
        """Return the prior boxes stored on the layer, laid out per batch item.

        The boxes are read from ``self.prior_boxes``.  ``x`` is only used to
        obtain the batch dimension / dynamic axes; ``mask`` is ignored.

        Backend handling:
            * ``tensorflow``: the boxes get a leading axis of size 1 and are
              tiled along it ``tf.shape(x)[0]`` times (the batch size).
            * ``cntk``: the boxes are wrapped in a constant whose dynamic axes
              are reconciled with those of ``x``.
            * anything else (including ``theano``): not implemented yet; the
              un-tiled tensor with a leading axis of size 1 is returned.

        Args:
            x: input tensor of the layer.
            mask: unused.

        Returns:
            The backend tensor holding the prior boxes.
        """
        # Convert to a backend tensor and add a leading (batch) axis of size 1.
        prior_boxes_tensor = K.expand_dims(K.variable(self.prior_boxes), 0)
        backend_name = K.backend()

        if backend_name == 'tensorflow':
            # tf.shape(x)[0] is the (dynamic) batch size; the other axes are kept.
            pattern = [tf.shape(x)[0], 1, 1]
            return K.tile(prior_boxes_tensor, pattern)

        if backend_name == 'cntk':
            # CNTK has no explicit batch axis to tile along; instead the
            # constant is given the dynamic axes of x.
            prior_boxes_constants = C.Constant(self.prior_boxes)
            return C.reconcile_dynamic_axes(prior_boxes_constants,
                                            dynamic_axes_as=x)

        # TODO: theano is not handled; the un-tiled tensor is returned as is.
        return prior_boxes_tensor
Пример #13
0
    def attention_layer(self, context, query, dim):
        """Build an attention block between ``context`` and ``query``.

        Both inputs are projected to ``self.hidden_dim`` by bias-free Dense
        layers with ReLU activation.  The attention computation is written
        once on placeholders (``cln_mem_ph`` / ``cln_inp_ph``) and then cloned
        twice with different bindings:

        * ``weighted_q``: ``c_over_q`` with the projected query as memory and
          the projected context as input;
        * ``c2c``: ``q_over_c`` with the projected context bound to both
          placeholders.

        The result splices the unprojected context placeholder,
        ``weighted_q`` and ``c2c`` and is wrapped as a block named
        'attention_layer'.

        Args:
            context: bound to the ``input_ph`` placeholder of shape ``(dim,)``.
            query: bound to the ``input_mem`` placeholder of shape ``(dim,)``.
            dim: static feature size of both ``context`` and ``query``.

        Returns:
            The block function produced by ``C.as_block``.
        """
        input_ph = C.placeholder(shape=(dim, ))
        input_mem = C.placeholder(shape=(dim, ))
        # Options apply to both Dense projections created below.
        with C.layers.default_options(bias=False, activation=C.relu):
            attn_proj_enc = C.layers.Dense(self.hidden_dim,
                                           init=glorot_uniform(),
                                           input_rank=1,
                                           name="Wqu")
            attn_proj_dec = C.layers.Dense(self.hidden_dim,
                                           init=glorot_uniform(),
                                           input_rank=1)

        inputs_ = attn_proj_enc(input_ph)  # [#,c][d]
        memory_ = attn_proj_dec(input_mem)  # [#,q][d]

        # Placeholders on which the attention sub-graph is defined; they are
        # substituted via clone() further down.
        cln_mem_ph = C.placeholder()  # [#,q][?=d]
        cln_inp_ph = C.placeholder()  # [#,c][?=d]
        unpack_inputs, inputs_mask = C.sequence.unpack(
            cln_inp_ph, 0).outputs  # [#][*=c,d] [#][*=c]
        expand_inputs = C.sequence.broadcast_as(unpack_inputs,
                                                cln_mem_ph)  # [#,q][*=c,d]
        # Similarity of every memory step with every input step, scaled by
        # sqrt(hidden_dim).
        matrix = C.reshape(
            C.times_transpose(cln_mem_ph, expand_inputs) /
            (self.hidden_dim**0.5), (-1, ))  # [#,q][*=c]
        # Padded input positions get a very negative score so that the
        # softmax gives them ~0 weight.
        matrix = C.element_select(
            C.sequence.broadcast_as(inputs_mask, cln_mem_ph), matrix,
            C.constant(-1e30))
        logits = C.softmax(matrix, axis=0, name='level 1 weight')  # [#,q][*=c]
        trans_expand_inputs = C.transpose(expand_inputs,
                                          [1, 0])  # [#,q][d,*=c]
        # Weighted sum of the input steps for every memory step.
        q_over_c = C.reshape(
            C.reduce_sum(logits * trans_expand_inputs, axis=1),
            (-1, )) / (self.hidden_dim**0.5)  # [#,q][d]
        new_q = C.splice(cln_mem_ph, q_over_c)  # [#,q][2*d]
        # over
        # Second direction: transpose the score matrix so that it becomes a
        # sequence over the input steps.
        unpack_matrix, matrix_mask = C.sequence.unpack(
            matrix, 0).outputs  # [#][*=q,*=c] [#][*=q]
        inputs_mask_s = C.to_sequence(C.reshape(inputs_mask,
                                                (-1, 1)))  # [#,c'][1]
        trans_matrix = C.to_sequence_like(C.transpose(unpack_matrix, [1, 0]),
                                          inputs_mask_s)  # [#,c'][*=q]
        # Keep only the steps whose mask value is set (drops padded steps).
        trans_matrix = C.sequence.gather(trans_matrix,
                                         inputs_mask_s)  # [#,c2][*=q]
        trans_matrix = C.element_select(
            C.sequence.broadcast_as(matrix_mask, trans_matrix), trans_matrix,
            C.constant(-1e30))
        # NOTE(review): the trailing shape comment below says [*=c], but
        # trans_matrix is annotated [#,c2][*=q] above - confirm.
        logits2 = C.softmax(trans_matrix, axis=0,
                            name='level 2 weight')  # [#,c2][*=c]
        unpack_new_q, new_q_mask = C.sequence.unpack(
            new_q, 0).outputs  # [#][*=q,2*d] [#][*=q]
        expand_new_q = C.transpose(
            C.sequence.broadcast_as(unpack_new_q, trans_matrix),
            [1, 0])  # [#,c2][2d,*=q]
        c_over_q = C.reshape(C.reduce_sum(logits2 * expand_new_q, axis=1),
                             (-1, )) / (2 * self.hidden_dim)**0.5  # [#,c2][2d]
        # Put the result back onto the dynamic axes of the input sequence.
        c_over_q = C.reconcile_dynamic_axes(c_over_q, cln_inp_ph)

        # Instantiate the sub-graph: memory = projected query, input =
        # projected context.
        weighted_q = c_over_q.clone(C.CloneMethod.share, {
            cln_mem_ph: memory_,
            cln_inp_ph: inputs_
        })  # [#,c][2d]
        # Same sub-graph with the projected context bound to both
        # placeholders.
        # NOTE(review): q_over_c is annotated [d] above, so the [2d] in the
        # comment below (and "2d+2d+2d" on the splice) looks off - confirm.
        c2c = q_over_c.clone(C.CloneMethod.share, {
            cln_mem_ph: inputs_,
            cln_inp_ph: inputs_
        })  # [#,c][2d]

        att_context = C.splice(input_ph, weighted_q, c2c)  # 2d+2d+2d

        # Bind the outer placeholders to the actual arguments and expose the
        # whole computation as a single named block.
        return C.as_block(att_context, [(input_ph, context),
                                        (input_mem, query)], 'attention_layer',
                          'attention_layer')