    def assert_output_gate_memory_state_convolution_weights_are_equal(multi_directional_mdlstm, one_directional_mdlstm,
                                                                      direction_index: int):
        multi_directional_mdlstm_output_gate_memory_state_convolution =\
            multi_directional_mdlstm.mdlstm_parameters. \
            output_gate_memory_state_convolution

        out_channels_size = multi_directional_mdlstm_output_gate_memory_state_convolution.weight.size(0)
        out_channels_per_direction = out_channels_size // 4
        start_index = out_channels_per_direction * direction_index
        end_index = out_channels_per_direction * (direction_index + 1)

        multi_directional_mdlstm_output_gate_memory_state_convolution_weight_for_direction =\
            multi_directional_mdlstm_output_gate_memory_state_convolution.weight[start_index:end_index, :, :]

        multi_directional_mdlstm_output_gate_memory_state_convolution_bias_for_direction = \
            multi_directional_mdlstm_output_gate_memory_state_convolution.bias[start_index:end_index]

        one_directional_mdlstm_output_gate_memory_state_convolution =\
            one_directional_mdlstm.mdlstm_parameters.output_gate_memory_state_convolution

        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_output_gate_memory_state_convolution_weight_for_direction,
                one_directional_mdlstm_output_gate_memory_state_convolution.weight):
                raise RuntimeError("Error: the weight matrices for the output gate memory state convolution for " +
                                   "multi-directional MDLSTM and the corresponding one-directional MDLSTM" +
                                   "are not the same")
        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_output_gate_memory_state_convolution_bias_for_direction,
                one_directional_mdlstm_output_gate_memory_state_convolution.bias):
            raise RuntimeError("Error: the bias matrices for the output gate memory state convolution for " +
                               "multi-directional MDLSTM and the corresponding one-directional MDLSTM" +
                               "are not the same")
    def assert_input_convolution_weights_are_equal(multi_directional_mdlstm, one_directional_mdlstm,
                                                   direction_index: int):
        multi_directional_mdlstm_parallel_input_column_computation = \
            multi_directional_mdlstm.mdlstm_parameters.parallel_input_column_computation

        full_size = multi_directional_mdlstm_parallel_input_column_computation.\
            parallel_convolution.weight.size(0)

        out_channels_size = full_size
        out_channels_per_direction = out_channels_size // 4
        print("out_channels_per_direction: " + str(out_channels_per_direction))
        start_index = out_channels_per_direction * direction_index
        end_index = out_channels_per_direction * (direction_index + 1)

        print("start_index: " + str(start_index))
        print("end_index: " + str(end_index))

        multi_directional_mdlstm_weight_for_direction = \
            multi_directional_mdlstm_parallel_input_column_computation.\
            parallel_convolution.weight[start_index:end_index, :, :].unsqueeze(2)

        multi_directional_mdlstm_bias_for_direction = \
            multi_directional_mdlstm_parallel_input_column_computation.\
            parallel_convolution.bias[start_index:end_index]

        one_directional_mdlstm_input_convolution_computation =\
            one_directional_mdlstm.mdlstm_parameters.parallel_multiple_input_convolutions_computation

        print("multi_directional_mdlstm_weight_for_direction.size()" +
              str(multi_directional_mdlstm_weight_for_direction.size()))

        print("one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight.size()" +
              str(one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight.size()))

        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_weight_for_direction,
                one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight):
                raise RuntimeError("Error:  for direction " + str(direction_index) +
                                   " the weight matrices for the input convolution computation for " +
                                   "multi-directional MDLSTM" +
                                   str(multi_directional_mdlstm_weight_for_direction) +
                                   " and the corresponding one-directional MDLSTM" +
                                   str(one_directional_mdlstm_input_convolution_computation.parallel_convolution.weight)
                                   + "are not the same")
        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_bias_for_direction,
                one_directional_mdlstm_input_convolution_computation.parallel_convolution.bias):
            raise RuntimeError("Error: the bias matrices for the input convolution computation for " +
                               "multi-directional MDLSTM and the corresponding one-directional MDLSTM" +
                               "are not the same")
Example #3
    def check_dechunking_chunked_tensor_list_recovers_original(tensor_list_chunking, original_tensor_list,
                                                               input_chunked):
        input_dechunked = tensor_list_chunking.dechunk_block_tensor_concatenated_along_batch_dimension(
            input_chunked)
        if not TensorUtils.tensors_lists_are_equal(original_tensor_list, input_dechunked):
            for index in range(0, len(original_tensor_list)):
                print("original[" + str(index) + "].size()" + str(original_tensor_list[index].size()))

            for index in range(0, len(input_dechunked)):
                print("input_dechunked[" + str(index) + "].size()" + str(input_dechunked[index].size()))

            TensorUtils.find_equal_slices_over_batch_dimension(input_chunked)

            raise RuntimeError("Error: original and de-chunked chunked are not the same")
Example #4
    def assert_results_are_same_with_and_without_packing(activations_example_without_packing,
                                                         activations_example_with_packing):
        if not TensorUtils.tensors_are_equal(activations_example_without_packing, activations_example_with_packing):
            raise RuntimeError("Error: expected the same activations for MDLSTM forward computation " +
                               "with or without packing, but got different results:\n - without packing:\n" +
                               str(activations_example_without_packing) + "\n - with packing: \n" +
                               str(activations_example_with_packing))
Example #5
def test_ctc_loss_probabilities_match_labels_third_baidu_example_variant_two_extra_padding_wrong_side(
):

    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    # https://stackoverflow.com/questions/48915810/pytorch-contiguous
    probs = torch.FloatTensor(
        [
            [[0, 0, 0, 0, 0],
             [0, 0, 0, 0,
              0]],  # Extra padding is added at the top, which is wrong
            [[0, 0, 0, 0, 0], [1, 2, 3, 4, 5]],
            [[0, 0, 0, 0, 0], [6, 7, 8, 9, 10]],
            [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15]],
        ]
    )  # .contiguous() # contiguous is just for performance, does not change results

    print("probs.size(): " + str(probs.size()))

    # labels = Variable(torch.IntTensor([ [1, 0], [3, 3], [2, 3]]))
    # See: https://github.com/SeanNaren/warp-ctc/issues/29
    # IMPORTANT !!!: All label sequences are concatenated, without blanks/padding,
    # and label sizes lists the sizes without padding
    labels = Variable(torch.IntTensor([1, 3, 3]))
    # Labels sizes should be equal to number of labels. Because labels are
    # concatenated, the label sizes essentially instructs where the sequence
    # boundaries are!
    label_sizes = Variable(torch.IntTensor([1, 2]))
    # Prob_sizes instructs on the number of real probabilities, distinguishing
    # real probabilities from padding
    # Padding should presumably
    # (looking at https://github.com/baidu-research/warp-ctc/blob/master/torch_binding/TUTORIAL.md)
    # be at the bottom, but this should be checked
    probs_sizes = Variable(torch.IntTensor([1, 3]))
    probs = Variable(
        probs,
        requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    # Because the padding is on the wrong side here, the cost is not expected
    # to match the Baidu tutorial value (see the check below)
    print("cost: " + str(cost))
    # Since padding has been added to the wrong side (top instead of bottom)
    # the results are now expected to change
    no_longer_expected_cost_tensor = torch.FloatTensor([8.965181350708008])
    print("no_longer_expected_cost_tensor: " + str(no_longer_expected_cost_tensor))
    if TensorUtils.tensors_are_equal(no_longer_expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be not equal to " +
                           str(no_longer_expected_cost_tensor) + " but was: " +
                           str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))

    print(
        ">>> Success: test_ctc_loss_probabilities_match_labels_third_baidu_example_variant_two_extra_padding_wrong_side"
    )
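
# Worked example (illustrative, independent of warp-ctc) of the label layout
# described in the comments above: all label sequences are concatenated without
# blanks or padding, and label_sizes marks how many labels belong to each
# example, which implicitly encodes the sequence boundaries.
labels_concatenated = [1, 3, 3]   # concatenated labels for the whole batch
label_sizes_list = [1, 2]         # example 0 has 1 label, example 1 has 2

per_example_labels = []
offset = 0
for size in label_sizes_list:
    per_example_labels.append(labels_concatenated[offset:offset + size])
    offset += size
assert per_example_labels == [[1], [3, 3]]
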
Example #6
    def check_activation_rows_are_not_equal(activation_rows):
        # For debugging
        # print("activation rows sizes after splitting: ")
        last_activation_row = activation_rows[0]
        for activation_row in activation_rows[1:]:
            # print(str(activation_row.size()))
            if TensorUtils.tensors_are_equal(last_activation_row, activation_row):
                print(">>> WARNING: activation rows are equal")
            # Advance so that consecutive rows are compared
            last_activation_row = activation_row
Example #7
    def assert_input_convolution_weights_are_equal(multi_directional_mdlstm,
                                                   one_directional_mdlstm,
                                                   direction_index: int):
        multi_directional_mdlstm_input_convolution_computation =\
            multi_directional_mdlstm.mdlstm_parameters. \
            parallel_multiple_input_convolutions_computations[direction_index]

        #parallel_multiple_input_convolutions_computations
        #
        # out_channels_size = multi_directional_mdlstm_input_convolution_computation.\
        #     parallel_convolution.weight.size(0)
        # out_channels_per_direction = out_channels_size / 4
        # start_index = int(out_channels_per_direction * direction_index)
        # end_index = int(out_channels_per_direction * (direction_index + 1))
        #
        # one_directional_weight_from_multi_directional =\
        #     multi_directional_mdlstm_input_convolution_computation.parallel_convolution.weight[start_index:end_index,
        #                                                                                        :, :, :]

        # one_directional_bias_from_multi_directional =\
        #     multi_directional_mdlstm_input_convolution_computation.parallel_convolution.bias[start_index:end_index]

        one_directional_mdlstm_input_convolution_computation =\
            one_directional_mdlstm.mdlstm_parameters.parallel_multiple_input_convolutions_computation

        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_input_convolution_computation.
                parallel_convolution.weight,
                one_directional_mdlstm_input_convolution_computation.
                parallel_convolution.weight):
            raise RuntimeError(
                "Error: the weight matrices for the input convolution computation for " +
                "the multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                "are not the same")
        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_input_convolution_computation.
                parallel_convolution.bias,
                one_directional_mdlstm_input_convolution_computation.
                parallel_convolution.bias):
            raise RuntimeError(
                "Error: the bias vectors for the input convolution computation for " +
                "the multi-directional MDLSTM and the corresponding one-directional MDLSTM " +
                "are not the same")

    def test_multi_directional_mdlstm_produces_same_results_as_extracted_one_directional_mdlstms(test_tensor):
        multi_directional_mdlstm_test = MultiDirectionalMDLSTMTest.create_multi_directional_mdlstm_test()
        multi_directional_mdlstm_test.multi_directional_mdlstm = multi_directional_mdlstm_test.\
            multi_directional_mdlstm.cuda()
        one_directional_mdlstms = multi_directional_mdlstm_test.\
            multi_directional_mdlstm.create_one_directional_mdlstms_from_multi_directional_mdlstm()

        activations_multi_directional_mdlstm = multi_directional_mdlstm_test.\
            multi_directional_mdlstm(list([test_tensor]))
        print("activations_multi_directional_mdlstm: " + str(activations_multi_directional_mdlstm))
        assert len(activations_multi_directional_mdlstm) == 1

        activations_for_tensor = activations_multi_directional_mdlstm[0]
        print("activations_for_tensor.size(): " + str(activations_for_tensor.size()))
        # if not activations_for_tensor.size(0) == 4:
        #     raise RuntimeError("Error: expected the output tensor to have a size of 4" +
        #                        "for its first dimension, i.e. for 4-directional MDLSTM")

        tensor_flipping_list = MDLSTMExamplesPacking.create_four_directions_tensor_flippings()

        for direction_index, tensor_flipping in enumerate(tensor_flipping_list):
            print(">>> direction_index: " + str(direction_index))
            one_directional_mdlstm = one_directional_mdlstms[direction_index].cuda()
            MultiDirectionalMDLSTMTest.assert_input_convolution_weights_are_equal(
                multi_directional_mdlstm_test.multi_directional_mdlstm, one_directional_mdlstm, direction_index)
            MultiDirectionalMDLSTMTest.assert_output_gate_memory_state_convolution_weights_are_equal(
                multi_directional_mdlstm_test.multi_directional_mdlstm, one_directional_mdlstm, direction_index)
            MultiDirectionalMDLSTMTest.assert_parallel_hidden_and_memory_state_column_computation_weights_are_equal(
                multi_directional_mdlstm_test.multi_directional_mdlstm, one_directional_mdlstm, direction_index)

            test_tensor_flipped = tensor_flipping.flip(test_tensor).cuda()

            activations_one_directional_mdlstm_flipped = one_directional_mdlstm(list([test_tensor_flipped]))
            # Flip activations back to original orientation
            activations_one_directional_mdlstm = \
                tensor_flipping.flip(activations_one_directional_mdlstm_flipped[0])
            start_index = int(direction_index * (activations_for_tensor.size(1) / 4))
            end_index = int((direction_index + 1) * (activations_for_tensor.size(1) / 4))
            print("start_index: " + str(start_index))
            print("end_index: " + str(end_index))

            # Activations are concatenated along the channel dimension
            activations_one_directional_mdlstm_from_four_directional_mdlstm = \
                activations_for_tensor[:, start_index:end_index, :, :]

            print("activations_one_directional_mdlstm_from_four_directional_mdlstm: " +
                  str(activations_one_directional_mdlstm_from_four_directional_mdlstm))
            print("activations_one_directional_mdlstm: " +
                  str(activations_one_directional_mdlstm))
            if not TensorUtils.tensors_are_equal(activations_one_directional_mdlstm,
                                                 activations_one_directional_mdlstm_from_four_directional_mdlstm):
                raise RuntimeError("Error: expected the activation tensors for the one-directional MDLSTM: \n" +
                                   str(activations_one_directional_mdlstm) + "\n and the corresponding ones " +
                                   " of the 4-directional MDLSTM \n" +
                                   str(activations_one_directional_mdlstm_from_four_directional_mdlstm) +
                                   " to be the same.")
Example #9
def test_ctc_loss_probabilities_match_labels_third_baidu_example_variant():

    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    probs = torch.FloatTensor([[[1, 2, 3, 4, 5], [0, 0, 0, 0, 0],
                                [-5, -4, -3, -2, -1]],
                               [[6, 7, 8, 9, 10], [0, 0, 0, 0, 0],
                                [-10, -9, -8, -7, -6]],
                               [[11, 12, 13, 14, 15], [0, 0, 0, 0, 0],
                                [-15, -14, -13, -12, -11]]]).contiguous()

    # probs = torch.FloatTensor([
    #     [[-5, -4, -3, -2, -1], [-10, -9, -8, -7, -6], [-15, -14, -13, -12, -11]]
    # ]). \
    #    transpose(0, 1).contiguous()

    print("probs.size(): " + str(probs.size()))

    # labels = Variable(torch.IntTensor([ [1, 0], [3, 3], [2, 3]]))
    # See: https://github.com/SeanNaren/warp-ctc/issues/29
    # All label sequences are concatenated, without blanks/padding,
    # and label sizes lists the sizes without padding
    labels = Variable(torch.IntTensor([3, 3, 1, 2, 3]))
    # labels = Variable(torch.IntTensor([2, 3]))
    #labels = Variable(torch.IntTensor([3, 3]))
    # Labels sizes should be equal to number of labels
    label_sizes = Variable(torch.IntTensor([2, 1, 2]))
    #label_sizes = Variable(torch.IntTensor([2]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([3, 1, 3]))
    # probs_sizes = Variable(torch.IntTensor([3]))
    probs = Variable(
        probs,
        requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    # The expected cost for this variant is checked below
    print("cost: " + str(cost))
    expected_cost_tensor = torch.FloatTensor([13.904030799865723])
    print("expected_cost_tensor: " + str(expected_cost_tensor))
    if not TensorUtils.tensors_are_equal(expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be " +
                           str(expected_cost_tensor) + " but was: " +
                           str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))

    print(
        ">>> Success: test_ctc_loss_probabilities_match_labels_third_baidu_example_variant"
    )

    def assert_parallel_hidden_and_memory_state_column_computation_weights_are_equal(
            multi_directional_mdlstm, one_directional_mdlstm, direction_index: int):
        multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation = \
            multi_directional_mdlstm.mdlstm_parameters.parallel_hidden_and_memory_state_column_computation

        out_channels_size = \
            multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.\
            get_paired_input_weightings_output_size()
        out_channels_per_direction = out_channels_size // 4
        start_index = out_channels_per_direction * direction_index
        end_index = out_channels_per_direction * (direction_index + 1)

        multi_directional_mdlstm_weight_for_direction = \
            multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.\
            parallel_convolution.weight[start_index:end_index, :, :]

        multi_directional_mdlstm_bias_for_direction = \
            multi_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.\
            parallel_convolution.bias[start_index:end_index]

        one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation = \
            one_directional_mdlstm.mdlstm_parameters.parallel_hidden_and_memory_state_column_computation

        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_weight_for_direction,
                one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.
                parallel_convolution.weight):
            raise RuntimeError("Error: the weight matrices for the" +
                               "one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation" +
                               "multi-directional MDLSTM and the corresponding one-directional MDLSTM" +
                               "are not the same")
        if not TensorUtils.tensors_are_equal(
                multi_directional_mdlstm_bias_for_direction,
                one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation.
                parallel_convolution.bias):
            raise RuntimeError("Error: the bias matrices for the" +
                               "one_directional_mdlstm_parallel_hidden_and_memory_state_column_computation" +
                               "multi-directional MDLSTM and the corresponding one-directional MDLSTM" +
                               "are not the same")
Example #11
def test_ctc_loss_probabilities_match_labels_three():

    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")

    # Gives no loss
    probs = torch.FloatTensor([[[0, 100, 0, 0, 83],
                                [0, 0, 100, 0, 0],
                                [0, 0, 0, 100, 0]]]).\
        transpose(0, 1).contiguous()

    # # Gives small loss
    # probs = torch.FloatTensor([[[0, 100, 0, 0, 84],
    #                             [0, 0, 100, 0, 0],
    #                             [0, 0, 0, 100, 0]]]). \
    #     transpose(0, 1).contiguous()

    print("probs.size(): " + str(probs.size()))

    # No loss
    # labels = Variable(torch.IntTensor([1, 2, 3]))
    # Also no loss (possibly because not possible!)
    # becomes effectively 2-2-2-2 which is length 6!
    # labels = Variable(torch.IntTensor([2, 2, 2, 2]))
    # labels 1-1-1: repeated labels require blanks in between, so this cannot
    # fit in 3 time steps (why is the loss still zero?)
    labels = Variable(torch.IntTensor([1, 1, 1]))
    # Labels sizes should be equal to the number of labels in the example
    label_sizes = Variable(torch.IntTensor([3]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([3]))
    probs = Variable(
        probs,
        requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    # The expected cost for this example is zero (see the check below)
    print("cost: " + str(cost))
    expected_cost_tensor = torch.FloatTensor([0])
    print("expected_cost_tensor: " + str(expected_cost_tensor))
    if not TensorUtils.tensors_are_equal(expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be " +
                           str(expected_cost_tensor) + " but was: " +
                           str(float(cost)))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
Example #12
    def test_simple_normal_and_packed_mdlstm_computation_produce_same_results():
        mdlstm_test = MultiDimensionalLSTMTest.create_multi_dimensional_lstm_test()
        input_tensor = torch.ones(1, 1, 2, 2).cuda()
        input_tensor_list = list([torch.ones(1, 2, 2).cuda()])
        activations, activations_with_examples_packing_list = mdlstm_test.\
            get_mdlstm_activations_with_and_without_packing(input_tensor,
                                                            input_tensor_list)
        activations_with_examples_packing = activations_with_examples_packing_list[0]

        print("activations_without_examples_packing: " + str(activations))
        print("activations_with_examples_packing: " + str(activations_with_examples_packing))

        if not TensorUtils.tensors_are_equal(activations, activations_with_examples_packing[0]):
            raise RuntimeError("Error: expected the same activations for MDLSTM forward computation" +
                               "with or without packing, but got different results")
    def get_original_sizes_from_tensor_list(tensor_list: list):
        result = list([])
        for x in tensor_list:
            if TensorUtils.number_of_dimensions(x) != 3:
                raise RuntimeError("Error: tensor x with size " +
                                   str(x.size()) +
                                   " does not have 3 dimensions, as required")

            # print("x.size(): " + str(x.size()))
            original_size = SizeTwoDimensional.create_size_two_dimensional(
                x.size(1), x.size(2))
            # print("original_size: " + str(original_size))
            result.append(original_size)
        # print(" get_original_sizes_from_tensor_list - result: " + str(result))
        return result
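
# Small illustration of the size bookkeeping above; SizeTwoDimensional is the
# project's container, a plain (height, width) tuple is used as a stand-in here
import torch

tensor_list = [torch.zeros(1, 4, 6), torch.zeros(1, 3, 5)]
original_sizes = [(x.size(1), x.size(2)) for x in tensor_list]
assert original_sizes == [(4, 6), (3, 5)]
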
    def clamp_grad(grad_input,
                   clamping_bound,
                   variable_name: str,
                   gradient_computation_mask=None):

        if gradient_computation_mask is not None:
            # print("Applying gradient computation mask " + str(gradient_computation_mask) + " to " +
            #       "grad_output: " + str(grad_input))
            grad_output = TensorUtils.apply_binary_mask(
                grad_input, gradient_computation_mask)
        else:
            grad_output = grad_input

        grad_output = grad_output.clamp(min=-clamping_bound,
                                        max=clamping_bound)

        # if variable_name == "mdlstm - activation_column" or variable_name == "mdlstm - new_memory_state":
        #     print("clamping gradient - " + variable_name)
        #     print("clamp_grad_and_print - grad_input: " + str(grad_input))
        #     print("clamp_grad_and_print - grad_output: " + str(grad_output))

        is_bad_gradient = False

        if InsideModelGradientClamping.is_bad_grad(grad_input):
            print("is_bad_grad - grad_input: " + str(grad_input))
            ##not util.tensor_utils.TensorUtils.tensors_are_equal(grad_input, grad_output):
            # https://stackoverflow.com/questions/900392/getting-the-caller-function-name-inside-another-function-in-python
            print("clamping gradient - " + variable_name)
            print("clamp_grad_and_print - grad_input: " + str(grad_input))
            print("clamp_grad_and_print - grad_output: " + str(grad_output))
            is_bad_gradient = True

        if InsideModelGradientClamping.is_bad_grad(grad_output):
            print("is_bad_grad - grad_output: " + str(grad_output))
            ##not util.tensor_utils.TensorUtils.tensors_are_equal(grad_input, grad_output):
            # https://stackoverflow.com/questions/900392/getting-the-caller-function-name-inside-another-function-in-python
            print("clamping gradient - " + variable_name)
            print("clamp_grad_and_print - grad_input: " + str(grad_input))
            print("clamp_grad_and_print - grad_output: " + str(grad_output))
            is_bad_gradient = True

        if is_bad_gradient:
            raise RuntimeError("Error: found bad gradient")

        return grad_output
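
# Hedged sketch of how a clamp_grad-style function is typically attached in
# PyTorch: Tensor.register_hook runs a function on the gradient during the
# backward pass, and the returned tensor replaces that gradient. The hook below
# reproduces only the clamping core, without the masking and bad-gradient checks
import torch

clamping_bound = 10.0
x = torch.randn(3, requires_grad=True)
y = (x * 1e6).sum()  # deliberately produces large gradients

def clamp_hook(grad):
    return grad.clamp(min=-clamping_bound, max=clamping_bound)

x.register_hook(clamp_hook)
y.backward()
assert x.grad.abs().max() <= clamping_bound
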
Example #15
def test_ctc_loss_probabilities_match_labels():

    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    probs = torch.FloatTensor([[[0.9, 1.0, 0.0, 0.0],
                                [0.1, 0.0, 1.0, 1.0]]]).\
        transpose(0, 1).contiguous()

    print("probs.size(): " + str(probs.size()))
    # No cost
    labels = Variable(torch.IntTensor([1, 1, 2, 1]))
    # No cost
    labels = Variable(torch.IntTensor([1, 1, 1, 1]))
    # Cost
    labels = Variable(torch.IntTensor([1, 2, 2, 1]))
    # No cost
    labels = Variable(torch.IntTensor([1, 1]))
    # No cost
    labels = Variable(torch.IntTensor([2, 2]))
    # Crash (apparently labels must contain at least 2 elements)
    labels = Variable(torch.IntTensor([2]))
    # No cost
    labels = Variable(torch.IntTensor([3, 3]))

    label_sizes = Variable(torch.IntTensor([2]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([2]))
    probs = Variable(
        probs,
        requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    print("cost: " + str(cost))
    zero_tensor = torch.zeros(1)
    print("zeros_tensor: " + str(zero_tensor))
    if not TensorUtils.tensors_are_equal(zero_tensor, cost):
        raise RuntimeError(
            "Error: loss expected to be zero, since probabilities " +
            "are maximal for the right labels, but this was not the case")
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
Example #16
    def extract_summed_rows_from_chunk_with_concatenated_rows(chunk_multiple_rows, number_of_rows,
                                                              number_of_columns):
        # print("chunk multiple rows.size(): " + str(chunk_multiple_rows.size()))

        # print("number of columns: " + str(number_of_columns) +
        #      " number of rows: " + str(number_of_rows))
        # Notice the dimension to split on is 1, as for example we have
        # chunk multiple rows.size(): torch.Size([1, 14, 80])
        # That is, the last dimension goes over classes, the first one is
        # always 1, and the second dimension goes over the width.
        # Therefore we have to split on dim=1 using the number_of_columns
        # for the tensor containing the horizontally-concatenated
        # row activations
        rows = torch.split(chunk_multiple_rows, number_of_columns, dim=1)
        if len(rows) != number_of_rows:
            raise RuntimeError("Error in split: expected " + str(number_of_rows)
                               + "rows but got: " + str(len(rows)))

        summed_rows = TensorUtils.sum_list_of_tensors(rows)
        # print("summed_rows.size(): " + str(summed_rows.size()))
        return summed_rows
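
# Worked example of the split-on-dim-1 logic above: a chunk of shape
# (1, number_of_rows * number_of_columns, classes) holds horizontally
# concatenated row activations, so splitting by number_of_columns on dim=1
# yields one tensor per row, which are then summed elementwise.
import torch

number_of_rows, number_of_columns, number_of_classes = 2, 7, 80
chunk_multiple_rows = torch.randn(1, number_of_rows * number_of_columns, number_of_classes)
rows = torch.split(chunk_multiple_rows, number_of_columns, dim=1)
assert len(rows) == number_of_rows
summed_rows = rows[0] + rows[1]  # elementwise sum over the row tensors
assert summed_rows.size() == (1, number_of_columns, number_of_classes)
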
    def clamp_grad_and_print(grad_input,
                             clamping_bound,
                             variable_name: str,
                             gradient_computation_mask=None):
        # print("clamping gradient - " + variable_name)
        # print("number of non-zeros: " + str(TensorUtils.number_of_non_zeros(grad_input)))
        # torch.set_printoptions(precision=10)
        # print("maximum element: " + str(torch.max(grad_input)))
        # print("sum of all elements: " + str(torch.sum(grad_input)))
        # print("tensor norm: " + str(torch.norm(grad_input) * 10000000000))
        # print("clamp_grad_and_print - grad_input: " + str(grad_input))

        # nearly_zero_element_mask = grad_input.abs().lt(0.0000000001)
        # print("nearly_zero_element_mask: " + str(nearly_zero_element_mask))
        # grad_output = grad_input
        # zero_element_indices = torch.masked_select(nearly_zero_element_mask)
        # print("zero element indices: " + str(zero_element_indices))
        # grad_output.view(-1)[zero_element_indices] = 0

        # https://stackoverflow.com/questions/45384684/
        # replace-all-nonzero-values-by-zero-and-all-zero-values-by-a-specific-value/45386834
        grad_output = grad_input.clone()
        grad_output[grad_input.abs() < 0.0000000001] = 0
        # print("grad_output: " + str(grad_output))
        # print("grad_output.size(): " + str(grad_output.size()))
        # print("number of non-zeros after: " + str(TensorUtils.number_of_non_zeros(grad_output)))

        if gradient_computation_mask is not None:
            # print("Applying gradient computation mask " + str(gradient_computation_mask) + " to " +
            #       "grad_output: " + str(grad_input))
            grad_output = TensorUtils.apply_binary_mask(
                grad_output, gradient_computation_mask)

        grad_output = grad_output.clamp(min=-clamping_bound,
                                        max=clamping_bound)

        # print("clamp_grad_and_print - grad_output: " + str(grad_output))
        return grad_output
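
# Small illustration of the near-zero suppression performed above: entries whose
# absolute value falls below the threshold are snapped to exactly zero before
# the gradient is clamped.
import torch

grad_input = torch.tensor([1e-12, 0.5, -1e-11, -2.0])
grad_output = grad_input.clone()
grad_output[grad_input.abs() < 0.0000000001] = 0
assert torch.equal(grad_output, torch.tensor([0.0, 0.5, 0.0, -2.0]))
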
Example #18
    def compute_convolution_result_and_apply_mask(
            self, previous_state_column: torch.Tensor, mask: torch.Tensor):

        # This call seems to be causing a memory leak.
        # This seems to be the memory leak root cause, basically just the call to
        # the 2D convolution with multiple groups. Perhaps there are too many groups,
        # e.g. 28 * 2. But it is not clear how to fix this. The bug seems to happen
        # in pytorch 0.4.0 and 0.4.1 at least.
        # print("self.number_of_paired_input_weightings_per_group: " +
        #       str(self.number_of_paired_input_weightings_per_group))
        result = self.compute_convolution_result(previous_state_column)
        # self.parallel_convolution(previous_state_column)
        # result = None
        # return None

        if self.clamp_gradients:
            # print("ParallelMultipleStateWeightingsComputation - register gradient clamping...")
            # Create a 1d convolution with clamping of the gradient
            result = InsideModelGradientClamping.\
                register_gradient_clamping_default_clamping_bound(result,
                                                                  "parallel_multiple_state_weightings_Computation",
                                                                  mask)

        # It is necessary to mask the non-valid entries in the convolution result. If this
        # is not done, then the results will be "incorrect" and also when using examples packing,
        # the first row in the packed matrix will be treated differently from the first rows
        # of other examples under vertical row separators.
        # For this reason, we must mask not only the states computed for the next iteration
        # during MDLSTM computation but also for the convolution computation the entries that
        # are not valid
        # print("result.size(): " + str(result.size()))
        # print("mask.size(): " + str(mask.size()))
        # print("self.number_of_paired_input_weighting: " +
        #       str(self.get_number_of_paired_input_weightings()))
        if mask is not None:
            result = TensorUtils.apply_binary_mask(result, mask)

        # print("compute_convolution_result - result.size():" + str(result.size()))

        return result
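
# Hedged sketch of the masking step described above; apply_binary_mask is the
# project's helper, and elementwise multiplication with a 0/1 mask is assumed
# here as its effect: invalid (padded) entries in the convolution result are
# zeroed so they cannot leak into subsequent MDLSTM state updates.
import torch

result = torch.randn(2, 4, 5)
mask = torch.ones_like(result)
mask[1, :, 3:] = 0  # mark the padded positions of the second example as invalid
masked_result = result * mask
assert torch.equal(masked_result[1, :, 3:], torch.zeros(4, 2))
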
Example #19
def test_ctc_loss_probabilities_match_labels_second_baidu_example():

    ctc_loss = warpctc_pytorch.CTCLoss()
    print("expected shape of seqLength x batchSize x alphabet_size")
    probs = torch.FloatTensor([[[1, 2, 3, 4, 5],
                                [6, 7, 8, 9, 10],
                                [11, 12, 13, 14, 15]]]).\
        transpose(0, 1).contiguous()

    probs.requires_grad_(True)

    print("probs.size(): " + str(probs.size()))

    labels = Variable(torch.IntTensor([3, 3]))
    # Labels sizes should be equal to number of labels
    label_sizes = Variable(torch.IntTensor([2]))
    # This one must be equal to the number of probabilities to avoid a crash
    probs_sizes = Variable(torch.IntTensor([3]))
    probs = Variable(
        probs,
        requires_grad=True)  # tells autograd to compute gradients for probs
    optimizer = optim.SGD(list([probs]), lr=0.001)
    print("probs: " + str(probs))
    cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
    # cost: tensor([ 7.3557]) as in the Baidu tutorial, second example
    print("cost: " + str(cost))
    expected_cost_tensor = torch.FloatTensor([7.355742931365967])
    print("zeros_tensor: " + str(expected_cost_tensor))
    if not TensorUtils.tensors_are_equal(expected_cost_tensor, cost):
        raise RuntimeError("Error: cost expected to be " +
                           str(expected_cost_tensor) + "but was:" +
                           str((float(cost))))
    cost.backward()
    print("cost: " + str(cost))
    print("update probabilities...")
    optimizer.step()
    print("probs: " + str(probs))
Example #20
def test_tensor_flipping_twice_retrieves_original():
    # a = torch.Tensor([range(1, 25)]).view(1, 2, 3, 4)
    a = torch.Tensor([range(1, 10)]).view(3, 3)
    print("a: " + str(a))

    flipping_tuples = list([])
    flipping_tuples.append((True, False))
    flipping_tuples.append((True, True))
    flipping_tuples.append((False, True))

    for flipping_tuple in flipping_tuples:
        print(">>> flip height: " + str(flipping_tuple[0]) + ", flip width: " + str(flipping_tuple[1]))
        tensor_flipping = TensorFlipping.create_tensor_flipping(flipping_tuple[0],
                                                                flipping_tuple[1])
        a_flipped = tensor_flipping.flip(a)
        print("a_flipped: " + str(a_flipped))
        a_flipped_back = tensor_flipping.flip(a_flipped)
        print("a_flipped_back: " + str(a_flipped_back))
        # a_flipped_back = torch.zeros(3, 3)

        if not TensorUtils.tensors_are_equal(a, a_flipped_back):
            raise RuntimeError("Error: original tensor:\n " +
                               str(a) + " and flipped, then flipped back tensor:\n " +
                               str(a_flipped_back) + " are not equal")
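
# Hedged sketch, assuming TensorFlipping wraps torch.flip over the height and
# width dimensions: torch.flip reverses the chosen dimensions, and reversing
# the same dimensions twice is the identity, which is the property the test
# above verifies.
import torch

a = torch.arange(1.0, 10.0).view(3, 3)
for dims in ([0], [0, 1], [1]):  # flip height, both, width
    assert torch.equal(torch.flip(torch.flip(a, dims=dims), dims=dims), a)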