Example #1
    def create_transformed_images_row(row_number: int,
                                      number_of_image_tensors: int,
                                      number_of_channels: int, width: int,
                                      transformed_images_width: int,
                                      image_tensors_row, device):
        leading_zeros = row_number
        tailing_zeros = transformed_images_width - width - row_number

        if leading_zeros > 0:

            if Utils.use_cuda():
                with torch.cuda.device(device):
                    # creating the zeros directly on the gpu, which is faster
                    # See: https://discuss.pytorch.org/t/creating-tensors-on-gpu-directly/2714/5

                    leading_zeros_tensor = torch.cuda.FloatTensor(
                        number_of_image_tensors, number_of_channels,
                        leading_zeros).fill_(0)
            else:

                leading_zeros_tensor = torch.zeros(number_of_image_tensors,
                                                   number_of_channels,
                                                   leading_zeros)

            # print("leading_zeros_tensor.size()" + str(leading_zeros_tensor.size()))

            #new_row = torch.cat((leading_zeros_tensor,
            #                     image_tensors[:, :, row_number, :]), 2)
            new_row = torch.cat((leading_zeros_tensor, image_tensors_row), 2)
        else:
            # new_row = image_tensors[:, :, row_number, :]
            new_row = image_tensors_row

        if tailing_zeros > 0:
            # print("number of channels: " + str(number_of_channels))

            if Utils.use_cuda():

                with torch.cuda.device(device):
                    # creating the zeros directly on the gpu, which is faster
                    # See: https://discuss.pytorch.org/t/creating-tensors-on-gpu-directly/2714/5

                    tailing_zeros_tensor = torch.cuda.FloatTensor(
                        number_of_image_tensors, number_of_channels,
                        tailing_zeros).fill_(0)
            else:
                tailing_zeros_tensor = torch.zeros(number_of_image_tensors,
                                                   number_of_channels,
                                                   tailing_zeros)

            # print("new_row.size(): " + str(new_row.size()))
            # print("tailing_zeros_tensor.size(): " + str(tailing_zeros_tensor.size()))
            new_row = torch.cat((new_row, tailing_zeros_tensor), 2)
        return new_row
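
A note on the two branches above: on PyTorch 0.4 and later, the "create the zeros directly on the GPU" idea can be expressed in a single call through the device argument of torch.zeros, avoiding the legacy torch.cuda.FloatTensor constructor. A minimal sketch with a hypothetical helper, assuming a torch.device (or None for the CPU) is at hand:

    import torch

    def make_zeros_on_device(number_of_image_tensors, number_of_channels,
                             length, device=None):
        # torch.zeros allocates directly on the target device, so no separate
        # CPU/GPU branches are needed
        return torch.zeros(number_of_image_tensors, number_of_channels,
                           length, device=device)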
Example #2
    def create_row_diagonal_offset_tensors_parallel_using_split(image_tensors):

        if Utils.use_cuda():
            # https://discuss.pytorch.org/t/which-device-is-model-tensor-stored-on/4908/7
            device = image_tensors.get_device()
        else:
            # "device" is only used in the CUDA branches of
            # create_transformed_images_row, so None is a safe placeholder
            device = None

        # See: https://stackoverflow.com/questions/46826218/pytorch-how-to-get-the-shape-of-a-tensor-as-a-list-of-int

        # print("list(image_tensor.size()): " + str(list(image_tensors.size())))
        # See: https://discuss.pytorch.org/t/indexing-a-2d-tensor/1667/2
        number_of_channels = image_tensors.size(1)
        # height = image_tensors.size(2)
        width = image_tensors.size(3)
        # print("height: " + str(height))
        # print("width: " + str(width))

        number_of_image_tensors = image_tensors.size(0)
        # print("number of image tensors: " + str(number_of_image_tensors))
        # The width of the transformed images is width+height-1 (important for unequal sized input)
        # transformed_images = torch.zeros(number_of_image_tensors, number_of_channels, height, (width + height) - 1)
        # print("transformed_image: " + str(transformed_image))
        # print("transformed_image.size(): " + str(transformed_image.size()))

        # The width of the transformed images is width+height-1 (important for unequal sized input)
        transformed_images_width = ImageInputTransformer.get_skewed_images_width_four_dimensional_tensor(
            image_tensors)

        # In one go with split and cat on entire list

        list_for_cat = list([])
        row_number = 0
        for image_tensors_row in torch.split(image_tensors, 1, 2):
            # print("before - image_tensors_row.size(): " + str(image_tensors_row.size()))
            image_tensors_row = image_tensors_row.squeeze(2)
            # print("after - image_tensors_row.size(): " + str(image_tensors_row.size()))
            new_row = ImageInputTransformer. \
                create_transformed_images_row(row_number, number_of_image_tensors,
                                              number_of_channels,
                                              width, transformed_images_width, image_tensors_row, device)
            # print("before - new_row.size(): " + str(new_row.size()))
            new_row = new_row.unsqueeze(2)
            # print("after - new_row.size(): " + str(new_row.size()))
            # print("new row.size(): " + str(new_row.size()))
            # print("transformed_image[:, :, y, :].size()" + str(transformed_images[:, :, y, :].size()))
            #  transformed_images[:, :, y, :] = new_row

            # Use torch.cat instead of copying of a tensor slice into a zeros tensor.
            # torch.cat clearly preserves the backward gradient pointer, but with
            # copying to a zeros tensor it is not quite clear if this happens
            list_for_cat.append(new_row)
            row_number += 1
        transformed_images = torch.cat(list_for_cat, 2)

        # print("create_row_diagonal_offset_tensor: transformed_images.grad_fn: " + str(transformed_images.grad_fn))
        # print("transformed_images.size(): " + str(transformed_images.size()))
        return transformed_images
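
To make the skew concrete: row r of each input image is shifted right by r positions and zero-padded to width + height - 1. A tiny self-contained illustration of the same transformation, assuming a single 1-channel 2x3 image (so the classes above are not needed):

    import torch

    x = torch.arange(1., 7.).view(1, 1, 2, 3)   # one 1-channel 2x3 image
    skewed_width = x.size(3) + x.size(2) - 1    # width + height - 1 = 4
    rows = []
    for r, row in enumerate(torch.split(x, 1, 2)):
        row = row.squeeze(2)
        leading = torch.zeros(1, 1, r)
        trailing = torch.zeros(1, 1, skewed_width - x.size(3) - r)
        rows.append(torch.cat((leading, row, trailing), 2).unsqueeze(2))
    skewed = torch.cat(rows, 2)
    print(skewed[0, 0])  # [[1, 2, 3, 0], [0, 4, 5, 6]]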
Example #3
    def dechunk_block_tensor_concatenated_along_batch_dimension_breaks_gradient(
            self, tensor: torch.Tensor):
        number_of_examples = int(
            tensor.size(0) / self.number_of_feature_blocks_per_example)

        # print(">>> dechunk_block_tensor_concatenated_along_batch_dimension: - tensor.grad_fn "
        #       + str(tensor.grad_fn))

        # print("tensor.size(): " + str(tensor.size()))
        channels = tensor.size(1)

        tensor_grouped_by_block = tensor.view(
            self.number_of_feature_blocks_per_example, number_of_examples,
            channels, self.block_size.height, self.block_size.width)

        result = torch.zeros(number_of_examples, channels,
                             self.original_size.height,
                             self.original_size.width)

        # print("tensor.nelement(): " + str(tensor.nelement()))
        # print("resuls.nelement(): " + str(result.nelement()))
        if Utils.use_cuda():
            # https://discuss.pytorch.org/t/which-device-is-model-tensor-stored-on/4908/7
            device = tensor.get_device()
            result = result.to(device)

        # print("tensor_grouped_by_block.size(): " + str(tensor_grouped_by_block.size()))
        for block_index in range(0, tensor_grouped_by_block.size(0)):
            # print("i: " + str(block_index))

            height_span_begin, height_span_end = self.height_span(block_index)
            width_span_begin, width_span_end = self.width_span(block_index)
            # print("height_span: " + str(height_span_begin) + ":"  + str(height_span_end))
            # print("width_span: " + str(width_span_begin) + ":" + str(width_span_end))

            # print("tensor_grouped_by_block[block_index, :, :, :]:" + str(
            #    tensor_grouped_by_block[block_index, :, :, :]))

            # Fixme: possibly copying like this destroys the gradient, as the grad_fn
            # of result shows: result.grad_fn <CopySlices object at 0x7f211cbfa208>
            # instead of something like "<TanhBackward object>", "<CatBackward object>"...
            # Probably "cat" should be used to reconstruct the original configuration
            # row by row. This was done previously also in the "extract_unskewed_activations"
            # function
            result[:, :, height_span_begin:height_span_end,
            width_span_begin:width_span_end] = \
                tensor_grouped_by_block[block_index, :, :, :]

        # print(">>> dechunk_block_tensor_concatenated_along_batch_dimension: - result.grad_fn "
        #      + str(result.grad_fn))

        return result
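
The Fixme above suggests reconstructing with cat rather than slice assignment, to keep the usual backward graph nodes (CatBackward) instead of CopySlices. A hedged sketch of that idea, assuming the blocks in tensor_grouped_by_block are laid out row-major as height_span/width_span imply; blocks_per_row is a hypothetical parameter here, not the class's actual attribute:

    import torch

    def dechunk_with_cat(tensor_grouped_by_block, blocks_per_row: int):
        # Stitch the blocks of one block-row together along the width (dim 3),
        # then stack the resulting rows along the height (dim 2)
        block_rows = []
        for i in range(0, tensor_grouped_by_block.size(0), blocks_per_row):
            row_blocks = [tensor_grouped_by_block[j]
                          for j in range(i, i + blocks_per_row)]
            block_rows.append(torch.cat(row_blocks, 3))
        return torch.cat(block_rows, 2)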
Example #4
    def get_shifted_column(previous_state_column, hidden_states_size: int):
        previous_memory_state_column_shifted = previous_state_column.clone()
        height = previous_state_column.size(2)
        zeros_padding = Variable(torch.zeros(previous_state_column.size(0), hidden_states_size, 1))
        if Utils.use_cuda():
            zeros_padding = zeros_padding.cuda()
        skip_first_sub_tensor = previous_memory_state_column_shifted[:, :, 0:(height - 1)]
        # print("zeros padding" + str(zeros_padding))
        # print("skip_first_sub_tensor: " + str(skip_first_sub_tensor))
        previous_memory_state_column_shifted = torch. \
            cat((zeros_padding, skip_first_sub_tensor), 2)
        # print("Returning previous_memory_state_column_shifted: " + str(previous_memory_state_column_shifted))
        return previous_memory_state_column_shifted
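
In effect, get_shifted_column shifts every state column one step along dimension 2 and fills the vacated first position with zeros. A small usage sketch, assuming hidden_states_size is 1:

    import torch

    column = torch.arange(1., 4.).view(1, 1, 3)  # states [1, 2, 3]
    shifted = get_shifted_column(column, 1)
    print(shifted)                               # states [0, 1, 2]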
Example #5
    def compute_ctc_loss_version_two(self, probabilities, labels_row_tensor):
        ctc_loss = warpctc_pytorch.CTCLoss()

        #probs = torch.FloatTensor([
        #    [[0, 0, 0, 0, 0], [1, 2, 3, 4, 5], [-5, -4, -3, -2, -1]],
        #    [[0, 0, 0, 0, 0], [6, 7, 8, 9, 10], [-10, -9, -8, -7, -6]],
        #    [[0, 0, 0, 0, 0], [11, 12, 13, 14, 15], [-15, -14, -13, -12, -11]]
        #])

        probs = probabilities

        print(
            "test_ctc_loss_probabilities_match_labels_third_baidu_example - probs: "
            + str(probs))

        print(
            "test_ctc_loss_probabilities_match_labels_third_baidu_example - probs.size(): "
            + str(probs.size()))

        # labels = Variable(torch.IntTensor([ [1, 0], [3, 3], [2, 3]]))
        # See: https://github.com/SeanNaren/warp-ctc/issues/29
        # All label sequences are concatenated, without blanks/padding,
        # and label sizes lists the sizes without padding
        labels = Variable(torch.IntTensor([1, 3, 3, 2, 3]))
        # labels = Variable(torch.IntTensor([2, 3]))
        # labels = Variable(torch.IntTensor([3, 3]))
        # The entries of label_sizes give the number of labels per example
        # and must sum to the total number of labels
        label_sizes = Variable(torch.IntTensor([1, 2, 2]))
        # label_sizes = Variable(torch.IntTensor([2]))
        # The entries give the number of probability frames per example; they
        # must be consistent with the probabilities tensor to avoid a crash
        probs_sizes = Variable(torch.IntTensor([1, 3, 3]))
        # probs_sizes = Variable(torch.IntTensor([3]))
        # Tell autograd to compute gradients for probs
        probs = Variable(probs, requires_grad=True)
        print("probs: " + str(probs))

        if Utils.use_cuda():
            probs = probs.cuda()
            device = probs.get_device()
            ctc_loss = ctc_loss.cuda()
            # labels = labels.cuda()
            # label_sizes = label_sizes.cuda()
            # probs_sizes = probs_sizes.cuda()

        loss = ctc_loss(probs, labels, probs_sizes, label_sizes)
        print("loss: " + str(loss))

        return loss
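
The comments above describe the warp-ctc input convention; written out as a shape sketch matching the sizes used in this example (batch size 3, 5 classes, sequence length 3), with the actual loss call left commented since it needs the warpctc_pytorch extension:

    import torch

    T, N, C = 3, 3, 5
    probs = torch.randn(T, N, C, requires_grad=True)  # (seq_len, batch, classes)
    labels = torch.IntTensor([1, 3, 3, 2, 3])   # all sequences concatenated, no blanks
    label_sizes = torch.IntTensor([1, 2, 2])    # per-example label counts, sums to len(labels)
    probs_sizes = torch.IntTensor([1, 3, 3])    # per-example input lengths, each <= T
    # loss = warpctc_pytorch.CTCLoss()(probs, labels, probs_sizes, label_sizes)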
Example #6
    def check_inputs_is_right_type(inputs, input_is_list: bool):
        if Utils.use_cuda():
            expected_type_instance = torch.cuda.ByteTensor()
        else:
            expected_type_instance = torch.ByteTensor()

        # If inputs is a list, check the first element of the list
        if input_is_list:
            item_to_compare = inputs[0]
        else:
            item_to_compare = inputs

        if item_to_compare.type() != expected_type_instance.type():
            raise RuntimeError("Error: expected a " +
                               str(expected_type_instance.type()) +
                               " type image tensor, but got: " +
                               str(item_to_compare.type()))
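
A short usage sketch for the check above, assuming a CPU-only run (on a CUDA machine the inputs would have to be torch.cuda.ByteTensor instead):

    import torch

    images = torch.zeros(2, 1, 4, 6, dtype=torch.uint8)  # a ByteTensor batch
    check_inputs_is_right_type(images, input_is_list=False)           # passes
    check_inputs_is_right_type([images, images], input_is_list=True)  # passes
    floats = torch.zeros(2, 1, 4, 6)  # a FloatTensor batch
    # check_inputs_is_right_type(floats, input_is_list=False)  # raises RuntimeError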
Example #7
def test_tensor_list_block_chunking_followed_by_dechunking_reconstructs_original(
        tensor_one, tensor_two, block_size,
        tensors_all_have_same_height: bool):

    if Utils.use_cuda():
        tensor_one = tensor_one.cuda()
        tensor_two = tensor_two.cuda()

    print("tensor_one: " + str(tensor_one))
    print("tensor_two: " + str(tensor_two))
    #print("tensor_one[0,  :, :]: " + str(tensor_one[0, :, :]))
    #print("tensor_two[0,  :, :]: " + str(tensor_two[0, :, :]))

    tensor_list = list([tensor_one, tensor_two])
    tensor_chunking = TensorListChunking.create_tensor_list_chunking(
        tensor_list, block_size)
    chunking = tensor_chunking.\
        chunk_tensor_list_into_blocks_concatenate_along_batch_dimension(tensor_list,
                                                                        tensors_all_have_same_height)
    print("chunking: " + str(chunking))
    print("chunking.size(): " + str(chunking.size()))
    dechunked_tensor_list = tensor_chunking.\
        dechunk_block_tensor_concatenated_along_batch_dimension_changed_block_size(chunking, block_size)

    print("dechunked_tensor_list: " + str(dechunked_tensor_list))

    # https://stackoverflow.com/questions/32996281/how-to-check-if-two-torch-tensors-or-matrices-are-equal
    # https://discuss.pytorch.org/t/tensor-math-logical-operations-any-and-all-functions/6624
    for tensor_original, tensor_reconstructed in zip(tensor_list,
                                                     dechunked_tensor_list):
        tensors_are_equal = torch.eq(tensor_original,
                                     tensor_reconstructed).all()
        print("tensors_are_equal: " + str(tensors_are_equal))
        if not tensors_are_equal:
            raise RuntimeError("Error: original tensor " +
                               str(tensor_original) +
                               " and dechunked tensor " +
                               str(tensor_reconstructed) + " are not equal")
        else:
            print(
                "Success: original tensor and dechunked(chunked(tensor)) are equal"
            )
Example #8
    def create_skewed_images_variable_four_dim(x):
        # skewed_images = ImageInputTransformer.create_row_diagonal_offset_tensors(x)

        ### Not clear if this method really causes the gradient to break or not.
        # skewed_images = ImageInputTransformer.\
        #    create_row_diagonal_offset_tensors_parallel_breaks_gradient(x)

        skewed_images = ImageInputTransformer. \
            create_row_diagonal_offset_tensors(x)

        # print("skewed images columns: " + str(skewed_images_columns))
        # print("skewed images rows: " + str(skewed_images_rows))
        # print("skewed_images: " + str(skewed_images))
        # See: https://pytorch.org/docs/stable/tensors.html

        if Utils.use_cuda():
            # https://discuss.pytorch.org/t/which-device-is-model-tensor-stored-on/4908/7
            device = x.get_device()
            skewed_images = skewed_images.to(device)
        return skewed_images
Example #9
    def chunk_tensor_into_blocks_concatenate_along_batch_dimension_no_cat(
            self, tensor: torch.Tensor):

        tensor_split_on_height = torch.split(tensor, self.block_size.height, 2)

        # New implementation: completely without use of cat
        # https://discuss.pytorch.org/t/best-way-to-split-process-merge/18702
        total_blocks = self.blocks_per_column * self.blocks_per_row
        batch_size = tensor.size(0)
        # The height in the batch dimension must be such that it fits all stacked
        # blocks, i.e. stacked in a single column, and also keeping the batch dimension
        height_in_batch_dimension = total_blocks * batch_size
        print("height in batch dimension: " + str(height_in_batch_dimension))

        if Utils.use_cuda():
            device = tensor.get_device()
            with torch.cuda.device(device):
                # creating the zeros directly on the gpu, which is faster
                # See: https://discuss.pytorch.org/t/creating-tensors-on-gpu-directly/2714/5
                result = torch.cuda.FloatTensor(height_in_batch_dimension,
                                                tensor.size(1),
                                                self.block_size.height,
                                                self.block_size.width).fill_(0)
        else:
            result = torch.FloatTensor(height_in_batch_dimension,
                                       tensor.size(1), self.block_size.height,
                                       self.block_size.width).fill_(0)
        index = 0
        for row_block in tensor_split_on_height:
            blocks = torch.split(row_block, self.block_size.width, 3)
            for column_block in blocks:
                # print("column_block.size(): " + str(column_block.size()))
                # print("result.size(): " + str(result.size()))
                # print("result slice.size() : " +
                #      str(result[index * batch_size:((index + 1) * batch_size),
                #                 :, :, :].size())
                #      )
                # https://discuss.pytorch.org/t/best-way-to-split-process-merge/18702
                result[index * batch_size:((index + 1) *
                                           batch_size), :, :, :] = column_block
                index += 1
        return result
Example #10
def test_tensor_block_chunking_followed_by_dechunking_reconstructs_original():
    tensor = torch.arange(1., 97.).view(2, 2, 4, 6)

    if Utils.use_cuda():
        tensor = tensor.cuda()

    print(tensor)
    print("tensor[0, 0, :, :]: " + str(tensor[0, 0, :, :]))
    # chunking = chunk_tensor_into_blocks_return_as_list(
    #     tensor, SizeTwoDimensional.create_size_two_dimensional(2, 2))
    # print("chunking: " + str(chunking))
    # for item in chunking:
    #     print("item.size(): " + str(item.size()))
    original_size = SizeTwoDimensional.create_size_two_dimensional(4, 6)
    block_size = SizeTwoDimensional.create_size_two_dimensional(2, 2)
    tensor_chunking = TensorChunking.create_tensor_chunking(
        original_size, block_size)
    chunking = tensor_chunking.chunk_tensor_into_blocks_concatenate_along_batch_dimension(
        tensor)
    print("chunking: " + str(chunking))
    print("chunking.size(): " + str(chunking.size()))
    dechunked_tensor = tensor_chunking.dechunk_block_tensor_concatenated_along_batch_dimension(
        chunking)

    print("dechunked_tensor: " + str(dechunked_tensor))

    # https://stackoverflow.com/questions/32996281/how-to-check-if-two-torch-tensors-or-matrices-are-equal
    # https://discuss.pytorch.org/t/tensor-math-logical-operations-any-and-all-functions/6624
    tensors_are_equal = torch.eq(tensor, dechunked_tensor).all()
    print("tensors_are_equal: " + str(tensors_are_equal))
    if not tensors_are_equal:
        raise RuntimeError("Error: original tensor " + str(tensor) +
                           " and dechunked tensor " + str(dechunked_tensor) +
                           " are not equal")
    else:
        print(
            "Success: original tensor and dechunked(chunked(tensor)) are equal"
        )
Example #11
    def create_row_diagonal_offset_tensors_parallel_breaks_gradient(
            image_tensors):

        if Utils.use_cuda():
            # https://discuss.pytorch.org/t/which-device-is-model-tensor-stored-on/4908/7
            device = image_tensors.get_device()

        number_of_channels = image_tensors.size(1)
        height = image_tensors.size(2)
        width = image_tensors.size(3)

        number_of_image_tensors = image_tensors.size(0)

        transformed_images = torch.zeros(
            number_of_image_tensors, number_of_channels, height,
            ImageInputTransformer.
            get_skewed_images_width_four_dimensional_tensor(image_tensors))

        if Utils.use_cuda():
            # The rows below are created on the GPU, so the target tensor must
            # live there as well for the slice assignments to work
            transformed_images = transformed_images.to(device)

        for y in range(image_tensors.size(2)):
            leading_zeros = y
            tailing_zeros = transformed_images.size(3) - width - y

            if leading_zeros > 0:

                # To get a sub-tensor with everything from the 0th and 3rd dimensions,
                # and specific values for the 1st and 2nd dimensions, you use
                # image_tensors[:, 0, y, :]
                # See:
                # https://stackoverflow.com/questions/47374172/how-to-select-index-over-two-dimension-in-pytorch?rq=1
                leading_zeros_tensor = torch.zeros(number_of_image_tensors,
                                                   number_of_channels,
                                                   leading_zeros)
                if Utils.use_cuda():
                    leading_zeros_tensor = leading_zeros_tensor.to(device)

                # print("leading_zeros_tensor.size()" + str(leading_zeros_tensor.size()))

                new_row = torch.cat(
                    (leading_zeros_tensor, image_tensors[:, :, y, :]), 2)
            else:
                new_row = image_tensors[:, :, y, :]

            if tailing_zeros > 0:
                # print("number of channels: " + str(number_of_channels))
                tailing_zeros_tensor = torch.zeros(number_of_image_tensors,
                                                   number_of_channels,
                                                   tailing_zeros)
                if Utils.use_cuda():
                    tailing_zeros_tensor = tailing_zeros_tensor.to(device)

                # print("new_row.size(): " + str(new_row.size()))
                # print("tailing_zeros_tensor.size(): " + str(tailing_zeros_tensor.size()))
                new_row = torch.cat((new_row, tailing_zeros_tensor), 2)
            # print("new row.size(): " + str(new_row.size()))
            # print("transformed_image[:, :, y, :].size()" + str(transformed_images[:, :, y, :].size()))
            transformed_images[:, :, y, :] = new_row

        # This method creates CopySlices objects as gradients. Not clear if this is ok.
        # It may be harmless, but it seems to be slower in any case.
        # Something can be found about CopySlices at
        # https://github.com/pytorch/pytorch/blob/master/torch/csrc/autograd/functions/tensor.cpp
        # but it is also not very conclusive
        print(
            "create_row_diagonal_offset_tensor_parallel_breaks_gradient: transformed_images.grad_fn: "
            + str(transformed_images.grad_fn))
        # print("transformed_images.size(): " + str(transformed_images.size()))
        return transformed_images
Example #12
def train_mdrnn(train_loader, test_loader, input_channels: int,
                input_size: SizeTwoDimensional, hidden_states_size: int,
                batch_size, compute_multi_directional: bool,
                use_dropout: bool):
    import torch.optim as optim

    criterion = nn.CrossEntropyLoss()
    #multi_dimensional_rnn = MultiDimensionalRNN.create_multi_dimensional_rnn(hidden_states_size,
    #                                                                         batch_size,
    #                                                                         compute_multi_directional,
    #                                                                         nonlinearity="sigmoid")
    #multi_dimensional_rnn = MultiDimensionalRNNFast.create_multi_dimensional_rnn_fast(hidden_states_size,
    #                                                                                  batch_size,
    #                                                                                  compute_multi_directional,
    #                                                                                  use_dropout,
    #                                                                                  nonlinearity="sigmoid")

    #multi_dimensional_rnn = MultiDimensionalLSTM.create_multi_dimensional_lstm(hidden_states_size,
    #                                                                           batch_size,
    #                                                                           compute_multi_directional,
    #                                                                           use_dropout,
    #                                                                           nonlinearity="sigmoid")

    # http://pytorch.org/docs/master/notes/cuda.html
    device = torch.device("cuda:0")
    # device_ids should include device!
    # device_ids lists all the gpus that may be used for parallelization
    # device is the initial device the model will be put on
    #device_ids = [0, 1]
    device_ids = [0]

    # multi_dimensional_rnn = MultiDimensionalLSTM.create_multi_dimensional_lstm_fast(input_channels,
    #                                                                                 hidden_states_size,
    #                                                                                 compute_multi_directional,
    #                                                                                 use_dropout,
    #                                                                                 nonlinearity="sigmoid")

    mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # multi_dimensional_rnn = BlockMultiDimensionalLSTM.create_block_multi_dimensional_lstm(input_channels,
    #                                                                                       hidden_states_size,
    #                                                                                       mdlstm_block_size,
    #                                                                                       compute_multi_directional,
    #                                                                                       use_dropout,
    #                                                                                       nonlinearity="sigmoid")
    #
    # block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # output_channels = mdlstm_block_size.width * mdlstm_block_size.height * hidden_states_size
    # multi_dimensional_rnn = BlockMultiDimensionalLSTMLayerPair.\
    #     create_block_multi_dimensional_lstm_layer_pair(input_channels, hidden_states_size,
    #                                                    output_channels, mdlstm_block_size,
    #                                                    block_strided_convolution_block_size,
    #                                                    compute_multi_directional,
    #                                                    use_dropout,
    #                                                    nonlinearity="tanh")

    # # An intermediate test case with first a layer-pair that consists of a
    # # BlockMultiDimensionalLSTM layer, followed by a BlockStructuredConvolution layer.
    # # After this comes an additional single block_strided_convolution layer as
    # # opposed to another full layer pair
    # mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # multi_dimensional_rnn = BlockMultiDimensionalLSTMLayerPairStacking.\
    #     create_one_layer_pair_plus_second_block_convolution_layer_network(hidden_states_size, mdlstm_block_size,
    #                                                                       block_strided_convolution_block_size)

    # # An intermediate test case with first a layer-pair that consists of a
    # # BlockMultiDimensionalLSTM layer, followed by a BlockStructuredConvolution layer.
    # # After this comes an additional single mdlstm layer as
    # # opposed to another full layer pair
    # mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 4)
    # multi_dimensional_rnn = BlockMultiDimensionalLSTMLayerPairStacking.\
    #     create_one_layer_pair_plus_second_block_mdlstm_layer_network(hidden_states_size, mdlstm_block_size,
    #                                                                       block_strided_convolution_block_size)
    #
    mdlstm_block_size = SizeTwoDimensional.create_size_two_dimensional(4, 2)
    block_strided_convolution_block_size = SizeTwoDimensional.create_size_two_dimensional(
        4, 2)
    multi_dimensional_rnn = MultiDimensionalLSTMLayerPairStacking.\
        create_two_layer_pair_network(hidden_states_size, mdlstm_block_size,
                                      block_strided_convolution_block_size, False)

    network = MultiDimensionalRNNToSingleClassNetwork.\
        create_multi_dimensional_rnn_to_single_class_network(multi_dimensional_rnn, input_size)

    #multi_dimensional_rnn = Net()

    if Utils.use_cuda():
        #multi_dimensional_rnn = multi_dimensional_rnn.cuda()

        network = nn.DataParallel(network, device_ids=device_ids)

        network.to(device)
        #print("multi_dimensional_rnn.module.mdlstm_direction_one_parameters.parallel_memory_state_column_computation :"
        #      + str(multi_dimensional_rnn.module.mdlstm_direction_one_parameters.parallel_memory_state_column_computation))

        #print("multi_dimensional_rnn.module.mdlstm_direction_one_parameters."
        #      "parallel_memory_state_column_computation.parallel_convolution.bias :"
        #      + str(multi_dimensional_rnn.module.mdlstm_direction_one_parameters.
        #            parallel_memory_state_column_computation.parallel_convolution.bias))

        #print("multi_dimensional_rnn.module.mdlstm_direction_one_parameters."
        #      "parallel_hidden_state_column_computation.parallel_convolution.bias :"
        #      + str(multi_dimensional_rnn.module.mdlstm_direction_one_parameters.
        #            parallel_hidden_state_column_computation.parallel_convolution.bias))

    print_number_of_parameters(multi_dimensional_rnn)

    #optimizer = optim.SGD(multi_dimensional_rnn.parameters(), lr=0.001, momentum=0.9)

    # Adding some weight decay seems to do magic, see: http://pytorch.org/docs/master/optim.html
    optimizer = optim.SGD(network.parameters(),
                          lr=0.001,
                          momentum=0.9,
                          weight_decay=1e-5)

    # Faster learning
    #optimizer = optim.SGD(multi_dimensional_rnn.parameters(), lr=0.01, momentum=0.9)

    start = time.time()

    num_gradient_corrections = 0

    for epoch in range(4):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):

            # get the inputs
            inputs, labels = data

            if Utils.use_cuda():
                inputs = inputs.to(device)
                # Set requires_grad(True) directly and only for the input
                inputs.requires_grad_(True)

            # wrap them in Variable
            # labels = Variable(labels)  # Labels need no gradient apparently
            if Utils.use_cuda():
                labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            #print("inputs: " + str(inputs))

            # forward + backward + optimize
            #outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
            time_start_network_forward = time.time()
            outputs = network(inputs)
            # print("Time used for network forward: " + str(util.timing.time_since(time_start_network_forward)))
            # print("outputs: " + str(outputs))
            # print("outputs.size(): " + str(outputs.size()))
            #print("labels: " + str(labels))

            time_start_loss_computation = time.time()
            loss = criterion(outputs, labels)
            # print("Time used for loss computation: " + str(util.timing.time_since(time_start_loss_computation)))

            time_start_loss_backward = time.time()

            # Debugging code for finding bad gradients. The unconditional raise
            # at the end stops training after the first backward pass, so it is
            # kept commented out for actual training runs.
            # get_dot = modules.find_bad_gradients.register_hooks(outputs)
            loss.backward()
            # dot = get_dot()
            # dot.save('mdlstm_find_bad_gradients.dot')
            # render('dot', 'png', 'mdlstm_find_bad_gradients.dot')
            # raise RuntimeError("stopping after find bad gradients")

            # print("Time used for loss backward: " + str(util.timing.time_since(time_start_loss_backward)))

            # Perform gradient clipping
            made_gradient_norm_based_correction = clip_gradient(
                multi_dimensional_rnn)
            if made_gradient_norm_based_correction:
                num_gradient_corrections += 1

            optimizer.step()

            # print statistics
            # print("loss.data: " + str(loss.data))
            # print("loss.data[0]: " + str(loss.data[0]))
            running_loss += loss.data
            #if i % 2000 == 1999:  # print every 2000 mini-batches
            # See: https://stackoverflow.com/questions/5598181/python-multiple-prints-on-the-same-line
            #print(str(i)+",", end="", flush=True)
            if i % 100 == 99:  # print every 100 mini-batches
                end = time.time()
                running_time = end - start
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100) +
                      " Running time: " + str(running_time))
                print("Number of gradient norm-based corrections: " +
                      str(num_gradient_corrections))
                running_loss = 0.0
                num_gradient_corrections = 0

    print('Finished Training')

    # Run evaluation
    # multi_dimensional_rnn.set_training(False) # Normal case
    network.module.set_training(False)  # When using DataParallel
    evaluate_mdrnn(test_loader, network, batch_size, device)
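
The clip_gradient function used above is not part of this excerpt. A hedged sketch of what such a norm-based correction could look like, assuming it wraps torch.nn.utils.clip_grad_norm_ and reports whether clipping occurred; the threshold value is illustrative, not the repository's actual setting:

    import torch

    def clip_gradient(model, max_norm: float = 10.0) -> bool:
        # clip_grad_norm_ rescales all gradients in place when their combined
        # norm exceeds max_norm and returns the norm found before clipping
        total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        return float(total_norm) > max_norm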
Example #13
    def compute_ctc_loss(self, probabilities, labels_row_tensor,
                         batch_size: int, width_reduction_factor: int):

        WarpCTCLossInterface.check_labels_row_tensor_contains_no_zeros(
            labels_row_tensor)

        labels = Variable(
            WarpCTCLossInterface.
            create_one_dimensional_labels_tensor_removing_padding_labels(
                labels_row_tensor))
        # label_sizes = Variable(WarpCTCLossInterface.\
        #    create_sequence_lengths_specification_tensor_all_same_length(labels_row_tensor))
        label_sizes = Variable(WarpCTCLossInterface.\
                               create_sequence_lengths_specification_tensor_different_lengths(labels_row_tensor))
        # probabilities_sizes = Variable(WarpCTCLossInterface.\
        #                               create_probabilities_lengths_specification_tensor_all_same_length(probabilities))
        # print("labels sizes: " + str(label_sizes))
        probabilities_sizes = \
            Variable(WarpCTCLossInterface.
                     create_probabilities_lengths_specification_tensor_different_lengths(
                        labels_row_tensor, width_reduction_factor, probabilities))

        # The ctc_loss interface expects the second dimension to be the batch size,
        # so the first and second dimension must be swapped
        probabilities_batch_second_dimension = probabilities.transpose(
            0, 1).contiguous()

        if Utils.use_cuda():
            device = probabilities.get_device()
            self.ctc_loss = self.ctc_loss.to(device)
            # self.ctc_loss = self.ctc_loss.cuda()
            # https://discuss.pytorch.org/t/which-device-is-model-tensor-stored-on/4908/7
            # device = probabilities.get_device()
            # Causes "Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)"
            # labels = labels.cuda()

            #probabilities_batch_second_dimension = torch.zeros(probabilities_batch_second_dimension.size(0),
            #                                                   probabilities_batch_second_dimension.size(1),
            #                                                   probabilities_batch_second_dimension.size(2),
            #                                                   requires_grad=True
            #                                                )

            probabilities_batch_second_dimension = \
                probabilities_batch_second_dimension.cuda()

            # probabilities_batch_second_dimension = probabilities_batch_second_dimension.to(device)

            # print("probabilities_batch_second_dimension.requires_grad:" +
            #      str(probabilities_batch_second_dimension.requires_grad))

            #probabilities_sizes = Variable(torch.IntTensor([1, 3, 3]))
            #probabilities_sizes = probabilities_sizes.to(device)

        # print("probabilities_batch_second_dimension: " + str(probabilities_batch_second_dimension))

        # print("probabilities_sizes: " + str(probabilities_sizes))

        # print(">>> compute_ctc_loss - probabilities_batch_second_dimension.size(): "
        #      + str(probabilities_batch_second_dimension.size()))
        # print(">>> compute_ctc_loss - labels.size(): " + str(labels.size()))
        # print(">>> compute_ctc_loss - label_sizes.size(): " + str(label_sizes.size()))
        # print(">>> compute_ctc_loss - probabilities_sizes.size(): " + str(probabilities_sizes.size()))
        # print("label_sizes: " + str(label_sizes))
        # print("labels: " + str(labels))
        # print("probabilities_sizes: " + str(probabilities_sizes))

        # Sanity check: the batch size must be the second dimension of the
        # probabilities tensor, otherwise the ctc_loss function will give
        # wrong results and/or crash.
        if probabilities_batch_second_dimension.size(1) != batch_size:
            raise RuntimeError(
                "Error: the second dimension of probabilities_batch_second_dimension "
                + "should equal batch_size " + str(batch_size) + " but is " +
                str(probabilities_batch_second_dimension.size(1)))

        # print("compute_ctc_loss - probabilities_sizes: " + str(probabilities_sizes))
        # print("compute_ctc_loss - labels: " + str(labels))
        # print("compute_ctc_loss - label_sizes: " + str(label_sizes))

        loss = self.ctc_loss(probabilities_batch_second_dimension, labels,
                             probabilities_sizes, label_sizes)

        # print(">>> compute_ctc_loss - loss: " + str(loss))

        return loss
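
The transpose at the start of this method converts the batch-first network output to the sequence-first layout that warp-ctc expects. A minimal shape check, assuming an output of shape (batch, seq_len, classes):

    import torch

    probabilities = torch.randn(4, 50, 30)                  # (batch, seq_len, classes)
    seq_first = probabilities.transpose(0, 1).contiguous()  # (seq_len, batch, classes)
    assert seq_first.size(1) == probabilities.size(0)       # batch is now dimension 1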
Example #14
    def evaluate_mdrnn(test_loader, multi_dimensional_rnn, device,
                       vocab_list: list, blank_symbol: str, horizontal_reduction_factor: int,
                       image_input_is_unsigned_int: bool, input_is_list: bool,
                       language_model_parameters: LanguageModelParameters,
                       save_score_table_file_path: str, epoch_number: int, epoch_statistics: EpochStatistics):

        correct = 0
        total = 0

        output_strings = list([])
        reference_labels_strings = list([])

        for data in test_loader:
            inputs, labels = data

            if Utils.use_cuda():
                labels = labels.to(device)

                if input_is_list:
                    inputs = Utils.move_tensor_list_to_device(inputs, device)
                else:
                    inputs = inputs.to(device)

            # If the image input comes in the form of unsigned ints, they need to
            # be converted to floats (after moving to GPU, i.e. directly on GPU
            # which is faster)
            if image_input_is_unsigned_int:
                Trainer.check_inputs_is_right_type(inputs, input_is_list)
                inputs = IamLinesDataset.convert_unsigned_int_image_tensor_or_list_to_float_image_tensor_or_list(inputs)

            # https://github.com/pytorch/pytorch/issues/235
            # Running the evaluation without computing gradients is the recommended way
            # since this saves time, and more importantly, memory
            with torch.no_grad():

                # outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
                max_input_width = NetworkToSoftMaxNetwork.get_max_input_width(inputs)
                outputs = multi_dimensional_rnn(inputs, max_input_width)

                probabilities_sum_to_one_dimension = 2
                # Outputs is the output of the linear layer which is the input to warp_ctc
                # But to get probabilities for the decoder, the softmax function needs to
                # be applied to the outputs
                probabilities = torch.nn.functional. \
                    softmax(outputs, probabilities_sum_to_one_dimension)

                # No longer necessary with fixed word separator specification in decoder
                # and normal language model
                # probabilities = Evaluator.append_preceding_word_separator_to_probabilities(
                #    probabilities, vocab_list, Evaluator.WORD_SEPARATOR_SYMBOL)

                print(">>> evaluate_mdrnn  - outputs.size: " + str(outputs.size()))
                print(">>> evaluate_mdrnn  - probabilities.size: " + str(probabilities.size()))

                # beam_size = 20   # This is the problem perhaps...
                # beam_size = 100  # The normal default is 100
                beam_size = Evaluator.BEAM_SIZE  # Larger value to see if it further improves results
                # This value specifies the number of (character) probabilities kept in the
                # decoder. If it is set equal or larger to the number of characters in the
                # vocabulary, no pruning is done for it
                cutoff_top_n = len(vocab_list)  # No pruning for this parameter
                print(">>> evaluate_mdrnn  - len(vocab_list): " + str(len(vocab_list)))
                decoder = Evaluator.create_decoder(vocab_list,  cutoff_top_n, beam_size,
                                                   blank_symbol,
                                                   language_model_parameters)
                label_sizes = WarpCTCLossInterface. \
                    create_sequence_lengths_specification_tensor_different_lengths(labels)

                sequence_lengths = WarpCTCLossInterface.\
                    create_probabilities_lengths_specification_tensor_different_lengths(
                        labels, horizontal_reduction_factor, probabilities)
                sequence_lengths = Evaluator.increase_sequence_lengths_by_one(sequence_lengths)
                # print(">>> evaluate_mdrnn  -  sequence lengths: " + str(sequence_lengths))
                # print("probabilities.data.size(): " + str(probabilities.data.size()))
                beam_results, beam_scores, timesteps, out_seq_len = \
                    decoder.decode(probabilities.data, sequence_lengths)

                # print(">>> evaluate_mdrnn  - beam_results: " + str(beam_results))

                total += labels.size(0)

                for example_index in range(0, beam_results.size(0)):
                    beam_results_sequence = beam_results[example_index][0]
                    # print("beam_results_sequence: \"" + str(beam_results_sequence) + "\"")
                    use_language_model_in_decoder = language_model_parameters is not None
                    output_string = Evaluator.convert_to_string(
                        beam_results_sequence, vocab_list, out_seq_len[example_index][0],
                        use_language_model_in_decoder)
                    example_labels_with_padding = labels[example_index]
                    # Extract the real example labels, removing the padding labels
                    reference_labels = example_labels_with_padding[0:label_sizes[example_index]]

                    # print(">>> evaluate_mdrnn  - reference_labels: " + str(reference_labels))
                    reference_labels_string = Evaluator.convert_labels_tensor_to_string(
                        reference_labels, vocab_list, blank_symbol)

                    if reference_labels_string == output_string:
                        # print("Yaaaaah, got one correct!!!")
                        correct += 1
                        correct_string = "correct"
                    else:
                        correct_string = "wrong"

                    print(">>> evaluate_mdrnn  - output: \"" + output_string + "\" " +
                          "\nreference: \"" + reference_labels_string + "\" --- "
                          + correct_string)

                    output_strings.append(output_string)
                    reference_labels_strings.append(reference_labels_string)

            # correct += (predicted == labels).sum()

        cer_including_word_separators = evaluation_metrics.character_error_rate. \
            compute_character_error_rate_for_list_of_output_reference_pairs_fast(
                output_strings, reference_labels_strings, True)

        cer_excluding_word_separators = evaluation_metrics.character_error_rate. \
            compute_character_error_rate_for_list_of_output_reference_pairs_fast(
                output_strings, reference_labels_strings, False)

        wer = evaluation_metrics.word_error_rate. \
            compute_word_error_rate_for_list_of_output_reference_pairs(
                output_strings, reference_labels_strings)

        total_examples = len(test_loader.dataset)
        validation_stats = ValidationStats(total_examples, correct, cer_excluding_word_separators, wer)
        # https://stackoverflow.com/questions/3395138/using-multiple-arguments-for-string-formatting-in-python-e-g-s-s
        print("Accuracy of the network on the {} test inputs: {:.2f} % accuracy".format(
            total_examples, validation_stats.get_accuracy()))

        print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
              "including word separators: {:.3f}  CER".format(
                total_examples, cer_including_word_separators))
        print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
              "excluding word separators: {:.3f}  CER".format(
                total_examples, cer_excluding_word_separators))
        print("Word Error Rate (WER)[%] of the network on the {} test inputs: {:.3f}  WER".format(
            total_examples, wer))

        if save_score_table_file_path is not None:
            score_file_existed = os.path.exists(save_score_table_file_path)
            # Open the file in append mode, creating it if it doesn't exist
            with open(save_score_table_file_path, "a") as scores_table_file:
                if not score_file_existed:
                    scores_table_file.write(Evaluator.score_table_header(total_examples, epoch_statistics))
                scores_table_file.write(Evaluator.score_table_line(epoch_number, correct,
                                                                   validation_stats.get_accuracy(),
                                                                   cer_including_word_separators,
                                                                   cer_excluding_word_separators,
                                                                   wer,
                                                                   epoch_statistics) + "\n")

        return validation_stats
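
The character error rate computation above is delegated to evaluation_metrics.character_error_rate. As a reference for what is being measured, a minimal sketch of CER for a single output/reference pair, assuming the usual definition of edit distance divided by reference length, in percent (the repository's batched implementation is not shown here):

    def character_error_rate(output: str, reference: str) -> float:
        # Levenshtein distance computed by dynamic programming
        m, n = len(output), len(reference)
        distance = [[0] * (n + 1) for _ in range(m + 1)]
        for i in range(m + 1):
            distance[i][0] = i
        for j in range(n + 1):
            distance[0][j] = j
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                substitution_cost = 0 if output[i - 1] == reference[j - 1] else 1
                distance[i][j] = min(distance[i - 1][j] + 1,  # deletion
                                     distance[i][j - 1] + 1,  # insertion
                                     distance[i - 1][j - 1] + substitution_cost)
        return 100.0 * distance[m][n] / max(n, 1)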
Example #15
    def train_one_epoch(self,
                        train_loader,
                        epoch: int,
                        start: int,
                        batch_size,
                        device,
                        inputs_is_list: bool,
                        report_func=None):
        """ Train next epoch.
        Args:
            train_iter: training data iterator
            epoch(int): the epoch number
            report_func(fn): function for logging
            train_loader: the train loader,
            start: time in seconds training started

        return: Average loss per minibatch, total_examples

        """
        # if isinstance(self.model, torch.nn.DataParallel):
        #     device = self.model.module.get_device()
        # else:
        #     device = self.model.get_device()

        num_gradient_corrections = 0
        gradient_norms_sum = 0
        running_loss = 0.0
        total_summed_loss_epoch = 0.0
        total_examples = 0
        number_of_minibatches = 0
        time_start = time.time()
        for i, data in enumerate(train_loader, 0):

            time_start_batch = time.time()

            # get the inputs
            inputs, labels = data
            # One might expect this to make things faster, but it doesn't seem
            # to help yet
            # inputs = TensorUtils.get_pinned_memory_copy_of_list(inputs)

            Trainer.check_there_are_no_zero_labels(labels, inputs_is_list)

            # If minimize_horizontal_padding is used, inputs will be a list
            if Utils.use_cuda():
                if not inputs_is_list:
                    inputs = inputs.to(device)
                else:
                    inputs = Utils.move_tensor_list_to_device(inputs, device)

            # If the image input comes in the form of unsigned ints, they need to
            # be converted to floats (after moving to GPU, i.e. directly on GPU
            # which is faster)
            if self.model_properties.image_input_is_unsigned_int:
                Trainer.check_inputs_is_right_type(inputs, inputs_is_list)
                inputs = IamLinesDataset.convert_unsigned_int_image_tensor_or_list_to_float_image_tensor_or_list(
                    inputs)

            if inputs_is_list:
                for element in inputs:
                    element.requires_grad_(True)
            else:
                # Set requires_grad(True) directly and only for the input
                inputs.requires_grad_(True)

            # wrap them in Variable
            # labels = Variable(labels)  # Labels need no gradient apparently
            # if Utils.use_cuda():

            # Labels must remain on CPU for warp-ctc loss
            # labels = labels.to(device)

            # print("inputs: " + str(inputs))

            # forward + backward + optimize
            # outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
            # print("train_multi_dimensional_rnn_ctc.train_mdrnn - labels.size(): " + str(labels.size()))
            # print("train_multi_dimensional_rnn_ctc.train_mdrnn - inputs.size(): " + str(inputs.size()))
            # print("train_multi_dimensional_rnn_ctc.train_mdrnn - inputs: " + str(inputs))

            time_start_network_forward = util.timing.date_time_now()
            max_input_width = NetworkToSoftMaxNetwork.get_max_input_width(
                inputs)
            outputs = self.model(inputs, max_input_width)
            # print("Time used for network forward: " + str(util.timing.milliseconds_since(time_start_network_forward)))

            # print(">>> outputs.size(): " + str(outputs.size()))

            # print(">>> labels.size() : " + str(labels.size()))
            # print("labels: " + str(labels))
            # warp_ctc_loss_interface.
            # print(">>> labels_one_dimensional.size() : " + str(labels_one_dimensional.size()))
            # print("labels_one_dimensional: " + str(labels_one_dimensional))

            # print("outputs: " + str(outputs))
            # print("outputs.size(): " + str(outputs.size()))
            # print("labels: " + str(labels))
            if inputs_is_list:
                number_of_examples = len(inputs)
            else:
                number_of_examples = inputs.size(0)

            time_start_ctc_loss_computation = util.timing.date_time_now()
            # print("trainer - outputs.size(): " + str(outputs.size()))
            loss = self.warp_ctc_loss_interface.compute_ctc_loss(
                outputs, labels, number_of_examples,
                self.model_properties.width_reduction_factor)
            total_examples += number_of_examples

            # print("Time used for ctc loss computation: " +
            # str(util.timing.milliseconds_since(time_start_ctc_loss_computation)))

            # See: https://github.com/SeanNaren/deepspeech.pytorch/blob/master/train.py
            # The averaging seems to help learning (but a smaller learning rate
            # might have the same effect!)
            loss = loss / number_of_examples  # average the loss by minibatch size

            loss_sum = loss.data.sum()
            inf = float("inf")
            if loss_sum == inf or loss_sum == -inf:
                print("WARNING: received an inf loss, setting loss value to 0")
                loss_value = 0
            else:
                loss_value = loss.item()

            # print("loss: " + str(loss))
            # loss = criterion(outputs, labels)

            time_start_loss_backward = util.timing.date_time_now()

            # zero the parameter gradients
            self.optimizer.zero_grad()
            self.model.zero_grad()

            # get_dot = modules.find_bad_gradients.register_hooks(outputs)
            loss = loss.contiguous()
            loss.backward()

            # https://discuss.pytorch.org/t/how-to-check-for-vanishing-exploding-gradients/9019/4
            #for p, n in zip(self.model.parameters(), self.model._all_weights[0]):
            #    if n[:6] == 'weight':
            #        print('===========\ngradient:{}\n----------\n{}'.format(n, p.grad))

            # for name, p in self.model.named_parameters():
            #         print('===========\ngradient {} \n----------\n{}'.format(name, p.grad))

            # dot = get_dot()
            # dot.save('mdlstm_ctc_no_data_parallel_find_bad_gradients-clamp-pad-function.dot')
            # render('dot', 'png', 'mdlstm_ctc_mnist_find_bad_gradients.dot')
            # print("Time used for loss backward: " + str(util.timing.milliseconds_since(time_start_loss_backward)))

            # raise RuntimeError("stopping after find bad gradients")

            # Perform step including gradient clipping
            # made_gradient_norm_based_correction, total_norm = self.optimizer.step()

            # Perform an update step, including norm-based gradient clipping. Compensate the maximum gradient
            # norm by the factor: number_of_examples/batch_size.  This is to avoid over-correction (too much learning)
            # for the last batch, which contains less examples.
            made_gradient_norm_based_correction, total_norm = self.optimizer.step_with_scaling_for_size_current_batch(
                number_of_examples, batch_size)
            print("trainer - total norm: " + str(total_norm))

            if made_gradient_norm_based_correction:
                num_gradient_corrections += 1
            gradient_norms_sum += total_norm

            # print statistics
            # print("loss.data: " + str(loss.data))
            # print("loss.data[0]: " + str(loss.data[0]))
            running_loss += loss_value
            total_summed_loss_epoch += loss_value
            # if i % 2000 == 1999:  # print every 2000 mini-batches
            # See: https://stackoverflow.com/questions/5598181/python-multiple-prints-on-the-same-line
            # print(str(i)+",", end="", flush=True)
            if i % 10 == 9:  # print every 10 mini-batches
                end = time.time()
                running_time = end - start
                print('[%d, %5d] loss: %.3f' %
                      (epoch, i + 1, running_loss / 10) + " Running time: " +
                      str(running_time))
                average_norm = gradient_norms_sum / 10
                print("Number of gradient norm-based corrections: " +
                      str(num_gradient_corrections))
                print("Average gradient total norm: " + str(average_norm))
                running_loss = 0.0
                num_gradient_corrections = 0
                gradient_norms_sum = 0

                percent = (i + 1) / float(len(train_loader))
                examples_processed = (i + 1) * batch_size
                # Use a separate name here: "total_examples" accumulates the
                # number of processed examples and is returned by this method
                total_examples_in_dataset = len(train_loader.dataset)
                print("Processed " + str(examples_processed) + " of " +
                      str(total_examples_in_dataset) + " examples in this epoch")
                print(">>> Time used in current epoch: " + str(
                    util.timing.time_since_and_expected_remaining_time(
                        time_start, percent)))
                sys.stdout.flush()
            number_of_minibatches += 1

        average_loss_per_minibatch = total_summed_loss_epoch / number_of_minibatches
        return average_loss_per_minibatch, total_examples