Exemplo n.º 1
0
def gather_list(tensor_lists, destination=None):
    """Gathers tensor lists from multiple GPUs into one flat list.

    The input lists are concatenated in iteration order and the combined
    list is handed to ``Utils.move_tensor_list_to_device`` together with
    ``destination``.

    Arguments:
        tensor_lists (Iterable[Iterable[Tensor]]): iterable of tensor lists
            to gather. Every element must report ``is_cuda``. The outer
            iterable and each inner list are traversed exactly once, so
            one-shot iterables such as generators are supported.
        destination (int, optional): output device (-1 means CPU, default:
            current device)

    Returns:
        A tensor list located on ``destination`` device, that is the result
        of appending the ``tensor_lists``.

    Raises:
        AssertionError: if any element is not on a GPU.
    """
    result = []
    for tensor_list in tensor_lists:
        # Materialize once: validating and appending must not consume a
        # one-shot iterable twice.
        elements = list(tensor_list)
        for element in elements:
            assert element.is_cuda, "gather expects all inputs to be on GPUs"
        result.extend(elements)

    if destination is None:
        destination = torch.cuda.current_device()

    # -1 (CPU) and real device indices are both forwarded unchanged; the
    # helper is responsible for interpreting the value.
    # TODO: if copying to CPU, allocate a pinned buffer, do async copies to
    # it, and copy it to regular memory
    return Utils.move_tensor_list_to_device(result, destination)
Exemplo n.º 2
0
def scatter_list(list_of_tensors, devices, chunk_sizes=None, streams=None):
    """Scatters a list of tensors across multiple GPUs.

    Arguments:
        list_of_tensors (list(Tensor)): list of tensors to scatter.
        devices (Iterable[int]): ints specifying among which devices the
            tensors should be scattered. Must support ``len()``.
        chunk_sizes (Iterable[int], optional): sizes of chunks to be placed on
            each device. It should match ``devices`` in length and sum to
            ``len(list_of_tensors)``. If not specified, the list is split
            by ``chunk_list(list_of_tensors, len(devices))``.
        streams (Iterable, optional): one entry per device, passed to
            ``torch.cuda.stream`` while that device's chunk is moved.
            Defaults to ``None`` for every device.

    Returns:
        A tuple containing chunks of the ``list of tensors``, spread across
        given ``devices``. Chunks are consecutive, non-overlapping slices of
        the input in their original order.

    Raises:
        AssertionError: if ``chunk_sizes`` does not sum to
            ``len(list_of_tensors)`` or contains a non-positive size.
    """
    if chunk_sizes is None:
        chunks = chunk_list(list_of_tensors, len(devices))
    else:
        # chunk_sizes is read several times below; materialize it so that a
        # one-shot iterable is not exhausted by the first check.
        chunk_sizes = list(chunk_sizes)
        assert sum(chunk_sizes) == len(list_of_tensors), "given chunk sizes " \
            "don't sum up to the tensor's size (sum(chunk_sizes) == {}, but " \
            "expected {})".format(sum(chunk_sizes), len(list_of_tensors))
        assert min(chunk_sizes) > 0, "got a negative chunk_size"
        # Walk the list with an explicit start offset so that chunk k covers
        # [sum(sizes[:k]), sum(sizes[:k + 1])). Slicing from the running
        # total (the chunk END) would skip the first chunk entirely.
        chunks = []
        chunk_start = 0
        for size in chunk_sizes:
            chunks.append(list_of_tensors[chunk_start:chunk_start + size])
            chunk_start += size
    # TODO: copy to a pinned buffer first (if copying from CPU)
    if streams is None:
        streams = [None] * len(devices)
    outputs = []
    for device, chunk, stream in zip(devices, chunks, streams):
        with torch.cuda.device(device), torch.cuda.stream(stream):
            outputs.append(
                Utils.move_tensor_list_to_device(chunk,
                                                 device,
                                                 non_blocking=True))
    return tuple(outputs)
Exemplo n.º 3
0
    def evaluate_mdrnn(test_loader, multi_dimensional_rnn, device,
                       vocab_list: list, blank_symbol: str, horizontal_reduction_factor: int,
                       image_input_is_unsigned_int: bool, input_is_list: bool,
                       language_model_parameters: LanguageModelParameters,
                       save_score_table_file_path: str, epoch_number: int, epoch_statistics: EpochStatistics):
        """Evaluate the network on ``test_loader`` and report accuracy, CER and WER.

        Every batch is run through ``multi_dimensional_rnn``, a softmax is
        applied over dimension 2 of the outputs, and the result is decoded
        with a decoder obtained from ``Evaluator.create_decoder``. The first
        decoded sequence of each example is converted to a string and
        compared for exact equality with the reference label string.

        Args:
            test_loader: iterable yielding ``(inputs, labels)`` batches.
                ``len(test_loader.dataset)`` is used as the number of
                examples in the reports.
            multi_dimensional_rnn: callable invoked as
                ``multi_dimensional_rnn(inputs, max_input_width)``.
            device: target of the ``.to(device)`` / list move, used only
                when ``Utils.use_cuda()`` is true.
            vocab_list: vocabulary handed to the decoder and to the string
                conversion helpers; its length is used as ``cutoff_top_n``.
            blank_symbol: forwarded to the decoder construction and to the
                label-to-string conversion.
            horizontal_reduction_factor: forwarded to the helper that builds
                the probability sequence lengths.
            image_input_is_unsigned_int: when true the inputs are type
                checked and converted to float after the device move.
            input_is_list: whether ``inputs`` is a list of tensors rather
                than a single tensor.
            language_model_parameters: forwarded to the decoder
                construction; ``None`` makes the "use language model" flag
                passed to ``Evaluator.convert_to_string`` false.
            save_score_table_file_path: when not ``None``, a score line is
                appended to this file (preceded by a header if the file did
                not exist yet).
            epoch_number: written into the score table line.
            epoch_statistics: written into the score table header and line.

        Returns:
            The ``ValidationStats`` built from the dataset size, the number
            of exactly matching outputs, the CER excluding word separators
            and the WER.
        """

        correct = 0
        # NOTE(review): `total` is accumulated below but never read; the
        # reported example count comes from len(test_loader.dataset).
        total = 0

        output_strings = list([])
        reference_labels_strings = list([])

        for data in test_loader:
            inputs, labels = data

            # Inputs and labels are only moved when CUDA is in use
            if Utils.use_cuda():
                labels = labels.to(device)

                if input_is_list:
                    inputs = Utils.move_tensor_list_to_device(inputs, device)
                else:
                    inputs = inputs.to(device)

            # If the image input comes in the form of unsigned ints, they need to
            # be converted to floats (after moving to GPU, i.e. directly on GPU
            # which is faster)
            if image_input_is_unsigned_int:
                Trainer.check_inputs_is_right_type(inputs, input_is_list)
                inputs = IamLinesDataset.convert_unsigned_int_image_tensor_or_list_to_float_image_tensor_or_list(inputs)

            # https://github.com/pytorch/pytorch/issues/235
            # Running the evaluation without computing gradients is the recommended way
            # since this saves time, and more importantly, memory
            with torch.no_grad():

                # outputs = multi_dimensional_rnn(Variable(inputs))  # For "Net" (Le Net)
                max_input_width = NetworkToSoftMaxNetwork.get_max_input_width(inputs)
                outputs = multi_dimensional_rnn(inputs, max_input_width)

                probabilities_sum_to_one_dimension = 2
                # Outputs is the output of the linear layer which is the input to warp_ctc
                # But to get probabilities for the decoder, the softmax function needs to
                # be applied to the outputs
                probabilities = torch.nn.functional. \
                    softmax(outputs, probabilities_sum_to_one_dimension)

                # No longer necessary with fixed word separator specification in decoder
                # and normal language model
                # probabilities = Evaluator.append_preceding_word_separator_to_probabilities(
                #    probabilities, vocab_list, Evaluator.WORD_SEPARATOR_SYMBOL)

                print(">>> evaluate_mdrnn  - outputs.size: " + str(outputs.size()))
                print(">>> evaluate_mdrnn  - probabilities.size: " + str(probabilities.size()))

                # beam_size = 20   # This is the problem perhaps...
                # beam_size = 100  # The normal default is 100
                beam_size = Evaluator.BEAM_SIZE  # Larger value to see if it further improves results
                # This value specifies the number of (character) probabilities kept in the
                # decoder. If it is set equal to or larger than the number of characters in
                # the vocabulary, no pruning is done for it
                cutoff_top_n = len(vocab_list)  # No pruning for this parameter
                print(">>> evaluate_mdrnn  - len(vocab_list): " + str(len(vocab_list)))
                # NOTE(review): the decoder is rebuilt for every batch although none
                # of its arguments change inside the loop
                decoder = Evaluator.create_decoder(vocab_list,  cutoff_top_n, beam_size,
                                                   blank_symbol,
                                                   language_model_parameters)
                label_sizes = WarpCTCLossInterface. \
                    create_sequence_lengths_specification_tensor_different_lengths(labels)

                sequence_lengths = WarpCTCLossInterface.\
                    create_probabilities_lengths_specification_tensor_different_lengths(
                        labels, horizontal_reduction_factor, probabilities)
                sequence_lengths = Evaluator.increase_sequence_lengths_by_one(sequence_lengths)
                # print(">>> evaluate_mdrnn  -  sequence lengths: " + str(sequence_lengths))
                # print("probabilities.data.size(): " + str(probabilities.data.size()))
                beam_results, beam_scores, timesteps, out_seq_len = \
                    decoder.decode(probabilities.data, sequence_lengths)

                # print(">>> evaluate_mdrnn  - beam_results: " + str(beam_results))

                total += labels.size(0)

                for example_index in range(0, beam_results.size(0)):
                    # Index 0 selects the first decoded sequence of the example --
                    # presumably the best-scoring beam; confirm against the decoder
                    beam_results_sequence = beam_results[example_index][0]
                    # print("beam_results_sequence: \"" + str(beam_results_sequence) + "\"")
                    use_language_model_in_decoder = language_model_parameters is not None
                    output_string = Evaluator.convert_to_string(
                        beam_results_sequence, vocab_list, out_seq_len[example_index][0],
                        use_language_model_in_decoder)
                    example_labels_with_padding = labels[example_index]
                    # Extract the real example labels, removing the padding labels
                    reference_labels = example_labels_with_padding[0:label_sizes[example_index]]

                    # print(">>> evaluate_mdrnn  - reference_labels: " + str(reference_labels))
                    reference_labels_string = Evaluator.convert_labels_tensor_to_string(
                        reference_labels, vocab_list, blank_symbol)

                    # An example only counts as correct on an exact string match
                    if reference_labels_string == output_string:
                        # print("Yaaaaah, got one correct!!!")
                        correct += 1
                        correct_string = "correct"
                    else:
                        correct_string = "wrong"

                    print(">>> evaluate_mdrnn  - output: \"" + output_string + "\" " +
                          "\nreference: \"" + reference_labels_string + "\" --- "
                          + correct_string)

                    output_strings.append(output_string)
                    reference_labels_strings.append(reference_labels_string)

            # correct += (predicted == labels).sum()

        # The same CER routine is called twice; only the last (boolean)
        # argument differs between the including/excluding separator variants
        cer_including_word_separators = evaluation_metrics.character_error_rate. \
            compute_character_error_rate_for_list_of_output_reference_pairs_fast(
                output_strings, reference_labels_strings, True)

        cer_excluding_word_separators = evaluation_metrics.character_error_rate. \
            compute_character_error_rate_for_list_of_output_reference_pairs_fast(
                output_strings, reference_labels_strings, False)

        wer = evaluation_metrics.word_error_rate. \
            compute_word_error_rate_for_list_of_output_reference_pairs(
                output_strings, reference_labels_strings)

        total_examples = len(test_loader.dataset)
        # Only the CER excluding word separators is stored in the returned stats
        validation_stats = ValidationStats(total_examples, correct, cer_excluding_word_separators, wer)
        # https://stackoverflow.com/questions/3395138/using-multiple-arguments-for-string-formatting-in-python-e-g-s-s
        print("Accuracy of the network on the {} test inputs: {:.2f} % accuracy".format(
            total_examples, validation_stats.get_accuracy()))

        print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
              "including word separators: {:.3f}  CER".format(
                total_examples, cer_including_word_separators))
        print("Character Error Rate (CER)[%] of the network on the {} test inputs, "
              "excluding word separators: {:.3f}  CER".format(
                total_examples, cer_excluding_word_separators))
        print("Word Error Rate (WER)[%] of the network on the {} test inputs: {:.3f}  WER".format(
            total_examples, wer))

        if save_score_table_file_path is not None:
            # Checked before opening, because opening in append mode creates the file
            score_file_existed = os.path.exists(save_score_table_file_path)
            # Opens the file in append mode, creating it if it doesn't exist
            with open(save_score_table_file_path, "a") as scores_table_file:
                if not score_file_existed:
                    scores_table_file.write(Evaluator.score_table_header(total_examples, epoch_statistics))
                scores_table_file.write(Evaluator.score_table_line(epoch_number, correct,
                                                                   validation_stats.get_accuracy(),
                                                                   cer_including_word_separators,
                                                                   cer_excluding_word_separators,
                                                                   wer,
                                                                   epoch_statistics) + "\n")

        return validation_stats
Exemplo n.º 4
0
    def train_one_epoch(self,
                        train_loader,
                        epoch: int,
                        start: int,
                        batch_size,
                        device,
                        inputs_is_list: bool,
                        report_func=None):
        """ Train the model for one epoch.

        Args:
            train_loader: iterable of ``(inputs, labels)`` minibatches. It
                must support ``len()`` and expose a ``dataset`` attribute;
                both are used for progress reporting only.
            epoch(int): the epoch number, used in the progress output
            start: time in seconds training started (compared against
                ``time.time()`` for the running-time report)
            batch_size: nominal minibatch size, used to scale the gradient
                norm limit for smaller batches and in the progress output
            device: device the inputs are moved to when ``Utils.use_cuda()``
                is true
            inputs_is_list(bool): whether ``inputs`` is a list of tensors
                instead of a single tensor
            report_func(fn): function for logging (currently unused)

        return: ``(average_loss_per_minibatch, total_examples)`` where
            ``total_examples`` is the number of examples actually processed
            in this epoch.

        Raises:
            ZeroDivisionError: if ``train_loader`` yields no minibatches.
        """
        num_gradient_corrections = 0
        gradient_norms_sum = 0
        running_loss = 0.0
        total_summed_loss_epoch = 0.0
        total_examples = 0
        number_of_minibatches = 0
        inf = float("inf")
        time_start = time.time()
        for i, data in enumerate(train_loader, 0):

            # get the inputs
            inputs, labels = data

            Trainer.check_there_are_no_zero_labels(labels, inputs_is_list)

            # If minimize_horizontal_padding is used, inputs will be a list.
            # Labels are deliberately not moved: they must remain on CPU for
            # the warp-ctc loss.
            if Utils.use_cuda():
                if not inputs_is_list:
                    inputs = inputs.to(device)
                else:
                    inputs = Utils.move_tensor_list_to_device(inputs, device)

            # If the image input comes in the form of unsigned ints, they need to
            # be converted to floats (after moving to GPU, i.e. directly on GPU
            # which is faster)
            if self.model_properties.image_input_is_unsigned_int:
                Trainer.check_inputs_is_right_type(inputs, inputs_is_list)
                inputs = IamLinesDataset.convert_unsigned_int_image_tensor_or_list_to_float_image_tensor_or_list(
                    inputs)

            # Set requires_grad(True) directly and only for the input
            if inputs_is_list:
                for element in inputs:
                    element.requires_grad_(True)
            else:
                inputs.requires_grad_(True)

            # forward + backward + optimize
            max_input_width = NetworkToSoftMaxNetwork.get_max_input_width(
                inputs)
            outputs = self.model(inputs, max_input_width)

            if inputs_is_list:
                number_of_examples = len(inputs)
            else:
                number_of_examples = inputs.size(0)

            loss = self.warp_ctc_loss_interface.compute_ctc_loss(
                outputs, labels, number_of_examples,
                self.model_properties.width_reduction_factor)
            total_examples += number_of_examples

            # See: https://github.com/SeanNaren/deepspeech.pytorch/blob/master/train.py
            # The averaging seems to help learning (but a smaller learning rate
            # might have the same effect!)
            loss = loss / number_of_examples  # average the loss by minibatch size

            # An infinite loss is excluded from the loss statistics only; the
            # backward pass below still runs on the original loss tensor.
            loss_sum = loss.data.sum()
            if loss_sum == inf or loss_sum == -inf:
                print("WARNING: received an inf loss, setting loss value to 0")
                loss_value = 0
            else:
                loss_value = loss.item()

            # zero the parameter gradients
            self.optimizer.zero_grad()
            self.model.zero_grad()

            loss = loss.contiguous()
            loss.backward()

            # Perform an update step, including norm-based gradient clipping. Compensate the maximum gradient
            # norm by the factor: number_of_examples/batch_size.  This is to avoid over-correction (too much learning)
            # for the last batch, which contains less examples.
            made_gradient_norm_based_correction, total_norm = self.optimizer.step_with_scaling_for_size_current_batch(
                number_of_examples, batch_size)
            print("trainer - total norm: " + str(total_norm))

            if made_gradient_norm_based_correction:
                num_gradient_corrections += 1
            gradient_norms_sum += total_norm

            # print statistics
            running_loss += loss_value
            total_summed_loss_epoch += loss_value
            if i % 10 == 9:  # print every 10 mini-batches
                end = time.time()
                running_time = end - start
                print('[%d, %5d] loss: %.3f' %
                      (epoch, i + 1, running_loss / 10) + " Running time: " +
                      str(running_time))
                average_norm = gradient_norms_sum / 10
                print("Number of gradient norm-based corrections: " +
                      str(num_gradient_corrections))
                print("Average gradient total norm: " + str(average_norm))
                running_loss = 0.0
                num_gradient_corrections = 0
                gradient_norms_sum = 0

                percent = (i + 1) / float(len(train_loader))
                examples_processed = (i + 1) * batch_size
                # Kept in its own variable: total_examples is the running
                # count returned to the caller and must not be overwritten
                # by the dataset size.
                dataset_size = len(train_loader.dataset)
                print("Processed " + str(examples_processed) + " of " +
                      str(dataset_size) + " examples in this epoch")
                print(">>> Time used in current epoch: " + str(
                    util.timing.time_since_and_expected_remaining_time(
                        time_start, percent)))
                sys.stdout.flush()
            number_of_minibatches += 1

        average_loss_per_minibatch = total_summed_loss_epoch / number_of_minibatches
        return average_loss_per_minibatch, total_examples