def check_reading(self, num_gpu, is_sorted):
        """Read multitask training mini-batches and print their transcripts.

        Builds a `DataSet` over the training data with `phone61` as the
        second label type, iterates `iter_per_epoch + 1` mini-batches and
        prints the first character and phone transcript of each one.

        Args:
            num_gpu: int, forwarded to `DataSet`; when larger than 1 only the
                first element of the returned inputs / labels is decoded
            is_sorted: forwarded to `DataSet`
        Raises:
            ValueError: when `num_gpu == 1` and an input is shorter than its
                character or phone label sequence
        """
        print('----- num_gpu: ' + str(num_gpu) + ', is_sorted: ' +
              str(is_sorted) + ' -----')

        batch_size = 64
        dataset = DataSet(data_type='train',
                          label_type_second='phone61',
                          batch_size=batch_size,
                          num_stack=3,
                          num_skip=3,
                          is_sorted=is_sorted,
                          is_progressbar=True,
                          num_gpu=num_gpu)

        map_file_path_char = '../metric/mapping_files/ctc/char2num.txt'
        map_file_path_phone = '../metric/mapping_files/ctc/phone2num_61.txt'

        tf.reset_default_graph()
        with tf.Session().as_default() as sess:
            print('=> Reading mini-batch...')

            mini_batch = dataset.next_batch(session=sess)

            iter_per_epoch = int(dataset.data_num / (batch_size * num_gpu)) + 1
            for _ in range(iter_per_epoch + 1):
                (inputs, labels_char_st, labels_phone_st,
                 _inputs_seq_len, _input_names) = next(mini_batch)

                if num_gpu > 1:
                    for inputs_gpu in inputs:
                        print(inputs_gpu.shape)
                    # Keep only the first shard of the inputs as well as of
                    # the labels; otherwise len(inputs) below is the number
                    # of shards, not the number of utterances being decoded.
                    inputs = inputs[0]
                    labels_char_st = labels_char_st[0]
                    labels_phone_st = labels_phone_st[0]

                labels_char = sparsetensor2list(labels_char_st,
                                                batch_size=len(inputs))
                labels_phone = sparsetensor2list(labels_phone_st,
                                                 batch_size=len(inputs))

                if num_gpu == 1:
                    # Same sanity check for both label sets: an input must
                    # not be shorter than its label sequence.
                    for labels in (labels_char, labels_phone):
                        for inputs_i, labels_i in zip(inputs, labels):
                            if len(inputs_i) < len(labels_i):
                                print(len(inputs_i))
                                print(len(labels_i))
                                raise ValueError(
                                    'input is shorter than its label sequence')

                str_true_char = num2char(labels_char[0], map_file_path_char)
                str_true_char = re.sub(r'_', ' ', str_true_char)
                str_true_phone = num2phone(labels_phone[0],
                                           map_file_path_phone)
                print(str_true_char)
                print(str_true_phone)
                print('-----')
def decode_test(session,
                decode_op,
                network,
                dataset,
                label_type,
                save_path=None):
    """Visualize label outputs of CTC model.

    Decodes `dataset.data_num` mini-batches of size 1 and prints, for each
    one, the name of the input together with the reference and the predicted
    transcript.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of a `Dataset` class
        label_type: string, phone39 or phone48 or phone61 or character
        save_path: path to save decoding results. Only checked against None:
            when given, the output is written to `decode.txt` inside
            `network.model_dir` instead of the current standard output
    """
    # Batch size is expected to be 1
    iteration = dataset.data_num

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=1)

    # Pick the mapping file and the index-to-string converter once
    if label_type == 'character':
        map_file_path = '../metric/mapping_files/ctc/char2num.txt'
        map_fn = num2char
    else:
        # Characters 5-6 of e.g. 'phone61' are the size of the phone set
        map_file_path = '../metric/mapping_files/ctc/phone2num_' + \
            label_type[5:7] + '.txt'
        map_fn = num2phone

    stdout_backup = sys.stdout
    decode_file = None
    if save_path is not None:
        decode_file = open(join(network.model_dir, 'decode.txt'), 'w')
        sys.stdout = decode_file

    try:
        for _ in range(iteration):
            # Create feed dictionary for next mini batch
            inputs, labels_true_st, inputs_seq_len, input_names = next(
                mini_batch)

            feed_dict = {
                network.inputs: inputs,
                network.inputs_seq_len: inputs_seq_len,
                network.keep_prob_input: 1.0,
                network.keep_prob_hidden: 1.0
            }

            # Visualize
            labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
            labels_true = sparsetensor2list(labels_true_st, batch_size=1)
            labels_pred = sparsetensor2list(labels_pred_st, batch_size=1)

            print('----- wav: %s -----' % input_names[0])
            print('True: %s' % map_fn(labels_true[0], map_file_path))
            print('Pred: %s' % map_fn(labels_pred[0], map_file_path))
    finally:
        # Restore the caller's stdout and release the file even if decoding
        # fails; otherwise every later print would keep going to decode.txt.
        if decode_file is not None:
            sys.stdout = stdout_backup
            decode_file.close()
# Example no. 3 / 0  (separator text carried over from the snippet listing; not code)
    def check_reading(self, label_type, num_gpu, is_sorted):
        """Read training mini-batches and print the first transcript of each.

        Builds a `DataSet` over the training data for `label_type`, iterates
        `iter_per_epoch + 1` mini-batches and prints the first label sequence
        of each one converted back to text.

        Args:
            label_type: string; 'kanji' and 'character' are converted with
                `num2char`, anything else with `num2phone`
            num_gpu: int, forwarded to `DataSet`; when larger than 1 only the
                first element of the returned inputs / labels is decoded
            is_sorted: forwarded to `DataSet`
        Raises:
            ValueError: when `num_gpu == 1` and an input is shorter than its
                label sequence
        """
        print('----- label_type: ' + label_type + ', num_gpu: ' +
              str(num_gpu) + ', is_sorted: ' + str(is_sorted) + ' -----')

        batch_size = 64
        dataset = DataSet(data_type='train',
                          train_data_size='default',
                          label_type=label_type,
                          batch_size=batch_size,
                          num_stack=3,
                          num_skip=3,
                          is_sorted=is_sorted,
                          is_progressbar=True,
                          num_gpu=num_gpu)

        # label type -> (mapping file, index-to-string converter);
        # every other label type falls back to the phone mapping
        converters = {
            'kanji': ('../metric/mapping_files/ctc/kanji2num.txt', num2char),
            'character': ('../metric/mapping_files/ctc/char2num.txt',
                          num2char),
        }
        phone_converter = ('../metric/mapping_files/ctc/phone2num.txt',
                           num2phone)

        tf.reset_default_graph()
        with tf.Session().as_default() as sess:
            print('=> Reading mini-batch...')
            map_file_path, map_fn = converters.get(label_type,
                                                   phone_converter)

            mini_batch = dataset.next_batch(session=sess)

            iter_per_epoch = int(dataset.data_num / (batch_size * num_gpu)) + 1
            for _ in range(iter_per_epoch + 1):
                inputs, labels_st, _inputs_seq_len, _input_names = next(
                    mini_batch)

                if num_gpu > 1:
                    for inputs_gpu in inputs:
                        print(inputs_gpu.shape)
                    # Only the first shard is decoded below
                    inputs = inputs[0]
                    labels_st = labels_st[0]

                labels = sparsetensor2list(labels_st, batch_size=len(inputs))

                # The guard used to be `num_gpu < 1`, which can never hold
                # for a usable GPU count (the division above already fails
                # for 0), so the length check was dead code.
                if num_gpu == 1:
                    for inputs_i, labels_i in zip(inputs, labels):
                        if len(inputs_i) < len(labels_i):
                            print(len(inputs_i))
                            print(len(labels_i))
                            raise ValueError(
                                'input is shorter than its label sequence')

                str_true = map_fn(labels[0], map_file_path)
                str_true = re.sub(r'_', ' ', str_true)
                print(str_true)
def do_eval_cer(session,
                decode_op,
                network,
                dataset,
                label_type,
                is_test=None,
                eval_batch_size=None,
                is_progressbar=False,
                is_multitask=False,
                is_main=False):
    """Evaluate trained model by Character Error Rate.

    For every utterance the edit distance between the predicted and the
    reference string (both with `_` removed) is divided by the reference
    length; the per-utterance values are summed and divided by
    `dataset.data_num`.

    Args:
        session: session of training model
        decode_op: operation for decoding
        network: network to evaluate
        dataset: An instance of `Dataset` class
        label_type: string, character or kanji
        is_test: set to True when evaluating by the test set
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize progressbar
        is_multitask: if True, evaluate the multitask model
        is_main: if True, evaluate the main task
    Return:
        cer_mean: An average of CER
    Raises:
        ValueError: if `label_type` is neither 'character' nor 'kanji'
    """
    # Resolve the mapping file first so that an unsupported label type fails
    # immediately instead of surfacing as an unbound variable mid-evaluation.
    if label_type == 'character':
        map_file_path = '../metric/mapping_files/ctc/char2num.txt'
    elif label_type == 'kanji':
        map_file_path = '../metric/mapping_files/ctc/kanji2num.txt'
    else:
        raise ValueError(
            "label_type must be 'character' or 'kanji', got %r" %
            (label_type,))

    if eval_batch_size is None:
        batch_size = network.batch_size
    else:
        batch_size = eval_batch_size

    num_examples = dataset.data_num
    # A trailing partial batch needs one extra iteration
    iteration = int(num_examples / batch_size)
    if num_examples % batch_size != 0:
        iteration += 1
    cer_sum = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    for _ in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch.
        # Multitask batches carry two label sets; `is_main` selects the first.
        batch = next(mini_batch)
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = batch
        elif is_main:
            inputs, labels_true_st, _, inputs_seq_len, _ = batch
        else:
            inputs, _, labels_true_st, inputs_seq_len, _ = batch

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        # The last mini-batch may be smaller than `batch_size`
        batch_size_each = len(inputs_seq_len)

        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size_each)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
        for i_batch in range(batch_size_each):
            # Convert from list to string
            str_pred = num2char(labels_pred[i_batch], map_file_path)
            # TODO: change in case of character
            if label_type == 'kanji' and is_test:
                # NOTE: for kanji, the labels of the test data are stored
                # as they are (not as indices), so they are only joined
                str_true = ''.join(labels_true[i_batch])
            else:
                str_true = num2char(labels_true[i_batch], map_file_path)

            # Remove silence(_) labels
            str_true = re.sub(r'[_]+', "", str_true)
            str_pred = re.sub(r'[_]+', "", str_pred)

            # Compute edit distance, normalised by the reference length.
            # An empty reference would divide by zero, so normalise by at
            # least 1: such an utterance contributes its raw edit count.
            cer_sum += Levenshtein.distance(str_pred, str_true) / max(
                len(str_true), 1)

    cer_mean = cer_sum / dataset.data_num

    return cer_mean
def do_eval_per(session,
                decode_op,
                per_op,
                network,
                dataset,
                train_label_type,
                eval_batch_size=None,
                is_progressbar=False,
                is_multitask=False):
    """Evaluate trained model by Phone Error Rate.

    Predictions (and references, unless the dataset is already labelled with
    phone39) are mapped onto the 39-phone set before the edit distance is
    computed. The per-batch values are weighted by the batch size and the
    total is divided by `dataset.data_num`.

    Args:
        session: session of training model
        decode_op: operation for decoding
        per_op: operation for computing phone error rate. Currently unused:
            the evaluation always goes through the 39-phone mapping. Kept so
            that existing callers keep working
        network: network to evaluate
        dataset: An instance of a `Dataset' class
        train_label_type: string, phone39 or phone48 or phone61
        eval_batch_size: int, the batch size when evaluating the model
        is_progressbar: if True, visualize the progressbar
        is_multitask: if True, evaluate the multitask model
    Returns:
        per_global: An average of PER
    """
    if eval_batch_size is not None:
        batch_size = eval_batch_size
    else:
        batch_size = dataset.batch_size

    data_label_type = dataset.label_type

    num_examples = dataset.data_num
    # A trailing partial batch needs one extra iteration
    iteration = int(num_examples / batch_size)
    if num_examples % batch_size != 0:
        iteration += 1
    per_global = 0

    # Make data generator
    mini_batch = dataset.next_batch(batch_size=batch_size)

    # Characters 5-6 of e.g. 'phone61' are the size of the phone set
    phone2num_map_file_path = '../metric/mapping_files/ctc/phone2num_' + \
        train_label_type[5:7] + '.txt'
    phone2num_39_map_file_path = '../metric/mapping_files/ctc/phone2num_39.txt'
    phone2phone_map_file_path = '../metric/mapping_files/phone2phone.txt'

    def to_phone39_indices(label_indices, source_label_type):
        """Convert phone indices to indices in the 39-phone set."""
        # num -> phone strings -> 39-phone strings -> 39-phone indices
        phone_list = num2phone(label_indices,
                               phone2num_map_file_path).split(' ')
        phone_list = map_to_39phone(phone_list, source_label_type,
                                    phone2phone_map_file_path)
        return phone2num(phone_list, phone2num_39_map_file_path)

    for _ in wrap_iterator(range(iteration), is_progressbar):
        # Create feed dictionary for next mini batch.
        # Multitask batches carry two label sets; the second one is used here.
        batch = next(mini_batch)
        if not is_multitask:
            inputs, labels_true_st, inputs_seq_len, _ = batch
        else:
            inputs, _, labels_true_st, inputs_seq_len, _ = batch

        feed_dict = {
            network.inputs: inputs,
            network.inputs_seq_len: inputs_seq_len,
            network.keep_prob_input: 1.0,
            network.keep_prob_hidden: 1.0
        }

        # The last mini-batch may be smaller than `batch_size`
        batch_size_each = len(inputs_seq_len)

        # Evaluate by 39 phones
        labels_pred_st = session.run(decode_op, feed_dict=feed_dict)
        labels_true = sparsetensor2list(labels_true_st, batch_size_each)
        labels_pred = sparsetensor2list(labels_pred_st, batch_size_each)
        for i_batch in range(batch_size_each):
            labels_pred[i_batch] = to_phone39_indices(labels_pred[i_batch],
                                                      train_label_type)

            if data_label_type != 'phone39':
                # NOTE(review): the reference indices are decoded with the
                # mapping file of `train_label_type` but mapped to 39 phones
                # as `data_label_type`; this is only consistent when both
                # label types are the same -- confirm against the callers.
                labels_true[i_batch] = to_phone39_indices(
                    labels_true[i_batch], data_label_type)

        # Compute edit distance
        labels_true_st = list2sparsetensor(labels_true)
        labels_pred_st = list2sparsetensor(labels_pred)
        per_local = compute_edit_distance(session, labels_true_st,
                                          labels_pred_st)
        per_global += per_local * batch_size_each

    per_global /= dataset.data_num

    return per_global