# Imports assumed from how this snippet uses them: os/importlib for dataset
# discovery, the Keras backend for CTC decoding, and the project's NN package
# (not shown here).
import importlib
import os

import NN.model
from keras import backend as K


def predict(data_set=None, data_per_batch=32, epoch=1, model_path='model.h5'):

    Dataset = None

    if data_set is None:
        dataloaders = os.listdir('Dataset')
        for dataloader in dataloaders:
            loader_path = os.path.join('Dataset', dataloader)
            if dataloader.endswith('.py') and os.path.isfile(
                    loader_path) and dataloader != '__init__.py':
                try:
                    Dataset = importlib.import_module("Dataset." +
                                                      dataloader[:-3])
                except Exception as ex:
                    print('failed to load Dataset from "%s".' % dataloader, ex)
                else:
                    print('successfully loaded Dataset from "%s"!' % dataloader)
                    break
        if Dataset is None:
            raise Exception('No valid dataset found!')
    else:
        try:
            Dataset = importlib.import_module("Dataset." + data_set)
        except Exception as ex:
            raise Exception('"%s" is not a vaild dataset!' % data_set)

    data_loader = Dataset.DataLoader(1024, 1, 13)

    # Load the network model
    model = NN.model.create_pridict_model()
    # Print the network structure
    model.summary()
    # Load previously trained weights
    model.load_weights(model_path)
    # Validation data generator
    validation_data = data_loader.get_validation_generator()
    # Take one validation batch and run CTC decoding on the model predictions
    data = next(validation_data)[0]
    r = model.predict(data['speech_data_input'])
    r = K.ctc_decode(r, data['input_length'][0])
    # Decoded label indices of the first sample in the batch
    r1 = K.get_value(r[0][0])
    r1 = r1[0]

    tokens = NN.model.get_tokens()

    print('predict: [', end='')
    for i in r1:
        print(tokens[i], end=', ')
    print(']')
    print('truth  : [', end='')
    for i in range(data['label_length'][0][0]):
        print(tokens[int(data['speech_labels'][0][i])], end=', ')
    print(']')
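A minimal usage sketch for the function above (the loader module name 'thchs30' is
hypothetical; pass data_set=None to auto-discover any loader under Dataset/):

    if __name__ == '__main__':
        predict(data_set='thchs30', model_path='model.h5')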
Example #2
    def evaluate(self, batch_num):
        batch_acc = 0
        o_acc = 0
        generator = self.val_seq.generator()
        # Learning phase 0 below: run the network in test / inference mode
        for i in range(batch_num):
            inputs = next(generator)
            x_test, y_test, source_str = (inputs["the_input"],
                                          inputs["the_labels"],
                                          inputs["source_str"])
            out = self.validation_func([x_test, 0])[0]
            # Per-sample accuracy from raw argmax decoding of the output
            current_acc = np.zeros([out.shape[0]])
            # Per-sample accuracy from K.ctc_decode
            c_acc = np.zeros([out.shape[0]])
            # Alternative decoding (kept for reference): skip the first two timesteps
            # ctc_decode = K.ctc_decode(y_pred[:, 2:, :], input_length=np.ones(shape[0]) * shape[1])[0][0]
            ctc_decode = K.get_value(
                K.ctc_decode(out,
                             input_length=np.ones(out.shape[0]) * out.shape[1],
                             greedy=True)[0][0])
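            # ctc_decode holds decoded label indices per sample; greedy decoding
            # has already collapsed repeats and removed CTC blanks, and rows are
            # padded with -1 to a common length.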
            # print(ctc_decode)
            for j in range(ctc_decode.shape[0]):
                print("ctc_decode", ctc_decode[j], y_test[j][:4])
                # out_best = list(np.argmax(decode_out[j, 2:], 1))
                out_best = list(ctc_decode[j])
                out_best = [k for k, g in itertools.groupby(out_best)]
                if self.val_seq.equals_after_trim(y_test[j],
                                                  np.asarray(out_best)):
                    c_acc[j] = 1
                    print(source_str[j], y_test[j], out_best)
            o_acc += c_acc.mean()
            # print(" ctc_acc: %f%%" % (o_acc))

            for j in range(out.shape[0]):
                # Take the argmax over each timestep (column) to infer the most
                # likely token from the raw layer output
                out_best = list(np.argmax(out[j, 2:], 1))
                out_best = [k for k, g in itertools.groupby(out_best)]
                if self.val_seq.equals_after_trim(y_test[j],
                                                  np.asarray(out_best)):
                    current_acc[j] = 1
                    print(source_str[j], y_test[j], out_best)
            batch_acc += current_acc.mean()
        return batch_acc / batch_num, o_acc / batch_num
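The helper self.val_seq.equals_after_trim is not shown in this snippet; a minimal
sketch of the assumed behaviour, written as a standalone function (the pad value -1
and the trimming rule are assumptions, adjust to the label encoding actually used):

    def equals_after_trim(y_true, y_pred, pad_value=-1):
        # Drop padding entries from both sequences and compare what remains.
        trim = lambda seq: [int(t) for t in seq if int(t) != pad_value]
        return trim(y_true) == trim(y_pred)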