Example #1
File: ABCDNN.py Project: xmpx/ASRFrame
    def train(datagenes: list, load_model=None):
        w, h = 1600, 200

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)

        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(x_set,
                              y_set,
                              batch_size=16,
                              n_mels=h,
                              feature_pad_len=w,
                              feature_dim=3,
                              cut_sub=32)

        model_helper = DCBNN2D(pymap)
        model_helper.compile(feature_shape=(w, h, 1),
                             label_max_string_length=32,
                             ms_output_size=1423)

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        model_helper.fit(vloader)
Example #2
    def train(datagene: TextDataGenerator, load_model=None):

        txtfs = datagene.load_from_path()

        max_label_len = 200

        pinyin_map = PinyinMapper(sil_mode=0)
        chs_map = ChsMapper()

        tloader = TextLoader2(
            txtfs,
            padding_length=max_label_len,
            pinyin_map=pinyin_map,
            chs_map=chs_map,
            grain=TextLoader2.grain_alpha,
            cut_sub=175,
        )

        model_helper = SOMMalpha()
        model_helper.compile(feature_shape=(max_label_len, ),
                             ms_pinyin_size=pinyin_map.max_index,
                             ms_output_size=chs_map.categores)

        if load_model is not None:
            model_helper.load(load_model)

        model_helper.fit(tloader, -1)
Example #3
File: WAVE.py Project: xmpx/ASRFrame
    def train(datagenes: list, load_model=None):
        w, h = 1600, 200
        max_label_len = 64

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)
        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(
            x_set,
            y_set,
            batch_size=16,
            feature_pad_len=w,
            n_mels=h,
            max_label_len=max_label_len,
            pymap=pymap,
            melf=MelFeature5(),
            all_train=False,
        )

        model_helper = WAVEM(pymap)
        model_helper.compile(feature_shape=(w, h),
                             label_max_string_length=max_label_len,
                             ms_output_size=pymap.max_index + 1)

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        model_helper.fit(vloader, epoch=-1, save_step=100, use_ctc=True)
Example #4
    def train(datagenes: list, load_model=None, **kwargs):
        w, h = 1600, 200
        max_label_len = 64

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)
        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(x_set, y_set,
                              batch_size=16,
                              feature_pad_len=w,
                              n_mels=h,
                              max_label_len=max_label_len,
                              pymap=pymap,
                              melf=MelFeature4(),
                              divide_feature_len=8,
                              all_train=False,
                              )

        model_helper = DCBNN1D(pymap)
        model_helper.compile(feature_shape=(w, h), label_max_string_length=max_label_len,
                             ms_output_size=pymap.max_index + 1)

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        epoch = kwargs.get("epoch", -1)
        save_step = kwargs.get("save_step", 1000)

        model_helper.fit(vloader, epoch=epoch, save_step=save_step, use_ctc=True)
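A hedged usage sketch of this entry point (the generator classes named here are hypothetical stand-ins, not confirmed ASRFrame API): judging from the calls above, epoch=-1 appears to mean open-ended training, and save_step sets the checkpoint interval passed through to fit().

    # Hypothetical call site; Thchs30/Aishell stand in for whatever data
    # generators the project actually exposes.
    train([Thchs30("/data/thchs30"), Aishell("/data/aishell")],
          load_model="./model/DCBNN1D_cur_best.h5",
          epoch=-1,        # train until interrupted
          save_step=500)   # checkpoint every 500 steps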
Example #5
File: ABCDNN.py Project: xmpx/ASRFrame
    def train(datagenes: list, load_model=None):
        w, h = 1600, 200

        dataset = VoiceDatasetList()
        x_set, y_set = dataset.merge_load(datagenes)

        pymap = PinyinMapper(sil_mode=-1)
        vloader = VoiceLoader(
            x_set,
            y_set,
            batch_size=16,
            n_mels=h,
            feature_pad_len=w,
            feature_dim=3,
            pymap=pymap,
            melf=MelFeature5(),
            divide_feature_len=8,
        )

        model_helper = DCNN2D(pymap)
        model_helper.compile(feature_shape=(w, h, 1),
                             ms_output_size=pymap.max_index + 1)  # CTC loss requires every label index < num_classes - 1

        if load_model is not None:
            load_model = os.path.abspath(load_model)
            model_helper.load(load_model)

        model_helper.fit(vloader, epoch=-1, use_ctc=True)
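A minimal standalone sketch (illustrative, not ASRFrame code) of the constraint in the comment above: Keras's CTC helper reserves the last class index for the blank token, so every real label index must stay below num_classes - 1, which is why ms_output_size is set to pymap.max_index + 1.

    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import backend as K

    num_classes = 1424                                       # e.g. pymap.max_index + 1
    y_pred = tf.nn.softmax(tf.random.normal((2, 50, num_classes)))  # (batch, time, classes)
    y_true = np.random.randint(0, num_classes - 1, (2, 8))   # labels stay below the blank index
    input_len = np.full((2, 1), 50)                          # CTC input frames per sample
    label_len = np.full((2, 1), 8)                           # label length per sample
    loss = K.ctc_batch_cost(y_true, y_pred, input_len, label_len)
    print(loss.shape)                                        # (2, 1): one loss per sample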
Example #6
 def summary(gene):
     x_set, y_set = gene.load_from_path()
     py_map = PinyinMapper(sil_mode=-1)
     vloader = VoiceLoader(x_set, y_set, pymap=py_map, vad_cut=False, check=False)
     print(f"start to summarize the {gene.__class__.__name__} dataset")
     vloader.summery(audio=True,  # "summery" is the method's name as spelled in VoiceLoader
                     label=True,
                     plot=True,
                     dataset_name=gene.__class__.__name__)
Example #7
def predict_dchmm(path="./model/DCBNN1D_cur_best.h5"):
    dcnn = DCHMM(acmodel_input_shape=(1600, 200),
                 acmodel_output_shape=(200, ),
                 lgmodel_input_shape=None,
                 py_map=PinyinMapper(sil_mode=-1),
                 chs_map=ChsMapper())

    dcnn.compile(path)

    while True:
        pyline, chline, prob = dcnn.record_from_cmd(3)
        print(pyline, chline, prob)
Example #8
    def real_predict(path):
        max_label_len = 200
        pinyin_map = PinyinMapper(sil_mode=0)
        chs_map = ChsMapper()

        model_helper = SOMMalpha()
        model_helper.compile(feature_shape=(max_label_len, ),
                             ms_pinyin_size=pinyin_map.max_index,
                             ms_output_size=chs_map.categores)

        model_helper.load(path)

        while True:
            string = input("请输入拼音:")  # prompt: "Enter pinyin:"
            xs = [pinyin_map.alist2vector(string)]
            print(xs)
            batch = pad_sequences(xs,
                                  maxlen=max_label_len,
                                  padding="post",
                                  truncating="post"), None  # (features, labels=None) pair
            result = model_helper.predict(batch)[0]
            print(result.replace("_", ""))
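One subtlety above: the trailing ", None" binds after the pad_sequences(...) call, so batch is a (features, labels) tuple with empty labels. A tiny standalone sketch with toy indices:

    from tensorflow.keras.preprocessing.sequence import pad_sequences

    xs = [[3, 17, 9]]                   # a toy pinyin index vector
    padded = pad_sequences(xs, maxlen=6, padding="post", truncating="post")
    batch = padded, None                # == (padded, None): features with no labels
    print(batch[0])                     # [[ 3 17  9  0  0  0]]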
Example #9
def predict_dcsom(ac_path="./model/DCBNN1D_cur_best.h5",
                  lg_path="./model/language/SOMMalpha_step_18000.h5"):
    dcs = DCSOM(acmodel_input_shape=(1600, 200),
                acmodel_output_shape=(200, ),
                lgmodel_input_shape=(200, ),
                py_map=PinyinMapper(sil_mode=-1),
                chs_map=ChsMapper(),
                divide_feature=8)

    dcs.compile(ac_path, lg_path)
    while True:
        try:
            print(dcs.record_from_cmd(5))
        except Exception:
            print("[info*]未识别到语音")  # "[info*] no speech recognized"
Example #10
File: DCHMM.py Project: xmpx/ASRFrame
    def real_predict(path="./model/DCBNN1D_cur_best.h5"):
        '''
        :param path: path to the pretrained DCBNN1D weight file
        :return:
        '''
        dcnn = DCHMM(
            acmodel_input_shape=(1600, 200),
            acmodel_output_shape=(200,),
            lgmodel_input_shape=None,
            py_map=PinyinMapper(sil_mode=-1),
            chs_map=ChsMapper())

        dcnn.compile(path)

        while True:
            pyline, chline, prob = dcnn.record_from_cmd(3)
            print(pyline, chline, prob)
Example #11
def summary_dcbann1d(datagenes: list, load_model=None):
    w, h = 1600, 200
    max_label_len = 64

    dataset = VoiceDatasetList()
    x_set, y_set = dataset.merge_load(datagenes)
    pymap = PinyinMapper(sil_mode=-1)
    vloader = VoiceLoader(x_set, y_set,
                          batch_size=16,
                          feature_pad_len=w,
                          n_mels=h,
                          max_label_len=max_label_len,
                          pymap=pymap,
                          melf=MelFeature5(),
                          divide_feature_len=8,
                          # cut_sub=64,
                          )

    model_helper = DCBNN1D(pymap)
    model_helper.compile(feature_shape=(w, h), label_max_string_length=max_label_len, ms_output_size=pymap.max_index+1)

    if load_model is not None:
        load_model = os.path.abspath(load_model)
        model_helper.load(load_model)

    viter = vloader.create_iter(one_batch=True)
    all_err_dict = {}
    for batch in viter:
        test_res = model_helper.test(batch, use_ctc=True, return_result=True)
        err_dict = test_res["err_pylist"]
        for k, lst in err_dict.items():
            all_err_dict.setdefault(k, []).extend(lst)
    print(all_err_dict)

    # write the merged error dictionary, deduplicating repeated errors per key
    with open("./error_dict.txt", "w", encoding="utf-8") as w:
        for k, v in all_err_dict.items():
            w.write(f"{k},{' '.join(set(v))}\n")
Example #12
def train_dcnn1d(datagene: list, load_model=None):

    dataset = VoiceDatasetList()
    _, y_set = dataset.merge_load(datagene, choose_x=False, choose_y=True)

    max_label_len = 64
    pinyin_map = PinyinMapper(sil_mode=0)
    chs_map = ChsMapper()
    tloader = TextLoader(y_set,
                         padding_length=max_label_len,
                         pinyin_map=pinyin_map,
                         cut_sub=16,
                         chs_map=chs_map)

    model_helper = DCNN1D()
    model_helper.compile(feature_shape=(max_label_len, tloader.max_py_size),
                         ms_input_size=pinyin_map.max_index,
                         ms_output_size=chs_map.categores)

    if load_model is not None:
        model_helper.load(load_model)

    model_helper.fit(tloader, -1)
Example #13
File: dataset.py Project: xmpx/ASRFrame
 def _check(self):
     '''After cleaning, create a marker file in the dataset root to signal that no further cleaning is needed.'''
     symbol = os.path.join(self.path,"symbol")
     self.check = os.path.exists(symbol)
     self.pymap = PinyinMapper()
     self.chsmap = ChsMapper()
Example #14
 def __init__(self,path):
     self.path = path
     self.pymap = PinyinMapper(use_pinyin=True)
Example #15
 def __init__(self, path, strip_tone=False):
     assert os.path.exists(path), "path does not exist!"
     self.path = path
     self.strip_tone = strip_tone
     self.pymap = PinyinMapper()
     self.chs_map = ChsMapper()
Example #16
        alpha_batch = ["".join(sample) for sample in raw_pylist_batch]

        alpha_vector_batch = self.py_map.batch_alist2vector(alpha_batch)
        alpha_vector_batch = TextLoader2.corpus2feature(alpha_vector_batch,self.lgmodel_input_shape[0])

        ch_list_batch,prob_batch = self.lg_model.predict([alpha_vector_batch,None],True)

        pyline = np.concatenate(pylist_batch).tolist()
        chline = ",".join(ch_list_batch).replace("_","")

        print(pyline,chline)
        return pyline,chline,[ctc_prob[0]]


if __name__ == "__main__":
    dcs = DCSOM(acmodel_input_shape=(1600, 200),
                acmodel_output_shape=(200,),
                lgmodel_input_shape=(200,),
                py_map=PinyinMapper(sil_mode=-1),
                chs_map=ChsMapper(),
                divide_feature=8)

    # dcs.compile("../model/DCBNN1D_step_326000.h5",
    #             "../model/language/SOMMalpha_step_18000.h5")
    dcs.compile("../model/DCBNN1D_cur_best.h5",
                "../model/language/SOMMalpha_step_18000.h5")
    while True:
        try:
            print(dcs.record_from_cmd(5))
        except Exception:
            print("[info*]未识别到语音")  # "[info*] no speech recognized"
Example #17
File: dataset.py Project: xmpx/ASRFrame
class Dataset:
    label_mode = "label" #用于生成正确的标签
    clean_mode = "clean" #用于在生成正确的标签后进行清洗
    train_mode = "train" #用于清洗后提供数据集
    ''''''
    def __init__(self,path):
        self.path = path
        self._check()

    def _check(self):
        '''After cleaning, create a marker file in the dataset root to signal that no further cleaning is needed.'''
        symbol = os.path.join(self.path,"symbol")
        self.check = os.path.exists(symbol)
        self.pymap = PinyinMapper()
        self.chsmap = ChsMapper()

    def _pre_process_line(self, line):
        '''
        TODO: process a Chinese text line
        :param line: str
        :return: None if the line contains letters or digits;
                otherwise the line with punctuation and spaces stripped.
                Whether each character is in the dictionary is not checked.
        '''
    def _pre_process_pyline(self, pyline):
        '''
        TODO: process a pinyin line
        :param pyline:
        :return: the line with redundant whitespace collapsed so that pinyin
                    tokens are separated by exactly one space;
                    None if a pinyin token is not in the dictionary.
        '''

    def initial(self):
        self.label_dataset()
        self.count_dataset()
        # self.clean_dataset()

    def clean(self):
        self.clean_dataset()

    def label_dataset(self):
        '''Generate the labels the pipeline needs: for each wav file in the
        directory, write a matching txt file (wav.trn for the Tsinghua set).
        If a sample cannot be completed (i.e. audio and text are not both
        available), delete the corresponding wav or label file.

            Note: at this point characters and pinyin are not guaranteed to
            align one-to-one; the text may still contain digits, letters, or
            punctuation that cannot be transcribed to pinyin.
        '''
        print(f"[info*]Create labels in {self.__class__.__name__}.")

        dataiter = self.create_fs_iter(mode=Dataset.label_mode)
        for i,(wav_fn,txt_fn,[line,pyline]) in enumerate(dataiter):
            print(f"\r[info*]Process {i},fn = {txt_fn}",end="\0",flush=True)

            if not os.path.exists(wav_fn) and os.path.exists(txt_fn):
                # os.remove(txt_fn)
                print(f"\n{txt_fn} may not have the wav file {wav_fn}, please check it.")
                continue
            if line is None and os.path.exists(wav_fn):  # a wav file with no Chinese label
                # os.remove(wav_fn)
                print(f"\n{wav_fn} has no label, it will be deleted.")
                continue

            if pyline is None or len(pyline) == 0:  # no pinyin yet
                pyline = self.pymap.sent2pylist(line)  # convert the text to pinyin
            else:
                continue  # currently only the Tsinghua dataset ships pinyin for every sample, so no rewrite is needed
            with open(txt_fn,"w",encoding="utf-8") as w:
                w.write(f"{line}\n")
                w.write(f"{pyline}\n")
        print()

    def count_dataset(self):
        '''Build the two count dictionaries in the dataset root directory;
        how to merge them across datasets is left to the user.'''
        print(f"[info*]Create dicts in {self.__class__.__name__}.")
        dataiter = self.create_fs_iter(mode=Dataset.train_mode)

        chs_all_dict = {}
        py_all_dict = {}
        for _,txt_fn in dataiter:
            with open(txt_fn,encoding="utf-8") as f:
                line = f.readline().strip()
                pyline = f.readline().strip().split(" ")
                pyline = [i.strip("5\n") for i in pyline]

            chs_dict = Datautil.count_label(line)
            py_dict = Datautil.count_label(pyline)
            chs_all_dict = Datautil.merge_count_label(chs_dict,chs_all_dict)
            py_all_dict = Datautil.merge_count_label(py_dict,py_all_dict)

        Datautil.write_count_result(path=self.path,
                                    chs_dict=chs_all_dict,
                                    py_dict=py_all_dict)


    def clean_dataset(self):
        '''Clean the dataset according to the finalized dictionaries.'''
        dataiter = self.create_fs_iter(mode=Dataset.train_mode)  # cleaning consumes the same data format as train_mode
        count = 0
        oov_count = 0
        for i,(wav_fn,txt_fn) in enumerate(dataiter):
            print(f"\r{i},err_count = {count},oov_count = {oov_count},fn = {txt_fn[:-20]}",end="\0",flush=True)
            with open(txt_fn,encoding="utf-8") as f:
                line = f.readline().replace(" ", "").replace("　", "").strip()  # strip half- and full-width spaces
                pyline = f.readline().strip().split(" ")
            new_line = StopwordUtil.clean_line(line)
            if new_line is None:
                Datautil.check_remove(wav_fn)
                Datautil.check_remove(txt_fn)
                count+=1
            elif len(new_line) != len(pyline):
                Datautil.check_remove(wav_fn)
                Datautil.check_remove(txt_fn)
                count+=1

            no_oov,oov_dict = self.pymap.check_line(pyline)
            if not no_oov:
                oov_count+=1
                Datautil.check_remove(wav_fn)
                Datautil.check_remove(txt_fn)

        print()


    def load_dataset(self):
        '''Load the train/test dataset, using train_mode.'''
        dataiter = self.create_fs_iter(mode=Dataset.train_mode)
        x_set = []
        y_set = []
        for x,y in dataiter:
            x_set.append(x)
            y_set.append(y)

        return x_set,y_set

    def create_fs_iter(self,mode="train"):
        raise NotImplementedError(
            f"create_fs_iter() must be implemented in {self.__class__.__name__}")