예제 #1
0
def main(testing=False):
    """Score test words against their own and a paired word's phone set.

    Prepares the train/test feature folders, loads a previously saved model
    from disk and then walks the test generator.  For every test file whose
    word number (last ``_``-separated token of the file name) is a key of
    ``similardict``, the per-frame model outputs of the whole file are
    collected and scored twice: once restricted to the word's own phones
    ("Right") and once restricted to the phones of its paired word
    ("Wrong").  The per-frame score is ``|log(forced_max / overall_max)|``;
    it is averaged per phone segment and printed.  Nothing is returned.

    Args:
        testing: When true, the records file name that is printed is
            switched to ``testing_records.pk``.
    """

    # Config Values, Feature Assembly, focused on WordCount/FrameCount
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    # Based on your computer setup (both folders on the same level as GOP-LSTM).
    Wavdir = SysPath + 'corpus/dat/speakers/'  # On SysPath level
    Holddir = SysPath + 'HoldDir/'

    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'

    N_context = 2
    N_ceps = 26
    wordcount = 30

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): this branch never assigns `w2pdict`, which the scoring
        # code below reads; it is only usable while FramelevelORword is False.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'segmentation_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'segment_blstm_records_wl_{wordcount}.pk'
        print(f'Selected phones: {selected_phones}')
        print(f'Train count & test count: {totalcount}')

    if testing:
        cdnn_dict_name = f'testing_records.pk'
    print(f' Using:{cdnn_dict_name}')
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_segments/'
    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32 for _ in range(5)]]
    DropoutList = [0.8]

    # add one for sil
    N_classes += 1

    # Word numbers that are scored, each mapped to the word it is compared
    # against in the "Wrong" turn.
    similardict = {'13': '48', '48': '13', '25': '5', '5': '25'}

    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V2'
                # check if model exist with said name
                Model_address = f'{Model_name}model.hdf5'
                # if manual entry (overrides the name derived above)
                Model_address = 'BLSTM_CP32_x5_FBN_SS64_DL0.8_V2model.hdf5'
                if not os.path.exists(Model_address):
                    # Without a model there is nothing to predict with; skip
                    # this grid entry instead of failing later on `model`.
                    print(f'No such model as :{Model_address}')
                    continue
                print(f'loading model: {Model_address}')
                model = load_model(Model_address)
                print(model.inputs, model.outputs)
                print(f'Loaded')

                # Forced Accuracy
                print('...Predicting...')
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                # Need to add dictionary with similar sounding word; however
                # it seems there are too few words to do this with a small
                # test set. Also, might be having problems with switching to
                # 25 wc from 30 wc.
                # Group the distinct pronunciations found in w2pdict by word
                # number (last '_' token of the key).
                sameworddiffprondict = defaultdict(list)
                for keys, v in w2pdict.items():
                    wordnum = keys.split('_')[-1]
                    # check if already in dict list
                    if v not in sameworddiffprondict[wordnum]:
                        sameworddiffprondict[wordnum].append(v)
                # Words 17 and 18 are excluded; tolerate them being absent.
                sameworddiffprondict.pop('17', None)
                sameworddiffprondict.pop('18', None)
                for key, v in sameworddiffprondict.items():
                    # Only the first pronunciation is used below; give it the
                    # trailing silence symbol.
                    v[0] += '_'
                    print(key, v)

                # Return Word Accuracy (by Softmax & ForcedMax), Max Seg
                # Accuracy (from goldstandard-gst)
                # import the whole list for testing each possible word,
                # instead of softmax
                totaltest = 15  # for testing purpose shorten
                if '_' not in selected_phones:
                    # Added once only, so repeated grid entries do not grow
                    # the phone list.
                    selected_phones.append('_')
                print(f'Total Test size:{totaltest}\n')
                x, _y, file = next(gen)
                cfile = file
                cfname = cfile.split('.')[0].split('_')[-1]
                n = 0  # total number of generator items skipped so far
                for _ in range(totaltest):  # amount of words to be judged
                    # Two versions, Right Word and Random Wrong Word
                    print(f'starting file:{file}, {cfname}')
                    cfile = file
                    cfname = cfile.split('.')[0].split('_')[-1]
                    if cfname not in similardict:
                        # Advance until a scorable word shows up or the skip
                        # budget is used up.
                        while True:
                            n += 1
                            x, _y, file = next(gen)
                            cfile = file
                            cfname = cfile.split('.')[0].split('_')[-1]
                            if cfname in similardict:
                                print(f'in')
                                print(f'N:{n}')
                                break
                            if n > 100:
                                print(f'N:{n}')
                                break
                    if cfname not in similardict:
                        # The search gave up on a word that has no paired
                        # word; scoring it would fail on the lookup below.
                        print(f'No scorable word found after {n} skips; '
                              f'stopping at {cfile}')
                        break

                    # Collect the outputs of every chunk of the current file.
                    predictions = []
                    while True:
                        predictions += model.predict(x=x)[0].tolist()
                        # get all values then roll through loops
                        x, _y, file = next(gen)
                        if cfile != file:  # next word reached
                            print(f'Next file:{file}')
                            break
                    print(f'Going in: {cfile},{cfname}')
                    predictions = np.asarray(predictions)
                    # Best output per frame over all classes; identical for
                    # both turns.
                    sscores = np.max(predictions, axis=1)

                    for turnN in range(2):
                        if turnN == 1:
                            fname = similardict[cfname]
                            print(f'Wrong: {fname}')
                            potphones = sameworddiffprondict[fname][0]
                        else:
                            print(f'Right: {cfname}')
                            potphones = sameworddiffprondict[cfname][0]
                        print(potphones)
                        pind = [
                            selected_phones.index(sp) for sp in potphones
                        ]
                        print(f'Expected word: {potphones}')
                        # Outputs restricted to the phones of the tested word.
                        forced = predictions[:, pind]
                        fscores = np.max(forced, axis=1)
                        fwordphones = [
                            selected_phones[pind[sp]]
                            for sp in np.argmax(forced, axis=1)
                        ]
                        # got word segs, process them
                        fseg = segmentphonelist(fwordphones)

                        # use fseg to separate phones and then show their prob
                        gop_per_frame = np.round(
                            np.abs(np.log(fscores / sscores)), 4)
                        gop_list = []
                        avg_phone_error = []
                        # presumably seg is (phone, start, end, length) —
                        # TODO confirm against segmentphonelist.
                        # NOTE(review): the -1 offsets make the slice start at
                        # -1 when seg[1] is 0; confirm the index base.
                        for seg in fseg[:-1]:  # skip space
                            phone_chunk = gop_per_frame[seg[1] - 1:seg[2] - 1]
                            phonescore = np.round(
                                np.sum(phone_chunk) / seg[3], 3)
                            gop_list.append((seg[0], phonescore, seg[3]))
                            avg_phone_error.append(phonescore)
                        print(gop_list)
                        # NOTE(review): np.round without decimals rounds the
                        # average to a whole number — confirm this is wanted.
                        print(np.round(np.average(avg_phone_error)))
                        print('\n')

                # GOP, create ratios between RNN-WF/RNN-O and RNN-WF/(0.5*WF + 0.5*O)
                # for correct words, for similar words, then for opposite words

                del gen
                del model
                k.clear_session()
def main(testing=False):
    """Train, evaluate and score a CNN-BLSTM phone classifier per grid entry.

    For every combination of sequence size, conv-layer list and dropout rate
    the function builds a model, trains it, evaluates it and then walks the
    test generator word by word.  For each word it compares the gold-standard
    frame labels with (a) the plain arg-max labels ("softmax") and (b) the
    arg-max restricted to the phones of that word plus silence ("forced").
    It prints the accumulated edit distance, frame accuracy and per-segment
    majority accuracy, and stores the two segment percentages through
    ``cdnn_records_add``.  Nothing is returned.

    Args:
        testing: Passed on to ``train_model`` / ``evaluate_model`` and used
            to pick the records file name.  It is currently overridden by a
            hard-coded value inside the function (see note below).
    """

    def percent(part, whole):
        # Percentage of `part` in `whole`; 0.0 when there is nothing to count.
        return part / whole * 100 if whole else 0.0

    # Config Values[DNN params]
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    # NOTE(review): this hard-coded switch overrides the `testing` argument,
    # so callers cannot turn testing mode off — confirm before removing.
    testing = True
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): this branch never assigns `w2pdict`, which the scoring
        # code below reads; it is only usable while FramelevelORword is False.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            wordcount=wordcount,
            phoinfdir=PhoInfDir)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'ddcp_blstm_gridsearch_records_wl_{wordcount}.pk'
        print(f'Selected phones: {selected_phones}')
        print(f'Train count & test count: {totalcount}')

    if testing:
        cdnn_dict_name = f'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'
    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32 for _ in range(15)]]
    DropoutList = [0.8]

    # add one for sil; the silence symbol is appended exactly once so it
    # lines up with the extra class.
    N_classes += 1
    selected_phones.append('_')

    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V3'
                model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                conv_layers=cl,
                                                n_classes=N_classes,
                                                seq_size=seq_size,
                                                dropout_rate=dl,
                                                channel_order='channels_last')
                model, Model_name = train_model(n_epochs=N_epochs,
                                                model=model,
                                                traindir=Traindir,
                                                model_name=Model_name,
                                                n_classes=N_classes,
                                                totalsamples=totaltrain,
                                                dict_name=cdnn_dict_name,
                                                results_dir=cdnn_address,
                                                batch_size=seq_size,
                                                testing=testing)
                print('...Evaluating...')
                evaluate_model(model=model,
                               testdir=Testdir,
                               n_classes=N_classes,
                               totalsamples=totaltest,
                               model_name=Model_name,
                               dict_name=cdnn_dict_name,
                               results_dir=cdnn_address,
                               batch_size=seq_size,
                               testing=testing)
                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)

                # Return Word Accuracy (by Softmax & ForcedMax), Max Seg
                # Accuracy (from goldstandard-gst)
                diagnose = False
                s_correct = 0  # frames where softmax label == gold label
                f_correct = 0  # frames where forced label == gold label
                total = 0  # gold frames before the trailing silence
                s_IDS = 0  # accumulated edit distance, softmax vs gold
                f_IDS = 0  # accumulated edit distance, forced vs gold
                maxsegtotal = 0  # number of gold segments judged
                s_seg = 0  # segments whose softmax majority phone is right
                f_seg = 0  # segments whose forced majority phone is right
                print(f'Total Test size:{totaltest}\n')
                x, y, file = next(gen)
                cfile = file
                for _ in range(totaltest):  # amount of words to be judged
                    print(file)
                    gwordphones = []  # gold standard word segments
                    swordphones = []  # softmax word segments
                    fwordphones = []  # forced word segments
                    fname = file.split('.')[0]
                    # Work on a copy so the shared w2pdict entry does not
                    # collect an extra '_' every time the word is visited.
                    potphones = list(w2pdict[fname]) + ['_']
                    pind = [selected_phones.index(sp) for sp in potphones]
                    if diagnose:
                        print(f'Current file:{file}')
                        print(f'Word\'s phones{potphones}')

                    # Consume every chunk that belongs to the current file;
                    # each chunk is predicted exactly once.
                    while True:
                        predictions = model.predict(x=x)
                        gwordphones += [
                            selected_phones[sp]
                            for sp in np.argmax(y, axis=2)[0]
                        ]
                        swordphones += [
                            selected_phones[sp]
                            for sp in np.argmax(predictions, axis=2)[0]
                        ]
                        fwordphones += [
                            selected_phones[pind[sp]]
                            for sp in np.argmax(predictions[:, :, pind][0],
                                                axis=1)
                        ]
                        x, y, file = next(gen)
                        if cfile != file:  # next word reached
                            cfile = file
                            break

                    # got word segs, process them
                    gseg = segmentphonelist(gwordphones)
                    sseg = segmentphonelist(swordphones)
                    fseg = segmentphonelist(fwordphones)
                    sLD = uttLD(gseg, sseg)
                    fLD = uttLD(gseg, fseg)
                    s_IDS += sLD
                    f_IDS += fLD
                    if diagnose:
                        print('\n')
                        print(gseg)
                        print(sseg)
                        print(fseg)
                        print('\n')
                        print(sLD)
                        print(fLD)
                        print('\n')
                    # accuracy
                    startsil = gseg[-1][1]  # Index of Silence
                    gold_speech = gwordphones[:startsil]
                    s_correct += segCorrect(gold_speech,
                                            swordphones[:startsil])
                    f_correct += segCorrect(gold_speech,
                                            fwordphones[:startsil])
                    total += len(gold_speech)

                    # max-seg-score with known boundaries
                    # per word, then test set score
                    for seg in gseg[:-1]:  # last phone is silence '_'
                        maxsegtotal += 1
                        cphone = seg[0]
                        sboundedlist = swordphones[seg[1]:seg[2]]
                        fboundedlist = fwordphones[seg[1]:seg[2]]
                        # Most frequent label inside the gold boundaries.
                        smaxphone = max(sboundedlist, key=sboundedlist.count)
                        fmaxphone = max(fboundedlist, key=fboundedlist.count)
                        if smaxphone == cphone:
                            s_seg += 1
                        if fmaxphone == cphone:
                            f_seg += 1
                        if diagnose:
                            print(seg)
                            print(smaxphone, fmaxphone, cphone)

                # Report the distances accumulated over all words (the
                # per-word sLD/fLD only hold the last word's value).
                sLDpercent = percent(s_IDS, total)
                fLDpercent = percent(f_IDS, total)
                print(f'Insertions, Deletions, Substitions (SM):{s_IDS} out of {total}: {sLDpercent}%')
                print(f'Insertions, Deletions, Substitions (FM):{f_IDS} out of {total}: {fLDpercent}%')
                Spercent = percent(s_correct, total)
                Fpercent = percent(f_correct, total)
                print('\n')
                print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                print(f'Forced: {f_correct} out of {total}, {Fpercent}%')

                Spercent = percent(s_seg, maxsegtotal)
                Fpercent = percent(f_seg, maxsegtotal)

                print(f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%')
                print(f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%')
                # The record stores the softmax segment percentage in the
                # `loss` slot and the forced one in the `accuracy` slot.
                cdnn_records_add(loss=Spercent,
                                 accuracy=Fpercent,
                                 model_name=Model_name,
                                 nn_records_name=cdnn_dict_name,
                                 results_address=cdnn_address)
                del gen
                del model
                k.clear_session()
예제 #3
0
def main(testing=False):

    # Config Values, Feature Assembly, focused on WordCount/FrameCount
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    #testing = True
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    '''Based on your computer setup. (I have both folders on same level as GOP-LSTM)'''
    Wavdir = SysPath + 'corpus/dat/speakers/'  # On SysPath level
    Holddir = SysPath + 'HoldDir/'

    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'

    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'segmentation_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'segment_blstm_records_wl_{wordcount}.pk'
        print(f'Selected phones: {selected_phones}')
        print(f'Train count & test count: {totalcount}')

    if testing:
        cdnn_dict_name = f'testing_records.pk'
    print(f' Using:{cdnn_dict_name}')
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_segments/'
    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    # ,[32 for _ in range(10)]
    ConvLayerList = [[32 for _ in range(10)]]
    DropoutList = [0.8]

    # add one for sil
    N_classes += 1

    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                #cname = '_'.join(str(x) for x in cl)
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V2'
                # check if model exist with said name
                Model_address = f'{Model_name}model.hdf5'
                if os.path.exists(Model_address):
                    print(f'loading model: {Model_address}')
                    model = load_model(Model_address)
                    firsttime = False
                    print(f'Loaded')
                else:
                    print(f'No such model as :{Model_address}')

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)
                ''' Need to add  dictionary with similar sounding word
                however it seems there are too few words to do this with a small test set.
                also, might be having problems with switching to 25 wc from 30wc'''
                this = True
                if this:
                    sameworddiffprondict = defaultdict(list)
                    for keys, v in w2pdict.items():
                        wordnum = keys.split('_')[-1]
                        #check if already in dict list
                        if v not in sameworddiffprondict[wordnum]:
                            sameworddiffprondict[wordnum].append(v)

                    for keys, v in sameworddiffprondict.items():
                        print(keys, v)
                ''' Return Word Accuracy (by Softmax & ForcedMax), Max Seg Accuracy (from goldstandard-gst)'''
                that = True

                # possibly create function to address, which words are possibly useful forced-word-max
                #sameworddiffname
                swdname = ['9', '44']
                swdforms = [[['b', 'ɔ', 'l', '_'], ['b', 'a', 'l', '_'],
                             ['b', 'ɔ', '_']],
                            [['d', 'ɹ', 'ʌ', 'm', '_'],
                             ['dʒ', 'ɹ', 'ʌ', 'm', '_']]]

                # import the whole list for testing each possible word, instead of softmax
                #totaltest = 3 # for testing purpose shorten
                if that:
                    perphoneaccdict = defaultdict(
                        list)  # Word-key,list-accuracy, post-process
                    selected_phones.append('_')
                    diagnose = False
                    s_correct = 0
                    f_correct = 0
                    fw_correct = 0
                    total = 0
                    s_IDS = 0
                    f_IDS = 0
                    maxsegtotal = 0
                    s_seg = 0
                    f_seg = 0

                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    for _ in range(totaltest):  # amount of words to be judged
                        if diagnose:
                            print(f'Current file:{file}')
                            print(f'Word\'s phones{potphones}')
                        segcount = 0
                        gwordphones = []  # gold standard word segments
                        swordphones = []  # softmax word segments
                        fwordphones = []  # forced word segments
                        fname = file.split('.')[0]
                        potphones = w2pdict[fname]
                        fnamelast = fname.split('_')[-1]
                        potphones.append('_')
                        pind = [selected_phones.index(sp) for sp in potphones]
                        predictions = model.predict(x=x)
                        segcount += 1
                        gstd = argmaxpredicts2phones(y[0], selected_phones)
                        softmax = argmaxpredicts2phones(
                            predictions[0], selected_phones)
                        forceda = argmaxpredicts2forcedphones(
                            predictions[0], selected_phones, pind)
                        wordmaxforce = []
                        if fnamelast in swdname:
                            wordmaxforce = swdforms[swdname.index(fnamelast)]
                            swdlist = []
                            swdlistscore = [0 for _ in wordmaxforce]
                            for index, wsp in enumerate(wordmaxforce):
                                wspind = [
                                    selected_phones.index(sp) for sp in wsp
                                ]
                                tv, tswd = argmaxpredicts2forcedphones(
                                    predictions[0],
                                    selected_phones,
                                    wspind,
                                    fwords=True)
                                swdlist.append(tswd)
                                swdlistscore[index] += tv

                        gwordphones += gstd
                        swordphones += softmax
                        fwordphones += forceda
                        x, y, file = next(gen)
                        predictions = model.predict(x=x)
                        if cfile == file:  # break out of word while loop
                            samefile = True
                            cfile = file
                        else:
                            samefile = False
                            cfile = file
                        while samefile:  # track error for same file
                            segcount += 1
                            gstd = argmaxpredicts2phones(y[0], selected_phones)
                            softmax = argmaxpredicts2phones(
                                predictions[0], selected_phones)
                            forceda = argmaxpredicts2forcedphones(
                                predictions[0], selected_phones, pind)
                            if wordmaxforce:
                                for index, wsp in enumerate(wordmaxforce):
                                    wspind = [
                                        selected_phones.index(sp) for sp in wsp
                                    ]
                                    tv, tswd = argmaxpredicts2forcedphones(
                                        predictions[0],
                                        selected_phones,
                                        wspind,
                                        fwords=True)
                                    swdlist[index] += tswd
                                    swdlistscore[index] += tv
                            gwordphones += gstd
                            swordphones += softmax
                            fwordphones += forceda
                            x, y, file = next(gen)
                            predictions = model.predict(x=x)
                            if cfile != file:  # break out of word while loop
                                samefile = False
                                cfile = file
                        # got word segs, process them

                        gseg = segmentphonelist(gwordphones)
                        sseg = segmentphonelist(swordphones)
                        fseg = segmentphonelist(fwordphones)
                        sLD = uttLD(gseg, sseg)
                        fLD = uttLD(gseg, fseg)
                        s_IDS += sLD
                        f_IDS += fLD
                        if diagnose:
                            print('\n')
                            print(gseg)
                            print(sseg)
                            print(fseg)
                            print('\n')
                            print(sLD)
                            print(fLD)
                            print('\n')
                        # accuracy
                        startsil = gseg[-1][1]  #Index of Silence
                        g_len = len(gwordphones[:startsil])
                        s_correct += segCorrect(gwordphones[:startsil],
                                                swordphones[:startsil])
                        f_correct += segCorrect(gwordphones[:startsil],
                                                fwordphones[:startsil])
                        total += g_len
                        if wordmaxforce:
                            print(f'actual:{potphones}')
                            print(f'swd {wordmaxforce}'
                                  )  # list for individual comparision
                            print(f'scores{swdlistscore}')
                            for wlist in swdlist:
                                print(
                                    segCorrect(gwordphones[:startsil],
                                               wlist[:startsil]) / g_len)
                            print('\n')

                        # max-seg-score with known boundaries
                        # per word, then test set score
                        wordweight = 0
                        for seg in gseg[:-1]:  # last phone is silence '_'
                            maxsegtotal += 1
                            cphone = seg[0]
                            sboundedlist = swordphones[seg[1]:seg[2]]
                            fboundedlist = fwordphones[seg[1]:seg[2]]
                            smaxphone = max(sboundedlist,
                                            key=sboundedlist.count)
                            fmaxphone = max(fboundedlist,
                                            key=fboundedlist.count)
                            if smaxphone == cphone:
                                s_seg += 1
                            if fmaxphone == cphone:
                                f_seg += 1
                            #if cphone in perphoneaccdict.keys():
                            #    perphoneaccdict[cphone].append()
                            if diagnose:
                                print(seg)
                                print(smaxphone, fmaxphone, cphone)

                    sLDpercent = sLD / total * 100
                    fLDpercent = fLD / total * 100
                    print(
                        f'Insertions, Deletions, Substitions (SM):{sLD} out of {total}: {sLDpercent}%'
                    )
                    print(
                        f'Insertions, Deletions, Substitions (FM):{fLD} out of {total}: {fLDpercent}%'
                    )
                    Spercent = s_correct / total * 100
                    Fpercent = f_correct / total * 100
                    print('\n')
                    print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                    print(f'Forced: {f_correct} out of {total}, {Fpercent}%')

                    Spercent = s_seg / maxsegtotal * 100
                    Fpercent = f_seg / maxsegtotal * 100

                    print(
                        f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%'
                    )
                    print(
                        f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%'
                    )

                del gen
                del model
                k.clear_session()
예제 #4
0
def _rank_summary(ranks):
    """Return ``(mean rank, fraction of ranks equal to 0)`` for ``ranks``.

    Both values are NaN when ``ranks`` is empty, so an empty result set is
    reported instead of raising ZeroDivisionError.
    """
    if not ranks:
        return float('nan'), float('nan')
    trials = len(ranks)
    return sum(ranks) / trials, ranks.count(0) / trials


def main(testing=False):
    """Rank every candidate word against each test utterance with a saved model.

    For each grid configuration the model file ``<Model_name>model.hdf5`` is
    looked up relative to the current working directory.  Test sequences are
    pulled from ``generator_test_bufferedseq_wfname`` and grouped by file
    name; for every group each candidate phone sequence is scored with
    ``argmaxpredicts2forcedphones``.  The position of the true word is
    recorded in two rankings (frame accuracy against the gold labels, and the
    accumulated score returned by ``argmaxpredicts2forcedphones``), summary
    statistics are printed and two histograms are shown.

    Args:
        testing: When true, the record name becomes ``testing_records.pk``
            and only 30 words are evaluated.
    """
    # Config Values, Feature Assembly, focused on WordCount/FrameCount
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    # Based on your computer setup: corpus/ and HoldDir/ are expected on the
    # same level as GOP-LSTM.
    Wavdir = SysPath + 'corpus/dat/speakers/'  # On SysPath level
    Holddir = SysPath + 'HoldDir/'

    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'

    N_context = 2
    N_ceps = 26
    wordcount = 25

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): this branch never defines w2pdict (and, with TrainAll,
        # never defines selected_phones), both of which are required below.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'segmentation_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'segment_blstm_records_wl_{wordcount}.pk'
        print(f'Selected phones: {selected_phones}')
        print(f'Train count & test count: {totalcount}')

    if testing:
        cdnn_dict_name = 'testing_records.pk'
    print(f' Using:{cdnn_dict_name}')

    # Iterate over gridsearch
    ConvLayerList = [[32 for _ in range(10)]]
    DropoutList = [0.8]

    # add one for sil
    print(f'Number of phones:{N_classes}')
    N_classes += 1
    # Done once, outside the grid loops, so a larger grid does not keep
    # appending the silence symbol to the shared phone list.
    selected_phones.append('_')

    # Candidate pronunciations keyed by the last '_'-separated token of each
    # w2pdict key.  Lists are copied so that appending the silence symbol
    # does not mutate w2pdict.
    # TODO: add a dictionary of similar-sounding words; there seem to be too
    # few words to do this with a small test set.  There may also be problems
    # with switching from wordcount 30 to 25.
    swddict = defaultdict(list)
    for keys, v in w2pdict.items():
        wordnum = keys.split('_')[-1]
        if v not in swddict[wordnum]:
            swddict[wordnum].append(list(v))
    for plist in swddict.values():
        # Only the first pronunciation of each word gets the trailing silence.
        plist[0].append('_')

    # NOTE(review): swddict keys are strings (they come from str.split) while
    # skiplist holds ints, so no candidate is ever skipped.  Left unchanged
    # because the intended key format cannot be confirmed from here.
    skiplist = [17, 18]

    # Phone-index lists per candidate are loop-invariant; resolve them once.
    candidate_indices = {
        wordnum: [[selected_phones.index(sp) for sp in wsp] for wsp in plist]
        for wordnum, plist in swddict.items()
        if wordnum not in skiplist
    }

    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        # totaltrain is not used below; the call is kept in case counting has
        # side effects -- TODO confirm and drop.
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = f'{cl[0]}_x{len(cl)}'
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}_V2'
                # check if model exist with said name
                Model_address = f'{Model_name}model.hdf5'
                if not os.path.exists(Model_address):
                    # Nothing to evaluate for this configuration; previously
                    # execution fell through and failed on an undefined model.
                    print(f'No such model as :{Model_address}')
                    continue
                print(f'loading model: {Model_address}')
                model = load_model(Model_address)
                print('Loaded')

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)

                # true word -> list of ranks (0 means ranked first)
                perwordaccdict = defaultdict(list)
                perwordmaxdict = defaultdict(list)
                diagnose = False
                print(f'Total Test size:{totaltest}\n')
                x, y, file = next(gen)
                cfile = file
                for _ in range(totaltest):  # amount of words to be judged
                    # NOTE(review): the true word is token [1] of the file
                    # stem, while swddict keys use token [-1]; these agree
                    # only for two-token names -- confirm the file naming.
                    trueword = file.split('.')[0].split('_')[1]
                    if diagnose:
                        print(f'Word\'s phones{swddict.get(trueword)}')
                    gwordphones = []  # gold standard word segments
                    # Skipped candidates keep an empty list / zero score.
                    twordlistdict = {index: [] for index in swddict}
                    twordscoredict = {index: 0 for index in swddict}
                    twordaccdict = {index: 0 for index in swddict}

                    # Consume consecutive sequences until the file changes.
                    first_segment = True
                    while True:
                        predictions = model.predict(x=x)
                        gwordphones += argmaxpredicts2phones(
                            y[0], selected_phones)
                        for index, indexlists in candidate_indices.items():
                            for iindex, wspind in enumerate(indexlists):
                                tv, tswd = argmaxpredicts2forcedphones(
                                    predictions[0],
                                    selected_phones,
                                    wspind,
                                    fwords=True)
                                if first_segment:
                                    twordlistdict[index].append(tswd)
                                else:
                                    # Extend this pronunciation's phone list;
                                    # extending the outer list left entry [0]
                                    # holding the first segment only.
                                    twordlistdict[index][iindex] += tswd
                                twordscoredict[index] += tv
                        first_segment = False
                        x, y, file = next(gen)
                        if file != cfile:  # next word starts here
                            cfile = file
                            break

                    gseg = segmentphonelist(gwordphones)
                    startsil = gseg[-1][1]  # Index of Silence
                    goldphones = gwordphones[:startsil]
                    g_len = len(goldphones)

                    for index in candidate_indices:
                        twordaccdict[index] = segCorrect(
                            goldphones,
                            twordlistdict[index][0][:startsil]) / g_len

                    sortedacc = sorted(twordaccdict.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)
                    sortedmax = sorted(twordscoredict.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)
                    for rank, (wordnum, _score) in enumerate(sortedacc):
                        if wordnum == trueword:
                            perwordaccdict[trueword].append(rank)
                    for rank, (wordnum, _score) in enumerate(sortedmax):
                        if wordnum == trueword:
                            perwordmaxdict[trueword].append(rank)

                # Number of correct, binary score, then a relative score, the
                # greater the worse.  Each ranking is summarised over its own
                # trial count.
                rankingsacc = [
                    rank for ranks in perwordaccdict.values() for rank in ranks
                ]
                rankingsmax = [
                    rank for ranks in perwordmaxdict.values() for rank in ranks
                ]
                meanrank, toprate = _rank_summary(rankingsacc)
                print(
                    f'Avg num of distance from 0, {meanrank} using max accuracy'
                )
                print(f'Avg accuracy {toprate}')
                meanrank, toprate = _rank_summary(rankingsmax)
                print(
                    f'Avg num of distance from 0, {meanrank} using sum of max'
                )
                print(f'Avg sum phone maxs {toprate}')
                print(f'Out {len(perwordaccdict.keys())} words')

                plt.subplot(1, 2, 1)
                plt.title('Rankings by Accuracy')
                plt.hist(rankingsacc, bins=38)
                plt.ylabel('Occurrences')
                plt.xlabel('Distance')
                plt.subplot(1, 2, 2)
                plt.title('Rankings by Sum of Softmax')
                plt.hist(rankingsmax, bins=38)
                plt.xlabel('Distance')
                plt.tight_layout()
                plt.show()

                del gen
                del model
                k.clear_session()
예제 #5
0
def main(testing=False):
    """Grid-search CNN-LSTM phone classifiers and print the ranked records.

    Prepares the train/test directories, then for every combination of
    sequence size, convolution layout and dropout rate builds a model with
    ``make_CNNLSTM_classifier``, trains it with ``train_model`` and scores it
    with ``evaluate_model``.  Finally ``cdnn_records_rankNprint`` is called
    on the record file.

    Args:
        testing: Forwarded to ``train_model`` / ``evaluate_model``; when true
            the record name becomes ``testing_records.pk``.
    """
    # Config Values[DNN params]
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    N_context = 2
    N_ceps = 26
    wordcount = 40

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): with TrainAll set this branch never defines
        # selected_phones / N_classes / totalcount, which are needed below.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            wordcount=wordcount,
            overwrite=True)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'

        cdnn_dict_name = f'crnn_gridsearch_records_wl_{wordcount}.pk'

    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'
    print(totalcount)

    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[8], [32]]
    DropoutList = [0.8, 0.9]

    seq_sizelist = [64, 128]
    for seq_size in seq_sizelist:
        # Count sequences separately per split: evaluation previously reused
        # the training count as its totalsamples.
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = '_'.join(str(width) for width in cl)
                Model_name = f'CP{cname}_FBN_SS{seq_size}_DL{dl}'
                model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                conv_layers=cl,
                                                n_classes=N_classes,
                                                seq_size=seq_size,
                                                dropout_rate=dl,
                                                channel_order='channels_last')
                # train_model hands back the model and the name to use for
                # the evaluation records.
                model, Model_name = train_model(n_epochs=N_epochs,
                                                model=model,
                                                traindir=Traindir,
                                                model_name=Model_name,
                                                n_classes=N_classes,
                                                totalsamples=totaltrain,
                                                dict_name=cdnn_dict_name,
                                                results_dir=cdnn_address,
                                                batch_size=seq_size,
                                                testing=testing)
                evaluate_model(model=model,
                               testdir=Testdir,
                               n_classes=N_classes,
                               totalsamples=totaltest,
                               model_name=Model_name,
                               dict_name=cdnn_dict_name,
                               results_dir=cdnn_address,
                               batch_size=seq_size,
                               testing=testing)
                del model

    cdnn_records_rankNprint(nn_record_name=cdnn_dict_name,
                            results_address=cdnn_address)
def main(testing=False):
    """Grid-search BLSTM phone classifiers and compare free vs. forced accuracy.

    For every combination of sequence size, convolution layout and dropout
    rate a model is built, trained and evaluated.  The test generator is then
    replayed: for each sequence the per-frame argmax over all phones
    ("predicted") and the argmax restricted to the phones listed for that
    file in ``w2pdict`` ("forced") are compared with the gold labels.  Both
    percentages are stored with ``cdnn_records_add`` and printed, and the
    record file is ranked at the end.

    Args:
        testing: Forwarded to ``train_model`` / ``evaluate_model``; when true
            the record name becomes ``testing_records.pk`` and only 30 test
            sequences are replayed.
    """
    # Config Values[DNN params]
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    N_context = 2
    N_ceps = 26
    wordcount = 30
    ByCount = 4000

    # Training & Test Data
    if FramelevelORword:
        # NOTE(review): this branch never defines w2pdict (and, with TrainAll,
        # never defines selected_phones), both of which are required below.
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        print(f'Selected phones: {selected_phones}')

        cdnn_dict_name = f'cp_blstm_gridsearch_records_wl_{wordcount}.pk'

    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'
    # '\&' was an invalid escape sequence that printed a stray backslash.
    print(f'Train count & test count: {totalcount}')

    # Iterate over gridsearch
    N_epochs = 70
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[64, 64, 64, 64], [32, 32, 32, 32]]
    DropoutList = [0.8]

    seq_sizelist = [16, 32]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = '_'.join(str(width) for width in cl)
                Model_name = f'BLSTM_CP{cname}_FBN_SS{seq_size}_DL{dl}'
                model = make_CNNLSTM_classifier(input_tuple=Input_tuple,
                                                conv_layers=cl,
                                                n_classes=N_classes,
                                                seq_size=seq_size,
                                                dropout_rate=dl,
                                                channel_order='channels_last')
                model, Model_name = train_model(n_epochs=N_epochs,
                                                model=model,
                                                traindir=Traindir,
                                                model_name=Model_name,
                                                n_classes=N_classes,
                                                totalsamples=totaltrain,
                                                dict_name=cdnn_dict_name,
                                                results_dir=cdnn_address,
                                                batch_size=seq_size,
                                                testing=testing)
                evaluate_model(model=model,
                               testdir=Testdir,
                               n_classes=N_classes,
                               totalsamples=totaltest,
                               model_name=Model_name,
                               dict_name=cdnn_dict_name,
                               results_dir=cdnn_address,
                               batch_size=seq_size,
                               testing=testing)
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_train_bufferedseq_wfname(train_dir=Testdir,
                                                         batch_size=seq_size,
                                                         n_classes=N_classes)
                p_correct = 0
                f_correct = 0
                total = 0
                for _ in range(totaltest):
                    x, y, file = next(gen)
                    fname = file.split('.')[0]
                    potphones = w2pdict[fname]
                    # Class indices of the phones allowed for this file.
                    pind = [selected_phones.index(sp) for sp in potphones]
                    # predict() replaces predict_proba(), which no longer
                    # exists in current Keras and returned the same values.
                    predictions = model.predict(x=x)
                    TrueY = [
                        selected_phones[sp] for sp in np.argmax(y, axis=2)[0]
                    ]
                    PredY = [
                        selected_phones[sp]
                        for sp in np.argmax(predictions, axis=2)[0]
                    ]
                    # argmax over the allowed columns only, mapped back to the
                    # full phone list through pind.
                    ForcY = [
                        selected_phones[pind[sp]]
                        for sp in np.argmax(predictions[:, :, pind][0], axis=1)
                    ]
                    p_correct += sum(
                        1 for gold, pred in zip(TrueY, PredY) if gold == pred)
                    f_correct += sum(
                        1 for gold, forc in zip(TrueY, ForcY) if gold == forc)
                    total += len(TrueY)
                p_percent = p_correct / total * 100
                f_percent = f_correct / total * 100
                # The record's loss/accuracy slots carry the predicted and
                # forced percentages respectively.
                cdnn_records_add(loss=p_percent,
                                 accuracy=f_percent,
                                 model_name=Model_name,
                                 nn_records_name=cdnn_dict_name,
                                 results_address=cdnn_address)
                print(
                    f'Predicted correct:{p_correct} out of {total}, {p_percent}'
                )
                print(
                    f'Forced  correct:{f_correct} out of {total}, {f_percent}')

                # Optional dump of the last replayed sequence.
                diagnosis = False
                if diagnosis:
                    print(potphones)
                    print(file)
                    print(
                        f'Goldstd:{[selected_phones[sp] for sp in np.argmax(y, axis=2)[0]]}'
                    )
                    print(
                        f'Max All:{[selected_phones[sp] for sp in np.argmax(predictions,axis=2)[0]]}'
                    )
                    print(
                        f'ForcedA:{[selected_phones[pind[sp]] for sp in np.argmax(predictions[:, :, pind][0], axis=1)]}'
                    )
                del gen
                del model
                k.clear_session()

    cdnn_records_rankNprint(nn_record_name=cdnn_dict_name,
                            results_address=cdnn_address)
예제 #7
0
def main(testing=False):
    """Train (or load) a ResBLSTM phone classifier and score it word by word.

    The function prepares the train/test folders, then for every
    (sequence size, conv layout, dropout) combination either loads a saved
    model from ``{Model_name}model.hdf5`` in the working directory or builds,
    trains, evaluates and saves a new one. Afterwards it walks the test
    generator one word (= one source file) at a time and prints:

    * the accumulated ``uttLD`` distance between gold and predicted segments,
    * frame accuracy for the plain softmax decision and for the decision
      restricted ("forced") to the phones of the word,
    * segment accuracy using a majority vote inside the gold boundaries.

    Args:
        testing: When true, records are written to ``testing_records.pk``,
            the flag is forwarded to ``train_model`` / ``evaluate_model`` and
            only 30 words are scored.

    NOTE(review): ``FramelevelORword`` and ``TrainAll`` are hard-coded to
    ``False``. The ``FramelevelORword`` branch never defines ``w2pdict`` and,
    with ``TrainAll`` enabled, neither ``N_classes`` nor ``selected_phones``;
    the word-level scoring below would fail in those configurations.
    """
    # Config Values[DNN params]
    Frame_length = 0.025
    Frame_step = 0.01

    overwrite_MFCCs = False
    TrainAll = False
    FramelevelORword = False

    # All paths are resolved relative to the directory containing 'GOP-LSTM'.
    cwd = os.getcwd()
    SysPath = cwd.split('GOP-LSTM')[0]
    Wavdir = SysPath + 'corpus/dat/speakers/'
    Dbdir = SysPath + 'GOP-LSTM/PhoneInfo/speakers_db_correct/'
    Holddir = SysPath + 'HoldDir/'
    Traindir = Holddir + 'Train/'
    Testdir = Holddir + 'Test/'
    PhoInfDir = SysPath + 'GOP-LSTM/PhoneInfo/'
    N_context = 2
    N_ceps = 26
    wordcount = 10

    # Training & Test Data
    if FramelevelORword:
        speakers_trainNtest(db_corpus=Dbdir,
                            wav_corpus=Wavdir,
                            n_ceps=N_ceps,
                            n_context=N_context,
                            frame_length=Frame_length,
                            frame_step=Frame_step,
                            inmat=True,
                            holddir=Holddir,
                            overwrite=overwrite_MFCCs)
        cdnn_dict_name = 'crnn_gridsearch_records_ALL.pk'
        if not TrainAll:
            ByCount = 4000
            Traindir = Holddir + 'Train_Correct/'
            Testdir = Holddir + 'Test_Correct/'
            selected_phones, totalcount = select_trainNtest(
                bycount=ByCount,
                holddir=Holddir,
                train_corpus=Traindir,
                test_corpus=Testdir,
                overwrite=False)
            Traindir = Holddir + f'Train_Select_{ByCount}/'
            Testdir = Holddir + f'Test_Select_{ByCount}/'
            N_classes = len(selected_phones)
            print(f'N selected classes: {N_classes}')
            cdnn_dict_name = f'crnn_gridsearch_records_{ByCount}.pk'
    else:
        selected_phones, totalcount, w2pdict = createNcount_trainNtest(
            frame_length=Frame_length,
            frame_step=Frame_step,
            n_ceps=N_ceps,
            n_context=N_context,
            dbdir=Dbdir,
            datdir=Wavdir,
            holddir=Holddir,
            phoinfdir=PhoInfDir,
            wordcount=wordcount)
        N_classes = len(selected_phones)
        Traindir = Holddir + f'FLP_Train_{wordcount}/'
        Testdir = Holddir + f'FLP_Test_{wordcount}/'
        cdnn_dict_name = f'drescp_blstm_records_wl_{wordcount}.pk'
        print(f'Selected phones (amount: {selected_phones}')
        print(f'Train count & test count: {totalcount}')

    if testing:
        cdnn_dict_name = 'testing_records.pk'
    cdnn_address = SysPath + 'GOP-LSTM/Results/CDNN_phones/'
    # Iterate over gridsearch
    N_epochs = 80
    Input_tuple = (5, 26, 1)
    ConvLayerList = [[32, 7, 1024]]
    DropoutList = [0.8]

    # add one for sil; the label is appended exactly once so that
    # len(selected_phones) keeps matching N_classes for every configuration.
    N_classes += 1
    selected_phones.append('_')

    seq_sizelist = [64]
    for seq_size in seq_sizelist:
        totaltrain = nseqsofnsize(Traindir, seq_size=seq_size)
        totaltest = nseqsofnsize(Testdir, seq_size=seq_size)
        for cl in ConvLayerList:
            for dl in DropoutList:
                # Compile Params
                cname = '_'.join(str(x) for x in cl)
                Model_name = f'ResBLSTM_C{cname}_FBN_SS{seq_size}_DL{dl}_V3'
                Model_address = f'{Model_name}model.hdf5'
                if os.path.exists(Model_address):
                    print(f'Loading Model:{Model_address}')
                    model = load_model(Model_address)
                    print('Loaded')
                else:
                    model = make_CNNLSTM_classifier(
                        input_tuple=Input_tuple,
                        conv_layers=cl,
                        n_classes=N_classes,
                        seq_size=seq_size,
                        dropout_rate=dl,
                        channel_order='channels_last')
                    model, Model_name2 = train_model(n_epochs=N_epochs,
                                                     model=model,
                                                     traindir=Traindir,
                                                     model_name=Model_name,
                                                     n_classes=N_classes,
                                                     totalsamples=totaltrain,
                                                     dict_name=cdnn_dict_name,
                                                     results_dir=cdnn_address,
                                                     batch_size=seq_size,
                                                     testing=testing)
                    print('...Evaluating...')
                    evaluate_model(model=model,
                                   testdir=Testdir,
                                   n_classes=N_classes,
                                   totalsamples=totaltest,
                                   model_name=Model_name2,
                                   dict_name=cdnn_dict_name,
                                   results_dir=cdnn_address,
                                   batch_size=seq_size,
                                   testing=testing)
                    # Only a freshly trained model needs to be written out.
                    model.save(Model_address)

                # Forced Accuracy
                print('...Predicting...')
                if testing:
                    totaltest = 30
                gen = generator_test_bufferedseq_wfname(train_dir=Testdir,
                                                        batch_size=seq_size,
                                                        n_classes=N_classes,
                                                        wfname=True)

                # Manual toggle for the word-level scoring below.
                that = True

                # possibly create function to address, which words are possibly useful forced-word-max
                # sameworddiffname: word ids (last '_'-separated part of the
                # file name) that have several pronunciation variants, and the
                # variants themselves.
                swdname = ['9', '44']
                swdforms = [[['b', 'ɔ', 'l', '_'], ['b', 'a', 'l', '_'],
                             ['b', 'ɔ', '_']],
                            [['d', 'ɹ', 'ʌ', 'm', '_'],
                             ['dʒ', 'ɹ', 'ʌ', 'm', '_']]]

                # import the whole list for testing each possible word, instead of softmax
                if that:
                    diagnose = False
                    s_correct = 0  # frames right, softmax decision
                    f_correct = 0  # frames right, forced decision
                    total = 0  # gold frames before the trailing silence
                    s_IDS = 0  # summed uttLD, softmax segments
                    f_IDS = 0  # summed uttLD, forced segments
                    maxsegtotal = 0  # gold segments scored by majority vote
                    s_seg = 0
                    f_seg = 0

                    print(f'Total Test size:{totaltest}\n')
                    x, y, file = next(gen)
                    cfile = file
                    for _ in range(totaltest):  # amount of words to be judged
                        gwordphones = []  # gold standard word segments
                        swordphones = []  # softmax word segments
                        fwordphones = []  # forced word segments
                        fname = file.split('.')[0]
                        # Copy before adding silence so the entry cached in
                        # w2pdict is not grown on every visit of this word.
                        potphones = [*w2pdict[fname], '_']
                        if diagnose:
                            print(f'Current file:{file}')
                            print(f'Word\'s phones{potphones}')
                        fnamelast = fname.split('_')[-1]
                        pind = [selected_phones.index(sp) for sp in potphones]

                        # Pronunciation variants to score for this word, with
                        # their class indices resolved once per word.
                        wordmaxforce = []
                        swdlist = []
                        swdlistscore = []
                        wspinds = []
                        if fnamelast in swdname:
                            wordmaxforce = swdforms[swdname.index(fnamelast)]
                            swdlistscore = [0] * len(wordmaxforce)
                            wspinds = [[
                                selected_phones.index(sp) for sp in wsp
                            ] for wsp in wordmaxforce]

                        # Consume every batch that belongs to the current
                        # file; the generator is always read one batch ahead,
                        # so a changed file name marks the end of the word.
                        first_segment = True
                        while True:
                            predictions = model.predict(x=x)
                            gstd = argmaxpredicts2phones(y[0], selected_phones)
                            softmax = argmaxpredicts2phones(
                                predictions[0], selected_phones)
                            forceda = argmaxpredicts2forcedphones(
                                predictions[0], selected_phones, pind)
                            for index, wspind in enumerate(wspinds):
                                tv, tswd = argmaxpredicts2forcedphones(
                                    predictions[0],
                                    selected_phones,
                                    wspind,
                                    fwords=True)
                                if first_segment:
                                    swdlist.append(tswd)
                                else:
                                    swdlist[index] += tswd
                                swdlistscore[index] += tv
                            gwordphones += gstd
                            swordphones += softmax
                            fwordphones += forceda
                            first_segment = False

                            x, y, file = next(gen)
                            if cfile != file:  # next word reached
                                cfile = file
                                break
                        # got word segs, process them

                        gseg = segmentphonelist(gwordphones)
                        sseg = segmentphonelist(swordphones)
                        fseg = segmentphonelist(fwordphones)
                        sLD = uttLD(gseg, sseg)
                        fLD = uttLD(gseg, fseg)
                        s_IDS += sLD
                        f_IDS += fLD
                        if diagnose:
                            print('\n')
                            print(gseg)
                            print(sseg)
                            print(fseg)
                            print('\n')
                            print(sLD)
                            print(fLD)
                            print('\n')
                        # accuracy
                        startsil = gseg[-1][1]  #Index of Silence
                        g_len = len(gwordphones[:startsil])
                        s_correct += segCorrect(gwordphones[:startsil],
                                                swordphones[:startsil])
                        f_correct += segCorrect(gwordphones[:startsil],
                                                fwordphones[:startsil])
                        total += g_len
                        if wordmaxforce:
                            print(f'actual:{potphones}')
                            print(f'swd {wordmaxforce}'
                                  )  # list for individual comparision
                            print(f'scores{swdlistscore}')
                            for wlist in swdlist:
                                hits = segCorrect(gwordphones[:startsil],
                                                  wlist[:startsil])
                                print(hits / g_len if g_len else 0.0)
                            print('\n')

                        # max-seg-score with known boundaries
                        # per word, then test set score
                        for seg in gseg[:-1]:  # last phone is silence '_'
                            maxsegtotal += 1
                            cphone = seg[0]
                            sboundedlist = swordphones[seg[1]:seg[2]]
                            fboundedlist = fwordphones[seg[1]:seg[2]]
                            # Majority label inside the gold boundaries.
                            smaxphone = max(sboundedlist,
                                            key=sboundedlist.count)
                            fmaxphone = max(fboundedlist,
                                            key=fboundedlist.count)
                            if smaxphone == cphone:
                                s_seg += 1
                            if fmaxphone == cphone:
                                f_seg += 1
                            if diagnose:
                                print(seg)
                                print(smaxphone, fmaxphone, cphone)

                    # Report the distance accumulated over all words, not the
                    # value of the last word only.
                    # NOTE(review): the denominator is the gold frame count,
                    # as in the original report; confirm that this is the
                    # intended normalisation for a segment-level distance.
                    sLDpercent = (s_IDS / total * 100) if total else 0.0
                    fLDpercent = (f_IDS / total * 100) if total else 0.0
                    print(
                        f'Insertions, Deletions, Substitions (SM):{s_IDS} out of {total}: {sLDpercent}%'
                    )
                    print(
                        f'Insertions, Deletions, Substitions (FM):{f_IDS} out of {total}: {fLDpercent}%'
                    )
                    Spercent = (s_correct / total * 100) if total else 0.0
                    Fpercent = (f_correct / total * 100) if total else 0.0
                    print('\n')
                    print(f'Softmax: {s_correct} out of {total}, {Spercent}%')
                    print(f'Forced: {f_correct} out of {total}, {Fpercent}%')

                    Spercent = (s_seg / maxsegtotal *
                                100) if maxsegtotal else 0.0
                    Fpercent = (f_seg / maxsegtotal *
                                100) if maxsegtotal else 0.0

                    print(
                        f'Softmax (seg): {s_seg} out of {maxsegtotal}, {Spercent}%'
                    )
                    print(
                        f'Forced (seg): {f_seg} out of {maxsegtotal}, {Fpercent}%'
                    )

                # Release the generator, the model and the backend session
                # before the next configuration is built.
                del gen
                del model
                k.clear_session()