def doitForTestPiece(compositionName, recordingDir, withSynthesis=0):
    '''
    Segment one test recording of the given composition.

    Loads the score lyrics for section 1, locates the recording's
    section-annotation file (json/txt/tsv) and its wav, then runs the
    RecordingSegmenter on them.  The alignment step is currently
    commented out, so the returned error list is always empty.
    '''
    ####### prepare composition! ############
    scorePath = os.path.join(PATH_TEST_DATASET, compositionName)
    makamScore = loadLyrics(scorePath, whichSection=1)  # TODO: issue 14

    ########### ----- align one recording
    recordingPath = os.path.join(scorePath, recordingDir)
    os.chdir(recordingPath)

    # Annotation lookup by known extensions (replaces the older glob approach):
    # pathToSectionAnnotations = os.path.join(pathToRecording, glob.glob('*.sectionAnno.txt')[0])
    # pathToAudio = os.path.join(pathToRecording, glob.glob('*.wav')[0])
    annoExtensions = ["sectionAnno.json", "sectionAnno.txt", "sectionAnno.tsv"]
    annoCandidates = findFileByExtensions(recordingPath, annoExtensions)
    sectionAnnotationsPath = os.path.join(recordingPath, annoCandidates[0])

    # audio file is assumed to be named after its directory — TODO: issue 14
    audioPath = os.path.join(recordingPath, recordingDir) + '.wav'

    segmenter = RecordingSegmenter()
    makamRecording = segmenter.segment(makamScore, audioPath, sectionAnnotationsPath)

    alignmentErrors = []
    # alignmentErrors = recordingSegmenter.alignOneRecording(MODEL_URI, makamRecording, OUTPUT_PATH, withSynthesis)
    return alignmentErrors
def alignDependingOnWithDuration(URIrecordingNoExt, whichSection, pathToComposition, withDuration, withSynthesis, evalLevel, params, usePersistentFiles, htkParser):
    '''
    Call the alignment method depending on whether duration-aware decoding
    (withDuration truthy) or plain HTK forced alignment is selected.

    Returns a 4-tuple:
        (alignmentErrors, detectedWordList, grTruthDurationWordList,
         detectedAlignedfileName)
    For a section with no lyrics (empty or '_SAZ_') returns ([], [], [], []).
    '''
    Phonetizer.initLookupTable(withSynthesis)
    tokenLevelAlignedSuffix, phonemesAlignedSuffix = determineSuffix(withDuration, withSynthesis, evalLevel)

    if withDuration:
        alignmentErrors, detectedWordList, grTruthDurationWordList = alignOneChunk(URIrecordingNoExt, pathToComposition, whichSection, htkParser, params, evalLevel, usePersistentFiles)
    else:
        URIrecordingAnno = URIrecordingNoExt + ANNOTATION_EXT
        URIrecordingWav = URIrecordingNoExt + AUDIO_EXTENSION
        # new makamScore used
        lyricsObj = loadLyrics(pathToComposition, whichSection)
        lyrics = str(lyricsObj)
        # in case we are at a no-lyrics section
        if not lyrics or lyrics == '_SAZ_':
            # logger.warn() is a deprecated alias; warning() is the supported spelling
            logger.warning("skipping section {} with no lyrics ...".format(whichSection))
            return [], [], [], []
        # `lyrics` is already a str here, so the redundant .__str__() call was dropped
        outputHTKPhoneAlignedURI = Aligner.alignOnechunk(MODEL_URI, URIrecordingWav, lyrics, URIrecordingAnno, '/tmp/', withSynthesis)
        alignmentErrors = evalAlignmentError(URIrecordingAnno, outputHTKPhoneAlignedURI, evalLevel)
        detectedWordList = outputHTKPhoneAlignedURI
        grTruthDurationWordList = []

    # store decoding results in a file FIXME: if with duration it is not mlf
    # (the dead `detectedAlignedfileName = []` pre-assignment was removed —
    #  it was overwritten immediately)
    detectedAlignedfileName = tokenList2TabFile(detectedWordList, URIrecordingNoExt, tokenLevelAlignedSuffix)
    return alignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName
def doit(argv):
    '''
    Command-line entry point: segment one recording.

    Expects exactly three arguments — the recording wav, the directory
    holding its section annotations, and the score path — and exits with
    a usage message otherwise.
    '''
    if len(argv) != 4:
        sys.exit("usage: {} <recordingURI.wav> <sectionAnnoPath> <scorePath>".format(argv[0]))

    recordingURI, sectionAnnoPath, scorePath = argv[1], argv[2], argv[3]

    makamScore = loadLyrics(scorePath, whichSection=1)
    os.chdir(sectionAnnoPath)

    # pick the first annotation file matching a known extension
    candidateExtensions = ["sectionAnno.json", "sectionAnno.txt", "sectionAnno.tsv"]
    annoFiles = findFileByExtensions(sectionAnnoPath, candidateExtensions)
    pathToSectionAnnotations = os.path.join(sectionAnnoPath, annoFiles[0])

    segmenter = RecordingSegmenter()
    makamRecording = segmenter.segment(makamScore, recordingURI, pathToSectionAnnotations)
def alignOneChunk(URIrecordingNoExt, pathToComposition, whichSection, htkParser, params, evalLevel, usePersistentFiles):
    '''
    Top-most logic method: decode one audio chunk against the score-derived
    lyric models and evaluate the alignment.

    usePersistentFiles arrives as the string 'True'/'False' (CLI-style);
    anything else aborts via sys.exit.
    Returns (alignmentErrors, detectedWordList, grTruthWordList);
    a no-lyrics section ('' or '_SAZ_') returns ([], [], []).
    '''
    # Validate the stringly-typed flag up front — fail fast before any
    # expensive lyric/model loading (originally checked only after
    # constructing LyricsWithModels and Decoder).
    if usePersistentFiles == 'True':
        usePersistentFiles = True
    elif usePersistentFiles == 'False':
        usePersistentFiles = False
    else:
        sys.exit("usePersistentFiles can be only True or False")

    lyrics = loadLyrics(pathToComposition, whichSection)
    lyricsStr = str(lyrics)
    if not lyricsStr or lyricsStr == '_SAZ_':
        # logger.warn() is a deprecated alias; warning() is the supported spelling
        logger.warning("skipping section {} with no lyrics ...".format(whichSection))
        return [], [], []

    logger.info("aligning audio {}".format(URIrecordingNoExt))
    lyricsWithModels = LyricsWithModels(lyrics, htkParser, params.ONLY_MIDDLE_STATE)
    # DEBUG: score-derived phoneme durations
    # lyricsWithModels.printPhonemeNetwork()
    decoder = Decoder(lyricsWithModels, params.ALPHA)
    # TODO: DEBUG: do not load models
    # decoder = Decoder(lyrics, withModels=False, numStates=86)

    #################### decode
    detectedWordList, grTruthWordList = decodeAudioChunk(URIrecordingNoExt, decoder, evalLevel, usePersistentFiles)
    ### VISUALIZE
    # decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)

    #################### evaluate
    # (dead placeholder `alignmentErrors = [2, 3, 4]` removed — it was
    #  overwritten unconditionally on the next line)
    alignmentErrors = _evalAlignmentError(URIrecordingNoExt + ANNOTATION_EXT, detectedWordList, evalLevel)
    return alignmentErrors, detectedWordList, grTruthWordList
# NOTE(review): the triple-quoted string below is dead example code kept as a
# module-level string literal (it is never executed).  Its closing ''' was lost
# in a formatting mangle and is reinstated here so the module parses.
'''
alpha = 0.97
deviationInSec = 0.1
ONLY_MIDDLE_STATE=False
params = Parameters(alpha, ONLY_MIDDLE_STATE, deviationInSec)
decoder = Decoder(lyricsWithModels, params.ALPHA, params.deviationInSec)
# decodes
decoder.hmmNetwork.initDecodingParameters(observationFeatures)
chiBackPointer, psiBackPointer = decoder.hmmNetwork._viterbiForcedDur(observationFeatures)
# backtrack
path = Path(chiBackPointer, psiBackPointer)
detectedWordList = decoder.path2ResultWordList(path)
# DEBUG
decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)
path.printDurations()
'''

if __name__ == '__main__':
    #test_simple()
    # test_rand()
    #test_discrete()
    # testRand_DurationHMM()
    withSynthesis = False
    # NOTE(review): pathToComposition, whichSection and URIrecordingNoExt are
    # not defined anywhere in this chunk — presumably module-level globals
    # defined elsewhere in the file; verify before running this entry point.
    lyrics = loadLyrics(pathToComposition, whichSection, withSynthesis)
    lyricsWithModels, observationFeatures = loadSmallAudioFragment(lyrics, URIrecordingNoExt, withSynthesis, fromTs=-1, toTs=-1)
    decode(lyricsWithModels, observationFeatures)