def alignOneChunk(lyrics, withSynthesis, withOracle, lyricsWithModelsORacle, listNonVocalFragments, alpha, usePersistentFiles, tokenLevelAlignedSuffix, URIrecordingNoExt, currSectionLink, htkParser):
    '''
    Top-most wrapper: align the lyrics of one chunk (section) of a recording.

    The chunk is identified by ``URIrecordingNoExt`` plus the begin and end
    timestamps of ``currSectionLink``. If a token-level alignment file for that
    chunk already exists, its content is read back instead of decoding again.

    @param lyrics: passed through to loadSmallAudioFragment
    @param withSynthesis: converted with bool() and passed to
        loadSmallAudioFragment; forced to 1 when withOracle is set
    @param withOracle: if truthy, decode with decoder.decodeWithOracle and read
        a cached path from '.path_oracle' instead of '.path'
    @param lyricsWithModelsORacle: passed to decoder.decodeWithOracle
    @param listNonVocalFragments: passed to decoder.decodeAudio
    @param alpha: passed to the Decoder constructor
    @param usePersistentFiles: the string 'True' or 'False'; a real bool is
        accepted as well. Only inspected when decoding actually takes place.
    @param tokenLevelAlignedSuffix: suffix appended to the chunk URI to form
        the name of the token-level alignment file
    @param URIrecordingNoExt: URI of the recording without extension
    @param currSectionLink: object providing beginTs and endTs
    @param htkParser: passed through to loadSmallAudioFragment

    @return: tuple (detectedTokenList, detectedPath, phiOptPath). phiOptPath is
        the placeholder 0 when the result was read from existing files.

    Exits the process through sys.exit if usePersistentFiles is none of the
    accepted values and decoding is required.
    '''
    if withOracle:
        # synthesis is not really needed in this setting; workaround because
        # without synthesis the whole recording is taken
        withSynthesis = 1

    URIRecordingChunkResynthesizedNoExt = URIrecordingNoExt + "_" + str(currSectionLink.beginTs) + '_' + str(currSectionLink.endTs)
    detectedAlignedfileName = URIRecordingChunkResynthesizedNoExt + tokenLevelAlignedSuffix

    if os.path.isfile(detectedAlignedfileName):
        # result already on disk: read it back instead of decoding
        print("{}\n already exists. No decoding".format(detectedAlignedfileName))
        detectedTokenList = readListOfListTextFile(detectedAlignedfileName)

        if withOracle:
            outputURI = URIRecordingChunkResynthesizedNoExt + '.path_oracle'
        else:
            outputURI = URIRecordingChunkResynthesizedNoExt + '.path'
        detectedPath = readListTextFile(outputURI)

        # TODO: store persistently
        phiOptPath = 0
        return detectedTokenList, detectedPath, phiOptPath

    # Validate the flag before the expensive feature extraction so that a bad
    # value fails fast. The caller's argument is left untouched.
    if isinstance(usePersistentFiles, bool):
        persistentFiles = usePersistentFiles
    elif usePersistentFiles == 'True':
        persistentFiles = True
    elif usePersistentFiles == 'False':
        persistentFiles = False
    else:
        sys.exit("usePersistentFiles can be only True or False")

    # ###### extract audio features
    # the third returned element (URI of the recording chunk) is not needed here
    lyricsWithModels, obsFeatures, _ = loadSmallAudioFragment(
        lyrics, 'dummyExtractedPitchList', URIrecordingNoExt,
        URIRecordingChunkResynthesizedNoExt, bool(withSynthesis),
        currSectionLink, htkParser)

    # DEBUG: score-derived phoneme durations
    # lyricsWithModels.printPhonemeNetwork()
    # lyricsWithModels.printWordsAndStates()
    decoder = Decoder(lyricsWithModels, URIRecordingChunkResynthesizedNoExt, alpha)

    #################### decode
    if withOracle:
        detectedTokenList = decoder.decodeWithOracle(lyricsWithModelsORacle, URIRecordingChunkResynthesizedNoExt)
    else:
        detectedTokenList = decoder.decodeAudio(obsFeatures, listNonVocalFragments, persistentFiles)

    phiOptPath = decoder.path.phiOptPath
    detectedPath = decoder.path.pathRaw

    # write the token-level alignment to file; an existing file short-cuts
    # decoding on the next call (see the isfile check above)
    tokenList2TabFile(detectedTokenList, URIRecordingChunkResynthesizedNoExt, tokenLevelAlignedSuffix, currSectionLink.beginTs)

    # DEBUG, visualize result:
    # decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)

    return detectedTokenList, detectedPath, phiOptPath
def alignLyricsSection(self, extractedPitchList, listNonVocalFragments, tokenLevelAlignedSuffix, currSectionLink):
    '''
    Align the lyrics of one section with its audio chunk.

    If the token-level alignment file of the chunk already exists, the decoded
    result is read back through self.read_decoded. Otherwise features are
    loaded, note onsets are prepared, the chunk is decoded and, if
    ParametersAlgo.WRITE_TO_FILE is set, the result is written to files.

    @param extractedPitchList: passed to currSectionLink.loadSmallAudioFragment
    @param listNonVocalFragments: passed to decoder.decodeAudio
    @param tokenLevelAlignedSuffix: suffix appended to the chunk URI to form
        the name of the token-level alignment file
    @param currSectionLink: section link providing URIRecordingChunk, beginTs,
        lyricsWithModels and the loadSmallAudioFragment* methods

    @return: tuple (detectedTokenList, detectedPath, phiOptPath)
    '''
    chunkURINoExt = currSectionLink.URIRecordingChunk
    alignedFileURI = chunkURINoExt + tokenLevelAlignedSuffix

    # both helpers are constructed up front, also when a cached result is used
    featureExtractor = FeatureExtractor(self.path_to_hcopy, currSectionLink)
    onsetDetector = OnsetDetector(currSectionLink)

    if os.path.isfile(alignedFileURI):
        # do not decode, read from file
        detectedTokenList, phiOptPath, detectedPath = self.read_decoded(chunkURINoExt, alignedFileURI)
        return detectedTokenList, detectedPath, phiOptPath

    # NOTE(review): after a fresh decode the returned phiOptPath stays the
    # empty string, although decoder.path.phiPathLikelihood is available below
    # - confirm whether callers rely on this.
    phiOptPath = ''

    fromTsTextGrid, toTsTextGrid = -1, -1

    if ParametersAlgo.WITH_ORACLE_PHONEMES:
        # oracle phonemes: blocks until the user confirms on the console
        raw_input('implemented only for Kimseye...! Continue only if working with Kimseye')

        if ParametersAlgo.FOR_MAKAM:
            # for kimseye etmem
            fromTsTextGrid, toTsTextGrid = 0, 20.88

        # NOTE(review): the original line layout was lost; this assumes only
        # the two timestamps above are guarded by FOR_MAKAM - confirm.
        fromSyllableIdx, toSyllableIdx = 0, 10
        currSectionLink.loadSmallAudioFragmentOracle(self.model, fromSyllableIdx, toSyllableIdx)
        # featureVectors is alias for LyricsWithModelsOracle
        featureExtractor.featureVectors = currSectionLink.lyricsWithModels
    else:
        ###### extract audio features
        featureExtractor.featureVectors = currSectionLink.loadSmallAudioFragment(
            featureExtractor, extractedPitchList,
            self.recording.recordingNoExtURI, self.model)

    # DEBUG: sectionLink.lyricsWithModels.printWordsAndStates()

    #################### decode
    decoder = Decoder(currSectionLink, currSectionLink.lyricsWithModels, chunkURINoExt)

    ##### prepare note onsets. result stored in files, which are used in decoding
    if ParametersAlgo.WITH_ORACLE_ONSETS == 1:
        recordingDir = os.path.dirname(self.recording.recordingNoExtURI)
        URIrecOnsets = os.path.join(recordingDir, ParametersAlgo.ANNOTATION_RULES_ONSETS_EXT)
        onsetDetector.parseNoteOnsetsGrTruth(URIrecOnsets)
    elif ParametersAlgo.WITH_ORACLE_ONSETS == 0:
        onsetDetector.extractNoteOnsets(chunkURINoExt + '.wav')
    # any other value: no onsets are prepared

    decodedTokens = decoder.decodeAudio(featureExtractor, onsetDetector, listNonVocalFragments, fromTsTextGrid, toTsTextGrid)
    detectedTokenList = addTimeShift(decodedTokens, currSectionLink.beginTs)
    detectedPath = decoder.path.pathRaw

    ##### write all decoded output persistently to files
    if ParametersAlgo.WRITE_TO_FILE:
        self.write_decoded_to_file(tokenLevelAlignedSuffix, chunkURINoExt, decoder.path.phiPathLikelihood, detectedTokenList)

    # DEBUG, visualize result:
    # decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)

    return detectedTokenList, detectedPath, phiOptPath