Python Decoder.decodeAudio示例

def alignOneChunk(lyrics, withSynthesis, withOracle, lyricsWithModelsORacle, listNonVocalFragments, alpha, usePersistentFiles, tokenLevelAlignedSuffix,  URIrecordingNoExt, currSectionLink, htkParser):
    '''
    Top-most wrapper: align the lyrics of one chunk (section) of a recording.

    The chunk is identified by URIrecordingNoExt plus the begin/end timestamps
    of currSectionLink. If the token-level alignment file for the chunk already
    exists, it is read back and no decoding is done. Otherwise audio features
    are loaded, the chunk is decoded and the detected tokens are written to
    that file.

    @param lyrics: lyrics passed on to loadSmallAudioFragment
    @param withSynthesis: truthy to load the chunk with synthesis; forced on
        when withOracle is set
    @param withOracle: truthy to decode with decoder.decodeWithOracle instead
        of decoder.decodeAudio
    @param lyricsWithModelsORacle: passed to decodeWithOracle (oracle mode only)
    @param listNonVocalFragments: passed to decodeAudio (non-oracle mode only)
    @param alpha: passed to the Decoder constructor
    @param usePersistentFiles: True/False, or the strings 'True'/'False';
        only inspected when decoding actually takes place
    @param tokenLevelAlignedSuffix: suffix of the token-level alignment file
    @param URIrecordingNoExt: recording URI without extension
    @param currSectionLink: object providing beginTs and endTs of the chunk
    @param htkParser: passed on to loadSmallAudioFragment

    @return: tuple (detectedTokenList, detectedPath, phiOptPath); phiOptPath
        is 0 when the result is read back from file, because it is not stored.

    Exits via sys.exit if decoding is needed and usePersistentFiles is not one
    of the accepted values.
    '''
    if withOracle:
        # synthesis not really needed in this setting. workaround because
        # without synthesis the whole recording is taken
        withSynthesis = 1

    URIRecordingChunkResynthesizedNoExt = URIrecordingNoExt + "_" + str(currSectionLink.beginTs) + '_' + str(currSectionLink.endTs)
    detectedAlignedfileName = URIRecordingChunkResynthesizedNoExt + tokenLevelAlignedSuffix

    if os.path.isfile(detectedAlignedfileName):
        # result was stored by an earlier run: read it instead of decoding.
        # Parenthesised single-argument print behaves the same as the
        # print statement on Python 2 and is valid on Python 3 too.
        print("{}\n already exists. No decoding".format(detectedAlignedfileName))
        detectedTokenList = readListOfListTextFile(detectedAlignedfileName)
        if withOracle:
            outputURI = URIRecordingChunkResynthesizedNoExt + '.path_oracle'
        else:
            outputURI = URIRecordingChunkResynthesizedNoExt + '.path'
        detectedPath = readListTextFile(outputURI)

        # TODO: store persistently
        phiOptPath = 0
        return detectedTokenList, detectedPath, phiOptPath

    # Validate the flag BEFORE the expensive feature extraction so that a bad
    # value fails fast. Real booleans are accepted in addition to the
    # 'True'/'False' strings (e.g. coming from the command line).
    if isinstance(usePersistentFiles, bool):
        pass
    elif usePersistentFiles == 'True':
        usePersistentFiles = True
    elif usePersistentFiles == 'False':
        usePersistentFiles = False
    else:
        sys.exit("usePersistentFiles can be only True or False")

    ###### extract audio features
    # the third returned value (URI of the recording chunk) is not needed here
    lyricsWithModels, obsFeatures, _ = loadSmallAudioFragment(lyrics, 'dummyExtractedPitchList', URIrecordingNoExt, URIRecordingChunkResynthesizedNoExt, bool(withSynthesis), currSectionLink, htkParser)

    # DEBUG: score-derived phoneme durations
    #     lyricsWithModels.printPhonemeNetwork()
    #     lyricsWithModels.printWordsAndStates()

    decoder = Decoder(lyricsWithModels, URIRecordingChunkResynthesizedNoExt, alpha)

    #################### decode
    if withOracle:
        detectedTokenList = decoder.decodeWithOracle(lyricsWithModelsORacle, URIRecordingChunkResynthesizedNoExt)
    else:
        detectedTokenList = decoder.decodeAudio(obsFeatures, listNonVocalFragments, usePersistentFiles)

    phiOptPath = decoder.path.phiOptPath
    detectedPath = decoder.path.pathRaw

    # persist the token-level result so that the next call can skip decoding
    tokenList2TabFile(detectedTokenList, URIRecordingChunkResynthesizedNoExt, tokenLevelAlignedSuffix, currSectionLink.beginTs)

    ### VISUALIZE result
    #     decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)

    return detectedTokenList, detectedPath, phiOptPath

示例#2

显示文件

文件： LyricsAligner.py 项目： xrick/AlignmentDuration

    def alignLyricsSection(self, extractedPitchList, listNonVocalFragments,
                           tokenLevelAlignedSuffix, currSectionLink):
        '''
        Align the lyrics of one section, or read back a stored alignment.

        If the file currSectionLink.URIRecordingChunk + tokenLevelAlignedSuffix
        exists, the result is read with self.read_decoded and nothing is
        decoded. Otherwise features are loaded into a FeatureExtractor, note
        onsets are prepared according to ParametersAlgo.WITH_ORACLE_ONSETS,
        the section is decoded and, if ParametersAlgo.WRITE_TO_FILE is set,
        the result is written with self.write_decoded_to_file.

        @param extractedPitchList: passed to currSectionLink.loadSmallAudioFragment
        @param listNonVocalFragments: passed to decoder.decodeAudio
        @param tokenLevelAlignedSuffix: suffix of the token-level alignment file
        @param currSectionLink: the section to align; provides
            URIRecordingChunk, beginTs and lyricsWithModels

        @return: tuple (detectedTokenList, detectedPath, phiOptPath). After a
            fresh decode phiOptPath is decoder.path.phiPathLikelihood, i.e.
            the same value that is handed to write_decoded_to_file.
        '''
        URIRecordingChunkResynthesizedNoExt = currSectionLink.URIRecordingChunk
        detectedAlignedfileName = URIRecordingChunkResynthesizedNoExt + tokenLevelAlignedSuffix

        # Built before the cache check, in the same order as before, in case
        # the constructors have side effects the rest of the class relies on.
        fe = FeatureExtractor(self.path_to_hcopy, currSectionLink)
        onsetDetector = OnsetDetector(currSectionLink)

        if os.path.isfile(detectedAlignedfileName):
            # do not decode, read from file
            detectedTokenList, phiOptPath, detectedPath = self.read_decoded(
                URIRecordingChunkResynthesizedNoExt, detectedAlignedfileName)
            return detectedTokenList, detectedPath, phiOptPath

        # -1 is handed to decodeAudio unless the oracle branch overrides it
        fromTsTextGrid = -1
        toTsTextGrid = -1

        if ParametersAlgo.WITH_ORACLE_PHONEMES:  # oracle phonemes
            # blocks until the user confirms on the console
            raw_input(
                'implemented only for Kimseye...! Continue only if working with Kimseye'
            )
            if ParametersAlgo.FOR_MAKAM:
                fromTsTextGrid = 0
                toTsTextGrid = 20.88  # for kimseye etmem
            fromSyllableIdx = 0
            toSyllableIdx = 10
            currSectionLink.loadSmallAudioFragmentOracle(
                self.model, fromSyllableIdx, toSyllableIdx)
            # featureVectors is an alias for LyricsWithModelsOracle here
            fe.featureVectors = currSectionLink.lyricsWithModels

        else:  ###### extract audio features
            fe.featureVectors = currSectionLink.loadSmallAudioFragment(
                fe, extractedPitchList, self.recording.recordingNoExtURI,
                self.model)

        #################### decode
        decoder = Decoder(currSectionLink,
                          currSectionLink.lyricsWithModels,
                          URIRecordingChunkResynthesizedNoExt)

        ##### prepare note onsets. result stored in files, which are used in decoding
        if ParametersAlgo.WITH_ORACLE_ONSETS == 1:
            URIrecOnsets = os.path.join(
                os.path.dirname(self.recording.recordingNoExtURI),
                ParametersAlgo.ANNOTATION_RULES_ONSETS_EXT)
            onsetDetector.parseNoteOnsetsGrTruth(URIrecOnsets)
        elif ParametersAlgo.WITH_ORACLE_ONSETS == 0:
            onsetDetector.extractNoteOnsets(
                URIRecordingChunkResynthesizedNoExt + '.wav')
        # any other value: no onsets are prepared

        detectedTokenList = decoder.decodeAudio(fe, onsetDetector,
                                                listNonVocalFragments,
                                                fromTsTextGrid,
                                                toTsTextGrid)
        # shift the decoded tokens by the section begin timestamp
        detectedTokenList = addTimeShift(detectedTokenList,
                                         currSectionLink.beginTs)

        detectedPath = decoder.path.pathRaw
        # Previously the decode path returned the '' placeholder here while
        # the cached path returned the stored value; return the likelihood
        # that is also persisted so both paths are consistent.
        # NOTE(review): assumes read_decoded returns what
        # write_decoded_to_file stored - confirm.
        phiOptPath = decoder.path.phiPathLikelihood

        ##### write all decoded output persistently to files
        if ParametersAlgo.WRITE_TO_FILE:
            self.write_decoded_to_file(
                tokenLevelAlignedSuffix,
                URIRecordingChunkResynthesizedNoExt,
                phiOptPath, detectedTokenList)

        ### VISUALIZE result
        #     decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)

        return detectedTokenList, detectedPath, phiOptPath