def doitForTestPiece(compositionName, recordingDir, withSynthesis=0):

    ####### prepare composition! ############

    pathToComposition = os.path.join(PATH_TEST_DATASET, compositionName)
    makamScore = loadLyrics(pathToComposition, whichSection=1)

    # TODO: issue 14

    ###########        ----- align one recording

    pathToRecording = os.path.join(pathToComposition, recordingDir)

    os.chdir(pathToRecording)
    #         pathToSectionAnnotations = os.path.join(pathToRecording, glob.glob('*.sectionAnno.txt')[0])
    #         pathToAudio = os.path.join(pathToRecording, glob.glob('*.wav')[0])

    listExtensions = ["sectionAnno.json", "sectionAnno.txt", "sectionAnno.tsv"]
    sectionAnnoFiles = findFileByExtensions(pathToRecording, listExtensions)
    pathToSectionAnnotations = os.path.join(pathToRecording,
                                            sectionAnnoFiles[0])

    pathToAudio = os.path.join(pathToRecording, recordingDir) + '.wav'

    # TODO: issue 14
    recordingSegmenter = RecordingSegmenter()
    makamRecording = recordingSegmenter.segment(makamScore, pathToAudio,
                                                pathToSectionAnnotations)

    # NOTE: the alignment call itself is left commented out here, so an empty error list is returned
    alignmentErrors = []
    #         alignmentErrors = recordingSegmenter.alignOneRecording(MODEL_URI, makamRecording, OUTPUT_PATH, withSynthesis)

    return alignmentErrors
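The helper findFileByExtensions used here (and again in the third example) is not shown on this page; a minimal sketch of such a helper, assuming it simply globs the directory for each suffix in turn and returns the matching file names, could look like this:

import glob
import os


def findFileByExtensions(directory, listExtensions):
    # hypothetical sketch: collect files in 'directory' whose names end with one of
    # the given suffixes, in the order the suffixes are listed
    matches = []
    for extension in listExtensions:
        matches.extend(sorted(glob.glob(os.path.join(directory, '*' + extension))))
    # return bare file names, since the callers join them with the directory themselves
    return [os.path.basename(match) for match in matches]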
Example #2
def alignDependingOnWithDuration(URIrecordingNoExt, whichSection, pathToComposition, withDuration, withSynthesis, evalLevel, params, usePersistentFiles, htkParser):
    '''
    Call the alignment method depending on whether duration-aware alignment or plain HTK alignment is selected.
    '''

    Phonetizer.initLookupTable(withSynthesis)
    
    tokenLevelAlignedSuffix, phonemesAlignedSuffix = determineSuffix(withDuration, withSynthesis, evalLevel)
    
    
    if withDuration:
        alignmentErrors, detectedWordList, grTruthDurationWordList = alignOneChunk(URIrecordingNoExt, pathToComposition, whichSection, htkParser, params, evalLevel, usePersistentFiles)
        
            
    else:
        URIrecordingAnno = URIrecordingNoExt + ANNOTATION_EXT
        URIrecordingWav = URIrecordingNoExt + AUDIO_EXTENSION
        # a new makamScore (lyrics) object is used for this section
        lyricsObj = loadLyrics(pathToComposition, whichSection)
        lyrics = str(lyricsObj)
        # in case we are at a no-lyrics section
        if not lyrics or lyrics == '_SAZ_':
            logger.warn("skipping section {} with no lyrics ...".format(whichSection))
            return [], [], [], []
    
        outputHTKPhoneAlignedURI = Aligner.alignOnechunk(MODEL_URI, URIrecordingWav, lyrics, URIrecordingAnno, '/tmp/', withSynthesis)
        alignmentErrors = evalAlignmentError(URIrecordingAnno, outputHTKPhoneAlignedURI, evalLevel)
        detectedWordList = outputHTKPhoneAlignedURI
        grTruthDurationWordList = []
    
    # store decoding results in a file. FIXME: if withDuration, the output is not an MLF
    detectedAlignedfileName = tokenList2TabFile(detectedWordList, URIrecordingNoExt, tokenLevelAlignedSuffix)
        
    return alignmentErrors, detectedWordList, grTruthDurationWordList, detectedAlignedfileName
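For orientation, a hypothetical call of alignDependingOnWithDuration is sketched below; every path, the section index, the evaluation level and the params/htkParser objects are placeholders for values the surrounding project prepares elsewhere, not values taken from the project itself.

# hypothetical usage sketch: all arguments are placeholders; 'params' and 'htkParser'
# stand for objects constructed elsewhere in the project
alignmentErrors, detectedWords, grTruthWords, outFileName = alignDependingOnWithDuration(
    URIrecordingNoExt='/data/recordings/someRecording',   # recording URI without extension
    whichSection=1,
    pathToComposition='/data/scores/someComposition',
    withDuration=True,
    withSynthesis=False,
    evalLevel=0,
    params=params,
    usePersistentFiles='True',
    htkParser=htkParser)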
Example #3
def doit(argv):

    if len(argv) != 4:
        sys.exit("usage: {} <recordingURI.wav> <sectionAnnoPath> <scorePath>".format(argv[0]))
    recordingURI = argv[1]
    sectionAnnoPath = argv[2]
    scorePath = argv[3]

    makamScore = loadLyrics(scorePath, whichSection=1)

    os.chdir(sectionAnnoPath)

    listExtensions = ["sectionAnno.json", "sectionAnno.txt", "sectionAnno.tsv"]
    sectionAnnoFiles = findFileByExtensions(sectionAnnoPath, listExtensions)
    pathToSectionAnnotations = os.path.join(sectionAnnoPath, sectionAnnoFiles[0])

    recordingSegmenter = RecordingSegmenter()
    makamRecording = recordingSegmenter.segment(makamScore, recordingURI, pathToSectionAnnotations)
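doit expects the full argv list, so a command-line entry point for it is a one-liner; a sketch (the script name in the comment is only an example):

import sys

if __name__ == '__main__':
    # e.g.: python segmentRecording.py recording.wav /path/to/sectionAnnotations /path/to/score
    doit(sys.argv)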
Example #4
def alignOneChunk(URIrecordingNoExt, pathToComposition, whichSection, htkParser, params, evalLevel, usePersistentFiles):
    '''
    Top-level logic for aligning one audio chunk.
    '''

    lyrics = loadLyrics(pathToComposition, whichSection)
    lyricsStr = str(lyrics)

    if not lyricsStr or lyricsStr == '_SAZ_':
        logger.warn("skipping section {} with no lyrics ...".format(whichSection))
        return [], [], []

    logger.info("aligning audio {}".format(URIrecordingNoExt))
    lyricsWithModels = LyricsWithModels(lyrics, htkParser, params.ONLY_MIDDLE_STATE)
    
    # DEBUG: score-derived phoneme durations
    # lyricsWithModels.printPhonemeNetwork()

    decoder = Decoder(lyricsWithModels, params.ALPHA)
    # TODO: DEBUG: do not load models
    # decoder = Decoder(lyrics, withModels=False, numStates=86)

    #################### decode
    if usePersistentFiles == 'True':
        usePersistentFiles = True
    elif usePersistentFiles == 'False':
        usePersistentFiles = False
    else:
        sys.exit("usePersistentFiles can be only True or False")
        
    detectedWordList, grTruthWordList = decodeAudioChunk(URIrecordingNoExt, decoder, evalLevel, usePersistentFiles)
    
### VISUALIZE
#     decoder.lyricsWithModels.printWordsAndStatesAndDurations(decoder.path)

#################### evaluate
    alignmentErrors = _evalAlignmentError(URIrecordingNoExt + ANNOTATION_EXT, detectedWordList, evalLevel)
    return alignmentErrors, detectedWordList, grTruthWordList
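The inline 'True'/'False' string check on usePersistentFiles above could also be factored into a small helper; a sketch of one way to do that (parseBoolFlag is a hypothetical name, not part of the project):

import sys


def parseBoolFlag(value):
    # hypothetical helper: map the command-line strings 'True'/'False' to booleans
    mapping = {'True': True, 'False': False}
    if value not in mapping:
        sys.exit("usePersistentFiles can be only True or False")
    return mapping[value]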
Example #5
File: tests.py  Project: EQ4/HMMDuration
def decode(lyricsWithModels, observationFeatures):
    '''
    decode the observation features with duration-aware Viterbi and print the detected path
    '''
    alpha = 0.97
    deviationInSec = 0.1
    ONLY_MIDDLE_STATE = False
    params = Parameters(alpha, ONLY_MIDDLE_STATE, deviationInSec)
    decoder = Decoder(lyricsWithModels, params.ALPHA, params.deviationInSec)
    
    # decode with duration-aware Viterbi
    decoder.hmmNetwork.initDecodingParameters(observationFeatures)
    chiBackPointer, psiBackPointer = decoder.hmmNetwork._viterbiForcedDur(observationFeatures)

    # backtrack the optimal path
    path = Path(chiBackPointer, psiBackPointer)
    detectedWordList = decoder.path2ResultWordList(path)

    # DEBUG: print the detected words with their states and durations
    decoder.lyricsWithModels.printWordsAndStatesAndDurations(path)
    path.printDurations()
    

    
if __name__ == '__main__':
    # test_simple()
    # test_rand()
    # test_discrete()
    # testRand_DurationHMM()

    # pathToComposition, whichSection and URIrecordingNoExt are assumed to be defined elsewhere in tests.py
    withSynthesis = False
    lyrics = loadLyrics(pathToComposition, whichSection, withSynthesis)
    lyricsWithModels, observationFeatures = loadSmallAudioFragment(lyrics, URIrecordingNoExt, withSynthesis, fromTs=-1, toTs=-1)
    decode(lyricsWithModels, observationFeatures)