def proto_from_txt(lab, args):
    """Build a "proto" utterance (nested dicts) from one line of text.

    Args:
        lab: Sequence whose first item names the utterance (only the part
            after the last "/" is kept as the id) and whose remaining items
            are the words of the utterance.
        args: Object providing ``stanfordparse`` (truthy if parses should be
            used), ``dictionary`` (offering ``get_single_entry(word)``) and,
            when ``stanfordparse`` is truthy, ``parsedict`` mapping the
            utterance id to its parse.

    Returns:
        A dict ``{"utt": [...], "id": ...}``. Each word in ``"utt"`` is
        ``{"id": word, "syllables": [...]}``; each syllable is
        ``{"id", "phonemes", "stress"}``; each phoneme is
        ``{"id", "stress", "start", "end"}`` with phony, evenly spaced times.

    Raises:
        SystemExit: With status 1 if a parse leaf does not match the
            corresponding word in ``lab``.
    """
    # Phony duration given to every phoneme: 100ms in HTK lab format.
    phone_dur = 1000000

    # Create words
    proto = {"utt": [], "id": lab[0].split("/")[-1]}
    words = lab[1:]

    # Make words and look up in dictionary.
    if not args.stanfordparse:
        # No parse (i.e. no pos tags): per the original author's note we
        # simply take the single entry the dictionary hands back and forget
        # about its pos tag.
        for word in words:
            entry = args.dictionary.get_single_entry(word)
            proto["utt"].append({"id": word, "syllables": [entry]})
    else:
        # Else a parse should exist and we can get the leaves from that.
        tree = parsetrees.stanfordtree()
        tree.make_tree(args.parsedict[proto["id"]])
        leafs = tree.get_leafs()
        # In this case we need to do some merging
        if len(leafs) != len(words):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("WARNING! Merging not implemented yet - the current is a non-complete hack!")
            print("Check if this sentences was done correctly - {0}".format(proto["id"]))
            for i, text_word in enumerate(words):
                if text_word == "i'm":
                    # The parse has two leaves for "i'm": fold the second
                    # one into the first and join their pos tags with "|".
                    second_pos = leafs[i + 1].label.split("-")[0]
                    leafs.pop(i + 1)
                    first_pos = leafs[i].label.split("-")[0]
                    leafs[i].label = first_pos + "|" + second_pos + "-i'm"
        for i, leaf in enumerate(leafs):
            # Labels are unpacked as "POS-word"; split on the first hyphen
            # only so that a hyphenated word does not break the unpacking.
            _pos, word = leaf.label.lower().split("-", 1)
            if word != lab[i + 1]:
                print("ERROR: Parse and text does not match in {0}!".format(lab))
                print("{0} != {1}".format(word, lab[i + 1]))
                # Non-zero status so callers/scripts can see the failure.
                sys.exit(1)
            c_best = args.dictionary.get_single_entry(word)
            proto["utt"].append({"id": word, "syllables": [c_best]})

    # Make syllables and split dictionary format.
    # Phony phoneme duration counter, running across the whole utterance.
    cur_dur = 0
    for word in proto["utt"]:
        # An entry looks like "((p1 p2) s1) ((p3) s2)": cut it between
        # syllables, then drop the outer parentheses of each piece.
        raw_sylls = [x.strip("()") for x in word["syllables"][0].split(") (")]
        word["syllables"] = []
        for raw_syll in raw_sylls:
            # "p1 p2) s1" -> ["p1 p2", "s1"]
            parts = raw_syll.split(") ")
            c_syll = {"id": "", "phonemes": [], "stress": parts[1]}
            # Make the phonemes
            for phon in parts[0].split():
                start = cur_dur
                cur_dur += phone_dur
                # Phone stress is not encoded directly in the combilex dict,
                # so it stays None.
                c_syll["phonemes"].append(
                    {"id": phon, "stress": None, "start": start, "end": cur_dur}
                )
            c_syll["id"] = parts[0].replace(" ", "")
            word["syllables"].append(c_syll)
    return proto
def load_stanford_parse(utt, parse):
    """Attach Stanford parse information to the words of an utterance.

    Every non-pause word gets its ``id`` and ``pos`` from the matching parse
    leaf, plus ``parent_phrase``, ``grandparent_phrase`` and
    ``greatgrandparent_phrase``. Missing ancestors are replaced by
    ``parsetrees.get_fake_stanford_parse()``. Words whose id is one of the
    utterance's silence phonemes get fake phrases and the pos ``"sil"``.

    Args:
        utt: Utterance object; must already have ``words`` loaded.
        parse: The parse handed to ``parsetrees.stanfordtree().make_tree``.

    Raises:
        SystemExit: With status 1 if the utterance has no words, or if the
            number of parse leaves still differs from the number of
            non-pause words after trying to split words.
    """
    if utt.words is None:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Error: No words in utterance! Please load an mlf or txt (not implemented yet) file first!")
        # Non-zero status so callers/scripts can see the failure.
        sys.exit(1)

    tree = parsetrees.stanfordtree()
    tree.make_tree(parse)
    leafs = tree.get_leafs()

    if len(leafs) != utt.num_words_no_pau():
        # First we try to see if this is due to differences in how words are
        # dealt with in parsing and annotation.
        # Prime example is using 's in e.g. there's for transcription instead
        # of there is. Parsing splits there's into two whereas in e.g.
        # combilex there's is one word.
        # If this is the case we split the WORD into two with the 's being a
        # single phoneme single syllable word. In other cases the contraction
        # straddles two words and we add a "phony" word which affects
        # contexts but adds no phonemes.
        utterance_utils.try_split_words(utt)
        if len(leafs) != utt.num_words_no_pau():
            print("Error! Number of leaves ({0}) not equal to number of words ({1})!".format(len(leafs), utt.num_words_no_pau()))
            print(utt.id)
            for w in utt.words:
                print(w.id)
            sys.exit(1)

    # Match each word with parse
    for i, word in enumerate(utt.get_words_no_pau()):
        leaf = leafs[i]
        # Labels are read as "POS-word"; split on the first hyphen only so a
        # hyphenated word is not cut short when it becomes the word id.
        label_parts = leaf.label.split("-", 1)
        word.id = label_parts[1]
        word.pos = label_parts[0]
        # There should always be a parent
        word.parent_phrase = leaf.parent
        # But there might not be more than one
        if word.parent_phrase.parent is not None:
            word.grandparent_phrase = word.parent_phrase.parent
        else:
            word.grandparent_phrase = parsetrees.get_fake_stanford_parse()
        # And certainly we might be done here
        great_grandparent = word.grandparent_phrase.parent
        if great_grandparent in [None, "xx"] or great_grandparent.label == "xx":
            word.greatgrandparent_phrase = parsetrees.get_fake_stanford_parse()
        else:
            word.greatgrandparent_phrase = great_grandparent

    # Now add fake parse for sil, pau and #
    for word in utt.words:
        if word.id in utt.phoneme_features.get_sil_phonemes():
            word.parent_phrase = parsetrees.get_fake_stanford_parse()
            word.grandparent_phrase = parsetrees.get_fake_stanford_parse()
            word.greatgrandparent_phrase = parsetrees.get_fake_stanford_parse()
            word.pos = "sil"