def process_utterance(self, utt):
    """Attach phonetic structure to each target node of *utt*.

    For word-class nodes, look up a pronunciation (optionally POS-sensitive)
    and build syllable/segment substructure; for pause-class nodes, add a
    single segment child carrying the pause symbol. Exits the process if a
    node's class is not recognised.

    :param utt: utterance tree supporting ``xpath``; matched nodes must carry
        ``self.class_attribute`` and ``self.target_attribute``.
    """
    for node in utt.xpath(self.target_nodes):
        assert node.has_attribute(self.class_attribute)
        assert node.has_attribute(self.target_attribute)
        current_class = node.attrib[self.class_attribute]
        if current_class in self.word_classes:
            word = node.attrib[self.target_attribute]
            # Part-of-speech is optional on the node; default to None.
            pos = node.attrib.get(self.part_of_speech_attribute, None)
            (pronunciation, method) = self.get_phonetic_segments(word, part_of_speech=pos)
            # Record which lookup method produced the pronunciation.
            node.set('phones_from', method)
            NodeProcessors.add_syllable_structure(
                node, pronunciation,
                syllable_delimiter='|', syllable_tag='syllable',
                phone_tag='segment', pronunciation_attribute='pronunciation',
                stress_attribute='stress')
        elif current_class in self.probable_pause_classes:
            pronunciation = c.PROB_PAUSE  # [c.PROB_PAUSE]
            child = Element('segment')
            child.set('pronunciation', pronunciation)
            node.add_child(child)
        elif current_class in self.possible_pause_classes:
            pronunciation = c.POSS_PAUSE  # [c.POSS_PAUSE]
            child = Element('segment')
            child.set('pronunciation', pronunciation)
            node.add_child(child)
        else:
            # BUG FIX: the original message contained a bare "%s" with no
            # argument, so the offending class was never shown.
            sys.exit('Class "%s" not in any of word_classes, probable_pause_classes, possible_pause_classes'
                     % current_class)
def process_utterance(self, utt):
    """Expand each target node of *utt* into child nodes.

    Word-class nodes are split into phonetic segments via
    ``self.get_phonetic_segments``; pause-class nodes get a single pause
    symbol. One child element per chunk is appended, with the chunk stored
    in ``self.output_attribute``. Exits the process on an unknown class.

    :param utt: utterance tree supporting ``xpath``; matched nodes must carry
        ``self.class_attribute`` and ``self.target_attribute``.
    """
    for node in utt.xpath(self.target_nodes):
        assert node.has_attribute(self.class_attribute)
        assert node.has_attribute(self.target_attribute)
        current_class = node.attrib[self.class_attribute]
        if current_class in self.word_classes:
            word = node.attrib[self.target_attribute]
            children = self.get_phonetic_segments(word)
        elif current_class in self.probable_pause_classes:
            children = [c.PROB_PAUSE]
        elif current_class in self.possible_pause_classes:
            children = [c.POSS_PAUSE]
        else:
            # BUG FIX: the original message contained a bare "%s" with no
            # argument, so the offending class was never shown.
            sys.exit('Class "%s" not in any of word_classes, probable_pause_classes, possible_pause_classes'
                     % current_class)
        for chunk in children:
            child = Element(self.child_node_type)
            child.set(self.output_attribute, chunk)
            node.add_child(child)
def process_utterance(self, utt):
    """Split each target node's text into child nodes, one per chunk.

    Reads ``self.split_attribute`` from every node matched by
    ``self.target_nodes``, applies ``self.splitting_function``, and appends
    one child element per resulting chunk. Token class and safetext
    attributes are added when the corresponding flags are enabled.
    """
    for parent in utt.xpath(self.target_nodes):
        assert parent.has_attribute(self.split_attribute)
        raw_text = parent.get(self.split_attribute)
        for piece in self.splitting_function(raw_text):
            new_node = Element(self.child_node_type)
            new_node.set(self.split_attribute, piece)
            if self.add_token_classes:
                # Optionally tag each chunk with its token class.
                new_node.set(self.class_attribute, self.classify_token(piece))
            if self.add_safetext:
                # Optionally attach a filesystem/markup-safe rendering.
                new_node.set(self.safetext_attribute, self.safetext_token(piece))
            parent.add_child(new_node)
def process_utterance(self, utt):
    """Normalise (Icelandic) then split each target node's text into children.

    Reads ``self.split_attribute`` from every matched node, runs it through
    ``self.normalise_ice``, splits the result with
    ``self.splitting_function``, and appends one child element per chunk,
    optionally annotated with token class and safetext attributes.
    """
    for parent in utt.xpath(self.target_nodes):
        assert parent.has_attribute(self.split_attribute)
        raw_text = parent.get(self.split_attribute)
        # Icelandic-specific normalisation before tokenisation.
        cleaned = self.normalise_ice(raw_text)
        for piece in self.splitting_function(cleaned):
            new_node = Element(self.child_node_type)
            new_node.set(self.split_attribute, piece)
            if self.add_token_classes:
                new_node.set(self.class_attribute, self.classify_token(piece))
            if self.add_safetext:
                new_node.set(self.safetext_attribute, self.safetext_token(piece))
            parent.add_child(new_node)
def process_utterance(self, utt):
    """Normalise then split each target node's text into child nodes.

    Reads ``self.split_attribute`` from every matched node, runs it through
    ``self.normalizer.normalize``, splits the normalised text with
    ``self.splitting_function``, and appends one child element per chunk,
    optionally annotated with token class and safetext attributes.

    NOTE(review): the ``print`` calls look like leftover debug output —
    consider switching them to ``logging``.
    """
    print('target nodes: %s' % (utt.xpath(self.target_nodes)))
    # FIX: removed dead local `node_count` (incremented but never used).
    for node in utt.xpath(self.target_nodes):
        assert node.has_attribute(self.split_attribute)
        text_to_normalise = node.get(self.split_attribute)
        print(text_to_normalise)
        normalised = self.normalizer.normalize(text_to_normalise)
        # get tokenized arr from normalizer, don't split here again
        child_chunks = self.splitting_function(normalised)
        for chunk in child_chunks:
            child = Element(self.child_node_type)
            child.set(self.split_attribute, chunk)
            if self.add_token_classes:
                token_class = self.classify_token(chunk)
                child.set(self.class_attribute, token_class)
            if self.add_safetext:
                token_safetext = self.safetext_token(chunk)
                child.set(self.safetext_attribute, token_safetext)
            node.add_child(child)
def process_utterance(self, utt):
    """Split each target node's text into child nodes, skipping end markers.

    Reads ``self.split_attribute`` from every matched node, splits it with
    ``self.splitting_function``, and appends one child element per chunk.
    Chunks equal to the ``"_END_"`` sentinel are discarded. Token class and
    safetext attributes are added when the corresponding flags are enabled.
    """
    for parent in utt.xpath(self.target_nodes):
        assert parent.has_attribute(self.split_attribute)
        raw_text = parent.get(self.split_attribute)
        for piece in self.splitting_function(raw_text):
            # Guard clause: drop the end-of-sequence sentinel.
            if piece == "_END_":
                continue
            new_node = Element(self.child_node_type)
            new_node.set(self.split_attribute, piece)
            if self.add_token_classes:
                new_node.set(self.class_attribute, self.classify_token(piece))
            if self.add_safetext:
                new_node.set(self.safetext_attribute, self.safetext_token(piece))
            parent.add_child(new_node)