def _sepFrontSpePunc(self, word): """ Separate special punctuation marks at the front of the word Special punctuation marks are non-English marks, which cannot be processed by regular expression Check out 'self.special' """ frontWords = [] input_str = word.nom tagNames = word.listNomTag() featNames = word.listNomFeature() new_str = input_str for key in self.special.keys() : if new_str.find(key) == 0 : new_str = new_str[len(key):] tmpWord = Word(key, tagNames, featNames) tmpWord.addTag("c") frontWords.append(tmpWord) return frontWords, new_str
def _sepFrontPunc(self, word): """ Separate punctuation marks at the front of the word """ frontWords = [] input_str = word.nom tagNames = word.listNomTag() featNames = word.listNomFeature() tmp_str = input_str i=0 allPunc = '.,():;{}[]!?#$%\*+<=>@^_|~"' #exclude / if self.options.u : allPunc = allPunc[:-1] while (i < len(input_str)) : c = input_str[i] if c in allPunc : tmpWord = Word(c, tagNames, featNames) tmpWord.addTag("c") frontWords.append(tmpWord) #create word for a punctuation mark tmp_str = input_str[i+1:] i += 1 else : i = len(input_str) #exit return frontWords, tmp_str