def extract_one(self, L, P, R):
    """
    Given left context `L`, punctuation mark `P`, and right context
    `R`, extract features. Probability distributions for any
    quantile-based features will not be modified.

    This is a generator; it yields feature strings in this order:

    * the constant bias feature;
    * features of `L`: when `L` is neither quote-like nor number-like,
      its clipped length, whether it contains a period, its case
      feature (skipped when `self.nocase` is set or the case feature
      is falsy), and whether it lacks vowels; then the identity of
      `L` (normalized to a quote/number token, or upper-cased);
    * the identity of `P`;
    * features of `R`: its case feature (same conditions as for `L`)
      and its normalized identity;
    * the conjunction of the `L` and `R` identity features.

    NOTE: the `L:` case feature is computed from `L`. It was previously
    computed from `R`, which merely duplicated the `R:` case feature;
    models trained with that behavior may need to be retrained.
    """
    yield "*bias*"
    # L feature(s)
    if match(QUOTE, L):
        L = QUOTE_TOKEN
    elif isnumberlike(L):
        L = NUMBER_TOKEN
    else:
        # length is clipped at CLIP to bound the number of distinct
        # length features
        yield "len(L)={}".format(min(len(L), CLIP))
        if "." in L:
            yield "L:*period*"
        if not self.nocase:
            # case must be inspected before `L` is upper-cased below
            cf = case_feature(L)
            if cf:
                # NOTE(review): the trailing apostrophe looks
                # unintentional, but it is part of the existing
                # feature name and is kept for compatibility
                yield "L:{}'".format(cf)
        # identity features are case-insensitive; case information is
        # carried only by the separate case feature above
        L = L.upper()
        # presumably VOWELS holds upper-case characters, hence the
        # check after upper-casing -- TODO confirm
        if not any(char in VOWELS for char in L):
            yield "L:*no-vowel*"
    L_feat = "L='{}'".format(L)
    yield L_feat
    # P feature(s)
    yield "P='{}'".format(P)
    # R feature(s)
    if match(QUOTE, R):
        R = QUOTE_TOKEN
    elif isnumberlike(R):
        R = NUMBER_TOKEN
    else:
        if not self.nocase:
            # case must be inspected before `R` is upper-cased below
            cf = case_feature(R)
            if cf:
                # trailing apostrophe kept for compatibility (see the
                # matching L feature)
                yield "R:{}'".format(cf)
        R = R.upper()
    R_feat = "R='{}'".format(R)
    yield R_feat
    # the combined L,R feature
    yield "{},{}".format(L_feat, R_feat)