示例#1
0
 def extract_one(self, L, P, R):
     """
     Given left context `L`, punctuation mark `P`, and right context
     `R`, extract features. Probability distributions for any
     quantile-based features will not be modified.

     This is a generator; it yields feature strings in this order:

     * ``"*bias*"`` (always);
     * features describing `L`;
     * ``"P='...'"`` for the punctuation mark;
     * features describing `R`;
     * one conjoined feature made of the `L` and `R` identity features.

     A context token for which ``match(QUOTE, token)`` is truthy is
     replaced by `QUOTE_TOKEN`, and one for which ``isnumberlike(token)``
     is truthy by `NUMBER_TOKEN`; no further features are extracted from
     such a token. Any other token is upper-cased before its identity
     feature is emitted. Case features are skipped when ``self.nocase``
     is true.
     """
     yield "*bias*"
     # L feature(s)
     if match(QUOTE, L):
         L = QUOTE_TOKEN
     elif isnumberlike(L):
         L = NUMBER_TOKEN
     else:
         # length of L, capped at CLIP
         yield "len(L)={}".format(min(len(L), CLIP))
         if "." in L:
             yield "L:*period*"
         if not self.nocase:
             # The "L:" case feature describes the left token itself; the
             # right token's case is emitted separately as "R:..." below.
             # This must run before L is upper-cased.
             cf = case_feature(L)
             if cf:
                 # NOTE: the trailing apostrophe is part of the emitted
                 # feature name (presumably a typo); it is left as-is so
                 # the feature names themselves do not change.
                 yield "L:{}'".format(cf)
         L = L.upper()
         # VOWELS is checked against the upper-cased token
         if not any(char in VOWELS for char in L):
             yield "L:*no-vowel*"
     L_feat = "L='{}'".format(L)
     yield L_feat
     # P feature(s)
     yield "P='{}'".format(P)
     # R feature(s)
     if match(QUOTE, R):
         R = QUOTE_TOKEN
     elif isnumberlike(R):
         R = NUMBER_TOKEN
     else:
         if not self.nocase:
             # computed before R is upper-cased
             cf = case_feature(R)
             if cf:
                 # NOTE: trailing apostrophe kept; see the "L:" feature
                 yield "R:{}'".format(cf)
         R = R.upper()
     R_feat = "R='{}'".format(R)
     yield R_feat
     # the combined L,R feature
     yield "{},{}".format(L_feat, R_feat)
示例#2
0
 def extract_one(self, L, P, R):
     """
     Given left context `L`, punctuation mark `P`, and right context
     `R`, extract features. Probability distributions for any
     quantile-based features will not be modified.

     This is a generator; it yields feature strings in this order:

     * ``"*bias*"`` (always);
     * features describing `L`;
     * ``"P='...'"`` for the punctuation mark;
     * features describing `R`;
     * one conjoined feature made of the `L` and `R` identity features.

     A context token for which ``match(QUOTE, token)`` is truthy is
     replaced by `QUOTE_TOKEN`, and one for which ``isnumberlike(token)``
     is truthy by `NUMBER_TOKEN`; no further features are extracted from
     such a token. Any other token is upper-cased before its identity
     feature is emitted. Case features are skipped when ``self.nocase``
     is true.
     """
     yield "*bias*"
     # L feature(s)
     if match(QUOTE, L):
         L = QUOTE_TOKEN
     elif isnumberlike(L):
         L = NUMBER_TOKEN
     else:
         # length of L, capped at CLIP
         yield "len(L)={}".format(min(len(L), CLIP))
         if "." in L:
             yield "L:*period*"
         if not self.nocase:
             # The "L:" case feature describes the left token itself; the
             # right token's case is emitted separately as "R:..." below.
             # This must run before L is upper-cased.
             cf = case_feature(L)
             if cf:
                 # NOTE: the trailing apostrophe is part of the emitted
                 # feature name (presumably a typo); it is left as-is so
                 # the feature names themselves do not change.
                 yield "L:{}'".format(cf)
         L = L.upper()
         # VOWELS is checked against the upper-cased token
         if not any(char in VOWELS for char in L):
             yield "L:*no-vowel*"
     L_feat = "L='{}'".format(L)
     yield L_feat
     # P feature(s)
     yield "P='{}'".format(P)
     # R feature(s)
     if match(QUOTE, R):
         R = QUOTE_TOKEN
     elif isnumberlike(R):
         R = NUMBER_TOKEN
     else:
         if not self.nocase:
             # computed before R is upper-cased
             cf = case_feature(R)
             if cf:
                 # NOTE: trailing apostrophe kept; see the "L:" feature
                 yield "R:{}'".format(cf)
         R = R.upper()
     R_feat = "R='{}'".format(R)
     yield R_feat
     # the combined L,R feature
     yield "{},{}".format(L_feat, R_feat)