示例#1
0
 def _get_features(self, v="", v_corpus=None, cls2id=None, domain="src"):
     _flist = []
     _labellist_int = []
     _labellist_str = []
     _labelid = cls2id[v]
     if v_corpus:
         for sid, s in enumerate(v_corpus):
             try:
                 fe = FeatureExtractor(s, verb=v)
                 if "chunk" in self.featuretypes:
                     fe.chunk()
                 if "3gram" in self.featuretypes:
                     fe.ngrams(n=3)
                 if "5gram" in self.featuretypes:
                     fe.ngrams(n=5)
                 if "7gram" in self.featuretypes:
                     fe.ngrams(n=7)
                 if "dep" in self.featuretypes:
                     fe.dependency()
                 if "srl" in self.featuretypes:
                     fe.srl()
                 if "ne" in self.featuretypes:
                     fe.ne()
                 if "errorprob" in self.featuretypes:
                     pass
                 if "topic" in self.featuretypes:
                     pass
                 augf = proc_easyadapt(fe.features, domain=domain)
                 _flist.append(augf)
                 _labellist_int.append(_labelid)
                 _labellist_str.append(v)
             except ValueError:
                 logging.debug(pformat("CaseMaker feature extraction: couldn't find the verb"))
             except:
                 print v
                 raise
     else:
         _flist.append(self.nullfeature)
         _labellist_int.append(_labelid)
         _labellist_str.append(v)
     return _flist, _labellist_str, _labellist_int
示例#2
0
 def _get_features_tgt(self, v_corpus=None, cls2id=None, domain="tgt"):
     _flist = []
     _labellist_int = []
     _labellist_str = []
     for sid, sdic in enumerate(v_corpus):
         v = sdic["label_corr"]
         _labelid = cls2id[v]
         try:
             fe = SentenceFeatures(sdic["parsed_corr"], verb=v, v_idx=sdic["vidx_corr"])
             if "chunk" in self.featuretypes:
                 fe.chunk()
             if "3gram" in self.featuretypes:
                 fe.ngrams(n=3)
             if "5gram" in self.featuretypes:
                 fe.ngrams(n=5)
             if "7gram" in self.featuretypes:
                 fe.ngrams(n=7)
             if "dep" in self.featuretypes:
                 fe.dependency()
             if "srl" in self.featuretypes:
                 fe.srl()
             if "ne" in self.featuretypes:
                 fe.ne()
             if "errorprob" in self.featuretypes:
                 pass
             if "topic" in self.featuretypes:
                 pass
             augf = proc_easyadapt(fe.features, domain=domain)
             assert augf and _labelid and v
             _flist.append(augf)
             _labellist_int.append(_labelid)
             _labellist_str.append(v)
         except ValueError:
             logging.debug(pformat("CaseMaker feature extraction: couldn't find the verb"))
         except:
             print v
     # else:
     # _flist.append(self.nullfeature)
     # _labellist_int.append(_labelid)
     # _labellist_str.append(v)
     return _flist, _labellist_str, _labellist_int
示例#3
0
def get_features(tags=None, v="", v_idx=None, features=None):
    """Extract the requested feature types for one item and adapt them.

    A ``SentenceFeatures`` object is built from ``tags``, ``v`` and ``v_idx``;
    each extractor whose name appears in ``features`` is run, and the
    collected ``fe.features`` are returned through ``proc_easyadapt`` with
    ``domain="tgt"``.

    Recognised names, in execution order: "chunk", "3gram", "5gram", "7gram",
    "dependency", "ne", "srl", "topic", "errorprob".

    Args:
        tags: Value passed to ``SentenceFeatures`` as ``tags``; defaults to a
            fresh empty list.
        v: Value passed to ``SentenceFeatures`` as ``verb``.
        v_idx: Value passed to ``SentenceFeatures`` as ``v_idx``.
        features: Container of feature-type names to enable; defaults to
            none enabled.

    Returns:
        Whatever ``proc_easyadapt(fe.features, domain="tgt")`` returns.
    """
    # Fresh objects per call: the previous ``[]`` defaults were shared between
    # calls, and ``tags`` is handed to SentenceFeatures, which may keep it.
    if tags is None:
        tags = []
    if features is None:
        features = ()

    fe = SentenceFeatures(tags=tags, verb=v, v_idx=v_idx)
    if "chunk" in features:
        fe.chunk()
    for n in (3, 5, 7):
        if "%dgram" % n in features:
            fe.ngrams(n=n)
    # NOTE(review): this function keys on "dependency" while the CaseMaker
    # methods in this file key on "dep" -- confirm which spelling callers use.
    if "dependency" in features:
        fe.dependency()
    if "ne" in features:
        fe.ne()
    if "srl" in features:
        fe.srl()
    if "topic" in features:
        fe.topic()
    if "errorprob" in features:
        fe.ep()
    return proc_easyadapt(fe.features, domain="tgt")