Пример #1
0
 def ranker_instances(self):
     """Yield ranking instances grouped by the EDU to be attached.

     Each file in ``self._files`` is loaded into a ``FeatureMap``, filtered
     with ``self._features2filter``, and every entry is converted with
     ``feat_map2_class_instance``.  The resulting candidates are grouped by
     their first key element (``ana``) and the groups are visited in sorted
     key order so that the output order is reproducible.

     Groups that contain no candidate with class ``1`` are reported on
     stderr and skipped.  Groups with several class-``1`` candidates are
     kept.

     Yields:
         ``(edu, cand_list, f)`` where ``cand_list`` is a list of
         ``(cand, cl, feats)`` tuples in the order they were read and ``f``
         is the file the group came from.
     """
     for f in self._files:
         # NOTE: ``print >> stream`` is the Python 2 print statement.
         print >> sys.stderr, f
         feat_map = FeatureMap(f, encoding=self._in_encoding)
         # feature filtering
         feat_map.filter_feats(self._features2filter,
                               mode="lose",
                               continuous=False)  # WARNING: ad hoc!!!
         # build attachment point correct/incorrect candidate lists
         edu_ante_list = {}
         for instance in feat_map._all:
             (ana, cand), (cl, feats) = feat_map2_class_instance(
                 instance,
                 formatting=self._formatting,
                 features2cross=self._features2cross)
             # append in place: rebuilding the list with ``+`` for every
             # candidate made the grouping quadratic per EDU
             edu_ante_list.setdefault(ana, []).append((cand, cl, feats))
         # generate ranking instances; sorting makes sure data always get
         # ordered the same way
         for edu, cand_list in sorted(edu_ante_list.items()):
             if any(cl == 1 for (_cand, cl, _feats) in cand_list):
                 yield edu, cand_list, f
             else:
                 print >> sys.stderr, "Warning: no attachment point for EDU %s! Skipping." % edu
Пример #2
0
 def ranker_instances(self):
     """Generate one ranking instance per EDU that has a correct candidate.

     For every file in ``self._files`` a ``FeatureMap`` is built and
     filtered with ``self._features2filter``; each of its entries is turned
     into ``(ana, cand), (cl, feats)`` by ``feat_map2_class_instance``.
     Candidates sharing the same ``ana`` are collected together, and the
     collections are emitted in sorted ``ana`` order to keep the output
     deterministic.

     A collection without any candidate of class ``1`` triggers a warning
     on stderr and is not emitted.  Collections with more than one
     class-``1`` candidate are emitted unchanged.

     Yields:
         ``(edu, cand_list, f)`` tuples: the grouping key, its list of
         ``(cand, cl, feats)`` candidates, and the originating file.
     """
     for f in self._files:
         # NOTE: Python 2 print statement redirected to stderr.
         print >> sys.stderr, f
         feat_map = FeatureMap(f, encoding=self._in_encoding)
         # feature filtering
         feat_map.filter_feats(self._features2filter,
                               mode="lose",
                               continuous=False)  # WARNING: ad hoc!!!
         # build attachment point correct/incorrect candidate lists
         edu_ante_list = {}
         for instance in feat_map._all:
             (ana, cand), (cl, feats) = feat_map2_class_instance(
                 instance,
                 formatting=self._formatting,
                 features2cross=self._features2cross)
             # grow the per-EDU list in place instead of copying it with
             # ``get(...) + [...]`` on every candidate
             edu_ante_list.setdefault(ana, []).append((cand, cl, feats))
         # make sure data always get ordered the same way
         ordered_groups = sorted(edu_ante_list.items())
         # generate ranking instances
         for edu, cand_list in ordered_groups:
             has_correct = any(cl == 1 for (_cand, cl, _feats) in cand_list)
             if not has_correct:
                 print >> sys.stderr, "Warning: no attachment point for EDU %s! Skipping." % edu
                 continue
             yield edu, cand_list, f
Пример #3
0
 def classifier_instances(self):
     """Yield one classification instance per feature-map entry.

     Every file in ``self._files`` is echoed to stderr, loaded into a
     ``FeatureMap`` and filtered with ``self._features2filter``.  Each
     entry is then converted with ``feat_map2_class_instance``.

     Yields:
         ``((edu_pair, f), cl, feats)`` where ``f`` is the file the entry
         was read from.
     """
     for path in self._files:
         print >> sys.stderr, path
         fmap = FeatureMap(path, encoding=self._in_encoding)
         # feature filtering -- WARNING: ad hoc!!!
         fmap.filter_feats(
             self._features2filter, mode="lose", continuous=False)
         for entry in fmap._all:
             converted = feat_map2_class_instance(
                 entry,
                 formatting=self._formatting,
                 features2cross=self._features2cross)
             edu_pair, (cl, feats) = converted
             yield (edu_pair, path), cl, feats
Пример #4
0
 def classifier_instances(self):
     """Generate classification instances for all files in ``self._files``.

     Each file name is printed to stderr, then a ``FeatureMap`` is built
     from it, filtered with ``self._features2filter`` and walked entry by
     entry; ``feat_map2_class_instance`` turns each entry into a key and a
     ``(cl, feats)`` pair.

     Yields:
         ``((edu_pair, f), cl, feats)`` tuples, with ``f`` being the source
         file of the entry.
     """
     for source in self._files:
         print >> sys.stderr, source
         feature_map = FeatureMap(source, encoding=self._in_encoding)
         # feature filtering -- WARNING: ad hoc!!!
         feature_map.filter_feats(self._features2filter,
                                  mode="lose",
                                  continuous=False)
         for item in feature_map._all:
             edu_pair, labelled = feat_map2_class_instance(
                 item,
                 formatting=self._formatting,
                 features2cross=self._features2cross)
             cl, feats = labelled
             yield (edu_pair, source), cl, feats
Пример #5
0
# The directory to scan is taken from the first command-line argument.
test_dir = sys.argv[1]

# Keep only the directory entries whose name ends with IN_SUFFIX.
test_files = []
for f in os.listdir(test_dir):
    if f.endswith(IN_SUFFIX):
        test_files.append(os.path.join(test_dir, f))
print >> sys.stderr, "Nber test files: %s." % len(test_files)

# counters including multiple correct cases
correct1, total1 = 0.0, 0

# counters not including them
correct2, total2 = 0.0, 0

for tf in test_files:
    print >> sys.stderr, tf
    feat_map = FeatureMap(tf, encoding=DEF_ENCODING)
    correct_attachments = defaultdict(list)
    last_attachments = {}
    for instance in feat_map._all:
        edu2attach = instance.pop(ANA_NODE)
        attach_point = instance.pop(ANTE_NODE)
        cl = int(eval(instance.pop(CLASS_ATTR)))
        if cl == 1:
            correct_attachments[edu2attach].append(attach_point)
        is_last = eval(instance.get("D#LAST", "False"))
        if is_last:
            last_attachments[edu2attach] = attach_point
        # print edu2attach, attach_point, cl, is_last

    for edu, pts in correct_attachments.items():
        try: