Пример #1
0
 def predict_problem(self, model_fn, test_fn, out_fn, acc_bound=0.5):
     """Run a saved model over a tab-separated test file and write a report.

     Every line of ``test_fn`` must consist of four tab-separated fields:
     the gold label id (integer), space-separated integer feature ids,
     a query and a string.  For each event one line is written to
     ``out_fn``::

         query;string <TAB> gold label <TAB> predicted label <TAB> features

     Label and feature names are looked up through the inverted
     ``class_cache`` / ``feat_cache`` of the loaded model; features are
     joined with ``;``.

     Args:
         model_fn: passed to ``self.load``; the returned object must
             expose ``model``, ``class_cache`` and ``feat_cache``.
         test_fn: path of the test file to read.
         out_fn: path of the report file (opened with mode ``'w'``).
         acc_bound: the predicted label is written only when its
             probability value is strictly greater than this bound,
             otherwise the literal ``"unknown"`` is written.

     Raises:
         ValueError: if a line does not have exactly four fields or a
             label / feature id is not an integer.
         KeyError: if an id is missing from the inverted caches.
     """
     ed = self.load(model_fn)
     gold = []
     features = []
     query_string = []
     # ``with`` closes the input even when a malformed line raises
     # while it is being parsed.
     with open(test_fn) as fh:
         for line in fh:
             g, feats, query, string = line.strip().split('\t')
             gold.append(int(g))
             # Sparse binary vector: every listed feature id gets value 1.
             features.append(dict((int(f_id), 1) for f_id in feats.split(' ')))
             query_string.append(';'.join([query, string]))

     p_labels, _, p_vals = predict(gold, features, ed.model, '-b 1')

     # Invert the caches so integer ids map back to their names.
     d = dict((v, k) for k, v in ed.class_cache.iteritems())
     f = dict((v, k) for k, v in ed.feat_cache.iteritems())

     # The output handle is likewise released if a lookup below fails.
     with open(out_fn, 'w') as out_fh:
         for i, p_label in enumerate(p_labels):
             label_id = int(p_label)
             # NOTE(review): the probability row is indexed by the label id
             # itself; this assumes the columns of p_vals line up with the
             # label ids -- confirm against the model's label order.
             if p_vals[i][label_id] > acc_bound:
                 guess = d[label_id]
             else:
                 guess = "unknown"
             out_fh.write('{0}\t{1}\t{2}\t{3}\n'.format(
                 query_string[i], d[gold[i]], guess,
                 ';'.join([f[int(feat)] for feat in features[i]])))
Пример #2
0
    def predict(self,
                features,
                gold=None,
                acc_bound=0.5,
                bias=-1,
                with_prob=False):
        """Classify feature vectors with ``self.model``.

        Args:
            features: iterable of feature vectors; each one is converted
                with ``self.int_feats`` before prediction.
            gold: optional gold labels.  A string found in
                ``self.class_cache`` is replaced by its integer id, any
                other value is passed through unchanged.  When omitted
                (or empty) a placeholder label ``0`` is used per event.
            acc_bound: used only when ``with_prob`` is false; a label is
                returned only if its probability value is strictly
                greater than this bound, otherwise ``"unknown"``.
            bias: forwarded to ``self.problem.set_bias``.
            with_prob: when true, return ``(label, p_vals entry)`` tuples
                and skip the ``acc_bound`` filtering.

        Returns:
            A list with one entry per predicted event: either
            ``(label name, probability values)`` tuples or label names /
            ``"unknown"``, depending on ``with_prob``.
        """
        self.problem.set_bias(bias)
        int_features = [self.int_feats(fvec) for fvec in features]
        if gold:
            # isinstance instead of an exact type comparison, so that
            # str subclasses are translated to ids as well.
            gold_int_labels = [
                self.class_cache[g]
                if isinstance(g, str) and g in self.class_cache else g
                for g in gold]
        else:
            # Sized from the converted list so ``features`` may be any
            # iterable, not only a sequence supporting len().
            gold_int_labels = [0] * len(int_features)
        p_labels, _, p_vals = predict(gold_int_labels, int_features,
                                      self.model, '-b 1')

        # Inverted cache: integer class id -> class name.
        d = dict((v, k) for k, v in self.class_cache.iteritems())
        if with_prob:
            return [(d[int(label)], probs)
                    for label, probs in zip(p_labels, p_vals)]
        # NOTE(review): the probability row is indexed by the label id
        # itself; assumes p_vals columns line up with label ids -- confirm.
        return [d[int(label)] if probs[int(label)] > acc_bound else "unknown"
                for label, probs in zip(p_labels, p_vals)]
Пример #3
0
 def predict(self, features, gold = None):
     """Return the predicted class name for every feature vector.

     ``features`` are converted with ``self.int_feats``.  ``gold`` labels,
     when given, are mapped to integer ids through ``self.class_cache``;
     otherwise a placeholder label ``0`` is supplied per event.  The
     predicted ids are mapped back to names via the inverted
     ``class_cache``.
     """
     vectors = [self.int_feats(fvec) for fvec in features]
     if not gold:
         gold_ids = [0] * len(features)
     else:
         gold_ids = [self.class_cache[name] for name in gold]
     predicted, _, _probs = predict(gold_ids, vectors, self.model, '-b 1')

     # Build the reverse lookup: integer class id -> class name.
     id_to_name = {}
     for name, class_id in self.class_cache.iteritems():
         id_to_name[class_id] = name

     names = []
     for label in predicted:
         names.append(id_to_name[int(label)])
     return names
Пример #4
0
 def predict(self, features, gold=None, acc_bound=0.5):
     """Classify feature vectors, reporting only confident predictions.

     Args:
         features: iterable of feature vectors; each one is converted
             with ``self.int_feats`` before prediction.
         gold: optional gold labels.  A string found in
             ``self.class_cache`` is replaced by its integer id, any
             other value is passed through unchanged.  When omitted (or
             empty) a placeholder label ``0`` is used per event.
         acc_bound: a label is returned only if its probability value is
             strictly greater than this bound.

     Returns:
         A list with one entry per predicted event: the class name, or
         the literal ``"unknown"`` when the bound is not exceeded.
     """
     int_features = [self.int_feats(fvec) for fvec in features]
     if gold:
         # isinstance instead of an exact type comparison, so that str
         # subclasses are translated to ids as well.
         gold_int_labels = [
             self.class_cache[g]
             if isinstance(g, str) and g in self.class_cache else g
             for g in gold]
     else:
         # Sized from the converted list so ``features`` may be any
         # iterable, not only a sequence supporting len().
         gold_int_labels = [0] * len(int_features)
     p_labels, _, p_vals = predict(gold_int_labels, int_features,
                                   self.model, '-b 1')

     # Inverted cache: integer class id -> class name.
     d = dict((v, k) for k, v in self.class_cache.iteritems())
     # NOTE(review): the probability row is indexed by the label id itself;
     # assumes p_vals columns line up with label ids -- confirm.
     return [d[int(label)] if probs[int(label)] > acc_bound else "unknown"
             for label, probs in zip(p_labels, p_vals)]
Пример #5
0
    def getLogTagProbsByPos(self, senFeats):
        """Return one ``{tag: log probability}`` dict per sentence position.

        ``senFeats`` is converted with ``self.getNumberedSenFeats``; each
        resulting feature list becomes a sparse context in which every
        feature has value 1.  The contexts are passed to ``predict``
        together with ``self.model`` and ``self.params``, and the third
        returned value is read as one probability distribution per
        position (presumably ``self.params`` requests probability
        estimates -- confirm where it is set).

        The entry at index ``c`` of a distribution is attributed to the
        tag ``self.labelCounter.noToFeat[c + 1]``.

        Raises:
            ValueError: from ``math.log`` if a probability is exactly 0.
        """
        numberedSenFeats = self.getNumberedSenFeats(senFeats)
        contexts = [dict.fromkeys(feats, 1) for feats in numberedSenFeats]
        # predict() needs one outcome per context; the values are
        # placeholders, only the returned distributions are used.
        dummyOutcomes = [1] * len(contexts)
        _, __, probDistsByPos = predict(dummyOutcomes, contexts,
                                        self.model, self.params)

        logTagProbsByPos = []
        for probDist in probDistsByPos:
            # Label numbers are offset by one relative to the position
            # inside the distribution.
            logTagProbsByPos.append(
                dict((self.labelCounter.noToFeat[c + 1], math.log(prob))
                     for c, prob in enumerate(probDist)))

        return logTagProbsByPos
Пример #6
0
 def predict_problem(self, model_fn, test_fn, out_fn, acc_bound=0.5):
     """Predict labels for a tab-separated test file and write a report.

     Input format (one event per line, four tab-separated fields):
     gold label id (integer), space-separated integer feature ids,
     query, string.  Output format (one line per event, tab-separated):
     ``query;string``, gold label name, predicted label name (or
     ``"unknown"``), and the feature names joined with ``;``.  Names are
     obtained by inverting ``class_cache`` and ``feat_cache`` of the
     object returned by ``self.load(model_fn)``.

     Args:
         model_fn: argument for ``self.load``; the loaded object must
             provide ``model``, ``class_cache`` and ``feat_cache``.
         test_fn: path of the test file to read.
         out_fn: path of the report file (opened with mode ``'w'``).
         acc_bound: a prediction is reported only when its probability
             value is strictly greater than this bound.

     Raises:
         ValueError: if a line does not have exactly four fields or a
             label / feature id is not an integer.
         KeyError: if an id is missing from the inverted caches.
     """
     ed = self.load(model_fn)
     gold = []
     features = []
     query_string = []
     # Context manager: the input handle is released even if a line
     # fails to parse.
     with open(test_fn) as fh:
         for line in fh:
             g, feats, query, string = line.strip().split('\t')
             feats_d = dict((int(feat_id), 1) for feat_id in feats.split(' '))
             gold.append(int(g))
             features.append(feats_d)
             query_string.append(';'.join([query, string]))

     p_labels, _, p_vals = predict(gold, features, ed.model, '-b 1')

     # Reverse lookups: integer id -> name.
     d = dict((v, k) for k, v in ed.class_cache.iteritems())
     f = dict((v, k) for k, v in ed.feat_cache.iteritems())

     # Context manager: the report is closed even if a lookup raises
     # midway through writing.
     with open(out_fn, 'w') as out_fh:
         for i, p_label in enumerate(p_labels):
             predicted_id = int(p_label)
             # NOTE(review): the probability row is indexed by the label id
             # itself; assumes p_vals columns line up with label ids --
             # confirm against the model's label order.
             confident = p_vals[i][predicted_id] > acc_bound
             predicted_name = d[predicted_id] if confident else "unknown"
             feature_names = ';'.join([f[int(feat)] for feat in features[i]])
             out_fh.write('{0}\t{1}\t{2}\t{3}\n'.format(
                 query_string[i], d[gold[i]], predicted_name, feature_names))