def predict_problem(self, model_fn, test_fn, out_fn, acc_bound=0.5):
    """Classify the events of a test file and write the results as TSV.

    Every non-blank line of ``test_fn`` must hold four tab-separated
    fields: an integer gold label, space-separated integer feature ids,
    a query and a string.  For each such line one row is written to
    ``out_fn``::

        query;string <TAB> gold label <TAB> predicted label <TAB> feat;feat;...

    The predicted label is replaced by ``"unknown"`` unless its
    probability is strictly greater than ``acc_bound``.

    Args:
        model_fn: passed to ``self.load``; the returned object must
            provide ``model``, ``class_cache`` and ``feat_cache``.
        test_fn: path of the input file described above.
        out_fn: path of the output file (overwritten).
        acc_bound: probability threshold for accepting a prediction.

    Raises:
        ValueError: if a non-blank line does not have exactly four
            tab-separated fields, or a label/feature id is not an integer.
        KeyError: if an id is missing from the inverted caches.
    """
    ed = self.load(model_fn)

    gold = []
    features = []
    query_string = []
    # ``with`` guarantees the input handle is released even if a line
    # fails to parse.
    with open(test_fn) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing on the tuple unpacking below.
                continue
            g, feats, query, string = line.split('\t')
            gold.append(int(g))
            features.append(dict((int(f), 1) for f in feats.split(' ')))
            query_string.append(';'.join([query, string]))

    # ``predict`` is the module-level function (presumably LIBLINEAR's
    # ``predict``; '-b 1' would then request probability estimates --
    # TODO confirm against the file's imports).
    p_labels, _, p_vals = predict(gold, features, ed.model, '-b 1')

    # Invert the caches so integer ids can be mapped back to their keys.
    label_names = dict((v, k) for k, v in ed.class_cache.items())
    feat_names = dict((v, k) for k, v in ed.feat_cache.items())

    with open(out_fn, 'w') as out_fh:
        rows = zip(query_string, gold, p_labels, p_vals, features)
        for qs, gold_id, p_label, probs, feats in rows:
            label_id = int(p_label)
            # NOTE(review): the probability vector is indexed by the label
            # id itself; this assumes each class sits at the position equal
            # to its id -- confirm against the model's label order.
            if probs[label_id] > acc_bound:
                predicted = label_names[label_id]
            else:
                predicted = "unknown"
            out_fh.write('{0}\t{1}\t{2}\t{3}\n'.format(
                qs,
                label_names[gold_id],
                predicted,
                ';'.join([feat_names[feat_id] for feat_id in feats])))
def predict(self, features, gold=None, acc_bound=0.5, bias=-1,
            with_prob=False):
    """Predict a class label for every feature vector in ``features``.

    Args:
        features: iterable of feature vectors; each one is converted with
            ``self.int_feats`` before prediction.  Must support ``len()``
            when ``gold`` is empty or ``None``.
        gold: optional gold labels.  A ``str`` label found in
            ``self.class_cache`` is replaced by its cached value; any
            other label is passed through unchanged.  When omitted (or
            empty), ``0`` is used as a dummy label for every event.
        acc_bound: probability a prediction has to exceed to be accepted
            (only used when ``with_prob`` is false).
        bias: value handed to ``self.problem.set_bias``.
        with_prob: selects the return format, see below.

    Returns:
        If ``with_prob`` is true, a list of ``(label, probabilities)``
        tuples, where ``probabilities`` is the per-event value returned
        by the underlying ``predict`` call.  Otherwise a list holding,
        per event, the predicted label, or ``"unknown"`` when the
        probability of the predicted label is not greater than
        ``acc_bound``.
    """
    self.problem.set_bias(bias)
    int_features = [self.int_feats(fvec) for fvec in features]

    if gold:
        gold_int_labels = [
            self.class_cache[g]
            if isinstance(g, str) and g in self.class_cache else g
            for g in gold
        ]
    else:
        # No gold labels available: feed dummy labels to the predictor.
        gold_int_labels = [0] * len(features)

    # The bare name refers to the module-level ``predict`` function
    # (presumably LIBLINEAR's; '-b 1' would then request probability
    # estimates -- TODO confirm against the file's imports).
    p_labels, _, p_vals = predict(
        gold_int_labels, int_features, self.model, '-b 1')

    # Inverse of class_cache: cached integer value -> original label.
    label_names = dict((v, k) for k, v in self.class_cache.items())

    if with_prob:
        return [(label_names[int(label)], probs)
                for label, probs in zip(p_labels, p_vals)]

    # NOTE(review): ``probs`` is indexed by the label id itself; this
    # assumes each class sits at the position equal to its id -- confirm
    # against the model's label order.
    return [
        label_names[int(label)] if probs[int(label)] > acc_bound
        else "unknown"
        for label, probs in zip(p_labels, p_vals)
    ]
def predict(self, features, gold = None):
    """Return the predicted label for each feature vector in ``features``.

    Every vector is converted with ``self.int_feats``.  Gold labels, when
    given, are looked up in ``self.class_cache`` (an unknown label raises
    ``KeyError``); otherwise ``0`` is used as a dummy label for every
    event.  Predicted integer labels are mapped back through the inverse
    of ``self.class_cache``.
    """
    int_features = [self.int_feats(fvec) for fvec in features]

    if gold:
        gold_int_labels = [self.class_cache[g] for g in gold]
    else:
        gold_int_labels = [0] * len(features)

    # Module-level ``predict`` (presumably LIBLINEAR's -- TODO confirm);
    # only the predicted labels of its three results are used here.
    result = predict(gold_int_labels, int_features, self.model, '-b 1')
    predicted, _unused_acc, _unused_vals = result

    # Inverse of class_cache: cached integer value -> original label.
    id_to_label = {}
    for name, label_id in self.class_cache.items():
        id_to_label[label_id] = name

    return [id_to_label[int(label)] for label in predicted]
def predict(self, features, gold=None, acc_bound=0.5):
    """Predict a class label for every feature vector in ``features``.

    Args:
        features: iterable of feature vectors; each one is converted with
            ``self.int_feats`` before prediction.  Must support ``len()``
            when ``gold`` is empty or ``None``.
        gold: optional gold labels.  A ``str`` label found in
            ``self.class_cache`` is replaced by its cached value; any
            other label is passed through unchanged.  When omitted (or
            empty), ``0`` is used as a dummy label for every event.
        acc_bound: probability a prediction has to exceed to be accepted.

    Returns:
        A list holding, per event, the predicted label, or ``"unknown"``
        when the probability of the predicted label is not greater than
        ``acc_bound``.
    """
    int_features = [self.int_feats(fvec) for fvec in features]

    if gold:
        gold_int_labels = [
            self.class_cache[g]
            if isinstance(g, str) and g in self.class_cache else g
            for g in gold
        ]
    else:
        # No gold labels available: feed dummy labels to the predictor.
        gold_int_labels = [0] * len(features)

    # The bare name refers to the module-level ``predict`` function
    # (presumably LIBLINEAR's; '-b 1' would then request probability
    # estimates -- TODO confirm against the file's imports).
    p_labels, _, p_vals = predict(
        gold_int_labels, int_features, self.model, '-b 1')

    # Inverse of class_cache: cached integer value -> original label.
    label_names = dict((v, k) for k, v in self.class_cache.items())

    # NOTE(review): ``probs`` is indexed by the label id itself; this
    # assumes each class sits at the position equal to its id -- confirm
    # against the model's label order.
    return [
        label_names[int(label)] if probs[int(label)] > acc_bound
        else "unknown"
        for label, probs in zip(p_labels, p_vals)
    ]
def getLogTagProbsByPos(self, senFeats):
    """Return, for each position of a sentence, a tag -> log-probability dict.

    ``senFeats`` is converted with ``self.getNumberedSenFeats``; each
    resulting feature collection becomes a ``{feature: 1}`` context that
    is passed to the module-level ``predict`` together with ``self.model``
    and ``self.params``.  The probability at index ``c`` of a position's
    distribution is assigned to the tag ``self.labelCounter.noToFeat[c + 1]``.

    A probability of exactly zero is mapped to ``-inf`` instead of
    raising ``ValueError`` from ``math.log``.
    """
    numberedSenFeats = self.getNumberedSenFeats(senFeats)
    contexts = [dict((feat, 1) for feat in feats)
                for feats in numberedSenFeats]
    # The predictor requires one label per context; the values are
    # placeholders, only the probability distributions are used below.
    dummyOutcomes = [1] * len(contexts)
    _, __, probDistsByPos = predict(
        dummyOutcomes, contexts, self.model, self.params)

    negInf = float('-inf')
    logTagProbsByPos = []
    for probDist in probDistsByPos:
        # NOTE(review): index c is mapped to label number c + 1; this
        # assumes the distribution is ordered by label number -- confirm
        # against the model's label order.
        logProbsByTag = dict(
            (self.labelCounter.noToFeat[c + 1],
             math.log(prob) if prob != 0 else negInf)
            for c, prob in enumerate(probDist))
        logTagProbsByPos.append(logProbsByTag)
    return logTagProbsByPos
def predict_problem(self, model_fn, test_fn, out_fn, acc_bound=0.5):
    """Classify the events of a test file and write the results as TSV.

    Every non-blank line of ``test_fn`` must hold four tab-separated
    fields: an integer gold label, space-separated integer feature ids,
    a query and a string.  For each such line one row is written to
    ``out_fn``::

        query;string <TAB> gold label <TAB> predicted label <TAB> feat;feat;...

    The predicted label is replaced by ``"unknown"`` unless its
    probability is strictly greater than ``acc_bound``.

    Args:
        model_fn: passed to ``self.load``; the returned object must
            provide ``model``, ``class_cache`` and ``feat_cache``.
        test_fn: path of the input file described above.
        out_fn: path of the output file (overwritten).
        acc_bound: probability threshold for accepting a prediction.

    Raises:
        ValueError: if a non-blank line does not have exactly four
            tab-separated fields, or a label/feature id is not an integer.
        KeyError: if an id is missing from the inverted caches.
    """
    ed = self.load(model_fn)

    gold = []
    features = []
    query_string = []
    # ``with`` guarantees the input handle is released even if a line
    # fails to parse.
    with open(test_fn) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing on the tuple unpacking below.
                continue
            g, feats, query, string = line.split('\t')
            gold.append(int(g))
            features.append(dict((int(f), 1) for f in feats.split(' ')))
            query_string.append(';'.join([query, string]))

    # ``predict`` is the module-level function (presumably LIBLINEAR's
    # ``predict``; '-b 1' would then request probability estimates --
    # TODO confirm against the file's imports).
    p_labels, _, p_vals = predict(gold, features, ed.model, '-b 1')

    # Invert the caches so integer ids can be mapped back to their keys.
    label_names = dict((v, k) for k, v in ed.class_cache.items())
    feat_names = dict((v, k) for k, v in ed.feat_cache.items())

    with open(out_fn, 'w') as out_fh:
        rows = zip(query_string, gold, p_labels, p_vals, features)
        for qs, gold_id, p_label, probs, feats in rows:
            label_id = int(p_label)
            # NOTE(review): the probability vector is indexed by the label
            # id itself; this assumes each class sits at the position equal
            # to its id -- confirm against the model's label order.
            if probs[label_id] > acc_bound:
                predicted = label_names[label_id]
            else:
                predicted = "unknown"
            out_fh.write('{0}\t{1}\t{2}\t{3}\n'.format(
                qs,
                label_names[gold_id],
                predicted,
                ';'.join([feat_names[feat_id] for feat_id in feats])))