def extract(tree, sentence, fclasses, do_sub=True, logprob=None):
    """Extract all features of a tree and return them as a Vector.

    Subtrees are visited first (only when do_sub is true and the tree is
    not a terminal); the features of this level are added afterwards.
    The mapping from full names to ids lives in fvector.py.
    The non-sub version (just this level) is used by BUDecoder
    (the forest decoder).

    When logprob is given, it is stored at index 0 of the result.
    """
    total = Vector()
    tree.annotate(None, do_sub=do_sub)

    # children first; the recursive call relies on the default do_sub/logprob
    if do_sub and not tree.is_terminal():
        for child in tree.subs:
            total += extract(child, sentence, fclasses)

    pending = []
    for fclass in fclasses:
        # global feature classes are only extracted at the root
        if fclass.is_global() and not tree.is_root():
            continue
        if use_pp:
            # hand the work to job_server; results are collected below
            pending.append(job_server.submit(fclass.extract, (tree, sentence), (quantize,)))
        else:
            total += Vector.convert_fullname(fclass.extract(tree, sentence))

    if use_pp:
        # calling a submitted job gives back its result
        for job in pending:
            total += Vector.convert_fullname(job())

    if logprob is not None:
        total[0] = logprob
    return total
def input():
    """Read lines from sys.stdin and yield (score, chi, line) for each.

    Every line is split as "<chi> -> <eng> ### <fields>". The fields part
    is turned into a Vector and scored by a dot product with a fixed
    weight vector; the untouched input line is yielded alongside.
    A line lacking either separator fails the two-way unpacking.
    """
    weights = Vector('gt_prob=-1.0 proot=-1.0 prhs=-1.0 plhs=-1.0 lexpef=-1.0 lexpfe=-1.0')
    for line in sys.stdin:
        chi, remainder = line.split(' -> ', 1)
        # the middle part of the line is not needed for scoring
        _, fieldstr = remainder.split(' ### ', 1)
        yield (Vector(fieldstr).dot(weights), chi, line)
def __init__(self, w):
    '''Initialise from a weight string or from a file holding one.

    w is taken to be a filename when it is non-blank and contains neither
    ":" nor "="; the first line of that file (stripped) then replaces w.
    The resulting string is handed to Vector.__init__, a summary is
    printed to logs, and the "lm" entry is cached as self.lm_weight.
    '''
    if w.strip() != "" and not (":" in w or "=" in w):
        # single line; the with-block closes the file instead of leaking it
        with open(w) as weightfile:
            w = weightfile.readline().strip()
    Vector.__init__(self, w)
    print >> logs, 'using weights: "%s...%s" (%d fields)' \
        % (w[:10], w[-10:], len(self))
    self.lm_weight = self["lm"]
def __init__(self, w):
    '''Initialise from a filename, a weight string or a Vector.

    A Vector is passed straight through to Vector.__init__. Otherwise w
    is taken to be a filename when it is non-blank and contains neither
    ":" nor "="; the first line of that file (stripped) then replaces w.
    Afterwards the number of fields is printed to logs and the "lm"
    entry is cached as self.lm_weight.
    '''
    if not isinstance(w, Vector) and w.strip() != "" \
            and not (":" in w or "=" in w):
        # single line; the with-block closes the file instead of leaking it
        with open(w) as weightfile:
            w = weightfile.readline().strip()
    Vector.__init__(self, w)
    print >> logs, 'using weights: (%d fields)' \
        % (len(self))
    self.lm_weight = self["lm"]
def simulate(self, actions, sent):
    '''Replay a given sequence of actions on sent and return the outcome.

    Returns a pair (state, actionfeats): the state reached, and a Vector
    counting every feature that make_feats produced on the way.
    Replaying stops early, after printing an error to logs, at the first
    action that the current state does not allow; the features of that
    action have already been counted by then.
    '''
    self.State.sent = sent
    n = len(sent)  # not used below; the len() call itself is kept
    current = self.State.initstate()  # initial state
    counts = Vector()

    for action in actions:
        # features are made from the state *before* the action is taken
        for feat in current.make_feats(action):
            counts[feat] += 1

        if action not in current.allowed_actions():
            print >> logs, "Error! BAD SEQUENCE!"
            break

        # move to the first state offered by take(); stay put if there is none
        current = next(iter(current.take(action)), current)

    return current, counts
def __init__(self, decoder):
    """Set up the trainer from FLAGS and the given decoder.

    The train and dev files named by FLAGS are read fully into
    self.trainlines and self.devlines. The current weights are taken
    from decoder.model.weights.

    Without FLAGS.resume_from, the accumulated weights start as an empty
    Vector and the example counter at zero. Otherwise the counter is set
    to (start_iter - 1) * number of training lines and the accumulated
    weights are loaded through Model(FLAGS.allweights).
    """
    self.trainfile = FLAGS.train
    # with-blocks close the files as soon as their lines are read
    with open(self.trainfile) as trainf:
        self.trainlines = trainf.readlines()

    self.devfile = FLAGS.dev
    with open(self.devfile) as devf:
        self.devlines = devf.readlines()

    self.outfile = FLAGS.out
    self.save_to = FLAGS.save_to
    self.decoder = decoder  # a class, providing functions: load(), decode(), get_feats()
    self.iter = FLAGS.iter
    self.start_iter = FLAGS.start_iter
    self.avg = FLAGS.avg
    self.shuffle = FLAGS.shuffle
    self.weights = decoder.model.weights

    if FLAGS.resume_from is None:
        self.allweights = Vector()
        self.c = 0.  # counter: how many examples have i seen so far? = it * |train| + i
    else:
        self.c = (self.start_iter - 1) * len(self.trainlines)
        self.allweights = Model(FLAGS.allweights).weights  # read all weights from file
def __init__(self, decoder):
    """Set up the trainer from FLAGS and the given decoder.

    File names, iteration count and the avg setting are copied from
    FLAGS; the current weights come from decoder.model.weights. The
    accumulated weights start as an empty Vector and the example
    counter at zero.
    """
    # file names
    self.trainfile, self.devfile, self.outfile = FLAGS.train, FLAGS.dev, FLAGS.out

    # decoder: a class, providing functions load(), decode(), get_feats()
    self.decoder = decoder

    # training settings
    self.iter, self.avg = FLAGS.iter, FLAGS.avg

    self.weights = decoder.model.weights
    self.allweights = Vector()

    # counter: how many examples have i seen so far? = it * |train| + i
    self.c = 0.
def feats(self, action=None):
    """Return the features of this state, optionally tied to an action.

    The plain features are made once by self.model.make_feats(self) and
    kept in self._feats. With action=None that cached object is returned
    as is. Otherwise a new Vector is returned in which every feature
    name, suffixed with "=>" plus State.names[action], is set to 1.
    """
    if self._feats is None:
        # first request: compute and remember
        self._feats = self.model.make_feats(self)

    if action is None:
        return self._feats

    suffix = "=>" + State.names[action]
    conjoined = Vector()
    for name in self._feats:
        conjoined[name + suffix] = 1
    return conjoined
def __init__(self, d=None):
    """Initialise the vector from d and reset the update bookkeeping.

    d is passed on to Vector.__init__; when it is omitted (or None) a
    fresh empty dict is used, so that no dict object is shared between
    separate calls.
    """
    if d is None:
        d = {}
    Vector.__init__(self, d)
    self.last_update = {}
    self.N = 0  ## number of examples (per iter). will be reset at the end of the first iteration
## LocalDecoder(), \ ## BUDecoder(opts.k, check_feats=False, adaptive_base=opts.adaptive)]\ ## [opts.mode] decoder = LocalDecoder(opts.hope) print >> logs, "decoder = %s" % decoder ### must read forest first! otherwise slow! # forests = [] # for forest in decoder.load("-"): # forests.append(forest) if opts.weightsfile is not None: weights = get_weights(opts.weightsfile) # see forest.py else: weights = Vector("lm1=2 gt_prob=1") ## initial vector initial_weights = weights.__copy__() extra_feats = None #prep_features(args) decoder.set_feats(extra_feats) all_feats = extra_feats if opts.trainfile == "-": trainforests = [] for forest in decoder.load(opts.trainfile): decoder.do_oracle(forest, weights) trainforests.append(forest) print >> logs, "pre-loaded %d train forests, load %.2lf, oracle %.2lf" % \