def get_hyps(sent, goal, weights):
    """Collect the worst-violator hypotheses from the forest under ``weights``.

    Assumes that oraclemodel.input() has been called.

    The forest ``goal`` is reweighted in place with
    ``fear_weight * oracle weights + weights`` and the n-best list is read
    back from the decoder. ``sent`` is accepted but not used here.

    Returns a list of ``(feature vector, hypothesis, score)`` tuples, where
    the feature vector has been passed through ``theoracle.clean``.
    """
    # In-place operators are used because the oracle weights might be
    # a subclass of Vector.
    search_weights = theoracle.make_weights(additive=True)
    search_weights *= fear_weight
    search_weights += weights
    goal.reweight(search_weights)

    collected = []
    for features, words in decoder.get_nbest(goal, 1, 1):
        score = get_score(features, words)
        surface = " ".join(sym.tostring(w) for w in words)
        cost = weights.dot(features)
        log.write("added new hyp: %s %s cost=%s score=%s\n" % (surface, features, cost, score))
        # the learner MUST not see the oracle features
        collected.append((theoracle.clean(features), words, score))
    return collected
def get_hyps(sent, goal, weights):
    """Return the worst violators found in ``goal`` for the given ``weights``.

    Assumes that oraclemodel.input() has been called.

    ``goal`` is reweighted (mutated) with the model weights plus the oracle
    weights scaled by ``fear_weight``. Each n-best entry is scored with
    ``get_score``, logged, and returned as ``(cleaned vector, hyp, score)``.
    The ``sent`` argument is not read.
    """
    fear_direction = theoracle.make_weights(additive=True)
    # Keep these in place: the oracle weights might be a subclass of Vector.
    fear_direction *= fear_weight
    fear_direction += weights
    goal.reweight(fear_direction)

    nbest = decoder.get_nbest(goal, 1, 1)
    violators = []
    for vec, words in nbest:
        score = get_score(vec, words)
        text = " ".join(sym.tostring(tok) for tok in words)
        log.write(
            "added new hyp: %s %s cost=%s score=%s\n"
            % (text, vec, weights.dot(vec), score)
        )
        # the learner MUST not see the oracle features
        cleaned = theoracle.clean(vec)
        violators.append((cleaned, words, score))
    return violators
def get_hope(self):
    """Return the hope hypothesis for this instance.

    Assumes that oraclemodel.input() has been called.

    Without a forest (``self.goal`` is falsy) the hope is chosen among the
    stored ``self.hyps`` as the one minimizing
    ``mweights . mvector + hope_weight * (oweights . ovector)``.

    With a forest, the forest is reweighted with
    ``mweights - hope_weight * oracle weights`` and the decoder's 1-best is
    wrapped in a new ``maxmargin.Hypothesis``.
    """
    if not self.goal:
        qp = self.qp

        def hope_cost(hyp):
            return qp.mweights.dot(hyp.mvector) + hope_weight * qp.oweights.dot(hyp.ovector)

        # Select by key instead of taking min() over (cost, hyp) tuples:
        # on a cost tie the tuple form falls back to comparing the hypothesis
        # objects themselves, which is arbitrary (or a TypeError when they
        # define no ordering). With a key, the first minimal hyp wins.
        return min(self.hyps, key=hope_cost)

    weights = theoracle.make_weights(additive="edge")
    # use in-place operations because theoracle.make_weights might
    # be a subclass of svector.Vector
    weights *= -hope_weight
    weights += self.qp.mweights
    self.goal.reweight(weights)

    hope_vector, hope = decoder.get_nbest(self.goal, 1, 1)[0]
    # finish() is called on the full vector before clean() strips it.
    hope_ovector = theoracle.finish(hope_vector, hope)
    hope_mvector = theoracle.clean(hope_vector)

    if log.level >= 1:
        log.write("hope hyp: %s\n" % " ".join(sym.tostring(e) for e in hope))
        log.write("hope features: %s\n" % hope_mvector)
        log.write("hope oracle: %s\n" % hope_ovector)

    return maxmargin.Hypothesis(hope_mvector, hope_ovector)
def process_heldout(sent):
    """Decode one held-out sentence and record its 1-best output on ``sent``.

    Sets ``sent.flen``, ``sent.score_comps`` and ``sent.ewords``.

    Returns ``sent`` on success, or ``None`` when the decoder failed and the
    running ``decoder_errors`` count is still below 100.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 100.
    """
    global decoder_errors

    # Need to add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    span = (0, sent.n - 1)
    sent.flen = sent.compute_distance().get(span, None)  # could be missing if n == 0

    theoracle.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # NOTE(review): the counter is reset before the None check, so a
        # parse failure that gets this far leaves it at 1 -- confirm intended.
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 100:
            return None
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    nbest = decoder.get_nbest(goal, 1)
    bestv, best = nbest[0]
    log.write("done decoding\n")

    bestg = theoracle.finish(bestv, best)
    # Scored with the edge-additive oracle weights (the sentence-additive
    # variant was previously used here and is disabled).
    edge_weights = theoracle.make_weights(additive="edge")
    bestscore = edge_weights.dot(bestg)

    surface = " ".join(sym.tostring(e) for e in best)
    cost = thedecoder.weights.dot(bestv)
    log.write("best hyp: %s %s cost=%s score=%s\n" % (surface, bestv, cost, bestscore))

    sent.score_comps = bestg
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def process(sent):
    """Decode ``sent`` and return its forest augmented with oracle features.

    Sets ``sent.flen``, ``sent.score_comps`` and ``sent.ewords`` as side
    effects and logs the 1-best hypothesis with its cost and loss.

    Returns the rescored forest ``goal``, or ``None`` when decoding failed
    and fewer than 3 errors have accumulated in ``decoder_errors``.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 3.
    """
    global decoder_errors

    # Add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    full_span = (0, sent.n - 1)
    sent.flen = sent.compute_distance().get(full_span, None)  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 3:
            return None
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    # Augment forest with oracle features
    # this is overkill if we aren't going to search for hope/fear
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    # clean() first, then finish(), both on the full 1-best vector.
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    sentence_weights = theoracle.make_weights(additive="sentence")
    best_loss = sentence_weights.dot(best_ovector)

    surface = " ".join(sym.tostring(e) for e in best)
    model_cost = thedecoder.weights.dot(best_mvector)
    log.writeln("best hyp: %s %s cost=%s loss=%s" % (surface, best_vector, model_cost, best_loss))

    sent.score_comps = best_ovector
    sent.ewords = [sym.tostring(e) for e in best]
    return goal
def get_fear(self):
    """Return the fear hypothesis extracted from this instance's forest.

    Assumes that oraclemodel.input() has been called.

    The forest is reweighted with ``oracle weights + mweights`` and the
    decoder's 1-best is wrapped in a ``maxmargin.Hypothesis``.

    Raises: NotImplementedError when the instance has no forest
    (``self.goal`` is falsy).
    """
    forest = self.goal
    if not forest:
        raise NotImplementedError()

    # use in-place operations because theoracle.make_weights might
    # be a subclass of svector.Vector
    search_weights = theoracle.make_weights(additive="edge")
    search_weights += self.qp.mweights
    forest.reweight(search_weights)

    nbest = decoder.get_nbest(forest, 1, 1)
    fear_vector, fear = nbest[0]
    # finish() runs on the full vector before clean() strips it.
    fear_ovector = theoracle.finish(fear_vector, fear)
    fear_mvector = theoracle.clean(fear_vector)

    verbose = log.level >= 1
    if verbose:
        words = " ".join(sym.tostring(e) for e in fear)
        log.write("fear hyp: %s\n" % words)
        log.write("fear features: %s\n" % fear_mvector)
        log.write("fear oracle: %s\n" % fear_ovector)

    return maxmargin.Hypothesis(fear_mvector, fear_ovector)
def process_heldout(sent):
    """Decode one held-out sentence and store its 1-best output on ``sent``.

    Sets ``sent.score_comps`` (the result of ``theoracle.finish``) and
    ``sent.ewords``.

    Returns ``sent`` on success, or ``None`` when decoding failed and
    ``decoder_errors`` is still below 100.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 100.
    """
    global decoder_errors

    theoracle.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        trace = traceback.format_exc()
        log.writeln("decoder raised exception: %s %s" % (sent, trace))
        decoder_errors += 1
        if decoder_errors < 100:
            return None
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    bestscore = get_score(bestv, best)
    surface = " ".join(sym.tostring(e) for e in best)
    cost = thedecoder.weights.dot(bestv)
    log.write("best hyp: %s %s cost=%s score=%s\n" % (surface, bestv, cost, bestscore))

    # The score and cost above use the raw vector; only afterwards is it
    # replaced by the finished oracle vector.
    sent.score_comps = theoracle.finish(bestv, best)
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def get_gold(sent, goal, weights):
    """Find the gold hypothesis in the forest under ``weights``.

    Assumes that oraclemodel.input() has been called.

    ``goal`` is reweighted in place with
    ``weights - hope_weight * oracle weights`` and the decoder's 1-best is
    taken as gold. ``sent`` is accepted but not used here.

    Returns ``(cleaned feature vector, gold hypothesis, gold score)``.
    """
    # In-place operators are used because the oracle weights might be
    # a subclass of Vector.
    search_weights = theoracle.make_weights(additive=True)
    search_weights *= -hope_weight
    search_weights += weights
    goal.reweight(search_weights)

    top = decoder.get_nbest(goal, 1, 1)[0]
    features, words = top
    score = get_score(features, words)

    surface = " ".join(sym.tostring(w) for w in words)
    cost = weights.dot(features)
    log.write("gold hyp: %s %s cost=%s score=%s\n" % (surface, features, cost, score))

    # the learner MUST not see the oracle features
    return theoracle.clean(features), words, score
def process_heldout(sent):
    """Translate a held-out sentence and attach the 1-best result to it.

    On success ``sent.score_comps`` holds ``theoracle.finish`` of the 1-best
    vector and ``sent.ewords`` holds the 1-best words as strings.

    Returns ``sent``, or ``None`` if the decoder failed while fewer than 100
    errors have been counted in ``decoder_errors``.

    Raises: the decoder's exception, re-raised when ``decoder_errors``
    reaches 100.
    """
    global decoder_errors

    theoracle.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        details = (sent, traceback.format_exc())
        log.writeln("decoder raised exception: %s %s" % details)
        decoder_errors += 1
        too_many = decoder_errors >= 100
        if not too_many:
            return None
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    first = decoder.get_nbest(goal, 1)[0]
    bestv, best = first
    log.write("done decoding\n")

    bestscore = get_score(bestv, best)
    words = " ".join(sym.tostring(e) for e in best)
    log.write(
        "best hyp: %s %s cost=%s score=%s\n"
        % (words, bestv, thedecoder.weights.dot(bestv), bestscore)
    )

    # Replace the raw vector with the finished one only after logging.
    finished = theoracle.finish(bestv, best)
    sent.score_comps = finished
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def process_heldout(sent):
    """Translate a held-out sentence and score its 1-best with the oracle.

    Side effects: sets ``sent.flen``, ``sent.score_comps`` and
    ``sent.ewords``, and updates the global ``decoder_errors`` counter.

    Returns ``sent``, or ``None`` if the decoder failed while fewer than 100
    errors have been counted.

    Raises: the decoder's exception, re-raised when ``decoder_errors``
    reaches 100.
    """
    global decoder_errors

    # Need to add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    distances = sent.compute_distance()
    whole_sentence = (0, sent.n - 1)
    sent.flen = distances.get(whole_sentence, None)  # could be missing if n == 0

    theoracle.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        give_up = decoder_errors >= 100
        if not give_up:
            return None
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    bestg = theoracle.finish(bestv, best)
    # Edge-additive oracle weights are used for the score (a
    # sentence-additive alternative is disabled in the original).
    bestscore = theoracle.make_weights(additive="edge").dot(bestg)

    hyp_text = " ".join(sym.tostring(e) for e in best)
    log.write(
        "best hyp: %s %s cost=%s score=%s\n"
        % (hyp_text, bestv, thedecoder.weights.dot(bestv), bestscore)
    )

    sent.score_comps = bestg
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def get_gold(sent, goal, weights):
    """Return the gold hypothesis for the forest ``goal`` under ``weights``.

    Assumes that oraclemodel.input() has been called.

    The forest is reweighted (mutated) with the model weights minus
    ``hope_weight`` times the oracle weights; the decoder's 1-best under
    that weighting is scored with ``get_score`` and logged. The ``sent``
    argument is not read.

    Returns a ``(goldv, gold, goldscore)`` tuple in which ``goldv`` has been
    passed through ``theoracle.clean``.
    """
    hope_direction = theoracle.make_weights(additive=True)
    # Keep these in place: the oracle weights might be a subclass of Vector.
    hope_direction *= -hope_weight
    hope_direction += weights
    goal.reweight(hope_direction)

    nbest = decoder.get_nbest(goal, 1, 1)
    raw_vector, gold = nbest[0]
    goldscore = get_score(raw_vector, gold)

    text = " ".join(sym.tostring(tok) for tok in gold)
    log.write(
        "gold hyp: %s %s cost=%s score=%s\n"
        % (text, raw_vector, weights.dot(raw_vector), goldscore)
    )

    # the learner MUST not see the oracle features
    goldv = theoracle.clean(raw_vector)
    return goldv, gold, goldscore
def get_hope(self):
    """Return the hope hypothesis for this instance.

    Assumes that oraclemodel.input() has been called.

    If the instance has no forest (``self.goal`` is falsy), the hope is the
    member of ``self.hyps`` with the smallest value of
    ``mweights . mvector + hope_weight * (oweights . ovector)``.

    Otherwise the forest is reweighted with
    ``mweights - hope_weight * oracle weights`` and the decoder's 1-best is
    returned as a new ``maxmargin.Hypothesis``.
    """
    if not self.goal:
        mweights = self.qp.mweights
        oweights = self.qp.oweights

        # Rank with key= rather than by building (cost, hyp) tuples: tuple
        # comparison falls through to comparing the hypothesis objects when
        # two costs tie, which is arbitrary at best and a TypeError when the
        # objects are unorderable. With key=, the first minimal hyp is kept.
        return min(
            self.hyps,
            key=lambda hyp: mweights.dot(hyp.mvector) + hope_weight * oweights.dot(hyp.ovector),
        )

    weights = theoracle.make_weights(additive="edge")
    # use in-place operations because theoracle.make_weights might
    # be a subclass of svector.Vector
    weights *= -hope_weight
    weights += self.qp.mweights
    self.goal.reweight(weights)

    hope_vector, hope = decoder.get_nbest(self.goal, 1, 1)[0]
    # finish() is called on the full vector before clean() strips it.
    hope_ovector = theoracle.finish(hope_vector, hope)
    hope_mvector = theoracle.clean(hope_vector)

    if log.level >= 1:
        log.write("hope hyp: %s\n" % " ".join(sym.tostring(e) for e in hope))
        log.write("hope features: %s\n" % hope_mvector)
        log.write("hope oracle: %s\n" % hope_ovector)

    return maxmargin.Hypothesis(hope_mvector, hope_ovector)
def process(sent):
    """Decode ``sent`` and attach the hypotheses that will be used for learning.

    Sets ``sent.hyps`` to the result of ``get_hyps`` for the decoded forest.

    Returns ``sent``, or ``None`` when the decoder raised and fewer than 5
    errors have been counted in ``decoder_errors``.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 5. The counter is never reset in this function.
    """
    global decoder_errors

    oraclemodel.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
    except Exception:
        import traceback
        log.writeln("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 5:
            return None
        raise

    # The 1-best itself is not used below; the lookup is kept so that any
    # error it triggers still surfaces at this point.
    _best_vector, _best_words = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    # Collect hypotheses that will be used for learning
    sent.hyps = get_hyps(sent, goal, thedecoder.weights)
    log.write("done rescoring\n")
    return sent
def process(sent):
    """Decode ``sent``, add oracle features to its forest and return the forest.

    Side effects: sets ``sent.flen``, ``sent.score_comps`` and
    ``sent.ewords``, updates the global ``decoder_errors`` counter, and logs
    the 1-best hypothesis with its model cost and oracle loss.

    Returns the rescored forest, or ``None`` if the decoder failed while
    fewer than 3 errors have been counted.

    Raises: the decoder's exception, re-raised when ``decoder_errors``
    reaches 3.
    """
    global decoder_errors

    # Add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    distances = sent.compute_distance()
    sent.flen = distances.get((0, sent.n - 1), None)  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        give_up = decoder_errors >= 3
        if not give_up:
            return None
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    # Augment forest with oracle features
    # this is overkill if we aren't going to search for hope/fear
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    nbest = decoder.get_nbest(goal, 1)
    best_vector, best = nbest[0]
    # Order matters here only insofar as the original does clean() first.
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)

    hyp_text = " ".join(sym.tostring(e) for e in best)
    log.writeln(
        "best hyp: %s %s cost=%s loss=%s"
        % (hyp_text, best_vector, thedecoder.weights.dot(best_mvector), best_loss)
    )

    sent.score_comps = best_ovector
    sent.ewords = [sym.tostring(e) for e in best]
    return goal
def process(sent):
    """Translate ``sent`` and store its learning hypotheses in ``sent.hyps``.

    Returns ``sent`` on success. If the decoder raises, the error is logged
    and counted in the global ``decoder_errors``; ``None`` is returned while
    the count is below 5.

    Raises: the decoder's exception, re-raised when ``decoder_errors``
    reaches 5 (the counter is not reset on success here).
    """
    global decoder_errors

    oraclemodel.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
    except Exception:
        import traceback
        trace = traceback.format_exc()
        log.writeln("decoder raised exception: %s" % trace)
        decoder_errors += 1
        too_many = decoder_errors >= 5
        if too_many:
            raise
        return None

    # Unused result; the call and unpacking are retained so their failure
    # modes are unchanged.
    _vector, _words = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    # Collect hypotheses that will be used for learning
    learning_hyps = get_hyps(sent, goal, thedecoder.weights)
    sent.hyps = learning_hyps
    log.write("done rescoring\n")
    return sent
def process(sent):
    """Decode one training sentence and update the decoder weights from it.

    Steps, in order: decode ``sent``; find the gold hypothesis with
    ``get_gold``; register a zero update for this sentence; when
    ``opts.parallel`` is set, absorb updates sent by other nodes; run
    ``cutting_plane`` to obtain new weights and alphas; update the running
    sums used for weight averaging and (optionally) the feature scales; send
    this sentence's updates to the other nodes; record the 1-best on ``sent``.

    Mutates module-level state: ``updates``, ``alphas``, ``decoder_errors``,
    ``thedecoder.weights``, ``nweights``, ``sumweights_helper`` and, depending
    on the flags, ``sum_updates2``, ``n_updates``, ``feature_scales`` and
    ``requests``.

    Returns ``sent`` with ``score_comps`` and ``ewords`` set, or ``None`` when
    decoding failed and fewer than 100 errors have been counted.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 100.
    """
    global alphas
    if online_learning:
        # Start from an empty constraint set for every sentence.
        updates.clear()
        alphas.clear()
    theoracle.input(sent)
    log.write("done preparing\n")
    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # NOTE(review): reset happens before the None check, so a parse
        # failure that reaches this point leaves the counter at 1 -- confirm.
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.writeln(
            "decoder raised exception: %s %s" %
            (sent, "".join(traceback.format_exception(*sys.exc_info()))))
        decoder_errors += 1
        if decoder_errors >= 100:
            log.write(
                "decoder failed too many times, passing exception through!\n"
            )
            raise
        else:
            return
    # Add the oracle models' features to the forest.
    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")
    bestscore = get_score(bestv, best)
    log.write("best hyp: %s %s cost=%s score=%s\n" % (" ".join(
        sym.tostring(e) for e in best), bestv, thedecoder.weights.dot(bestv), bestscore))
    goldv, gold, goldscore = get_gold(sent, goal, thedecoder.weights)
    assert (
        sent.id not in updates
    )  # in batch learning, this can happen, and we would have to undo the update associated with this sentence
    # Seed this sentence with a single zero update carrying the full
    # learning rate.
    updates[sent.id] = [(svector.Vector(), 0.)]
    alphas[sent.id] = [max_learning_rate]
    if opts.parallel:
        # Drain every pending tag-1 message without blocking.
        while True:
            if mpi.world.iprobe(tag=1):
                (sentid, vscores) = mpi.world.recv(tag=1)
                log.write("received update for %s\n" % (sentid, ))
                if sentid in updates:  # see comment above
                    log.write("ignoring update for %s\n" % (sentid, ))
                    continue  # drop this update on the floor
                updates[sentid] = vscores
                alphas[sentid] = [max_learning_rate
                                  ] + [0.] * (len(vscores) - 1)
                # since the first update is zero, the alphas & updates
                # are still consistent with weights
            else:
                break

    def oracle(weights):
        # For the given weights, return one (vector difference, score
        # difference) pair between gold and each hypothesis from get_hyps.
        hyps = get_hyps(sent, goal, weights)
        return [(goldv - hypv, goldscore - hypscore) for (hypv, hyp, hypscore) in hyps]

    thedecoder.weights, alphas = cutting_plane(thedecoder.weights, updates, alphas, {sent.id: oracle})
    remove_zeros(thedecoder.weights)
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))
    log.write("weight norm: %s\n" % (math.sqrt(thedecoder.weights.normsquared())))
    # update weight sum for averaging
    global nweights, sumweights_helper
    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    # NOTE(review): itertools.izip exists only in Python 2.
    for sentid in updates:
        for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
            apply_update(sumweights_helper, nweights * alpha * v)
    # NOTE(review): incremented once per call, not once per update -- confirm.
    nweights += 1
    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates, feature_scales
        for sentid in updates:
            # u accumulates this sentence's updates, each weighted by its
            # alpha relative to max_learning_rate.
            u = svector.Vector()
            for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
                u += alpha / max_learning_rate * v
            sum_updates2 += u * u
            n_updates += 1
        try:
            default_feature_scale = 1. / compute_variance(0, n_updates)
        except ZeroDivisionError:
            default_feature_scale = 0.  # pseudoinverse
        # Features absent from sum_updates2 fall back to the default scale.
        feature_scales = collections.defaultdict(
            lambda: default_feature_scale)
        for feat in sum_updates2:
            try:
                feature_scales[feat] = 1. / compute_variance(
                    sum_updates2[feat], n_updates)
            except ZeroDivisionError:
                feature_scales[feat] = 0.  # pseudoinverse
        log.write(
            "feature scales: %s\n" %
            (" ".join("%s=%s" % (f, feature_scales[f]) for f in watch_features
                      if f in feature_scales)))
    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if not request.test()]
        # transmit updates to other nodes
        for node in parallel.slaves:
            if node != parallel.rank:
                requests.append(
                    mpi.world.isend(value=(sent.id, updates[sent.id]), dest=node, tag=1))
    # From here on bestv is the finished oracle vector, not the raw one.
    bestv = theoracle.finish(bestv, best)
    theoracle.update(bestv)
    sent.score_comps = bestv
    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def process(sent):
    """Decode one training sentence and update the decoder weights from it.

    Steps, in order: decode ``sent``; find the gold hypothesis with
    ``get_gold``; register a zero update for this sentence; when
    ``opts.parallel`` is set, absorb updates sent by other nodes; run
    ``cutting_plane`` to obtain new weights and alphas; update the running
    sums used for weight averaging and (optionally) the feature scales; send
    this sentence's updates to the other nodes; record the 1-best on ``sent``.

    Mutates module-level state: ``updates``, ``alphas``, ``decoder_errors``,
    ``thedecoder.weights``, ``nweights``, ``sumweights_helper`` and, depending
    on the flags, ``sum_updates2``, ``n_updates``, ``feature_scales`` and
    ``requests``.

    Returns ``sent`` with ``score_comps`` and ``ewords`` set, or ``None`` when
    decoding failed and fewer than 100 errors have been counted.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 100.
    """
    global alphas
    if online_learning:
        # Start from an empty constraint set for every sentence.
        updates.clear()
        alphas.clear()
    theoracle.input(sent)
    log.write("done preparing\n")
    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # NOTE(review): reset happens before the None check, so a parse
        # failure that reaches this point leaves the counter at 1 -- confirm.
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.writeln(
            "decoder raised exception: %s %s" % (sent, "".join(traceback.format_exception(*sys.exc_info())))
        )
        decoder_errors += 1
        if decoder_errors >= 100:
            log.write("decoder failed too many times, passing exception through!\n")
            raise
        else:
            return
    # Add the oracle models' features to the forest.
    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")
    bestscore = get_score(bestv, best)
    log.write(
        "best hyp: %s %s cost=%s score=%s\n"
        % (" ".join(sym.tostring(e) for e in best), bestv, thedecoder.weights.dot(bestv), bestscore)
    )
    goldv, gold, goldscore = get_gold(sent, goal, thedecoder.weights)
    assert (
        sent.id not in updates
    )  # in batch learning, this can happen, and we would have to undo the update associated with this sentence
    # Seed this sentence with a single zero update carrying the full
    # learning rate.
    updates[sent.id] = [(svector.Vector(), 0.0)]
    alphas[sent.id] = [max_learning_rate]
    if opts.parallel:
        # Drain every pending tag-1 message without blocking.
        while True:
            if mpi.world.iprobe(tag=1):
                (sentid, vscores) = mpi.world.recv(tag=1)
                log.write("received update for %s\n" % (sentid,))
                if sentid in updates:  # see comment above
                    log.write("ignoring update for %s\n" % (sentid,))
                    continue  # drop this update on the floor
                updates[sentid] = vscores
                alphas[sentid] = [max_learning_rate] + [0.0] * (len(vscores) - 1)
                # since the first update is zero, the alphas & updates
                # are still consistent with weights
            else:
                break

    def oracle(weights):
        # For the given weights, return one (vector difference, score
        # difference) pair between gold and each hypothesis from get_hyps.
        hyps = get_hyps(sent, goal, weights)
        return [(goldv - hypv, goldscore - hypscore) for (hypv, hyp, hypscore) in hyps]

    thedecoder.weights, alphas = cutting_plane(thedecoder.weights, updates, alphas, {sent.id: oracle})
    remove_zeros(thedecoder.weights)
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))
    log.write("weight norm: %s\n" % (math.sqrt(thedecoder.weights.normsquared())))
    # update weight sum for averaging
    global nweights, sumweights_helper
    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    # NOTE(review): itertools.izip exists only in Python 2.
    for sentid in updates:
        for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
            apply_update(sumweights_helper, nweights * alpha * v)
    # NOTE(review): incremented once per call, not once per update -- confirm.
    nweights += 1
    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates, feature_scales
        for sentid in updates:
            # u accumulates this sentence's updates, each weighted by its
            # alpha relative to max_learning_rate.
            u = svector.Vector()
            for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
                u += alpha / max_learning_rate * v
            sum_updates2 += u * u
            n_updates += 1
        try:
            default_feature_scale = 1.0 / compute_variance(0, n_updates)
        except ZeroDivisionError:
            default_feature_scale = 0.0  # pseudoinverse
        # Features absent from sum_updates2 fall back to the default scale.
        feature_scales = collections.defaultdict(lambda: default_feature_scale)
        for feat in sum_updates2:
            try:
                feature_scales[feat] = 1.0 / compute_variance(sum_updates2[feat], n_updates)
            except ZeroDivisionError:
                feature_scales[feat] = 0.0  # pseudoinverse
        log.write(
            "feature scales: %s\n"
            % (" ".join("%s=%s" % (f, feature_scales[f]) for f in watch_features if f in feature_scales))
        )
    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if not request.test()]
        # transmit updates to other nodes
        for node in parallel.slaves:
            if node != parallel.rank:
                requests.append(mpi.world.isend(value=(sent.id, updates[sent.id]), dest=node, tag=1))
    # From here on bestv is the finished oracle vector, not the raw one.
    bestv = theoracle.finish(bestv, best)
    theoracle.update(bestv)
    sent.score_comps = bestv
    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def process(sent):
    """Decode one training sentence and update the weights via a quadratic program.

    Steps, in order: decode ``sent`` and add oracle features to the forest;
    build a ``maxmargin.QuadraticProgram`` holding a ``ForestInstance`` for
    this sentence plus any instances received from other nodes; add cached
    hypotheses; compute a per-feature learning-rate vector; call
    ``qp.optimize``; update the weight-averaging sums and (optionally) the
    feature-scale statistics; send this sentence's hypotheses to the other
    nodes; cache hypotheses; record the 1-best on ``sent``.

    Mutates module-level state: ``decoder_errors``, ``thedecoder.weights``,
    ``nweights``, ``sumweights_helper`` and, depending on the flags,
    ``sum_updates2``, ``n_updates``, ``requests`` and ``hyp_cache``.

    Returns ``sent`` with ``flen``, ``score_comps`` and ``ewords`` set, or
    ``None`` when decoding failed and fewer than 3 errors have been counted.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 3.
    """
    # Need to add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    distance = sent.compute_distance()
    sent.flen = distance.get((0,sent.n-1), None) # could be missing if n == 0

    theoracle.input(sent)

    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # NOTE(review): reset happens before the None check, so a parse
        # failure that reaches this point leaves the counter at 1 -- confirm.
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % "".join(traceback.format_exception(*sys.exc_info())))
        decoder_errors += 1
        if decoder_errors >= 3:
            log.write("decoder failed too many times, passing exception through!\n")
            raise
        else:
            return

    # Add the oracle models' features to the forest.
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)
    log.write("best hyp: %s %s cost=%s loss=%s\n" % (" ".join(sym.tostring(e) for e in best), best_vector, thedecoder.weights.dot(best_mvector), best_loss))

    # Set up quadratic program
    qp = maxmargin.QuadraticProgram()
    cur_instance = ForestInstance(sent.id, goal)
    qp.add_instance(cur_instance)

    if opts.parallel:
        # Pull in every pending tag-1 message; Iprobe does not block.
        while MPI.COMM_WORLD.Iprobe(tag=1, source=MPI.ANY_SOURCE):
            log.writeln("received update...\n")
            recv_instance = MPI.COMM_WORLD.recv(tag=1, source=MPI.ANY_SOURCE)
            log.writeln("received update for %s" % (recv_instance.instance_id,))
            # need to check for duplicate instances?
            qp.add_instance(recv_instance)

    # Add cached hyps
    if cache_hyps:
        for instance in qp.instances:
            hyps = hyp_cache[instance.instance_id]
            if len(hyps) > 0:
                log.writeln("retrieved %d cached hyps for %s" % (len(hyps), instance.instance_id))
            for hyp in hyps:
                instance.add_hyp(hyp)

    # Make oracle weight vector
    oweights = theoracle.make_weights(additive="sentence")
    oweights *= -1

    # Make vector of learning rates
    # We have to be careful to assign a learning rate to every possible feature
    # This is not very efficient
    feats = set()
    # Features seen on any deduction in the forest...
    for item in goal.bottomup():
        for ded in item.deds:
            feats.update(ded.dcost)
    # ...plus features of every hypothesis already attached to an instance.
    for instance in qp.instances:
        for hyp in instance.hyps:
            feats.update(hyp.mvector)
    learning_rates = svector.Vector()
    for feat in feats:
        learning_rates[feat] = compute_feature_learning_rate(feat)
    if log.level >= 3:
        log.writeln("learning rate vector: %s" % learning_rates)

    qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

    thedecoder.weights.compact()
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))

    # update weight sum for averaging
    global nweights, sumweights_helper

    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    sumweights_helper += nweights * qp.delta_mweights()
    nweights += 1

    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates
        for instance in qp.instances:
            # The string literal below is a disabled earlier formulation; it
            # is evaluated as a no-op expression statement.
            """u = svector.Vector(instance.hope.mvector) for hyp in instance.hyps: u -= hyp.alpha*hyp.mvector sum_updates2 += u*u"""
            for hyp in instance.hyps:
                if hyp is not instance.hope: # hyp = instance.hope is a non-update
                    u = instance.hope.mvector - hyp.mvector
                    sum_updates2 += hyp.alpha*(u*u)
                    n_updates += hyp.alpha

        #log.write("sum of squared updates: %s\n" % (" ".join("%s=%s" % (f,sum_updates2[f]) for f in watch_features)))
        # NOTE(review): confirm this log line belongs inside the
        # update_feature_scales branch rather than after it.
        log.write("feature learning rates: %s\n" % (" ".join("%s=%s" % (f,compute_feature_learning_rate(f)) for f in watch_features)))

    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if not request.Test()]

        # transmit updates to other nodes
        # make a plain Instance (without forest)
        # we used to designate a hope translation,
        #send_instance = maxmargin.Instance(cur_instance.hyps, hope=cur_instance.hope, instance_id=cur_instance.sentid)
        # but now are letting the other node choose.
        send_instance = maxmargin.Instance(cur_instance.hyps, instance_id=cur_instance.sentid)

        for node in parallel.slaves:
            if node != parallel.rank:
                requests.append(MPI.COMM_WORLD.isend(send_instance, dest=node, tag=1))

    # save all hyps for next time
    if cache_hyps:
        # Squared-distance threshold below which two hyps count as the same.
        epsilon = 0.01
        for instance in qp.instances:
            hyps = hyp_cache[instance.instance_id]
            for hyp in instance.hyps:
                for hyp1 in hyps:
                    if (hyp.mvector-hyp1.mvector).normsquared() <= epsilon and (hyp.ovector-hyp1.ovector).normsquared() <= epsilon:
                        break
                else:
                    # No cached hyp was close enough in both vectors.
                    if log.level >= 2:
                        log.writeln("add hyp to cache: %s" % hyp)
                    hyps.append(hyp)

    theoracle.update(best_ovector)
    sent.score_comps = best_ovector

    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())

    sent.ewords = [sym.tostring(e) for e in best]

    return sent
def process(sent):
    """Decode one training sentence and update the weights via a quadratic program.

    Steps, in order: decode ``sent`` and add oracle features to the forest;
    build a ``maxmargin.QuadraticProgram`` holding a ``ForestInstance`` for
    this sentence plus any instances received from other nodes; add cached
    hypotheses; compute a per-feature learning-rate vector; call
    ``qp.optimize``; update the weight-averaging sums and (optionally) the
    feature-scale statistics; send this sentence's hypotheses to the other
    nodes; cache hypotheses; record the 1-best on ``sent``.

    Mutates module-level state: ``decoder_errors``, ``thedecoder.weights``,
    ``nweights``, ``sumweights_helper`` and, depending on the flags,
    ``sum_updates2``, ``n_updates``, ``requests`` and ``hyp_cache``.

    Returns ``sent`` with ``flen``, ``score_comps`` and ``ewords`` set, or
    ``None`` when decoding failed and fewer than 3 errors have been counted.

    Raises: re-raises the decoder's exception once ``decoder_errors``
    reaches 3.
    """
    # Need to add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    distance = sent.compute_distance()
    sent.flen = distance.get((0, sent.n - 1), None)  # could be missing if n == 0

    theoracle.input(sent)

    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # NOTE(review): reset happens before the None check, so a parse
        # failure that reaches this point leaves the counter at 1 -- confirm.
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % "".join(traceback.format_exception(*sys.exc_info())))
        decoder_errors += 1
        if decoder_errors >= 3:
            log.write(
                "decoder failed too many times, passing exception through!\n"
            )
            raise
        else:
            return

    # Add the oracle models' features to the forest.
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(
        additive="sentence").dot(best_ovector)
    log.write("best hyp: %s %s cost=%s loss=%s\n" % (" ".join(sym.tostring(e) for e in best), best_vector, thedecoder.weights.dot(best_mvector), best_loss))

    # Set up quadratic program
    qp = maxmargin.QuadraticProgram()
    cur_instance = ForestInstance(sent.id, goal)
    qp.add_instance(cur_instance)

    if opts.parallel:
        # Pull in every pending tag-1 message; Iprobe does not block.
        while MPI.COMM_WORLD.Iprobe(tag=1, source=MPI.ANY_SOURCE):
            log.writeln("received update...\n")
            recv_instance = MPI.COMM_WORLD.recv(tag=1, source=MPI.ANY_SOURCE)
            log.writeln("received update for %s" % (recv_instance.instance_id, ))
            # need to check for duplicate instances?
            qp.add_instance(recv_instance)

    # Add cached hyps
    if cache_hyps:
        for instance in qp.instances:
            hyps = hyp_cache[instance.instance_id]
            if len(hyps) > 0:
                log.writeln("retrieved %d cached hyps for %s" % (len(hyps), instance.instance_id))
            for hyp in hyps:
                instance.add_hyp(hyp)

    # Make oracle weight vector
    oweights = theoracle.make_weights(additive="sentence")
    oweights *= -1

    # Make vector of learning rates
    # We have to be careful to assign a learning rate to every possible feature
    # This is not very efficient
    feats = set()
    # Features seen on any deduction in the forest...
    for item in goal.bottomup():
        for ded in item.deds:
            feats.update(ded.dcost)
    # ...plus features of every hypothesis already attached to an instance.
    for instance in qp.instances:
        for hyp in instance.hyps:
            feats.update(hyp.mvector)
    learning_rates = svector.Vector()
    for feat in feats:
        learning_rates[feat] = compute_feature_learning_rate(feat)
    if log.level >= 3:
        log.writeln("learning rate vector: %s" % learning_rates)

    qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

    thedecoder.weights.compact()
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))

    # update weight sum for averaging
    global nweights, sumweights_helper

    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    sumweights_helper += nweights * qp.delta_mweights()
    nweights += 1

    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates
        for instance in qp.instances:
            # The string literal below is a disabled earlier formulation; it
            # is evaluated as a no-op expression statement.
            """u = svector.Vector(instance.hope.mvector) for hyp in instance.hyps: u -= hyp.alpha*hyp.mvector sum_updates2 += u*u"""
            for hyp in instance.hyps:
                if hyp is not instance.hope:  # hyp = instance.hope is a non-update
                    u = instance.hope.mvector - hyp.mvector
                    sum_updates2 += hyp.alpha * (u * u)
                    n_updates += hyp.alpha

        #log.write("sum of squared updates: %s\n" % (" ".join("%s=%s" % (f,sum_updates2[f]) for f in watch_features)))
        # NOTE(review): confirm this log line belongs inside the
        # update_feature_scales branch rather than after it.
        log.write("feature learning rates: %s\n" % (" ".join("%s=%s" % (f, compute_feature_learning_rate(f)) for f in watch_features)))

    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if
                    not request.Test()]

        # transmit updates to other nodes
        # make a plain Instance (without forest)
        # we used to designate a hope translation,
        #send_instance = maxmargin.Instance(cur_instance.hyps, hope=cur_instance.hope, instance_id=cur_instance.sentid)
        # but now are letting the other node choose.
        send_instance = maxmargin.Instance(cur_instance.hyps, instance_id=cur_instance.sentid)

        for node in parallel.slaves:
            if node != parallel.rank:
                requests.append(
                    MPI.COMM_WORLD.isend(send_instance, dest=node, tag=1))

    # save all hyps for next time
    if cache_hyps:
        # Squared-distance threshold below which two hyps count as the same.
        epsilon = 0.01
        for instance in qp.instances:
            hyps = hyp_cache[instance.instance_id]
            for hyp in instance.hyps:
                for hyp1 in hyps:
                    if (hyp.mvector - hyp1.mvector).normsquared() <= epsilon and (
                            hyp.ovector - hyp1.ovector).normsquared() <= epsilon:
                        break
                else:
                    # No cached hyp was close enough in both vectors.
                    if log.level >= 2:
                        log.writeln("add hyp to cache: %s" % hyp)
                    hyps.append(hyp)

    theoracle.update(best_ovector)
    sent.score_comps = best_ovector

    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())

    sent.ewords = [sym.tostring(e) for e in best]

    return sent