def pmap(comm, nodes, inputs, f=None, g=None):
    """Farm ``inputs`` out to the worker ``nodes`` and collect the results.

    Each input is sent to an idle node with ``comm.send``; replies are read
    with ``comm.recv`` and stored at the index of the input that was handed
    to the replying node, so ``outputs[i]`` belongs to ``inputs[i]``.

    f gets called (as ``f(x, node=node)``) before sending input out to a
    slave; its return value is what is sent.
    g gets called (as ``g(y, node=node)``) after receiving output back from
    a slave; its return value is what is stored.

    Raises ValueError when there are inputs but no nodes, because the
    receive below could then never be satisfied.
    """
    busy = {}  # node -> index of the input it is currently working on
    idle = set(nodes)
    total = len(inputs)
    if total > 0 and not idle:
        raise ValueError("pmap: cannot process %d inputs without any nodes" % total)

    outputs = [None] * total
    next_index = 0
    finished = 0
    while finished < total:
        # Hand out work for as long as there is both work and a free node.
        while next_index < total and idle:
            x = inputs[next_index]
            node = idle.pop()
            if f:
                x = f(x, node=node)
            comm.send(x, dest=node)
            busy[node] = next_index
            next_index += 1

        status = MPI.Status()
        y = comm.recv(source=MPI.ANY_SOURCE, status=status)
        node = status.source
        if node not in busy:
            # There is no input index to file this reply under; looking it
            # up anyway would raise KeyError, so drop the stray message.
            log.writeln("warning: received output from idle node %s" % node)
            continue
        if g:
            y = g(y, node=node)
        outputs[busy.pop(node)] = y
        idle.add(node)
        finished += 1
    return outputs
def _optimize_pair(self, hyp2, hyp1):
    """Move weight (alpha) from hyp1 to hyp2, keeping their total constant.

    The step is taken along hyp2.mvector - hyp1.mvector and is clipped so
    that neither alpha becomes negative. Returns True when an update was
    applied and False when the direction has a non-positive scaled norm.
    """
    # direction in which self.qp.mweights will move
    direction = hyp2.mvector - hyp1.mvector
    if verbosity >= 5:
        log.writeln("update direction: %s" % direction)

    # step size that minimizes the combined violation
    denom = direction.dot(self.qp.learning_rate * direction)
    if not denom > 0.:
        log.writeln("warning: update direction would be useless, not updating")
        return False

    unclipped = (hyp2.violation - hyp1.violation) / denom
    # but don't let any alpha go negative
    assert -hyp2.alpha <= hyp1.alpha
    delta = max(-hyp2.alpha, min(hyp1.alpha, unclipped))
    if verbosity >= 5:
        log.writeln("delta: %s->%s" % (unclipped, delta))

    hyp2.alpha += delta
    hyp1.alpha -= delta
    self.qp.mweights += delta * self.qp.learning_rate * direction

    if verbosity >= 5:
        if len(watch_features) > 0:
            log.writeln("new weights: %s" % (self.qp.mweights * watch_features))
        log.writeln("objective function: %s" % self.qp.objective())
    return True
def objective(self):
    """Return the current objective value.

    This is half the weight change dotted with itself scaled by
    1/learning_rate, plus, for every instance, the largest violation among
    that instance's hyps.
    """
    dmweights = self.delta_mweights()
    obj = 0.5 * dmweights.dot(dmweights / self.learning_rate)
    for instance in self.instances:
        if verbosity >= 3:
            log.writeln("instance %s:" % instance.instance_id)
        worst = max(instance.violation(hyp) for hyp in instance.hyps)
        obj += worst
        if verbosity >= 3:
            for hyp in instance.hyps:
                details = (hyp.hyp_id, hyp.alpha, instance.violation(hyp))
                log.writeln(" hyp %s: alpha=%s violation=%s" % details)
    return obj
def parse_chat():
    """React to a chat message: log its tokens and answer known keywords.

    The chat text is read from q3a.Cmd_Argv(1) and split on single spaces.
    The second token is compared against the greeting and info keywords;
    messages with fewer than two tokens are logged and otherwise ignored.
    """
    log_command()
    tokens = re.split(' ', q3a.Cmd_Argv(1))
    for t in tokens:
        log.writeln("script", t)

    if len(tokens) < 2:
        # No second token to inspect; indexing tokens[1] would raise IndexError.
        return

    keyword = tokens[1]
    if keyword in ("^2hello", "^2hi", "^2privet"):
        q3a.Cbuf_ExecuteText(1, "say HELLO")
        # can play sound/feedback/intro_01.wav and time s_volume 0 after 'welcome'
        q3a.Cbuf_ExecuteText(1, "play sound/player/bones/taunt.wav")
    if keyword == "^2?info":
        q3a.Cbuf_ExecuteText(1, "say no info")
def instruct(self, input):
    """Send input.instruction to the decoder and return the line it answers with.

    When the instruction ends with "pop-grammar;", that suffix is held back
    and only sent after the reply line has been read; otherwise the whole
    instruction is sent up front and nothing is sent afterwards
    (send_instruction is still called with an empty string).
    """
    pg = "pop-grammar;"
    instruction = input.instruction
    if instruction.endswith(pg):
        spre, spost = instruction[:-len(pg)].rstrip(), pg
    else:
        # The original referenced an undefined name here; the full
        # instruction is what must be sent when there is no suffix to split off.
        spre, spost = instruction, ''
    self.send_instruction(spre, input)
    log.writeln("reading forest from decoder\n")
    r = self.child.recvline()
    self.send_instruction(spost, input)
    return r
def _select_pair(self, epsilon=0.01):
    """Find a pair of hypotheses that violates one of the KKT conditions:

        alpha_i = 0  =>  violation_i is not the max
        alpha_i > 0  =>  violation_i is the max

    From Ben Taskar's PhD thesis, p. 80.

    Returns a pair (receiver, giver) in the argument order expected by
    _optimize_pair, or None when no violating pair is found. Relies on
    every hyp carrying a cached ``violation`` attribute.
    """
    # shuffle hyps?
    for hyp in self.hyps:
        if verbosity >= 5:
            log.writeln("hyp %s" % hyp.hyp_id)
        others = [other for other in self.hyps if other is not hyp]
        violation_max = max(other.violation for other in others)
        if verbosity >= 5:
            log.writeln(" max violation of other hyps: %s" % violation_max)

        if hyp.alpha == 0 and hyp.violation > violation_max + epsilon:
            # hyp is the worst violator but has no weight yet,
            # so find someone to take weight from
            for donor in others:
                if donor.alpha > 0:
                    if verbosity >= 5:
                        log.writeln("hyp %s alpha = %s -> fear hyp %s alpha %s" % (donor.hyp_id, donor.alpha, hyp.hyp_id, hyp.alpha))
                    return hyp, donor

        if hyp.alpha > 0 and hyp.violation < violation_max - epsilon:
            # hyp has weight but is not the worst violator,
            # so find a worse violator to give weight to
            for worse in others:
                if worse.violation > hyp.violation + epsilon:
                    if verbosity >= 5:
                        log.writeln("hyp %s alpha = %s -> worse violator hyp %s alpha %s" % (hyp.hyp_id, hyp.alpha, worse.hyp_id, worse.alpha))
                    return worse, hyp
    return None
def g(sent, node):
    """Post-receive hook: forward a finished sentence to every other slave.

    Logs the arrival, drops the send requests that report completion, then
    posts a non-blocking ('update', sent) message with tag 1 to each slave
    other than ``node``. Returns ``sent`` unchanged.
    """
    log.writeln("node %s -> sentence %s" % (node, sent.id))

    # Flush out filled requests
    done = [pos for pos, req in enumerate(requests) if req.Test()]  # test() seems buggy, why?
    for pos in reversed(done):
        del requests[pos]

    # Send update to other slaves
    for othernode in slaves:
        if othernode != node:
            log.writeln("update for sentence %s -> node %s" % (sent.id, othernode))
            pending = comm.isend(('update', sent), dest=othernode, tag=1)
            requests.append(pending)
    return sent
def topological_sort(self):
    """replace the set self.nonterminals with a list such that
    if there is a rule X -> Y, then Y precedes X in the list

    self.unary_children (a mapping from a nonterminal to the set of its
    unary children) is consumed and set to None at the end, and
    self.make_index() is called on the new list. If no childless node can
    be found, an arbitrary node is taken, its children are dropped, and a
    warning is written to stderr.
    """
    if log.level >= 3:
        log.write("Doing topological sort on nonterminals\n")
        if len(self.nonterminals) < 1000:
            log.write(" unsorted: %s\n" % " ".join(x.cat for x in self.nonterminals))

    nonterminals = []
    graph = self.unary_children

    # make unary_children into graph: every child needs its own entry.
    # Iterate over a snapshot, since setdefault may add keys while we loop.
    for (x, s) in list(graph.items()):
        for y in s:
            graph.setdefault(y, set())

    if log.level >= 3:
        log.writeln(" unary children:")
        for (x, s) in graph.items():
            log.write(" %s -> %s\n" % (x.cat, " | ".join(y.cat for y in s)))

    # Nonterminals that take no part in unary rules can go first.
    for x in self.nonterminals:
        if x not in graph:
            nonterminals.append(x)

    while len(graph) > 0:
        childless = None
        for (x, s) in graph.items():
            if len(s) == 0:
                childless = x
                break
        if childless is None:
            childless = next(iter(graph))  # arbitrary
            sys.stderr.write(" warning: cycle detected, breaking all unary children of %s\n" % childless)
        del graph[childless]
        for s in graph.values():
            s.discard(childless)
        nonterminals.append(childless)

    self.nonterminals = nonterminals
    if len(self.nonterminals) < 1000 and log.level >= 3:
        log.write(" sorted: %s\n" % " ".join(x.cat for x in self.nonterminals))
    self.make_index()
    self.unary_children = None
def _new_fear(self, epsilon=0.01):
    """Possibly add a new fear hypothesis to the QP.

    The hypothesis from self.get_fear() is added only if its violation
    exceeds that of every existing hyp by more than epsilon. Returns True
    when it was added, False otherwise (including when get_fear is not
    implemented).
    """
    try:
        fear = self.get_fear()
    except NotImplementedError:
        return False

    violation = self.violation(fear)
    if any(violation <= self.violation(hyp) + epsilon for hyp in self.hyps):
        return False

    self.add_hyp(fear)
    if verbosity >= 2:
        log.writeln("instance %s new hyp %s: %s violation=%s" % (self.instance_id, fear.hyp_id, fear, violation))
    return True
def update_server_timeout(sv, timeout):
    """Ask one server for its status over UDP and update ``sv`` from the reply.

    A getstatus packet is sent to sv.address and up to ``timeout`` seconds
    are allowed for the answer. The reply is decoded as UTF-8 (undecodable
    bytes ignored) and handed to parse_server_status_response. A timeout is
    logged and otherwise ignored. The socket is always closed.
    """
    conn = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        conn.settimeout(timeout)
        conn.sendto(b'\xFF\xFF\xFF\xFFgetstatus xxx', sv.address)
        try:
            data, _sender = conn.recvfrom(1024)
        except socket.timeout:
            log.writeln("script", "timeout")
            return
    finally:
        # The original never released the socket, leaking one per call.
        conn.close()
    parse_server_status_response(sv, data.decode('utf-8', 'ignore'))
def compute_alpha(weights, update, loss, minalpha, maxalpha):
    """MIRA formula for update size.

    Returns (loss - margin) / |update|^2 clipped to [minalpha, maxalpha],
    where margin is -weights.dot(update). With a zero-norm update the
    result is maxalpha or minalpha according to the sign of loss - margin,
    and 0.0 (with a log message) when that is zero too.
    """
    sumsq = normsquared(update)
    margin = -weights.dot(update)
    if log.level >= 4:
        log.writeln("delta = (%s-%s)/%s" % (loss, margin, sumsq))

    gap = loss - margin
    if sumsq > 0.0:
        return max(minalpha, min(maxalpha, gap / sumsq))
    if gap > 0.0:
        return maxalpha
    if gap < 0.0:
        return minalpha
    log.write("compute_alpha: 0/0, this shouldn't happen\n")
    return 0.0
def compute_alpha(weights, update, loss, minalpha, maxalpha):
    """MIRA formula for update size.

    Returns (loss - margin) / |update|^2 clipped to [minalpha, maxalpha],
    where margin is -weights.dot(update). With a zero-norm update the
    result is maxalpha or minalpha according to the sign of loss - margin,
    and 0. (with a log message) when that is zero too.
    """
    sumsq = normsquared(update)
    margin = -weights.dot(update)
    if log.level >= 4:
        log.writeln("delta = (%s-%s)/%s" % (loss, margin, sumsq))

    gap = loss - margin
    if sumsq > 0.:
        return max(minalpha, min(maxalpha, gap / sumsq))
    if gap > 0.:
        return maxalpha
    if gap < 0.:
        return minalpha
    log.write("compute_alpha: 0/0, this shouldn't happen\n")
    return 0.
def topological_sort(self):
    """replace the set self.nonterminals with a list such that
    if there is a rule X -> Y, then Y precedes X in the list

    self.unary_children (a mapping from a nonterminal to the set of its
    unary children) is consumed and set to None at the end, and
    self.make_index() is called on the new list. If no childless node can
    be found, an arbitrary node is taken, its children are dropped, and a
    warning is written to stderr.
    """
    if log.level >= 3:
        log.write("Doing topological sort on nonterminals\n")
        if len(self.nonterminals) < 1000:
            log.write(" unsorted: %s\n" % " ".join(x.cat for x in self.nonterminals))

    nonterminals = []
    graph = self.unary_children

    # make unary_children into graph: every child needs its own entry.
    # Iterate over a snapshot, since setdefault may add keys while we loop.
    for (x, s) in list(graph.items()):
        for y in s:
            graph.setdefault(y, set())

    if log.level >= 3:
        log.writeln(" unary children:")
        for (x, s) in graph.items():
            log.write(" %s -> %s\n" % (x.cat, " | ".join(y.cat for y in s)))

    # Nonterminals that take no part in unary rules can go first.
    for x in self.nonterminals:
        if x not in graph:
            nonterminals.append(x)

    while len(graph) > 0:
        childless = None
        for (x, s) in graph.items():
            if len(s) == 0:
                childless = x
                break
        if childless is None:
            childless = next(iter(graph))  # arbitrary
            sys.stderr.write(" warning: cycle detected, breaking all unary children of %s\n" % childless)
        del graph[childless]
        for s in graph.values():
            s.discard(childless)
        nonterminals.append(childless)

    self.nonterminals = nonterminals
    if len(self.nonterminals) < 1000 and log.level >= 3:
        log.write(" sorted: %s\n" % " ".join(x.cat for x in self.nonterminals))
    self.make_index()
    self.unary_children = None
def process(sent):
    """Decode one sentence, record the model-best translation on it, and
    return the rescored forest.

    A decoding failure is logged and yields None; the exception is
    re-raised once the module-level ``decoder_errors`` counter reaches 3.
    On success ``sent.score_comps`` and ``sent.ewords`` are set.
    """
    global decoder_errors

    # Add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    sent.flen = sent.compute_distance().get((0, sent.n - 1))  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 3:
            return
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    # Augment forest with oracle features
    # this is overkill if we aren't going to search for hope/fear
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)

    words = [sym.tostring(e) for e in best]
    model_cost = thedecoder.weights.dot(best_mvector)
    log.writeln("best hyp: %s %s cost=%s loss=%s" % (" ".join(words), best_vector, model_cost, best_loss))

    sent.score_comps = best_ovector
    sent.ewords = words

    return goal
def process_heldout(sent):
    """Decode one held-out sentence and store its best translation on it.

    A decoding failure is logged and yields None; the exception is
    re-raised once the module-level ``decoder_errors`` counter reaches 100.
    On success ``sent.score_comps`` and ``sent.ewords`` are set and ``sent``
    is returned.
    """
    global decoder_errors

    theoracle.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.writeln("decoder raised exception: %s %s" % (sent, traceback.format_exc()))
        decoder_errors += 1
        if decoder_errors < 100:
            return
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    bestscore = get_score(bestv, best)
    hyp_text = " ".join(sym.tostring(e) for e in best)
    log.write("best hyp: %s %s cost=%s score=%s\n" % (hyp_text, bestv, thedecoder.weights.dot(bestv), bestscore))

    sent.score_comps = theoracle.finish(bestv, best)
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def process_heldout(sent):
    """Decode one held-out sentence and store its best translation on it.

    A decoding failure is logged and yields None; the exception is
    re-raised once the module-level ``decoder_errors`` counter reaches 100.
    On success ``sent.score_comps`` and ``sent.ewords`` are set and ``sent``
    is returned.
    """
    global decoder_errors

    theoracle.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.writeln("decoder raised exception: %s %s" % (sent, traceback.format_exc()))
        decoder_errors += 1
        if decoder_errors < 100:
            return
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)
    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    bestscore = get_score(bestv, best)
    hyp_text = " ".join(sym.tostring(e) for e in best)
    log.write("best hyp: %s %s cost=%s score=%s\n" % (hyp_text, bestv, thedecoder.weights.dot(bestv), bestscore))

    sent.score_comps = theoracle.finish(bestv, best)
    sent.ewords = [sym.tostring(e) for e in best]
    return sent
def process(sent):
    """Decode one sentence and attach the hypotheses used for learning.

    A decoder exception is logged and yields None; it is re-raised once the
    module-level ``decoder_errors`` counter reaches 5. On success
    ``sent.hyps`` is set from get_hyps and ``sent`` is returned.
    """
    global decoder_errors

    oraclemodel.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
    except Exception:
        import traceback
        log.writeln("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 5:
            return
        raise

    # The 1-best is fetched (and unpacked) but not used further here.
    _bestv, _best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    # Collect hypotheses that will be used for learning
    sent.hyps = get_hyps(sent, goal, thedecoder.weights)
    log.write("done rescoring\n")
    return sent
def process(sent):
    """Decode one sentence, record the model-best translation on it, and
    return the rescored forest.

    A decoding failure is logged and yields None; the exception is
    re-raised once the module-level ``decoder_errors`` counter reaches 3.
    On success ``sent.score_comps`` and ``sent.ewords`` are set.
    """
    global decoder_errors

    # Add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    sent.flen = sent.compute_distance().get((0, sent.n - 1))  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 3:
            return
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    # Augment forest with oracle features
    # this is overkill if we aren't going to search for hope/fear
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)

    words = [sym.tostring(e) for e in best]
    model_cost = thedecoder.weights.dot(best_mvector)
    log.writeln("best hyp: %s %s cost=%s loss=%s" % (" ".join(words), best_vector, model_cost, best_loss))

    sent.score_comps = best_ovector
    sent.ewords = words

    return goal
def start_decoder(self):
    """(Re)start the decoder subprocess.

    If a child is attached it is stopped first, on a best-effort basis:
    when its returncode is falsy its pipes are closed and it is waited for,
    otherwise it is sent SIGKILL. Failures while stopping are logged and do
    not prevent the new decoder from being launched. Afterwards
    ``decoder_age`` is reset to 0 and ``oldweights`` to an empty vector.
    """
    if self.child:
        log.writeln("stopping decoder")
        try:
            if not self.child.returncode:
                self.child.stdin.close()
                self.child.stdout.close()
                # "Warning: This will deadlock when using stdout=PIPE and/or
                # stderr=PIPE and the child process generates enough output to
                # a pipe such that it blocks waiting for the OS pipe buffer to
                # accept more data."
                # but closing stdout means that it should get SIGPIPE if process
                # has buffered output or writes more. so OK.
                self.child.wait()
            else:
                os.kill(self.child.pid, signal.SIGKILL)
        except Exception as e:
            # Still best effort, but no longer silent, and no longer
            # swallowing KeyboardInterrupt/SystemExit as the bare except did.
            log.writeln("warning: error while stopping decoder: %s" % (e,))
        self.child = None

    self.child = Popen([opts.decoder], cwd=opts.gars_dir, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    log.writeln("started decoder subprocess=%s at %s" % (self.child.pid, log.datetoday()))
    self.decoder_age = 0
    self.oldweights = svector.Vector()
def start_decoder(self):
    """(Re)start the decoder subprocess.

    If a child is attached it is stopped first, on a best-effort basis:
    when its returncode is falsy its pipes are closed and it is waited for,
    otherwise it is sent SIGKILL. Failures while stopping are logged and do
    not prevent the new decoder from being launched. Afterwards
    ``decoder_age`` is reset to 0 and ``oldweights`` to an empty vector.
    """
    if self.child:
        log.writeln("stopping decoder")
        try:
            if not self.child.returncode:
                self.child.stdin.close()
                self.child.stdout.close()
                # "Warning: This will deadlock when using stdout=PIPE and/or
                # stderr=PIPE and the child process generates enough output to
                # a pipe such that it blocks waiting for the OS pipe buffer to
                # accept more data."
                # but closing stdout means that it should get SIGPIPE if process
                # has buffered output or writes more. so OK.
                self.child.wait()
            else:
                os.kill(self.child.pid, signal.SIGKILL)
        except Exception as e:
            # Still best effort, but no longer silent, and no longer
            # swallowing KeyboardInterrupt/SystemExit as the bare except did.
            log.writeln("warning: error while stopping decoder: %s" % (e,))
        self.child = None

    self.child = Popen([opts.decoder], cwd=opts.gars_dir, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    log.writeln("started decoder subprocess=%s at %s" % (self.child.pid, log.datetoday()))
    self.decoder_age = 0
    self.oldweights = svector.Vector()
def process(sent):
    """Decode one sentence and attach the hypotheses used for learning.

    A decoder exception is logged and yields None; it is re-raised once the
    module-level ``decoder_errors`` counter reaches 5. On success
    ``sent.hyps`` is set from get_hyps and ``sent`` is returned.
    """
    global decoder_errors

    oraclemodel.input(sent)
    log.write("done preparing\n")

    try:
        goal = thedecoder.translate(sent)
    except Exception:
        import traceback
        log.writeln("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 5:
            return
        raise

    # The 1-best is fetched (and unpacked) but not used further here.
    _bestv, _best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    # Collect hypotheses that will be used for learning
    sent.hyps = get_hyps(sent, goal, thedecoder.weights)
    log.write("done rescoring\n")
    return sent
def pmap_slave(f, verbose=False, tag=0, hook=None):
    """Worker loop: apply ``f`` to every item the master sends.

    Each message received from ``master`` on ``tag`` is passed through ``f``
    and the result is sent back; ``hook`` (if given) is then called with no
    arguments. A ``Die`` message ends the loop. Always returns an empty
    tuple.
    """
    running = True
    while running:
        if verbose:
            log.writeln("ready to receive data")
        incoming = world.recv(source=master, tag=tag)

        if isinstance(incoming, Die):
            if verbose:
                log.writeln("received from master: Die")
            running = False
        else:
            if verbose:
                log.writeln("received from master: %s" % (incoming,))
            outgoing = f(incoming)
            if verbose:
                log.writeln("sending output to master")
            world.send(outgoing, dest=master, tag=tag)
            if hook:
                hook()

    if verbose:
        log.writeln("finished")
    return ()
def pmap_slave(f, verbose=False, tag=0, hook=None):
    """Worker loop: apply ``f`` to every item the master sends.

    Each message received from ``master`` on ``tag`` is passed through ``f``
    and the result is sent back; ``hook`` (if given) is then called with no
    arguments. A ``Die`` message ends the loop. Always returns an empty
    tuple.
    """
    running = True
    while running:
        if verbose:
            log.writeln("ready to receive data")
        incoming = world.recv(source=master, tag=tag)

        if isinstance(incoming, Die):
            if verbose:
                log.writeln("received from master: Die")
            running = False
        else:
            if verbose:
                log.writeln("received from master: %s" % (incoming,))
            outgoing = f(incoming)
            if verbose:
                log.writeln("sending output to master")
            world.send(outgoing, dest=master, tag=tag)
            if hook:
                hook()

    if verbose:
        log.writeln("finished")
    return ()
def process(sent):
    """Translate one sentence and write whichever side outputs are enabled.

    Returns ``sent`` with ``ewords`` set to the best translation, or None
    (after logging a warning) when the decoder produced no forest.

    Optional outputs, each controlled by a module-level option or file:
      * opts.forest_dir: the forest as gzipped JSON;
      * opts.rule_posterior_dir: per-rule posteriors and the max-posterior
        derivation (this reweights the forest with "posterior=1");
      * n_best_file, french_parse_file, english_parse_file.

    Files opened here are managed by ``with`` so they are closed even if
    writing raises.
    """
    goal = thedecoder.translate(sent)
    thedecoder.process_output(sent, goal)

    if goal is None:
        log.writeln("warning: parse failure")
        return None

    if opts.forest_dir:
        forest_path = os.path.join(opts.forest_dir, "forest.%s.gz" % sent.id)
        with gzip.open(forest_path, "w") as forest_file:
            forest_file.write(
                forest.forest_to_json(
                    goal, fwords=sent.words, mode="english", models=thedecoder.models, weights=thedecoder.weights
                )
            )

    if opts.rule_posterior_dir:
        posterior_path = os.path.join(opts.rule_posterior_dir, "rule_posterior.%s" % sent.id)
        with open(posterior_path, "w") as rule_posterior_file:
            beta = 1.0
            insides = goal.compute_inside(thedecoder.weights, beta=beta)
            outsides = goal.compute_outside(thedecoder.weights, insides, beta=beta)
            z = insides[id(goal)]
            for item in goal.bottomup():
                for ded in item.deds:
                    # outside of the item + this deduction + insides of its
                    # antecedents, minus the total z
                    c = outsides[id(item)]
                    c += thedecoder.weights.dot(ded.dcost)
                    c += sum(insides[id(ant)] for ant in ded.ants)
                    c -= z
                    rule_posterior_file.write(
                        "%s ||| span=%s posterior=%s\n" % (ded.rule, (item.i, item.j), cost.prob(c))
                    )
                    # stored so the forest can be reweighted by posterior below
                    ded.dcost["posterior"] = c

        max_path = os.path.join(opts.rule_posterior_dir, "max_posterior.%s" % sent.id)
        with open(max_path, "w") as max_posterior_file:
            goal.reweight(svector.Vector("posterior=1"))
            max_posterior = goal.viterbi_deriv()

            def show(ded, antvalues):
                # Bracket each deduction's yield with its posterior.
                if ded.rule:
                    value = rule.subst(ded.rule.erhs, antvalues)
                else:
                    value = antvalues[0]
                return ("[%.3f" % cost.prob(ded.dcost["posterior"]),) + value + ("]",)

            value = max_posterior.value(show)
            s = " ".join(value)
            max_posterior_file.write("%s\n" % s)

    outputs = get_nbest(goal, n_best, ambiguity_limit)

    if n_best_file:
        for (v, e) in outputs:
            e = " ".join(e)
            # n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, -thedecoder.weights.dot(v)))
            n_best_file.write("%s ||| %s ||| %s\n" % (sent.id, e, v))
        n_best_file.flush()

    (bestv, best) = outputs[0]

    if french_parse_file:
        french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
        french_parse_file.flush()
    if english_parse_file:
        english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
        english_parse_file.flush()

    if log.level >= 1:
        gc.collect()
        log.write(" done decoding, memory=%s\n" % monitor.memory())
        log.write(" features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

    sent.ewords = best
    return sent
def _optimize_workingset(self, max_iterations=1000):
    """Optimize mweights for a fixed working set of hyps, using oweights
    to compute loss function.

    Sweeps over the instances, each time picking a KKT-violating pair and
    optimizing it, until a full sweep changes nothing or ``max_iterations``
    sweeps have been made.
    """
    iterations = 0
    while True:
        if verbosity >= 5:
            log.writeln("SMO iteration %d" % iterations)
        changed = False

        # shuffle instances?
        for instance in self.instances:
            if len(instance.hyps) >= 2:
                if verbosity >= 5:
                    log.writeln("try to improve instance %s:" % (instance.instance_id))

                # cache violation inside hyps, needed by both _select_pair and _optimize_pair
                for hyp in instance.hyps:
                    hyp.violation = instance.violation(hyp)

                pair = instance._select_pair()
                if pair is not None:
                    if instance._optimize_pair(*pair):
                        changed = True
                elif verbosity >= 5:
                    log.writeln("all KKT conditions (almost) satisfied, do nothing")

        iterations += 1
        if iterations >= max_iterations:
            if verbosity >= 1:
                log.writeln("SMO: giving up")
            break
        if not changed:
            break

    if verbosity >= 1:
        log.writeln("SMO: finished in %d iterations" % iterations)

    if verbosity >= 4:
        if len(watch_features) > 0:
            log.writeln("new weights: %s" % (self.mweights * watch_features))
        log.writeln("objective function: %s" % self.objective())
def g(sent, node):
    """Post-receive hook: log which node returned ``sent`` and pass it through."""
    message = "node %s -> sentence %s" % (node, sent.id)
    log.writeln(message)
    return sent
log.write("best hyp: %s %s cost=%s score=%s\n" % (" ".join(sym.tostring(e) for e in best), bestv, thedecoder.weights.dot(bestv), bestscore)) sent.score_comps = bestg sent.ewords = [sym.tostring(e) for e in best] return sent epoch = 1 if loop_forever: iterations = itertools.count() else: iterations = xrange(1) for iteration in iterations: log.writeln("epoch %d" % iteration) # Process training data if shuffle_sentences and (not opts.parallel or parallel.rank == parallel.master): random.shuffle(trainsents) if opts.parallel: outsents = parallel.pmap(lambda (si, sent): (si, process(sent)), trainsents, tag=0, verbose=1) if parallel.rank == parallel.master: outsents = list(outsents) else: outsents = [(si, process(sent)) for (si,sent) in trainsents]
comm = MPI.Comm.Get_parent() log.prefix = '[%s] ' % (comm.Get_rank(), ) instances = [] while True: msg = comm.recv() if msg[0] == 'train': sent = msg[1] goal = process(sent) instances.append(ForestInstance(sent.id, goal)) while comm.Iprobe(tag=1): msg = comm.recv(tag=1) if msg[0] == 'update': log.writeln("receive update for sentence %s" % msg[1].id) instances.append(msg[1].instance) instance = thelearner.train(sent, instances) sent.instance = instance # it would be nicer if sent and instance were the same object comm.send(sent, dest=0) instances = [] elif msg[0] == 'translate': sent = msg[1] process(sent) comm.send(sent, dest=0) elif msg[0] == 'gather-weights': # Average weights (Daume trick) sum_weights = float(
def process(sent):
    """Decode one training sentence, run a max-margin update of the decoder
    weights on it, and return the sentence.

    A decoding failure is logged and yields None; the exception is
    re-raised once the module-level ``decoder_errors`` counter reaches 3.
    Besides updating ``thedecoder.weights`` this maintains the module-level
    averaging sums, the feature-scale statistics, the hypothesis cache and
    (in parallel mode) the list of outstanding MPI send requests.
    """
    global decoder_errors, nweights, sumweights_helper
    global sum_updates2, n_updates, requests

    # Need to add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    sent.flen = sent.compute_distance().get((0, sent.n - 1))  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 3:
            return
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)
    log.write("best hyp: %s %s cost=%s loss=%s\n" % (" ".join(sym.tostring(e) for e in best), best_vector, thedecoder.weights.dot(best_mvector), best_loss))

    # Set up quadratic program
    qp = maxmargin.QuadraticProgram()
    cur_instance = ForestInstance(sent.id, goal)
    qp.add_instance(cur_instance)

    if opts.parallel:
        # Pull in every update other nodes have posted for us (tag 1).
        while MPI.COMM_WORLD.Iprobe(tag=1, source=MPI.ANY_SOURCE):
            log.writeln("received update...\n")
            recv_instance = MPI.COMM_WORLD.recv(tag=1, source=MPI.ANY_SOURCE)
            log.writeln("received update for %s" % (recv_instance.instance_id,))
            # need to check for duplicate instances?
            qp.add_instance(recv_instance)

    # Add cached hyps
    if cache_hyps:
        for instance in qp.instances:
            cached = hyp_cache[instance.instance_id]
            if len(cached) > 0:
                log.writeln("retrieved %d cached hyps for %s" % (len(cached), instance.instance_id))
            for hyp in cached:
                instance.add_hyp(hyp)

    # Make oracle weight vector
    oweights = theoracle.make_weights(additive="sentence")
    oweights *= -1

    # Make vector of learning rates
    # We have to be careful to assign a learning rate to every possible feature
    # This is not very efficient
    feats = set()
    for item in goal.bottomup():
        for ded in item.deds:
            feats.update(ded.dcost)
    for instance in qp.instances:
        for hyp in instance.hyps:
            feats.update(hyp.mvector)
    learning_rates = svector.Vector()
    for feat in feats:
        learning_rates[feat] = compute_feature_learning_rate(feat)
    if log.level >= 3:
        log.writeln("learning rate vector: %s" % learning_rates)

    qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

    thedecoder.weights.compact()
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))

    # update weight sum for averaging
    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    sumweights_helper += nweights * qp.delta_mweights()
    nweights += 1

    # update feature scales
    if update_feature_scales:
        for instance in qp.instances:
            # Earlier formulation, kept for reference:
            #   u = svector.Vector(instance.hope.mvector)
            #   for hyp in instance.hyps:
            #       u -= hyp.alpha*hyp.mvector
            #   sum_updates2 += u*u
            for hyp in instance.hyps:
                if hyp is instance.hope:
                    # hyp = instance.hope is a non-update
                    continue
                u = instance.hope.mvector - hyp.mvector
                sum_updates2 += hyp.alpha * (u * u)
                n_updates += hyp.alpha

        #log.write("sum of squared updates: %s\n" % (" ".join("%s=%s" % (f,sum_updates2[f]) for f in watch_features)))
        log.write("feature learning rates: %s\n" % (" ".join("%s=%s" % (f, compute_feature_learning_rate(f)) for f in watch_features)))

    if opts.parallel:
        # flush out filled requests
        requests = [request for request in requests if not request.Test()]

        # transmit updates to other nodes
        # make a plain Instance (without forest)
        # we used to designate a hope translation,
        #send_instance = maxmargin.Instance(cur_instance.hyps, hope=cur_instance.hope, instance_id=cur_instance.sentid)
        # but now are letting the other node choose.
        send_instance = maxmargin.Instance(cur_instance.hyps, instance_id=cur_instance.sentid)

        for node in parallel.slaves:
            if node != parallel.rank:
                requests.append(MPI.COMM_WORLD.isend(send_instance, dest=node, tag=1))

    # save all hyps for next time
    if cache_hyps:
        epsilon = 0.01
        for instance in qp.instances:
            cached = hyp_cache[instance.instance_id]
            for hyp in instance.hyps:
                # A hyp counts as already cached when some cached hyp is
                # within epsilon of it in both model and oracle vectors.
                already_cached = any(
                    (hyp.mvector - old.mvector).normsquared() <= epsilon
                    and (hyp.ovector - old.ovector).normsquared() <= epsilon
                    for old in cached
                )
                if not already_cached:
                    if log.level >= 2:
                        log.writeln("add hyp to cache: %s" % hyp)
                    cached.append(hyp)

    theoracle.update(best_ovector)
    sent.score_comps = best_ovector

    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())

    sent.ewords = [sym.tostring(e) for e in best]

    return sent
def send_instruction(self, s, input=''):
    """Send one instruction line to the decoder child process.

    Trailing whitespace is stripped from ``s``; if nothing is left, nothing
    is logged or sent. ``input`` is only used in the log message.
    """
    line = s.rstrip()
    if not len(line):
        return
    log.writeln("sending instruction: %s for %s" % (line, input))
    self.child.send(line + "\n")
def pmap_master(input, verbose=False, tag=0, shuffle=False, hook=None):
    """Master side of pmap: a generator yielding the slaves' results.

    Items of ``input`` are sent one at a time to idle slaves (optionally in
    shuffled order) and a ``Die`` message is sent to each slave once the
    input is exhausted. Results are buffered in a heap keyed by input index
    and yielded in the original input order. ``hook`` (if given) is called
    with the node number after each result is received.
    """
    if verbose:
        log.writeln("beginning")

    input = enumerate(input)
    if shuffle:
        input = list(input)
        random.shuffle(input)
        input = iter(input)

    output = []  # heap of (index, result) waiting for their turn
    flushed = 0  # index of the next result to yield
    idle = set(slaves)
    busy = {}  # node -> index of the item it is working on

    while idle or busy:
        while idle:
            node = idle.pop()
            try:
                # builtin next() instead of the Python-2-only .next() method
                (i, line) = next(input)
            except StopIteration:
                if verbose:
                    log.writeln("send to node %s: Die" % node)
                world.send(Die(), dest=node, tag=tag)
            else:
                if verbose:
                    log.writeln("send to node %s: %s" % (node, line))
                world.send(line, dest=node, tag=tag)
                if verbose:
                    log.writeln("add node %s to busy list" % (node,))
                busy[node] = i

        if busy:
            status = MPI.Status()
            line = world.recv(source=MPI.ANY_SOURCE, tag=tag, status=status)
            node = status.source
            if verbose:
                log.writeln("received from %s: %s" % (node, line))
            i = busy.pop(node)
            heapq.heappush(output, (i, line))
            while output and output[0][0] == flushed:
                (i, line) = heapq.heappop(output)
                yield line
                flushed += 1
            if hook:
                hook(node)
            if verbose:
                log.writeln("adding %s to idle list" % (node,))
            idle.add(node)

    if verbose:
        log.writeln("finished")
def pmap(f, input, verbose=False, tag=0, shuffle=False, master_hook=None, slave_hook=None):
    """Parallel map over MPI ranks.

    On the master rank this returns the pmap_master generator over
    ``input``; on every other rank it runs the pmap_slave loop applying
    ``f`` and returns its (empty) result.
    """
    if rank == master:
        return pmap_master(input, verbose, tag, shuffle=shuffle, hook=master_hook)
    return pmap_slave(f, verbose, tag, hook=slave_hook)


# Module-level MPI setup: rank 0 is the master, every other rank is a slave.
world = MPI.COMM_WORLD
rank = world.Get_rank()
size = world.Get_size()
master = 0
slaves = set(r for r in range(size) if r != master)

log.prefix = '[%s] ' % rank
log.writeln("host %s rank %s\n" % (socket.gethostname(), rank))

if __name__ == "__main__":
    # Self-test: echo a file through the workers unchanged.
    with open("/home/nlg-01/chiangd/hiero-mira2/parallel.py") as source:
        for line in pmap(lambda x: x, source, verbose=True):
            sys.stdout.write(line)
def translate(self, input):
    """input: any object that has an attribute 'words' which is a list of
    numberized French words. and an 'id' attribute. and an 'instruction'
    attribute
    output: a forest

    The decoder is restarted after every 100 calls. Each attempt sends the
    weights and the instruction; an attempt fails if it raises, returns an
    empty forest, the forest cannot be built, or the child has exited.
    Every failed attempt counts as a restart, so the loop is bounded. After
    more than 3 restarts a dummy no-parse forest is used instead of raising.
    """
    if self.decoder_age >= 100:
        self.start_decoder()

    restarts = 0
    self.decoder_age += 1
    outforest = ""
    lastexcept = None  # stays None if every failure was a bad forest, not an exception

    while restarts <= 3:
        try:
            self.send_weights(input=input)
            outforest = self.instruct(input)
            if outforest != "" and self.create_forest(outforest) and self.child.poll() is None:
                break
            # Previously this case did `continue` without counting the
            # attempt, which could loop forever on a decoder that keeps
            # returning nothing usable.
            log.writeln("decoder returned no usable forest")
        except Exception:
            lastexcept = log.strexcept(True)
            log.writeln("CAUGHT exception: %s" % lastexcept)

        restarts += 1
        if restarts <= 3:
            log.writeln("restarting decoder")
            self.start_decoder()
        else:
            self.start_decoder()
            # don't raise because of global 100-retries limit in trainer.py
            log.write("too many decoder restarts, giving up on exception %s:\n%s\nwith weights:\n%s\n" % (lastexcept, repr(input), self.weights))
            self.create_forest("(0<noparse:1> )")

    log.writeln("received forest: %s...%s for %s" % (outforest[:80], outforest[-80:], input))

    f = self.forest
    self.forest = None

    for item in f.bottomup():
        for ded in item.deds:
            # replace rule's French side with correct number of French words
            # we don't even bother to use the right number of variables
            ded.rule = rule.Rule(ded.rule.lhs,
                                 rule.Phrase([sym.fromstring('<foreign-word>')] * int(ded.dcost['foreign-length'])),
                                 ded.rule.e)

            for feature in delete_features:
                del ded.dcost[feature]

    f.reweight(self.weights)  # because forest_from_text doesn't compute viterbi

    return f
else: sys.stderr.write("invalid heldout-policy %s\n" % opts.heldout_policy) sys.exit(1) # Prepare output files if opts.outweightfilename: outweightfile = open(opts.outweightfilename, "w") if opts.outscorefilename: outscorefile = open(opts.outscorefilename, "w") theoracle = oracle.Oracle(order=4, variant=opts.bleuvariant, oracledoc_size=10) requests = [] for epoch in itertools.count(start=1): log.writeln("epoch %d" % epoch) # Process training data if opts.shuffle_sentences: random.shuffle(trainsents) def f(sent, node): log.writeln("sentence %s -> node %s" % (sent.id, node)) return ('train', sent) def g(sent, node): log.writeln("node %s -> sentence %s" % (node, sent.id)) # Flush out filled requests filled = [i for i in xrange(len(requests)) if requests[i].Test()] # test() seems buggy, why? for i in reversed(filled):
def f(sent, node):
    """Log that `sent` is being handed to `node` and wrap it as a 'translate' request."""
    dispatch_note = "sentence %s -> node %s" % (sent.id, node)
    log.writeln(dispatch_note)
    request = ('translate', sent)
    return request
def process(sent):
    """Decode `sent`, run one max-margin weight update, and return `sent`.

    On success `sent` is returned with `flen`, `score_comps` and `ewords`
    set.  If decoding raises (or yields no goal), the error is logged and
    None is returned; the third consecutive failure is re-raised instead.

    Mutates module-level state: `decoder_errors`, `nweights`,
    `sumweights_helper`, `requests` (parallel mode), `sum_updates2` /
    `n_updates` (when `update_feature_scales` is set), `hyp_cache`
    (when `cache_hyps` is set) and `thedecoder.weights`.
    """
    # Need to add an flen attribute that gives the length of the input sentence.
    # In the lattice-decoding case, we have to make a guess.
    distance = sent.compute_distance()
    sent.flen = distance.get((0,sent.n-1), None) # could be missing if n == 0

    theoracle.input(sent)

    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # the consecutive-failure counter is reset before the None check below
        decoder_errors = 0
        if goal is None: raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % "".join(traceback.format_exception(*sys.exc_info())))
        decoder_errors += 1
        if decoder_errors >= 3:
            log.write("decoder failed too many times, passing exception through!\n")
            raise
        else:
            # give up on this sentence only; the caller receives None
            return

    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    best_vector, best = decoder.get_nbest(goal, 1)[0]
    best_mvector = theoracle.clean(best_vector)
    best_ovector = theoracle.finish(best_vector, best)
    best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)
    log.write("best hyp: %s %s cost=%s loss=%s\n" % (" ".join(sym.tostring(e) for e in best), best_vector, thedecoder.weights.dot(best_mvector), best_loss))

    # Set up quadratic program
    qp = maxmargin.QuadraticProgram()
    cur_instance = ForestInstance(sent.id, goal)
    qp.add_instance(cur_instance)

    if opts.parallel:
        # drain every pending tag-1 message (instances sent by other nodes)
        while MPI.COMM_WORLD.Iprobe(tag=1, source=MPI.ANY_SOURCE):
            log.writeln("received update...\n")
            recv_instance = MPI.COMM_WORLD.recv(tag=1, source=MPI.ANY_SOURCE)
            log.writeln("received update for %s" % (recv_instance.instance_id,))
            # need to check for duplicate instances?
            qp.add_instance(recv_instance)

    # Add cached hyps
    if cache_hyps:
        for instance in qp.instances:
            hyps = hyp_cache[instance.instance_id]
            if len(hyps) > 0:
                log.writeln("retrieved %d cached hyps for %s" % (len(hyps), instance.instance_id))
            for hyp in hyps:
                instance.add_hyp(hyp)

    # Make oracle weight vector
    oweights = theoracle.make_weights(additive="sentence")
    oweights *= -1

    # Make vector of learning rates
    # We have to be careful to assign a learning rate to every possible feature
    # This is not very efficient
    feats = set()
    for item in goal.bottomup():
        for ded in item.deds:
            feats.update(ded.dcost)
    for instance in qp.instances:
        for hyp in instance.hyps:
            feats.update(hyp.mvector)
    learning_rates = svector.Vector()
    for feat in feats:
        learning_rates[feat] = compute_feature_learning_rate(feat)
    if log.level >= 3:
        log.writeln("learning rate vector: %s" % learning_rates)

    qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

    thedecoder.weights.compact()
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))

    # update weight sum for averaging
    global nweights, sumweights_helper

    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    sumweights_helper += nweights * qp.delta_mweights()
    nweights += 1

    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates
        for instance in qp.instances:
            # the bare string below is an older formulation kept as a no-op expression
            """u = svector.Vector(instance.hope.mvector) for hyp in instance.hyps: u -= hyp.alpha*hyp.mvector sum_updates2 += u*u"""
            for hyp in instance.hyps:
                if hyp is not instance.hope: # hyp = instance.hope is a non-update
                    u = instance.hope.mvector - hyp.mvector
                    sum_updates2 += hyp.alpha*(u*u)
                    n_updates += hyp.alpha

        #log.write("sum of squared updates: %s\n" % (" ".join("%s=%s" % (f,sum_updates2[f]) for f in watch_features)))
        # NOTE(review): source indentation was lost; this log is assumed to
        # belong inside the update_feature_scales branch -- confirm.
        log.write("feature learning rates: %s\n" % (" ".join("%s=%s" % (f,compute_feature_learning_rate(f)) for f in watch_features)))

    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if not request.Test()]

        # transmit updates to other nodes
        # make a plain Instance (without forest)
        # we used to designate a hope translation,
        #send_instance = maxmargin.Instance(cur_instance.hyps, hope=cur_instance.hope, instance_id=cur_instance.sentid)
        # but now are letting the other node choose.
        send_instance = maxmargin.Instance(cur_instance.hyps, instance_id=cur_instance.sentid)

        for node in parallel.slaves:
            if node != parallel.rank:
                # non-blocking send; the request handle is kept in `requests`
                requests.append(MPI.COMM_WORLD.isend(send_instance, dest=node, tag=1))

    # save all hyps for next time
    if cache_hyps:
        epsilon = 0.01
        for instance in qp.instances:
            hyps = hyp_cache[instance.instance_id]
            for hyp in instance.hyps:
                for hyp1 in hyps:
                    # a cached hyp within epsilon on both vectors counts as a duplicate
                    if (hyp.mvector-hyp1.mvector).normsquared() <= epsilon and (hyp.ovector-hyp1.ovector).normsquared() <= epsilon:
                        break
                else:
                    # for/else: reached only when no duplicate was found
                    if log.level >= 2:
                        log.writeln("add hyp to cache: %s" % hyp)
                    hyps.append(hyp)

    theoracle.update(best_ovector)
    sent.score_comps = best_ovector

    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())

    sent.ewords = [sym.tostring(e) for e in best]

    return sent
def _new_hope(self, new_hope):
    """Make `new_hope` this instance's hope hypothesis.

    The new hope is added to the hypothesis set and gains one unit of
    alpha; the previous hope (if any) loses one.  When the hope really
    changed, both violations are recomputed and the pair is re-optimized
    immediately.
    """
    previous = self.hope
    self.add_hyp(new_hope)
    self.hope = new_hope

    if verbosity >= 2:
        log.writeln("hope candidates:")
        for candidate in self.hyps:
            # "+" marks the incoming hope, "-" the outgoing one
            marker = "+" if candidate is new_hope else ("-" if candidate is previous else " ")
            log.writeln("%s hyp %s: cost=%s loss=%s alpha=%s" % (
                marker,
                candidate.hyp_id,
                self.qp.mweights.dot(candidate.mvector),
                self.qp.oweights.dot(candidate.ovector),
                candidate.alpha))

    new_hope.alpha += 1.
    if previous:
        # Preserve identity:
        # mweights = mweights_0 - \sum_i hope_i + \sum_j alpha_ij hyp_ij
        previous.alpha -= 1.

    # Nothing further to adjust unless the hope actually changed
    if not previous or new_hope is previous:
        return

    # Adjust alphas to be consistent with current weights
    if verbosity >= 3:
        log.writeln("changing hope:")
        log.writeln(" old hope: %s alpha=%s" % (previous, previous.alpha))
        log.writeln(" new hope: %s alpha=%s" % (new_hope, new_hope.alpha))

    # Just in case one of the alphas went outside [0,1], immediately
    # optimize them
    previous.violation = self.violation(previous)
    new_hope.violation = self.violation(new_hope)
    self._optimize_pair(previous, new_hope)

    if verbosity >= 1:
        log.writeln("adjust alphas: old hope %s, new hope %s" % (previous.alpha, new_hope.alpha))
        log.writeln("adjust weights: %s" % (self.qp.mweights * watch_features))
for e in best), bestv, thedecoder.weights.dot(bestv), bestscore)) sent.score_comps = bestg sent.ewords = [sym.tostring(e) for e in best] return sent epoch = 1 if loop_forever: iterations = itertools.count() else: iterations = xrange(1) for iteration in iterations: log.writeln("epoch %d" % iteration) # Process training data if shuffle_sentences and (not opts.parallel or parallel.rank == parallel.master): random.shuffle(trainsents) if opts.parallel: outsents = parallel.pmap(lambda (si, sent): (si, process(sent)), trainsents, tag=0, verbose=1) if parallel.rank == parallel.master: outsents = list(outsents) else:
def optimize(self, mweights, oweights, learning_rate=1.):
    """Optimize the model weights (mweights), using a set of oracle
    weights (oweights) to compute the loss function. The dot
    product of mweights and the model features of a hypothesis is
    its model cost (lower is better), and the dot product of
    oweights and the oracle features of a hypothesis is its loss
    (lower is better).

    The learning_rate can be an svector.Vector, but it cannot have
    any zero rates.

    Stores all three arguments on self, picks a hope hypothesis for
    every instance, then keeps asking the instances for new fear
    hypotheses and re-optimizing the working set until a full pass
    produces no new fear.  Returns the `mweights` object that was
    passed in.
    """
    self.mweights = mweights
    self.oweights = oweights
    self.learning_rate = learning_rate

    if verbosity >= 2:
        log.writeln("begin optimization")

    # Initialize QP by selecting a hope hypothesis for each instance
    for instance in self.instances:
        if verbosity >= 3:
            log.writeln("instance %s:" % (instance.instance_id,))
        instance._new_hope(instance.get_hope())

    if verbosity >= 2:
        for instance in self.instances:
            log.writeln("instance %s:" % (instance.instance_id,))
            for hyp in instance.hyps:
                log.writeln(" hyp %s: %s" % (hyp.hyp_id, hyp))

    if verbosity >= 1:
        if len(watch_features):
            log.writeln("initial weights: %s" % (self.mweights * watch_features))
        log.writeln("objective function: %s" % self.objective())

    # Repeat full passes over the instances until one adds no new fear
    again = True
    while again:
        again = False
        for instance in self.instances:
            if instance._new_fear():
                again = True

        # NOTE(review): source indentation was lost; this call is assumed
        # to run once per pass (while-loop level) rather than once per
        # new fear -- confirm against the original file.
        self._optimize_workingset()

    if verbosity >= 1:
        if len(watch_features) > 0:
            log.writeln("final weights: %s" % (self.mweights * watch_features))
        log.writeln("objective function: %s" % self.objective())

    return mweights
if verbose: log.writeln("received from master: %s" % (msg,)) msg = f(msg) if verbose: log.writeln("sending output to master") world.send(msg, dest=master, tag=tag) if hook: hook() if verbose: log.writeln("finished") return () def pmap(f, input, verbose=False, tag=0, shuffle=False, master_hook=None, slave_hook=None): if rank == master: return pmap_master(input, verbose, tag, shuffle=shuffle, hook=master_hook) else: return pmap_slave(f, verbose, tag, hook=slave_hook) world = MPI.COMM_WORLD rank = world.Get_rank() size = world.Get_size() master = 0 slaves = set(r for r in xrange(size) if r != master) log.prefix = '[%s] ' % rank log.writeln("host %s rank %s\n" % (socket.gethostname(), rank)) if __name__ == "__main__": for line in pmap(lambda x: x, file("/home/nlg-01/chiangd/hiero-mira2/parallel.py"), verbose=True): sys.stdout.write(line)
# Initial feature weights.
# --feature-weights is first parsed as a literal weight string; if that
# fails it is treated as the name of a file containing one.
feature_weights = svector.Vector()
if opts.feature_weights:
    try:
        feature_weights = sbmt_vector(opts.feature_weights)
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C / SystemExit
        # are not mistaken for a parse failure.
        try:
            # close the handle deterministically instead of leaking it
            with open(opts.feature_weights) as weights_file:
                feature_weights = sbmt_vector(weights_file.read())
        except Exception:
            raise Exception("couldn't obtain feature weights from %s\n" % opts.feature_weights)

# Only the listed policies are accepted.  On an unknown value a warning is
# logged and update_feature_scales is NOT assigned here.
if opts.update_feature_scales in [None, "gauss-newton", "arow"]:
    update_feature_scales = opts.update_feature_scales
else:
    log.writeln("warning: unknown value for --scale-features: %s" % opts.update_feature_scales)

# Scaling factors for feature values as seen by the trainer. Think of
# this as a learning rate that can be independently adjusted for each
# feature. Ideally we want all the scaled feature values to have
# similar magnitudes.
if opts.feature_scales:
    log.writeln("warning: --feature-scales is no longer supported")

# Features that are constrained to be >= 0. We're not using a very
# smart method to enforce this constraint, so too many could paralyze
# the trainer. It's probably a good idea to initialize these features
# to nonzero values.
positive_features = []
if opts.positive_features:
    log.writeln("warning: --positive-features is no longer supported")
def pmap_master(input, verbose=False, tag=0, shuffle=False, hook=None):
    """Master side of pmap: farm `input` out to the slave nodes and yield results.

    This is a generator.  Items are enumerated (optionally shuffled after
    enumeration), sent one at a time to idle slaves, and the replies are
    yielded in the ORIGINAL input order: out-of-order replies wait in a
    heap keyed by item index until every earlier index has been yielded.

    input   -- iterable of items to send
    verbose -- log every send/receive when true
    tag     -- MPI tag used for all messages
    shuffle -- dispatch items in random order (output order is unaffected)
    hook    -- optional callable invoked as hook(node) after each reply

    Once the input is exhausted each idle slave is sent a Die() message
    and is not tracked any further; the generator ends when no slave is
    idle or busy.
    """
    if verbose:
        log.writeln("beginning")

    input = enumerate(input)
    if shuffle:
        input = list(input)
        random.shuffle(input)
        input = iter(input)

    output = []     # heap of (index, result) not yet yielded
    flushed = 0     # index of the next result to yield
    idle = set(slaves)
    busy = {}       # node -> index of the item it is working on

    while idle or busy:
        # Give every idle node either the next item or a Die message
        while idle:
            node = idle.pop()
            try:
                # builtin next() instead of the Python-2-only .next() method
                (i, line) = next(input)
            except StopIteration:
                if verbose:
                    log.writeln("send to node %s: Die" % node)
                world.send(Die(), dest=node, tag=tag)
            else:
                if verbose:
                    log.writeln("send to node %s: %s" % (node, line))
                world.send(line, dest=node, tag=tag)
                if verbose:
                    log.writeln("add node %s to busy list" % (node,))
                busy[node] = i

        if busy:
            status = MPI.Status()
            line = world.recv(source=MPI.ANY_SOURCE, tag=tag, status=status)
            node = status.source
            if verbose:
                log.writeln("received from %s: %s" % (node, line))
            i = busy.pop(node)
            heapq.heappush(output, (i, line))

            # Yield every result that is next in input order
            while output and output[0][0] == flushed:
                (i, line) = heapq.heappop(output)
                yield line
                flushed += 1

            if hook:
                hook(node)

            if verbose:
                log.writeln("adding %s to idle list" % (node,))
            idle.add(node)

    if verbose:
        log.writeln("finished")
def process(sent):
    """Decode `sent`, run one cutting-plane weight update, and return `sent`.

    On success `sent` is returned with `score_comps` and `ewords` set.
    If decoding raises (or yields no goal), the error is logged and None
    is returned; the 100th consecutive failure is re-raised instead.

    Mutates module-level state: `alphas`, `updates`, `decoder_errors`,
    `nweights`, `sumweights_helper` (via apply_update), `requests`
    (parallel mode), `sum_updates2` / `n_updates` / `feature_scales`
    (when `update_feature_scales` is set) and `thedecoder.weights`.
    """
    global alphas
    if online_learning:
        # online mode starts every sentence with empty update/alpha tables
        updates.clear()
        alphas.clear()

    theoracle.input(sent)

    log.write("done preparing\n")

    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # the consecutive-failure counter is reset before the None check below
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback

        log.writeln(
            "decoder raised exception: %s %s" % (sent, "".join(traceback.format_exception(*sys.exc_info())))
        )
        decoder_errors += 1
        if decoder_errors >= 100:
            log.write("decoder failed too many times, passing exception through!\n")
            raise
        else:
            # give up on this sentence only; the caller receives None
            return

    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    bestscore = get_score(bestv, best)
    log.write(
        "best hyp: %s %s cost=%s score=%s\n"
        % (" ".join(sym.tostring(e) for e in best), bestv, thedecoder.weights.dot(bestv), bestscore)
    )

    goldv, gold, goldscore = get_gold(sent, goal, thedecoder.weights)

    assert (
        sent.id not in updates
    )  # in batch learning, this can happen, and we would have to undo the update associated with this sentence
    # seed this sentence with a single zero update carrying the full learning rate
    updates[sent.id] = [(svector.Vector(), 0.0)]
    alphas[sent.id] = [max_learning_rate]

    if opts.parallel:
        # drain every pending tag-1 message (updates sent by other nodes)
        while True:
            if mpi.world.iprobe(tag=1):
                (sentid, vscores) = mpi.world.recv(tag=1)
                log.write("received update for %s\n" % (sentid,))

                if sentid in updates:  # see comment above
                    log.write("ignoring update for %s\n" % (sentid,))
                    continue  # drop this update on the floor

                updates[sentid] = vscores
                alphas[sentid] = [max_learning_rate] + [0.0] * (len(vscores) - 1)
                # since the first update is zero, the alphas & updates
                # are still consistent with weights
            else:
                break

    def oracle(weights):
        # (feature difference, score difference) of the gold against each hyp
        hyps = get_hyps(sent, goal, weights)
        return [(goldv - hypv, goldscore - hypscore) for (hypv, hyp, hypscore) in hyps]

    thedecoder.weights, alphas = cutting_plane(thedecoder.weights, updates, alphas, {sent.id: oracle})

    remove_zeros(thedecoder.weights)
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))
    log.write("weight norm: %s\n" % (math.sqrt(thedecoder.weights.normsquared())))

    # update weight sum for averaging
    global nweights, sumweights_helper

    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    for sentid in updates:
        for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
            apply_update(sumweights_helper, nweights * alpha * v)
    # NOTE(review): source indentation was lost; assumed to advance once per
    # processed sentence (outside both loops) -- confirm.
    nweights += 1

    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates, feature_scales
        for sentid in updates:
            # u accumulates this sentence's updates, each weighted by alpha / max_learning_rate
            u = svector.Vector()
            for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
                u += alpha / max_learning_rate * v
            sum_updates2 += u * u
            n_updates += 1

        try:
            default_feature_scale = 1.0 / compute_variance(0, n_updates)
        except ZeroDivisionError:
            default_feature_scale = 0.0  # pseudoinverse
        # features absent from sum_updates2 fall back to the default scale
        feature_scales = collections.defaultdict(lambda: default_feature_scale)
        for feat in sum_updates2:
            try:
                feature_scales[feat] = 1.0 / compute_variance(sum_updates2[feat], n_updates)
            except ZeroDivisionError:
                feature_scales[feat] = 0.0  # pseudoinverse

        log.write(
            "feature scales: %s\n"
            % (" ".join("%s=%s" % (f, feature_scales[f]) for f in watch_features if f in feature_scales))
        )

    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if not request.test()]

        # transmit updates to other nodes
        for node in parallel.slaves:
            if node != parallel.rank:
                # non-blocking send; the request handle is kept in `requests`
                requests.append(mpi.world.isend(value=(sent.id, updates[sent.id]), dest=node, tag=1))

    bestv = theoracle.finish(bestv, best)
    theoracle.update(bestv)
    sent.score_comps = bestv

    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())

    sent.ewords = [sym.tostring(e) for e in best]

    return sent
def train(self, sent, instances):
    """Run one max-margin update over `instances` and return a forest-free copy.

    All instances are loaded into a fresh quadratic program, which is
    optimized against thedecoder.weights.  The running weight sum (for
    averaging), the feature-scale statistics and the oracle are then
    updated.  Exactly one of `instances` must carry a non-empty `goal`;
    a plain maxmargin.Instance built from that one's hyps is returned.
    """
    # Set up quadratic program
    qp = maxmargin.QuadraticProgram()
    for inst in instances:
        qp.add_instance(inst)

    # Make oracle weight vector
    oweights = theoracle.make_weights(additive="sentence")
    oweights *= -1

    # Make vector of learning rates
    # We have to be careful to assign a learning rate to every feature in the forest
    # This is not very efficient
    seen_features = set()
    for inst in qp.instances:
        if hasattr(inst, "goal") and inst.goal:
            for node in inst.goal.bottomup():
                for edge in node.deds:
                    seen_features.update(edge.dcost)
        for candidate in inst.hyps:
            seen_features.update(candidate.mvector)

    learning_rates = svector.Vector()
    for name in seen_features:
        learning_rates[name] = self.compute_feature_learning_rate(name)
    if log.level >= 3:
        log.writeln("learning rate vector: %s" % learning_rates)

    # Solve the quadratic program
    qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

    thedecoder.weights.compact()
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))

    # Update weight sum for averaging
    # sum_weights_helper = \sum_{i=0}^n (i \Delta w_i)
    self.sum_weights_helper += self.n_weights * qp.delta_mweights()
    self.n_weights += 1

    # Update feature scales
    if update_feature_scales:
        for inst in qp.instances:
            # (older formulation, kept for reference)
            #   u = svector.Vector(instance.hope.mvector)
            #   for hyp in instance.hyps: u -= hyp.alpha*hyp.mvector
            #   self.sum_updates2 += u*u
            for candidate in inst.hyps:
                if candidate is inst.hope:
                    # hyp = instance.hope is a non-update
                    continue
                diff = inst.hope.mvector - candidate.mvector
                self.sum_updates2 += candidate.alpha * (diff * diff)
                self.n_updates += candidate.alpha

        # NOTE(review): source indentation was lost; this log is assumed to
        # belong inside the update_feature_scales branch -- confirm.
        rate_report = " ".join("%s=%s" % (f, self.compute_feature_learning_rate(f)) for f in watch_features)
        log.write("feature learning rates: %s\n" % (rate_report))

    theoracle.update(sent.score_comps)

    # make a plain Instance (without forest)
    # we used to designate a hope translation,
    # but now are letting the other node choose.
    send_instances = [
        maxmargin.Instance(inst.hyps, instance_id=inst.sentid)
        for inst in instances
        if hasattr(inst, "goal") and inst.goal
    ]

    assert len(send_instances) == 1
    return send_instances[0]
help="fraction of sentences to hold out", type=float, default=None, ) optparser.add_option( "--heldout-sents", dest="heldout_sents", help="number of sentences to hold out", type=int, default=None ) optparser.add_option("--no-shuffle", dest="shuffle_sentences", action="store_false", default=True) try: configfilename = sys.argv[1] except IndexError: sys.stderr.write("usage: train.py config-file source-file reference-files [options...]\n") sys.exit(1) log.writeln("Starting at %s" % log.datetoday()) if log.level >= 1: log.write("Reading configuration from %s\n" % configfilename) execfile(configfilename) opts, args = optparser.parse_args(args=sys.argv[2:]) shuffle_sentences = opts.shuffle_sentences if opts.parallel: import parallel import mpi log.prefix = "[%s] " % parallel.rank if not opts.parallel or parallel.rank == parallel.master:
def process(sent):
    """Decode `sent`, run one cutting-plane weight update, and return `sent`.

    On success `sent` is returned with `score_comps` and `ewords` set.
    If decoding raises (or yields no goal), the error is logged and None
    is returned; the 100th consecutive failure is re-raised instead.

    Mutates module-level state: `alphas`, `updates`, `decoder_errors`,
    `nweights`, `sumweights_helper` (via apply_update), `requests`
    (parallel mode), `sum_updates2` / `n_updates` / `feature_scales`
    (when `update_feature_scales` is set) and `thedecoder.weights`.
    """
    global alphas
    if online_learning:
        # online mode starts every sentence with empty update/alpha tables
        updates.clear()
        alphas.clear()

    theoracle.input(sent)

    log.write("done preparing\n")

    global decoder_errors
    try:
        goal = thedecoder.translate(sent)
        thedecoder.process_output(sent, goal)
        # the consecutive-failure counter is reset before the None check below
        decoder_errors = 0
        if goal is None: raise Exception("parse failure")
    except Exception:
        import traceback
        log.writeln(
            "decoder raised exception: %s %s" %
            (sent, "".join(traceback.format_exception(*sys.exc_info()))))
        decoder_errors += 1
        if decoder_errors >= 100:
            log.write(
                "decoder failed too many times, passing exception through!\n"
            )
            raise
        else:
            # give up on this sentence only; the caller receives None
            return

    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    bestscore = get_score(bestv, best)
    log.write("best hyp: %s %s cost=%s score=%s\n" % (" ".join(
        sym.tostring(e)
        for e in best), bestv, thedecoder.weights.dot(bestv), bestscore))

    goldv, gold, goldscore = get_gold(sent, goal, thedecoder.weights)

    assert (
        sent.id not in updates
    )  # in batch learning, this can happen, and we would have to undo the update associated with this sentence
    # seed this sentence with a single zero update carrying the full learning rate
    updates[sent.id] = [(svector.Vector(), 0.)]
    alphas[sent.id] = [max_learning_rate]

    if opts.parallel:
        # drain every pending tag-1 message (updates sent by other nodes)
        while True:
            if mpi.world.iprobe(tag=1):
                (sentid, vscores) = mpi.world.recv(tag=1)
                log.write("received update for %s\n" % (sentid, ))

                if sentid in updates:  # see comment above
                    log.write("ignoring update for %s\n" % (sentid, ))
                    continue  # drop this update on the floor

                updates[sentid] = vscores
                alphas[sentid] = [max_learning_rate
                                  ] + [0.] * (len(vscores) - 1)
                # since the first update is zero, the alphas & updates
                # are still consistent with weights
            else:
                break

    def oracle(weights):
        # (feature difference, score difference) of the gold against each hyp
        hyps = get_hyps(sent, goal, weights)
        return [(goldv - hypv, goldscore - hypscore)
                for (hypv, hyp, hypscore) in hyps]

    thedecoder.weights, alphas = cutting_plane(thedecoder.weights, updates,
                                               alphas, {sent.id: oracle})

    remove_zeros(thedecoder.weights)
    log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))
    log.write("weight norm: %s\n" %
              (math.sqrt(thedecoder.weights.normsquared())))

    # update weight sum for averaging
    global nweights, sumweights_helper

    # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
    for sentid in updates:
        for (v, score), alpha in itertools.izip(updates[sentid],
                                                alphas[sentid]):
            apply_update(sumweights_helper, nweights * alpha * v)
    # NOTE(review): source indentation was lost; assumed to advance once per
    # processed sentence (outside both loops) -- confirm.
    nweights += 1

    # update feature scales
    if update_feature_scales:
        global sum_updates2, n_updates, feature_scales
        for sentid in updates:
            # u accumulates this sentence's updates, each weighted by alpha / max_learning_rate
            u = svector.Vector()
            for (v, score), alpha in itertools.izip(updates[sentid],
                                                    alphas[sentid]):
                u += alpha / max_learning_rate * v
            sum_updates2 += u * u
            n_updates += 1

        try:
            default_feature_scale = 1. / compute_variance(0, n_updates)
        except ZeroDivisionError:
            default_feature_scale = 0.  # pseudoinverse
        # features absent from sum_updates2 fall back to the default scale
        feature_scales = collections.defaultdict(
            lambda: default_feature_scale)
        for feat in sum_updates2:
            try:
                feature_scales[feat] = 1. / compute_variance(
                    sum_updates2[feat], n_updates)
            except ZeroDivisionError:
                feature_scales[feat] = 0.  # pseudoinverse

        log.write(
            "feature scales: %s\n" %
            (" ".join("%s=%s" % (f, feature_scales[f])
                      for f in watch_features if f in feature_scales)))

    if opts.parallel:
        # flush out filled requests
        global requests
        requests = [request for request in requests if not request.test()]

        # transmit updates to other nodes
        for node in parallel.slaves:
            if node != parallel.rank:
                # non-blocking send; the request handle is kept in `requests`
                requests.append(
                    mpi.world.isend(value=(sent.id, updates[sent.id]),
                                    dest=node,
                                    tag=1))

    bestv = theoracle.finish(bestv, best)
    theoracle.update(bestv)
    sent.score_comps = bestv

    if log.level >= 1:
        gc.collect()
        log.write("done updating, memory = %s\n" % monitor.memory())

    sent.ewords = [sym.tostring(e) for e in best]

    return sent
type=int, default=None) optparser.add_option("--no-shuffle", dest="shuffle_sentences", action="store_false", default=True) try: configfilename = sys.argv[1] except IndexError: sys.stderr.write( "usage: train.py config-file source-file reference-files [options...]\n" ) sys.exit(1) log.writeln("Starting at %s" % log.datetoday()) if log.level >= 1: log.write("Reading configuration from %s\n" % configfilename) execfile(configfilename) opts, args = optparser.parse_args(args=sys.argv[2:]) shuffle_sentences = opts.shuffle_sentences if opts.parallel: import parallel import mpi log.prefix = "[%s] " % parallel.rank if not opts.parallel or parallel.rank == parallel.master: infile = file(args[0])