def rescore_deduction(self, ded, models, weights, memo, add=False):
    """Recompute ded.dcost and ded.viterbi according to models and weights."""
    vviterbi = svector.Vector()
    for ant in ded.ants:
        vviterbi += ant.rescore(models, weights, memo, add=add, check_states=True)
    states = []  # stays empty when add=True (ded.dcost is reused as-is)
    if not add:
        ded.dcost = svector.Vector()
        for m_i in xrange(len(models)):
            antstates = [ant.states[m_i] for ant in ded.ants]
            if ded.rule is not None:
                j1 = ded.ants[0].j if len(ded.ants) == 2 else None
                (state, mdcost) = models[m_i].transition(ded.rule, antstates, self.i, self.j, j1)
            elif len(antstates) == 1:  # goal item
                mdcost = models[m_i].finaltransition(antstates[0])
                state = None
            states.append(state)
            ded.dcost += mdcost
    vviterbi += ded.dcost
    ded.viterbi = weights.dot(vviterbi)
    return vviterbi, states
def send_weights(self):
    #log.write("prev weights: %s\n" % self.prev_weights)
    #log.write("weights: %s\n" % self.weights)
    if self.prev_weights is None:
        weights = self.weights
    else:
        weights = self.weights - self.prev_weights
        weights.compact()
    core_weights = [0.] * self.n_core_features
    sparse_weights = svector.Vector()
    # Weights are negated on the way out, apparently because the decoder
    # works with costs rather than scores (cf. make_decoder).
    for feature in weights:
        if not feature.startswith('_core'):
            sparse_weights[feature] = -weights[feature]
        else:
            i = int(feature[5:])
            core_weights[i] = -weights[feature]
    request = {'core-weights': ','.join(str(x) for x in core_weights),
               'sparse-weights': str(sparse_weights)}
    if self.prev_weights is None:
        log.write("setWeights(%s)\n" % request)
        self.server.setWeights(request)
    else:
        log.write("addWeights(%s)\n" % request)
        self.server.addWeights(request)
    self.prev_weights = svector.Vector(self.weights)
def make_decoder():
    thedecoder = Decoder(opts.decoder, opts.n_core_features)
    if opts.feature_weights:
        # Accept either an inline "feat=value ..." string or a filename.
        # The vector is negated: the decoder treats weights as costs.
        if '=' in opts.feature_weights:
            thedecoder.weights = -svector.Vector(opts.feature_weights)
        else:
            thedecoder.weights = -svector.Vector(open(opts.feature_weights).read())
    else:
        thedecoder.weights = svector.Vector()
    return thedecoder
def __init__(self, flen=0, elen=0):
    """Initialize member objects."""
    self.links = []
    self.score = 0
    self.fscore = 0
    self.hope = 0
    self.fear = 0
    # local feature vector
    self.scoreVector = svector.Vector()
    self.scoreVector_nonlocal = svector.Vector()
    self.position = None
    self.boundingBox = None
def seed(self, flattice, grammars, models, weights):
    self.flattice = flattice  # the loops below reference self.flattice
    self.models = models
    self.weights = weights
    # Seed the dotchart. This will give the extracted rules.
    self.grammars = [(g, DotChart(self, flattice)) for g in grammars if isinstance(g, Grammar)]
    for (g, dotchart) in self.grammars:
        for i in xrange(self.flattice.n - 1):
            if g.filterspan(self.flattice, i, i):
                dotchart.add(g.root, i, i, ())
                self.dot_added += 1
    for g in grammars:
        if isinstance(g, NewGrammar):
            g.input(flattice)
            for i in xrange(self.flattice.n - 1):
                for j in xrange(i + 1, self.flattice.n):
                    for (r,) in g.get_rules(i, j):
                        estimate_rule(r, models, weights)
                        self.add_axiom(i, j, r)
    # Last resort for unknown French word: pass it through.
    for edge in flattice.edges:
        for x in self.default_nonterminals:
            r = rule.Rule(x, [edge.w], [edge.w],
                          scores=svector.Vector('unknown', 1.))
            estimate_rule(r, models, weights)
            self.add_axiom(edge.i, edge.j, r)
def seed(self, input, grammars, models, weights):
    fwords = [sym.fromstring(f) for f in input.fwords]
    self.models = models
    self.weights = weights
    # Seed the dotchart. This will give the extracted rules.
    self.grammars = [(g, DotChart(self, fwords)) for g in grammars if isinstance(g, Grammar)]
    for (g, dotchart) in self.grammars:
        for i in xrange(self.n):
            if g.filterspan(i, i, self.n):
                dotchart.add(g.root, i, i, ())
                self.dot_added += 1
    for g in grammars:
        if isinstance(g, NewGrammar):
            g.input(input)
            for i in xrange(self.n):
                for j in xrange(i + 1, self.n + 1):
                    for (r,) in g.get_rules(i, j):
                        estimate_rule(r, models, weights)
                        self.add_axiom(i, j, r)
    # Last resort for unknown French word: pass it through.
    for i in xrange(len(fwords)):
        for x in self.default_nonterminals:
            r = rule.Rule(x,
                          rule.Phrase(fwords[i:i+1]),
                          rule.Phrase(fwords[i:i+1]),
                          scores=svector.Vector('unknown', 1.))
            estimate_rule(r, models, weights)
            self.add_axiom(i, i + 1, r)
def reweight(self, weights, memo=None):
    """Recompute self.viterbi according to weights. Returns the Viterbi
    vector, and (unlike the decoder) only calls weights.dot on vectors of
    whole subderivations, which is handy for overriding weights.dot."""
    if memo is None:
        memo = {}
    if id(self) in memo:
        return memo[id(self)]
    vviterbi = None
    for ded in self.deds:
        ded_vviterbi = svector.Vector()
        for ant in ded.ants:
            ded_vviterbi += ant.reweight(weights, memo)
        ded_vviterbi += ded.dcost
        ded.viterbi = weights.dot(ded_vviterbi)
        if vviterbi is None or ded.viterbi < self.viterbi:
            vviterbi = ded_vviterbi
            self.viterbi = ded.viterbi
    memo[id(self)] = vviterbi
    return vviterbi
def delta_mweights(self):
    # MIRA-style update: move toward each instance's hope derivation and away
    # from its alpha-weighted fear hypotheses. The final sign flip matches the
    # cost convention of the model weights (cf. make_decoder).
    dmweights = svector.Vector()
    for instance in self.instances:
        dmweights += self.learning_rate * instance.hope.mvector
        for hyp in instance.hyps:
            dmweights -= hyp.alpha * self.learning_rate * hyp.mvector
    return -dmweights
def __init__(self, i, j, x, f, e, v={}):
    self.i = i
    self.j = j
    self.x = x
    self.f = f
    self.e = e
    # v is only copied, never mutated, so the shared default dict is harmless
    self.v = svector.Vector(v)
def clean(self, v):
    """Return a copy of v that doesn't have any of the features used for
    the oracle."""
    v = svector.Vector(v)
    for f in self.feats:
        del v[f]
    return v
def sbmt_vector(s):
    v = svector.Vector()
    if s:
        for featvalue in s.split(","):
            feat, value = featvalue.split(":", 1)
            v[feat] = float(value)
    return v
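# A minimal usage sketch of sbmt_vector (the feature names are invented for
# illustration); note that split(":", 1) lets a value keep any further ':'
# characters after the first separator.
def _sbmt_vector_example():
    v = sbmt_vector("lm:2.5,tm:-0.3")
    assert v["lm"] == 2.5
    assert v["tm"] == -0.3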
def update_weights(weights, updates, alphas):
    # Sequential minimal optimization (SMO):
    #   minimize 1/2 ||sum(updates)||**2 + C*sum(xis)
    #   s.t. each margin >= loss - xi and each xi >= 0,
    # with one xi shared by all candidates of each sentence.
    # These are not sensitive to feature_scales, but maybe they should be.
    # This is not right -- gammas should be preserved across calls.
    if l1_regularization:
        gammas = svector.Vector()
    iterations = 0
    done = False
    while not done:
        if l1_regularization:
            # Cumulative-penalty L1 step: clip each weight toward zero by at
            # most the remaining regularization budget; gammas tracks how much
            # penalty each feature has already absorbed.
            for f in weights:
                delta = max(-l1_regularization * max_learning_rate * len(updates) - gammas[f],
                            min(weights[f],
                                l1_regularization * max_learning_rate * len(updates) - gammas[f]))
                gammas[f] += delta
                weights[f] -= delta
            if log.level >= 4:
                log.write(" gammas: %s\n" % gammas)
        done = True
        sentids = updates.keys()
        #random.shuffle(sentids)
        for sentid in sentids:
            vscores = updates[sentid]
            if len(vscores) < 2:
                continue
            if log.level >= 4:
                log.write(" sentence %s\n" % sentid)
            try:
                weights, alphas[sentid] = update_sentence_weights(
                    weights, updates[sentid], alphas[sentid])
                done = False
            except StopOptimization:
                pass
            if log.level >= 4:
                log.write(" alphas: %s\n" % " ".join(str(alpha) for alpha in alphas[sentid]))
        iterations += 1
        if iterations > 1000:
            log.write(" SMO: 1000 passes through data, stopping\n")
            break
    #log.write(" intermediate weights: %s\n" % weights)
    return weights, alphas
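# A self-contained toy sketch of the cumulative-penalty L1 step above, with
# plain dicts instead of svector; `budget` stands in for
# l1_regularization * max_learning_rate * len(updates).
def _l1_clip_example():
    weights = {'a': 0.7, 'b': -0.05}
    gammas = {'a': 0.0, 'b': 0.0}
    budget = 0.1
    for f in weights:
        delta = max(-budget - gammas[f], min(weights[f], budget - gammas[f]))
        gammas[f] += delta
        weights[f] -= delta
    # 'a' is shrunk by the full budget; 'b' is driven exactly to zero.
    assert abs(weights['a'] - 0.6) < 1e-12
    assert weights['b'] == 0.0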
def clear(self):
    self.links = []
    self.score = 0
    self.fscore = 0
    self.hope = 0
    self.fear = 0
    self.scoreVector = svector.Vector()
    self.scoreVector_nonlocal = svector.Vector()  # keep in sync with __init__
    self.position = None
    self.boundingBox = None
def estimate(self, r):
    v = svector.Vector()
    v["oracle.srclen"] = srclen = len(r.f) - r.f.arity()
    v["oracle.candlen"] = candlen = len(r.e) - r.e.arity()
    # pro-rate the reference length by the fraction of the source covered
    try:
        v["oracle.reflen"] = float(srclen) / self.srclen * self.reflen
    except ZeroDivisionError:
        v["oracle.reflen"] = self.reflen
    return v
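# Worked example of the pro-rating above (numbers invented): a rule covering
# srclen = 3 source words of a self.srclen = 10 sentence with reference length
# self.reflen = 12 contributes 3./10 * 12 = 3.6 to oracle.reflen.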
def estimate_rule(r, models, weights):
    """Puts a lower-bound estimate inside the rule; returns the full estimate."""
    r.statelesscost = svector.Vector()
    estcost = svector.Vector()
    for m in models:
        me = m.estimate(r)
        if m.stateless:
            r.statelesscost += me
        else:
            estcost += me
    estcost += r.statelesscost
    return weights.dot(estcost)
def input():
    for line in sys.stdin:
        try:
            key, rule, scores = line.split("\t")
        except Exception:
            sys.stderr.write("bad line: %s\n" % line.rstrip())
            raise
        scores = svector.Vector(scores)
        if feat_prob in scores:
            raise Exception("feature %s already present" % feat_prob)
        yield key, rule, scores
def from_str(s):
    fields = s.split(" ||| ")
    lhs = Nonterminal.from_str(fields[0].strip())
    frhs = [Nonterminal.from_str(f) for f in fields[1].split()]
    erhs = [Nonterminal.from_str(e) for e in fields[2].split()]
    r = Rule(lhs, frhs, erhs)
    if len(fields) >= 4:
        r.scores = svector.Vector(fields[3])
    if len(fields) >= 5:
        r.attrs = Attributes()
        r.attrs['align'] = fields[4].strip()
    return r
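# A hedged usage sketch of from_str. The field layout, per the split above, is:
#     lhs ||| french rhs ||| english rhs ||| scores ||| alignment
# The score string is assumed to use svector.Vector's "feat=value" text form
# (as seen elsewhere, e.g. "oracle.candlen=1"); the rule itself and the
# "[X,1]" nonterminal notation are invented for illustration:
#
#     r = Rule.from_str("[X] ||| [X,1] maison ||| [X,1] house ||| prob=0.5 ||| 1-1")
#     assert r.scores["prob"] == 0.5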
def make_forest(fieldss):
    nodes = {}
    goal_ids = set()
    for fields in fieldss:
        node_id = int(fields['hyp'])  # fields hold strings; convert like 'back' below
        if node_id not in nodes:
            nodes[node_id] = forest.Item(sym.fromtag('PHRASE'), 0, 0, [])
        node = nodes[node_id]
        if node_id == 0:
            # hypothesis 0 is the empty initial hypothesis
            r = rule.Rule(sym.fromtag('PHRASE'), rule.Phrase([]), rule.Phrase([]))
            node.deds.append(forest.Deduction((), r, svector.Vector()))
        else:
            m = scores_re.match(fields['scores'])
            core_values = [float(x) for x in m.group(1).split(',')]
            dcost = svector.Vector(m.group(2).encode('utf8'))
            for i, x in enumerate(core_values):
                dcost["_core%d" % i] = x
            back = int(fields['back'])
            ant = nodes[back]
            f = fields['src-phrase'].encode('utf8').split()
            e = fields['tgt-phrase'].encode('utf8').split()
            if len(f) != int(fields['cover-end']) - int(fields['cover-start']) + 1:
                sys.stderr.write("warning: French phrase length didn't match covered length\n")
            f = rule.Phrase([sym.setindex(sym.fromtag('PHRASE'), 1)] + f)
            e = rule.Phrase([sym.setindex(sym.fromtag('PHRASE'), 1)] + e)
            r = rule.Rule(sym.fromtag('PHRASE'), f, e)
            ded = forest.Deduction((ant,), r, dcost)
            node.deds.append(ded)
        if int(fields['forward']) < 0:  # goal
            goal_ids.add(node_id)
    goal = forest.Item(None, 0, 0, [])
    for node_id in goal_ids:
        goal.deds.append(forest.Deduction((nodes[node_id],), None, svector.Vector()))
    return goal
def createEdge(self, childEdges, currentNode, span):
    """
    Create a new edge from the list of edges 'childEdges'.
    Creating an edge involves:
    (1) initializing the PartialGridAlignment data structure
    (2) adding links (f, e) to the list newEdge.links
    (3) setting the score of the edge with scoreEdge(newEdge, ...)
    """
    newEdge = PartialGridAlignment()
    newEdge.scoreVector_local = svector.Vector()
    newEdge.scoreVector = svector.Vector()
    for e in childEdges:
        newEdge.links += e.links
        newEdge.scoreVector_local += e.scoreVector_local
        newEdge.scoreVector += e.scoreVector
        if e.boundingBox is None:
            e.boundingBox = self.boundingBox(e.links)
    score, boundingBox = self.scoreEdge(newEdge, currentNode, span, childEdges)
    return newEdge, boundingBox
def estimate(self, r):
    if len(r.e) - r.e.arity() == 0:
        return model.zero  # a hack to avoid having to estimate the glue rule
    match = svector.Vector()
    state = r.e.subst((), ((HOLE,),) * r.e.arity())
    for o in xrange(1, self.order + 1):
        m = 0
        for i in xrange(len(state) - o + 1):
            if tuple(state[i:i + o]) in self.refngrams:
                m += 1
        match[self.feat[o - 1]] = m
    return match
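# A self-contained toy sketch of the n-gram match count above: slide a window
# of each order over the (hole-substituted) target side and count the n-grams
# that appear in the reference set. Plain strings stand in for symbols here.
def _ngram_match_example():
    refngrams = {('the',), ('cat',), ('the', 'cat')}
    state = ['the', 'cat', 'sat']
    for o in (1, 2):
        m = sum(1 for i in xrange(len(state) - o + 1)
                if tuple(state[i:i + o]) in refngrams)
        print("order %d: %d matches" % (o, m))  # order 1: 2, order 2: 1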
def compute_item(self, r, ants, i, j):
    """Computes various pieces of information that go into an Item:
        heuristic (float), for comparing Items
        cost (float), of the resulting Item
        dcost (Vector), to be stored in Deduction
        states

    The reason this isn't just part of Item.__init__() is that we want
    to be able to abort creation of an Item object as early as possible.
    It didn't really need to be a method of Chart."""
    ms = self.models
    w = self.weights
    cost = sum(ant.viterbi for ant in ants)
    dcost = svector.Vector(r.statelesscost)
    bonus = svector.Vector()
    newstates = [None] * len(ms)
    if r.arity() == 2:
        j1 = ants[0].j
    else:
        j1 = None
    for m_i in xrange(len(ms)):
        m = ms[m_i]
        if not m.stateless:
            antstates = [ant.states[m_i] for ant in ants]
            (state, mdcost) = m.transition(r, antstates, i, j, j1)
            bonus += m.bonus(r.lhs, state)
            newstates[m_i] = state
            dcost += mdcost
    cost += w.dot(dcost)
    # first element (cost plus model bonuses) is the heuristic used to compare Items
    return (cost + w.dot(bonus), (cost, dcost, newstates))
def __init__(self, x, order=4, add=None, scale=True):
    svector.Vector.__init__(self, x)
    self.order = order
    if add is not None:
        self.add = add
    else:
        self.add = svector.Vector()  # add zero
    self.matchfeat = ["oracle.match%d" % o for o in xrange(order)]
    self.guessfeat = ["oracle.guess%d" % o for o in xrange(order)]
    self.addmatch = [self.add["oracle.match%d" % o] for o in xrange(order)]
    self.addguess = [self.add["oracle.guess%d" % o] for o in xrange(order)]
    self.addcandlen = self.add["oracle.candlen"]
    self.addreflen = self.add["oracle.reflen"]
    self.addsrclen = self.add["oracle.srclen"]
    self.scale = scale
def __init__(self, filename, feat, mapdigits=False, p_unk=None):
    model.Model.__init__(self)
    log.write("Reading language model from %s...\n" % filename)
    if p_unk is not None:
        self.ngram = Ngram(filename, override_unk=-p_unk)
    else:
        self.ngram = Ngram(filename)
    self.order = self.ngram.order
    self.mapdigits = mapdigits
    self.unit = svector.Vector(feat, 1.)
    self.START = self.ngram.lookup_word("<s>")
    self.STOP = self.ngram.lookup_word("</s>")
def scoreEdge(self, edge, currentNode, srcSpan, childEdges):
    """
    Score an edge.
    (1) edge: new hyperedge in the alignment forest; the tail of this
        hyperedge is the edges in childEdges
    (2) currentNode: the current node in the tree
    (3) srcSpan: span (i, j) of currentNode; i = index of first terminal
        node in span, j = index of last terminal node in span
    (4) childEdges: the two (or more, in the case of general trees) nodes
        we are combining with a new hyperedge
    """
    if self.COMPUTE_ORACLE:
        edge.fscore = self.ff_fscore(edge, srcSpan)

    boundingBox = None
    if self.DO_RESCORE:
        ##################################################################
        # Compute data needed for certain feature functions
        ##################################################################
        tgtSpan = None
        if len(edge.links) > 0:
            boundingBox = self.boundingBox(edge.links)
            tgtSpan = (boundingBox[0][0], boundingBox[1][0])
        edge.boundingBox = boundingBox
        # TODO: This is an awful O(l) patch of code
        linkedIndices = defaultdict(list)
        for link in edge.links:
            fIndex = link[0]
            eIndex = link[1]
            linkedIndices[fIndex].append(eIndex)

        scoreVector = svector.Vector(edge.scoreVector)
        if currentNode.data is not None and currentNode.data != '_XXX_':
            for func in self.featureTemplates_nonlocal:
                value_dict = func(self.info, currentNode, edge, edge.links,
                                  srcSpan, tgtSpan, linkedIndices, childEdges,
                                  self.diagValues, self.treeDistValues)
                for name, value in value_dict.iteritems():
                    if value != 0:
                        scoreVector[name] = value
        edge.scoreVector = scoreVector

    ##################################################
    # Compute final score for this partial alignment
    ##################################################
    edge.score = edge.scoreVector.dot(self.weights)
    return edge.score, boundingBox
def expand_goal(self, bin1):
    for (cost1, item1) in bin1:
        if item1.x == self.start_nonterminal:
            if log.level >= 3:
                log.write("Considering: %s\n" % str(item1))
            dcost = sum((m.finaltransition(item1.states[m_i])
                         for (m_i, m) in enumerate(self.models)),
                        svector.Vector())
            cost = item1.viterbi + self.weights.dot(dcost)
            ded = forest.Deduction((item1,), None, dcost, viterbi=cost)
            self.goal.add(cost,
                          forest.Item(None, 0, self.flattice.n - 1,
                                      deds=[ded], states=(), viterbi=cost))
def send_weights(self, delta=None, input=''):
    if delta is None:
        delta = default_delta
    if len(self.oldweights) == 0:
        # avoid any weird bug from the decoder's default weight vector
        # (note: excluding 0 items is risky too)
        delta = False
    w = self.weights
    fmt = "%s:%+g" if delta else "%s:%g"
    cmd = "weights"
    keep = lambda x: True
    if delta:
        keep = lambda x: abs(x) != 0.
        cmd += " diff"
        w = w - self.oldweights
    weightstr = ",".join(fmt % (cstr_escape_nows(k), v)
                         for (k, v) in w.iteritems() if keep(v))
    # FIXME: should non-delta weights omit 0? Should be OK, except for a crazy
    # lm (unk?) weight arising from feature semantics.
    self.send_instruction('%s "%s";' % (cmd, weightstr), input)
    self.oldweights = svector.Vector(self.weights)
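# Sketch of the instruction sent above (feature names and values invented):
# first call (empty oldweights), absolute form:
#     weights "lm:0.5,tm:-0.2";
# subsequent calls, delta form with signed values and zero deltas dropped:
#     weights diff "lm:+0.1,tm:-0.05";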
def __init__(self, order=4, variant="nist", oracledoc_size=10):
    self.order = order
    self.variant = variant.lower()
    if self.variant not in ['ibm', 'nist', 'average']:
        raise Exception("unknown BLEU variant %s" % self.variant)
    self.oraclemodel = OracleModel(order=order)
    self.wordcounter = WordCounter(variant=self.variant)
    self.models = [self.oraclemodel, self.wordcounter]
    # start the oracle document with one pseudocount for every statistic
    self.oracledoc = svector.Vector("oracle.candlen=1 oracle.reflen=1 oracle.srclen=1")
    for o in xrange(order):
        self.oracledoc["oracle.match%d" % o] = 1
        self.oracledoc["oracle.guess%d" % o] = 1
    self.oracledoc_size = oracledoc_size
    self.feats = list(self.oracledoc)
def input(self, input):
    self.rules = collections.defaultdict(list)
    for tag, attrs, i, j in input.fmeta:
        attrs = sgml.attrs_to_dict(attrs)
        if attrs.has_key('english'):
            ephrases = attrs['english'].split('|')

            if attrs.has_key('cost'):
                costs = [float(x) for x in attrs['cost'].split('|')]
            elif attrs.has_key('prob'):
                costs = [-math.log10(float(x)) for x in attrs['prob'].split('|')]
            else:
                # uniform
                costs = [-math.log10(1.0 / len(ephrases)) for e in ephrases]
            if len(costs) != len(ephrases):
                sys.stderr.write("wrong number of probabilities/costs\n")
                raise ValueError

            if attrs.has_key('features'):
                features = attrs['features'].split('|')
                if len(features) != len(ephrases):
                    sys.stderr.write("wrong number of feature names\n")
                    raise ValueError
            elif attrs.has_key('feature'):
                features = [attrs['feature'] for ephrase in ephrases]
            else:
                features = ['sgml' for ephrase in ephrases]

            if attrs.has_key('label'):
                tags = attrs['label'].split('|')
            else:
                tags = [tag.upper()]

            # bug: if new nonterminals are introduced at this point,
            # they will not participate in the topological sort
            for (ephrase, cost, feature) in zip(ephrases, costs, features):
                for tag in tags:
                    r = rule.Rule(sym.fromtag(tag),
                                  rule.Phrase(input.fwords[i:j]),
                                  rule.Phrase([sym.fromstring(e) for e in ephrase.split()]),
                                  scores=svector.Vector('%s' % feature, cost))
                    self.rules[i, j].append((r,))
def input(self, lat):
    self.rules = collections.defaultdict(list)
    for span in lat.spans:
        i, j = span.i, span.j
        if hasattr(span, 'v'):
            v = svector.Vector(span.v)
        else:
            v = model.zero
        # bug: if new nonterminals are introduced at this point,
        # they will not participate in the topological sort
        r = rule.Rule(rule.Nonterminal(span.x),
                      [rule.Nonterminal.from_str(f) for f in span.f],
                      [rule.Nonterminal.from_str(e) for e in span.e],
                      scores=v)
        self.rules[i, j].append((r,))
        if log.level >= 2:
            log.write("added lattice rule at (%d,%d): %s\n" % (i, j, r))
def cutting_plane(weights, updates, alphas, oracles={}, epsilon=0.01):
    done = False
    saveweights = svector.Vector(weights)
    if l2_regularization:
        # not using feature scales:
        #weights *= 1./(1+len(updates)*l2_regularization*max_learning_rate)
        # using feature scales:
        for f in weights:
            weights[f] *= 1. / (1. + feature_scales[f] * len(updates) *
                                l2_regularization * max_learning_rate)
    while not done:
        # Call the separation oracles: each proposes candidates under the
        # current weights, and any candidate whose violation exceeds every
        # constraint already in the working set by more than epsilon is added.
        done = True
        for sentid, oracle in oracles.iteritems():
            vscores = oracle(weights)
            for v, score in vscores:
                violation = weights.dot(v) + score
                for v1, score1 in updates[sentid]:
                    violation1 = weights.dot(v1) + score1
                    if violation <= violation1 + epsilon:
                        break
                else:
                    updates[sentid].append((v, score))
                    alphas[sentid].append(0.)
                    done = False
        weights, alphas = update_weights(weights, updates, alphas)
        if log.level >= 4:
            log.write("alphas: %s\n" % alphas)
    if False and log.level >= 1:  # disabled debugging output
        log.write("weight update: %s\n" % " ".join(
            "%s=%s" % (f, v)
            for f, v in (weights - saveweights).iteritems() if abs(v) > 0.))
    return weights, alphas