def read(self, f, models, weights):
    """Load rules into this grammar from a path or from an iterable of lines.

    Parameters:
        f: either a string path or an already-open iterable of rule lines.
            For a path, the file itself is tried first and then the same
            name with a ".gz" suffix, which is read through gzip.
        models, weights: passed unchanged to estimate_rule() for each rule.

    Each line is parsed with rule.rule_from_line(); lines that fail to parse
    are reported through log.write() and skipped.  Every parsed rule is
    added with self.add(rule, estimated_cost).

    Raises:
        IOError: if f is a path and neither f nor f + ".gz" is a file.
            (Previously the loop silently iterated over the characters of
            the path string.)

    Handles opened here are closed before returning; an iterable supplied
    by the caller is left open for the caller to manage.
    """
    opened = []  # handles created by this call, closed in reverse order
    try:
        if isinstance(f, str):
            path = f
            gz_path = "%s.gz" % path
            if os.path.isfile(path):
                if log.level >= 1:
                    log.write("Reading grammar from %s...\n" % path)
                f = open(path, 'r', 4*1024*1024)
                opened.append(f)
            elif os.path.isfile(gz_path):
                if log.level >= 1:
                    log.write("Decompressing grammar from %s...\n" % gz_path)
                # gzip needs the raw bytes of the compressed file.
                raw = open(gz_path, 'rb', 4*1024*1024)
                opened.append(raw)
                # GzipFile.close() does not close the underlying fileobj,
                # so both handles are tracked.
                f = gzip.GzipFile(fileobj=raw)
                opened.append(f)
            else:
                raise IOError("grammar file not found: %s (also tried %s)"
                              % (path, gz_path))
        elif log.level >= 1:
            log.write("Reading grammar...\n")

        for line in f:
            try:
                r = rule.rule_from_line(line)
            except Exception:
                log.write("warning: couldn't scan rule %s\n" % line.strip())
                continue
            # NOTE: parsing and adding are kept as separate statements; the
            # combined form self.add(rule.rule_from_line(line)) was reported
            # by the original author to have once caused a segfault.
            estcost = estimate_rule(r, models, weights)
            self.add(r, estcost)
    finally:
        for handle in reversed(opened):
            handle.close()

    log.write("%d rules read\n" % self.count)
def _scan_rule_line(line):
    """Parse one "handle ||| rule" line into (handle, rule), or None on failure."""
    try:
        handle, ruleline = line.split("|||", 1)
        return handle, rule.rule_from_line(ruleline)
    except Exception:
        # Only ordinary parse errors are reported; GeneratorExit,
        # KeyboardInterrupt and SystemExit propagate.
        sys.stderr.write("couldn't scan line: %s\n" % line.strip())
        return None


def read_rules(files):
    """Merge several grammar files together (assuming they are sorted).

    Parameters:
        files: a sequence of iterables of lines, each line of the form
            "handle ||| rule".  With more than one input, each input is
            assumed to be sorted already, and lines are produced in merged
            sorted order.

    Yields:
        (handle, rule) pairs, where rule is whatever rule.rule_from_line()
        returns for the text after the first "|||".

    Lines that cannot be parsed are reported on stderr and skipped.  When
    merging several inputs, rules with an empty scores list are also
    reported and skipped; the single-input path does not apply that check
    (behaviour kept from the original -- NOTE(review): confirm this
    asymmetry is intended).
    """
    if len(files) == 1:
        for line in files[0]:
            parsed = _scan_rule_line(line)
            if parsed is not None:
                # The yield is outside any try/except, so closing the
                # generator or throwing into it is not mistaken for a
                # scan error.
                yield parsed
        return

    # heapq.merge performs the lazy k-way merge and never compares the
    # input objects themselves, so identical lines in two inputs are safe.
    for line in heapq.merge(*files):
        parsed = _scan_rule_line(line)
        if parsed is None:
            continue
        handle, r = parsed
        if r is None:
            continue
        if len(r.scores) < 1:
            sys.stderr.write("rule doesn't have enough scores: %s\n" % str(r))
            continue
        yield handle, r
return ll(c12,c1,p) + ll(c2-c12,n-c1,p) - ll(c12,c1,p1) - ll(c2-c12,n-c1,p2) if __name__ == "__main__": import rule threshold = 1e-8 fweightfile = sys.argv[1] eweightfile = sys.argv[2] fweighttable = read_weightfile(file(fweightfile), threshold=threshold) eweighttable = read_weightfile(file(eweightfile), threshold=threshold) progress = 0 for line in sys.stdin: r = rule.rule_from_line(line) if r.word_alignments is None: scores = r.scores scores.extend([scores[0],scores[0]]) r.scores = scores sys.stdout.write("%s\n" % r.to_line()) progress += 1 continue align = set(r.word_alignments) fweight = eweight = 1.0 for fi in xrange(len(r.f)): if not sym.isvar(r.f[fi]): fwordweight = 0.
c2 - c12, n - c1, p2) if __name__ == "__main__": import rule threshold = 1e-8 fweightfile = sys.argv[1] eweightfile = sys.argv[2] fweighttable = read_weightfile(file(fweightfile), threshold=threshold) eweighttable = read_weightfile(file(eweightfile), threshold=threshold) progress = 0 for line in sys.stdin: r = rule.rule_from_line(line) if r.word_alignments is None: scores = r.scores scores.extend([scores[0], scores[0]]) r.scores = scores sys.stdout.write("%s\n" % r.to_line()) progress += 1 continue align = set(r.word_alignments) fweight = eweight = 1.0 for fi in xrange(len(r.f)): if not sym.isvar(r.f[fi]): fwordweight = 0.