示例#1
0
文件: mira.py 项目: jungikim/sbmt
def get_hyps(sent, goal, weights):
    """Return the hypotheses found after adding fear-scaled oracle weights.

    Precondition (stated by the original author): oraclemodel.input() has
    already been called.

    The search direction is ``fear_weight * oracle_weights + weights``.
    ``goal.reweight`` is called with that vector and hypotheses are then
    requested with ``decoder.get_nbest(goal, 1, 1)``.  ``sent`` is accepted
    but not read by this function.

    Returns:
        A list of ``(vector, hyp, score)`` tuples.  ``score`` is computed by
        ``get_score`` on the raw vector; ``vector`` is the result of
        ``theoracle.clean`` on that raw vector.
    """
    # Worst violators.  Scale and add with the in-place operators because,
    # per the original author, make_weights() might return a Vector subclass.
    search_weights = theoracle.make_weights(additive=True)
    search_weights *= fear_weight
    search_weights += weights
    goal.reweight(search_weights)

    collected = []
    for raw_vector, words in decoder.get_nbest(goal, 1, 1):
        score = get_score(raw_vector, words)
        surface = " ".join(sym.tostring(e) for e in words)
        log.write("added new hyp: %s %s cost=%s score=%s\n" %
                  (surface, raw_vector, weights.dot(raw_vector), score))
        # The learner MUST not see the oracle features, so only the cleaned
        # vector is handed back.
        collected.append((theoracle.clean(raw_vector), words, score))
    return collected
示例#2
0
文件: mira.py 项目: isi-nlp/sbmt
def get_hyps(sent, goal, weights):
    """Search the reweighted forest and return cleaned hypotheses.

    Precondition (stated by the original author): oraclemodel.input() has
    already been called.

    ``goal.reweight`` receives ``fear_weight * oracle_weights + weights``;
    afterwards ``decoder.get_nbest(goal, 1, 1)`` supplies the hypotheses.
    The ``sent`` argument is not read here.

    Returns:
        A list with one ``(cleaned_vector, hyp, score)`` tuple per
        hypothesis, where ``score`` is ``get_score(vector, hyp)`` on the
        vector before ``theoracle.clean`` was applied.
    """
    # Worst violators.  In-place arithmetic is deliberate: the original
    # author notes the oracle weights might be a subclass of Vector.
    combined = theoracle.make_weights(additive=True)
    combined *= fear_weight
    combined += weights
    goal.reweight(combined)

    found = decoder.get_nbest(goal, 1, 1)
    triples = []
    for vec, hyp in found:
        hyp_score = get_score(vec, hyp)
        text = " ".join(sym.tostring(e) for e in hyp)
        message = "added new hyp: %s %s cost=%s score=%s\n" % (
            text, vec, weights.dot(vec), hyp_score)
        log.write(message)

        # the learner MUST not see the oracle features
        cleaned = theoracle.clean(vec)
        triples.append((cleaned, hyp, hyp_score))
    return triples
示例#3
0
    def get_hope(self):
        """Return the hope hypothesis for this instance.

        Precondition (stated by the original author): oraclemodel.input()
        has already been called.

        Without a forest (``self.goal`` is falsy) the hope is the stored
        hypothesis in ``self.hyps`` that minimises
        ``mweights.dot(mvector) + hope_weight * oweights.dot(ovector)``.
        The selection uses ``min(..., key=...)`` so that equal costs are
        resolved by position in ``self.hyps`` and hypothesis objects are
        never compared with each other.

        With a forest, the forest is reweighted with
        ``mweights - hope_weight * oracle_weights`` and the first entry of
        ``decoder.get_nbest(self.goal, 1, 1)`` is wrapped in a new
        ``maxmargin.Hypothesis``.

        Returns:
            An element of ``self.hyps`` (no forest) or a new
            ``maxmargin.Hypothesis(mvector, ovector)`` (forest).

        Raises:
            ValueError: if there is no forest and ``self.hyps`` is empty
                (raised by ``min``).
        """
        if not self.goal:
            mweights = self.qp.mweights
            oweights = self.qp.oweights

            def hope_cost(hyp):
                # Model cost plus the hope-scaled oracle term.
                return (mweights.dot(hyp.mvector) +
                        hope_weight * oweights.dot(hyp.ovector))

            # key= avoids building (cost, hyp) tuples, whose comparison
            # falls through to the hypothesis objects whenever costs tie.
            return min(self.hyps, key=hope_cost)

        weights = theoracle.make_weights(additive="edge")
        # use in-place operations because theoracle.make_weights might
        # return a subclass of svector.Vector
        weights *= -hope_weight
        weights += self.qp.mweights
        self.goal.reweight(weights)

        hope_vector, hope = decoder.get_nbest(self.goal, 1, 1)[0]
        # finish() is called on the raw vector before clean(), as in the
        # original statement order.
        hope_ovector = theoracle.finish(hope_vector, hope)
        hope_mvector = theoracle.clean(hope_vector)

        if log.level >= 1:
            log.write("hope hyp: %s\n" %
                      " ".join(sym.tostring(e) for e in hope))
            log.write("hope features: %s\n" % hope_mvector)
            log.write("hope oracle: %s\n" % hope_ovector)
        return maxmargin.Hypothesis(hope_mvector, hope_ovector)
示例#4
0
文件: trainer.py 项目: jungikim/sbmt
    def process_heldout(sent):
        """Decode one held-out sentence and attach its 1-best result to it.

        Sets ``sent.flen``, feeds the sentence to ``theoracle``, decodes it
        with ``thedecoder``, rescores the forest with the oracle models and
        stores the oracle components and output words of the first n-best
        entry on ``sent``.

        Returns:
            ``sent`` on success, or ``None`` when decoding failed and the
            global ``decoder_errors`` counter is still below 100.

        Raises:
            Exception: the decoding exception is re-raised once
                ``decoder_errors`` reaches 100.
        """
        global decoder_errors

        # flen gives the length of the input sentence; in the
        # lattice-decoding case this is a guess (original author's note).
        dist = sent.compute_distance()
        sent.flen = dist.get((0, sent.n - 1), None)  # could be missing if n == 0

        theoracle.input(sent)
        log.write("done preparing\n")

        try:
            goal = thedecoder.translate(sent)
            # NOTE(review): process_output and the counter reset both run
            # before the None check below -- confirm this is intended.
            thedecoder.process_output(sent, goal)
            decoder_errors = 0
            if goal is None:
                raise Exception("parse failure")
        except Exception:
            import traceback
            log.write("decoder raised exception: %s" % traceback.format_exc())
            decoder_errors += 1
            if decoder_errors < 100:
                return
            log.write(
                "decoder failed too many times, passing exception through!\n")
            raise

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        top_vector, top_hyp = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        oracle_comps = theoracle.finish(top_vector, top_hyp)
        # Scored with the "edge" additive weights (a "sentence" variant was
        # previously left commented out here).
        top_score = theoracle.make_weights(additive="edge").dot(oracle_comps)
        surface = " ".join(sym.tostring(e) for e in top_hyp)
        log.write("best hyp: %s %s cost=%s score=%s\n" %
                  (surface, top_vector, thedecoder.weights.dot(top_vector),
                   top_score))

        sent.score_comps = oracle_comps
        sent.ewords = [sym.tostring(word) for word in top_hyp]
        return sent
示例#5
0
def process(sent):
    """Decode ``sent``, add oracle features to its forest, return the forest.

    Side effects on ``sent``: ``flen``, ``score_comps`` (oracle vector of the
    first n-best entry) and ``ewords`` (its words as strings) are assigned.

    Returns:
        The rescored forest ``goal``, or ``None`` when decoding failed and
        the global ``decoder_errors`` counter is still below 3.

    Raises:
        Exception: the decoding exception is re-raised once
            ``decoder_errors`` reaches 3.
    """
    global decoder_errors

    # flen gives the length of the input sentence; in the lattice-decoding
    # case this is a guess (original author's note).
    dist = sent.compute_distance()
    sent.flen = dist.get((0, sent.n - 1), None)  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        # NOTE(review): process_output and the counter reset both run before
        # the None check below -- confirm this is intended.
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        if decoder_errors < 3:
            return
        log.write(
            "decoder failed too many times, passing exception through!\n")
        raise

    # Augment the forest with oracle features.  The original author notes
    # this is overkill when no hope/fear search follows.
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    top_vector, top_hyp = decoder.get_nbest(goal, 1)[0]
    model_vector = theoracle.clean(top_vector)
    oracle_vector = theoracle.finish(top_vector, top_hyp)
    loss = theoracle.make_weights(additive="sentence").dot(oracle_vector)
    surface = " ".join(sym.tostring(e) for e in top_hyp)
    log.writeln("best hyp: %s %s cost=%s loss=%s" %
                (surface, top_vector, thedecoder.weights.dot(model_vector),
                 loss))

    sent.score_comps = oracle_vector
    sent.ewords = [sym.tostring(word) for word in top_hyp]

    return goal
示例#6
0
    def get_fear(self):
        """Return the fear hypothesis found in this instance's forest.

        Precondition (stated by the original author): oraclemodel.input()
        has already been called.

        The forest ``self.goal`` is reweighted with
        ``oracle_weights + self.qp.mweights`` and the first entry of
        ``decoder.get_nbest(self.goal, 1, 1)`` is taken.

        Returns:
            ``maxmargin.Hypothesis(mvector, ovector)`` where ``ovector``
            comes from ``theoracle.finish`` and ``mvector`` from
            ``theoracle.clean``.

        Raises:
            NotImplementedError: if ``self.goal`` is falsy (no forest).
        """
        if not self.goal:
            raise NotImplementedError()

        # Add in place: make_weights() might return a subclass of
        # svector.Vector (original author's note).
        search_weights = theoracle.make_weights(additive="edge")
        search_weights += self.qp.mweights
        self.goal.reweight(search_weights)

        nbest = decoder.get_nbest(self.goal, 1, 1)
        raw_vector, fear = nbest[0]
        oracle_part = theoracle.finish(raw_vector, fear)
        model_part = theoracle.clean(raw_vector)

        if log.level >= 1:
            words = " ".join(sym.tostring(e) for e in fear)
            log.write("fear hyp: %s\n" % words)
            log.write("fear features: %s\n" % model_part)
            log.write("fear oracle: %s\n" % oracle_part)

        return maxmargin.Hypothesis(model_part, oracle_part)
示例#7
0
文件: mira.py 项目: isi-nlp/sbmt
    def process_heldout(sent):
        """Decode one held-out sentence and attach its 1-best result to it.

        Feeds the sentence to ``theoracle``, decodes it with ``thedecoder``,
        rescores the forest with the oracle models, logs the first n-best
        entry and stores ``score_comps`` / ``ewords`` on ``sent``.

        Returns:
            ``sent`` on success, or ``None`` when decoding failed and the
            global ``decoder_errors`` counter is still below 100.

        Raises:
            Exception: the decoding exception is re-raised once
                ``decoder_errors`` reaches 100.
        """
        global decoder_errors

        theoracle.input(sent)
        log.write("done preparing\n")

        try:
            goal = thedecoder.translate(sent)
            # NOTE(review): process_output and the counter reset both run
            # before the None check below -- confirm this is intended.
            thedecoder.process_output(sent, goal)
            decoder_errors = 0
            if goal is None:
                raise Exception("parse failure")
        except Exception:
            import traceback

            failure_text = "decoder raised exception: %s %s" % (
                sent, traceback.format_exc())
            log.writeln(failure_text)
            decoder_errors += 1
            if decoder_errors < 100:
                return
            log.write("decoder failed too many times, passing exception through!\n")
            raise

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        top_vector, top_hyp = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        top_score = get_score(top_vector, top_hyp)
        surface = " ".join(sym.tostring(e) for e in top_hyp)
        log.write(
            "best hyp: %s %s cost=%s score=%s\n"
            % (surface, top_vector, thedecoder.weights.dot(top_vector), top_score)
        )

        # The oracle components are computed only after logging the raw vector.
        sent.score_comps = theoracle.finish(top_vector, top_hyp)
        sent.ewords = [sym.tostring(word) for word in top_hyp]

        return sent
示例#8
0
文件: mira.py 项目: jungikim/sbmt
def get_gold(sent, goal, weights):
    """Return the gold hypothesis found after subtracting hope-scaled oracle weights.

    Precondition (stated by the original author): oraclemodel.input() has
    already been called.

    ``goal.reweight`` is called with ``weights - hope_weight * oracle_weights``
    and the first entry of ``decoder.get_nbest(goal, 1, 1)`` is taken.
    ``sent`` is accepted but not read by this function.

    Returns:
        ``(vector, hyp, score)``: ``score`` is ``get_score`` on the raw
        vector; ``vector`` is that raw vector passed through
        ``theoracle.clean``.
    """
    # In-place arithmetic on purpose: the oracle weights might be a subclass
    # of Vector (original author's note).
    search_weights = theoracle.make_weights(additive=True)
    search_weights *= -hope_weight
    search_weights += weights
    goal.reweight(search_weights)

    raw_vector, gold = decoder.get_nbest(goal, 1, 1)[0]
    goldscore = get_score(raw_vector, gold)

    surface = " ".join(sym.tostring(e) for e in gold)
    log.write("gold hyp: %s %s cost=%s score=%s\n" %
              (surface, raw_vector, weights.dot(raw_vector), goldscore))

    # The learner MUST not see the oracle features, so the vector is cleaned
    # before it leaves this function.
    return theoracle.clean(raw_vector), gold, goldscore
示例#9
0
    def get_fear(self):
        """Search the forest for the fear hypothesis and return it.

        Precondition (stated by the original author): oraclemodel.input()
        has already been called.

        ``self.goal.reweight`` receives ``oracle_weights + self.qp.mweights``;
        the first entry of ``decoder.get_nbest(self.goal, 1, 1)`` is then
        split into an oracle vector (``theoracle.finish``) and a model
        vector (``theoracle.clean``).

        Returns:
            A new ``maxmargin.Hypothesis(mvector, ovector)``.

        Raises:
            NotImplementedError: if ``self.goal`` is falsy (no forest).
        """
        if not self.goal:
            raise NotImplementedError()

        combined = theoracle.make_weights(additive="edge")
        # in-place add, since make_weights() might return a subclass of
        # svector.Vector (original author's note)
        combined += self.qp.mweights
        self.goal.reweight(combined)

        first = decoder.get_nbest(self.goal, 1, 1)[0]
        vec, fear = first
        ovec = theoracle.finish(vec, fear)
        mvec = theoracle.clean(vec)

        if log.level >= 1:
            fear_text = " ".join(sym.tostring(e) for e in fear)
            log.write("fear hyp: %s\n" % fear_text)
            log.write("fear features: %s\n" % mvec)
            log.write("fear oracle: %s\n" % ovec)

        return maxmargin.Hypothesis(mvec, ovec)
示例#10
0
文件: mira.py 项目: jungikim/sbmt
    def process_heldout(sent):
        """Decode a held-out sentence; record its best words and oracle components.

        The sentence is given to ``theoracle``, decoded by ``thedecoder``,
        and its forest is rescored with the oracle models.  The first n-best
        entry is logged and then stored on ``sent`` as ``score_comps``
        (via ``theoracle.finish``) and ``ewords``.

        Returns:
            ``sent`` on success, or ``None`` when decoding failed and the
            global ``decoder_errors`` counter is still below 100.

        Raises:
            Exception: the decoding exception is re-raised once
                ``decoder_errors`` reaches 100.
        """
        global decoder_errors

        theoracle.input(sent)
        log.write("done preparing\n")

        try:
            goal = thedecoder.translate(sent)
            # NOTE(review): process_output and the counter reset both run
            # before the None check below -- confirm this is intended.
            thedecoder.process_output(sent, goal)
            decoder_errors = 0
            if goal is None:
                raise Exception("parse failure")
        except Exception:
            import traceback
            log.writeln("decoder raised exception: %s %s" %
                        (sent, traceback.format_exc()))
            decoder_errors += 1
            too_many = decoder_errors >= 100
            if not too_many:
                return
            log.write(
                "decoder failed too many times, passing exception through!\n")
            raise

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        vec, hyp = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        hyp_score = get_score(vec, hyp)
        hyp_text = " ".join(sym.tostring(e) for e in hyp)
        log.write("best hyp: %s %s cost=%s score=%s\n" %
                  (hyp_text, vec, thedecoder.weights.dot(vec), hyp_score))

        sent.score_comps = theoracle.finish(vec, hyp)
        sent.ewords = [sym.tostring(token) for token in hyp]

        return sent
示例#11
0
    def process_heldout(sent):
        """Decode a held-out sentence; record its best words and oracle components.

        Assigns ``sent.flen``, gives the sentence to ``theoracle``, decodes
        it with ``thedecoder`` and rescores the forest with the oracle
        models.  The first n-best entry supplies ``sent.score_comps`` (via
        ``theoracle.finish``) and ``sent.ewords``.

        Returns:
            ``sent`` on success, or ``None`` when decoding failed and the
            global ``decoder_errors`` counter is still below 100.

        Raises:
            Exception: the decoding exception is re-raised once
                ``decoder_errors`` reaches 100.
        """
        global decoder_errors

        # flen is the length of the input sentence; for lattice decoding
        # this is a guess (original author's note).
        span_distance = sent.compute_distance()
        whole_span = (0, sent.n - 1)
        sent.flen = span_distance.get(whole_span, None)  # could be missing if n == 0

        theoracle.input(sent)
        log.write("done preparing\n")

        try:
            goal = thedecoder.translate(sent)
            # NOTE(review): process_output and the counter reset both run
            # before the None check below -- confirm this is intended.
            thedecoder.process_output(sent, goal)
            decoder_errors = 0
            if goal is None:
                raise Exception("parse failure")
        except Exception:
            import traceback
            log.write("decoder raised exception: %s" % traceback.format_exc())
            decoder_errors += 1
            if decoder_errors < 100:
                return
            log.write("decoder failed too many times, passing exception through!\n")
            raise

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        vec, hyp = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        comps = theoracle.finish(vec, hyp)
        # "edge" additive weights are used for the logged score (a
        # "sentence" variant was previously left commented out here).
        edge_weights = theoracle.make_weights(additive="edge")
        hyp_score = edge_weights.dot(comps)
        hyp_text = " ".join(sym.tostring(e) for e in hyp)
        log.write("best hyp: %s %s cost=%s score=%s\n" %
                  (hyp_text, vec, thedecoder.weights.dot(vec), hyp_score))

        sent.score_comps = comps
        sent.ewords = [sym.tostring(token) for token in hyp]

        return sent
示例#12
0
文件: mira.py 项目: isi-nlp/sbmt
def get_gold(sent, goal, weights):
    """Search the reweighted forest for the gold hypothesis.

    Precondition (stated by the original author): oraclemodel.input() has
    already been called.

    The forest is reweighted with ``weights - hope_weight * oracle_weights``
    before the first entry of ``decoder.get_nbest(goal, 1, 1)`` is read.
    The ``sent`` argument is not read here.

    Returns:
        A ``(cleaned_vector, hyp, score)`` tuple, with ``score`` computed by
        ``get_score`` on the vector before ``theoracle.clean`` was applied.
    """
    combined = theoracle.make_weights(additive=True)
    # in-place operators, because the oracle weights might be a subclass of
    # Vector (original author's note)
    combined *= -hope_weight
    combined += weights
    goal.reweight(combined)

    first = decoder.get_nbest(goal, 1, 1)[0]
    vec, gold = first
    gold_score = get_score(vec, gold)

    gold_text = " ".join(sym.tostring(e) for e in gold)
    message = "gold hyp: %s %s cost=%s score=%s\n" % (
        gold_text, vec, weights.dot(vec), gold_score)
    log.write(message)

    # the learner MUST not see the oracle features
    cleaned = theoracle.clean(vec)
    return cleaned, gold, gold_score
示例#13
0
    def get_hope(self):
        """Return the hope hypothesis for this instance.

        Precondition (stated by the original author): oraclemodel.input()
        has already been called.

        Without a forest (``self.goal`` is falsy) the hope is the stored
        hypothesis in ``self.hyps`` with the smallest
        ``mweights.dot(mvector) + hope_weight * oweights.dot(ovector)``.
        It is picked with ``min(..., key=...)``: equal costs are resolved by
        position in ``self.hyps`` and hypothesis objects are never compared
        with each other.

        With a forest, ``self.goal`` is reweighted with
        ``mweights - hope_weight * oracle_weights`` and the first entry of
        ``decoder.get_nbest(self.goal, 1, 1)`` becomes a new
        ``maxmargin.Hypothesis``.

        Returns:
            An element of ``self.hyps`` (no forest) or a new
            ``maxmargin.Hypothesis(mvector, ovector)`` (forest).

        Raises:
            ValueError: if there is no forest and ``self.hyps`` is empty
                (raised by ``min``).
        """
        if not self.goal:
            mweights = self.qp.mweights
            oweights = self.qp.oweights

            def hope_cost(hyp):
                # Model cost plus the hope-scaled oracle term.
                return mweights.dot(hyp.mvector) + hope_weight * oweights.dot(hyp.ovector)

            # key= replaces the (cost, hyp) tuples, whose comparison fell
            # through to the hypothesis objects whenever two costs tied.
            return min(self.hyps, key=hope_cost)

        weights = theoracle.make_weights(additive="edge")
        # use in-place operations because theoracle.make_weights might
        # return a subclass of svector.Vector
        weights *= -hope_weight
        weights += self.qp.mweights
        self.goal.reweight(weights)

        hope_vector, hope = decoder.get_nbest(self.goal, 1, 1)[0]
        # finish() is called on the raw vector before clean(), as in the
        # original statement order.
        hope_ovector = theoracle.finish(hope_vector, hope)
        hope_mvector = theoracle.clean(hope_vector)

        if log.level >= 1:
            log.write("hope hyp: %s\n" % " ".join(sym.tostring(e) for e in hope))
            log.write("hope features: %s\n" % hope_mvector)
            log.write("hope oracle: %s\n" % hope_ovector)
        return maxmargin.Hypothesis(hope_mvector, hope_ovector)
示例#14
0
    def process(sent):
        """Decode ``sent`` and attach the hypotheses to be used for learning.

        The sentence is given to ``oraclemodel``, translated by
        ``thedecoder``, and ``sent.hyps`` is set to the result of
        ``get_hyps(sent, goal, thedecoder.weights)``.

        Returns:
            ``sent`` on success, or ``None`` when ``translate`` raised and
            the global ``decoder_errors`` counter is still below 5.

        Raises:
            Exception: the translate exception is re-raised once
                ``decoder_errors`` reaches 5.
        """
        global decoder_errors

        oraclemodel.input(sent)
        log.write("done preparing\n")

        try:
            goal = thedecoder.translate(sent)
        except Exception:
            import traceback
            log.writeln("decoder raised exception: %s" % traceback.format_exc())
            decoder_errors += 1
            if decoder_errors < 5:
                return
            raise

        # The first n-best entry is extracted here but its values are not
        # used afterwards; the call and unpacking are kept as they were.
        _top_vector, _top_hyp = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        # Collect hypotheses that will be used for learning
        learning_hyps = get_hyps(sent, goal, thedecoder.weights)
        sent.hyps = learning_hyps
        log.write("done rescoring\n")

        return sent
示例#15
0
def process(sent):
    """Decode ``sent`` and return its forest augmented with oracle features.

    Along the way ``sent.flen`` is assigned, and the first n-best entry of
    the rescored forest supplies ``sent.score_comps`` (its oracle vector)
    and ``sent.ewords`` (its words as strings).

    Returns:
        The rescored forest ``goal``, or ``None`` when decoding failed and
        the global ``decoder_errors`` counter is still below 3.

    Raises:
        Exception: the decoding exception is re-raised once
            ``decoder_errors`` reaches 3.
    """
    global decoder_errors

    # flen is the length of the input sentence; for lattice decoding this is
    # a guess (original author's note).
    span_distance = sent.compute_distance()
    whole_span = (0, sent.n - 1)
    sent.flen = span_distance.get(whole_span, None)  # could be missing if n == 0

    theoracle.input(sent)

    try:
        goal = thedecoder.translate(sent)
        # NOTE(review): process_output and the counter reset both run before
        # the None check below -- confirm this is intended.
        thedecoder.process_output(sent, goal)
        decoder_errors = 0
        if goal is None:
            raise Exception("parse failure")
    except Exception:
        import traceback
        log.write("decoder raised exception: %s" % traceback.format_exc())
        decoder_errors += 1
        too_many = decoder_errors >= 3
        if not too_many:
            return
        log.write("decoder failed too many times, passing exception through!\n")
        raise

    # Augment forest with oracle features; the original author notes this is
    # overkill if no hope/fear search follows.
    goal.rescore(theoracle.models, thedecoder.weights, add=True)

    vec, hyp = decoder.get_nbest(goal, 1)[0]
    mvec = theoracle.clean(vec)
    ovec = theoracle.finish(vec, hyp)
    sentence_weights = theoracle.make_weights(additive="sentence")
    hyp_loss = sentence_weights.dot(ovec)
    hyp_text = " ".join(sym.tostring(e) for e in hyp)
    log.writeln("best hyp: %s %s cost=%s loss=%s" %
                (hyp_text, vec, thedecoder.weights.dot(mvec), hyp_loss))

    sent.score_comps = ovec
    sent.ewords = [sym.tostring(token) for token in hyp]

    return goal
示例#16
0
文件: genhyps.py 项目: jungikim/sbmt
    def process(sent):
        """Translate ``sent`` and store its learning hypotheses on it.

        After ``oraclemodel.input(sent)`` and ``thedecoder.translate(sent)``,
        ``sent.hyps`` is assigned the value returned by
        ``get_hyps(sent, goal, thedecoder.weights)``.

        Returns:
            ``sent`` on success, or ``None`` when ``translate`` raised and
            the global ``decoder_errors`` counter is still below 5.

        Raises:
            Exception: the translate exception is re-raised once
                ``decoder_errors`` reaches 5.
        """
        global decoder_errors

        oraclemodel.input(sent)
        log.write("done preparing\n")

        try:
            goal = thedecoder.translate(sent)
        except Exception:
            import traceback
            trace_text = traceback.format_exc()
            log.writeln("decoder raised exception: %s" % trace_text)
            decoder_errors += 1
            give_up = decoder_errors >= 5
            if give_up:
                raise
            return

        # The values of the first n-best entry are not used below; the
        # lookup and unpacking are kept exactly as in the original.
        _vec, _hyp = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        # Collect hypotheses that will be used for learning
        sent.hyps = get_hyps(sent, goal, thedecoder.weights)
        log.write("done rescoring\n")

        return sent
示例#17
0
文件: mira.py 项目: jungikim/sbmt
    def process(sent):
        """Run one training step on ``sent`` and return the sentence.

        Steps, in order:

        1. With ``online_learning`` set, empty the ``updates`` and ``alphas``
           tables.
        2. Give the sentence to ``theoracle`` and decode it.  On failure the
           traceback is logged, ``decoder_errors`` is incremented and
           ``None`` is returned; once the counter reaches 100 the exception
           is re-raised instead.
        3. Rescore the forest with the oracle models, read the first n-best
           entry and obtain the gold hypothesis from ``get_gold``.
        4. With ``opts.parallel`` set, receive any pending updates from
           other nodes (``mpi.world``, tag 1).
        5. Call ``cutting_plane`` to obtain new decoder weights and alphas.
        6. Accumulate ``sumweights_helper`` / ``nweights`` for averaging and,
           with ``update_feature_scales`` set, recompute ``feature_scales``.
        7. With ``opts.parallel`` set, send this sentence's updates to the
           other nodes.
        8. Store ``score_comps`` and ``ewords`` on ``sent``.

        Module-level names rebound here: ``alphas``, ``decoder_errors``,
        ``nweights``, ``sumweights_helper``, ``sum_updates2``, ``n_updates``,
        ``feature_scales`` and ``requests``.
        """
        global alphas

        if online_learning:
            updates.clear()
            alphas.clear()

        theoracle.input(sent)

        log.write("done preparing\n")

        global decoder_errors
        try:
            goal = thedecoder.translate(sent)
            thedecoder.process_output(sent, goal)
            # Reached only when translate/process_output did not raise.
            # NOTE(review): the reset happens before the None check below.
            decoder_errors = 0
            if goal is None: raise Exception("parse failure")
        except Exception:
            import traceback
            log.writeln(
                "decoder raised exception: %s %s" %
                (sent, "".join(traceback.format_exception(*sys.exc_info()))))
            decoder_errors += 1
            if decoder_errors >= 100:
                log.write(
                    "decoder failed too many times, passing exception through!\n"
                )
                raise
            else:
                # Skip this sentence; the caller receives None.
                return

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        bestv, best = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        bestscore = get_score(bestv, best)
        log.write("best hyp: %s %s cost=%s score=%s\n" % (" ".join(
            sym.tostring(e)
            for e in best), bestv, thedecoder.weights.dot(bestv), bestscore))

        goldv, gold, goldscore = get_gold(sent, goal, thedecoder.weights)

        assert (
            sent.id not in updates
        )  # in batch learning, this can happen, and we would have to undo the update associated with this sentence

        # Seed this sentence with a single zero update at the maximum
        # learning rate.
        updates[sent.id] = [(svector.Vector(), 0.)]
        alphas[sent.id] = [max_learning_rate]

        if opts.parallel:
            # Drain every update currently waiting on tag 1.
            while True:
                if mpi.world.iprobe(tag=1):
                    (sentid, vscores) = mpi.world.recv(tag=1)
                    log.write("received update for %s\n" % (sentid, ))

                    if sentid in updates:  # see comment above
                        log.write("ignoring update for %s\n" % (sentid, ))
                        continue  # drop this update on the floor

                    updates[sentid] = vscores
                    alphas[sentid] = [max_learning_rate
                                      ] + [0.] * (len(vscores) - 1)
                    # since the first update is zero, the alphas & updates
                    # are still consistent with weights
                else:
                    break

        def oracle(weights):
            # For the given weights, return one (vector difference, score
            # difference) pair between the gold hypothesis and each
            # hypothesis from get_hyps.
            hyps = get_hyps(sent, goal, weights)
            return [(goldv - hypv, goldscore - hypscore)
                    for (hypv, hyp, hypscore) in hyps]

        thedecoder.weights, alphas = cutting_plane(thedecoder.weights, updates,
                                                   alphas, {sent.id: oracle})

        remove_zeros(thedecoder.weights)
        log.write("feature weights: %s\n" %
                  (thedecoder.weights * watch_features))
        log.write("weight norm: %s\n" %
                  (math.sqrt(thedecoder.weights.normsquared())))

        # update weight sum for averaging
        global nweights, sumweights_helper

        # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
        for sentid in updates:
            for (v, score), alpha in itertools.izip(updates[sentid],
                                                    alphas[sentid]):
                apply_update(sumweights_helper, nweights * alpha * v)
        nweights += 1

        # update feature scales
        if update_feature_scales:
            global sum_updates2, n_updates, feature_scales
            for sentid in updates:
                # u is the sum of this sentence's updates, each scaled by
                # alpha / max_learning_rate.
                u = svector.Vector()
                for (v,
                     score), alpha in itertools.izip(updates[sentid],
                                                     alphas[sentid]):
                    u += alpha / max_learning_rate * v
                sum_updates2 += u * u
                n_updates += 1

            # Scale is 1 / compute_variance(...); a ZeroDivisionError is
            # mapped to a scale of 0.
            try:
                default_feature_scale = 1. / compute_variance(0, n_updates)
            except ZeroDivisionError:
                default_feature_scale = 0.  # pseudoinverse
            # Features absent from sum_updates2 get the default scale.
            feature_scales = collections.defaultdict(
                lambda: default_feature_scale)
            for feat in sum_updates2:
                try:
                    feature_scales[feat] = 1. / compute_variance(
                        sum_updates2[feat], n_updates)
                except ZeroDivisionError:
                    feature_scales[feat] = 0.  # pseudoinverse

            log.write(
                "feature scales: %s\n" %
                (" ".join("%s=%s" % (f, feature_scales[f])
                          for f in watch_features if f in feature_scales)))

        if opts.parallel:
            # flush out filled requests
            # (keeps only the requests whose test() is still falsy)
            global requests
            requests = [request for request in requests if not request.test()]

            # transmit updates to other nodes
            for node in parallel.slaves:
                if node != parallel.rank:
                    requests.append(
                        mpi.world.isend(value=(sent.id, updates[sent.id]),
                                        dest=node,
                                        tag=1))

        # bestv is replaced by the result of theoracle.finish before it is
        # passed to theoracle.update and stored on the sentence.
        bestv = theoracle.finish(bestv, best)
        theoracle.update(bestv)
        sent.score_comps = bestv

        if log.level >= 1:
            gc.collect()
            log.write("done updating, memory = %s\n" % monitor.memory())

        sent.ewords = [sym.tostring(e) for e in best]

        return sent
示例#18
0
文件: mira.py 项目: isi-nlp/sbmt
    def process(sent):
        """Run one training step on ``sent`` and return the sentence.

        Steps, in order:

        1. With ``online_learning`` set, empty the ``updates`` and ``alphas``
           tables.
        2. Give the sentence to ``theoracle`` and decode it.  On failure the
           traceback is logged, ``decoder_errors`` is incremented and
           ``None`` is returned; once the counter reaches 100 the exception
           is re-raised instead.
        3. Rescore the forest with the oracle models, read the first n-best
           entry and obtain the gold hypothesis from ``get_gold``.
        4. With ``opts.parallel`` set, receive any pending updates from
           other nodes (``mpi.world``, tag 1).
        5. Call ``cutting_plane`` to obtain new decoder weights and alphas.
        6. Accumulate ``sumweights_helper`` / ``nweights`` for averaging and,
           with ``update_feature_scales`` set, recompute ``feature_scales``.
        7. With ``opts.parallel`` set, send this sentence's updates to the
           other nodes.
        8. Store ``score_comps`` and ``ewords`` on ``sent``.

        Module-level names rebound here: ``alphas``, ``decoder_errors``,
        ``nweights``, ``sumweights_helper``, ``sum_updates2``, ``n_updates``,
        ``feature_scales`` and ``requests``.
        """
        global alphas

        if online_learning:
            updates.clear()
            alphas.clear()

        theoracle.input(sent)

        log.write("done preparing\n")

        global decoder_errors
        try:
            goal = thedecoder.translate(sent)
            thedecoder.process_output(sent, goal)
            # Reached only when translate/process_output did not raise.
            # NOTE(review): the reset happens before the None check below.
            decoder_errors = 0
            if goal is None:
                raise Exception("parse failure")
        except Exception:
            import traceback

            log.writeln(
                "decoder raised exception: %s %s" % (sent, "".join(traceback.format_exception(*sys.exc_info())))
            )
            decoder_errors += 1
            if decoder_errors >= 100:
                log.write("decoder failed too many times, passing exception through!\n")
                raise
            else:
                # Skip this sentence; the caller receives None.
                return

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        bestv, best = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        bestscore = get_score(bestv, best)
        log.write(
            "best hyp: %s %s cost=%s score=%s\n"
            % (" ".join(sym.tostring(e) for e in best), bestv, thedecoder.weights.dot(bestv), bestscore)
        )

        goldv, gold, goldscore = get_gold(sent, goal, thedecoder.weights)

        assert (
            sent.id not in updates
        )  # in batch learning, this can happen, and we would have to undo the update associated with this sentence

        # Seed this sentence with a single zero update at the maximum
        # learning rate.
        updates[sent.id] = [(svector.Vector(), 0.0)]
        alphas[sent.id] = [max_learning_rate]

        if opts.parallel:
            # Drain every update currently waiting on tag 1.
            while True:
                if mpi.world.iprobe(tag=1):
                    (sentid, vscores) = mpi.world.recv(tag=1)
                    log.write("received update for %s\n" % (sentid,))

                    if sentid in updates:  # see comment above
                        log.write("ignoring update for %s\n" % (sentid,))
                        continue  # drop this update on the floor

                    updates[sentid] = vscores
                    alphas[sentid] = [max_learning_rate] + [0.0] * (len(vscores) - 1)
                    # since the first update is zero, the alphas & updates
                    # are still consistent with weights
                else:
                    break

        def oracle(weights):
            # For the given weights, return one (vector difference, score
            # difference) pair between the gold hypothesis and each
            # hypothesis from get_hyps.
            hyps = get_hyps(sent, goal, weights)
            return [(goldv - hypv, goldscore - hypscore) for (hypv, hyp, hypscore) in hyps]

        thedecoder.weights, alphas = cutting_plane(thedecoder.weights, updates, alphas, {sent.id: oracle})

        remove_zeros(thedecoder.weights)
        log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))
        log.write("weight norm: %s\n" % (math.sqrt(thedecoder.weights.normsquared())))

        # update weight sum for averaging
        global nweights, sumweights_helper

        # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
        for sentid in updates:
            for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
                apply_update(sumweights_helper, nweights * alpha * v)
        nweights += 1

        # update feature scales
        if update_feature_scales:
            global sum_updates2, n_updates, feature_scales
            for sentid in updates:
                # u is the sum of this sentence's updates, each scaled by
                # alpha / max_learning_rate.
                u = svector.Vector()
                for (v, score), alpha in itertools.izip(updates[sentid], alphas[sentid]):
                    u += alpha / max_learning_rate * v
                sum_updates2 += u * u
                n_updates += 1

            # Scale is 1 / compute_variance(...); a ZeroDivisionError is
            # mapped to a scale of 0.
            try:
                default_feature_scale = 1.0 / compute_variance(0, n_updates)
            except ZeroDivisionError:
                default_feature_scale = 0.0  # pseudoinverse
            # Features absent from sum_updates2 get the default scale.
            feature_scales = collections.defaultdict(lambda: default_feature_scale)
            for feat in sum_updates2:
                try:
                    feature_scales[feat] = 1.0 / compute_variance(sum_updates2[feat], n_updates)
                except ZeroDivisionError:
                    feature_scales[feat] = 0.0  # pseudoinverse

            log.write(
                "feature scales: %s\n"
                % (" ".join("%s=%s" % (f, feature_scales[f]) for f in watch_features if f in feature_scales))
            )

        if opts.parallel:
            # flush out filled requests
            # (keeps only the requests whose test() is still falsy)
            global requests
            requests = [request for request in requests if not request.test()]

            # transmit updates to other nodes
            for node in parallel.slaves:
                if node != parallel.rank:
                    requests.append(mpi.world.isend(value=(sent.id, updates[sent.id]), dest=node, tag=1))

        # bestv is replaced by the result of theoracle.finish before it is
        # passed to theoracle.update and stored on the sentence.
        bestv = theoracle.finish(bestv, best)
        theoracle.update(bestv)
        sent.score_comps = bestv

        if log.level >= 1:
            gc.collect()
            log.write("done updating, memory = %s\n" % monitor.memory())

        sent.ewords = [sym.tostring(e) for e in best]

        return sent
示例#19
0
    def process(sent):
        # Need to add an flen attribute that gives the length of the input sentence.
        # In the lattice-decoding case, we have to make a guess.
        distance = sent.compute_distance()
        sent.flen = distance.get((0,sent.n-1), None) # could be missing if n == 0

        theoracle.input(sent)
        
        global decoder_errors
        try:
            goal = thedecoder.translate(sent)
            thedecoder.process_output(sent, goal)
            decoder_errors = 0
            if goal is None: raise Exception("parse failure")
        except Exception:
            import traceback
            log.write("decoder raised exception: %s" % "".join(traceback.format_exception(*sys.exc_info())))
            decoder_errors += 1
            if decoder_errors >= 3:
                log.write("decoder failed too many times, passing exception through!\n")
                raise
            else:
                return

        goal.rescore(theoracle.models, thedecoder.weights, add=True)
            
        best_vector, best = decoder.get_nbest(goal, 1)[0]
        best_mvector = theoracle.clean(best_vector)
        best_ovector = theoracle.finish(best_vector, best)
        best_loss = theoracle.make_weights(additive="sentence").dot(best_ovector)
        log.write("best hyp: %s %s cost=%s loss=%s\n"  % (" ".join(sym.tostring(e) for e in best), best_vector, thedecoder.weights.dot(best_mvector), best_loss))

        # Set up quadratic program
        qp = maxmargin.QuadraticProgram()
        cur_instance = ForestInstance(sent.id, goal)
        qp.add_instance(cur_instance)

        if opts.parallel:
            while MPI.COMM_WORLD.Iprobe(tag=1, source=MPI.ANY_SOURCE):
                log.writeln("received update...\n")
                recv_instance = MPI.COMM_WORLD.recv(tag=1, source=MPI.ANY_SOURCE)
                log.writeln("received update for %s" % (recv_instance.instance_id,))
                # need to check for duplicate instances?
                qp.add_instance(recv_instance)

        # Add cached hyps
        if cache_hyps:
            for instance in qp.instances:
                hyps = hyp_cache[instance.instance_id]
                if len(hyps) > 0:
                    log.writeln("retrieved %d cached hyps for %s" % (len(hyps), instance.instance_id))
                for hyp in hyps:
                    instance.add_hyp(hyp)

        # Make oracle weight vector
        oweights = theoracle.make_weights(additive="sentence")
        oweights *= -1

        # Make vector of learning rates
        # We have to be careful to assign a learning rate to every possible feature
        # This is not very efficient
        feats = set()
        for item in goal.bottomup():
            for ded in item.deds:
                feats.update(ded.dcost)
        for instance in qp.instances:
            for hyp in instance.hyps:
                feats.update(hyp.mvector)
        learning_rates = svector.Vector()
        for feat in feats:
            learning_rates[feat] = compute_feature_learning_rate(feat)
        if log.level >= 3:
            log.writeln("learning rate vector: %s" % learning_rates)

        qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

        thedecoder.weights.compact()
        log.write("feature weights: %s\n" % (thedecoder.weights * watch_features))

        # update weight sum for averaging
        global nweights, sumweights_helper

        # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
        sumweights_helper += nweights * qp.delta_mweights()
        nweights += 1

        # update feature scales
        if update_feature_scales:
            global sum_updates2, n_updates
            for instance in qp.instances:
                """u = svector.Vector(instance.hope.mvector)
                for hyp in instance.hyps:
                    u -= hyp.alpha*hyp.mvector
                sum_updates2 += u*u"""
                for hyp in instance.hyps:
                    if hyp is not instance.hope: # hyp = instance.hope is a non-update
                        u = instance.hope.mvector - hyp.mvector
                        sum_updates2 += hyp.alpha*(u*u)
                        n_updates += hyp.alpha

            #log.write("sum of squared updates: %s\n" % (" ".join("%s=%s" % (f,sum_updates2[f]) for f in watch_features)))
            log.write("feature learning rates: %s\n" % (" ".join("%s=%s" % (f,compute_feature_learning_rate(f)) for f in watch_features)))

        if opts.parallel:
            # flush out filled requests
            global requests
            requests = [request for request in requests if not request.Test()]

            # transmit updates to other nodes
            # make a plain Instance (without forest)
            # we used to designate a hope translation,
            #send_instance = maxmargin.Instance(cur_instance.hyps, hope=cur_instance.hope, instance_id=cur_instance.sentid)
            # but now are letting the other node choose.
            send_instance = maxmargin.Instance(cur_instance.hyps, instance_id=cur_instance.sentid)

            for node in parallel.slaves:
                if node != parallel.rank:
                    requests.append(MPI.COMM_WORLD.isend(send_instance, dest=node, tag=1))

        # save all hyps for next time
        if cache_hyps:
            epsilon = 0.01
            for instance in qp.instances:
                hyps = hyp_cache[instance.instance_id]
                for hyp in instance.hyps:
                    for hyp1 in hyps:
                        if (hyp.mvector-hyp1.mvector).normsquared() <= epsilon and (hyp.ovector-hyp1.ovector).normsquared() <= epsilon:
                            break
                    else:
                        if log.level >= 2:
                            log.writeln("add hyp to cache: %s" % hyp)
                        hyps.append(hyp)

        theoracle.update(best_ovector)
        sent.score_comps = best_ovector

        if log.level >= 1:
            gc.collect()
            log.write("done updating, memory = %s\n" % monitor.memory())

        sent.ewords = [sym.tostring(e) for e in best]

        return sent
示例#20
0
文件: trainer.py 项目: jungikim/sbmt
    def process(sent):
        """Decode one sentence and apply a max-margin update to the decoder weights.

        The sentence is translated, the resulting forest is rescored with the
        oracle models, and a quadratic program is built from it (together with
        any instances received from other nodes and any cached hypotheses).
        After optimizing, the running totals used for weight averaging and
        feature scaling are updated, the current instance is sent to the other
        nodes when running in parallel, and the 1-best output is stored on
        ``sent``.

        Returns ``sent`` on success. Returns ``None`` when the decoder raised
        (or produced no goal) and the error counter is still below three; once
        the counter reaches three the exception is re-raised.
        """
        # A global statement applies to the whole function body, so all of the
        # module-level state touched here is declared up front.
        global decoder_errors
        global nweights, sumweights_helper
        global sum_updates2, n_updates
        global requests

        # The sentence needs an flen attribute giving the input length.
        # For lattice decoding this is only a guess.
        span_distance = sent.compute_distance()
        # the entry could be missing if n == 0
        sent.flen = span_distance.get((0, sent.n - 1), None)

        theoracle.input(sent)

        try:
            goal = thedecoder.translate(sent)
            thedecoder.process_output(sent, goal)
            decoder_errors = 0
            if goal is None:
                raise Exception("parse failure")
        except Exception:
            import traceback
            log.write("decoder raised exception: %s" % traceback.format_exc())
            decoder_errors += 1
            if decoder_errors < 3:
                return
            log.write(
                "decoder failed too many times, passing exception through!\n")
            raise

        goal.rescore(theoracle.models, thedecoder.weights, add=True)

        # 1-best derivation, split into its model part and its oracle part
        top_vector, top_words = decoder.get_nbest(goal, 1)[0]
        top_mvector = theoracle.clean(top_vector)
        top_ovector = theoracle.finish(top_vector, top_words)
        sentence_weights = theoracle.make_weights(additive="sentence")
        top_loss = sentence_weights.dot(top_ovector)
        top_text = " ".join(sym.tostring(word) for word in top_words)
        top_cost = thedecoder.weights.dot(top_mvector)
        log.write("best hyp: %s %s cost=%s loss=%s\n" %
                  (top_text, top_vector, top_cost, top_loss))

        # Set up quadratic program
        qp = maxmargin.QuadraticProgram()
        cur_instance = ForestInstance(sent.id, goal)
        qp.add_instance(cur_instance)

        if opts.parallel:
            comm = MPI.COMM_WORLD
            # drain every update that is already waiting
            while comm.Iprobe(tag=1, source=MPI.ANY_SOURCE):
                log.writeln("received update...\n")
                incoming = comm.recv(tag=1, source=MPI.ANY_SOURCE)
                log.writeln("received update for %s" % (incoming.instance_id,))
                # need to check for duplicate instances?
                qp.add_instance(incoming)

        # Add cached hyps
        if cache_hyps:
            for inst in qp.instances:
                cached = hyp_cache[inst.instance_id]
                cached_count = len(cached)
                if cached_count > 0:
                    log.writeln("retrieved %d cached hyps for %s" %
                                (cached_count, inst.instance_id))
                for old_hyp in cached:
                    inst.add_hyp(old_hyp)

        # Make oracle weight vector (negated in place)
        oweights = theoracle.make_weights(additive="sentence")
        oweights *= -1

        # Make vector of learning rates.
        # Every feature that could possibly occur must get a learning rate;
        # this is not very efficient.
        feats = set()
        for node in goal.bottomup():
            for deduction in node.deds:
                feats.update(deduction.dcost)
        for inst in qp.instances:
            for candidate in inst.hyps:
                feats.update(candidate.mvector)

        learning_rates = svector.Vector()
        for feat in feats:
            learning_rates[feat] = compute_feature_learning_rate(feat)
        if log.level >= 3:
            log.writeln("learning rate vector: %s" % learning_rates)

        qp.optimize(thedecoder.weights, oweights, learning_rate=learning_rates)

        thedecoder.weights.compact()
        watched_weights = thedecoder.weights * watch_features
        log.write("feature weights: %s\n" % watched_weights)

        # update weight sum for averaging
        # sumweights_helper = \sum_{i=0}^n (i \Delta w_i)
        sumweights_helper += nweights * qp.delta_mweights()
        nweights += 1

        # update feature scales
        if update_feature_scales:
            for inst in qp.instances:
                # (An earlier formulation accumulated a single update vector
                # per instance: hope.mvector minus the alpha-weighted hyps.)
                for candidate in inst.hyps:
                    # candidate = inst.hope is a non-update
                    if candidate is inst.hope:
                        continue
                    diff = inst.hope.mvector - candidate.mvector
                    sum_updates2 += candidate.alpha * (diff * diff)
                    n_updates += candidate.alpha

            #log.write("sum of squared updates: %s\n" % (" ".join("%s=%s" % (f,sum_updates2[f]) for f in watch_features)))
            rate_report = " ".join(
                "%s=%s" % (f, compute_feature_learning_rate(f))
                for f in watch_features)
            log.write("feature learning rates: %s\n" % rate_report)

        if opts.parallel:
            # flush out filled requests
            requests = [pending for pending in requests if not pending.Test()]

            # transmit updates to other nodes
            # make a plain Instance (without forest)
            # we used to designate a hope translation,
            #send_instance = maxmargin.Instance(cur_instance.hyps, hope=cur_instance.hope, instance_id=cur_instance.sentid)
            # but now are letting the other node choose.
            send_instance = maxmargin.Instance(
                cur_instance.hyps, instance_id=cur_instance.sentid)

            for node in parallel.slaves:
                if node != parallel.rank:
                    outgoing = MPI.COMM_WORLD.isend(
                        send_instance, dest=node, tag=1)
                    requests.append(outgoing)

        # save all hyps for next time
        if cache_hyps:
            epsilon = 0.01
            for inst in qp.instances:
                cached = hyp_cache[inst.instance_id]
                for candidate in inst.hyps:
                    # a hyp counts as already cached when both its model and
                    # oracle vectors are within epsilon of a cached one
                    already_cached = any(
                        (candidate.mvector - old.mvector).normsquared() <= epsilon
                        and (candidate.ovector - old.ovector).normsquared() <= epsilon
                        for old in cached)
                    if already_cached:
                        continue
                    if log.level >= 2:
                        log.writeln("add hyp to cache: %s" % candidate)
                    cached.append(candidate)

        theoracle.update(top_ovector)
        sent.score_comps = top_ovector

        if log.level >= 1:
            gc.collect()
            log.write("done updating, memory = %s\n" % monitor.memory())

        sent.ewords = [sym.tostring(word) for word in top_words]

        return sent