Example #1
    def __init__(self, modelname="", window_size=HYPERPARAMETERS["WINDOW_SIZE"], vocab_size=vocabulary.wordmap().len, embedding_size=HYPERPARAMETERS["EMBEDDING_SIZE"], hidden_size=HYPERPARAMETERS["HIDDEN_SIZE"], seed=miscglobals.RANDOMSEED, initial_embeddings=None, two_hidden_layers=HYPERPARAMETERS["TWO_HIDDEN_LAYERS"]):
        self.modelname = modelname
        self.parameters = Parameters(window_size, vocab_size, embedding_size, hidden_size, seed, initial_embeddings, two_hidden_layers)
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0
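
The constructor above does not copy weights into the graph module; it rebinds the same parameter arrays, so in-place SGD updates are visible from both sides. A minimal numpy sketch of that sharing pattern (the Graph/TinyParameters names are illustrative stand-ins, not taken from the snippet):

import numpy

class Graph(object):
    pass

class TinyParameters(object):
    def __init__(self, hidden_size=4, output_size=2):
        rng = numpy.random.RandomState(0)
        self.hidden_weights = rng.randn(hidden_size, output_size)
        self.hidden_biases = numpy.zeros(output_size)

graph = Graph()                                    # stand-in for the shared graph module
parameters = TinyParameters()
graph.hidden_weights = parameters.hidden_weights   # same array object, not a copy
graph.hidden_biases = parameters.hidden_biases

parameters.hidden_weights -= 0.1                   # in-place update during "training"
assert graph.hidden_weights is parameters.hidden_weights
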
Example #2
    def __init__(self):
        print 'call me'
        self.parameters = Parameters()
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0
Example #3
class Model:
    """
    A Model can:

    @type parameters: L{Parameters}
    @todo: Document
    """

    def __init__(self):
        print 'call me'
        self.parameters = Parameters()
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0

    def __getstate__(self):
        return (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt)

    def __setstate__(self, state):
        (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt) = state
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
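
The __getstate__/__setstate__ pair lets pickle round-trip the model and, on load, re-point the module-level graph attributes at the restored parameters (pickle does not call __init__ when restoring, so __setstate__ is the place to redo that wiring). A self-contained sketch of the idiom with toy names, not taken from the snippet:

import pickle

shared = {}                                 # stand-in for module-level graph state

class Counter(object):
    def __init__(self):
        self.count = 0
        shared["count"] = self.count
    def __getstate__(self):
        return (self.count,)
    def __setstate__(self, state):
        (self.count,) = state
        shared["count"] = self.count        # rebind shared state on unpickle

c = Counter()
c.count = 7
c2 = pickle.loads(pickle.dumps(c))
assert c2.count == 7 and shared["count"] == 7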

#    def load(self, filename):
#        sys.stderr.write("Loading model from: %s\n" % filename)
#        f = myopen(filename, "rb")
#        (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt) = pickle.load(f)
#        if LBL:
#            graph.output_weights = self.parameters.output_weights
#            graph.output_biases = self.parameters.output_biases
#            graph.score_biases = self.parameters.score_biases
#        else:
#            graph.hidden_weights = self.parameters.hidden_weights
#            graph.hidden_biases = self.parameters.hidden_biases
#            graph.output_weights = self.parameters.output_weights
#            graph.output_biases = self.parameters.output_biases
#
#    def save(self, filename):
#        sys.stderr.write("Saving model to: %s\n" % filename)
#        f = myopen(filename, "wb")
#        pickle.dump((self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt), f)

    def embed(self, sequence):
        """
        Embed a sequence of vocabulary IDs
        """
        seq = [self.parameters.embeddings[s] for s in sequence]
        import numpy
        return [numpy.resize(s, (1, s.size)) for s in seq]
#        return [self.parameters.embeddings[s] for s in sequence]

    def embeds(self, sequences):
        """
        Embed sequences of vocabulary IDs.
        If we are given a list of MINIBATCH lists of SEQLEN items, return a list of SEQLEN matrices of shape (MINIBATCH, EMBSIZE)
        """
        embs = []
        for sequence in sequences:
            embs.append(self.embed(sequence))

        for emb in embs: assert len(emb) == len(embs[0])

        new_embs = []
        for i in range(len(embs[0])):
            colembs = [embs[j][i] for j in range(len(embs))]
            import numpy
            new_embs.append(numpy.vstack(colembs))
            assert new_embs[-1].shape == (len(sequences), self.parameters.embedding_size)
        assert len(new_embs) == len(sequences[0])
        return new_embs
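
A numpy-only sketch of the reshaping that embeds performs, assuming a toy embedding table: a minibatch of 2 sequences of length 3 becomes a list of 3 matrices of shape (MINIBATCH, EMBSIZE):

import numpy

embedding_size = 5
embeddings = numpy.arange(10 * embedding_size, dtype=float).reshape(10, embedding_size)

def embed(sequence):
    # one (1, embedding_size) row per vocabulary ID
    return [embeddings[s].reshape(1, embedding_size) for s in sequence]

def embeds(sequences):
    per_example = [embed(seq) for seq in sequences]    # MINIBATCH lists of SEQLEN rows
    seqlen = len(per_example[0])
    # transpose the nesting: one (MINIBATCH, embedding_size) matrix per position
    return [numpy.vstack([rows[i] for rows in per_example]) for i in range(seqlen)]

batch = [[1, 4, 7], [2, 4, 9]]                         # 2 sequences of 3 vocabulary IDs
mats = embeds(batch)
assert len(mats) == 3 and mats[0].shape == (2, embedding_size)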

    def corrupt_example(self, e):
        """
        Return a corrupted version of example e, plus the weight of this example.
        """
        from hyperparameters import HYPERPARAMETERS
        import random
        import copy
        e = copy.copy(e)
        last = e[-1]
        cnt = 0
        while e[-1] == last:
            if HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"] == 0:
                e[-1] = random.randint(0, self.parameters.vocab_size-1)
                pr = 1./self.parameters.vocab_size
            elif HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"] == 1:
                import noise
                from common.myrandom import weighted_sample
                e[-1], pr = weighted_sample(noise.indexed_weights())
#                from vocabulary import wordmap
#                print wordmap.str(e[-1]), pr
            else:
                assert 0
            cnt += 1
            # Backoff to 0gram smoothing if we fail 10 times to get noise.
            if cnt > 10: e[-1] = random.randint(0, self.parameters.vocab_size-1)
        weight = 1./pr
        return e, weight
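
A stripped-down sketch of the uniform-noise branch of corrupt_example (the NGRAM_FOR_TRAINING_NOISE == 0 case), using only the standard library: resample the final ID until it differs from the original and return the importance weight 1/pr, which for uniform sampling is simply the vocabulary size:

import copy
import random

def corrupt_example_uniform(example, vocab_size, rng=random):
    e = copy.copy(example)
    last = e[-1]
    while e[-1] == last:
        e[-1] = rng.randint(0, vocab_size - 1)   # uniform noise word
    pr = 1. / vocab_size                         # probability of that noise word
    return e, 1. / pr                            # corrupted example, importance weight

noisy, weight = corrupt_example_uniform([3, 8, 5], vocab_size=100)
assert noisy[:-1] == [3, 8] and noisy[-1] != 5 and weight == 100.0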

    def corrupt_examples(self, correct_sequences):
        noise_sequences = []
        weights = []
        for e in correct_sequences:
            noise_sequence, weight = self.corrupt_example(e)
            noise_sequences.append(noise_sequence)
            weights.append(weight)
        return noise_sequences, weights

    def train(self, correct_sequences):
        from hyperparameters import HYPERPARAMETERS
        learning_rate = HYPERPARAMETERS["LEARNING_RATE"]
        if LBL:
            noise_sequences, weights = self.corrupt_examples(correct_sequences)
            # All weights must be the same, if we first multiply by the learning rate
            for w in weights: assert w == weights[0]

            # REWRITE FOR MINIBATCH
            assert 0

#            noise_repr = noise_sequence[-1]
#            correct_repr = correct_sequence[-1]
            noise_repr = noise_sequence[-1:]
            correct_repr = correct_sequence[-1:]
            assert noise_repr != correct_repr
            assert noise_sequence[:-1] == correct_sequence[:-1]
            sequence = correct_sequence[:-1]
#            r = graph.train(self.embed(sequence), self.embed([correct_repr])[0], self.embed([noise_repr])[0], self.parameters.score_biases[correct_repr], self.parameters.score_biases[noise_repr])
            r = graph.train(self.embed(sequence), self.embed(correct_repr)[0], self.embed(noise_repr)[0], self.parameters.score_biases[correct_repr], self.parameters.score_biases[noise_repr], learning_rate * weight)
            assert len(noise_repr) == 1
            assert len(correct_repr) == 1
            noise_repr = noise_repr[0]
            correct_repr = correct_repr[0]
            (loss, predictrepr, correct_score, noise_score, dsequence, dcorrect_repr, dnoise_repr, dcorrect_scorebias, dnoise_scorebias) = r
#            print
#            print "loss = ", loss
#            print "predictrepr = ", predictrepr
#            print "correct_repr = ", correct_repr, self.embed(correct_repr)[0]
#            print "noise_repr = ", noise_repr, self.embed(noise_repr)[0]
#            print "correct_score = ", correct_score
#            print "noise_score = ", noise_score
        else:
            noise_sequences, weights = self.corrupt_examples(correct_sequences)
            # All weights must be the same, if we first multiply by the learning rate
            for w in weights: assert w == weights[0]
            #print self.embeds(correct_sequences)
            #print self.embeds(noise_sequences)
            #print learning_rate * weights[0]
            r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences), learning_rate * weights[0])
            (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss, l1penaltys, correct_scores, noise_scores) = r
#            print [d.shape for d in dcorrect_inputss]
#            print [d.shape for d in dnoise_inputss]
#            print "losss", losss.shape, losss
#            print "unpenalized_losss", unpenalized_losss.shape, unpenalized_losss
#            print "l1penaltys", l1penaltys.shape, l1penaltys
#            print "correct_scores", correct_scores.shape, correct_scores
#            print "noise_scores", noise_scores.shape, noise_scores

        import sets
        to_normalize = sets.Set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]

            dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
            dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

#            print [d.shape for d in dcorrect_inputs]
#            print [d.shape for d in dnoise_inputs]
#            print "loss", loss.shape, loss
#            print "unpenalized_loss", unpenalized_loss.shape, unpenalized_loss
#            print "l1penalty", l1penalty.shape, l1penalty
#            print "correct_score", correct_score.shape, correct_score
#            print "noise_score", noise_score.shape, noise_score


            self.train_loss.add(loss)
            self.train_err.add(correct_score <= noise_score)
            self.train_lossnonzero.add(loss > 0)
            squashloss = 1./(1.+math.exp(-loss))
            self.train_squashloss.add(squashloss)
            if not LBL:
                self.train_unpenalized_loss.add(unpenalized_loss)
                self.train_l1penalty.add(l1penalty)
                self.train_unpenalized_lossnonzero.add(unpenalized_loss > 0)
            self.train_correct_score.add(correct_score)
            self.train_noise_score.add(noise_score)
    
            self.train_cnt += 1
            if self.train_cnt % 10000 == 0:
    #        if self.train_cnt % 1000 == 0:
    #            print self.train_cnt
#                graph.COMPILE_MODE.print_summary()
                logging.info(("After %d updates, pre-update train loss %s" % (self.train_cnt, self.train_loss)))
                logging.info(("After %d updates, pre-update train error %s" % (self.train_cnt, self.train_err)))
                logging.info(("After %d updates, pre-update train Pr(loss != 0) %s" % (self.train_cnt, self.train_lossnonzero)))
                logging.info(("After %d updates, pre-update train squash(loss) %s" % (self.train_cnt, self.train_squashloss)))
                if not LBL:
                    logging.info(("After %d updates, pre-update train unpenalized loss %s" % (self.train_cnt, self.train_unpenalized_loss)))
                    logging.info(("After %d updates, pre-update train l1penalty %s" % (self.train_cnt, self.train_l1penalty)))
                    logging.info(("After %d updates, pre-update train Pr(unpenalized loss != 0) %s" % (self.train_cnt, self.train_unpenalized_lossnonzero)))
                logging.info(("After %d updates, pre-update train correct score %s" % (self.train_cnt, self.train_correct_score)))
                logging.info(("After %d updates, pre-update train noise score %s" % (self.train_cnt, self.train_noise_score)))
    
                if LBL:
                    i = 1.
                    while i < wordmap.len:
                        inti = int(i)
                        str = "word %s, rank %d, score %f" % (wordmap.str(inti), inti, self.parameters.score_biases[inti])
                        logging.info("After %d updates, score biases: %s" % (self.train_cnt, str))
                        i *= 3.2
    
    #            print(("After %d updates, pre-update train loss %s" % (self.train_cnt, self.train_loss.verbose_string())))
    #            print(("After %d updates, pre-update train error %s" % (self.train_cnt, self.train_err.verbose_string())))
    
            for w in weights: assert w == weights[0]
            embedding_learning_rate = HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] * weights[0]
            if loss == 0:
                if LBL:
                    for di in dsequence + [dcorrect_repr, dnoise_repr]:
                        # This tends to trigger if training diverges (NaN)
                        assert (di == 0).all()

                else:
                    for di in dcorrect_inputs + dnoise_inputs:
                        assert (di == 0).all()
    
            if loss != 0:
                if LBL:
                    val = sequence + [correct_repr, noise_repr]
                    dval = dsequence + [dcorrect_repr, dnoise_repr]
    #                print val
                    for (i, di) in zip(val, dval):
    #                for (i, di) in zip(tuple(sequence + [correct_repr, noise_repr]), tuple(dsequence + [dcorrect_repr, dnoise_repr])):
                        assert di.shape[0] == 1
                        di.resize(di.size)
    #                    print i, di
                        self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                            to_normalize.add(i)
    
                    for (i, di) in zip([correct_repr, noise_repr], [dcorrect_scorebias, dnoise_scorebias]):
                        self.parameters.score_biases[i] -= 1.0 * embedding_learning_rate * di
    #                    print "REMOVEME", i, self.parameters.score_biases[i]
                else:
                    for (i, di) in zip(correct_sequence, dcorrect_inputs):
#                        assert di.shape[0] == 1
#                        di.resize(di.size)
    #                    print i, di
                        assert di.shape == (self.parameters.embedding_size,)
                        self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                            to_normalize.add(i)
                    for (i, di) in zip(noise_sequence, dnoise_inputs):
#                        assert di.shape[0] == 1
#                        di.resize(di.size)
    #                    print i, di
                        assert di.shape == (self.parameters.embedding_size,)
                        self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                            to_normalize.add(i)
    #                print to_normalize
    
        if len(to_normalize) > 0:
            to_normalize = [i for i in to_normalize]
#            print "NORMALIZING", to_normalize
            self.parameters.normalize(to_normalize)
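
A numpy sketch of the per-word update in the loss != 0 branch above: subtract the scaled gradient from each touched embedding row, collect the touched indices, and re-normalize only those rows afterwards. The unit-norm step is illustrative; the snippet delegates that to Parameters.normalize, whose exact behaviour is not shown here:

import numpy

rng = numpy.random.RandomState(0)
embeddings = rng.randn(10, 4)                       # toy (vocab_size, embedding_size) table
embedding_learning_rate = 0.01

sequence = [2, 5, 5]                                # word IDs touched by one example
gradients = [rng.randn(4) for _ in sequence]        # one gradient per position

to_normalize = set()
for i, di in zip(sequence, gradients):
    embeddings[i] -= embedding_learning_rate * di   # SGD step on that word's row
    to_normalize.add(i)

rows = sorted(to_normalize)
norms = numpy.sqrt((embeddings[rows] ** 2).sum(axis=1, keepdims=True))
embeddings[rows] /= norms                           # renormalize only the updated rows
assert numpy.allclose((embeddings[rows] ** 2).sum(axis=1), 1.0)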



    def predict(self, sequence):
        if LBL:
            targetrepr = sequence[-1:]
            sequence = sequence[:-1]
            (predictrepr, score) = graph.predict(self.embed(sequence), self.embed(targetrepr)[0], self.parameters.score_biases[targetrepr], self.parameters)
            return score
        else:
            (score) = graph.predict(self.embed(sequence), self.parameters)
            return score

    def verbose_predict(self, sequence):
        if LBL:
            assert 0
        else:
            (score, prehidden) = graph.verbose_predict(self.embed(sequence), self.parameters)
            return score, prehidden

    def validate(self, sequence):
        """
        Get the rank of this final word, as opposed to all other words in the vocabulary.
        """
        import random
        r = random.Random()
        r.seed(0)
        from hyperparameters import HYPERPARAMETERS

        import copy
        corrupt_sequence = copy.copy(sequence)
        rank = 1
        correct_score = self.predict(sequence)
#        print "CORRECT", correct_score, [wordmap.str(id) for id in sequence]
        for i in range(self.parameters.vocab_size):
            if r.random() > HYPERPARAMETERS["PERCENT OF NOISE EXAMPLES FOR VALIDATION LOGRANK"]: continue
            if i == sequence[-1]: continue
            corrupt_sequence[-1] = i
            corrupt_score = self.predict(corrupt_sequence)
            if correct_score <= corrupt_score:
#                print " CORRUPT", corrupt_score, [wordmap.str(id) for id in corrupt_sequence]
                rank += 1
        return rank
Example #4
    #RELOAD previous model
    channel.save()

    err = dict([(trainsize, {}) for trainsize in VALIDATION_TRAININGSIZE])
    rebuildunsup(model, LR, NOISE_LVL, ACTIVATION_REGULARIZATION_COEFF,
                 WEIGHT_REGULARIZATION_COEFF, BATCHSIZE, train)

    epoch = 0
    if epoch in EPOCHSTEST:
        svm_validation(err, epoch, model, train, datatrain, datatrainsave,
                       datatest, datatestsave, VALIDATION_TRAININGSIZE,
                       VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE,
                       PATH_DATA, NAME_DATATEST)
        channel.save()

    train_reconstruction_error_mvgavg = MovingAverage()
    for epoch in xrange(1, NEPOCHS + 1):
        time1 = time.time()
        state.currentepoch = epoch
        for filenb in xrange(1, NB_FILES + 1):
            print >> sys.stderr, "\t\tAbout to read file %s..." % percent(
                filenb, NB_FILES)
            print >> sys.stderr, "\t\t", stats()
            #                initial_file_time = time.time()
            f = open(PATH_DATA + NAME_DATA + '_%s.pkl' % filenb, 'r')
            object = numpy.asarray(cPickle.load(f), dtype=theano.config.floatX)
            print >> sys.stderr, "\t\t...read file %s" % percent(
                filenb, NB_FILES)
            print >> sys.stderr, "\t\t", stats()
            # The last training file is not of the same shape as the other training files.
            # So, to avoid a GPU memory error, we want to make sure it is the same size.
Example #5
    def __init__(self,
                 modelname="",
                 window_size=HYPERPARAMETERS["WINDOW_SIZE"],
                 vocab_size=vocabulary.wordmap().len,
                 embedding_size=HYPERPARAMETERS["EMBEDDING_SIZE"],
                 hidden_size=HYPERPARAMETERS["HIDDEN_SIZE"],
                 seed=miscglobals.RANDOMSEED,
                 initial_embeddings=None,
                 two_hidden_layers=HYPERPARAMETERS["TWO_HIDDEN_LAYERS"]):
        self.modelname = modelname
        self.parameters = Parameters(window_size, vocab_size, embedding_size,
                                     hidden_size, seed, initial_embeddings,
                                     two_hidden_layers)
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0
Example #6
class Model:
    """
    A Model can:

    @type parameters: L{Parameters}
    @todo: Document
    """

    import hyperparameters
    import miscglobals
    import vocabulary

    def __init__(self,
                 modelname="",
                 window_size=HYPERPARAMETERS["WINDOW_SIZE"],
                 vocab_size=vocabulary.wordmap().len,
                 embedding_size=HYPERPARAMETERS["EMBEDDING_SIZE"],
                 hidden_size=HYPERPARAMETERS["HIDDEN_SIZE"],
                 seed=miscglobals.RANDOMSEED,
                 initial_embeddings=None,
                 two_hidden_layers=HYPERPARAMETERS["TWO_HIDDEN_LAYERS"]):
        self.modelname = modelname
        self.parameters = Parameters(window_size, vocab_size, embedding_size,
                                     hidden_size, seed, initial_embeddings,
                                     two_hidden_layers)
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0

    def __getstate__(self):
        return (self.modelname, self.parameters, self.train_loss,
                self.train_err, self.train_lossnonzero, self.train_squashloss,
                self.train_unpenalized_loss, self.train_l1penalty,
                self.train_unpenalized_lossnonzero, self.train_correct_score,
                self.train_noise_score, self.train_cnt)

    def __setstate__(self, state):
        (self.modelname, self.parameters, self.train_loss, self.train_err,
         self.train_lossnonzero, self.train_squashloss,
         self.train_unpenalized_loss, self.train_l1penalty,
         self.train_unpenalized_lossnonzero, self.train_correct_score,
         self.train_noise_score, self.train_cnt) = state
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#    def load(self, filename):
#        sys.stderr.write("Loading model from: %s\n" % filename)
#        f = myopen(filename, "rb")
#        (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt) = pickle.load(f)
#        if LBL:
#            graph.output_weights = self.parameters.output_weights
#            graph.output_biases = self.parameters.output_biases
#            graph.score_biases = self.parameters.score_biases
#        else:
#            graph.hidden_weights = self.parameters.hidden_weights
#            graph.hidden_biases = self.parameters.hidden_biases
#            graph.output_weights = self.parameters.output_weights
#            graph.output_biases = self.parameters.output_biases
#
#    def save(self, filename):
#        sys.stderr.write("Saving model to: %s\n" % filename)
#        f = myopen(filename, "wb")
#        pickle.dump((self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt), f)

    def embed(self, sequence):
        """
        Embed a sequence of vocabulary IDs
        """
        seq = [self.parameters.embeddings[s] for s in sequence]
        import numpy
        return [numpy.resize(s, (1, s.size)) for s in seq]
#        return [self.parameters.embeddings[s] for s in sequence]

    def embeds(self, sequences):
        """
        Embed sequences of vocabulary IDs.
        If we are given a list of MINIBATCH lists of SEQLEN items, return a list of SEQLEN matrices of shape (MINIBATCH, EMBSIZE)
        """
        embs = []
        for sequence in sequences:
            embs.append(self.embed(sequence))

        for emb in embs:
            assert len(emb) == len(embs[0])

        new_embs = []
        for i in range(len(embs[0])):
            colembs = [embs[j][i] for j in range(len(embs))]
            import numpy
            new_embs.append(numpy.vstack(colembs))
            assert new_embs[-1].shape == (len(sequences),
                                          self.parameters.embedding_size)
        assert len(new_embs) == len(sequences[0])
        return new_embs

    def train(self, correct_sequences, noise_sequences, weights):
        from hyperparameters import HYPERPARAMETERS
        learning_rate = HYPERPARAMETERS["LEARNING_RATE"]

        # All weights must be the same, because of how we use a scalar learning rate
        assert HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]
        if HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]:
            for w in weights:
                assert w == weights[0]

        if LBL:
            # REWRITE FOR MINIBATCH
            assert 0

            #            noise_repr = noise_sequence[-1]
            #            correct_repr = correct_sequence[-1]
            noise_repr = noise_sequence[-1:]
            correct_repr = correct_sequence[-1:]
            assert noise_repr != correct_repr
            assert noise_sequence[:-1] == correct_sequence[:-1]
            sequence = correct_sequence[:-1]
            #            r = graph.train(self.embed(sequence), self.embed([correct_repr])[0], self.embed([noise_repr])[0], self.parameters.score_biases[correct_repr], self.parameters.score_biases[noise_repr])
            r = graph.train(self.embed(sequence),
                            self.embed(correct_repr)[0],
                            self.embed(noise_repr)[0],
                            self.parameters.score_biases[correct_repr],
                            self.parameters.score_biases[noise_repr],
                            learning_rate * weight)
            assert len(noise_repr) == 1
            assert len(correct_repr) == 1
            noise_repr = noise_repr[0]
            correct_repr = correct_repr[0]
            (loss, predictrepr, correct_score, noise_score, dsequence,
             dcorrect_repr, dnoise_repr, dcorrect_scorebias,
             dnoise_scorebias) = r
#            print
#            print "loss = ", loss
#            print "predictrepr = ", predictrepr
#            print "correct_repr = ", correct_repr, self.embed(correct_repr)[0]
#            print "noise_repr = ", noise_repr, self.embed(noise_repr)[0]
#            print "correct_score = ", correct_score
#            print "noise_score = ", noise_score
        else:
            r = graph.train(self.embeds(correct_sequences),
                            self.embeds(noise_sequences),
                            learning_rate * weights[0])
            if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss,
                 l1penaltys, correct_scores, noise_scores) = r
            else:
                (losss, unpenalized_losss, l1penaltys, correct_scores,
                 noise_scores) = r
#            print [d.shape for d in dcorrect_inputss]
#            print [d.shape for d in dnoise_inputss]
#            print "losss", losss.shape, losss
#            print "unpenalized_losss", unpenalized_losss.shape, unpenalized_losss
#            print "l1penaltys", l1penaltys.shape, l1penaltys
#            print "correct_scores", correct_scores.shape, correct_scores
#            print "noise_scores", noise_scores.shape, noise_scores

        import sets
        to_normalize = sets.Set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]

            if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
                dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

#            print [d.shape for d in dcorrect_inputs]
#            print [d.shape for d in dnoise_inputs]
#            print "loss", loss.shape, loss
#            print "unpenalized_loss", unpenalized_loss.shape, unpenalized_loss
#            print "l1penalty", l1penalty.shape, l1penalty
#            print "correct_score", correct_score.shape, correct_score
#            print "noise_score", noise_score.shape, noise_score

            self.train_loss.add(loss)
            self.train_err.add(correct_score <= noise_score)
            self.train_lossnonzero.add(loss > 0)
            squashloss = 1. / (1. + math.exp(-loss))
            self.train_squashloss.add(squashloss)
            if not LBL:
                self.train_unpenalized_loss.add(unpenalized_loss)
                self.train_l1penalty.add(l1penalty)
                self.train_unpenalized_lossnonzero.add(unpenalized_loss > 0)
            self.train_correct_score.add(correct_score)
            self.train_noise_score.add(noise_score)

            self.train_cnt += 1
            if self.train_cnt % 10000 == 0:
                #        if self.train_cnt % 1000 == 0:
                #            print self.train_cnt
                #                graph.COMPILE_MODE.print_summary()
                logging.info(
                    ("After %d updates, pre-update train loss %s" %
                     (self.train_cnt, self.train_loss.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train error %s" %
                     (self.train_cnt, self.train_err.verbose_string())))
                logging.info((
                    "After %d updates, pre-update train Pr(loss != 0) %s" %
                    (self.train_cnt, self.train_lossnonzero.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train squash(loss) %s" %
                     (self.train_cnt, self.train_squashloss.verbose_string())))
                if not LBL:
                    logging.info((
                        "After %d updates, pre-update train unpenalized loss %s"
                        % (self.train_cnt,
                           self.train_unpenalized_loss.verbose_string())))
                    logging.info(
                        ("After %d updates, pre-update train l1penalty %s" %
                         (self.train_cnt,
                          self.train_l1penalty.verbose_string())))
                    logging.info((
                        "After %d updates, pre-update train Pr(unpenalized loss != 0) %s"
                        %
                        (self.train_cnt,
                         self.train_unpenalized_lossnonzero.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train correct score %s" %
                     (self.train_cnt,
                      self.train_correct_score.verbose_string())))
                logging.info((
                    "After %d updates, pre-update train noise score %s" %
                    (self.train_cnt, self.train_noise_score.verbose_string())))

                self.debug_prehidden_values(correct_sequences)

                if LBL:
                    i = 1.
                    while i < wordmap.len:
                        inti = int(i)
                        str = "word %s, rank %d, score %f" % (wordmap.str(
                            inti), inti, self.parameters.score_biases[inti])
                        logging.info("After %d updates, score biases: %s" %
                                     (self.train_cnt, str))
                        i *= 3.2

    #            print(("After %d updates, pre-update train loss %s" % (self.train_cnt, self.train_loss.verbose_string())))
    #            print(("After %d updates, pre-update train error %s" % (self.train_cnt, self.train_err.verbose_string())))

    # All weights must be the same, because of how we use a scalar learning rate
            assert HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]
            if HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]:
                for w in weights:
                    assert w == weights[0]
            embedding_learning_rate = HYPERPARAMETERS[
                "EMBEDDING_LEARNING_RATE"] * weights[0]
            if loss == 0:
                if LBL:
                    for di in dsequence + [dcorrect_repr, dnoise_repr]:
                        # This tends to trigger if training diverges (NaN)
                        assert (di == 0).all()
    #                if not (di == 0).all():
    #                    print "WARNING:", di
    #                    print "WARNING in ", dsequence + [dcorrect_repr, dnoise_repr]
    #                    print "loss = ", loss
    #                    print "predictrepr = ", predictrepr
    #                    print "correct_repr = ", correct_repr, self.embed(correct_repr)[0]
    #                    print "noise_repr = ", noise_repr, self.embed(noise_repr)[0]
    #                    print "correct_score = ", correct_score
    #                    print "noise_score = ", noise_score
                else:
                    if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                        for di in dcorrect_inputs + dnoise_inputs:
                            assert (di == 0).all()

            if loss != 0:
                if LBL:
                    val = sequence + [correct_repr, noise_repr]
                    dval = dsequence + [dcorrect_repr, dnoise_repr]
                    #                print val
                    for (i, di) in zip(val, dval):
                        #                for (i, di) in zip(tuple(sequence + [correct_repr, noise_repr]), tuple(dsequence + [dcorrect_repr, dnoise_repr])):
                        assert di.shape[0] == 1
                        di.resize(di.size)
                        #                    print i, di
                        self.parameters.embeddings[
                            i] -= 1.0 * embedding_learning_rate * di
                        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                            to_normalize.add(i)

                    for (i, di) in zip([correct_repr, noise_repr],
                                       [dcorrect_scorebias, dnoise_scorebias]):
                        self.parameters.score_biases[
                            i] -= 1.0 * embedding_learning_rate * di

    #                    print "REMOVEME", i, self.parameters.score_biases[i]
                else:
                    if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                        for (i, di) in zip(correct_sequence, dcorrect_inputs):
                            #                        assert di.shape[0] == 1
                            #                        di.resize(di.size)
                            #                    print i, di
                            assert di.shape == (
                                self.parameters.embedding_size, )
                            self.parameters.embeddings[
                                i] -= 1.0 * embedding_learning_rate * di
                            if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                                to_normalize.add(i)
                        for (i, di) in zip(noise_sequence, dnoise_inputs):
                            #                        assert di.shape[0] == 1
                            #                        di.resize(di.size)
                            #                    print i, di
                            assert di.shape == (
                                self.parameters.embedding_size, )
                            self.parameters.embeddings[
                                i] -= 1.0 * embedding_learning_rate * di
                            if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                                to_normalize.add(i)
        #                print to_normalize

        if len(to_normalize) > 0:
            to_normalize = [i for i in to_normalize]
            #            print "NORMALIZING", to_normalize
            self.parameters.normalize(to_normalize)

    def predict(self, sequence):
        if LBL:
            targetrepr = sequence[-1:]
            sequence = sequence[:-1]
            (predictrepr,
             score) = graph.predict(self.embed(sequence),
                                    self.embed(targetrepr)[0],
                                    self.parameters.score_biases[targetrepr],
                                    self.parameters)
            return score
        else:
            (score) = graph.predict(self.embed(sequence), self.parameters)
            return score

    def verbose_predict(self, sequence):
        if LBL:
            assert 0
        else:
            (score, prehidden) = graph.verbose_predict(self.embed(sequence))
            return score, prehidden

    def debug_prehidden_values(self, sequences):
        """
        Give debug output on pre-squash hidden values.
        """
        import numpy
        for (i, ve) in enumerate(sequences):
            (score, prehidden) = self.verbose_predict(ve)
            abs_prehidden = numpy.abs(prehidden)
            med = numpy.median(abs_prehidden)
            abs_prehidden = abs_prehidden.tolist()
            assert len(abs_prehidden) == 1
            abs_prehidden = abs_prehidden[0]
            abs_prehidden.sort()
            abs_prehidden.reverse()

            logging.info("model %s, %s %s %s %s %s" %
                         (self.modelname, self.train_cnt,
                          "abs(pre-squash hidden) median =", med, "max =",
                          abs_prehidden[:3]))
            if i + 1 >= 3: break

    def validate(self, sequence):
        """
        Get the rank of this final word, as opposed to all other words in the vocabulary.
        """
        import random
        r = random.Random()
        r.seed(0)
        from hyperparameters import HYPERPARAMETERS

        import copy
        corrupt_sequence = copy.copy(sequence)
        rank = 1
        correct_score = self.predict(sequence)
        #        print "CORRECT", correct_score, [wordmap.str(id) for id in sequence]
        for i in range(self.parameters.vocab_size):
            if r.random() > HYPERPARAMETERS[
                    "PERCENT OF NOISE EXAMPLES FOR VALIDATION LOGRANK"]:
                continue
            if i == sequence[-1]: continue
            corrupt_sequence[-1] = i
            corrupt_score = self.predict(corrupt_sequence)
            if correct_score <= corrupt_score:
                #                print " CORRUPT", corrupt_score, [wordmap.str(id) for id in corrupt_sequence]
                rank += 1
        return rank

    def validate_errors(self, correct_sequences, noise_sequences):
        """
        Count the errors in this validation batch.
        """

        #            r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences), learning_rate * weights[0])
        correct_scores = graph.predict(self.embeds(correct_sequences))
        noise_scores = graph.predict(self.embeds(noise_sequences))

        #        print correct_scores
        #        print noise_scores
        return correct_scores > noise_scores
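
validate_errors reduces to an elementwise comparison of two score vectors. A minimal numpy sketch (the scores are made up) of turning those comparisons into an error rate:

import numpy

correct_scores = numpy.array([2.3, 0.9, 1.7, 0.1])
noise_scores = numpy.array([1.1, 1.4, 0.2, 0.0])

is_correct = correct_scores > noise_scores   # True where the correct example outranks the noise
error_rate = 1.0 - is_correct.mean()
assert abs(error_rate - 0.25) < 1e-12
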
Example #7
class Model:
    """
    A Model can:

    @type parameters: L{Parameters}
    @todo: Document
    """
    def __init__(self):
        self.parameters = Parameters()
        graph.hidden_weights = self.parameters.hidden_weights
        graph.hidden_biases = self.parameters.hidden_biases
        graph.output_weights = self.parameters.output_weights
        graph.output_biases = self.parameters.output_biases

        #        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0

    def __getstate__(self):
        return (self.parameters, self.train_loss, self.train_err,
                self.train_lossnonzero, self.train_squashloss,
                self.train_unpenalized_loss, self.train_l1penalty,
                self.train_unpenalized_lossnonzero, self.train_correct_score,
                self.train_noise_score, self.train_cnt)

    def __setstate__(self, state):
        (self.parameters, self.train_loss, self.train_err,
         self.train_lossnonzero, self.train_squashloss,
         self.train_unpenalized_loss, self.train_l1penalty,
         self.train_unpenalized_lossnonzero, self.train_correct_score,
         self.train_noise_score, self.train_cnt) = state

        graph.hidden_weights = self.parameters.hidden_weights
        graph.hidden_biases = self.parameters.hidden_biases
        graph.output_weights = self.parameters.output_weights
        graph.output_biases = self.parameters.output_biases

    def embed(self, sequence):
        """
        Embed a sequence of vocabulary IDs
        """
        seq = [self.parameters.embeddings[s] for s in sequence]
        import numpy
        return [numpy.resize(s, (1, s.size)) for s in seq]

    def embeds(self, sequences):
        """
        Embed sequences of vocabulary IDs.
        If we are given a list of MINIBATCH lists of SEQLEN items, return a list of SEQLEN matrices of shape (MINIBATCH, EMBSIZE)
        """
        embs = []
        for sequence in sequences:
            embs.append(self.embed(sequence))

        for emb in embs:
            assert len(emb) == len(embs[0])

        new_embs = []
        for i in range(len(embs[0])):
            colembs = [embs[j][i] for j in range(len(embs))]
            import numpy
            new_embs.append(numpy.vstack(colembs))
            assert new_embs[-1].shape == (len(sequences),
                                          self.parameters.embedding_size)
        assert len(new_embs) == len(sequences[0])
        return new_embs

    def corrupt_example(self, e):
        """
        Return a corrupted version of example e, plus the weight of this example.
        """
        from hyperparameters import HYPERPARAMETERS
        import random
        import copy
        e = copy.copy(e)
        last = e[-1]
        cnt = 0
        while e[-1] == last:
            e[-1] = random.randint(0, self.parameters.vocab_size - 1)
            pr = 1. / self.parameters.vocab_size
            cnt += 1
            # Backoff to 0gram smoothing if we fail 10 times to get noise.
            if cnt > 10:
                e[-1] = random.randint(0, self.parameters.vocab_size - 1)
        weight = 1. / pr
        return e, weight

    def corrupt_examples(self, correct_sequences):
        noise_sequences = []
        weights = []
        for e in correct_sequences:
            noise_sequence, weight = self.corrupt_example(e)
            noise_sequences.append(noise_sequence)
            weights.append(weight)
        return noise_sequences, weights

    def train(self, correct_sequences):
        from hyperparameters import HYPERPARAMETERS
        learning_rate = HYPERPARAMETERS["LEARNING_RATE"]

        noise_sequences, weights = self.corrupt_examples(correct_sequences)
        # All weights must be the same, if we first multiply by the learning rate
        for w in weights:
            assert w == weights[0]

        r = graph.train(self.embeds(correct_sequences),
                        self.embeds(noise_sequences),
                        learning_rate * weights[0])
        (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss,
         l1penaltys, correct_scores, noise_scores) = r
        #            print [d.shape for d in dcorrect_inputss]
        #            print [d.shape for d in dnoise_inputss]
        #            print "losss", losss.shape, losss
        #            print "unpenalized_losss", unpenalized_losss.shape, unpenalized_losss
        #            print "l1penaltys", l1penaltys.shape, l1penaltys
        #            print "correct_scores", correct_scores.shape, correct_scores
        #            print "noise_scores", noise_scores.shape, noise_scores

        import sets
        to_normalize = sets.Set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]

            dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
            dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

            #            print [d.shape for d in dcorrect_inputs]
            #            print [d.shape for d in dnoise_inputs]
            #            print "loss", loss.shape, loss
            #            print "unpenalized_loss", unpenalized_loss.shape, unpenalized_loss
            #            print "l1penalty", l1penalty.shape, l1penalty
            #            print "correct_score", correct_score.shape, correct_score
            #            print "noise_score", noise_score.shape, noise_score

            self.train_loss.add(loss)
            self.train_err.add(correct_score <= noise_score)
            self.train_lossnonzero.add(loss > 0)
            squashloss = 1. / (1. + math.exp(-loss))
            self.train_squashloss.add(squashloss)

            self.train_unpenalized_loss.add(unpenalized_loss)
            self.train_l1penalty.add(l1penalty)
            self.train_unpenalized_lossnonzero.add(unpenalized_loss > 0)
            self.train_correct_score.add(correct_score)
            self.train_noise_score.add(noise_score)

            self.train_cnt += 1
            if self.train_cnt % 10000 == 0:
                #        if self.train_cnt % 1000 == 0:
                #            print self.train_cnt
                #                graph.COMPILE_MODE.print_summary()
                logging.info(
                    ("After %d updates, pre-update train loss %s" %
                     (self.train_cnt, self.train_loss.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train error %s" %
                     (self.train_cnt, self.train_err.verbose_string())))
                logging.info((
                    "After %d updates, pre-update train Pr(loss != 0) %s" %
                    (self.train_cnt, self.train_lossnonzero.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train squash(loss) %s" %
                     (self.train_cnt, self.train_squashloss.verbose_string())))

                logging.info(
                    ("After %d updates, pre-update train unpenalized loss %s" %
                     (self.train_cnt,
                      self.train_unpenalized_loss.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train l1penalty %s" %
                     (self.train_cnt, self.train_l1penalty.verbose_string())))
                logging.info((
                    "After %d updates, pre-update train Pr(unpenalized loss != 0) %s"
                    % (self.train_cnt,
                       self.train_unpenalized_lossnonzero.verbose_string())))
                logging.info(
                    ("After %d updates, pre-update train correct score %s" %
                     (self.train_cnt,
                      self.train_correct_score.verbose_string())))
                logging.info((
                    "After %d updates, pre-update train noise score %s" %
                    (self.train_cnt, self.train_noise_score.verbose_string())))

            for w in weights:
                assert w == weights[0]
            embedding_learning_rate = HYPERPARAMETERS[
                "EMBEDDING_LEARNING_RATE"] * weights[0]
            if loss == 0:
                for di in dcorrect_inputs + dnoise_inputs:
                    assert (di == 0).all()

            if loss != 0:
                for (i, di) in zip(correct_sequence, dcorrect_inputs):
                    #                        assert di.shape[0] == 1
                    #                        di.resize(di.size)
                    #                    print i, di
                    assert di.shape == (self.parameters.embedding_size, )
                    self.parameters.embeddings[
                        i] -= 1.0 * embedding_learning_rate * di
                    if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                        to_normalize.add(i)
                for (i, di) in zip(noise_sequence, dnoise_inputs):
                    #                        assert di.shape[0] == 1
                    #                        di.resize(di.size)
                    #                    print i, di
                    assert di.shape == (self.parameters.embedding_size, )
                    self.parameters.embeddings[
                        i] -= 1.0 * embedding_learning_rate * di
                    if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                        to_normalize.add(i)
#                print to_normalize

        if len(to_normalize) > 0:
            to_normalize = [i for i in to_normalize]
            #            print "NORMALIZING", to_normalize
            self.parameters.normalize(to_normalize)

    def predict(self, sequence):
        (score) = graph.predict(self.embed(sequence), self.parameters)
        return score

    def verbose_predict(self, sequence):
        (score, prehidden) = graph.verbose_predict(self.embed(sequence),
                                                   self.parameters)
        return score, prehidden

    def validate(self, sequence):
        """
        Get the rank of this final word, as opposed to all other words in the vocabulary.
        """
        import random
        r = random.Random()
        r.seed(0)
        from hyperparameters import HYPERPARAMETERS

        import copy
        corrupt_sequence = copy.copy(sequence)
        rank = 1
        correct_score = self.predict(sequence)
        #        print "CORRECT", correct_score, [wordmap.str(id) for id in sequence]
        for i in range(self.parameters.vocab_size):
            if r.random() > HYPERPARAMETERS[
                    "PERCENT OF NOISE EXAMPLES FOR VALIDATION LOGRANK"]:
                continue
            if i == sequence[-1]: continue
            corrupt_sequence[-1] = i
            corrupt_score = self.predict(corrupt_sequence)
            if correct_score <= corrupt_score:
                #                print " CORRUPT", corrupt_score, [wordmap.str(id) for id in corrupt_sequence]
                rank += 1
        return rank
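
A compact sketch of the log-rank validation idea in validate, with a made-up scoring function standing in for graph.predict: the rank of the correct final word is one plus the number of sampled replacement words that score at least as well.

import copy
import random

def validation_rank(sequence, vocab_size, score, sample_fraction=0.1, seed=0):
    r = random.Random(seed)
    corrupt_sequence = copy.copy(sequence)
    correct_score = score(sequence)
    rank = 1
    for i in range(vocab_size):
        if r.random() > sample_fraction or i == sequence[-1]:
            continue
        corrupt_sequence[-1] = i
        if correct_score <= score(corrupt_sequence):   # a competitor scores at least as well
            rank += 1
    return rank

toy_score = lambda seq: -seq[-1]    # toy scorer that prefers small final IDs
assert validation_rank([7, 3, 0], vocab_size=50, score=toy_score) == 1
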
Example #8
class Model:
    """
    A Model can:

    @type parameters: L{Parameters}
    @todo: Document
    """

    import hyperparameters
    import miscglobals
    import vocabulary
    def __init__(self, modelname="", window_size=HYPERPARAMETERS["WINDOW_SIZE"], vocab_size=vocabulary.wordmap().len, embedding_size=HYPERPARAMETERS["EMBEDDING_SIZE"], hidden_size=HYPERPARAMETERS["HIDDEN_SIZE"], seed=miscglobals.RANDOMSEED, initial_embeddings=None, two_hidden_layers=HYPERPARAMETERS["TWO_HIDDEN_LAYERS"]):
        self.modelname = modelname
        self.parameters = Parameters(window_size, vocab_size, embedding_size, hidden_size, seed, initial_embeddings, two_hidden_layers)
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0

    def __getstate__(self):
        return (self.modelname, self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt)

    def __setstate__(self, state):
        (self.modelname, self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt) = state
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#    def load(self, filename):
#        sys.stderr.write("Loading model from: %s\n" % filename)
#        f = myopen(filename, "rb")
#        (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt) = pickle.load(f)
#        if LBL:
#            graph.output_weights = self.parameters.output_weights
#            graph.output_biases = self.parameters.output_biases
#            graph.score_biases = self.parameters.score_biases
#        else:
#            graph.hidden_weights = self.parameters.hidden_weights
#            graph.hidden_biases = self.parameters.hidden_biases
#            graph.output_weights = self.parameters.output_weights
#            graph.output_biases = self.parameters.output_biases
#
#    def save(self, filename):
#        sys.stderr.write("Saving model to: %s\n" % filename)
#        f = myopen(filename, "wb")
#        pickle.dump((self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt), f)

    def embed(self, sequence):
        """
        Embed a sequence of vocabulary IDs
        """
        seq = [self.parameters.embeddings[s] for s in sequence]
        import numpy
        return [numpy.resize(s, (1, s.size)) for s in seq]
#        return [self.parameters.embeddings[s] for s in sequence]

    def embeds(self, sequences):
        """
        Embed sequences of vocabulary IDs.
        If we are given a list of MINIBATCH lists of SEQLEN items, return a list of SEQLEN matrices of shape (MINIBATCH, EMBSIZE)
        """
        embs = []
        for sequence in sequences:
            embs.append(self.embed(sequence))

        for emb in embs: assert len(emb) == len(embs[0])

        new_embs = []
        for i in range(len(embs[0])):
            colembs = [embs[j][i] for j in range(len(embs))]
            import numpy
            new_embs.append(numpy.vstack(colembs))
            assert new_embs[-1].shape == (len(sequences), self.parameters.embedding_size)
        assert len(new_embs) == len(sequences[0])
        return new_embs

    def train(self, correct_sequences, noise_sequences, weights):
        from hyperparameters import HYPERPARAMETERS
        learning_rate = HYPERPARAMETERS["LEARNING_RATE"]

        # All weights must be the same, because of how we use a scalar learning rate
        assert HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]
        if HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]:
            for w in weights: assert w == weights[0]

        if LBL:
            # REWRITE FOR MINIBATCH
            assert 0

#            noise_repr = noise_sequence[-1]
#            correct_repr = correct_sequence[-1]
            noise_repr = noise_sequence[-1:]
            correct_repr = correct_sequence[-1:]
            assert noise_repr != correct_repr
            assert noise_sequence[:-1] == correct_sequence[:-1]
            sequence = correct_sequence[:-1]
#            r = graph.train(self.embed(sequence), self.embed([correct_repr])[0], self.embed([noise_repr])[0], self.parameters.score_biases[correct_repr], self.parameters.score_biases[noise_repr])
            r = graph.train(self.embed(sequence), self.embed(correct_repr)[0], self.embed(noise_repr)[0], self.parameters.score_biases[correct_repr], self.parameters.score_biases[noise_repr], learning_rate * weights[0])
            assert len(noise_repr) == 1
            assert len(correct_repr) == 1
            noise_repr = noise_repr[0]
            correct_repr = correct_repr[0]
            (loss, predictrepr, correct_score, noise_score, dsequence, dcorrect_repr, dnoise_repr, dcorrect_scorebias, dnoise_scorebias) = r
#            print
#            print "loss = ", loss
#            print "predictrepr = ", predictrepr
#            print "correct_repr = ", correct_repr, self.embed(correct_repr)[0]
#            print "noise_repr = ", noise_repr, self.embed(noise_repr)[0]
#            print "correct_score = ", correct_score
#            print "noise_score = ", noise_score
        else:
            r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences), learning_rate * weights[0])
            if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss, l1penaltys, correct_scores, noise_scores) = r
            else:
                (losss, unpenalized_losss, l1penaltys, correct_scores, noise_scores) = r
#            print [d.shape for d in dcorrect_inputss]
#            print [d.shape for d in dnoise_inputss]
#            print "losss", losss.shape, losss
#            print "unpenalized_losss", unpenalized_losss.shape, unpenalized_losss
#            print "l1penaltys", l1penaltys.shape, l1penaltys
#            print "correct_scores", correct_scores.shape, correct_scores
#            print "noise_scores", noise_scores.shape, noise_scores

        import sets
        to_normalize = sets.Set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]

            if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
                dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

#            print [d.shape for d in dcorrect_inputs]
#            print [d.shape for d in dnoise_inputs]
#            print "loss", loss.shape, loss
#            print "unpenalized_loss", unpenalized_loss.shape, unpenalized_loss
#            print "l1penalty", l1penalty.shape, l1penalty
#            print "correct_score", correct_score.shape, correct_score
#            print "noise_score", noise_score.shape, noise_score


            self.train_loss.add(loss)
            self.train_err.add(correct_score <= noise_score)
            self.train_lossnonzero.add(loss > 0)
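            # Logistic squashing of the loss into (0, 1); for loss >= 0 this
            # lies in [0.5, 1), so the moving average stays bounded even when
            # individual losses are large.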
            squashloss = 1./(1.+math.exp(-loss))
            self.train_squashloss.add(squashloss)
            if not LBL:
                self.train_unpenalized_loss.add(unpenalized_loss)
                self.train_l1penalty.add(l1penalty)
                self.train_unpenalized_lossnonzero.add(unpenalized_loss > 0)
            self.train_correct_score.add(correct_score)
            self.train_noise_score.add(noise_score)
    
            self.train_cnt += 1
            if self.train_cnt % 10000 == 0:
    #        if self.train_cnt % 1000 == 0:
    #            print self.train_cnt
#                graph.COMPILE_MODE.print_summary()
                logging.info(("After %d updates, pre-update train loss %s" % (self.train_cnt, self.train_loss.verbose_string())))
                logging.info(("After %d updates, pre-update train error %s" % (self.train_cnt, self.train_err.verbose_string())))
                logging.info(("After %d updates, pre-update train Pr(loss != 0) %s" % (self.train_cnt, self.train_lossnonzero.verbose_string())))
                logging.info(("After %d updates, pre-update train squash(loss) %s" % (self.train_cnt, self.train_squashloss.verbose_string())))
                if not LBL:
                    logging.info(("After %d updates, pre-update train unpenalized loss %s" % (self.train_cnt, self.train_unpenalized_loss.verbose_string())))
                    logging.info(("After %d updates, pre-update train l1penalty %s" % (self.train_cnt, self.train_l1penalty.verbose_string())))
                    logging.info(("After %d updates, pre-update train Pr(unpenalized loss != 0) %s" % (self.train_cnt, self.train_unpenalized_lossnonzero.verbose_string())))
                logging.info(("After %d updates, pre-update train correct score %s" % (self.train_cnt, self.train_correct_score.verbose_string())))
                logging.info(("After %d updates, pre-update train noise score %s" % (self.train_cnt, self.train_noise_score.verbose_string())))

                self.debug_prehidden_values(correct_sequences)
    
                if LBL:
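                    # Log the score bias of a geometric sample of word ids
                    # (i *= 3.2), which covers the whole id range without
                    # dumping every word in the vocabulary.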
                    i = 1.
                    while i < wordmap.len:
                        inti = int(i)
                        str = "word %s, rank %d, score %f" % (wordmap.str(inti), inti, self.parameters.score_biases[inti])
                        logging.info("After %d updates, score biases: %s" % (self.train_cnt, str))
                        i *= 3.2
    
    #            print(("After %d updates, pre-update train loss %s" % (self.train_cnt, self.train_loss.verbose_string())))
    #            print(("After %d updates, pre-update train error %s" % (self.train_cnt, self.train_err.verbose_string())))
    

            # All weights must be the same, because of how we use a scalar learning rate
            assert HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]
            if HYPERPARAMETERS["UNIFORM EXAMPLE WEIGHTS"]:
                for w in weights: assert w == weights[0]
            embedding_learning_rate = HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] * weights[0]
            if loss == 0:
                if LBL:
                    for di in dsequence + [dcorrect_repr, dnoise_repr]:
                        # This tends to trigger if training diverges (NaN)
                        assert (di == 0).all()
    #                if not (di == 0).all():
    #                    print "WARNING:", di
    #                    print "WARNING in ", dsequence + [dcorrect_repr, dnoise_repr]
    #                    print "loss = ", loss
    #                    print "predictrepr = ", predictrepr
    #                    print "correct_repr = ", correct_repr, self.embed(correct_repr)[0]
    #                    print "noise_repr = ", noise_repr, self.embed(noise_repr)[0]
    #                    print "correct_score = ", correct_score
    #                    print "noise_score = ", noise_score
                else:
                    if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                        for di in dcorrect_inputs + dnoise_inputs:
                            assert (di == 0).all()
    
            if loss != 0:
                if LBL:
                    val = sequence + [correct_repr, noise_repr]
                    dval = dsequence + [dcorrect_repr, dnoise_repr]
    #                print val
                    for (i, di) in zip(val, dval):
    #                for (i, di) in zip(tuple(sequence + [correct_repr, noise_repr]), tuple(dsequence + [dcorrect_repr, dnoise_repr])):
                        assert di.shape[0] == 1
                        di.resize(di.size)
    #                    print i, di
                        self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                        if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                            to_normalize.add(i)
    
                    for (i, di) in zip([correct_repr, noise_repr], [dcorrect_scorebias, dnoise_scorebias]):
                        self.parameters.score_biases[i] -= 1.0 * embedding_learning_rate * di
    #                    print "REMOVEME", i, self.parameters.score_biases[i]
                else:
                    if HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] != 0:
                        for (i, di) in zip(correct_sequence, dcorrect_inputs):
    #                        assert di.shape[0] == 1
    #                        di.resize(di.size)
        #                    print i, di
                            assert di.shape == (self.parameters.embedding_size,)
                            self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                            if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                                to_normalize.add(i)
                        for (i, di) in zip(noise_sequence, dnoise_inputs):
    #                        assert di.shape[0] == 1
    #                        di.resize(di.size)
        #                    print i, di
                            assert di.shape == (self.parameters.embedding_size,)
                            self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                            if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                                to_normalize.add(i)
        #                print to_normalize
    
        if len(to_normalize) > 0:
            to_normalize = [i for i in to_normalize]
#            print "NORMALIZING", to_normalize
            self.parameters.normalize(to_normalize)



    def predict(self, sequence):
        if LBL:
            targetrepr = sequence[-1:]
            sequence = sequence[:-1]
            (predictrepr, score) = graph.predict(self.embed(sequence), self.embed(targetrepr)[0], self.parameters.score_biases[targetrepr], self.parameters)
            return score
        else:
            (score) = graph.predict(self.embed(sequence), self.parameters)
            return score

    def verbose_predict(self, sequence):
        if LBL:
            assert 0
        else:
            (score, prehidden) = graph.verbose_predict(self.embed(sequence), self.parameters)
            return score, prehidden
    
    def debug_prehidden_values(self, sequences):
        """
        Give debug output on pre-squash hidden values.
        """
        import numpy
        for (i, ve) in enumerate(sequences):
            (score, prehidden) = self.verbose_predict(ve)
            abs_prehidden = numpy.abs(prehidden)
            med = numpy.median(abs_prehidden)
            abs_prehidden = abs_prehidden.tolist()
            assert len(abs_prehidden) == 1
            abs_prehidden = abs_prehidden[0]
            abs_prehidden.sort()
            abs_prehidden.reverse()

            logging.info("model %s, %s %s %s %s %s" % (self.modelname, self.train_cnt, "abs(pre-squash hidden) median =", med, "max =", abs_prehidden[:3]))
            if i+1 >= 3: break

    def validate(self, sequence):
        """
        Get the rank of the final word of this sequence when scored against all other words in the vocabulary.
        """
        import random
        r = random.Random()
        r.seed(0)
        from hyperparameters import HYPERPARAMETERS

        import copy
        corrupt_sequence = copy.copy(sequence)
        rank = 1
        correct_score = self.predict(sequence)
#        print "CORRECT", correct_score, [wordmap.str(id) for id in sequence]
        for i in range(self.parameters.vocab_size):
            if r.random() > HYPERPARAMETERS["PERCENT OF NOISE EXAMPLES FOR VALIDATION LOGRANK"]: continue
            if i == sequence[-1]: continue
            corrupt_sequence[-1] = i
            corrupt_score = self.predict(corrupt_sequence)
            if correct_score <= corrupt_score:
#                print " CORRUPT", corrupt_score, [wordmap.str(id) for id in corrupt_sequence]
                rank += 1
        return rank

    def validate_errors(self, correct_sequences, noise_sequences):
        """
        Count the errors in this validation batch.
        """

#            r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences), learning_rate * weights[0])
        correct_scores = graph.predict(self.embeds(correct_sequences))
        noise_scores = graph.predict(self.embeds(noise_sequences))

#        print correct_scores
#        print noise_scores
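        # Per-example booleans: True where the correct sequence outscores its
        # noise counterpart; the caller presumably aggregates these into an
        # error count.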
        return correct_scores > noise_scores
Exemplo n.º 9
0
                    model.auxsigma = theano.shared(value=numpy.asarray(stdvect, dtype=theano.config.floatX), name='auxsigma')
                    model.auxsigmamin = theano.shared(value=numpy.asarray(0.001 * numpy.ones((n_aux,)), dtype=theano.config.floatX), name='auxsigmamin')

        reconstruction_error = {}
        err = dict([(trainsize, {}) for trainsize in VALIDATION_TRAININGSIZE])

        rebuildunsup(model,depth,ACT,LR[depth],NOISE_LVL[depth],BATCHSIZE,train,RULE)

        state.currentdepth = depth

        epoch = 0
        if epoch in EPOCHSTEST[depth]:
            svm_validation(err, reconstruction_error, epoch, model, depth,ACT,LR[depth],NOISE_LVL[depth],BATCHSIZE,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE, PATH_DATA, NAME_DATATEST,RULE)
            channel.save()

        train_reconstruction_error_mvgavg = MovingAverage()
        for epoch in xrange(1,NEPOCHS[depth]+1):
            time1 = time.time()
            state.currentepoch = epoch
            for filenb in xrange(1,NB_FILES + 1):
                print >> sys.stderr, "\t\tAbout to read file %s..." % percent(filenb, NB_FILES)
                print >> sys.stderr, "\t\t", stats()
#                initial_file_time = time.time()
                f = myopen(PATH_DATA + NAME_DATA + '_%s.pkl.gz' % filenb, 'r')
                object = numpy.asarray(cPickle.load(f), dtype=theano.config.floatX)
                print >> sys.stderr, "\t\t...read file %s" % percent(filenb, NB_FILES)
                print >> sys.stderr, "\t\t", stats()
                # The last training file does not have the same shape as the
                # other training files. To avoid a GPU memory error we pad it
                # to the common size, but keep track of how many instances (n)
                # it actually contains.
                # TODO: Also want to pad trainl
Exemplo n.º 10
0
    normalshape = train.value.shape
    
    model = dA(numpy.random, RandomStreams(), input=None, n_visible=NINPUTS, n_hidden=N_HID, act=ACT, noise=NOISE)

    #RELOAD previous model
    channel.save()

    err = dict([(trainsize, {}) for trainsize in VALIDATION_TRAININGSIZE])
    rebuildunsup(model,LR,NOISE_LVL,ACTIVATION_REGULARIZATION_COEFF, WEIGHT_REGULARIZATION_COEFF, BATCHSIZE,train)

    epoch = 0
    if epoch in EPOCHSTEST:
        svm_validation(err, epoch, model,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE, PATH_DATA, NAME_DATATEST)
        channel.save()

    train_reconstruction_error_mvgavg = MovingAverage()
    for epoch in xrange(1,NEPOCHS+1):
        time1 = time.time()
        state.currentepoch = epoch
        for filenb in xrange(1,NB_FILES + 1):
            print >> sys.stderr, "\t\tAbout to read file %s..." % percent(filenb, NB_FILES)
            print >> sys.stderr, "\t\t", stats()
#                initial_file_time = time.time()
            f = myopen(PATH_DATA + NAME_DATA + '_%s.pkl.gz' % filenb, 'r')
            object = numpy.asarray(cPickle.load(f), dtype=theano.config.floatX)
            print >> sys.stderr, "\t\t...read file %s" % percent(filenb, NB_FILES)
            print >> sys.stderr, "\t\t", stats()
            # The last training file does not have the same shape as the
            # other training files. To avoid a GPU memory error we pad it
            # to the common size, but keep track of how many instances (n)
            # it actually contains.
            # TODO: Also want to pad trainl
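            # A minimal sketch of the padding step described above
            # (illustrative only; the actual padding code is not part of this
            # excerpt, and `normalshape = train.value.shape` from earlier in
            # this file is assumed to be the common file shape):
            #     n_real = object.shape[0]
            #     if object.shape != normalshape:
            #         padded = numpy.zeros(normalshape, dtype=theano.config.floatX)
            #         padded[:n_real] = object
            #         object = padded
            #     # only the first n_real rows are real instances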
Exemplo n.º 11
0
class Model:
    """
    A Model can:

    @type parameters: L{Parameters}
    @todo: Document
    """

    def __init__(self):
        self.parameters = Parameters()
        graph.hidden_weights = self.parameters.hidden_weights
        graph.hidden_biases = self.parameters.hidden_biases
        graph.output_weights = self.parameters.output_weights
        graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0

    def __getstate__(self):
        return (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt)

    def __setstate__(self, state):
        (self.parameters, self.train_loss, self.train_err, self.train_lossnonzero, self.train_squashloss, self.train_unpenalized_loss, self.train_l1penalty, self.train_unpenalized_lossnonzero, self.train_correct_score, self.train_noise_score, self.train_cnt) = state

        graph.hidden_weights = self.parameters.hidden_weights
        graph.hidden_biases = self.parameters.hidden_biases
        graph.output_weights = self.parameters.output_weights
        graph.output_biases = self.parameters.output_biases


    def embed(self, sequence):
        """
        Embed a sequence of vocabulary IDs
        """
        seq = [self.parameters.embeddings[s] for s in sequence]
        import numpy
        return [numpy.resize(s, (1, s.size)) for s in seq]

    def embeds(self, sequences):
        """
        Embed sequences of vocabulary IDs.
        If we are given a list of MINIBATCH lists of SEQLEN items, return a list of SEQLEN matrices of shape (MINIBATCH, EMBSIZE)
        """
        embs = []
        for sequence in sequences:
            embs.append(self.embed(sequence))

        for emb in embs: assert len(emb) == len(embs[0])

        new_embs = []
        for i in range(len(embs[0])):
            colembs = [embs[j][i] for j in range(len(embs))]
            import numpy
            new_embs.append(numpy.vstack(colembs))
            assert new_embs[-1].shape == (len(sequences), self.parameters.embedding_size)
        assert len(new_embs) == len(sequences[0])
        return new_embs

    def corrupt_example(self, e):
        """
        Return a corrupted version of example e, plus the weight of this example.
        """
        from hyperparameters import HYPERPARAMETERS
        import random
        import copy
        e = copy.copy(e)
        last = e[-1]
        cnt = 0
        while e[-1] == last:           
            e[-1] = random.randint(0, self.parameters.vocab_size-1)
            pr = 1./self.parameters.vocab_size
            cnt += 1
            # Backoff to 0gram smoothing if we fail 10 times to get noise.
            if cnt > 10: e[-1] = random.randint(0, self.parameters.vocab_size-1)
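        # Importance weight: the noise word was drawn uniformly with
        # probability pr = 1/vocab_size, so weighting this example by 1/pr
        # lets a single sampled noise word stand in for the whole vocabulary
        # in expectation.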
        weight = 1./pr
        return e, weight

    def corrupt_examples(self, correct_sequences):
        noise_sequences = []
        weights = []
        for e in correct_sequences:
            noise_sequence, weight = self.corrupt_example(e)
            noise_sequences.append(noise_sequence)
            weights.append(weight)
        return noise_sequences, weights

    def train(self, correct_sequences):
        from hyperparameters import HYPERPARAMETERS
        learning_rate = HYPERPARAMETERS["LEARNING_RATE"]
       
        noise_sequences, weights = self.corrupt_examples(correct_sequences)
        # All weights must be the same, if we first multiply by the learning rate
        for w in weights: assert w == weights[0]

        r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences), learning_rate * weights[0])
        (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss, l1penaltys, correct_scores, noise_scores) = r
#            print [d.shape for d in dcorrect_inputss]
#            print [d.shape for d in dnoise_inputss]
#            print "losss", losss.shape, losss
#            print "unpenalized_losss", unpenalized_losss.shape, unpenalized_losss
#            print "l1penaltys", l1penaltys.shape, l1penaltys
#            print "correct_scores", correct_scores.shape, correct_scores
#            print "noise_scores", noise_scores.shape, noise_scores

        import sets
        to_normalize = sets.Set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]

            dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
            dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

#            print [d.shape for d in dcorrect_inputs]
#            print [d.shape for d in dnoise_inputs]
#            print "loss", loss.shape, loss
#            print "unpenalized_loss", unpenalized_loss.shape, unpenalized_loss
#            print "l1penalty", l1penalty.shape, l1penalty
#            print "correct_score", correct_score.shape, correct_score
#            print "noise_score", noise_score.shape, noise_score


            self.train_loss.add(loss)
            self.train_err.add(correct_score <= noise_score)
            self.train_lossnonzero.add(loss > 0)
            squashloss = 1./(1.+math.exp(-loss))
            self.train_squashloss.add(squashloss)

            self.train_unpenalized_loss.add(unpenalized_loss)
            self.train_l1penalty.add(l1penalty)
            self.train_unpenalized_lossnonzero.add(unpenalized_loss > 0)
            self.train_correct_score.add(correct_score)
            self.train_noise_score.add(noise_score)
    
            self.train_cnt += 1
            if self.train_cnt % 10000 == 0:
    #        if self.train_cnt % 1000 == 0:
    #            print self.train_cnt
#                graph.COMPILE_MODE.print_summary()
                logging.info(("After %d updates, pre-update train loss %s" % (self.train_cnt, self.train_loss.verbose_string())))
                logging.info(("After %d updates, pre-update train error %s" % (self.train_cnt, self.train_err.verbose_string())))
                logging.info(("After %d updates, pre-update train Pr(loss != 0) %s" % (self.train_cnt, self.train_lossnonzero.verbose_string())))
                logging.info(("After %d updates, pre-update train squash(loss) %s" % (self.train_cnt, self.train_squashloss.verbose_string())))

                logging.info(("After %d updates, pre-update train unpenalized loss %s" % (self.train_cnt, self.train_unpenalized_loss.verbose_string())))
                logging.info(("After %d updates, pre-update train l1penalty %s" % (self.train_cnt, self.train_l1penalty.verbose_string())))
                logging.info(("After %d updates, pre-update train Pr(unpenalized loss != 0) %s" % (self.train_cnt, self.train_unpenalized_lossnonzero.verbose_string())))
                logging.info(("After %d updates, pre-update train correct score %s" % (self.train_cnt, self.train_correct_score.verbose_string())))
                logging.info(("After %d updates, pre-update train noise score %s" % (self.train_cnt, self.train_noise_score.verbose_string())))
    

    
            for w in weights: assert w == weights[0]
            embedding_learning_rate = HYPERPARAMETERS["EMBEDDING_LEARNING_RATE"] * weights[0]
            if loss == 0:
                for di in dcorrect_inputs + dnoise_inputs:
                    assert (di == 0).all()
    
            if loss != 0:
                for (i, di) in zip(correct_sequence, dcorrect_inputs):
#                        assert di.shape[0] == 1
#                        di.resize(di.size)
#                    print i, di
                    assert di.shape == (self.parameters.embedding_size,)
                    self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                    if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                        to_normalize.add(i)
                for (i, di) in zip(noise_sequence, dnoise_inputs):
#                        assert di.shape[0] == 1
#                        di.resize(di.size)
#                    print i, di
                    assert di.shape == (self.parameters.embedding_size,)
                    self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                    if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
                        to_normalize.add(i)
#                print to_normalize
    
        if len(to_normalize) > 0:
            to_normalize = [i for i in to_normalize]
#            print "NORMALIZING", to_normalize
            self.parameters.normalize(to_normalize)



    def predict(self, sequence):
        (score) = graph.predict(self.embed(sequence), self.parameters)
        return score

    def verbose_predict(self, sequence):
        (score, prehidden) = graph.verbose_predict(self.embed(sequence), self.parameters)
        return score, prehidden

    def validate(self, sequence):
        """
        Get the rank of the final word of this sequence when scored against all other words in the vocabulary.
        """
        import random
        r = random.Random()
        r.seed(0)
        from hyperparameters import HYPERPARAMETERS

        import copy
        corrupt_sequence = copy.copy(sequence)
        rank = 1
        correct_score = self.predict(sequence)
#        print "CORRECT", correct_score, [wordmap.str(id) for id in sequence]
        for i in range(self.parameters.vocab_size):
            if r.random() > HYPERPARAMETERS["PERCENT OF NOISE EXAMPLES FOR VALIDATION LOGRANK"]: continue
            if i == sequence[-1]: continue
            corrupt_sequence[-1] = i
            corrupt_score = self.predict(corrupt_sequence)
            if correct_score <= corrupt_score:
#                print " CORRUPT", corrupt_score, [wordmap.str(id) for id in corrupt_sequence]
                rank += 1
        return rank
import sys
from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler


# Restrict to a particular path.
class RequestHandler(SimpleXMLRPCRequestHandler):
    rpc_paths = ('/RPC2', )


# Create server
server = SimpleXMLRPCServer(("0.0.0.0", jv_port + 1),
                            requestHandler=RequestHandler)
server.register_introspection_functions()

from common.movingaverage import MovingAverage
broke = MovingAverage()


def extractKeyphrases(txt):
    if broke.cnt % 100 == 0:
        print >> sys.stderr, "%s documents could NOT have keyphrase extracted" % broke
    try:
        kw = s.kea.extractKeyphrases(txt)
        broke.add(0)
        return kw
    except:
        print >> sys.stderr, "Oops! Couldn't extract keyphrases over:", repr(
            txt)
        broke.add(1)
        return []
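
# A minimal client-side sketch for calling this service (illustrative only;
# it assumes the server later registers extractKeyphrases and calls
# serve_forever(), neither of which is shown in this excerpt):
#
#     import xmlrpclib
#     proxy = xmlrpclib.ServerProxy("http://localhost:%d/RPC2" % (jv_port + 1))
#     keyphrases = proxy.extractKeyphrases("some document text")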