Пример #1
0
    def __init__(self):
        print 'call me'
        self.parameters = Parameters()
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

#        (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)
        import sets
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0
Пример #2
0
    def __init__(self,
                 modelname="",
                 window_size=HYPERPARAMETERS["WINDOW_SIZE"],
                 vocab_size=vocabulary.wordmap().len,
                 embedding_size=HYPERPARAMETERS["EMBEDDING_SIZE"],
                 hidden_size=HYPERPARAMETERS["HIDDEN_SIZE"],
                 seed=miscglobals.RANDOMSEED,
                 initial_embeddings=None,
                 two_hidden_layers=HYPERPARAMETERS["TWO_HIDDEN_LAYERS"]):
        """Create the parameters, publish them on ``graph``, reset statistics.

        Note that the default values are evaluated once, when this function
        is defined, so later changes to ``HYPERPARAMETERS``, the vocabulary
        or ``miscglobals.RANDOMSEED`` do not affect them.

        Args:
            modelname: Stored unchanged on ``self.modelname``.
            window_size, vocab_size, embedding_size, hidden_size, seed,
            initial_embeddings, two_hidden_layers: Forwarded positionally,
                in this order, to ``Parameters``.
        """
        self.modelname = modelname
        self.parameters = Parameters(window_size, vocab_size, embedding_size,
                                     hidden_size, seed, initial_embeddings,
                                     two_hidden_layers)

        # Publish the parameters on ``graph``; the attribute set differs
        # between the two branches.
        if LBL:
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases
            graph.score_biases = self.parameters.score_biases
        else:
            graph.hidden_weights = self.parameters.hidden_weights
            graph.hidden_biases = self.parameters.hidden_biases
            # The second hidden layer is only published when the parameters
            # object reports that it is enabled.
            if self.parameters.two_hidden_layers:
                graph.hidden2_weights = self.parameters.hidden2_weights
                graph.hidden2_biases = self.parameters.hidden2_biases
            graph.output_weights = self.parameters.output_weights
            graph.output_biases = self.parameters.output_biases

        # NOTE: the call below is disabled in the original and kept for reference.
        # (self.graph_train, self.graph_predict, self.graph_verbose_predict) = graph.functions(self.parameters)

        # Running training statistics, one MovingAverage per tracked quantity.
        self.train_loss = MovingAverage()
        self.train_err = MovingAverage()
        self.train_lossnonzero = MovingAverage()
        self.train_squashloss = MovingAverage()
        self.train_unpenalized_loss = MovingAverage()
        self.train_l1penalty = MovingAverage()
        self.train_unpenalized_lossnonzero = MovingAverage()
        self.train_correct_score = MovingAverage()
        self.train_noise_score = MovingAverage()
        self.train_cnt = 0
    #RELOAD previous model
    channel.save()

    err = dict([(trainsize, {}) for trainsize in VALIDATION_TRAININGSIZE])
    rebuildunsup(model, LR, NOISE_LVL, ACTIVATION_REGULARIZATION_COEFF,
                 WEIGHT_REGULARIZATION_COEFF, BATCHSIZE, train)

    epoch = 0
    if epoch in EPOCHSTEST:
        svm_validation(err, epoch, model, train, datatrain, datatrainsave,
                       datatest, datatestsave, VALIDATION_TRAININGSIZE,
                       VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE,
                       PATH_DATA, NAME_DATATEST)
        channel.save()

    train_reconstruction_error_mvgavg = MovingAverage()
    for epoch in xrange(1, NEPOCHS + 1):
        time1 = time.time()
        state.currentepoch = epoch
        for filenb in xrange(1, NB_FILES + 1):
            print >> sys.stderr, "\t\tAbout to read file %s..." % percent(
                filenb, NB_FILES)
            print >> sys.stderr, "\t\t", stats()
            #                initial_file_time = time.time()
            f = open(PATH_DATA + NAME_DATA + '_%s.pkl' % filenb, 'r')
            object = numpy.asarray(cPickle.load(f), dtype=theano.config.floatX)
            print >> sys.stderr, "\t\t...read file %s" % percent(
                filenb, NB_FILES)
            print >> sys.stderr, "\t\t", stats()
            # The last training file is not of the same shape as the other training files.
            # So, to avoid a GPU memory error, we want to make sure it is the same size.
import sys


class RequestHandler(SimpleXMLRPCRequestHandler):
    """Request handler that restricts XML-RPC calls to the ``/RPC2`` path."""

    # Only requests addressed to one of these paths are accepted.
    rpc_paths = ('/RPC2',)


# Create the XML-RPC server. It is bound to the address "0.0.0.0" on port
# jv_port + 1 (jv_port is defined outside this excerpt) and handles requests
# with RequestHandler.
server = SimpleXMLRPCServer(("0.0.0.0", jv_port + 1),
                            requestHandler=RequestHandler)
# Enable the server's introspection functions.
server.register_introspection_functions()

from common.movingaverage import MovingAverage
# Module-level tracker of extraction outcomes: extractKeyphrases() adds 1 for
# every document that failed and 0 for every document that succeeded.
broke = MovingAverage()


def extractKeyphrases(txt):
    """Return the keyphrases extracted from ``txt``, or ``[]`` on failure.

    The extraction itself is delegated to ``s.kea.extractKeyphrases``. Each
    call records its outcome in the module-level ``broke`` tracker (0 for
    success, 1 for failure). Whenever ``broke.cnt`` is a multiple of 100 the
    tracker is reported on stderr before the extraction is attempted.

    Extraction is best-effort: any ``Exception`` raised by the delegate is
    reported on stderr together with ``repr(txt)`` and an empty list is
    returned. ``KeyboardInterrupt`` and ``SystemExit`` are not caught, so
    the process can still be interrupted or shut down during a call.
    """
    if broke.cnt % 100 == 0:
        print >> sys.stderr, "%s documents could NOT have keyphrase extracted" % broke
    try:
        # Keep the guarded region to the one call that is expected to fail.
        kw = s.kea.extractKeyphrases(txt)
    except Exception:
        print >> sys.stderr, "Oops! Couldn't extract keyphrases over:", repr(
            txt)
        broke.add(1)
        return []
    else:
        broke.add(0)
        return kw