Example No. 1
        logging.info("INITIALIZING TRAINING STATE")

    logging.info(myyaml.dump(common.dump.vars_seq([hyperparameters, miscglobals])))

    #validate(0)
    diagnostics.diagnostics(cnt, m)
#    diagnostics.visualizedebug(cnt, m, rundir)
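    # Main training loop: runs until an external stop is requested via the
    # "BAD" sentinel file checked below.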
    while True:
        logging.info("STARTING EPOCH #%d" % epoch)
        for ebatch in get_train_minibatch:
            cnt += len(ebatch)
        #    print([wordmap.str(id) for id in ebatch])
            m.train(ebatch)

            #validate(cnt)
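            # Log progress roughly every 1000 examples. 1000 is rounded down to
            # a multiple of the minibatch size so the test can fire: cnt only
            # ever takes values that are multiples of the minibatch size.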
            if cnt % (int(1000./HYPERPARAMETERS["MINIBATCH SIZE"])*HYPERPARAMETERS["MINIBATCH SIZE"]) == 0:
                logging.info("Finished training step %d (epoch %d)" % (cnt, epoch))
#                print ("Finished training step %d (epoch %d)" % (cnt, epoch))
            if cnt % (int(100000./HYPERPARAMETERS["MINIBATCH SIZE"])*HYPERPARAMETERS["MINIBATCH SIZE"]) == 0:
                diagnostics.diagnostics(cnt, m)
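                # Sentinel file: creating a file named "BAD" in the run
                # directory requests a clean shutdown of training.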
                if os.path.exists(os.path.join(rundir, "BAD")):
                    logging.info("Detected file: %s\nSTOPPING" % os.path.join(rundir, "BAD"))
                    sys.stderr.write("Detected file: %s\nSTOPPING\n" % os.path.join(rundir, "BAD"))
                    sys.exit(0)
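            # Checkpoint and visualize roughly every VALIDATE_EVERY examples
            # (again rounded down to a multiple of the minibatch size).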
            if cnt % (int(HYPERPARAMETERS["VALIDATE_EVERY"]*1./HYPERPARAMETERS["MINIBATCH SIZE"])*HYPERPARAMETERS["MINIBATCH SIZE"]) == 0:
                state.save(m, cnt, epoch, get_train_minibatch, rundir, newkeystr)
                diagnostics.visualizedebug(cnt, m, rundir, newkeystr)
#                validate(cnt)
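        # The minibatch stream is exhausted; open a fresh one for the next epoch.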
        get_train_minibatch = examples.TrainingMinibatchStream()
        epoch += 1
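
    # Build random word representations: for each token, add the random context
    # vector of every neighbor inside each configured context window; neighbors
    # that fall outside the sentence are skipped.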
    for tokens in trainingsentences():
        for i in range(len(tokens)):
            for j, context in enumerate(HYPERPARAMETERS["CONTEXT_TYPES"]):
                for k in context:
                    tokidx = i + k
                    if tokidx < 0 or tokidx >= len(tokens): continue
                    random_representations[tokens[i]] += context_vectors[j][tokens[tokidx]]
        cnt += 1
        if cnt % 10000 == 0:
            diagnostics.diagnostics(cnt, random_representations)

    logging.info("DONE. Dividing embeddings by their standard deviation...")
    random_representations = random_representations * (1. / numpy.std(random_representations))
    diagnostics.diagnostics(cnt, random_representations)
    diagnostics.visualizedebug(cnt, random_representations, rundir, newkeystr)

    outfile = os.path.join(rundir, "random_representations")
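    # Optionally create a descriptive symlink alongside the canonical file.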
    if newkeystr != "":
        verboseoutfile = os.path.join(rundir, "random_representations%s" % newkeystr)
        logging.info("Writing representations to %s, and creating link %s" % (outfile, verboseoutfile))
        os.symlink("random_representations", verboseoutfile)
    else:
        logging.info("Writing representations to %s, not creating any link because of default settings" % outfile)

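    # Output format: one line per word, the token followed by its embedding values.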
    o = open(outfile, "wt")
    from vocabulary import wordmap
    for i in range(wordmap.len):
        o.write(wordmap.str(i) + " ")
        for v in random_representations[i]:
            o.write(repr(v) + " ")
        o.write("\n")
Example No. 3
    logging.info(myyaml.dump(common.dump.vars_seq([hyperparameters, miscglobals])))

    #validate(0)
    diagnostics.diagnostics(cnt, m)
#    diagnostics.visualizedebug(cnt, m, rundir)
    while True:
        logging.info("STARTING EPOCH #%d" % epoch)
        for ebatch in get_train_minibatch:
            cnt += len(ebatch)
        #    print([wordmap.str(id) for id in ebatch])

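            # Build corrupted (noise) variants of each example, plus weights,
            # and train the model on clean examples against their corruptions.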
            noise_sequences, weights = corrupt.corrupt_examples(m, ebatch)
            m.train(ebatch, noise_sequences, weights)

            #validate(cnt)
            if cnt % (int(1000./HYPERPARAMETERS["MINIBATCH SIZE"])*HYPERPARAMETERS["MINIBATCH SIZE"]) == 0:
                logging.info("Finished training step %d (epoch %d)" % (cnt, epoch))
#                print ("Finished training step %d (epoch %d)" % (cnt, epoch))
            if cnt % (int(100000./HYPERPARAMETERS["MINIBATCH SIZE"])*HYPERPARAMETERS["MINIBATCH SIZE"]) == 0:
                diagnostics.diagnostics(cnt, m)
                if os.path.exists(os.path.join(rundir, "BAD")):
                    logging.info("Detected file: %s\nSTOPPING" % os.path.join(rundir, "BAD"))
                    sys.stderr.write("Detected file: %s\nSTOPPING\n" % os.path.join(rundir, "BAD"))
                    sys.exit(0)
            if cnt % (int(HYPERPARAMETERS["VALIDATE_EVERY"]*1./HYPERPARAMETERS["MINIBATCH SIZE"])*HYPERPARAMETERS["MINIBATCH SIZE"]) == 0:
                state.save(m, cnt, epoch, get_train_minibatch, rundir, newkeystr)
                diagnostics.visualizedebug(cnt, m, rundir, newkeystr)
#                validate(cnt)
        get_train_minibatch = examples.TrainingMinibatchStream()
        epoch += 1