Example #1
def save(translation_model, cnt, lastcnt, epoch, rundir, newkeystr):
    global _lastfilename

    filename = os.path.join(rundir,
                            "translation_model-%d%s.pkl" % (cnt, newkeystr))
    logging.info("Writing translation_model to %s..." % filename)
    logging.info(stats())
    cPickle.dump(translation_model, myopen(filename, "wb"), protocol=-1)
    logging.info("...done writing translation_model to %s" % filename)
    logging.info(stats())

    #    if _lastfilename is not None:
    #        logging.info("Removing old translation_model %s..." % _lastfilename)
    #        try:
    #            os.remove(_lastfilename)
    #            logging.info("...removed %s" % _lastfilename)
    #        except:
    #            logging.info("Could NOT remove %s" % _lastfilename)
    _lastfilename = filename

    common.json.dumpfile((cnt, lastcnt, epoch, filename),
                         os.path.join(rundir, "trainstate.json"))

    filename = os.path.join(rundir, "newkeystr.txt")
    myopen(filename, "wt").write(newkeystr)
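All of these snippets rely on myopen from common.file as a drop-in replacement for open. A minimal sketch of the behavior the examples appear to assume (transparent gzip/bz2 handling keyed on the file extension) is given below; this is an illustration, not the library's actual implementation:

# Illustrative sketch only: assumed behavior of common.file.myopen.
# Plain files are opened normally; .gz/.bz2 files go through the matching codec.
import gzip
import bz2

def myopen(filename, mode="r"):
    if filename.endswith(".gz"):
        return gzip.open(filename, mode)
    if filename.endswith(".bz2"):
        return bz2.BZ2File(filename, mode)
    return open(filename, mode)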
Example #2
def createlibsvmfile(model,depth,datafiles,dataout):
    print >> sys.stderr, 'Creating libsvm file %s (model=%s, depth=%d, datafiles=%s)...' % (repr(dataout), repr(model),depth,datafiles)
    print >> sys.stderr, stats()
    outputs = [model.layers[depth].out]
    func = theano.function([model.inp],outputs)
    f = myopen(datafiles[0],'r')
    instances = numpy.asarray(cPickle.load(f),dtype=theano.config.floatX)
    f.close()
    f = myopen(datafiles[1],'r')
    labels = numpy.asarray(cPickle.load(f),dtype = 'int64')
    f.close()
    f = open(dataout,'w')
    for i in range(globalstate.NB_MAX_TRAINING_EXAMPLES_SVM/globalstate.BATCH_CREATION_LIBSVM):
        textr = ''
        rep = func(instances[globalstate.BATCH_CREATION_LIBSVM*i:globalstate.BATCH_CREATION_LIBSVM*(i+1),:])[0]
        for l in range(rep.shape[0]):
            textr += '%s '%labels[globalstate.BATCH_CREATION_LIBSVM*i+l]
            idx = rep[l,:].nonzero()[0]
            for j,v in zip(idx,rep[l,idx]):
                textr += '%s:%s '%(j,v)
            textr += '\n'
        f.write(textr)
    del instances,labels
    f.close()
    print >> sys.stderr, "...done creating libsvm files"
    print >> sys.stderr, stats()
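Note: each line written by the loop above is in the sparse LIBSVM format, i.e. a label followed by space-separated index:value pairs for the instance's nonzero features, e.g. "3 0:0.25 7:1.0".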
Example #3
def createlibsvmfile(model, depth, datafiles, dataout):
    print >> sys.stderr, 'Creating libsvm file %s (model=%s, depth=%d, datafiles=%s)...' % (
        repr(dataout), repr(model), depth, datafiles)
    print >> sys.stderr, stats()
    outputs = [model.layers[depth].out]
    func = theano.function([model.inp], outputs)
    f = myopen(datafiles[0], 'r')
    instances = numpy.asarray(cPickle.load(f), dtype=theano.config.floatX)
    f.close()
    f = myopen(datafiles[1], 'r')
    labels = numpy.asarray(cPickle.load(f), dtype='int64')
    f.close()
    f = open(dataout, 'w')
    for i in range(globalstate.NB_MAX_TRAINING_EXAMPLES_SVM /
                   globalstate.BATCH_CREATION_LIBSVM):
        textr = ''
        rep = func(
            instances[globalstate.BATCH_CREATION_LIBSVM *
                      i:globalstate.BATCH_CREATION_LIBSVM * (i + 1), :])[0]
        for l in range(rep.shape[0]):
            textr += '%s ' % labels[globalstate.BATCH_CREATION_LIBSVM * i + l]
            idx = rep[l, :].nonzero()[0]
            for j, v in zip(idx, rep[l, idx]):
                textr += '%s:%s ' % (j, v)
            textr += '\n'
        f.write(textr)
    del instances, labels
    f.close()
    print >> sys.stderr, "...done creating libsvm files"
    print >> sys.stderr, stats()
def all_training_examples_cached():
    global _all_examples
    if _all_examples is None:
        try:
            _all_examples, cnt = cPickle.load(myopen(training_examples_cache_filename()))
            assert len(_all_examples) == cnt
            logging.info("Successfully read %d training examples from %s" % (cnt, training_examples_cache_filename()))
            logging.info(stats())
        except:
            logging.info("(Couldn't read training examples from %s, sorry)" % (training_examples_cache_filename()))
            logging.info("Caching all training examples...")
            logging.info(stats())
            _all_examples = []
            for l1, l2, f1, f2, falign in bicorpora_filenames():
                for e in get_training_biexample(l1, l2, f1, f2, falign):
                    _all_examples.append(e)
                    if len(_all_examples) % 10000 == 0:
                        logging.info("\tcurrently have read %d training examples" % len(_all_examples))
                        logging.info(stats())
            random.shuffle(_all_examples)
            logging.info("...done caching all %d training examples" % len(_all_examples))
            logging.info(stats())

            cnt = len(_all_examples)
            cPickle.dump((_all_examples, cnt), myopen(training_examples_cache_filename(), "wb"), protocol=-1)
            assert len(_all_examples) == cnt
            logging.info("Wrote %d training examples to %s" % (cnt, training_examples_cache_filename()))
            logging.info(stats())
    assert _all_examples is not None
    return _all_examples
Example #5
def createlibsvmfile(model,datafiles,dataout):
    print >> sys.stderr, 'Creating libsvm file %s (model=%s, datafiles=%s)...' % (repr(dataout), repr(model),datafiles)
    print >> sys.stderr, stats()

    x = T.dmatrix()
    params = [T.dmatrix(), T.dmatrix(), T.dvector(), T.dvector()]
    model.x = x
    model.W, model.W_prime, model.b, model.b_prime = params
    model.params = [model.W, model.W_prime, model.b, model.b_prime]

    outputs = [model.get_hidden_values(model.x)]
    func = theano.function([model.x] + params,outputs)

#    print >> sys.stderr, 'REMOVEME: about to read'
#    print >> sys.stderr, stats()

    f = myopen(datafiles[0],'r')
    instances = numpy.asarray(cPickle.load(f),dtype=theano.config.floatX)
    f.close()
    f = myopen(datafiles[1],'r')
    labels = numpy.asarray(cPickle.load(f),dtype = 'int64')
    f.close()
    f = myopen(dataout,'w')

#    print >> sys.stderr, 'REMOVEME: about to iterate'
#    print >> sys.stderr, stats()

#    params = [model.Wvalue, model.W_primevalue, model.bvalue, model.b_primevalue]
    for i in range(globalstate.NB_MAX_TRAINING_EXAMPLES_SVM/globalstate.BATCH_CREATION_LIBSVM):
#        print >> sys.stderr, 'REMOVEME: about to do %d' % i
#        print >> sys.stderr, stats()
        textr = ''

        assert globalstate.BATCH_CREATION_LIBSVM == 1       # Don't want to select indices from more than one example
        x = instances[globalstate.BATCH_CREATION_LIBSVM*i:globalstate.BATCH_CREATION_LIBSVM*(i+1),:]
        nonzeros = frozenset(x.nonzero()[1])
#        print >> sys.stderr, nonzeros
#        print >> sys.stderr, len(nonzeros)

        indices = list(nonzeros)
#        # TODO: Don't duplicate this code, which also appears about one hundred lines down.
#        x = x[:,indices]
#        params = [model.Wvalue[indices], model.W_primevalue[:,indices], model.bvalue, model.b_primevalue[indices]]
#        rep = func(x, *params)[0]

        rep = func(x[:,indices], model.Wvalue[indices], model.W_primevalue[:,indices], model.bvalue, model.b_primevalue[indices])[0]

        for l in range(rep.shape[0]):
            textr += '%s '%labels[globalstate.BATCH_CREATION_LIBSVM*i+l]
            idx = rep[l,:].nonzero()[0]
            for j,v in zip(idx,rep[l,idx]):
                textr += '%s:%s '%(j,v)
            textr += '\n'
        f.write(textr)
    del instances,labels
    f.close()
    print >> sys.stderr, "...done creating libsvm files"
    print >> sys.stderr, stats()
Example #6
def save(params, rundir, best_validation_accuracy, best_validation_at):
    import common.hyperparameters
    HYPERPARAMETERS = common.hyperparameters.read("attardi07_english_ptb")
    HLAYERS = HYPERPARAMETERS["hidden layers"]
    if HLAYERS == 2:
        (w1, b1, wh, bh, w2, b2) = params
        cPickle.dump((w1, b1, wh, bh, w2, b2), myopen(_filename(rundir), "w"), protocol=-1)
    else:
        (w1, b1, w2, b2) = params
        cPickle.dump((w1, b1, w2, b2), myopen(_filename(rundir), "w"), protocol=-1)
    myopen(join(rundir, "best-model-validation.txt"), "w").write("Accuracy %.2f%% after %d updates" % (best_validation_accuracy*100, best_validation_at))
Example #7
def state_save():
    if HLAYERS == 2:
        cPickle.dump((w1, b1, wh, bh, w2, b2),
                     myopen(join(rundir, "best-model.pkl"), "w"),
                     protocol=-1)
    else:
        cPickle.dump((w1, b1, w2, b2),
                     myopen(join(rundir, "best-model.pkl"), "w"),
                     protocol=-1)
    myopen(join(rundir, "best-model-validation.txt"),
           "w").write("Accuracy %.2f%% after %d updates" %
                      (best_validation_accuracy * 100, best_validation_at))
Example #8
def indexed_weights():

    global _indexed_weights
    if _indexed_weights is not None:
        return _indexed_weights
    print >> sys.stderr, len(
        wordmap.map), "=?=", HYPERPARAMETERS["VOCABULARY_SIZE"]
    assert len(wordmap.map) == HYPERPARAMETERS["VOCABULARY_SIZE"]
    if HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"] == 0:
        _indexed_weights = [1 for id in range(wordmap.len)]
    elif HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"] == 1:
        from common.json import load
        from common.file import myopen
        ngrams_file = HYPERPARAMETERS["NGRAMS"][(
            HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"],
            HYPERPARAMETERS["VOCABULARY_SIZE"])]
        print >> sys.stderr, "Reading ngrams from", ngrams_file, "..."
        from collections import defaultdict
        ngramcnt = defaultdict(int)
        for (ngram, cnt) in load(myopen(ngrams_file)):
            assert len(ngram) == 1
            ngramcnt[ngram[0]] = cnt + HYPERPARAMETERS[
                "TRAINING_NOISE_SMOOTHING_ADDITION"]
        _indexed_weights = [
            ngramcnt[wordmap.str(id)] for id in range(len(wordmap.map))
        ]
        _indexed_weights = build(_indexed_weights)
    else:
        assert 0
    return _indexed_weights
Example #9
def load(rundir):
    print >> sys.stderr, "Loading state from %s..." % _filename(rundir)
    print >> sys.stderr, stats()
    m = cPickle.load(myopen(_filename(rundir), "r"))
    print >> sys.stderr, "...done loading state from %s" % _filename(rundir)
    print >> sys.stderr, stats()
    return m
Example #10
def load(rundir):
    print >> sys.stderr, "Loading state from %s..." % _filename(rundir)
    print >> sys.stderr, stats()
    m = cPickle.load(myopen(_filename(rundir), "r"))
    print >> sys.stderr, "...done loading state from %s" % _filename(rundir)
    print >> sys.stderr, stats()
    return m
Example #11
def svm_validation(err, epoch, model, train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE, PATH_DATA, NAME_DATATEST):
    """
    Perform full SVM validation.
    """
    print >> sys.stderr, "Validating (err=%s,epoch=%s,model=%s,train=%s,datatrain=%s,datatrainsave=%s,datatest=%s,datatestsave=%s,VALIDATION_TRAININGSIZE=%s,VALIDATION_RUNS_FOR_EACH_TRAININGSIZE=%s,PATH_SAVE=%s)..." % (err, epoch, model,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE)
    print >> sys.stderr, stats()

    createlibsvmfile(model,datatrain,datatrainsave)
    createlibsvmfile(model,datatest,datatestsave)

    for trainsize in VALIDATION_TRAININGSIZE:
        print trainsize
        print VALIDATION_RUNS_FOR_EACH_TRAININGSIZE
        C,testerr,testerrdev,trainerr,trainerrdev = svm_validation_for_one_trainsize(trainsize,VALIDATION_RUNS_FOR_EACH_TRAININGSIZE[`trainsize`],datatrainsave,datatestsave,PATH_SAVE)
        err[trainsize].update({epoch:(C,testerr,testerrdev,trainerr,trainerrdev)})

    for trainsize in VALIDATION_TRAININGSIZE:
        print >> sys.stderr, 'VALIDATION: epoch %d / trainsize %d / svm error' % ( epoch, trainsize) ,err[trainsize][epoch]
    print >> sys.stderr, stats()

    if epoch != 0:
        f = myopen('err.pkl','w')
        for trainsize in VALIDATION_TRAININGSIZE:
            cPickle.dump(err[trainsize],f,-1)
        f.close()
    print >> sys.stderr, "...done validating (err=%s,epoch=%s,model=%s,train=%s,datatrain=%s,datatrainsave=%s,datatest=%s,datatestsave=%s,VALIDATION_TRAININGSIZE=%s,VALIDATION_RUNS_FOR_EACH_TRAININGSIZE=%s,PATH_SAVE=%s)" % (err, epoch, model,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE)
    print >> sys.stderr, stats()
Example #12
def save(params, rundir, best_validation_accuracy, best_validation_at):
    import common.hyperparameters
    HYPERPARAMETERS = common.hyperparameters.read("attardi07_english_ptb")
    HLAYERS = HYPERPARAMETERS["hidden layers"]
    if HLAYERS == 2:
        (w1, b1, wh, bh, w2, b2) = params
        cPickle.dump((w1, b1, wh, bh, w2, b2),
                     myopen(_filename(rundir), "w"),
                     protocol=-1)
    else:
        (w1, b1, w2, b2) = params
        cPickle.dump((w1, b1, w2, b2),
                     myopen(_filename(rundir), "w"),
                     protocol=-1)
    myopen(join(rundir, "best-model-validation.txt"),
           "w").write("Accuracy %.2f%% after %d updates" %
                      (best_validation_accuracy * 100, best_validation_at))
def write(_wordmap_new):
    """
    Write the word ID map, passed as a parameter.
    """
    global _wordmap
    assert _wordmap is None
    _wordmap = _wordmap_new
    print >> sys.stderr, "Writing word map with %d words to %s..." % (_wordmap.len, _wordmap_filename())
    cPickle.dump(_wordmap, myopen(_wordmap_filename(), "w"))
Example #14
def write(_wordmap_new):
    """
    Write the word ID map, passed as a parameter.
    """
    global _wordmap
    assert _wordmap is None
    _wordmap = _wordmap_new
    print >> sys.stderr, "Writing word map with %d words to %s..." % (
        _wordmap.len, _wordmap_filename())
    cPickle.dump(_wordmap, myopen(_wordmap_filename(), "w"))
def write(_targetmap_new, name=""):
    """
    Write the target ID map, passed as a parameter.
    """
    global _targetmap
    assert name not in _targetmap
    _targetmap[name] = _targetmap_new
    f = _targetmap_filename(name=name)
    print >> sys.stderr, "Writing target map to %s..." % f
    cPickle.dump(_targetmap[name], myopen(f, "w"))
Example #16
def get_example(f):
    import common.hyperparameters
    HYPERPARAMETERS = common.hyperparameters.read("language-model")
    for l in myopen(f):
        prevwords = []
        for w in string.split(l):
            w = string.strip(w)
            prevwords.append(w)
            if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
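Note: get_example yields every full window of WINDOW_SIZE consecutive tokens; with WINDOW_SIZE = 3, for instance, the line "a b c d" produces ['a', 'b', 'c'] and then ['b', 'c', 'd'].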
def targetmap(name=""):
    global _targetmap
    if name not in _targetmap:
        f = _targetmap_filename(name=name)
        print >> sys.stderr, "Reading target map from %s..." % f
        print >> sys.stderr, stats()
        _targetmap[name] = cPickle.load(myopen(f))
        print >> sys.stderr, "...done reading target map from %s" % f
        print >> sys.stderr, stats()
    return _targetmap[name]
def write(_targetmap_new, name=""):
    """
    Write the target ID map, passed as a parameter.
    """
    global _targetmap
    assert name not in _targetmap
    _targetmap[name] = _targetmap_new
    f = _targetmap_filename(name=name)
    print >> sys.stderr, "Writing target map to %s..." % f
    cPickle.dump(_targetmap[name], myopen(f, "w"))
def get_example(f):
    import common.hyperparameters
    HYPERPARAMETERS = common.hyperparameters.read("language-model")
    for l in myopen(f):
        prevwords = []
        for w in string.split(l):
            w = string.strip(w)
            prevwords.append(w)
            if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
def targetmap(name=""):
    global _targetmap
    if name not in _targetmap:
        f = _targetmap_filename(name=name)
        print >> sys.stderr, "Reading target map from %s..." % f
        print >> sys.stderr, stats()
        _targetmap[name] = cPickle.load(myopen(f))
        print >> sys.stderr, "...done reading target map from %s" % f
        print >> sys.stderr, stats()
    return _targetmap[name]
Example #21
def svm_validation(err, reconstruction_error, epoch, model, depth, ACT,LR,NOISE_LVL,BATCHSIZE,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE, PATH_DATA, NAME_DATATEST,RULE):
    """
    Perform full SVM validation.
    """
    global TRAINFUNC

    print >> sys.stderr, "Validating (err=%s,epoch=%s,model=%s,depth=%s,ACT=%s,LR=%s,NOISE_LVL=%s,BATCHSIZE=%s,train=%s,datatrain=%s,datatrainsave=%s,datatest=%s,datatestsave=%s,VALIDATION_TRAININGSIZE=%s,VALIDATION_RUNS_FOR_EACH_TRAININGSIZE=%s,PATH_SAVE=%s)..." % (err, epoch, model, depth, ACT,LR,NOISE_LVL,BATCHSIZE,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE)
    print >> sys.stderr, stats()

    # Call with noiselevel = None before running the SVM.
    # No noise because we want the exact representation for each instance.
    rebuildunsup(model,depth,ACT,LR,None,BATCHSIZE,train,RULE)

    createlibsvmfile(model,depth,datatrain,datatrainsave)
    createlibsvmfile(model,depth,datatest,datatestsave)

    for trainsize in VALIDATION_TRAININGSIZE:
        print trainsize
        print VALIDATION_RUNS_FOR_EACH_TRAININGSIZE
        C,testerr,testerrdev,trainerr,trainerrdev,testerrnew,testerrnewdev,trainerrnew,trainerrnewdev =\
                                            svm_validation_for_one_trainsize(trainsize,VALIDATION_RUNS_FOR_EACH_TRAININGSIZE[`trainsize`],datatrainsave,datatestsave,PATH_SAVE)
        err[trainsize].update({epoch:(C,testerr,testerrdev,trainerr,trainerrdev,testerrnew,testerrnewdev,trainerrnew,trainerrnewdev)})


    if epoch != 0:
        f = myopen(PATH_DATA + NAME_DATATEST +'_1.pkl.gz','r')
        train.container.value[:] = numpy.asarray(cPickle.load(f),dtype=theano.config.floatX)
        f.close()

    # Now, restore TRAINFUNC with the original NOISE_LVL
    rebuildunsup(model,depth,ACT,LR,NOISE_LVL,BATCHSIZE,train,RULE)
    reconstruction_error.update({epoch:TESTFUNC()})

    print >> sys.stderr, 'VALIDATION: depth %d / epoch %d / reconstruction error (is this on test or train?): ' % (depth+1, epoch),reconstruction_error[epoch]
    for trainsize in VALIDATION_TRAININGSIZE:
        print >> sys.stderr, 'VALIDATION: depth %d / epoch %d / trainsize %d / svm error' % (depth+1, epoch, trainsize),err[trainsize][epoch]
    print >> sys.stderr, stats()

    if epoch != 0:
        f = open('depth%serr.pkl'%depth,'w')
        cPickle.dump(reconstruction_error,f,-1)
        for trainsize in VALIDATION_TRAININGSIZE:
            cPickle.dump(err[trainsize],f,-1)
        f.close()
        modeldir = os.path.join(PATH_SAVE, 'depth%spre%s' % (depth+1,epoch))
        if not os.path.isdir(modeldir):
            os.mkdir(modeldir)
        model.save(modeldir)
        if RULE == 5:
            f = open(modeldir + '/auxsigma.pkl','w')
            cPickle.dump(model.auxsigma.value,f,-1)
            f.close()

    print >> sys.stderr, "...done validating (err=%s,epoch=%s,model=%s,depth=%s,ACT=%s,LR=%s,NOISE_LVL=%s,BATCHSIZE=%s,train=%s,datatrain=%s,datatrainsave=%s,datatest=%s,datatestsave=%s,VALIDATION_TRAININGSIZE=%s,VALIDATION_RUNS_FOR_EACH_TRAININGSIZE=%s,PATH_SAVE=%s)" % (err, epoch, model, depth, ACT,LR,NOISE_LVL,BATCHSIZE,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE)
    print >> sys.stderr, stats()
def load(rundir, newkeystr):
    """
    Read the directory and load the translation_model, the training count, the training epoch, and the training state.
    """
    global _lastfilename

    filename = os.path.join(rundir, "newkeystr.txt")
    assert newkeystr == myopen(filename).read()

    (cnt, lastcnt, epoch, filename) = common.json.loadfile(os.path.join(rundir, "trainstate.json"))

#    filename = os.path.join(rundir, "translation_model-%d%s.pkl" % (cnt, newkeystr))
    print >> sys.stderr, ("Reading translation_model from %s..." % filename)
    print >> sys.stderr, (stats())
    translation_model = cPickle.load(myopen(filename))
    print >> sys.stderr, ("...done reading translation_model from %s" % filename)
    print >> sys.stderr, (stats())
    _lastfilename = filename

    return (translation_model, cnt, lastcnt, epoch)
def load(rundir, newkeystr):
    """
    Read the directory and load the model, the training count, the training epoch, and the training state.
    """
    global _lastfilename

    filename = os.path.join(rundir, "newkeystr.txt")
    assert newkeystr == myopen(filename).read()

    filename = os.path.join(rundir, "trainstate.pkl")
    (trainstate, cnt, epoch) = cPickle.load(myopen(filename))

    filename = os.path.join(rundir, "model-%d%s.pkl" % (cnt, newkeystr))
    print >> sys.stderr, ("Reading model from %s..." % filename)
    print >> sys.stderr, (stats())
    model = cPickle.load(myopen(filename))
    print >> sys.stderr, ("...done reading model from %s" % filename)
    print >> sys.stderr, (stats())
    _lastfilename = filename

    return (model, cnt, epoch, trainstate)
Example #24
def get_validation_example():

    from vocabulary import wordmap
    for l in myopen(HYPERPARAMETERS["VALIDATION_SENTENCES"]):
        prevwords = []
        for w in string.split(l):
            w = string.strip(w)
            if wordmap.exists(w):
                prevwords.append(wordmap.id(w))
                if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                    yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
            else:
                prevwords = []
Example #25
def all_training_examples_cached():
    global _all_examples
    if _all_examples is None:
        try:
            _all_examples, cnt = cPickle.load(
                myopen(training_examples_cache_filename()))
            assert len(_all_examples) == cnt
            logging.info("Successfully read %d training examples from %s" %
                         (cnt, training_examples_cache_filename()))
            logging.info(stats())
        except:
            logging.info("(Couldn't read training examples from %s, sorry)" %
                         (training_examples_cache_filename()))
            logging.info("Caching all training examples...")
            logging.info(stats())
            _all_examples = []
            for l1, l2, f1, f2, falign in bicorpora_filenames():
                for e in get_training_biexample(l1, l2, f1, f2, falign):
                    _all_examples.append(e)
                    if len(_all_examples) % 10000 == 0:
                        logging.info(
                            "\tcurrently have read %d training examples" %
                            len(_all_examples))
                        logging.info(stats())
            random.shuffle(_all_examples)
            logging.info("...done caching all %d training examples" %
                         len(_all_examples))
            logging.info(stats())

            cnt = len(_all_examples)
            cPickle.dump((_all_examples, cnt),
                         myopen(training_examples_cache_filename(), "wb"),
                         protocol=-1)
            assert len(_all_examples) == cnt
            logging.info("Wrote %d training examples to %s" %
                         (cnt, training_examples_cache_filename()))
            logging.info(stats())
    assert _all_examples is not None
    return _all_examples
Example #26
def load(rundir, newkeystr):
    """
    Read the directory and load the translation_model, the training count, the training epoch, and the training state.
    """
    global _lastfilename

    filename = os.path.join(rundir, "newkeystr.txt")
    assert newkeystr == myopen(filename).read()

    (cnt, lastcnt, epoch,
     filename) = common.json.loadfile(os.path.join(rundir, "trainstate.json"))

    #    filename = os.path.join(rundir, "translation_model-%d%s.pkl" % (cnt, newkeystr))
    print >> sys.stderr, ("Reading translation_model from %s..." % filename)
    print >> sys.stderr, (stats())
    translation_model = cPickle.load(myopen(filename))
    print >> sys.stderr, ("...done reading translation_model from %s" %
                          filename)
    print >> sys.stderr, (stats())
    _lastfilename = filename

    return (translation_model, cnt, lastcnt, epoch)
Example #27
def get_validation_example():
    
    from vocabulary import wordmap
    for l in myopen(HYPERPARAMETERS["VALIDATION_SENTENCES"]):
        prevwords = []
        for w in string.split(l):
            w = string.strip(w)
            if wordmap.exists(w):
                prevwords.append(wordmap.id(w))
                if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                    yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
            else:
                prevwords = []
def save(translation_model, cnt, lastcnt, epoch, rundir, newkeystr):
    global _lastfilename

    filename = os.path.join(rundir, "translation_model-%d%s.pkl" % (cnt, newkeystr))
    logging.info("Writing translation_model to %s..." % filename)
    logging.info(stats())
    cPickle.dump(translation_model, myopen(filename, "wb"), protocol=-1)
    logging.info("...done writing translation_model to %s" % filename)
    logging.info(stats())

#    if _lastfilename is not None:
#        logging.info("Removing old translation_model %s..." % _lastfilename)
#        try:
#            os.remove(_lastfilename)
#            logging.info("...removed %s" % _lastfilename)
#        except:
#            logging.info("Could NOT remove %s" % _lastfilename)
    _lastfilename = filename

    common.json.dumpfile((cnt, lastcnt, epoch, filename), os.path.join(rundir, "trainstate.json"))

    filename = os.path.join(rundir, "newkeystr.txt")
    myopen(filename, "wt").write(newkeystr)
def save(model, cnt, epoch, trainstate, rundir, newkeystr):
    global _lastfilename

    filename = os.path.join(rundir, "model-%d%s.pkl" % (cnt, newkeystr))
    logging.info("Writing model to %s..." % filename)
    logging.info(stats())
    cPickle.dump(model, myopen(filename, "wb"), protocol=-1)
    logging.info("...done writing model to %s" % filename)
    logging.info(stats())

    if _lastfilename is not None:
        logging.info("Removing old model %s..." % _lastfilename)
        try:
            os.remove(_lastfilename)
            logging.info("...removed %s" % _lastfilename)
        except:
            logging.info("Could NOT remove %s" % _lastfilename)
    _lastfilename = filename

    filename = os.path.join(rundir, "trainstate.pkl")
    cPickle.dump((trainstate, cnt, epoch), myopen(filename, "wb"), protocol=-1)

    filename = os.path.join(rundir, "newkeystr.txt")
    myopen(filename, "wt").write(newkeystr)
Example #30
 def __iter__(self):
     from vocabulary import wordmap
     self.filename = HYPERPARAMETERS["TRAIN_SENTENCES"]
     self.count = 0
     for l in myopen(self.filename):
         prevwords = []
         for w in string.split(l):
             w = string.strip(w)
             id = None
             if wordmap.exists(w):
                 prevwords.append(wordmap.id(w))
                 if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                     self.count += 1
                     yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
             else:
                 prevwords = []
Example #31
 def __iter__(self):
     from vocabulary import wordmap
     self.filename = HYPERPARAMETERS["TRAIN_SENTENCES"]
     self.count = 0
     for l in myopen(self.filename):
         prevwords = []
         for w in string.split(l):
             w = string.strip(w)
             id = None
             if wordmap.exists(w):
                 prevwords.append(wordmap.id(w))
                 if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                     self.count += 1
                     yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
             else:
                 prevwords = []
Example #32
def svm_validation_for_one_trainsize_and_one_C(C, nbinputs,numruns,datatrainsave,datatestsave,PATH_SAVE):
    """
    Train an SVM using some C on nbinputs training examples, for numruns runs.
    Return:
        testerr,testerrdev,trainerr,trainerrdev
    """
    print >> sys.stderr, "\t\tTraining SVM with C=%f, nbinputs=%d, numruns=%d" % (C, nbinputs,numruns)

    os.system('%s -s 4 -c %s -l %s -r %s -q %s %s %s > /dev/null 2> /dev/null'%(globalstate.SVMRUNALL_PATH,C,nbinputs,numruns,datatrainsave,datatestsave,PATH_SAVE+'/currentsvm.txt'))
    results = myopen(PATH_SAVE+'/currentsvm.txt','r').readline()[:-1].split(' ')
    os.remove(PATH_SAVE+'/currentsvm.txt')
    trainerr       = float(results[1])
    trainerrdev    = float(results[2])
    testerr        = float(results[3])
    testerrdev     = float(results[4])
    return testerr,testerrdev,trainerr,trainerrdev
def get_validation_example():
    HYPERPARAMETERS = common.hyperparameters.read("language-model")

    from vocabulary import wordmap
    for l in myopen(HYPERPARAMETERS["VALIDATION_SENTENCES"]):
        prevwords = []
        for w in string.split(l):
            w = string.strip(w)
            if wordmap.exists(w):
                prevwords.append(wordmap.id(w))
                if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                    yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
            else:
                # If we can learn an unknown word token, we should
                # delexicalize the word, not discard the example!
                if HYPERPARAMETERS["INCLUDE_UNKNOWN_WORD"]: assert 0
                prevwords = []
Example #34
def get_validation_example():
    HYPERPARAMETERS = common.hyperparameters.read("language-model")

    from vocabulary import wordmap
    for l in myopen(HYPERPARAMETERS["VALIDATION_SENTENCES"]):
        prevwords = []
        for w in string.split(l):
            w = string.strip(w)
            if wordmap.exists(w):
                prevwords.append(wordmap.id(w))
                if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                    yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
            else:
                # If we can learn an unknown word token, we should
                # delexicalize the word, not discard the example!
                if HYPERPARAMETERS["INCLUDE_UNKNOWN_WORD"]: assert 0
                prevwords = []
Example #35
 def __iter__(self):
     HYPERPARAMETERS = common.hyperparameters.read("language-model")
     from vocabulary import wordmap
     self.filename = HYPERPARAMETERS["TRAIN_SENTENCES"]
     self.count = 0
     for l in myopen(self.filename):
         prevwords = []
         for w in string.split(l):
             w = string.strip(w)
             id = None
             if wordmap.exists(w):
                 prevwords.append(wordmap.id(w))
                 if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                     self.count += 1
                     yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
             else:
                 # If we can learn an unknown word token, we should
                 # delexicalize the word, not discard the example!
                 if HYPERPARAMETERS["INCLUDE_UNKNOWN_WORD"]: assert 0
                 prevwords = []
 def __iter__(self):
     HYPERPARAMETERS = common.hyperparameters.read("language-model")
     from vocabulary import wordmap
     self.filename = HYPERPARAMETERS["TRAIN_SENTENCES"]
     self.count = 0
     for l in myopen(self.filename):
         prevwords = []
         for w in string.split(l):
             w = string.strip(w)
             id = None
             if wordmap.exists(w):
                 prevwords.append(wordmap.id(w))
                 if len(prevwords) >= HYPERPARAMETERS["WINDOW_SIZE"]:
                     self.count += 1
                     yield prevwords[-HYPERPARAMETERS["WINDOW_SIZE"]:]
             else:
                 # If we can learn an unknown word token, we should
                 # delexicalize the word, not discard the example!
                 if HYPERPARAMETERS["INCLUDE_UNKNOWN_WORD"]: assert 0
                 prevwords = []
def trainingsentences():
    """
    For each line (sentence) in the training data, transform it into a list of token IDs.
    """

    HYPERPARAMETERS = common.hyperparameters.read("random-indexing")
    from vocabulary import wordmap
    filename = HYPERPARAMETERS["TRAIN_SENTENCES"]
    count = 0
    for l in myopen(filename):
        tokens = []
        for w in string.split(l):
            w = string.strip(w)
            assert wordmap.exists(w)     # Not exactly clear what to do
                                         # if the word isn't in the vocab.
            tokens.append(wordmap.id(w))
        yield tokens
        count += 1
        if count % 1000 == 0:
            logging.info("Read %d lines from training file %s..." % (count, filename))
            logging.info(stats())
Example #38
def trainingsentences():
    """
    For each line (sentence) in the training data, transform it into a list of token IDs.
    """

    HYPERPARAMETERS = common.hyperparameters.read("random-indexing")
    from vocabulary import wordmap
    filename = HYPERPARAMETERS["TRAIN_SENTENCES"]
    count = 0
    for l in myopen(filename):
        tokens = []
        for w in string.split(l):
            w = string.strip(w)
            assert wordmap.exists(w)  # Not exactly clear what to do
            # if the word isn't in the vocab.
            tokens.append(wordmap.id(w))
        yield tokens
        count += 1
        if count % 1000 == 0:
            logging.info("Read %d lines from training file %s..." %
                         (count, filename))
            logging.info(stats())
Example #39
def indexed_weights():
    
    global _indexed_weights
    if _indexed_weights is not None:
        return _indexed_weights
    print >> sys.stderr, len(wordmap.map), "=?=", HYPERPARAMETERS["VOCABULARY_SIZE"]
    assert len(wordmap.map) == HYPERPARAMETERS["VOCABULARY_SIZE"]
    if HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"] == 0:
        _indexed_weights = [1 for id in range(wordmap.len)]
    elif HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"] == 1:
        from common.json import load
        from common.file import myopen
        ngrams_file = HYPERPARAMETERS["NGRAMS"][(HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"], HYPERPARAMETERS["VOCABULARY_SIZE"])]
        print >> sys.stderr, "Reading ngrams from", ngrams_file, "..."
        from collections import defaultdict
        ngramcnt = defaultdict(int)
        for (ngram, cnt) in load(myopen(ngrams_file)):
            assert len(ngram) == 1
            ngramcnt[ngram[0]] = cnt + HYPERPARAMETERS["TRAINING_NOISE_SMOOTHING_ADDITION"]
        _indexed_weights = [ngramcnt[wordmap.str(id)] for id in range(len(wordmap.map))]
        _indexed_weights = build(_indexed_weights)
    else: assert 0
    return _indexed_weights
#!/bin/env python

import sys

brownfile = "/u/turian/data/share/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1-v3.txt.gz"
prefixes = [4, 6, 10, 20]

from common.file import myopen
import string
word_to_cluster = {}
for l in myopen(brownfile):
    cluster, word, cnt = string.split(l)
    word_to_cluster[word] = cluster

def output_features(fo, seq):
    for i in range(2, len(seq)-2):
        fs = []

        fs.append('U00=%s' % seq[i-2][0])
        fs.append('U01=%s' % seq[i-1][0])
        fs.append('U02=%s' % seq[i][0])
        fs.append('U03=%s' % seq[i+1][0])
        fs.append('U04=%s' % seq[i+2][0])

        for name, pos in zip(["U00", "U01", "U02", "U03", "U04"], [i-2,i-1,i,i+1,i+2]):
            if seq[pos][0] not in word_to_cluster: continue
            for p in prefixes:
                fs.append("%sbp%d=%s" % (name, p, word_to_cluster[seq[pos][0]][:p]))

        fs.append('U05=%s/%s' % (seq[i-1][0], seq[i][0]))
        fs.append('U06=%s/%s' % (seq[i][0], seq[i+1][0]))
Example #41
def state_save():
    if HLAYERS == 2:
        cPickle.dump((w1, b1, wh, bh, w2, b2), myopen(join(rundir, "best-model.pkl"), "w"), protocol=-1)
    else:
        cPickle.dump((w1, b1, w2, b2), myopen(join(rundir, "best-model.pkl"), "w"), protocol=-1)
    myopen(join(rundir, "best-model-validation.txt"), "w").write("Accuracy %.2f%% after %d updates" % (best_validation_accuracy*100, best_validation_at))
Example #42
def dumpfile(object, filename):
    """
    Dump JSON to a filename.
    """
    return dump(object, myopen(filename, "wb"))



prefixes = [int(s) for s in string.split(options.prefixes, sep=",")]

if options.brown is None: options.brown = []
word_to_cluster = []
for i, brownfile in enumerate(options.brown):
    print >> sys.stderr, "Reading Brown file: %s" % brownfile
    word_to_cluster.append({})
    assert len(word_to_cluster) == i+1
    for l in myopen(brownfile):
        cluster, word, cnt = string.split(l)
        word_to_cluster[i][word] = cluster

if options.embedding is None: options.embedding = []
word_to_embedding = []
for i, embeddingfile in enumerate(options.embedding):
    print >> sys.stderr, "Reading Embedding file: %s" % embeddingfile
    word_to_embedding.append({})
    assert len(word_to_embedding) == i+1
    for l in myopen(embeddingfile):
        sp = string.split(l)
        word_to_embedding[i][sp[0]] = [float(v)*options.embeddingscale for v in sp[1:]]

assert len(word_to_embedding) == 0
Example #44
"""
Automatically load the wordmap, if available.
"""

import cPickle
from common.file import myopen
import sys


def _wordmap_filename(name=""):
    import common.hyperparameters, common.options

    HYPERPARAMETERS = common.hyperparameters.read("language-model")
    return HYPERPARAMETERS["MONOLINGUAL_VOCABULARY_IDMAP_FILE"]


wordmap = None
try:
    wordmap = cPickle.load(myopen(_wordmap_filename()))
    wordmap.str = wordmap.key
except:
    pass


def write(wordmap, name=""):
    """
    Write the word ID map, passed as a parameter.
    """
    print >>sys.stderr, "Writing word map to %s..." % _wordmap_filename(name)
    cPickle.dump(wordmap, myopen(_wordmap_filename(name), "w"))
Example #45
HYPERPARAMETERS = common.hyperparameters.read("attardi07_english_ptb")
common.options.reparse(HYPERPARAMETERS)

random.seed(HYPERPARAMETERS["random seed"])

from common.file import myopen
import string

Tf = HYPERPARAMETERS["train examples file"]
Vf = HYPERPARAMETERS["validation examples file"]
T = open(Tf, "wt")
V = open(Vf, "wt")

print "Writing to %s and %s" % (Tf, Vf)

ex = ""
for l in myopen(HYPERPARAMETERS["original examples file"]):
    ex += l
    if string.strip(l) == "":
        if random.random() < HYPERPARAMETERS["validation example likelihood"]:
            V.write(ex)
        else:
            T.write(ex)
        ex = ""

if ex != "":
    if random.random() < HYPERPARAMETERS["validation example likelihood"]:
        V.write(ex)
    else:
        T.write(ex)
Example #46
        WEIGHT_REGULARIZATION_COEFF = oldstate.weight_regularization_coeff[:-1] + WEIGHT_REGULARIZATION_COEFF
        NEPOCHS = oldstate.nepochs + NEPOCHS
        LR = oldstate.lr + LR
        NOISE_LVL = oldstate.noise_lvl + NOISE_LVL
        EPOCHSTEST = oldstate.epochstest + EPOCHSTEST
        state.bestrec = oldstate.bestrec
        state.bestrecepoch = oldstate.bestrecepoch
        del oldstate

    #if 'rectifier' in ACT:
        #assert ACT.index('rectifier')== DEPTH -1
        # Methods to stack rectifier are still in evaluation (5 different techniques)
        # The best will be implemented in the script soon :).
    filename = PATH_DATA + NAME_DATATEST + '_1.pkl.gz'
    print filename
    f =myopen(filename,'r')
    train = theano.shared(numpy.asarray(cPickle.load(f),dtype=theano.config.floatX))
    f.close()
    normalshape = train.value.shape
    
    model=SDAE(numpy.random,RandomStreams(),DEPTH,True,act=ACT,n_hid=N_HID,n_out=5,sparsity=ACTIVATION_REGULARIZATION_COEFF,\
            regularization=WEIGHT_REGULARIZATION_COEFF, wdreg = WEIGHT_REGULARIZATION_TYPE, spreg = ACTIVATION_REGULARIZATION_TYPE, n_inp=NINPUTS,noise=NOISE,tie=True)

    #RELOAD previous model
    for depth in range(depthbegin):
        print >> sys.stderr, 'reload layer',depth+1
        print >> sys.stderr, stats()
        model.layers[depth].W.value = cPickle.load(open(MODEL_RELOAD + 'Layer%s_W.pkl'%(depth+1),'r'))
        model.layers[depth].b.value = cPickle.load(open(MODEL_RELOAD + 'Layer%s_b.pkl'%(depth+1),'r'))
        model.layers[depth].mask.value = cPickle.load(open(MODEL_RELOAD + 'Layer%s_mask.pkl'%(depth+1),'r'))
Example #47
 def loadhelp():
     f = myopen(self.filename, "rb")
     (self.map, self.reverse_map) = pickle.load(f)
Example #48
 def dump(self):
     """ Dump the map to disk. """
     assert self.synchronize
     f = myopen(self.filename, "wb")
     pickle.dump((self.map, self.reverse_map), f)
Example #49
def loadfile(filename):
    """
    Load JSON from a filename.
    """
    return load(myopen(filename))
Example #50
def dumpfile(object, filename):
    """
    Dump JSON to a filename.
    """
    return dump(object, myopen(filename, "wb"))
Example #51
    RandomStreams(state.seed)
    numpy.random.seed(state.seed)
    datatrain = (PATH_DATA+NAME_DATA+'_1.pkl.gz',PATH_DATA+NAME_LABEL+'_1.pkl.gz')
    datatrainsave = PATH_SAVE+'/train.libsvm'
    datatest = (PATH_DATA+NAME_DATATEST+'_1.pkl.gz',PATH_DATA+NAME_LABELTEST+'_1.pkl.gz')
    datatestsave = PATH_SAVE+'/test.libsvm'

    depthbegin = 0

    #monitor best performance for reconstruction and classification
    state.besterr = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])
    state.besterrepoch = dict([(`trainsize`, []) for trainsize in VALIDATION_TRAININGSIZE])

    filename = PATH_DATA + NAME_DATATEST + '_1.pkl.gz'
    print filename
    f =myopen(filename,'r')
    train = theano.shared(numpy.asarray(cPickle.load(f),dtype=theano.config.floatX))
    f.close()
    normalshape = train.value.shape
    
    model=dA(numpy.random,RandomStreams(),input = None, n_visible = NINPUTS, n_hidden = N_HID, act = ACT, noise = NOISE)

    #RELOAD previous model
    channel.save()

    err = dict([(trainsize, {}) for trainsize in VALIDATION_TRAININGSIZE])
    rebuildunsup(model,LR,NOISE_LVL,ACTIVATION_REGULARIZATION_COEFF, WEIGHT_REGULARIZATION_COEFF, BATCHSIZE,train)

    epoch = 0
    if epoch in EPOCHSTEST:
        svm_validation(err, epoch, model,train,datatrain,datatrainsave,datatest,datatestsave, VALIDATION_TRAININGSIZE, VALIDATION_RUNS_FOR_EACH_TRAININGSIZE, PATH_SAVE, PATH_DATA, NAME_DATATEST)
Example #52
import sys

assert len(sys.argv) == 2
embeddingsscale = float(sys.argv[1])

#embeddingsfile = "/u/turian/data/share/embeddings-20090916-rcv1.case-intact.LEARNING_RATE=0_000000001_--EMBEDDING_LEARNING_RATE=0_0000032.model-720000000.txt.gz"
embeddingsfile = "/u/turian/data/share/hlbl_reps_clean_1.rcv1.clean.tokenized-CoNLL03.case-intact.txt.gz"

brownfile = "/u/turian/data/share/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1-v3.txt.gz"
prefixes = [4, 6, 10, 20]

from common.file import myopen
import string

word_to_embedding = {}
for l in myopen(embeddingsfile):
    sp = string.split(l)
    word_to_embedding[sp[0]] = [float(v) * embeddingsscale for v in sp[1:]]

from common.file import myopen
import string

word_to_cluster = {}
for l in myopen(brownfile):
    cluster, word, cnt = string.split(l)
    word_to_cluster[word] = cluster


def output_features(fo, seq):
    for i in range(2, len(seq) - 2):
        fs = []
Example #53
def wordmap():
    global _wordmap
    if _wordmap is None:
        _wordmap = cPickle.load(myopen(_wordmap_filename()))
        _wordmap.str = _wordmap.key
    return _wordmap
Example #54
def write(wordmap, name=""):
    """
    Write the word ID map, passed as a parameter.
    """
    print >>sys.stderr, "Writing word map to %s..." % _wordmap_filename(name)
    cPickle.dump(wordmap, myopen(_wordmap_filename(name), "w"))
Example #55
"""
Automatically load the wordmap, if available.
"""

import cPickle
from common.file import myopen
from hyperparameters import *
import sys


def _wordmap_filename(name):
    #import common.hyperparameters, common.options
    #HYPERPARAMETERS = common.hyperparameters.read("language-model")
    return HYPERPARAMETERS["MONOLINGUAL_VOCABULARY_IDMAP_FILE"]


wordmap = None
try:
    wordmap = cPickle.load(myopen(_wordmap_filename("")))
    wordmap.str = wordmap.key
except:
    print sys.exc_info()[0], sys.exc_info()[1]


def write(wordmap, name=""):
    """
    Write the word ID map, passed as a parameter.
    """
    print >> sys.stderr, "Writing word map to %s..." % _wordmap_filename(name)
    cPickle.dump(wordmap, myopen(_wordmap_filename(name), "w"))
Example #56
def write(wordmap, name=""):
    """
    Write the word ID map, passed as a parameter.
    """
    print >> sys.stderr, "Writing word map to %s..." % _wordmap_filename(name)
    cPickle.dump(wordmap, myopen(_wordmap_filename(name), "w"))
Example #57
def loadfile(filename):
    """
    Load JSON from a filename.
    """
    return load(myopen(filename))