Пример #1
0
def init_params_shared(params):
    """Wrap every entry of ``params`` in a Theano shared variable.

    Args:
        params: Mapping from parameter name to the value passed to
            ``theano.shared`` (presumably numpy arrays -- TODO confirm
            against the callers).

    Returns:
        An ``OrderedDict`` holding the same keys, in the iteration order of
        ``params``, each mapped to ``theano.shared(value, name=key)``.
    """
    mylog('Start Initializing Shared Parameters')
    params_shared = OrderedDict()
    # items() is available on both Python 2 and Python 3 mappings, whereas
    # iteritems() only exists on Python 2.
    for key, value in params.items():
        params_shared[key] = theano.shared(value, name=key)
    mylog('Stop Initializing Shared Parameters')
    return params_shared
Пример #2
0
    writeFile(prefix + '.reference', reference)
    writeFile(prefix + '.summary', summary)
    writeFile(prefix + '.counts', time_data)


def loadFromText(fName):
    """Read a UTF-8 text file and tokenize every line on whitespace.

    Args:
        fName: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, in file order. Each
        entry is the list of tokens obtained by stripping the line and
        splitting it on whitespace, so a blank line yields an empty list.
    """
    # The context manager closes the handle on every exit path, including a
    # decoding error raised part-way through the file.
    with codecs.open(fName, 'r', encoding='utf-8') as f:
        return [l.strip().split() for l in f]


if __name__ == '__main__':
    # Logger first: both the options loader and the data loader receive it.
    log = mylog()
    dataoptions = optionsLoader(log, True)

    # Vocabulary, features and dataset are loaded up front.
    # Only the three lookup tables of the pickled vocabulary are passed on.
    Vocab_Giga = loadFromPKL('giga_new.Vocab')
    Vocab = {'w2i': Vocab_Giga.w2i, 'i2w': Vocab_Giga.i2w, 'i2e': Vocab_Giga.i2e}

    # One i2e table per feature named in the options, in "featList" order.
    Features_Giga = loadFromPKL('features.Embedding')
    I2Es = [Features_Giga[feat].i2e for feat in dataoptions["featList"]]

    dataset = data_loader(Vocab, dataoptions, log)
Пример #3
0
import codecs, sys

from mylog.mylog import mylog
from vocabulary.vocabulary import Vocabulary, I2E
from utility.utility import *
from options_loader import *

if __name__ == '__main__':
    #Vocab = loadFromPKL('../../vocab/gigaword.pkl')

    log_vocab = mylog(logFile='log/log_vocab')
    options = optionsLoader(log_vocab, True)

    fileName = options['primary_dir']

    inputCorpus = [fileName + options['trainSet'] + '.Ndocument']

    outputCorpus = [fileName + options['trainSet'] + '.Nsummary']

    Vocab = Vocabulary(options,
                       inputCorpus=inputCorpus,
                       outputCorpus=outputCorpus)

    log_vocab.log(
        str(Vocab.full_size) + ', ' + str(Vocab.n_in) + ', ' +
        str(Vocab.n_out))

    saveToPKL(fileName + sys.argv[1] + '.Vocab', Vocab)

    f = codecs.open(fileName + sys.argv[1] + '.i2w', 'w', encoding='utf-8')
    for item in Vocab.i2w: