Exemplo n.º 1
0
def load_model(embed_map=None, pickle_table=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map == None:
        print 'Loading word2vec embeddings...'
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    if pickle_table:
        t = numpy.load(pickle_table)
        table = OrderedDict()
        for k, v in t:
            table[k] = v
    else:
        print 'Creating word lookup tables...'
        table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
Exemplo n.º 2
0
def load_model(
        embed_map=None,
        path_to_model=PATH_TO_MODEL,            # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,          # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY,
        path_to_word2vec=PATH_TO_WORD2VEC
        ):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open(path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map == None:
        print 'Loading word2vec embeddings...'
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print 'Creating word lookup tables...'
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
Exemplo n.º 3
0
def load_model(
        path_to_model='/home/shunan/Code/skip-thoughts/experiments/amazon/amazon_model_bi.npz',
        path_to_dictionary='/home/shunan/Code/skip-thoughts/experiments/amazon/word_dicts.pkl',
        embed_map=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    # if embed_map == None:
    #     print 'Loading word2vec embeddings...'
    #     embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print 'Creating word lookup tables...'
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    # model is just a dict.
    return model