# imports needed to make this snippet self-contained (module paths assumed
# from the neon imdb example; build_data_train lives in the example's own
# prepare module)
import h5py
import cPickle

from neon.backends import gen_backend
from neon.data.text_preprocessing import get_google_word2vec_W
from neon.initializers import Array, Uniform
from neon.util.argparser import NeonArgparser, extract_valid_args

from prepare import build_data_train

# parse the command line arguments; the custom flags below are inferred from
# the args.* attributes this snippet uses (--epochs and the backend/batch
# flags come free with NeonArgparser)
parser = NeonArgparser(__doc__)
parser.add_argument('--review_file', help='input movie review file (tsv)')
parser.add_argument('--vocab_file', help='file to write the processed vocabulary to')
parser.add_argument('--use_w2v', action='store_true',
                    help='initialize the embedding from pre-trained word2vec vectors')
parser.add_argument('--w2v', help='path to the Google News word2vec binary')
args = parser.parse_args()

# hyperparameters
hidden_size = 128
embedding_dim = 128
vocab_size = 20000
sentence_length = 128
batch_size = 32
gradient_limit = 5
clip_gradients = True
num_epochs = args.epochs
embedding_update = True
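# note (assumed): sentence_length is the pad/clip length applied during
# preprocessing, and gradient_limit is only consulted when clip_gradients is True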

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))
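# extract_valid_args filters the parsed CLI namespace down to the keyword
# arguments gen_backend actually accepts (backend, batch_size, device_id, ...),
# so example-specific flags such as --review_file are not passed through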

# get the preprocessed and tokenized data
fname_h5, fname_vocab = build_data_train(filepath=args.review_file,
                                         vocab_file=args.vocab_file, skip_headers=True)
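# build_data_train returns two artifacts: the HDF5 file opened below, holding
# the tokenized review splits, and a pickled (vocab, reverse-vocab) pair used
# for the word2vec lookup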


# optionally initialize the embedding from the pre-trained Google News word2vec vectors
if args.use_w2v:
    w2v_file = args.w2v
    with open(fname_vocab, 'rb') as f:
        vocab, rev_vocab = cPickle.load(f)
    init_emb_np, embedding_dim, _ = get_google_word2vec_W(w2v_file, vocab,
                                                          vocab_size=vocab_size, index_from=3)
    print("Done loading the Word2Vec vectors: embedding size - {}".format(embedding_dim))
    embedding_update = True
    # wrap the raw numpy weights in neon's Array initializer so the lookup
    # table can consume them (a bare ndarray is not a valid layer initializer)
    init_emb = Array(val=be.array(init_emb_np))
else:
    init_emb = Uniform(-0.1 / embedding_dim, 0.1 / embedding_dim)


h5f = h5py.File(fname_h5, 'r')
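
# --- everything below is an illustrative sketch, not part of the original
# snippet: the h5 dataset/attribute names ('reviews', 'train', 'valid',
# 'ntrain', 'nvalid') are assumptions about what build_data_train writes, and
# the layer stack mirrors the usual neon LSTM sentiment pipeline wired to the
# hyperparameters defined above
from neon.callbacks.callbacks import Callbacks
from neon.data import ArrayIterator
from neon.initializers import GlorotUniform
from neon.layers import (LSTM, Affine, Dropout, GeneralizedCost, LookupTable,
                         RecurrentSum)
from neon.models import Model
from neon.optimizers import Adagrad
from neon.transforms import CrossEntropyMulti, Logistic, Softmax, Tanh

# wrap the preprocessed splits in ArrayIterators; the first column of each
# row is assumed to hold the label, the rest the padded token ids
ntrain = h5f['reviews'].attrs['ntrain']
nvalid = h5f['reviews'].attrs['nvalid']
train_set = ArrayIterator(h5f['train'][:ntrain, 1:], h5f['train'][:ntrain, 0], nclass=2)
valid_set = ArrayIterator(h5f['valid'][:nvalid, 1:], h5f['valid'][:nvalid, 0], nclass=2)

# embedding -> LSTM -> sum over time -> binary softmax classifier
g_uni = GlorotUniform()
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim,
                init=init_emb, pad_idx=0, update=embedding_update),
    LSTM(hidden_size, g_uni, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax()),
]
model = Model(layers=layers)

# gradient_limit feeds neon's optimizer-level clipping when clip_gradients is set
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebinary=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_limit if clip_gradients else None)
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)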