# hyperparameters
hidden_size = 128
embedding_dim = 128
vocab_size = 20000
sentence_length = 128
batch_size = 32
gradient_limit = 5
clip_gradients = True
num_epochs = args.epochs
embedding_update = True

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# get the preprocessed and tokenized data
fname_h5, fname_vocab = build_data_train(filepath=args.review_file,
                                         vocab_file=args.vocab_file,
                                         skip_headers=True)

# optionally use the google-news word2vec vectors to initialize the embeddings
if args.use_w2v:
    w2v_file = args.w2v
    vocab, rev_vocab = cPickle.load(open(fname_vocab, 'rb'))
    init_emb_np, embedding_dim, _ = get_google_word2vec_W(w2v_file, vocab,
                                                          vocab_size=vocab_size,
                                                          index_from=3)
    print("Done loading the Word2Vec vectors: embedding size - {}".format(embedding_dim))
    embedding_update = True
    # wrap the pretrained matrix in neon's Array initializer so it can seed the embedding layer
    init_emb = Array(val=be.array(init_emb_np))
else:
    # no pretrained vectors: fall back to a small uniform initialization
    init_emb = Uniform(-0.1 / embedding_dim, 0.1 / embedding_dim)

h5f = h5py.File(fname_h5, 'r')
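For context, `init_emb` and `embedding_update` prepared above are meant to be consumed by the network's embedding layer. A minimal sketch of that hand-off, assuming neon's `LookupTable` layer is used for the embeddings (the full model stack is not shown in this section):

# sketch only: the values built above feed the embedding layer of the model
from neon.layers import LookupTable

embedding_layer = LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim,
                              init=init_emb, pad_idx=0,
                              update=embedding_update)

With `args.use_w2v` set, the layer starts from the pretrained Google-News vectors and `update=embedding_update` controls whether those weights are fine-tuned during training; otherwise it starts from the uniform initialization.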