            word2idx = json.load(f)
        sentences = []  # dummy - we won't actually use it
    else:
        sentences, word2idx = get_wikipedia_data(n_files=n_files, n_vocab=2000)
        with open(w2i_file, 'w') as f:
            json.dump(word2idx, f)

    V = len(word2idx)
    model = Glove(80, V, 10)  # latent dim 80, vocab size V, context window 10

    # alternating least squares (coordinate descent):
    # model.fit(sentences, cc_matrix=cc_matrix, epochs=20)

    # full gradient descent:
    model.fit(
        sentences,
        cc_matrix=cc_matrix,
        learning_rate=3 * 10e-5,  # note: 3 * 10e-5 == 3e-4
        reg=0.01,
        epochs=2000,
        gd=True,
        use_theano=False,
    )
    model.save(we_file)


if __name__ == '__main__':
    we = 'glove_model_50.npz'
    w2i = 'glove_word2idx_50.json'
    main(we, w2i)
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
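# find_analogies() is called above but not defined in this listing. Below is a
# minimal sketch of such a helper, assuming model.save() wrote the two GloVe
# factor matrices via np.savez (default keys 'arr_0'/'arr_1', both shaped
# V x D) -- the key names and shapes are assumptions, not the script's
# confirmed save format:
import json
import numpy as np

def find_analogies_sketch(w1, w2, w3, concat, we_file, w2i_file):
    npz = np.load(we_file)
    W, U = npz['arr_0'], npz['arr_1']  # assumed: input/output embeddings, V x D
    with open(w2i_file) as f:
        word2idx = json.load(f)
    idx2word = {i: w for w, i in word2idx.items()}

    # GloVe learns two vectors per word; either concatenate or average them.
    We = np.hstack([W, U]) if concat else (W + U) / 2

    # Vector arithmetic: king - man + woman should land near queen.
    v0 = We[word2idx[w1]] - We[word2idx[w2]] + We[word2idx[w3]]

    def cos_dist(a, b):
        return 1 - a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))

    # Rank all words by cosine distance to the target vector.
    best = min(range(len(We)), key=lambda i: cos_dist(We[i], v0))
    print('%s - %s = %s - %s' % (w1, w2, idx2word[best], w3))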
    model.fit(
        sentences,
        cc_matrix=cc_matrix,
        learning_rate=3 * 10e-5,  # note: 3 * 10e-5 == 3e-4
        reg=0.1,
        epochs=10,
        gd=True,
        use_theano=False,
        use_tensorflow=True,
    )
    model.save(we_file)


if __name__ == '__main__':
    # we = 'glove_model_50.npz'
    # w2i = 'glove_word2idx_50.json'
    we = 'glove_model_brown.npz'
    w2i = 'glove_word2idx_brown.json'
    main(we, w2i, use_brown=True)
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
        find_analogies('france', 'french', 'english', concat, we, w2i)
        find_analogies('japan', 'japanese', 'chinese', concat, we, w2i)
        find_analogies('japan', 'japanese', 'italian', concat, we, w2i)
        find_analogies('japan', 'japanese', 'australian', concat, we, w2i)
        find_analogies('december', 'november', 'june', concat, we, w2i)
    model.fit(
        sentences,
        cc_matrix=cc_matrix,
        learning_rate=3 * 10e-5,  # note: 3 * 10e-5 == 3e-4
        reg=0.01,
        epochs=500,
        gd=True,
        use_theano=False,
        use_tensorflow=True,
    )
    model.save(we_file)


if __name__ == '__main__':
    # we = 'glove_model_50.npz'
    # w2i = 'glove_word2idx_50.json'
    we = 'glove_model_brown.npz'
    w2i = 'glove_word2idx_brown.json'
    main(we, w2i, use_brown=True)
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
        find_analogies('france', 'french', 'english', concat, we, w2i)
        find_analogies('japan', 'japanese', 'chinese', concat, we, w2i)
        find_analogies('japan', 'japanese', 'italian', concat, we, w2i)
        find_analogies('japan', 'japanese', 'australian', concat, we, w2i)
        find_analogies('december', 'november', 'june', concat, we, w2i)
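# For context: with gd=True, fit() minimizes the GloVe weighted least-squares
# cost over the co-occurrence matrix. Below is a NumPy sketch of one full-batch
# gradient step under stated assumptions -- the names X, W, U, b, c, mu and
# the log(X + 1) smoothing are illustrative, not the class's actual attributes:
import numpy as np

def glove_gd_step(X, W, U, b, c, mu, learning_rate=3 * 10e-5, reg=0.01,
                  xmax=100.0, alpha=0.75):
    # Standard GloVe weighting f(X) = (X / xmax)^alpha, capped at 1.
    fX = np.minimum((X / xmax) ** alpha, 1.0)
    logX = np.log(X + 1)  # +1 so zero counts stay finite

    # delta[i,j] = w_i . u_j + b_i + c_j + mu - log X_ij
    delta = W.dot(U.T) + b[:, None] + c[None, :] + mu - logX
    cost = np.sum(fX * delta * delta)

    # Gradients of the weighted squared error, plus L2 regularization;
    # compute all gradients before updating any parameter.
    gW = 2 * (fX * delta).dot(U) + reg * W
    gU = 2 * (fX * delta).T.dot(W) + reg * U
    gb = 2 * (fX * delta).sum(axis=1)
    gc = 2 * (fX * delta).sum(axis=0)

    W -= learning_rate * gW
    U -= learning_rate * gU
    b -= learning_rate * gb
    c -= learning_rate * gc
    return cost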