random.seed(31415)
np.random.seed(9265)

word_vectors = np.concatenate((
    (np.random.rand(num_words, dim_vectors) - .5) / dim_vectors,
    np.zeros((num_words, dim_vectors))),
    axis=0)

params['sgd']['step'] = 0.2
params['sgd']['iterations'] = 40000
params['sgd']['tolerance'] = 1e-48
params['sgd']['anneal_every'] = 20000
params['sgd']['anneal_factor'] = 0.5

word_vectors0 = sgd(
    lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, params,
                                     neg_sampling_cost_and_gradient),
    word_vectors, params, postprocessing=normalize_rows, use_saved=True,
    print_every=100, save_params_every=5000)

# sanity check: cost at convergence should be around or below 10

# sum the input and output word vectors
word_vectors = (word_vectors0[:num_words, :] + word_vectors0[num_words:, :])

print "\n=== For autograder ==="
check_words = ["the", "a", "an", "movie", "ordinary", "but", "and"]
check_idx = [tokens[word] for word in check_words]
check_vecs = word_vectors[check_idx, :]
print check_vecs


# Visualize the word vectors you trained
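# A minimal sketch of the visualization step hinted at by the comment above
# (this block is not part of the original code): center a handful of trained
# word vectors, project them onto their top two SVD directions, and plot the
# words in 2-D with matplotlib. The word list is illustrative; any tokens
# present in `tokens` work.
import matplotlib.pyplot as plt

visualize_words = ["the", "a", "an", "movie", "ordinary", "but", "and"]
visualize_idx = [tokens[word] for word in visualize_words]
visualize_vecs = word_vectors[visualize_idx, :]

# Center the vectors and use the top-2 right singular vectors as a 2-D projection
centered = visualize_vecs - visualize_vecs.mean(axis=0)
U, S, Vt = np.linalg.svd(centered, full_matrices=False)
coord = centered.dot(Vt[:2, :].T)

for i in xrange(len(visualize_words)):
    plt.text(coord[i, 0], coord[i, 1], visualize_words[i])
plt.xlim((np.min(coord[:, 0]), np.max(coord[:, 0])))
plt.ylim((np.min(coord[:, 1]), np.max(coord[:, 1])))
plt.savefig('word_vectors.png')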
dim_vectors = word_vectors.shape[1]

dataset = StanfordSentiment()
train_features, train_labels, words = get_data(dataset, word_vectors, dtype='train')

weights = np.random.randn(dim_vectors, 5)  # D x NUM_LABELS array

# We will do batch optimization
params = AttrDict({
    'sgd': {'batch_size': 50, 'step': 3.0, 'iterations': iterations, 'tolerance': 0,
            'anneal_every': 10000, 'anneal_factor': 0.5},
    'dataset': {}
})

print "Starting SGD..."
weights = sgd(
    lambda weights: softmax_wrapper(train_features, train_labels, weights, regularization),
    weights, params, postprocessing=None, use_saved=False,
    print_every=500, save_params_every=1000)

_, _, pred = softmax_regression(train_features, train_labels, weights)
print "Train precision (%%): %f" % precision(train_labels, pred)
save_data(words, train_labels, pred, 'data_train.txt')

print "Testing on dev dataset"
dev_features, dev_labels, dev_words = get_data(dataset, word_vectors, dtype='dev')
print dev_features.shape, weights.shape
_, _, pred = softmax_regression(dev_features, dev_labels, weights)
print "Dev precision (%%): %f" % precision(dev_labels, pred)
save_data(dev_words, dev_labels, pred, 'data_dev.txt')

test_features, test_labels, test_words = get_data(dataset, word_vectors, dtype='test')
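# Hedged sketch, not the actual helper used above: `get_data` is assumed to
# turn each sentence into a fixed-size feature vector by averaging the word
# vectors of its tokens, roughly as in the hypothetical function below. The
# function name and signature are illustrative only.
def sentence_to_feature(tokens, word_vectors, sentence):
    """Average the word vectors of the words in `sentence` (a list of token strings)."""
    feature = np.zeros((word_vectors.shape[1],))
    for word in sentence:
        feature += word_vectors[tokens[word], :]
    return feature / len(sentence)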
# Context size
context_size = 5

print "Training word vectors"

# Reset the random seed to make sure that everyone gets the same results
random.seed(31415)
np.random.seed(9265)

word_vectors = np.concatenate((
    (np.random.rand(num_words, dim_vectors) - 0.5) / dim_vectors,
    np.zeros((num_words, dim_vectors))),
    axis=0)

word_vectors0 = sgd(
    lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, context_size,
                                     neg_sampling_cost_and_gradient),
    word_vectors, 0.3, 40000, postprocessing=normalize_rows, use_saved=True,
    print_every=10, tolerance=1e-8)

# sanity check: cost at convergence should be around or below 10

# sum the input and output word vectors
word_vectors = (word_vectors0[:num_words, :] + word_vectors0[num_words:, :])

print "\n=== For autograder ==="
check_words = ["the", "a", "an", "movie", "ordinary", "but", "and"]
check_idx = [tokens[word] for word in check_words]
check_vecs = word_vectors[check_idx, :]
print check_vecs


# In[ ]:
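# Hedged sketch of the row normalization passed as `postprocessing` above.
# The assignment supplies its own normalize_rows; this is just one plausible
# implementation, named differently to avoid shadowing it: scale every row of
# x to unit L2 norm.
def normalize_rows_sketch(x):
    norms = np.sqrt(np.sum(x ** 2, axis=1, keepdims=True))
    return x / norms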