optimizer = SGD(
    model=tweet_model,
    objective=objective,
    update_rule=update_rule)

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

costs = []
prev_weights = tweet_model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    if batch_index % 10 == 0:
        # print costs[-1], iteration_info['param_mean_abs_values']
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = tweet_model.fprop(X_valid, meta=meta_valid)
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # print Y_hat[:5]

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
        print "B: {}, A: {}, C: {}, Param size: {}".format(
            batch_index, acc, costs[-1], np.mean(np.abs(tweet_model.pack())))

    if batch_index % 100 == 0:
        with open("model.pkl", 'wb') as model_file:
            pickle.dump(tweet_model, model_file, protocol=-1)
n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

time_start = time.time()

costs = []
prev_weights = tweet_model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    if batch_index % 10 == 0:
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = tweet_model.fprop(X_valid, meta=meta_valid)
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # This is really slow:
        # grad_check = gradient_checker.check(model)
        grad_check = "skipped"

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
        print "B: {}, A: {}, C: {}, Prop1: {}, Param size: {}, g: {}".format(
            batch_index, acc, costs[-1], np.argmax(Y_hat, axis=1).mean(),
            np.mean(np.abs(tweet_model.pack())), grad_check)
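
# Both Y_hat and Y_valid in the validation checks above are (batch, n_classes)
# arrays: Y_hat rows are softmax probabilities (hence the row-sum assertion)
# and Y_valid rows are one-hot labels, so accuracy compares their argmaxes.
# Tiny worked example (illustration only, values are made up):
#
#   Y_hat = np.array([[0.9, 0.1], [0.2, 0.8]])   # predicted probabilities
#   Y_valid = np.array([[1, 0], [1, 0]])         # one-hot labels
#   acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
#   # argmaxes are [0, 1] vs [0, 0], so acc == 0.5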
progress = []
costs = []
prev_weights = model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    if batch_index % 10 == 0:
        Y_hat = []
        Y_valid = []
        for _ in xrange(validation_data_provider.batches_per_epoch):
            X_valid_batch, Y_valid_batch, meta_valid = validation_data_provider.next_batch()
            X_valid_batch = maybe_get(X_valid_batch)
            Y_valid_batch = maybe_get(Y_valid_batch)
            Y_valid.append(Y_valid_batch)
            Y_hat.append(maybe_get(model.fprop(X_valid_batch, meta=meta_valid)))
        Y_valid = np.concatenate(Y_valid, axis=0)
        Y_hat = np.concatenate(Y_hat, axis=0)
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # This is really slow:
        # grad_check = gradient_checker.check(model)
        grad_check = "skipped"

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))

        if acc > best_acc:
            best_acc = acc
            with open(os.path.expanduser("~/model_best.pkl"), 'wb') as model_file:
                pickle.dump(model, model_file, protocol=-1)
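
# The validation loop above calls maybe_get() to copy batches back to host
# memory before concatenating them with numpy. The helper is defined elsewhere
# in the repo; the sketch below is only an assumption about its behaviour
# (hypothetical, not the repo's actual implementation): return .get() for
# GPU-backed arrays that expose it, and the input unchanged otherwise.
def maybe_get(x):
    # GPU array types (e.g. pycuda GPUArray) expose .get() to copy to host.
    return x.get() if hasattr(x, 'get') else x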
def main():
    random.seed(34532)
    np.random.seed(675)
    np.set_printoptions(linewidth=100)

    data_dir = os.path.join("/users/mdenil/code/txtnets/txtnets_deployed/data", "stanfordmovie")

    trainer = Word2Vec(
        train=os.path.join(data_dir, "stanfordmovie.train.sentences.clean.projected.txt"),
        output="stanford-movie-vectors.bin",
        cbow=1,
        size=300,
        window=8,
        negative=25,
        hs=0,
        sample=1e-4,
        threads=20,
        binary=1,
        iter=15,
        min_count=1)

    trainer.train()

    gensim_model = gensim.models.Word2Vec.load_word2vec_format(
        "/users/mdenil/code/txtnets/txtnets_deployed/code/stanford-movie-vectors.bin",
        binary=True)

    # print(gensim_model.most_similar(["refund"]))
    # print(gensim_model.most_similar(["amazing"]))

    embedding_model = txtnets_model_from_gensim_word2vec(gensim_model)

    with open(os.path.join(data_dir, "stanfordmovie.train.sentences.clean.projected.flat.json")) as data_file:
        data = json.load(data_file)

    random.shuffle(data)
    X, Y = map(list, zip(*data))
    Y = [[":)", ":("].index(y) for y in Y]

    batch_size = 100
    n_validation = 500

    train_data_provider = LabelledSequenceMinibatchProvider(
        X=X[:-n_validation],
        Y=Y[:-n_validation],
        batch_size=batch_size,
        padding='PADDING')

    transformed_train_data_provider = TransformedLabelledDataProvider(
        data_source=train_data_provider,
        transformer=embedding_model)

    validation_data_provider = LabelledSequenceMinibatchProvider(
        X=X[-n_validation:],
        Y=Y[-n_validation:],
        batch_size=batch_size,
        padding='PADDING')

    transformed_validation_data_provider = TransformedLabelledDataProvider(
        data_source=validation_data_provider,
        transformer=embedding_model)

    logistic_regression = CSM(
        layers=[
            Sum(axes=['w']),
            Softmax(
                n_input_dimensions=gensim_model.syn0.shape[1],
                n_classes=2)
        ])

    cost_function = CrossEntropy()

    regularizer = L2Regularizer(lamb=1e-4)

    objective = CostMinimizationObjective(
        cost=cost_function,
        data_provider=transformed_train_data_provider,
        regularizer=regularizer)

    update_rule = AdaGrad(
        gamma=0.1,
        model_template=logistic_regression)

    optimizer = SGD(
        model=logistic_regression,
        objective=objective,
        update_rule=update_rule)

    for batch_index, iteration_info in enumerate(optimizer):
        if batch_index % 100 == 0:
            # print(iteration_info['cost'])
            Y_hat = []
            Y_valid = []
            for _ in xrange(transformed_validation_data_provider.batches_per_epoch):
                X_valid_batch, Y_valid_batch, meta_valid = transformed_validation_data_provider.next_batch()
                Y_valid.append(get(Y_valid_batch))
                Y_hat.append(get(logistic_regression.fprop(X_valid_batch, meta=meta_valid)))
            Y_valid = np.concatenate(Y_valid, axis=0)
            Y_hat = np.concatenate(Y_hat, axis=0)

            acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))

            print("B: {}, A: {}, C: {}".format(batch_index, acc, iteration_info['cost']))

            with open("model_w2vec_logreg.pkl", 'wb') as model_file:
                pickle.dump(embedding_model.move_to_cpu(), model_file, protocol=-1)
                pickle.dump(logistic_regression.move_to_cpu(), model_file, protocol=-1)
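
# Loading the checkpoint written above: the embedding model and the logistic
# regression were dumped into the same file back to back, so they must be read
# back in the same order. A minimal sketch, assuming the txtnets classes are
# importable at unpickling time:
#
#   import pickle
#
#   with open("model_w2vec_logreg.pkl", 'rb') as model_file:
#       embedding_model = pickle.load(model_file)
#       logistic_regression = pickle.load(model_file)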
#     objective = CostMinimizationObjective(cost=cost_function, data_provider=validation_data_provider)
#     cost, grads = objective.evaluate(model)
#     return cost
#
# def grad(w):
#     model.unpack(w)
#     objective = CostMinimizationObjective(cost=cost_function, data_provider=validation_data_provider)
#     cost, grads = objective.evaluate(model)
#
#     return np.concatenate([g.ravel() for g in grads])
#
# print fast_gradient_check(func, grad, model.pack(), method='diff')

costs = []
prev_weights = model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    if batch_index % 10 == 0:
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = model.fprop(X_valid, meta=meta_valid)
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
        print "B: {}, A: {}, C: {}, Param size: {}".format(
            batch_index, acc, np.exp(costs[-1]), np.mean(np.abs(model.pack())))
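
# The commented-out block above builds func/grad closures for a gradient check.
# fast_gradient_check lives elsewhere in the repo; the sketch below is a generic
# central-difference check written against the same func/grad interface
# (illustrative only, not the repo's implementation). It probes a few random
# coordinates and reports the worst absolute error.
def finite_difference_check(func, grad, w, n_probes=10, eps=1e-6):
    analytic = grad(w)
    errors = []
    for i in np.random.choice(w.size, size=min(n_probes, w.size), replace=False):
        w_plus, w_minus = w.copy(), w.copy()
        w_plus[i] += eps
        w_minus[i] -= eps
        # central difference approximation of d(cost)/d(w[i])
        numeric = (func(w_plus) - func(w_minus)) / (2 * eps)
        errors.append(abs(numeric - analytic[i]))
    return max(errors)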
#     momentum=0.5,
#     epsilon=0.05,
#     model_template=model)

# update_rule = NAG(
#     momentum=0.95,
#     epsilon=0.001,
#     model_template=model)

optimizer = SGD(model=model, objective=objective, update_rule=update_rule)

n_batches = train_data_provider.batches_per_epoch * n_epochs

costs = []
prev_weights = model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    if batch_index % 10 == 0:
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = model.fprop(X_valid, meta=meta_valid)
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
        print "B: {}, A: {}, Entropy (bits): {}".format(
            batch_index, acc, costs[-1] * np.log2(np.exp(1)))

    if batch_index % 100 == 0:
        with open("model.pkl", 'wb') as model_file:
            pickle.dump(model, model_file, protocol=-1)
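
# The "Entropy (bits)" column converts the cross-entropy cost from nats to
# bits: multiplying by log2(e) is the same as dividing by ln(2). A chance-level
# binary classifier sits at 1.0 bit. Quick check of the conversion:
#
#   cost_nats = np.log(2.0)              # cross-entropy of a uniform 2-class guess
#   print cost_nats * np.log2(np.e)      # -> 1.0 bit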
prev_weights = model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    if batch_index % 10 == 0:
        Y_hat = []
        Y_valid = []
        for _ in xrange(validation_data_provider.batches_per_epoch):
            X_valid_batch, Y_valid_batch, meta_valid = validation_data_provider.next_batch()
            X_valid_batch = maybe_get(X_valid_batch)
            Y_valid_batch = maybe_get(Y_valid_batch)
            Y_valid.append(Y_valid_batch)
            Y_hat.append(maybe_get(model.fprop(X_valid_batch, meta=meta_valid)))
        Y_valid = np.concatenate(Y_valid, axis=0)
        Y_hat = np.concatenate(Y_hat, axis=0)
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # This is really slow:
        # grad_check = gradient_checker.check(model)
        grad_check = "skipped"

        acc = np.mean(np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))

        if acc > best_acc:
            best_acc = acc
            with open(os.path.expanduser("~/model_best.pkl"), 'wb') as model_file:
                pickle.dump(model, model_file, protocol=-1)