# Train `tweet_model` with SGD + AdaGrad on a cross-entropy objective,
# checking validation accuracy every 10 batches.
# NOTE(review): this fragment was whitespace-mangled onto one line; formatting
# reconstructed from syntax. The final statement was truncated at
# `acc = np.mean(` and has been completed from the identical sibling
# fragments in this file — confirm against the original source.
cost_function = CrossEntropy()
objective = CostMinimizationObjective(
    cost=cost_function, data_provider=train_data_provider)
update_rule = AdaGrad(gamma=0.1, model_template=tweet_model)
optimizer = SGD(
    model=tweet_model, objective=objective, update_rule=update_rule)

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

costs = []
prev_weights = tweet_model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    # Periodic validation check.
    if batch_index % 10 == 0:
        # print costs[-1], iteration_info['param_mean_abs_values']

        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = tweet_model.fprop(X_valid, meta=meta_valid)
        # Sanity check: fprop output rows should be valid probability
        # distributions (each row sums to 1).
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)
        # print Y_hat[:5]

        # Accuracy = fraction of rows where the argmax prediction matches
        # the one-hot label.
        acc = np.mean(
            np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
# Train `tweet_model` with a regularized objective; validation accuracy is
# computed every 10 batches. A gradient checker is built but its (slow)
# check is deliberately skipped.
# NOTE(review): this fragment was whitespace-mangled onto one line and its
# head was truncated inside the SGD(...) call; the opening
# `optimizer = SGD(` has been reconstructed from the near-identical sibling
# fragment in this file — confirm against the original source.
optimizer = SGD(
    model=tweet_model, objective=objective, update_rule=update_rule)

gradient_checker = ModelGradientChecker(
    CostMinimizationObjective(
        cost=cost_function,
        data_provider=validation_data_provider,
        regularizer=regularizer))

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

time_start = time.time()

costs = []
prev_weights = tweet_model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    # Periodic validation check.
    if batch_index % 10 == 0:
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = tweet_model.fprop(X_valid, meta=meta_valid)
        # Sanity check: each output row should sum to 1 (softmax output).
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # This is really slow:
        # grad_check = gradient_checker.check(model)
        grad_check = "skipped"

        acc = np.mean(
            np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
# SGD training loop that, every 10 batches, evaluates the model on the
# FULL validation set by iterating all validation batches and concatenating
# predictions/labels. `maybe_get` presumably pulls device arrays back to
# host (e.g. GPU -> numpy) — TODO confirm.
# NOTE(review): this fragment was whitespace-mangled onto one line;
# formatting reconstructed from syntax. It ends mid-loop in the original
# chunking (the statements after the concatenations are not visible here).
optimizer = SGD(
    model=model, objective=objective, update_rule=update_rule)

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

time_start = time.time()

best_acc = -1.0
progress = []
costs = []
prev_weights = model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    # Periodic full-validation-set evaluation.
    if batch_index % 10 == 0:
        Y_hat = []
        Y_valid = []
        for _ in xrange(validation_data_provider.batches_per_epoch):
            X_valid_batch, Y_valid_batch, meta_valid = \
                validation_data_provider.next_batch()
            X_valid_batch = maybe_get(X_valid_batch)
            Y_valid_batch = maybe_get(Y_valid_batch)
            Y_valid.append(Y_valid_batch)
            Y_hat.append(
                maybe_get(model.fprop(X_valid_batch, meta=meta_valid)))
        # Stack per-batch results into single (n_examples, n_classes) arrays.
        Y_valid = np.concatenate(Y_valid, axis=0)
        Y_hat = np.concatenate(Y_hat, axis=0)
cost_function = LargeMarginCost(0.1) noise_model = RandomAlphabetCorruption(alphabet) objective = NoiseContrastiveObjective( cost=cost_function, data_provider=train_data_provider, noise_model=noise_model) update_rule = AdaGrad( gamma=0.1, model_template=model) optimizer = SGD(model=model, objective=objective, update_rule=update_rule) evaluator = ModelEvaluator(alphabet) n_batches = train_data_provider.batches_per_epoch * n_epochs costs = [] prev_weights = model.pack() for batch_index, iteration_info in enumerate(optimizer): costs.append(iteration_info['cost']) if batch_index % 50 == 0: print costs[-1], iteration_info['param_mean_abs_values'] evaluator.evaluate(model) if batch_index % 100 == 0: with open("model.pkl", 'w') as model_file: pickle.dump(model, model_file, protocol=-1)
print model cost_function = LargeMarginCost(0.1) noise_model = RandomAlphabetCorruption(alphabet) objective = NoiseContrastiveObjective(cost=cost_function, data_provider=train_data_provider, noise_model=noise_model) update_rule = AdaGrad(gamma=0.1, model_template=model) optimizer = SGD(model=model, objective=objective, update_rule=update_rule) evaluator = ModelEvaluator(alphabet) n_batches = train_data_provider.batches_per_epoch * n_epochs costs = [] prev_weights = model.pack() for batch_index, iteration_info in enumerate(optimizer): costs.append(iteration_info['cost']) if batch_index % 50 == 0: print costs[-1], iteration_info['param_mean_abs_values'] evaluator.evaluate(model) if batch_index % 100 == 0: with open("model.pkl", 'w') as model_file: pickle.dump(model, model_file, protocol=-1)
# Train `tweet_model` with a regularized objective; validation accuracy is
# computed every 10 batches. A gradient checker is constructed but its
# (slow) check is deliberately skipped.
# NOTE(review): this fragment was whitespace-mangled onto one line; the
# final statement was truncated at `acc = np.mean(` and has been completed
# from the identical sibling fragment in this file — confirm against the
# original source.
optimizer = SGD(
    model=tweet_model, objective=objective, update_rule=update_rule)

gradient_checker = ModelGradientChecker(
    CostMinimizationObjective(
        cost=cost_function,
        data_provider=validation_data_provider,
        regularizer=regularizer))

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

time_start = time.time()

costs = []
prev_weights = tweet_model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    # Periodic validation check.
    if batch_index % 10 == 0:
        X_valid, Y_valid, meta_valid = validation_data_provider.next_batch()
        Y_hat = tweet_model.fprop(X_valid, meta=meta_valid)
        # Sanity check: each output row should sum to 1 (softmax output).
        assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6)

        # This is really slow:
        # grad_check = gradient_checker.check(model)
        grad_check = "skipped"

        acc = np.mean(
            np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1))
# model.unpack(w) # objective = CostMinimizationObjective(cost=cost_function, data_provider=validation_data_provider) # cost, grads = objective.evaluate(model) # return cost # # def grad(w): # model.unpack(w) # objective = CostMinimizationObjective(cost=cost_function, data_provider=validation_data_provider) # cost, grads = objective.evaluate(model) # # return np.concatenate([g.ravel() for g in grads]) # # print fast_gradient_check(func, grad, model.pack(), method='diff') costs = [] prev_weights = model.pack() for batch_index, iteration_info in enumerate(optimizer): costs.append(iteration_info['cost']) if batch_index % 10 == 0: X_valid, Y_valid, meta_valid = validation_data_provider.next_batch( ) Y_hat = model.fprop(X_valid, meta=meta_valid) assert np.all(np.abs(Y_hat.sum(axis=1) - 1) < 1e-6) acc = np.mean( np.argmax(Y_hat, axis=1) == np.argmax(Y_valid, axis=1)) print "B: {}, A: {}, C: {}, Param size: {}".format( batch_index, acc, np.exp(costs[-1]),
# Regularized SGD training loop with full-validation-set evaluation every
# 10 batches. `maybe_get` presumably moves device arrays to host — TODO
# confirm.
# NOTE(review): this fragment was whitespace-mangled onto one line and is
# truncated at both ends: it opened mid `CostMinimizationObjective(...)`
# call (the leading keyword arguments below are reconstructed from the
# sibling fragments in this file — confirm against the original source),
# and it ends inside the validation loop before the concatenation step.
objective = CostMinimizationObjective(
    cost=cost_function,
    data_provider=train_data_provider,
    regularizer=regularizer)
update_rule = AdaGrad(gamma=0.01, model_template=model)
optimizer = SGD(model=model, objective=objective, update_rule=update_rule)

n_epochs = 1
n_batches = train_data_provider.batches_per_epoch * n_epochs

time_start = time.time()

best_acc = -1.0
progress = []
costs = []
prev_weights = model.pack()
for batch_index, iteration_info in enumerate(optimizer):
    costs.append(iteration_info['cost'])

    # Periodic full-validation-set evaluation.
    if batch_index % 10 == 0:
        Y_hat = []
        Y_valid = []
        for _ in xrange(validation_data_provider.batches_per_epoch):
            X_valid_batch, Y_valid_batch, meta_valid = \
                validation_data_provider.next_batch()
            X_valid_batch = maybe_get(X_valid_batch)
            Y_valid_batch = maybe_get(Y_valid_batch)
            Y_valid.append(Y_valid_batch)
            Y_hat.append(
                maybe_get(model.fprop(X_valid_batch, meta=meta_valid)))