示例#1
0
文件: mlp.py 项目: jeromewu/ml_learn
def mlp_train(train_file_name, test_file_name, model_file_name, n_dim, n_label, n_hidden, learning_rate, L1_reg=0.00, L2_reg=0.0001, batch_size=20, n_epochs=None):
  """Train an MLP classifier with minibatch SGD, saving the best model seen.

  After every full pass over the training minibatches the mean of
  ``classifier.errors`` over all test minibatches is computed and logged.
  Whenever that score improves on the best one so far, the hidden-layer and
  logistic-regression-layer parameters are written with ``np.save``.

  If ``model_file_name`` already exists as a file, its contents are loaded
  and handed to ``MLP`` as the starting parameters.

  Args:
    train_file_name: training data location, forwarded to ``load_data``.
    test_file_name: test data location, forwarded to ``load_data``.
    model_file_name: path used both to resume from and to save the model.
    n_dim: input dimensionality (``n_in`` of the MLP, also given to
      ``load_data``).
    n_label: number of output classes (``n_out`` of the MLP).
    n_hidden: number of hidden units.
    learning_rate: step size of the gradient-descent update.
    L1_reg: weight of ``classifier.L1`` in the cost.
    L2_reg: weight of ``classifier.L2_sqr`` in the cost.
    batch_size: examples per minibatch; a trailing partial batch is dropped.
    n_epochs: optional cap on the number of epochs. ``None`` (the default)
      keeps the historical behaviour of training until interrupted.

  Returns:
    The best mean test error observed (only reachable when ``n_epochs`` is
    given, since the unbounded loop never returns).

  Raises:
    ValueError: if either data set holds fewer than ``batch_size`` rows.
  """
  logger.info('loading data')
  train_set_x, train_set_y, test_set_x, test_set_y = load_data(train_file_name, test_file_name, n_dim)
  # Floor division keeps the batch counts integral regardless of whether
  # true division is in effect.
  n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
  n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
  # With no training batch the loop below would spin forever doing nothing;
  # with no test batch the score is NaN and no model is ever saved.
  if n_train_batches < 1 or n_test_batches < 1:
    raise ValueError('batch_size %d leaves no complete minibatch (train batches: %d, test batches: %d)' % (batch_size, n_train_batches, n_test_batches))

  logger.info('building the model')
  idx = T.lscalar()
  x = T.matrix('x')
  y = T.ivector('y')
  rng = np.random.RandomState(1234)

  hidden_W = None
  hidden_b = None
  sgd_W = None
  sgd_b = None
  # NOTE(review): np.save appends '.npy' to names lacking that suffix, while
  # this check tests the name exactly as given -- resuming only works when
  # model_file_name already ends in '.npy'.
  # NOTE(review): the saved tuple is presumably stored as an object array;
  # recent NumPy releases need allow_pickle=True to load those -- confirm
  # against the NumPy version in use.
  if os.path.isfile(model_file_name):
    logger.debug('load existing model')
    hidden_W, hidden_b, sgd_W, sgd_b = np.load(model_file_name)

  classifier = MLP(rng=rng, input=x, n_in=n_dim, n_hidden=n_hidden, n_out=n_label, hidden_W=hidden_W, hidden_b=hidden_b, sgd_W=sgd_W, sgd_b=sgd_b)
  cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr
  test_model = theano.function(inputs=[idx],
                               outputs=classifier.errors(y),
                               givens={
                                 x: test_set_x[idx * batch_size:(idx + 1) * batch_size],
                                 y: test_set_y[idx * batch_size:(idx + 1) * batch_size]
                               })
  # Plain gradient descent: every parameter moves against its own gradient.
  gparams = [T.grad(cost, param) for param in classifier.params]
  updates = [(param, param - learning_rate * gparam)
             for param, gparam in zip(classifier.params, gparams)]
  train_model = theano.function(inputs=[idx],
                                outputs=cost,
                                updates=updates,
                                givens={
                                  x: train_set_x[idx * batch_size:(idx + 1) * batch_size],
                                  y: train_set_y[idx * batch_size:(idx + 1) * batch_size]
                                })

  logger.info('training the model')
  best_test_score = np.inf
  epoch = 0
  # Wall-clock time: time.clock() measured CPU time on Unix and no longer
  # exists in Python 3.8+.
  start_time = time.time()
  while n_epochs is None or epoch < n_epochs:
    epoch = epoch + 1
    for minibatch_idx in range(n_train_batches):
      train_model(minibatch_idx)

    # Evaluate once per epoch, i.e. right after the last minibatch.
    test_score = np.mean([test_model(i) for i in range(n_test_batches)])
    elapsed_hr = (time.time() - start_time) / 3600.0
    logger.info(('epoch %i, ran for %.5f hr, minibatch %i/%i, test error %f %%') % (epoch, elapsed_hr, n_train_batches, n_train_batches, test_score * 100.))
    if test_score < best_test_score:
      np.save(model_file_name, (classifier.hiddenLayer.W.get_value(), classifier.hiddenLayer.b.get_value(), classifier.logRegressionLayer.W.get_value(), classifier.logRegressionLayer.b.get_value()))
      best_test_score = test_score
      logger.info(('epoch %i, minibatch %i/%i, test error of best model %f %%') % (epoch, n_train_batches, n_train_batches, best_test_score * 100.))
  return best_test_score
示例#2
0
def sgd_train(train_file_name, test_file_name, model_file_name, n_dim, n_label, learning_rate, batch_size=600, n_epochs=None):
  """Train a logistic-regression classifier with minibatch SGD.

  After every full pass over the training minibatches the mean of
  ``classifier.errors`` over all test minibatches is computed and logged.
  Whenever that score improves on the best one so far, ``W`` and ``b`` are
  written with ``np.save``.

  If ``model_file_name`` already exists as a file, its contents are loaded
  and handed to ``LogisticRegression`` as the starting parameters.

  Args:
    train_file_name: training data location, forwarded to ``load_data``.
    test_file_name: test data location, forwarded to ``load_data``.
    model_file_name: path used both to resume from and to save the model.
    n_dim: input dimensionality (``n_in`` of the classifier, also given to
      ``load_data``).
    n_label: number of output classes (``n_out`` of the classifier).
    learning_rate: step size of the gradient-descent update.
    batch_size: examples per minibatch; a trailing partial batch is dropped.
    n_epochs: optional cap on the number of epochs. ``None`` (the default)
      keeps the historical behaviour of training until interrupted.

  Returns:
    The best mean test error observed (only reachable when ``n_epochs`` is
    given, since the unbounded loop never returns).

  Raises:
    ValueError: if either data set holds fewer than ``batch_size`` rows.
  """
  logger.info('loading data')
  train_set_x, train_set_y, test_set_x, test_set_y = load_data(train_file_name, test_file_name, n_dim)
  # Floor division keeps the batch counts integral regardless of whether
  # true division is in effect.
  n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
  n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
  # With no training batch the loop below would spin forever doing nothing;
  # with no test batch the score is NaN and no model is ever saved.
  if n_train_batches < 1 or n_test_batches < 1:
    raise ValueError('batch_size %d leaves no complete minibatch (train batches: %d, test batches: %d)' % (batch_size, n_train_batches, n_test_batches))

  logger.info('building the model')
  idx = T.lscalar()
  x = T.matrix('x')
  y = T.ivector('y')

  W = None
  b = None
  # NOTE(review): np.save appends '.npy' to names lacking that suffix, while
  # this check tests the name exactly as given -- resuming only works when
  # model_file_name already ends in '.npy'.
  # NOTE(review): the saved tuple is presumably stored as an object array;
  # recent NumPy releases need allow_pickle=True to load those -- confirm
  # against the NumPy version in use.
  if os.path.isfile(model_file_name):
    logger.debug('load existing model')
    W, b = np.load(model_file_name)

  classifier = LogisticRegression(input=x, n_in=n_dim, n_out=n_label, W=W, b=b)
  cost = classifier.negative_log_likelihood(y)

  test_model = theano.function(inputs=[idx],
                               outputs=classifier.errors(y),
                               givens={
                                 x: test_set_x[idx * batch_size: (idx + 1) * batch_size],
                                 y: test_set_y[idx * batch_size: (idx + 1) * batch_size]
                               })
  # Plain gradient descent on the weight matrix and the bias.
  g_W = T.grad(cost=cost, wrt=classifier.W)
  g_b = T.grad(cost=cost, wrt=classifier.b)
  updates = [(classifier.W, classifier.W - learning_rate * g_W),
             (classifier.b, classifier.b - learning_rate * g_b)]
  train_model = theano.function(inputs=[idx],
                                outputs=cost,
                                updates=updates,
                                givens={
                                  x: train_set_x[idx * batch_size: (idx + 1) * batch_size],
                                  y: train_set_y[idx * batch_size: (idx + 1) * batch_size]
                                })

  logger.info('training the model')
  best_test_score = np.inf
  epoch = 0
  # Wall-clock time: time.clock() measured CPU time on Unix and no longer
  # exists in Python 3.8+.
  start_time = time.time()
  while n_epochs is None or epoch < n_epochs:
    epoch = epoch + 1
    for minibatch_idx in range(n_train_batches):
      train_model(minibatch_idx)

    # Evaluate once per epoch, i.e. right after the last minibatch.
    test_score = np.mean([test_model(i) for i in range(n_test_batches)])
    elapsed_hr = (time.time() - start_time) / 3600.0
    logger.info(('epoch %i, ran for %.5f hr, minibatch %i/%i, test error %f %%') % (epoch, elapsed_hr, n_train_batches, n_train_batches, test_score * 100.))
    if test_score < best_test_score:
      np.save(model_file_name, (classifier.W.get_value(), classifier.b.get_value()))
      best_test_score = test_score
      logger.info(('epoch %i, minibatch %i/%i, test error of best model %f %%') % (epoch, n_train_batches, n_train_batches, best_test_score * 100.))
  return best_test_score