# Training pass: call train(b) once per batch index and keep every returned loss.
print "  batch SGD"
    losses = []
    for b in xrange(num_batches_chunk):
        # if b % 1000 == 0:
        #     print "  batch %d/%d" % (b + 1, num_batches_chunk)

        loss = train(b)
        losses.append(loss)
        # print "  loss: %.6f" % loss

    # Square root of the mean of the collected losses. It is reported as RMSE,
    # which presumably relies on train() returning a mean squared error per
    # batch -- TODO confirm against the definition of train().
    mean_train_loss = np.sqrt(np.mean(losses))
    print "  mean training loss (RMSE):\t\t%.6f" % mean_train_loss
    losses_train.append(mean_train_loss)

    # store param stds during training
    # One .std() per value returned by layers.get_param_values(l6); the list is
    # appended to param_stds each time this section runs.
    param_stds.append([p.std() for p in layers.get_param_values(l6)])

    # Validate whenever (e + 1) is a multiple of VALIDATE_EVERY.
    # NOTE(review): e is presumably the index of the enclosing loop, whose
    # header is not visible here -- confirm.
    if ((e + 1) % VALIDATE_EVERY) == 0:
        print
        print "VALIDATING"
        print "  load validation data onto GPU"
        # Overwrite each shared input with its validation counterpart (paired by
        # position via zip), then the shared targets.
        for x_shared, x_valid in zip(xs_shared, xs_valid):
            x_shared.set_value(x_valid)
        y_shared.set_value(y_valid)

        print "  compute losses"
        # Rebinds losses, so the training losses collected above are no longer
        # reachable through this name.
        losses = []
        for b in xrange(num_batches_valid):
            # if b % 1000 == 0:
            #     print "  batch %d/%d" % (b + 1, num_batches_valid)
            # NOTE(review): the value computed here is not appended to losses
            # before the loop body ends -- confirm whether lines are missing.
            loss = compute_loss(b)
    # NOTE(review): from here on the code repeats the training pass and the
    # validation section that appear directly above it, starting right after an
    # unfinished validation loop -- this looks like an accidental duplication;
    # confirm before relying on it.
    # Training pass: call train(b) once per batch index and keep every returned loss.
    print "  batch SGD"
    losses = []
    for b in xrange(num_batches_chunk):
        # if b % 1000 == 0:
        #     print "  batch %d/%d" % (b + 1, num_batches_chunk)

        loss = train(b)
        losses.append(loss)
        # print "  loss: %.6f" % loss

    # Square root of the mean of the collected losses. It is reported as RMSE,
    # which presumably relies on train() returning a mean squared error per
    # batch -- TODO confirm against the definition of train().
    mean_train_loss = np.sqrt(np.mean(losses))
    print "  mean training loss (RMSE):\t\t%.6f" % mean_train_loss
    losses_train.append(mean_train_loss)

    # store param stds during training
    # One .std() per value returned by layers.get_param_values(l6); the list is
    # appended to param_stds each time this section runs.
    param_stds.append([p.std() for p in layers.get_param_values(l6)])

    # Validate whenever (e + 1) is a multiple of VALIDATE_EVERY.
    # NOTE(review): e is presumably the index of the enclosing loop, whose
    # header is not visible here -- confirm.
    if ((e + 1) % VALIDATE_EVERY) == 0:
        print
        print "VALIDATING"
        print "  load validation data onto GPU"
        # Overwrite each shared input with its validation counterpart (paired by
        # position via zip), then the shared targets.
        for x_shared, x_valid in zip(xs_shared, xs_valid):
            x_shared.set_value(x_valid)
        y_shared.set_value(y_valid)

        print "  compute losses"
        # Rebinds losses, so the training losses collected above are no longer
        # reachable through this name.
        losses = []
        for b in xrange(num_batches_valid):
            # if b % 1000 == 0:
            #     print "  batch %d/%d" % (b + 1, num_batches_valid)
            # Loss for validation batch b, as returned by compute_loss.
            loss = compute_loss(b)