def fprop(self, X):
    """Return the reduced MSE cost for the two entries held in ``X``.

    ``X`` is indexed at positions 0 and 1, and both entries are handed, in
    that order, to ``MSE`` (a helper defined outside this block).  The value
    ``MSE`` returns is then collapsed with its ``.sum()`` method when
    ``self.use_sum`` is truthy, and with its ``.mean()`` method otherwise.

    NOTE(review): which of ``X[0]`` / ``X[1]`` is the target and which is the
    prediction is not visible from here -- confirm against ``MSE``.
    """
    first, second = X[0], X[1]
    raw_cost = MSE(first, second)
    # Pick the reduction once so the method has a single return point.
    reduce_cost = raw_cost.sum if self.use_sum else raw_cost.mean
    return reduce_cost()
def inner_fn(x_t, s1_tm1, s2_tm1, s3_tm1): s1_t = h1.fprop([[x_t], [s1_tm1, s2_tm1, s3_tm1]], params) s2_t = h2.fprop([[s1_t], [s2_tm1, s1_tm1, s3_tm1]], params) s3_t = h3.fprop([[s2_t], [s3_tm1], s1_tm1, s2_tm1], params) y_hat_t = output.fprop([s1_t, s2_t, s3_t], params) return s1_t, s2_t, s3_t, y_hat_t ((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x], outputs_info=[s1_0, s2_0, s3_0, None]) mse = MSE(y, y_hat_temp) mse = mse.mean() mse.name = 'mse' model.inputs = [x, y] model.params = params model.nodes = nodes optimizer = Adam( lr=0.001 ) extension = [ GradientClipping(batch_size=batch_size), EpochCount(100), Monitoring(freq=100, ddout=[mse]),
s2_t = h2.fprop([[s1_t], [s2_tm1]]) s3_t = h3.fprop([[s2_t], [s3_tm1]]) y_hat_t = output.fprop([s1_t, s2_t, s3_t]) return s1_t, s2_t, s3_t, y_hat_t ((h1_temp, h2_temp, h3_temp, y_hat_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x], outputs_info=[s1_0, s2_0, s3_0, None]) ts, _, _ = y_hat_temp.shape y_hat_in = y_hat_temp.reshape((ts * batch_size, -1)) y_in = y.reshape((ts * batch_size, -1)) mse = MSE(y_in, y_hat_in) mse = mse.mean() mse.name = 'mse' model.inputs = [x, y] model._params = params model.nodes = nodes optimizer = Adam(lr=0.001) extension = [ GradientClipping(batch_size=batch_size), EpochCount(100), Monitoring(freq=100, ddout=[mse], data=[ Iterator(valid_data, batch_size),