def __init__(self, num_input, num_cells=50, num_output=1, lr=0.01, rho=0.95):
    """Build an LSTM regression model: input -> LSTM -> fully-connected.

    Compiles three Theano functions: ``self.train`` (one adadelta step),
    ``self.costfn`` (cost evaluation only), and ``self.predict``.

    Parameters
    ----------
    num_input : int
        Dimensionality of each input vector.
    num_cells : int
        Number of LSTM cells (hidden units).
    num_output : int
        Dimensionality of each output vector.
    lr : float
        Adadelta learning rate.
    rho : float
        Adadelta decay constant.
    """
    # Symbolic graph inputs.
    X = T.matrix('x')
    Y = T.matrix('y')
    eta = T.scalar('eta')      # learning-rate symbol; unused by the adadelta path below
    alpha = T.scalar('alpha')  # mixing weight between mean cost and last-step cost

    self.num_input = num_input
    self.num_output = num_output
    self.num_cells = num_cells
    self.eta = eta

    inputs = InputLayer(X, name="inputs")
    lstm = LSTMLayer(num_input, num_cells, input_layer=inputs, name="lstm")
    fc = FullyConnectedLayer(num_cells, num_output, input_layer=lstm)
    # NOTE(review): averaging over axis 2 assumes fc.output() is 3-D — confirm
    # against the layer implementation.
    Y_hat = T.mean(fc.output(), axis=2)

    layer = inputs, lstm, fc
    self.params = get_params(layer)
    self.caches = make_caches(self.params)
    self.layers = layer

    # Cost blends the average squared error over all timesteps with the
    # squared error at the final timestep, weighted by alpha.
    mean_cost = T.mean((Y - Y_hat) ** 2)
    last_cost = T.mean((Y[-1] - Y_hat[-1]) ** 2)
    self.cost = alpha * mean_cost + (1 - alpha) * last_cost

    # Previous momentum-based optimizer, kept for reference (was dead code
    # wrapped in a stray string literal in the original):
    #   self.updates = momentum(self.cost, self.params, self.caches,
    #                           self.eta, clip_at=3.0)
    self.updates, _, _, _, _ = create_optimization_updates(
        self.cost, self.params, method="adadelta", lr=lr, rho=rho)

    self.train = theano.function([X, Y, alpha], [self.cost, last_cost],
                                 updates=self.updates,
                                 allow_input_downcast=True)
    self.costfn = theano.function([X, Y, alpha], [self.cost, last_cost],
                                  allow_input_downcast=True)
    self.predict = theano.function([X], [Y_hat],
                                   allow_input_downcast=True)
def __init__(self, num_input=256, num_hidden=[512, 512], num_output=256,
             clip_at=0.0, scale_norm=0.0):
    """Stacked-LSTM classifier: input -> [LSTM -> dropout]* -> softmax FC.

    Compiles ``self.train_func`` (one momentum step, returns the loss) and
    ``self.predict_sequence_func`` (softmax class probabilities).

    Parameters
    ----------
    num_input : int
        Size of each input frame.
    num_hidden : int or list of int
        Hidden-layer sizes; an int builds one LSTM layer, a list builds
        one LSTM (plus dropout) stage per entry.  The default list is
        never mutated, so sharing it across instances is safe.
    num_output : int
        Number of output classes.
    clip_at, scale_norm : float
        Gradient clipping / norm-scaling settings (stored on self).
    """
    # Symbolic graph inputs.
    X = T.fmatrix()            # (time, num_input) float input
    Y = T.imatrix()            # integer targets; NOTE(review): confirm encoding below
    lr = T.fscalar()
    alpha = T.fscalar()
    reg = T.fscalar()
    dropout_prob = T.fscalar()

    self.num_input = num_input
    self.num_hidden = num_hidden
    self.num_output = num_output
    self.clip_at = clip_at
    self.scale_norm = scale_norm

    inputs = InputLayer(X, name='inputs')
    num_prev = num_input
    prev_layer = inputs
    self.layers = [inputs]

    # BUG FIX: the original built an LSTM only when num_hidden was an int,
    # so the default list value produced no hidden layers at all.  Build
    # one LSTM + dropout stage per requested hidden size instead.
    # (isinstance replaces the Python-2-only `type(...) is types.IntType`.)
    hidden_sizes = [num_hidden] if isinstance(num_hidden, int) else list(num_hidden)
    for i, n_hid in enumerate(hidden_sizes):
        lstm = LSTMLayer(num_prev, n_hid, input_layers=[prev_layer],
                         name="lstm" if len(hidden_sizes) == 1 else "lstm%d" % i,
                         go_backwards=False)
        num_prev = n_hid
        self.layers.append(lstm)
        prev_layer = DropoutLayer(lstm, dropout_prob=dropout_prob)
        self.layers.append(prev_layer)

    FC = FullyConnectedLayer(num_prev, num_output, input_layers=[prev_layer],
                             name="yhat")
    self.layers.append(FC)

    # Convert raw scores to class probabilities.
    Y_hat = T.nnet.softmax(FC.output())

    params = get_params(self.layers)
    caches = make_caches(params)  # retained from original; momentum() here does not take it

    # BUG FIX: `loss` was never defined in the original, so construction
    # raised NameError.  Use mean categorical cross-entropy between the
    # softmax output and the targets.  NOTE(review): confirm whether Y is
    # one-hot rows or class indices and adjust accordingly.
    loss = T.mean(T.nnet.categorical_crossentropy(Y_hat, Y))

    updates, grads = momentum(loss, params, lr, reg)

    # `alpha` stays in the input list for caller compatibility even though
    # it does not appear in the graph; on_unused_input='ignore' prevents
    # theano.function from raising UnusedInputError over it.
    self.train_func = theano.function([X, Y, lr, reg, dropout_prob, alpha],
                                      loss, updates=updates,
                                      allow_input_downcast=True,
                                      on_unused_input='ignore')
    self.predict_sequence_func = theano.function([X, dropout_prob], [Y_hat],
                                                 allow_input_downcast=True)
def __init__(self, num_input, num_cells=50, num_output=1, lr=0.01, rho=0.95):
    """Construct the LSTM regressor (input -> LSTM -> fully-connected).

    Builds the symbolic cost graph and compiles ``self.train``,
    ``self.costfn``, and ``self.predict`` Theano functions.

    Parameters
    ----------
    num_input : int
        Width of each input row of X.
    num_cells : int
        LSTM hidden size.
    num_output : int
        Width of each output row.
    lr, rho : float
        Adadelta learning rate and decay constant.
    """
    # Symbolic variables for the graph.
    X = T.matrix('x')
    Y = T.matrix('y')
    eta = T.scalar('eta')      # stored on self; not consumed by the adadelta updates
    alpha = T.scalar('alpha')  # weight between mean-over-time and final-step errors

    self.num_input = num_input
    self.num_output = num_output
    self.num_cells = num_cells
    self.eta = eta

    inputs = InputLayer(X, name="inputs")
    lstm = LSTMLayer(num_input, num_cells, input_layer=inputs, name="lstm")
    fc = FullyConnectedLayer(num_cells, num_output, input_layer=lstm)
    # NOTE(review): mean over axis 2 presumes a 3-D fc.output() — verify.
    Y_hat = T.mean(fc.output(), axis=2)

    layer = inputs, lstm, fc
    self.params = get_params(layer)
    self.caches = make_caches(self.params)
    self.layers = layer

    # alpha interpolates between the whole-sequence MSE and the MSE of the
    # last timestep only.
    mean_cost = T.mean((Y - Y_hat) ** 2)
    last_cost = T.mean((Y[-1] - Y_hat[-1]) ** 2)
    self.cost = alpha * mean_cost + (1 - alpha) * last_cost

    # Old momentum optimizer, preserved as a comment (originally dead code
    # inside a malformed string literal):
    #   self.updates = momentum(self.cost, self.params, self.caches,
    #                           self.eta, clip_at=3.0)
    self.updates, _, _, _, _ = create_optimization_updates(
        self.cost, self.params, method="adadelta", lr=lr, rho=rho)

    self.train = theano.function([X, Y, alpha], [self.cost, last_cost],
                                 updates=self.updates,
                                 allow_input_downcast=True)
    self.costfn = theano.function([X, Y, alpha], [self.cost, last_cost],
                                  allow_input_downcast=True)
    self.predict = theano.function([X], [Y_hat],
                                   allow_input_downcast=True)