import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, GRULayer
from lasagne.updates import adam
from theano.tensor.shared_randomstreams import RandomStreams


class PRAE:
    def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2 ** 30))

        # Output-head parameters: map the top GRU's hidden state to one
        # positive rate (theta) per feature.
        initial_W = np.asarray(
            rng.uniform(low=1e-5, high=1, size=(self.hidden[1], self.n_features)),
            dtype=theano.config.floatX)
        self.W_y_theta = theano.shared(value=initial_W, name='W_y_theta', borrow=True)
        self.b_y_theta = theano.shared(
            value=np.zeros(self.n_features, dtype=theano.config.floatX),
            name='b_y_theta', borrow=True)
        # A second head (W_y_kappa / b_y_kappa) was sketched here but is unused.

        # The architecture is fixed, so the model can be created here
        # directly: two stacked GRUs, the first consuming the input mask.
        self.l_in = InputLayer(shape=(self.num_batch, self.max_len, self.n_features))
        self.mask_input = InputLayer(shape=(self.num_batch, self.max_len))
        first_hidden = GRULayer(self.l_in, mask_input=self.mask_input,
                                num_units=hidden[0])
        self.model = GRULayer(first_hidden, num_units=hidden[1])
        # Its output is (num_batch, max_len, hidden[1]).  An alternative head
        # (ReshapeLayer to (batch * max_len, hidden[1]) followed by a
        # DenseLayer) was sketched here; the dot product in get_output_y
        # plays the same role.  For every Gaussian in a mixture output, three
        # values would be needed, plus one for the total scale.

    def get_output_shape_for(self):
        return (self.num_batch, self.max_len, self.hidden[1])

    def get_output_y(self, output):
        # (batch, time, hidden) x (hidden, features) + (features,)
        #   => (batch, time, features)
        theta_out = T.nnet.relu(T.dot(output, self.W_y_theta) + self.b_y_theta)
        return theta_out

    def get_log_x(self, x, theta_out):
        # Elementwise log-density of an exponential distribution with rate
        # theta: log p(x) = log(theta) - theta * x.  Shapes are
        # (batch, time, features).
        log_x = T.log(theta_out + 1e-8) - theta_out * x
        # Sum over the feature axis: the features are assumed independent.
        log_x = log_x.sum(axis=2, dtype=theano.config.floatX)
        return log_x

    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target

        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('num_batch_test')
        self.b_slice = slice(self.index * self.num_batch,
                             (self.index + 1) * self.num_batch)

        # floatX placeholders, matching the dtype of the shared parameters.
        sym_x = T.tensor3()
        sym_mask_x = T.matrix()
        sym_target = T.tensor3()
        # An output mask should not be needed, since the masked outputs are
        # already zero.  TODO verify this.

        output = lasagne.layers.get_output(
            self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        theta = self.get_output_y(output)
        log_px = self.get_log_x(sym_target, theta)
        log_px_sum_time = log_px.sum(axis=1, dtype=theano.config.floatX)  # sum over time
        loss = -T.sum(log_px_sum_time) / self.num_batch  # average over batch

        # The same graph, evaluated on the test data via the givens below.
        log_px_test = self.get_log_x(sym_target, theta)
        log_px_sum_time_test = log_px_test.sum(axis=1, dtype=theano.config.floatX)
        loss_test = -T.sum(log_px_sum_time_test) / self.num_batch_test

        all_params = ([self.W_y_theta, self.b_y_theta]
                      + lasagne.layers.get_all_params(self.model))
        # Clip gradients elementwise, then constrain their total norm.
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function(
            [self.index], [loss, theta, log_px],
            givens={sym_x: self.train_x[self.b_slice],
                    sym_mask_x: self.train_mask_x[self.b_slice],
                    sym_target: self.train_target[self.b_slice]},
            updates=updates_target)
        test_model = theano.function(
            [self.num_batch_test], [loss_test, theta],
            givens={sym_x: self.test_x,
                    sym_mask_x: self.test_mask_x,
                    sym_target: self.test_target})
        return train_model, test_model
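# A minimal usage sketch, assuming the targets are the inputs themselves
# (the "AE" in PRAE).  The toy shapes, the random data and the `make_shared`
# helper are hypothetical, not part of the original module.
if __name__ == '__main__':
    num_batch, max_len, n_features = 32, 50, 10
    train_n, test_n = 10 * num_batch, 64

    def make_shared(arr):
        # Hypothetical helper: wrap a numpy array in a shared variable so
        # that the `givens` slicing in build_model can index into it.
        return theano.shared(np.asarray(arr, dtype=theano.config.floatX),
                             borrow=True)

    train_x = make_shared(np.random.rand(train_n, max_len, n_features))
    train_mask = make_shared(np.ones((train_n, max_len)))
    test_x = make_shared(np.random.rand(test_n, max_len, n_features))
    test_mask = make_shared(np.ones((test_n, max_len)))

    prae = PRAE(num_batch, max_len, n_features)
    train_model, test_model = prae.build_model(
        train_x, train_mask, None, train_x,   # target == input
        test_x, test_mask, None, test_x)

    for epoch in range(5):
        for i in range(train_n // num_batch):
            loss, theta, log_px = train_model(i)
        test_loss, _ = test_model(test_n)
        print('epoch %d: train loss %.4f, test loss %.4f'
              % (epoch, float(loss), float(test_loss)))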
# A second variant of PRAE: the same two-GRU encoder, but trained with a
# squared-error loss instead of the exponential log-likelihood above, and
# with a free (None) batch dimension.
class PRAE:
    def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2 ** 30))

        # Output-head parameters with a Glorot-style uniform initialisation.
        bound = 4 * np.sqrt(6. / (self.hidden[1] + self.n_features))
        initial_W = np.asarray(
            rng.uniform(low=-bound, high=bound,
                        size=(self.hidden[1], self.n_features)),
            dtype=theano.config.floatX)
        self.W = theano.shared(value=initial_W, name='W', borrow=True)
        self.b = theano.shared(
            value=np.zeros(self.n_features, dtype=theano.config.floatX),
            name='b', borrow=True)

        # The architecture is fixed: two stacked GRUs.  (Dense, dropout,
        # reshape and concat variants were tried here and removed.)
        self.l_in = InputLayer(shape=(None, self.max_len, self.n_features))
        self.mask_input = InputLayer(shape=(None, self.max_len))
        first_hidden = GRULayer(self.l_in, mask_input=self.mask_input,
                                num_units=hidden[0])
        self.model = GRULayer(first_hidden, num_units=hidden[1])
        # Its output is (num_batch, max_len, hidden[1]).

    def get_output_shape_for(self):
        return (self.num_batch, self.max_len, self.hidden[1])

    def get_output_y(self, x):
        # (batch, time, hidden) x (hidden, features) + (features,)
        #   => (batch, time, features)
        return T.nnet.relu(T.dot(x, self.W) + self.b)

    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target

        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('num_batch_test')
        self.b_slice = slice(self.index * self.num_batch,
                             (self.index + 1) * self.num_batch)

        # floatX placeholders, matching the dtype of the shared parameters.
        sym_x = T.tensor3()
        sym_mask_x = T.matrix()
        sym_target = T.tensor3()
        # An output mask should not be needed, since the masked outputs are
        # already zero.  TODO verify this.

        out = lasagne.layers.get_output(
            self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        out_out = self.get_output_y(out)
        # Note: T.mean already averages over batch, time and features; the
        # extra division rescales the loss by the batch size.
        loss = T.mean(lasagne.objectives.squared_error(out_out, sym_target)) \
            / self.num_batch

        # The same graph again, evaluated on the test data via the givens.
        out_test = lasagne.layers.get_output(
            self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        out_out_test = self.get_output_y(out_test)
        loss_test = T.mean(lasagne.objectives.squared_error(out_out_test, sym_target)) \
            / self.num_batch_test

        all_params = [self.W, self.b] + lasagne.layers.get_all_params(self.model)
        # Clip gradients elementwise, then constrain their total norm.
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function(
            [self.index], [loss, out_out],
            givens={sym_x: self.train_x[self.b_slice],
                    sym_mask_x: self.train_mask_x[self.b_slice],
                    sym_target: self.train_target[self.b_slice]},
            updates=updates_target)
        test_model = theano.function(
            [self.num_batch_test], [loss_test, out_out_test],
            givens={sym_x: self.test_x,
                    sym_mask_x: self.test_mask_x,
                    sym_target: self.test_target})
        return train_model, test_model
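# A hedged sketch, not part of the original module: build_model stores the
# output masks but never uses them, and the TODO above assumes the masked
# outputs are exactly zero.  If that assumption fails, the mask could enter
# the loss directly; `mask` is assumed broadcastable against the
# (batch, time, features) error tensor.
def masked_squared_error(prediction, target, mask):
    # Zero the error at masked positions, then average over the unmasked
    # entries only.
    se = lasagne.objectives.squared_error(prediction, target) * mask
    return se.sum() / (mask.sum() + 1e-8)

# Inside build_model this would replace the plain T.mean(...) loss, with an
# extra placeholder (e.g. sym_mask_out = T.tensor3()) bound to
# train_mask_out / test_mask_out via the givens.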