Example #1
    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        self.optimizer = optimizers.get(optimizer)

        self.loss = objectives.get(loss)
        weighted_loss = weighted_objective(objectives.get(loss))

        # input of model
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model
        self.y = T.zeros_like(self.y_train)

        self.weights = T.ones_like(self.y_train)

        train_loss = weighted_loss(self.y, self.y_train, self.weights)
        test_loss = weighted_loss(self.y, self.y_test, self.weights)

        train_loss.name = 'train_loss'
        test_loss.name = 'test_loss'
        self.y.name = 'y'

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode
        self.theano_mode = theano_mode

        for r in self.regularizers:
            train_loss = r(train_loss)
        updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)

        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y, self.weights]
            test_ins = self.X_test + [self.y, self.weights]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y, self.weights]
            test_ins = [self.X_test, self.y, self.weights]
            predict_ins = [self.X_test]

        self._train = theano.function(train_ins, train_loss,
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._predict = theano.function(predict_ins, self.y_test,
            allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_loss,
            allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy],
            allow_input_downcast=True, mode=theano_mode)
Example #2
    def build(self,output_type):      
        
        #### set up parameter         
        self.params+=[self.W_hy, self.b_hy]
        for param in self.params:
            self.updates[param] = theano.shared(
                                      value = np.zeros(
                                                  param.get_value(
                                                      borrow = True).shape,
                                                      dtype = theano.config.floatX),
                                      name = 'updates')
                                      
        ### set up regularizer                               
        
         
        self.L1 += T.sum(abs(self.W_hy))    
        self.L2_sqr += T.sum(self.W_hy**2)               
                                             
        ### final prediction formula
                                             
        self.y_pred = T.dot(self.get_output(), self.W_hy) + self.b_hy
                                     
        self.output_type = output_type
        if self.output_type == 'real':
            self.y = T.matrix(name = 'y', dtype = theano.config.floatX) 
            self.loss = lambda y: Loss.mse(self.y_pred, y) # y is the target; self.loss(y) returns the MSE
            self.predict = theano.function(inputs = [self.x, ],
                                           outputs = self.y_pred,
                                           mode = mode)

        elif self.output_type == 'binary':
            self.y = T.matrix(name = 'y', dtype = 'int32')
            self.p_y_given_x = T.nnet.sigmoid(self.y_pred)
            self.y_out = T.round(self.p_y_given_x)  # round to {0,1}
            self.loss = lambda y: Loss.nll_binary(self.p_y_given_x,y)
            self.predict_proba = theano.function(inputs = [self.x, ],
                                                 outputs = self.p_y_given_x,
                                                 mode = mode)
            self.predict = theano.function(inputs = [self.x, ],
                                           outputs = T.round(self.p_y_given_x),
                                           mode = mode)
        
        elif self.output_type == 'softmax':
            self.y = T.vector(name = 'y', dtype = 'int32')
            self.p_y_given_x = T.nnet.softmax(self.y_pred)
            self.y_out = T.argmax(self.p_y_given_x, axis = -1)
            self.loss = lambda y: Loss.nll_multiclass(self.p_y_given_x,y)
            self.predict_proba = theano.function(inputs = [self.x, ],
                                                 outputs = self.p_y_given_x,
                                                 mode = mode)
            self.predict = theano.function(inputs = [self.x, ],
                                           outputs = self.y_out, # y-out is calculated by applying argmax
                                           mode = mode)
        else:
            raise NotImplementedError
Example #3
def my_activation(input):
    d = 2

    input = input * T.power(10, d)
    input = T.round(input)
    x = input / T.power(10, d)
    abs_x = abs(x)

    ret =  x / (1. + abs_x)
    ret = T.round(ret * T.power(10, d)) / T.power(10, d)
    return ret
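
A minimal usage sketch (not part of the original snippet; it assumes Theano and NumPy are installed) showing how my_activation could be compiled and evaluated:

import numpy as np
import theano
import theano.tensor as T

# Compile the activation into a callable; allow_input_downcast handles float64 -> floatX.
x = T.vector('x')
f = theano.function([x], my_activation(x), allow_input_downcast=True)

# Computes x / (1 + |x|) with the input and the result rounded to d=2 decimal places.
print(f(np.array([0.1234, -2.5, 7.0])))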
Example #4
    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        self.optimizer = optimizers.get(optimizer)
        self.loss = objectives.get(loss)

        # input of model 
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model
        self.y = T.zeros_like(self.y_train)

        train_loss = self.loss(self.y, self.y_train)
        test_score = self.loss(self.y, self.y_test)

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode

        if hasattr(self, 'loss_updates'):
            for u in self.loss_updates:
                train_loss = u.update_loss(train_loss)

        updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints,  train_loss)

        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y]
            test_ins = self.X_test + [self.y]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y]
            test_ins = [self.X_test, self.y]
            predict_ins = [self.X_test]

        self._train = theano.function(train_ins, train_loss, 
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], 
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._predict = theano.function(predict_ins, self.y_test, 
            allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_score, 
            allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_score, test_accuracy], 
            allow_input_downcast=True, mode=theano_mode)
Example #5
 def precision(self, y, threshold=0.5):
     #        y_outpred = self.y_out.eval()
     #        y_outpred[y_outpred>0.8] = 1
     #        y_outpred[y_outpred<=0.8] = 0
     #avg = T.mean(self.y_out)
     #stddev = T.std(self.y_out)
     #conditional_output = T.switch(T.lt(threshold,self.y_out), 1.0, 0.0)
     divider = T.sum(T.round(self.y_out))  # predicted positives
     dividee = T.sum(T.eq(T.round(self.y_out), 1) * T.eq(y, 1))  # true positives
     #        divider = T.sum(T.round(self.y_out))
     #        dividee = T.sum(T.eq(T.round(self.y_out),1)*T.eq(y,1))
     return dividee / divider
Example #6
    def __init__(self, actual_probability, groundtruth_label, bias):

        self.cost = -T.mean(
            T.sum(
                groundtruth_label * T.log(actual_probability + bias) +
                (1 - groundtruth_label) * T.log(1 - actual_probability + bias),
                axis=1))

        self.error = T.mean(
            T.neq(T.round(actual_probability), groundtruth_label))

        self.prediction = T.round(actual_probability)
Example #7
    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        self.optimizer = optimizers.get(optimizer)
        self.loss = objectives.get(loss)

        # input of model 
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model
        self.y = T.zeros_like(self.y_train)

        train_loss = self.loss(self.y, self.y_train)
        test_score = self.loss(self.y, self.y_test)

        if class_mode == "categorical":
            # just check whether the most probable class matches the target
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            # round predictions to a {0,1} vector, compare each class of each sample, then average over all n*k entries
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode

        updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss)
        
        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y]
            test_ins = self.X_test + [self.y]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y]
            test_ins = [self.X_test, self.y]
            predict_ins = [self.X_test]

        # input is [[x1, x2, x3, ...], [y1, y2, y3, ...]]; each x_i and y_i is a vector
        self._train = theano.function(train_ins, train_loss, 
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], 
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._predict = theano.function(predict_ins, self.y_test, 
            allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_score, 
            allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_score, test_accuracy], 
            allow_input_downcast=True, mode=theano_mode)
Example #8
    def compile(self, optimizer, loss, class_mode="categorical"):
        self.optimizer = optimizers.get(optimizer)
        self.loss = objectives.get(loss)

        self.X = self.layers[0].input  # input of model
        # (first layer must have an "input" attribute!)
        self.y_train = self.layers[-1].output(train=True)
        self.y_test = self.layers[-1].output(train=False)

        # output of model
        self.y = T.matrix()  # TODO: support for custom output shapes

        train_loss = self.loss(self.y, self.y_train)
        test_score = self.loss(self.y, self.y_test)

        if class_mode == "categorical":
            train_accuracy = T.mean(
                T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train,
                                                         axis=-1)))
            test_accuracy = T.mean(
                T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test,
                                                         axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode

        updates = self.optimizer.get_updates(self.params, self.regularizers,
                                             self.constraints, train_loss)

        self._train = theano.function([self.X, self.y],
                                      train_loss,
                                      updates=updates,
                                      allow_input_downcast=True)
        self._train_with_acc = theano.function([self.X, self.y],
                                               [train_loss, train_accuracy],
                                               updates=updates,
                                               allow_input_downcast=True)
        self._predict = theano.function([self.X],
                                        self.y_test,
                                        allow_input_downcast=True)
        self._test = theano.function([self.X, self.y],
                                     test_score,
                                     allow_input_downcast=True)
        self._test_with_acc = theano.function([self.X, self.y],
                                              [test_score, test_accuracy],
                                              allow_input_downcast=True)
Example #9
    def compile_training_functions(self):
        # print parameter info
        all_params = lasagne.layers.get_all_params(self.network['l_profile'], trainable=True) #l_out
        total_params = sum([p.get_value().size for p in all_params])
        print(" Total Model Parameters:", str(total_params))
        print(" Trainable Model Parameters")
        print("-" * 40)
        for param in all_params:
            print('', str(param), str(param.get_value().shape))
        print("-" * 40)
        print("\n")
        sys.stdout.flush()

        # train cost
        train_preds = lasagne.layers.get_output(self.network['l_out'], deterministic=False)
        cost_train = T.mean(lasagne.objectives.binary_crossentropy(train_preds, self.sym_target))
        L1_n_L2 = lasagne.regularization.regularize_network_params(self.network['l_out'],
                                                                   lasagne.regularization.l2,
                                                                   {'regularizable': True})
        cost_train += L1_n_L2 * self.options["L2"]
        eq_train = T.eq(T.round(train_preds), self.sym_target)
        train_acc = T.mean(eq_train, dtype=theano.config.floatX)

        # validation cost and accuracy
        val_preds = lasagne.layers.get_output(self.network['l_out'], deterministic=True)
        cost_val = T.mean(lasagne.objectives.binary_crossentropy(val_preds, self.sym_target))
        eq_val = T.eq(T.round(val_preds), self.sym_target)
        val_acc = T.mean(eq_val, dtype=theano.config.floatX)

        print(" Making update function...", end='')
        sys.stdout.flush()
        updates = lasagne.updates.adam(cost_train, all_params, learning_rate=self.options["ETA"], beta1=0.9,
                                       beta2=0.999, epsilon=1e-08)
        print("done")
        print(" Making training function - slow step...", end='')
        sys.stdout.flush()
        start_time = time.time()
        self.train_fn = theano.function([self.sym_input, self.sym_target], [cost_train, train_acc, train_preds],
                                        updates=updates, allow_input_downcast=True)
        ctime = time.time() - start_time
        print("finished in {:.3f}s".format(ctime))
        print(" Making validation function - slow step...", end='')
        sys.stdout.flush()
        start_time = time.time()
        self.val_fn = theano.function([self.sym_input, self.sym_target], [cost_val, val_acc, val_preds, eq_val],
                                      allow_input_downcast=True)
        ctime = time.time() - start_time
        print("finished in {:.3f}s".format(ctime))
        sys.stdout.flush()
Example #10
    def __init__(self, input, n_in, n_out, W = None, b = None):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        :type W: theano.tensor.TensorType
        :param W: optional pre-initialized weight matrix of shape (n_in, n_out)

        :type b: theano.tensor.TensorType
        :param b: optional pre-initialized bias vector of shape (n_out,)
        """

        if W is None:

            # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
            self.W = theano.shared(
                value=np.zeros(
                    (n_in, n_out),
                    dtype=theano.config.floatX
                ),
                name='W',
                borrow=True
            )

        else:
            self.W = W

        if b is None:

            # initialize the biases b as a vector of n_out 0s
            self.b = theano.shared(
                value=np.zeros(
                    (n_out,),
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        else:
            self.b = b

        # output
        self.output = T.nnet.sigmoid( T.dot(input, self.W) + self.b )       # batch_size x 1024
        self.thresh = T.round(self.output)
        # parameters of the model
        self.params = [self.W, self.b]                                      # W: 1024 x 8100, b: 1024 x 1

        # keep track of model input
        self.input = input
Example #11
 def quantizeWeights(self, X):
     # [-1,1] -> [0,1]
     Xa = hard_sigmoid(X / self.scale)
     Xb = T.round(Xa)
     # 0 or 1 -> -1 or 1
     return T.cast(T.switch(Xb, self.scale, -self.scale),
                   theano.config.floatX)
Example #12
    def gated_loss(self, y):
        gates = T.nnet.sigmoid(self.output[:, 0:1, :, :])

        gated_square_loss = T.mean(
            T.round(gates) * (self.output[:, 1:2, :, :] - y[:, 1:2, :, :])**2)
        logistic_loss = -T.mean(T.log(1 + T.exp(-y[:, 0:1, :, :] * gates)))
        return gated_square_loss, logistic_loss, gated_square_loss + logistic_loss
Example #13
    def create_validator(self):
        """
        Generate theano function to check error and accuracy of the network.

        Returns: theano function that takes input (train_x,train_y)
                 and returns error and accuracy
        """
        print("Creating {} Validator...".format(self.name))
        # create prediction
        val_prediction = lasagne.layers.get_output(self.network,
                                                   deterministic=True)
        # check how much error in prediction
        val_acc = T.constant(0)  # default, so val_acc exists when a custom val_cost is preset
        if self.val_cost is None:
            if self.num_classes is None or self.num_classes == 0:
                self.val_cost = self.mse_loss(val_prediction, self.y)
                val_acc = T.constant(0)
            else:
                self.val_cost = self.cross_entropy_loss(val_prediction, self.y)
                # check the accuracy of the prediction
                if self.num_classes > 1:
                    val_acc = T.mean(T.eq(T.argmax(val_prediction, axis=1),
                                          T.argmax(self.y, axis=1)),
                                     dtype=theano.config.floatX)
                elif self.num_classes == 1:
                    val_acc = T.mean(T.eq(
                        T.round(val_prediction, mode='half_away_from_zero'),
                        self.y),
                                     dtype=theano.config.floatX)

        return theano.function([self.input_var, self.y],
                               [self.val_cost, val_acc])
Example #14
def sigmoid_readout(operators, v_in, h_L, external):
    """Sigmoid readout layer. Cost is the binary crossentropy and
    monitor is RMSE.
    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param h_L: shape (timesteps, n_hidden)
    :return: shape (timesteps, n_visible)
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = sigmoid(T.dot(h_L, weight) + bias)  # broadcastable bias??
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, 1.0e-7, 1.0 - 1.0e-7)

    # Cost:
    #cost = 1000 * ((v_pred[:-1] - v_in[1:]) ** 2).mean()
    #cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
    #       T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = crossent(v_pred_c[:-1], v_in_c[1:]) #TODO: v_sample_c !!!
    cost = cost.mean()

    # Monitor:
    #monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
    #          T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = crossent(v_sample_c[:-1], v_in_c[1:])
    monitor = monitor.mean()

    return v_sample, cost, monitor, None
Example #15
def sigmoid_readout_old(operators, v_in, h_L, g):
    """Sigmoid readout layer. Cost is the binary crossentropy and
    monitor is RMSE.
    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param h_L: shape (timesteps, n_hidden)
    :return: shape (timesteps, n_visible)
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = g(T.dot(h_L, weight) + bias)  # broadcastable bias??
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Cost:
    cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = cost.sum() / v_in.shape[0]

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, 1.0e-7, 1.0 - 1.0e-7)

    # Monitor (needs to return something... for now):
    monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = monitor.sum() / v_in.shape[0]

    return v_sample, cost, monitor, None
Example #16
 def compute_activations(self, input_data, do_round=True):
     layer_input = input_data
     layer_signals = []
     for i, (w, b, k) in enumerate(zip(self.ws, self.bs,
                                       self.get_scales())):
         scaled_input = layer_input * k
         if not do_round:
             eta = None
             spikes = scaled_input
         else:
             eta = tt.round(scaled_input) - scaled_input
             spikes = scaled_input + disconnected_grad(eta)
         nonlinearity = get_named_activation_function(
             self.hidden_activations if i < len(self.ws) -
             1 else self.output_activation)
         output = nonlinearity((spikes / k).dot(w) + b)
         layer_signals.append({
             'input': layer_input,
             'scaled_input': scaled_input,
             'eta': eta,
             'spikes': spikes,
             'output': output
         })
         layer_input = output
     return layer_signals
Example #17
    def __init__(self, input, n_in, n_out):
            # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        
        self.p_y_given_x = (T.dot(input, self.W) + self.b)
        self.y_pred = T.round(self.p_y_given_x)
        
        self.params = [self.W, self.b]
        
        self.input = input
Example #18
    def create_objectives(self, deterministic=False):
        """Stochastic approximation to the pseudo-likelihood"""
        X = self.inputs[0]
        X = X.reshape((-1, self.n_visible))

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = self.bit_i_idx
        # bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.round(X)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
        # the result to xi_flip, instead of working in place on xi.
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
        cost = T.mean(self.n_visible *
                      T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        return cost, cost
Example #19
    def compute_output(self):

        label_results = self.process_label_results(
            self.semantic_prediction)  #tensor.round(self.semantic_prediction)
        print(label_results)
        print(tensor.round(self.semantic_prediction))

        label_specific_Ws = tensor.tensordot(label_results,
                                             self.Ws,
                                             axes=[1, 0])

        label_specific_Vs = tensor.tensordot(label_results,
                                             self.Vs,
                                             axes=[1, 0])

        label_specific_W = th.dot(label_specific_Ws, self.W)

        label_specific_V = th.dot(label_specific_Vs, self.V)

        # compute output
        self.output = getFunction('softmax')(
            tensor.batched_dot(self.input, label_specific_W) +
            tensor.batched_dot(self.extra_input, label_specific_V) + self.b)

        for i in range(len(self.semantic_label_map.keys()) + 1):
            ho = self.get_output(i)
            self.output_hybrids.append(ho)
Example #20
def to_fixed_point_theano(input, no_bits, no_int_bits):
    scale =T.cast(2.**(no_bits - no_int_bits), theano.config.floatX)
    max_val = T.cast((2.**no_bits) - 1, theano.config.floatX)
    scaled = input * scale
    scaled = T.round(scaled)
    scaled = T.clip(scaled, -max_val, max_val)
    return scaled/scale
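
A short usage sketch (an illustrative addition, assuming Theano and NumPy are available): quantize a few floats to an 8-bit fixed-point grid with 3 integer bits using the helper above.

import numpy as np
import theano
import theano.tensor as T

v = T.vector('v')
# scale = 2**(8 - 3) = 32, so the quantization step is 1/32 = 0.03125
quantize = theano.function([v], to_fixed_point_theano(v, 8, 3),
                           allow_input_downcast=True)
print(quantize(np.array([0.1, -1.73, 3.14159])))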
Example #21
def sigmoid_readout(operators, v_in, h_L, external):
    """Sigmoid readout layer. Cost is the binary crossentropy and
    monitor is RMSE.
    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param h_L: shape (timesteps, n_hidden)
    :return: shape (timesteps, n_visible)
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = sigmoid(T.dot(h_L, weight) + bias)  # broadcastable bias??
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, 1.0e-7, 1.0 - 1.0e-7)

    # Cost:
    # cost = 1000 * ((v_pred[:-1] - v_in[1:]) ** 2).mean()
    # cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
    #       T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = crossent(v_pred_c[:-1], v_in_c[1:])  # TODO: v_sample_c !!!
    cost = cost.mean()

    # Monitor:
    # monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
    #          T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = crossent(v_sample_c[:-1], v_in_c[1:])
    monitor = monitor.mean()

    return v_sample, cost, monitor, None
Example #22
 def getHidden(self, v):
     v = T.round(v)
     h = T.dot(v, self.w) + self.c
     h_sigmoid = T.nnet.sigmoid(h)
     h_bin = self.theano_rng.binomial(size=h.shape, n=1, p=h_sigmoid,
             dtype=theano.config.floatX)
     return [h, h_sigmoid, h_bin]
Example #23
	def get_pseudo_likelihood_cost(self, updates):
		""" Stochastic approximation to the pseudo-likelihood 
			I have no idea why to do this.
		"""

		# index of bit i in expression p(x_i | x{\i})
		bit_i_idx = theano.shared(value=0, name='bit_i_idx')

		# binarize the input image by rounding to nearest integer
		xi = T.round(self.input)	# input? It seems that the sample result has nothing to do with the cost...

		# calculate free energy for the given bit configuration
		fe_xi = self.free_energy(xi)

		# flip bit x_i of matrix xi and preserve all other bits x_{\i}
		xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

		# calculate free energy with bit flipped
		fe_xi_flip = self.free_energy(xi_flip)

		cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

		updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

		return cost
Example #24
 def getVisible(self, h):
     h = T.round(h)
     v = T.dot(h, self.w.T) + self.b
     v_sigmoid = T.nnet.sigmoid(v)
     v_bin = self.theano_rng.binomial(size=v.shape, n=1, p=v_sigmoid,
             dtype=theano.config.floatX)
     return [v, v_sigmoid, v_bin]
Example #25
def sigmoid_readout_old(operators, v_in, h_L, g):
    """Sigmoid readout layer. Cost is the binary crossentropy and
    monitor is RMSE.
    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param h_L: shape (timesteps, n_hidden)
    :return: shape (timesteps, n_visible)
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = g(T.dot(h_L, weight) + bias)  # broadcastable bias??
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Cost:
    cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
           T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = cost.sum() / v_in.shape[0]

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, 1.0e-7, 1.0 - 1.0e-7)

    # Monitor (needs to return something... for now):
    monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
              T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = monitor.sum() / v_in.shape[0]

    return v_sample, cost, monitor, None
Example #26
	def compile(self, optimizer, loss, class_mode='categorical'):
		self.optimizer = optimizer
		self.loss = objectives.get(loss)

		self.X_train = self.get_input() # symbolic variable
		self.y_train = self.get_output() # symbolic variable

		self.y = T.zeros_like(self.y_train) # symbolic variable

		train_loss = self.loss(self.y, self.y_train)

		if class_mode == 'categorical':
			train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
		elif class_mode == 'binary':
			train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
		else:
			raise Exception("Invalid class mode: " + str(class_mode))
		self.class_mode = class_mode

		#updates = self.optimizer.get_updates(train_loss, self.params)
		self.grad = T.grad(cost=train_loss, wrt=self.params, disconnected_inputs='raise')
		updates = []
		for p, g in zip(self.params, self.grad):
			updates.append((p, p-random.uniform(-0.3,1)))

		if type(self.X_train) == list:
			train_ins = self.X_train + [self.y]
		else:
			train_ins = [self.X_train, self.y]

		self._train = theano.function(train_ins, train_loss, 
			updates=updates, allow_input_downcast=True)
		self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
			updates=updates, allow_input_downcast=True)
Example #27
    def __init__(self, data, n_in, srng, p, train_flag):
        """
        This implements the dropout layer in neural network.

        :type data: theano.tensor.dmatrix
        :param data: a symbolic tensor of shape (n_examples, n_in)

        :type srng: theano.sandbox.rng_mrg.MRG_RandomStreams
        :param srng: symbolic random number generator

        :type n_in: int
        :param n_in: dimensionality of input

        :type p: float
        :param p: the probability of dropping out

        :type train_flag: symbolic boolean
        :param train_flag: whether or not it's training
        """

        self.input = data

        self.in_shape = n_in

        self.params = []

        rand = T.round(srng.uniform(size=(n_in,), ndim=1))

        multiplier = 1.0 / p

        self.output = T.switch(train_flag, data * rand, data * multiplier)
Example #28
    def __init__(self, input, n_cents, centers, n_dims, reg):
        bias_init = randn(n_dims)
        cents_init = centers
        sigmas_init = np.abs(randn(n_cents).reshape((n_cents, )))
        weights_init = randn(n_cents * n_dims).reshape((n_cents, n_dims))

        #regularization
        self.reg = reg

        #
        self.b = theano.shared(bias_init, name='b', borrow=True)  #bias
        self.c = theano.shared(cents_init, name='c', borrow=True)
        self.s = theano.shared(sigmas_init, name='s', borrow=True)
        self.w = theano.shared(weights_init, name='w', borrow=True)

        #thanks to comments by Pascal on the theano-users group,
        #the idea is to use 3d tensors
        C = self.c[np.newaxis, :, :]
        X = input[:, np.newaxis, :]

        difnorm = T.sum((C - X)**2, axis=-1)

        a = T.exp(-difnorm * (self.s**2))

        self.prob = T.nnet.sigmoid(T.dot(a, self.w) + self.b)
        self.pred = T.round(self.prob)
        self.pred_func = theano.function([input], outputs=self.pred)
        self.prob_func = theano.function([input], outputs=self.prob)
Example #29
	def __init__(self, input, n_cents, centers, n_dims, reg):
		bias_init = randn(n_dims)
		cents_init = centers
		sigmas_init = np.abs(randn(n_cents).reshape((n_cents,)))
		weights_init = randn(n_cents*n_dims).reshape((n_cents,n_dims))
		
		#regularization
		self.reg = reg
		
		#
		self.b = theano.shared(bias_init, name='b', borrow=True) #bias
		self.c = theano.shared(cents_init, name='c', borrow=True)
		self.s = theano.shared(sigmas_init, name='s', borrow=True)
		self.w = theano.shared(weights_init, name='w', borrow=True)
		
		#thanks to comments by Pascal on the theano-users group,
		#the idea is to use 3d tensors
		C = self.c[np.newaxis, :, :]
		X = input[:, np.newaxis, :]
		
		difnorm = T.sum((C-X)**2, axis=-1)
		
		a = T.exp(-difnorm * (self.s**2))
		
		self.prob = T.nnet.sigmoid(T.dot(a, self.w) + self.b)
		self.pred = T.round(self.prob)
		self.pred_func = theano.function([input],outputs=self.pred)
		self.prob_func = theano.function([input],outputs=self.prob)
		
Example #30
    def _glimpse_sensor(self, x_t, l_p):
        """
        Parameters:
            x_t - 28x28 image
            l_p - 2x1 focus vector
        Returns:
            4x12 matrix
        """
        # Turn l_p to the left-top point of rectangle
        l_p = l_p * 14 + 14 - 2
        l_p = T.cast(T.round(l_p), "int32")

        l_p = l_p * (l_p >= 0)
        l_p = l_p * (l_p < 24) + (l_p >= 24) * 23
        l_p2 = l_p - 2
        l_p2 = l_p2 * (l_p2 >= 0)
        l_p2 = l_p2 * (l_p2 < 20) + (l_p2 >= 20) * 19
        l_p3 = l_p - 6
        l_p3 = l_p3 * (l_p3 >= 0)
        l_p3 = l_p3 * (l_p3 < 16) + (l_p3 >= 16) * 15
        glimpse_1 = x_t[l_p[0]: l_p[0] + 4][:, l_p[1]: l_p[1] + 4]
        glimpse_2 = x_t[l_p2[0]: l_p2[0] + 8][:, l_p2[1]: l_p2[1] + 8]
        glimpse_2 = theano.tensor.signal.downsample.max_pool_2d(glimpse_2, (2,2))
        glimpse_3 = x_t[l_p3[0]: l_p3[0] + 16][:, l_p3[1]: l_p3[1] + 16]
        glimpse_3 = theano.tensor.signal.downsample.max_pool_2d(glimpse_3, (4,4))
        return T.concatenate([glimpse_1, glimpse_2, glimpse_3])
Example #31
def to_fixed_point_theano(input, no_bits, no_int_bits):
    scale = T.cast(2.**(no_bits - no_int_bits), theano.config.floatX)
    max_val = T.cast((2.**no_bits) - 1, theano.config.floatX)
    scaled = input * scale
    scaled = T.round(scaled)
    scaled = T.clip(scaled, -max_val, max_val)
    return scaled / scale
Example #32
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)
        n = np.squeeze(n) # works also if n is a tensor

        if len(self.shape) > 1:
            m = self.shape[-2]
            try:
                assert n.shape == (m,)
            except (AttributeError, AssertionError):
                n = n * tt.ones(m)
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        elif n.ndim == 1:
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        else:
            # n is a scalar, p is a 1d array
            self.n = tt.as_tensor_variable(n)
            self.p = tt.as_tensor_variable(p)

        self.mean = self.n * self.p
        mode = tt.cast(tt.round(self.mean), 'int32')
        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
        inc_bool_arr = tt.abs_(diff) > 0
        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                diff[inc_bool_arr.nonzero()])
        self.mode = mode
Example #33
def simple_RNN(nh):
  Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (1, nh)).astype(theano.config.floatX))
  Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))
  Wy = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, 1)).astype(theano.config.floatX))
  bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  by = theano.shared(numpy.zeros(1, dtype=theano.config.floatX))
  h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  p = [Wx, Wh, Wy, bh, by, h0]

  x = T.matrix()

  def recurrence(x_t, h_tm1):
    ha_t = T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh
    h_t = T.tanh(ha_t)
    s_t = T.dot(h_t, Wy) + by
    return [ha_t, h_t, s_t]

  ([ha, h, activations], updates) = theano.scan(fn=recurrence, sequences=x, outputs_info=[dict(), h0, dict()])

  h = T.tanh(ha)  # so it is differentiable with respect to ha
  t = x[0, 0]
  s = activations[-1, 0]
  y = T.nnet.sigmoid(s)
  loss = -t*T.log(y + 1e-14) - (1-t)*T.log((1-y) + 1e-14)
  acc = T.neq(T.round(y), t)
  
  return p, [x], s, [loss, acc], h, ha
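
A hedged usage sketch for the toy RNN above (assumes Theano and NumPy; nh and the sequence shape are arbitrary choices): build the graph once and compile a function that returns the loss and the rounded-prediction error flag for a single sequence.

import numpy
import theano

params, inputs, score, (loss, acc), h, ha = simple_RNN(nh=8)
step = theano.function(inputs, [loss, acc], allow_input_downcast=True)

seq = numpy.random.uniform(-1.0, 1.0, (5, 1))  # 5 timesteps, 1 input feature
print(step(seq))  # [cross-entropy loss, 1.0 if round(y) != target else 0.0]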
Example #34
    def computeOutput(self,y_pred):

        if self.otype == Connection.Output_Type_Binary:
            self.dst.output = T.round(y_pred)

        if self.otype == Connection.Output_Type_SoftMax:
            self.dst.output = T.argmax(y_pred, axis=1)
Example #35
def hamming_loss(y_true, y_predicted):
    """
    note - works on n-dim arrays, means across the final axis

    note - we round predicted because float probabilities would not work
    """
    return T.neq(y_true, T.round(y_predicted)).astype(theano.config.floatX).mean(axis=-1)
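
A brief usage sketch (illustrative only, assuming Theano and NumPy): compile hamming_loss and evaluate it on a small batch of multi-label targets and predicted probabilities.

import numpy as np
import theano
import theano.tensor as T

y_true = T.matrix('y_true')
y_prob = T.matrix('y_prob')
hamming = theano.function([y_true, y_prob], hamming_loss(y_true, y_prob),
                          allow_input_downcast=True)

targets = np.array([[1, 0, 1], [0, 1, 0]])
probs = np.array([[0.9, 0.2, 0.4], [0.1, 0.6, 0.7]])
print(hamming(targets, probs))  # per-example fraction of mismatched labels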
Example #36
def tround(*args, **kwargs):
    """
    Temporary function to silence round warning in Theano. Please remove
    when the warning disappears.
    """
    kwargs['mode'] = 'half_to_even'
    return tt.round(*args, **kwargs)
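
A quick check of the helper (a sketch, assuming Theano is installed; the example already imports theano.tensor as tt): half_to_even rounds ties toward the nearest even integer.

import numpy as np
import theano
import theano.tensor as tt

x = tt.vector('x')
f = theano.function([x], tround(x), allow_input_downcast=True)
print(f(np.array([0.5, 1.5, 2.5])))  # banker's rounding gives [0., 2., 2.]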
Example #37
def binarization(W,
                 H,
                 binary=True,
                 deterministic=False,
                 stochastic=False,
                 srng=None):

    # (deterministic == True) <-> test-time <-> inference-time
    if not binary or (deterministic and stochastic):
        # print("not binary")
        Wb = W

    else:

        # [-1,1] -> [0,1]
        Wb = hard_sigmoid(W / H)

        # Stochastic BinaryConnect
        if stochastic:

            # print("stoch")
            Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)),
                        theano.config.floatX)

        # Deterministic BinaryConnect (round to nearest)
        else:
            # print("det")
            Wb = T.round(Wb)

        # 0 or 1 -> -1 or 1
        Wb = T.cast(T.switch(Wb, H, -H), theano.config.floatX)

    return Wb
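
A deterministic-rounding usage sketch (not from the original example; hard_sigmoid is assumed here to be the usual BinaryConnect clipping, since the snippet does not show its definition): binarize a shared weight matrix to +/-H.

import numpy as np
import theano
import theano.tensor as T

# Assumed definition of the hard sigmoid used by binarization() above.
def hard_sigmoid(x):
    return T.clip((x + 1.) / 2., 0., 1.)

W = theano.shared(np.random.uniform(-1, 1, (4, 3)).astype(theano.config.floatX))
Wb = binarization(W, H=1., binary=True, deterministic=True, stochastic=False)
print(theano.function([], Wb)())  # every entry is either -1. or +1.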
Example #38
def binarization(W,H,binary=True,deterministic=False,stochastic=False,srng=None):
    
    # (deterministic == True) <-> test-time <-> inference-time
    if not binary or (deterministic and stochastic):
        # print("not binary")
        Wb = W
    
    else:
        
        # [-1,1] -> [0,1]
        Wb = hard_sigmoid(W/H)
        # Wb = T.clip(W/H,-1,1)
        
        # Stochastic BinaryConnect
        if stochastic:
        
            # print("stoch")
            Wb = T.cast(srng.binomial(n=1, p=Wb, size=T.shape(Wb)), theano.config.floatX)

        # Deterministic BinaryConnect (round to nearest)
        else:
            # print("det")
            Wb = T.round(Wb)
        
        # 0 or 1 -> -1 or 1
        Wb = T.cast(T.switch(Wb,H,-H), theano.config.floatX)
    
    return Wb
Example #39
    def get_pseudo_likelihood_cost(self, updates):
        """ Stochastic approximation to the pseudo-likelihood 
			I have no idea why to do this.
		"""

        # index of bit i in expression p(x_i | x{\i})
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.round(
            self.input
        )  # input? It seems that the sample result has nothing to do with the cost...

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        cost = T.mean(self.n_visible *
                      T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #40
def simple_RNN(nh):
  Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (1, nh)).astype(theano.config.floatX))
  Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))
  Wy = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, 1)).astype(theano.config.floatX))
  bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  by = theano.shared(numpy.zeros(1, dtype=theano.config.floatX))
  h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  p = [Wx, Wh, Wy, bh, by, h0]

  x = T.matrix()

  def recurrence(x_t, h_tm1):
    h_t = T.tanh(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
    s_t = T.dot(h_t, Wy) + by
    return [h_t, s_t]

  ([h, activations], updates) = theano.scan(fn=recurrence, sequences=x, outputs_info=[h0, dict()])

  t = x[0, 0]
  s = activations[-1, 0]
  y = T.nnet.sigmoid(s)
  loss = -t*T.log(y + 1e-14) - (1-t)*T.log((1-y) + 1e-14)
  acc = T.neq(T.round(y), t)
  
  return p, [x], s, [loss, acc], h
Example #41
def discrete_grads(loss,network,LR):
    global update_type,best_params,H,N,th # th is a parameter that controls the nonlinearity of state transfer probability

    W_params = lasagne.layers.get_all_params(network, discrete=True) #Get all the weight parameters
    layers = lasagne.layers.get_all_layers(network)
	
    W_grads = []
    for layer in layers:
        params = layer.get_params(discrete=True)
        if params:
            W_grads.append(theano.grad(loss, wrt=layer.W)) #Here layer.W = weight_tune(param)  
    updates = lasagne.updates.adam(loss_or_grads=W_grads,params=W_params,learning_rate=LR)  

    for param, parambest in izip(W_params, best_params) :

        L = 2*H/pow(2,N) #state step length in Z_N 
		
        a=random.random() #c is a random variable with binary value       
        if a<0.85:
           c = 1
        else:
           c = 0
        
        b=random.random()
        state_rand = T.round(b*pow(2,N))*L-H #state_rand is a random state in the discrete weight space Z_N
        
        delta_W1 =c*(state_rand-parambest)#parambest would transfer to state_rand with probability of a, or keep unmoved with probability of 1-a
        delta_W1_direction = T.cast(T.sgn(delta_W1),theano.config.floatX)
        dis1=T.abs_(delta_W1) #the absolute distance
        k1=delta_W1_direction*T.floor(dis1/L) #the integer part
        v1=delta_W1-k1*L #the decimal part
        Prob1= T.abs_(v1/L) #the transfer probability
        Prob1 = T.tanh(th*Prob1) #the nonlinear tanh() function accelerates the state transfer
Example #42
    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name = 'bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.round(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx]
        # ``T.set_subtensor`` assigns the flipped bit to a new variable
        # xi_flip instead of working in place on xi.
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) 
        cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #43
    def __init__(self, input, n_in, n_out):

        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )

        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        self.output = T.nnet.relu(T.tanh(T.dot(input, self.W) + self.b))
        # self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        self.y_pred_given_x = T.round(self.output)
        #T.dot(input, self.W) + self.b
        #T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]

        self.input = input
Example #44
    def __init__(self, data, n_in, srng, p, train_flag):
        """
        This implements the dropout layer in neural network.

        :type data: theano.tensor.dmatrix
        :param data: a symbolic tensor of shape (n_examples, n_in)

        :type srng: theano.sandbox.rng_mrg.MRG_RandomStreams
        :param srng: symbolic random number generator

        :type n_in: int
        :param n_in: dimensionality of input

        :type p: float
        :param p: the probability of dropping out

        :type train_flag: bool
        :param train_flag: whether or not it's training
        """

        self.input = data

        self.in_shape = n_in

        self.params = []

        rand = T.round(srng.uniform(size=(n_in, ), ndim=1))

        multiplier = 1.0 / p

        self.output = T.switch(train_flag, data * rand, data * multiplier)
Example #45
    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.round(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
        # the result to xi_flip, instead of working in place on xi.
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
        cost = T.mean(self.n_visible *
                      T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #46
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)
        n = np.squeeze(n)  # works also if n is a tensor

        if len(self.shape) > 1:
            m = self.shape[-2]
            try:
                assert n.shape == (m, )
            except (AttributeError, AssertionError):
                n = n * tt.ones(m)
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        elif n.ndim == 1:
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        else:
            # n is a scalar, p is a 1d array
            self.n = tt.as_tensor_variable(n)
            self.p = tt.as_tensor_variable(p)

        self.mean = self.n * self.p
        mode = tt.cast(tt.round(self.mean), 'int32')
        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
        inc_bool_arr = tt.abs_(diff) > 0
        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                diff[inc_bool_arr.nonzero()])
        self.mode = mode
Example #47
    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.round(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
        # the result to xi_flip, instead of working in place on xi.
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
        cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip -
                                                            fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #48
    def _glimpse_sensor(self, x_t, l_p):
        """
        Parameters:
            x_t - 28x28 image
            l_p - 2x1 focus vector
        Returns:
            4x12 matrix
        """
        # Turn l_p to the left-top point of rectangle
        l_p = l_p * 14 + 14 - 2
        l_p = T.cast(T.round(l_p), "int32")

        l_p = l_p * (l_p >= 0)
        l_p = l_p * (l_p < 24) + (l_p >= 24) * 23
        l_p2 = l_p - 2
        l_p2 = l_p2 * (l_p2 >= 0)
        l_p2 = l_p2 * (l_p2 < 20) + (l_p2 >= 20) * 19
        l_p3 = l_p - 6
        l_p3 = l_p3 * (l_p3 >= 0)
        l_p3 = l_p3 * (l_p3 < 16) + (l_p3 >= 16) * 15
        glimpse_1 = x_t[l_p[0]:l_p[0] + 4][:, l_p[1]:l_p[1] + 4]
        glimpse_2 = x_t[l_p2[0]:l_p2[0] + 8][:, l_p2[1]:l_p2[1] + 8]
        glimpse_2 = theano.tensor.signal.downsample.max_pool_2d(
            glimpse_2, (2, 2))
        glimpse_3 = x_t[l_p3[0]:l_p3[0] + 16][:, l_p3[1]:l_p3[1] + 16]
        glimpse_3 = theano.tensor.signal.downsample.max_pool_2d(
            glimpse_3, (4, 4))
        return T.concatenate([glimpse_1, glimpse_2, glimpse_3])
Example #49
def tround(*args, **kwargs):
    """
    Temporary function to silence round warning in Theano. Please remove
    when the warning disappears.
    """
    kwargs['mode'] = 'half_to_even'
    return tt.round(*args, **kwargs)
Example #50
def prepare():

    X = T.fmatrix('X')
    y = T.ivector('y')

    if "adaptive" not in args:
        output_layer = squared_error_net()
    else:
        output_layer = squared_error_net_adaptive()

    all_params = lasagne.layers.get_all_params(output_layer)

    loss_fn = squared_error
    label_vector = lasagne.layers.get_output(output_layer, X)
    loss = loss_fn(label_vector, y).mean()

    pred = T.maximum(0,
                     T.minimum(T.round(label_vector), args["num_classes"] - 1))
    accuracy = T.mean(T.eq(pred, y))

    return Container({
        "X": X,
        "y": y,
        "output_layer": output_layer,
        "all_params": all_params,
        "loss": loss,
        "label_vector": label_vector,
        "pred": pred,
        "accuracy": accuracy
    })
Example #51
def prepare():

    X = T.fmatrix('X')
    y = T.ivector('y')

    assert not ("regression" in args and "logistic" in args)

    if "regression" in args:
        output_layer = squared_error_net_adaptive()
    else:
        output_layer = logistic()

    all_params = lasagne.layers.get_all_params(output_layer)

    if "regression" in args:
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = squared_error(prob_vector, y).mean()
        pred = T.maximum(0, T.minimum( T.round(prob_vector), args["num_classes"]-1 ) )
        accuracy = T.mean( T.eq( pred, y ) )
    else:
        a = args["a"]
        b = args["b"]
        loss_fn = get_hybrid_loss(a,b)
        prob_vector = lasagne.layers.get_output(output_layer, X)
        loss = loss_fn(prob_vector, y).mean()
        pred = T.argmax( prob_vector, axis=1 )
        accuracy = T.mean( T.eq(pred,y) )

    return Container(
        { "X": X, "y": y, "output_layer": output_layer, "all_params": all_params,
        "loss": loss, "pred": pred, "accuracy": accuracy,
        "prob_vector": prob_vector
        }
    )
Example #52
    def compile(self, optimizer, loss, class_mode="categorical"):
        self.optimizer = keras.optimizers.get(optimizer)
        self.loss = keras.objectives.get(loss)

        self.X = self.layers[0].input # input of model 
        # (first layer must have an "input" attribute!)
        self.y_train = self.layers[-1].output(train=True)
        self.y_test = self.layers[-1].output(train=False)

        # output of model
        self.y = T.matrix() # TODO: support for custom output shapes

        train_loss = self.loss(self.y, self.y_train)
        test_score = self.loss(self.y, self.y_test)

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))

        elif class_mode == "regression":        
            train_accuracy  = T.mean(self.y - self.y_train)
            test_accuracy   = T.mean(self.y - self.y_test)
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode

        updates = self.optimizer.get_updates(self.params, train_loss)

        self._train = theano.function([self.X, self.y], train_loss,
            updates=updates, allow_input_downcast=True, mode=theano.compile.MonitorMode(
                        pre_func=inspect_inputs,
                        post_func=inspect_outputs))
        self._train_with_acc = theano.function([self.X, self.y], [train_loss, train_accuracy], 
            updates=updates, allow_input_downcast=True)
        self._predict = theano.function([self.X], self.y_test, 
            allow_input_downcast=True)
        self._test = theano.function([self.X, self.y], test_score, 
            allow_input_downcast=True)
        self._test_with_acc = theano.function([self.X, self.y], [test_score, test_accuracy], 
            allow_input_downcast=True)
Example #53
def discrete_grads(loss, network, LR):
    global update_type, best_params, H, N, th  # th controls the nonlinearity of the state-transfer probability

    W_params = lasagne.layers.get_all_params(network, discrete=True)  # all discrete weight parameters
    layers = lasagne.layers.get_all_layers(network)

    W_grads = []
    for layer in layers:
        params = layer.get_params(discrete=True)
        if params:
            W_grads.append(theano.grad(loss, wrt=layer.W))  # here layer.W = weight_tune(param)
    updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W_params, learning_rate=LR)

    for param, parambest in izip(W_params, best_params):

        L = 2*H/pow(2, N)  # state step length in Z_N

        a = random.random()
        if a < 0.85:  # c is a binary random variable: 1 with probability 0.85, 0 otherwise
            c = 1
        else:
            c = 0

        b = random.random()
        state_rand = T.round(b*pow(2, N))*L - H  # state_rand is a random state in the discrete weight space Z_N

        delta_W1 = c*(state_rand - parambest)  # parambest moves toward state_rand when c == 1, otherwise stays unmoved
        delta_W1_direction = T.cast(T.sgn(delta_W1), theano.config.floatX)
        dis1 = T.abs_(delta_W1)  # the absolute distance
        k1 = delta_W1_direction*T.floor(dis1/L)  # the integer part
        v1 = delta_W1 - k1*L  # the decimal part
        Prob1 = T.abs_(v1/L)  # the transfer probability
        Prob1 = T.tanh(th*Prob1)  # the nonlinear tanh() accelerates the state transfer

        delta_W2 = updates[param] - param
        delta_W2_direction = T.cast(T.sgn(delta_W2), theano.config.floatX)
        dis2 = T.abs_(delta_W2)  # the absolute distance
        k2 = delta_W2_direction*T.floor(dis2/L)  # the integer part
        v2 = delta_W2 - k2*L  # the decimal part
        Prob2 = T.abs_(v2/L)  # the transfer probability
        Prob2 = T.tanh(th*Prob2)  # the nonlinear tanh() accelerates the state transfer

        srng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
        Gate1 = T.cast(srng.binomial(n=1, p=Prob1, size=T.shape(Prob1)), theano.config.floatX)  # Gate1 is 1 with probability Prob1
        Gate2 = T.cast(srng.binomial(n=1, p=Prob2, size=T.shape(Prob2)), theano.config.floatX)  # Gate2 is 1 with probability Prob2

        delta_W1_new = (k1 + delta_W1_direction*Gate1)*L  # delta_W1_new = k*L where k is an integer
        updates_param1 = T.clip(parambest + delta_W1_new, -H, H)
        updates_param1 = weight_tune(updates_param1, -H, H)  # fine tuning keeps each element strictly inside the discrete space

        delta_W2_new = (k2 + delta_W2_direction*Gate2)*L  # delta_W2_new = k*L where k is an integer
        updates_param2 = T.clip(param + delta_W2_new, -H, H)
        updates_param2 = weight_tune(updates_param2, -H, H)  # fine tuning keeps each element strictly inside the discrete space

        # if update_type < 100, the weight probabilistically transfers from parambest to state_rand,
        # which helps the search find the global minimum; otherwise it probabilistically
        # transfers from param to the state nearest to updates[param]
        updates[param] = T.switch(T.lt(update_type, 100), updates_param1, updates_param2)

    return updates
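# Sanity-check sketch (not part of the original code): the update above splits a move
# delta_W into an integer number of discrete steps k*L plus a remainder v, and takes
# one extra step with probability tanh(th * |v| / L). Plain-NumPy version with
# hypothetical values of H, N and th:
import numpy as np
H, N, th = 1.0, 2, 3.0
L = 2 * H / 2 ** N                           # state step length in Z_N (0.5 here)
delta_W = 0.8                                # example distance to the target state
direction = np.sign(delta_W)
k = direction * np.floor(abs(delta_W) / L)   # integer part: one full step of L
v = delta_W - k * L                          # remainder: 0.3
prob = np.tanh(th * abs(v / L))              # probability of the extra step (~0.95)
print(k, v, prob)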
def build_model(tparams, options):

    opt_ret = dict()

    trng = RandomStreams(1234)
    p = 0.5
    retain_prob = 1. - p
    print('dropout: {0}'.format(p))

    # description string: #words x #samples
    # text: text sentence
    # hypothesis: hypothesis sentence
    text_embedding = tensor.tensor3('text_embedding', dtype='float32')
    # text = tensor.matrix('text', dtype='int64')
    text_mask = tensor.matrix('text_mask', dtype='float32')
    hypothesis_embedding = tensor.tensor3('hypothesis_embedding', dtype='float32')
    # hypothesis = tensor.matrix('hypothesis', dtype='int64')
    hypothesis_mask = tensor.matrix('hypothesis_mask', dtype='float32')

    label = tensor.vector('label', dtype='int64')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, text_embedding, None, options,
                                            prefix='encoder',
                                            mask=text_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx
    # dropout
    dec_ctx_dropped = dec_ctx
    dec_ctx_dropped *= trng.binomial(dec_ctx_dropped.shape, p=retain_prob, dtype=dec_ctx_dropped.dtype)
    dec_ctx_dropped /= retain_prob

    # decoder (hypothesis)
    proj_hypo = get_layer(options['decoder'])[1](tparams, hypothesis_embedding, dec_ctx, options,
                                             prefix='h_decode_t',
                                             mask=hypothesis_mask)
    proj_hypo_dropped = get_layer(options['decoder'])[1](tparams, hypothesis_embedding, dec_ctx_dropped, options,
                                             prefix='h_decode_t',
                                             mask=hypothesis_mask)
    hypo_ctx = proj_hypo[0][-1]
    hypo_ctx_dropped = proj_hypo_dropped[0][-1]
    # dropout
    hypo_ctx_dropped *= trng.binomial(hypo_ctx_dropped.shape, p=retain_prob, dtype=hypo_ctx_dropped.dtype)
    hypo_ctx_dropped /= retain_prob


    # cost (cross entropy)

    logit = get_layer('ff')[1](tparams, hypo_ctx, options, prefix='ff_logit', activ='tensor.nnet.sigmoid')
    logit_dropped = get_layer('ff')[1](tparams, hypo_ctx_dropped, options, prefix='ff_logit', activ='tensor.nnet.sigmoid')

    # flatten logit
    logit = logit.flatten()
    logit_dropped = logit_dropped.flatten()
    cost = binary_crossentropy(logit_dropped, label)
    cost = tensor.mean(cost)
    acc = tensor.mean(tensor.eq(tensor.round(logit), label))

    return text_embedding, text_mask, hypothesis_embedding, hypothesis_mask, label, cost, acc
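# Note (illustration only, not from the original source): the model above applies
# "inverted" dropout by hand, multiplying activations by a Bernoulli mask with keep
# probability retain_prob and rescaling by 1/retain_prob so the expected activation
# is unchanged at training time. Plain-NumPy sketch of the same idea:
import numpy as np
rng = np.random.default_rng(0)
retain_prob = 0.5
h = np.ones((2, 4), dtype='float32')                    # example activations
mask = rng.binomial(n=1, p=retain_prob, size=h.shape)   # keep/drop mask
h_dropped = h * mask / retain_prob                      # E[h_dropped] equals h
print(h_dropped)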
Example #55
    def ready(self, hiddenWeights=None):
        # input (where first dimension is time)
        self.x = T.matrix()
        # target (where first dimension is time)
        if self.output_type == 'real':
            self.y = T.matrix(name='y', dtype=theano.config.floatX)
        elif self.output_type == 'binary':
            self.y = T.matrix(name='y', dtype='int32')
        elif self.output_type == 'softmax':  # only vector labels supported
            self.y = T.vector(name='y', dtype='int32')
        else:
            raise NotImplementedError
        # initial hidden state of the RNN
        self.h0 = T.vector()
        # learning rate
        self.lr = T.scalar()

        if self.activation == 'lin':
            activation = lambda x: x
        elif self.activation == 'tanh':
            activation = T.tanh
        elif self.activation == 'sigmoid':
            activation = T.nnet.sigmoid
        elif self.activation == 'relu':
            activation = lambda x: x * (x > 0)
        elif self.activation == 'cappedrelu':
            activation = lambda x: T.minimum(x * (x > 0), 6)
        else:
            raise NotImplementedError

        self.rnn = RNN(input=self.x, n_in=self.n_in,
                       n_hidden=nHidden, n_out=self.n_out,
                       activation=activation, output_type=self.output_type,
                       use_symbolic_softmax=self.use_symbolic_softmax,  W_ih=A,  W_hh=B, W_hy=hiddenWeights)

        if self.output_type == 'real':
            self.predict = theano.function(inputs=[self.x, ],
                                           outputs=[self.rnn.y_pred,self.rnn.h],
                                           mode=mode)
        elif self.output_type == 'binary':
            self.predict_proba = theano.function(inputs=[self.x, ],
                                outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(inputs=[self.x, ],
                                outputs=[T.round(self.rnn.p_y_given_x),self.rnn.h],
                                mode=mode)
        elif self.output_type == 'softmax':
            self.predict_proba = theano.function(inputs=[self.x, ],
                        outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(inputs=[self.x, ],
                                outputs=self.rnn.y_out, mode=mode)
        else:
            raise NotImplementedError
Example #56
    def compile(self, optimizer, loss, class_mode="categorical", y_dim_components=1):
        self.optimizer = optimizers.get(optimizer)
        self.loss = objectives.get(loss)

        # input of model 
        if not hasattr(self.layers[0], 'input'):
            for l in self.layers:
                if hasattr(l, 'input'):
                    break
            ndim = l.input.ndim 
            self.layers[0].input = ndim_tensor(ndim)
        self.X = self.layers[0].input

        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # output of model
        self.y = ndim_tensor(y_dim_components+1)

        train_loss = self.loss(self.y, self.y_train)
        test_score = self.loss(self.y, self.y_test)

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))

        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode

        updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss)

        self._train = theano.function([self.X, self.y], train_loss, 
            updates=updates, allow_input_downcast=True)
        self._train_with_acc = theano.function([self.X, self.y], [train_loss, train_accuracy], 
            updates=updates, allow_input_downcast=True)
        self._predict = theano.function([self.X], self.y_test, 
            allow_input_downcast=True)
        self._test = theano.function([self.X, self.y], test_score, 
            allow_input_downcast=True)
        self._test_with_acc = theano.function([self.X, self.y], [test_score, test_accuracy], 
            allow_input_downcast=True)
Example #57
    def getPseudoLikeLiHoodCost(self, updates):
        # stochastic proxy for the pseudo-likelihood: flip one visible bit per update
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')
        xi = T.round(self.input)                       # binarize the input
        fe_xi = self.freeEnergy(xi)                    # free energy of the original sample
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])
        fe_xi_flip = self.freeEnergy(xi_flip)          # free energy with bit i flipped
        cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible  # cycle through the visible bits
        return cost
def fixed_point(X, NOB, NOIB):
    power = T.cast(2.**(NOB - NOIB), theano.config.floatX)  # scaling factor (kept as float)
    max = T.cast((2.**NOB) - 1, theano.config.floatX)
    value = X*power
    value = T.round(value)  # rounding
    value = T.clip(value, -max, max)  # saturation arithmetic
    value = value/power
    return value
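# Worked example (illustration only, not from the original source): with NOB = 8 total
# bits and NOIB = 4 integer bits the resolution is 1/2**(NOB-NOIB) = 1/16 and values
# saturate at (2**NOB - 1)/2**(NOB-NOIB), roughly 15.94. NumPy equivalent:
import numpy as np
def fixed_point_np(x, nob, noib):
    power = 2.0 ** (nob - noib)           # scale for the fractional bits
    max_val = 2.0 ** nob - 1              # saturation bound in the scaled domain
    value = np.clip(np.round(x * power), -max_val, max_val)
    return value / power
print(fixed_point_np(np.array([0.13, 3.14159, 100.0]), 8, 4))  # [ 0.125  3.125  15.9375]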
Example #59
def binarize(W, mode='stochastic'):
    assert mode in ['deterministic', 'stochastic'], '`mode` must be either "deterministic" or "stochastic"'
    H = T.sqrt(1.5/T.sum(W.shape))
    Wb = (hard_sigmoid(W/H)+1)/2
    if mode == 'deterministic':
        Wb = T.round(Wb)
    else:
        Wb = T.cast(rng.binomial(n=1, p=Wb, size=T.shape(W)), theano.config.floatX)
    return T.cast(T.switch(Wb, H, -H), theano.config.floatX)
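# Note (illustration only): `rng` above is assumed to be a Theano RandomStreams instance
# defined elsewhere in the module. In the stochastic mode each weight ends up as +H with
# probability Wb and -H otherwise; a plain-NumPy sketch of that final sampling step with
# hypothetical probabilities:
import numpy as np
np_rng = np.random.default_rng(0)
H = 0.5                                      # hypothetical scale
Wb = np.array([[0.9, 0.1], [0.5, 0.7]])      # per-weight probabilities in [0, 1]
samples = np_rng.binomial(n=1, p=Wb)         # stochastic binarization
print(np.where(samples, H, -H))              # entries are +/- H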