def random_sample_bas(length, num_samples):
    """ Generates the distribution corresponding to num_samples samples drawn from the 
    (length x length) BAS dataset, showing bars or stripes.
    
    Args:
        length (int) : length of the bars/stripes.
        num_samples (int) : number of samples.
    Returns:
        (1darray) : generated probability distribution.
    """

    stripes = npext.generate_binary_code(length)
    stripes = np.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)

    bars = npext.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = np.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    # Drop the last stripe and the first bar so the all-zeros and all-ones
    # images (present in both sets) appear only once.
    data = np.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))

    distrib = np.zeros(2**(length*length))
    for sample in range(num_samples):
        i = np.random.randint(0, len(data))
        # Interpret the flattened binary image as an integer index.
        bin_string = ''.join(str(int(data[i, j])) for j in range(length ** 2))
        number = int(bin_string, 2)
        distrib[number] += 1.0 / num_samples
    return distrib


def generate_bas_complete(length):
    """ Creates the exact probability distribution of the (length x length) BAS dataset.
    Args:
        length (int) : length of the bars/stripes.
    Returns:
        (1darray) : generated probability distribution.
    """

    stripes = npext.generate_binary_code(length)
    stripes = np.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)

    bars = npext.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = np.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    # Drop the last stripe and the first bar so the all-zeros and all-ones
    # images (present in both sets) appear only once.
    data = np.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))

    # generate distribution
    distrib = np.zeros(2**(length*length))
    for i in range(len(data)):
        # Interpret the flattened binary image as an integer index.
        bin_string = ''.join(str(int(data[i, j])) for j in range(length ** 2))
        number = int(bin_string, 2)
        distrib[number] = 1.0 / len(data)
    return distrib
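
# Hedged usage sketch (illustrative, not part of the original example): both
# distributions are normalized, and the empirical one approaches the exact one
# as num_samples grows. Assumes the same np/npext imports as the code above.
exact = generate_bas_complete(2)
approx = random_sample_bas(2, 100000)
assert abs(exact.sum() - 1.0) < 1e-9
assert abs(approx.sum() - 1.0) < 1e-9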
Example #3
def _LL_exact_check(model, x, lnZ):
    ''' Computes the exact log likelihood for x by summing over all possible
        states for h1, h2. Only possible for small hidden layers!

        This is just a proof of concept; use LL_exact() instead, it is heaps faster!

    :Parameters:
        model:  The model
               -type: Valid DBM model

        x:      Input states.
               -type: numpy array [batch size, input dim]

        lnZ:    Logarithm of the partition function.
               -type: float

    :Returns:
        Exact log likelihood for x.
       -type: numpy array [batch size, 1]

    '''
    # Generate all binary codes
    all_h1 = npExt.generate_binary_code(model.W2.shape[0])
    all_h2 = npExt.generate_binary_code(model.W2.shape[1])
    result = numx.zeros(x.shape[0])
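    # Accumulate exp(-E(x_i, h1_j, h2_k)) over all hidden configurations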
    for i in range(x.shape[0]):
        for j in range(all_h1.shape[0]):
            for k in range(all_h2.shape[0]):
                result[i] += numx.exp(-model.energy(
                    x[i].reshape(1, x.shape[1]),
                    all_h1[j].reshape(1, all_h1.shape[1]),
                    all_h2[k].reshape(1, all_h2.shape[1]),
                ))
    return numx.log(result) - lnZ
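
# Hedged sanity check (illustrative, not from the original source): with a mock
# DBM whose energy is identically zero, every one of the 2**n_v visible states
# has probability 2**(-n_v), so the exact LL must equal -n_v * ln(2).
class _ZeroEnergyDBM(object):
    """ Minimal stand-in exposing only what _LL_exact_check touches. """
    def __init__(self, n_h1, n_h2):
        self.W2 = numx.zeros((n_h1, n_h2))  # only its shape is read

    def energy(self, v, h1, h2):
        return 0.0  # constant zero energy for every configuration

mock = _ZeroEnergyDBM(n_h1=2, n_h2=2)
n_v = 3
ln_z = (n_v + 2 + 2) * numx.log(2.0)  # Z = 2**(n_v + n_h1 + n_h2)
ll = _LL_exact_check(mock, numx.zeros((1, n_v)), ln_z)
assert numx.allclose(ll, -n_v * numx.log(2.0))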
Example #4
    def unnormalized_log_probability_x(self, x):
        ''' Computes the unnormalized log probabilities of x.

        :Parameters:
            x:    Input layer states.
                 -type: numpy array [batch size, input dim]

        :Returns:
            Unnormalized log probability of x.
           -type: numpy array [batch size, 1]

        '''
        # Generate all possible binary codes for h1 and h2
        all_h1 = npExt.generate_binary_code(self.W2.shape[0])
        all_h2 = npExt.generate_binary_code(self.W2.shape[1])
        # Center variables
        xtemp = x - self.o1
        h1temp = all_h1 - self.o2
        h2temp = all_h2 - self.o3
        # Bias term
        bias = numx.dot(xtemp, self.b1.T)
        # Both quadratic terms
        part1 = numx.exp(numx.dot(
            numx.dot(xtemp, self.W1) + self.b2, h1temp.T))
        part2 = numx.exp(
            numx.dot(numx.dot(h1temp, self.W2) + self.b3, h2temp.T))
        # Dot product of all combinations of both quadratic terms + bias
        return bias + numx.log(
            numx.sum(numx.dot(part1, part2), axis=1).reshape(x.shape[0], 1))
Example #5
def partition_function_factorize_v(model, 
                                   beta=None, 
                                   batchsize_exponent='AUTO', 
                                   status=False):
    ''' Computes the true partition function for the given model by factoring 
        over the visible units.
       
    :Info:
        The computational cost grows exponentially with the number of visible
        units (16 units usually take ~ 20 seconds).
        
    :Parameters:
        model:              The model.
                           -type: Valid RBM model.
        
        beta:               Inverse temperature(s) for the model's energy.
                           -type: None, float, numpy array [batchsize,1]
        
        batchsize_exponent: 2^batchsize_exponent will be the batch size.
                           -type: int
        
        status:             If true prints the progress to the console.
                           -type: bool
    
    :Returns:
        Log Partition function for the model.
       -type: float
        
    '''    
    if status is True:
        print "Calculating the partition function by factoring over v: "
        print '%3.2f' % (0.0), '%'
        
    bit_length = model.input_dim
    if batchsize_exponent == 'AUTO' or batchsize_exponent > 20:
        batchsize_exponent = numx.min([model.input_dim, 12])
    batchSize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)

    num_batches = num_combinations // batchSize
    bitCombinations = numx.zeros((batchSize, model.input_dim))
    log_prob_vv_all = numx.zeros(num_combinations)
    
    for batch in range(1, num_batches + 1):
        # Generate current batch
        bitCombinations = npExt.generate_binary_code(bit_length, 
                                                     batchsize_exponent, 
                                                     batch - 1)

        # calculate LL
        log_prob_vv_all[(batch - 1) * batchSize:batch * batchSize] = \
            model.unnormalized_log_probability_v(
                bitCombinations, beta).reshape(bitCombinations.shape[0])
        # print status if wanted
        if status is True:
            print('%3.2f' % (100 * numx.double(batch) /
                             numx.double(num_batches)), '%')
    
    # return the log_sum of values
    return npExt.log_sum_exp(log_prob_vv_all)
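
# Hedged usage sketch (an assumption, not part of the original snippet): the
# import path below follows the pydeep-style package layout these examples
# appear to come from.
from pydeep.rbm.model import BinaryBinaryRBM

rbm = BinaryBinaryRBM(number_visibles=4, number_hiddens=3)
ln_z = partition_function_factorize_v(rbm)
# With ln(Z) in hand, the exact log-likelihood of data x is
# rbm.unnormalized_log_probability_v(x) - ln_z.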
Example #6
def generate_bars_and_stripes_complete(length):
    """ Creates a dataset containing all possible samples showing bars or stripes.

    :param length: Length of the bars/stripes.
    :type length: int

    :return: Samples.
    :rtype: numpy array [num_samples, length*length]
    """
    stripes = npext.generate_binary_code(length)
    stripes = numx.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)

    bars = npext.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = numx.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    # Drop the last stripe and the first bar so the all-zeros and all-ones
    # images (present in both sets) appear only once.
    return numx.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))
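
# Quick sanity check (illustrative, not part of the original example): stripes
# and bars each contribute 2**length patterns, and the all-zeros / all-ones
# images occur in both sets, so the dataset holds 2**(length + 1) - 2 samples.
data = generate_bars_and_stripes_complete(3)
assert data.shape == (2 ** 4 - 2, 3 * 3)  # 14 unique samples of 9 pixels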
Example #7
def partition_function_factorize_h(model,
                                   beta=None,
                                   batchsize_exponent='AUTO',
                                   status=False):
    """ Computes the true partition function for the given model by factoring over the hidden units.

        :Info: The computational cost grows exponentially with the number of hidden units (16 units usually take ~ 20 seconds).

    :param model: The model.
    :type model: Valid RBM model.

    :param beta: Inverse temperature(s) for the model's energy.
    :type beta: None, float, numpy array [batchsize,1]

    :param batchsize_exponent: 2^batchsize_exponent will be the batch size.
    :type batchsize_exponent: int

    :param status: If true prints the progress to the console.
    :type status: bool

    :return: Log Partition function for the model.
    :rtype: float
    """
    if status is True:
        print "Calculating the partition function by factoring over h: "
        print '%3.2f' % 0.0, '%'

    bit_length = model.output_dim
    if batchsize_exponent == 'AUTO' or batchsize_exponent > 20:
        batchsize_exponent = numx.min([model.output_dim, 12])
    batchsize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)

    num_batches = num_combinations // batchsize
    log_prob_vv_all = numx.zeros(num_combinations)

    for batch in range(1, num_batches + 1):
        # Generate current batch
        bitcombinations = numxext.generate_binary_code(bit_length,
                                                       batchsize_exponent,
                                                       batch - 1)

        # calculate LL
        log_prob_vv_all[(batch - 1) * batchsize:batch *
                        batchsize] = model.unnormalized_log_probability_h(
                            bitcombinations,
                            beta).reshape(bitcombinations.shape[0])

        # print status if wanted
        if status is True:
            print('%3.2f' % (100 * numx.double(batch) /
                             numx.double(num_batches)), '%')

    # return the log_sum of values
    return numxext.log_sum_exp(log_prob_vv_all)
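
# Hedged consistency check (illustrative, reusing the assumed pydeep-style
# model from the earlier sketch): factoring over the visibles and over the
# hiddens must yield the same log partition function for a small RBM.
from pydeep.rbm.model import BinaryBinaryRBM

rbm = BinaryBinaryRBM(number_visibles=4, number_hiddens=4)
ln_z_v = partition_function_factorize_v(rbm)
ln_z_h = partition_function_factorize_h(rbm)
assert numx.abs(ln_z_v - ln_z_h) < 1e-8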
Example #8
def generate_bars_and_stripes_complete(length):
    ''' Creates a dataset containing all possible samples showing bars or 
        stripes.
    
    :Parameters:
        length: Length of the bars/stripes.
               -type: int
        
    :Returns:
        Samples
       -type: numpy array [num_samples, length*length]
        
    '''
    stripes = npExt.generate_binary_code(length)
    stripes = numx.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)

    bars = npExt.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = numx.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    return numx.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))
Example #9
def _partition_function_exact_check(model, batchsize_exponent='AUTO'):
    ''' Computes the true partition function for the given model by factoring
        over the visible and hidden2 units.

        This is just a proof of concept; use _partition_function_exact()
        instead, it is heaps faster!

    :Parameters:
        model:              The model
                           -type: Valid DBM model

        batchsize_exponent: 2^batchsize_exponent will be the batch size.
                           -type: int

    :Returns:
        Log Partition function for the model.
       -type: float

    '''
    bit_length = model.W1.shape[1]
    if batchsize_exponent == 'AUTO' or batchsize_exponent > 20:
        batchsize_exponent = numx.min([model.W1.shape[1], 12])
    batchSize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)
    num_batches = num_combinations // batchSize
    bitCombinations = numx.zeros((batchSize, model.W1.shape[1]))
    log_prob_vv_all = numx.zeros(num_combinations)

    for batch in range(1, num_batches + 1):
        # Generate current batch
        bitCombinations = npExt.generate_binary_code(bit_length,
                                                     batchsize_exponent,
                                                     batch - 1)
        # calculate LL
        log_prob_vv_all[(batch - 1) * batchSize:batch *
                        batchSize] = model.unnormalized_log_probability_h1(
                            bitCombinations).reshape(bitCombinations.shape[0])
    # return the log_sum of values
    return npExt.log_sum_exp(log_prob_vv_all)
Example #10
    def _train(self,
               data,
               epsilon,
               k,
               momentum,
               reg_l1norm,
               reg_l2norm,
               reg_sparseness,
               desired_sparseness,
               update_visible_offsets,
               update_hidden_offsets,
               offset_typ,
               use_centered_gradient,
               restrict_gradient,
               restriction_norm,
               use_hidden_states):
        """ The training for one batch is performed using True Gradient (GD) for k Gibbs-sampling steps.

        :param data: The data used for training.
        :type data: numpy array [batch_size, input dimension]

        :param epsilon: The learning rate.
        :type epsilon: scalar or numpy array[num parameters] or numpy array[num parameters, parameter shape]

        :param k: Number of sampling steps.
        :type k: int

        :param momentum: The momentum term.
        :type momentum: scalar or numpy array[num parameters] or numpy array[num parameters, parameter shape]

        :param reg_l1norm: The parameter for the L1 regularization.
        :type reg_l1norm: float

        :param reg_l2norm: The parameter for the L2 regularization, also known as weight decay.
        :type reg_l2norm: float

        :param reg_sparseness: The parameter for the desired_sparseness regularization.
        :type reg_sparseness: None or float

        :param desired_sparseness: Desired average hidden activation or None for no regularization.
        :type desired_sparseness: None or float

        :param update_visible_offsets: The update step size for the model's visible offsets.
        :type update_visible_offsets: float

        :param update_hidden_offsets: The update step size for the model's hidden offsets.
        :type update_hidden_offsets: float

        :param offset_typ: | Different offsets can be used to center the gradient.
                           | Example: 'DM' uses the positive phase visible mean and the negative phase hidden mean.
                           | 'A0' uses the average of positive and negative phase means for the visibles and zero
                           | for the hiddens. Possible values are out of {A,D,M,0}x{A,D,M,0}.
        :type offset_typ: string

        :param use_centered_gradient: Uses the centered gradient instead of centering.
        :type use_centered_gradient: bool

        :param restrict_gradient: If a scalar is given the norm of the weight gradient (along the input dim) is \
                                  restricted to stay below this value.
        :type restrict_gradient: None, float

        :param restriction_norm: Restricts the column norm, row norm or Matrix norm.
        :type restriction_norm: string, 'Cols','Rows', 'Mat'

        :param use_hidden_states: If True, the hidden states are used for the gradient calculations, the hidden \
                                     probabilities otherwise.
        :type use_hidden_states: bool
        """
        # Sample the first time
        hid_probs_pos = self.model.probability_h_given_v(data)

        if update_visible_offsets != 0.0:
            xmean_pos = numx.mean(data, axis=0).reshape(1, self.model.input_dim)
        hmean_pos = 0.0
        if update_hidden_offsets != 0.0 or reg_sparseness != 0.0:
            if use_hidden_states:
                hid_states_pos = self.model.sample_h(hid_probs_pos)
                hmean_pos = numx.mean(hid_states_pos, axis=0).reshape(1, self.model.output_dim)
            else:
                hmean_pos = numx.mean(hid_probs_pos, axis=0).reshape(1, self.model.output_dim)

        # Calculate the partition function (batch_size here is the batch size
        # exponent: the actual batch size used below is 2 ** batch_size)
        if self.model.input_dim < self.model.output_dim:
            batch_size = numx.min([self.model.input_dim, 12])
            ln_z = estimator.partition_function_factorize_v(self.model, beta=1.0, batchsize_exponent=batch_size,
                                                            status=False)
        else:
            batch_size = numx.min([self.model.output_dim, 12])
            ln_z = estimator.partition_function_factorize_h(self.model, beta=1.0, batchsize_exponent=batch_size,
                                                            status=False)

        # empty negative phase parts
        neg_gradients = [numx.zeros(self.model.w.shape),
                         numx.zeros(self.model.bv.shape),
                         numx.zeros(self.model.bh.shape)]

        # Calculate gradient stepwise in batches
        bit_length = self.model.input_dim

        batchsize = numx.power(2, batch_size)
        num_combinations = numx.power(2, bit_length)
        num_batches = num_combinations // batchsize

        for batch in range(0, num_batches):
            # Generate current batch
            bit_combinations = numxext.generate_binary_code(bit_length, batch_size, batch)
            # P(x)
            prob_x = numx.exp(
                self.model.log_probability_v(ln_z, bit_combinations))
            # P(h|x)
            prob_h_x = self.model.probability_h_given_v(bit_combinations)
            # Calculate gradient
            neg_gradients[1] += numx.sum(numx.tile(prob_x, (1, self.model.input_dim)) * (bit_combinations -
                                                                                         self.model.ov), axis=0)
            prob_x = (numx.tile(prob_x, (1, self.model.output_dim)) * (prob_h_x - self.model.oh))
            neg_gradients[0] += numx.dot((bit_combinations - self.model.ov).T, prob_x)
            neg_gradients[2] += numx.sum(prob_x, axis=0)

        if update_visible_offsets != 0.0 and (offset_typ[0] == 'A' or offset_typ[0] == 'M'):
            bit_combinations = numxext.generate_binary_code(self.model.input_dim, None, 0)
            prob_x = numx.exp(self.model.log_probability_v(ln_z, bit_combinations))
            xmean_neg = numx.sum(prob_x * bit_combinations, axis=0).reshape(1, self.model.input_dim)

        if update_hidden_offsets != 0.0 and (offset_typ[1] == 'A' or offset_typ[1] == 'M'):
            bit_combinations = numxext.generate_binary_code(self.model.output_dim, None, 0)
            prob_h = numx.exp(self.model.log_probability_h(ln_z, bit_combinations))
            hmean_neg = numx.sum(prob_h * bit_combinations, axis=0).reshape(1, self.model.output_dim)

        new_visible_offsets = 0.0
        if update_visible_offsets != 0.0:
            if offset_typ[0] == 'A':
                new_visible_offsets = (xmean_pos + xmean_neg) * 0.5
            if offset_typ[0] == 'D':
                new_visible_offsets = xmean_pos
            if offset_typ[0] == 'M':
                new_visible_offsets = xmean_neg
            if offset_typ[0] == '0':
                new_visible_offsets = 0.0 * xmean_pos
        new_hidden_offsets = 0.0
        if update_hidden_offsets != 0.0:
            if offset_typ[1] == 'A':
                new_hidden_offsets = (hmean_pos + hmean_neg) * 0.5
            if offset_typ[1] == 'D':
                new_hidden_offsets = hmean_pos
            if offset_typ[1] == 'M':
                new_hidden_offsets = hmean_neg
            if offset_typ[1] == '0':
                new_hidden_offsets = 0.0 * hmean_pos

        if use_centered_gradient is False:
            # update the centers
            self.model.update_offsets(new_visible_offsets, new_hidden_offsets, update_visible_offsets,
                                      update_hidden_offsets)
            self.visible_offsets = 0.0
            self.hidden_offsets = 0.0
        else:
            self.hidden_offsets = ((1.0 - update_hidden_offsets) * self.hidden_offsets + update_hidden_offsets
                                   * new_hidden_offsets)
            self.visible_offsets = ((1.0 - update_visible_offsets) * self.visible_offsets + update_visible_offsets
                                    * new_visible_offsets)

        # Calculate positive phase gradient using states or probabilities
        if use_hidden_states:
            pos_gradients = self.model.calculate_gradients(data, hid_states_pos)
        else:
            pos_gradients = self.model.calculate_gradients(data, hid_probs_pos)

        # Multiply by the batch size since _adapt_gradient divides by it
        neg_gradients[0] *= data.shape[0]
        neg_gradients[1] *= data.shape[0]
        neg_gradients[2] *= data.shape[0]

        # Adapt the gradients by weight decay, momentum and learning rate
        self._adapt_gradient(pos_gradients=pos_gradients,
                             neg_gradients=neg_gradients,
                             batch_size=data.shape[0],
                             epsilon=epsilon,
                             momentum=momentum,
                             reg_l1norm=reg_l1norm,
                             reg_l2norm=reg_l2norm,
                             reg_sparseness=reg_sparseness,
                             desired_sparseness=desired_sparseness,
                             mean_hidden_activity=hmean_pos,
                             visible_offsets=self.visible_offsets,
                             hidden_offsets=self.hidden_offsets,
                             use_centered_gradient=use_centered_gradient,
                             restrict_gradient=restrict_gradient,
                             restriction_norm=restriction_norm)

        # update the parameters with the calculated gradient
        self.model.update_parameters(self.parameter_updates)