# Assumed module-level imports for the methods below: the original project
# appears to use numpy in a star-import style (note that numpy's sum shadows
# the builtin, which the axis= calls below rely on), and `dbn` is the
# project's helper module providing sigmoid/softmax. How `rand` and `random`
# are bound is an assumption (both refer to numpy.random here).
from numpy import (dot, tile, outer, array, exp, log, nansum, append, ones,
                   float64, finfo, sum)
from numpy import random
import numpy.random as rand
import dbn


def __contrastive_divergence_rbm__(self, vis, hid, linear):
    """
    One Gibbs step for a binary (or linear) RBM: reconstruct the visible
    units from the hidden activities, then recompute the hidden probabilities.
    """
    neg_vis = dbn.sigmoid(dot(hid, self.weights.T)
                          + tile(self.visible_biases, (len(vis), 1)))
    if linear:
        neg_hid_prob = dot(neg_vis, self.weights) + tile(self.hidden_biases,
                                                         (len(vis), 1))
    else:
        neg_hid_prob = dbn.sigmoid(dot(neg_vis, self.weights)
                                   + tile(self.hidden_biases, (len(vis), 1)))
    return neg_vis, neg_hid_prob
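# The `dbn` helper module referenced above is not part of this section. A
# minimal sketch of the two activation helpers it is assumed to provide
# (names taken from the calls above; these are the standard definitions, not
# necessarily the project's exact implementations):
import numpy as np


def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    return 1.0 / (1.0 + np.exp(-x))


def softmax(x):
    """Row-wise softmax; the row maximum is subtracted for numerical stability."""
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)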
def rbm_learn(self, epochs, first_layer=False, linear=False):
    """
    The learning of the RBMs. A higher number of epochs results in more
    training.
    @param epochs: The number of epochs.
    @param first_layer: Whether this RBM reads its input from the raw data.
    @param linear: Whether the hidden units are linear rather than logistic.
    """
    if linear:
        self.learning_rate = self.learning_rate * 0.01
    for epoch in range(epochs):
        errsum = 0
        batch_index = 0
        for _ in self.batches:
            # Positive phase - generate data from visible to hidden units.
            pos_vis = self.__get_input_data__(batch_index,
                                              first_layer=first_layer)
            batch_size = len(pos_vis)
            if linear:
                pos_hid_prob = dot(pos_vis, self.weights) + tile(
                    self.hidden_biases, (batch_size, 1))
            else:
                pos_hid_prob = dbn.sigmoid(
                    dot(pos_vis, self.weights)
                    + tile(self.hidden_biases, (batch_size, 1)))
            # Serialize the output of the RBM.
            self.__save_output__(batch_index, pos_hid_prob)

            # If a probability is higher than a uniform random sample, the
            # corresponding binary hidden state is 1.
            randoms = rand.rand(batch_size, self.num_hid)
            pos_hid = array(randoms < pos_hid_prob, dtype=int)

            # Negative phase - generate data from hidden to visible units and
            # then again to hidden units.
            neg_vis = pos_vis
            neg_hid_prob = pos_hid
            # Usually a single step of contrastive divergence (CD-1).
            for i in range(self.gibbs_steps):
                neg_vis, neg_hid_prob = self.__contrastive_divergence_rbm__(
                    neg_vis, pos_hid_prob, linear)

            # Accumulate the reconstruction error.
            errsum += sum((pos_vis - neg_vis) ** 2) / len(pos_vis)

            # Update weights and biases.
            self.delta_weights = (
                self.momentum * self.delta_weights
                + self.learning_rate * (
                    (dot(pos_vis.T, pos_hid_prob)
                     - dot(neg_vis.T, neg_hid_prob)) / batch_size
                    - self.weight_cost * self.weights))
            # TODO: RE-EVALUATE THE LAST LEARNING RATE
            self.delta_visible_biases = (
                self.momentum * self.delta_visible_biases
                + (self.learning_rate / batch_size)
                * (sum(pos_vis, axis=0) - sum(neg_vis, axis=0)))
            self.delta_hidden_biases = (
                self.momentum * self.delta_hidden_biases
                + (self.learning_rate / batch_size)
                * (sum(pos_hid_prob, axis=0) - sum(neg_hid_prob, axis=0)))
            self.weights += self.delta_weights
            self.visible_biases += self.delta_visible_biases
            self.hidden_biases += self.delta_hidden_biases
            batch_index += 1

        # Output the mean reconstruction error for the epoch.
        e = errsum / len(self.batches)
        err_str = "Epoch[%2d]: Error = %.07f" % (epoch + 1, e)
        self.fout(err_str)
        self.error += [e]
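# For reference, a minimal self-contained sketch of the CD-1 gradient estimate
# that rbm_learn applies per mini-batch (plain numpy; names and the learning
# rate are illustrative, and the momentum, weight cost and bias updates used
# by the class above are omitted):
import numpy as np


def cd1_weight_update(batch, weights, hid_bias, vis_bias, learning_rate=0.1):
    """Return the CD-1 weight update for one mini-batch of binary data."""
    # Positive phase: hidden probabilities given the data.
    pos_hid = 1.0 / (1.0 + np.exp(-(np.dot(batch, weights) + hid_bias)))
    # Sample binary hidden states, then reconstruct the visible units.
    hid_states = (np.random.rand(*pos_hid.shape) < pos_hid).astype(float)
    neg_vis = 1.0 / (1.0 + np.exp(-(np.dot(hid_states, weights.T) + vis_bias)))
    # Negative phase: hidden probabilities given the reconstruction.
    neg_hid = 1.0 / (1.0 + np.exp(-(np.dot(neg_vis, weights) + hid_bias)))
    # <v h>_data minus <v h>_reconstruction, averaged over the batch.
    grad = (np.dot(batch.T, pos_hid) - np.dot(neg_vis.T, neg_hid)) / len(batch)
    return learning_rate * grad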
def rsm_learn(self, epochs):
    """
    The learning of the first layer RBM (Replicated Softmax Model). A higher
    number of epochs results in more training.
    @param epochs: The number of epochs.
    """
    for epoch in range(epochs):
        perplexity = 0
        batch_index = 0
        for _ in self.batches:
            # Positive phase - generate data from visible to hidden units.
            pos_vis = self.__get_input_data__(batch_index, first_layer=True)
            batch_size = len(pos_vis)
            D = sum(pos_vis, axis=1)
            if epoch == 0:
                # Count the total number of words so the per-word perplexity
                # can be computed later.
                self.words += sum(pos_vis)
            pos_hid_prob = dbn.sigmoid(dot(pos_vis, self.weights)
                                       + outer(D, self.hidden_biases))
            # Serialize the output of the RBM.
            self.__save_output__(batch_index, pos_hid_prob)

            # If a probability is higher than a uniform random sample, the
            # corresponding binary hidden state is 1.
            randoms = rand.rand(batch_size, self.num_hid)
            pos_hid = array(randoms < pos_hid_prob, dtype=int)

            # Negative phase - generate data from hidden to visible units and
            # then again to hidden units.
            neg_vis = pos_vis
            neg_hid_prob = pos_hid
            # Usually a single step of contrastive divergence (CD-1).
            for i in range(self.gibbs_steps):
                neg_vis, neg_hid_prob, D, p = self.__contrastive_divergence_rsm__(
                    neg_vis, pos_hid_prob, D)
                if i == 0:
                    perplexity += p

            pos_products = dot(pos_vis.T, pos_hid_prob)
            pos_visible_bias_activation = sum(pos_vis, axis=0)
            pos_hidden_bias_activation = sum(pos_hid_prob, axis=0)
            neg_products = dot(neg_vis.T, neg_hid_prob)
            neg_visible_bias_activation = sum(neg_vis, axis=0)
            neg_hidden_bias_activation = sum(neg_hid_prob, axis=0)

            # Update the weights and biases.
            self.delta_weights = (
                self.momentum * self.delta_weights
                + self.learning_rate * (
                    (pos_products - neg_products) / batch_size
                    - self.weight_cost * self.weights))
            self.delta_visible_biases = (
                self.momentum * self.delta_visible_biases
                + (pos_visible_bias_activation - neg_visible_bias_activation)
            ) * (self.learning_rate / batch_size)
            self.delta_hidden_biases = (
                self.momentum * self.delta_hidden_biases
                + (pos_hidden_bias_activation - neg_hidden_bias_activation)
            ) * (self.learning_rate / batch_size)
            self.weights += self.delta_weights
            self.visible_biases += self.delta_visible_biases
            self.hidden_biases += self.delta_hidden_biases
            batch_index += 1

        if epoch != 0:
            # Output the per-word perplexity for the epoch.
            perplexity = exp(-perplexity / self.words)
            err_str = "Epoch[%2d]: Perplexity = %.02f" % (epoch, perplexity)
            self.fout(err_str)
            self.error += [perplexity]
        self.fprogress()
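# The perplexity reported above is the standard per-word perplexity
# exp(-(1/N) * sum_dw n_dw * log p_dw), where n_dw is the count of word w in
# document d, p_dw is the model's softmax reconstruction probability and N is
# the total word count. A tiny illustrative computation with toy numbers:
import numpy as np

counts = np.array([[2.0, 1.0, 0.0],   # two toy documents over a 3-word vocabulary
                   [0.0, 3.0, 1.0]])
probs = np.array([[0.5, 0.3, 0.2],    # reconstruction probabilities, rows sum to 1
                  [0.1, 0.6, 0.3]])
log_likelihood = np.nansum(counts * np.log(probs))   # matches the nansum term above
perplexity = np.exp(-log_likelihood / counts.sum())  # per-word perplexity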
def generate_output_data(x, weight_matrices_added_biases, binary_output=False,
                         sampled_noise=None):
    """
    Compute the forward pass through the deep autoencoder and return the output.
    @param x: The BOW matrix; the indexing below expects a trailing column of ones.
    @param weight_matrices_added_biases: The weight matrices with the biases
    appended as the last row.
    @param binary_output: Whether the output of the DBN must be binary. If so,
    Gaussian noise is added to the bottleneck.
    @param sampled_noise: The Gaussian noise matrix in case of binary output units.
    """
    z_values = []
    NN = sum(x, axis=1)
    for i in range(len(weight_matrices_added_biases) - 1):
        if i == 0:
            # First layer: replicated softmax visible units, so the hidden
            # bias is scaled by the document length.
            z = dbn.sigmoid(
                dot(x[:, :-1], weight_matrices_added_biases[i][:-1, :])
                + outer(NN, weight_matrices_added_biases[i][-1, :]))
        elif i == (len(weight_matrices_added_biases) // 2) - 1:
            # Bottleneck (code) layer: linear units, optionally with noise.
            act = dot(z_values[i - 1], weight_matrices_added_biases[i])
            if binary_output and sampled_noise is not None:
                z = act + sampled_noise
            else:
                z = act
        else:
            z = dbn.sigmoid(dot(z_values[i - 1],
                                weight_matrices_added_biases[i]))
        # Append a column of ones so the next matrix's bias row is applied.
        z = append(z, ones((len(x), 1), dtype=float64), axis=1)
        z_values.append(z)
    neg_vis = dot(z_values[-1], weight_matrices_added_biases[-1])
    xout = dbn.softmax(neg_vis)
    # Replace exact zeros with machine epsilon to avoid log(0) downstream.
    xout[xout == 0] = finfo(float).eps
    return xout, z_values
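# The `sampled_noise` argument above is a pre-sampled Gaussian noise matrix
# added to the bottleneck activations when binary (semantic-hashing style)
# codes are wanted. A minimal sketch of how such a matrix could be drawn; the
# standard deviation of 4 (variance 16) follows the semantic-hashing
# literature, and both the value and the dimension names are assumptions, not
# the project's exact settings:
import numpy as np

batch_size, num_code_units = 100, 32   # illustrative dimensions
sampled_noise = np.random.normal(loc=0.0, scale=4.0,
                                 size=(batch_size, num_code_units))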
def __contrastive_divergence_rsm__(self, vis, hid, D):
    """
    One Gibbs step for the Replicated Softmax Model: sample a multinomial
    reconstruction of each document's word counts, then recompute the hidden
    probabilities.
    """
    neg_vis = dot(hid, self.weights.T) + self.visible_biases
    softmax_value = dbn.softmax(neg_vis)
    # Resample each document's D[i] words from its softmax distribution.
    neg_vis *= 0
    for i in range(len(vis)):
        neg_vis[i] = random.multinomial(D[i], softmax_value[i], size=1)
    D = sum(neg_vis, axis=1)
    # Accumulate the (unnormalised) log-likelihood used for the perplexity.
    perplexity = nansum(vis * log(softmax_value))
    neg_hid_prob = dbn.sigmoid(dot(neg_vis, self.weights)
                               + outer(D, self.hidden_biases))
    return neg_vis, neg_hid_prob, D, perplexity
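# To illustrate the multinomial reconstruction above: each document's D[i]
# words are redistributed over the vocabulary according to the softmax
# probabilities. A tiny standalone example with toy numbers:
import numpy as np

doc_length = 10                          # D[i]: number of words in one document
word_probs = np.array([0.5, 0.3, 0.2])   # softmax_value[i]: one row, sums to 1
reconstruction = np.random.multinomial(doc_length, word_probs)
# `reconstruction` is an integer vector summing to doc_length, e.g. [6, 3, 1].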
def generate_output_data(x, weight_matrices_added_biases):
    """
    Run through the deep autoencoder and compute the output. This is the
    variant with sigmoid output units and without the binary bottleneck.
    @param x: The BOW.
    @param weight_matrices_added_biases: The weight matrices with the biases
    appended as the last row.
    """
    z_values = []
    NN = sum(x, axis=1)  # NOTE: computed but not used in this variant.
    for i in range(len(weight_matrices_added_biases) - 1):
        if i == 0:
            z = dbn.sigmoid(dot(x, weight_matrices_added_biases[i]))
        elif i == (len(weight_matrices_added_biases) // 2) - 1:
            # Bottleneck (code) layer: linear units.
            z = dot(z_values[i - 1], weight_matrices_added_biases[i])
        else:
            z = dbn.sigmoid(dot(z_values[i - 1],
                                weight_matrices_added_biases[i]))
        # Append a column of ones so the next matrix's bias row is applied.
        z = append(z, ones((len(x), 1), dtype=float64), axis=1)
        z_values.append(z)
    xout = dbn.sigmoid(dot(z_values[-1], weight_matrices_added_biases[-1]))
    return xout, z_values
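# Both forward passes rely on the same "added biases" convention: each weight
# matrix carries its layer's bias vector as an extra last row, and the layer
# input carries a matching trailing column of ones. A self-contained
# one-layer sketch of that convention (toy sizes, illustrative names):
import numpy as np

n_in, n_out, batch = 4, 3, 2
weights = np.random.randn(n_in, n_out) * 0.1
biases = np.zeros(n_out)
weight_added_bias = np.vstack([weights, biases])      # shape (n_in + 1, n_out)
layer_input = np.hstack([np.random.rand(batch, n_in),
                         np.ones((batch, 1))])        # trailing ones column
pre_activation = np.dot(layer_input, weight_added_bias)  # equals x.dot(W) + b
# In the autoencoder above, this would then be passed through dbn.sigmoid.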