def __call__(self, x, h, c):
    # Fused LSTM step: one matmul produces all four gate pre-activations.
    concat = np.concatenate((x, h), axis=1)
    hidden = np.matmul(concat, self.W_full) + self.bias
    i, g, f, o = np.split(hidden, 4, axis=1)
    i = sigmoid(i)
    g = np.tanh(g)
    f = sigmoid(f + self.forget_bias)
    o = sigmoid(o)
    if self.train_mode:
        # Non-inverted dropout on the cell input g: mask at train time,
        # scale by the keep probability at test time.
        # (np.int is removed in modern NumPy; cast to the activation dtype.)
        mask = (np.random.rand(self.hidden_size) < self.dropout_keep_prob).astype(hidden.dtype)
        d_g = mask * g
    else:
        d_g = self.dropout_keep_prob * g
    new_c = c * f + d_g * i
    new_h = np.tanh(new_c) * o
    return new_h, new_c
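# A minimal, self-contained sketch of one step of the fused-gate math above,
# without the dropout branch. The shapes, the forget_bias value of 1.0, and the
# local `sigmoid` helper are illustrative assumptions, not the original module.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, n_in, n_hid = 2, 3, 4
W_full = np.random.randn(n_in + n_hid, 4 * n_hid) * 0.1
bias = np.zeros(4 * n_hid)
x = np.random.randn(batch, n_in)
h = np.zeros((batch, n_hid))
c = np.zeros((batch, n_hid))

concat = np.concatenate((x, h), axis=1)
i, g, f, o = np.split(np.matmul(concat, W_full) + bias, 4, axis=1)
new_c = c * sigmoid(f + 1.0) + np.tanh(g) * sigmoid(i)  # forget_bias = 1.0
new_h = np.tanh(new_c) * sigmoid(o)
print(new_h.shape, new_c.shape)  # (2, 4) (2, 4)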
def forward_all(self, x, z):
    assert x.ndim == 3
    assert z.ndim == 2
    # Append z as an extra feature channel: (len, batch) -> (len, batch, 1).
    xz = T.concatenate([x, z.dimshuffle((0, 1, "x"))], axis=2)
    h0 = T.zeros((1, x.shape[1], self.n_hidden), dtype=theano.config.floatX)
    h = self.rlayer.forward_all(xz)
    # Shift the hidden states right by one step so pz_t conditions on h_{t-1}.
    h_prev = T.concatenate([h0, h[:-1]], axis=0)
    assert h.ndim == 3
    assert h_prev.ndim == 3
    pz = sigmoid(T.dot(x, self.w1) + T.dot(h_prev, self.w2) + self.bias)
    assert pz.ndim == 2
    return pz
def forward(self, x_t, z_t, h_tm1, pz_tm1):
    print("z_t", z_t.ndim)  # debug
    pz_t = sigmoid(
        T.dot(x_t, self.w1)
        + T.dot(h_tm1[:, -self.n_hidden:], self.w2)
        + self.bias)
    # Feed the given z_t back in as an extra input column.
    xz_t = T.concatenate([x_t, z_t.reshape((-1, 1))], axis=1)
    h_t = self.rlayer.forward(xz_t, h_tm1)  # batch
    return h_t, pz_t
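# A NumPy sketch of the gate probability computed in forward()/forward_all()
# above, assuming w1 and w2 are vectors so pz_t is one probability per batch
# element. All names and shapes here are illustrative assumptions.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, n_in, n_hidden = 3, 5, 4
w1 = np.random.randn(n_in)
w2 = np.random.randn(n_hidden)
bias = 0.0
x_t = np.random.randn(batch, n_in)
h_tm1 = np.random.randn(batch, n_hidden)

# pz_t depends on the current input and the previous hidden state
pz_t = sigmoid(x_t @ w1 + h_tm1 @ w2 + bias)  # shape (batch,)
print(pz_t)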
def task(self):
    iScale = 10
    iExtent = 5
    iMin = -iScale * iExtent
    iMax = iScale * iExtent
    # Sample the sigmoid on [-iExtent, iExtent) at a resolution of 1/iScale.
    listDblX = [float(i) / float(iScale) for i in range(iMin, iMax)]
    listDblSigmoid = [nn.sigmoid(dblX) for dblX in listDblX]
    # list() so the series is JSON-serializable (zip is lazy on Python 3).
    listPairData = list(zip(listDblX, listDblSigmoid))
    return {
        "chart": {"defaultSeriesType": "line"},
        "title": {"text": "Sigmoid Function"},
        "xAxis": {"title": {"text": "Perceptron Input"},
                  "min": -iExtent, "max": iExtent},
        "yAxis": {"title": {"text": "Activation"}, "min": 0.0, "max": 1.0},
        "series": [{"name": "Activation", "data": listPairData}],
    }
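# The snippets above assume nn.sigmoid is the standard logistic function.
# A plain-Python equivalent (an assumption, not the project's nn module):
import math

def sigmoid(x):
    # maps any real input smoothly into (0, 1)
    return 1.0 / (1.0 + math.exp(-x))

xs = [i / 10.0 for i in range(-50, 50)]
points = list(zip(xs, (sigmoid(x) for x in xs)))  # same series task() builds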
def train(self):
    for epoch in range(10):
        for it, (x, y) in enumerate(self.data_loader):
            self.optim.zero_grad()
            x = torch.bernoulli(x)  # binarize the pixel intensities
            if cuda:
                x = x.cuda()
            x = Variable(x.view(-1, 1, 28, 28))
            out = nn_.sigmoid(self.mdl((x, 0))[0]).permute(0, 3, 1, 2)
            # per-pixel BCE, summed over C/H/W, averaged over the batch
            loss = utils.bceloss(out, x).sum(1).sum(1).sum(1).mean()
            loss.backward()
            self.optim.step()
            if ((it + 1) % 100) == 0:
                print('Epoch: [%2d] [%4d/%4d] loss: %.8f'
                      % (epoch + 1, it + 1,
                         len(self.data_loader.dataset) // 32, loss.data[0]))
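# utils.bceloss is not shown; a sketch of the same objective with the stock
# PyTorch API (hypothetical tensors standing in for `out` and `x` above):
import torch
import torch.nn.functional as F

probs = torch.rand(32, 1, 28, 28)                     # model outputs after sigmoid
targets = torch.bernoulli(torch.rand(32, 1, 28, 28))  # binarized images
# per-pixel BCE, summed over channel/height/width, averaged over the batch
loss = F.binary_cross_entropy(probs, targets, reduction='none')
loss = loss.sum(dim=(1, 2, 3)).mean()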
def sample(self, x_t, z_tm1, h_tm1):
    print("z_tm1", z_tm1.ndim, type(z_tm1))  # debug
    pz_t = sigmoid(
        T.dot(x_t, self.w1)
        + T.dot(h_tm1[:, -self.n_hidden:], self.w2)
        + self.bias)  # batch
    pz_t = pz_t.ravel()
    # Draw z_t ~ Bernoulli(pz_t); cast to floatX so it can be concatenated
    # with the float inputs below.
    z_t = T.cast(self.MRG_rng.binomial(size=pz_t.shape, p=pz_t),
                 theano.config.floatX)
    xz_t = T.concatenate([x_t, z_t.reshape((-1, 1))], axis=1)
    h_t = self.rlayer.forward(xz_t, h_tm1)
    return z_t, h_t
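# The sampling step above draws z_t ~ Bernoulli(pz_t) and casts it to floatX so
# it can be concatenated with the float inputs. A NumPy sketch of that draw
# (illustrative values, not the Theano graph):
import numpy as np

pz_t = np.array([0.9, 0.2, 0.6])  # gate probabilities for a batch of 3
z_t = np.random.binomial(n=1, p=pz_t).astype(np.float32)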
def train(self):
    for epoch in range(10):
        for it, (x, y) in enumerate(self.data_loader):
            self.optim.zero_grad()
            x = torch.bernoulli(x)  # binarize the pixel intensities
            x = Variable(x.view(-1, 784))
            out = nn_.sigmoid(self.mdl(x)[:, :, 0])
            # per-dimension BCE, summed over the 784 pixels, averaged over the batch
            loss = utils.bceloss(out, x).sum(1).mean()
            loss.backward()
            self.optim.step()
            if ((it + 1) % 10) == 0:
                print('Epoch: [%2d] [%4d/%4d] loss: %.8f'
                      % (epoch + 1, it + 1,
                         len(self.data_loader.dataset) // 32, loss.data[0]))
            # resample the autoregressive masks/ordering each iteration
            self.mdl.randomize()
import os
import sys
sys.path.append(os.path.abspath('home/navdeep/RLC/Robot-Learning-and-Control'))

import numpy as np
import nn

x = np.random.randint(0, 3, (1, 5, 5))
y = np.where(x > 0, 0, 1)
y = y.reshape([25])

# Architecture
conv1 = nn.convolve3d(shape=(10, 1, 3, 3), mode='same')
add1 = nn.add()
relu1 = nn.relu()
conv2 = nn.convolve3d(shape=(1, 10, 3, 3), mode='same')
add2 = nn.add()
lin = nn.linear((25, 25))
sigmoid = nn.sigmoid()
mse = nn.mse()
print('Architecture loaded')

# weights init
layer = [conv1, add1, relu1, conv2, add2, lin, sigmoid, mse]

# compute graph
def model(x, y, update=True):
    # note: relu1 is instantiated above but not applied in this (truncated) graph
    x = conv1.forward(x)
    x = add1.forward(x)
    x = conv2.forward(x)
    x = add2.forward(x)
    # x = x.reshape([25])
def weights():
    data = np.full((3, 3), 2)
    s = nn.sigmoid(data)
    # initialize a square weight matrix matching the data width
    weights = nn.init_weights(data.shape[1], data.shape[1])
    print(weights)
def test_sigmoid(self):
    # sigmoid and logit are inverses, so the round trip is the identity
    dblP = random.random()
    self.assertAlmostEqual(dblP, nn.sigmoid(logit(dblP)))
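# The test relies on logit being the inverse of the sigmoid. A sketch of that
# helper (an assumption; the test imports its own logit):
import math
import random

def logit(p):
    # inverse sigmoid: sigmoid(logit(p)) == p for p in (0, 1)
    return math.log(p / (1.0 - p))

p = random.random()
assert abs(1.0 / (1.0 + math.exp(-logit(p))) - p) < 1e-9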
    # Skip blanks and comments
    if (re.match(r"^\s*$", line) or re.match(r"^\s*#", line)):
        continue
    if len(line) > nn.__MAXSTRING__:
        print("ERROR - input line too long, max length of {} characters".format(
            nn.__MAXSTRING__))
        sys.exit()
    input_bits = np.zeros((1, nn.__MAXSTRING__ * 8))
    input_bits[0] = nn.word_to_bits(line)
    #
    # Calculate outputs based on input and network weights
    #
    l1 = nn.sigmoid(np.dot(input_bits, w1))
    l2 = nn.sigmoid(np.dot(l1, w2))
    (rows, columns) = np.shape(l2)
    #print("\nOutput learned values (rounded to T/F)")
    #for row in range(rows):
    #    for column in range(columns):
    #        formatstring = "{:.0f} "
    #        print(formatstring.format(l2[row][column]), end='')
    #    print()
    #print("\nString format:")
    #
    # Convert binary back to characters
    #
    index = 0
# and see how close it is to the desired output. How far off it is from the output is the error, and we
# adjust the weights by the derivative of that amount (so a small adjustment). Then repeat for 100,000 iterations.
#
#
# We will log our learning findings at each step. Write column headers of the CSV.
#
file_results = open("results.csv", "w+")
file_results.write("iter\tw1\tw2\tw3\tl1\tl2\tl3\tl4\n")

for iter in range(100000):
    # forward propagation: multiply the input matrix by the weights matrix to get new values.
    input_weighted = np.dot(input_dataset, w1)

    # Apply a sigmoid function to these weighted values to get them into the 0..1 range, since our
    # output_dataset is 0 or 1. Basically False or True.
    l1 = nn.sigmoid(input_weighted)

    # Figure out how far off from the expected output we were.
    l1_error = output_dataset - l1

    # Figure out how much we will change our weights by. Too little here and it will take a ton
    # of iterations to converge. Too much change and we lack resolution and may overshoot the target,
    # then adjust back by the same amount next time, thus just oscillating.
    # Note that l1 is 0..1, so derivative(l1) = l1 * (1 - l1) ranges from 0 to 0.25, a nice smooth curve like an
    # upside-down bowl. So our multiplication (used later) will be biggest when our value is around .5 (midpoint)
    # but very small close to 0 or 1, meaning we make larger updates while a unit is in the middle (could be
    # true or false, 0 or 1) and very small changes as it commits to 0 or 1.
    # Really, the core of this machine learning is searching the solution space looking for better and better answers.
    l1_delta = l1_error * nn.derivative(l1)

    # update weights
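# nn.derivative is not shown; for a sigmoid it is conventionally expressed in
# terms of the activation itself. A sketch under that assumption:
import numpy as np

def derivative(l1):
    # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)); peaks at 0.25 when l1 == 0.5
    return l1 * (1.0 - l1)

print(derivative(np.array([0.01, 0.5, 0.99])))  # ~[0.0099, 0.25, 0.0099]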
    return(bits)

input_bits = np.zeros((1, 32))
input_bits[0] = word_to_bits("test")
print("input_bits:\n", input_bits)

output_bits = np.zeros((1, 32))
output_bits = word_to_bits("food")  # note: this rebinds output_bits rather than filling the zeros
print("output_bits:\n", output_bits)

# Network of weights. 32 to match the input, 32 to match the output
np.random.seed(1)
w1 = 2 * np.random.random((32, 32)) - 1

for iter in range(1000):
    l1 = nn.sigmoid(np.dot(input_bits, w1))
    l1_error = output_bits - l1
    l1_delta = l1_error * nn.derivative(l1)
    w1 += np.dot(input_bits.T, l1_delta)

print("\n\nOutput learned values (actual)")
for row in l1:
    for column in row:
        print("{:05.5f} ".format(column), end='')
    print()

print("\nOutput learned values (rounded to T/F)")
for row in l1:
    for column in row:
        print("{:.0f} ".format(column), end='')
    print()
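# word_to_bits is defined above the `return(bits)` line but mostly cut off.
# A hypothetical reconstruction consistent with the 32-bit vectors used for the
# 4-character words "test" and "food" (8 ASCII bits per character):
import numpy as np

def word_to_bits(word, maxlen=4):
    bits = np.zeros(maxlen * 8)
    for i, ch in enumerate(word[:maxlen]):
        for b in range(8):
            bits[i * 8 + b] = (ord(ch) >> (7 - b)) & 1
    return bits

print(word_to_bits("test"))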
def forward(self, params, x):
    W1, b1 = params
    # the [0] is to make sure we return a scalar.
    return nn.sigmoid(np.matmul(W1, x) + b1[0])
mdl = model()
mdl.train()

n = 16
spl = utils.varify(np.random.randn(n, 1, 28, 28).astype('float32'))
spl.volatile = True  # inference only; no gradients needed
mdl.mdl = mdl.mdl.eval()

# Sample pixel by pixel in raster order, feeding each draw back into the input.
for i in range(28):
    for j in range(28):
        out, _ = mdl.mdl((spl, 0))
        out = out.permute(0, 3, 1, 2)
        proba = nn_.sigmoid(out[:, 0, i, j])
        spl.data[:, 0, i, j] = torch.bernoulli(proba).data
        #unif = torch.zeros_like(proba)
        #unif.data.uniform_(0,1)
        #spl[:,0,i,j] = torch.ge(proba,unif).float()

#plt.imshow(nn_.sigmoid(out[3,0]).data.numpy().reshape(28,28), cmap='gray')
path = './temp_results'
if not os.path.exists(path):
    os.makedirs(path)
for i in range(n):
    scipy.misc.imsave(path + '/{}.png'.format(i),
                      nn_.sigmoid(out[i, 0]).data.numpy().reshape(28, 28))
def forward(self, params, x):
    W1, b1, W2, b2 = params
    # the [0] is to make sure we return a scalar.
    h1 = np.tanh(np.matmul(W1, x) + b1)
    return nn.sigmoid(np.matmul(W2, h1) + b2[0])
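# Calling the two-layer forward() with hand-built parameters; the shapes below
# are illustrative assumptions (3 inputs, 4 hidden units, vector output weights
# so the matmul collapses to a scalar before b2[0] is added):
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

W1, b1 = np.random.randn(4, 3), np.random.randn(4)
W2, b2 = np.random.randn(4), np.random.randn(1)
x = np.random.randn(3)

h1 = np.tanh(np.matmul(W1, x) + b1)
y = sigmoid(np.matmul(W2, h1) + b2[0])  # scalar probability in (0, 1)
print(y)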
#
# So the idea of the work is that we take the input_dataset, adjust it by the synapse0 weights (multiply),
# and see how close it is to the desired output. How far off it is from the output is the error, and we
# adjust the weights by the derivative of that amount (so a small adjustment). Then repeat for 10,000 iterations.
#
#
# We will log our learning findings at each step. Write column headers of the CSV.
#
#file_results = open("results.csv","w+")
#file_results.write("iter\tw1\tw2\tw3\tl1\tl2\tl3\tl4\n")

for iter in range(10000):
    # Feed forward through layer1 and layer2 based on weights
    l1 = nn.sigmoid(np.dot(input_dataset, w1))
    l2 = nn.sigmoid(np.dot(l1, w2))

    # Figure out how much we missed on layer2
    l2_error = output_dataset - l2
    l2_delta = l2_error * nn.derivative(l2)

    # Figure out how much we will change our weights by. Too little here and it will take a ton
    # of iterations to converge. Too much change and we lack resolution and may overshoot the target,
    # then adjust back by the same amount next time, thus just oscillating.
    # Note that l1 is 0..1, so derivative(l1) = l1 * (1 - l1) ranges from 0 to 0.25, a nice smooth curve like an
    # upside-down bowl. So our multiplication (used later) will be biggest when our value is around .5 (midpoint)
    # but very small close to 0 or 1, meaning we make larger updates while a unit is in the middle (could be
    # true or false, 0 or 1) and very small changes as it commits to 0 or 1.
    # Really, the core of this machine learning is searching the solution space looking for better and better answers.
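# The loop above is cut off before the layer-1 backprop and the weight updates.
# A self-contained two-layer version of the same scheme on a toy XOR-style
# dataset (a sketch of the standard completion, not the original file):
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def derivative(l):
    return l * (1.0 - l)

np.random.seed(1)
input_dataset = np.array([[0., 0., 1.], [0., 1., 1.], [1., 0., 1.], [1., 1., 1.]])
output_dataset = np.array([[0.], [1.], [1.], [0.]])
w1 = 2 * np.random.random((3, 4)) - 1
w2 = 2 * np.random.random((4, 1)) - 1

for _ in range(10000):
    l1 = sigmoid(np.dot(input_dataset, w1))
    l2 = sigmoid(np.dot(l1, w2))
    l2_delta = (output_dataset - l2) * derivative(l2)
    # backpropagate: distribute the l2 error to l1 through w2, then update
    l1_delta = l2_delta.dot(w2.T) * derivative(l1)
    w2 += l1.T.dot(l2_delta)
    w1 += input_dataset.T.dot(l1_delta)

print(l2.round(2))  # should approach [[0], [1], [1], [0]]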
            loss.backward()
            self.optim.step()
            if ((it + 1) % 10) == 0:
                print('Epoch: [%2d] [%4d/%4d] loss: %.8f'
                      % (epoch + 1, it + 1,
                         len(self.data_loader.dataset) // 32, loss.data[0]))
            # resample the autoregressive masks/ordering each iteration
            self.mdl.randomize()

mdl = model()
mdl.train()

# Sample the 784 pixels following the model's autoregressive ordering.
spl = utils.varify(np.random.randn(64, 784).astype('float32'))
ranks = mdl.mdl.rx
ind = np.argsort(ranks)
for i in range(784):
    out = mdl.mdl(spl)
    spl[:, ind[i]] = torch.bernoulli(nn_.sigmoid(out[:, ind[i]]))

plt.imshow(nn_.sigmoid(out[56]).data.numpy().reshape(28, 28), cmap='gray')
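# A toy, self-contained illustration of the ordered resampling loop above:
# a strictly lower-triangular weight matrix makes dimension i depend only on
# dimensions sampled before it (all names and sizes here are assumptions).
import torch

torch.manual_seed(0)
n, d = 4, 6
W = torch.randn(d, d).tril(-1)  # strictly lower-triangular => autoregressive
x = torch.zeros(n, d)
for i in range(d):  # resample one dimension at a time, in order
    logits = x @ W.t()
    x[:, i] = torch.bernoulli(torch.sigmoid(logits[:, i]))
print(x)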