def __init__(self) -> None:
    super().__init__()
    self.activation = Tanh()
    # self.layer1 = self.Conv2D((1, 28, 28), (8, 3, 3), 1)
    # self.layer2 = self.MaxPool()
    # self.layer3 = self.Conv2D((2, 3, 3), 2)
    self.layer4 = Linear(784, 16)
    self.layer5 = Linear(16, 16)
    self.layer6 = Linear(16, 10)
def build_model():
    model = Sequential(MSE(), input_size=2)
    model.add_layer(Linear(2, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(Tanh(25))
    model.add_layer(Linear(25, 2))
    return model
def run_bigger_example():
    x = Tensor([1, 2, 3])
    y = Tensor([7, 10])
    print(x.shape, y.shape)
    linear1 = Linear(x.shape[0], x.shape[0], weight_init='ones')
    linear2 = Linear(x.shape[0], y.shape[0], weight_init='ones')
    net_2layer = Network([linear1, linear2])
    pred_2layer = net_2layer.forward(x)
    # loss.backward()
    print("pred_2layer is ")
    print(pred_2layer)
    mse = MSE()
    loss = mse.forward(pred_2layer, y)
    print("loss for 2 layer net is ")
    print(loss)  # Should be 2*(18-7) = 22
    loss_grad = mse.backward()
    print("loss_grad for 2layer net is ")
    print(loss_grad)
    print("Printing params Grad before ")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("now setting param grad to zero")
    net_2layer.zero_grad()
    print("Printing params Grad after ")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("Printing params before backward")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
    print("Doing backward pass")
    net_2layer.backward(loss_grad)
    print("Printing params after backward")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
    print("Printing params Grad")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("Doing param update")
    net_2layer.grad_step(lr=1e-3)
    print("Printing params after update")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
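
# Hand-checking the all-ones two-layer forward pass above (illustration only, assuming
# zero-initialized biases and an MSE backward of the form 2*(pred - y) per component):
# layer 1 (3 -> 3, all ones): each output is 1 + 2 + 3 = 6   -> [6, 6, 6]
# layer 2 (3 -> 2, all ones): each output is 6 + 6 + 6 = 18  -> [18, 18]
# with y = [7, 10], the first gradient component is then 2*(18 - 7) = 22,
# which appears to be what the "Should be" comment above refers to.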
def test_neural_net_back_forward(self):
    n_in, n_out = 3, 2
    weights = np.array([[0, -1, 2], [-3, 4, -5]])
    bias = np.arange(n_out)[:, np.newaxis]
    nn = NeuralNet(MeanSquaredError(), 1e-3,
                   layers=[Linear(n_in, 2, weights, bias), ReLU()])
    x = np.array([[[0], [1], [2]]])
    y = np.array([[[2], [3]]])
    assert y.shape[1] == n_out
    # | 0 -1  2| |0|   |0|   | 3|   |0|   | 3|    |3|
    # |-3  4 -5| |1| + |1| = |-6| + |1| = |-5| -> |0|
    #            |2|
    pred = nn.forward(x)
    assert np.array_equal(pred, [[[3], [0]]])
    nn.compute_loss(pred, y)
    dL_dx = nn.backward()
    # | 0 -1  2| |0 + dx1|   | 3 + 0   -  dx2 + 2dx3|   | 3 + ...|    |3 - dx2 + 2dx3|
    # |-3  4 -5| |1 + dx2| = |-6 - 3dx1 + 4dx2 - 5dx3| = |-5 + ...| -> |0|
    #            |2 + dx3|   The second component is ReLU'ed away
    # MSE loss results in 2( ... ) so dL = -2dx2 + 4dx3, dL/dx = |0, -2, 4|
    assert np.array_equal(dL_dx, [[[0], [-2], [4]]])
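
# A standalone numpy re-check of the forward arithmetic worked out in the comments
# above (illustration only, not part of the test suite):
import numpy as np

W = np.array([[0, -1, 2], [-3, 4, -5]])
b = np.array([[0], [1]])
x = np.array([[0], [1], [2]])
print(np.maximum(W @ x + b, 0))  # [[3], [0]] -- matches the asserted prediction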
def main():
    if len(sys.argv) != 2 or sys.argv[1] not in {"LINEAR", "CNN", "RNN"}:
        print("USAGE: python main.py <Model Type>")
        print("<Model Type>: [LINEAR/CNN/RNN]")
        exit()

    if sys.argv[1] == "LINEAR":
        model = Linear()
        num_epochs = 100
    elif sys.argv[1] == "CNN":
        model = CNN()
        num_epochs = 100
    elif sys.argv[1] == "RNN":
        model = RNN()
        num_epochs = 100

    print("Running preprocessing...")
    if sys.argv[1] == "RNN":
        train_inputs, train_labels, test_inputs, test_labels = get_rnn_data(
            "data/genres.tar")
    else:
        train_inputs, train_labels, test_inputs, test_labels = get_data(
            "data/genres.tar")
    print("Preprocessing completed.")

    if sys.argv[1] == "RNN":
        # For RNN, the custom model is replaced by a Keras LSTM stack.
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(units=256))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU(0.2))
        model.add(tf.keras.layers.Reshape((16, 16)))
        model.add(
            tf.keras.layers.LSTM(units=128,
                                 dropout=0.05,
                                 recurrent_dropout=0.35,
                                 return_sequences=True))
        model.add(
            tf.keras.layers.LSTM(units=32,
                                 dropout=0.05,
                                 recurrent_dropout=0.35,
                                 return_sequences=False))
        model.add(tf.keras.layers.Dense(units=10, activation="softmax"))
        num_epochs = 100

        opt = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(optimizer=opt,
                      loss="sparse_categorical_crossentropy",
                      metrics=["accuracy"])
        history = model.fit(train_inputs,
                            train_labels,
                            epochs=num_epochs,
                            batch_size=100)
        model.summary()
        test_loss, test_acc = model.evaluate(test_inputs, test_labels)
        print('test acc: ', test_acc)
    else:
        for _ in range(num_epochs):
            train(model, train_inputs, train_labels)
        print(test(model, test_inputs, test_labels))
def __init__(self, similarity_function: Dict[str, Any] = None, **kwargs):
    super(MatrixAttention, self).__init__(**kwargs)
    self.similarity_function_params = deepcopy(similarity_function)
    if similarity_function is None:
        similarity_function = {}
    similarity_function['name'] = self.name + '_similarity_function'
    self.similarity_function = Linear(**similarity_function)
def __init__(self, in_features, n_classes, cutoffs,
             div_value=4., head_bias=False):
    super(AdaptiveLogSoftmaxWithLoss, self).__init__()

    cutoffs = list(cutoffs)

    if (cutoffs != sorted(cutoffs)) \
            or (min(cutoffs) <= 0) \
            or (max(cutoffs) >= (n_classes - 1)) \
            or (len(set(cutoffs)) != len(cutoffs)) \
            or any([int(c) != c for c in cutoffs]):
        raise ValueError("cutoffs should be a sequence of unique, positive "
                         "integers sorted in an increasing order, where "
                         "each value is between 1 and n_classes-1")

    self.in_features = in_features
    self.n_classes = n_classes
    self.cutoffs = cutoffs + [n_classes]
    self.div_value = div_value
    self.head_bias = head_bias

    self.shortlist_size = self.cutoffs[0]
    self.n_clusters = len(self.cutoffs) - 1
    self.head_size = self.shortlist_size + self.n_clusters

    self.head = Linear(self.in_features, self.head_size, bias=self.head_bias)
    self.tail = ModuleList()

    for i in range(self.n_clusters):
        hsz = int(self.in_features // (self.div_value ** (i + 1)))
        osz = self.cutoffs[i + 1] - self.cutoffs[i]

        projection = Sequential(Linear(self.in_features, hsz, bias=False),
                                Linear(hsz, osz, bias=False))
        self.tail.append(projection)
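
# A quick check of the size bookkeeping in the constructor above, with made-up
# numbers: n_classes=1000 and cutoffs=[10, 100] put the 10 most frequent classes
# (plus one entry per tail cluster) in the head projection.
n_classes = 1000
cutoffs = [10, 100] + [n_classes]
shortlist_size = cutoffs[0]              # 10
n_clusters = len(cutoffs) - 1            # 2 tail clusters: classes 10..99 and 100..999
head_size = shortlist_size + n_clusters  # 12 outputs from the head projection
print(shortlist_size, n_clusters, head_size)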
def test_linear_weights():
    w = Tensor([[2, 4, 8], [16, 32, 69]])
    b = Tensor([0, 0, 0])
    x = Tensor([3, 9, 27])
    print(w.shape, b.shape)
    l1 = Linear(2, 3)
    l1.init_weights(1)
    w, b = l1.param()
    print(w.shape, b.shape)
def __init__(
    self,
    in_features: int,
    out_features: int,
    first: bool = False,
    couple: bool = False,
    dropout_p: float = 0.0,
    init_weight: str = 'kaiming',
    init_bias: Union[int, float, str] = -1
):
    super().__init__()
    self.first = first
    self.couple = couple

    if first:
        self.W_H = Linear(in_features, out_features, bias=False, activation=None)
        self.W_T = Linear(in_features, out_features, bias=False, activation=None)
        if not couple:
            self.W_C = Linear(in_features, out_features, bias=False, activation=None)

    self.R_H = Linear(in_features, out_features, bias=True, activation=None)
    self.R_T = Linear(in_features, out_features, bias=True, activation=None)
    if not couple:
        self.R_C = Linear(in_features, out_features, bias=True, activation=None)

    for child in self.children():
        child.reset_parameters(init_weight, init_bias)

    self.dropout = RNNDropout(dropout_p)
def build_model(self):
    self.conv_layers = []
    self.linear_layers = []
    self.layers = []
    # 1x28x28 -> 6x24x24
    self.conv_layers += [Conv(1, 6, 5, self.activation)]
    # 6x24x24 -> 6x12x12
    self.conv_layers += [MaxPool_2()]
    # 6x12x12 -> 16x8x8
    self.conv_layers += [Conv(6, 16, 5, self.activation)]
    # 16x8x8 -> 16x4x4
    self.conv_layers += [MaxPool_2()]
    # 256 -> 120
    self.linear_layers += [Linear(16 * 4 * 4, 120, self.activation)]
    # 120 -> 84
    self.linear_layers += [Linear(120, 84, self.activation)]
    # 84 -> 10
    self.linear_layers += [Softmax(84, self.no_of_classes)]
    self.layers = self.conv_layers + self.linear_layers
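
# Shape bookkeeping behind the comments above (valid 5x5 convolutions, stride 1,
# followed by 2x2 max pooling):
# conv: 28 - 5 + 1 = 24   pool: 24 / 2 = 12   conv: 12 - 5 + 1 = 8   pool: 8 / 2 = 4
# so the flattened feature size entering the first Linear is 16 * 4 * 4 = 256.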
def run_mini_example():
    x = Tensor([1, 2, 3])
    y = Tensor([7, 10])
    print(x.shape, y.shape)
    linear = Linear(x.shape[0], y.shape[0], weight_init='ones')
    net = Network([linear])
    pred = net.forward(x)
    # loss.backward()
    print("Pred is ")
    print(pred)
def test_forward(self):
    n_in, n_out = 3, 2
    bias = np.arange(n_out)[:, np.newaxis]
    weights = np.arange(n_in * n_out).reshape((n_out, n_in))
    layer = Linear(n_in, n_out, weights, bias)
    x = np.array([[[0], [1], [2]]])
    # |0 1 2| |0|   |0|   | 5|   |0|   | 5|
    # |3 4 5| |1| + |1| = |14| + |1| = |15|
    #         |2|
    # breakpoint()
    assert np.array_equal(layer.forward(x), [[[5], [15]]])
    assert np.array_equal(layer.d_out_d_in, weights)
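
# The second assertion reflects that for y = Wx + b the Jacobian dy/dx is W itself.
# A small standalone finite-difference check of that fact (illustration only):
import numpy as np

W = np.arange(6).reshape(2, 3)
b = np.array([[0], [1]])
x = np.array([[0.], [1.], [2.]])
eps = 1e-6
jac = np.zeros_like(W, dtype=float)
for j in range(3):
    dx = np.zeros_like(x)
    dx[j] = eps
    jac[:, j] = ((W @ (x + dx) + b) - (W @ x + b)).ravel() / eps
print(np.allclose(jac, W))  # True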
def linearProcessor():
    """ Processor node for linear operation """
    inputs, weights, bias = Input(), Input(), Input()
    f = Linear(inputs, weights, bias)

    feed_dict = {
        inputs: [6, 14, 3],
        weights: [0.5, 0.25, 1.4],
        bias: 2
    }

    graph = topological_sort(feed_dict)
    output = forward_pass(f, graph)
    print(output, "(according to miniflow - linear)")
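
# Hand-checking the value the linear node above should produce (a worked example,
# not part of the original script):
# 6*0.5 + 14*0.25 + 3*1.4 + 2 = 3.0 + 3.5 + 4.2 + 2.0 = 12.7
expected = sum(i * w for i, w in zip([6, 14, 3], [0.5, 0.25, 1.4])) + 2
print(expected)  # ~12.7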
def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn,
             bias_init_fn, criterion, lr, momentum=0.0, num_bn_layers=0):
    # Don't change this -->
    self.train_mode = True
    self.num_bn_layers = num_bn_layers
    self.bn = num_bn_layers > 0
    self.nlayers = len(hiddens) + 1
    self.input_size = input_size
    self.output_size = output_size
    self.activations = activations
    self.criterion = criterion
    self.lr = lr
    self.momentum = momentum
    # <---------------------

    # Don't change the name of the following class attributes,
    # the autograder will check against these attributes. But you will need to change
    # the values in order to initialize them correctly

    # Initialize and add all your linear layers into the list 'self.linear_layers'
    # (HINT: self.foo = [ bar(???) for ?? in ? ])
    # (HINT: Can you use zip here?)
    # self.linear_layers = []
    # self.linear_layers.append(Linear(input_size, output_size, weight_init_fn, bias_init_fn))
    input_size_list = [self.input_size] + hiddens
    output_size_list = hiddens + [self.output_size]
    weight_init_list = np.repeat(weight_init_fn, self.nlayers)
    bias_init_list = np.repeat(bias_init_fn, self.nlayers)
    para_list = list(zip(input_size_list, output_size_list,
                         weight_init_list, bias_init_list))
    self.linear_layers = [Linear(para_list[i][0], para_list[i][1],
                                 para_list[i][2], para_list[i][3])
                          for i in range(self.nlayers)]

    # If batch norm, add batch norm layers into the list 'self.bn_layers'
    self.bn_layers = []
    if self.bn:
        # self.bn_layers.append(BatchNorm(input_size, alpha=0.9))
        self.bn_layers = [BatchNorm(hiddens[i], alpha=0.9)
                          for i in range(num_bn_layers)]
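
# A quick illustration of the zip-based pairing used above (sizes here are made up):
# consecutive (in, out) dimensions for an MLP with two hidden layers.
hiddens = [64, 32]
in_sizes = [784] + hiddens   # [784, 64, 32]
out_sizes = hiddens + [10]   # [64, 32, 10]
print(list(zip(in_sizes, out_sizes)))  # [(784, 64), (64, 32), (32, 10)]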
def test_neural_net_tends_to_correct(self):
    n_in, n_out = 4, 2
    np.random.seed(12)
    weights = np.random.normal(size=(n_out, n_in))
    bias = np.zeros(n_out)[:, np.newaxis]
    nn = NeuralNet(MeanSquaredError(), 1e-2,
                   layers=[Linear(n_in, 2, weights, bias)])
    x = np.array([[[-1], [0.5], [-0.33], [0.75]]])
    y = np.array([[[-0.5], [0.2]]])
    for _ in range(1000):
        pred = nn.forward(x)
        loss = nn.compute_loss(pred, y)
        nn.backward()
    assert np.isclose(loss, 0)
def __init__(self, n_samples, batch_size, n_bits, fwd_scale_factor,
             bck_scale_factor, loss_scale_factor, in_features, out_features, lr):
    self.lin_layer = Linear(n_samples=n_samples,
                            batch_size=batch_size,
                            n_bits=n_bits,
                            fwd_scale_factor=fwd_scale_factor,
                            bck_scale_factor=bck_scale_factor,
                            in_features=in_features,
                            out_features=out_features)
    self.loss_layer = CrossEntropy(n_samples, out_features, batch_size,
                                   n_bits, loss_scale_factor)
    self.lr = lr
    self.fwd_scale_factor = fwd_scale_factor
    self.bck_scale_factor = bck_scale_factor
    self.loss_scale_factor = loss_scale_factor
def test_neural_net_works_with_batches(self):
    n_in, n_out = 2, 2
    np.random.seed(12)
    weights = np.random.normal(size=(n_out, n_in))
    bias = np.zeros(n_out)[:, np.newaxis]
    nn = NeuralNet(MeanSquaredError(), 1e-2,
                   layers=[Linear(n_in, 2, weights, bias)])
    # batch of 3
    x = np.array([[[-1], [0.5]], [[1], [-0.2]], [[-0.33], [0.75]]])
    y = x
    # Why does this take so much longer to converge than the previous one?
    for _ in range(10000):
        pred = nn.forward(x)
        loss = nn.compute_loss(pred, y)
        nn.backward()
    assert np.isclose(loss, 0)
    assert np.all(
        np.isclose(nn.layers[0].weights, [[1, 0], [0, 1]], atol=1e-3))
)  # add epsilon to avoid division-by-zero errors
validX = (validX - mu) / (std + np.finfo(np.float32).eps)
testX = (testX - mu) / (std + np.finfo(np.float32).eps)

#%% Visualize MNIST
# https://colah.github.io/posts/2014-10-Visualizing-MNIST/

#%% Build the model
model = NeuralNetwork()
# Think of the neural network as a stack of layers, each performing a specific
# computation; in the forward pass the data flows through these layers.
# Guided by the chain rule, backpropagation of the error then yields the partial
# derivatives (gradients) of the cost function with respect to the model parameters.
# Task 1: implement the forward and backward methods of the Relu class
# Task 2: implement the forward method of the Softmax class
model.layers.append(Linear(n_feature, 60, lr))
model.layers.append(Relu())
model.layers.append(Linear(60, 10, lr))
model.layers.append(Softmax())

#%% Training
# stochastic gradient descent
batchsize = 100
trainloss = []
validloss = []
snapshot = []
for i in range(n_iter):
    # before each epoch, draw a new permutation of indices (to shuffle the data)
    idxs = np.random.permutation(trainX.shape[0])
# -*- coding:utf-8 -*-
# @Time   : 2019/10/30
# @Author : Botao Fan
from config import DATA_PATH
from linear import Linear
from data_prepare import data_prep

if __name__ == '__main__':
    train_idx, train_val, train_y, user_dict, item_dict = data_prep(
        DATA_PATH, 'ua.base')
    test_idx, test_val, test_y, _, _ = data_prep(DATA_PATH, 'ua.test',
                                                 user_dict, item_dict)
    linear = Linear(param_size=len(user_dict) + len(item_dict), epoch=200)
    linear.fit(train_idx, train_val, train_y, test_idx, test_val, test_y)
def remove(self, module):
    raise NotImplementedError('implement remove to sequential!')

def forward(self, inputs, outputs):
    self.outputs = inputs
    self.real = outputs
    for layer in self.layers:
        self.outputs = layer.forward(self.outputs, self.real)
    return self.outputs

def backward(self, *args, **kwargs):
    self.grad_input = self.outputs
    for layer in self.layers[::-1]:
        self.grad_input = layer.backward(self.grad_input)
    return self.grad_input


if __name__ == "__main__":
    model = Sequential()
    model.add(Linear(3, 10))
    # model.add(Sigmoid(10))
    model.add(MSE(10))

    X = np.array([[3., 4., 5.]]).T
    y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).T

    for i in range(10000):
        model.forward(X, y)
        model.backward()

    print(model.forward(X, y))
    # print(model.layers[-1].Loss)
### Generation of the DATA
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

# Standardize data
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

# Convert to one-hot labels so that we can train
train_target_hot = conv_to_one_hot(train_target)
test_target_hot = conv_to_one_hot(test_target)

### Build the Network
hidden_layers = 3
layers = []
linear = Linear(2, 25, bias_init=True)
layers.append(linear)
layers.append(Relu())
for i in range(hidden_layers - 1):
    layers.append(Linear(25, 25, bias_init=True))
    layers.append(Relu())
layers.append(Tanh())
layers.append(Linear(25, 2, bias_init=True))
model = Sequential(layers)

# print model summary
print("Model Summary:")
print(model)

### Select Parameters to train the model
criterion = MSE()
def test_Linear():
    T = 5
    batch_size = 2
    doutput = 3
    dinput = 4
    unit = Linear(dinput, doutput)

    W = unit.get_weights()
    X = np.random.randn(T, dinput, batch_size)

    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    loss = np.sum(acc_Y * wrand)
    dY = wrand
    dX = unit.backward(dY)
    dW = unit.get_grads()

    def fwd():
        unit.set_weights(W)
        h = unit.forward(X)
        return np.sum(h * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    all_values = [X, W]
    backpropagated_gradients = [dX, dW]
    names = ['X', 'W']

    error_count = 0
    for v in range(len(names)):
        values = all_values[v]
        dvalues = backpropagated_gradients[v]
        name = names[v]

        for i in range(values.size):
            actual = values.flat[i]
            values.flat[i] = actual + delta
            loss_plus = fwd()   # loss at (value + delta)
            values.flat[i] = actual - delta
            loss_minus = fwd()  # loss at (value - delta)
            values.flat[i] = actual

            backpropagated_gradient = dvalues.flat[i]
            # central difference approximation of the gradient
            numerical_gradient = (loss_plus - loss_minus) / (2 * delta)

            if numerical_gradient == 0 and backpropagated_gradient == 0:
                error = 0
            elif abs(numerical_gradient) < 1e-7 and abs(backpropagated_gradient) < 1e-7:
                error = 0
            else:
                error = abs(backpropagated_gradient - numerical_gradient) / abs(numerical_gradient + backpropagated_gradient)

            if error > error_threshold:
                print('FAILURE!!!\n')
                print('\tparameter: ', name, '\tindex: ', np.unravel_index(i, values.shape))
                print('\tvalues: ', actual)
                print('\tbackpropagated_gradient: ', backpropagated_gradient)
                print('\tnumerical_gradient', numerical_gradient)
                print('\terror: ', error)
                print('\n\n')
                error_count += 1

    if error_count == 0:
        print('Linear Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))
def ReLULayer(name, n_in, n_out, inputs):
    output = Linear(name + '.Linear', n_in, n_out, inputs)
    return tf.nn.relu(output)
def LeakyReLULayer(name, n_in, n_out, inputs):
    output = Linear(name + '.Linear', n_in, n_out, inputs, initialization='he')
    return LeakyReLU(output)
def LeakyReLULayer(name, n_in, n_out, inputs):
    output = Linear(name + '.Linear', n_in, n_out, inputs)
    return LeakyReLU(output)
def main():
    '''
    Main function. Runs a single training, or 10 trials with default model,
    loss function and optimizer.
    '''
    print('Default run: single training with default net, MSE loss and SGD.')
    print('Available activation functions: ReLU, tanh.')
    print('Available criteria: "mse" for MSE loss (default), '
          '"cross" for cross-entropy loss')
    print('Available optimizers: "sgd" for SGD (default), '
          '"mom" for SGD + momentum, "adam" for Adam optimization')
    print('Recommended learning rates: ')
    print('SGD: 1e-2 with MSE loss, 5e-4 with Cross-Entropy loss')
    print('SGD + Momentum: 1e-3 with MSE loss, 1e-4 with Cross-Entropy loss')
    print('Adam: 1e-3 \n')

    # Load default model
    net = Sequential([
        Linear(2, 25),
        ReLU(),
        Linear(25, 25),
        ReLU(),
        Linear(25, 25),
        ReLU(),
        Linear(25, 2)
    ])
    print(net)

    # Load default criterion and optimizer, with corresponding LR
    criterion = 'mse'
    optimizer = 'sgd'
    eta = 1e-2

    # Running mode: 'train' for single training, 'trial' for several trainings
    mode = 'train'
    # mode = 'trial'
    print(f'\n Selected mode: {mode} \n')
    time.sleep(1)

    if mode == 'train':
        print('To visualize data, change flag "plot_data" to True.')
        print('To visualize training loss and predictions, '
              'change flag "plot_training" to True.')
        plot_data = True
        plot_training = True
        run_train(net, criterion, optimizer, eta,
                  plot_data=plot_data, plot_training=plot_training)
    elif mode == 'trial':
        n_trials = 10
        trial(net, n_trials=n_trials, input_criterion=criterion,
              input_optimizer=optimizer, eta=eta, verbose=True)
    else:
        raise ValueError('Running mode not found. Try "train" for simple train, '
                         '"trial" for full trial.')
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

# Train dataset
X_ = np.reshape(np.array([[-1., -2., -3.], [1., 2., 3.]]), (2, 3))
W1_ = np.random.randn(3, 2)
b1_ = np.random.randn(2)
W2_ = np.random.randn(2, 1)
b2_ = np.random.randn(1)
y_ = np.reshape(np.array([[1.], [0.]]), (-1, 1))

# Test dataset
X_t_ = np.reshape(np.array([-1., -2.01, -2.8]), (1, 3))
y_t_ = np.array([1.])

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = L2(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}
hyper_parameters = [W1, b1, W2, b2]
graph = Network.topological_sort(feed_dict)

epoch = 1000000
for i in range(epoch):
    Network.forward_propagation(graph)
    Network.backward_propagation(graph)
    Update.stochastic_gradient_descent(hyper_parameters, learning_rate=1e-4)
mini_batch_size = 5

X_train, y_train = build_data(1000)  # (1000, 2)
X_test, y_test = build_data(1000)    # (1000, 2)

print('Start training with parameters : {0} rounds, {1} epochs and {2} batch size'
      .format(rounds, epochs, mini_batch_size))

result_rounds = []  # training_losses, training_acc, test_losses, test_acc
time1 = time.perf_counter()
for i in range(rounds):
    print("Training round {0} : ".format(i + 1))
    model = Sequential(Linear(input_units, hidden_units), ReLU(),
                       Linear(hidden_units, hidden_units), ReLU(),
                       Linear(hidden_units, hidden_units), ReLU(),
                       Linear(hidden_units, output_units), Sigmoid())
    # array of shape (rounds, epochs)
    model_trained, train_loss, train_acc, test_pred, test_loss, test_acc = train_model(
        model, X_train, y_train, X_test, y_test, epochs, mini_batch_size,
        lr=0.01, opt='SGD', loss_name='MSE')
def __init__(self, num_dims, num_factors=2, name="Norm Lin"):
    self.name = name
    self.num_dims = num_dims
    self._norm = Normalization(num_dims)
    self._proj = Linear(num_dims, num_factors=num_factors)
You'll find the results, logs and video recordings of your agent every 250k steps
under the corresponding file in the results folder. A good way to monitor the
progress of the training is to use Tensorboard. The starter code writes summaries
of different variables.

To launch tensorboard, open a Terminal window and run

    tensorboard --logdir=results/

Then, connect remotely to

    address-ip-of-the-server:6006

6006 is the default port used by tensorboard.
"""

if __name__ == '__main__':
    # make env
    env = gym.make(config.env_name)
    env = MaxAndSkipEnv(env, skip=config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=config.overwrite_render)

    # exploration strategy
    exp_schedule = LinearExploration(env, config.eps_begin, config.eps_end,
                                     config.eps_nsteps)

    # learning rate schedule
    lr_schedule = LinearSchedule(config.lr_begin, config.lr_end,
                                 config.lr_nsteps)

    # train model
    model = Linear(env, config)
    model.run(exp_schedule, lr_schedule)