def main():
    n = np.random.randint(2, 20)
    # Generate an undirected graph
    graph = np.random.randint(2, size=[n, n])
    graph = np.tril(graph, -1) + np.tril(graph, -1).T
    label = np.random.randint(2)
    print(graph)
    print(graph.shape)
    print('label', label)

    dim_feature = 10
    x = np.zeros([n, dim_feature])
    x[:, 0] = 1

    W = np.random.normal(0, 0.4, [dim_feature, dim_feature])
    A = np.random.normal(0, 0.4, dim_feature)
    b = np.array([0.])

    model = GraphNeuralNetwork(W, A, b, 2)
    optimizer = SGD(model, lr=0.001)

    for i in range(500):
        grads_flat = calc_grads(model, graph, x, label, lossfunc=bce_with_logit, eps=1e-4)
        outputs = model(graph, x)
        train_loss = bce_with_logit(outputs, label)
        optimizer.update(grads_flat)
        print('step: %d, train_loss: %.15f' % (i, train_loss))

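# A minimal sketch of the SGD assumed by main() above: it keeps a reference to
# the model and applies one vanilla gradient-descent step from a flattened
# numerical gradient vector. The (W, A, b) attribute names and the ordering of
# grads_flat are assumptions, not confirmed by the snippet.
import numpy as np

class SGD:
    def __init__(self, model, lr=0.001):
        self.model = model
        self.lr = lr

    def update(self, grads_flat):
        offset = 0
        for param in (self.model.W, self.model.A, self.model.b):
            grad = grads_flat[offset:offset + param.size].reshape(param.shape)
            param -= self.lr * grad  # in-place update of the model's array
            offset += param.size
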
def test_compare_with_Linear(self):
    in_size = 2
    out_size = 3
    x = np.random.rand(in_size)
    # x = np.array([1., 1])
    optimizer = SGD(0.1)
    linear = Linear(in_size, out_size, initialize='zeros')
    wx = Wx(in_size, out_size, initialize='zeros')
    plusbias = PlusBias(out_size, initialize='zeros')
    wxbias = Seq(wx, plusbias)

    linear_y = linear.forward(x)
    wxbias_y = wxbias.forward(x)
    assert_array_equal(linear_y, wxbias_y)

    dJdy = np.random.rand(out_size)
    linear_grad = linear.backward(dJdy)
    wxbias_grad = wxbias.backward(dJdy)
    assert_array_equal(linear_grad, wxbias_grad)

    linear.update_weights(optimizer)
    wxbias.update_weights(optimizer)

    stack = np.vstack([plusbias.b.get(), wx.W.get().T]).T
    assert_array_equal(linear.W, stack)

def test_CheckMinibatchTrainerEqualsSimpleTrainer(self):
    train_set = [(np.random.rand(2), i) for i in xrange(3)]
    loss = SquaredLoss()
    epochs = 1
    optimizer = SGD(learning_rate=0.01)

    minibatch_model = Seq([Linear(2, 5, initialize='ones')])
    minibatch_trainer = MinibatchTrainer()
    minibatch_trainer.train_minibatches(
        minibatch_model,
        train_set,
        batch_size=1,
        loss=loss,
        epochs=epochs,
        optimizer=optimizer,
        shuffle=False)

    simple_model = Seq([Linear(2, 5, initialize='ones')])
    simple_trainer = OnlineTrainer()
    simple_trainer.train(simple_model, train_set, loss, epochs, optimizer)

    x = np.random.rand(2)
    simple_y = simple_model.forward(x)
    minibatch_y = minibatch_model.forward(x)
    assert_array_equal(simple_y, minibatch_y)

def test_iris(self):
    scores = []
    for i in range(1):
        hidden = 50
        l1 = Linear(4, hidden, initialize='ones')
        l2 = Linear(hidden, 3, initialize='ones')
        l1.W *= 0.000000001
        l2.W *= 0.00000001
        model = Seq(l1, Sigmoid, l2)
        loss = CrossEntropyLoss()
        trainer = OnlineTrainer()
        losses = trainer.train(model, self.train_set,
                               epochs=100,
                               loss=loss,
                               optimizer=SGD(learning_rate=0.01))
        score = loss.test_score(model, self.train_set)
        print("hidden=%f score=%f" % (hidden, score))
        scores.append(score)

        self.plot_loss_history(losses)
        plt.show()

    self.assertGreaterEqual(numpy.mean(scores), 94.)

def __init__(self, input_shape, filters, kernel_size, gpu=False):
    self.kernel_size = kernel_size
    self.input_shape = input_shape
    self.filters = filters
    input_channels = input_shape[2]
    self.gpu = gpu
    if gpu:
        self.weights = cp.random.random(
            (kernel_size, kernel_size, input_channels, filters)).astype('float32')
        self.bias = cp.zeros(self.filters).astype('float32')
    else:
        self.weights = np.random.random(
            (kernel_size, kernel_size, input_channels, filters)).astype('float32')
        self.bias = np.zeros(self.filters).astype('float32')
    self.weights_optimizer = SGD()
    self.bias_optimizer = SGD()

def fit(model):
    optimizer = SGD(model.parameters(), model.grads(), lr=0.1)
    losses = []
    print('Epoch | Loss')
    for epoch in range(500):
        epoch_loss = 0
        for b in range(num_batches):
            batch_input = train_input[b * batch_size:(b + 1) * batch_size]
            batch_target = train_target[b * batch_size:(b + 1) * batch_size]
            batch_output = model(batch_input)
            batch_loss = criterion(batch_output, batch_target)
            epoch_loss += batch_loss
            output_grad = criterion.backward()
            model.backward(output_grad)
            optimizer.step()
            optimizer.zero_grad()
        losses.append(epoch_loss.item() / num_batches)
        print(f'{epoch+1:>5} | {epoch_loss.item() / num_batches:.5f}')

    train_output = model(train_input)
    print(
        f'\nTrain Error: {sum(train_output.argmax(1) != train_target.argmax(1)).item() / 1000}'
    )
    test_output = model(test_input)
    print(
        f'Test Error: {sum(test_output.argmax(1) != test_target.argmax(1)).item() / 1000}'
    )
    return losses

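# A minimal sketch of the SGD interface assumed by fit() above: it receives the
# model's parameter and gradient tensors as parallel sequences, step() applies
# p <- p - lr * g in place, and zero_grad() clears the gradient buffers. The
# actual class in the project may differ.
class SGD:
    def __init__(self, params, grads, lr=0.1):
        self.params = params
        self.grads = grads
        self.lr = lr

    def step(self):
        for p, g in zip(self.params, self.grads):
            p -= self.lr * g  # in place, so the model keeps seeing the update

    def zero_grad(self):
        for g in self.grads:
            g *= 0  # reset accumulated gradients in place
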
def __train(weight_init_std):
    # Initialize the two network structures (with and without batch norm)
    bn_network = multi_net_extend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std,
                                  use_batchnorm=True)
    network = multi_net_extend(input_size=784,
                               hidden_size_list=[100, 100, 100, 100, 100],
                               output_size=10,
                               weight_init_std=weight_init_std)
    train_acc_list = []
    bn_train_acc_list = []
    optimizer = SGD(lr=0.01)

    per_epoch = 50
    epoch_cnt = 0

    for i in range(1000000000):
        # Draw a random mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Gradient-descent step on both networks
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % 10 == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list

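# A minimal sketch of the dict-style SGD used above: update(params, grads)
# receives two dicts keyed by parameter name and mutates params in place.
# The repo's actual class may differ slightly.
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]  # W <- W - lr * dW
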
def run(self, batch_size=10, learning_rate=0.6, train_set_percentage=1.0, epochs=3):
    model = Seq(
        WxBiasLinear(784, 10, initialize='random')
        # Dropout(0.5)
    )

    train_set_sliced = slice_percentage(self.train_set, train_set_percentage)

    trainer = MinibatchTrainer()
    trainer.train_minibatches(
        model,
        train_set_sliced,
        batch_size=batch_size,
        loss=CrossEntropyLoss(),
        epochs=epochs,
        optimizer=SGD(learning_rate=learning_rate),
        # optimizer=RMSProp(learning_rate=learning_rate, decay_rate=0.6),
        # optimizer=AdaGrad(learning_rate=learning_rate),
        show_progress=True)

    # self.show_mnist_grid(model, self.test_set)

    # trainer = PatienceTrainer()
    # trainer.train(model,
    #               train_set_sliced, self.valid_set, self.test_set,
    #               batch_size=batch_size,
    #               loss=CrossEntropyLoss(),
    #               max_epochs=100,
    #               # optimizer=MomentumSGD(learning_rate=learning_rate, momentum=0.5),
    #               optimizer=RMSProp(learning_rate=learning_rate, decay_rate=0.9),
    #               # optimizer=AdaGrad(learning_rate=learning_rate),
    #               test_score_function=self.test_score_fun
    #               )

    test_score = CrossEntropyLoss().test_score(model, self.test_set)

    return {
        # 'train_score': train_score,
        'test_score': test_score,
    }

def our_fit(model, epochs=500, verbose=False):
    criterion = MSE()
    optimizer = SGD(model.parameters(), model.grads(), lr=0.1)
    start = time()
    losses = []
    if verbose:
        print('Epoch | Loss')
    for epoch in range(epochs):
        epoch_loss = 0
        for b in range(num_batches):
            batch_input = train_input[b * batch_size:(b + 1) * batch_size]
            batch_target = train_target[b * batch_size:(b + 1) * batch_size]
            batch_output = model(batch_input)
            batch_loss = criterion(batch_output, batch_target)
            epoch_loss += batch_loss
            output_grad = criterion.backward()
            model.backward(output_grad)
            optimizer.step()
            optimizer.zero_grad()
        losses.append(epoch_loss.item() / num_batches)
        if verbose:
            print(f'{epoch+1:>5} | {epoch_loss.item() / num_batches:.5f}')
    end = time()

    train_output = model(train_input)
    print(
        f'\nTrain Error: {sum(train_output.argmax(1) != train_target.argmax(1)).item() / len(train_output)}'
    )
    test_output = model(test_input)
    print(
        f'Test Error: {sum(test_output.argmax(1) != test_target.argmax(1)).item() / len(test_output)}'
    )
    return losses, end - start

multiclass_accuracy(model_with_reg.predict(train_X[:30]), train_y[:30])

#%% [markdown]
# # Finish writing the code for the training loop

#%%
from trainer import Trainer, Dataset
from optim import SGD
from modelGridSearch import search_model

model = TwoLayerNet(n_input=train_X.shape[1], n_output=10, hidden_layer_size=100, reg=1e-1)
dataset = Dataset(train_X, train_y, val_X, val_y)
trainer = Trainer(model, dataset, SGD())

loss_history, train_history, val_history = trainer.fit()

#%%
dataset = Dataset(train_X, train_y, val_X, val_y)
model.predict(dataset.val_X)

#%%
plt.plot(loss_history)

#%%
plt.plot(train_history)
plt.plot(val_history)

#%% [markdown]
# # Improving the training process
#

def df(x, y):
    return x / 10.0, y * 2


# Set the starting point
init_pos = (-7.0, 2.0)
params = {}
params['x'] = init_pos[0]
params['y'] = init_pos[1]
grads = {}
grads['x'] = 0
grads['y'] = 0

optimizers = OrderedDict()  # ordered dict so the plots keep this order
optimizers['SGD'] = SGD(lr=0.95)
optimizers['momentum'] = momentum(lr=0.1)
optimizers['adagrad'] = adagrad(lr=1.5)
optimizers['Adam'] = Adam(lr=0.3)

idx = 1  # subplot position

for key in optimizers.keys():  # try each optimization method
    # Reset the parameters for this optimizer
    optimizer = optimizers[key]
    params['x'] = init_pos[0]
    params['y'] = init_pos[1]
    x_history = []
    y_history = []
    # Define the gradient source and run the gradient-descent steps

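# A minimal sketch of the momentum optimizer compared above (spelled `momentum`
# in the script), using the same dict-style update(params, grads) interface;
# the script's actual class may differ.
import numpy as np

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            # v <- momentum * v - lr * grad;  param <- param + v
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
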
def main(args):
    # How much data to use for training
    num_train = 20000

    # Model architecture hyperparameters.
    hidden_dim = 16

    # Optimization hyperparameters.
    batch_size = 128
    num_epochs = 10
    learning_rate = 1e-4
    reg = 1.0

    ###########################################################################
    # TODO: Set hyperparameters for training your model. You can change any   #
    # of the hyperparameters above.                                           #
    ###########################################################################
    reg = 0
    learning_rate = 0.007
    num_epochs = 200
    hidden_dim = 64
    num_train = 1000
    batch_size = 16
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################

    data = load_cifar10(num_train=num_train)
    train_sampler = DataSampler(data['X_train'], data['y_train'], batch_size)
    val_sampler = DataSampler(data['X_val'], data['y_val'], batch_size)

    # Set up the model and optimizer
    model = TwoLayerNet(hidden_dim=hidden_dim)
    optimizer = SGD(model.parameters(), learning_rate=learning_rate)

    stats = {
        't': [],
        'loss': [],
        'train_acc': [],
        'val_acc': [],
    }

    for epoch in range(1, num_epochs + 1):
        print(f'Starting epoch {epoch} / {num_epochs}')
        for i, (X_batch, y_batch) in enumerate(train_sampler):
            loss, grads = training_step(model, X_batch, y_batch, reg)
            optimizer.step(grads)
            if i % args.print_every == 0:
                print(f' Iteration {i} / {len(train_sampler)}, loss = {loss}')
                stats['t'].append(i / len(train_sampler) + epoch - 1)
                stats['loss'].append(loss)

        print('Checking accuracy')
        train_acc = check_accuracy(model, train_sampler)
        print(f' Train: {train_acc:.2f}')
        val_acc = check_accuracy(model, val_sampler)
        print(f' Val: {val_acc:.2f}')
        stats['train_acc'].append(train_acc)
        stats['val_acc'].append(val_acc)

    print(f'Saving plot to {args.plot_file}')
    plot_stats(stats, args.plot_file)

    print(f'Saving model checkpoint to {args.checkpoint_file}')
    model.save(args.checkpoint_file)

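# A minimal sketch of the SGD assumed above, where model.parameters() returns a
# dict of name -> numpy array and training_step() returns gradients keyed by the
# same names. The actual class used alongside this script may differ.
class SGD:
    def __init__(self, parameters, learning_rate=1e-2):
        self.parameters = parameters
        self.learning_rate = learning_rate

    def step(self, grads):
        # Vanilla gradient descent, updating each parameter array in place.
        for name, param in self.parameters.items():
            param -= self.learning_rate * grads[name]
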
""" return [x >> i & 1 for i in range(10)] inputs = np.array([ binary_encode(x) for x in range(101, 1024) ]) targets = np.array([ fizz_buzz_encode(x) for x in range(101, 1024) ]) net = NeuralNet([ Linear(input_size=10, output_size=50), Tanh(), Linear(input_size=50, output_size=4) ]) train(net, inputs, targets, num_epochs=5000, optimiser=SGD(lr=0.001)) for x in range(1, 101): predicted = net.forward(binary_encode(x)) predicted_idx = np.argmax(predicted) actual_idx = np.argmax(fizz_buzz_encode(x)) labels = [str(x), "fizz", "buzz", "fizzbuzz"] print(x, labels[predicted_idx], labels[actual_idx])
def smooth_curve(x):
    """Smooth the loss curve for plotting.

    Reference: http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
    """
    window_len = 11
    s = np.r_[x[window_len - 1:0:-1], x, x[-1:-window_len:-1]]
    w = np.kaiser(window_len, 2)
    y = np.convolve(w / w.sum(), s, mode='valid')
    return y[5:len(y) - 5]


# Define the optimizers to compare
optimizers = OrderedDict()
optimizers['SGD'] = SGD()
optimizers['momentum'] = momentum()
optimizers['adagrad'] = adagrad()
optimizers['Adam'] = Adam()

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Set the hyperparameters and initialize one network per optimizer
train_size = x_train.shape[0]
batch_size = 100
iter_num = 1000
train_loss = {}
networks = {}

def main(**kwargs):
    log = logging.getLogger(__name__)
    log.info(kwargs)

    lr = kwargs["lr"]
    wordWindowSize = kwargs["word_window_size"]
    startSymbol = kwargs["start_symbol"]
    endSymbol = kwargs["end_symbol"]
    numEpochs = kwargs["num_epochs"]
    encoderSize = kwargs["encoder_size"]
    batchSize = kwargs["batch_size"]
    noiseRate = kwargs["noise_rate"]
    saveModel = kwargs["save_model"]
    sync = kwargs["sync"]
    seed = None

    filters = []
    for filterName in kwargs["filters"]:
        moduleName, className = filterName.rsplit('.', 1)
        log.info("Using filter: " + moduleName + " " + className)
        module_ = importlib.import_module(moduleName)
        filters.append(getattr(module_, className)())

    log.info("Reading W2v File")
    embedding = EmbeddingFactory().createFromW2V(kwargs["word_embedding"], RandomUnknownStrategy())
    embedding.meanNormalization()

    datasetReader = TokenReader(kwargs["train"])
    inputGenerator = WordWindowGenerator(wordWindowSize, embedding, filters, startSymbol, endSymbol)

    if sync:
        log.info("Loading and pre-processing train data set")
        trainBatchGenerator = SyncBatchList(datasetReader, [inputGenerator], None, batchSize)
    else:
        trainBatchGenerator = AsyncBatchIterator(datasetReader, [inputGenerator], None, batchSize)

    # We can't stop adding to the embedding, because the data set is read asynchronously
    # embedding.stopAdd()

    input = T.lmatrix("window_words")  # Window of words

    embeddingLayer = EmbeddingLayer(input, embedding.getEmbeddingMatrix(), trainable=False)
    flatten = FlattenLayer(embeddingLayer)

    # Noise layer
    dropoutOutput = DropoutLayer(flatten, noiseRate, seed)

    # Encoder
    linear1 = LinearLayer(dropoutOutput, wordWindowSize * embedding.getEmbeddingSize(), encoderSize,
                          weightInitialization=GlorotUniform())
    act1 = ActivationLayer(linear1, tanh)

    # Decoder
    linear2 = TiedLayer(act1, linear1.getParameters()[0], wordWindowSize * embedding.getEmbeddingSize())
    act2 = ActivationLayer(linear2, tanh)

    # Input of the hidden layer
    x = flatten.getOutput()

    # Create the model
    mdaModel = Model(input, x, True)
    sgd = SGD(lr, decay=0.0)
    prediction = act2
    loss = MeanSquaredError().calculateError(act2, prediction, x)

    log.info("Compiling the model")
    mdaModel.compile(act2.getLayerSet(), sgd, prediction, loss)

    cbs = []
    if saveModel:
        writter = DAModelWritter(saveModel, linear1, linear2)
        cbs.append(SaveModelCallback(writter, "loss", False))

    log.info("Training model")
    mdaModel.train(trainBatchGenerator, numEpochs, callbacks=cbs)

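# Hypothetical illustration of the `decay` argument assumed by SGD(lr, decay=0.0)
# above: time-based learning-rate decay. This is not the toolkit's actual code.
def decayed_learning_rate(lr0, decay, t):
    """Learning rate after t updates under the usual lr0 / (1 + decay * t) schedule."""
    return lr0 / (1.0 + decay * t)
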
class Trainer:
    def __init__(self, args):
        self.device = args.device
        self.draw_loss = args.draw_loss
        self.repeat_num = args.repeat_num
        self.model = args.model
        self.model_name = self.model.__class__.__name__
        self.model_path = self._model_path()
        self.epochs_SGD = args.epochs_SGD
        self.epochs_AdamGD = args.epochs_AdamGD
        self.lr = args.lr
        self.optimizer_SGD = SGD(lr=self.lr,
                                 params=self.model.get_params(),
                                 device=self.device)
        self.optimizer_AdamGD = AdamGD(lr=self.lr,
                                       params=self.model.get_params(),
                                       device=self.device)
        self.lr_scheduler = LrScheduler(args.step_size, args.gamma)
        self.train_loader = args.train_loader
        self.valid_loader = args.valid_loader

    def _model_path(self):
        if not os.path.exists('checkpoints'):
            os.mkdir('checkpoints')
        path = os.path.join('checkpoints', self.model_name)
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    def one_hot_label(self, label, label_num):
        one_hot = torch.zeros([len(label), label_num], device=self.device)
        one_hot[torch.arange(0, len(label), device=self.device), label] = 1
        return one_hot

    def valid(self, epoch):
        valid_loss = 0.
        amount_num = 0.
        correct_num = 0.
        batch_num = 0.
        for batch, [inputs, labels] in enumerate(tqdm(self.valid_loader)):
            batch_num += 1
            inputs = inputs.reshape(-1, 1, 28, 28).to(self.device)
            labels = self.one_hot_label(labels, 10).to(self.device)
            outputs = self.model.forward(inputs)
            valid_loss += torch.mean(
                -(labels * torch.log(outputs)).sum(dim=-1)).cpu().numpy()
            _outputs = torch.argmax(outputs, dim=-1).cpu().numpy()
            _labels = torch.argmax(labels, dim=-1).cpu().numpy()
            amount_num += len(inputs)
            correct_num += np.sum((_labels == _outputs))
        valid_loss /= batch_num
        accuracy = correct_num / amount_num
        print('valid epoch:{} loss:{} acc:{}'.format(epoch + 1, valid_loss, accuracy))
        return valid_loss

    def train(self):
        print('Start training ...')
        train_losses = []
        valid_losses = []
        best_loss = 1.e10
        for epoch in range(self.epochs_SGD + self.epochs_AdamGD):
            batch_num = 0.
            train_loss = 0.
            for batch, [inputs, labels] in enumerate(tqdm(self.train_loader)):
                batch_num += 1
                inputs = repeat_data(inputs, self.repeat_num).reshape(
                    -1, 1, 28, 28).to(self.device)
                labels = self.one_hot_label(labels, 10).to(self.device)
                labels = repeat_data(labels, self.repeat_num).to(self.device)
                outputs = self.model.forward(inputs)
                loss = -torch.sum(labels * torch.log(outputs), dim=-1)
                grads = self.model.backward(loss)
                if epoch < self.epochs_SGD:
                    self.optimizer_SGD.update_lr(self.lr)
                    params = self.optimizer_SGD.update_params(grads)
                else:
                    self.optimizer_AdamGD.update_lr(self.lr)
                    params = self.optimizer_AdamGD.update_params(grads)
                self.model.set_params(params)
                train_loss += torch.mean(loss).cpu().numpy()
            train_loss /= batch_num
            train_losses += [train_loss]
            print('train epoch:{} loss:{} learning rate:{}'.format(
                epoch + 1, train_loss, self.lr))
            self.lr = self.lr_scheduler.step(self.lr)

            valid_loss = self.valid(epoch)
            valid_losses += [valid_loss]
            is_best = valid_loss < best_loss
            best_loss = valid_loss if is_best else best_loss
            state = {
                'epoch': epoch,
                'state_dict': self.model,
                'best_loss': best_loss
            }
            save_model(state, is_best, save_dir=self.model_path)

        print('Finished training ...')
        np.save('loss.npy', [train_losses, valid_losses])

        if self.draw_loss:
            loss_data = np.load('loss.npy', allow_pickle=True)
            train_losses = loss_data[0]
            valid_losses = loss_data[1]
            fig = plt.figure(figsize=(10, 5))
            ax = fig.add_subplot(111)
            ax.set_title('Loss curve', usetex=True, fontsize=20)
            ax.set_xlabel('batch', usetex=True, fontsize=20)
            ax.set_ylabel('loss', usetex=True, fontsize=20)
            ax.plot([x for x in range(1, len(train_losses) + 1)],
                    train_losses, color='g', label='train loss')
            ax.plot([x for x in range(1, len(valid_losses) + 1)],
                    valid_losses, color='r', label='valid loss')
            ax.legend(frameon=False, loc='best')
            plt.show()

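# A minimal sketch of the SGD interface used by Trainer above: it holds the
# parameter dict handed over at construction, update_lr() replaces the learning
# rate, and update_params(grads) returns the updated parameters, which the
# model then takes back via set_params(). The exact structure of params/grads
# and the real implementation may differ.
class SGD:
    def __init__(self, lr, params, device=None):
        self.lr = lr
        self.params = params
        self.device = device

    def update_lr(self, lr):
        self.lr = lr

    def update_params(self, grads):
        for key in self.params:
            self.params[key] = self.params[key] - self.lr * grads[key]
        return self.params
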
train_data = read_images(os.path.join(args.data_dir, 'train-images.idx3-ubyte'))
train_labels = read_labels(
    os.path.join(args.data_dir, 'train-labels.idx1-ubyte'))
test_data = read_images(os.path.join(args.data_dir, 't10k-images.idx3-ubyte'))
test_labels = read_labels(os.path.join(args.data_dir, 't10k-labels.idx1-ubyte'))

# normalize
train_data = (train_data - train_data.mean(
    (1, 2), keepdims=True)) / train_data.std((1, 2), keepdims=True)
test_data = (test_data - test_data.mean(
    (1, 2), keepdims=True)) / test_data.std((1, 2), keepdims=True)

my_net = LeNet()
optimizer = SGD(my_net.parameters(), lr, momentum)

loss_history = []
epoch_steps = train_data.shape[0] // batch + 1
avg_loss = avg_acc = 0
for e in range(epoch):
    if e and e % 3 == 0:
        optimizer.lr *= 0.1
    train_loss = train_acc = 0
    e_data, e_labels = shuffle(train_data, train_labels)
    with tqdm(total=epoch_steps) as pbar:
        for x, t in zip(np.array_split(e_data, epoch_steps),

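# A minimal sketch of an SGD with momentum matching the constructor call above,
# SGD(my_net.parameters(), lr, momentum), with a mutable `lr` attribute used by
# the decay step. The per-step method is not visible in the truncated loop, and
# the .data / .grad attribute names are assumptions.
import numpy as np

class MomentumSGDSketch:
    def __init__(self, parameters, lr, momentum=0.9):
        self.parameters = list(parameters)
        self.lr = lr
        self.momentum = momentum
        self.velocities = [np.zeros_like(p.data) for p in self.parameters]

    def step(self):
        # v <- momentum * v - lr * grad;  p <- p + v
        for p, v in zip(self.parameters, self.velocities):
            v *= self.momentum
            v -= self.lr * p.grad
            p.data += v
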
    return [x >> i & 1 for i in range(10)]


inputs = np.array([
    binary_encode(x)
    for x in range(101, 1024)
])

targets = np.array([
    fizz_buzz_encode(x)
    for x in range(101, 1024)
])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net, inputs, targets, num_epochs=5000, optimizer=SGD(lr=0.001))

for x in range(1, 101):
    predicted = net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    print(x, labels[predicted_idx], labels[actual_idx])