def testLineExample():
    "example which decodes a RNN output of a text line. Taken from IAM dataset. RNN output produced by TensorFlow model."

    # chars of IAM dataset
    classes = ' !"#&\'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

    # matrix containing TxC RNN output. C=len(classes)+1 because of blank label.
    mat = softmax(loadRNNOutput('../data/line/rnnOutput.csv'))

    # language model: used for token passing (word list) and beam search (char bigrams)
    lm = LanguageModel.LanguageModel('../data/line/corpus.txt', classes)

    # decode RNN output with different decoding algorithms
    gt = 'the fake friend of the family, like the'
    print('TARGET        :', '"' + gt + '"')
    print('BEST PATH     :', '"' + BestPath.ctcBestPath(mat, classes) + '"')
    print('PREFIX SEARCH :', '"' + PrefixSearch.ctcPrefixSearchHeuristicSplit(mat, classes) + '"')
    print('BEAM SEARCH   :', '"' + BeamSearch.ctcBeamSearch(mat, classes, None) + '"')
    print('BEAM SEARCH LM:', '"' + BeamSearch.ctcBeamSearch(mat, classes, lm) + '"')
    print('TOKEN         :', '"' + TokenPassing.ctcTokenPassing(mat, classes, lm.getWordList()) + '"')
    print('PROB(TARGET)  :', Loss.ctcLabelingProb(mat, gt, classes))
    print('LOSS(TARGET)  :', Loss.ctcLoss(mat, gt, classes))
def __init__(self, input_size, layers=[], loss_type="mse", num_layers=None, print_loss=False):
    """
    Initialize the attributes of the neural network:
        input_size : size of a training example
        layers     : layers to be used in sequence (each must be an instance of Layer)
        loss_type  : type of loss to use ("mse", "binary_crossentropy", ...)
        num_layers : number of layers in the network (not compulsory; inferred from `layers`)
    """
    self.layers = {"layer" + str(l + 1): layers[l] for l in range(len(layers))}
    self.loss = Loss(loss_type)
    self.input_size = input_size
    self.num_layers = len(layers)
    self.print_loss = print_loss
    # initialize weights with small random values (scaled by 0.01)
    self.layers["layer1"].weights = np.random.randn(
        self.layers["layer1"].units, input_size) * 0.01
    for l in range(1, self.num_layers):
        self.layers["layer" + str(l + 1)].weights = np.random.randn(
            self.layers["layer" + str(l + 1)].units,
            self.layers["layer" + str(l)].units) * 0.01
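# A hedged usage sketch for the constructor above. The enclosing class name
# (`NeuralNetwork` here) and the `Layer(units=...)` signature are assumptions
# for illustration; only the constructor arguments come from the code above.
#
#   net = NeuralNetwork(input_size=4,
#                       layers=[Layer(units=8), Layer(units=1)],
#                       loss_type="binary_crossentropy",
#                       print_loss=True)
#   # net.layers["layer1"].weights.shape == (8, 4)
#   # net.layers["layer2"].weights.shape == (1, 8)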
def train(network, X, y):
    """
    Train your network on a given batch of X and y.
    You first need to run forward to get all layer activations.
    You can estimate loss and loss_grad, obtaining dL / dy_pred.
    Then you can run layer.backward going from last layer to first,
    propagating the gradient of input to previous layers.
    After you called backward for all layers, all Dense layers have
    already made one gradient step.
    """
    # Get the layer activations
    layer_activations = forward(network, X)
    # Build the list of inputs to all layers: prepend the input vector to the
    # activations, so layer_inputs[i] is the input for network[i].
    layer_inputs = [X] + layer_activations
    # Output of the network
    logits = layer_activations[-1]

    # Compute the loss and the initial gradient with softmax cross-entropy
    loss = Loss.softmax_crossentropy_with_logits(logits, y)
    loss_grad = Loss.grad_softmax_crossentropy_with_logits(logits, y)

    # Propagate gradients through network layers using .backward,
    # starting from the last layer and moving back to the first.
    for i in range(len(network) - 1, -1, -1):
        loss_grad = network[i].backward(layer_inputs[i], loss_grad)

    return np.mean(loss)
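# A hedged usage sketch: a minimal epoch loop over mini-batches, assuming
# `network` is a list of layer objects with forward/backward methods (as the
# function above expects); `iterate_minibatches` is a hypothetical helper.
#
#   for epoch in range(25):
#       epoch_losses = []
#       for X_batch, y_batch in iterate_minibatches(X_train, y_train, batch_size=32):
#           epoch_losses.append(train(network, X_batch, y_batch))
#       print("epoch", epoch, "mean loss", np.mean(epoch_losses))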
def build(self, output_type):
    ### set up parameters
    self.params += [self.W_hy, self.b_hy]
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape,
                           dtype=theano.config.floatX),
            name='updates')

    ### set up regularizers
    self.L1 += T.sum(abs(self.W_hy))
    self.L2_sqr += T.sum(self.W_hy ** 2)

    ### final prediction formula
    self.y_pred = T.dot(self.get_output(), self.W_hy) + self.b_hy

    self.output_type = output_type
    if self.output_type == 'real':
        self.y = T.matrix(name='y', dtype=theano.config.floatX)
        self.loss = lambda y: Loss.mse(self.y_pred, y)  # y is the target; the loss compares it with y_pred
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=self.y_pred,
                                       mode=mode)
    elif self.output_type == 'binary':
        self.y = T.matrix(name='y', dtype='int32')
        self.p_y_given_x = T.nnet.sigmoid(self.y_pred)
        self.y_out = T.round(self.p_y_given_x)  # round to {0, 1}
        self.loss = lambda y: Loss.nll_binary(self.p_y_given_x, y)
        self.predict_proba = theano.function(inputs=[self.x, ],
                                             outputs=self.p_y_given_x,
                                             mode=mode)
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=T.round(self.p_y_given_x),
                                       mode=mode)
    elif self.output_type == 'softmax':
        self.y = T.vector(name='y', dtype='int32')
        self.p_y_given_x = T.nnet.softmax(self.y_pred)
        self.y_out = T.argmax(self.p_y_given_x, axis=-1)
        self.loss = lambda y: Loss.nll_multiclass(self.p_y_given_x, y)
        self.predict_proba = theano.function(inputs=[self.x, ],
                                             outputs=self.p_y_given_x,
                                             mode=mode)
        self.predict = theano.function(inputs=[self.x, ],
                                       outputs=self.y_out,  # y_out is computed with argmax
                                       mode=mode)
    else:
        raise NotImplementedError
def eval(model, criterion, valid_data):
    stats = Loss.Statistics()
    model.eval()
    loss = Loss.LossCompute(model.generator, criterion)
    for src, tgt in valid_data:
        src, tgt, src_lengths = prepare_data(src, tgt, True)
        outputs = model(src, tgt[:-1], src_lengths)
        gen_state = loss.make_loss_batch(outputs, tgt[1:])
        _, batch_stats = loss.compute_loss(**gen_state)
        stats.update(batch_stats)
    model.train()
    return stats
def train_vae(model, train_iter, valid_iter, tgtvocab, optim):
    #train_iter = make_train_data_iter(train_data, opt)
    #valid_iter = make_valid_data_iter(valid_data, opt)

    train_loss = Loss.VAELoss(model.generator, tgtvocab)
    valid_loss = Loss.VAELoss(model.generator, tgtvocab)
    if use_gpu(opt):
        train_loss = train_loss.cuda()
        valid_loss = valid_loss.cuda()

    trunc_size = opt.truncated_decoder  # badly named... default=0
    shard_size = opt.max_generator_batches  # default=32

    trainer = Trainer.VaeTrainer(model, train_iter, valid_iter,
                                 train_loss, valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train perplexity: %g' % train_stats.ppl())
        print('Train accuracy: %g' % train_stats.accuracy())

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation perplexity: %g' % valid_stats.ppl())
        print('Validation accuracy: %g' % valid_stats.accuracy())

        # 3. Log to remote server.
        if opt.exp_host:
            train_stats.log("train", experiment, optim.lr)
            valid_stats.log("valid", experiment, optim.lr)

        # 4. Update the learning rate.
        trainer.epoch_step(valid_stats.ppl(), epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, valid_stats)

        # 6. Anneal the KL weight and the encoder variance.
        train_loss.VAE_weightaneal(epoch)
        valid_loss.VAE_weightaneal(epoch)
        model.encoder.Varianceanneal()
def define_criterion(self, name):
    if name.lower() == 'bce+dice':
        self.criterion = Loss.BCE_Dice()
    elif name.lower() == 'dice':
        self.criterion = Loss.DiceLoss()
    elif name.lower() == 'bce':
        self.criterion = nn.BCEWithLogitsLoss()
    elif name.lower() == 'robustfocal':
        self.criterion = Loss.RobustFocalLoss2d()
    elif name.lower() == 'lovasz-hinge' or name.lower() == 'lovasz':
        self.criterion = Loss.Lovasz_Hinge(per_image=True)
    elif name.lower() == 'bce+lovasz':
        self.criterion = Loss.BCE_Lovasz(per_image=True)
    else:
        raise NotImplementedError('Loss {} is not implemented'.format(name))
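# A hedged usage sketch: picking a criterion by name on a model instance and
# applying it to a batch. The `model`, `logits`, and `masks` names are
# illustrative assumptions, not part of the original code; the criteria above
# generally operate on raw (pre-sigmoid) logits.
#
#   model.define_criterion('bce+lovasz')
#   loss = model.criterion(logits, masks)
#   loss.backward()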
def testMiniExample():
    "example which shows difference between taking most probable path and most probable labeling. No language model used."

    # chars and input matrix
    classes = 'ab'
    mat = np.array([[0.4, 0, 0.6], [0.4, 0, 0.6]])

    # decode
    gt = 'a'
    print('TARGET       :', '"' + gt + '"')
    print('BEST PATH    :', '"' + BestPath.ctcBestPath(mat, classes) + '"')
    print('PREFIX SEARCH:', '"' + PrefixSearch.ctcPrefixSearch(mat, classes) + '"')
    print('BEAM SEARCH  :', '"' + BeamSearch.ctcBeamSearch(mat, classes, None) + '"')
    print('TOKEN        :', '"' + TokenPassing.ctcTokenPassing(mat, classes, ['a', 'b', 'ab', 'ba']) + '"')
    print('PROB(TARGET) :', Loss.ctcLabelingProb(mat, gt, classes))
    print('LOSS(TARGET) :', Loss.ctcLoss(mat, gt, classes))
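# Worked numbers for the mini example above (columns of `mat` are
# [P(a), P(b), P(blank)], rows are the two time-steps): the single most
# probable path is (blank, blank) with probability 0.6 * 0.6 = 0.36, which
# collapses to the empty labeling. The labeling 'a', however, sums over three
# paths:
#   (a, a)     -> 0.4 * 0.4 = 0.16
#   (a, blank) -> 0.4 * 0.6 = 0.24
#   (blank, a) -> 0.6 * 0.4 = 0.24
# giving P('a') = 0.64 > 0.36, so best path decoding returns '' while the
# decoders that sum over paths return 'a'.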
def build(self): ### set up parameters self.params+=[self.W_hi, self.b_hi, self.Wemb] ''' for param in self.params: self.updates[param] = theano.shared( value = np.zeros( param.get_value( borrow = True).shape, dtype = theano.config.floatX), name = 'updates') ''' ### set up regularizer self.L1 += T.sum(abs(self.W_hy)) self.L2_sqr += T.sum(self.W_hy**2) ### fianl prediction formular self.y_pred = T.dot(self.get_output(), self.Wemb.T) y_p = self.y_pred y_p_m = T.reshape(y_p, (y_p.shape[0] * y_p.shape[1], -1)) y_p_s = T.nnet.softmax(y_p_m) self.p_y_given_x = T.reshape(y_p_s, y_p.shape) self.loss = lambda y,y_mask: Loss.nll_multiclass(self.p_y_given_x,y,y_mask)
def build(self, output_type):
    self.params += [self.W_hy, self.b_hy, self.W_hi, self.b_hi]
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape,
                           dtype=theano.config.floatX),
            name='updates')

    ### set up regularizers
    self.L1 += T.sum(abs(self.W_hy))
    self.L2_sqr += T.sum(self.W_hy ** 2)

    ### final prediction formula
    #self.y = T.vector(name = 'y', dtype = 'int32')
    self.y_pred = T.dot(self.get_output(), self.W_hy) + self.b_hy
    y_p = self.y_pred
    y_p_m = T.reshape(y_p, (y_p.shape[0] * y_p.shape[1], -1))
    y_p_s = T.nnet.softmax(y_p_m)
    self.p_y_given_x = T.reshape(y_p_s, y_p.shape)

    self.loss = lambda y: Loss.nll_multiclass(self.p_y_given_x, y)
def initialize(self, opt):
    model_wrapper.initialize(self, opt)
    # input_channel, K = 64, downsample_num = 6
    # downsample_num could be added to opt as opt.downsample_num
    self.netG = network.create_G(opt.input_chan).to(self.device)

    if 'train' in self.opt.mode:
        # opt.mode is one of: train / continue train / load pretrain / test.
        # 'continue train' resumes training from the last epoch;
        # 'train' trains from scratch;
        # 'load pretrain' does nothing extra, just loads D and G with their params.
        # In any training mode we need a discriminator; only test/eval mode does not.
        # Input and target are concatenated along channels for the patchGAN,
        # so D takes opt.input_chan*2 channels (so far only 3-channel images;
        # input_channel, K = 64, n_layers = 4).
        self.netD = network.create_D(opt.input_chan * 2).to(self.device)

        # load model from the checkpoint, e.g. ./checkpoint/modelname/10_net_G.pth
        pretrain_path = '' if self.opt.mode == 'train' else self.save_dir
        self.load_network(self.netD, 'D', opt.which_epoch, pretrain_path)
        self.load_network(self.netG, 'G', opt.which_epoch, pretrain_path)

        # TBD
        # self.loss_filter = self.init_loss_filter(not opt.no_ganFeat_loss, not opt.no_vgg_loss)
        # we have GAN loss, discriminator fake/real loss, and VGG loss
        self.l1loss = nn.L1Loss()
        self.vggloss = Loss.pixHDversion_perceptual_loss(opt.gpu_ids)
        self.TVloss = Loss.TVLoss()
        self.GANloss = Loss.GANLoss(device=self.device, lsgan=opt.lsgan)

        self.optimizer_G = torch.optim.Adam(list(self.netG.parameters()),
                                            lr=opt.learningrate, betas=(0.9, 0.999))
        self.optimizer_D = torch.optim.Adam(list(self.netD.parameters()),
                                            lr=opt.learningrate, betas=(0.9, 0.999))

        print('---------- Networks initialized -------------')
        print('---------- NET G -------------')
        print(self.netG)
        print('---------- NET D -------------')
        print(self.netD)
        # remains: push the model to cuda if available
    elif 'test' in self.opt.mode:
        self.load_network(self.netG, 'G', opt.which_epoch, self.save_dir)
    else:
        print('mode error; this would create an empty netG without pretrained params')
def initialize_loss(self, model: Model) -> Callable:
    loss = Loss(
        model=model,
        kl_scale=self.kl_scale,
        n_samples=self.n_samples,
        stochastic_linearization=self.stochastic_linearization,
    )
    return loss.nelbo_fsvi_classification
def loss_test():
    # input = [batch, 3]: per-class scores for each sample in the batch; there are 3 classes
    x_input = torch.randn(6, 3, requires_grad=True)
    x_input_numpy = x_input.detach().numpy()
    print('x_input: ', x_input)
    # print('x_input_numpy: ', x_input_numpy)

    # target = [batch]: the class label of each sample
    # y_target = torch.tensor([[1,0,0,0,0],[0,0,1,0,0],[0,0,0,1,0],[1,0,0,0,0],[0,0,0,0,1]])
    y_target = torch.tensor([0, 2, 1, 0, 1, 2])
    y_target_numpy = y_target.detach().numpy()
    print('y_target: ', y_target)
    print('y_target_numpy: ', y_target_numpy)

    # instantiate the LogSoftmax classes
    logsoftmax_tensor = nn.LogSoftmax(dim=1)
    logsoftmax_output_tensor = logsoftmax_tensor(x_input)
    logsoftmax_numpy = Logsoftmax.Logsoftmax()
    logsoftmax_output_numpy = logsoftmax_numpy.forward(x_input_numpy)
    print('----- compare outputs -----')
    print('logsoftmax_output_tensor: \n', logsoftmax_output_tensor)
    print('logsoftmax_output_numpy: \n', logsoftmax_output_numpy)

    loss_tensor = nn.NLLLoss()
    cross_entropy_loss_output_tensor = loss_tensor(logsoftmax_output_tensor, y_target)
    cross_entropy_loss_output_tensor.backward()
    loss_grad_tensor = x_input.grad

    loss_numpy = Loss.NLLLoss()
    cross_entropy_loss_output_numpy = loss_numpy.cal_loss(
        logsoftmax_output_numpy, y_target_numpy)
    eta = loss_numpy.gradient()
    print('eta: \n', eta)
    loss_grad_numpy = logsoftmax_numpy.gradient(eta)

    print('----- forward pass -----')
    print('cross_entropy_loss_output_tensor: \n', cross_entropy_loss_output_tensor)
    print('cross_entropy_loss_output_numpy: \n', cross_entropy_loss_output_numpy)
    print('----- backward pass -----')
    print('loss_grad_tensor: \n', loss_grad_tensor)
    print('loss_grad_numpy: \n', loss_grad_numpy)

    '''
    # softmax (works)
    softmax_func = nn.Softmax(dim=1)
    softmax_output = softmax_func(x_input)
    print('softmax_output: \n', softmax_output)
    softmax_output_numpy = softmax_numpy.predict(x_input_numpy)
    print('softmax_output_numpy: \n', softmax_output_numpy)
    '''
def build_model(style_img, content_img, cnn=vgg,
                style_weight=1000, content_weight=1,
                content_layers=content_layers_default,
                style_layers=style_layers_default):
    content_loss_list = []
    style_loss_list = []

    model = nn.Sequential()
    model = model.to(device)
    gram = Loss.Gram()
    gram = gram.to(device)

    i = 1
    for layer in cnn:
        if isinstance(layer, nn.Conv2d):
            name = 'conv_' + str(i)
            model.add_module(name, layer)

            if name in content_layers:
                # the target is the content image's feature map at this layer
                target = model(content_img)
                content_loss = Loss.Content_Loss(target, content_weight)
                model.add_module('content_loss_' + str(i), content_loss)
                content_loss_list.append(content_loss)

            if name in style_layers:
                # the target is the Gram matrix of the style image's feature map
                target = model(style_img)
                target = gram(target)
                style_loss = Loss.Style_Loss(target, style_weight)
                model.add_module('style_loss_' + str(i), style_loss)
                style_loss_list.append(style_loss)

            i += 1

        if isinstance(layer, nn.MaxPool2d):
            name = 'pool_' + str(i)
            model.add_module(name, layer)

        if isinstance(layer, nn.ReLU):
            name = 'relu' + str(i)
            model.add_module(name, layer)

    return model, style_loss_list, content_loss_list
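# A hedged optimization sketch over the model built above, in the usual
# Gatys-style loop. It assumes the Content_Loss / Style_Loss modules record
# their value in a `.loss` attribute when the model is run (a common pattern
# in this kind of code, but an assumption here).
#
#   model, style_losses, content_losses = build_model(style_img, content_img)
#   input_img = content_img.clone().requires_grad_(True)
#   optimizer = torch.optim.LBFGS([input_img])
#
#   def closure():
#       optimizer.zero_grad()
#       model(input_img)  # side effect: each loss module computes its loss
#       total = (sum(sl.loss for sl in style_losses)
#                + sum(cl.loss for cl in content_losses))
#       total.backward()
#       return total
#
#   for _ in range(300):
#       optimizer.step(closure)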
def define_criterion(self, name):
    if name.lower() == 'bce+dice':
        self.criterion = Loss.BCE_Dice()
    elif name.lower() == 'dice':
        self.criterion = Loss.DiceLoss()
    elif name.lower() == 'bce':
        # self.criterion = nn.BCEWithLogitsLoss()
        # self.criterion = nn.CrossEntropyLoss(size_average=False, reduction='sum')
        self.criterion = nn.CrossEntropyLoss()
        # self.criterion = Loss.CE_SOFT()
    elif name.lower() == 'robustfocal':
        self.criterion = Loss.RobustFocalLoss2d()
    elif name.lower() == 'lovasz-hinge' or name.lower() == 'lovasz':
        self.criterion = Loss.Lovasz_Hinge(per_image=True)
    elif name.lower() == 'bce+lovasz':
        self.criterion = Loss.BCE_Lovasz(per_image=True)
    else:
        raise NotImplementedError('Loss {} is not implemented'.format(name))
def train(epoch):
    for epo in range(epoch):
        mioub, mioup = [], []
        for ind, (_, mhi, rois, tars) in enumerate(train_loader):
            t1 = time.time()
            outs = basemodel(mhi.to('cuda'))
            tt = outs[3]
            try:
                ROIfs, Tars, ROIs = ROIPooling(rois=rois.numpy(), features=tt, target=tars)
                sum_loss, slx, sly, slw, slh = [], [], [], [], []
                ioubs, ioups, tarabss, preabss = [], [], [], []
                pre_tars = subnet(ROIfs)
                lx, ly, lw, lh, ioub, ioup, loss, tarabs, preabs = Loss.loss(
                    pre_tars, Tars.to('cuda'), ROIs)
                sum_loss.append(loss)
                slx.append(lx)
                sly.append(ly)
                slw.append(lw)
                slh.append(lh)
                ioubs.append(ioub)
                ioups.append(ioup)
                tarabss.append(tarabs)
                preabss.append(preabs)
                sum(sum_loss).backward()
                torch.nn.utils.clip_grad_norm_(subnet.parameters(), 100)
                mlx, mly, mlw, mlh, ioub, ioup, tarabs, preabs = \
                    sum(slx) / len(slx), sum(sly) / len(sly), \
                    sum(slw) / len(slw), sum(slh) / len(slh), \
                    sum(ioubs) / len(ioubs), sum(ioups) / len(ioups), \
                    sum(tarabss) / len(tarabss), sum(preabss) / len(preabss)  # sum(map(float, sum_loss))/len(sum_loss),
                optimizer.step()
            except IOError as err:
                print(err)
                print('train step has bugs, go to next step')
                # continue
            t2 = time.time()
            mioub.append(ioub)
            mioup.append(ioup)
            timep = t2 - t1
            print('epoch:', epo, 'step:', ind,
                  'lx:{:.4f}'.format(mlx), 'ly:{:.4f}'.format(mly),
                  'lw:{:.4f}'.format(mlw), 'lh:{:.4f}'.format(mlh),
                  'ioub:{:.4f}'.format(ioub), 'ioup:{:.4f}'.format(ioup),
                  'mioub:{:.4f}'.format(sum(mioub) / len(mioub)),
                  'mioup:{:.4f}'.format(sum(mioup) / len(mioup)),
                  'tarabs:{:.4f}'.format(tarabs), 'preabs:{:.4f}'.format(preabs),
                  'time:{:.4f}'.format(timep))
            logger.write(['lx', 'ly', 'lw', 'lh', 'diff'],
                         [mlx, mly, mlw, mlh, tarabs - preabs])
            if ind % 10 == 0:
                logger.savetomat()
        torch.save(subnet.state_dict(), './checkpoint/subnet{}.pt'.format(epo))
def ctcLexiconSearch(mat, classes, lm):
    "compute approximation with best path decoding, search most similar words in dictionary, calculate score for each of them, return best scoring one. See Shi, Bai and Yao."

    # use best path decoding to get an approximation
    approx = BestPath.ctcBestPath(mat, classes)

    # search words with minimal edit-distance to the approximation
    # (speed-up possible by using a BK-tree data structure)
    keepBest = 10
    dist = [(w, editdistance.eval(approx, w)) for w in lm.getWordList()]  # edit-distance of each word to the recognized word from best path decoding
    dist = sorted(dist, key=lambda x: x[1])[:keepBest]  # keep 10 best words w.r.t. edit-distance

    # for each word candidate, calculate probability and keep best-scoring word
    probs = [(entry[0], Loss.ctcLabelingProb(mat, entry[0], classes)) for entry in dist]
    probs = sorted(probs, key=lambda x: x[1], reverse=True)
    return probs[0][0]
def ctcLexiconSearch(mat, classes, bkTree, tolerance):
    "compute approximation with best path decoding, search most similar words in dictionary, calculate score for each of them, return best scoring one. See Shi, Bai and Yao."

    # use best path decoding to get an approximation
    approx = BestPath.ctcBestPath(mat, classes)

    # get similar words from dictionary within given tolerance
    words = bkTree.query(approx, tolerance)

    # if there are no similar words, return empty string
    if not words:
        return ''

    # else compute probabilities of all similar words and return best scoring one
    wordProbs = [(w, Loss.ctcLabelingProb(mat, w, classes)) for w in words]
    wordProbs.sort(key=lambda x: x[1], reverse=True)
    return wordProbs[0][0]
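# A hedged usage sketch for the BK-tree variant above. `BKTree` is assumed to
# be a companion class built from the lexicon that offers the query(word,
# tolerance) method used above; the constructor shown is an assumption.
#
#   bkTree = BKTree(lm.getWordList())
#   word = ctcLexiconSearch(mat, classes, bkTree, tolerance=2)
#
# A tolerance of 2 admits candidates within edit-distance 2 of the best-path
# approximation; a larger tolerance widens the candidate set at the cost of
# more ctcLabelingProb evaluations.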
def __init__(self, lr=0.005, fold=None, val_mode='max',
             criterion_name='lovasz', comment=''):
    super(SegmentationNetwork, self).__init__()
    self.lr = lr
    self.fold = fold
    self.scheduler = None
    self.best_model_path = None
    self.epoch = 0
    self.val_mode = val_mode
    if criterion_name == 'lovasz':
        self.criterion = Loss.Lovasz_Hinge(per_image=True)

    if self.val_mode == 'max':
        self.best_metric = -np.inf
    elif self.val_mode == 'min':
        self.best_metric = np.inf

    self.comment = comment
    self.train_log = dict(loss=[], iou=[], mAP=[])
    self.val_log = dict(loss=[], iou=[], mAP=[])
    self.create_save_folder()
def Train(self, Xtr, Ytr):
    # Forward pass
    # Xtr : 10000x3072
    # init weights 3072x10
    weights = 0.001 * np.random.randn(Xtr.shape[1], CLASS)
    # print(Xtr[1])
    # bias 1x10
    # bias = 0.01 * np.random.randn(1, CLASS)
    for epoch in range(30):
        loss, dw = Loss.softmax(Xtr, weights, Ytr, self.Yhop)
        #LOSS, DW = sample.softmax_loss_vectorized(weights, Xtr, Ytr)
        # backpropagation
        # weights = Loss.softmax_bp(Xtr, weights, Fmatrix, self.Yhop)
        weights -= 1e-2 * dw  # gradient descent step with learning rate 1e-2
        print("epoch: ", epoch)
        print("loss: ", loss)
        #print("dw: ", dw)
    return weights
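# A hedged prediction sketch: after training, class scores for test images are
# Xte.dot(weights) and the prediction is the argmax per row. `clf`, `Xte`, and
# `Yte` are illustrative assumptions.
#
#   weights = clf.Train(Xtr, Ytr)
#   scores = Xte.dot(weights)            # shape (num_test, CLASS)
#   y_pred = np.argmax(scores, axis=1)   # predicted class per test image
#   accuracy = np.mean(y_pred == Yte)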
def sigmoid_test():
    x_numpy = np.random.randn(6, 1).astype(np.float32)
    x = torch.tensor(x_numpy, requires_grad=True)
    y_target_numpy = np.array([0, 0, 1, 0, 1, 1]).astype(np.float32)  # the target must be floating point
    y_target = torch.tensor(y_target_numpy)

    loss_tensor = nn.BCELoss()
    loss_numpy = Loss.BECLoss()

    """forward pass"""
    s_tensor = nn.Sigmoid()
    s_out_tensor = s_tensor(x).view(-1)
    s_numpy = Activators.Sigmoid_CE()
    s_out_numpy = s_numpy.forward(x_numpy).reshape(-1)
    print('----- compare outputs -----')
    print('s_out_tensor: \n', s_out_tensor)
    print('s_out_tensor shape: \n', s_out_tensor.shape)
    print('s_out_numpy: \n', s_out_numpy)
    print('s_out_numpy shape: \n', s_out_numpy.shape)
    print('s_out_error: \n', s_out_numpy - s_out_tensor.detach().numpy())

    """backward pass"""
    err_tensor = loss_tensor(s_out_tensor, y_target)
    err_numpy = loss_numpy.forward(s_out_numpy, y_target_numpy)
    err_tensor.backward()
    x_grad = x.grad
    dy_loss = loss_numpy.gradient()
    x_grad_numpy = s_numpy.gradient(dy_loss)
    print('----- compare losses -----')
    print('err_tensor: \n', err_tensor)
    print('err_numpy: \n', err_numpy)
    print('err_error: \n', err_numpy - err_tensor.detach().numpy())
    print('----- compare x gradients -----')
    print('x_grad: \n', x_grad)
    print('x_grad_numpy: \n', x_grad_numpy)
    print('x_grad_error: \n', x_grad_numpy - x_grad.detach().numpy())
def main():
    net = YoloNet.YOLONet()
    criterion = Loss.MyLoss()
    if input('Do you want to load network?').upper() == 'N':
        optimizer = optim.Adam(net.parameters(), lr=1e-4)
        train_data = Dataset.DetectionDataSet()
        trainer = Trainer.Trainer(net=net,
                                  data_set=train_data,
                                  optimizer=optimizer,
                                  criterion=criterion)
        trainer.train()
    else:
        net.load_state_dict(torch.load('yolo_cpu.pt'))
        test_data = Dataset.DetectionDataSet(paths='numpy_test/paths.txt',
                                             label_dir='numpy_test/',
                                             root_dir='test/')
        tester = Tester.Tester(net=net,
                               test_criterion=criterion,
                               data_set=test_data)
        tester.test()
def loss_test():
    # input = [batch, 3]: per-class scores for each sample in the batch; there are 3 classes
    x_input = torch.randn(5, 3, requires_grad=True)
    x_input_numpy = x_input.detach().numpy()
    print('x_input: ', x_input)
    # print('x_input_numpy: ', x_input_numpy)

    # target = [batch]: the class label of each sample
    y_target_t = torch.empty(5, dtype=torch.long).random_(3)  # a length-5 tensor of random labels in [0, 3)
    print(y_target_t)
    y_target = torch.tensor([0, 2, 1, 0, 1])
    y_target_numpy = y_target.detach().numpy()
    print('y_target: ', y_target)
    print('y_target_numpy: ', y_target_numpy)

    # instantiate the Softmax class
    softmax_numpy = Loss.Softmax(x_input_numpy.shape)
    '''
    # softmax (works)
    softmax_func = nn.Softmax(dim=1)
    softmax_output = softmax_func(x_input)
    print('softmax_output: \n', softmax_output)
    softmax_output_numpy = softmax_numpy.predict(x_input_numpy)
    print('softmax_output_numpy: \n', softmax_output_numpy)
    '''
    # softmax + cross entropy (works)
    cross_entropy_loss = nn.CrossEntropyLoss()
    output = cross_entropy_loss(x_input, y_target)
    output.backward()
    print('cross_entropy_output: \n', output)
    print('grad_output: \n', x_input.grad)

    cross_entropy_loss_numpy = softmax_numpy.cal_loss(x_input_numpy, y_target_numpy)
    eta_numpy = softmax_numpy.gradient_with_loss()
    print('cross_entropy_loss_numpy: \n', cross_entropy_loss_numpy)
    print('grad_output_numpy: \n', eta_numpy)
def __init__(self, config, train_loader, valid_loader, test_loader):
    # Data loaders
    self.train_loader = train_loader
    self.valid_loader = valid_loader
    self.test_loader = test_loader

    # Models
    self.unet = None
    self.optimizer = None
    self.scheduler = None
    self.img_ch = config.img_ch
    self.output_ch = config.output_ch

    # Losses
    self.criterion = Loss.DiceLoss()

    # Hyper-parameters
    self.lr = config.lr
    self.beta1 = config.beta1
    self.beta2 = config.beta2

    # Training settings
    self.num_epochs = config.num_epochs
    self.num_epochs_test = config.num_epochs_test
    self.batch_size = config.batch_size

    # Paths
    self.model_path = config.model_path
    self.train_result_path = config.train_result_path
    self.val_result_path = config.val_result_path
    self.test_result_path = config.test_result_path

    self.mode = config.mode
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.model_type = config.model_type
    self.build_model()
def train(self):
    path = "./dataset/" + self.dataset
    # read training data
    data = readData(path, self.train2id, self.year2id, self.step_list,
                    self.headRelation2Tail, self.tailRelation2Head,
                    self.headTail2Relation, self.nums)
    self.Triples = data.out()
    self.numOfTrainTriple = self.nums[0]
    self.numOfEntity = self.nums[1]
    self.numOfRelation = self.nums[2]
    self.numOfTime = self.nums[3]
    self.numOfMaxLen = self.nums[4]

    self.readValidateTriples(path)
    self.readTestTriples(path)

    # init the model
    self.model = MPKE(self.numOfEntity, self.numOfRelation, self.numOfTime,
                      self.numOfMaxLen, self.entityDimension,
                      self.relationDimension, self.norm, self.norm_m,
                      self.hyper_m)
    #self.preRead()
    self.model.to(self.device)
    #self.test()

    Margin_Loss_D = Loss.double_marginLoss()
    #Margin_Loss_H = Loss.marginLoss()
    #Margin_Loss_S = Loss.sigmoidLoss()

    optimizer = optim.Adam(self.model.parameters(), lr=self.learningRate)
    Dataset = dataset(self.numOfTrainTriple)
    batchsize = int(self.numOfTrainTriple / self.numOfBatches)
    dataLoader = DataLoader(Dataset, batchsize, True)

    Log_path = "./dataset/" + self.dataset + "/" + str(self.learningRate) + "_MP_" + "log.txt"
    Log = open(Log_path, "w")

    for epoch in range(self.numOfEpochs):
        epochLoss = 0
        for batch in dataLoader:
            self.positiveBatch = {}
            self.corruptedBatch = {}
            generateBatches(batch, self.train2id, self.step_list,
                            self.positiveBatch, self.corruptedBatch,
                            self.numOfEntity, self.numOfRelation,
                            self.headRelation2Tail, self.tailRelation2Head,
                            self.headTail2Relation, self.ns)
            optimizer.zero_grad()
            positiveBatchHead = self.positiveBatch["h"].to(self.device)
            positiveBatchRelation = self.positiveBatch["r"].to(self.device)
            positiveBatchTail = self.positiveBatch["t"].to(self.device)
            positiveBatchTime = self.positiveBatch["time"].to(self.device)
            positiveBatchStep = self.positiveBatch["step"].to(self.device)
            corruptedBatchHead = self.corruptedBatch["h"].to(self.device)
            corruptedBatchRelation = self.corruptedBatch["r"].to(self.device)
            corruptedBatchTail = self.corruptedBatch["t"].to(self.device)
            corruptedBatchTime = self.corruptedBatch["time"].to(self.device)
            corruptedBatchStep = self.corruptedBatch["step"].to(self.device)

            positiveScore, negativeScore = self.model(
                positiveBatchHead, positiveBatchRelation, positiveBatchTail,
                positiveBatchTime, positiveBatchStep, corruptedBatchHead,
                corruptedBatchRelation, corruptedBatchTail,
                corruptedBatchTime, corruptedBatchStep)

            ent_embeddings = self.model.entity_embeddings(
                torch.cat([positiveBatchHead, positiveBatchTail,
                           corruptedBatchHead, corruptedBatchTail]))
            rel_embeddings = self.model.relation_embeddings(
                torch.cat([positiveBatchRelation, corruptedBatchRelation]))

            loss = Margin_Loss_D(positiveScore, negativeScore, self.margin_triple)
            time_embeddings = self.model.time_embeddings(positiveBatchTime)
            step_embeddings = self.model.step_embeddings(positiveBatchStep)
            batchLoss = loss + Loss.normLoss(time_embeddings) + Loss.normLoss(step_embeddings)
            batchLoss.backward()
            optimizer.step()
            epochLoss += batchLoss

        print("epoch " + str(epoch) + ": loss: " + str(epochLoss))

        if epoch % 20 == 0 and epoch != 0:
            Log.write("epoch " + str(epoch) + ": loss: " + str(epochLoss))
            meanRank_H, Hits10_H = self.model.Validate_entity_H(
                validateHead=self.testHead.to(self.device),
                validateRelation=self.testRelation.to(self.device),
                validateTail=self.testTail.to(self.device),
                validateTime=self.testTime.to(self.device),
                validateStepH=self.testStepH.to(self.device),
                trainTriple=self.Triples.to(self.device),
                numOfvalidateTriple=self.numOfTestTriple)
            print("mean rank H_2_2_.1_nonorm: " + str(meanRank_H))
            meanRank_T, Hits10_T = self.model.Validate_entity_T(
                validateHead=self.testHead.to(self.device),
                validateRelation=self.testRelation.to(self.device),
                validateTail=self.testTail.to(self.device),
                validateTime=self.testTime.to(self.device),
                validateStepT=self.testStepT.to(self.device),
                trainTriple=self.Triples.to(self.device),
                numOfvalidateTriple=self.numOfTestTriple)
            print("mean rank T_2_2_.1_nonorm: " + str(meanRank_T))
            Log.write("valid H MR: " + str(meanRank_H) + "\n")
            Log.write("valid T MR: " + str(meanRank_T) + "\n")
            Log.write("valid entity MR: " + str((meanRank_H + meanRank_T) / 2) + "\n")
            print("valid entity MR: " + str((meanRank_H + meanRank_T) / 2))
            Log.write("valid entity H10: " + str((Hits10_H + Hits10_T) / 2) + "\n")
            print("valid entity H10: " + str((Hits10_H + Hits10_T) / 2))
            ValidMR_relation = self.model.fastValidate_relation(
                validateHead=self.testHead.to(self.device),
                validateRelation=self.testRelation.to(self.device),
                validateTail=self.testTail.to(self.device),
                validateTime=self.testTime.to(self.device),
                validateStepH=self.testStepH.to(self.device),
                numOfvalidateTriple=self.numOfTestTriple)
            Log.write("valid relation MR: " + str(ValidMR_relation) + "\n")
            Log.write("\n")
            print("valid relation MR: " + str(ValidMR_relation))
    Log.close()
def ParallelSQN(f, epoch, P, batch_size4SVRG, stepsize=10**-5,
                stepsize_type="fixed", verbose=False, optgap=10**(-30),
                loss='logistic'):
    '''
    INPUT:
        x               : data
        y               : vector of labels, 1 or 0
        K               : number of outer iterations
        P               : number of parallel processors
        batch_size4SVRG : batch_size4H
        stepsize        : default 10**-5
        stepsize_type   : fixed, decay 1/t, sqrt decay 1/sqrt(t)
    OUTPUT:
    '''
    # global FILENAME
    # FILENAME = outfile
    # print FILENAME
    # logging.basicConfig(level=logging.DEBUG, filename=FILENAME, filemode='w')
    x, label = Util.readlibsvm(f)
    #x = sklearn.preprocessing.normalize(x)
    #label = (label == 1) * 1
    L = x.shape[0] // (batch_size4SVRG * P)

    if loss == 'logistic':
        loss = Loss.LogisticLoss_version2()
    elif loss == 'svm':
        loss = Loss.svm_quadratic()
    elif loss == 'ridge':
        loss = Loss.ridge_regression()

    #regularizer = 1/x.shape[0]
    regularizer = 10**(-3)
    #loss = Loss.ridge_regression()

    print("The number of cores : " + str(multiprocessing.cpu_count()))
    print("The dataset : " + str(f))
    print("The number of instances N : " + str(x.shape[0]))
    print("The number of features p : " + str(x.shape[1]))
    print('The number of processes P : ' + str(P))
    print("The batch size for SVRG : " + str(batch_size4SVRG))
    print("The step size : " + str(stepsize))
    print("The epoch : " + str(epoch))
    print("The loss type : " + str(loss))
    print("The regularizer : " + str(regularizer))

    # init shared-memory variables
    x_data = sharedmem.empty(len(x.data), dtype=x.data.dtype)
    x_data = x.data
    x_indices = sharedmem.empty(len(x.indices), dtype=x.indices.dtype)
    x_indices = x.indices
    x_indptr = sharedmem.empty(len(x.indptr), dtype=x.indptr.dtype)
    x_indptr = x.indptr
    y = sharedmem.empty(len(label), dtype=label.dtype)
    y = label
    x_shape = sharedmem.empty(len(x.shape), dtype=x.indices.dtype)
    x_shape = x.shape
    lock = Lock()

    w = sharedmem.empty(x.shape[1], dtype=np.longdouble)
    #w[:] = np.random.rand(x.shape[1],)  # Array(c_double, np.random.rand(x.shape[0]), lock=False)
    w[:] = np.zeros(x.shape[1], )
    w_multi = sharedmem.empty(x.shape[1], dtype=np.longdouble)
    w_multi[:] = np.copy(w)  # multiprocessing.sharedctypes.copy(w)
    u = sharedmem.empty(x.shape[1], dtype=np.longdouble)
    u[:] = np.random.rand(x.shape[1], )
    flag = sharedmem.empty(P, dtype=int)
    flag[:] = np.zeros([P, ])
    # ----------------------------------------------

    procs = []
    # add master
    procs.append(
        Process(target=master_loop,
                args=(loss, lock, L, P, x_data, x_indices, x_indptr, x_shape,
                      y, batch_size4SVRG, w, w_multi, u, epoch, flag, optgap,
                      stepsize_type, stepsize, regularizer)))
    # add slaves
    for proc_id in range(1, P):
        t = Process(target=slave_loop,
                    args=(loss, lock, L, x_data, x_indices, x_indptr, x_shape,
                          y, batch_size4SVRG, w, w_multi, u, epoch, flag,
                          proc_id, stepsize_type, stepsize, regularizer))
        procs.append(t)

    # start all processes
    for t in procs:
        t.daemon = True
        t.start()

    # wait until all processes finish
    for t in procs:
        t.join()

    print('Finished parallel run')
max_iou = 0

#############
# Load Data #
#############
TRAIN_X = tf.placeholder(tf.float32, [batch_size, 128, 59, 3])
TRAIN_ANCHORS = tf.placeholder(tf.float32, [batch_size, num_boxes_one_image, 4])
TRAIN_CLASSES = tf.placeholder(tf.float32, [batch_size, num_boxes_one_image, 2])

classes, offset, anchors = SSDModel(n_classes=1,
                                    aspect_ratios=[2.5, 3, 3.2],
                                    scales=[47, 52, 56, 59])(TRAIN_X)
loss_loc, loss_cls, values = Loss.cls_loc_loss(
    anchor_pred=offset,  # the function name here should be renamed
    anchor_true=TRAIN_ANCHORS,
    y_pred=classes,
    y_true=TRAIN_CLASSES,
    pos_neg_ratio=pos_neg_ratio)
loss = loss_cls + 10 * loss_loc  # a very important weight: it controls how fast the classification network converges

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
opt = optimizer.minimize(loss)
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_x, train_roi, test_x, test_roi, train_cls, test_cls = load_data(train_txt, test_txt)
    trainData = BatchGenerator(image=train_x,
                               roi=train_roi,
                               classes=train_cls,
                               batch_size=batch_size)
    testData = BatchGenerator(image=test_x,
def define_criterion(self, name):
    if name.lower() == 'lovasz':
        self.criterion = Loss.Lovasz_Hinge(per_image=True)
from model_evalution import ModelEval
from RoiPooling import RoiPooling
import time

INPUT_DIR = r'I:\zjc\data\VOCtrainval_11-May-2012\VOCdevkit\VOC2012'  # raw string avoids accidental escapes in the Windows path
EPOCHES = 50000
NUM_CLASSES = 20

IMAGE = tf.placeholder(tf.float32, [1, None, None, 3])
CLASSES = tf.placeholder(tf.float32, [1, None, NUM_CLASSES + 1])
OFFSET = tf.placeholder(tf.float32, [1, None, 4])
ROIS = tf.placeholder(tf.float32, [None, 4])

model = FootNet(aspect_ratio=[0.5, 1, 2], scales=[100, 200, 300])
base_net = model.base_net(x=IMAGE)
classes_rpn, offset_rpn = model.SSD(base_net=base_net)
loss_rpn_cls = Loss.loss_rpn_cls(y_pred=classes_rpn, y_true=CLASSES)
loss_rpn_regress = Loss.loss_rpn_regress(y_pred=offset_rpn, y_true=OFFSET)
loss = loss_rpn_regress + loss_rpn_cls

optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.1, rho=0.8)
var_list = tf.trainable_variables()
gradients = optimizer.compute_gradients(loss=loss, var_list=var_list)
train_op = optimizer.apply_gradients(grads_and_vars=gradients)

train_list, test_list = get_data(input_dir=INPUT_DIR)
train_generator = BatchGenerator(info_list=train_list)
test_generator = BatchGenerator(info_list=test_list)

sess = tf.InteractiveSession()
for i in range(EPOCHES):
    sess.run(tf.global_variables_initializer())
    # try:
def train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        batch = tf.Variable(0, trainable=False)
        bn_decay = get_bn_decay(batch)
        learning_rate = get_learning_rate(batch)

        with tf.variable_scope(tf.get_variable_scope()):
            with tf.device('/gpu:0'):
                pointclouds = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_POINT, 3))
                direction_labels = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_POINT, 3))
                is_training = tf.placeholder(tf.bool, shape=())

                ##### DirectionEmbedding
                DeepPointwiseDirections = PDE_net.get_model_RRFSegNet(
                    'PDE_net',
                    pointclouds,
                    is_training=is_training,
                    weight_decay=0.0001,
                    bn_decay=bn_decay,
                    k=20)

                ##### DirectionEmbedding (DGCNN variant)
                # DeepPointwiseDirections = PDE_net.get_model_DGCNN('PDE_net',
                #                                                   pointclouds,
                #                                                   is_training=is_training,
                #                                                   weight_decay=0.0001,
                #                                                   bn_decay=bn_decay,
                #                                                   k=20)

                ##### losses
                loss_esd = Loss.slack_based_direction_loss(DeepPointwiseDirections, direction_labels)
                loss_pd = Loss.direction_loss(DeepPointwiseDirections, direction_labels)
                loss = 1 * loss_esd + 0 * loss_pd + tf.add_n(tf.get_collection('losses'))

                ##### optimizer: Adam
                train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=batch)
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        sess = tf.Session(config=config)

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        ops = {'learning_rate': learning_rate,
               'pointclouds': pointclouds,
               'direction_labels': direction_labels,
               'is_training': is_training,
               'loss': loss,
               'loss_esd': loss_esd,
               'loss_pd': loss_pd,
               'train_op': train_op,
               'step': batch}

        init_loss = 999.999
        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            #### training data generator
            train_set = py_util.get_data_set(TRAIN_DATA_PATH)
            generator_training = BSG.minibatch_generator(TRAIN_DATA_PATH, BATCH_SIZE, train_set, NUM_POINT)

            #### validation data generator
            val_set = py_util.get_data_set(VALIDATION_PATH)
            generator_val = BSG.minibatch_generator(TRAIN_DATA_PATH, BATCH_SIZE, val_set, NUM_POINT)

            #### training steps
            temp_loss = train_one_epoch(sess, epoch, train_set, generator_training, ops)

            #### validation steps
            validation(sess, val_set, generator_val, ops)

            #### save the trained model whenever the training loss improves
            if temp_loss < init_loss:
                saver.save(sess, os.path.join(LOG_DIR, 'epoch_' + str(epoch) + '.ckpt'))
                init_loss = temp_loss
def get_device_name(device):
    if device == 'gpu':
        return torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        return 'cpu'


if __name__ == '__main__':
    # setup
    set_seed(777)                                      # set the random seed
    device = get_device_name('gpu')                    # get the device name (cpu or gpu)
    net = Net.SixFullyConnectedNet()                   # build a 6-layer fully connected model
    dataset_loader = CIFAR10.CIFAR10_Loader('./data')  # load the CIFAR10 dataset
    criterion = Loss.CrossEntropyLoss()                # load the cross-entropy loss

    # training configuration
    trainer = Trainer.Trainer(
        device=device,                  # computation device
        net=net,                        # network architecture
        criterion=criterion,            # loss function
        dataset_loader=dataset_loader   # dataset
    )

    # training
    trainer.train(
        init_lr=0.05,      # initial learning rate (step size)
        min_lr=0.0001,     # minimum learning rate
        init_params=None   # initial parameters (e.g. for parameter transfer)
    )