def train_and_test():
    """Train the VAE and report the test-set ELBO after every epoch.

    A fixed slice of the first test batch is used as the visualisation
    sample: after each epoch it is passed to
    ``utils.generate_and_save_images`` together with the model and the
    epoch number.

    Raises:
        ValueError: If the test dataset yields no batch at all.
    """
    dataset = Data(config.train_size, config.batch_size, config.test_size)

    # Pick the visualisation sample from the FIRST test batch only.  Walking
    # the whole test set and keeping the last batch costs a full pass and
    # can yield a short final batch with fewer than
    # ``num_examples_to_generate`` images.
    test_sample = None
    for test_batch in dataset.test_dataset:
        test_sample = test_batch[:config.num_examples_to_generate, :, :, :]
        break
    if test_sample is None:
        raise ValueError(
            'test dataset is empty; cannot pick a sample to visualise')

    # Not read anywhere below; the draw is kept so that the sequence of
    # random numbers consumed by this function is unchanged.
    random_vector_for_generation = tf.random.normal(
        shape=[config.num_examples_to_generate, config.latent_dim])

    model = VAE(config.latent_dim)
    optimizer = tf.keras.optimizers.Adam(1e-4)

    for epoch in range(1, config.epochs + 1):
        # Time only the optimisation pass over the training set.
        start_time = time.time()
        for train_x in dataset.train_dataset:
            utils.train_step(model, train_x, optimizer=optimizer)
        end_time = time.time()

        # The ELBO is the negated mean of the per-batch test losses.
        loss = tf.keras.metrics.Mean()
        for test_x in dataset.test_dataset:
            loss(utils.compute_loss(model, test_x))
        elbo = -loss.result()
        print(
            'Epoch: {}, Test set ELBO: {}, time elapse for current epoch: {}'.
            format(epoch, elbo, end_time - start_time))
        utils.generate_and_save_images(model, epoch, test_sample)
def loss_func(model, predicted, boxes, classes):
    """Convert padded per-image detections into target rows and compute the loss.

    Every ``(image, detection)`` slot whose class id is non-zero becomes one
    row ``[img_idx, class - 1, cx, cy, w, h]``.  Box values are first mapped
    with ``x * 0.5 + 0.5`` and then converted from ``(t, l, b, r)`` edges to
    centre/size form.

    Args:
        model: Passed through to ``compute_loss``.
        predicted: Sequence of prediction tensors; ``predicted[0]`` decides
            which device the target tensor is created on.
        boxes: Indexed as ``boxes[img, det]`` yielding four values
            ``(t, l, b, r)``.
        classes: Indexed as ``classes[img, det]``; ``0`` marks an empty slot.

    Returns:
        ``loss[0]`` of the loss returned by ``compute_loss`` after scaling it
        by ``batch_size / 64``.
    """
    bs = classes.shape[0]
    max_detections = classes.shape[1]

    rows = []
    for img_idx in range(bs):
        for detect_idx in range(max_detections):
            clazz = classes[img_idx, detect_idx]
            if clazz == 0:  # padding slot, no object
                continue
            t, l, b, r = boxes[img_idx, detect_idx] * 0.5 + 0.5
            rows.append([
                img_idx,
                float(clazz - 1),
                float((l + r) / 2.0),  # cx
                float((t + b) / 2.0),  # cy
                float(r - l),          # w
                float(b - t),          # h
            ])

    # torch.tensor(..., device=...) follows the device of the predictions
    # (the legacy FloatTensor constructors always use the current CUDA
    # device), and the reshape keeps the (n, 6) layout even when there are
    # no targets at all.
    targets = torch.tensor(rows,
                           dtype=torch.float32,
                           device=predicted[0].device).reshape(-1, 6)

    loss, _ = compute_loss(predicted, targets, model)
    loss *= bs / 64
    return loss[0]
def test(model, fetcher):
    """Evaluate ``model`` on ``fetcher`` and return the mean per-class IoU.

    For every batch the arg-max over dimension 1 of the model output is
    compared element-wise with the targets, and per-class true-positive,
    false-negative and false-positive counts are accumulated.  When
    ``torch.distributed`` is initialised the counts are summed over all
    workers before the final metrics are computed and printed.

    Args:
        model: Network to evaluate; put into eval mode here.
        fetcher: Iterable of ``(inputs, targets)`` batches that also exposes
            ``fetcher.loader.dataset.classes``.

    Returns:
        ``miou.mean().item()`` where ``miou`` comes from ``compute_metrics``.
    """
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    # Per-class true positives (intersection), false positives, false negatives.
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)

    pbar = tqdm(fetcher)
    # Evaluation never calls backward(); disabling autograd avoids building
    # a graph for every batch.
    with torch.no_grad():
        for idx, (inputs, targets) in enumerate(pbar):
            batch_idx = idx + 1
            outputs = model(inputs)
            loss = compute_loss(outputs, targets, model)
            val_loss += loss.item()
            predicted = outputs.max(1)[1]
            if idx == 0:
                show_batch(inputs, predicted)
            predicted = predicted.view(-1)
            targets = targets.view(-1)
            eq = predicted.eq(targets)
            for c_i in range(num_classes):
                indices = targets.eq(c_i)
                positive = indices.sum().item()
                tpi = eq[indices].sum().item()
                fni = positive - tpi
                fpi = predicted.eq(c_i).sum().item() - tpi
                tp[c_i] += tpi
                fn[c_i] += fni
                fp[c_i] += fpi
            T, P, R, miou, F1 = compute_metrics(tp, fn, fp)
            # NOTE: the value shown under the 'mAP' label is the mean of P.
            pbar.set_description(
                'loss: %8g, mAP: %8g, F1: %8g, miou: %8g' %
                (val_loss / batch_idx, P.mean(), F1.mean(), miou.mean()))

    if dist.is_initialized():
        # Sum the raw counts over all workers before computing the metrics.
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
    T, P, R, miou, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())

    if len(classes) < 10:
        for c_i, c in enumerate(classes):
            print(
                'cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g'
                % (c, T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    else:
        # With many classes only the five lowest-IoU ones are reported.
        print('top error 5')
        copy_miou = miou.clone()
        for _ in range(5):
            c_i = copy_miou.min(0)[1]
            copy_miou[c_i] = 1  # exclude this class from the next minimum
            print(
                'cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g'
                % (classes[c_i], T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    return miou.mean().item()
def test(model, fetcher):
    """Evaluate a classifier on ``fetcher`` and return its accuracy.

    The arg-max over dimension 1 of the model output is compared with the
    targets; overall hit counts and per-class TP/FN/FP counts are
    accumulated.  When ``torch.distributed`` is initialised all counters are
    summed over the workers before the report is printed.

    Args:
        model: Network to evaluate; put into eval mode here.
        fetcher: Sized iterable of ``(inputs, targets)`` batches that also
            exposes ``fetcher.loader.dataset.classes``.

    Returns:
        ``true_size / total_size`` as a Python float.
    """
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    # One-element tensors so they can be all-reduced later.
    total_size = torch.Tensor([0])
    true_size = torch.Tensor([0])
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)

    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    # No backward pass happens here, so autograd bookkeeping is disabled.
    with torch.no_grad():
        for idx, (inputs, targets) in pbar:
            batch_idx = idx + 1
            outputs = model(inputs)
            loss = compute_loss(outputs, targets, model)
            val_loss += loss.item()
            predicted = outputs.max(1)[1]
            if idx == 0:
                show_batch(inputs.cpu(), predicted.cpu(), classes)
            eq = predicted.eq(targets)
            total_size += predicted.size(0)
            # .item() keeps the counter on the CPU whatever device the
            # batch lives on.
            true_size += eq.sum().item()
            for c_i in range(num_classes):
                indices = targets.eq(c_i)
                positive = indices.sum().item()
                tpi = eq[indices].sum().item()
                fni = positive - tpi
                fpi = predicted.eq(c_i).sum().item() - tpi
                tp[c_i] += tpi
                fn[c_i] += fni
                fp[c_i] += fpi
            pbar.set_description('loss: %8g, acc: %8g' %
                                 (val_loss / batch_idx, true_size / total_size))

    if dist.is_initialized():
        # Sum the raw counters over all workers.
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        total_size = total_size.to(device)
        true_size = true_size.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        dist.all_reduce(total_size, op=dist.ReduceOp.SUM)
        dist.all_reduce(true_size, op=dist.ReduceOp.SUM)
    T, P, R, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())

    if len(classes) < 10:
        for c_i, c in enumerate(classes):
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, F1: %8g' %
                  (c, T[c_i], P[c_i], R[c_i], F1[c_i]))
    else:
        # With many classes only the five lowest-precision ones are shown.
        print('top error 5')
        copy_P = P.clone()
        for _ in range(5):
            c_i = copy_P.min(0)[1]
            copy_P[c_i] = 1  # exclude this class from the next minimum
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, F1: %8g' %
                  (classes[c_i], T[c_i], P[c_i], R[c_i], F1[c_i]))
    return true_size.item() / total_size.item()
def test(model, fetcher):
    """Evaluate a heat-map model and return the mean normalised L2 error.

    Targets and outputs are both resized to 64x64 with bilinear
    interpolation, the arg-max position of every channel is taken as the
    predicted/true location, and the Euclidean distance between the two
    (in units of the map size) is accumulated per class.

    Args:
        model: Network to evaluate; put into eval mode here.
        fetcher: Iterable of ``(inputs, targets)`` batches that also exposes
            ``fetcher.loader.dataset.classes``.

    Returns:
        ``l2_sum.sum() / max(1, n.sum())`` as a Python float.
    """
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    n = torch.zeros(num_classes)        # samples seen, per class
    l2_sum = torch.zeros(num_classes)   # accumulated distance, per class
    normalize_size = (64, 64)
    height, width = normalize_size

    def peak_index(maps):
        # Flat arg-max position of every (sample, channel) map after resizing.
        resized = F.interpolate(maps,
                                normalize_size,
                                mode='bilinear',
                                align_corners=False)
        return resized.view(maps.size(0), maps.size(1),
                            height * width).argmax(2)

    pbar = tqdm(fetcher)
    # Pure evaluation: no gradients are needed.
    with torch.no_grad():
        for idx, (inputs, targets) in enumerate(pbar):
            batch_idx = idx + 1
            outputs = model(inputs)
            if idx == 0:
                show_batch(inputs, outputs)
            loss = compute_loss(outputs, targets, model)
            val_loss += loss.item()

            targets = peak_index(targets)
            outputs = peak_index(outputs)
            # flat index = row * width + column.  The differences are cast
            # to float explicitly so the normalisation does not depend on
            # the integer/float promotion rules of the installed PyTorch.
            y_dis = (targets // width - outputs // width).float() / float(height)
            x_dis = (targets % width - outputs % width).float() / float(width)
            l2 = torch.sqrt(y_dis**2 + x_dis**2)
            n += len(l2)  # the batch size is added to every class counter
            l2_sum += l2.sum(0).cpu()
            pbar.set_description(
                'loss: %8g, NME: %8g' %
                (val_loss / batch_idx, l2_sum.sum() / max(1, n.sum())))

    if dist.is_initialized():
        # Sum the accumulators over all workers.
        n = n.to(device)
        l2_sum = l2_sum.to(device)
        dist.all_reduce(n, op=dist.ReduceOp.SUM)
        dist.all_reduce(l2_sum, op=dist.ReduceOp.SUM)
    for c_i, c in enumerate(classes):
        print('cls: %8s, NME: %8g' % (c, l2_sum[c_i] / max(1, n[c_i])))
    return (l2_sum.sum() / max(1, n.sum())).item()
def test(model, fetcher, distributed=False):
    """Run evaluation over ``fetcher`` and return the mean per-class IoU.

    Predictions and targets are both reduced with an arg-max over
    dimension 1, flattened, and compared to accumulate per-class TP/FN/FP
    counts.  With ``distributed=True`` the counts are all-reduced (summed)
    before the final metrics are computed.

    Args:
        model: Network to evaluate; put into eval mode here.
        fetcher: Sized iterable of ``(inputs, targets)`` batches that also
            exposes ``fetcher.loader.dataset.classes``.
        distributed: Whether to sum the counts across workers.

    Returns:
        ``miou.mean().item()`` where ``miou`` comes from ``compute_metrics``.
    """
    model.eval()
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    running_loss = 0
    total_size = 0
    # Per-class true positives (intersection), false positives, false negatives.
    tp, fp, fn = (torch.zeros(num_classes) for _ in range(3))

    with torch.no_grad():
        progress = tqdm(enumerate(fetcher), total=len(fetcher))
        for step, (inputs, targets) in progress:
            outputs = model(inputs)
            running_loss += compute_loss(outputs, targets).item()
            if step == 0:
                show_batch('test_batch.png', inputs.cpu(), outputs.cpu())

            _, pred_idx = outputs.max(1)
            _, true_idx = targets.max(1)
            pred_flat = pred_idx.view(-1)
            true_flat = true_idx.view(-1)
            hits = pred_flat.eq(true_flat)
            total_size += pred_flat.size(0)

            for cls_id in range(num_classes):
                of_class = true_flat.eq(cls_id)
                n_true = of_class.sum().item()
                n_hit = hits[of_class].sum().item()
                n_pred = pred_flat.eq(cls_id).sum().item()
                tp[cls_id] += n_hit
                fn[cls_id] += n_true - n_hit
                fp[cls_id] += n_pred - n_hit

            T, P, R, miou, F1 = compute_metrics(tp, fn, fp)
            summary = (running_loss / (step + 1), P.mean(), F1.mean(),
                       miou.mean())
            progress.set_description(
                'loss: %8g, mAP: %8g, F1: %8g, miou: %8g' % summary)

    if distributed:
        # Sum the raw counts over all workers.
        tp, fn, fp = tp.to(device), fn.to(device), fp.to(device)
        for counts in (tp, fn, fp):
            dist.all_reduce(counts, op=dist.ReduceOp.SUM)

    T, P, R, miou, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    row = 'cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g'
    for cls_id, cls_name in enumerate(classes):
        print(row % (cls_name, T[cls_id], P[cls_id], R[cls_id], miou[cls_id],
                     F1[cls_id]))
    return miou.mean().item()
def post_train_step(self, outputs, batch, batch_idx, epoch):
    """Compute the scaled training loss for one batch and update the EMA.

    Args:
        outputs: Model outputs, passed to ``compute_loss``.
        batch: 5-tuple whose second element holds the targets.
        batch_idx: Index of the batch inside the epoch.
        epoch: Current epoch number.

    Returns:
        ``(loss, loss_items)`` with ``loss`` scaled by ``batch_size / 64``.

    Raises:
        SystemExit: With status ``-1`` when the loss is not finite.
    """
    _, targets, _, _, _ = batch

    loss, loss_items = compute_loss(outputs, targets, self.model)
    if not torch.isfinite(loss):
        print('WARNING: non-finite loss, ending training ', loss_items)
        # ``exit()`` is the interactive-shell helper injected by ``site`` and
        # is not guaranteed to exist; raising SystemExit is the equivalent
        # that always works.
        raise SystemExit(-1)

    loss *= self.batch_size / 64  # scale loss

    # The EMA is refreshed only on the batches where the integrated batch
    # count is a multiple of ``accumulate``.
    if self.calc_ni(batch_idx, epoch) % self.accumulate == 0:
        self.ema.update(self.model)
    return loss, loss_items
def train_batch(self, batch: TorchData, model: nn.Module, epoch_idx: int,
                batch_idx: int) -> Dict[str, torch.Tensor]:
    """Run the forward pass for one batch and return its scaled loss.

    The image tensor is converted to float and divided by 255 before being
    fed to ``model``.  The loss is multiplied by
    ``opt.batch_size / (pedl_batch_size * pedl_accumulate)``.

    NOTE(review): a non-finite loss only prints a warning here; training is
    not actually stopped despite what the message says.

    Returns:
        ``{"loss": loss}``.
    """
    # The CLI options used to be rebuilt here on every call; module-level
    # values are used instead because that appeared to hurt performance.
    images, targets, _paths, _ = batch
    images = images.float() / 255.0

    predictions = model(images)
    loss, loss_items = compute_loss(predictions, targets, model,
                                    not pedl_prebias)
    scale = opt.batch_size / (pedl_batch_size * pedl_accumulate)
    loss *= scale

    if not torch.isfinite(loss):
        print("WARNING: non-finite loss, ending training ", loss_items)
    return {"loss": loss}
def forward(self, x, targets=None):
    """Run ``x`` through every configured module and collect the YOLO outputs.

    Each entry of ``self.module_defs`` is paired with the module at the same
    position in ``self.module_list`` and dispatched on its ``"type"``:
    plain layers are applied directly, ``route`` concatenates earlier
    outputs along dimension 1, ``shortcut`` adds an earlier output to the
    previous one, and ``yolo`` produces detections plus a loss term.

    Returns:
        The concatenated YOLO outputs (moved with ``to_cpu``) when
        ``targets`` is ``None``; otherwise ``(loss, yolo_outputs)``.
    """
    plain_types = ("convolutional", "upsample", "maxpool")
    img_dim = x.shape[2]
    loss = 0
    layer_outputs = []
    yolo_outputs = []

    for module_def, module in zip(self.module_defs, self.module_list):
        kind = module_def["type"]
        if kind == "yolo":
            head = module[0]
            x, predictions = head(x, img_dim)
            loss += compute_loss(predictions, targets, head)
            yolo_outputs.append(x)
        elif kind == "shortcut":
            x = layer_outputs[-1] + layer_outputs[int(module_def["from"])]
        elif kind == "route":
            sources = [
                layer_outputs[int(src)]
                for src in module_def["layers"].split(",")
            ]
            x = torch.cat(sources, 1)
        elif kind in plain_types:
            x = module(x)
        # Every layer's result is recorded so later route/shortcut layers
        # can refer back to it by index.
        layer_outputs.append(x)

    yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
    if targets is None:
        return yolo_outputs
    return loss, yolo_outputs
def test(model, fetcher, conf_thres=1e-3, nms_thres=0.5):
    """Evaluate a detector on ``fetcher`` and return its mean AP.

    For every image the NMS-filtered predictions are matched against the
    ground-truth boxes of the same class (a prediction is correct when its
    best IoU exceeds 0.5 and the matched target has not been used yet).
    The per-prediction statistics are gathered from all workers when
    ``torch.distributed`` is initialised and passed to ``ap_per_class``.

    Args:
        model: Detector returning ``(inference_output, training_output)``.
        fetcher: Sized iterable of ``(imgs, targets)`` batches that also
            exposes ``fetcher.loader.dataset.classes``.
        conf_thres: Confidence threshold forwarded to NMS.
        nms_thres: NMS threshold forwarded to NMS.

    Returns:
        ``mAP`` (``0.`` when no statistics were collected).
    """
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    seen = 0
    # NOTE(review): ``s`` and ``jdict`` are not read anywhere in this function.
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP', 'F1')
    p, r, f1, mp, mr, mAP, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    for idx, (imgs, targets) in pbar:
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        val_loss += compute_loss(train_out, targets,
                                 model).item()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Plot images with bounding boxes
        if idx == 0:
            show_batch(imgs, output)

        # Statistics per image
        for si, pred in enumerate(output):
            # Column 0 of ``targets`` is compared with the image index;
            # the remaining columns are that image's labels.
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No detections: record the missed targets (if any) only.
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes, scaled by the image width/height
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > 0.5 and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:, 4].cpu().numpy(), pred[:, 6].cpu().numpy(), tcls))
        pbar.set_description('loss: %8g' % (val_loss / (idx + 1)))

    # Compute statistics: one concatenated array per statistic column
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]

    # sync stats
    if dist.is_initialized():
        for i in range(len(stats)):
            stat = torch.FloatTensor(stats[i]).to(device)
            # Exchange the lengths first; each worker then pads its array
            # with zeros up to the longest length before the gather.
            ls = torch.IntTensor([len(stat)]).to(device)
            ls_list = [
                torch.IntTensor([0]).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(ls_list, ls)
            ls_list = [ls_item.item() for ls_item in ls_list]
            max_ls = max(ls_list)
            if len(stat) < max_ls:
                stat = torch.cat(
                    [stat, torch.zeros(max_ls - len(stat)).to(device)])
            stat_list = [
                torch.zeros(max_ls).to(device)
                for _ in range(dist.get_world_size())
            ]
            dist.all_gather(stat_list, stat)
            # Strip the padding again and drop workers that had nothing.
            stat_list = [
                stat_list[si][:ls_list[si]]
                for si in range(dist.get_world_size()) if ls_list[si] > 0
            ]
            stat = torch.cat(stat_list)
            stats[i] = stat.cpu().numpy()

    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, mAP, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=num_classes)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, mAP, mf1))

    # Print results per class
    for i, c in enumerate(ap_class):
        print(pf % (classes[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Per-class AP table (classes without an AP entry default to the mean).
    # NOTE(review): ``mAPs`` is built but not returned.
    mAPs = np.zeros(num_classes) + mAP
    for i, c in enumerate(ap_class):
        mAPs[c] = ap[i]
    # return (mp, mr, mAP, mf1, *(loss / len(dataloader)).tolist()), mAPs
    return mAP
def validation_step(self, opt, outputs, batch, batch_idx, epoch):
    """Accumulate detection statistics for one validation batch.

    Runs NMS on the inference output, matches the surviving predictions
    against the ground-truth boxes class by class, and appends one
    ``(correct, conf, pcls, tcls)`` tuple per image to ``self.stats``.
    ``self.seen`` is incremented once per image.

    Args:
        opt: Options object providing ``conf_thres`` and ``iou_thres``.
        outputs: ``(inference_output, training_output)`` of the model.
        batch: 5-tuple ``(imgs, targets, paths, shapes, pad)``.
        batch_idx: Index of the batch inside the epoch.
        epoch: Current epoch number.

    Returns:
        The first three entries of the loss-items returned by
        ``compute_loss``.
    """
    imgs, targets, paths, shapes, pad = batch
    _, _, height, width = imgs.shape
    inf_out, train_out = outputs
    # Scale factors used to multiply the target boxes below.
    whwh = torch.Tensor([width, height, width, height]).to(imgs.device)

    losses = compute_loss(train_out, targets,
                          self.model)[1][:3]  # GIoU, obj, cls
    output = non_max_suppression(inf_out,
                                 conf_thres=opt.conf_thres,
                                 iou_thres=opt.iou_thres,
                                 multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)

    # Statistics per image
    for si, pred in enumerate(output):
        # Column 0 of ``targets`` is compared with the image index.
        labels = targets[targets[:, 0] == si, 1:]
        nl = len(labels)
        tcls = labels[:, 0].tolist() if nl else []  # target class
        self.seen += 1

        if pred is None:
            # No detections: record the missed targets (if any) only.
            if nl:
                self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool),
                                   torch.Tensor(), torch.Tensor(), tcls))
            continue

        # Append to text file
        # with open('test.txt', 'a') as file:
        #     [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

        # Clip boxes to image bounds
        clip_coords(pred, (height, width))

        # Assign all predictions as incorrect
        correct = torch.zeros(pred.shape[0],
                              self.niou,
                              dtype=torch.bool,
                              device=imgs.device)
        if nl:
            detected = []  # target indices
            tcls_tensor = labels[:, 0]

            # target boxes
            tbox = xywh2xyxy(labels[:, 1:5]) * whwh

            # Per target class
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                # Search for detections
                if pi.shape[0]:
                    # Prediction to target ious
                    ious, i = box_iou(pred[pi, :4],
                                      tbox[ti]).max(1)  # best ious, indices

                    # Append detections: only predictions above the first
                    # IoU threshold are considered.
                    for j in (ious > self.iouv[0].to(ious.device)).nonzero():
                        d = ti[i[j]]  # detected target
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > self.iouv  # iou_thres is 1xn
                            if len(detected) == nl:  # all targets already located in image
                                break

        # Append statistics (correct, conf, pcls, tcls)
        self.stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
    return losses
img_size = img_tensor.size()[2] # TODO: 目前只支持正方形,如416x416 ###### ### 训练过程主要包括以下几个步骤: # (1) 前传 #print('img_tensor:', img_tensor[0][1][208][208]) p, p_box = model( img_tensor ) # tuple, have 3 tensors; tensor[0]: (64, 3, 13, 13, 4) # (2) 计算损失 ###### clw add: for debug, localize in build_target() first, and can get target size, so catch the same target size there # if target_tensor.size()[0] == 679: # print('aaa') ###### loss, loss_items = compute_loss(p, p_box, target_tensor, model, img_size) if not torch.isfinite(loss): raise Exception('WARNING: non-finite loss, ending training ', loss_items) # (3) 损失:反向传播,求出梯度 if mixed_precision: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() # (4) 优化器:更新参数、梯度清零 ni = i + nb * epoch # number integrated batches (since train start) if ni % accumulate == 0: # Accumulate gradient for x batches before optimizing optimizer.step()
def train():
    """Train a Darknet/YOLO model end to end.

    Steps, in order: read the run options from the module-level ``opt``,
    build the model and load pretrained weights (``.pt`` checkpoint,
    torchvision-style ``.pth`` state dict, or darknet format), set up SGD
    with three parameter groups and a MultiStepLR schedule, build the VOC
    data loader, then run the epoch loop.  After every epoch the learning
    rate schedule is stepped, ``test.test`` is run, scalars are written to
    TensorBoard and a checkpoint is saved to ``last_model_path``.
    """
    # 0. Initialise parameters (set random seed, read cfg info)
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0. Print the configuration, write the log, etc.
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1. Load the model
    model = Darknet(cfg).to(device)
    if weights.endswith('.pt'):
        # Loading torch.load(weights)['model'] directly fails with a size
        # mismatch (e.g. module_list.81.Conv2d.weight: checkpoint
        # [255, 1024, 1, 1] vs model [75, 1024, 1, 1]) because the layer in
        # front of each yolo layer depends on the number of classes.
        # Tensors whose element count differs are therefore filtered out.
        # TODO: map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif weights.endswith('.pth'):  # for 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
        # NOTE(review): ``model_state_dict`` is not read below.
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        # Checkpoint entries are mapped onto the model's keys purely by
        # position, in groups of five, skipping the last ``fc_item_num``
        # checkpoint entries.
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i+block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i+block_cnt+2]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i+block_cnt-2]] = chkpt[chkpt_keys[i]]
            # NOTE(review): indentation reconstructed from a collapsed
            # source line -- after each group of five the model has one
            # extra entry, which is copied from the model itself.
            if i % 5 == 4:
                block_cnt += 1
                state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]

        model.load_state_dict(state_dict, strict=False)

        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    # An empty ``weights`` string falls through: training starts from scratch.

    # 2. Set up the optimizer and the learning rate
    start_epoch = 0

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0

    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    # apex mixed precision
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        # Multi-GPU: DistributedDataParallel must be applied after
        # amp.initialize(), otherwise an error is raised.
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    model.nc = nc

    # Step learning-rate schedule: decay by 0.1 at 80% and 90% of the epochs
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(total_epochs * x) for x in [0.8, 0.9]], gamma=0.1)

    # 3. Load the dataset
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8,  # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)

    # 4. Train
    print('')  # blank line
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)
    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()  # tensorboard --logdir=runs, view at http://localhost:6006/
    # NOTE(review): ``prebias`` is not read below.
    prebias = start_epoch == 0

    for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
        # Set here because test.test(), called at the end of every epoch,
        # switches the model to eval mode.
        model.train()

        start = time.time()
        title = ('\n' + '%10s' * 11 ) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size', 'lr', 'time_use')
        print(title)

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):
            batch_start = time.time()
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)

            # The training step consists of:
            # (1) forward pass
            pred = model(img_tensor)

            # (2) compute the loss
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) back-propagate to obtain the gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update the parameters and clear the gradients
            # (no gradient accumulation: a step is taken on every batch)
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size, scheduler.get_lr()[0], time.time()-batch_start)

            if i % 10 == 0:
                print(s)

            # Plot the very first batch once
            if epoch == start_epoch and i == 0:
                fname = 'train_batch.jpg'  # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))
        # Only the status line of the last batch of the epoch is logged.
        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path = log_file_path)

        # Tensorboard
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # Save the model (unwrap DistributedDataParallel on multi-GPU runs)
        chkpt = {'epoch': epoch,
                 'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                 'optimizer': optimizer.state_dict()}
        torch.save(chkpt, last_model_path)

    print('end')
# lr = lr0 * (ni / 1000) ** 2 # for g in optimizer.param_groups: # g['lr'] = lr scheduler.step(len(dataloader) * epoch + i) batch_start = time.time() #print(img_path) img_tensor = img_tensor.to(device) target_tensor = target_tensor.to(device) ### 训练过程主要包括以下几个步骤: # (1) 前传 #print('img_tensor:', img_tensor[0][1][208][208]) pred = model(img_tensor) # (2) 计算损失 loss, loss_items = compute_loss(pred, target_tensor, model) if not torch.isfinite(loss): raise Exception('WARNING: non-finite loss, ending training ', loss_items) # (3) 损失:反向传播,求出梯度 if mixed_precision: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() # (4) 优化器:更新参数、梯度清零 # ni = i + nb * epoch # number integrated batches (since train start) # if ni % accumulate == 0: # Accumulate gradient for x batches before optimizing optimizer.step() optimizer.zero_grad()
def channels_select(prune_cfg, data, origin_model, aux_util, device,
                    data_loader, select_layer, pruned_rate):
    """Greedily select the input channels to keep for ``select_layer``.

    For each of the ``retain_channels_num`` rounds the function

    1. accumulates gradients of the joint loss (feature MSE between the
       original and the pruned model plus the auxiliary loss) over
       ``n_iter`` batches,
    2. marks the not-yet-selected channel with the largest gradient norm as
       selected in ``selected_channels_mask``, and
    3. fine-tunes the ``MaskConv2d`` of the auxiliary input layer for
       another ``n_iter`` batches.

    Afterwards every channel whose mask is still below 1 is zeroed, the
    pruned model is evaluated with ``test.test`` and the progress
    checkpoint and result file are updated.

    Args:
        prune_cfg: Darknet cfg of the model being pruned.
        data: Data config forwarded to ``test.test``.
        origin_model: Unpruned reference model (only run under ``no_grad``).
        aux_util: Helper exposing ``conv_layer_dict``, ``creat_aux_model``,
            ``layer_info``, ``sync_guide`` and ``next_prune_layer``.
        device: Torch device everything is moved to.
        data_loader: Loader yielding ``(imgs, targets, _, _)`` batches.
        select_layer: Name (string index) of the layer to prune.
        pruned_rate: Fraction of input channels to remove.
    """
    with open(progress_result, 'a') as f:
        # The last-but-one column label had been mangled into an e-mail
        # obfuscation placeholder; it is the mAP@0.5 column.
        f.write(('\n' + '%10s' * 9 + '\n') %
                ('Stage', 'Change', 'MSELoss', 'AuxLoss', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))
    logger.info(('%10s' * 6) % ('Stage', 'Channels', 'Batch', 'MSELoss',
                                'AuxLoss', 'Total'))

    batch_size = data_loader.batch_size
    img_size = data_loader.dataset.img_size
    accumulate = 64 // batch_size
    hook_util = HookUtils()
    handles = []
    n_iter = math.floor(500 / batch_size)

    pruning_model = Darknet(prune_cfg,
                            img_size=(img_size, img_size)).to(device)
    chkpt = torch.load(progress_chkpt, map_location=device)
    pruning_model.load_state_dict(chkpt['model'], strict=True)

    aux_in_layer = aux_util.conv_layer_dict[select_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)
    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))
    del chkpt

    solve_sub_problem_optimizer = optim.SGD(
        pruning_model.module_list[int(aux_in_layer)].MaskConv2d.parameters(),
        lr=hyp['lr0'],
        momentum=hyp['momentum'])

    # Capture the outputs of the auxiliary-input layer and of the layer
    # being pruned, in both the original and the pruned model.
    for name, child in origin_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_origin_output))
    for name, child in pruning_model.module_list.named_children():
        if name == aux_in_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))
        if name == select_layer:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_output))

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        origin_model = torch.nn.parallel.DistributedDataParallel(
            origin_model, find_unused_parameters=True)
        origin_model.yolo_layers = origin_model.module.yolo_layers
        pruning_model = torch.nn.parallel.DistributedDataParallel(
            pruning_model, find_unused_parameters=True)
        pruning_model.yolo_layers = pruning_model.module.yolo_layers

    # The model is wrapped only on multi-GPU runs, so ``.module`` must not
    # be dereferenced unconditionally.
    if isinstance(pruning_model, nn.parallel.DistributedDataParallel):
        bare_model = pruning_model.module
    else:
        bare_model = pruning_model
    mask_conv = bare_model.module_list[int(select_layer)].MaskConv2d
    in_channels = aux_util.layer_info[select_layer]["in_channels"]

    retain_channels_num = math.floor(in_channels * (1 - pruned_rate))
    pruning_model.nc = 80
    pruning_model.hyp = hyp
    pruning_model.arc = 'default'
    pruning_model.eval()
    aux_model.eval()

    MSE = nn.MSELoss(reduction='mean')
    mloss = torch.zeros(3).to(device)

    for i_k in range(retain_channels_num):
        # NOTE(review): both inner loops draw from this one iterator, so the
        # loader has to provide at least 2 * n_iter batches per round.
        data_iter = iter(data_loader)

        # ---- Stage 1: accumulate gradients to rank the channels ----------
        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) % ('Stage', 'gpu_mem', 'channels',
                                     'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:
            # Built-in next() is the iterator protocol; ``.next()`` is not
            # available on current DataLoader iterators.
            imgs, targets, _, _ = next(data_iter)
            if len(targets) == 0:
                continue
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)
            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)
            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])
            # The detection loss is multiplied by zero: it stays in the
            # graph but contributes nothing to the value.
            loss = hyp['joint_loss'] * mse_loss + aux_loss + 0 * pruning_loss
            loss.backward()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'Prune ' + select_layer, '%.3gG' % mem,
                '%g/%g' % (i_k, retain_channels_num),
                hyp['joint_loss'] * mse_loss, aux_loss, loss)
            pbar.set_description(s)
            hook_util.clean_hook_out()

        # Per-input-channel gradient magnitude of the masked convolution.
        grad = mask_conv.weight.grad.detach()**2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            # First round: reset the mask to a tiny value so every channel
            # counts as "not selected yet".
            mask_conv.selected_channels_mask[:] = 1e-5
            if select_layer in aux_util.sync_guide.keys():
                sync_layer = aux_util.sync_guide[select_layer]
                bare_model.module_list[int(
                    sync_layer)].MaskConv2d.selected_channels_mask[(
                        -1 * in_channels):] = 1e-5

        # Pick the unselected channel with the largest gradient norm.
        selected_channels_mask = mask_conv.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)
        mask_conv.selected_channels_mask[indices] = 1
        if select_layer in aux_util.sync_guide.keys():
            bare_model.module_list[int(
                sync_layer)].MaskConv2d.selected_channels_mask[-(
                    in_channels - indices)] = 1

        pruning_model.zero_grad()

        # ---- Stage 2: fine-tune with the updated mask ---------------------
        pbar = tqdm(range(n_iter), total=n_iter)
        print(('\n' + '%10s' * 6) % ('Stage', 'gpu_mem', 'channels',
                                     'MSELoss', 'AuxLoss', 'Total'))
        for i in pbar:
            imgs, targets, _, _ = next(data_iter)
            if len(targets) == 0:
                continue
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            with torch.no_grad():
                _ = origin_model(imgs)
            _, pruning_pred = pruning_model(imgs)
            pruning_loss, _ = compute_loss(pruning_pred, targets,
                                           pruning_model)

            hook_util.cat_to_gpu0()
            mse_loss = torch.zeros(1, device=device)
            aux_pred = aux_model(hook_util.prune_features['gpu0'][1], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                            hook_util.origin_features['gpu0'][0])
            loss = hyp['joint_loss'] * mse_loss + aux_loss_scalar * aux_loss + 0 * pruning_loss
            loss.backward()

            if i % accumulate == 0:
                solve_sub_problem_optimizer.step()
                solve_sub_problem_optimizer.zero_grad()

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            mloss = (mloss * i + torch.cat(
                [hyp['joint_loss'] * mse_loss, aux_loss, loss]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 3) % (
                'SubProm ' + select_layer, '%.3gG' % mem,
                '%g/%g' % (i_k, retain_channels_num), *mloss)
            pbar.set_description(s)
            if (i + 1) % n_iter == 0:
                logger.info(('%10s' * 3 + '%10.3g' * 3) %
                            ('SubPro' + select_layer, str(i_k),
                             '%g/%g' % (i, n_iter), *mloss))
            hook_util.clean_hook_out()

    for handle in handles:
        handle.remove()

    # Every channel that was never promoted to 1 is switched off.
    greedy_indices = mask_conv.selected_channels_mask < 1
    mask_conv.selected_channels_mask[greedy_indices] = 0

    res, _ = test.test(prune_cfg,
                       data,
                       batch_size=batch_size * 2,
                       img_size=416,
                       model=pruning_model,
                       conf_thres=0.1,
                       iou_thres=0.5,
                       save_json=False,
                       dataloader=None)

    chkpt = torch.load(progress_chkpt, map_location=device)
    chkpt['current_layer'] = aux_util.next_prune_layer(select_layer)
    chkpt['epoch'] = -1
    chkpt['model'] = bare_model.state_dict()
    chkpt['optimizer'] = None
    torch.save(chkpt, progress_chkpt)
    torch.save(chkpt, last)
    del chkpt

    with open(progress_result, 'a') as f:
        f.write(('%10s' * 2 + '%10.3g' * 7) %
                ('Pruning ' + select_layer, str(in_channels) + '->' +
                 str(retain_channels_num), *mloss, *res[:4]) + '\n')
    torch.cuda.empty_cache()
def YOLO_Gradcam(model, dataloader, device, args):
    """Save a Grad-CAM overlay and the plain input image for every sample.

    For each sample the model is run with an extra request for the feature
    map that feeds the YOLO head chosen by ``args['head']``. The detection
    loss is back-propagated, the gradients recorded by the module-level
    ``save_`` hook are read from the module-level ``gradients`` container,
    and the channel-averaged gradients weight the feature channels to form
    the class-activation map.

    Args:
        model: Network called as ``model(imgs, [layer_idx])``; the call is
            unpacked as ``(_, y_hat, fts)``. Must expose ``yolo_layers``.
        dataloader: Iterable yielding ``(imgs, labels, paths, _)`` batches.
        device: Torch device the images and labels are moved to.
        args: Mapping providing ``'head'`` (index into ``model.yolo_layers``)
            and ``'output'`` (directory the images are written to).
    """
    j = 0  # running sample counter compared against labels[:, 0]; never reset
    model.eval()
    for imgs, labels, paths, _ in dataloader:
        imgs = imgs.to(device).float() / 255.0
        labels = labels.to(device)
        for img, path in tqdm(zip(imgs, paths)):  # One (image, bboxes) per time
            # img = torch.stack([img])
            # NOTE(review): the code below indexes `img` as if it carried a
            # leading batch axis (`img.shape[2:]`, `img[0]`), which looks
            # inconsistent with the stack above being commented out, and the
            # forward pass uses the whole batch `imgs` -- confirm the intent.
            sample_rows = labels[:, 0] == j
            _, y_hat, fts = model(imgs, [model.yolo_layers[args['head']] - 1])
            j += 1

            # Saving features
            fts[0].register_hook(save_)

            # Computing loss and backward
            loss, _ = compute_loss(y_hat, labels[sample_rows], model)
            model.zero_grad()
            loss.backward(retain_graph=True)

            ###########
            # Gradcam #
            ###########
            # getting gradients and features
            grads_val = gradients[0]
            target = fts[0]
            target = target[0, :]

            # weighting gradients in cam
            weights = torch.mean(grads_val, axis=(2, 3))[0, :]
            cam = torch.zeros(target.shape[1:], device=device, dtype=torch.float32)
            for weight, channel in zip(weights, target):
                cam += weight * channel

            # creating the mask: keep positive evidence only
            cam = torch.where(cam > 0, cam, torch.tensor(0., device=device))
            resize = Transforms.Compose([
                Transforms.ToPILImage(),
                Transforms.Resize(img.shape[2:]),
                Transforms.ToTensor()
            ])
            # torch resizes only 3D or moreD tensors, not 2D
            cam = resize(torch.stack([cam.cpu()]))[0]
            cam = cam - torch.min(cam)
            mask = cam / torch.max(cam)

            # creating a name to grad image
            ext = path.split('.')[-1]
            name = path.split(os.sep)[-1].split('.')[0]
            grad_name = f"{args['output']}{os.sep}{name}_{args['head']}_{'all'}.{ext}"
            orig_name = f"{args['output']}{os.sep}{name}.{ext}"

            # Saving results
            img = cv2.cvtColor(img[0].cpu().numpy().transpose(1, 2, 0),
                               cv2.COLOR_RGB2BGR)
            show_cam_on_image(img, mask, grad_name)
            cv2.imwrite(orig_name, np.uint8(255 * img))
def smooth_bbox_losses(p, current_loss, model):
    """Evaluate ``utils.compute_loss`` on ``p`` and return the constant ``0``.

    The computed loss is discarded and ``current_loss`` is not read.

    NOTE(review): ``target`` is neither a parameter nor a local of this
    function; it is presumably a module-level name -- confirm it exists
    wherever this is called.
    """
    _ = utils.compute_loss(p, target, model)  # result is discarded
    result = 0
    return result
def _write_tagged_sentences(path, tags, sentences):
    """Write one tab-separated ``0, 0, tag, sentence`` line per pair to *path*."""
    with open(path, "w") as f:
        f.write('\n'.join([
            "0\t0\t" + str(tag) + "\t" + ' '.join(sen)
            for tag, sen in zip(tags, sentences)
        ]) + '\n')


def main(args):
    """Restore a trained model, print test perplexity and write decoded replies.

    The YAML file at ``args.config`` supplies the model and training
    settings. The graph is rebuilt, the checkpoint found in ``logdir`` is
    restored, perplexity over the test set is printed, and for every test
    source one reply per emotion category is decoded; the reply belonging to
    the sample's own category is written to ``PEC_out_emo.tsv`` and
    ``PEC_out_per.tsv`` inside ``test_output``.

    Args:
        args: Object with a ``config`` attribute holding the YAML path.

    Raises:
        ValueError: If no checkpoint can be found (re-raised after a message
            is printed, like any other restore failure).
    """
    # loading configurations
    with open(args.config) as f:
        config = yaml.safe_load(f)["configuration"]

    name = config["Name"]

    # Construct or load embeddings
    print("Initializing embeddings ...")
    vocab_size = config["embeddings"]["vocab_size"]
    embed_size = config["embeddings"]["embed_size"]
    per_num = config["embeddings"]["person_num"]
    per_embed_size = config["embeddings"]["person_embed_size"]
    embeddings = init_embeddings(vocab_size, embed_size, name=name)
    print("\tDone.")

    # Build the model and compute losses
    source_ids = tf.placeholder(tf.int32, [None, 40], name="source")
    target_ids = tf.placeholder(tf.int32, [None, 40], name="target")
    person_ids = tf.placeholder(tf.int32, [None], name="person_ids")
    lexicons_ids = tf.placeholder(tf.int32, [per_num, 1000], name="lexicons_ids")
    spectrogram = tf.placeholder(tf.float32, [None, 400, 200], name="audio")
    sequence_mask = tf.placeholder(tf.bool, [None, 40], name="mask")
    choice_qs = tf.placeholder(tf.float32, [None, 40], name="choice")
    emo_cat = tf.placeholder(tf.int32, [None], name="emotion_category")
    is_train = tf.placeholder(tf.bool)

    (enc_num_layers, enc_num_units, enc_cell_type, enc_bidir, dec_num_layers,
     dec_num_units, dec_cell_type, state_pass, num_emo, emo_cat_units,
     emo_int_units, infer_batch_size, beam_size, max_iter, attn_num_units,
     l2_regularize, word_config, spectrogram_config, lstm_int_num, batch_size,
     loss_weight) = get_PEC_config(config)

    print("Building model architecture ...")
    CE, loss, cla_loss, train_outs, infer_outputs, score = compute_loss(
        source_ids, target_ids, sequence_mask, choice_qs, embeddings,
        enc_num_layers, enc_num_units, enc_cell_type, enc_bidir,
        dec_num_layers, dec_num_units, dec_cell_type, state_pass, num_emo,
        emo_cat, emo_cat_units, emo_int_units, infer_batch_size, spectrogram,
        word_config, per_num, person_ids, per_embed_size, spectrogram_config,
        loss_weight, lstm_int_num, is_train, False, lexicons_ids, beam_size,
        max_iter, attn_num_units, l2_regularize, name)
    print("\tDone.")

    (logdir, restore_from, learning_rate, gpu_fraction, max_checkpoints,
     train_steps, batch_size, print_every, checkpoint_every, s_filename,
     t_filename, q_filename, s_max_leng, t_max_leng, dev_s_filename,
     dev_t_filename, dev_q_filename, loss_fig, perp_fig, sp_filename,
     sp_max_leng, test_s_filename, test_t_filename, test_q_filename,
     test_output) = get_training_config(config, "training")

    # Set up session
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                            gpu_options=gpu_options))
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver used to restore the checkpoint.
    # NOTE(review): an earlier revision also collected the 'moving_mean' /
    # 'moving_variance' global variables into a list but never handed it to
    # the Saver, so only trainable variables are restored here. That is kept
    # unchanged on purpose: widening the list would make restore fail for
    # checkpoints that do not contain those variables. Confirm whether the
    # batch-norm statistics should be saved and restored as well.
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=max_checkpoints)

    try:
        saved_global_step = load(saver, sess, logdir)
        if saved_global_step is None:
            raise ValueError("Cannot find the checkpoint to restore from.")
    except Exception:
        print("Something went wrong while restoring checkpoint. ")
        raise

    # ##### Inference #####
    # Load data
    print("Loading inference data ...")

    # id_0, id_1, id_2 preserved for SOS, EOS, constant zero padding
    embed_shift = 3

    lexicons = load_lexicons() + embed_shift

    test_source_sentences_ids, test_source_person, test_source_data = loadfile(
        test_s_filename, is_dialog=True, is_source=True, max_length=s_max_leng)
    test_source_data += embed_shift
    test_target_sentences_ids, test_target_person, test_target_data, test_category_data = loadfile(
        test_t_filename, is_dialog=True, is_source=False, max_length=t_max_leng)
    test_target_data += embed_shift
    test_spectrogram_data = load_spectrogram(sp_filename, test_source_sentences_ids)
    test_choice_data = loadfile(test_q_filename, is_dialog=False,
                                is_source=False, max_length=t_max_leng)
    test_choice_data[test_choice_data < 0] = 0
    test_choice_data = test_choice_data.astype(np.float32)

    # Shift the mask one step to the right so the first position is always on.
    test_masks = (test_target_data >= embed_shift)
    test_masks = np.append(np.ones([len(test_masks), 1], dtype=bool),
                           test_masks, axis=1)
    test_masks = test_masks[:, :-1]
    print("\tDone.")

    # test
    print("testing")
    if test_source_data is not None:
        CE_words = N_words = 0.0
        for start in range(0, len(test_source_data), batch_size):
            rows = slice(start, start + batch_size)
            test_feed_dict = {
                source_ids: test_source_data[rows],
                target_ids: test_target_data[rows],
                person_ids: test_target_person[rows],
                spectrogram: test_spectrogram_data[rows],
                choice_qs: test_choice_data[rows],
                emo_cat: test_category_data[rows],
                sequence_mask: test_masks[rows],
                lexicons_ids: lexicons,
                is_train: False,
            }
            CE_word, N_word = compute_test_perplexity(
                sess, CE, test_masks[rows], test_feed_dict)
            CE_words += CE_word
            N_words += N_word
        print("test_perp: {:.3f}".format(np.exp(CE_words / N_words)))

        infer_results = []
        for start in range(0, len(test_source_data), infer_batch_size):
            rows = slice(start, start + infer_batch_size)
            batch_categories = test_category_data[rows]
            # The final batch may hold fewer than infer_batch_size samples, so
            # every per-batch shape below is taken from the data itself.
            n_rows = len(test_target_person[rows])

            tmp_result = []
            scores = []
            for i in range(num_emo):
                cat = i * np.ones([n_rows])
                infer_result, sco = sess.run(
                    [infer_outputs, score],
                    feed_dict={
                        source_ids: test_source_data[rows],
                        spectrogram: test_spectrogram_data[rows],
                        person_ids: test_target_person[rows],
                        emo_cat: cat,
                        lexicons_ids: lexicons,
                        is_train: False,
                    })
                infer_result = infer_result.ids[:, :, 0]
                if infer_result.shape[1] < max_iter:
                    # Right-pad short decodes with id 1 up to max_iter columns.
                    l_pad = max_iter - infer_result.shape[1]
                    infer_result = np.concatenate(
                        (infer_result, np.ones((infer_result.shape[0], l_pad))),
                        axis=1)
                else:
                    infer_result = infer_result[:, :max_iter]
                tmp_result.append(infer_result)
                scores.append(sco)

            # (emotion, sample, token) -> (sample, emotion, token)
            tmp_result = np.transpose(np.array(tmp_result), [1, 0, 2])

            # Softmax over emotions, then boost the sample's own category.
            # NOTE(review): these scores are computed but do not influence
            # which reply is written; the output below is selected by the
            # category stored with each test sample.
            scores = np.array(scores)
            scores = np.exp(scores) / np.sum(np.exp(scores), axis=0)
            scores = np.transpose(np.array(scores), [1, 0])
            scores[range(scores.shape[0]), batch_categories] += 1

            infer_results.extend(
                tmp_result[range(tmp_result.shape[0]), batch_categories])

        final_result = np.array(infer_results) - embed_shift
        final_result[final_result >= vocab_size] -= (vocab_size + embed_shift)
        final_result = id2_word(final_result.astype(int).tolist())

        _write_tagged_sentences(os.path.join(test_output, "PEC_out_emo.tsv"),
                                test_category_data, final_result)
        _write_tagged_sentences(os.path.join(test_output, "PEC_out_per.tsv"),
                                test_target_person, final_result)
def fine_tune(prune_cfg, data, aux_util, device, train_loader, test_loader, epochs=10):
    """Fine-tune the pruned network together with its auxiliary head.

    The model and auxiliary weights are loaded from ``progress_chkpt``.
    Training resumes at the epoch after the one stored there; after every
    epoch the model is evaluated with ``test.test``, the checkpoint is
    rewritten to ``progress_chkpt`` and ``last`` and one line is appended to
    ``progress_result``. After the last epoch a backup checkpoint is written
    as well.

    Args:
        prune_cfg: Model configuration passed to ``Darknet`` and ``test.test``.
        data: Dataset description forwarded to ``test.test``.
        aux_util: Helper providing ``conv_layer_dict`` and ``creat_aux_model``.
        device: Torch device to train on.
        train_loader: Loader yielding ``(img, targets, _, _)`` batches; must
            expose ``batch_size`` and ``dataset.img_size``.
        test_loader: Loader forwarded to ``test.test``.
        epochs: Total number of fine-tuning epochs.

    Returns:
        The ``current_layer`` entry read from the checkpoint.
    """
    with open(progress_result, 'a') as f:
        # The 9th column label had been mangled into '[email protected]' by an
        # e-mail obfuscator; restored to the metric name.
        f.write(('\n' + '%10s' * 10 + '\n') %
                ('Stage', 'Epoch', 'DIoU', 'obj', 'cls', 'Total', 'P', 'R',
                 'mAP@0.5', 'F1'))

    batch_size = train_loader.batch_size
    img_size = train_loader.dataset.img_size
    # Step the optimizer every `accumulate` batches; never let it reach 0
    # (batch sizes above 64 would otherwise cause a modulo-by-zero below).
    accumulate = max(1, 64 // batch_size)
    hook_util = HookUtils()

    pruned_model = Darknet(prune_cfg, img_size=(img_size, img_size)).to(device)

    chkpt = torch.load(progress_chkpt, map_location=device)
    pruned_model.load_state_dict(chkpt['model'], strict=True)

    current_layer = chkpt['current_layer']
    aux_in_layer = aux_util.conv_layer_dict[current_layer]
    aux_model = aux_util.creat_aux_model(aux_in_layer)
    aux_model.to(device)
    aux_model.load_state_dict(chkpt['aux_in{}'.format(aux_in_layer)],
                              strict=True)
    aux_loss_scalar = max(0.01, pow((int(aux_in_layer) + 1) / 75, 2))

    start_epoch = chkpt['epoch'] + 1
    if start_epoch == epochs:
        # Fine-tuning already finished: return the name of the layer to prune.
        return current_layer

    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in dict(pruned_model.named_parameters()).items():
        if 'MaskConv2d.weight' in k:
            pg1 += [v]  # parameter group 1 (apply weight_decay)
        else:
            pg0 += [v]  # parameter group 0

    for v in aux_model.parameters():
        pg0 += [v]  # parameter group 0

    optimizer = optim.SGD(pg0,
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    del pg0, pg1

    if chkpt['optimizer'] is not None:
        optimizer.load_state_dict(chkpt['optimizer'])

    del chkpt

    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[epochs // 3, 2 * (epochs // 3)], gamma=0.1)
    scheduler.last_epoch = start_epoch - 1

    # Keep a handle on the bare network: hooks and state_dict() must address
    # it directly whether or not it gets wrapped for multi-GPU training.
    core_model = pruned_model
    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        pruned_model = nn.parallel.DistributedDataParallel(
            pruned_model, find_unused_parameters=True)
        pruned_model.yolo_layers = pruned_model.module.yolo_layers

    # -------------start train-------------
    nb = len(train_loader)
    pruned_model.nc = 80
    pruned_model.hyp = hyp
    pruned_model.arc = 'default'
    for epoch in range(start_epoch, epochs):

        # -------------register hook for model-------------
        handles = []
        for name, child in core_model.module_list.named_children():
            if name == aux_in_layer:
                handles.append(
                    child.register_forward_hook(hook_util.hook_prune_output))
        # -------------register hook for model-------------

        pruned_model.train()
        aux_model.train()

        print(('\n' + '%10s' * 7) %
              ('Stage', 'Epoch', 'gpu_mem', 'DIoU', 'obj', 'cls', 'total'))

        # -------------start batch-------------
        mloss = torch.zeros(4).to(device)
        pbar = tqdm(enumerate(train_loader), total=nb)
        for i, (img, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            ni = nb * epoch + i  # global batch index
            img = img.to(device).float() / 255.0
            targets = targets.to(device)

            pruned_pred = pruned_model(img)
            pruned_loss, pruned_loss_items = compute_loss(
                pruned_pred, targets, pruned_model)
            pruned_loss *= batch_size / 64

            hook_util.cat_to_gpu0()
            aux_pred = aux_model(hook_util.prune_features['gpu0'][0], targets)
            aux_loss = compute_loss_for_DCP(aux_pred, targets)
            aux_loss *= aux_loss_scalar * batch_size / 64

            loss = pruned_loss + aux_loss
            loss.backward()

            hook_util.clean_hook_out()
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # The auxiliary loss is reported inside the third logged item.
            pruned_loss_items[2] += aux_loss.item()
            mloss = (mloss * i + pruned_loss_items) / (i + 1)
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
            ) else 0
            s = ('%10s' * 3 + '%10.3g' * 4) % ('FiTune ' + current_layer,
                                               '%g/%g' % (epoch, epochs - 1),
                                               '%.3gG' % mem, *mloss)
            pbar.set_description(s)
        # -------------end batch-------------

        scheduler.step()
        for handle in handles:
            handle.remove()

        results, _ = test.test(prune_cfg,
                               data,
                               batch_size=batch_size * 2,
                               img_size=416,
                               model=pruned_model,
                               conf_thres=0.1,
                               iou_thres=0.5,
                               save_json=False,
                               dataloader=test_loader)

        # Checkpoint keys: 'current_layer', 'epoch', 'model', 'optimizer',
        # 'aux_in12', 'aux_in37', 'aux_in62', 'aux_in75', 'prune_guide'.
        chkpt = torch.load(progress_chkpt, map_location=device)
        chkpt['current_layer'] = current_layer
        chkpt['epoch'] = epoch
        chkpt['model'] = core_model.state_dict()
        chkpt['optimizer'] = (None if epoch == epochs - 1
                              else optimizer.state_dict())
        chkpt['aux_in{}'.format(aux_in_layer)] = aux_model.state_dict()

        torch.save(chkpt, progress_chkpt)
        torch.save(chkpt, last)

        if epoch == epochs - 1:
            torch.save(chkpt,
                       '../weights/DCP/backup{}.pt'.format(current_layer))

        del chkpt

        with open(progress_result, 'a') as f:
            f.write(('%10s' * 2 + '%10.3g' * 8) %
                    ('FiTune ' + current_layer, '%g/%g' %
                     (epoch, epochs - 1), *mloss, *results[:4]) + '\n')
    # -------------end train-------------

    torch.cuda.empty_cache()
    return current_layer
def train():
    """Train ``UltraNet`` on the DAC dataset and evaluate after every epoch.

    Settings are read from the module-level ``opt`` and ``hyp``. After each
    epoch the model is evaluated with ``test.test``, one line is appended to
    ``results_file``, scalars are sent to ``tb_writer`` and, unless
    ``opt.nosave`` is set (the final epoch is always saved), a checkpoint is
    written to ``last``.

    Returns:
        The value returned by the most recent ``test.test`` call, or an empty
        tuple when training stops on a non-finite loss before any evaluation
        has run.
    """
    img_size, img_size_test = opt.img_size if len(
        opt.img_size) == 2 else opt.img_size * 2  # train, test sizes
    epochs = opt.epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64

    # remove previous results
    for f in glob.glob('*_batch*.png') + glob.glob(results_file):
        os.remove(f)

    # init model
    model = UltraNet().to(device)
    model.apply(weights_init_normal)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters())

    # cosine lr: factor decays from 1.0 towards 0.01 over `epochs`
    def cosine_lr(x):
        return (1 + math.cos(x * math.pi / epochs)) / 2 * 0.99 + 0.01

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=cosine_lr)
    scheduler.last_epoch = 0

    root = "/share/DAC2020/dataset/"
    dataset = DACDataset(root, "train", BaseTransform(320, 160))

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
    )

    # Testloader
    testloader = torch.utils.data.DataLoader(
        DACDataset(root, "test", BaseTransform(320, 160)),
        batch_size=batch_size * 2,
        num_workers=nw,
        pin_memory=True,
    )

    nc = 13
    model.nc = nc  # attach number of classes to model
    model.arc = opt.arc  # attach yolo architecture
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = torch.ones(nc) / nc  # uniform class weights

    # Defined up front so the early return below (and a run with zero epochs)
    # has a value to hand back instead of raising UnboundLocalError.
    results = ()
    for epoch in range(epochs):
        model.train()
        train_loss = 0

        pbar = tqdm(enumerate(dataloader), total=len(dataloader))  # progress bar
        for batch_i, (_, imgs, targets) in pbar:
            batches_done = len(dataloader) * epoch + batch_i

            imgs = Variable(imgs.to(device))
            targets = Variable(targets.to(device), requires_grad=False)

            # multi-scale is not used here
            # forward
            pred = model(imgs)

            # compute loss
            loss, loss_items = compute_loss(pred, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            loss.backward()

            # The batch-size scaling is applied after backward(), so it only
            # affects the logged running mean, not the gradients.
            scaled_loss = loss * batch_size / 64
            train_loss = (train_loss * batch_i + scaled_loss.item()) / (batch_i + 1)

            # optimize every `accumulate` batches
            if batches_done % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            s = ('%10s' + '%10.3g' * 3) % ('%g/%g' % (epoch, epochs - 1),
                                           train_loss, len(targets), img_size)
            pbar.set_description(s)
        # end one epoch
        scheduler.step()

        # process data of current epoch
        final_epoch = (epoch + 1 == epochs)
        results = test.test(
            batch_size=batch_size * 2,
            img_size=img_size_test,
            model=model,
            conf_thres=0.001,
            iou_thres=0.6,
            save_json=final_epoch and is_coco,
            single_cls=opt.single_cls,
            dataloader=testloader)

        # Write epoch results
        with open(results_file, 'a') as f:
            # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            f.write(s + '%10.3g' * len(results) % results + '\n')

        # train_loss is a plain float, so wrap it rather than iterate it.
        x = [train_loss] + list(results)
        titles = ['Train loss', 'iou', 'Test_loss', 'Giou loss', 'obj loss']
        for xi, title in zip(x, titles):
            tb_writer.add_scalar(title, xi, epoch)

        # Save training results
        save = (not opt.nosave) or final_epoch
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint
                chkpt = {
                    'epoch': epoch,
                    'training_results': f.read(),
                    'model': model.module.state_dict()
                    if isinstance(model, nn.parallel.DistributedDataParallel)
                    else model.state_dict(),
                    'optimizer': None if final_epoch else optimizer.state_dict()
                }

            # Save last checkpoint
            torch.save(chkpt, last)

            # Delete checkpoint
            del chkpt

    # end training
    torch.cuda.empty_cache()
    return results
def greedy_channel_select(origin_model, prune_cfg, origin_weights,
                          select_layer, device, aux_util, data_loader,
                          pruned_rate):
    """Greedily pick the input channels of one layer that are kept.

    ``k = in_channels - floor(in_channels * pruned_rate)`` rounds are run.
    Each round first accumulates gradients of the joint loss over the whole
    loader, marks the not-yet-selected channel with the largest gradient
    norm in the layer's ``selected_channels_mask``, and then runs one pass
    of SGD over the layer's ``MaskConv2d`` parameters. The chosen indices
    are written to ``logger``; nothing is returned.

    Args:
        origin_model: Unpruned reference network (run without gradients).
        prune_cfg: Configuration passed to ``Darknet`` / ``mask_converted``.
        origin_weights: Weights handed to ``mask_converted``.
        select_layer: Name (numeric string) of the layer being pruned.
        device: Torch device to run on.
        aux_util: Helper providing ``layer_info``, ``conv_layer_dict``,
            ``down_sample_layer`` and ``creat_aux_list``.
        data_loader: Loader yielding ``(imgs, targets, _, _)`` batches.
        pruned_rate: Fraction of input channels to remove.
    """
    init_state_dict = mask_converted(prune_cfg, origin_weights, target=None)

    prune_model = Darknet(prune_cfg).to(device)
    prune_model.load_state_dict(init_state_dict, strict=True)
    del init_state_dict

    # Grab the masked convolution once, before any multi-GPU wrapping, so the
    # code below works both with and without DistributedDataParallel.
    mask_conv = prune_model.module_list[int(select_layer)].MaskConv2d

    solve_sub_problem_optimizer = optim.SGD(mask_conv.parameters(),
                                            lr=hyp['lr0'],
                                            momentum=hyp['momentum'])
    hook_util = HookUtils()
    handles = []

    info = aux_util.layer_info[int(select_layer)]
    in_channels = info['in_channels']
    remove_k = math.floor(in_channels * pruned_rate)
    k = in_channels - remove_k  # number of channels to keep

    for name, child in origin_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_origin_input))

    aux_idx = aux_util.conv_layer_dict[select_layer]
    hook_layer_aux = aux_util.down_sample_layer[aux_idx]
    for name, child in prune_model.module_list.named_children():
        if name == select_layer:
            handles.append(
                child.BatchNorm2d.register_forward_hook(
                    hook_util.hook_prune_input))
        elif name == hook_layer_aux:
            handles.append(
                child.register_forward_hook(hook_util.hook_prune_input))

    aux_net = aux_util.creat_aux_list(416, device, conv_layer_name=select_layer)
    chkpt_aux = torch.load(aux_weight, map_location=device)
    aux_net.load_state_dict(chkpt_aux['aux{}'.format(aux_idx)])
    del chkpt_aux

    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        prune_model = torch.nn.parallel.DistributedDataParallel(
            prune_model, find_unused_parameters=True)
        prune_model.yolo_layers = prune_model.module.yolo_layers
        aux_net = torch.nn.parallel.DistributedDataParallel(
            aux_net, find_unused_parameters=True)

    nb = len(data_loader)
    prune_model.nc = 80
    prune_model.hyp = hyp
    prune_model.arc = 'default'
    prune_model.eval()
    aux_net.eval()
    MSE = nn.MSELoss(reduction='mean')

    header = ('\n' + '%10s' * 8) % ('Stage', 'gpu_mem', 'iter', 'MSELoss',
                                    'PdLoss', 'AuxLoss', 'Total', 'targets')

    def _joint_loss(imgs, targets):
        """Run both networks on one batch; return (mse, pruning, aux, total)."""
        with torch.no_grad():
            _ = origin_model(imgs)

        _, pruning_pred = prune_model(imgs)
        pruning_loss, _ = compute_loss(pruning_pred, targets, prune_model)

        hook_util.cat_to_gpu0('prune')
        aux_pred = aux_net(hook_util.prune_features['gpu0'][1])
        aux_loss, _ = AuxNetUtils.compute_loss_for_aux(aux_pred, aux_net,
                                                       targets)

        mse_loss = torch.zeros(1).to(device)
        mse_loss += MSE(hook_util.prune_features['gpu0'][0],
                        hook_util.origin_features['gpu0'][0])

        loss = hyp['joint_loss'] * mse_loss + pruning_loss + aux_loss
        return mse_loss, pruning_loss, aux_loss, loss

    def _gpu_mem():
        return torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available(
        ) else 0

    greedy = torch.zeros(k)
    for i_k in range(k):

        # ---- pass 1: accumulate gradients over the loader, no optimizer step
        pbar = tqdm(enumerate(data_loader), total=nb)
        print(header)
        for i, (imgs, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            mse_loss, pruning_loss, aux_loss, loss = _joint_loss(imgs, targets)
            loss.backward()

            s = ('%10s' * 3 + '%10.3g' * 5) % (
                'Pruning ' + select_layer, '%.3gG' % _gpu_mem(), '%g/%g' %
                (i_k, k), mse_loss, pruning_loss, aux_loss, loss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

        # Per-input-channel gradient magnitude of the masked convolution.
        grad = mask_conv.weight.grad.detach().clone()**2
        grad = grad.sum((2, 3)).sqrt().sum(0)

        if i_k == 0:
            # Start from an (almost) empty mask and log what a one-shot
            # top-k selection would have chosen, for comparison.
            mask_conv.selected_channels_mask[:] = 1e-5
            _, non_greedy_indices = torch.topk(grad, k)
            logger.info('non greedy layer{}: selected==>{}'.format(
                select_layer, str(non_greedy_indices)))

        # Already-selected channels have mask 1 and are zeroed out here.
        selected_channels_mask = mask_conv.selected_channels_mask
        _, indices = torch.topk(grad * (1 - selected_channels_mask), 1)
        mask_conv.selected_channels_mask[indices] = 1
        greedy[i_k] = indices
        logger.info('greedy layer{} iter{}: indices==>{}'.format(
            select_layer, str(i_k), str(indices)))

        prune_model.zero_grad()

        # ---- pass 2: optimize the layer weights under the updated mask
        pbar = tqdm(enumerate(data_loader), total=nb)
        mloss = torch.zeros(4).to(device)
        print(header)
        for i, (imgs, targets, _, _) in pbar:
            if len(targets) == 0:
                continue

            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            mse_loss, pruning_loss, aux_loss, loss = _joint_loss(imgs, targets)
            loss.backward()

            solve_sub_problem_optimizer.step()
            solve_sub_problem_optimizer.zero_grad()

            mloss = (mloss * i + torch.cat(
                [mse_loss, pruning_loss, aux_loss, loss]).detach()) / (i + 1)
            s = ('%10s' * 3 + '%10.3g' * 5) % ('SubProm ' + select_layer,
                                               '%.3gG' % _gpu_mem(), '%g/%g' %
                                               (i_k, k), *mloss, len(targets))
            pbar.set_description(s)

            hook_util.clean_hook_out('origin')
            hook_util.clean_hook_out('prune')

    for handle in handles:
        handle.remove()

    logger.info(
        ("greedy layer{}: selected==>{}".format(select_layer, str(greedy))))
def main(args):
    """Train the model described by the YAML file at ``args.config``.

    Builds the graph, restores from ``restore_from`` when possible, then
    runs ``train_steps``-bounded optimisation on randomly sampled batches.
    Every ``print_every`` steps the train (and, when a dev file is
    configured, dev) perplexity is printed; every ``checkpoint_every`` steps
    a checkpoint is written to ``logdir``. A final checkpoint is written on
    exit if steps were taken since the last save.

    Args:
        args: Object with a ``config`` attribute holding the YAML path.
    """
    # loading configurations
    with open(args.config) as config_file:
        config = yaml.safe_load(config_file)["configuration"]
    name = config["Name"]

    # Construct or load embeddings
    print("Initializing embeddings ...")
    emb_cfg = config["embeddings"]
    vocab_size = emb_cfg["vocab_size"]
    embed_size = emb_cfg["embed_size"]
    per_num = emb_cfg["person_num"]
    per_embed_size = emb_cfg["person_embed_size"]
    ori_emb, ori_p_emb = load_embedding("model/emb.tsv")
    embeddings = init_embeddings(vocab_size,
                                 embed_size,
                                 initial_values=ori_emb,
                                 name=name)
    print("\tDone.")

    # Build the model and compute losses (placeholder order is kept as-is)
    source_ids = tf.placeholder(tf.int32, [None, 40], name="source")
    target_ids = tf.placeholder(tf.int32, [None, 40], name="target")
    person_ids = tf.placeholder(tf.int32, [None], name="person_ids")
    lexicons_ids = tf.placeholder(tf.int32, [per_num, 1000],
                                  name="lexicons_ids")
    spectrogram = tf.placeholder(tf.float32, [None, 400, 200], name="audio")
    sequence_mask = tf.placeholder(tf.bool, [None, 40], name="mask")
    choice_qs = tf.placeholder(tf.float32, [None, 40], name="choice")
    emo_cat = tf.placeholder(tf.int32, [None], name="emotion_category")
    is_train = tf.placeholder(tf.bool)

    (enc_num_layers, enc_num_units, enc_cell_type, enc_bidir, dec_num_layers,
     dec_num_units, dec_cell_type, state_pass, num_emo, emo_cat_units,
     emo_int_units, infer_batch_size, beam_size, max_iter, attn_num_units,
     l2_regularize, word_config, spectrogram_config, lstm_int_num, batch_size,
     loss_weight) = get_PEC_config(config)

    print("Building model architecture ...")
    CE, loss, cla_loss, train_outs, infer_outputs, score = compute_loss(
        source_ids, target_ids, sequence_mask, choice_qs, embeddings,
        enc_num_layers, enc_num_units, enc_cell_type, enc_bidir,
        dec_num_layers, dec_num_units, dec_cell_type, state_pass, num_emo,
        emo_cat, emo_cat_units, emo_int_units, infer_batch_size, spectrogram,
        word_config, per_num, person_ids, per_embed_size, spectrogram_config,
        loss_weight, lstm_int_num, is_train, False, lexicons_ids, beam_size,
        max_iter, attn_num_units, l2_regularize, name)
    print("\tDone.")

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    (logdir, restore_from, learning_rate, gpu_fraction, max_checkpoints,
     train_steps, batch_size, print_every, checkpoint_every, s_filename,
     t_filename, q_filename, s_max_leng, t_max_leng, dev_s_filename,
     dev_t_filename, dev_q_filename, loss_fig, perp_fig, sp_filename,
     sp_max_leng, test_s_filename, test_t_filename, test_q_filename,
     test_output) = get_training_config(config, "training")
    is_overwritten_training = logdir != restore_from

    # Adam with global-norm gradient clipping at 5.0
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=1e-4)
    train_vars = tf.trainable_variables()
    raw_grads = tf.gradients(loss, train_vars)
    clipped_grads, grad_norm = tf.clip_by_global_norm(raw_grads, 5.0)
    train_op = optimizer.apply_gradients(zip(clipped_grads, train_vars))

    # Set up session
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    sess = tf.Session(config=session_config)
    sess.run(tf.global_variables_initializer())

    # Saver for storing checkpoints of the model.
    # NOTE(review): `var_list` (trainable variables plus batch-norm moving
    # statistics) is assembled here but the Saver below is given only the
    # trainable variables -- confirm whether that is intended.
    all_globals = tf.global_variables()
    var_list = tf.trainable_variables()
    var_list += [v for v in all_globals if 'moving_mean' in v.name]
    var_list += [v for v in all_globals if 'moving_variance' in v.name]
    saver = tf.train.Saver(var_list=tf.trainable_variables(),
                           max_to_keep=max_checkpoints)

    # BN
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    try:
        saved_global_step = load(saver, sess, restore_from)
        if saved_global_step is None or is_overwritten_training:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except Exception:
        print("Something went wrong while restoring checkpoint. "
              "Training is terminated to avoid the overwriting.")
        raise

    # ##### Training #####
    # Load data
    print("Loading data ...")

    # id_0, id_1, id_2 preserved for SOS, EOS, constant zero padding
    embed_shift = 3

    lexicons = load_lexicons() + embed_shift

    source_sentences_ids, source_person, source_data = loadfile(
        s_filename, is_dialog=True, is_source=True, max_length=s_max_leng)
    source_data += embed_shift
    target_sentences_ids, target_person, target_data, category_data = loadfile(
        t_filename, is_dialog=True, is_source=False, max_length=t_max_leng)
    target_data += embed_shift
    spectrogram_data = load_spectrogram(sp_filename, source_sentences_ids)
    choice_data = loadfile(q_filename,
                           is_dialog=False,
                           is_source=False,
                           max_length=t_max_leng).astype(np.float32)

    # Mask shifted one step to the right; the first position is always on.
    masks = (target_data >= embed_shift)
    masks = np.append(np.ones([len(masks), 1], dtype=bool), masks,
                      axis=1)[:, :-1]
    n_data = len(source_data)

    dev_source_data = None
    if dev_s_filename is not None:
        dev_source_sentences_ids, dev_source_person, dev_source_data = loadfile(
            dev_s_filename,
            is_dialog=True,
            is_source=True,
            max_length=s_max_leng)
        dev_source_data += embed_shift
        dev_target_sentences_ids, dev_target_person, dev_target_data, dev_category_data = loadfile(
            dev_t_filename,
            is_dialog=True,
            is_source=False,
            max_length=t_max_leng)
        dev_target_data += embed_shift
        dev_spectrogram_data = load_spectrogram(sp_filename,
                                                dev_source_sentences_ids)
        dev_choice_data = loadfile(dev_q_filename,
                                   is_dialog=False,
                                   is_source=False,
                                   max_length=t_max_leng)
        dev_choice_data[dev_choice_data < 0] = 0
        dev_choice_data = dev_choice_data.astype(np.float32)
        dev_masks = (dev_target_data >= embed_shift)
        dev_masks = np.append(np.ones([len(dev_masks), 1], dtype=bool),
                              dev_masks,
                              axis=1)[:, :-1]
    print("\tDone.")

    def dev_feed(rows):
        """Feed dict for the dev rows selected by the slice *rows*."""
        return {
            source_ids: dev_source_data[rows],
            target_ids: dev_target_data[rows],
            person_ids: dev_target_person[rows],
            spectrogram: dev_spectrogram_data[rows],
            choice_qs: dev_choice_data[rows],
            emo_cat: dev_category_data[rows],
            sequence_mask: dev_masks[rows],
            lexicons_ids: lexicons,
            is_train: False,
        }

    # Training
    last_saved_step = saved_global_step
    num_steps = saved_global_step + train_steps
    losses, cla_losses = [], []
    steps, perps, dev_perps = [], [], []

    print("Start training ...")

    step = last_saved_step
    try:
        for step in range(saved_global_step + 1, num_steps):
            tic = time.time()

            # Sample a batch (with replacement) from the training data.
            picked = np.random.choice(n_data, batch_size)
            mask_batch = masks[picked]
            feed_dict = {
                source_ids: source_data[picked],
                target_ids: target_data[picked],
                person_ids: target_person[picked],
                spectrogram: spectrogram_data[picked],
                sequence_mask: mask_batch,
                choice_qs: choice_data[picked],
                emo_cat: category_data[picked],
                lexicons_ids: lexicons,
                is_train: True,
            }
            loss_value, cla_value, _, __ = sess.run(
                [loss, cla_loss, train_op, extra_update_ops],
                feed_dict=feed_dict)
            losses.append(loss_value)
            cla_losses.append(cla_value)

            duration = time.time() - tic

            if step % print_every == 0:
                # train perplexity
                t_perp = compute_perplexity(sess, CE, mask_batch, feed_dict)
                perps.append(t_perp)

                # dev perplexity
                dev_str = ""
                if dev_source_data is not None:
                    CE_words = N_words = 0.0
                    for start in range(0, len(dev_source_data), batch_size):
                        rows = slice(start, start + batch_size)
                        CE_word, N_word = compute_test_perplexity(
                            sess, CE, dev_masks[rows], dev_feed(rows))
                        CE_words += CE_word
                        N_words += N_word

                    dev_str = "dev_prep: {:.3f}, ".format(
                        np.exp(CE_words / N_words))
                    dev_perps.append(np.exp(CE_words / N_words))

                steps.append(step)
                info = ('step {:d}, loss = {:.6f}, cla_loss = {:.6f} ' +
                        'perp: {:.3f}, {}({:.3f} sec/step)')
                print(
                    info.format(step, loss_value, cla_value, t_perp, dev_str,
                                duration))

            if step % checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C so save message is on its own line.
        print()

    finally:
        # Persist any progress made since the last periodic checkpoint.
        if step > last_saved_step:
            save(saver, sess, logdir, step)