def __init__(self, model, criterion, optimizer, args, device, data_loader,
             valid_data_loader=None, lr_scheduler=None, train_logger=None,
             batch_logger=None, valid_logger=None):
    """Collect all training components (model, optimizer, loaders, loggers) on the trainer."""
    self.args = args
    self.device = device
    self.model = model
    self.criterion = criterion
    self.optimizer = optimizer
    self.lr_scheduler = lr_scheduler
    # Separate loggers for per-epoch, per-batch and validation records.
    self.train_logger = train_logger
    self.batch_logger = batch_logger
    self.valid_logger = valid_logger
    # Resume settings come straight from the CLI arguments.
    self.resume = self.args.resume
    self.resume_path = self.args.resume_path
    # Data pipelines; validation is optional.
    self.data_loader = data_loader
    self.valid_data_loader = valid_data_loader
    self.do_validation = self.valid_data_loader is not None
    # Heuristic: log roughly every sqrt(batch_size) steps.
    self.log_step = int(np.sqrt(data_loader.batch_size))
    self.start_epoch = args.start_epoch
    self.epochs = args.n_epochs
    # The visualizer needs class names and normalisation stats to undo preprocessing.
    self.visualizer = Visualizer(self.data_loader.classes,
                                 self.data_loader.rgb_mean,
                                 self.data_loader.rgb_std,
                                 self.args)
def test_output_visualize(self):
    """Smoke-test Visualizer.visualize_predict on one Cityscapes batch.

    Fix: `it.next()` is the Python-2 iterator protocol; under Python 3 the
    builtin `next(it)` must be used.
    """
    vs = Visualizer("asd")
    DATA_DIR = '/home/apex/chendixi/Experiment/data/CityScapes'
    dataset = CityscapesDataset(DATA_DIR, split='train')
    dataloader = DataLoader(dataset, batch_size=2, shuffle=False)
    it = iter(dataloader)
    images, masks = next(it)  # masks are NOT divided by 255
    print(images.size())  # torch.Size([2, 3, 513, 513])
    vs.visualize_predict('result', images, 0)
def __init__(self, model, loss, metrics, resume, config, data_loader, toolbox: Toolbox, valid_data_loader=None, train_logger=None):
    """Set up the FOTS trainer on top of the generic base trainer.

    Args:
        model, loss, metrics, resume, config, train_logger: forwarded to the base class.
        data_loader: training loader; its batch size drives the logging interval.
        toolbox: project helper bundle used during training.
        valid_data_loader: optional validation loader; enables validation when given.
    """
    super(Trainer, self).__init__(model, loss, metrics, resume, config, train_logger)
    self.config = config
    self.batch_size = data_loader.batch_size
    self.data_loader = data_loader
    self.valid_data_loader = valid_data_loader
    # Simplified from `True if ... is not None else False`: the comparison
    # already yields the bool.
    self.valid = self.valid_data_loader is not None
    # Heuristic: log about sqrt(batch_size) steps apart.
    self.log_step = int(np.sqrt(self.batch_size))
    self.toolbox = toolbox
    self.visdom = Visualizer(env='FOTS')
def __init__(self, head_detector):
    """Bundle the head detector with its optimizer, loss bookkeeping and visdom client."""
    super(Trainer, self).__init__()
    self.head_detector = head_detector
    self.optimizer = self.head_detector.get_optimizer()
    self.anchor_target_layer = AnchorTargetLayer()
    # Named tuple keeps the three tracked losses addressable by field name.
    self.loss_tuple = namedtuple('LossTuple',
                                 ['rpn_regr_loss', 'rpn_cls_loss', 'total_loss'])
    self.vis = Visualizer(env=cfg.VISDOM_ENV)
    self.rpn_cm = ConfusionMeter(2)  # confusion matrix with 2 classes
    # One running-average meter per tracked loss field.
    self.meters = {field: AverageValueMeter() for field in self.loss_tuple._fields}
def train(**kwargs):
    """Train the WaveNet model on windowed series data, plotting the loss via visdom."""
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    model = models.WaveNet(opt.input_size, opt.out_size, opt.residual_size,
                           opt.skip_size, opt.dilation_cycles, opt.dilation_depth)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    device = torch.device('cuda') if opt.use_gpu else torch.device('cpu')
    model.to(device)

    data_utility = Data_utility(opt.train_data_root, opt.WINDOW_SIZE)
    scaler = data_utility.get_scaler()
    joblib.dump(scaler, 'scaler.pkl')  # persist the scaler for inference time
    X, Y = data_utility.get_data()

    criterion = nn.MSELoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        for step, (data, label) in tqdm(
                enumerate(data_utility.get_batches(X, Y, opt.batch_size))):
            inputs = data.to(device)
            targets = label.to(device)

            optimizer.zero_grad()
            preds = model(inputs).squeeze(2)
            loss = criterion(preds, targets)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            if (step + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        model.save('models/checkpoints/' + opt.model + str(epoch) + '.pth')

        # Anneal the learning rate whenever the epoch loss stopped improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def __init__(self, cfg, classes):
    """Create the tensorboard writer and detection visualizer from the config."""
    super().__init__()
    self.classes = classes
    self.summary_writer = SummaryWriter('logs')
    # The visualizer de-normalises images with the dataset mean/std before drawing.
    self.visualizer = Visualizer(classes,
                                 cfg.tensorboard.score_threshold,
                                 cfg.normalize.mean,
                                 cfg.normalize.std,
                                 font_size=cfg.tensorboard.font_size,
                                 alpha=cfg.tensorboard.alpha)
    # Cap on how many detection images are written per logging round.
    self.num_visualizations = cfg.tensorboard.num_visualizations
    self.log_callback = None
    self.__num_logged_images = 0
def train(**kwargs):
    """Train an image-sentiment classifier and report loss/accuracy to visdom."""
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: model
    model = getattr(Nets, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)

    # step2: data (train/validation splits come from the same source file)
    train_data = imageSentiment(opt.train_path, train=True)
    val_data = imageSentiment(opt.train_path, train=False)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: loss and optimizer
    # NOTE(review): `lr` is read from opt but the optimizer is pinned to
    # 0.001 below, exactly as in the original code — confirm intent.
    lr = opt.lr
    optimizer = Optim.SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)

    # step4: meters (average loss + 7-class confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        total_loss = 0.
        for batch_idx, (label, data) in tqdm(enumerate(train_dataloader),
                                             total=len(train_dataloader)):
            if opt.use_gpu:
                label, data = label.to(device), data.to(device)
            optimizer.zero_grad()
            score = model(data)
            # nll_loss takes class indices directly (no one-hot needed).
            loss = F.nll_loss(score, label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, label.data)
            if batch_idx % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                vis.plot('mach avgloss', total_loss / len(train_dataloader))

        model.save()
        # validation accuracy after each epoch
        val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
def train(**kwargs):
    """Prepare a locally-checkpointed ResNet-101 with a 3-way head plus data/optimizer."""
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: model — load local ImageNet weights, then swap in a 3-class head.
    model = resnet101(pretrained=False)
    model.load_state_dict(t.load('./resnet101.pth'))
    fc_feat_num = model.fc.in_features
    model.fc = t.nn.Linear(fc_feat_num, 3)
    model.cuda()

    # step2: data
    data_dir = '/home/apollo/ali-tianchi/dataset/train_dataset'
    train_loader, val_loader = Ali_loader(dataset_dir=data_dir,
                                          batch_size=opt.batch_size,
                                          num_workers=opt.num_workers,
                                          use_gpu=opt.use_gpu)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                            weight_decay=opt.weight_decay)
def test(**kwargs):
    """Run HRDN over the test set, save each output image and report average PSNR.

    Fixes: `vis` is now always bound (None when visdom is disabled), and the
    None-check uses `is not None` instead of `!= None`.
    """
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    vis = Visualizer(opt.env) if opt.vis else None

    test_data = Val_MoireData(opt.test_path)
    test_dataloader = DataLoader(test_data,
                                 batch_size=opt.test_batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers,
                                 drop_last=False)

    model = get_model("HRDN")
    prefix = "{0}{1}/".format(opt.save_prefix, "HRDN")
    model.eval()
    torch.cuda.empty_cache()

    psnr_meter = meter.AverageValueMeter()
    for ii, (moires, clears, labels) in tqdm(enumerate(test_dataloader)):
        moires = moires.to(opt.device)
        clears = clears.to(opt.device)
        output_list, _ = model(moires)
        outputs = output_list[0]

        # Back to uint8 images for metric computation / saving.
        moires = tensor2im(moires)
        outputs = tensor2im(outputs)
        clears = tensor2im(clears)

        psnr = colour.utilities.metric_psnr(outputs, clears)
        psnr_meter.add(psnr)

        bs = moires.shape[0]
        for jj in range(bs):
            output = outputs[jj]
            label = labels[jj]
            img_path = "{0}{1}_output.png".format(prefix, label)
            save_single_image(output, img_path)

        if opt.vis and vis is not None and (ii + 1) % 10 == 0:  # log every 10 iters
            vis.log(">>>>>>>> batch_psnr:{psnr}<<<<<<<<<<".format(psnr=psnr))

    torch.cuda.empty_cache()
    print("average psnr is {}".format(psnr_meter.value()[0]))
def pre_(model):
    """Roll the model forward over `opt.num` samples (stride `opt.future`),
    accumulate predictions/targets, draw them and run evaluation.

    Returns (per-step MSE losses, None, None).
    """

    def fetch(dataset, idx):
        # Reshape one sample to (T, batch=1, features) on the configured device.
        d_t, ts = dataset[idx]
        inp = d_t[0].to(opt.device).unsqueeze(0).permute(1, 0, 2)
        tgt = d_t[1].to(opt.device).unsqueeze(0).permute(1, 0, 2)
        return inp, tgt, ts

    def to_numpy(*tensors):
        return tuple(x.cpu().detach().numpy() for x in tensors)

    test_data = data.datasets(opt).getData()
    N = test_data.__len__()
    if opt.num > N:
        warnings.warn('Warning: data is not long enough, data(%d) num(%d)' % (N, opt.num))

    start_index = 0
    all_input_data = None
    all_output_data = None
    all_target_data = None
    all_ts = None
    loss = []

    index = start_index
    while index - start_index < opt.num:
        input_data, target_data, ts = fetch(test_data, index)
        input_data, target_data = data_norm([input_data, target_data], isup=False)
        # VAR-style models consume the target sequence as well.
        if 'VAR' in opt.model:
            output_data = model([input_data, target_data])
        else:
            output_data = model(input_data)

        loss.append(t.nn.MSELoss()(target_data, output_data).item())

        # De-normalise before collecting for plotting/evaluation.
        input_data, target_data, output_data = data_norm(
            [input_data, target_data, output_data], isup=True)
        i_, o_, t_ = to_numpy(input_data, output_data, target_data)

        all_input_data = i_ if all_input_data is None else np.concatenate([all_input_data, i_])
        all_output_data = o_ if all_output_data is None else np.concatenate([all_output_data, o_])
        all_target_data = t_ if all_target_data is None else np.concatenate([all_target_data, t_])
        all_ts = ts if all_ts is None else (t.cat((all_ts[0], ts[0]), dim=0),
                                            t.cat((all_ts[1], ts[1]), dim=0))
        index += opt.future

    print(all_input_data.shape, all_output_data.shape, all_target_data.shape)
    all_ts = (all_ts[0].unsqueeze(0), all_ts[1].unsqueeze(0))
    Visualizer().drawTest(([], all_output_data, all_target_data), all_ts, drawLot=True)
    evaluation(t.from_numpy(all_output_data), t.from_numpy(all_target_data), 0)
    return loss, None, None
def __init__(self, opt):
    """Instantiate the configured model (plus optional visdom) and move it to the device."""
    self.opt = opt
    # Visdom is optional; keep None when disabled so callers can test truthiness.
    self.vis = Visualizer(opt.env, port=opt.vis_port) if opt.vis else None
    self.model = getattr(models, opt.model)(**opt.model_kwargs)
    if opt.load_model_path:
        self.model.load_state_dict(t.load(opt.load_model_path))
    self.model.to(opt.device)
def train():
    """Train the U-Net separator on the Spg dataset.

    Fix: `AverageValueMeter.value()` returns a (mean, std) tuple, so the
    previous `loss_meter.value().item()` raised AttributeError; index [0]
    as every other call site in this file does.
    """
    vis = Visualizer(env='svs')
    model = getattr(models, 'Unet')()
    model.train().cuda()

    train_data = Spg('F:/crop_test', train=True)
    val_data = Spg('F:/crop_test', train=False)  # NOTE(review): currently unused below
    train_dataloader = DataLoader(train_data, batch_size=4, drop_last=True)
    val_dataloader = DataLoader(val_data, batch_size=1, drop_last=True)

    loss_meter = meter.AverageValueMeter()
    lr = 0.001
    lr_decay = 0.05
    optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=lr_decay)
    previous_loss = 1e100

    for epoch in range(5):
        loss_meter.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input1 = Variable(data).cuda()
            target = Variable(label).cuda()
            optimizer.zero_grad()
            scroe = model(input1)
            loss = MyLoss()(input1, scroe, target).cuda()
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.data.item())
            if ii % 20 == 19:
                # value() -> (mean, std); plot the running mean.
                vis.plot('loss', loss_meter.value()[0])

        prefix = 'G:/Unet_svs/check/'
        name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
        t.save(model.state_dict(), name)

        if loss_meter.value()[0] > previous_loss:
            # NOTE(review): `opt.lr_decay` relies on a module-level `opt`; the
            # local `lr_decay` above is used as Adam weight_decay — confirm.
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def visdom_init(config, suffix='', vis_clear=True):
    """Build a Visualizer from `config` when visdom is enabled.

    Fills in default env/legend names derived from dataset/arch and returns
    (visualizer_or_None, plot_interval_or_None).
    """
    if not config.visdom:
        return None, None
    if config.vis_env == '':
        config.vis_env = config.dataset + '_' + config.arch + suffix
    if config.vis_legend == '':
        config.vis_legend = config.arch + suffix
    vis = Visualizer(config.vis_env, config.vis_legend, clear=vis_clear)
    return vis, config.vis_interval
def __init__(self, data_loader, models=None, metrics=None, config=None, visualize=True, show_labels=True, overlay=True, show_flow=False, save_dir=None, log_files=None, cuda=False, interval=0.02):
    """Evaluation driver over `data_loader` with optional live visualisation.

    Fix: `models`, `metrics` and `log_files` previously used mutable list
    defaults, which are shared across every instance; replaced with None
    sentinels (behaviour for callers is unchanged).
    """
    models = [] if models is None else models
    metrics = [] if metrics is None else metrics
    log_files = [] if log_files is None else log_files

    self.logger = logging.getLogger(self.__class__.__name__)
    self.data_loader = data_loader
    self.models = models
    self.metrics = metrics
    self.config = config
    self.cuda = cuda
    # Following attributes are only used when visualize=True.
    self.show_labels = show_labels
    self.show_flow = show_flow
    self.save_dir = save_dir
    self.eval_loggers = [EvaluationLogger(log_file) for log_file in log_files]
    self.interval = interval
    self.count = 0
    self.visualizer = None
    if visualize:
        # Flow overlay and label overlay are mutually exclusive display modes.
        assert not (show_flow and show_labels)
        self.visualizer = Visualizer(models,
                                     show_flow=show_flow,
                                     show_labels=show_labels,
                                     overlay=overlay,
                                     window_size=(15, 7))
def __init__(self, e_model, batch_size=128, cat_info=True, vis=False, dataloader=False):
    """Wrap an evaluation model with either a caller-supplied dataloader or
    the default 224px normalised transform."""
    self.model = e_model
    self.batch_size = batch_size
    self.cat_info = cat_info
    if not dataloader:
        # No loader given: fall back to the default ImageNet-style transform.
        self.transform = tv.transforms.Compose([
            tv.transforms.Resize(224),
            tv.transforms.ToTensor(),
            tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
    else:
        self.dataloader = dataloader
    self.vis = vis
    if self.vis:
        self.viser = Visualizer('caffe2torch_test')
def __init__(self):
    """Read the global config and assemble loaders, backbone, loss and optimizer."""
    opt = get_config()
    self.opt = opt
    self.device = opt.device

    self.train_loader, self.val_loader = get_loaders(
        opt.dataroot, opt.val_batch_size, opt.train_batch_size,
        opt.input_size, opt.workers, opt.num_nodes, opt.local_rank)
    print(len(self.train_loader))
    # Number of optimizer steps per epoch.
    self.epochs_per_step = int(len(self.train_loader.dataset) / opt.train_batch_size)

    # Backbone selection (imported lazily so only the chosen one is loaded).
    if opt.back_bone == "mobiv3":
        from backbone.mobilev3.mobilenetV3 import mobilenetv3
        self.model = mobilenetv3(input_size=opt.input_size, num_classes=2,
                                 small=False, get_weights=False).to(self.device)
        print("use mobiv3")
    elif opt.back_bone == "efficient":
        from backbone.efficient.efficientNet import EfficientNet
        self.model = EfficientNet.from_name('efficientnet-b0',
                                            image_size=opt.input_size,
                                            num_classes=opt.num_classes).to(self.device)
        print("use efficient")

    self.lr = opt.learning_rate
    if opt.loss == "cross_entropy":
        from utils.cross_entropy import CrossEntropyLoss
        self.criterion = CrossEntropyLoss().to(self.device)

    if self.device == 'cuda':
        cudnn.enabled = True
        print("cudnn enable")

    self.epochs = opt.epochs
    num_parameters = sum(p.nelement() for p in self.model.parameters())
    print("num parameters", num_parameters)

    self.optim, self.mixup = self.init_optimizer_and_mixup()
    self.vis = Visualizer()
class TensorboardLogger:
    """Writes detection visualisations and scalar stats to tensorboard."""

    def __init__(self, cfg, classes):
        super().__init__()
        self.classes = classes
        self.summary_writer = SummaryWriter('logs')
        # The visualizer de-normalises images with the dataset mean/std.
        self.visualizer = Visualizer(classes,
                                     cfg.tensorboard.score_threshold,
                                     cfg.normalize.mean,
                                     cfg.normalize.std,
                                     font_size=cfg.tensorboard.font_size,
                                     alpha=cfg.tensorboard.alpha)
        # Cap on how many detection images are written per logging round.
        self.num_visualizations = cfg.tensorboard.num_visualizations
        self.log_callback = None
        self.__num_logged_images = 0

    def log_detections(self, batch, detections, step, tag):
        """Render up to the configured number of detection images for this batch."""
        if self.__num_logged_images >= self.num_visualizations:
            return
        images = batch["input"].detach().cpu().numpy()
        ids = batch["id"].detach().cpu().numpy()
        for i in range(images.shape[0]):
            rendered = self.visualizer.visualize_detections(
                images[i].transpose(1, 2, 0),
                detections['pred_boxes'][i],
                detections['pred_classes'][i],
                detections['pred_scores'][i],
                detections['gt_boxes'][i],
                detections['gt_classes'][i],
                detections['gt_kps'][i] if 'gt_kps' in detections else None,
                detections['pred_kps'][i] if 'pred_kps' in detections else None,
            )
            self.summary_writer.add_image(f'{tag}/detection_{ids[i]}', rendered, step)
            self.__num_logged_images += 1
            if self.__num_logged_images >= self.num_visualizations:
                break

    def log_stat(self, name, value, step):
        """Write one scalar to tensorboard."""
        self.summary_writer.add_scalar(name, value, step)

    def log_image(self, name, image, step):
        """Write one image to tensorboard."""
        self.summary_writer.add_image(name, image, step)

    def reset(self):
        # Allow a fresh round of image logging (e.g. at the start of an epoch).
        self.__num_logged_images = 0
def modelTestSimple():
    """Evaluate `opt.num` random samples, drawing each prediction; returns the losses.

    Fix: `np.random.randint(0, len(test_data) - 1)` could never pick the last
    sample because randint's upper bound is exclusive; use len(test_data).
    """
    opt.batch_size = 1
    model = getattr(models, opt.model)(opt=opt).eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    test_data = data.datasets(opt).getData()
    loss = []
    for _ in range(opt.num):
        # Upper bound exclusive: samples uniformly over all valid indices.
        index = np.random.randint(0, len(test_data))

        def datatype(dataset, idx):
            # (T, batch=1, features) layout on the configured device.
            d_t, ts = dataset[idx]
            i = d_t[0].to(opt.device).unsqueeze(0).permute(1, 0, 2)
            tgt = d_t[1].to(opt.device).unsqueeze(0).permute(1, 0, 2)
            ts = (ts[0].to(opt.device).unsqueeze(0), ts[1].to(opt.device).unsqueeze(0))
            return i, tgt, ts

        input_data, target_data, ts = datatype(test_data, index)
        input_data, target_data = data_norm([input_data, target_data], isup=False)
        # VAR-style models consume the target sequence as well.
        if 'VAR' in opt.model:
            output_data = model([input_data, target_data])
        else:
            output_data = model(input_data)

        # i: T x 1 x multi; t: future x 1 x multi; o: future x 1 x multi
        temp_loss = t.nn.MSELoss()(target_data, output_data).item()
        print('temp_loss : ', temp_loss)

        def tensor2numpy(i_, o_, t_):
            return i_.cpu().detach().numpy(), \
                o_.cpu().detach().numpy(), \
                t_.cpu().detach().numpy()

        # De-normalise before drawing.
        input_data, target_data, output_data = data_norm(
            [input_data, target_data, output_data], isup=True)
        Visualizer().drawTest(
            tensor2numpy(input_data, output_data, target_data), ts)
        print(temp_loss)
        loss.append(temp_loss)
    return loss
def LetsGo(kwargs, fun):
    """Configure options for `fun` (train/test), run it, then report losses."""
    if kwargs is not None:
        opt._parse(kwargs)
    if fun == train:
        # A fresh training run never resumes from a checkpoint.
        opt.load_model_path = None
    # Per-dataset I/O sizes and logging flag.
    opt.input_size = opt._input_kv[opt.data]
    opt.output_size = opt._output_kv[opt.data]
    opt.needLog = opt._needLog_kv[opt.data]
    if opt.model == 'VAR':
        # The classical VAR baseline takes a separate code path.
        traditional()
        return

    epoch_losses, iter_losses, path = fun()
    print('path : ', path)
    print('loss : \n', np.mean(epoch_losses))
    if len(epoch_losses) > 1:
        Visualizer().drawEpochLoss(epoch_losses[3:])
    print('min: ', min(epoch_losses))
    print('\n', 'epoch_losses:\n', epoch_losses)
def test():
    """Separate vocals from a fixed mp3 snippet with the trained U-Net and write wavs."""
    vis = Visualizer(env='svs')
    model = getattr(models, 'Unet')().eval()
    model.load_state_dict(
        t.load('G:/Unet_svs/check/epoch_219__0724_16_57_35.pth'))

    # STFT magnitude/phase of the mixture at 8192 Hz.
    mix_wav, _ = load("C:/Users/lenovo/Music/c.mp3", sr=8192)
    mix_wav_mag, mix_wav_phase = magphase(
        stft(mix_wav, n_fft=1024, hop_length=768))

    # Take a 128-frame window starting at frame 700.
    START = 700
    END = START + 128
    mix_wav_mag = mix_wav_mag[:, START:END]
    mix_wav_phase = mix_wav_phase[:, START:END]
    print(mix_wav_mag.shape)

    # Drop the DC bin and shape to (1, 1, 512, 128) for the network.
    net_in = t.from_numpy(mix_wav_mag[1:])
    net_in.unsqueeze_(0)
    net_in.unsqueeze_(0)
    vis.img('a', net_in)
    print(net_in.shape)

    with t.no_grad():
        net_in = Variable(net_in)
        score = model(net_in)

    # The network predicts a soft mask; apply it to the input magnitude.
    predict = net_in.data * score.data
    print(predict.shape)
    target_pred_mag = predict.view(512, 128).cpu().numpy()
    # Re-insert the DC row that was stripped before inference.
    target_pred_mag = np.vstack((np.zeros((128)), target_pred_mag))
    vis.img('b', t.from_numpy(target_pred_mag))
    print(target_pred_mag.shape)

    write_wav(
        f'C:/Users/lenovo/Music/pred_vocal.wav',
        istft(target_pred_mag * mix_wav_phase, win_length=1024, hop_length=768),
        8192, norm=True)
    write_wav(
        f'C:/Users/lenovo/Music/pred_mix.wav',
        istft(mix_wav_mag * mix_wav_phase, win_length=1024, hop_length=768),
        8192, norm=True)
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
from utils.visualize import Visualizer
from dataset.dataset import MultiBranch_Data
from apex import amp
from apex.parallel import DistributedDataParallel
import warnings
from warmup_scheduler import GradualWarmupScheduler

# yours
from models.sfcn import SFCN
from dataset.brain_age_dataset import CombinedData

# NOTE(review): `args` must already be defined when this module is imported —
# confirm where it comes from.
vis = Visualizer(args.env_name)


def reduce_mean(tensor, nprocs):
    """All-reduce `tensor` (sum) across `nprocs` processes and return the mean."""
    reduced = tensor.clone()
    dist.all_reduce(reduced, op=dist.ReduceOp.SUM)
    reduced /= nprocs
    return reduced


class data_prefetcher():
    """Prefetches batches on a side CUDA stream to overlap copies with compute."""

    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.cuda.Stream()
        # NOTE(review): `preload` is defined elsewhere in this file.
        self.preload()
@author: dragon """ import tqdm import torch as t from model.loss import l2_loss from utils.config import config from data.FlowData import FlowData from torch.autograd import Variable from model.networks import NetD, NetG, weights_init from torchnet.meter import AverageValueMeter from utils.visualize import Visualizer from model_tv.network import model from model_tv.train_options import arguments if __name__ == '__main__': vis = Visualizer('CUHK') args = arguments().parse() args.data_size = [ config.batch_size, config.iChanel, config.iWidth, config.iHeight ] datapair = FlowData('dataset/Avenue_Dataset/training_videos') dataloader = t.utils.data.DataLoader(datapair, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers, drop_last=True) netg, netd = NetG(config), NetD(config) net_st_fusion = model(args).cuda() map_location = lambda storage, loc: storage if config.netd_path:
def train(**kwargs):
    """Standard DogCat training loop: fit, validate, plot and anneal the LR."""
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters (running loss + binary confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        print("trian epoch: ", epoch)

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualisation (detach keeps autograd out).
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())
            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        model.save()

        # validate and visualise
        print("start eval:")
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr))

        # anneal the LR when the epoch loss stopped improving;
        # mutating param_groups keeps optimizer state (momentum etc.) intact.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        vis.plot('lr', lr)
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a flower classifier with validation/test tracking and best-model saving.

    Fixes over the original:
    - `best_accuracy` was only initialised in a commented-out line, so the
      best-model comparison raised NameError; it is now initialised to 0.
    - the `elif opt.use_weight_init:` branch contained only comments
      (IndentationError); an explicit `pass` keeps it valid.
    - `loss.data[0]` (pre-0.4 PyTorch indexing) replaced by `loss.item()`.
    """
    # load kwargs
    opt.parse(kwargs)
    print(kwargs)

    # visdom, and log the effective config
    vis = Visualizer(opt.env)
    vis.log('user config:')
    for k, v in opt.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log('{} {}'.format(k, getattr(opt, k)))

    # config model
    model = getattr(models, opt.model)()
    if opt.use_pretrained_model:
        model = load_pretrained()
    if opt.load_model_path:
        # load exist model
        model.load(opt.load_model_path)
    elif opt.use_weight_init:
        # weight init currently disabled: model.apply(weight_init)
        pass
    if opt.use_gpu:
        model.cuda()

    # generate data
    train_data = Flower(train=True)
    val_data = Flower(train=False)
    test_data = Flower(test=True)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=True, num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data, opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)

    # criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    if 'Dense' in opt.model:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                    nesterov=True, weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=opt.weight_decay)

    # meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(17)  # 17 flower classes
    previous_loss = 1e100
    best_accuracy = 0  # was commented out, causing NameError below

    # start training
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for batch_index, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update meters: score is [batch_size, 17], target is [batch_size]
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if batch_index % opt.print_freq == opt.print_freq - 1:
                print('loss ', loss_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

        # save model for this epoch
        if opt.use_pretrained_model is False and epoch % opt.save_freq == 0:
            model.save()

        # validate / test
        val_cm, val_accuracy = val(model, val_dataloader)
        test_cm, test_accuracy = val(model, test_dataloader)
        print('Epoch {}/{}: val_accuracy {}'.format(epoch, opt.max_epoch, val_accuracy))
        vis.plot('val_accuracy', val_accuracy)
        vis.plot('test_accuracy', test_accuracy)
        vis.log('epoch:{epoch}, lr:{lr}, loss:{loss}'.format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        # track the best validation model
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(),
                       './checkpoints/best_{}.pth'.format(opt.model))
            if opt.use_pretrained_model is False:
                model.save('./checkpoints/best_{}.pth'.format(model.model_name))

        # anneal learning rate when the epoch loss stopped improving
        if float(loss_meter.value()[0]) > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    print('Best model validation accuracy {}'.format(best_accuracy))
from tqdm import tqdm
from data.dataset2 import CascadeData
from data.dataset import Liver, Tumor
from loss.DiceLoss import DiceLoss
from loss.TverskyLoss import TverskyLoss
from loss.LovaszLoss import lovasz_hinge
from utils.visualize import Visualizer
import models
import pickle
from config.configuration import DefaultConfig
from torch import nn
import warnings

warnings.filterwarnings("ignore")

opt = DefaultConfig()
vis = Visualizer(opt.env)


def initial_params(module):
    """Kaiming-initialise 3D (de)conv weights and zero their biases.

    Fix: `nn.init.constant` was deprecated and later removed from PyTorch;
    the in-place `nn.init.constant_` has identical behaviour.
    """
    if isinstance(module, nn.Conv3d) or isinstance(module, nn.ConvTranspose3d):
        # 0.25 is kaiming_normal_'s `a` (negative slope) parameter —
        # presumably matching a leaky activation; TODO confirm.
        nn.init.kaiming_normal_(module.weight.data, 0.25)
        nn.init.constant_(module.bias.data, 0)


def train_liver_net():
    # Step 1: load the model (model, pretrained params, GPU).
    # model = getattr(models, opt.model)(net_type="liver_seg")  # equivalent to models.ResUNet()
    model = models.DilatedDenseUNet()  # equivalent to models.ResUNet()
    # model = models.ResUNet()  # equivalent to models.ResUNet()
    if opt.liver_model_path:
        print("current model path is: ", opt.liver_model_path)
def train(args):
    """Train a face-recognition backbone + margin head, with periodic
    LFW / AgeDB-30 / CFP-FP evaluation and checkpointing.

    Fix: train accuracy used `np.array(...)` directly on the prediction/label
    tensors, which raises for CUDA tensors; compare on-device tensors and use
    `.sum().item()` instead (identical result on CPU).
    """
    # gpu init
    multi_gpus = len(args.gpus.split(',')) > 1
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # log init
    save_dir = os.path.join(
        args.save_dir,
        args.model_pre + args.backbone.upper() + '_' +
        datetime.now().strftime('%Y%m%d_%H%M%S'))
    if os.path.exists(save_dir):
        raise NameError('model dir exists!')
    os.makedirs(save_dir)
    logging = init_log(save_dir)
    _print = logging.info

    # dataset loader
    transform = transforms.Compose([
        transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
        transforms.Normalize(mean=(0.5, 0.5, 0.5),
                             std=(0.5, 0.5, 0.5))  # range [0.0, 1.0] -> [-1.0, 1.0]
    ])
    # training dataset
    trainset = CASIAWebFace(args.train_root, args.train_file_list, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                              shuffle=True, num_workers=8,
                                              drop_last=False)
    # test datasets
    lfwdataset = LFW(args.lfw_test_root, args.lfw_file_list, transform=transform)
    lfwloader = torch.utils.data.DataLoader(lfwdataset, batch_size=128,
                                            shuffle=False, num_workers=4,
                                            drop_last=False)
    agedbdataset = AgeDB30(args.agedb_test_root, args.agedb_file_list,
                           transform=transform)
    agedbloader = torch.utils.data.DataLoader(agedbdataset, batch_size=128,
                                              shuffle=False, num_workers=4,
                                              drop_last=False)
    cfpfpdataset = CFP_FP(args.cfpfp_test_root, args.cfpfp_file_list,
                          transform=transform)
    cfpfploader = torch.utils.data.DataLoader(cfpfpdataset, batch_size=128,
                                              shuffle=False, num_workers=4,
                                              drop_last=False)

    # define backbone and margin layer
    if args.backbone == 'MobileFace':
        net = MobileFaceNet()
    elif args.backbone == 'Res50':
        net = ResNet50()
    elif args.backbone == 'Res101':
        net = ResNet101()
    elif args.backbone == 'Res50_IR':
        net = SEResNet_IR(50, feature_dim=args.feature_dim, mode='ir')
    elif args.backbone == 'SERes50_IR':
        net = SEResNet_IR(50, feature_dim=args.feature_dim, mode='se_ir')
    elif args.backbone == 'SphereNet':
        net = SphereNet(num_layers=64, feature_dim=args.feature_dim)
    else:
        print(args.backbone, ' is not available!')

    if args.margin_type == 'ArcFace':
        margin = ArcMarginProduct(args.feature_dim, trainset.class_nums,
                                  s=args.scale_size)
    elif args.margin_type == 'CosFace':
        pass
    elif args.margin_type == 'SphereFace':
        pass
    elif args.margin_type == 'InnerProduct':
        margin = InnerProduct(args.feature_dim, trainset.class_nums)
    else:
        print(args.margin_type, 'is not available!')

    if args.resume:
        print('resume the model parameters from: ', args.net_path, args.margin_path)
        net.load_state_dict(torch.load(args.net_path)['net_state_dict'])
        margin.load_state_dict(torch.load(args.margin_path)['net_state_dict'])

    # define optimizers for different layer
    criterion_classi = torch.nn.CrossEntropyLoss().to(device)
    optimizer_classi = optim.SGD(
        [{'params': net.parameters(), 'weight_decay': 5e-4},
         {'params': margin.parameters(), 'weight_decay': 5e-4}],
        lr=0.1, momentum=0.9, nesterov=True)
    scheduler_classi = lr_scheduler.MultiStepLR(optimizer_classi,
                                                milestones=[20, 35, 45], gamma=0.1)

    if multi_gpus:
        net = DataParallel(net).to(device)
        margin = DataParallel(margin).to(device)
    else:
        net = net.to(device)
        margin = margin.to(device)

    best_lfw_acc = 0.0
    best_lfw_iters = 0
    best_agedb30_acc = 0.0
    best_agedb30_iters = 0
    best_cfp_fp_acc = 0.0
    best_cfp_fp_iters = 0
    total_iters = 0
    vis = Visualizer(env='softmax_train')

    for epoch in range(1, args.total_epoch + 1):
        # NOTE(review): stepping the scheduler at epoch start predates the
        # "optimizer.step() before lr_scheduler.step()" convention — confirm.
        scheduler_classi.step()
        _print('Train Epoch: {}/{} ...'.format(epoch, args.total_epoch))
        net.train()
        since = time.time()

        for data in trainloader:
            img, label = data[0].to(device), data[1].to(device)
            feature = net(img)
            output = margin(feature)
            loss_classi = criterion_classi(output, label)
            total_loss = loss_classi
            optimizer_classi.zero_grad()
            total_loss.backward()
            optimizer_classi.step()
            total_iters += 1

            # print train information
            if total_iters % 100 == 0:
                # current training accuracy — compare on-device tensors; the
                # previous np.array(...) conversion raised for CUDA tensors.
                _, predict = torch.max(output.data, 1)
                total = label.size(0)
                correct = (predict == label).sum().item()
                time_cur = (time.time() - since) / 100
                since = time.time()
                vis.plot_curves({'train loss': loss_classi.item()},
                                iters=total_iters, title='train loss',
                                xlabel='iters', ylabel='train loss')
                vis.plot_curves({'train accuracy': correct / total},
                                iters=total_iters, title='train accuracy',
                                xlabel='iters', ylabel='train accuracy')
                print(
                    "Iters: {:0>6d}/[{:0>2d}], loss_classi: {:.4f}, train_accuracy: {:.4f}, time: {:.2f} s/iter, learning rate: {}"
                    .format(total_iters, epoch, loss_classi.item(),
                            correct / total, time_cur,
                            scheduler_classi.get_lr()[0]))

            # save model
            if total_iters % args.save_freq == 0:
                msg = 'Saving checkpoint: {}'.format(total_iters)
                _print(msg)
                if multi_gpus:
                    net_state_dict = net.module.state_dict()
                    margin_state_dict = margin.module.state_dict()
                else:
                    net_state_dict = net.state_dict()
                    margin_state_dict = margin.state_dict()
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)
                torch.save({'iters': total_iters,
                            'net_state_dict': net_state_dict},
                           os.path.join(save_dir, 'Iter_%06d_net.ckpt' % total_iters))
                torch.save({'iters': total_iters,
                            'net_state_dict': margin_state_dict},
                           os.path.join(save_dir, 'Iter_%06d_margin.ckpt' % total_iters))

            # test accuracy
            if total_iters % args.test_freq == 0:
                # test model on lfw
                net.eval()
                getFeatureFromTorch('./result/cur_lfw_result.mat', net, device,
                                    lfwdataset, lfwloader)
                lfw_accs = evaluation_10_fold('./result/cur_lfw_result.mat')
                _print('LFW Ave Accuracy: {:.4f}'.format(np.mean(lfw_accs) * 100))
                if best_lfw_acc < np.mean(lfw_accs) * 100:
                    best_lfw_acc = np.mean(lfw_accs) * 100
                    best_lfw_iters = total_iters

                # test model on AgeDB30
                getFeatureFromTorch('./result/cur_agedb30_result.mat', net, device,
                                    agedbdataset, agedbloader)
                age_accs = evaluation_10_fold('./result/cur_agedb30_result.mat')
                _print('AgeDB-30 Ave Accuracy: {:.4f}'.format(np.mean(age_accs) * 100))
                if best_agedb30_acc < np.mean(age_accs) * 100:
                    best_agedb30_acc = np.mean(age_accs) * 100
                    best_agedb30_iters = total_iters

                # test model on CFP-FP
                getFeatureFromTorch('./result/cur_cfpfp_result.mat', net, device,
                                    cfpfpdataset, cfpfploader)
                cfp_accs = evaluation_10_fold('./result/cur_cfpfp_result.mat')
                _print('CFP-FP Ave Accuracy: {:.4f}'.format(np.mean(cfp_accs) * 100))
                if best_cfp_fp_acc < np.mean(cfp_accs) * 100:
                    best_cfp_fp_acc = np.mean(cfp_accs) * 100
                    best_cfp_fp_iters = total_iters

                _print(
                    'Current Best Accuracy: LFW: {:.4f} in iters: {}, AgeDB-30: {:.4f} in iters: {} and CFP-FP: {:.4f} in iters: {}'
                    .format(best_lfw_acc, best_lfw_iters, best_agedb30_acc,
                            best_agedb30_iters, best_cfp_fp_acc, best_cfp_fp_iters))
                vis.plot_curves({'lfw': np.mean(lfw_accs),
                                 'agedb-30': np.mean(age_accs),
                                 'cfp-fp': np.mean(cfp_accs)},
                                iters=total_iters, title='test accuracy',
                                xlabel='iters', ylabel='test accuracy')
                net.train()

    _print(
        'Finally Best Accuracy: LFW: {:.4f} in iters: {}, AgeDB-30: {:.4f} in iters: {} and CFP-FP: {:.4f} in iters: {}'
        .format(best_lfw_acc, best_lfw_iters, best_agedb30_acc,
                best_agedb30_iters, best_cfp_fp_acc, best_cfp_fp_iters))
    print('finishing training')
def train(self):
    """Jointly train a photo CNN and a sketch CNN for sketch-based image
    retrieval: per-branch cross-entropy classification losses plus a
    cross-modal triplet loss, with periodic recall evaluation and
    checkpointing of both networks.
    """
    # Build the two branch networks from the configured backbone.
    if self.net == 'vgg16':
        photo_net = DataParallel(self._get_vgg16()).cuda()
        sketch_net = DataParallel(self._get_vgg16()).cuda()
    elif self.net == 'resnet34':
        photo_net = DataParallel(self._get_resnet34()).cuda()
        sketch_net = DataParallel(self._get_resnet34()).cuda()
    elif self.net == 'resnet50':
        photo_net = DataParallel(self._get_resnet50()).cuda()
        sketch_net = DataParallel(self._get_resnet50()).cuda()
    # NOTE(review): no else branch — an unrecognized self.net leaves
    # photo_net/sketch_net unbound and raises NameError below.

    if self.fine_tune:
        # The sketch checkpoint path is derived from the photo path.
        photo_net_root = self.model_root
        sketch_net_root = self.model_root.replace('photo', 'sketch')

        photo_net.load_state_dict(
            t.load(photo_net_root, map_location=t.device('cpu')))
        sketch_net.load_state_dict(
            t.load(sketch_net_root, map_location=t.device('cpu')))

    print('net')
    print(photo_net)

    # triplet_loss = nn.TripletMarginLoss(margin=self.margin, p=self.p).cuda()
    photo_cat_loss = nn.CrossEntropyLoss().cuda()
    sketch_cat_loss = nn.CrossEntropyLoss().cuda()
    my_triplet_loss = TripletLoss().cuda()

    # optimizer — each branch gets its own Adam instance.
    photo_optimizer = t.optim.Adam(photo_net.parameters(), lr=self.lr)
    sketch_optimizer = t.optim.Adam(sketch_net.parameters(), lr=self.lr)

    if self.vis:
        vis = Visualizer(self.env)
    # NOTE(review): `vis` is bound only when self.vis is true, but the
    # recall plot in the test branch below uses it unconditionally —
    # NameError when self.test is true and self.vis is false. Confirm
    # whether test mode is meant to require visualization.

    triplet_loss_meter = AverageValueMeter()
    sketch_cat_loss_meter = AverageValueMeter()
    photo_cat_loss_meter = AverageValueMeter()

    data_loader = TripleDataLoader(self.dataloader_opt)
    dataset = data_loader.load_data()

    for epoch in range(self.epochs):
        print('---------------{0}---------------'.format(epoch))

        # Periodic evaluation (and checkpointing) every self.test_f epochs.
        if self.test and epoch % self.test_f == 0:
            tester_config = Config()
            tester_config.test_bs = 128
            tester_config.photo_net = photo_net
            tester_config.sketch_net = sketch_net
            tester_config.photo_test = self.photo_test
            tester_config.sketch_test = self.sketch_test

            tester = Tester(tester_config)
            test_result = tester.test_instance_recall()
            result_key = list(test_result.keys())
            # Plot the two recall metrics returned by the tester.
            vis.plot('recall',
                     np.array([
                         test_result[result_key[0]],
                         test_result[result_key[1]]
                     ]),
                     legend=[result_key[0], result_key[1]])

            if self.save_model:
                t.save(
                    photo_net.state_dict(),
                    self.save_dir + '/photo' + '/photo_' + self.net + '_%s.pth' % epoch)
                t.save(
                    sketch_net.state_dict(),
                    self.save_dir + '/sketch' + '/sketch_' + self.net + '_%s.pth' % epoch)

        photo_net.train()
        sketch_net.train()

        for ii, data in enumerate(dataset):
            photo_optimizer.zero_grad()
            sketch_optimizer.zero_grad()

            photo = data['P'].cuda()
            sketch = data['S'].cuda()
            label = data['L'].cuda()

            # Each branch returns (classification logits, embedding).
            p_cat, p_feature = photo_net(photo)
            s_cat, s_feature = sketch_net(sketch)

            # category loss
            p_cat_loss = photo_cat_loss(p_cat, label)
            s_cat_loss = sketch_cat_loss(s_cat, label)

            photo_cat_loss_meter.add(p_cat_loss.item())
            sketch_cat_loss_meter.add(s_cat_loss.item())

            # triplet loss
            loss = p_cat_loss + s_cat_loss

            # tri_record = 0.
            '''
            for i in range(self.batch_size):
                # negative
                negative_feature = t.cat([p_feature[0:i, :], p_feature[i + 1:, :]], dim=0)
                # print('negative_feature.size :', negative_feature.size())
                # photo_feature
                anchor_feature = s_feature[i, :]
                anchor_feature = anchor_feature.expand_as(negative_feature)
                # print('anchor_feature.size :', anchor_feature.size())
                # positive
                positive_feature = p_feature[i, :]
                positive_feature = positive_feature.expand_as(negative_feature)
                # print('positive_feature.size :', positive_feature.size())
                tri_loss = triplet_loss(anchor_feature, positive_feature, negative_feature)
                tri_record = tri_record + tri_loss
                # print('tri_loss :', tri_loss)
                loss = loss + tri_loss
            '''
            # print('tri_record : ', tri_record)
            # Batched triplet loss, averaged over the batch_size - 1
            # negatives per anchor (same normalization as the commented-out
            # per-sample loop above).
            my_tri_loss = my_triplet_loss(
                s_feature, p_feature) / (self.batch_size - 1)
            triplet_loss_meter.add(my_tri_loss.item())
            # print('my_tri_loss : ', my_tri_loss)
            # print(tri_record - my_tri_loss)

            loss = loss + my_tri_loss
            # print('loss :', loss)
            # loss = loss / opt.batch_size

            loss.backward()
            photo_optimizer.step()
            sketch_optimizer.step()

            if self.vis:
                # Plot the mean of each loss accumulated since the last
                # plot, then reset the meters.
                vis.plot('triplet_loss',
                         np.array([
                             triplet_loss_meter.value()[0],
                             photo_cat_loss_meter.value()[0],
                             sketch_cat_loss_meter.value()[0]
                         ]),
                         legend=[
                             'triplet_loss', 'photo_cat_loss',
                             'sketch_cat_loss'
                         ])
                triplet_loss_meter.reset()
                photo_cat_loss_meter.reset()
                sketch_cat_loss_meter.reset()
def train(**kwargs):
    """Update the configuration from command-line keyword arguments, then
    train the selected model on the Ictal dataset: SGD + cross-entropy,
    per-epoch validation, and learning-rate decay when the mean epoch
    loss stops improving.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    """(1)step1:加载网络,若有预训练模型也加载"""
    # Step 1: instantiate the network class named by opt.model.
    model = getattr(models, opt.model)()

    """(2)step2:处理数据"""
    # Step 2: datasets and loaders (same root; the flag picks the split).
    train_data = Ictal(opt.train_data_root, opt.model, train=True)  # training split
    val_data = Ictal(opt.train_data_root, opt.model, train=False)  # validation split
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    """(3)step3:定义损失函数和优化器"""
    # Step 3: loss function and optimizer.
    criterion = t.nn.CrossEntropyLoss()  # cross-entropy loss
    lr = opt.lr  # learning rate
    optimizer = t.optim.SGD(model.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)

    """(4)step4:统计指标,平滑处理之后的损失,还有混淆矩阵"""
    # Step 4: running statistics — smoothed loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    start = time.time()

    """(5)开始训练"""
    # Step 5: training loop.
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):
            # Train model parameters on one mini-batch.
            input = Variable(data)
            if opt.model == 'CNN_1d':
                # 1-D CNN expects (batch, channels, length).
                input = input.permute(0, 2, 1)
            target = Variable(label)

            # Clear accumulated gradients before the backward pass.
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()  # backpropagation
            # Update parameters.
            optimizer.step()

            # Update running statistics and visualization.
            loss_meter.add(loss.item())
            # print score.shape, target.shape
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # Drop into the debugger when the debug flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save(epoch)

        """计算验证集上的指标及可视化"""
        # Per-epoch validation metrics and visualization.
        val_cm, val_accuracy = val(model, val_dataloader, opt.model)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        # Also measure accuracy on the training split for comparison.
        tra_cm, tra_accuracy = val(model, train_dataloader, opt.model)
        print("epoch:", epoch, "loss:", loss_meter.value()[0], "val_accuracy:",
              val_accuracy, "tra_accuracy:", tra_accuracy)

        """如果损失不再下降,则降低学习率"""
        # Decay the learning rate when the mean epoch loss increased.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]

    end = time.time()
    print(end - start)
def train(**kwargs):
    """Train a DCGAN: alternately update the discriminator (every
    opt.d_every batches) and the generator (every opt.g_every batches),
    plot losses and sample grids to visdom, and periodically checkpoint
    both networks and save images generated from a fixed noise batch.
    """
    # Override config attributes with command-line keyword arguments.
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    if opt.vis:
        from utils.visualize import Visualizer
        vis = Visualizer(opt.env)

    # Resize, center-crop, and map pixels to [-1, 1] (matches the tanh
    # output range implied by the (0.5, 0.5) normalization).
    transforms = tv.transforms.Compose([
        tv.transforms.Scale(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    dataset = tv.datasets.ImageFolder(opt.data_path, transform=transforms)
    # drop_last keeps every batch the same size as the fixed label/noise
    # batches built below.
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=opt.num_workers,
                                         drop_last=True)

    # 定义网络
    # Networks, optionally warm-started from checkpoints (loaded on CPU).
    netg, netd = NetGenerator(opt), NetD(opt)
    map_location = lambda storage, loc: storage
    if opt.netd_path:
        netd.load_state_dict(t.load(opt.netd_path, map_location=map_location))
    if opt.netg_path:
        netg.load_state_dict(t.load(opt.netg_path, map_location=map_location))

    # 定义优化器和损失
    # Optimizers and loss.
    optimizer_g = t.optim.Adam(netg.parameters(),
                               opt.G_lr,
                               betas=(opt.beta1, 0.999))
    optimizer_d = t.optim.Adam(netd.parameters(),
                               opt.D_lr,
                               betas=(opt.beta1, 0.999))
    criterion = t.nn.BCELoss()

    # Real images get label 1, fakes get label 0.
    # `noises` feeds the generator; `fix_noises` is held constant so the
    # sample grid is comparable across training.
    true_labels = Variable(t.ones(opt.batch_size))
    fake_labels = Variable(t.zeros(opt.batch_size))
    fix_noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))
    noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))

    errord_meter = AverageValueMeter()
    errorg_meter = AverageValueMeter()

    if opt.use_gpu:
        netd.cuda()
        netg.cuda()
        criterion.cuda()
        true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
        fix_noises, noises = fix_noises.cuda(), noises.cuda()

    epochs = range(opt.max_epoch)
    for epoch in iter(epochs):
        for ii, (img, _) in tqdm.tqdm(enumerate(dataloader)):
            real_img = Variable(img)
            if opt.use_gpu:
                real_img = real_img.cuda()

            if ii % opt.d_every == 0:
                # 训练判别器
                optimizer_d.zero_grad()

                ## Push real images toward the "real" label.
                output = netd(real_img)
                error_d_real = criterion(output, true_labels)
                error_d_real.backward()

                ## Push generated images toward the "fake" label.
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                # detach(): no generator gradients during the D update.
                fake_img = netg(noises).detach()
                output = netd(fake_img)
                error_d_fake = criterion(output, fake_labels)
                error_d_fake.backward()
                optimizer_d.step()

                error_d = error_d_fake + error_d_real
                # NOTE(review): `.data[0]` is pre-0.4 PyTorch; newer
                # versions require `.item()` (other loops in this file
                # already use it) — confirm the pinned torch version.
                errord_meter.add(error_d.data[0])

            if ii % opt.g_every == 0:
                # 训练生成器 — make fakes score as real.
                optimizer_g.zero_grad()
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises)
                output = netd(fake_img)
                error_g = criterion(output, true_labels)
                error_g.backward()
                optimizer_g.step()
                errorg_meter.add(error_g.data[0])

            if opt.vis and ii % opt.plot_every == opt.plot_every - 1:
                ## 可视化
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                fix_fake_imgs = netg(fix_noises)
                # Undo the (-1, 1) normalization for display.
                vis.images(fix_fake_imgs.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='fixfake')
                vis.plot('error_d', errord_meter.value()[0])
                vis.images(real_img.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='real')
                vis.plot('error_g', errorg_meter.value()[0])

        if epoch % opt.decay_every == 0:
            # 保存模型、图片
            # NOTE(review): fix_fake_imgs is bound only inside the
            # opt.vis/plot_every branch above — this line raises
            # NameError when opt.vis is false (or plot_every never
            # fired). Consider regenerating from fix_noises here.
            tv.utils.save_image(fix_fake_imgs.data[:64],
                                '%s/%s.png' %
                                (opt.save_path, (epoch + opt.startpoint)),
                                normalize=True,
                                range=(-1, 1))
            t.save(netd.state_dict(),
                   'checkpoints/netd_%s.pth' % (epoch + opt.startpoint))
            t.save(netg.state_dict(),
                   'checkpoints/netg_%s.pth' % (epoch + opt.startpoint))
            errord_meter.reset()
            errorg_meter.reset()
            # Re-create the optimizers; this also discards Adam's moment
            # estimates for a fresh start after each checkpoint interval.
            optimizer_g = t.optim.Adam(netg.parameters(),
                                       opt.G_lr,
                                       betas=(opt.beta1, 0.999))
            optimizer_d = t.optim.Adam(netd.parameters(),
                                       opt.D_lr,
                                       betas=(opt.beta1, 0.999))
def train():
    """Train the DoubleSequence model on the Kesci app data:
    BCE-with-logits loss, Adam, loss-plateau learning-rate decay (×0.9),
    and train/val/test F1 reporting to visdom every third epoch.
    """
    # Visdom environment tagged with the launch time (MMDDhhmm).
    vis = Visualizer("Kesci" + time.strftime('%m%d%H%M'))

    train_data = AppData("../kesci/data/data_v3_23d/train_ab.json",
                         iflabel=True)
    val_data = AppData("../kesci/data/data_v3_23d/val_ab.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 512, shuffle=False, num_workers=2)

    test_data = AppData("../kesci/data/data_v3_23d/test_ab.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 512, shuffle=False, num_workers=2)

    criterion = t.nn.BCEWithLogitsLoss().cuda()
    learning_rate = 0.002
    weight_decay = 0.0003
    model = DoubleSequence(31, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    # Smoothed loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(400):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, property, target) in tqdm(enumerate(train_dataloader)):
            # Two-input model: the sequence tensor plus per-app properties.
            input = Variable(data).cuda()
            input2 = Variable(property).cuda()
            target = Variable(target).cuda()

            output = model(input, input2)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # NOTE(review): `.data[0]` is pre-0.4 PyTorch; newer versions
            # require `.item()` (other loops in this file already use it).
            loss_meter.add(loss.data[0])
            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        # Every third epoch: F1 on train and validation splits.
        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
            val_cm, val_f1 = val(model, val_dataloader)
            vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # Decay the learning rate when the mean epoch loss increased.
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.9
            # 第二种降低学习率的方法:不会有moment等信息的丢失
            # (set lr in-place on param_groups so optimizer state,
            # e.g. Adam moments, is preserved)
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        # Every third epoch: checkpoint and report test F1 + a summary log.
        if epoch % 3 == 2:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "训练集:{train_f1:%}, {train_pre:%}, {train_rec:%} | 验证集:{val_f1:%}, {val_pre:%}, {val_rec:%} | \
测试集:{test_f1:%}, {test_pre:%}, {test_rec:%} | {train_true_num:%}, \
{val_true_num:%}, {test_true_num:%}"
                .format(
                    train_f1=train_f1,
                    val_f1=val_f1,
                    test_f1=test_f1,
                    train_true_num=train_cm.value()[:, 0].sum() / len(train_data),
                    val_true_num=val_cm.value()[:, 0].sum() / len(val_data),
                    test_true_num=test_cm.value()[:, 0].sum() / len(test_data),
                    train_pre=train_cm.value()[0][0] / train_cm.value()[0].sum(),
                    train_rec=train_cm.value()[0][0] / train_cm.value()[:, 0].sum(),
                    val_pre=val_cm.value()[0][0] / val_cm.value()[0].sum(),
                    val_rec=val_cm.value()[0][0] / val_cm.value()[:, 0].sum(),
                    test_pre=test_cm.value()[0][0] / test_cm.value()[0].sum(),
                    test_rec=test_cm.value()[0][0] / test_cm.value()[:, 0].sum()))
def train(**kwargs):
    """Train a Dogs-vs-Cats classifier.

    Flow: parse CLI overrides into opt -> build model (optionally loading
    a checkpoint) -> build train/val loaders -> train with cross-entropy
    and the model's own optimizer, plotting loss to visdom, validating
    each epoch, and decaying the learning rate whenever the mean epoch
    loss rises.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data — both splits come from train_data_root; the train flag
    # selects the train/validation partition.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer (the model supplies its own optimizer)
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters — smoothed loss and a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            # detach for safety — keep the autograd graph out of the meter
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()), lr=lr))

        # update learning rate when the mean epoch loss stopped improving
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # set lr in-place on param_groups so optimizer state
            # (momentum etc.) is preserved
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]