def validate(self, logger):
    """Run one pass over the validation set and report loss/accuracy.

    :param logger: logger used to record the final summary line
    :return: (losses, acc) AverageMeters accumulated over the epoch
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    self.model.eval()

    end = time.time()
    # inference only -- skip autograd graph construction
    with torch.no_grad():
        for i, (images, labels) in enumerate(self.val_loader):
            if check_gpu() > 0:
                # `async=True` is a SyntaxError on Python >= 3.7 (`async` is a
                # keyword); non_blocking is the supported spelling
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            # compute y_pred (Variable wrappers are obsolete; tensors work directly)
            y_pred = self.model(images)
            if self.model_type == 'I3D':
                # I3D returns a tuple; logits come first
                y_pred = y_pred[0]
            loss = self.criterion(y_pred, labels)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(y_pred.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                print('TrainVal: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(self.val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))

    print(' * Accuracy {acc.avg:.3f} Loss {loss.avg:.3f}'.format(
        acc=acc, loss=losses))
    logger.info(' * Accuracy {acc.avg:.3f} Loss {loss.avg:.3f}'.format(
        acc=acc, loss=losses))
    return losses, acc
def train(train_loader, model, criterion, optimizer, epoch, args):
    """
    Run one training epoch of the proposed ResNet classifier.

    :param train_loader: default data_loader in pytorch
    :param model: the proposed model (ResNet for our setup)
    :param criterion: loss used for optimization
    :param optimizer: adam or sgd is recommended
    :param epoch: index of the current epoch (display only)
    :param args: parsed user arguments (gpu, print_freq)
    :return: None
    """
    meter_time = AverageMeter('Time', ':6.3f')
    meter_data = AverageMeter('Data', ':6.3f')
    meter_loss = AverageMeter('Loss', ':.4e')
    meter_acc1 = AverageMeter('Acc@1', ':6.2f')
    meter_acc5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [meter_time, meter_data, meter_loss, meter_acc1, meter_acc5],
        prefix="Epoch: [{}]".format(epoch))

    # enable dropout / batch-norm running-stat updates
    model.train()

    tick = time.time()
    for step, (images, target) in enumerate(train_loader):
        # time spent waiting on the loader
        meter_data.update(time.time() - tick)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # forward pass
        output = model(images)
        loss = criterion(output, target)

        # bookkeeping: loss and top-1/top-5 accuracy
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        n = images.size(0)
        meter_loss.update(loss.item(), n)
        meter_acc1.update(acc1[0], n)
        meter_acc5.update(acc5[0], n)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # full-batch wall time, then restart the clock
        meter_time.update(time.time() - tick)
        tick = time.time()

        if (step + 1) % args.print_freq == 0:
            progress.display(step)
def init_train(self, con_weight: float = 1.0):
    """Pretrain the generator on a pure content-reconstruction objective.

    Runs for self.init_train_epoch epochs under a OneCycleLR schedule,
    logging the per-epoch loss to TensorBoard, writing generator weights,
    and dumping a reconstructed test image after every epoch.

    :param con_weight: multiplier applied to the content loss
    """
    test_img = self.get_test_image()
    meter = AverageMeter("Loss")
    self.writer.flush()
    # one-cycle schedule spanning the entire init phase
    lr_scheduler = OneCycleLR(self.optimizer_G,
                              max_lr=0.9999,
                              steps_per_epoch=len(self.dataloader),
                              epochs=self.init_train_epoch)
    # force the init-phase starting learning rate
    for g in self.optimizer_G.param_groups:
        g['lr'] = self.init_lr
    for epoch in tqdm(range(self.init_train_epoch)):
        meter.reset()
        # only the `train` tensor of each (style, smooth, train) triple is used
        for i, (style, smooth, train) in enumerate(self.dataloader, 0):
            # train = transform(test_img).unsqueeze(0)
            self.G.zero_grad(set_to_none=self.grad_set_to_none)
            train = train.to(self.device)
            generator_output = self.G(train)
            # content_loss = loss.reconstruction_loss(generator_output, train) * con_weight
            content_loss = self.loss.content_loss(generator_output, train) * con_weight
            # content_loss = F.mse_loss(train, generator_output) * con_weight
            content_loss.backward()
            self.optimizer_G.step()
            # per-batch scheduler step, as OneCycleLR expects
            lr_scheduler.step()
            meter.update(content_loss.detach())
        # NOTE(review): logs the running *sum* of the epoch's losses, not the
        # mean -- confirm this is intended
        self.writer.add_scalar(f"Loss : {self.init_time}", meter.sum.item(), epoch)
        self.write_weights(epoch + 1, write_D=False)
        self.eval_image(epoch, f'{self.init_time} reconstructed img', test_img)
    # restore the main-training learning rate for subsequent phases
    for g in self.optimizer_G.param_groups:
        g['lr'] = self.G_lr
def validate(val_loader, model, criterion, args):
    """
    Evaluate `model` over the validation set.

    :param val_loader: default data loader for validation data
    :param model: ResNet by default
    :param criterion: the loss to compute
    :param args: user-defined input (gpu, print_freq)
    :return: (top1.avg, top5.avg) averaged accuracies over the set
    """
    meter_time = AverageMeter('Time', ':6.3f')
    meter_loss = AverageMeter('Loss', ':.4e')
    meter_top1 = AverageMeter('Acc@1', ':6.2f')
    meter_top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [meter_time, meter_loss, meter_top1, meter_top5],
                             prefix='Test: ')

    # disable dropout / freeze batch-norm statistics
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # forward pass and loss
            output = model(images)
            loss = criterion(output, target)

            # bookkeeping: loss and top-1/top-5 accuracy
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            n = images.size(0)
            meter_loss.update(loss.item(), n)
            meter_top1.update(acc1[0], n)
            meter_top5.update(acc5[0], n)

            # per-batch wall time
            meter_time.update(time.time() - tick)
            tick = time.time()

            if (step + 1) % args.print_freq == 0:
                progress.display(step)

        print(
            ' Validation finished! Avg stats: Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
            .format(top1=meter_top1, top5=meter_top5))

    return meter_top1.avg, meter_top5.avg
def process(self):
    """Evaluate self.model on the test set and log accuracy/loss.

    Writes to '<data_folder>/test.log' and prints progress every
    self.print_freq batches.
    """
    acc = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()
    log_file = os.path.join(self.data_folder, 'test.log')
    logger = Logger('test', log_file)

    # switch to evaluate mode
    self.model.eval()

    # time.clock() was removed in Python 3.8; perf_counter is the replacement
    start_time = time.perf_counter()
    print("Begin testing")
    # inference only -- skip autograd graph construction
    with torch.no_grad():
        for i, (images, labels) in enumerate(self.test_loader):
            if check_gpu() > 0:
                # `async=True` is a SyntaxError on Python >= 3.7 (`async` is a
                # keyword); non_blocking is the supported spelling
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            # compute y_pred (Variable wrappers are obsolete)
            y_pred = self.model(images)
            loss = self.criterion(y_pred, labels)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(y_pred.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            if i % self.print_freq == 0:
                print('TestVal: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(self.test_loader), loss=losses,
                          top1=top1, top5=top5))

    print(' * Accuracy {acc.avg:.3f} Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'.format(
        acc=acc, top5=top5, loss=losses))
    end_time = time.perf_counter()
    print("Total testing time %.2gs" % (end_time - start_time))
    logger.info("Total testing time %.2gs" % (end_time - start_time))
    logger.info(' * Accuracy {acc.avg:.3f} Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'.format(
        acc=acc, top5=top5, loss=losses))
def main(args):
    """Benchmark the NTU dataloader: iterate batches, time the data loading,
    and visualize the first clip of every batch with its skeleton."""
    # NTU Dataset
    ntu_dataset = NTU(
        root=args.root,
        w=args.width,
        h=args.height,
        t=args.time,
        dataset='train',
        train=True,
        avi_dir=args.avi_dir,
        usual_transform=False,
    )

    # Pytorch dataloader
    loader = torch.utils.data.DataLoader(ntu_dataset,
                                         batch_size=args.batch_size,
                                         num_workers=args.workers,
                                         pin_memory=args.cuda,
                                         collate_fn=my_collate)

    # Loop
    data_time = AverageMeter()
    tick = time.time()
    for batch_idx, sample in enumerate(loader):
        # record how long the loader took to produce this batch
        data_time.update(time.time() - tick)

        # Get the data: clip (B, C, T, 224, 224), skeleton (B, T, 2, 25, 2)
        clip = sample['clip']
        skeleton = sample['skeleton']

        # Show first clip's first frame with its skeleton
        show_one_img(clip[0, :, 0], skeleton[0, 0])

        print("{}/{} : {time.val:.3f} ({time.avg:.3f}) sec/batch".format(
            batch_idx + 1, len(loader), time=data_time))
        sys.stdout.flush()

        tick = time.time()
def validate(val_loader, model, criterion, epoch, start_time):
    """Evaluate `model` for one epoch and return (top1.avg, top5.avg).

    Relies on module-level globals: `args`, `log`, `tb`, and (when
    args.distributed) `distributed_predict`.  `start_time` is accepted but
    unused in this function.
    """
    timer = TimeMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    eval_start_time = time.time()
    for i, (input, target) in enumerate(val_loader):
        # --short_epoch: smoke-test mode, stop after ~11 batches
        if args.short_epoch and (i > 10):
            break
        batch_num = i + 1
        timer.batch_start()
        if args.distributed:
            # prediction + metric reduction across all workers
            top1acc, top5acc, loss, batch_total = distributed_predict(
                input, target, model, criterion)
        else:
            with torch.no_grad():
                output = model(input)
                loss = criterion(output, target).data
            batch_total = input.size(0)
            top1acc, top5acc = accuracy(output.data, target, topk=(1, 5))

        # Eval batch done. Logging results
        timer.batch_end()
        losses.update(to_python_float(loss), to_python_float(batch_total))
        top1.update(to_python_float(top1acc), to_python_float(batch_total))
        top5.update(to_python_float(top5acc), to_python_float(batch_total))
        # always print the last batch, regardless of print_freq alignment
        should_print = (batch_num % args.print_freq == 0) or (batch_num == len(val_loader))
        if args.local_rank == 0 and should_print:
            output = (
                f'Test: [{epoch}][{batch_num}/{len(val_loader)}]\t'
                f'Time {timer.batch_time.val:.3f} ({timer.batch_time.avg:.3f})\t'
                f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                f'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                f'Acc@5 {top5.val:.3f} ({top5.avg:.3f})')
            log.verbose(output)

    tb.log_eval(top1.avg, top5.avg, time.time() - eval_start_time)
    tb.log('epoch', epoch)
    return top1.avg, top5.avg
def train(trn_loader, model, criterion, optimizer, scheduler, epoch):
    """One training epoch with optional fp16 loss scaling and BytePS
    (`bps.push_pull`) metric aggregation.

    Relies on module-level globals: `args`, `log`, `tb`, `validate_tensor`,
    `bps`.  Metrics are only computed/logged on rank 0 at print intervals.
    """
    net_meter = NetworkMeter()
    timer = TimeMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()
    for i, (input, target) in enumerate(trn_loader):
        # --short_epoch: smoke-test mode, stop after ~11 batches
        if args.short_epoch and (i > 10):
            break
        batch_num = i + 1
        timer.batch_start()
        # per-batch LR schedule
        scheduler.update_lr(epoch, i + 1, len(trn_loader))

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # always print the last batch, regardless of print_freq alignment
        should_print = (batch_num % args.print_freq == 0) or (batch_num == len(trn_loader))

        # compute gradient and do SGD step
        if args.fp16:
            # static loss scaling: scale before backward, unscale after step
            loss = loss * args.loss_scale
            # zero_grad() and converting fp16/fp32 is handled in optimizer
            loss.backward()
            optimizer.step(wait_for_finish=should_print)
            loss = loss / args.loss_scale
        else:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Train batch done. Logging results
        timer.batch_end()
        if args.local_rank == 0 and should_print:
            corr1, corr5 = correct(output.data, target, topk=(1, 5))
            reduced_loss, batch_total = to_python_float(
                loss.data), to_python_float(input.size(0))
            if args.distributed:
                # Must keep track of global batch size, since not all machines
                # are guaranteed equal batches at the end of an epoch
                validate_tensor[0] = batch_total
                validate_tensor[1] = reduced_loss
                validate_tensor[2] = corr1
                validate_tensor[3] = corr5
                # NOTE(review): validate_tensor is a module-level buffer;
                # push_pull sums it across all BytePS workers -- confirm shape
                batch_total, reduced_loss, corr1, corr5 = bps.push_pull(
                    validate_tensor, average=False, name="validation_tensor")
                batch_total = batch_total.cpu().numpy()
                reduced_loss = reduced_loss.cpu().numpy()
                corr1 = corr1.cpu().numpy()
                corr5 = corr5.cpu().numpy()
                reduced_loss = reduced_loss / bps.size()
            # convert correct-counts into percentages over the (global) batch
            top1acc = to_python_float(corr1) * (100.0 / batch_total)
            top5acc = to_python_float(corr5) * (100.0 / batch_total)
            losses.update(reduced_loss, batch_total)
            top1.update(top1acc, batch_total)
            top5.update(top5acc, batch_total)
            tb.log_memory()
            tb.log_trn_times(timer.batch_time.val, timer.data_time.val,
                             input.size(0))
            tb.log_trn_loss(losses.val, top1.val, top5.val)
            recv_gbit, transmit_gbit = net_meter.update_bandwidth()
            tb.log("sizes/batch_total", batch_total)
            tb.log('net/recv_gbit', recv_gbit)
            tb.log('net/transmit_gbit', transmit_gbit)
            output = (
                f'Epoch: [{epoch}][{batch_num}/{len(trn_loader)}]\t'
                f'Time {timer.batch_time.val:.3f} ({timer.batch_time.avg:.3f})\t'
                f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                f'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                f'Acc@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                f'Data {timer.data_time.val:.3f} ({timer.data_time.avg:.3f})\t'
                f'BW {recv_gbit:.3f} {transmit_gbit:.3f}')
            log.verbose(output)
            tb.update_step_count(batch_total)
class DataGenerator(object):
    """Drives an UnrealCV game instance to synthesize scene-text images.

    Each generation step moves the camera, re-renders the environment,
    places text in the scene, then retrieves and saves the rendered image
    together with per-word/per-character box annotations as JSON.
    """

    def __init__(self,
                 MovementModule='default',
                 EnvModule='default',
                 TextModule='default',
                 WindowHeight=720,
                 WindowWidth=1080,
                 MaxTextNum=15,
                 DataStoragePath='../../../GeneratedData/DataFraction_1',
                 camera_anchor_filepath='./camera_anchors/urbancity.txt',
                 EnvName='',
                 anchor_freq=10,
                 max_emissive=5,
                 FontSize=[8, 16],
                 use_real_img=0.1,
                 is_debug=True,
                 languages=["Latin"],
                 HighResFactor=2.0,
                 UnrealProjectName="./",
                 **kwargs):
        # NOTE(review): mutable defaults (FontSize, languages) are shared
        # across instances -- safe only if the modules never mutate them
        self.client = WrappedClient(UnrealCVClient, DataStoragePath,
                                    HighResFactor, UnrealProjectName)
        self.DataStoragePath = DataStoragePath
        self.UnrealProjectName = UnrealProjectName
        self.WindowHeight = WindowHeight
        self.WindowWidth = WindowWidth
        self.MaxTextNum = MaxTextNum
        self.is_debug = is_debug
        self.camera_anchor_filepath = camera_anchor_filepath
        self.anchor_freq = anchor_freq
        self.HighResFactor = HighResFactor
        self.RootPath = opa(DataStoragePath)
        # bump the trailing "_<n>" suffix until an unused folder is found;
        # assumes the path contains exactly one '_' -- TODO confirm
        while os.path.isdir(self.RootPath):
            root_path, count = self.RootPath.split('_')
            self.RootPath = root_path + '_' + str(int(count) + 1)
        print(f"Data will be saved to: {self.RootPath}")
        self.LabelPath = osp.join(self.RootPath, 'Label.json')
        self.DataLabel = None
        self.ImgFolder = osp.join(self.RootPath, 'imgs')
        self.LabelFolder = osp.join(self.RootPath, 'labels')
        self.WordFolder = osp.join(self.RootPath, 'WordCrops')
        self.DataCount = 0
        self.isConnected = False
        self.SaveFreq = 100
        # step 1
        self._InitializeDataStorage()
        # step 2
        if len(EnvName) > 0:
            StartEngine(EnvName)
        self._ConnectToGame()
        # step 3 set resolution & rotation
        self.client.setres(self.WindowWidth, self.WindowHeight)
        # self.client.setCameraRotation(0, 0, 0)
        self.EnvDepth = kwargs.get('EnvDepth', 100)
        # load modules: camera movement, environment rendering, text placement
        self.Wanderer = CameraSet[MovementModule](
            client=self.client,
            camera_anchor_filepath=self.camera_anchor_filepath,
            anchor_freq=self.anchor_freq)
        self.EnvRenderer = EnvSet[EnvModule](client=self.client)
        self.TextPlacer = TextPlacement[TextModule](
            client=self.client,
            MaxTextCount=self.MaxTextNum,
            ContentPath=osp.join(self.RootPath, 'WordCrops'),
            max_emissive=max_emissive,
            FontSize=FontSize,
            is_debug=is_debug,
            use_real_img=use_real_img,
            languages=languages,
            HighResFactor=HighResFactor)
        # initializer meters (per-stage timing)
        self.camera_meter = AverageMeter()
        self.env_meter = AverageMeter()
        self.text_meter = AverageMeter()
        self.retrieve_label_meter = AverageMeter()
        self.save_label_meter = AverageMeter()
        self.save_meter = AverageMeter()
        self._cleanup()

    def __del__(self):
        # best-effort shutdown of the game connection on GC
        if self.client.isconnected():
            self.client.QuitGame()
            self.client.disconnect()
        self._cleanup()
        # os.system('~/cache_light.png')

    def _cleanup(self):
        # remove stale engine screenshots from previous runs
        os.system(
            f'rm ../../../PackagedEnvironment/{self.UnrealProjectName}/Demo/Saved/Screenshots/LinuxNoEditor/*png'
        )
        #os.system(f'rm ../../../PackagedEnvironment/{self.UnrealProjectName}/Demo/Saved/Logs/*')

    def _InitializeDataStorage(self):
        # create output folders and reset label bookkeeping
        os.makedirs(self.ImgFolder, exist_ok=True)
        os.makedirs(self.LabelFolder, exist_ok=True)
        os.makedirs(self.WordFolder, exist_ok=True)
        self.DataCount = 0
        self.DataLabel = []
        # snapshot the visualization script next to the generated data
        os.system(f'cp vis.py {self.RootPath}/')

    def _ConnectToGame(self):
        # wait and connect, doubling the sleep each retry (capped > 120s)
        sleepTime = 1.0
        while True:
            self.client.connect()
            self.isConnected = self.client.isconnected()
            if self.isConnected:
                break
            else:
                if sleepTime > 120:
                    break
                time.sleep(sleepTime)
                sleepTime *= 2
        if not self.isConnected:
            print('Failed to connect to UnrealCV server.')
            sys.exit(-1)

    def _GenerateOneImageInstance(self, step_count, force_change_camera_anchor=False):
        """Produce one image + annotation pair.

        :param step_count: global step index forwarded to the camera module
        :param force_change_camera_anchor: request a new camera anchor
        :return: dict of kwargs to feed into the next call (anchor flag)
        """
        # step 1: move around
        time_stamp = time.time()
        if not self.is_debug:
            self.Wanderer.step(
                height=self.WindowHeight,
                width=self.WindowWidth,
                step=step_count,
                force_change_camera_anchor=force_change_camera_anchor)
        # NOTE(review): the chained timing relies on AverageMeter.update()
        # returning the current timestamp -- confirm against the meter class
        time_stamp = self.camera_meter.update(time.time() - time_stamp)
        # step 2: render env
        self.EnvRenderer.step()
        time_stamp = self.env_meter.update(time.time() - time_stamp)
        # step 3: place text
        self.TextPlacer.PutTextStep()
        time_stamp = self.text_meter.update(time.time() - time_stamp)
        if self.is_debug:
            print('Text Loaded, ready to retrieve data')
        # step 4: retrieve data
        img_path, Texts, WordBoxes, CharBoxes, TextNum = self.TextPlacer.RetrieveDataStep(
            osp.join(self.ImgFolder, f'{self.DataCount}.jpg'))
        time_stamp = self.retrieve_label_meter.update(time.time() - time_stamp)
        # an empty scene forces a new camera anchor on the next step
        force_change_camera_anchor = TextNum == 0
        DataLabel = {
            'imgfile': f'imgs/{self.DataCount}.jpg',
            'bbox': WordBoxes,
            'cbox': CharBoxes,
            'text': Texts,
            'is_difficult': [0 for _ in range(len(WordBoxes))]
        }
        json.dump(
            DataLabel,
            open(osp.join(self.LabelFolder, str(self.DataCount) + '.json'),
                 'w'))
        time_stamp = self.save_label_meter.update(time.time() - time_stamp)
        if self.is_debug:
            print('Finished waiting, ready to save img')
        self.client.SaveImg(img_path)
        time_stamp = self.save_meter.update(time.time() - time_stamp)
        self.DataCount += 1
        if self.is_debug:
            # step 5: visualize
            ShowImgAndAnnotation(img_path, Texts, WordBoxes, CharBoxes)
        time_stamp = time.time()
        return {'force_change_camera_anchor': force_change_camera_anchor}

    def StartGeneration(self, IterationNum=10000, sleep_time=0, sleep_freq=1):
        """Generate IterationNum images, printing stage timings periodically.

        `sleep_time` and `sleep_freq` are currently unused.
        """
        status = {'force_change_camera_anchor': False}
        for Count in range(IterationNum):
            # feed the previous step's anchor flag into the next step
            status = self._GenerateOneImageInstance(Count, **status)
            if Count % self.anchor_freq == 0:
                print(f"{Count} images created. Timing:")
                print(f' ----- camera: {self.camera_meter}')
                print(f' ----- env: {self.env_meter}')
                print(f' ----- text: {self.text_meter}')
                print(
                    f' ----- retrieve label: {self.retrieve_label_meter}'
                )
                print(f' ----- save label: {self.save_label_meter}')
                print(f' ----- retrieve image: {self.save_meter}')
def train(trn_loader, model, criterion, optimizer, scheduler, epoch):
    """One training epoch with optional fp16 master-weight updates and
    distributed metric reduction via `dist_utils`.

    Relies on module-level globals: `args`, `log`, `tb`, and -- in the fp16
    branch -- `model_params`/`master_params` plus their copy helpers.
    """
    net_meter = NetworkMeter()
    timer = TimeMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()
    for i, (input, target) in enumerate(trn_loader):
        # --short_epoch: smoke-test mode, stop after ~11 batches
        if args.short_epoch and (i > 10):
            break
        batch_num = i + 1
        timer.batch_start()
        # per-batch LR schedule
        scheduler.update_lr(epoch, i + 1, len(trn_loader))

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # compute gradient and do SGD step
        if args.fp16:
            # static loss scaling: scale before backward, copy grads to the
            # fp32 master weights, unscale, step, then copy weights back
            loss = loss * args.loss_scale
            model.zero_grad()
            loss.backward()
            # NOTE(review): model_params/master_params are module globals --
            # confirm they track this `model` instance
            model_grads_to_master_grads(model_params, master_params)
            for param in master_params:
                param.grad.data = param.grad.data / args.loss_scale
            optimizer.step()
            master_params_to_model_params(model_params, master_params)
            loss = loss / args.loss_scale
        else:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Train batch done. Logging results
        timer.batch_end()
        corr1, corr5 = correct(output.data, target, topk=(1, 5))
        reduced_loss, batch_total = to_python_float(
            loss.data), to_python_float(input.size(0))
        if args.distributed:
            # Must keep track of global batch size, since not all machines
            # are guaranteed equal batches at the end of an epoch
            metrics = torch.tensor([batch_total, reduced_loss, corr1,
                                    corr5]).float().cuda()
            batch_total, reduced_loss, corr1, corr5 = dist_utils.sum_tensor(
                metrics).cpu().numpy()
            reduced_loss = reduced_loss / dist_utils.env_world_size()
        # convert correct-counts into percentages over the (global) batch
        top1acc = to_python_float(corr1) * (100.0 / batch_total)
        top5acc = to_python_float(corr5) * (100.0 / batch_total)
        losses.update(reduced_loss, batch_total)
        top1.update(top1acc, batch_total)
        top5.update(top5acc, batch_total)
        # always print the last batch, regardless of print_freq alignment
        should_print = (batch_num % args.print_freq == 0) or (batch_num == len(trn_loader))
        if args.local_rank == 0 and should_print:
            tb.log_memory()
            tb.log_trn_times(timer.batch_time.val, timer.data_time.val,
                             input.size(0))
            tb.log_trn_loss(losses.val, top1.val, top5.val)
            recv_gbit, transmit_gbit = net_meter.update_bandwidth()
            tb.log("sizes/batch_total", batch_total)
            tb.log('net/recv_gbit', recv_gbit)
            tb.log('net/transmit_gbit', transmit_gbit)
            output = (
                f'Epoch: [{epoch}][{batch_num}/{len(trn_loader)}]\t'
                f'Time {timer.batch_time.val:.3f} ({timer.batch_time.avg:.3f})\t'
                f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                f'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                f'Acc@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                f'Data {timer.data_time.val:.3f} ({timer.data_time.avg:.3f})\t'
                f'BW {recv_gbit:.3f} {transmit_gbit:.3f}')
            log.verbose(output)
            tb.update_step_count(batch_total)
def process(self):
    """Evaluate self.model on the test set, optionally with ten-crop
    augmentation, and log accuracy/loss to '<data_folder>/test.log'."""
    acc = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()
    log_file = os.path.join(self.data_folder, 'test.log')
    logger = Logger('test', log_file)

    # switch to evaluate mode
    self.model.eval()

    # time.clock() was removed in Python 3.8; perf_counter is the replacement
    start_time = time.perf_counter()
    print("Begin testing")
    # inference only -- skip autograd graph construction
    with torch.no_grad():
        for i, (images, labels) in enumerate(self.test_loader):
            if check_gpu() > 0:
                # `async=True` is a SyntaxError on Python >= 3.7 (`async` is a
                # keyword); non_blocking is the supported spelling
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            if self.tencrop:
                # Due to ten-cropping, input batch is a 5D Tensor
                batch_size, number_of_crops, number_of_channels, height, width = images.size()
                # Fuse batch size and crops
                images = images.view(-1, number_of_channels, height, width)
                # Compute model output
                output_batch_crops = self.model(images)
                # Average predictions for each set of crops
                output_batch = output_batch_crops.view(batch_size,
                                                       number_of_crops,
                                                       -1).mean(1)
                # repeat each label once per crop (was hard-coded to 10)
                label_repeated = labels.repeat(number_of_crops, 1).transpose(
                    1, 0).contiguous().view(-1, 1).squeeze()
                loss = self.criterion(output_batch_crops, label_repeated)
            else:
                output_batch = self.model(images)
                loss = self.criterion(output_batch, labels)

            # measure accuracy and record loss
            # NOTE: with ten-crop, images.size(0) counts crops, not samples
            prec1, prec5 = accuracy(output_batch.data, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            acc.update(prec1.item(), images.size(0))
            top1.update(prec1.item(), images.size(0))
            top5.update(prec5.item(), images.size(0))

            if i % self.print_freq == 0:
                print('TestVal: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(self.test_loader), loss=losses,
                          top1=top1, top5=top5))

    print(' * Accuracy {acc.avg:.3f} Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'
          .format(acc=acc, top5=top5, loss=losses))
    end_time = time.perf_counter()
    print("Total testing time %.2gs" % (end_time - start_time))
    logger.info("Total testing time %.2gs" % (end_time - start_time))
    # log the same summary that is printed (the old message dropped Acc@5)
    logger.info(' * Accuracy {acc.avg:.3f} Acc@5 {top5.avg:.3f} Loss {loss.avg:.3f}'
                .format(acc=acc, top5=top5, loss=losses))
def train(args):
    """Full training driver for the landmark model.

    Builds augmentation pipelines, datasets and loaders, trains for
    args.epochs with SGD + MultiStepLR and wing loss, checkpoints every
    epoch, and evaluates on each list in args.eval_lists.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    tfboard_writer = SummaryWriter()
    logname = '{}'.format(datetime.datetime.now().strftime('%Y-%m-%d-%H:%M'))
    logger = Logger(args.save_path, logname)
    logger.log('Arguments : -------------------------------')
    for name, value in args._get_kwargs():
        logger.log('{:16} : {:}'.format(name, value))

    # Data Augmentation
    # mean_fill feeds the AugCrop transform, which is currently disabled below
    mean_fill = tuple([int(x * 255) for x in [0.485, 0.456, 0.406]])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = [
        transforms.AugTransBbox(args.transbbox_prob, args.transbbox_percent)
    ]
    train_transform += [transforms.PreCrop(args.pre_crop_expand)]
    train_transform += [
        transforms.TrainScale2WH((args.crop_width, args.crop_height))
    ]
    #train_transform += [transforms.AugHorizontalFlip(args.flip_prob)]
    #train_transform += [transforms.AugScale(args.scale_prob, args.scale_min, args.scale_max)]
    #train_transform += [transforms.AugCrop(args.crop_width, args.crop_height, args.crop_perturb_max, mean_fill)]
    if args.rotate_max:
        train_transform += [transforms.AugRotate(args.rotate_max)]
    train_transform += [
        transforms.AugGaussianBlur(args.gaussianblur_prob,
                                   args.gaussianblur_kernel_size,
                                   args.gaussianblur_sigma)
    ]
    train_transform += [transforms.ToTensor(), normalize]
    train_transform = transforms.Compose(train_transform)
    eval_transform = transforms.Compose([
        transforms.PreCrop(args.pre_crop_expand),
        transforms.TrainScale2WH((args.crop_width, args.crop_height)),
        transforms.ToTensor(), normalize
    ])

    # Training datasets
    train_data = GeneralDataset(args.num_pts, train_transform,
                                args.train_lists)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # Evaluation Dataloader (one loader per evaluation list)
    eval_loaders = []
    for eval_ilist in args.eval_lists:
        eval_idata = GeneralDataset(args.num_pts, eval_transform, eval_ilist)
        eval_iloader = torch.utils.data.DataLoader(eval_idata,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        eval_loaders.append(eval_iloader)

    net = Model(args.num_pts)
    logger.log("=> network :\n {}".format(net))
    logger.log('arguments : {:}'.format(args))
    # only optimize parameters that require gradients
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       net.parameters()),
                                lr=args.LR,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=args.nesterov)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=args.schedule,
                                                     gamma=args.gamma)
    criterion = wing_loss(args)
    # criterion = torch.nn.MSELoss(reduce=True)
    net = net.cuda()
    criterion = criterion.cuda()
    net = torch.nn.DataParallel(net)

    # resume from the last recorded checkpoint, if any
    last_info = logger.last_info()
    if last_info.exists():
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        last_info = torch.load(last_info)
        start_epoch = last_info['epoch'] + 1
        checkpoint = torch.load(last_info['last_checkpoint'])
        assert last_info['epoch'] == checkpoint[
            'epoch'], 'Last-Info is not right {:} vs {:}'.format(
                last_info, checkpoint['epoch'])
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        logger.log("=> load-ok checkpoint '{:}' (epoch {:}) done".format(
            logger.last_info(), checkpoint['epoch']))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        start_epoch = 0

    for epoch in range(start_epoch, args.epochs):
        # NOTE(review): scheduler.step() before the epoch's optimizer steps is
        # the pre-PyTorch-1.1 ordering; newer versions expect it afterwards
        scheduler.step()
        net.train()

        # train
        img_prediction = []
        img_target = []
        train_losses = AverageMeter()
        for i, (inputs, target) in enumerate(train_loader):
            target = target.squeeze(1)
            inputs = inputs.cuda()
            target = target.cuda()
            #print(inputs.size())
            #ssert 1==0
            prediction = net(inputs)
            loss = criterion(prediction, target)
            train_losses.update(loss.item(), inputs.size(0))
            # collect per-image predictions/targets on CPU for the NME metric
            prediction = prediction.detach().to(torch.device('cpu')).numpy()
            target = target.detach().to(torch.device('cpu')).numpy()
            for idx in range(inputs.size()[0]):
                img_prediction.append(prediction[idx, :])
                img_target.append(target[idx, :])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % args.print_freq == 0 or i + 1 == len(train_loader):
                logger.log(
                    '[train Info]: [epoch-{}-{}][{:04d}/{:04d}][Loss:{:.2f}]'.
                    format(epoch, args.epochs, i, len(train_loader),
                           loss.item()))
        train_nme = compute_nme(args.num_pts, img_prediction, img_target)
        logger.log('epoch {:02d} completed!'.format(epoch))
        logger.log(
            '[train Info]: [epoch-{}-{}][Avg Loss:{:.6f}][NME:{:.2f}]'.format(
                epoch, args.epochs, train_losses.avg, train_nme * 100))
        tfboard_writer.add_scalar('Average Loss', train_losses.avg, epoch)
        tfboard_writer.add_scalar('NME', train_nme * 100,
                                  epoch)  # traing data nme

        # save checkpoint
        filename = 'epoch-{}-{}.pth'.format(epoch, args.epochs)
        save_path = logger.path('model') / filename
        torch.save(
            {
                'epoch': epoch,
                'args': deepcopy(args),
                'state_dict': net.state_dict(),
                'scheduler': scheduler.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, logger.path('model') / filename)
        logger.log('save checkpoint into {}'.format(filename))
        # NOTE(review): torch.save returns None, so this rebinds last_info to
        # None; only the on-disk last-info file matters for resuming
        last_info = torch.save({
            'epoch': epoch,
            'last_checkpoint': save_path
        }, logger.last_info())

        # eval
        logger.log('Basic-Eval-All evaluates {} dataset'.format(
            len(eval_loaders)))
        for i, loader in enumerate(eval_loaders):
            eval_losses = AverageMeter()
            eval_prediction = []
            eval_target = []
            with torch.no_grad():
                net.eval()
                for i_batch, (inputs, target) in enumerate(loader):
                    target = target.squeeze(1)
                    inputs = inputs.cuda()
                    target = target.cuda()
                    prediction = net(inputs)
                    loss = criterion(prediction, target)
                    eval_losses.update(loss.item(), inputs.size(0))
                    prediction = prediction.detach().to(
                        torch.device('cpu')).numpy()
                    target = target.detach().to(torch.device('cpu')).numpy()
                    for idx in range(inputs.size()[0]):
                        eval_prediction.append(prediction[idx, :])
                        eval_target.append(target[idx, :])
                    # NOTE(review): `i + 1 == len(loader)` compares the outer
                    # dataset index, not i_batch -- i_batch was likely meant
                    if i_batch % args.print_freq == 0 or i + 1 == len(loader):
                        logger.log(
                            '[Eval Info]: [epoch-{}-{}][{:04d}/{:04d}][Loss:{:.2f}]'
                            .format(epoch, args.epochs, i, len(loader),
                                    loss.item()))
            eval_nme = compute_nme(args.num_pts, eval_prediction, eval_target)
            logger.log(
                '[Eval Info]: [evaluate the {}/{}-th dataset][epoch-{}-{}][Avg Loss:{:.6f}][NME:{:.2f}]'
                .format(i, len(eval_loaders), epoch, args.epochs,
                        eval_losses.avg, eval_nme * 100))
            tfboard_writer.add_scalar('eval_nme/{}'.format(i), eval_nme * 100,
                                      epoch)
    logger.close()
def train(self, logger, epoch):
    """Train for one epoch over self.train_loader.

    :param logger: logger for the end-of-epoch summary line
    :param epoch: current epoch index (for display only)
    :return: (losses, acc) AverageMeters accumulated over the epoch
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    rate = get_learning_rate(self.optimizer)[0]

    # switch to train mode
    self.model.train()

    end = time.time()
    for i, (images, target) in enumerate(self.train_loader):
        # adjust learning rate scheduler step
        self.scheduler.batch_step()

        # measure data loading time
        data_time.update(time.time() - end)

        if check_gpu() > 0:
            # `async=True` is a SyntaxError on Python >= 3.7 (`async` is a
            # keyword); non_blocking is the supported spelling
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        self.optimizer.zero_grad()

        # compute y_pred (Variable wrappers are obsolete; tensors work directly)
        y_pred = self.model(images)
        if self.model_type == 'I3D':
            # I3D returns a tuple; logits come first
            y_pred = y_pred[0]
        loss = self.criterion(y_pred, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(y_pred.data, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        acc.update(prec1.item(), images.size(0))
        top1.update(prec1.item(), images.size(0))
        top5.update(prec5.item(), images.size(0))

        # compute gradient and do SGD step
        loss.backward()
        self.optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % self.print_freq == 0:
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Lr {rate:.5f}\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, self.epochs, i, len(self.train_loader),
                      batch_time=batch_time, data_time=data_time, rate=rate,
                      loss=losses, top1=top1, top5=top5))

    logger.info('Epoch: [{0}/{1}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Lr {rate:.5f}\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch, self.epochs,
                    batch_time=batch_time, data_time=data_time, rate=rate,
                    loss=losses, top1=top1, top5=top5))
    return losses, acc
def validate(self, logger): batch_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # switch to evaluate mode self.model.eval() end = time.time() for i, (images, labels) in enumerate(self.val_loader): if check_gpu() > 0: images = images.cuda(async=True) labels = labels.cuda(async=True) images = torch.autograd.Variable(images) labels = torch.autograd.Variable(labels) if self.tencrop: # Due to ten-cropping, input batch is a 5D Tensor batch_size, number_of_crops, number_of_channels, height, width = images.size( ) # Fuse batch size and crops images = images.view(-1, number_of_channels, height, width) # Compute model output output_batch_crops = self.model(images) # Average predictions for each set of crops output_batch = output_batch_crops.view(batch_size, number_of_crops, -1).mean(1) label_repeated = labels.repeat(10, 1).transpose( 1, 0).contiguous().view(-1, 1).squeeze() loss = self.criterion(output_batch_crops, label_repeated) else: output_batch = self.model(images) loss = self.criterion(output_batch, labels) # measure accuracy and record loss prec1, prec5 = accuracy(output_batch.data, labels, topk=(1, 5)) losses.update(loss.item(), images.size(0)) acc.update(prec1.item(), images.size(0)) top1.update(prec1.item(), images.size(0)) top5.update(prec5.item(), images.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % self.print_freq == 0: print('TrainVal: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(self.val_loader), batch_time=batch_time, loss=losses, top1=top1, top5=top5)) print(' * Accuracy {acc.avg:.3f} Loss {loss.avg:.3f}'.format( acc=acc, loss=losses)) logger.info(' * Accuracy {acc.avg:.3f} Loss {loss.avg:.3f}'.format( acc=acc, loss=losses)) return losses, acc
drop_last=True, num_workers=opt.num_workers) # setup tracker tracker = TrackerSiamFC(name=opt.name, weight=opt.weight, device=opt.device) # training loop itr = 0 num_itrs = int((opt.num_epochs * len(loader)) / opt.print_freq) + 1 loss_logger = Logger(os.path.join(opt.log_dir, 'loss.csv'), num_itrs) loss_meter = AverageMeter() for epoch in range(opt.num_epochs): for step, batch in enumerate(loader): loss = tracker.step(batch, backward=True, update_lr=(step == 0)) itr += 1 loss_meter.update(loss) if itr % opt.print_freq == 0: print('Epoch [{}/{}] itr [{}]: Loss: {:.5f}'.format( epoch + 1, opt.num_epochs, itr, loss_meter.avg)) sys.stdout.flush() loss_logger.set(itr / opt.print_freq, loss_meter.avg) loss_meter = AverageMeter() # save checkpoint net_path = os.path.join(opt.log_dir, 'model_e%d.pth' % (epoch + 1)) torch.save(tracker.net.state_dict(), net_path)
def train_walk(walk_file, w, data_batches, valid_batches, model, num_epochs,
               verbose=False):
    """Optimize a latent "walk" vector ``w`` with SGD so that decoding
    ``z + alpha * w`` reconstructs the edited sentences.

    :param walk_file: name used for saving ``w`` and the loss plot
    :param w: learnable walk tensor (requires_grad); optimized in place
    :param data_batches: list of (x, x_edit) training batch pairs
    :param valid_batches: list of (x, x_edit) validation batch pairs
    :param model: autoencoder exposing encode/decode/loss_rec
    :param num_epochs: number of passes over data_batches
    :param verbose: forwarded to average_loss; also prints extra stats
    :return: the optimized walk tensor ``w``
    """
    print("START TRAINING:", walk_file)
    opt = optim.SGD([w], lr=0.01)
    start_time = time.perf_counter()
    meter = AverageMeter()
    loss_hist_before = []
    loss_hist_during = []
    for e in range(num_epochs):
        avg_loss_before = average_loss(w, data_batches, model, verbose)
        model.train()
        total_loss = 0
        nsents = 0
        meter.clear()
        # visit batches in a fresh random order each epoch
        indices = list(range(len(data_batches)))
        random.shuffle(indices)
        for i, idx in enumerate(indices):
            opt.zero_grad()
            x, x_edit = data_batches[idx]
            # encode the input x
            mu, logvar = model.encode(x)
            z = reparameterize(mu, logvar)
            # shift the latent along the walk direction
            # (alpha is a module-level scale — TODO confirm)
            new_latent = z + alpha * w
            # decode the shifted latent
            logits, hidden = model.decode(new_latent, x)
            # reconstruction loss against the edited target
            loss = model.loss_rec(logits, x_edit).mean()
            loss.backward()
            opt.step()
            # detach before accumulating: keeping the live loss tensor
            # would retain every batch's autograd graph for the whole
            # epoch (unbounded memory growth)
            total_loss += loss.detach() * x.shape[1]
            nsents += x.shape[1]
            meter.update(loss.detach(), x.shape[1])
        print("---------------------------")
        # evaluation after the epoch; also leaves the model in eval mode
        # as the original did (result itself is unused)
        avg_loss_after = average_loss(w, data_batches, model)
        print("FINISHED EPOCH", e)
        print("avg loss before:", avg_loss_before)
        print("avg train loss: ", total_loss / nsents)
        loss_hist_before.append((e, avg_loss_before.item()))
        loss_hist_during.append((e, meter.avg.item()))
        if verbose:
            print("loss", loss)
            print("nsents", nsents)
        val_loss = average_loss(w, valid_batches, model, False)
        print("avg valid loss: ", val_loss)
        epoch_time = time.perf_counter()
        print("time: ", epoch_time - start_time)
        print("=" * 60)
    print("FINISHED TRAINING")
    best_before_loss = min(loss_hist_before, key=lambda x: x[1])
    best_during_loss = min(loss_hist_during, key=lambda x: x[1])
    # print each best epoch alongside the other history's value at that epoch
    print("best_before_loss:", best_before_loss,
          loss_hist_during[best_before_loss[0]])
    print("best_during_loss:", best_during_loss,
          loss_hist_before[best_during_loss[0]])
    plot_series([loss_hist_before, loss_hist_during], walk_file)
    print(w)
    # results_dir is a module-level path prefix — TODO confirm it ends with '/'
    torch.save(w, results_dir + walk_file)
    return w
def average_loss(w, data_batches, model, verbose=False):
    """Compute the mean reconstruction loss of decoding ``z + alpha * w``
    over ``data_batches`` (no gradient tracking; puts the model in eval mode).

    :param w: latent walk vector
    :param data_batches: list of (x, x_edit) batch pairs
    :param model: autoencoder exposing encode/decode/loss_rec/generate
    :param verbose: print per-batch and per-sentence diagnostics
    :return: sentence-weighted average loss (a 0-dim tensor)
    """
    meter = AverageMeter()
    model.eval()
    with torch.no_grad():
        total_loss = 0
        nsents = 0
        for idx in range(len(data_batches)):
            x, x_edit = data_batches[idx]
            mu, logvar = model.encode(x)
            z = reparameterize(mu, logvar)
            new_latent = z + alpha * w
            logits, hidden = model.decode(new_latent, x)
            loss = model.loss_rec(logits, x_edit).mean()
            if verbose:
                print("my loss", idx, ":", loss)
                print("x", x.shape, "| x_edit", x_edit.shape)
                sents = []
                edited_sents = []
                walk_sents = []
                # batches are (seq_len, batch) — batch size is dim 1
                batch_len = x.shape[1]
                max_len = 35
                dec = 'greedy'
                outputs = model.generate(new_latent, max_len, dec).t()
                for i in range(batch_len):
                    x_i = x[:, i]
                    sents.append([vocab.idx2word[id] for id in x_i])
                    xe_i = x_edit[:, i]
                    edited_sents.append([vocab.idx2word[id] for id in xe_i])
                    output_i = outputs[i]
                    walk_sents.append([vocab.idx2word[id] for id in output_i])
                for i in range(batch_len):
                    x_i = torch.unsqueeze(x[:, i], dim=1)
                    xe_i = torch.unsqueeze(x_edit[:, i], dim=1)
                    loss_i = compute_loss(w, x_i, xe_i, model)
                    print("batch", idx, ":", loss, "| sentence", i, ":",
                          loss_i)
                    print("--SENT:", sents[i])
                    print(x[:, i])
                    print("--EDIT:", edited_sents[i])
                    print(x_edit[:, i])
                    print("--WALK:", walk_sents[i])
                    print(outputs[i])
            if print_outputs_flag:
                if idx == 4:
                    print("batch", idx, "length", x.shape[1])
                    edited_sents = []
                    walked_sents = []
                    sents = []
                    # BUG FIX: batch_len was previously only defined in the
                    # verbose branch, raising NameError here when verbose
                    # was False; define it locally.
                    batch_len = x.shape[1]
                    max_len = 35
                    dec = 'greedy'
                    outputs = model.generate(new_latent, max_len, dec).t()
                    print("outputs", outputs.shape)
                    print("x", x.shape)
                    print("x_edit", x_edit.shape)
                    print("z", z.shape)
                    for i in range(batch_len):
                        output_i = outputs[i]
                        walked_sents.append(
                            [vocab.idx2word[id] for id in output_i])
                        x_i = x[:, i]
                        sents.append([vocab.idx2word[id] for id in x_i])
                        xe_i = x_edit[:, i]
                        edited_sents.append(
                            [vocab.idx2word[id] for id in xe_i])
                    walked_sents = strip_eos(walked_sents)
                    edited_sents = strip_eos(edited_sents)
                    sents = strip_eos(sents)
                    for i in range(batch_len):
                        print(i)
                        print("--SENT:", sents[i])
                        print("--EDIT:", edited_sents[i])
                        print("--WALK:", walked_sents[i])
            # weight each batch's mean loss by its sentence count
            total_loss += loss * x.shape[1]
            nsents += x.shape[1]
            meter.update(loss.item(), x.shape[1])
    avg_loss = total_loss / nsents
    if verbose:
        print("avg_loss meter loss vs avg_loss", meter.avg, avg_loss)
    return avg_loss
decoder_losses = AverageMeter() # train for ibatch, (img, noise) in enumerate(train_loader): img = img.cuda() noise = noise.cuda() z_mu, z_sig = vae_encoder(img) # z_mu , z_sig : N*1*Dz zl = torch.exp(0.5 * z_sig) * noise + z_mu # zl : N*L*Dz x_ber = vae_decoder(zl) # N*L*Dx #encoder_loss, decoder_loss = vae_loss(img, x_mu, x_sig, z_mu, z_sig) encoder_loss, decoder_loss = vae_loss(img, x_ber, z_mu, z_sig) encoder_losses.update(encoder_loss.item()) decoder_losses.update(decoder_loss.item()) opt_encoder.zero_grad() opt_decoder.zero_grad() encoder_loss.backward() opt_encoder.step() opt_decoder.step() if ibatch % args.print_freq == 0 or ibatch + 1 == len(train_loader): logger.log( '[train Info]: [epoch-{}-{}][{:04d}/{:04d}][Encoder Loss:{:.2f}][Decoder Loss:{:.2f}]' .format(ep, args.epoch, ibatch, len(train_loader), encoder_loss.item(), decoder_loss.item())) tfboard_writer.add_scalar('Encoder_Loss', encoder_losses.avg, ep)