def calculate_disagreement(self, acs, features, next_features):
    """Return the parent class's disagreement and render its autograd graph.

    The value itself is produced by ``super().calculate_disagreement``.
    According to the inherited description, that is the prediction error when
    ``next_features`` is defined and the dynamics model's last-layer output
    otherwise (not verifiable from this method alone).

    Side effect: the computation graph rooted at the returned value is
    rendered to a PNG via ``make_dot``, with nodes labelled using the
    parameters of the first entry of ``self.dynamics_list``.
    """
    result = super().calculate_disagreement(acs, features, next_features)
    first_net = self.dynamics_list[0].dynamics_net

    print("********************")
    print("Making and saving plot")

    named = dict(first_net.named_parameters())
    graph = make_dot(result, params=named, show_attrs=True, show_saved=True)
    graph.format = "png"
    graph.render()

    return result
def test_SharedGeneratorCBN(args, myargs):
    """Build a ``SharedGeneratorCBN`` from the YAML config and view its graph.

    One forward pass is run on random latents, with one sampled architecture
    row per available op, and the resulting autograd graph is opened with
    torchviz. ``args`` and ``myargs`` are accepted but not read here.
    """
    yaml_file = 'enas_cgan/configs/enas_cgan.yaml'
    with open(yaml_file, 'r') as f:
        # safe_load: ``yaml.load`` without an explicit Loader can construct
        # arbitrary objects and is rejected by recent PyYAML releases.
        configs = EasyDict(yaml.safe_load(f))
    config = configs.retrain_cgan_gen_cifar10_v6_cbn.model

    resolution = 32
    n_classes = 10
    gen_c = config.generator
    generator = SharedGeneratorCBN(
        resolution=resolution, n_classes=n_classes, config=gen_c).cuda()

    # Batch size equals the number of ops; row i selects op i in every layer.
    bs = len(generator.ops)
    sample_arc = torch.arange(bs).view(-1, 1).repeat((1, generator.num_layers))
    dummy_data = torch.rand(bs, gen_c.dim_z).cuda()
    y = torch.arange(bs).cuda()

    x = generator(z=dummy_data, y=y, sample_arcs=sample_arc)

    import torchviz
    g = torchviz.make_dot(x)
    g.view()
def validate(net, loader):
    """Render the autograd graph of ``net`` for the first batch of ``loader``.

    Despite its name, this function computes no metric and returns ``None``.
    It puts ``net`` in eval mode, runs a single forward pass on the first
    batch, prints the Graphviz source of the graph rooted at the mean of the
    semantic logits, and renders it as SVG to ``network_graph.gv`` (opening a
    viewer).

    :param net: network whose forward returns a pair; the second element is
        used as the semantic logits.
    :param loader: iterable yielding 3-tuples whose first element is the
        image batch.
    :raises ValueError: if ``loader`` yields no batches.
    """
    net.eval()

    dotfile = None
    for image, _, _ in tqdm.tqdm(loader):
        image = Variable(image.cuda())
        _, semantic_logits = net(image)
        dotfile = make_dot(semantic_logits.mean(),
                           params=dict(net.named_parameters()))
        # Only one batch is needed to trace the graph.
        break

    if dotfile is None:
        # Previously this surfaced as an AttributeError on ``None``.
        raise ValueError('loader produced no batches; nothing to visualise')

    print(dotfile.source)
    dotfile.format = 'svg'
    dotfile.render('network_graph.gv', view=True)
def test_SharedGeneratorNoSkip_v6(args, myargs):
    """Build a ``SharedGeneratorNoSkip`` from the YAML config and view its graph.

    One forward pass is run on random latents, with one sampled architecture
    row per available op, and the resulting autograd graph is opened with
    torchviz. ``args`` and ``myargs`` are accepted but not read here.
    """
    yaml_file = 'enas_cgan/configs/enas_cgan.yaml'
    with open(yaml_file, 'r') as f:
        # safe_load: ``yaml.load`` without an explicit Loader can construct
        # arbitrary objects and is rejected by recent PyYAML releases.
        configs = EasyDict(yaml.safe_load(f))
    config = configs.retrain_cgan_gen_cifar10_v6.model

    resolution = 32
    gen_c = config.generator
    generator = SharedGeneratorNoSkip(
        resolution=resolution, config=gen_c).cuda()

    # Batch size equals the number of ops; row i selects op i in every layer.
    bs = len(generator.ops)
    sample_arc = torch.arange(bs).view(-1, 1)
    sample_arc = sample_arc.repeat((1, generator.num_layers))
    dummy_data = torch.rand(bs, gen_c.dim_z).cuda()

    x = generator(dummy_data, sample_arc)

    import torchviz
    g = torchviz.make_dot(x)
    g.view()
def test_autogan_cifar10_a_Generator(args1, myargs):
    """Instantiate the AutoGAN ``Generator`` and view the graph of one forward pass.

    The ``train_autogan_cifar10_a`` section of ``myargs.config`` is copied
    onto the namespace returned by ``cfg.parse_args()``, the three
    path-valued options are ``~``-expanded, and a batch of 16 Gaussian
    latents is pushed through the generator on the GPU.
    """
    import cfg
    import os
    import torch
    import numpy as np

    myargs.config = myargs.config.train_autogan_cifar10_a
    myargs.args = args1

    args = cfg.parse_args()
    for key, value in myargs.config.items():
        setattr(args, key, value)

    # Expand "~" in every path option, in the same order as before.
    for path_attr in ('tf_inception_model_dir', 'fid_stat', 'data_path'):
        setattr(args, path_attr, os.path.expanduser(getattr(args, path_attr)))

    gen_net = Generator(args=args).cuda()
    latents = np.random.normal(0, 1, (16, args.latent_dim))
    z = torch.cuda.FloatTensor(latents)
    x = gen_net(z)

    import torchviz
    graph = torchviz.make_dot(x)
    graph.view()
def test_AlphaDiscriminator(args, myargs):
    """Build the shared discriminator and controller, then view the graph.

    The controller is run once under ``torch.no_grad()``; the architecture
    actually fed to the discriminator is a random one (128 rows, one column
    per discriminator layer). ``args`` and ``myargs`` are accepted but not
    read here.
    """
    yaml_file = 'enas_cgan/configs/enas_cgan.yaml'
    with open(yaml_file, 'r') as f:
        # safe_load: ``yaml.load`` without an explicit Loader can construct
        # arbitrary objects and is rejected by recent PyYAML releases.
        configs = EasyDict(yaml.safe_load(f))
    config = configs.search_cgan_gen_dis_cifar10.model

    resolution = 32
    dis_c = config.discriminator
    discriminator = SharedDiscriminatorNoSkip(
        resolution=resolution, config=dis_c).cuda()

    from enas_cgan.models.controller import Controller
    controller_c = configs.search_cgan_gen_dis_cifar10.model.controller
    n_classes = 10
    controller = Controller(n_classes=n_classes,
                            num_layers=discriminator.num_layers,
                            num_branches=len(discriminator.ops),
                            config=controller_c)
    controller = controller.cuda()

    class_id = 0
    with torch.no_grad():
        # Forward pass so the controller generates a new architecture.
        controller(class_id)

    sample_arc = torch.randint(
        0, len(discriminator.ops), (128, discriminator.num_layers))
    dummy_data = torch.rand(128, 3, 32, 32).cuda()
    x = discriminator(dummy_data, sample_arc)

    import torchviz
    g = torchviz.make_dot(x)
    g.view()
def test_DepthwiseSeparableConv2d(self):
    """Build a ``DepthwiseSeparableConv2d`` layer from inline YAML and view its graph.

    Environment defaults (``CUDA_VISIBLE_DEVICES``, ``PORT``, ``TIME_STR``)
    are filled in only when missing. The layer is run once on a random
    2x256x32x32 CUDA tensor and the autograd graph is opened with torchviz.
    """
    os.environ.setdefault('CUDA_VISIBLE_DEVICES', '0')
    os.environ.setdefault('PORT', '6006')
    if 'TIME_STR' not in os.environ:
        os.environ['TIME_STR'] = '0' if utils.is_debugging() else '1'

    # Derive the output directory from this function's and the class's name.
    func_name = sys._getframe().f_code.co_name
    assert func_name.startswith('test_')
    command = func_name[5:]
    owner = self.__class__.__name__
    class_name = owner[7:] if owner.startswith('Testing') else owner
    outdir = f'results/{class_name}/{command}'

    import yaml
    from template_lib.d2.layers import build_d2layer

    cfg_str = """
              name: "DepthwiseSeparableConv2d"
              in_channels: 256
              out_channels: 256
              kernel_size: 7
              padding: 3
              """
    cfg = EasyDict(yaml.safe_load(cfg_str))

    layer = build_d2layer(cfg)
    layer.cuda()

    inputs = torch.randn(2, 256, 32, 32).cuda()
    outputs = layer(inputs)

    import torchviz
    graph = torchviz.make_dot(outputs)
    graph.view()
def test_case():
    """Build a ``DenseBlock`` layer from inline YAML and view its graph.

    The layer is applied to a random 2x144x8x8 CUDA tensor together with two
    hand-written architecture rows (all zeros, and a zero followed by ones).
    """
    cfg_str = """
              name: "DenseBlock"
              update_cfg: true
              in_channels: 144
              """
    cfg = EasyDict(yaml.safe_load(cfg_str))

    block = build_d2layer(cfg)
    block.cuda()

    bs = 2
    num_ops = len(block.cfg_ops)
    features = torch.randn(bs, 144, 8, 8).cuda()

    arc_rows = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    ]
    batched_arcs = torch.tensor(arc_rows).cuda()

    out = block(features, batched_arcs)

    import torchviz
    graph = torchviz.make_dot(out)
    graph.view()
def display_net(self):
    """Render the graph of ``self.C`` to ``misc/VGG16-OF_<OF_option>.png``.

    A random 1x3x224x224 input is pushed through ``self.C`` (passed twice
    when ``self.OF_option`` is not the string ``'None'``), the forward time
    is printed, and the torchviz graph is rendered. The intermediate Graphviz
    source file is deleted and the rendered PDF is handed to ``pdf2png``.

    Requires torchviz:
    pip install git+https://github.com/szagoruyko/pytorchviz
    """
    from graphviz import Digraph
    try:
        from torchviz import make_dot, make_dot_from_trace
    except ImportError:
        raise ImportError("pip install git+https://github.com/szagoruyko/pytorchviz")
    from utils import pdf2png

    input_ = self.to_var(torch.randn(1, 3, 224, 224).type(torch.FloatTensor))

    # The same tensor is fed twice when an OF option is active.
    net_inputs = (input_, input_) if self.OF_option != 'None' else (input_,)

    tic = time.time()
    y = self.C(*net_inputs)
    seconds = time.time() - tic
    print("Forward time: " + str(datetime.timedelta(seconds=seconds)))

    graph = make_dot(y, params=dict(self.C.named_parameters()))
    filename = 'misc/VGG16-OF_{}'.format(self.OF_option)
    graph.filename = filename
    graph.render()

    # Drop the Graphviz source; only the rendered output is converted.
    os.remove(filename)
    pdf2png(filename)
    print('Network saved at {}.png'.format(filename))
def Topology(experiment, filename="topology", extension="pdf"):
    """Render the autograd graph of the experiment's loss, unless already present.

    A single batch is taken from ``experiment.train_dl`` and moved to
    ``experiment.device``. If ``<experiment.path>/<filename>.<extension>``
    does not exist yet, the loss for that batch is computed, its graph is
    rendered through a local view of the target path, and the intermediate
    Graphviz source file is removed afterwards.

    ``extension`` must be ``"pdf"`` or ``"svg"``.
    """
    assert extension in ("pdf", "svg")
    from torchviz import make_dot

    inputs, targets = next(iter(experiment.train_dl))
    inputs = inputs.to(experiment.device)
    targets = targets.to(experiment.device)

    # Save model topology
    graph_source = experiment.path / filename
    rendered = graph_source.with_suffix("." + extension)
    if rendered.exists():
        return

    predictions = experiment.model(inputs)
    loss = experiment.loss_func(predictions, targets)
    graph = make_dot(loss)
    if extension == "svg":
        graph.format = "svg"

    with S3Path.as_local(graph_source) as local_file:
        graph.render(local_file)

    # Only the rendered file is of interest; the graphviz file can be removed.
    if graph_source.exists():
        graph_source.unlink()
def test_DenseCell(self):
    """Build a ``DenseCell`` layer from inline YAML and view its graph.

    Environment defaults (``CUDA_VISIBLE_DEVICES``, ``PORT``, ``TIME_STR``)
    are filled in only when missing. The tensor being visualised is whatever
    the layer's own ``test_case()`` returns.
    """
    os.environ.setdefault('CUDA_VISIBLE_DEVICES', '0')
    os.environ.setdefault('PORT', '6006')
    if 'TIME_STR' not in os.environ:
        os.environ['TIME_STR'] = '0' if utils.is_debugging() else '1'

    # Derive the output directory from this function's and the class's name.
    func_name = sys._getframe().f_code.co_name
    assert func_name.startswith('test_')
    command = func_name[5:]
    owner = self.__class__.__name__
    class_name = owner[7:] if owner.startswith('Testing') else owner
    outdir = f'results/{class_name}/{command}'

    import yaml
    from template_lib.d2.layers import build_d2layer

    cfg_str = """
              name: "DenseCell"
              update_cfg: true
              in_channels: 3
              out_channels: 32
              """
    cfg = EasyDict(yaml.safe_load(cfg_str))

    cell = build_d2layer(cfg, in_channels=3, out_channels=32)
    cell.cuda()
    out = cell.test_case()

    import torchviz
    graph = torchviz.make_dot(out)
    graph.view()
gpus=gpu_list, objective=objective, ) # check trainer support for data type expect(_data_type in trainer.supported_data_types()) # start training LOGGER.info("Start training.") # trainer.setup(load, load_state_dict, save_every, train_dir) if cfg["dataset_type"] == "cifar10": dummy_input = torch.rand([2, 3, 32, 32]).to(device) elif cfg["dataset_type"] == "imagenet": dummy_input = torch.rand([2, 3, 224, 224]).to(device) else: raise AssertionError("Dataset not supported") output = trainer.model.forward(dummy_input) dot = make_dot(output, params=dict(trainer.model.named_parameters())) dot.format = "pdf" dot.render("./test-torchviz") flops = trainer.model.total_flops / 1.0e6 bi_flops = trainer.model.bi_flops / 1.0e6 model_params = utils.count_parameters(trainer.model, count_binary=True) / 1.0e6 print("param size = {} M | bi-param {} M".format(model_params[0], model_params[1])) print("flops = {} M | bi-flops {} M".format(flops, bi_flops))
def main(args):
    """Meta-train a (V)GAE link-prediction model and evaluate it.

    Flow, as visible in this function:

    1. Build the model selected by ``args.model`` / ``args.encoder`` and an
       Adam optimizer (restricted to parameters whose name contains
       ``"signature"`` when ``args.train_only_gs`` is set).
    2. Baseline and WL-kernel modes run a single evaluation and exit.
    3. Otherwise run ``args.epochs`` epochs of ``meta_gradient_step`` over
       ``train_loader``, logging to comet / wandb when enabled, then
       validate and test on a fresh copy of the model after every epoch.
    4. Save the global parameters, run 1000-inner-step evaluations on the
       validation and test sets, and return the validation metric.

    With ``args.debug`` set, the computation graph of one meta-gradient step
    is rendered to ``args.debug_name`` and the process exits.

    Returns:
        ``0.5 * val_inner_avg_auc + 0.5 * val_inner_avg_ap``.
    """
    assert args.model in ['GAE', 'VGAE']
    kwargs = {'GAE': MyGAE, 'VGAE': MyVGAE}
    kwargs_enc = {'GCN': MetaEncoder, 'FC': MLPEncoder, 'MLP': MetaMLPEncoder,
                  'GraphSignature': MetaSignatureEncoder,
                  'GatedGraphSignature': MetaGatedSignatureEncoder}

    def build_model():
        # One model factory, used for the trained model and the per-epoch copies.
        encoder = kwargs_enc[args.encoder](args, args.num_features, args.num_channels)
        return kwargs[args.model](encoder).to(args.dev)

    def mean(values):
        return sum(values) / len(values)

    train_loader, val_loader, test_loader = load_dataset(args.dataset, args)
    meta_model = build_model()

    if args.train_only_gs:
        # Train only the graph-signature parameters; freeze everything else.
        trainable_parameters = []
        for name, p in meta_model.named_parameters():
            if "signature" in name:
                trainable_parameters.append(p)
            else:
                p.requires_grad = False
        optimizer = torch.optim.Adam(trainable_parameters, lr=args.meta_lr)
    else:
        optimizer = torch.optim.Adam(meta_model.parameters(), lr=args.meta_lr)

    if not args.do_kl_anneal:
        args.kl_anneal = 1

    args.allow_unused = args.encoder in ('GraphSignature', 'GatedGraphSignature')

    # Random / Adamic-Adar / DeepWalk baselines: evaluate once and stop.
    if args.random_baseline or args.adamic_adar_baseline or args.deepwalk_baseline:
        test_inner_avg_auc, test_inner_avg_ap = test(
            args, meta_model, optimizer, test_loader, 0,
            return_val=True, inner_steps=1000, seed=args.seed)
        sys.exit()

    # WL-kernel analysis on a previously saved model.
    if args.wl:
        load_path = '../saved_models/' + args.namestr + '.pt'
        meta_model.load_state_dict(torch.load(load_path))
        run_analysis(args, meta_model, train_loader)
        test(args, meta_model, optimizer, test_loader, 0)
        sys.exit()

    # Meta-training
    mode = 'Train'
    meta_loss = torch.Tensor([0])
    args.final_test = False
    for epoch in range(0, args.epochs):
        graph_id_local = 0
        graph_id_global = 0
        train_inner_avg_auc_list, train_inner_avg_ap_list = [], []
        if epoch > 0 and args.dataset != 'PPI':
            args.resplit = False

        for i, data in enumerate(train_loader):
            if args.debug:
                # Render the computation graph of one meta-gradient step.
                # The running *train* lists are passed here: the previously
                # referenced ``test_inner_avg_*_list`` names are not defined
                # anywhere in this function.
                debug_loss = meta_gradient_step(
                    meta_model, args, data, optimizer, args.inner_steps,
                    args.inner_lr, args.order, graph_id_local, mode,
                    train_inner_avg_auc_list, train_inner_avg_ap_list,
                    epoch, i, True)[1]
                dot = make_dot(debug_loss, params=dict(meta_model.named_parameters()))
                dot.format = 'png'
                dot.render(args.debug_name)
                sys.exit()

            (graph_id_local, meta_loss,
             train_inner_avg_auc_list, train_inner_avg_ap_list) = meta_gradient_step(
                meta_model, args, data, optimizer, args.inner_steps,
                args.inner_lr, args.order, graph_id_local, mode,
                train_inner_avg_auc_list, train_inner_avg_ap_list,
                epoch, i, True)
            if args.do_kl_anneal:
                args.kl_anneal = args.kl_anneal + 1/args.epochs

            auc_list, ap_list = global_test(
                args, meta_model, data, OrderedDict(meta_model.named_parameters()))
            if args.comet:
                if len(ap_list) > 0:
                    auc_metric = 'Train_Global_Batch_Graph_' + str(i) + '_AUC'
                    ap_metric = 'Train_Global_Batch_Graph_' + str(i) + '_AP'
                    args.experiment.log_metric(auc_metric, mean(auc_list), step=epoch)
                    args.experiment.log_metric(ap_metric, mean(ap_list), step=epoch)
            if args.wandb:
                if len(ap_list) > 0:
                    auc_metric = 'Train_Global_Batch_Graph_' + str(i) + '_AUC'
                    ap_metric = 'Train_Global_Batch_Graph_' + str(i) + '_AP'
                    wandb.log({auc_metric: mean(auc_list),
                               ap_metric: mean(ap_list), "x": epoch}, commit=False)
            graph_id_global += len(ap_list)

        if args.wandb:
            wandb.log()

        # NOTE(review): the AP metric name embeds the last batch index ``i``
        # while the AUC name does not; kept as-is so logged keys do not change.
        if args.comet:
            if len(train_inner_avg_ap_list) > 0:
                auc_metric = 'Train_Inner_Avg' + '_AUC'
                ap_metric = 'Train_Inner_Avg' + str(i) + '_AP'
                args.experiment.log_metric(
                    auc_metric, mean(train_inner_avg_auc_list), step=epoch)
                args.experiment.log_metric(
                    ap_metric, mean(train_inner_avg_ap_list), step=epoch)
        if args.wandb:
            if len(train_inner_avg_ap_list) > 0:
                auc_metric = 'Train_Inner_Avg' + '_AUC'
                ap_metric = 'Train_Inner_Avg' + str(i) + '_AP'
                wandb.log({auc_metric: mean(train_inner_avg_auc_list),
                           ap_metric: mean(train_inner_avg_ap_list),
                           "x": epoch}, commit=False)
        if len(train_inner_avg_ap_list) > 0:
            print('Train Inner AUC: {:.4f}, AP: {:.4f}'.format(
                mean(train_inner_avg_auc_list), mean(train_inner_avg_ap_list)))

        # Meta-testing after every epoch, on a copy of the current model.
        meta_model_copy = build_model()
        meta_model_copy.load_state_dict(meta_model.state_dict())
        if args.train_only_gs:
            # NOTE(review): this optimizer is built over the ORIGINAL model's
            # signature parameters, not the copy's - confirm this is intended.
            optimizer_copy = torch.optim.Adam(trainable_parameters, lr=args.meta_lr)
        else:
            optimizer_copy = torch.optim.Adam(meta_model_copy.parameters(), lr=args.meta_lr)
        optimizer_copy.load_state_dict(optimizer.state_dict())
        validation(args, meta_model_copy, optimizer_copy, val_loader, epoch)
        test(args, meta_model_copy, optimizer_copy, test_loader, epoch,
             inner_steps=args.inner_steps)

    print("Failed on %d Training graphs" %(args.fail_counter))

    # Save global params
    os.makedirs('../saved_models/', exist_ok=True)
    save_path = '../saved_models/' + args.namestr + '_global_.pt'
    torch.save(meta_model.state_dict(), save_path)

    # Run to convergence. For ego datasets the optimizer is re-created and
    # the inner learning rate rescaled before each of the two evaluations.
    if args.ego:
        optimizer = torch.optim.Adam(meta_model.parameters(), lr=args.meta_lr)
        args.inner_lr = args.inner_lr * args.reset_inner_factor
    val_inner_avg_auc, val_inner_avg_ap = test(
        args, meta_model, optimizer, val_loader, epoch,
        return_val=True, inner_steps=1000)

    if args.ego:
        optimizer = torch.optim.Adam(meta_model.parameters(), lr=args.meta_lr)
        args.inner_lr = args.inner_lr * args.reset_inner_factor
    test_inner_avg_auc, test_inner_avg_ap = test(
        args, meta_model, optimizer, test_loader, epoch,
        return_val=True, inner_steps=1000)

    if args.comet:
        args.experiment.end()

    val_eval_metric = 0.5*val_inner_avg_auc + 0.5*val_inner_avg_ap
    test_eval_metric = 0.5*test_inner_avg_auc + 0.5*test_inner_avg_ap
    return val_eval_metric
def visualize(your_var):
    """Open a viewer on the autograd graph rooted at ``your_var``.

    ``your_var`` is passed straight to ``torchviz.make_dot``; nothing is
    returned.
    """
    # Only make_dot is needed; the previously imported Digraph, torch and
    # Variable names were never used.
    from torchviz import make_dot

    make_dot(your_var).view()
cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) model = nn.Sequential() model.add_module( 'conv', ConvModule(in_channels, out_channels, 3, stride=2, padding=1, **cfg)) for idx in range(res_repeat): model.add_module('res{}'.format(idx), ResBlock(out_channels, **cfg)) return model if __name__ == '__main__': self = Darknet(depth=53) self.eval() inputs = torch.rand(1, 3, 416, 416) # 输入 level_outputs = self.forward(inputs) for level_out in level_outputs: print(tuple(level_out.shape)) g = make_dot(self(inputs), params=dict(self.named_parameters())) g.view() """ 1.网络深度53层 2.提取的特征图是第3,4,5个stage的输出 """
from model import QNetwork
import torchviz
import torch

# Render the autograd graph of a QNetwork forward pass to "NN_arch.png".
nn = QNetwork(37, 4, 42)
sample_state = torch.randn(1, 37)
graph = torchviz.make_dot(nn(sample_state), params=dict(nn.named_parameters()))
graph.render("NN_arch", format="png")
def visualizeGraph(var, params):
    """Build and return the torchviz graph for ``var``.

    ``params`` is forwarded positionally as the second argument of
    ``make_dot``. The graph object is returned without being rendered.
    """
    from torchviz import make_dot

    graph = make_dot(var, params)
    return graph
def draw_model_architecture(model, output, input, input_name, save_name):
    """Render the autograd graph of ``output`` with labelled parameters and input.

    Nodes are labelled with ``model.named_parameters()`` plus one extra entry
    mapping ``input_name`` to ``input``.

    The previous implementation evaluated ``dict(...) + [...]``, which raises
    ``TypeError``; the extra entry is now merged before building the dict.

    :param model: module providing ``named_parameters()``.
    :param output: tensor whose graph is drawn.
    :param input: input tensor to label in the graph.
    :param input_name: label used for ``input``.
    :param save_name: path passed to ``render``; rendering is skipped when falsy.
    :return: the graph object produced by ``make_dot``.
    """
    labelled = dict(list(model.named_parameters()) + [(input_name, input)])
    graph = make_dot(output, params=labelled)
    if save_name:
        graph.render(save_name)
    return graph
return x def drn_a_50(pretrained=False, **kwargs): model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs) if pretrained: model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) return model if __name__ == '__main__': import torch from torchsummary import summary import torch.nn as nn from torchviz import make_dot #print the layers of DRN device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') model=drn_a_50().to(device) summary(model,(1,224,224)) print(model) #print the architecture of DRN x = torch.randn(1,1,224,224).requires_grad_(True) y = model(x.to(device)) vis_graph = make_dot(y,params=dict(list(model.named_parameters()) + [('x', x)] )) vis_graph.view()
sources='market1501', targets='market1501', height=384, width=128, batch_size_train=32, batch_size_test=100, transforms=['random_flip', 'random_crop', 'random_erase']) model = torchreidHash.models.build_model( name='resnet50', num_classes=datamanager.num_train_pids, loss='triplet', pretrained=True) # model = torchreid.models.build_model( # name='resnet50', # num_classes=datamanager.num_train_pids, # loss='triplet', # pretrained=True # ) model = model.cuda() #from torchsummary import summary # summary(model,(3,160,2048)) import torch from torchviz import make_dot x = torch.randn(1, 3, 384, 128) vis_graph = make_dot(model(x)) vis_graph.view()
def make_tree(node):
    """Render the autograd graph rooted at ``node`` to the file name ``qlist``."""
    make_dot(node).render('qlist')
network.train() pbar = tqdm(loader, total=params['steps'], initial=step, dynamic_ncols=True) last_time = time.time() with profiler.profile(use_cuda=True, with_stack=True, enabled=args.profile) as prof: for batch in pbar: optimizer.zero_grad(set_to_none=True) batch[0].to(device=device) output = network(batch[0]) if args.save_graph is not None: make_dot(output, params=dict(network.named_parameters())).render( args.save_graph) args.save_graph = None gt = batch[1].to(device=device) loss = criterion(output, gt) if min_loss < 0 or loss < min_loss: min_loss = loss norm = torch.mean(torch.linalg.norm(output, keepdims=True, dim=1)) gt_norm = torch.mean(torch.linalg.norm(gt, keepdims=True, dim=1)) loss.backward() optimizer.step() if (step) % params['decay_interval'] == 0: decay.step()
out = self.stages(x) out = self.upsample(out) out = self.lastConv(out) return out def get_net(num_class): return ENET(num_class) if 0: x = torch.zeros((2,3,512//2,512//2), dtype=torch.float32) # minibatch size 64, feature dimension 50 model = get_net(21) scores = model(x) print(scores.size()) # you should see [64, 10] #https://github.com/szagoruyko/pytorchviz #pip install git+https://github.com/szagoruyko/pytorchviz if 0: from graphviz import Digraph from torchviz import make_dot from torch.autograd import Variable net = get_net(21) net.cuda() x = Variable(torch.rand(1, 3, 256, 256)).cuda() h_x = net(x).cpu() dot = make_dot(h_x, params=dict(net.named_parameters())) dot.view("enet")
in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, groups=groups, bias=False, dilation=dilation, ) def conv1x1(in_planes, out_planes, stride=1): """1x1 convolution""" return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) def build_dct_resnet_50(n_classes: int) -> nn.Module: return DCTResNet(num_classes=n_classes) if __name__ == "__main__": resnet = DCTResNet() y = torch.zeros(1, 64, 64, 64, dtype=torch.float, requires_grad=False) cb = torch.zeros(1, 64, 32, 32, dtype=torch.float, requires_grad=False) cr = torch.zeros(1, 64, 32, 32, dtype=torch.float, requires_grad=False) make_dot(resnet(y, cb, cr)).render("model", format="png")
def print_compute_tree(name, node):
    """Render the autograd graph rooted at ``node`` to the file name ``name``."""
    graph = make_dot(node)
    graph.render(name)
def train_iter(cfg, dataloader, classifier, optimizer, writer, epoch, n_iter, cluster_loss_fn):
    """Run one training epoch and return per-batch metrics.

    For each batch: move the data to the GPU, run ``classifier``, compute the
    loss selected by ``cfg['loss']`` (switching to the second entry after
    ``cfg['switch_loss_epoch']`` when two are given), back-propagate, step the
    optimizer, and record accuracy / precision / IoU / recall.

    When ``cfg['print_bwgraph']`` is set, the backward graph of the first
    loss is displayed, the run's log directories are removed, and the
    process exits.

    Changes from the previous version:

    * the running loss accumulates ``loss.detach()`` so that every batch's
      autograd history is not kept alive for the whole epoch;
    * an unknown loss type raises ``ValueError`` instead of a later
      ``NameError``;
    * an empty ``dataloader`` no longer fails with ``NameError`` at the end.

    ``cluster_loss_fn`` is accepted but not used here.

    Returns:
        ``(mean_acc, mean_prec, mean_iou, mean_recall, epoch_loss, n_iter)``
        where the four ``mean_*`` values are 1-D tensors with one entry per
        batch and ``epoch_loss`` is the running loss divided by the number
        of batches.
    """
    num_classes = int(cfg['n_classes'])
    batch_size = int(cfg['batch_size'])
    sample_size = int(cfg['fixed_size'])
    num_batch = cfg['num_batch']

    ep_loss = 0.
    mean_acc = torch.tensor([])
    mean_iou = torch.tensor([])
    mean_prec = torch.tensor([])
    mean_recall = torch.tensor([])

    # The model runs in train mode for the whole epoch.
    classifier.train()

    i_batch = -1  # stays -1 when the dataloader yields nothing
    for i_batch, sample_batched in enumerate(dataloader):
        # --- fetch the batch -------------------------------------------------
        if 'graph' not in cfg['dataset']:
            points = sample_batched['points']
            target = sample_batched['gt']
            points, target = Variable(points), Variable(target)
            points, target = points.cuda(), target.cuda()
        else:
            data_list = []
            name_list = []
            for i, d in enumerate(sample_batched):
                if 'bvec' in d['points'].keys:
                    # Offset each sample's batch vector by its position.
                    d['points'].bvec += sample_size * i
                data_list.append(d['points'])
                name_list.append(d['name'])
            points = gBatch().from_data_list(data_list)
            if 'bvec' in points.keys:
                points.batch = points.bvec.clone()
                del points.bvec
            target = points['y']
            if cfg['same_size']:
                points['lengths'] = points['lengths'][0].item()
            sample_batched = {
                'points': points,
                'gt': target,
                'name': name_list
            }
            points, target = points.to('cuda'), target.to('cuda')

        # --- forward ---------------------------------------------------------
        optimizer.zero_grad()
        logits = classifier(points)

        # --- pick and compute the loss ---------------------------------------
        if len(cfg['loss']) == 2:
            if epoch <= int(cfg['switch_loss_epoch']):
                loss_type = cfg['loss'][0]
            else:
                loss_type = cfg['loss'][1]
        else:
            loss_type = cfg['loss'][0]

        if loss_type == 'nll':
            pred = F.log_softmax(logits, dim=-1)
            pred = pred.view(-1, num_classes)
            pred_choice = pred.data.max(1)[1].int()
            if cfg['nll_w']:
                # Down-weight class 0 relative to all other classes.
                ce_w = torch.tensor([1.5e-2] + [1.] * (num_classes - 1)).cuda()
            else:
                ce_w = torch.tensor([1.] * num_classes).cuda()
            loss = F.nll_loss(pred, target.long(), weight=ce_w)
        elif loss_type == 'LLh':
            pred_choice = (logits.data > 0).int()
            loss = L.lovasz_hinge(logits.view(batch_size, sample_size, 1),
                                  target.view(batch_size, sample_size, 1),
                                  per_image=False)
        elif loss_type == 'LLm':
            pred = F.softmax(logits, dim=-1)
            pred_choice = pred.data.max(1)[1].int()
            loss = L.lovasz_softmax_flat(pred,
                                         target,
                                         op=cfg['llm_op'],
                                         only_present=cfg['multi_category'])
        else:
            raise ValueError('unsupported loss type: %s' % loss_type)

        # Detach so the running total does not keep each batch's graph alive.
        ep_loss += loss.detach()

        if cfg['print_bwgraph']:
            # Debug mode: show the backward graph, clean up the logs, and exit.
            from torchviz import make_dot, make_dot_from_trace
            g = make_dot(loss, params=dict(classifier.named_parameters()))
            g.view('pointnet_mgf')
            print('classifier parameters: %d' % int(count_parameters(classifier)))
            os.system('rm -r runs/%s' % writer.logdir.split('/', 1)[1])
            os.system('rm -r tb_logs/%s' % writer.logdir.split('/', 1)[1])
            import sys
            sys.exit()

        # --- backward / optimise ---------------------------------------------
        loss.backward()
        optimizer.step()

        # --- compute performance ---------------------------------------------
        correct = pred_choice.eq(target.data.int()).sum()
        acc = correct.item() / float(target.size(0))

        # The small epsilon keeps the ratios defined when there are no
        # true positives.
        tp = torch.mul(pred_choice.data, target.data.int()).sum().item() + 0.00001
        fp = pred_choice.gt(target.data.int()).sum().item()
        fn = pred_choice.lt(target.data.int()).sum().item()

        iou = float(tp) / (tp + fp + fn)
        prec = float(tp) / (tp + fp)
        recall = float(tp) / (tp + fn)

        print('[%d: %d/%d] train loss: %f acc: %f iou: %f' \
              % (epoch, i_batch, num_batch, loss.item(), acc, iou))

        mean_prec = torch.cat((mean_prec, torch.tensor([prec])), 0)
        mean_recall = torch.cat((mean_recall, torch.tensor([recall])), 0)
        mean_acc = torch.cat((mean_acc, torch.tensor([acc])), 0)
        mean_iou = torch.cat((mean_iou, torch.tensor([iou])), 0)
        n_iter += 1

    # Guard the division for an empty dataloader.
    epoch_loss = ep_loss / max(i_batch + 1, 1)
    writer.add_scalar('train/epoch_loss', epoch_loss, epoch)

    return mean_acc, mean_prec, mean_iou, mean_recall, epoch_loss, n_iter
def main():
    """Main training entrypoint function.

    Parses the command line, builds the MPII pose model and data loaders,
    wires up telemetry/reporting (console, optional folder and showoff
    outputs), then alternates training and validation passes for
    ``args.epochs`` epochs. When an output directory is configured, the
    latest checkpoint is written after every epoch, along with the best one
    so far.
    """
    args = parse_args()
    seed_random_number_generators(args.seed)

    epochs = args.epochs
    batch_size = args.batch_size
    use_train_aug = not args.no_aug
    out_dir = args.out_dir
    base_model = args.base_model
    dilate = args.dilate
    truncate = args.truncate
    initial_lr = args.lr
    schedule_milestones = args.schedule_milestones
    schedule_gamma = args.schedule_gamma

    experiment_id = datetime.datetime.now().strftime('%Y%m%d-%H%M%S%f')
    exp_out_dir = os.path.join(out_dir, experiment_id) if out_dir else None

    print('Experiment ID: {}'.format(experiment_id))

    ####
    # Model
    ####

    model_desc = {
        'base': base_model,
        'dilate': dilate,
        'truncate': truncate,
        'output_strat': args.output_strat,
        'preact': args.preact,
        'reg': args.reg,
        'reg_coeff': args.reg_coeff,
        'hm_sigma': args.hm_sigma,
    }
    model = build_mpii_pose_model(**model_desc)
    model.cuda()

    input_size = model.image_specs.size

    ####
    # Data
    ####

    train_data = MPIIDataset('/datasets/mpii', 'train', use_aug=use_train_aug,
                             image_specs=model.image_specs,
                             max_length=args.train_samples)
    train_loader = DataLoader(train_data, batch_size, num_workers=4,
                              pin_memory=True, shuffle=True)

    val_data = MPIIDataset('/datasets/mpii', 'val', use_aug=False,
                           image_specs=model.image_specs)
    val_loader = DataLoader(val_data, batch_size, num_workers=4,
                            pin_memory=True)

    ####
    # Metrics and visualisation
    ####

    train_eval = PCKhEvaluator()
    val_eval = PCKhEvaluator()

    def eval_metrics_for_batch(evaluator, batch, norm_out):
        """Evaluate and accumulate performance metrics for batch."""
        norm_out = norm_out.type(torch.DoubleTensor)

        # Coords in original MPII dataset space
        orig_out = torch.bmm(norm_out, batch['transform_m']).add_(
            batch['transform_b'].expand_as(norm_out))

        norm_target = batch['part_coords'].double()
        orig_target = torch.bmm(norm_target, batch['transform_m']).add_(
            batch['transform_b'].expand_as(norm_target))

        head_lengths = batch['normalize'].double()

        evaluator.add(orig_out, orig_target, batch['part_mask'], head_lengths)

    reporting = Reporting(train_eval, val_eval)
    tel = reporting.telemetry

    reporting.setup_console_output()

    if exp_out_dir:
        reporting.setup_folder_output(exp_out_dir)

        # Keep a record of the exact CLI arguments next to the outputs.
        with open(os.path.join(exp_out_dir, 'cli_args.json'), 'w') as f:
            json.dump(vars(args), f, sort_keys=True, indent=2)

    if args.showoff:
        import pyshowoff

        with open('/etc/hostname', 'r') as f:
            hostname = f.read().strip()

        client = pyshowoff.Client('http://' + args.showoff)
        notebook_title = '[{}] Human pose ({}-d{}-t{}, {}, {}@{:.1e}, reg={})'.format(
            hostname, base_model, dilate, truncate, args.output_strat,
            args.optim, args.lr, args.reg)
        notebook = client.add_notebook(notebook_title).result()

        for tag_name in args.tags:
            notebook.add_tag(tag_name)

        reporting.setup_showoff_output(notebook)

        progress_frame = notebook.add_frame('Progress', bounds={
            'x': 0,
            'y': 924,
            'width': 1920,
            'height': 64
        }).result()
    else:
        progress_frame = None

    # Set constant values
    tel['experiment_id'].set_value(experiment_id)
    tel['args'].set_value(vars(args))

    # Generate a Graphviz graph to visualise the model
    dummy_data = torch.cuda.FloatTensor(1, 3, input_size, input_size).uniform_(0, 1)
    out_var = model(Variable(dummy_data, requires_grad=False))
    if isinstance(out_var, list):
        # For list outputs, only the last entry is visualised.
        out_var = out_var[-1]
    tel['model_graph'].set_value(
        make_dot(out_var, dict(model.named_parameters())))
    del dummy_data

    best_val_acc_meter = tele.meter.MaxValueMeter(skip_reset=True)

    ####
    # Optimiser
    ####

    # Initialize optimiser and learning rate scheduler
    if args.optim == '1cycle':
        optimizer = optim.SGD(model.parameters(), lr=0)
        scheduler = make_1cycle(optimizer, epochs * len(train_loader),
                                lr_max=initial_lr, momentum=0.9)
    else:
        if args.optim == 'sgd':
            optimizer = optim.SGD(model.parameters(), lr=initial_lr, momentum=0.9)
        elif args.optim == 'rmsprop':
            optimizer = optim.RMSprop(model.parameters(), lr=initial_lr)
        else:
            raise Exception('unrecognised optimizer: {}'.format(args.optim))

        scheduler = lr_scheduler.MultiStepLR(
            optimizer, milestones=schedule_milestones, gamma=schedule_gamma)

    # `vis` will hold a few samples for visualisation
    vis = {}

    ####
    # Training
    ####

    def train(epoch):
        """Do a full pass over the training set, updating model parameters."""
        if hasattr(scheduler, 'step'):
            scheduler.step(epoch)

        model.train()
        samples_processed = 0

        with progressbar.ProgressBar(max_value=len(train_data)) as bar:
            timed_batches = generator_timer(enumerate(train_loader),
                                            tel['train_data_load_time'])
            for i, batch in timed_batches:
                if hasattr(scheduler, 'batch_step'):
                    scheduler.batch_step()

                with timer(tel['train_data_transfer_time']):
                    in_var = Variable(batch['input'].cuda(), requires_grad=False)
                    target_var = Variable(batch['part_coords'].cuda(),
                                          requires_grad=False)
                    mask_var = Variable(
                        batch['part_mask'].type(torch.cuda.FloatTensor),
                        requires_grad=False)

                with timer(tel['train_forward_time']):
                    out_var = model(in_var)

                with timer(tel['train_criterion_time']):
                    loss = model.forward_loss(out_var, target_var, mask_var)

                    if np.isnan(loss.data[0]):
                        # Dump everything needed to reproduce the NaN, then abort.
                        state = {
                            'state_dict': model.state_dict(),
                            'model_desc': model_desc,
                            'optimizer': optimizer.state_dict(),
                            'epoch': epoch,
                            'input': in_var.data,
                            'target': target_var.data,
                            'mask': mask_var.data,
                        }
                        torch.save(state, 'model_dump.pth')
                        raise Exception('training loss should not be nan')

                    tel['train_loss'].add(loss.data[0])

                with timer(tel['train_eval_time']):
                    coords = model.compute_coords(out_var)
                    eval_metrics_for_batch(train_eval, batch, coords)

                with timer(tel['train_backward_time']):
                    optimizer.zero_grad()
                    loss.backward()

                with timer(tel['train_optim_time']):
                    optimizer.step()

                samples_processed += batch['input'].size(0)
                bar.update(samples_processed)

                if i == 0:
                    # Keep the first batch around for visualisation.
                    vis['train_images'] = batch['input']
                    vis['train_preds'] = coords
                    vis['train_masks'] = batch['part_mask']
                    vis['train_coords'] = batch['part_coords']
                    vis['train_heatmaps'] = model.heatmaps.data.cpu()

                if progress_frame is not None:
                    so_far = epoch * len(train_data) + samples_processed
                    total = epochs * len(train_data)
                    notebook.set_progress(so_far / total)
                    progress_frame.progress(so_far, total)

    def validate(epoch):
        '''Do a full pass over the validation set, evaluating model performance.'''
        model.eval()
        val_preds = torch.DoubleTensor(len(val_data), 16, 2)
        samples_processed = 0

        with progressbar.ProgressBar(max_value=len(val_data)) as bar:
            for i, batch in enumerate(val_loader):
                in_var = Variable(batch['input'].cuda(), volatile=True)
                target_var = Variable(batch['part_coords'].cuda(), volatile=True)
                mask_var = Variable(
                    batch['part_mask'].type(torch.cuda.FloatTensor),
                    volatile=True)

                out_var = model(in_var)
                loss = model.forward_loss(out_var, target_var, mask_var)
                tel['val_loss'].add(loss.data[0])

                coords = model.compute_coords(out_var)
                eval_metrics_for_batch(val_eval, batch, coords)

                # Write this batch's predictions, mapped through the batch's
                # transform, directly into its slice of ``val_preds``.
                preds = coords.double()
                pos = i * batch_size
                orig_preds = val_preds[pos:(pos + preds.size(0))]
                torch.baddbmm(batch['transform_b'], preds, batch['transform_m'],
                              out=orig_preds)

                samples_processed += batch['input'].size(0)
                bar.update(samples_processed)

                if i == 0:
                    # Keep the first batch around for visualisation.
                    vis['val_images'] = batch['input']
                    vis['val_preds'] = coords
                    vis['val_masks'] = batch['part_mask']
                    vis['val_coords'] = batch['part_coords']
                    vis['val_heatmaps'] = model.heatmaps.data.cpu()

        tel['val_preds'].set_value(val_preds.numpy())

    def publish_samples(prefix, dataset):
        """Draw skeletons on up to 16 stored images and publish them."""
        images = []
        for idx in range(min(16, vis[prefix + '_images'].size(0))):
            img = model.image_specs.unconvert(vis[prefix + '_images'][idx], dataset)
            coords = (vis[prefix + '_preds'][idx] + 1) * (input_size / 2)
            draw_skeleton(img, coords, vis[prefix + '_masks'][idx])
            images.append(img)
        tel[prefix + '_sample'].set_value(images)

    print('Entering the main training loop')

    for epoch in range(epochs):
        print('> Epoch {:3d}/{:3d}'.format(epoch + 1, epochs))

        tel['epoch'].set_value(epoch)
        tel['epoch_time'].reset()

        print('Training pass...')
        train(epoch)
        print('Validation pass...')
        validate(epoch)

        publish_samples('train', train_data)
        publish_samples('val', val_data)

        def visualise_heatmaps(key):
            heatmap_images = []
            for i in range(min(16, vis[key].size(0))):
                lwrist_hm = vis[key][i, PCKhEvaluator.JOINT_NAMES.index('lwrist')]
                rwrist_hm = vis[key][i, PCKhEvaluator.JOINT_NAMES.index('rwrist')]
                lwrist_hm = (lwrist_hm / lwrist_hm.max()).clamp_(0, 1)
                rwrist_hm = (rwrist_hm / rwrist_hm.max()).clamp_(0, 1)
                # Stack order: right wrist, an all-zero plane, left wrist.
                img = ToPILImage()(torch.stack(
                    [rwrist_hm, lwrist_hm.clone().zero_(), lwrist_hm], 0))
                heatmap_images.append(img)
            tel[key].set_value(heatmap_images)

        visualise_heatmaps('train_heatmaps')
        visualise_heatmaps('val_heatmaps')

        val_acc = val_eval.meters['total_mpii'].value()[0]
        is_best = best_val_acc_meter.add(val_acc)

        if exp_out_dir:
            state = {
                'state_dict': model.state_dict(),
                'model_desc': model_desc,
                'optimizer': optimizer.state_dict(),
                'epoch': epoch + 1,
                'val_acc': val_acc,
            }
            torch.save(state, os.path.join(exp_out_dir, 'model.pth'))

            if is_best:
                torch.save(state, os.path.join(exp_out_dir, 'model-best.pth'))
                tel['best_val_preds'].set_value(tel['val_preds'].value())

        tel.step()
        train_eval.reset()
        val_eval.reset()
        print()
self.linear.bias.data.fill_(1) self.sq = torch.nn.Sequential() self.sq.add_module("linear", self.linear) def forward(self, xo): x = self.sq(xo.reshape(1, 1)**2) print(self.sq.named_parameters()) dot = make_dot(x, params=dict(self.sq.named_parameters())) print("WTF") print(dot) dot.render(view=True) return x model = torch.nn.Sequential() linear = torch.nn.Linear(1, 1) model.add_module("linear", linear) #model=Model() x = Variable(torch.tensor([[1.0]]), requires_grad=True) #model(x) #loss=model(x.reshape(1,1)**2) loss = model(x) print(loss) dot = make_dot(model(x), params=dict(model.named_parameters())) #dot=make_dot(loss, params=dict(model.sq.named_parameters())) #dot.render(view=True) #for name,parameter in model.named_parameters(): # print("GRAD",name,parameter.size(),parameter.grad)
import torch
from torch.autograd import Variable
from torchviz import make_dot
import matplotlib.pyplot as plt
from src.core import CHAModule

# Build the network and push one random (1, 120) sample through it.
model = CHAModule.MyNet1(120, 2)
sample_input = Variable(torch.randn(1, 120))
prediction = model(sample_input)

# make_dot is given the mean of the output, so the graph has a scalar root.
named_params = dict(model.named_parameters())
graph = make_dot(prediction.mean(), params=named_params)
graph.view()

plt.show()
activation = self.decoder_hidden_layer(code) activation = torch.relu(activation) activation = self.decoder_output_layer(activation) reconstructed = torch.relu(activation) return reconstructed device = torch.device('cuda') if torch.cuda.is_available() else torch.device( 'cpu') print(device) # 28 by 28 pixels of MNIST=784 model = AE(input_shape=784).to(device) input = torch.randn(size=[1, 784]).to(device) dot = torchviz.make_dot(model(input), params=dict(model.named_parameters())) dot.format = 'svg' dot.render(filename='simple_encoder_decoder_graph', directory='../images') optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) criterion = torch.nn.MSELoss() data_transoformer = torchvision.transforms.Compose( [torchvision.transforms.ToTensor()]) # [60000, 28, 28]. train_data = torchvision.datasets.MNIST(root='../data', train=True, transform=data_transoformer, download=True)
).splitlines()[:2] focal_length = float(intrinsic_line_0[2:].split(',')[0]) cx = float(intrinsic_line_0[2:-2].split(',')[2].strip()) cy = float(intrinsic_line_1[1:-2].split(',')[2].strip()) intrinsic = torch.tensor( [[focal_length, 0, cx, 0], [0, focal_length, cy, 0], [0, 0, 1, 0], [0, 0, 0, 1]], device=device) return intrinsic if __name__ == '__main__': from torchviz import make_dot, make_dot_from_trace import pyexr import graphviz model = project(11, torch.tensor(3.)) dm_path = Path( "runs" ) / "02020244_fast_dev" / "vis" / "00000" / "val_0492_19_depthmap.exr" depth_map = pyexr.open(str(dm_path)).get("Z")[:, :, 0] depth_map = torch.from_numpy(depth_map).to(device) model.to(device) pointcloud = model.depthmap_to_gridspace(depth_map).reshape(-1, 3).unsqueeze(0) voxelized_occ = model(pointcloud) test = model( model.depthmap_to_gridspace(depth_map).reshape(-1, 3).unsqueeze(0)) a = make_dot(test, params=dict(model.named_parameters())) a.render(filename='backwards_intrinsic.png', format='png') #visualize_point_list(pointcloud, output_pt_cloud_path)