for m in modes: cy(m, len(C[m]['current_indicies']), len(C[m]['seen_indicies']), dp(C[m]['runtime'])) print( len( set(C['train']['seen_indicies']).intersection( set(C['val']['seen_indicies']))), len(C['train']['seen_indicies'])) images = [] for k in ['in', 'dd']: if shape(chain_net.A[k])[1] == 6: n0 = z55( utils.cuda_to_rgb_image(chain_net.A[k][:, :3, :, :])) m0 = z55( utils.cuda_to_rgb_image(chain_net.A[k][:, -3:, :, :])) images.append(n0) else: m0 = z55(utils.cuda_to_rgb_image(chain_net.A[k])) n0 = None if k == 'dd': threshold = A['bbox_threshold'] #85#input_int('threshold') m0[m0 < threshold] = threshold m0 = z55(m0) m0 = draw_bounding_boxes0(m0, threshold=threshold) images.append(m0) if shape(targets)[1] == 6: n1 = z55(utils.cuda_to_rgb_image(targets[:, :3, :, :]))
if False:#shape(squeezeNet.A[k])[1] == 6: n0 = z55(utils.cuda_to_rgb_image(squeezeNet.A[k][:,:3,:,:])) m0 = z55(utils.cuda_to_rgb_image(squeezeNet.A[k][:,-3:,:,:])) images.append(n0) else: m0 = z55(utils.cuda_to_rgb_image(squeezeNet.A[k])) n0 = None if False:#k == 'dd': threshold = A['bbox_threshold'] #85#input_int('threshold') m0[m0<threshold] = threshold m0 = z55(m0) m0 = draw_bounding_boxes0(m0,threshold=threshold) images.append(m0) """ m1 = z55(utils.cuda_to_rgb_image(targets)) #m1 = draw_bounding_boxes0(m1) images.append(m1) images.append(utils.cuda_to_rgb_image(squeezeNet.A['out'])) #images = [images[0],images[1],images[3],images[2],images[4]] if A['save_images']: image_list.append(deepcopy(images)) """ if not n0 is None: images.append(n0) if not n1 is None: images.append(n1) if not m0 is None: images.append(m0)
if A['time']['to_print'].rcheck(): for m in modes: cy(m, len(C[m]['current_indicies']), len(C[m]['seen_indicies']), dp(C[m]['runtime'])) print( len( set(C['train']['seen_indicies']).intersection( set(C['val']['seen_indicies']))), len(C['train']['seen_indicies'])) images = [] for k in []: # [0,4]:#,1,2,3]: #try: images.append(z55(utils.cuda_to_rgb_image(chain_net.A[k]))) #except: # pass images.append(utils.cuda_to_rgb_image(targets)) #mi(np.concatenate(images,axis=1),'images '+mode) s = 'images ' + mode figure(s, figsize=(A['figure_scale'] * len(images) * 3, 3)) clf() mi(get_image_row(images, blank_width=10), s) spause() #print('[%d, %5d] loss: %.6f' % # (C[mode]['epoch'], C[mode]['ctr'], C[mode]['loss_list_y'][-1])) figure('loss', figsize=(A['figure_scale'] * 3, A['figure_scale'] * 7)) clf()
#model_fp32_prepared = SqueezeNet_input('SqueezeNet_input int8',3) #model_fp32_prepared.load_state_dict(torch.load(opjD('temp.net'))) squeezeNet_prepared.eval() model_int8 = torch.quantization.convert(squeezeNet_prepared) t0 = time.time() print(model_int8.state_dict()) res = model_int8(inputs) print(time.time() - t0) t0 = time.time() res = squeezeNet_prepared(inputs) print(time.time() - t0) squeezeNet_prepared.train() images = [] m1 = z55(utils.cuda_to_rgb_image(targets)) images.append(m1) images.append(utils.cuda_to_rgb_image(res)) #images = [images[0],images[1],images[3],images[2],images[4]] if A['save_images']: image_list.append(deepcopy(images)) """ if not n0 is None: images.append(n0) if not n1 is None: images.append(n1) if not m0 is None: images.append(m0) if not m1 is None:
def _strip_ddp_prefix(state_dict, prefix='module.'):
    """Return a copy of *state_dict* with a leading *prefix* removed from keys.

    ``DistributedDataParallel`` registers the wrapped network under the
    attribute ``module``, so a state dict taken from the wrapper has every
    key prefixed with ``module.``.  Keys without the prefix are kept as-is,
    which lets the same loader accept checkpoints saved from either the
    wrapped or the bare model.  Key order is preserved.
    """
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def _select_best_checkpoint(paths):
    """Pick the checkpoint to resume from out of *paths*.

    File stems that parse as a float are treated as a loss value and the
    lowest one wins (the original selection rule).  If no stem parses as a
    number, the most recently modified file is returned instead.

    Raises:
        FileNotFoundError: if *paths* is empty.
    """
    if not paths:
        raise FileNotFoundError('no .pth checkpoints to choose from')
    scored = []
    for path in paths:
        stem = os.path.basename(path).replace('.pth', '')
        try:
            scored.append((float(stem), path))
        except ValueError:
            # Not a loss-named file (e.g. a time-stamped checkpoint).
            continue
    if scored:
        return min(scored, key=lambda item: item[0])[1]
    return max(paths, key=os.path.getmtime)


def example(rank, world_size):
    """Run one DistributedDataParallel training worker.

    Joins an NCCL process group on localhost:12355, loads the image and
    segmentation arrays from ``data_with_flip.h5py``, builds ``Model``,
    wraps it in ``DistributedDataParallel`` and trains it with Adam and an
    MSE loss.  Checkpoints and the running loss history are written to a
    per-rank directory every ``save_time`` seconds.

    Args:
        rank: index of this worker in the process group; it is also used
            as the CUDA device index for the model and the batches.
        world_size: total number of workers in the process group.

    The training loop is ``while True`` and has no exit condition, so this
    function does not return on its own.
    """
    cy('rank:', rank, '/', world_size)

    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    torch.distributed.init_process_group(
        backend="nccl",
        rank=rank,
        world_size=world_size,
    )
    assert torch.distributed.is_initialized()

    # Example command line kept from the original source:
    #   python k3/V/SegNet/v3/segnet/main.py \
    #       --batch_size 128 \
    #       --print_time 10 \
    #       --net_path Desktop/segnet_v3 \
    #       --load_net False \
    #       --device 0

    Defaults = {
        'save_time': 300,
        'print_time': 60,
        'freq_time': 30,
        'batch_size': 16,
        'load_net': False,
        'learning_rate': 0.001,
        'net_path': None,
        'figure_scale': 1/3,
        'max_time': int(3600*seconds),
        'max_loss_ctr': 100,
        'device': rank,
    }

    # Command-line parsing is switched off in the source; the defaults are
    # used directly.
    if False:  # not interactive():
        A = get_Arguments2(Defaults, f=__file__)
    else:
        A = Defaults

    # Each rank writes to its own directory.
    A['net_path'] = opjD(
        'net_weights_rank' + str(rank) + 'of' + str(world_size)
        + '-bs' + str(A['batch_size']) + '-sync'
    )
    cy(A['net_path'])

    _Data = h5r(opjD('data_with_flip.h5py'))

    #device = torch.device(d2n('cuda:',A['device']) if torch.cuda.is_available() else 'cpu')
    device = rank

    class _Chain_net(nn.Module):
        """Four chained SqueezeNet stages; not instantiated below."""

        def __init__(self):
            super().__init__()
            self.aa = SqueezeNet('aa')
            self.bb = SqueezeNet('bb')
            self.cc = SqueezeNet('cc')
            self.dd = SqueezeNet('dd')
            self.A = {}  # activations recorded by forward(), keyed 0..4
            # NOTE(review): one BatchNorm2d instance is shared by all stages.
            self.bn3 = nn.BatchNorm2d(3)

        def forward(self, x):
            self.A[0] = torch.clone(x)
            x = self.bn3(x)
            x = self.aa(x)
            self.A[1] = torch.clone(x)
            x = self.bn3(x)
            x = self.bb(x)
            self.A[2] = torch.clone(x)
            x = self.bn3(x)
            x = self.cc(x)
            self.A[3] = torch.clone(x)
            x = self.bn3(x)
            x = self.dd(x)
            self.A[4] = torch.clone(x)
            return torch.flatten(x, 1)

    num_features = 3
    width = 140
    n = 100

    class Model(nn.Module):
        """Seven Conv2d/ReLU/SyncBatchNorm/Upsample groups in sequence."""

        def __init__(self):
            super().__init__()
            # Channel plan: num_features -> n (x6) -> num_features.  Layers
            # are appended in the same order as the original hand-written
            # list, so nn.Sequential indices (and state-dict keys) match.
            channels = [num_features] + [n] * 6 + [num_features]
            layers = []
            for c_in, c_out in zip(channels, channels[1:]):
                layers += [
                    nn.Conv2d(c_in, c_out, kernel_size=3, stride=2),
                    nn.ReLU(inplace=True),
                    SyncBatchNorm(c_out),
                    # Resize back to width x width after each strided conv.
                    nn.Upsample((width, width), mode='nearest'),
                ]
            self.a = nn.Sequential(*layers)

        def forward(self, x):
            x = self.a(x)
            return torch.flatten(x, 1)

    def find_best_net(path):
        """Return the checkpoint in *path* chosen by _select_best_checkpoint."""
        fs = sggo(path, '*.pth')
        kprint(fs)
        return _select_best_checkpoint(fs)

    A['sys.argv'] = ' '.join(sys.argv[1:])
    A['time'] = {}
    A['time']['to_save'] = Timer(A['save_time'])
    A['time']['to_print'] = Timer(A['print_time'])
    A['time']['to_print'].trigger()
    A['time']['to_print_frequency'] = Timer(A['freq_time'])
    A['time']['to_exit'] = Timer(A['max_time'])
    A['ctr0'] = 0

    # Per-mode bookkeeping template; deep-copied for 'train' and 'val'.
    Mode = {
        'ctr': 0,
        'epoch': 0,
        'running_loss': 0,
        'loss_ctr': 0,
        'loss_list_x': [],
        'loss_list_y': [],
        'loss_list_t': [],
        'all_indicies': [],
        'current_indicies': [],
        'seen_indicies': [],
        'loss_color': 'b',
        'runtime': 0,
    }
    C = {
        'train': deepcopy(Mode),
        'val': deepcopy(Mode),
        'Inputs': np.concatenate((_Data['img'], _Data['img_flip']), axis=0),
        'Targets': np.concatenate((_Data['seg'], _Data['seg_flip']), axis=0),
    }
    C['val']['loss_color'] = 'r'
    C['train']['all_indicies'], C['val']['all_indicies'] = \
        utils.get_train_and_val_indicies(len(C['Targets']), 10)
    cg("C['val']['all_indicies'] =", len(C['val']['all_indicies']))
    cg("C['train']['all_indicies'] =", len(C['train']['all_indicies']))

    modes = ['train', 'val']

    chain_net = Model()  # Chain_net()
    if A['load_net']:
        best_path = find_best_net(A['net_path'])
        cg("Loading net from", best_path)
        # Checkpoints are saved from the DDP wrapper (keys start with
        # 'module.') but loaded into the bare model, so strip the prefix.
        # Load to CPU first; the model is moved to the rank's device below.
        state = torch.load(best_path, map_location='cpu')
        chain_net.load_state_dict(_strip_ddp_prefix(state))

    #chain_net = torch.nn.DataParallel(chain_net)
    chain_net.to(device)
    chain_net = torch.nn.parallel.DistributedDataParallel(
        chain_net,
        device_ids=[device],
        #find_unused_parameters=True,
    )

    criterion = nn.MSELoss()
    optimizer = optim.Adam(chain_net.parameters(), A['learning_rate'])

    t00 = time.time()

    while True:  # A['ctr0'] < A['max_steps']: #not A['time']['to_exit'].rcheck():
        for mode in ['train']:  # ,'train','train','val']: #modes:
            A['ctr0'] += 1
            t0 = time.time()

            # Start a new epoch when fewer than two batches of indices remain.
            if len(C[mode]['current_indicies']) < A['batch_size']*2:
                C[mode]['current_indicies'] = C[mode]['all_indicies'].copy()
                np.random.shuffle(C[mode]['current_indicies'])
                C[mode]['epoch'] += 1

            C[mode]['ctr'] += 1
            if A['time']['to_print_frequency'].rcheck():
                print('rank:', rank, 'ctr0:', A['ctr0'], mode, '@',
                      dp(A['batch_size']*C[mode]['ctr']/A['time']['to_print_frequency'].time_s),
                      'Hz')
                C[mode]['ctr'] = 0

            # Splicing is requested for about half of the training batches.
            splice = mode == 'train' and rnd() < 0.5

            b_in, b_out = utils.get_batch(
                C[mode]['current_indicies'],
                C[mode]['seen_indicies'],
                A['batch_size'],
                C['Inputs'],
                C['Targets'],
                splice,
            )
            inputs = torch.from_numpy(b_in).float().to(device)
            targets = torch.from_numpy(b_out).float().to(device)

            optimizer.zero_grad()
            outputs = chain_net(inputs)
            loss = criterion(outputs, torch.flatten(targets, 1))
            if mode == 'train':
                loss.backward()
                optimizer.step()

            C[mode]['running_loss'] += loss.item()
            C[mode]['loss_ctr'] += 1

            if A['time']['to_save'].rcheck():
                cg("Saving net to", A['net_path'])
                os.makedirs(A['net_path'], exist_ok=True)
                torch.save(
                    chain_net.state_dict(),
                    opj(
                        A['net_path'],
                        d2p(
                            time_str(),
                            #int(C['train']['loss_list_x'][-1]),
                            #int(C['train']['loss_list_y'][-1]),
                            'pth')
                    )
                )
                so(opj(A['net_path'], 'loss'),
                    {
                        'loss_list_x': C['train']['loss_list_x'],
                        'loss_list_y': C['train']['loss_list_y'],
                        'loss_list_t': C['train']['loss_list_t'],
                    }
                )

            # Every max_loss_ctr steps, record the mean loss of the window.
            if C[mode]['loss_ctr'] >= A['max_loss_ctr']:
                C[mode]['loss_ctr'] = 0
                C[mode]['loss_list_y'].append(C[mode]['running_loss']/A['max_loss_ctr'])
                C[mode]['loss_list_x'].append(A['ctr0'])
                C[mode]['loss_list_t'].append(time.time()-t00)
                C[mode]['running_loss'] = 0.0

            if A['time']['to_print'].rcheck():
                # NOTE(review): the source arrived with its indentation
                # flattened, so the extent of this `if False:` guard is
                # ambiguous.  It is taken to cover only the per-mode stats
                # print; confirm that the plotting below should stay active.
                if False:
                    for m in modes:
                        cy(m, len(C[m]['current_indicies']),
                           len(C[m]['seen_indicies']),
                           dp(C[m]['runtime']))

                images = [utils.cuda_to_rgb_image(targets)]
                s = 'images '+mode
                figure(s, figsize=(A['figure_scale']*len(images)*3, 3))
                clf()
                mi(get_image_row(images, blank_width=10), s)
                spause()

                figure('loss', figsize=(A['figure_scale']*3, A['figure_scale']*7))
                clf()
                for _mode in modes:
                    plot(C[_mode]['loss_list_x'],
                         C[_mode]['loss_list_y'],
                         C[_mode]['loss_color'])
                    spause()

                figure('seen_indicies '+mode,
                       figsize=(A['figure_scale']*3, A['figure_scale']*2))
                clf()
                hist(C[mode]['seen_indicies'])
                spause()

            C[mode]['runtime'] += time.time()-t0

    # NOTE(review): unreachable while the loop above has no exit condition.
    print('\nDone.\n\n')