import numpy as np
from tqdm import tqdm
from sklearn.metrics import f1_score, precision_score, recall_score
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, GRU, Input
from keras.models import Model

# Losses (providing focal_loss) is defined elsewhere in this repo.


class Stod:
    def __init__(self, space_params):
        self.params = space_params
        self.seq = self.params['seq']
        self.features = self.params['features']
        self.loss = Losses()

    def build_model(self):
        sequence_input = Input(shape=(self.seq, self.features), dtype='float32',
                               name='sequence_input')
        gru = GRU(self.params['gru'], activation='relu', name='GRU')(sequence_input)
        fc1 = Dense(self.params['d1'], activation='relu')(gru)
        fc1 = Dropout(self.params['drp'])(fc1)
        fc2 = Dense(self.params['classes'], activation='softmax')(fc1)
        model = Model(sequence_input, fc2)
        # Compile with the Adam optimizer built from the searched learning rate;
        # the original passed self.params['op'] here and left `adam` unused,
        # which silently ignored the 'lr' hyperparameter.
        adam = optimizers.Adam(lr=self.params['lr'])
        model.compile(loss=self.loss.focal_loss(), optimizer=adam, metrics=['acc'])
        return model

    def fit_model(self, model, inputs):
        # Keep only the weights that achieve the best validation accuracy.
        checkpoint = ModelCheckpoint(self.params['model_name'], monitor='val_acc',
                                     verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]
        result = model.fit(x=inputs['x_train'], y=inputs['y_train'],
                           batch_size=self.params['batch'],
                           epochs=self.params['epochs'], verbose=1,
                           validation_data=(inputs['x_val'], inputs['y_val']),
                           callbacks=callbacks_list)
        return result, model

    def inverse_transform(self, data):
        # Map one-hot / softmax rows back to integer class indices.
        classes = list()
        for item in tqdm(data):
            label_idx = np.argmax(item, axis=0).tolist()
            classes.append(label_idx)
        return classes

    def print_scores(self, model, inputs):
        predicted = model.predict(inputs['x_test'], verbose=1)
        y_pred = self.inverse_transform(predicted)
        y_true = self.inverse_transform(inputs['y_test'])
        wf1 = f1_score(y_true, y_pred, average="weighted")
        wr = recall_score(y_true, y_pred, average="weighted")
        wp = precision_score(y_true, y_pred, average="weighted")
        print(f'WF1- {wf1}')
        print(f'WR- {wr}')
        print(f'WP- {wp}')
        return {'wf1': wf1, 'wr': wr, 'wp': wp}
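# Usage sketch (illustrative only): the hyperparameter values and the random
# dummy data below are assumptions for demonstration, not the repository's
# search space; `inputs` is assumed to hold one-hot encoded labels.
if __name__ == '__main__':
    params = {
        'seq': 100, 'features': 3, 'gru': 64, 'd1': 32, 'drp': 0.3,
        'classes': 5, 'lr': 1e-3, 'batch': 32, 'epochs': 5,
        'model_name': 'stod_best.h5',
    }
    rng = np.random.RandomState(0)
    x = rng.randn(256, params['seq'], params['features']).astype('float32')
    y = np.eye(params['classes'])[rng.randint(0, params['classes'], 256)]
    inputs = {'x_train': x[:160], 'y_train': y[:160],
              'x_val': x[160:208], 'y_val': y[160:208],
              'x_test': x[208:], 'y_test': y[208:]}

    stod = Stod(params)
    model = stod.build_model()
    _, model = stod.fit_model(model, inputs)
    stod.print_scores(model, inputs)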
import os
import re
import time
import warnings

import torch
from torch.autograd import Variable
from torch.utils import data

# ResnetDilatedRgressAndClassifyV2v6v4c1GN, Losses, AverageMeter, utils and
# PerturbedDatastsForRegressAndClassify_pickle_color_v2C1 come from this
# repo's own modules; path, date, date_time and epoch_start are module-level
# globals defined alongside this function.


def train(args):
    global _re_date
    if args.resume is not None:
        re_date = re.compile(r'\d{4}-\d{1,2}-\d{1,2}')
        _re_date = re_date.search(args.resume).group(0)
        result_file = open(path + '/' + date + date_time + ' @' + _re_date + '_' + args.arch + '.log', 'w')
    else:
        _re_date = None
        result_file = open(path + '/' + date + date_time + '_' + args.arch + '.log', 'w')

    # Setup Dataloader
    data_split = 'data1024_greyV2'
    data_path = '/dataset/unwarp_new/train/'
    data_path_validate = '/dataset/unwarp_new/train/' + data_split + '/'
    data_path_test = '/dataset/'
    test_shrink_sub_dir = 'shrink_1024_960/crop/'

    args.arch = 'flat_img_classifyAndRegress_grey'
    args.dataset = data_split

    print(args)
    print(args, file=result_file)
    print('data_split :' + data_split)
    print('data_split :' + data_split, file=result_file)

    n_classes = 2

    '''network'''
    model = ResnetDilatedRgressAndClassifyV2v6v4c1GN(n_classes=n_classes, num_filter=32, BatchNorm='GN', in_channels=3)

    if args.parallel is not None:
        device_ids = list(map(int, args.parallel))
        args.gpu = device_ids[0]
        if args.gpu < 8:
            torch.cuda.set_device(args.gpu)
        model = torch.nn.DataParallel(model, device_ids=device_ids)
        model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        warnings.warn('no gpu, going to sleep!')
        exit()

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.8, weight_decay=1e-12)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate, weight_decay=1e-10)  # 1e-12
    else:
        # The original `assert 'please choice optimizer'` always passed
        # (a non-empty string is truthy); raise instead.
        raise ValueError('please choose an optimizer (SGD or adam)')

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            '''
            model_parameter_dick = {}
            for k in checkpoint['model_state']:
                model_parameter_dick['module.' + k] = checkpoint['model_state'][k]
            model.load_state_dict(model_parameter_dick)
            '''
            # optimizer.load_state_dict(checkpoint['optimizer_state'])
            # 1: why a running error?  2: this altered the optimizer of the
            # original program, because the optimizer changed with the operation
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    loss_fun_classes = Losses(classify_size_average=True, args_gpu=args.gpu)
    loss_fun = loss_fun_classes.loss_fn_v6v8_compareLSC
    loss_classify_fun = loss_fun_classes.loss_fn_binary_cross_entropy_with_logits

    # keyword spelling below matches utils.FlatImg's signature
    FlatImg = utils.FlatImg(args=args, path=path, date=date, date_time=date_time, _re_date=_re_date,
                            data_split=data_split, model=model, reslut_file=result_file,
                            n_classes=n_classes, optimizer=optimizer,
                            loss_fn=loss_fun, loss_classify_fn=loss_classify_fun,
                            data_loader=PerturbedDatastsForRegressAndClassify_pickle_color_v2C1,
                            data_loader_hdf5=None, data_path=data_path,
                            data_path_validate=data_path_validate, data_path_test=data_path_test,
                            data_preproccess=False)
    # , valloaderSet=valloaderSet, v_loaderSet=v_loaderSet

    ''' load data '''
    train_loader = data_loader(data_path, split=data_split, img_shrink=args.img_shrink)
    trainloader = data.DataLoader(train_loader, batch_size=args.batch_size,
                                  num_workers=args.batch_size // 2, shuffle=True)
    # trainloader = FlatImg.loadTrainData(data_split=data_split, is_shuffle=True)
    # FlatImg.loadValidateAndTestData(is_shuffle=True, sub_dir=test_shrink_sub_dir)
    FlatImg.loadTestData(is_shuffle=True, sub_dir=test_shrink_sub_dir)

    train_time = AverageMeter()
    losses = AverageMeter()

    FlatImg.lambda_loss = 0.1
    FlatImg.lambda_loss_classify = 1

    if args.schema == 'train':
        trainloader = FlatImg.loadTrainDataHDF5(data_split=data_split, is_shuffle=True)
        trainloader_len = len(trainloader)

        for epoch in range(epoch_start, args.n_epoch):
            # step learning-rate schedule: decay at epochs 10, 20, 30 and 40
            if 10 <= epoch < 20:
                optimizer.param_groups[0]['lr'] = 0.5 * args.l_rate
            elif 20 <= epoch < 30:
                optimizer.param_groups[0]['lr'] = 0.1 * args.l_rate
            elif 30 <= epoch < 40:
                optimizer.param_groups[0]['lr'] = 0.05 * args.l_rate
            elif epoch >= 40:
                optimizer.param_groups[0]['lr'] = 0.01 * args.l_rate

            print('* lambda_loss :' + str(FlatImg.lambda_loss) + '\t' + 'learning_rate :' + str(optimizer.param_groups[0]['lr']))
            print('* lambda_loss :' + str(FlatImg.lambda_loss) + '\t' + 'learning_rate :' + str(optimizer.param_groups[0]['lr']), file=result_file)

            begin_train = time.time()
            loss_classify_list = []
            loss_regress_list = []
            loss_l1_list = []
            loss_local_list = []
            loss_CS_list = []
            loss_list = []

            model.train()
            for i, (images, labels, labels_classify) in enumerate(trainloader):
                images = Variable(images)
                labels = Variable(labels.cuda(args.gpu))
                labels_classify = Variable(labels_classify.cuda(args.gpu))

                optimizer.zero_grad()
                outputs, outputs_classify = FlatImg.model(images, is_softmax=False)
                outputs_classify = outputs_classify.squeeze(1)

                loss_l1, loss_local, loss_CS = loss_fun(outputs, labels, outputs_classify, labels_classify, size_average=False)
                loss_regress = loss_l1 + loss_local + loss_CS
                loss_classify = loss_classify_fun(outputs_classify, labels_classify)
                loss = FlatImg.lambda_loss * loss_regress + FlatImg.lambda_loss_classify * loss_classify

                losses.update(loss.item())
                loss.backward()
                optimizer.step()

                loss_regress_list.append(loss_regress.item())
                loss_classify_list.append(loss_classify.item())
                loss_list.append(loss.item())
                loss_l1_list.append(loss_l1.item())
                loss_CS_list.append(loss_CS.item())
                loss_local_list.append(loss_local.item())

                if (i + 1) % args.print_freq == 0 or (i + 1) == trainloader_len:
                    list_len = len(loss_list)
                    # identical line goes to stdout and to the log file
                    log_line = ('[{0}][{1}/{2}]\t\t'
                                '[{3:.2f} {4:.4f} {5:.2f}]\t'
                                '[l1:{6:.2f} l:{7:.4f} cs:{8:.4f}\t| {loss_regress:.2f} {loss_classify:.4f}]\t'
                                '{loss.avg:.4f}'.format(
                                    epoch + 1, i + 1, trainloader_len,
                                    min(loss_list), sum(loss_list) / list_len, max(loss_list),
                                    sum(loss_l1_list) / list_len, sum(loss_local_list) / list_len,
                                    sum(loss_CS_list) / list_len,
                                    loss_regress=sum(loss_regress_list) / list_len,
                                    loss_classify=sum(loss_classify_list) / list_len,
                                    loss=losses))
                    print(log_line)
                    print(log_line, file=result_file)

                    del loss_list[:]
                    del loss_regress_list[:]
                    del loss_classify_list[:]
                    del loss_l1_list[:]
                    del loss_CS_list[:]
                    del loss_local_list[:]

            FlatImg.saveModel_epoch(epoch)  # FlatImg.saveModel(epoch, save_path=path)

            model.eval()
            # FlatImg.testModelV2GreyC1_index(epoch, train_time, ['36_2 copy.png', '17_1 copy.png'])
            # exit()
            train_t = time.time() - begin_train
            losses.reset()
            # losses_regress.reset()
            # losses_classify.reset()
            train_time.update(train_t)

            try:
                FlatImg.validateOrTestModelV2GreyC1(epoch, train_t, validate_test='v_l3v3')
                FlatImg.validateOrTestModelV2GreyC1(epoch, 0, validate_test='t')
            except Exception:
                print(' Error: validate or test')
            print('\n')
    elif args.schema == 'test':
        epoch = checkpoint['epoch'] if args.resume is not None else 0
        model.eval()
        FlatImg.validateOrTestModelV2GreyC1(epoch, 0, validate_test='t')
        exit()
    elif args.schema == 'eval':
        epoch = checkpoint['epoch'] if args.resume is not None else 0
        model.eval()
        FlatImg.evalModelGreyC1(epoch, 0, is_scaling=False)
        exit()
    elif args.schema == 'scaling':
        epoch = checkpoint['epoch'] if args.resume is not None else 0
        model.eval()
        FlatImg.validateOrTestModelV2GreyC1(epoch, 0, validate_test='t', is_scaling=True)
        exit()

    m, s = divmod(train_time.sum, 60)
    h, m = divmod(m, 60)
    print("All Train Time : %02d:%02d:%02d\n" % (h, m, s))
    print("All Train Time : %02d:%02d:%02d\n" % (h, m, s), file=result_file)
    result_file.close()
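# Minimal argparse sketch covering the flags train() actually reads. The
# option names are taken from the attribute accesses above; every default
# value here is an assumption for illustration, not the repository's
# configuration (args.gpu, args.arch and args.dataset are set inside train()).
import argparse


def build_parser():
    parser = argparse.ArgumentParser(description='flat image classify-and-regress training')
    parser.add_argument('--resume', default=None, help='path to a saved checkpoint')
    parser.add_argument('--arch', default='flat_img_classifyAndRegress_grey')
    parser.add_argument('--parallel', nargs='+', default=None,
                        help='GPU ids for DataParallel, e.g. --parallel 0 1')
    parser.add_argument('--distributed', action='store_true')
    parser.add_argument('--optimizer', default='adam', choices=['SGD', 'adam'])
    parser.add_argument('--l_rate', type=float, default=2e-4)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--n_epoch', type=int, default=50)
    parser.add_argument('--print_freq', type=int, default=60)
    parser.add_argument('--img_shrink', type=int, default=None)
    parser.add_argument('--schema', default='train', choices=['train', 'test', 'eval', 'scaling'])
    return parser

# train(build_parser().parse_args())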
import os

import numpy as np
import torch
from torch import optim
from tqdm import tqdm

# CVAE, Losses and set_random_seed are defined elsewhere in this repo.


class Cvae:
    def __init__(self, utilities, writer):
        # load config file from previous or new experiments
        self.utilities = utilities
        self.save_dir = utilities.save_dir
        self.conf = utilities.conf
        self.writer = writer
        # creating loss class
        self.loss_set = Losses(self.conf)
        self.cvae = CVAE(self.conf).cuda()
        self.checkpoint = 0

    def load_weights(self):
        print("loading CVAE weights, starting from epoch: " + str(self.conf['CVAE_EPOCH_CHECKPOINT']))
        self.checkpoint = self.conf['CVAE_EPOCH_CHECKPOINT']
        self.cvae.load_state_dict(torch.load(os.path.join(self.save_dir, 'model_cvae.pth')), strict=False)

    def train(self, data_loader, val_loader):
        print("starting CVAE training..")
        set_random_seed(128)
        self.cvae.train(True)
        optimizer = optim.SGD(self.cvae.parameters(), lr=self.conf['CVAE_LR'])

        # warm-up schedule for the KL term: ramp linearly from WARM_UP_TH to 1
        # over the first 70% of all training steps, then hold at 1
        # (np.linspace requires an integer num; the original passed a float).
        warm_up = np.ones(shape=self.conf['EPOCHS'] * len(data_loader))
        ramp_len = int(self.conf['EPOCHS'] * 0.7 * len(data_loader))
        warm_up[0:ramp_len] = np.linspace(self.conf['WARM_UP_TH'], 1, num=ramp_len)

        for epochs in range(self.checkpoint, self.conf['EPOCHS']):
            for idx, (spectrals, (input_color, grey_little, batch_weights)) in \
                    tqdm(enumerate(data_loader), total=len(data_loader)):
                spectrals = spectrals.cuda()
                input_color = input_color.cuda()
                # input_color.requires_grad_()
                lossweights = batch_weights.cuda()
                lossweights = lossweights.view(self.conf['BATCHSIZE'], -1)

                optimizer.zero_grad()
                color_out, mu, logvar, z_grey, z_color, z = self.cvae(color=input_color, inputs=spectrals)
                hist_loss, kl_loss = self.loss_set.cvae_loss(color_out, input_color, lossweights, mu, logvar)
                global_step = epochs * len(data_loader) + idx
                loss = hist_loss + kl_loss * warm_up[global_step]

                if torch.isnan(loss):
                    print("\n\nabort: nan on epoch: ", epochs, idx)
                    print("val hist loss: ", hist_loss)
                    print("val kl: ", kl_loss)
                    exit()

                # log losses and latent histograms
                self.writer.add_scalar('CVAE/prior', kl_loss, global_step)
                self.writer.add_scalar('CVAE/hist', hist_loss.item(), global_step)
                self.writer.add_scalar('CVAE/final', loss.item(), global_step)
                self.writer.add_histogram('z_hist', z[0].cpu().detach().numpy(), global_step)
                self.writer.add_histogram('z_grey_hist', z_grey[0].cpu().detach().numpy(), global_step)
                self.writer.add_histogram('z_color_hist', z_color[0].cpu().detach().numpy(), global_step)

                loss.backward()
                # nn.utils.clip_grad_value_(self.cvae.parameters(), self.conf['CLIP_TH'])
                optimizer.step()

            # validation test
            if epochs % self.conf['TEST_ON_TRAIN_RATE'] == 0 and epochs != 0:
                print("\nexecuting validation on train epoch: " + str(epochs))
                self.test(val_loader, output_name='results_epoch{}'.format(str(epochs)))
                print("\nvalidation completed, back to training")
                self.cvae.train(True)

            # saving weights and checkpoints
            self.utilities.epoch_checkpoint('CVAE', epochs)
            torch.save(self.cvae.state_dict(), '%s/model_cvae.pth' % self.save_dir)

    def test(self, data_loader, output_name="results_cvae"):
        print("starting CVAE testing..")
        self.cvae.train(False)
        self.cvae.eval()

        with torch.no_grad():
            for idx, (spectrals, (batch, grey_little, batch_weights)) in \
                    tqdm(enumerate(data_loader), total=len(data_loader)):
                input_color = batch.cuda()
                spectrals = spectrals.cuda()

                # checking result if encoder samples from posterior (ground truth)
                posterior, _, _, _, _, _ = self.cvae(color=input_color, inputs=spectrals)
                # n = min(posterior.size(0), 16)
                # recon_rgb_n = lab2rgb(grey_little[:n], posterior[:n])
                # rgb_n = lab2rgb(grey_little[:n], input_color[:n])
                # comparison = torch.cat([rgb_n[:n], recon_rgb_n[:n]])
                # writer.add_images(output_name, comparison, idx)

                # checking results if encoder samples from prior (NMIX samplings from gaussian)
                results = []
                for i in range(self.conf['NMIX']):
                    color_out, _, _, _, _, _ = self.cvae(color=None, inputs=spectrals)
                    results.append(color_out.unsqueeze(1))
                results = torch.cat(results, dim=1)
                self.utilities.dump_results(
                    color=results,      # batch of NMIX predictions for AB channels
                    grey=grey_little,   # batch of original grey channel
                    gt=batch,           # batch of gt AB channels
                    file_name=idx,
                    nmix=self.conf['NMIX'],
                    model_name=output_name,
                    tb_writer=self.writer,
                    posterior=posterior)
        print("CVAE testing completed")
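# Usage sketch (illustrative): the config keys below mirror the ones this
# class reads, but every value is an assumption, as are the SummaryWriter log
# directory and the data loaders. `Utilities` is the repo's helper exposing
# .save_dir, .conf, .epoch_checkpoint and .dump_results; train_loader,
# val_loader and test_loader are assumed to yield
# (spectrals, (color, grey_little, batch_weights)) batches.
from torch.utils.tensorboard import SummaryWriter

utilities = Utilities()
utilities.conf.update({
    'CVAE_EPOCH_CHECKPOINT': 0, 'CVAE_LR': 5e-5, 'EPOCHS': 20,
    'WARM_UP_TH': 1e-4, 'BATCHSIZE': 32, 'TEST_ON_TRAIN_RATE': 5, 'NMIX': 8,
})
writer = SummaryWriter('runs/cvae')

cvae_trainer = Cvae(utilities, writer)
# cvae_trainer.load_weights()   # only when resuming from model_cvae.pth
cvae_trainer.train(train_loader, val_loader)
cvae_trainer.test(test_loader)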