with io.open(imgpath, 'rb') as image_file: content = image_file.read() jsonpath=config.pdfdata+"json/"+os.path.splitext(os.path.basename(imgpath))[0]+".json" with open(jsonpath) as f: bounds = json.load(f) bounds=bounds_refine(bounds,imgpath,ref) #print("Characters in Image=",len(bounds)) ds,coords,labels,wordid,seq=get_ds(imgpath,bounds) coordsagg.extend(coords) labelsagg.extend(labels) pageagg.extend([os.path.splitext(os.path.basename(imgpath))[0]]*len(labels)) wordidagg.extend(wordid) sequenceagg.extend(seq) ds_train=DataUtils.EVALIMGDS(label_dict,ds) train_gen = torch.utils.data.DataLoader(ds_train ,batch_size=64,shuffle=False,num_workers =6,pin_memory=True) train_gen =DataUtils.DeviceDataLoader(train_gen, device) result = ModelUtils.evaluate(model,train_gen) print("Accuracy on {} page is {}".format(imgpath,result['val_acc'])) pdf_acc.append(len(bounds)*result['val_acc']) weight.append(len(bounds)) #os.remove(imgpath) #os.remove(jsonpath) train_gen = torch.utils.data.DataLoader(ds_train ,batch_size=64,shuffle=False,num_workers =6,pin_memory=True) train_gen =DataUtils.DeviceDataLoader(train_gen, device) predic=[] for batch in train_gen: images,labels= batch with torch.no_grad(): out = model(images) _, preds = torch.max(out, dim=1) predic.extend(preds.detach().cpu().numpy().tolist())
def RUN(batchsize, lr, num_epochs=50,
        val_image_dir="/home/ubuntu/data/ocr/kdeval/good/images/",
        val_json_dir="/home/ubuntu/data/ocr/kdeval/good/json/"):
    """Fine-tune the checkpointed InceptFC model and validate after every epoch.

    The model weights are loaded from ``config.checkpath``; the children named
    ``conv_block1``, ``conv_block2`` and ``conv1`` are frozen and every other
    child is trained with Adam on the images found in ``config.data_dir_path``.
    After each epoch the mean training loss and the character-weighted
    validation accuracy are printed and written to TensorBoard.

    Args:
        batchsize: Batch size of the fine-tuning data loader.
        lr: Adam learning rate; the weight decay is set to ``lr / 10``.
        num_epochs: Number of fine-tuning epochs.
        val_image_dir: Directory holding the validation page images.
        val_json_dir: Directory holding one ``<image stem>.json`` bounds file
            per validation image.

    Returns:
        None. Results are reported through ``print`` and the TensorBoard
        writer only.
    """
    device = config.device
    if device is None:
        device = utils.get_default_device()

    label_dict = utils.create_label_dict(config.symbols)

    model = InceptFC.FC_Model()
    #model=Resnet.ResNet50(3,97)
    model.to(device)
    print(config.checkpath)
    checkpoint = torch.load(config.checkpath, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print("MODEL LOADED")

    _run_set_trainable(model)
    # Only hand the unfrozen parameters to the optimizer.
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=lr,
        weight_decay=lr / 10.)

    finepath = config.data_dir_path
    valid_paths = [
        join(val_image_dir, f) for f in listdir(val_image_dir)
        if isfile(join(val_image_dir, f))
    ]

    # Directory for per-epoch checkpoints (saving itself is disabled below).
    # os.makedirs avoids passing a config-derived path through a shell.
    checkpath = join(os.path.dirname(config.checkpath), "FineTune2")
    os.makedirs(checkpath, exist_ok=True)

    p = 'runs/Inceptfinalrun/hypergridfine_tune/LR' + str(int(
        1000000 * lr)) + 'BS' + str(batchsize)
    writer = SummaryWriter(p)

    fineds = [f for f in listdir(finepath) if isfile(join(finepath, f))]

    try:
        for epoch_fine in range(num_epochs):
            # Re-assert training mode every epoch: validation below switches
            # the model to eval mode, and whether ModelUtils.fit_fine restores
            # training mode itself is not visible from here.
            model.train()

            random.shuffle(fineds)
            ds_train = DataUtils.FINEIMGDS(label_dict, finepath, fineds)
            train_gen = torch.utils.data.DataLoader(ds_train,
                                                    batch_size=batchsize,
                                                    shuffle=True,
                                                    num_workers=6,
                                                    pin_memory=True)
            train_gen = DataUtils.DeviceDataLoader(train_gen, device)
            result = ModelUtils.fit_fine(model, train_gen, optimizer)
            loss_epoch = result.item()
            print("MEAN LOSS ON EPOCH {} is : {}".format(
                epoch_fine, loss_epoch))

            ## SAVE WEIGHT AFTER FINETUNE PER EPOCH (currently disabled)
            # torch.save({
            #     'epoch': epoch_fine,
            #     'model_state_dict': model.state_dict(),
            #     'optimizer_state_dict': optimizer.state_dict(),
            #     'loss': loss_epoch,
            # }, os.path.join(checkpath, 'fine-epoch-{}.pt'.format(epoch_fine)))

            ## WRITER TENSORBOARD
            writer.add_scalar('Training loss per epoch', loss_epoch,
                              epoch_fine)

            ####### CHECK FOR VALIDATION
            val_acc = _run_validate(model, valid_paths, val_json_dir,
                                    label_dict, device)
            if val_acc is None:
                # Nothing to average: avoid dividing by a zero total weight.
                print("EPOCHFINE={} no validation characters found in {}".
                      format(epoch_fine, val_image_dir))
                continue
            print("EPOCHFINE={} Validation Accuracy Mean on GOOD pdf is {}".
                  format(epoch_fine, val_acc))
            writer.add_scalar('validation acc per epoch', val_acc, epoch_fine)
    finally:
        # Flush pending events even if training is interrupted.
        writer.close()


def _run_set_trainable(model,
                       frozen_names=('conv_block1', 'conv_block2', 'conv1')):
    """Freeze the top-level children listed in *frozen_names*, unfreeze the rest."""
    for name, child in model.named_children():
        trainable = name not in frozen_names
        print(name + (' is unfrozen' if trainable else ' is frozen'))
        for param in child.parameters():
            param.requires_grad = trainable


def _run_validate(model, valid_paths, json_dir, label_dict, device,
                  refine_ratio=0.48):
    """Return the character-weighted mean accuracy over *valid_paths*.

    Each page contributes ``len(bounds) * val_acc`` so that pages with more
    characters weigh more. Returns ``None`` when no page yields any bounds.
    """
    # Evaluate in inference mode; the caller switches back to train mode.
    model.eval()

    weighted_acc = 0.0
    total_chars = 0
    for imgpath in tqdm(valid_paths, desc="TEST"):
        stem = os.path.splitext(os.path.basename(imgpath))[0]
        with open(join(json_dir, stem + ".json")) as f:
            bounds = json.load(f)
        bounds = bounds_refine(bounds, imgpath, refine_ratio)
        #print("Characters in Image=",len(bounds))
        n_chars = len(bounds)
        if n_chars == 0:
            # A page without characters carries zero weight; skip it rather
            # than building an empty dataset.
            continue

        ds = get_ds(imgpath, bounds)
        ds_eval = DataUtils.EVALIMGDS(label_dict, ds)
        eval_gen = torch.utils.data.DataLoader(ds_eval,
                                               batch_size=64,
                                               shuffle=False,
                                               num_workers=6,
                                               pin_memory=True)
        eval_gen = DataUtils.DeviceDataLoader(eval_gen, device)
        result = ModelUtils.evaluate(model, eval_gen)

        weighted_acc = weighted_acc + n_chars * result['val_acc']
        total_chars += n_chars

    if total_chars == 0:
        return None
    return weighted_acc / total_chars
imglist_train, imglist_val = utils.get_images_list(dir_path + "/imgs") #imglist_train=utils.csv_to_ls(config.csv_path+"/train_grid_imgs.csv") #imglist_val=utils.csv_to_ls(config.csv_path+"/valid_grid_imgs.csv") ds_train = DataUtils.IMGDS(label_dict, dir_path, imglist_train) ds_val = DataUtils.IMGDS(label_dict, dir_path, imglist_val) train_gen = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=shuffle, num_workers=num_worker, pin_memory=True) valid_gen = torch.utils.data.DataLoader(ds_val, batch_size=batch_size, shuffle=shuffle, num_workers=num_worker, pin_memory=True) train_gen = DataUtils.DeviceDataLoader(train_gen, device) valid_gen = DataUtils.DeviceDataLoader(valid_gen, device) #model=Resnet.ResNet50(3,config.num_classes) model = InceptFC.FC_Model() model = model.to(device) p = 'runs/Inceptfinalrun/LR' + str(int( 100000 * l_r)) + 'BS' + str(batch_size) writer = SummaryWriter(p) history = ModelUtils.fit(num_epochs, l_r, model, train_gen, valid_gen, opt_func=torch.optim.Adam, writer=writer)