def load_model(config_file, imgs_path):
    """Load one model described by config_file and run inference over imgs_path."""
    outputs = None
    # for config_file in config_files:
    cfg.merge_from_file(config_file)
    model = choose_net(name=cfg.MODEL.NAME,
                       num_classes=cfg.MODEL.CLASSES,
                       weight_path=cfg.MODEL.WEIGHT_FROM)
    # weight_path = cfg.MODEL.MODEL_PATH + cfg.MODEL.NAME + '.pth'
    weight_path = '../input/b2b3b4-2/weights/' + cfg.MODEL.NAME + '.pth'
    checkpoint = torch.load(weight_path)
    state_dict = checkpoint['state_dict']
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    transform = T.Compose([
        T.Resize(cfg.INPUT.SIZE_TRAIN),
        T.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD)
    ])

    for img_path in imgs_path:
        img = Image.open(img_path).convert('RGB')
        img = transform(img)
        img = img.unsqueeze(0)   # add batch dimension
        img = img.to(device)     # was img.cuda(); keep device handling consistent with model.to(device)
        with torch.no_grad():
            output, _ = model(img)   # model returns (logits, features)
        if outputs is None:
            outputs = output
        else:
            outputs = torch.cat((outputs, output), dim=0)
    return outputs, imgs_path   # fixed: imgs_name was never defined; return the paths that were scored
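# A minimal usage sketch for load_model above. Assumptions (not in the original
# script): the config filename and image directory are hypothetical, and the
# softmax/argmax post-processing is one plausible way to turn logits into labels.
def predict_from_config(config_file, img_dir):
    import glob
    imgs = sorted(glob.glob(os.path.join(img_dir, '*.jpg')))
    logits, paths = load_model(config_file, imgs)
    probs = torch.softmax(logits, dim=1)   # per-image class probabilities
    return paths, probs.argmax(dim=1)      # predicted class index per image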
def main(model_name):
    model = choose_net(name=model_name, num_classes=num_class, weight_path='github')
    model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)   # fixed: result was assigned to an unused `net`, leaving `model` unwrapped

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, Epoches, eta_min=1e-6)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.9, patience=2)

    kf = KFold(n_splits=5, shuffle=True)
    # note: model and optimizer state carry over between folds here;
    # re-initialize them inside the loop for fully independent folds
    for fold, (train_idx, val_idx) in enumerate(kf.split(df)):
        print(f'fold: {fold + 1}..., train_size: {len(train_idx)}, val_size: {len(val_idx)}')
        df_train = df.values[train_idx]
        df_val = df.values[val_idx]
        train_dataset = MyData(root=Data_path, df=df_train, phase='train',
                               transform=get_transform(image_size, 'train'))
        val_dataset = MyData(root=Data_path, df=df_val, phase='test',
                             transform=get_transform(image_size, 'test'))
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  num_workers=workers, pin_memory=True)
        # drop_last=True discards the final partial validation batch
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

        best_acc = 0.0
        for epoch in range(Epoches):
            print('Train {} / {}'.format(epoch + 1, Epoches))
            train_loss = train(model, train_loader, optimizer)
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(train_loss)
            else:
                scheduler.step()   # passing `epoch` to step() is deprecated
            if epoch % 5 == 0:
                acc = validate(model, val_loader)
                if acc > best_acc:
                    best_acc = acc   # fixed: best_acc was never updated, so every eval overwrote the checkpoint
                    if torch.cuda.device_count() > 1:
                        torch.save(model.module.state_dict(),
                                   Model_path + '/' + f"{model_name}_best_fold{fold + 1}.pth")
                    else:
                        torch.save(model.state_dict(),
                                   Model_path + '/' + f"{model_name}_best_fold{fold + 1}.pth")
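# Minimal sketches of the train/validate helpers this script calls, inferred from
# their call sites: train(...) must return an average loss (used by the
# ReduceLROnPlateau branch) and validate(...) must return an accuracy. Assumptions:
# cross-entropy loss and (inputs, labels) batches; the repo's real helpers may differ.
def train(model, loader, optimizer):
    model.train()
    criterion = nn.CrossEntropyLoss()
    total_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(inputs)
        logits = out[0] if isinstance(out, tuple) else out   # some models here return (logits, features)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)   # average loss over batches

def validate(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            out = model(inputs)
            logits = out[0] if isinstance(out, tuple) else out
            correct += (logits.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return correct / total   # accuracy in [0, 1]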
def main(file_name, log):
    set_seed(cfg.SOLVER.SEED)
    config_file = './configs/' + file_name
    cfg.merge_from_file(config_file)
    # os.environ["CUDA_VISIBLE_DEVICES"] = cfg.MODEL.DEVICE_ID
    USE_CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if USE_CUDA else "cpu")

    weight_path = cfg.MODEL.MODEL_PATH + cfg.MODEL.NAME + '.pth'
    model = choose_net(name=cfg.MODEL.NAME,
                       num_classes=cfg.MODEL.CLASSES,
                       weight_path=cfg.MODEL.WEIGHT_FROM)
    best_acc = 0.0
    log.info('Train : {}'.format(cfg.MODEL.NAME))
    if os.path.exists(weight_path):
        # resume from an existing checkpoint
        checkpoint = torch.load(weight_path)
        state_dict = checkpoint['state_dict']
        best_acc = checkpoint['best_acc']
        model.load_state_dict(state_dict)
        log.info('Network loaded from {}'.format(weight_path))
    model.to(device)  # model.cuda()
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    optimizer = torch.optim.AdamW(model.parameters(), lr=cfg.SOLVER.BASE_LR, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, cfg.SOLVER.MAX_EPOCHS, eta_min=1e-6)

    train_dataset = FGVC7Data(root=cfg.DATASETS.ROOT_DIR, phase='train',
                              transform=get_transform(cfg.INPUT.SIZE_TRAIN, 'train'))
    # deterministic head/tail split; SubsetRandomSampler shuffles within each subset
    indices = list(range(len(train_dataset)))
    split = int(cfg.DATASETS.SPLIT * len(train_dataset))
    train_indices = indices[split:]
    test_indices = indices[:split]
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(test_indices)
    train_loader = DataLoader(train_dataset, batch_size=cfg.DATASETS.BATCH_SIZE, sampler=train_sampler,
                              num_workers=cfg.DATASETS.WORKERS, pin_memory=True)
    val_loader = DataLoader(train_dataset, batch_size=cfg.DATASETS.BATCH_SIZE, sampler=valid_sampler,
                            num_workers=cfg.DATASETS.WORKERS, pin_memory=True)

    for epoch in range(cfg.SOLVER.MAX_EPOCHS):
        # pbar = tqdm(total=len(train_loader), unit='batches', ncols=150)  # `unit` labels the iteration-rate display
        # pbar.set_description('Epoch {}/{}'.format(epoch + 1, cfg.SOLVER.MAX_EPOCHS))
        train(model, optimizer, epoch, train_loader, log)
        scheduler.step()
        if (epoch + 1) % 5 == 0:
            acc = validate(model, val_loader, epoch, log)
            if acc > best_acc:
                best_acc = acc   # fixed: the stale best_acc was being written into the checkpoint
                if torch.cuda.device_count() > 1:
                    torch.save({'best_acc': best_acc, 'state_dict': model.module.state_dict()}, weight_path)
                else:
                    torch.save({'best_acc': best_acc, 'state_dict': model.state_dict()}, weight_path)
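# Sketch of the set_seed helper invoked at the top of main, assuming the standard
# full-determinism recipe; the repo's actual helper may differ in details.
import random
import numpy as np

def set_seed(seed):
    random.seed(seed)                              # Python RNG
    np.random.seed(seed)                           # NumPy RNG
    torch.manual_seed(seed)                        # CPU RNG
    torch.cuda.manual_seed_all(seed)               # all GPU RNGs
    torch.backends.cudnn.deterministic = True      # reproducible conv algorithms
    torch.backends.cudnn.benchmark = False         # disable autotuner for determinism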
def load_model(self):
    """Load every model listed in config_files, with its matching eval transform."""
    self.models = []
    self.val_transforms = []
    for config_file in config_files:
        cfg.merge_from_file(config_file)
        model = choose_net(name=cfg.MODEL.NAME,
                           num_classes=cfg.MODEL.CLASSES,
                           weight_path=cfg.MODEL.WEIGHT_FROM)
        weight_path = cfg.MODEL.MODEL_PATH + cfg.MODEL.NAME + '.pth'
        checkpoint = torch.load(weight_path)
        state_dict = checkpoint['state_dict']   # checkpoints are saved as {'best_acc': ..., 'state_dict': ...}
        model.load_state_dict(state_dict)
        model.cuda()
        model.eval()
        self.models.append(model)

        transform = T.Compose([
            T.Resize(cfg.INPUT.SIZE_TRAIN),
            T.ToTensor(),
            T.Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD)
        ])
        self.val_transforms.append(transform)
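# A hypothetical companion method (not part of the original class) showing how the
# lists populated by load_model could be consumed: each model scores the image under
# its own transform, and softmax outputs are averaged into an ensemble prediction.
def predict(self, img_path):
    img = Image.open(img_path).convert('RGB')
    probs = None
    with torch.no_grad():
        for model, transform in zip(self.models, self.val_transforms):
            x = transform(img).unsqueeze(0).cuda()
            out = model(x)
            logits = out[0] if isinstance(out, tuple) else out   # models may return (logits, features)
            p = torch.softmax(logits, dim=1)
            probs = p if probs is None else probs + p
    return (probs / len(self.models)).argmax(dim=1).item()   # ensemble class index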