def test(path):
    """Run single-image detection inference and display the best box.

    Loads weights from ./model.pth, feeds the image at `path` through the
    network, picks the bounding box with the highest confidence, prints its
    confidence/class, and shows the image with that box drawn on it.
    """
    # BUGFIX: choose the device instead of hard-coding cuda:0 so the function
    # also runs on CPU-only machines; the checkpoint is mapped to that device.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = Model()
    model.to(device)
    model.eval()
    checkpoint = torch.load("./model.pth", map_location=device)
    model.load_state_dict(checkpoint["model"])

    # HWC uint8 image -> normalized NCHW float32 batch of one.
    img = np.array(Image.open(path).resize([448, 448]))[np.newaxis]
    img = np.transpose(img, axes=[0, 3, 1, 2]) / 255
    img = torch.tensor(img, dtype=torch.float32).to(device)

    # BUGFIX: run inference under no_grad — no autograd graph is needed.
    with torch.no_grad():
        preds = model(img).cpu().numpy()

    # Decode the B boxes per grid cell: x/y offsets are relative to the cell,
    # width/height are relative to the full image.
    cell_h, cell_w = IMG_H / S, IMG_W / S
    x, y = np.meshgrid(range(S), range(S))
    preds_xywhs = []
    for i in range(B):
        preds_x = (preds[0, :, :, i * 4] + x) * cell_w
        preds_y = (preds[0, :, :, i * 4 + 1] + y) * cell_h
        preds_w = preds[0, :, :, i * 4 + 2] * IMG_W
        preds_h = preds[0, :, :, i * 4 + 3] * IMG_H
        preds_xywhs.append(np.dstack((preds_x, preds_y, preds_w, preds_h)))
    preds_xywhs = np.reshape(np.dstack(preds_xywhs), [-1, 4])

    preds_class = np.reshape(preds[0, :, :, 10:], [-1, 20])
    preds_c = np.reshape(preds[0, :, :, 8:10], [-1, 1])

    # BUGFIX: index with plain ints — the old argmax(axis=0) result was a
    # size-1 array, and indexing/%f-formatting with it is deprecated (and now
    # an error) in modern NumPy. Two boxes share one cell, hence `// 2` maps
    # a box index to its cell's class row.
    max_idx = int(np.argmax(preds_c))
    print("max confidence: %f" % (preds_c[max_idx, 0]))
    cls_idx = int(np.argmax(preds_class[max_idx // 2]))
    print("class confidence: %f" % (preds_class[max_idx // 2, cls_idx]))
    print("class category: %s" % (CLASSES[cls_idx]))
    Image.fromarray(
        np.uint8(
            draw_bboxes(np.array(Image.open(path).resize([448, 448])),
                        preds_xywhs[max_idx:max_idx + 1]))).show()
def ini_model_train(opt):
    """Initial training pass over the seed set.

    Trains a fresh ``Model`` on the initial split from ``ini_model(opt)`` for
    ``opt.ini_epoch`` epochs, evaluates on the test split after every epoch,
    and on the last epoch saves the checkpoint once per device directory and
    once to ``opt.ini_model_path``.

    Returns:
        (X_test, y_test, X_train_All, y_train_All) exactly as produced by
        ``ini_model(opt)``, for the caller's subsequent active-learning steps.
    """
    X_ini, y_ini, X_test, y_test, X_train_All, y_train_All = ini_model(opt)
    mod = Model().to(device)
    optimizer = optim.SGD(mod.parameters(), lr=opt.ini_lr)
    criterion = nn.CrossEntropyLoss()
    num_batches_train = X_ini.shape[0] // opt.ini_batch_size
    for i in range(opt.ini_epoch):
        # BUGFIX: re-enter train mode every epoch. The original called
        # mod.train() once before the loop, so after the first epoch's
        # mod.eval() every remaining epoch trained in eval mode
        # (dropout/batch-norm frozen).
        mod.train()
        loss = 0.0
        for j in range(num_batches_train):
            slce = get_slice(j, opt.ini_batch_size)
            X_tra = torch.from_numpy(X_ini[slce]).float().to(device)
            Y_tra = torch.from_numpy(y_ini[slce]).long().to(device)
            optimizer.zero_grad()
            out = mod(X_tra)
            batch_loss = criterion(out, Y_tra)
            batch_loss.backward()
            optimizer.step()
            # BUGFIX: accumulate a float, not the loss tensor — summing
            # tensors keeps every batch's autograd graph alive in memory.
            loss += batch_loss.item()
        mod.eval()
        acc = test_without_dropout(X_test, y_test, mod, device)
        print('\n[{}/{} epoch], training loss:{:.4f}, test accuracy is:{} \n'.
              format(i, opt.ini_epoch, loss / num_batches_train, acc))
        if i + 1 == opt.ini_epoch:
            # Build the checkpoint once and reuse it for every save target.
            checkpoint = {
                'epoch': i,
                'model_state_dict': mod.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }
            for d in range(opt.num_dev):
                torch.save(
                    checkpoint,
                    os.path.join(opt.ini_model_path, 'device' + str(d),
                                 "ini.model.pth.tar"))
            # NOTE(review): this also saves to opt.ini_model_path itself,
            # which the loop above treats as a directory — confirm intended.
            torch.save(checkpoint, opt.ini_model_path)
    return X_test, y_test, X_train_All, y_train_All
def ini_train(X_ini, y_ini, X_te, y_te, epochs, paths, device, batch_size,
              lr, momentum, arr_drop):
    """Train a dropout model on the seed set, evaluating after each epoch.

    On the final epoch the checkpoint is written to
    ``<path>/ini.model.pth.tar`` for every directory in ``paths``.

    Returns:
        The trained model (left in eval mode after the last epoch).
    """
    mod = Model(arr_drop).to(device)
    optimizer = optim.SGD(mod.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    num_batches_train = X_ini.shape[0] // batch_size
    print("number of batch ", num_batches_train)
    for i in range(epochs):
        # BUGFIX: switch back to train mode each epoch — mod.eval() below
        # otherwise left dropout disabled for every epoch after the first.
        mod.train()
        loss = 0.0
        for j in range(num_batches_train):
            slce = get_slice(j, batch_size)
            X_tra = torch.from_numpy(X_ini[slce]).float().to(device)
            Y_tra = torch.from_numpy(y_ini[slce]).long().to(device)
            optimizer.zero_grad()
            out = mod(X_tra)
            batch_loss = criterion(out, Y_tra)
            batch_loss.backward()
            optimizer.step()
            # BUGFIX: accumulate a float; summing loss tensors retains the
            # autograd graph of every batch.
            loss += batch_loss.item()
        mod.eval()
        with torch.no_grad():
            X_va = torch.from_numpy(X_te).float().to(device)
            Y_va = torch.from_numpy(y_te).long().to(device)
            output = mod(X_va)
            preds = torch.max(output, 1)[1]
            # BUGFIX: sklearn metrics need CPU arrays; passing CUDA tensors
            # raises a TypeError.
            acc = accuracy_score(Y_va.cpu(), preds.cpu())
        print('\n[{}/{} epoch], training loss:{:.4f}, test accuracy is:{} \n'.
              format(i, epochs, loss / num_batches_train, acc))
        if i + 1 == epochs:
            checkpoint = {
                'epoch': i,
                'model_state_dict': mod.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }
            for path in paths:
                torch.save(checkpoint, os.path.join(path, "ini.model.pth.tar"))
    return mod
class TestNetwork(unittest.TestCase):
    """Smoke test: run `predict` over synthetic data and sanity-check mAP."""

    class RandomData(Dataset):
        """Torch dataset of random 3x300x300 images with random class labels."""

        def __init__(self, n_images, n_classes, input_transform=None):
            self.n_images = n_images
            self.n_classes = n_classes
            self.input_transform = input_transform

        def __getitem__(self, index):
            # Index is ignored on purpose: every access yields a fresh sample.
            image = torch.rand(3, 300, 300)
            label = random.randint(0, self.n_classes - 1)
            if self.input_transform:
                image = self.input_transform(image)
            return image, label

        def __len__(self):
            return self.n_images

    def test_network(self):
        # Same preprocessing as the real pipeline (ImageNet statistics).
        preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((300, 300)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        self.realset = TestNetwork.RandomData(n_images=10,
                                              n_classes=7,
                                              input_transform=preprocess)
        self.model = Model(n_classes=7)
        self.model.eval()
        all_features, all_outputs, all_preds, all_labels = predict(
            self.model, self.realset, batch_size=4, n_classes=7, GPUs=None)
        recall = np.sum(all_preds == all_labels) / float(len(self.realset))
        ap = AP(all_outputs, all_labels)
        mean_ap = meanAP(all_outputs, all_labels)
        # mAP is a probability-like score, so it must land in [0, 1].
        self.assertGreaterEqual(mean_ap, 0)
        self.assertLessEqual(mean_ap, 1)
def model(dataset, model_name=None, device=None, train=True):
    """Build the encoder/decoder network, optionally restoring saved weights.

    Args:
        dataset: supplies vocabulary sizes and pretrained embedding weights.
        model_name: checkpoint file name under ``config.model_path``; when
            given, its weights are merged into the freshly built network.
        device: target torch.device; defaults to CUDA when available.
        train: put the network in train mode when True, eval mode otherwise.

    Returns:
        The constructed (and possibly restored) network.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = Model(vocab_size=dataset.vocab_size,
                embedding_dim=config.embedding_dim,
                output_size=dataset.target_vocab_size,
                encoder_hidden_size=config.encoder_hidden_size,
                decoder_hidden_size=config.decoder_hidden_size,
                encoder_layers=config.encoder_layers,
                decoder_layers=config.decoder_layers,
                dropout=config.dropout,
                embedding_weights=dataset.vector_weights,
                device=device)
    if model_name:
        # A checkpoint name was given: overlay its saved weights on the fresh
        # state dict, then load the merged dict back into the network.
        saved = torch.load(FILE_PATH + config.model_path + model_name,
                           map_location=device)
        merged = net.state_dict()
        merged.update(saved)
        net.load_state_dict(merged)
    if train:
        net.train()
    else:
        net.eval()
    return net
val_indices = indices[:split] # check if dataset load order is correct # for ind in val_indices: # print(ind) # data = my_dataset[ind] # img = data['image'] # plt.figure() # plt.imshow(img.permute(1,2,0)) # plt.show() # load model model = Model().to(device=device) model.load_state_dict(torch.load('model_saved.pth')) model = model.float() model.eval() for ind in val_indices: data = my_dataset[ind] img = data['image'] img = img.to(device=device) img = img.unsqueeze(dim=0) position_map, feature_maps = model(img) position_map = position_map.squeeze() # must be (128,128) feature_maps = feature_maps.squeeze() # should be (16,128,128) feature_maps = feature_maps.permute(1, 2, 0) # should be (128,128,16) position_map = position_map.detach().cpu().numpy() feature_maps = feature_maps.detach().cpu().numpy()
def main(args: argparse.Namespace):
    """Train and evaluate the intent/semiotic/contextual post classifier.

    Loads train/val metadata and label vocabularies, builds the dataset and
    model variant selected by ``args.type``, runs ``args.epochs`` epochs of
    train+eval, logs metrics (optionally to tensorboard), and saves the final
    checkpoint plus hyper-parameters to ``args.output_dir``.
    """
    # Load input data
    with open(args.train_metadata, 'r') as f:
        train_posts = json.load(f)
    with open(args.val_metadata, 'r') as f:
        val_posts = json.load(f)

    # Load label vocabularies; each maps label string -> contiguous id.
    labels = {}
    with open(args.label_intent, 'r') as f:
        intent_labels = json.load(f)
    labels['intent'] = {}
    for label in intent_labels:
        labels['intent'][label] = len(labels['intent'])
    with open(args.label_semiotic, 'r') as f:
        semiotic_labels = json.load(f)
    labels['semiotic'] = {}
    for label in semiotic_labels:
        labels['semiotic'][label] = len(labels['semiotic'])
    with open(args.label_contextual, 'r') as f:
        contextual_labels = json.load(f)
    labels['contextual'] = {}
    for label in contextual_labels:
        labels['contextual'][label] = len(labels['contextual'])

    # Build dictionary from training set captions.
    train_captions = []
    for post in train_posts:
        train_captions.append(post['orig_caption'])
    dictionary = Dictionary(tokenizer_method="TreebankWordTokenizer")
    dictionary.build_dictionary_from_captions(train_captions)

    # Set up torch device; pin host memory only when actually using CUDA.
    if 'cuda' in args.device and torch.cuda.is_available():
        device = torch.device(args.device)
        kwargs = {'pin_memory': True}
    else:
        device = torch.device('cpu')
        kwargs = {}

    # Set up number of workers
    num_workers = min(multiprocessing.cpu_count(), args.num_workers)

    # Set up data loaders differently based on the task
    # TODO: Extend to ELMo + word2vec etc.
    if args.type == 'image_only':
        train_dataset = ImageOnlyDataset(train_posts, labels)
        val_dataset = ImageOnlyDataset(val_posts, labels)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=args.shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_only,
            **kwargs)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_only,
            **kwargs)
    elif args.type == 'image_text':
        train_dataset = ImageTextDataset(train_posts, labels, dictionary)
        val_dataset = ImageTextDataset(val_posts, labels, dictionary)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=args.shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_text,
            **kwargs)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_text,
            **kwargs)
    elif args.type == 'text_only':
        train_dataset = TextOnlyDataset(train_posts, labels, dictionary)
        val_dataset = TextOnlyDataset(val_posts, labels, dictionary)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=args.shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_text_only,
            **kwargs)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_text_only,
            **kwargs)
    else:
        # BUGFIX: fail fast on an unknown task type instead of hitting a
        # NameError when the undefined loaders are used below.
        raise ValueError("Unknown args.type: {}".format(args.type))

    # Set up the model
    model = Model(vocab_size=dictionary.size()).to(device)

    # Set up an optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.lr_scheduler_step_size,
        gamma=args.lr_scheduler_gamma)  # decay by 0.1 every 15 epochs

    # Set up loss function
    loss_fn = torch.nn.CrossEntropyLoss()

    # Setup tensorboard
    if args.tensorboard:
        writer = tensorboard.SummaryWriter(log_dir=args.log_dir + "/" +
                                           args.name,
                                           flush_secs=1)
    else:
        writer = None

    # Which classification head(s) to train/evaluate.
    if args.classification == 'intent':
        keys = ['intent']
    elif args.classification == 'semiotic':
        keys = ['semiotic']
    elif args.classification == 'contextual':
        keys = ['contextual']
    elif args.classification == 'all':
        keys = ['intent', 'semiotic', 'contextual']
    else:
        raise ValueError("args.classification doesn't exist.")

    best_auc_ovr = 0.0
    best_auc_ovo = 0.0
    best_acc = 0.0
    best_model = None
    best_optimizer = None
    best_scheduler = None

    # Training loop
    for epoch in range(args.epochs):
        for mode in ["train", "eval"]:
            # Set up a progress bar
            if mode == "train":
                pbar = tqdm.tqdm(enumerate(train_data_loader),
                                 total=len(train_data_loader))
                model.train()
            else:
                pbar = tqdm.tqdm(enumerate(val_data_loader),
                                 total=len(val_data_loader))
                model.eval()

            total_loss = 0
            # BUGFIX: np.int was removed in NumPy 1.24; use the builtin int.
            # (Also give each key its own empty array instead of a shared one.)
            label = {key: np.array([], dtype=int) for key in keys}
            pred = dict.fromkeys(keys, None)

            # IMPROVEMENT: only build autograd graphs while training; the
            # eval pass previously computed gradients it never used.
            with torch.set_grad_enabled(mode == "train"):
                for _, batch in pbar:
                    caption_data = (batch['caption'].to(device)
                                    if 'caption' in batch else None)
                    image_data = (batch['image'].to(device)
                                  if 'image' in batch else None)
                    label_batch = {}
                    for key in keys:
                        label_batch[key] = batch['label'][key].to(device)

                    if mode == "train":
                        model.zero_grad()
                    pred_batch = model(image_data, caption_data)

                    for key in keys:
                        label[key] = np.concatenate(
                            (label[key], batch['label'][key].cpu().numpy()))
                        # Max-shifted softmax for numerically stable
                        # per-class probabilities (needed by the AUC metrics).
                        x = pred_batch[key].detach().cpu().numpy()
                        x_max = np.max(x, axis=1).reshape(-1, 1)
                        z = np.exp(x - x_max)
                        prediction_scores = z / np.sum(z, axis=1).reshape(-1, 1)
                        if pred[key] is not None:
                            pred[key] = np.vstack((pred[key],
                                                   prediction_scores))
                        else:
                            pred[key] = prediction_scores

                    loss_batch = {}
                    loss = None
                    for key in keys:
                        loss_batch[key] = loss_fn(pred_batch[key],
                                                  label_batch[key])
                        if loss is None:
                            loss = loss_batch[key]
                        else:
                            # BUGFIX: was `loss_bath[key]` — a NameError as
                            # soon as more than one head is trained
                            # (args.classification == 'all').
                            loss += loss_batch[key]
                    total_loss += loss.item()
                    if mode == "train":
                        loss.backward()
                        optimizer.step()

            # Terminate the progress bar
            pbar.close()

            # Update lr scheduler
            if mode == "train":
                scheduler.step()

            for key in keys:
                auc_score_ovr = roc_auc_score(label[key], pred[key],
                                              multi_class='ovr')  # pylint: disable-all
                auc_score_ovo = roc_auc_score(label[key], pred[key],
                                              multi_class='ovo')  # pylint: disable-all
                accuracy = accuracy_score(label[key],
                                          np.argmax(pred[key], axis=1))
                print("[{} - {}] [AUC-OVR={:.3f}, AUC-OVO={:.3f}, ACC={:.3f}]".
                      format(mode, key, auc_score_ovr, auc_score_ovo,
                             accuracy))
                if mode == "eval":
                    best_auc_ovr = max(best_auc_ovr, auc_score_ovr)
                    best_auc_ovo = max(best_auc_ovo, auc_score_ovo)
                    best_acc = max(best_acc, accuracy)
                    best_model = model
                    best_optimizer = optimizer
                    best_scheduler = scheduler
                if writer:
                    writer.add_scalar('AUC-OVR/{}-{}'.format(mode, key),
                                      auc_score_ovr, epoch)
                    writer.add_scalar('AUC-OVO/{}-{}'.format(mode, key),
                                      auc_score_ovo, epoch)
                    writer.add_scalar('ACC/{}-{}'.format(mode, key),
                                      accuracy, epoch)
                    writer.flush()
            if writer:
                writer.add_scalar('Loss/{}'.format(mode), total_loss, epoch)
                writer.flush()
            print("[{}] Epoch {}: Loss = {}".format(mode, epoch, total_loss))

    hparam_dict = {
        'train_split': args.train_metadata,
        'val_split': args.val_metadata,
        'lr': args.lr,
        'epochs': args.epochs,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers,
        'shuffle': args.shuffle,
        'lr_scheduler_gamma': args.lr_scheduler_gamma,
        'lr_scheduler_step_size': args.lr_scheduler_step_size,
    }
    metric_dict = {
        'AUC-OVR': best_auc_ovr,
        'AUC-OVO': best_auc_ovo,
        'ACC': best_acc
    }
    if writer:
        writer.add_hparams(hparam_dict=hparam_dict, metric_dict=metric_dict)
        writer.flush()

    Path(args.output_dir).mkdir(exist_ok=True)
    torch.save(
        {
            'hparam_dict': hparam_dict,
            'metric_dict': metric_dict,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
        },
        Path(args.output_dir) / '{}.pt'.format(args.name))