def get_prediction(image):
    # Preprocess the input PIL image exactly as in training
    transform_test = transforms.Compose([
        transforms.Resize(256),        # smaller edge of image resized to 256
        transforms.CenterCrop(224),    # get 224x224 crop from the center
        transforms.ToTensor(),         # convert the PIL Image to a tensor
        transforms.Normalize((0.485, 0.456, 0.406),   # normalize image for pre-trained model
                             (0.229, 0.224, 0.225))])

    orig_img = np.array(image)  # numpy copy of the original image
    test_img = transform_test(image)

    sample_vocab = Vocabulary(threshold=5, load_vocab=True,
                              anns_file="captions_train2014.json")
    vocab_size = len(sample_vocab)

    # Model
    checkpoint = torch.load('train-model-1-9900.pkl')

    # Specify values for embed_size and hidden_size - we use the same values as in the training step
    embed_size = 256
    hidden_size = 512

    # Initialize the encoder and decoder, and set each to inference mode
    encoder = ResNetEncoder(embed_size)
    encoder.eval()
    decoder = RNNDecoder(embed_size, hidden_size, vocab_size)
    decoder.eval()

    # Load the pre-trained weights
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])

    # Add a batch dimension, then move the models and the input tensor to GPU if CUDA is available
    test_img = test_img.unsqueeze(0)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
        test_img = test_img.cuda()

    features = encoder(test_img).unsqueeze(1)
    output = decoder.greedy_search(features)

    # Map token ids back to words, dropping the start token and stopping at the end token
    cleaned_pred = []
    for vocab_id in output:
        word = sample_vocab.id2word[vocab_id]
        if word == sample_vocab.end_seq:
            break
        if word != sample_vocab.start_seq:
            cleaned_pred.append(word)
    caption = " ".join(cleaned_pred)
    return caption
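A quick way to exercise get_prediction, sketched under the assumption that Pillow is installed; the file name 'test.jpg' is a placeholder, not from the original code.

from PIL import Image

img = Image.open('test.jpg').convert('RGB')  # placeholder path; convert() guards against grayscale inputs
print(get_prediction(img))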
def main():
    data_path = '../five-video-classification-methods/data/data_file_ordinal_logistic_regression_pytorch.csv'
    testdata_path = '../five-video-classification-methods/data/3_combined_test.csv'
    train_loader, test_loader = get_dataloader(data_path, testdata_path)

    # Map checkpoint tensors onto whichever device is available
    if torch.cuda.is_available():
        map_location = lambda storage, loc: storage.cuda()
    else:
        map_location = 'cpu'

    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))

    if args.mode == 'train':
        args.round = 0
        train_model(model, train_loader, test_loader)
    elif args.mode == 'prune':
        previous_ckpt = 'checkpoints/ep-3794-0.193.pth'
        print("Pruning round %d, loading model from %s" % (args.round, previous_ckpt))
        ckpt = torch.load(previous_ckpt, map_location=map_location)
        model.load_state_dict(ckpt['model_state_dict'])
        prune_model(model)
        print(model)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        train_model(model, train_loader, test_loader)
    elif args.mode == 'test':
        ckpt = 'resnet18-round%d.pth' % (args.round)
        print("Loading model from %s" % (ckpt))
        model = torch.load(ckpt)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        acc = eval(model, test_loader)  # project-level eval(), not the Python builtin
        print("Acc=%.4f\n" % acc)
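main() reads args.mode and args.round from a module-level args object that is not shown; here is a minimal argparse sketch that would supply them (the flag names are assumptions inferred from the attributes used above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'prune', 'test'], default='train')
parser.add_argument('--round', type=int, default=0)  # pruning round; also selects the test checkpoint name
args = parser.parse_args()

if __name__ == '__main__':
    main()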
def build_model(config, gpu_id, checkpoint=None):
    # Build encoder
    encoder = ImageEncoder(config.encoder_num_layers,
                           True,
                           config.encoder_num_hidden,
                           config.dropout,
                           config.image_channel_size)

    # Build decoder
    decoder_num_hidden = config.encoder_num_hidden
    decoder = RNNDecoder(True,
                         config.target_embedding_size,
                         config.decoder_num_layers,
                         decoder_num_hidden,
                         config.dropout,
                         config.target_vocab_size,
                         attn_type='general',
                         input_feed=config.input_feed)

    device = torch.device('cuda') if gpu_id >= 0 else torch.device('cpu')

    # Build generator
    generator = nn.Sequential(
        nn.Linear(decoder_num_hidden, config.target_vocab_size),
        nn.LogSoftmax(dim=-1))

    # Build UIModel
    model = UIModel(encoder, decoder, generator)

    # Load the model state from the checkpoint, or initialize the parameters
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
    else:
        for p in model.parameters():
            p.data.uniform_(-config.param_init, config.param_init)

    model.to(device)
    return model
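A sketch of calling build_model on CPU, assuming a config object exposing the fields referenced above; every value here is a placeholder, not the project's real configuration:

from types import SimpleNamespace

cfg = SimpleNamespace(
    encoder_num_layers=2, encoder_num_hidden=256, dropout=0.3,
    image_channel_size=3, target_embedding_size=80,
    decoder_num_layers=1, target_vocab_size=500,
    input_feed=True, param_init=0.1)  # placeholder hyperparameters
model = build_model(cfg, gpu_id=-1)  # gpu_id < 0 selects the CPU path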
def train_on_epochs(train_loader: DataLoader, test_loader: DataLoader, restore_from: str = None):
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))
    model.to(device)

    device_count = torch.cuda.device_count()
    if device_count > 1:
        print('Using {} GPUs for training'.format(device_count))
        model = nn.DataParallel(model)

    ckpt = {}
    if restore_from is not None:
        ckpt = torch.load(restore_from)
        model.load_state_dict(ckpt['model_state_dict'])
        print('Model loaded from %s' % restore_from)

    model_params = model.parameters()
    optimizer = torch.optim.Adam(model_params, lr=config.learning_rate)
    if restore_from is not None:
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])

    info = {
        'train_losses': [],
        'train_scores': [],
        'test_losses': [],
        'test_scores': []
    }

    start_ep = ckpt['epoch'] + 1 if 'epoch' in ckpt else 0

    save_path = './checkpoints'
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    for ep in range(start_ep, config.epoches):
        train_losses, train_scores = train(model, train_loader, optimizer, ep, device)
        test_loss, test_score = validation(model, test_loader, optimizer, ep, device)

        info['train_losses'].append(train_losses)
        info['train_scores'].append(train_scores)
        info['test_losses'].append(test_loss)
        info['test_scores'].append(test_score)

        ckpt_path = os.path.join(save_path, 'ep-%d.pth' % ep)
        if (ep + 1) % config.save_interval == 0:
            torch.save({
                'epoch': ep,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'label_map': train_loader.dataset.labels
            }, ckpt_path)
            print('Model of epoch %3d has been saved to: %s' % (ep, ckpt_path))

    with open('./train_info.json', 'w') as f:
        json.dump(info, f)

    print('End of training')
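Starting or resuming a run, assuming the get_dataloader helper from the main() snippet above; the CSV paths and the checkpoint name are placeholders:

train_loader, test_loader = get_dataloader('data/train.csv', 'data/test.csv')
train_on_epochs(train_loader, test_loader)  # fresh run
# Resume from a saved checkpoint (illustrative file name):
# train_on_epochs(train_loader, test_loader, restore_from='./checkpoints/ep-9.pth')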
os.path.splitext(os.path.basename(args.data_path))[0] + '.npy')
accessed = np.load('y_gd' + os.path.splitext(os.path.basename(args.data_path))[0] + '.npy')

# Plot the modeled series against the accessed ground truth
plt.plot(modeled, label='modeled')
plt.plot(accessed, label='accessed')
plt.legend()
plt.show()

print('Loading model from {}'.format(args.checkpoint))
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# Build model
model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                      RNNDecoder(**config.rnn_decoder_params))
model.to(device)
model.eval()

# Map checkpoint tensors onto whichever device is available
if torch.cuda.is_available():
    map_location = lambda storage, loc: storage.cuda()
else:
    map_location = 'cpu'

# Load model
ckpt = torch.load(args.checkpoint, map_location=map_location)
model.load_state_dict(ckpt['model_state_dict'])
print('Model has been loaded from {}'.format(args.checkpoint))

label_map = [-1] * config.rnn_decoder_params['num_classes']
# Load label map
if 'label_map' in ckpt:
                   batch_size=batch_size,
                   threshold=vocab_threshold,
                   load_vocab=load_vocab)
val_loader = get_loader(transform=transform_val,
                        mode='val',
                        batch_size=batch_size,
                        threshold=vocab_threshold,
                        load_vocab=load_vocab)

# The size of the vocabulary
vocab_size = len(train_loader.dataset.vocab)

# Initialize the encoder and decoder
encoder = ResNetEncoder(embedding_size)
decoder = RNNDecoder(embedding_size, hidden_size, vocab_size)

# Move models to GPU if CUDA is available
if torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()

# Define the loss function
criterion = nn.CrossEntropyLoss().cuda() if torch.cuda.is_available() else nn.CrossEntropyLoss()

# Specify the learnable parameters of the model: the decoder plus the
# encoder's trainable embedding and batch-norm layers
params = list(decoder.parameters()) + \
         list(encoder.embed.parameters()) + \
         list(encoder.bn.parameters())

# Define the optimizer
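The snippet breaks off at "# Define the optimizer". A plausible continuation, assuming Adam as in train_on_epochs above; both the choice of optimizer and the learning rate are assumptions, not from the original:

optimizer = torch.optim.Adam(params, lr=0.001)  # assumed optimizer and lr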
def _eval(checkpoint: str, video_path: str, labels=[]) -> list:
    """Run inference with the model and return the predicted labels.

    Args:
        checkpoint (str): The checkpoint to restore the model from.
        video_path (str): The path of the videos.
        labels (list): Ground-truth labels of the videos.

    Returns:
        A list of [video, predicted id, mapped label] entries, one per video.
    """
    if not os.path.exists(video_path):
        raise ValueError('Invalid path: {}'.format(video_path))

    print('Loading model from {}'.format(checkpoint))
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Build model
    model = nn.Sequential(CNNEncoder(**config.cnn_encoder_params),
                          RNNDecoder(**config.rnn_decoder_params))
    model.to(device)
    model.eval()

    # Load model
    ckpt = torch.load(checkpoint, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    print('Model has been loaded from {}'.format(checkpoint))

    label_map = [-1] * config.rnn_decoder_params['num_classes']
    # Load label map
    if 'label_map' in ckpt:
        label_map = ckpt['label_map']

    # Do inference
    pred_labels = []
    video_names = os.listdir(video_path)
    with torch.no_grad():
        for video in tqdm(video_names, desc='Inferencing'):
            # Read images from the video
            images = load_imgs_from_video(os.path.join(video_path, video))
            # Apply the dataset transform to each frame
            images = [Dataset.transform(None, img) for img in images]
            # Stack to a tensor, batch size = 1
            images = torch.stack(images, dim=0).unsqueeze(0)
            # Do inference
            images = images.to(device)
            pred_y = model(images)  # type: torch.Tensor
            pred_y = pred_y.argmax(dim=1).cpu().numpy().tolist()
            pred_labels.append([video, pred_y[0], label_map[pred_y[0]]])
            print(pred_labels[-1])

    if len(labels) > 0:
        # Compare the predicted class ids against the ground truth
        acc = accuracy_score(labels, [p[1] for p in pred_labels])
        print('Accuracy: %0.2f' % acc)

    # Save results
    pandas.DataFrame(pred_labels).to_csv('result.csv', index=False)
    print('Results have been saved to {}'.format('result.csv'))

    return pred_labels
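A hedged invocation of _eval; both paths are placeholders, and labels (if given) should hold one ground-truth class id per video in os.listdir order:

preds = _eval('checkpoints/ep-99.pth', './videos')  # inference only
# preds = _eval('checkpoints/ep-99.pth', './videos', labels=[0, 1, 1])  # also report accuracy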