# Inference setup: build the CRNN + attention models, restore their trained
# weights, and prepare a mel-spectrogram front end on the chosen device.
NUM_CLASSES = 2
kernel_x = KERNEL_SIZE[1]  # kernel width (KERNEL_SIZE defined earlier in the file)

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the audio file given on the command line; squeeze drops the channel dim.
test_audio, sr = torchaudio.load(sys.argv[1])
test_audio = test_audio.squeeze()

# Create models
CRNN_model = CRNN(IN_SIZE, HIDDEN_SIZE, KERNEL_SIZE, STRIDE, GRU_NUM_LAYERS)
attn_layer = AttnMech(HIDDEN_SIZE * NUM_DIRS)
apply_attn = ApplyAttn(HIDDEN_SIZE * 2, NUM_CLASSES)

# Load models
checkpoint = torch.load('models/crnn_final', map_location=device)
CRNN_model.load_state_dict(checkpoint['model_state_dict'])
checkpoint = torch.load('models/attn_final', map_location=device)
attn_layer.load_state_dict(checkpoint['model_state_dict'])
checkpoint = torch.load('models/apply_attn_final', map_location=device)
apply_attn.load_state_dict(checkpoint['model_state_dict'])

# FIX: place the models on the same device as the melspec transform below
# (map_location only affects where the checkpoint tensors are loaded, not the
# module parameters) and switch to eval mode so dropout/batch-norm behave
# deterministically at inference time. Both calls are idempotent.
CRNN_model = CRNN_model.to(device).eval()
attn_layer = attn_layer.to(device).eval()
apply_attn = apply_attn.to(device).eval()

# Create melspec
# NOTE(review): sample_rate=48000 presumably matches training; confirm against
# the `sr` returned by torchaudio.load and resample if they differ.
melspec_test = torchaudio.transforms.MelSpectrogram(
    sample_rate=48000,
    n_mels=N_MELS
).to(device)

# TEST
all_probs = []
def train_and_predict(x_train, y_train, x_val, y_val, x_test):
    """Train a CRNN classifier and return ensembled test predictions.

    Args:
        x_train (np.ndarray): Training instances.
        y_train (np.ndarray): Training labels.
        x_val (np.ndarray): Validation instances.
        y_val (np.ndarray): Validation labels.
        x_test (np.ndarray): Test instances.

    Returns:
        np.ndarray: Mean prediction of the three best validation snapshots.
    """
    _ensure_reproducibility()

    # Select the first GPU when available, CPU otherwise.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # To tensors; swap the last two axes so features come before time.
    x_train, x_val, x_test = (
        torch.FloatTensor(arr).transpose(1, 2)
        for arr in (x_train, x_val, x_test)
    )
    y_train = torch.FloatTensor(y_train)
    y_val = torch.FloatTensor(y_val)

    # Network dimensions are derived from the data itself.
    n_classes = y_train.shape[-1]
    n_feats = x_train.shape[1]
    net = CRNN(n_classes, n_feats).to(device)

    loss_fn = BCELoss()                                    # binary cross-entropy
    optimiser = Adam(net.parameters(), lr=0.01)            # Adam optimisation
    lr_decay = StepLR(optimiser, step_size=2, gamma=0.9)   # periodic LR decay

    # Batched iteration over each data split.
    train_loader = DataLoader(TensorDataset(x_train, y_train),
                              batch_size=128, shuffle=True)
    val_loader = DataLoader(TensorDataset(x_val, y_val), batch_size=512)
    test_loader = DataLoader(TensorDataset(x_test), batch_size=512)

    # Records training/validation performance and retains the state dicts of
    # the top 3 models observed during validation.
    logger = Logger(net, n_states=3)

    for epoch in range(15):
        # One training pass with a progress bar.
        progress = tqdm(train_loader)
        progress.set_description('Epoch %d' % epoch)
        train(net.train(), loss_fn, optimiser, progress, logger, device)

        # Validation pass; rank snapshots by F1 score.
        validate(net.eval(), loss_fn, val_loader, logger, device)
        logger.monitor('val_f1')

        # Report this epoch's numbers, then decay the learning rate.
        logger.print_results()
        lr_decay.step()

    # Ensemble: average the test predictions of the 3 retained snapshots.
    snapshot_preds = []
    for state_dict in logger.state_dicts:
        net.load_state_dict(state_dict)
        snapshot_preds.append(_flatten(predict(net, test_loader, device)))
    return torch.stack(snapshot_preds).mean(dim=0).cpu().numpy()
# custom weights initialization called on crnn
def weights_init(m):
    layer_name = m.__class__.__name__
    if 'Conv' in layer_name:
        # Conv layers: zero-mean gaussian weights, std 0.02.
        m.weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in layer_name:
        # BatchNorm layers: gaussian scale around 1, zero shift.
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    crnn.load_state_dict(torch.load(opt.pretrained))
print(crnn)

# Pre-allocated batch buffers: image batch, encoded label text, label lengths.
# NOTE(review): width is opt.imgH as well (square input) — confirm opt.imgW
# is not intended here.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)