Exemplo n.º 1
0
    # Number of output classes; passed to ApplyAttn below.
    NUM_CLASSES = 2

    # Second entry of KERNEL_SIZE. Not referenced again in the lines visible
    # here -- presumably used by code further down (TODO confirm).
    kernel_x = KERNEL_SIZE[1]

    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the audio file named by the first command-line argument, then drop
    # all size-1 dimensions (presumably the channel axis of a mono recording
    # -- TODO confirm).
    test_audio, sr = torchaudio.load(sys.argv[1])
    test_audio = test_audio.squeeze()

    # Create models
    # NOTE(review): attn_layer is sized with HIDDEN_SIZE * NUM_DIRS while
    # apply_attn hard-codes HIDDEN_SIZE * 2; the two agree only if
    # NUM_DIRS == 2 -- confirm.
    CRNN_model = CRNN(IN_SIZE, HIDDEN_SIZE, KERNEL_SIZE, STRIDE, GRU_NUM_LAYERS)
    attn_layer = AttnMech(HIDDEN_SIZE * NUM_DIRS)
    apply_attn = ApplyAttn(HIDDEN_SIZE * 2, NUM_CLASSES)
    # Load models
    # Each checkpoint file is read as a mapping whose 'model_state_dict' entry
    # holds the weights; map_location remaps the stored tensors onto `device`.
    # NOTE(review): none of the three models is moved to `device` in the lines
    # visible here, unlike the spectrogram transform below -- confirm that
    # this happens later.
    checkpoint = torch.load('models/crnn_final', map_location=device)
    CRNN_model.load_state_dict(checkpoint['model_state_dict'])
    checkpoint = torch.load('models/attn_final', map_location=device)
    attn_layer.load_state_dict(checkpoint['model_state_dict'])
    checkpoint = torch.load('models/apply_attn_final', map_location=device)
    apply_attn.load_state_dict(checkpoint['model_state_dict'])

    # Create melspec
    # NOTE(review): sample_rate is fixed at 48000 and the `sr` returned by
    # torchaudio.load above is not consulted -- verify that inputs are 48 kHz.
    melspec_test = torchaudio.transforms.MelSpectrogram(
        sample_rate=48000,
        n_mels=N_MELS
    ).to(device)


    # TEST
    # Empty accumulator; presumably filled with per-segment probabilities by
    # the evaluation code that follows (not visible here).
    all_probs = []
Exemplo n.º 2
0
def train_and_predict(x_train, y_train, x_val, y_val, x_test):
    """Fit a CRNN on the training split and predict the test split.

    The network is trained for 15 epochs with binary cross-entropy and
    Adam (learning rate 0.01, multiplied by 0.9 every 2 epochs). After
    each epoch it is evaluated on the validation split and the logger is
    asked to monitor ``val_f1``. When training is over, every state dict
    held by the logger is loaded in turn to predict the test split, and
    those predictions are averaged.

    Args:
        x_train (np.ndarray): Training instances.
        y_train (np.ndarray): Training labels.
        x_val (np.ndarray): Validation instances.
        y_val (np.ndarray): Validation labels.
        x_test (np.ndarray): Test instances.

    Returns:
        np.ndarray: Element-wise mean of the test predictions produced by
        each of the logger's saved model states.
    """
    _ensure_reproducibility()

    # Run on the first GPU when CUDA is available, otherwise on the CPU
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')

    def _as_features(array):
        # Float tensor with axes 1 and 2 swapped
        return torch.FloatTensor(array).transpose(1, 2)

    x_train, x_val, x_test = map(_as_features, (x_train, x_val, x_test))
    y_train, y_val = torch.FloatTensor(y_train), torch.FloatTensor(y_val)

    # Size the network from the data: the last label axis gives the number
    # of classes, axis 1 of the (transposed) inputs the number of features
    net = CRNN(y_train.shape[-1], x_train.shape[1]).to(device)

    # Loss, optimizer and step-wise learning rate decay
    criterion = BCELoss()
    optimizer = Adam(net.parameters(), lr=0.01)
    scheduler = StepLR(optimizer, step_size=2, gamma=0.9)

    # Batch iterators; only the training data is shuffled
    train_set = TensorDataset(x_train, y_train)
    val_set = TensorDataset(x_val, y_val)
    test_set = TensorDataset(x_test)
    loader_train = DataLoader(train_set, batch_size=128, shuffle=True)
    loader_val = DataLoader(val_set, batch_size=512)
    loader_test = DataLoader(test_set, batch_size=512)

    # The logger records training/validation results and is configured
    # with n_states=3 (intended to keep the top 3 validation states)
    logger = Logger(net, n_states=3)

    for epoch in range(15):
        # One pass over the training data, with a labelled progress bar
        progress = tqdm(loader_train)
        progress.set_description('Epoch %d' % epoch)
        train(net.train(), criterion, optimizer, progress, logger, device)

        # Validation pass; F1 is the monitored metric
        validate(net.eval(), criterion, loader_val, logger, device)
        logger.monitor('val_f1')

        logger.print_results()

        # Advance the learning rate schedule once per epoch
        scheduler.step()

    def _predict_from(state_dict):
        # Restore one saved state and predict the whole test set with it
        net.load_state_dict(state_dict)
        return _flatten(predict(net, loader_test, device))

    # Average the predictions of all saved states
    ensemble = torch.stack([_predict_from(sd) for sd in logger.state_dicts])
    return ensemble.mean(dim=0).cpu().numpy()
Exemplo n.º 3
0
# custom weights initialization called on crnn
def weights_init(m):
    """Initialise convolution and batch-norm parameters of one module in place.

    Meant to be passed to ``Module.apply``, which calls it once per
    submodule. The module kind is decided from its class name:

    * a name containing ``'Conv'``: weight drawn from N(0.0, 0.02);
    * a name containing ``'BatchNorm'``: weight drawn from N(1.0, 0.02) and
      bias set to 0;
    * anything else is left untouched.

    Modules that match by name but carry no learnable ``weight``/``bias``
    (for example batch norm built with ``affine=False``, or a container
    class whose name merely contains ``'Conv'``) are skipped instead of
    raising ``AttributeError``.

    Args:
        m: The module to initialise.
    """
    classname = m.__class__.__name__
    # Either attribute may be missing or None, so look them up defensively.
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in classname:
        if weight is not None:
            weight.data.normal_(1.0, 0.02)
        if bias is not None:
            bias.data.fill_(0)


# Build the network and run the custom initialiser over every submodule.
# NOTE(review): this rebinds the name `crnn` (presumably the imported module)
# to the model instance, so `crnn.CRNN` is no longer reachable afterwards.
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    # Remap every stored tensor to the CPU while loading, so a checkpoint
    # saved from a GPU run can also be restored on a machine without CUDA.
    # load_state_dict copies the values into the model's own parameters, so
    # the model is unaffected by where the checkpoint tensors live. The
    # callable form of map_location is used because it is accepted by old
    # and new PyTorch releases alike.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # from a trusted source.
    crnn.load_state_dict(
        torch.load(opt.pretrained, map_location=lambda storage, loc: storage))
print(crnn)

# Pre-allocated, uninitialised buffers; presumably refilled for every batch
# by the training loop (not visible here).
# NOTE(review): the image buffer is square (imgH x imgH) with 3 channels, and
# the text buffer reserves 5 entries per sample -- confirm these sizes.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    # DataParallel documents device_ids as a list, so materialise the range.
    crnn = torch.nn.DataParallel(crnn, device_ids=list(range(opt.ngpu)))
    # Only the image buffer and the criterion are moved to the GPU here;
    # text and length stay on the CPU.
    image = image.cuda()
    criterion = criterion.cuda()

# Wrap the buffers in Variable, as required by pre-0.4 PyTorch autograd.
image = Variable(image)
text = Variable(text)
length = Variable(length)