def try_classifier(clf, tag):
    """Fit *clf* on the cat and dog training sets, report a 5-fold
    cross-validated log-loss for each, and export the combined probability
    predictions for both test sets to a single CSV.

    Args:
        clf: sklearn-style estimator exposing ``fit`` / ``predict_proba``.
        tag: short label used in the log lines and in the output file name.

    Relies on module-level globals: ``source_file``, ``output_file``,
    ``AnimalDataset``, ``AnimalTestDataset``, ``cross_val_score``.
    """
    def _fit_score_predict(species):
        # One species at a time: train on '<source>_<species>.csv', predict
        # probabilities on the matching test file, and print the 5-fold
        # cross-validated log-loss on the training data.
        dataset = AnimalDataset(source_file + '_' + species + '.csv')
        testset = AnimalTestDataset(source_file + '_' + species + '_test.csv')

        clf.fit(dataset.x, dataset.y)
        predictions = clf.predict_proba(testset.x)

        # NOTE(review): scoring='log_loss' is the pre-0.18 sklearn spelling;
        # modern versions use 'neg_log_loss' — confirm the pinned version.
        scores = cross_val_score(clf,
                                 dataset.x,
                                 dataset.y,
                                 cv=5,
                                 scoring='log_loss')
        print("Logloss (%s) on %ss: %0.2f (+/- %0.2f)" %
              (tag, species, scores.mean(), scores.std() * 2))
        return testset, predictions

    cat_testset, predictions_on_cats = _fit_score_predict('cat')
    dog_testset, predictions_on_dogs = _fit_score_predict('dog')

    # One CSV covering both species; ids and prediction rows stay aligned
    # because both lists are concatenated in the same (cats-first) order.
    dog_testset.export_prob_predictions_to_csv(
        output_file + '_' + tag + '.csv', cat_testset.ids + dog_testset.ids,
        list(predictions_on_cats) + list(predictions_on_dogs))
Exemplo n.º 2
0
def debug(model_file, mode):
    """Load a saved model and run it over the test split.

    Args:
        model_file: path passed straight to ``load_model``.
        mode: ``'evaluate'`` prints the evaluation result; ``'predict'``
            runs ``make_predictions``. Any other value is a no-op.
    """
    model = load_model(model_file)

    # Single-image batches, shuffled, three loader workers.
    loader_params = {'batch_size': 1, 'shuffle': True, 'num_workers': 3}
    preprocessing = transforms.Compose([
        transforms.Resize((224, 224)),  # ImageNet standard input size
        transforms.ToTensor(),
    ])

    test_loader = data.DataLoader(
        AnimalDataset('testclasses.txt', preprocessing), **loader_params)

    if mode == 'evaluate':
        print(evaluate(model, test_loader))
    elif mode == 'predict':
        make_predictions(model, test_loader)
Exemplo n.º 3
0
def train(num_epochs, eval_interval, learning_rate, output_filename,
          model_name, optimizer_name, batch_size):
    """Train the attribute model with BCE loss, periodically evaluate on the
    test split, then save the model/optimizer state and write predictions.

    Args:
        num_epochs: number of passes over the training set.
        eval_interval: run ``evaluate`` every this many epochs.
        learning_rate: Adam learning rate.
        output_filename: passed to ``make_predictions`` for the final CSV.
        model_name: file name (under ``models/``) for the model state dict.
        optimizer_name: file name (under ``models/``) for the optimizer state.
        batch_size: training batch size (test loader always uses 1).

    Relies on module-level globals: ``AnimalDataset``, ``build_model``,
    ``num_labels``, ``device``, ``evaluate``, ``make_predictions``.
    """
    train_params = {
        'batch_size': batch_size,
        'shuffle': True,
        'num_workers': 3
    }
    test_params = {'batch_size': 1, 'shuffle': True, 'num_workers': 3}
    # Augmentation only on the training split.
    train_process_steps = transforms.Compose([
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.3, contrast=0.3),
        transforms.Resize((224, 224)),  # ImageNet standard
        transforms.ToTensor()
    ])
    test_process_steps = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])
    train_dataset = AnimalDataset('trainclasses.txt', train_process_steps)
    test_dataset = AnimalDataset('testclasses.txt', test_process_steps)
    train_loader = data.DataLoader(train_dataset, **train_params)
    test_loader = data.DataLoader(test_dataset, **test_params)
    # BCELoss expects probabilities, hence the explicit sigmoid below.
    criterion = nn.BCELoss()

    total_steps = len(train_loader)
    # Second flag to build_model presumably toggles DataParallel — only
    # enabled when more than one GPU is visible.
    if torch.cuda.device_count() > 1:
        model = build_model(num_labels, False, True).to(device)
    else:
        model = build_model(num_labels, False, False).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        for i, (images, features, img_names,
                indexes) in enumerate(train_loader):
            # BatchNorm1d can't handle a batch of 1; only the final,
            # short batch can trigger this, so breaking ends the epoch early.
            if images.shape[0] < 2:
                break
            images = images.to(device)
            features = features.to(device).float()
            # Toggle training flag (enables dropout/batchnorm updates)
            model.train()

            outputs = model(images)
            sigmoid_outputs = torch.sigmoid(outputs)
            loss = criterion(sigmoid_outputs, features)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Log progress every 50 batches.
            if i % 50 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Batch Loss: {:.4f}'.format(
                    epoch + 1, num_epochs, i + 1, total_steps, loss.item()))
                sys.stdout.flush()

        # Periodic evaluation on the test loader.
        if (epoch + 1) % eval_interval == 0:
            print('Evaluating:')
            curr_acc = evaluate(model, test_loader)
            print('Epoch [{}/{}] Approx. training accuracy: {}'.format(
                epoch + 1, num_epochs, curr_acc))

    # Persist final state and make final predictions.
    print('Making predictions:')
    if not os.path.exists('models'):
        os.mkdir('models')
    torch.save(model.state_dict(), 'models/{}'.format(model_name))
    torch.save(optimizer.state_dict(), 'models/{}'.format(optimizer_name))
    make_predictions(model, test_loader, output_filename)
Exemplo n.º 4
0
# Use a voting ensemble as the classifier.

#################### Cats ##########################################

# Three base estimators; the ensemble soft-votes over their predicted
# probabilities with hand-tuned weights (gradient boosting dominates at 5).
clf1 = GradientBoostingClassifier()
clf2 = RandomForestClassifier(n_estimators=1000)
clf3 = DecisionTreeClassifier()

eclf = VotingClassifier(estimators=[('bg', clf1), ('rf', clf2), ('dt', clf3)],
                        voting='soft',
                        weights=[5, 0.5, 1])

# File-name suffix selects the cleaned dataset variant (no color/breed cols).
source_type = '_no_color_no_breed'
source_file = '../0cleaning/clean_data3' + source_type
output_file = 'voting_classifier_with_weight' + source_type
dataset = AnimalDataset(source_file + '_cat.csv')
cat_testset = AnimalTestDataset(source_file + '_cat_test.csv')

# Compare two base estimators against the ensemble on the cat training data.
# NOTE(review): clf3 (decision tree) is part of the ensemble but is not
# scored individually here — confirm that is intentional.
for clf, label in zip([clf1, clf2, eclf],
                      ['Gradient Boosting', 'Random Forest', 'Ensemble']):
    # NOTE(review): sklearn.cross_validation and scoring='log_loss' were
    # removed in modern sklearn (use sklearn.model_selection and
    # 'neg_log_loss') — confirm the pinned sklearn version.
    scores = cross_validation.cross_val_score(clf,
                                              dataset.x,
                                              dataset.y,
                                              cv=5,
                                              scoring='log_loss')
    print("Log Loss on cats: %0.2f (+/- %0.2f) [%s]" %
          (scores.mean(), scores.std(), label))

# Fresh, unfitted estimators — presumably for a dogs section that follows
# beyond this excerpt.
clf1 = GradientBoostingClassifier()
clf2 = RandomForestClassifier(n_estimators=1000)
clf3 = DecisionTreeClassifier()
Exemplo n.º 5
0
# Cap this process's data segment at 10 GiB (soft limit; hard limit
# left unchanged via -1) to avoid runaway memory use.
resource.setrlimit(resource.RLIMIT_DATA, (10 * 1024**3, -1))

# Batch size equals the full subset so one batch holds every image.
bs = 1239  # images of antelope + beaber
# bs = 26328 # total Images of dataset
# bs = 50  # random small number

# imageSize = 224  # ImageNet Standard
imageSize = 24  # deliberately small for a quick run

train_process_steps = transforms.Compose([
    transforms.Resize((imageSize, imageSize)),  # 224, 224 ImageNet standard
    transforms.ToTensor()
])

train_dataset = AnimalDataset('trainclasses4gloveRm2words.txt',
                              train_process_steps)

# Truncate to the first 1239 samples (the antelope + beaver subset above).
train_dataset.img_names = train_dataset.img_names[:1239]
train_dataset.img_index = train_dataset.img_index[:1239]

# Replace each class index with the corresponding row of
# predicate_binary_mat — presumably the class's binary attribute vector,
# so the loader yields attribute targets instead of class indices.
# TODO(review): confirm downstream code expects this in img_index.
for i in range(len(train_dataset.img_index)):
    train_dataset.img_index[i] = train_dataset.predicate_binary_mat[
        train_dataset.img_index[i]]

train_params = {'batch_size': bs, 'shuffle': True, 'num_workers': 3}
train_loader = data.DataLoader(train_dataset, **train_params)

print('starting svm')
# One slot per predicate/attribute (85 in the AwA-style setup implied by
# predicate_binary_mat) — presumably filled by per-attribute SVMs below.
predicted = [0] * 85