Пример #1
0
    def __init__(self, train=True, transform=None, target_transform=None,
                 noise_rate=0.2, split_percentage=0.9, seed=1,
                 num_classes=10, feature_size=28 * 28, norm_std=0.1):
        """Load the MNIST training arrays, relabel them and split them.

        The images and labels are read from ``data/mnist/*.npy`` (paths are
        relative to the current working directory).  Replacement labels are
        obtained from ``tools.get_instance_noisy_label`` and the images plus
        those labels are then partitioned by ``tools.data_split``.

        Args:
            train: Stored on the instance as ``self.train``; not otherwise
                used in this constructor.
            transform: Stored as ``self.transform``.
            target_transform: Stored as ``self.target_transform``.
            noise_rate: Forwarded to ``tools.get_instance_noisy_label``.
            split_percentage: Forwarded to ``tools.data_split``.
            seed: Forwarded to both ``tools`` helpers.
            num_classes: Forwarded to ``tools.get_instance_noisy_label``.
            feature_size: Forwarded to ``tools.get_instance_noisy_label``
                (default ``28 * 28``).
            norm_std: Forwarded to ``tools.get_instance_noisy_label``.
        """
        self.transform = transform
        self.target_transform = target_transform
        self.train = train

        images_np = np.load('data/mnist/train_images.npy')
        labels_np = np.load('data/mnist/train_labels.npy')

        # Tensor views are only needed by the label-noise helper; the split
        # below works on the raw NumPy image array.
        image_tensor = torch.from_numpy(images_np).float()
        label_tensor = torch.from_numpy(labels_np)

        noisy_labels = tools.get_instance_noisy_label(
            noise_rate,
            zip(image_tensor, label_tensor),  # one-shot (image, label) pairs
            label_tensor,
            num_classes,
            feature_size,
            norm_std,
            seed)

        split = tools.data_split(images_np, noisy_labels, split_percentage,
                                 seed)
        (self.train_data, self.val_data,
         self.train_labels, self.val_labels) = split
Пример #2
0
    def __init__(self, train=True, transform=None, target_transform=None,
                 noise_rate=0.2, split_percentage=0.9, seed=1,
                 num_classes=10, feature_size=3 * 32 * 32, norm_std=0.1):
        """Load the SVHN training arrays, relabel them and split them.

        The images and labels are read from ``data/svhn/*.npy`` (paths are
        relative to the current working directory).  Replacement labels are
        obtained from ``tools.get_instance_noisy_label`` and the images plus
        those labels are then partitioned by ``tools.data_split``.  Finally
        the split selected by ``train`` is reshaped to ``(-1, 3, 32, 32)``
        and its axes permuted to ``(0, 2, 3, 1)``; the other split is left
        as returned by ``tools.data_split``.

        Args:
            train: If truthy, ``self.train_data`` is reshaped; otherwise
                ``self.val_data`` is.  Also stored as ``self.train``.
            transform: Stored as ``self.transform``.
            target_transform: Stored as ``self.target_transform``.
            noise_rate: Forwarded to ``tools.get_instance_noisy_label``.
            split_percentage: Forwarded to ``tools.data_split``.
            seed: Forwarded to both ``tools`` helpers.
            num_classes: Forwarded to ``tools.get_instance_noisy_label``.
            feature_size: Forwarded to ``tools.get_instance_noisy_label``
                (default ``3 * 32 * 32``).
            norm_std: Forwarded to ``tools.get_instance_noisy_label``.
        """
        self.transform = transform
        self.target_transform = target_transform
        self.train = train

        images_np = np.load('data/svhn/train_images.npy')
        labels_np = np.load('data/svhn/train_labels.npy')

        # Tensor views are only needed by the label-noise helper; the split
        # below works on the raw NumPy image array.
        image_tensor = torch.from_numpy(images_np).float()
        label_tensor = torch.from_numpy(labels_np)

        noisy_labels = tools.get_instance_noisy_label(
            noise_rate,
            zip(image_tensor, label_tensor),  # one-shot (image, label) pairs
            label_tensor,
            num_classes,
            feature_size,
            norm_std,
            seed)

        split = tools.data_split(images_np, noisy_labels, split_percentage,
                                 seed)
        (self.train_data, self.val_data,
         self.train_labels, self.val_labels) = split

        # Only the split this instance serves is converted: first to
        # (N, 3, 32, 32), then the size-3 axis is moved last -> (N, 32, 32, 3).
        active_attr = 'train_data' if self.train else 'val_data'
        images = getattr(self, active_attr).reshape((-1, 3, 32, 32))
        setattr(self, active_attr, images.transpose((0, 2, 3, 1)))
Пример #3
0
"""

# Show the module docstring.  print is called as a function so this line
# parses on Python 3 as well as Python 2 (single argument, same output).
print(__doc__)

import matplotlib.pyplot as plt
import numpy as np
import sys
# Extend the import search path so the `from tools import ...` lines below can
# find the project-local module (presumably tools.py lives in this directory).
# The entry is a relative path, so it is resolved against the current working
# directory at import time.
# NOTE(review): "Competetions" looks misspelled, but it must match the real
# directory name on disk -- confirm before changing.
sys.path.append("Kaggle Competetions/House Prices")

# Load the raw train/test CSV files through the project helper.
from tools import data_format
train_data = data_format("House Prices/House Prices - Data/train.csv")
test_data = data_format("House Prices/House Prices - Data/test.csv")

# Separate targets and features for the training data, plus the features of
# the final test set, then hold out part of the training data.
from tools import data_split
from sklearn.model_selection import train_test_split
targets, features, features_test_final = data_split(train_data, test_data)
# random_state=42 makes the shuffle reproducible.
# NOTE(review): a float test_size is the fraction assigned to the *test*
# split, so 0.9 leaves only ~10% of the rows in features_train/labels_train.
# Confirm this is intended (0.1 would be the more usual hold-out).
features_train, features_test, labels_train, labels_test = train_test_split(
    features, targets, test_size=0.9, random_state=42)

# Remove Outliers
# Draw a notched box plot of the training labels in the left panel of a
# 1x2 figure and reuse the flier points it computes as the outlier set.
import matplotlib.pyplot as plt
plt.subplot(1, 2, 1)
boxplot = plt.boxplot(labels_train, notch=True)
# y-coordinates of the flier markers of the first (only) box.
outliers = boxplot["fliers"][0].get_data()[1]
# Positions of every training label whose value matches a flier value.
indices = [pos for pos, label in enumerate(labels_train) if label in outliers]
# Delete back-to-front so the positions still to be removed never shift;
# labels and features are trimmed in lockstep to stay aligned.
for pos in reversed(indices):
    del labels_train[pos]
    del features_train[pos]