Example no. 1
0
def train_test(train, labels, test, **parameters):
	"""Fit the feature pipeline plus neural network on (train, labels) and
	return predicted class probabilities for test.

	Any keyword argument overrides the matching default hyperparameter below.
	"""
	# Default NNet hyperparameters; caller-supplied values take precedence.
	hyper = {
		'name': name_from_file(),
		'max_epochs': 600,
		'auto_stopping': True,
		'adaptive_weight_decay': False,
		'save_snapshots_stepsize': None,
		'epoch_steps': None,
		'momentum_scaling': 1200,
		'dense1_nonlinearity': 'rectify',
		'dense1_init': 'glorot_uniform',
		'dense2_nonlinearity': 'rectify',
		'dense2_init': 'glorot_uniform',
		'batch_size': 128,
		'learning_rate': 0.0003,
		'learning_rate_scaling': 1000,
		'momentum': 0.98,
		'dense1_size': 700,
		'dense2_size': 550,
		'dense3_size': 400,
		'dropout0_rate': 0.,
		'dropout1_rate': 0.1,
		'dropout2_rate': 0.45,
		'dropout3_rate': 0.58,
	}
	hyper.update(parameters)
	# Feature-generation steps run on raw integer counts; the log transform
	# comes after them but before any distance features, and scaling applies
	# to both integer and float features.
	steps = [
		('row', PositiveSparseRowFeatureGenerator()),
		('gen23', PositiveSparseFeatureGenerator(difficult_classes=(2, 3), extra_features=40)),
		('gen234', PositiveSparseFeatureGenerator(difficult_classes=(2, 3, 4), extra_features=40)),
		('gen19', PositiveSparseFeatureGenerator(difficult_classes=(1, 9), extra_features=63)),
		('log', LogTransform()),
		('scale03', MinMaxScaler(feature_range=(0, 3))),
		('nn', NNet(**hyper)),
	]
	model = Pipeline(steps)
	model.fit(train, labels)
	return model.predict_proba(test)
Example no. 2
0
# Leftover commented-out pipeline steps from an earlier experiment.
#	('row', PositiveSparseRowFeatureGenerator()),
#	('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)),
#	('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)),
#]), expand_confidence = 0.9)
# Load features/labels for training and the unlabeled test features.
train, labels = get_training_data()[:2]
test = get_testing_data()[0]

# Parallelism/seed setup, disabled here.
#cpus = max(cpu_count() - 1, 1)
#random = RandomState()

opt = RandomizedSearchCV(
	estimator = Pipeline([
		#('gen23', PositiveSparseFeatureGenerator(difficult_classes = (2, 3), extra_features = 40)),
		#('gen234', PositiveSparseFeatureGenerator(difficult_classes = (2, 3, 4), extra_features = 40)),
		#('gen19', PositiveSparseFeatureGenerator(difficult_classes = (1, 9), extra_features = 63)),
		('log', LogTransform()),
		('scale03', MinMaxScaler(feature_range = (0, 3))),
		('nn', NNet(**{
			'dense1_nonlinearity': 'rectify',
			'dense1_init': 'glorot_normal',
			'max_epochs': 200,
			'learning_rate': 0.000676,
			'learning_rate_scaling': 30,
			'momentum': 0.9,
			'momentum_scaling': 10,
			'dense1_size': 35,#
			#'dense2_size': 463,
			#'dense3_size': 329,
			#'dropout0_rate': 0.06,
			#'dropout1_rate': 0.56,
			#'dropout2_rate': 0.19,
Example no. 3
0
from nnet.prepare import LogTransform
from utils.loading import get_training_data
from utils.shuffling import shuffle
from nndist.distance import DistanceFeatureGenerator

# Number of samples kept for this quick distance-feature visualisation.
N = 500

# Restrict the data to classes 5..7 and remap those labels to 1..3.
train, labels = get_training_data()[:2]
train = train[logical_and(5 <= labels, labels <= 7), :]
labels = labels[logical_and(5 <= labels, labels <= 7)] - 4
# Shuffle, then keep only the first N rows/labels.
train, labels = shuffle(train, labels)[:2]
train = train[:N, :]
labels = labels[:N]
print train.shape, train.dtype  # NOTE: Python 2 print statement

# Log-transform the raw counts before computing distance features.
train = LogTransform().fit_transform(train)
print train.shape, train.dtype  # NOTE: Python 2 print statement

# Append nearest-neighbour distance features (3 neighbours, Euclidean p=2).
gen = DistanceFeatureGenerator(n_neighbors=3, distance_p=2, nr_classes=3)
train = gen.fit_transform(train, labels)
print train.shape, train.dtype  # NOTE: Python 2 print statement

# Scaling step kept disabled for this experiment.
#train = MinMaxScaler(feature_range = (0, 3)).fit_transform(train)
#print train.shape, train.dtype

fig, ax = subplots(figsize=(7, 6))
colors = ['r', 'g', 'b']
for cls in range(1, 4):
    ax.scatter(train[labels == cls, -2],
               train[labels == cls, -1],
               c=colors[cls - 1],
Example no. 4
0
from utils.ioutil import makeSubmission
from utils.loading import get_testing_data
from utils.postprocess import scale_to_priors


# Load cached training labels/matrix and validation matrix (first column
# of each matrix is dropped), plus the raw test features.
labels = load(join(BASE_DIR, 'data', 'trainclas.npy'))
train = load(join(BASE_DIR, 'data', 'trainmat.npy'))[:, 1:]
valid = load(join(BASE_DIR, 'data', 'testmat.npy'))[:, 1:]
test = get_testing_data()[0]

# Shared preprocessing pipeline: sparse feature generators operate on raw
# integer counts, the log transform comes after them (but before any
# distance features), and min-max scaling runs last over all features.
feature_steps = [
	('row', PositiveSparseRowFeatureGenerator()),
	('gen23', PositiveSparseFeatureGenerator(difficult_classes=(2, 3), extra_features=40)),
	('gen234', PositiveSparseFeatureGenerator(difficult_classes=(2, 3, 4), extra_features=40)),
	('gen19', PositiveSparseFeatureGenerator(difficult_classes=(1, 9), extra_features=63)),
	('log', LogTransform()),
	('scale03', MinMaxScaler(feature_range=(0, 3))),
]
pipe = Pipeline(feature_steps)

# Fit on the training split, then apply the frozen transform elsewhere.
train = pipe.fit_transform(train, labels)
valid = pipe.transform(valid)
test = pipe.transform(test)

# Model snapshot names whose predictions get blended below.
names = [
	'final_final1_351', 'final_final2_5019', 'final_final1_4969',
	'final_final2_5530', 'final_final2_2247', 'final_final1_8594',
	'final_final1_1717', 'final_final4_3641', 'final_final2_9535',
	'final_final2_2066', 'final_final2_5878', 'final_final4_7076',
	'final_final3_6441', 'final_final3_5475',
]
totalvalid = totaltest = 0

for name in names: