def train_test(train, labels, test, **parameters):
    """Fit the feature-generation + NNet pipeline on `train` and predict `test`.

    Builds a sklearn Pipeline that generates extra sparse features for the
    hard-to-separate class groups, log-transforms, scales everything to the
    range [0, 3] and finally trains the project's `NNet`.

    Parameters
    ----------
    train : array-like
        Training samples (presumably non-negative sparse count features,
        given the "PositiveSparse" generators -- TODO confirm with caller).
    labels : array-like
        Class labels aligned with `train`.
    test : array-like
        Samples to produce class probabilities for.
    **parameters
        Overrides for any of the default network hyperparameters below.

    Returns
    -------
    ndarray
        `predict_proba` output of the fitted pipeline on `test`.
    """
    # Default NNet hyperparameters; anything passed by the caller via
    # **parameters takes precedence (params.update below).
    params = {
        'name': name_from_file(),
        'max_epochs': 600,
        'auto_stopping': True,
        'adaptive_weight_decay': False,
        'save_snapshots_stepsize': None,
        'epoch_steps': None,
        'momentum_scaling': 1200,
        'dense1_nonlinearity': 'rectify',
        'dense1_init': 'glorot_uniform',
        'dense2_nonlinearity': 'rectify',
        'dense2_init': 'glorot_uniform',
        'batch_size': 128,
        'learning_rate': 0.0003,
        'learning_rate_scaling': 1000,
        'momentum': 0.98,
        'dense1_size': 700,
        'dense2_size': 550,
        'dense3_size': 400,
        'dropout0_rate': 0.,
        'dropout1_rate': 0.1,
        'dropout2_rate': 0.45,
        'dropout3_rate': 0.58,
    }
    params.update(parameters)
    estimator = Pipeline([
        ('row', PositiveSparseRowFeatureGenerator()),
        ('gen23', PositiveSparseFeatureGenerator(difficult_classes=(2, 3), extra_features=40)),
        ('gen234', PositiveSparseFeatureGenerator(difficult_classes=(2, 3, 4), extra_features=40)),
        ('gen19', PositiveSparseFeatureGenerator(difficult_classes=(1, 9), extra_features=63)),
        ('log', LogTransform()),  # log should be after integer feats but before dist
        ('scale03', MinMaxScaler(feature_range=(0, 3))),  # scale should apply to int and float feats
        ('nn', NNet(**params)),
    ])
    estimator.fit(train, labels)
    prediction = estimator.predict_proba(test)
    return prediction
estimator = Pipeline([ #('gen23', PositiveSparseFeatureGenerator(difficult_classes = (2, 3), extra_features = 40)), #('gen234', PositiveSparseFeatureGenerator(difficult_classes = (2, 3, 4), extra_features = 40)), #('gen19', PositiveSparseFeatureGenerator(difficult_classes = (1, 9), extra_features = 63)), ('log', LogTransform()), ('scale03', MinMaxScaler(feature_range = (0, 3))), ('nn', NNet(**{ 'dense1_nonlinearity': 'rectify', 'dense1_init': 'glorot_normal', 'max_epochs': 200, 'learning_rate': 0.000676, 'learning_rate_scaling': 30, 'momentum': 0.9, 'momentum_scaling': 10, 'dense1_size': 35,# #'dense2_size': 463, #'dense3_size': 329, #'dropout0_rate': 0.06, #'dropout1_rate': 0.56, #'dropout2_rate': 0.19, #'dropout3_rate': 0.04, 'adaptive_weight_decay': False, 'auto_stopping': True, ## 'save_snapshots_stepsize': None, 'epoch_steps': None, })), ]), param_distributions = { 'nn__name': ['nn{0:03d}'.format(k) for k in range(10000)], #'nn__batch_size': binom(n = 256, p = 0.5), #'nn__learning_rate': norm(0.0005, 0.0005), #'nn__learning_rate_scaling': [1, 10, 100, 1000],
#('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)), #('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)), ('scale03', MinMaxScaler(feature_range = (0, 3))), # scale should apply to int and float feats ]) train = pipe.fit_transform(train, labels) valid = pipe.transform(valid) test = pipe.transform(test) names = ['final_final1_351', 'final_final2_5019', 'final_final1_4969', 'final_final2_5530', 'final_final2_2247', 'final_final1_8594', 'final_final1_1717', 'final_final4_3641', 'final_final2_9535', 'final_final2_2066', 'final_final2_5878', 'final_final4_7076', 'final_final3_6441', 'final_final3_5475'] totalvalid = totaltest = 0 for name in names: net = NNet.load(filepath = join(BASE_DIR, 'results', 'nets', name)) for nm, val in net.get_params().iteritems(): print '{0:s} = {1:}'.format(nm, val) #for nm, data in [('val', valid), ('tst', test)]: #probs = net.predict_proba(data) #save(join(SUBMISSIONS_DIR, '{0}_{1}_raw.npy'.format(name, nm)), probs) #makeSubmission(probs, fname = join(SUBMISSIONS_DIR, '{0}_{1}_rescale.csv'.format(name, nm)), digits = 8) probs = net.predict_proba(valid) probs = scale_to_priors(probs, priors = PRIORS) save(join(SUBMISSIONS_DIR, '{0}_valid.npy'.format(name)), probs) totalvalid += probs probs = net.predict_proba(test) probs = scale_to_priors(probs, priors = PRIORS)
#('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)), #('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)), ('scale03', MinMaxScaler(feature_range=(0, 3)) ), # scale should apply to int and float feats ( 'nn', NNet( **{ 'max_epochs': 800, 'auto_stopping': True, 'adaptive_weight_decay': False, 'save_snapshots_stepsize': None, 'epoch_steps': None, 'dense1_size': 400, 'dense2_size': 300, 'dense3_size': 300, 'dense1_nonlinearity': 'rectify', 'dense1_init': 'glorot_uniform', # uniform to reduce randomness 'momentum': 0.98, 'momentum_scaling': 10, 'learning_rate_scaling': 500, 'batch_size': 128, 'dropout0_rate': 0, })), ]), param_distributions={ 'nn__name': [ 'final_{0:s}_{1:03d}'.format(name_from_file(), k) for k in range(10000) ],
from nnet.scikit import NNet from utils.loading import get_training_data train, labels = get_training_data()[:2] #test = get_testing_data()[0] #train, labels = expand_from_test(train, labels, test, confidence = 0.9) #gen = PositiveSparseRowFeatureGenerator() #train = gen.fit_transform(train, labels) #test = gen.transform(test, labels) #random = RandomState() opt = RandomizedSearchCV( estimator=NNet( name=name_from_file(), auto_stopping=True, max_epochs=1500, # binom(n = 4000, p = 0.25) ), param_distributions={ 'dense1_size': randint(low=100, high=1200), 'dense2_size': randint(low=50, high=900), 'dense3_size': randint(low=25, high=700), }, fit_params={}, n_iter=600, n_jobs=cpus - 1, scoring=log_loss_scorer, refit=False, pre_dispatch=3, cv=ShuffleSplit( n=train.shape[0],
PositiveSparseFeatureGenerator(difficult_classes=(1, 9), extra_features=63)), ('log', LogTransform()), # log should be after integer feats but before dist #('distp31', DistanceFeatureGenerator(n_neighbors = 3, distance_p = 1)), #('distp52', DistanceFeatureGenerator(n_neighbors = 5, distance_p = 2)), ('scale03', MinMaxScaler(feature_range=(0, 3)) ), # scale should apply to int and float feats ( 'nn', NNet( **{ #name = name_from_file(), 'max_epochs': 1200, 'auto_stopping': True, 'adaptive_weight_decay': False, 'save_snapshots_stepsize': None, 'epoch_steps': None, 'dense3_size': 0, 'momentum_scaling': 1200, })), ]), param_distributions={ 'nn__name': ['nn{0:03d}'.format(k) for k in range(10000)], 'nn__dense1_nonlinearity': nonlinearities.keys(), 'nn__dense1_init': initializers.keys(), 'nn__dense2_nonlinearity': nonlinearities.keys(), 'nn__dense2_init': initializers.keys(), 'nn__batch_size': binom(n=256, p=0.5), 'nn__learning_rate': norm(0.0005, 0.0002), 'nn__learning_rate_scaling': [10, 100, 1000],
('scale03', MinMaxScaler(feature_range=(0, 3)) ), # scale should apply to int and float feats ( 'nn', NNet( **{ 'max_epochs': 800, 'auto_stopping': True, 'adaptive_weight_decay': False, 'save_snapshots_stepsize': None, 'epoch_steps': None, 'dense1_size': 1000, 'dense2_size': 1500, 'dense3_size': 1000, 'dense1_nonlinearity': 'leaky20', 'dense1_init': 'glorot_uniform', # uniform to reduce randomness 'momentum': 0.9, 'momentum_scaling': 10, 'learning_rate': 0.02, 'learning_rate_scaling': 500, 'batch_size': 64, 'dropout0_rate': 0.1, 'dropout1_rate': 0.6, 'dropout2_rate': 0.6, 'dropout3_rate': 0.6, })), ]), param_distributions={ 'nn__name': [ 'final_{0:s}_{1:03d}'.format(name_from_file(), k)
('log', LogTransform()), ('scale03', MinMaxScaler(feature_range=(0, 3))), ]), expand_confidence=0.9) net = NNet( name=name_from_file(), dense1_nonlinearity='rectify', dense1_init='glorot_normal', auto_stopping=True, max_epochs=1000, batch_size=256, learning_rate=0.0005, learning_rate_scaling=100, momentum=0.9, momentum_scaling=100, dense1_size=100, dense2_size=50, dense3_size=None, dropout0_rate=0, dropout1_rate=0, dropout2_rate=0, dropout3_rate=0, weight_decay=0.001, adaptive_weight_decay=True, ) cv = ShuffleSplit( n=train.shape[0], n_iter=5, test_size=0.2,
# Due to this error: # ValueError: Loading weights from a list of parameter values is no longer supported. # Please send me something like the return value of 'net.get_all_param_values()' instead. # testing new method import warnings from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler from nnet.prepare import LogTransform from nnet.scikit import NNet from utils.loading import get_preproc_data warnings.filterwarnings("ignore") train, labels, test = get_preproc_data(Pipeline([ ('log', LogTransform()), ('scale03', MinMaxScaler(feature_range=(0, 3))), ]), expand_confidence=0.9) nn = NNet(max_epochs=1) nn.fit(train, labels) nn.save(filepath='/tmp/test') nn = NNet.load(filepath='/tmp/test') w = nn.net.get_all_params_values() print w nn.net.load_params_from(w)