def test_mlp_fit():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    X, Z = theano_floatx(X, Z)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    mlp.fit(X, Z)

def test_mlp_fit_with_imp_weight():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    W = np.random.random((10, 1)) > 0.5
    X, Z, W = theano_floatx(X, Z, W)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10,
              imp_weight=True)
    mlp.fit(X, Z, W)

def test_mlp_iter_fit():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    X, Z = theano_floatx(X, Z)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    for i, info in enumerate(mlp.iter_fit(X, Z)):
        if i >= 10:
            break

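# Imports assumed by the unit tests in this section (a minimal sketch; the
# breze module paths are assumptions based on the library's usual layout).
import cPickle

import numpy as np
import climin.initialize

from breze.learn.mlp import Mlp
from breze.learn.utils import theano_floatx
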
def run_mlp(n_job, pars):
    f = h5.File('../../../datasets/eigdata.hdf5', 'r')
    X = f['matrices'][...]
    Z = f['eigvals'][...]
    f = open('mlp_training_%d' % n_job, 'w')

    max_passes = 100
    batch_size = 2000
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    m = Mlp(20000, pars['n_hidden'], 1,
            hidden_transfers=[pars['hidden_transfer']] * len(pars['n_hidden']),
            out_transfer='identity', loss='squared',
            optimizer=pars['optimizer'], batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])

    losses = []
    f.write('max iter: %d \n' % max_iter)

    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = 0.001
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val_loss'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    f.write(header + '\n')
    f.write(('-' * len(header)) + '\n')

    for i, info in enumerate(m.powerfit((X, Z), (X, Z), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({'time': passed})
        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g' % info
        f.write(row)

    f.write('best val_loss: %f \n' % info['best_loss'])
    f.close()
    cp.dump(info['best_pars'], open('best_pars_%d.pkl' % n_job, 'w'))

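# Illustrative call of run_mlp above. The concrete values are assumptions;
# only the keys match what the function reads from pars.
pars = {
    'n_hidden': [800, 800],
    'hidden_transfer': 'tanh',
    'optimizer': ('rmsprop', {'step_rate': 1e-4, 'momentum': 0.9, 'decay': 0.9}),
    'par_std': 1e-1,
}
run_mlp(0, pars)
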
def new_trainer(pars, data):
    # 156 inputs for the hand-crafted features
    input_size = 156
    # 13 as there are 12 fields
    output_size = 13
    batch_size = pars["batch_size"]

    m = Mlp(
        input_size,
        pars["n_hidden"],
        output_size,
        hidden_transfers=pars["hidden_transfers"],
        out_transfer="softmax",
        loss="cat_ce",
        batch_size=batch_size,
        optimizer=pars["optimizer"],
    )
    climin.initialize.randomize_normal(m.parameters.data, 0, pars["par_std"])

    weight_decay = (
        (m.parameters.in_to_hidden ** 2).sum()
        + (m.parameters.hidden_to_hidden_0 ** 2).sum()
        + (m.parameters.hidden_to_out ** 2).sum()
    )
    weight_decay /= m.exprs["inpt"].shape[0]
    m.exprs["true_loss"] = m.exprs["loss"]
    c_wd = pars["L2"]
    m.exprs["loss"] = m.exprs["loss"] + c_wd * weight_decay

    # length of dataset should be 270000 (for no time-integration)
    n_report = 270000 / batch_size
    max_iter = n_report * 100

    interrupt = climin.stops.OnSignal()
    print dir(climin.stops)
    stop = climin.stops.Any(
        [
            climin.stops.AfterNIterations(max_iter),
            climin.stops.OnSignal(signal.SIGTERM),
            # climin.stops.NotBetterThanAfter(1e-1, 500, key='train_loss'),
        ]
    )
    pause = climin.stops.ModuloNIterations(n_report)
    reporter = KeyPrinter(["n_iter", "train_loss", "val_loss"])

    t = Trainer(m, stop=stop, pause=pause, report=reporter, interrupt=interrupt)
    make_data_dict(t, data)
    return t

def new_trainer(self, pars, data):
    modules = ['theano', 'breze', 'climin', 'alchemie']
    git_log(modules)
    copy_theanorc()

    m = Mlp(2, [pars['n_hidden']], 1,
            hidden_transfers=[pars['hidden_transfer']],
            out_transfer='sigmoid', loss='bern_ces',
            optimizer=pars['optimizer'])
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])
    n_report = 100

    t = Trainer(
        model=m,
        data=data,
        stop=climin.stops.Any([
            climin.stops.AfterNIterations(10000),
            climin.stops.NotBetterThanAfter(1e-1, 5000, key='val_loss'),
        ]),
        pause=climin.stops.ModuloNIterations(n_report),
        report=OneLinePrinter(
            keys=['n_iter', 'runtime', 'train_loss', 'val_loss'],
            spaces=[6, '10.2f', '15.8f', '15.8f']),
        interrupt=climin.stops.OnSignal(),
    )
    return t

def new_trainer(pars, data):
    m = Mlp(2, [pars['n_hidden']], 1,
            hidden_transfers=[pars['hidden_transfer']],
            out_transfer='sigmoid', loss='bern_ces',
            optimizer=pars['optimizer'])
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])
    n_report = 100

    interrupt = climin.stops.OnSignal()
    print dir(climin.stops)
    stop = climin.stops.Any([
        climin.stops.AfterNIterations(10000),
        climin.stops.OnSignal(signal.SIGTERM),
        climin.stops.NotBetterThanAfter(1e-1, 500, key='train_loss'),
    ])
    pause = climin.stops.ModuloNIterations(n_report)
    reporter = KeyPrinter(['n_iter', 'train_loss'])

    t = Trainer(m, stop=stop, pause=pause, report=reporter, interrupt=interrupt)
    make_data_dict(t, data)
    return t

def new_trainer(pars, data):
    # 3700 for binning
    input_size = 3700
    # 13 as there are 12 fields
    output_size = 13
    batch_size = pars['batch_size']

    m = Mlp(input_size, pars['n_hidden'], output_size,
            hidden_transfers=pars['hidden_transfers'], out_transfer='softmax',
            loss='cat_ce', batch_size=batch_size,
            optimizer=pars['optimizer'])
    climin.initialize.randomize_normal(m.parameters.data, 0, pars['par_std'])

    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_hidden_0**2).sum()
                    + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = pars['L2']
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    # length of dataset should be 270000 (for no time-integration)
    n_report = 40000 / batch_size
    max_iter = n_report * 100

    print m.exprs

    interrupt = climin.stops.OnSignal()
    print dir(climin.stops)
    stop = climin.stops.Any([
        climin.stops.Patience('val_loss', max_iter, 1.2),
        climin.stops.OnSignal(signal.SIGTERM),
        #climin.stops.NotBetterThanAfter(1e-1, 500, key='train_loss'),
    ])
    pause = climin.stops.ModuloNIterations(n_report)
    reporter = KeyPrinter(['n_iter', 'train_loss', 'val_loss'])

    t = Trainer(m, stop=stop, pause=pause, report=reporter, interrupt=interrupt)
    make_data_dict(t, data)
    return t

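# Illustrative parameter set for the binning trainer above. The values are
# assumptions; only the keys are taken from what new_trainer reads. `data` is
# whatever make_data_dict expects (not shown here).
pars = {
    'batch_size': 200,
    'n_hidden': [1000, 1000],
    'hidden_transfers': ['tanh', 'tanh'],
    'optimizer': ('rmsprop', {'step_rate': 1e-4, 'momentum': 0.9, 'decay': 0.9}),
    'par_std': 1e-1,
    'L2': 0.001,
}
trainer = new_trainer(pars, data)
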
def test_mlp_pickle():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    X, Z = theano_floatx(X, Z)

    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=2)
    climin.initialize.randomize_normal(mlp.parameters.data, 0, 1)
    mlp.fit(X, Z)
    Y = mlp.predict(X)

    pickled = cPickle.dumps(mlp)
    mlp2 = cPickle.loads(pickled)
    Y2 = mlp2.predict(X)

    assert np.allclose(Y, Y2)

#max_iter = max_passes * X.shape[0] / batch_size
max_iter = 75000000
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)
optimizer = 'gd', {'step_rate': 0.001, 'momentum': 0}

typ = 'plain'
if typ == 'plain':
    m = Mlp(2099, [800, 800], 15, X, Z,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity',
            loss='squared', optimizer=optimizer, batch_size=batch_size,
            max_iter=max_iter)
elif typ == 'fd':
    m = FastDropoutNetwork(2099, [800, 800], 15, X, Z, TX, TZ,
                           hidden_transfers=['tanh', 'tanh'],
                           out_transfer='identity', loss='squared',
                           p_dropout_inpt=.1,

def do_one_eval(X, Z, VX, VZ, step_rate, momentum, decay, c_wd):
    """Does one evaluation of a neural network with the above parameters.

    Parameters
    ----------
    X, Z : matrix
        Feature and target matrices of the training set, one-hot encoded.
    VX, VZ : matrix
        Feature and target matrices of the validation set, one-hot encoded.
    step_rate : float
        The step rate/learning rate of the rmsprop algorithm.
    momentum : float
        The momentum of rmsprop.
    decay : float
        The step-rate decay.
    c_wd : float
        Penalty term for the weights.

    Returns
    -------
    val_emp : float
        The percentage of wrongly classified samples.
    """
    max_passes = 100
    batch_size = 250
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size
    optimizer = 'rmsprop', {'step_rate': step_rate, 'momentum': momentum,
                            'decay': decay}

    # This defines our NN. Since BayOpt does not support categorical data, we
    # just use a fixed hidden layer length and transfer functions.
    m = Mlp(784, [800], 10, hidden_transfers=['sigmoid'],
            out_transfer='softmax', loss='cat_ce', optimizer=optimizer,
            batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, 1e-1)
    losses = []

    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = c_wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'train emp', 'val emp'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    #print header
    #print '-' * len(header)

    for i, info in enumerate(m.powerfit((X, Z), (VX, VZ), stop, pause)):
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        #img = tile_raster_images(fe.parameters['in_to_hidden'].T, image_dims, feature_dims, (1, 1))
        #save_and_display(img, 'filters-%i.png' % i)

        info.update({
            'time': passed,
            'train_emp': f_n_wrong(X, Z),
            'val_emp': f_n_wrong(VX, VZ),
        })
        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g\t%(train_emp)g\t%(val_emp)g' % info
        # Comment in this row if you want updates during the computation.
        #print row

    return info["val_emp"]

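# Illustrative call of do_one_eval above (the hyperparameter values are
# assumptions); X/Z and VX/VZ are the one-hot encoded training and validation
# sets described in the docstring.
val_emp = do_one_eval(X, Z, VX, VZ,
                      step_rate=1e-3, momentum=0.9, decay=0.9, c_wd=1e-3)
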
def run_mlp(arch, func, step, batch, init, X, Z, VX, VZ, wd):
    max_passes = 200
    batch_size = batch
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size
    input_size = len(X[0])

    stop = climin.stops.after_n_iterations(max_iter)
    pause = climin.stops.modulo_n_iterations(n_report)

    #optimizer = 'rmsprop', {'steprate': 0.0001, 'momentum': 0.95, 'decay': 0.8}
    optimizer = 'gd', {'steprate': step}

    m = Mlp(input_size, arch, 2, hidden_transfers=func, out_transfer='softmax',
            loss='cat_ce', optimizer=optimizer, batch_size=batch_size)
    climin.initialize.randomize_normal(m.parameters.data, 0, init)
    losses = []
    print 'max iter', max_iter

    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum()
                    + (m.parameters.hidden_to_hidden_0**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'train emp', 'val emp'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)

    results = open('results.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()

    for i, info in enumerate(m.powerfit((X, Z), (VX, VZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'train_emp': f_n_wrong(X, Z),
            'val_emp': f_n_wrong(VX, VZ),
        })
        row = '%(n_iter)i\t%(time)g\t%(loss)g\t%(val_loss)g\t%(train_emp)g\t%(val_emp)g' % info
        results = open('results.txt', 'a')
        print row
        results.write(row + '\n')
        results.close()

    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'],
            open('best_%s_%s_%s_%s_%s.pkl' % (arch, func, step, batch, init), 'w'))

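# Illustrative call of the classifier variant of run_mlp above (values are
# assumptions): two tanh hidden layers, trained on (X, Z), validated on (VX, VZ).
run_mlp([400, 100], ['tanh', 'tanh'], step=0.1, batch=250, init=1e-1,
        X=X, Z=Z, VX=VX, VZ=VZ, wd=0.001)
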
def do_one_eval(X, Z, TX, TZ, test_labels, train_labels,
                step_rate, momentum, decay, c_wd, counter, opt):
    seed = 3453
    np.random.seed(seed)

    max_passes = 200
    batch_size = 25
    max_iter = 5000000
    n_report = X.shape[0] / batch_size
    weights = []

    optimizer = 'gd', {'step_rate': step_rate, 'momentum': momentum,
                       'decay': decay}

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    # This defines our NN. Since BayOpt does not support categorical data, we
    # just use a fixed hidden layer length and transfer functions.
    m = Mlp(2100, [400, 100], 1, X, Z,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity',
            loss='squared', optimizer=optimizer, batch_size=batch_size,
            max_iter=max_iter)
    #climin.initialize.randomize_normal(m.parameters.data, 0, 1e-3)

    # Transform the test data
    #TX = m.transformedData(TX)
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    losses = []
    print 'max iter', max_iter

    m.init_weights()
    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])

    weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum()
                    + (weights[2]**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = c_wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    mae = T.abs_((m.exprs['output'] * np.std(train_labels)
                  + np.mean(train_labels)) - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square((m.exprs['output'] * np.std(train_labels)
                            + np.mean(train_labels)) - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)

    results = open('result.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.write("%f %f %f %f %s" % (step_rate, momentum, decay, c_wd, opt))
    results.write('\n')
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp_" + opt + str(counter) + ".pkl")
    n_iter = 0
    if os.path.isfile(base_path):
        with open("pars_hp_" + opt + str(counter) + ".pkl", 'rb') as tp:
            n_iter, best_pars = dill.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start

        if math.isnan(info['loss']):
            info.update({'mae_test': f_mae(TX, test_labels)})
            n_iter = info['n_iter']
            break

        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })
        info['n_iter'] += n_iter

        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result.txt', 'a')
        print row
        results.write(row + '\n')
        results.close()

    with open("pars_hp_" + opt + str(counter) + ".pkl", 'wb') as fp:
        dill.dump((info['n_iter'], info['best_pars']), fp)

    with open("apsis_pars_" + opt + str(counter) + ".pkl", 'rb') as fp:
        LAss, opt, step_rate, momentum, decay, c_wd, counter, n_iter1, result1 = dill.load(fp)
    n_iter1 = info['n_iter']
    result1 = info['mae_test']
    with open("apsis_pars_" + opt + str(counter) + ".pkl", 'wb') as fp:
        dill.dump((LAss, opt, step_rate, momentum, decay, c_wd, counter,
                   n_iter1, result1), fp)

    return info['mae_test'], info['n_iter']

def test_mlp_predict():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    mlp.predict(X)

nets = [f for f in listdir(path) if isfile(join(path, f)) and not f.find('best')]

best_error = np.inf
best_net = ''

for net in nets:
    file = net
    net = net.replace('.pkl', '')
    net = net.replace('best_', '')
    net = net.replace('[', '')
    net = net.replace(']', '')
    net = net.split('_')

    arch = [int(n) for n in net[0].split(',')]
    func = [n.replace(' ', '')[1:-1] for n in net[1].split(',')]
    batch_size = int(net[3])
    optimizer = 'gd', {'steprate': 0.1}

    m = Mlp(input_size, arch, 2, hidden_transfers=func, out_transfer='softmax',
            loss='cat_ce', optimizer=optimizer, batch_size=batch_size)

    best_pars = cp.load(open(file, 'r'))
    m.parameters.data[...] = best_pars

    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)

    error = f_n_wrong(VX, VZ)
    if error < best_error:
        best_error = error
        best_net = net

    print 'loaded best parameters from file %s' % file
    print 'percentage of misclassified samples on validation/test set: %f' % error

print 'the best net found was ' + str(best_net) + ' with an error of %f ' % best_error

def run_mlp(arch, func, step, batch, X, Z, TX, TZ, wd, opt):
    batch_size = batch
    #max_iter = max_passes * X.shape[0] / batch_size
    max_iter = 100000
    n_report = X.shape[0] / batch_size
    weights = []
    input_size = len(X[0])
    train_labels = Z
    test_labels = TZ

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    optimizer = opt, {'step_rate': step}

    typ = 'plain'
    if typ == 'plain':
        m = Mlp(input_size, arch, 1, X, Z,
                hidden_transfers=func, out_transfer='identity', loss='squared',
                optimizer=optimizer, batch_size=batch_size, max_iter=max_iter)
    elif typ == 'fd':
        m = FastDropoutNetwork(2099, [400, 100], 1, X, Z, TX, TZ,
                               hidden_transfers=['tanh', 'tanh'],
                               out_transfer='identity', loss='squared',
                               p_dropout_inpt=.1, p_dropout_hiddens=.2,
                               optimizer=optimizer, batch_size=batch_size,
                               max_iter=max_iter)

    climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt))

    # Transform the test data
    #TX = m.transformedData(TX)
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    losses = []
    print 'max iter', max_iter

    m.init_weights()
    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i)
                    for i in (X, Z, TX, TZ)]

    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])

    weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum()
                    + (weights[2]**2).sum() + (weights[3]**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = wd
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    '''
    weight_decay = ((m.parameters.in_to_hidden**2).sum()
                    + (m.parameters.hidden_to_out**2).sum()
                    + (m.parameters.hidden_to_hidden_0**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = 0.1
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay
    '''

    mae = T.abs_((m.exprs['output'] * np.std(train_labels)
                  + np.mean(train_labels)) - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(T.square((m.exprs['output'] * np.std(train_labels)
                            + np.mean(train_labels)) - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)

    results = open('result.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(m.transformedData(X), train_labels),
            'rmse_train': f_rmse(m.transformedData(X), train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })
        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result.txt', 'a')
        print row
        results.write(row + '\n')
        results.close()

    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'], open('best_pars.pkl', 'w'))

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)

    print 'TRAINING SET\n'
    print('MAE: %5.2f kcal/mol' % np.abs(output_train - train_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' % np.square(output_train - train_labels).mean(axis=0) ** .5)
    print 'TESTING SET\n'
    print('MAE: %5.2f kcal/mol' % np.abs(output_test - test_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' % np.square(output_test - test_labels).mean(axis=0) ** .5)

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** .5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** .5

    results = open('result.txt', 'a')
    results.write('Training set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" % mae_train)
    results.write('\nRMSE:\n')
    results.write("%5.2f" % rmse_train)
    results.write('\nTesting set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" % mae_test)
    results.write('\nRMSE:\n')
    results.write("%5.2f" % rmse_test)
    results.close()

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)
optimizer = "gd", {"step_rate": 0.001, "momentum": 0}

typ = "plain"
if typ == "plain":
    m = Mlp(
        2099,
        [400, 100],
        1,
        X,
        Z,
        hidden_transfers=["tanh", "tanh"],
        out_transfer="identity",
        loss="squared",
        optimizer=optimizer,
        batch_size=batch_size,
        max_iter=max_iter,
    )
elif typ == "fd":
    m = FastDropoutNetwork(
        2099,
        [400, 100],
        1,
        X,
        Z,
        TX,
        TZ,

batch_size = 25
#max_iter = max_passes * X.shape[0] / batch_size
max_iter = 75000000
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)
optimizer = 'gd', {'step_rate': 0.001, 'momentum': 0}

typ = 'plain'
if typ == 'plain':
    m = Mlp(2099, [800, 800], 15, X, Z,
            hidden_transfers=['tanh', 'tanh'], out_transfer='identity',
            loss='squared', optimizer=optimizer, batch_size=batch_size,
            max_iter=max_iter)
elif typ == 'fd':
    m = FastDropoutNetwork(2099, [800, 800], 15, X, Z, TX, TZ,
                           hidden_transfers=['tanh', 'tanh'],
                           out_transfer='identity', loss='squared',
                           p_dropout_inpt=.1, p_dropout_hiddens=.2,
                           optimizer=optimizer, batch_size=batch_size,
                           max_iter=max_iter)

#climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt))
m.init_weights()

# Transform the test data
#TX = m.transformedData(TX)
TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)

def run_mlp(func, step, momentum, X, Z, TX, TZ, wd, opt, counter):
    print func, step, momentum, wd, opt, counter
    seed = 3453
    np.random.seed(seed)

    batch_size = 25
    # max_iter = max_passes * X.shape[0] / batch_size
    max_iter = 25000000
    n_report = X.shape[0] / batch_size
    weights = []
    input_size = len(X[0])

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)
    optimizer = opt, {"step_rate": step, "momentum": momentum}

    typ = "plain"
    if typ == "plain":
        m = Mlp(
            input_size,
            [400, 100],
            1,
            X,
            Z,
            hidden_transfers=func,
            out_transfer="identity",
            loss="squared",
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )
    elif typ == "fd":
        m = FastDropoutNetwork(
            2099,
            [400, 100],
            1,
            X,
            Z,
            TX,
            TZ,
            hidden_transfers=["tanh", "tanh"],
            out_transfer="identity",
            loss="squared",
            p_dropout_inpt=0.1,
            p_dropout_hiddens=0.2,
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
        )

    # climin.initialize.randomize_normal(m.parameters.data, 0, 1 / np.sqrt(m.n_inpt))

    # Transform the test data
    # TX = m.transformedData(TX)
    TX = np.array([m.transformedData(TX) for _ in range(10)]).mean(axis=0)
    print TX.shape
    losses = []
    print "max iter", max_iter

    m.init_weights()
    X, Z, TX, TZ = [breze.learn.base.cast_array_to_local_type(i)
                    for i in (X, Z, TX, TZ)]

    for layer in m.mlp.layers:
        weights.append(m.parameters[layer.weights])

    weight_decay = ((weights[0] ** 2).sum() + (weights[1] ** 2).sum()
                    + (weights[2] ** 2).sum())
    weight_decay /= m.exprs["inpt"].shape[0]
    m.exprs["true_loss"] = m.exprs["loss"]
    c_wd = wd
    m.exprs["loss"] = m.exprs["loss"] + c_wd * weight_decay

    mae = T.abs_((m.exprs["output"] * np.std(train_labels)
                  + np.mean(train_labels)) - m.exprs["target"]).mean()
    f_mae = m.function(["inpt", "target"], mae)

    rmse = T.sqrt(T.square((m.exprs["output"] * np.std(train_labels)
                            + np.mean(train_labels)) - m.exprs["target"]).mean())
    f_rmse = m.function(["inpt", "target"], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = "#", "seconds", "loss", "val loss", "mae_train", "rmse_train", "mae_test", "rmse_test"
    max_len = max(len(i) for i in keys)
    header = "\t".join(i for i in keys)
    print header
    print "-" * len(header)

    results = open("result_hp.txt", "a")
    results.write(header + "\n")
    results.write("-" * len(header) + "\n")
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp" + str(counter) + ".pkl")
    n_iter = 0
    if os.path.isfile(base_path):
        with open("pars_hp" + str(counter) + ".pkl", "rb") as tp:
            n_iter, best_pars = cp.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info["n_iter"] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info["loss"], info["val_loss"]))
        info.update({
            "time": passed,
            "mae_train": f_mae(m.transformedData(X), train_labels),
            "rmse_train": f_rmse(m.transformedData(X), train_labels),
            "mae_test": f_mae(TX, test_labels),
            "rmse_test": f_rmse(TX, test_labels),
        })
        info["n_iter"] += n_iter

        row = "%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g" % info
        results = open("result_hp.txt", "a")
        print row
        results.write(row + "\n")
        results.close()

    with open("pars_hp" + str(counter) + ".pkl", "wb") as fp:
        cp.dump((info["n_iter"], info["best_pars"]), fp)
    with open("hps" + str(counter) + ".pkl", "wb") as tp:
        cp.dump((func, step, momentum, wd, opt, counter, info["n_iter"]), tp)

    m.parameters.data[...] = info["best_pars"]
    cp.dump(info["best_pars"], open("best_pars.pkl", "wb"))

    Y = m.predict(m.transformedData(X))
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)

    print "TRAINING SET\n"
    print("MAE: %5.2f kcal/mol" % np.abs(output_train - train_labels).mean(axis=0))
    print("RMSE: %5.2f kcal/mol" % np.square(output_train - train_labels).mean(axis=0) ** 0.5)
    print "TESTING SET\n"
    print("MAE: %5.2f kcal/mol" % np.abs(output_test - test_labels).mean(axis=0))
    print("RMSE: %5.2f kcal/mol" % np.square(output_test - test_labels).mean(axis=0) ** 0.5)

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0) ** 0.5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0) ** 0.5

    results = open("result_hp.txt", "a")
    results.write("Training set:\n")
    results.write("MAE:\n")
    results.write("%5.2f" % mae_train)
    results.write("\nRMSE:\n")
    results.write("%5.2f" % rmse_train)
    results.write("\nTesting set:\n")
    results.write("MAE:\n")
    results.write("%5.2f" % mae_test)
    results.write("\nRMSE:\n")
    results.write("%5.2f" % rmse_test)
    results.close()

TZ = one_hot(TZ, 10)
image_dims = 28, 28

max_passes = 150
batch_size = 250
max_iter = max_passes * X.shape[0] / batch_size
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

#optimizer = 'rmsprop', {'steprate': 0.0001, 'momentum': 0.95, 'decay': 0.8}
optimizer = 'gd', {'steprate': 0.1}

m = Mlp(784, [800], 10, hidden_transfers=['sigmoid'], out_transfer='softmax',
        loss='cat_ce', optimizer=optimizer, batch_size=batch_size)
climin.initialize.randomize_normal(m.parameters.data, 0, 1e-1)
losses = []
print 'max iter', max_iter

weight_decay = ((m.parameters.in_to_hidden**2).sum()
                + (m.parameters.hidden_to_out**2).sum())
weight_decay /= m.exprs['inpt'].shape[0]
m.exprs['true_loss'] = m.exprs['loss']
c_wd = 0.001
m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                   T.argmax(m.exprs['target'], axis=1)).mean()
f_n_wrong = m.function(['inpt', 'target'], n_wrong)

class Predictor:
    # initialize the object
    def __init__(self):
        with open('config.txt', 'r') as config_f:
            for line in config_f:
                if not line.find('mode='):
                    self.mode = line.replace('mode=', '').replace('\n', '')
                if not line.find('robust='):
                    self.robust = line.replace('robust=', '').replace('\n', '')
        print 'mode=%s\nrobustness=%s' % (self.mode, self.robust)

        if self.robust == 'majority':
            self.pred_count = 0
            self.predictions = np.zeros((13,))
        if self.robust == 'markov':
            self.markov = Markov_Chain()
            self.last_state = 0
            self.current_state = 0
        if self.robust == 'markov_2nd':
            self.markov = Markov_Chain_2nd()
            self.pre_last_state = 0
            self.last_state = 0
            self.current_state = 0

        self.sample_count = 0
        self.sample = []

        if self.mode == 'cnn':
            self.bin_cm = 10
            self.max_x_cm = 440
            self.min_x_cm = 70
            self.max_y_cm = 250
            self.max_z_cm = 200
            self.nr_z_intervals = 2
            self.x_range = (self.max_x_cm - self.min_x_cm) / self.bin_cm
            self.y_range = self.max_y_cm * 2 / self.bin_cm
            self.z_range = self.nr_z_intervals
            self.input_size = 3700
            self.output_size = 13
            self.n_channels = 2
            self.im_width = self.y_range
            self.im_height = self.x_range
            print 'initializing cnn model.'
            self.model = Cnn(self.input_size, [16, 32], [200, 200],
                             self.output_size,
                             ['tanh', 'tanh'], ['tanh', 'tanh'],
                             'softmax', 'cat_ce',
                             image_height=self.im_height,
                             image_width=self.im_width,
                             n_image_channel=self.n_channels,
                             pool_size=[2, 2],
                             filter_shapes=[[5, 5], [5, 5]],
                             batch_size=1)
            self.model.parameters.data[...] = cp.load(open('./best_cnn_pars.pkl', 'rb'))

        if self.mode == 'crafted':
            self.input_size = 156
            self.output_size = 13
            self.means = cp.load(open('means_crafted.pkl', 'rb'))
            self.stds = cp.load(open('stds_crafted.pkl', 'rb'))
            print 'initializing crafted features model.'
            self.model = Mlp(self.input_size, [1000, 1000], self.output_size,
                             ['tanh', 'tanh'], 'softmax', 'cat_ce',
                             batch_size=1)
            self.model.parameters.data[...] = cp.load(open('./best_crafted_pars.pkl', 'rb'))

        # this is just a trick to make the internal C-functions compile before
        # the first real sample arrives
        compile_sample = np.random.random((1, self.input_size))
        self.model.predict(compile_sample)

        print 'starting to listen to topic.'
        self.listener()

    # build the full samples from the arriving point clouds
    def build_samples(self, sample_part):
        for point in read_points(sample_part):
            self.sample.append(point)
        self.sample_count += 1
        if self.sample_count == 6:
            if self.mode == 'cnn':
                self.cnn_predict()
            if self.mode == 'crafted':
                self.crafted_predict()
            self.sample = []
            self.sample_count = 0

    # start listening to the point cloud topic
    def listener(self):
        rospy.init_node('listener', anonymous=True)
        rospy.Subscriber("/USArray_pc", PointCloud2, self.build_samples)
        rospy.spin()

    # let the model predict the output
    def cnn_predict(self):
        grid = np.zeros((self.z_range, self.x_range, self.y_range))
        for point in self.sample:
            if point[0]*100 < self.min_x_cm or point[0]*100 > self.max_x_cm-1 \
                    or point[1]*100 > self.max_y_cm-1 or point[1]*100 < -self.max_y_cm:
                continue
            x = (int(point[0]*100) - self.min_x_cm) / self.bin_cm
            y = (int(point[1]*100) + self.max_y_cm) / self.bin_cm
            z = int(point[2]*100) > (self.max_z_cm / self.nr_z_intervals)
            pow = point[4]
            if grid[z][x][y] != 0:
                if grid[z][x][y] < pow:
                    grid[z][x][y] = pow
            else:
                grid[z][x][y] = pow
        grid = np.reshape(grid, (1, -1))
        self.output_prediction(self.model.predict(grid))

    # let the model predict the output
    def crafted_predict(self):
        vec = np.zeros((156,), dtype=np.float32)
        area_points = [[] for _ in np.arange(12)]
        area_counts = np.zeros(12)
        area_x_means = np.zeros(12)
        area_y_means = np.zeros(12)
        area_z_means = np.zeros(12)
        area_highest = np.zeros(12)
        area_highest_pow = np.zeros(12)
        area_pow_means = np.zeros(12)
        area_x_vars = np.zeros(12)
        area_y_vars = np.zeros(12)
        area_z_vars = np.zeros(12)
        area_xy_covars = np.zeros(12)
        area_xz_covars = np.zeros(12)
        area_yz_covars = np.zeros(12)
        bad = False

        for qpoint in self.sample:
            # need to subtract 1 since the function returns values starting with 1
            label = determine_label((float(qpoint[0]), float(qpoint[1]), float(qpoint[2]))) - 1
            area_points[label].append(qpoint)
            area_counts[label] += 1
            if float(qpoint[2]) > area_highest[label]:
                area_highest[label] = float(qpoint[2])
            if float(qpoint[4]) > area_highest_pow[label]:
                area_highest_pow[label] = float(qpoint[4])

        for area in np.arange(12):
            for point in area_points[area]:
                area_x_means[area] += float(point[0])
                area_y_means[area] += float(point[1])
                area_z_means[area] += float(point[2])
                area_pow_means[area] += float(point[4])
            if area_counts[area] > 0:
                area_x_means[area] /= area_counts[area]
                area_y_means[area] /= area_counts[area]
                area_z_means[area] /= area_counts[area]
                area_pow_means[area] /= area_counts[area]
            for point in area_points[area]:
                area_x_vars[area] += (float(point[0]) - area_x_means[area])**2
                area_y_vars[area] += (float(point[1]) - area_y_means[area])**2
                area_z_vars[area] += (float(point[2]) - area_z_means[area])**2
            # if there is only one point, we assume the uncorrected estimator
            # and implicitly divide by one
            if area_counts[area] > 1:
                area_x_vars[area] *= 1/(area_counts[area]-1)
                area_y_vars[area] *= 1/(area_counts[area]-1)
                area_z_vars[area] *= 1/(area_counts[area]-1)
            for point in area_points[area]:
                area_xy_covars[area] += (float(point[0]) - area_x_means[area])*(float(point[1]) - area_y_means[area])
                area_xz_covars[area] += (float(point[0]) - area_x_means[area])*(float(point[2]) - area_z_means[area])
                area_yz_covars[area] += (float(point[1]) - area_y_means[area])*(float(point[2]) - area_z_means[area])
            # if there is only one point, we assume the uncorrected estimator
            # and implicitly divide by one
            if area_counts[area] > 1:
                area_xy_covars[area] *= 1/(area_counts[area]-1)
                area_xz_covars[area] *= 1/(area_counts[area]-1)
                area_yz_covars[area] *= 1/(area_counts[area]-1)

        # 13 features per area, 12 areas -> 156 inputs
        for area in np.arange(12):
            vec[area*13] = area_counts[area]
            vec[area*13+1] = area_x_means[area]
            vec[area*13+2] = area_y_means[area]
            vec[area*13+3] = area_z_means[area]
            vec[area*13+4] = area_x_vars[area]
            vec[area*13+5] = area_y_vars[area]
            vec[area*13+6] = area_z_vars[area]
            vec[area*13+7] = area_xy_covars[area]
            vec[area*13+8] = area_xz_covars[area]
            vec[area*13+9] = area_yz_covars[area]
            vec[area*13+10] = area_highest[area]
            vec[area*13+11] = area_highest_pow[area]
            vec[area*13+12] = area_pow_means[area]

        vec = np.reshape(vec, (1, 156))
        vec -= self.means
        vec /= self.stds
        self.output_prediction(self.model.predict(vec))

    # create the output
    def output_prediction(self, probabilites):
        if self.robust == 'majority':
            prediction = np.argmax(probabilites)
            # majority vote among the last three predictions
            self.predictions[prediction] += 1
            self.pred_count += 1
            if self.pred_count == 3:
                print 'majority prediction: %d' % np.argmax(self.predictions)
                self.pred_count = 0
                self.predictions = np.zeros((13,))
        if self.robust == 'markov':
            markov_probs = self.markov.transition_table[self.last_state]
            probabilites *= markov_probs
            probabilites /= np.sum(probabilites)
            prediction = np.argmax(probabilites)
            print 'markov prediction: %d' % prediction
            self.last_state = prediction
        if self.robust == 'markov_2nd':
            markov_probs = self.markov.transition_table[self.pre_last_state][self.last_state]
            probabilites *= markov_probs
            probabilites /= np.sum(probabilites)
            prediction = np.argmax(probabilites)
            print 'markov 2nd order prediction: %d' % prediction
            self.pre_last_state = self.last_state
            self.last_state = prediction
        if self.robust == 'off':
            prediction = np.argmax(probabilites)
            print 'fast prediction: %d' % prediction

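# Minimal entry point (an assumption about how the script is launched):
# constructing the Predictor loads the chosen model, warms up the compiled
# prediction function and then blocks in listener() until ROS shuts down.
if __name__ == '__main__':
    Predictor()
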