class Classifier:
    """Digit classifier that trains its MLP lazily, on the first prediction."""

    def __init__(self):
        # No network yet; predict_input_img builds and trains it on demand.
        self.mlp = None

    def predict_input_img(self, img):
        """Display ``img``, classify it and print the predicted digit.

        The image is shown with matplotlib, then pushed through the same
        preprocessing pipeline as the MNIST data.  On the first call the MLP
        is created and trained; later calls reuse it.

        Args:
            img: Image to classify; handed to ``plt.imshow`` and, wrapped in a
                list, to ``preprocess_mnist``.
        """
        plt.imshow(img, cmap="hot", interpolation="nearest")
        plt.show()

        mnist = tf.keras.datasets.mnist.load_data()
        pipeline = preprocessing(
            dataset=mnist,
            symetric_dataset=True,
            dimension_reduction=False,
            data_whitening=False,
            normalize=True,
        )
        X_train, y_train, X_test, y_test = pipeline.preprocess_mnist([img])

        if self.mlp is None:
            print("Training the model.\nThis will only happen once.")
            self.mlp = MLP(ReLU, X_train.shape[1], [128, 128])
            # fit() hands back an iterable (presumably a generator - confirm);
            # it has to be exhausted for the training to actually run.
            training_steps = self.mlp.fit(X_train, y_train, 25, 50)
            for _ in training_steps:
                pass

        pred = self.mlp.predict(X_test)
        print(f"Our MultiLayer Perceptron thinks the digit is: {pred[0]}")
        print(pyfiglet.figlet_format(str(pred[0])))
def load_checkpoint(filename, Model, MiddleModel: nn.Module = None):
    """Rebuild a ``MotionGenerationModelRNN`` from a bz2-compressed pickle.

    The pickle holds the paths of the sub-model checkpoints plus the
    constructor arguments of the top-level model.

    Args:
        filename: Path of the bz2-compressed pickle file.
        Model: Class exposing ``load_checkpoint(path)``; used to restore the
            generation sub-model.
        MiddleModel: Optional module that receives ``middle_layer_dict``.
            When ``None`` an ``nn.Linear`` is created for it.

    Returns:
        The assembled ``MotionGenerationModelRNN``.

    NOTE(review): ``pickle.load`` can execute arbitrary code - only open
    checkpoints from a trusted source.
    NOTE(review): ``MiddleModel`` is loaded but never attached to the returned
    model in this function - confirm that is intended.
    NOTE(review): ``dimensions[-11]`` looks like it may be a typo for another
    index - confirm.
    """
    with bz2.BZ2File(filename, "rb") as stream:
        checkpoint = pickle.load(stream)

    pose_autoencoder = MLP.load_checkpoint(checkpoint["pose_autoencoder_path"])
    cost_encoder = MLP.load_checkpoint(checkpoint["cost_encoder_path"])
    generationModel = Model.load_checkpoint(
        checkpoint["motionGenerationModelPath"])

    model = MotionGenerationModelRNN(
        config=checkpoint["config"],
        feature_dims=checkpoint["feature_dims"],
        input_slicers=checkpoint["in_slices"],
        output_slicers=checkpoint["out_slices"],
        name=checkpoint["name"],
    )

    if MiddleModel is None:
        layer_sizes = pose_autoencoder.dimensions
        MiddleModel = nn.Linear(in_features=layer_sizes[-1],
                                out_features=layer_sizes[-11])
    MiddleModel.load_state_dict(checkpoint["middle_layer_dict"])

    # Overwrite the slices computed by the constructor with the stored ones
    # and plug in the restored sub-models.
    model.in_slices = checkpoint["in_slices"]
    model.out_slices = checkpoint["out_slices"]
    model.pose_autoencoder = pose_autoencoder
    model.cost_encoder = cost_encoder
    model.generationModel = generationModel
    return model
def Main():
    """Train an MLP on the 3-spirals dataset and print its hit rate.

    Reads the four dataset files, trains the network, classifies the test
    inputs and prints the expected/predicted comparison, the per-row hit
    table and the overall fraction of hits.
    """
    def _read(path, separator):
        return pd.read_csv(path, sep=separator, header=None)

    x_train = _read('Dataset/xtrain_3spirals.txt', ' ')
    x_test = _read('Dataset/xtest_3spirals.txt', ' ')
    d_train = _read('Dataset/dtrain_3spirals.txt', ',')
    d_test = _read('Dataset/dtest_3spirals.txt', ',')

    # Application of the MLP algorithm
    mlp = MLP(15000, 0.15, 0.000001, [4, 3], 0.5)
    mlp.train(x_train.to_numpy(), d_train.to_numpy())
    new_classes = mlp.application(x_test.to_numpy())

    expected = d_test.to_numpy()
    comparative = np.concatenate((expected, new_classes), 1)

    print("Matrix of comparative between classes")
    print(comparative)
    print("------------------------------")

    # A row counts as a hit only when every output matches the expectation.
    total = len(new_classes)
    hit_table = np.zeros((total, 1))
    for row in range(total):
        if all(expected[row] == new_classes[row]):
            hit_table[row] = 1
    tax_hit = sum(hit_table) / total

    print("------------------------------")
    print("Matrix of hits")
    print(hit_table)
    print("------------------------------")
    print("Tax of hits: " + str(tax_hit))
def __init__(self):
    """Create the wrapper with default hyper-parameters and empty logs."""
    # Settings that have no default; left as None here.
    self.type = self.training_method = self.activation_function = None
    self.data_set = None

    # Collaborators.
    self.nn = MLP()
    self.bed = BinaryEncoderDecoder()
    self.utils = Utilities()

    # Training hyper-parameters and thresholds.
    self.dropout_rate = 0.0
    self.training = True
    self.learning_rate = 0.1
    self.fitness_threshold = 0.75
    self.epoch_threshold = -1
    self.batch_size = 100
    self.shuffle_rate = 2500
    self.display_step = 1000

    # Run state.
    self.epoch = 0
    self.layers = []
    self.debug_mode = False

    # Plotting variables
    self.losses, self.fitnesses, self.iterations = [], [], []
    self.save_location = './nn/log/'
def __init__(self, x, model_file):
    """Load a saved MLP together with the dataset named in its parameters.

    Per the original notes, the sampler feeds the test dataset through the
    model, computing the error and the output for each element, and produces
    two distributions: output for signal and output for background.

    Args:
        x: First argument handed to ``MLP`` (presumably the symbolic input
            variable - confirm against the MLP class).
        model_file: Saved model file.  ``detect_params`` reads the
            hyper-parameters from it and the weights are loaded from it in
            hdf5 mode.
    """
    self.model_file = model_file
    self.param = self.detect_params(self.model_file)

    dataset = Dataset(self.param['dataset'])
    self.dataset = dataset
    dataset.set_indexing(self.param['indexing'])

    self.shared_train_x = dataset.train_x
    self.shared_train_y = dataset.train_y
    self.shared_test_x = dataset.test_x
    self.shared_test_y = dataset.test_y

    # Labels are optional: a dataset without them only triggers a warning.
    try:
        self.train_labels = dataset.train_labels
        self.test_labels = dataset.test_labels
    except AttributeError:
        print(
            "You're used a dataset without labels. You won't be able to call gen_labeled_outputs"
        )

    # Two hidden layers sized from the stored parameters, two outputs.
    layer_sizes = [self.param['h0'], self.param['h1'], 2]
    rng = np.random.RandomState(1234)
    mlp = MLP(x, layer_sizes, rng, transfer_func=T.nnet.relu)
    mlp.load_params(self.model_file, mode='hdf5')

    self.model = mlp
    self.predicted = dict()
def __init__(self, word_embeddings_dim, tag_embeddings_dim, vocabulary_size,
             tag_uniqueCount, label_uniqueCount,
             pretrainedWordEmbeddings=None, pretrainedTagEmbeddings=None):
    """Build the embeddings, the BiLSTM encoder and the two scoring MLPs.

    Args:
        word_embeddings_dim: Size of each word embedding vector.
        tag_embeddings_dim: Size of each tag embedding vector.
        vocabulary_size: Number of rows of the word embedding table.
        tag_uniqueCount: Number of rows of the tag embedding table.
        label_uniqueCount: Output size of the label MLP.
        pretrainedWordEmbeddings: Optional numpy array of shape
            ``(vocabulary_size, word_embeddings_dim)`` copied into the word
            embedding table.  Ignored when ``None`` or all zeros.
        pretrainedTagEmbeddings: Optional numpy array of shape
            ``(tag_uniqueCount, tag_embeddings_dim)`` copied into the tag
            embedding table.  Ignored when ``None`` or all zeros.
    """
    super().__init__()

    self.word_embeddings = nn.Embedding(vocabulary_size, word_embeddings_dim)
    # The parameter defaults to None, so check for it before calling .any();
    # the unguarded call raised AttributeError whenever no array was passed.
    if pretrainedWordEmbeddings is not None and pretrainedWordEmbeddings.any():
        assert pretrainedWordEmbeddings.shape == (vocabulary_size, word_embeddings_dim)
        self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrainedWordEmbeddings))

    self.tag_embeddings = nn.Embedding(tag_uniqueCount, tag_embeddings_dim)
    if pretrainedTagEmbeddings is not None and pretrainedTagEmbeddings.any():
        assert pretrainedTagEmbeddings.shape == (tag_uniqueCount, tag_embeddings_dim)
        self.tag_embeddings.weight.data.copy_(torch.from_numpy(pretrainedTagEmbeddings))

    # Save computation time by not training already trained word vectors
    # disableTrainingForEmbeddings(self.word_embeddings, self.tag_embeddings)
    # Now we need to train the embeddings for <root> and <unk>

    # The number of expected features in the input x
    self.inputSize = word_embeddings_dim + tag_embeddings_dim
    self.hiddenSize = self.inputSize  # * 2 # 512? is this the same as outputSize?
    self.nLayers = 2

    self.biLstm = nn.LSTM(self.inputSize, self.hiddenSize, self.nLayers, bidirectional=True)

    self.nDirections = 2
    self.batch = 1  # this is per recommendation

    # Input size of the MLP for arcs scores is the size of the output from
    # previous step concatenated with another of the same size
    biLstmOutputSize = self.hiddenSize * self.nDirections
    mlpForScoresInputSize = biLstmOutputSize * 2
    self.mlpArcsScores = MLP(mlpForScoresInputSize,
                             hidden_size=mlpForScoresInputSize,
                             output_size=1)

    # MLP for labels
    self.label_uniqueCount = label_uniqueCount
    self.mlpLabels = MLP(mlpForScoresInputSize,
                         hidden_size=mlpForScoresInputSize,
                         output_size=self.label_uniqueCount)
def add_models(self, input_dims: list = None, pose_labels: list = None, freeze=False):
    """Append one new MLP per entry of ``input_dims`` and refresh the slices.

    Args:
        input_dims: Input dimension of each model to add.
        pose_labels: Optional per-model labels; forwarded to ``MLP`` only
            when given.
        freeze: When true, the currently active models are frozen and only
            the new models stay active; ``self.input_dims`` is replaced by
            ``input_dims``.  Otherwise all models are active and
            ``input_dims`` is appended to ``self.input_dims``.

    NOTE(review): the model list is extended in place, so if
    ``self.active_models`` is the same list object as ``self.models`` the
    freeze loop also reaches the models just added - confirm this is wanted.
    """
    first_number = len(self.models) + 1

    created = []
    for i in range(len(input_dims)):
        options = dict(config=self.config,
                       dimensions=[input_dims[i]],
                       name="M" + str(i + first_number),
                       single_module=0)
        if pose_labels is not None:
            options["pose_labels"] = pose_labels[i]
        created.append(MLP(**options))
    # In-place extension on purpose: keeps existing aliases of the list valid.
    self.models += created

    if not freeze:
        self.active_models = self.models
        self.input_dims += input_dims
    else:
        for model in self.active_models:
            model.freeze(True)
        self.active_models = self.models[first_number - 1:]
        self.input_dims = input_dims

    self.input_slice = [0] + list(accumulate(add, self.input_dims))
def load_pretrain_weights(self):
    """Loading weights from trained MLP model & GMF model.

    A pretrained MLP and a pretrained GMF model are restored from the
    checkpoints named in the config; their embeddings (and, for the MLP, the
    fully connected layers) are copied into this model.  The output layer is
    initialised with half of the concatenated output weights and half of the
    summed output biases.

    Note that ``self.config['latent_dim']`` is overwritten along the way and
    ends up holding ``latent_dim_mf``.
    """
    config = self.config
    on_gpu = config['use_cuda'] is True

    # --- MLP part ---
    config['latent_dim'] = config['latent_dim_mlp']
    mlp_model = MLP(config)
    if on_gpu:
        mlp_model.cuda()
    resume_checkpoint(mlp_model,
                      model_dir=config['pretrain_mlp'],
                      device_id=config['device_id'])

    self.embedding_user_mlp.weight.data = mlp_model.embedding_user.weight.data
    self.embedding_item_mlp.weight.data = mlp_model.embedding_item.weight.data
    for idx, layer in enumerate(self.fc_layers):
        layer.weight.data = mlp_model.fc_layers[idx].weight.data

    # --- GMF part ---
    config['latent_dim'] = config['latent_dim_mf']
    gmf_model = GMF(config)
    if on_gpu:
        gmf_model.cuda()
    resume_checkpoint(gmf_model,
                      model_dir=config['pretrain_mf'],
                      device_id=config['device_id'])

    self.embedding_user_mf.weight.data = gmf_model.embedding_user.weight.data
    self.embedding_item_mf.weight.data = gmf_model.embedding_item.weight.data

    # --- Output layer: equal-weight blend of both pretrained heads ---
    mlp_head = mlp_model.affine_output
    gmf_head = gmf_model.affine_output
    stacked_weights = torch.cat([mlp_head.weight.data, gmf_head.weight.data],
                                dim=-1)
    self.affine_output.weight.data = 0.5 * stacked_weights
    self.affine_output.bias.data = 0.5 * (mlp_head.bias.data +
                                          gmf_head.bias.data)
def __init__(self, env, alpha, gamma, episode_num, target_reward,
             step_count, minbatch, memory_size, flag):
    """Store the training settings and build the Q network and its memory.

    Args:
        env: Environment; its observation space gives the state size and its
            action space is read as two sub-spaces (``spaces[0]`` and
            ``spaces[1]``).
        alpha: Learning rate handed to the Adam optimizer.
        gamma, episode_num, target_reward, step_count, minbatch, flag:
            Stored unchanged on the instance.
        memory_size: Number of rows of the replay memory array.
    """
    self.env = env
    self.alpha = alpha
    self.gamma = gamma
    self.episode_num = episode_num
    self.target_reward = target_reward
    self.step_count = step_count
    self.minbatch = minbatch
    self.memory_size = memory_size
    self.flag = flag

    self.Q = MLP()
    self.state_dim = env.observation_space.shape[0]

    # The joint action count is the product of both sub-space sizes.
    first_space, second_space = env.action_space.spaces[0], env.action_space.spaces[1]
    self.action_dim = first_space.n * second_space.n
    self.Q.creat2(self.state_dim, first_space.n, second_space.n)

    # Each memory row has state_dim * 2 + 4 columns.
    self.memory_num = 0
    self.memory = np.zeros((memory_size, self.state_dim * 2 + 4))

    self.optimizer = torch.optim.Adam(self.Q.parameters(), lr=alpha)
    self.loss_func = nn.MSELoss()
def __init__(self, x, model_file):
    """Prepare a sampler from a saved model file.

    The parameters detected from ``model_file`` name the dataset and its
    indexing; the dataset splits are exposed as ``shared_*`` attributes and
    an MLP with the stored hidden sizes is created and loaded from the file.
    Per the original notes, sampling then feeds the test dataset through the
    model and yields one output distribution for signal and one for
    background.

    Args:
        x: First argument handed to ``MLP`` (presumably the symbolic input
            variable - confirm).
        model_file: Saved model file, read in hdf5 mode.
    """
    self.model_file = model_file
    params = self.detect_params(self.model_file)
    self.param = params

    self.dataset = Dataset(params['dataset'])
    self.dataset.set_indexing(params['indexing'])

    data = self.dataset
    self.shared_train_x, self.shared_train_y = data.train_x, data.train_y
    self.shared_test_x, self.shared_test_y = data.test_x, data.test_y

    # Datasets without labels are accepted; only a warning is printed.
    try:
        self.train_labels = data.train_labels
        self.test_labels = data.test_labels
    except AttributeError:
        print("You're used a dataset without labels. You won't be able to call gen_labeled_outputs")

    self.model = MLP(x,
                     [self.param['h0'], self.param['h1'], 2],
                     np.random.RandomState(1234),
                     transfer_func=T.nnet.relu)
    self.model.load_params(self.model_file, mode='hdf5')
    self.predicted = dict()
def main(argv):
    """Train an MLP on CIFAR and print its result on the test set.

    Args:
        argv: Command-line arguments; parsed by ``FLAGS``, which supplies all
            hyper-parameters and the data path.
    """
    argv = FLAGS(argv)

    train_inputs, train_outputs = load_CIFAR_train(FLAGS.datapath)
    X_test, y_test = load_CIFAR_test(FLAGS.datapath)

    learning_rates = [FLAGS.lr_W, FLAGS.lr_b]
    network = MLP(3072, FLAGS.hidden_dim, 10, FLAGS.activation,
                  FLAGS.loss_type, FLAGS.layer_num)
    network.fit(train_inputs, train_outputs, FLAGS.epoch, FLAGS.batch,
                learning_rates, X_test, y_test)

    # A parenthesised single value prints the same under Python 2 and 3.
    print(network.test(X_test, y_test))
def test():
    """Restore the latest checkpoint into a fresh MLP and print its accuracy.

    The checkpoint is looked up in ``./check_point``; accuracy is the number
    of matching predictions divided by ``data_loader.num_test_data``.
    """
    model_to_be_restored = MLP()
    checkpoint = tf.train.Checkpoint(myAwesomeModel=model_to_be_restored)
    latest = tf.train.latest_checkpoint('./check_point')
    checkpoint.restore(latest)

    scores = model_to_be_restored.predict(data_loader.test_data)
    y_pred = np.argmax(scores, axis=-1)
    hits = sum(y_pred == data_loader.test_label)
    print("test accuracy: %f" % (hits / data_loader.num_test_data))
def __init__(self,
             config: dict = None,
             Model=None,
             pose_autoencoder=None,
             feature_dims=None,
             input_slicers: list = None,
             output_slicers: list = None,
             train_set=None,
             val_set=None,
             test_set=None,
             name="MotionGeneration"):
    """Assemble the pose autoencoder, cost encoder and generation model.

    Args:
        config: Settings dict.  ``batch_size``, ``lr`` and
            ``cost_hidden_dim`` are required; ``loss_fn``, ``optimizer``,
            ``scheduler`` and ``scheduler_param`` are optional.
        Model: Class of the generation model; instantiated here.
        pose_autoencoder: Existing autoencoder to reuse.  When ``None`` a new
            ``MLP`` is built from ``feature_dims["pose_dim"]``.
        feature_dims: Dict of feature sizes (``pose_dim``, ``cost_dim``,
            ``g_input_dim``, ``g_output_dim``, ``phase_dim``).
        input_slicers: Widths of the input segments.
        output_slicers: Widths of the output segments.
        train_set, val_set, test_set: Stored unchanged.
        name: Stored unchanged.
    """
    super().__init__()

    def _option(key, fallback):
        # Optional config entry with a default.
        return config[key] if key in config else fallback

    self.feature_dims = feature_dims
    self.config = config

    self.loss_fn = _option("loss_fn", nn.functional.mse_loss)
    self.opt = _option("optimizer", torch.optim.Adam)
    self.scheduler = _option("scheduler", None)
    self.scheduler_param = _option("scheduler_param", None)
    self.batch_size = config["batch_size"]
    self.learning_rate = config["lr"]

    self.best_val_loss = np.inf
    self.phase_smooth_factor = 0.9

    # A freshly built autoencoder never uses labels; a supplied one decides
    # for itself.
    if pose_autoencoder is None:
        self.pose_autoencoder = MLP(config=config,
                                    dimensions=[feature_dims["pose_dim"]],
                                    name="PoseAE")
        self.use_label = False
    else:
        self.pose_autoencoder = pose_autoencoder
        self.use_label = pose_autoencoder.use_label

    cost_hidden_dim = config["cost_hidden_dim"]
    cost_dimensions = [feature_dims["cost_dim"]] + [cost_hidden_dim] * 3
    self.cost_encoder = MLP(config=config,
                            dimensions=cost_dimensions,
                            name="CostEncoder",
                            single_module=-1)

    self.generationModel = Model(
        config=config,
        dimensions=[feature_dims["g_input_dim"], feature_dims["g_output_dim"]],
        phase_input_dim=feature_dims["phase_dim"])

    # Segment boundaries are the running sums of the segment widths.
    self.input_dims = input_slicers
    self.output_dims = output_slicers
    self.in_slices = [0] + list(accumulate(add, input_slicers))
    self.out_slices = [0] + list(accumulate(add, output_slicers))

    self.train_set = train_set
    self.val_set = val_set
    self.test_set = test_set
    self.name = name
def experiment_learning_curves_error():
    """Plot final train/validation MSE against the split size for four cases.

    For every data case and every ``test_size`` value a batch-trained MLP is
    fitted on the training part of the split; the last training MSE and the
    last validation MSE are collected and plotted as learning curves.
    """
    train_test = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    use_validation_set = True
    num_hidden_nodes_layer_1 = 20
    num_iterations = 1000
    learning_rate = 0.001
    verbose = False

    train_MSE, val_MSE = [], []
    for case in [1, 2, 3, 4]:
        generated = Utils.create_non_linearly_separable_data_2(
            use_validation_set=use_validation_set, case=case)
        inputs, inputs_labels, input_validation, input_validation_labels = generated
        print(case)

        case_train, case_validation = [], []
        for check in train_test:
            # The split works on rows, the MLP on columns, hence the transposes.
            X_train, _X_test, y_train, _y_test = train_test_split(
                inputs.T, inputs_labels, test_size=check, random_state=42)

            mlp_batch = MLP(inputs=X_train.T,
                            inputs_labels=y_train,
                            input_validation=input_validation,
                            input_validation_labels=input_validation_labels,
                            num_nodes_hidden_layer=num_hidden_nodes_layer_1,
                            num_iterations=num_iterations,
                            learning_rate=learning_rate,
                            batch_train=True,
                            verbose=verbose)
            _, _, _mse_batch = mlp_batch.fit()

            case_train.append(mlp_batch.mse[-1])
            case_validation.append(mlp_batch.validation_mse[-1])

        train_MSE.append(case_train)
        val_MSE.append(case_validation)

    legend_names = [
        'train mse error case 1', 'train mse error case 2',
        'train mse error case 3', 'train mse error case 4',
        'validation mse error case 1', 'validation mse error case 2',
        'validation mse error case 3', 'validation mse error case 4'
    ]
    plot_title = 'Learning curve with lr = {0}, iterations = {1} '.format(
        learning_rate, num_iterations)
    Utils.plot_learning_curves(train_MSE,
                               legend_names=legend_names,
                               train_size=train_test,
                               title=plot_title,
                               loss=val_MSE)
def runTest(self): inputs = np.array([[1, 0], [0, 1], [1, 1], [0, 0]], dtype = util.FLOAT).T outputs = np.array([[0, 1], [0, 1], [1, 0], [1, 0]], dtype = util.FLOAT).T mlp = MLP([2, 5, 2], Lambda = 0.0001, alpha = .1, activation = "sigmoid", costType = "mse") mlp.fit(inputs, outputs, 20000, 4) mlp_result, mlp.prediction = mlp.predict(inputs, outputs) loss = np.mean( (mlp_result - outputs)**2) print "Prediction: ", mlp_result print "Loss: ", loss np.testing.assert_almost_equal(loss, 0, 2)
def experiment_train_validation_nodes():
    """Plot final train/validation MSE against the hidden-layer size.

    For each of the four data cases an MLP is batch-trained once per hidden
    node count; the last training and validation MSE of every run are
    collected and handed to the plotting helper.
    """
    use_validation_set = True
    num_iterations = 1000
    learning_rate = 0.002
    verbose = False
    nodes = [1, 5, 10, 20, 25]

    train_MSE, val_MSE = [], []
    for case in [1, 2, 3, 4]:
        print(case)
        generated = Utils.create_non_linearly_separable_data_2(
            use_validation_set=use_validation_set, case=case)
        inputs, inputs_labels, input_validation, input_validation_labels = generated

        case_train, case_validation = [], []
        for node in nodes:
            mlp_batch = MLP(inputs=inputs,
                            inputs_labels=inputs_labels,
                            input_validation=input_validation,
                            input_validation_labels=input_validation_labels,
                            num_nodes_hidden_layer=node,
                            num_iterations=num_iterations,
                            learning_rate=learning_rate,
                            batch_train=True,
                            verbose=verbose)
            _, _, _mse_batch = mlp_batch.fit()
            case_train.append(mlp_batch.mse[-1])
            case_validation.append(mlp_batch.validation_mse[-1])

        train_MSE.append(case_train)
        val_MSE.append(case_validation)

    legend_names = [
        'train mse error case 1', 'train mse error case 2',
        'train mse error case 3', 'train mse error case 4',
        'validation mse error case 1', 'validation mse error case 2',
        'validation mse error case 3', 'validation mse error case 4'
    ]
    plot_title = 'MLP with learning rate {0}, iterations {1} '.format(
        learning_rate, num_iterations)
    Utils.plot_error_hidden_nodes(train_MSE,
                                  legend_names=legend_names,
                                  hidden_nodes=nodes,
                                  title=plot_title,
                                  loss=val_MSE)
def run_test():
    """Run the two-stage crane experiment end to end (Python 2 code).

    Stage 1 trains an UnsupervisedMLP on the observation images and fills X
    with the intrinsic vectors.  Stage 2 trains a second MLP on rows built
    from each action plus the intrinsic vectors of step i and step i+1.
    Finally the second MLP is applied repeatedly to a state vector, feeding
    its two outputs back into the state.

    NOTE(review): the source was collapsed onto one line; the loop bodies
    below are reconstructed from context - confirm against the original.
    """
    print 'starting ....'
    Y = read_image_data('data/observations.arff')
    print 'image reading is finished...'
    print len(Y)
    #Allocate an n-by-k matrix, X, to hold intrinsic vectors
    X , K = list(), 2
    for _ in xrange(len(Y)):
        # One row of K zeros per observation.
        X.append([0.0*x for x in xrange(K)])
    w,h= 64,48
    us_mlp = UnsupervisedMLP([4, 12, 12, 3])
    us_mlp.set_image_dim(w, h)
    us_mlp.train(Y, X, K)
    print 'USMLP training is finished'
    plot_intrinsic(X, w, h)
    print 'intrinsic plotting is done'
    #2nd MLP
    normalize(X)
    print 'reading action file'
    A = read_action_data('data/actions.arff')
    print len(A), len(X)
    # Extend every action row in place with the current and the next
    # intrinsic vector; the last row has no successor and is dropped.
    for i in xrange(len(A)-1):
        row = A[i]
        row.extend(X[i])
        row.extend(X[i+1])
    del A[-1]
    print 'training mlp with action '
    mlp = MLP([6,6,2])
    mlp.train1(A, K)
    print 'training done'
    #operate the crane now!
    #'a':[1.0,0.0,0.0,0.0],'c': [0.0,0.0,1.0,0.0]
    # State = 4 action entries followed by the first intrinsic vector.
    s = [1.0,0.0,0.0,0.0]
    s.extend(X[0])
    for i in range(5):
        predict=mlp.predict(s)
        # Feed the prediction back as the next intrinsic state.
        s[4] = predict[0]
        s[5] = predict[1]
        # image_generation(us_mlp, s, w, h, 'frame'+str(i))
    #up
    # Switch the action entries: clear index 0, set index 2.
    s[0]=0 ; s[2]=1.0
    for i in range(5,10):
        predict=mlp.predict(s)
        s[4] = predict[0]
        s[5] = predict[1]
        image_generation(us_mlp, s, w, h, 'frame'+str(i+1))
def __init__(self, episode_size=150):
    """Create the agent's model and reset all episode bookkeeping.

    Args:
        episode_size: Stored on the instance as ``self.episode_size``.
    """
    screen_shape = (SCREEN_HEIGHT_g, SCREEN_WIDTH_g)
    self.model = MLP(screen_shape, 300)
    # self.load("models/model_1185.npz")

    # Per-episode buffers.
    self.activations, self.frames, self.states_alive = [], [], []

    self.episode_size = episode_size
    self.episode_decisions = np.zeros(8)

    # Counters.
    self.episodes_wins = 0
    self.episodes_nb = 0
    self.iter = 0
def test_MLP_Layer_size(X, Y, Z, num_hidden_nodes_layer_1):
    """Fit one regression MLP per hidden-layer size and plot each surface.

    Args:
        X, Y: Grid coordinate arrays; each is flattened to
            ``len(X) * len(Y)`` values.
        Z: Target values on the same grid.
        num_hidden_nodes_layer_1: Iterable of hidden-layer sizes to try.

    Returns:
        ``(MSEs, Models)``: the error over all grid points and the fitted
        model for every size, in iteration order.
    """
    n_X, n_Y = len(X), len(Y)
    num_data = n_X * n_Y

    targets = np.reshape(Z, (1, num_data)).T
    flat_x = np.reshape(X, (1, num_data))
    flat_y = np.reshape(Y, (1, num_data))
    patterns = np.vstack((flat_x, flat_y)).T

    num_iterations = 500
    learning_rate = 0.01
    verbose = False

    X_train, X_test, y_train, y_test = train_test_split(patterns,
                                                        targets,
                                                        test_size=0.2,
                                                        random_state=42)
    # The MLP expects one sample per column.
    X_train, X_test = X_train.T, X_test.T

    MSEs, Models = [], []
    for layers in num_hidden_nodes_layer_1:
        mlp_batch = MLP(inputs=X_train,
                        inputs_labels=y_train,
                        num_nodes_hidden_layer=layers,
                        num_iterations=num_iterations,
                        learning_rate=learning_rate,
                        batch_train=True,
                        verbose=verbose,
                        binary=False,
                        num_output_layers=1)
        mlp_batch.fit()

        # Predict on the full grid, not just the training split.
        o_out = mlp_batch.predict(patterns.T)
        surface = np.reshape(o_out, (n_X, n_Y))
        _, mse = Utils.compute_error(targets, o_out, False)

        MSEs.append(mse)
        Models.append(mlp_batch)

        title = 'Number of hidden layer: {0} and MSE: {1}'.format(
            layers, round(mse, 4))
        Utils.plot_3d_data(X, Y, surface, title)

    return MSEs, Models
def check_MlP_test_sizes(X, Y, Z):
    """Fit the same regression MLP for several train/test split sizes.

    Args:
        X, Y: Grid coordinate arrays; each is flattened to
            ``len(X) * len(Y)`` values.
        Z: Target values on the same grid.

    Returns:
        ``(MSEs, Models)``: the error over all grid points and the fitted
        model for every ``test_size`` value, in order.
    """
    n_X, n_Y = len(X), len(Y)
    num_data = n_X * n_Y

    targets = np.reshape(Z, (1, num_data)).T
    flat_x = np.reshape(X, (1, num_data))
    flat_y = np.reshape(Y, (1, num_data))
    patterns = np.vstack((flat_x, flat_y)).T

    num_hidden_nodes_layer_1 = 20
    num_iterations = 5000
    learning_rate = 0.001
    verbose = False
    train_test = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

    MSEs, Models = [], []
    for check in train_test:
        X_train, X_test, y_train, y_test = train_test_split(patterns,
                                                            targets,
                                                            test_size=check,
                                                            random_state=42)
        # The MLP expects one sample per column.
        X_train, X_test = X_train.T, X_test.T
        print(X_train.shape)

        mlp_batch = MLP(inputs=X_train,
                        inputs_labels=y_train,
                        num_nodes_hidden_layer=num_hidden_nodes_layer_1,
                        num_iterations=num_iterations,
                        learning_rate=learning_rate,
                        batch_train=True,
                        verbose=verbose,
                        binary=False,
                        num_output_layers=1)
        mlp_batch.fit()

        # Predict on the full grid, not just the training split.
        o_out = mlp_batch.predict(patterns.T)
        surface = np.reshape(o_out, (n_X, n_Y))
        _, mse = Utils.compute_error(targets, o_out, False)

        MSEs.append(mse)
        Models.append(mlp_batch)

        # Plotting is currently switched off; the title is still built.
        title = 'MSE: {0}'.format(round(mse, 4))
        # Utils.plot_3d_data(X, Y, surface, title)

    return MSEs, Models
def treinar(self, base, qtd_hidden):
    """Train a new MLP on ``base`` for 300000 epochs, printing the errors.

    Args:
        base: Sequence of samples; each sample holds the input vector at
            index 0 and the expected output vector at index 1.
        qtd_hidden: Hidden-layer size handed to the MLP.

    After every epoch the summed absolute error and the number of
    misclassified samples (outputs thresholded at 0.5) are printed.

    NOTE(review): the layer sizes are read from the sample at index 1, so a
    single-sample ``base`` fails - confirm whether index 0 was meant.
    NOTE(review): the trained network is not returned or stored here.
    """
    n_x = len(base[1][0])
    n_y = len(base[1][1])
    rna = MLP(qtd_input=n_x, qtd_hidden=qtd_hidden, qtd_output=n_y)

    for e in range(300000):
        erroEpoca = 0
        erro_classificacao = 0

        for amostra in base:
            x, y = amostra[0], amostra[1]

            # Output of the neural network
            out = rna.treinar(x, y)

            # Approximation error: absolute difference per output unit,
            # accumulated into the epoch total one term at a time.
            for obtained, wanted in zip(out, y):
                erroEpoca += abs(obtained - wanted)

            # Classification error: threshold the outputs at 0.5 and count
            # the sample as wrong if any unit disagrees with the target.
            vet_cla = [1 if o >= 0.5 else 0 for o in out]
            aux = 0
            for predicted, wanted in zip(vet_cla, y):
                aux += abs(wanted - predicted)
            if aux > 0:
                erro_classificacao += 1
            else:
                erro_classificacao += 0

        print('Época: {}'.format(e + 1), end=' --- ')
        print('Erro: {}'.format(erroEpoca), end=' --- ')
        print('Classificação: {}'.format(erro_classificacao))
def __init__(self, num_input, num_units, num_classes, batch_size=100,
             num_epochs=10, display=False, blacklist=None, whitelist=None):
    """Creates classifier for finding the version.

    Builds a two-layer MLP graph (ReLU hidden layer, identity output), the
    cross-entropy cost, an Adam training op and an accuracy op.

    Args:
        num_input: Number of input features.
        num_units: Number of hidden units.
        num_classes: Number of output classes.
        batch_size: Stored as ``self.batch_size``.
        num_epochs: Stored as ``self.training_epochs``.
        display: Stored as ``self.display``.
        blacklist: Optional list; defaults to a new empty list.
        whitelist: Optional list; defaults to a new empty list.  Only one of
            ``blacklist`` and ``whitelist`` may be non-empty.

    Raises:
        AssertionError: If both ``blacklist`` and ``whitelist`` are non-empty.
    """
    # Network parameters
    self.l_rate = 0.001
    self.dropout_prob = 1
    self.training_epochs = num_epochs
    self.display_step = 10
    self.batch_size = batch_size
    self.display = display

    # None replaces the former ``[]`` defaults: a list in the signature is
    # created once and would be shared by every instance relying on it.
    self.blacklist = [] if blacklist is None else blacklist
    self.whitelist = [] if whitelist is None else whitelist
    assert not (self.blacklist and self.whitelist), (
        'Both whitelist and blacklist are defined')

    # Placeholders
    self.X = tf.placeholder('float', [None, num_input], name='X')
    self.Y = tf.placeholder('int64', [None], name='Y')
    self.keep_prob = tf.placeholder('float')

    # Create Network
    self.mlp = MLP([num_input, num_units, num_classes],
                   [tf.nn.relu, tf.identity])
    logits = self.mlp.create_network(self.X, self.keep_prob)

    # Keyword arguments work with both the old positional signature and the
    # TF >= 1.0 signature, which rejects positional logits/labels.
    self.cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.Y)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.l_rate)
    # From here on ``self.optimizer`` is the training op, not the optimizer.
    self.optimizer = self.optimizer.minimize(self.cost)

    # for evaluation
    predictions = tf.equal(tf.argmax(logits, 1), self.Y)
    self.accuracy = tf.reduce_mean(tf.cast(predictions, 'float'))
    self.init_op = tf.initialize_all_variables()
    self.saver = tf.train.Saver()

    # for gpu
    self.config = tf.ConfigProto(log_device_placement=False)
def test(args, device):
    """Evaluate a saved model and return its mean percentage error.

    Args:
        args: Settings object.  Reads ``model_type`` ("CNN", "MLP" or
            "LSTM"), ``learning_rate`` and ``model_path``; also passed to the
            data loader and the model constructor.
        device: Device the model and the batches are moved to.

    Returns:
        The sum over all samples of ``abs(pred - y) / y * 100`` divided by
        the number of samples.

    Raises:
        ValueError: If ``args.model_type`` is not one of the supported names.
    """
    full_data = get_data_loader(args)

    if args.model_type == "CNN":
        from CNN import CNN
        model = CNN(args).to(device)
    elif args.model_type == "MLP":
        from MLP import MLP
        model = MLP(args).to(device)
    elif args.model_type == "LSTM":
        from LSTM import LSTM
        model = LSTM(args).to(device)
    else:
        # Without this branch an unknown type surfaced later as an
        # UnboundLocalError on ``model``.
        raise ValueError(
            "Unsupported model_type: {!r}".format(args.model_type))

    optimiser = optim.Adam(model.parameters(), lr=args.learning_rate)

    state = torch.load(args.model_path, map_location=device)
    model.load_state_dict(state['model'])
    optimiser.load_state_dict(state['optimiser'])

    # Evaluation must not use training-time behaviour (dropout, batch-norm
    # updates) and does not need gradients.
    model.eval()

    total_difference = 0
    n = 0
    with torch.no_grad():
        for data in full_data:
            x, y = data[0].float().to(device), data[1].float().to(device)
            pred = model(x).reshape(y.shape)
            total_difference += sum((abs(pred - y) / y) * 100)
            n += x.shape[0]

    return total_difference / n
def __init__(self,
             config: dict = None,
             input_dims: list = None,
             pose_labels=None,
             train_set=None,
             val_set=None,
             test_set=None,
             name: str = "model",
             save_period=5,
             workers=6):
    """Create one MLP per input segment plus the RBF cluster layer.

    Args:
        config: Settings dict.  ``basis_func``, ``hidden_dim``,
            ``keep_prob``, ``k``, ``lr`` and ``batch_size`` are required;
            ``loss_fn``, ``optimizer``, ``scheduler`` and ``scheduler_param``
            are optional.
        input_dims: Width of each input segment; one model is built per entry.
        pose_labels: Optional per-model labels; defaults to ``None`` for
            every model.
        train_set, val_set, test_set: Stored unchanged.
        name, save_period, workers: Stored unchanged.
    """
    super(RBF, self).__init__()

    def _option(key, fallback):
        # Optional config entry with a default.
        return config[key] if key in config else fallback

    M = len(input_dims)

    self.name = name
    self.input_dims = input_dims
    # Segment boundaries are the running sums of the segment widths.
    self.input_slice = [0] + list(accumulate(add, input_dims))
    self.act = nn.ELU
    self.save_period = save_period
    self.workers = workers

    if pose_labels is None:
        pose_labels = [None] * M
    self.pose_labels = pose_labels

    self.config = config
    self.basis_func = basis_func_dict()[config["basis_func"]]
    self.hidden_dim = config["hidden_dim"]
    self.keep_prob = config["keep_prob"]
    self.k = config["k"]
    self.learning_rate = config["lr"]
    self.batch_size = config["batch_size"]

    self.loss_fn = _option("loss_fn", nn.functional.mse_loss)
    self.opt = _option("optimizer", torch.optim.Adam)
    self.scheduler = _option("scheduler", None)
    self.scheduler_param = _option("scheduler_param", None)

    self.models = []
    for i in range(M):
        self.models.append(
            MLP(config=config,
                dimensions=[input_dims[i]],
                pose_labels=self.pose_labels[i],
                name="M" + str(i),
                single_module=0))
    # Same list object: every model starts out active.
    self.active_models = self.models

    self.cluster_model = RBF_Layer(in_features=self.k,
                                   out_features=self.k,
                                   basis_func=self.basis_func)

    self.train_set = train_set
    self.val_set = val_set
    self.test_set = test_set
    self.best_val_loss = np.inf
def test_units_accuracy(units, steps, epochs):
    """Plot accuracy of single-hidden-layer MLPs against the layer width.

    Args:
        units: Exclusive upper bound of the widths to try (starting at 1).
        steps: Increment between consecutive widths.
        epochs: Forwarded to ``test_mlp_model``.
    """
    unit_counts = range(1, units, steps)
    # One freshly built model per width; each is evaluated right away.
    accuracies = [
        test_mlp_model(MLP(ReLU, X_train.shape[1], [width]),
                       epochs, 30, plot=False)
        for width in unit_counts
    ]
    plot_unit_accuracy(accuracies, unit_counts)
def mlp_test(test_set, Model, n_input=2030, n_output=150, n_hidden=50):
    """Run a stored MLP on the first sample of ``test_set``.

    Args:
        test_set: Dataset reference; passed three times to ``load_data``.
        Model: Stored model data handed to the ``MLP`` constructor.
        n_input: Number of input units.
        n_output: Number of output units.
        n_hidden: Number of hidden units.

    Returns:
        ``classifier.predictAll`` evaluated at index 0, converted to a list.
    """
    datasets = load_data(test_set, test_set, test_set)
    test_set_x, test_set_y = datasets[0]

    # Symbolic variables
    index = T.lscalar()  # index to a [mini]batch
    x = T.vector('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=n_input,
                     n_hidden=n_hidden,
                     n_out=n_output,
                     Model=Model)

    # The compiled function substitutes the indexed sample for ``x``.
    substitutions = {x: test_set_x[index]}
    test_model = theano.function(inputs=[index],
                                 outputs=classifier.predictAll,
                                 givens=substitutions)

    return test_model(0).tolist()
def experiment_train_validation_error():
    """Plot train and validation MSE per epoch for the four data cases.

    One MLP is batch-trained per case; its training and validation MSE
    histories are appended, in that order, to the list that is plotted.
    """
    use_validation_set = True
    num_hidden_nodes_layer_1 = 20
    num_iterations = 1000
    learning_rate = 0.002
    verbose = False

    mse = []
    for case in [1, 2, 3, 4]:
        generated = Utils.create_non_linearly_separable_data_2(
            use_validation_set=use_validation_set, case=case)
        inputs, inputs_labels, input_validation, input_validation_labels = generated

        mlp_batch = MLP(inputs=inputs,
                        inputs_labels=inputs_labels,
                        input_validation=input_validation,
                        input_validation_labels=input_validation_labels,
                        num_nodes_hidden_layer=num_hidden_nodes_layer_1,
                        num_iterations=num_iterations,
                        learning_rate=learning_rate,
                        batch_train=True,
                        verbose=verbose)
        _, _, _mse_batch = mlp_batch.fit()

        # Order matters: it has to line up with ``legend_names`` below.
        mse.extend([mlp_batch.mse, mlp_batch.validation_mse])

    legend_names = [
        'train mse error case 1', 'validation mse error case 1',
        'train mse error case 2', 'validation mse error case 2',
        'train mse error case 3', 'validation mse error case 3',
        'train mse error case 4', 'validation mse error case 4'
    ]
    plot_title = 'MLP with lr = {0}, iterations = {1} , hidden nodes = {2} '.format(
        learning_rate, num_iterations, num_hidden_nodes_layer_1)
    Utils.plot_error_with_epochs(mse,
                                 legend_names=legend_names,
                                 num_epochs=num_iterations,
                                 title=plot_title)
def main(client_id):
    """Worker loop: poll the server for a task and train or validate.

    Each round asks the server for the current task, downloads the network
    weights from MongoDB, rebuilds the two linear layers and then either
    trains (mode "train") or validates/tests with them.  The loop ends when
    the server answers with mode "stop".

    Args:
        client_id: Identifier of this worker; forwarded to ``train``.

    NOTE(review): the MongoDB URI with credentials is hard-coded in the
    source - consider moving it to configuration.
    NOTE(review): the test image path uses the prefix "images_" while the
    train path uses "image_" - confirm both are correct.
    """
    def _copy_weights(layer, weights, n_rows, n_cols):
        # Element-wise copy into the layer's existing weight array.
        for i in range(n_rows):
            for j in range(n_cols):
                layer.W.data[i][j] = weights[i][j]

    while True:
        try:
            response = requests.get(url + 'deepLearning')
            json_data = json.loads(response.text)

            image_file_index = int(json_data['image_file_index'])
            epoch_number = int(json_data['epoch_number'])
            print('image_index_file: ' + str(image_file_index))
            print('epoch_number: ' + str(epoch_number))
            mode = str(json_data['mode'])
            print('mode: ' + mode)

            if mode == 'stop':
                return
            if mode == 'wait':
                time.sleep(1.5)
                continue

            client = MongoClient(
                'mongodb://*****:*****@ds111529.mlab.com:11529/primre')
            try:
                _db = client.primre

                print('start download network')
                try:
                    network = _db.Network.find_one({'id': 1})
                    l1_w_list = network['l1_list']
                    l2_w_list = network['l2_list']
                except Exception:
                    # Download failed: hand the file index back so the task
                    # can be picked up again.
                    _db.GlobalParameters.update_one(
                        {'id': 1}, {'$inc': {
                            'image_file_index': -1
                        }})
                    continue
                print('finish download network')

                lin_neural_network_l1 = L.Linear(784, 300)
                lin_neural_network_l2 = L.Linear(300, 10)
                _copy_weights(lin_neural_network_l1, l1_w_list, 300, 784)
                _copy_weights(lin_neural_network_l2, l2_w_list, 10, 300)
                mlp = MLP(lin_neural_network_l1, lin_neural_network_l2)

                file_images_name = '~/images_train/image_' + str(image_file_index)
                file_labels_name = '~/labels_train/label_' + str(image_file_index)
                if mode == 'test':
                    file_images_name = '~/images_test/images_' + str(
                        image_file_index)
                    file_labels_name = '~/labels_test/label_' + str(
                        image_file_index)

                if mode == "train":
                    train(_db, client_id, mlp, file_images_name,
                          file_labels_name, l1_w_list, l2_w_list)
                else:
                    validate_test(_db, mode, mlp, epoch_number,
                                  file_images_name, file_labels_name)
            finally:
                # A new client is created every round; close it so that
                # connections do not pile up.
                client.close()
        except Exception:
            # Best effort: any failure just moves on to the next round.
            # ``Exception`` (not a bare except) keeps Ctrl-C and SystemExit
            # able to stop the worker.
            continue
def runTest(self): MNIST_DIR = "../Data/mnist.pkl.gz" f = gzip.open(MNIST_DIR, "rb") train_set, valid_set, test_set = cPickle.load(f) f.close() X_train, y_train = translate(train_set) X_test, y_test = translate(test_set) mlp = MLP([784, 800, 300, 10], 0, 0.2, "sigmoid", "mse", load="True") mlp.fit(X_train, y_train, 100, 500000) mlp_result, mlp_prediction = mlp.predict(X_test, y_test) mlp_result, mlp_prediction = mlp.predict(X_train, y_train) loss = np.mean( (mlp_result - y_train)**2) print "Loss: ", loss error = sum([mlp_prediction[i] != train_set[1][i] for i in xrange(len(mlp_prediction))]) error /= float(len(mlp_prediction)) print "Error: ", error self.assertTrue(error < .1)
def test_depth_accuracy(depth, epochs):
    """Plot accuracy of MLPs with 0 .. depth-1 hidden layers of 128 units.

    Args:
        depth: Exclusive upper bound of the number of hidden layers.
        epochs: Forwarded to ``test_mlp_model``.
    """
    depths = range(depth)
    accuracies = []
    # The loop variable reuses the parameter name; ``depths`` is already fixed.
    for depth in depths:
        print(f"MLP depth: {depth}")
        hidden_layers = [128] * depth
        model = MLP(ReLU, X_train.shape[1], hidden_layers)
        result = test_mlp_model(model, epochs, 30, plot=False)
        accuracies.append(result)
    plot_depth_accuracy(accuracies, depths)
def get_mlp(self):
    """Build an MLP from the values typed into the input widgets.

    Returns:
        A new ``MLP`` with 784 inputs and 10 outputs, or ``None`` when a
        ``ValueError`` is raised while parsing the inputs or building the
        network.
    """
    try:
        settings = {
            'hidden_nodes': int(self.hidden_nodes_input.get()),
            'learning_rate': float(self.learning_rate_input.get()),
        }
        return MLP(input_nodes=784, output_nodes=10, **settings)
    except ValueError:
        return None
def tune(n):
    """Evaluate the ``n``-th beta value with an expanding-window run.

    Args:
        n: Index into the module-level ``betas`` sequence.

    Returns:
        A 5-tuple: the beta value, the last first-column entry of
        ``ret[1][3]``, and the first elements of ``ret[1]``, ``ret[4]`` and
        ``ret[12]`` (the keys are the window sizes).
    """
    network = MLP((4, ), training_epochs=5000, beta=betas[n], debug=False)
    runner = Model(network, transfs, gen_x, gen_y, RMSE)

    window = [1, 4, 12]
    ret = runner.expanding_window(X_train, y_train, TRAIN_OFFSET, window,
                                  'dynamic')
    print(betas[n])

    last_entry = ret[1][3].iloc[-1, 0]
    return betas[n], last_entry, ret[1][0], ret[4][0], ret[12][0]
def main():
    """Run the test case XOR.

    Loads the XOR dataset, splits it, fits an MLP with two inputs, two
    hidden units and one output, then prints the final error, the
    classifier and a table of expected versus obtained labels before
    plotting the error history.
    """
    print("...Reading dataset")
    dataset = load_dataset("datasets/xor.dat")
    print("...done!")

    print("...Spliting the dataset")
    training_samples, testing_samples, labels_train, labels_test = split_dataset(
        dataset)
    print("...done!")

    print("...Creating the classifier")
    clf = MLP(input_layer=2, hidden=2, output=1)
    print("...done!")

    print("...Fiting the clf")
    clf.fit(training_samples, labels_train, verbose_error=True)
    print("...done!")

    print("...Made a prediction")
    pred = clf.predict(testing_samples)
    print("...done!")

    print('Convergence: with MSE:{}'.format(clf.error))
    print(clf)
    # DataFrame.from_items was removed in pandas 1.0; the dict constructor
    # builds the same two-column table.
    print(pd.DataFrame({'Expected': labels_test, 'Obtained': pred}))
    clf.plot_errors()
def __init__(self, img=None, model_path='my_model.npz'):
    """Store the input image, create the classifier net and run ``setup``.

    Args:
        img: Original image; stored as ``self.origin_img``.
        model_path: Stored as ``self.number_model_path``.
    """
    # Image state: only the original is known at this point.
    self.origin_img = img
    self.preprocess_img = None
    self.detected_number = None

    # Network
    self.number_model_path = model_path
    self.net = L.Classifier(MLP(1000, 10))

    # Directories
    self.data_directory = "data"
    self.img_directory = "imgs"  # directory where images are stored

    self.setup()
def generateModel(learningRate):
    """Build an MLP from three fixed layers.

    The first layer uses the linear activation, the other two the sigmoid.

    Args:
        learningRate: Handed to the ``MLP`` constructor.

    Returns:
        The new ``MLP``.
    """
    layers = [
        Layer(4, 0, 4, ActivationFunction.linear),
        Layer(3, 1, 4, ActivationFunction.sigmoid),
        Layer(3, 2, 3, ActivationFunction.sigmoid),
    ]
    return MLP(layers, learningRate)
def __init__ (self, size_x, size_y, beta, hidden, learning_rate, reward):
    """Create the reinforcement-learning bot and its MLP.

    Args:
        size_x, size_y: Field dimensions; the MLP gets ``size_x * size_y``
            inputs and as many outputs.
        beta: High values (50?) favour exploitation, low values exploration.
        hidden: Hidden-layer size of the MLP.
        learning_rate: Learning rate of the MLP.
        reward: Reward values; a copy is stored.
    """
    Bot.__init__(self)
    self.bot_name = "Bot_RL_MLP"

    n_cells = size_x * size_y
    self.mlp = MLP (n_cells, hidden, n_cells, learning_rate)

    # Copy, so later changes to the caller's list do not leak in.
    self.reward = list(reward[:])
    # high beta (50?): exploitation
    # low beta: exploration
    self.beta = beta
def RecognizerFactory(recognizer_data):
    """Initialize the recognizer.

    Recognizer data may describe either an SLP or an MLP, so determine which
    and represent it appropriately.  The candidates are tried in a fixed
    order and the first one that validates the data is instantiated.

    Raises:
        Exception: If no recognizer type accepts the data.
    """
    if LinearRecognizer.validate_input(recognizer_data):
        return LinearRecognizer(recognizer_data)

    # Should never eval to true
    if SLP.validate_input(recognizer_data):
        return SLP(recognizer_data)

    if MLP.validate_input(recognizer_data):
        return MLP(recognizer_data)

    raise Exception("Couldn't parse recognizer data")
def playMLP(self):
    """Train and test an MLP for every combination of the swept settings.

    For each value of ``k`` in 4..99 and each ``j`` in 1..19 a new MLP is
    created (stored as ``self.obj_mlp``), trained on ``self.training`` and
    tested on ``self.testing``.

    NOTE(review): the source was collapsed onto one line, so the nesting
    level of ``k += 5`` is reconstructed.  Placed after the inner loop it has
    no effect, because the ``for`` statement rebinds ``k`` on the next
    iteration - confirm against the original.
    """
    # allow to test all combinations of settings
    i = 1  ## number hidden layers
    step_epochs = 5  ## number of epochs
    ################################################################################################################
    ######## To calculate the number of hidden nodes we use a general rule of: (Number of inputs + outputs) x 2/3###
    ################################################################################################################
    k = 3  ## number of hidden neurons
    l = 0.0001  ## eta learning rate
    s = 0.1  ## step
    for k in range(4, 100):
        for j in range(1, 20):
            # The fourth argument grows with j in multiples of step_epochs.
            self.obj_mlp = MLP(self.int_num_features, self.int_num_classes, i, j * step_epochs, k, l)
            self.obj_mlp.train(self.training)
            #self.obj_mlp.plotMSE()
            self.obj_mlp.test(self.testing)
        k += 5
def __init__(self, size_x=3, size_y=3, beta=1, hidden=20, learning_rate=0.1, reward=[0, 1.0, -1.0], initial_field=[0], player_ID=1):
    """Create the bot, build its MLP and start a new game.

    Args:
        size_x, size_y: Board dimensions; their product is used for both
            the first and the third ``MLP`` argument.
        beta: High values (50?) favour exploitation, low values favour
            exploration.
        hidden: Second ``MLP`` argument.
        learning_rate: Fourth ``MLP`` argument.
        reward: Sequence of rewards; a shallow copy is stored.
        initial_field: Initial field description; a shallow copy is stored.
        player_ID: Identifier stored as ``self.player_ID``.
    """
    Bot.__init__(self)
    # The defaults in the signature are mutable lists that every call
    # shares.  Copy both so that one instance mutating its field or its
    # rewards cannot change the defaults seen by later instances.
    self.initial_field = initial_field[:]
    self.reward = reward[:]
    self.player_ID = player_ID
    self.bot_name = "Bot_RL_MLP"
    self.version = 1
    self.counter = 0
    self.optimization = []
    self.first_action = True
    self.beta = beta
    self.mlp = MLP(size_x * size_y, hidden, size_x * size_y, learning_rate)
    self.new_game()
def unit_test2():
    """Run one forward/backward/update step on a 1-2-1 network and print it.

    The layer parameters are fixed by hand so the printed prediction,
    errors and updated weights can be compared against a manual
    calculation.  The last ``num_labels`` entries of ``data`` are the
    targets and the rest are the inputs.
    """
    mlp = MLP([1, 2, 1])
    mlp.layers[1].set_params([[-.3], [4]], [.15, -2])
    mlp.layers[2].set_params([[0, 2]], [-1])

    num_labels = 1
    data = [1.0, 1.0]

    predicted = mlp.predict(data[:-num_labels])
    print('predicted: ', predicted)

    mlp.output_error_calculation(data[-num_labels:])
    mlp.backpropagation()
    print('errors:')
    for layer in mlp.layers:
        print(layer.e)

    mlp.gradient_descend(.5)
    print('weights:')
    for layer in mlp.layers:
        # list() so the pairs are shown even where zip returns an iterator.
        print(list(zip(layer.b, layer.weight)))
def unit_test():
    """Run one forward/backward/update step on a 2-2-2-2 network and print it.

    The layer parameters are fixed by hand so the printed prediction,
    errors and updated weights can be compared against a manual
    calculation.  The last ``num_labels`` entries of ``data`` are the
    targets and the rest are the inputs.
    """
    mlp = MLP([2, 2, 2, 2])
    mlp.layers[1].set_params([[.2, -.1], [.3, -.3]], [.1, -.2])
    mlp.layers[2].set_params([[-.2, -.3], [-.1, .3]], [.1, .2])
    mlp.layers[3].set_params([[-.1, .3], [-.2, -.3]], [.2, .1])

    num_labels = 2
    data = [.3, .7, .1, 1.0]

    predicted = mlp.predict(data[:-num_labels])
    print('predicted: ', predicted)

    mlp.output_error_calculation(data[-num_labels:])
    mlp.backpropagation()
    print('errors:')
    for layer in mlp.layers:
        print(layer.e)

    mlp.gradient_descend(.1)
    print('weights:')
    for layer in mlp.layers:
        # list() so the pairs are shown even where zip returns an iterator.
        print(list(zip(layer.b, layer.weight)))
def main():
    """Fit a 2-3-1 MLP on the four XOR samples and print what it learned."""
    xor_samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
    inputs = np.array(xor_samples)
    targets = np.array([0, 1, 1, 0])

    # Build the network and show its configuration.
    mlp = MLP(n_input_units=2, n_hidden_units=3, n_output_units=1)
    mlp.print_configuration()

    # Train, then dump both weight matrices.
    mlp.fit(inputs, targets)
    print('--- training ---')
    print('first layer weight: ')
    print(mlp.v)
    print('second layer weight: ')
    print(mlp.w)

    # Query the trained network with each sample (as a plain list).
    print('--- predict ---')
    for sample in xor_samples:
        print(sample, mlp.predict(sample))
def main():
    """Train a 64-100-10 MLP on the scikit-learn digits set and report results.

    The features are rescaled in place to the range [0, 1], the data is
    split with ``train_test_split``, the training labels are binarized
    for fitting, and a confusion matrix plus classification report are
    printed for the held-out split.
    """
    digits = load_digits()
    X = digits.data
    y = digits.target

    # Min-max scale the features to [0, 1].
    X -= X.min()
    X /= X.max()

    mlp = MLP(64, 100, 10)
    mlp.print_configuration()

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    labels_train = LabelBinarizer().fit_transform(y_train)
    mlp.fit(X_train, labels_train)

    # The predicted class is the index of the largest output unit.
    predictions = [np.argmax(mlp.predict(sample)) for sample in X_test]

    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
def train(self, x_input, y_input):
    """Train ``self.mlp`` with minibatch gradient descent.

    The first ``self.train_percent`` fraction of the rows is used for
    training; the remaining rows, up to the largest multiple of 10, are
    used for validation.  A new ``MLP`` is built from the instance's
    configuration (``output_size``, ``activation_function``,
    ``hidden_layer_sizes``, ``dropout``, ``dropout_rate``) and stored in
    ``self.mlp``.  The cost is the negative log likelihood plus the
    ``L1_reg`` / ``L2_reg`` weighted regularization terms.  When
    ``self.momentum`` is truthy each update also adds
    ``self.momentum_term`` times the previous update.

    Training runs for at most ``self.n_epochs`` epochs with early
    stopping driven by the validation error.  Progress goes to stdout
    and the total run time to stderr.

    :param x_input: 2-D array of samples, one row per sample
    :param y_input: labels, indexable in step with the rows of ``x_input``
    """
    index = T.lscalar('index')  # index to a [mini]batch

    # Slice bounds must be integers; the product with a fractional
    # train_percent is a float.
    train_size = int(x_input.shape[0] * self.train_percent)
    max_size = x_input.shape[0] - (x_input.shape[0] % 10)
    train_set_x = x_input[:train_size, :]
    train_set_y = y_input[:train_size]
    # Validation starts right after the training rows (no skipped row).
    valid_set_x = x_input[train_size:max_size, :]
    valid_set_y = y_input[train_size:max_size]

    # compute number of minibatches for training and validation
    n_train_batches = int(train_set_x.shape[0] / self.batch_size)
    n_valid_batches = int(valid_set_x.shape[0] / self.batch_size)
    number_in = train_set_x.shape[1]

    valid_set_x = theano.shared(valid_set_x, 'valid_set_x')
    valid_set_y = theano.shared(valid_set_y, 'valid_set_y')
    train_set_x = theano.shared(train_set_x, 'train_set_x')
    train_set_y = theano.shared(train_set_y, 'train_set_y')

    self.mlp = MLP(
        rng=numpy.random.RandomState(),
        input=self.x,
        n_in=number_in,
        n_out=self.output_size,
        a_function=self.activation_function,
        n_hidden_sizes=self.hidden_layer_sizes,
        dropout=self.dropout,
        dropout_rate=self.dropout_rate
    )

    # the cost we minimize during training is the negative log likelihood
    # of the model plus the regularization terms (L1 and L2); cost is
    # expressed here symbolically
    cost = (
        self.mlp.negative_log_likelihood(self.y)
        + self.L1_reg * self.mlp.L1
        + self.L2_reg * self.mlp.L2_sqr
    )

    batch = slice(index * self.batch_size, (index + 1) * self.batch_size)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a validation minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=self.mlp.errors(self.y),
        givens={
            self.x: valid_set_x[batch],
            self.y: valid_set_y[batch]
        }
    )

    # compute the gradient of cost with respect to every model parameter
    parameter_gradients = [T.grad(cost, param) for param in self.mlp.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = []
    if self.momentum:
        # One shared "previous step" per parameter, same shape and dtype.
        previous_steps = [
            theano.shared(value=numpy.zeros_like(param.get_value()))
            for param in self.mlp.params
        ]
        for param, gradient, previous_step in zip(
                self.mlp.params, parameter_gradients, previous_steps):
            step = (-self.learning_rate * gradient
                    + self.momentum_term * previous_step)
            updates.append((param, param + step))
            updates.append((previous_step, step))
    else:
        for param, gradient in zip(self.mlp.params, parameter_gradients):
            updates.append((param, param - self.learning_rate * gradient))

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            self.x: train_set_x[batch],
            self.y: train_set_y[batch]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much
                                   # is considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    # No test set is evaluated in this method, so this stays at zero.
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < self.n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                # Report the numeric minibatch cost; the symbolic `cost`
                # expression cannot be formatted with %f.
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%, cost %f' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.,
                        float(minibatch_avg_cost)
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # Write the timing line to stderr instead of printing the stream object.
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def test_mlp(learning_rate=0.01, L1_reg=0.0, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Train an MLP classifier with minibatch SGD and early stopping.

    The data comes from ``load_data(dataset)`` as three (x, y) pairs of
    shared variables: train, validation and test.  The cost is the
    negative log likelihood plus ``L1_reg`` / ``L2_reg`` weighted
    regularization terms.  Whenever the validation error reaches a new
    best, the model is also scored on the test set.  Progress goes to
    stdout and the total run time to stderr.
    """
    # get the datasets
    (train_set_x, train_set_y), (valid_set_x, valid_set_y), \
        (test_set_x, test_set_y) = load_data(dataset)[:3]

    # compute number of minibatches for training, validation & testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to minibatch
    x = T.matrix(name='x')
    y = T.ivector(name='y')

    rng = numpy.random.RandomState(1234)

    # MLP class
    classifier = MLP(rng, x, 28*28, n_hidden, 10)

    # cost
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # the rows of the minibatch selected by `index`
    batch = slice(index*batch_size, (index+1)*batch_size)

    # test function on minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: test_set_x[batch], y: test_set_y[batch]})

    # validation function on minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: valid_set_x[batch], y: valid_set_y[batch]})

    # gradient params
    gparams = [T.grad(cost, param) for param in classifier.params]

    # updates for training
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    # train model on minibatch
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[batch], y: train_set_y[batch]})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much
                                   # is considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                this_validation_loss = numpy.mean(
                    [validate_model(i) for i in range(n_valid_batches)])

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    significant = (this_validation_loss <
                                   best_validation_loss * improvement_threshold)
                    if significant:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_score = numpy.mean(
                        [test_model(i) for i in range(n_test_batches)])

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
def trainLeNet(train_x, train_y, validation_x, validation_y, test_x, test_y,
               convolution_layer_size=None, rate=0.1, batch_size=500, n_epochs=200):
    """Train a two-stage convolutional network followed by an MLP.

    Input batches are reshaped to ``(batch_size, 1, 28, 28)``.  Two
    ``LeNetConvPoolLayer`` stages (5x5 filters, 2x2 pooling) feed an
    ``MLP`` with 500 hidden units and 10 outputs.  Training uses plain
    gradient descent with step ``rate`` and early stopping on the
    validation error.  Each time the validation error improves, the
    parameter list is pickled to ``LeNet_params.pkl`` and the test error
    is printed.

    Args:
        train_x, train_y, validation_x, validation_y, test_x, test_y:
            Shared variables holding the three data splits; the ``*_x``
            ones must support ``get_value()``.
        convolution_layer_size: Pair giving the number of filters in the
            first and second convolution stage.  Required despite the
            ``None`` default.
        rate: Gradient-descent step size.
        batch_size: Number of samples per minibatch.
        n_epochs: Maximum number of passes over the training set.

    Raises:
        TypeError: If ``convolution_layer_size`` is not provided.
    """
    if convolution_layer_size is None:
        # Same exception type as the indexing failure it replaces, but
        # with a message that names the missing argument.
        raise TypeError('convolution_layer_size must be a pair of filter counts')

    rng = np.random.RandomState(274563533)
    x = T.matrix('x')
    y = T.ivector('y')

    layer_0_input = x.reshape((batch_size, 1, 28, 28))
    layer_0 = LeNetConvPoolLayer(
        rng,
        input=layer_0_input,
        layer_shape=(convolution_layer_size[0], 1, 5, 5),
        input_shape=(batch_size, 1, 28, 28),
        pool_size=(2, 2))
    layer_1 = LeNetConvPoolLayer(
        rng,
        input=layer_0.output,
        layer_shape=(convolution_layer_size[1], convolution_layer_size[0], 5, 5),
        input_shape=(batch_size, convolution_layer_size[0], 12, 12),
        pool_size=(2, 2))
    MLP_input = layer_1.output.flatten(2)
    layer_final = MLP(MLP_input, convolution_layer_size[1] * 4 * 4, 500, 10)

    cost = layer_final.negativeLogLikelihood(y)
    error = layer_final.errors(y)

    index = T.lscalar('index')
    batch = slice(index * batch_size, (index + 1) * batch_size)

    validation_model = function(
        [index], error,
        givens={x: validation_x[batch], y: validation_y[batch]})
    test_model = function(
        [index], error,
        givens={x: test_x[batch], y: test_y[batch]})

    params = layer_final.params + layer_1.params + layer_0.params
    param_grad = T.grad(cost, params)
    updates = [(p, p - rate * pg) for p, pg in zip(params, param_grad)]
    train_model = function(
        [index], cost,
        givens={x: train_x[batch], y: train_y[batch]},
        updates=updates)

    n_train_batches = train_x.get_value().shape[0] // batch_size
    n_test_batches = test_x.get_value().shape[0] // batch_size
    n_validation_batches = validation_x.get_value().shape[0] // batch_size

    epoch = 0
    best_validation_cost = np.inf
    patience = 1000000
    improvement_thread = 0.995
    patience_increase = 2
    validation_frequency = min(n_train_batches, patience // 2)
    loop_done = False

    # Strict comparison so exactly n_epochs epochs run at most.
    while epoch < n_epochs and not loop_done:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validation_model(i)
                                     for i in range(n_validation_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i / %i, validation error %f %%'
                      % (epoch, minibatch_index + 1, n_train_batches,
                         this_validation_loss * 100))

                if this_validation_loss < best_validation_cost:
                    # pickle writes bytes, so the file must be binary.
                    with open('LeNet_params.pkl', 'wb') as serial:
                        pickle.dump(params, serial)

                    if this_validation_loss < best_validation_cost * improvement_thread:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_cost = this_validation_loss

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_loss = np.mean(test_losses)
                    print('test error: %f %%' % (test_loss * 100))

            if patience <= iteration:
                loop_done = True
                break
def train_model(filename):
    """Train an MLP on batches from ``filename`` and dump its parameters.

    A ``DataLoader`` supplies one (input, label) batch per iteration.  For
    each of the 10000 iterations the classifier's error on the batch is
    measured, one gradient-descent step is taken, and the error is
    measured again; progress and both errors are printed.  Afterwards
    the first six parameters are written as text to ``W1.txt``,
    ``b1.txt``, ``W2.txt``, ``b2.txt``, ``W3.txt`` and ``b3.txt``:
    matrices as tab-separated rows, vectors one value per line, all with
    six decimals.
    """
    learning_rate = 0.05
    patience = 10000
    size = 1000
    batch = 100

    loader = DataLoader(filename, batch)
    rng = numpy.random.RandomState()

    print('... building the model')
    x = T.matrix('x')
    y = T.ivector('y')

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=12*12*5,
        n_hidden=size,
        n_out=12
    )
    cost = classifier.negative_log_likelihood(y)
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    print('... training')
    for i in range(patience):
        ip, op = loader.get_data()
        # NOTE(review): both functions are recompiled on every iteration
        # because the batch is bound through `givens`.  Compiling once
        # with x and y as inputs would be faster, but depends on what
        # get_data() returns -- confirm before changing.
        test_model = theano.function(
            inputs=[],
            outputs=classifier.errors(y),
            givens={x: ip, y: op}
        )
        train_step = theano.function(
            inputs=[],
            outputs=cost,
            updates=updates,
            givens={x: ip, y: op}
        )
        before = test_model()
        train_step()
        after = test_model()
        print(100.0 * i / patience, '%', before, after)

    # Fetch all six values first so a missing parameter fails before any
    # file is written.
    names = ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    values = [classifier.params[k].get_value() for k in range(len(names))]
    for name, value in zip(names, values):
        if name.startswith('W'):
            text = '\n'.join('\t'.join('%.6f' % item for item in row)
                             for row in value)
        else:
            text = '\n'.join('%.6f' % item for item in value)
        with open(name + '.txt', 'w') as out:
            out.write(text + '\n')
def evaluate_lenet5(learning_rate=0.33, n_epochs=200, dataset="mnist.pkl.gz", nkerns=[32, 32, 32], batch_size=500):
    """Train and evaluate a LeNet-style network on the CIFAR-10 batches.

    The five training batches and the test batch are read from the
    ``cifar-10-batches-py`` directory.  The first 7500 training rows are
    held out for validation.  All splits are standardized with the mean
    and standard deviation of the training rows.  The network is three
    convolution/pooling stages, a hidden layer with 64 units and an MLP
    with 200 hidden units and 10 outputs.  Training is SGD with an
    L2 penalty, a stepped learning-rate schedule and early stopping.
    Per-iteration cost and validation error are saved to
    ``epoc_cost.csv`` / ``epoc_val_error.csv`` and plotted per epoch at
    the end.

    :type learning_rate: float
    :param learning_rate: initial learning rate (factor for the
                          stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: not read by this function; kept for compatibility

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolution layer

    :type batch_size: int
    :param batch_size: number of samples per minibatch
    """
    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """Load an (x, y) pair into Theano shared variables.

        Both arrays are stored as ``floatX`` shared variables; the labels
        are returned cast to ``int32`` because they are used as indices.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        return shared_x, T.cast(shared_y, "int32")

    def history_array(entries):
        """Return the list of 3-item history entries as an (n, 3) array."""
        return numpy.asarray(entries, dtype=float).reshape(len(entries), 3)

    # Load the raw batches: five training batches, then the test batch.
    train_batches = [unpickle("cifar-10-batches-py/data_batch_%d" % k) for k in range(1, 6)]
    test = unpickle("cifar-10-batches-py/test_batch")

    X_train = numpy.concatenate([b["data"] for b in train_batches], axis=0)
    y_train = numpy.concatenate([b["labels"] for b in train_batches])

    test_data = test["data"]
    # Size the reshape from the test data itself, not from a training batch.
    Xte_rows = test_data.reshape(test_data.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining rows for training
    Ytr = y_train[7500:50000]

    # Standardize every split with statistics of the training rows only,
    # so no information from the test set leaks into preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # Shared so the schedule below can change it between epochs.
    learning_rate = theano.shared(learning_rate)

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Second convolutional pooling layer:
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
    )

    # Third convolutional pooling layer:
    # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4)
    # maxpooling reduces this further to (4/2, 4/2) = (2, 2)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 2, 2)
    layer2conv = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 5, 5),
        filter_shape=(nkerns[2], nkerns[1], 2, 2),
        poolsize=(2, 2),
    )

    # The fully-connected layers operate on 2D matrices, so flatten the
    # feature maps to shape (batch_size, nkerns[2] * 2 * 2).
    layer3_input = layer2conv.output.flatten(2)
    print(layer3_input.shape)

    # fully-connected layer with the relu activation
    layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * 2 * 2, n_out=64, activation=relu)

    layer3_1 = MLP(rng, input=layer3.output, n_in=64, n_hidden=200, n_out=10)

    # the cost we minimize during training is the NLL of the model plus an
    # L2 penalty.  Each weight matrix is counted once.
    # NOTE(review): layer3_1.L2_sqr is assumed to already be a scalar sum
    # of squared weights, so it is added as-is rather than squared again
    # -- confirm against the MLP class.
    L2_reg = 0.005
    L2_sqr_model = (
        (layer0.W ** 2).sum()
        + (layer1.W ** 2).sum()
        + (layer2conv.W ** 2).sum()
        + (layer3.W ** 2).sum()
        + layer3_1.L2_sqr
    )
    cost = layer3_1.negative_log_likelihood(y) + L2_reg * L2_sqr_model

    batch = slice(index * batch_size, (index + 1) * batch_size)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3_1.errors(y),
        givens={x: test_set_x[batch], y: test_set_y[batch]},
    )

    validate_model = theano.function(
        [index],
        layer3_1.errors(y),
        givens={x: valid_set_x[batch], y: valid_set_y[batch]},
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3_1.params + layer3.params + layer2conv.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD update rule, built by looping over all (param, grad) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[batch], y: train_set_y[batch]},
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    epoch_loss_list = []  # entries of [iteration, epoch, training cost]
    epoch_val_list = []  # entries of [iteration, epoch, validation error]

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1

        # Learning-rate schedule: drop to 0.1 at epoch 20, then decay by
        # 10% per epoch while the rate is still >= 0.1 * 0.9**6.
        if epoch == 20:
            learning_rate.set_value(0.1)
        if epoch >= 21 and learning_rate.get_value() >= 0.1 * (0.9 ** 6):
            learning_rate.set_value(learning_rate.get_value() * 0.9)

        # Snapshot the history collected so far to disk.
        if epoch > 3:
            numpy.savetxt(fname="epoc_cost.csv", X=history_array(epoch_loss_list), fmt="%1.3f")
            numpy.savetxt(fname="epoc_val_error.csv", X=history_array(epoch_val_list), fmt="%1.3f")

        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print("training @ iter = ", iteration)
            cost_ij = train_model(minibatch_index)
            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )
                epoch_val_list.append([iteration, epoch, this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        ("     epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print(
        "Best validation score of %f %% obtained at iteration %i, "
        "with test performance %f %%" % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    # Average cost and validation error per epoch, then plot both curves.
    epoch_loss_np = history_array(epoch_loss_list)
    epoch_val_np = history_array(epoch_val_list)
    epoch_loss = pandas.DataFrame(
        {"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1], "cost": epoch_loss_np[:, 2]}
    )
    epoch_vall = pandas.DataFrame(
        {"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1], "val_error": epoch_val_np[:, 2]}
    )
    epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(["epoch"]).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(["epoch"]).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]})
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind="line", x="epoch", y="val_error")
    plt.show()
class Bot_RL_MLP (Bot):
    """Board-game bot that learns action values with an MLP.

    The MLP maps the sensor information of a world to one score per board
    cell.  Actions are drawn with a softmax over the scores of the currently
    possible moves, and the network is adapted from the reward observed after
    the bot's previous action.  The bot's state can be saved to and restored
    from a JSON file.
    """

    def __init__ (self, size_x = 3, size_y = 3, beta = 1, hidden = 20, learning_rate = 0.1, reward = [0, 1.0, -1.0], initial_field = [0], player_ID = 1):
        """Create the bot and its MLP and start the first game.

        size_x, size_y -- board dimensions; the MLP gets size_x * size_y
                          inputs and outputs
        beta           -- softmax sharpness: a high value (50?) means
                          exploitation, a low value means exploration
        hidden         -- passed to MLP as its second argument
                          (presumably the hidden layer size -- confirm)
        learning_rate  -- passed through to the MLP
        reward         -- rewards indexed by the winner id (see get_reward)
        initial_field  -- stored and persisted, not otherwise used here
        player_ID      -- stored and persisted, not otherwise used here
        """
        Bot.__init__(self)
        # Copy the list arguments so the shared default objects are never
        # aliased by (and mutated through) an instance.
        self.initial_field = initial_field[:]
        self.player_ID = player_ID
        self.bot_name = "Bot_RL_MLP"
        self.version = 1
        self.counter = 0
        self.optimization = []
        self.reward = reward[:]
        self.first_action = True
        self.beta = beta
        self.mlp = MLP (size_x * size_y, hidden, size_x * size_y, learning_rate)
        self.new_game()

    def new_game(self):
        """Initialize a new game and count it."""
        self.first_action = True
        self.counter += 1
        self.mlp.new_game()

    def load_data(self, filename):
        """Restore the bot state from the JSON file `filename`.

        Returns the decoded data.  Raises ValueError when the file was
        written by a different bot or by a newer bot version.
        """
        with open(filename, "r") as fo:
            data = json.load(fo)

        # Each failure now reports its own cause (the two messages were
        # attached to the opposite checks before).
        if data["bot"] != self.bot_name:
            raise ValueError('dataset is not usable by Bot : different Bot identifier')
        if data["version"] > self.version:
            raise ValueError('dataset is not usable by this Bot version : dataset version is higher than Bot version')

        self.player_ID = data["player_ID"]
        self.initial_field = data["initial_field"]
        self.counter = data["counter"]
        self.optimization = data["optimization"]
        self.reward = data["reward"]
        self.first_action = data["first_action"]
        self.beta = data["beta"]
        self.mlp.set_data(data["MLP"])
        return data

    def save_data(self, filename):
        """Write the bot state, including the MLP data, to `filename` as JSON."""
        data = {"bot" : self.bot_name,
                "version" : self.version,
                "player_ID" : self.player_ID,
                "initial_field" : self.initial_field,
                "counter" : self.counter,
                "optimization" : self.optimization,
                "reward" : self.reward,
                "first_action" : self.first_action,
                "beta" : self.beta,
                "MLP" : self.mlp.get_data()}
        with open(filename, "w") as fo:
            fo.write(json.dumps(data))

    def _to_xy(self, action, world):
        """Convert a 1D action index into (x, y) board coordinates."""
        x = action % world.size_x
        # Floor division keeps y an integer on every Python version.
        # NOTE(review): the divisor is size_y as in the original code; for a
        # non-square board it probably has to be size_x -- confirm.
        y = action // world.size_y
        return (x, y)

    def get_action(self, world_old):
        """Return the (x, y) action chosen for the given world."""
        self.info_tic = world_old.get_sensor_info()
        self.h_tic = self.mlp.get_action(self.info_tic)

        moves = world_old.get_moves()
        if (len(moves) == 1):
            # Workaround: if only one move is left, take it automatically.
            self.act_tic = moves[0]
        else:
            # Scores of the possible moves, in the same order as `moves`, so
            # the index drawn by rand_winner always maps to the right move.
            new_h_tic = [self.h_tic[move] for move in moves]
            # Repeat the selection until a valid move was chosen.
            validation = False
            while (validation == False):
                self.act_tic = moves[self.rand_winner (new_h_tic, self.beta)]
                x, y = self._to_xy(self.act_tic, world_old)
                validation = world_old.check_action(x, y)

        return self._to_xy(self.act_tic, world_old)

    def evaluate_action(self, world_new):
        """Adapt the MLP considering the results (world_new) of its last action.

        Nothing is learned on the first call of a game because there is no
        previous action yet; the call only records the current one.
        """
        if (self.first_action == False):
            # Q-values before and after the action.
            q0 = self.h[self.act]
            q1 = self.mlp.get_action(world_new.get_sensor_info())[self.act_tic]

            # Reward on the new board.
            r = self.get_reward(world_new.get_winner())

            if (r == self.get_reward(1)):  # This is cleaner than defining
                target = r                 # target as r + 0.9 * q1,
            else:                          # because weights now converge.
                target = 0.9 * q1          # gamma = 0.9

            delta = target - q0            # prediction error

            # Important: only the delta at the position of the action counts
            # as error; the error of every other position is 0.
            error = np.zeros (self.mlp.input_size)
            error[self.act] = delta

            # Important: learning uses the error and the world BEFORE the
            # action.
            self.mlp.evaluate_action_RL(self.info, error)

        # Remember the current step as the "previous" one for the next call.
        self.info = self.info_tic
        self.h = self.h_tic
        self.act = self.act_tic
        self.first_action = False

    def rand_winner (self, S_from, beta):
        """Select an index of S_from with softmax probabilities.

        Each score is clipped at 200 before exponentiation.  Returns 0 when
        S_from is empty or when the computation fails.
        """
        rnd = np.random.random()
        d_r = len (S_from)
        sel = 0
        total = 0.0
        try:
            weights = [np.exp (beta * min(score, 200)) for score in S_from]
            total = sum(weights)

            # If every weight underflowed to 0, fall back to a uniform
            # distribution so each entry keeps a probability above 0.
            if (total == 0):
                weights = [1.0] * d_r
                total = d_r

            p_i = 0.0
            for i, weight in enumerate(weights):
                p_i += weight / total
                if p_i > rnd:
                    sel = i
                    break
        except Exception:
            # Best effort: report the inputs and keep the default selection.
            print("%s %s %s" % (beta, S_from, total))

        return sel

    def get_reward (self, winner):
        """Return the reward for the given winner id (0.0 outside 0..2)."""
        if ((winner >= 0) and (winner <= 2)):
            return self.reward[int(winner)]
        else:
            return 0.0
return [(a-m)*b-2 for a,b,m in zip(x,norm,minv)] ####### all files used ftr = open(('./'+ feature +'/train.ark') , 'r') #training set fte = open(('./'+ feature +'/test.ark') , 'r') # testing set if useState: flab = open('./state_label/train.lab' , 'r') # label else: flab = open('./label/train.lab' , 'r') # label fmap = open('./phones/state_48_39.map' , 'r') # label mapping 48-39 ######## model initialization model = MLP( n_in=raw_feat_num*(pre+post+1) , n_out=1943 if useState else 48 , hidStruct = hiddenStruct , hidAct = hiddenActFunc , pDrop = dropout if doDrop else 0.0 ) ###### prediction function (theano type) x = T.matrix('x') pred = theano.function([x] , model.predict(x)) ######## model trainer initialization trainer = MLPtrainer.MLPtrainer( net = model , learning_rate = alpha , momentum = momentum , L1 = L1 , L2 = L2 )
def test_pickle_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=10,
                    dataset='../data/mnist.pkl.gz', batch_size=20,
                    pickle_file='/scratch/z/zhaolei/lzamparo/gpu_tests/mlp_results/MLP_pickle.pkl',
                    n_hidden=500):
    """Interrupt the training of an MLP, pickle the MLP object, unpickle, and continue.

    The model is trained for the first half of `n_epochs`, written to
    `pickle_file`, read back, and the restored model is trained for the
    remaining epochs.  Early-stopping state (patience, best validation loss,
    ...) is carried across the interruption.

    learning_rate  -- step size of the plain SGD updates
    L1_reg, L2_reg -- weights of the L1 / squared-L2 terms added to the cost
    n_epochs       -- total number of epochs over both phases
    dataset        -- passed to load_data()
    batch_size     -- number of examples per minibatch
    pickle_file    -- path the classifier is pickled to and restored from
    n_hidden       -- number of hidden units of the MLP
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # number of minibatches for each set (floor division: batch counts are
    # used as range bounds and must be integers)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ### Build the model ###
    print('... building the model')

    # symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    minibatch = slice(index * batch_size, (index + 1) * batch_size)

    def compile_functions(model):
        """Compile (train, validate, test) Theano functions for `model`."""
        # cost to be minimized
        cost = (model.negative_log_likelihood(y)
                + L1_reg * model.L1
                + L2_reg * model.L2_sqr)

        def error_function(set_x, set_y):
            # mistakes made by the model on one minibatch of the given set
            return theano.function(inputs=[index],
                                   outputs=model.errors(y),
                                   givens={x: set_x[minibatch],
                                           y: set_y[minibatch]})

        # plain SGD: one (parameter, new value) pair per model parameter
        updates = [(param, param - learning_rate * T.grad(cost, param))
                   for param in model.params]
        train = theano.function(inputs=[index], outputs=cost, updates=updates,
                                givens={x: train_set_x[minibatch],
                                        y: train_set_y[minibatch]})
        return (train,
                error_function(valid_set_x, valid_set_y),
                error_function(test_set_x, test_set_y))

    rng = numpy.random.RandomState(1234)
    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden, n_out=10)
    train_model, validate_model, test_model = compile_functions(classifier)

    ### train the model ###
    print('... training')

    # early-stopping parameters
    patience_increase = 2          # wait this much longer when a new best comes up
    improvement_threshold = 0.995  # a relative improvement threshold for significance
    # Mutable state shared by both training phases.
    state = {'patience': 10000,    # look at this number of examples regardless
             'best_validation_loss': numpy.inf,
             'best_iter': 0,
             'test_score': 0.,
             'epoch': 0,
             'done_looping': False}
    # train for this many minibatches before checking the model on the
    # validation set
    validation_frequency = min(n_train_batches, state['patience'] // 2)

    def run_until(last_epoch, train_fn, validate_fn, test_fn):
        """Train until `last_epoch` is reached or patience runs out."""
        while (state['epoch'] < last_epoch) and (not state['done_looping']):
            state['epoch'] += 1
            epoch = state['epoch']
            for minibatch_index in xrange(n_train_batches):
                train_fn(minibatch_index)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                # do we validate?
                if (iteration + 1) % validation_frequency == 0:
                    validation_losses = [validate_fn(i) for i in xrange(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    if this_validation_loss < state['best_validation_loss']:
                        # increase patience if loss improvement is good enough
                        if this_validation_loss < (state['best_validation_loss']
                                                   * improvement_threshold):
                            state['patience'] = max(state['patience'],
                                                    iteration * patience_increase)
                        state['best_validation_loss'] = this_validation_loss
                        state['best_iter'] = iteration

                        # Score the new best model on the test set and keep
                        # the value, so the reports below show it instead of
                        # the initial 0.
                        test_losses = [test_fn(i) for i in xrange(n_test_batches)]
                        state['test_score'] = numpy.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               state['test_score'] * 100.))

                if state['patience'] <= iteration:
                    state['done_looping'] = True
                    break

    def report(prefix, start_time, end_time):
        """Print the summary of one training phase."""
        print((prefix + ' Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (state['best_validation_loss'] * 100., state['best_iter'] + 1,
               state['test_score'] * 100.))
        sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')

    # --- phase 1: first half of the epochs ---
    halfway_point = n_epochs // 2
    start_time = time.clock()
    run_until(halfway_point, train_model, validate_model, test_model)
    end_time = time.clock()
    report('Halfway point reached.', start_time, end_time)

    print("Pickling model...")
    with open(pickle_file, 'wb') as f:
        cPickle.dump(classifier, f, protocol=cPickle.HIGHEST_PROTOCOL)

    print("Unpickling the model...")
    # NOTE: unpickling executes code from the file; this is only acceptable
    # because pickle_file was written by this very function a moment ago.
    with open(pickle_file, 'rb') as f:
        unpickled_classifier = cPickle.load(f)
    unpickled_classifier.reconstruct_state(x, T.tanh)

    ### Re-establish the cost, grad, parameter updates ###
    # All three functions are recompiled for the restored model.  In
    # particular the training function must be rebuilt, otherwise the second
    # phase would keep updating the original classifier while validating the
    # unpickled one.
    train_model, validate_model, test_model = compile_functions(unpickled_classifier)

    # --- phase 2: remaining epochs on the restored model ---
    print(("Continue training for %i epochs ") % (n_epochs - state['epoch']))
    start_time = time.clock()
    run_until(n_epochs, train_model, validate_model, test_model)
    end_time = time.clock()
    report('End point reached.', start_time, end_time)
class Controller:
    """Prepare the iris data set and run an MLP over a grid of settings."""

    # dataset -- class-level defaults only; every instance gets its own lists
    # in __init__ so samples are never shared between Controller objects
    data = []
    training = []
    testing = []

    # characteristics
    int_set_size = 0
    int_num_features = 0
    int_training_size = 30
    int_testing_size = 20
    int_num_per_class = 50
    int_num_classes = 50

    # composition
    obj_mlp = MLP

    def __init__(self):
        """Load the iris data set and split it into training and testing."""
        self.data = []
        self.training = []
        self.testing = []
        iris = load_iris()
        self.prepareDataSet(iris)

    def prepareDataSet(self, iris):
        """Normalize and scale the data, then divide it into training and testing.

        Each sample is stored as ``[X, Y]`` where X is the scaled feature
        vector with a trailing bias input of 1 and Y is the target.  Out of
        every run of `int_num_per_class` consecutive samples the first
        `int_training_size` go to the training set and the rest to the
        testing set.  Both sets are rebuilt from scratch on every call and
        shuffled.
        """
        self.int_num_classes = numpy.unique(iris.target).shape[0]
        self.int_set_size = iris.data.shape[0]
        self.int_num_features = iris.data.shape[1]

        ## normalize data, then scale it to [-1, 1]
        self.data = preprocessing.normalize(iris.data)
        self.data = preprocessing.minmax_scale(self.data, (-1, 1))

        # Build fresh lists instead of appending to existing ones, so that
        # samples neither leak between instances nor accumulate on repeated
        # calls.
        training = []
        testing = []
        for i in range(len(self.data)):
            Y = iris.target[i]
            X = numpy.append(self.data[i], 1)  ## bias input = 1
            if i % self.int_num_per_class < self.int_training_size:
                training.append([X, Y])
            else:
                testing.append([X, Y])

        numpy.random.shuffle(training)
        numpy.random.shuffle(testing)
        self.training = training
        self.testing = testing

    def playMLP(self):
        """Train and test an MLP for every combination of the settings below.

        To calculate the number of hidden nodes a general rule is:
        (number of inputs + outputs) x 2/3.
        """
        n_hidden_layers = 1   ## number hidden layers
        step_epochs = 5       ## the number of epochs grows in steps of this size
        eta = 0.0001          ## eta learning rate

        for k in range(4, 100):       # k: number of hidden neurons
            for j in range(1, 20):    # j * step_epochs: number of epochs
                self.obj_mlp = MLP(self.int_num_features, self.int_num_classes,
                                   n_hidden_layers, j * step_epochs, k, eta)
                self.obj_mlp.train(self.training)
                #self.obj_mlp.plotMSE()
                self.obj_mlp.test(self.testing)
            # NOTE(review): at this level the increment has no effect because
            # the for statement rebinds k on the next iteration; if a stride
            # of 5 was intended, use range(4, 100, 5) instead -- confirm.
            k += 5
class Bot_RL_MLP (Bot):
    """Board-game bot that learns action values with an MLP.

    The MLP maps the sensor information of a world to one score per board
    cell.  Actions are drawn with a softmax over these scores, and the
    network is adapted from the reward observed after each action.

    Persistence (load_data / save_data based on json_tricks) was only
    sketched in commented-out code and is not implemented in this class.
    """

    def __init__ (self, size_x, size_y, beta, hidden, learning_rate, reward):
        """Create the bot and its MLP.

        size_x, size_y -- board dimensions; the MLP gets size_x * size_y
                          inputs and outputs
        beta           -- softmax sharpness: a high value (50?) means
                          exploitation, a low value means exploration
        hidden         -- passed to MLP as its second argument
                          (presumably the hidden layer size -- confirm)
        learning_rate  -- passed through to the MLP
        reward         -- rewards indexed by the winner id (see get_reward);
                          copied, so the caller's list is not aliased
        """
        Bot.__init__(self)
        self.bot_name = "Bot_RL_MLP"
        self.mlp = MLP (size_x * size_y, hidden, size_x * size_y, learning_rate)
        self.reward = reward[:]
        self.beta = beta

    def _to_xy(self, action, world):
        """Convert a 1D action index into (x, y) board coordinates."""
        x = action % world.size_x
        # Floor division keeps y an integer on every Python version.
        # NOTE(review): the divisor is size_y as in the original code; for a
        # non-square board it probably has to be size_x -- confirm.
        y = action // world.size_y
        return (x, y)

    def get_action(self, world):
        """Return the (x, y) action chosen for the given world."""
        self.info = world.get_sensor_info()
        self.h = self.mlp.get_action(self.info)

        # Push down the score of every cell whose sensor value is positive.
        for i in range(len(self.h)):
            if (self.info[i] > 0):
                self.h[i] = -10

        moves = world.get_moves()
        if (len(moves) == 1):
            # Workaround: if only one move is left, take it automatically.
            self.act = moves[0]
        else:
            # Repeat the selection until a valid move was chosen.
            validation = False
            while (validation == False):
                self.act = self.rand_winner (self.h, self.beta)
                x, y = self._to_xy(self.act, world)
                validation = world.check_action(x, y)

        return self._to_xy(self.act, world)

    def evaluate_action(self, world_new):
        """Adapt the MLP considering the results (world_new) of its last action."""
        # Q-values before and after the action.
        q0 = self.h[self.act]
        q1 = self.mlp.get_action(world_new.get_sensor_info())[self.act]

        # Reward on the new board.
        r = self.get_reward(world_new.get_winner())

        if (r == self.get_reward(1)):  # This is cleaner than defining
            target = r                 # target as r + 0.9 * q1,
        else:                          # because weights now converge.
            target = 0.9 * q1          # gamma = 0.9

        delta = target - q0            # prediction error

        # Important: only the delta at the position of the action counts as
        # error; the error of every other position is 0.
        error = np.zeros (self.mlp.input_size)
        error[self.act] = delta

        # Important: learning uses the error and the world BEFORE the action.
        self.mlp.evaluate_action(self.info, error)

    def rand_winner (self, S_from, beta):
        """Select an index of S_from with softmax probabilities.

        Each score is clipped at 200 before exponentiation.  Returns 0 when
        S_from is empty or when the computation fails.
        """
        rnd = np.random.random()
        d_r = len (S_from)
        sel = 0
        total = 0.0
        try:
            weights = [np.exp (beta * min(score, 200)) for score in S_from]
            total = sum(weights)

            # If every weight underflowed to 0, fall back to a uniform
            # distribution so each entry keeps a probability above 0.
            if (total == 0):
                weights = [1.0] * d_r
                total = d_r

            p_i = 0.0
            for i, weight in enumerate(weights):
                p_i += weight / total
                if p_i > rnd:
                    sel = i
                    break
        except Exception:
            # Best effort: report the inputs and keep the default selection.
            print("%s %s %s" % (beta, S_from, total))

        return sel

    def get_reward (self, winner):
        """Return the reward for the given winner id (0.0 outside 0..2)."""
        if ((winner >= 0) and (winner <= 2)):
            return self.reward[int(winner)]
        else:
            return 0.0
return [(a-m)/b for a,b,m in zip(x,std,mean)] ####### all files used ftr = open(('./'+ feature +'/train.ark') , 'r') #training set fte = open(('./'+ feature +'/test.ark') , 'r') # testing set if useState: flab = open('./state_label/train.lab' , 'r') # label else: flab = open('./label/train.lab' , 'r') # label fmap = open('./phones/state_48_39.map' , 'r') # label mapping 48-39 ######## model initialization model = MLP( n_in=raw_feat_num*(pre+post+1) , n_out=1943 if useState else 48 , hidStruct = hiddenStruct , hidAct = hiddenActFunc , pDrop = dropout if doDrop else 0.0 ) ###### prediction function (theano type) x = T.matrix('x') pred = theano.function([x] , model.predict(x)) pred_max = theano.function([x] , model.predict_max(x)) ######## model trainer initialization trainer = MLPtrainer.MLPtrainer( net = model , learning_rate = alpha , momentum = momentum , L1 = L1 ,
# Settings consumed by the trainer construction at the end of this section.
momentum = 0.98
L1 = 0.0
L2 = 0.0
# NOTE(review): `dropout` is defined here but not passed to MLP(...) in this
# section -- confirm whether it is used further down.
dropout = 0.3
outfilename = 'result_wav.csv'
# Size of the input layer (n_in) of the model built below.
feat_num = 400

####### all files used
wavDir = './wav/'
# Both handles stay open after this section; presumably they are read and
# closed by later code -- verify.
flab = open('./label/train.lab' , 'r') # label
fmap = open('./phones/48_39.map' , 'r') # label mapping 48-39

######## model initialization
# hiddenStruct is defined outside this section.  n_out=48 presumably matches
# the 48 labels of the 48-39 mapping -- TODO confirm.
model = MLP( n_in=feat_num ,
             n_out=48 ,
             hidStruct = hiddenStruct )

###### prediction function (theano type)
# x is a vector, so pred() is compiled for one feature vector per call.
x = T.vector('x')
pred = theano.function([x] , model.predict(x))

######## model trainer initialization
# alpha (the learning rate) is defined outside this section.
trainer = MLPtrainer.MLPtrainer( net = model ,
                                 learning_rate = alpha ,
                                 momentum = momentum ,
                                 L1 = L1 ,
                                 L2 = L2 )
def MLP_demo(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1, n_hidden=309):
    """Train an MLP on the data returned by load_multi() with early stopping.

    The model is validated every `validation_frequency` minibatches; whenever
    the validation error reaches a new best, the test error is measured as
    well.  Training ends after `n_epochs` epochs or when patience runs out.
    The `dataset` argument is accepted but not used by this function.
    """
    splits = load_multi()
    train_set_x, train_set_y = splits[0]
    valid_set_x, valid_set_y = splits[1]
    test_set_x, test_set_y = splits[2]

    def count_batches(shared_x):
        # whole minibatches contained in one shared data set
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    print('... building the model')

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = np.random.RandomState(1234)
    classifier = MLP(rng, x, y, n_in=103, n_hidden=n_hidden, n_out=9)

    lower = index * batch_size
    upper = (index + 1) * batch_size

    # error rate of the classifier on one minibatch of the test set
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={x: test_set_x[lower:upper], y: test_set_y[lower:upper]})
    # the same measure on one minibatch of the validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={x: valid_set_x[lower:upper], y: valid_set_y[lower:upper]})

    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate,
                                                L1_reg=L1_reg, L2_reg=L2_reg)
    # one training step on a minibatch; returns the cost
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[lower:upper], y: train_set_y[lower:upper]})

    print('... training')

    # early-stopping bookkeeping
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.

    start_time = time.clock()
    out_of_patience = False
    step = -1   # global minibatch counter, 0 for the very first minibatch
    for epoch in xrange(1, n_epochs + 1):
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            step += 1

            if (step + 1) % validation_frequency == 0:
                this_validation_loss = np.mean(
                    [validate_model(b) for b in xrange(n_valid_batches)])
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    significant = best_validation_loss * improvement_threshold
                    if this_validation_loss < significant:
                        patience = max(patience, step * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = step

                    test_score = np.mean(
                        [test_model(b) for b in xrange(n_test_batches)])
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                out_of_patience = True
                break
        if out_of_patience:
            break
    end_time = time.clock()

    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    elapsed_minutes = (end_time - start_time) / 60.
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % elapsed_minutes + '\n')
class NeuralNet():
    """Multilayer-perceptron classifier built on Theano, trained with early stopping.

    Attributes:
        x, y: symbolic Theano input matrix and integer label vector.
        metrics: dict with the keys "F1", "Accuracy", "AUC", "Precision" and
            "Recall"; filled in by test().
        mlp: the underlying MLP; only available after train() has run.
    """

    def __init__(self, n_hidden_units, batch_size, output_size, metric_list="none",
                 learning_rate=1, l1_term=0, l2_term=0, n_epochs=100,
                 activation_function='tanh', train_p=.6, dropout=False,
                 dropout_rate=.5, momentum=False, momentum_term=.9,
                 adaptive_learning_rate=False):
        """Store the hyper-parameters; the model itself is built in train().

        :param n_hidden_units: passed to MLP as ``n_hidden_sizes``
        :param batch_size: number of examples per minibatch
        :param output_size: passed to MLP as ``n_out``
        :param metric_list: metrics dict to use, or "none" for a zeroed default
        :param learning_rate: step size of the gradient updates
        :param l1_term: weight of the L1 norm in the cost
        :param l2_term: weight of the squared L2 norm in the cost
        :param n_epochs: maximal number of epochs to run the optimizer
        :param activation_function: 'relu', 'tanh' or 'sigmoid'
        :param train_p: fraction of the samples used for training
        :param dropout: whether dropout is enabled
        :param dropout_rate: rate passed to MLP; also scales the weights at
            prediction time
        :param momentum: whether to use momentum updates
        :param momentum_term: weight of the previous update
        :param adaptive_learning_rate: accepted but currently unused
        :raises ValueError: if ``activation_function`` is not a known name
        """
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

        self.dropout = dropout
        self.dropout_rate = dropout_rate

        if metric_list == "none":
            self.metrics = {"F1": 0, "Accuracy": 0, "AUC": 0, "Precision": 0, "Recall": 0}
        else:
            self.metrics = metric_list

        self.learning_rate = learning_rate
        self.L1_reg = l1_term
        self.L2_reg = l2_term
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.train_percent = train_p

        # Define new ReLU activation function
        def relu(x):
            return T.switch(x < 0, 0, x)

        activations = {'relu': relu, 'tanh': T.tanh, 'sigmoid': T.nnet.sigmoid}
        if activation_function not in activations:
            # Fail here instead of with an AttributeError deep inside train().
            raise ValueError("unknown activation_function %r; expected one of %s"
                             % (activation_function, sorted(activations)))
        self.activation_function = activations[activation_function]

        self.output_size = output_size
        self.hidden_layer_sizes = n_hidden_units
        self.momentum = momentum
        self.momentum_term = momentum_term

    def train(self, x_input, y_input):
        """Fit the MLP with minibatch gradient descent and early stopping.

        The first ``train_p`` fraction of the samples is used for training;
        the samples after it, up to the largest multiple of 10 not exceeding
        the sample count, are used for validation.

        :param x_input: 2D array with one sample per row
        :param y_input: labels, one per sample, usable as the integer label
            vector ``self.y``
        """
        index = T.lscalar('index')  # index to a [mini]batch

        n_samples = x_input.shape[0]
        # Slice bounds must be integers; the product with the fraction is a
        # float.
        train_size = int(n_samples * self.train_percent)
        max_size = n_samples - (n_samples % 10)

        train_set_x = x_input[:train_size, :]
        train_set_y = y_input[:train_size]
        # Validation starts right after the training part; starting at
        # train_size + 1 would silently drop one sample.
        valid_set_x = x_input[train_size:max_size, :]
        valid_set_y = y_input[train_size:max_size]

        # compute number of minibatches for training and validation
        n_train_batches = int(train_set_x.shape[0] / self.batch_size)
        n_valid_batches = int(valid_set_x.shape[0] / self.batch_size)

        number_in = train_set_x.shape[1]

        valid_set_x = theano.shared(valid_set_x, 'valid_set_x')
        valid_set_y = theano.shared(valid_set_y, 'valid_set_y')
        train_set_x = theano.shared(train_set_x, 'train_set_x')
        train_set_y = theano.shared(train_set_y, 'train_set_y')

        self.mlp = MLP(
            rng=numpy.random.RandomState(),
            input=self.x,
            n_in=number_in,
            n_out=self.output_size,
            a_function=self.activation_function,
            n_hidden_sizes=self.hidden_layer_sizes,
            dropout=self.dropout,
            dropout_rate=self.dropout_rate
        )

        # the cost we minimize during training is the negative log likelihood
        # of the model plus the regularization terms (L1 and L2); cost is
        # expressed here symbolically
        cost = (
            self.mlp.negative_log_likelihood(self.y)
            + self.L1_reg * self.mlp.L1
            + self.L2_reg * self.mlp.L2_sqr
        )

        batch = slice(index * self.batch_size, (index + 1) * self.batch_size)

        # computes the mistakes that are made by the model on a minibatch of
        # the validation set
        validate_model = theano.function(
            inputs=[index],
            outputs=self.mlp.errors(self.y),
            givens={
                self.x: valid_set_x[batch],
                self.y: valid_set_y[batch]
            }
        )

        # gradient of the cost with respect to every parameter
        gradients = [T.grad(cost, param) for param in self.mlp.params]

        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs
        updates = []
        if self.momentum:
            for param, gradient in zip(self.mlp.params, gradients):
                # Previous update of this parameter; zeros_like keeps shape
                # and dtype in line with the parameter itself.
                previous_delta = theano.shared(
                    value=numpy.zeros_like(param.get_value()))
                delta = (-self.learning_rate * gradient
                         + self.momentum_term * previous_delta)
                updates.append((param, param + delta))
                updates.append((previous_delta, delta))
        else:
            for param, gradient in zip(self.mlp.params, gradients):
                updates.append((param, param - self.learning_rate * gradient))

        # `train_model` returns the cost and at the same time updates the
        # parameters of the model based on the rules defined in `updates`
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                self.x: train_set_x[batch],
                self.y: train_set_y[batch]
            }
        )

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')

        # early-stopping parameters
        patience = 1000000  # look as this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        # No test set is evaluated in this method, so the value reported as
        # "test error" below always stays at this initial 0.
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False
        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                minibatch_avg_cost = train_model(minibatch_index)
                # iteration number
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)

                    # Report the numeric cost of the last minibatch; the
                    # symbolic `cost` expression cannot be formatted with %f.
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%, cost %f' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.,
                            float(minibatch_avg_cost)
                        )
                    )

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience, iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        # Write the timing line to stderr instead of printing the stream object.
        print(('The code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.)),
              file=sys.stderr)

    def setup_labels(self, y):
        """Relabel several binary label arrays into one multi-class vector.

        For the i-th array in ``y`` the label 0 becomes ``2 * (i + 1)`` and
        the label 1 becomes ``2 * (i + 1) + 1``.  The relabeled arrays are
        stacked horizontally and returned with dtype ``numpy.intc``.  The
        arrays passed in are left unmodified.

        The original implementation started with an assertion whose condition
        was a string literal and therefore could never fail; it has been
        removed, so no minimum number of arrays is enforced.
        """
        relabeled = []
        for i, yi in enumerate(y):
            negative_example_label = 2 * (i + 1)
            positive_example_label = negative_example_label + 1
            # Work on a copy so the caller's array is not changed in place.
            labels = numpy.array(yi)
            labels[labels == 0] = negative_example_label
            labels[labels == 1] = positive_example_label
            relabeled.append(labels)
        return numpy.hstack(relabeled).astype(numpy.intc)

    def test(self, x, y):
        """Predict labels for ``x`` and store the scores against ``y`` in self.metrics."""
        prediction = self.predict(x)
        self.metrics["F1"] = f1_score(y, prediction)
        self.metrics["Precision"] = precision_score(y, prediction)
        self.metrics["Recall"] = recall_score(y, prediction)
        self.metrics["AUC"] = roc_auc_score(y, prediction)
        self.metrics["Accuracy"] = accuracy_score(y, prediction)

    def predict(self, x):
        """Return the predicted class index for every row of ``x``."""
        # Create a theano shared variable for the input x: the data to be
        # predicted
        layer_input = theano.shared(x, 'test_set_x')

        # Propagate through all the hidden layers in the MLP
        for hidden_layer in self.mlp.hidden_layers:
            weights = hidden_layer.W
            if self.dropout:
                # At test time the weights are scaled by the dropout rate.
                # NOTE(review): if dropout_rate is the drop probability, the
                # expected scaling would be (1 - dropout_rate) -- confirm.
                weights = weights * self.dropout_rate
            layer_input = self.activation_function(
                T.dot(layer_input, weights) + hidden_layer.b)

        # Get the weights and bias from the softmax output layer
        W = self.mlp.logRegressionLayer.W
        b = self.mlp.logRegressionLayer.b

        # compile the theano function for calculating the outputs from the
        # softmax layer
        get_y_prediction = theano.function(
            inputs=[],
            outputs=T.argmax(T.nnet.softmax(T.dot(layer_input, W) + b), axis=1),
            on_unused_input='ignore',
        )
        return get_y_prediction()

    def transfer_learned_weights(self, x):
        """Return the activation of the final hidden layer's weights applied to ``x``."""
        final_hidden_layer = self.mlp.hidden_layers[-1]
        w = final_hidden_layer.W
        if self.dropout:
            w = w * self.dropout_rate
        transformation_function = theano.function(
            inputs=[],
            outputs=self.activation_function(T.dot(x, w) + final_hidden_layer.b),
            on_unused_input='ignore',
        )
        return transformation_function()

    def __str__(self):
        """Return a multi-line summary of the stored metrics."""
        return "MLP:\nF1 Score: {}\nPrecision: {}\n" \
               "Recall: {}\nAccuracy: {}\nROC: {}\n".format(self.metrics['F1'],
                                                            self.metrics['Precision'],
                                                            self.metrics['Recall'],
                                                            self.metrics['Accuracy'],
                                                            self.metrics['AUC'])
valid_labels, clf.predict(valid_dataset) ) logging.info('Epoch [%d] Cost %f, Validation Accurary %.2f%%'%(iepoch, cost, valid_acc * 100)) if epoch_costs and iepoch > 0 and iepoch % model_commit_freq == 0: commit_model(clf) end_time = time.time() logging.info('Model training completed in %.0fs'%(end_time - start_time)) plt.plot(np.arange(0, num_epochs), avg_costs) plt.show() commit_model(clf) return clf = load_model() if clf is None: clf = MLP(beta=beta, n_in=image_size * image_size, n_hidden=1024, num_hidden_layers=num_hidden_layers, n_out=num_labels, activation=theano.tensor.nnet.sigmoid) train_model(clf) elif force_training: logging.info('force_training is set. Model will be retrained') train_model(clf) acc = accuracy_score(test_labels, clf.predict(test_dataset)) * 100 logging.info('Model accuracy %.2f%%'%acc)
from MLP import MLP
from datasetmanager import download_extract_randomize_save, get_and_reformat_all_datasets

# ===== MAIN =====
# Script: build an MLP, load the train/validation/test sets and train on them.

# DOWNLOAD DATASETS
# Left disabled; uncomment to run the download/extract/randomize/save step.
# download_extract_randomize_save()

# DEFINE MLP
# With those parameters, I get 91.7% of accuracy
image_size = 28
num_labels = 10
# First entry is one value per pixel, last entry one value per label;
# presumably these are the layer widths from input to output -- confirm
# against MLP.
network_shape = [image_size * image_size, 600, 300, 150, num_labels]
initial_learning_rate = 0.0001
decay_steps = 0
decay_rate = 0
regularization_parameter = 0.0
dropout_keep_prob = 0.5
mlp = MLP(
    network_shape,
    initial_learning_rate,
    decay_steps,
    decay_rate,
    regularization_parameter,
    dropout_keep_prob,
)

# GET AND REFORMAT ALL DATASETS
(
    train_dataset,
    train_labels,
    valid_dataset,
    valid_labels,
    test_dataset,
    test_labels,
) = get_and_reformat_all_datasets()
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

# RUN TRAINING
batch_size = 150
num_epochs = 3
# Floor division keeps the step count an integer. With "/" Python 3 produced
# a float here; on Python 2 the result is the same as before.
num_steps = len(train_dataset) // batch_size * num_epochs
print('Steps : ', num_steps)
mlp.train(
    train_dataset,
    train_labels,
    valid_dataset,
    valid_labels,
    test_dataset,
    test_labels,
    batch_size,
    num_steps,
)
def flush(self):
    # Flush the two wrapped streams: self.log first, then self.terminal.
    # NOTE(review): presumably a method of the Logger class instantiated
    # below; the class header is not visible here -- confirm placement.
    self.log.flush()
    self.terminal.flush()


# From here on, everything written to standard output goes through a Logger.
sys.stdout = Logger()

# Training data: labels and images for indices 0 .. train_count - 1.
train_count = 60000
train_labels = MNIST.label(
    *range(train_count), setname='train', decode=True
)
train_samples = MNIST.image(
    *range(train_count), setname='train', flat=True, normalize=True
)

# Test data: labels and images for indices 0 .. test_count - 1.
test_count = 9000
test_labels = MNIST.label(
    *range(test_count), setname='test', decode=True
)
test_samples = MNIST.image(
    *range(test_count), setname='test', flat=True, normalize=True
)

mlp = MLP([784, 30, 10])

# One slot per epoch for the value returned by mlp.validate().
epoch_count = 10
train_errors = np.zeros(epoch_count)
test_errors = np.zeros(epoch_count)

begin_time = time.time()
print(time.strftime("Began training at %Y%m%d-%H%M%S"))
print()

for epoch_idx in range(1, epoch_count + 1):
    # Train one epoch at a time so the learning rate can shrink as 1/epoch.
    mlp.train(
        train_samples,
        train_labels,
        epochs=1,
        block_size=1,
        learn_rate=1 / epoch_idx,
    )
    train_errors[epoch_idx - 1] = mlp.validate(train_samples, train_labels)  # training error
    test_errors[epoch_idx - 1] = mlp.validate(test_samples, test_labels)  # test error
    print("Epoch %d done at " % epoch_idx + time.strftime("%Y%m%d-%H%M%S"))
    print("Training Accuracy: %.2f" % train_errors[epoch_idx - 1])