def load_pretrain_weights(self):
    """Initialise this model from pre-trained MLP and GMF checkpoints.

    Reads ``use_cuda``, ``device_id``, ``pretrain_mlp``, ``pretrain_mf`` and
    ``latent_dim_mf`` from ``self.config``.  The MLP checkpoint supplies the
    account/location MLP embeddings and the weights of every layer in
    ``self.fc_layers``; the GMF checkpoint supplies the account/location MF
    embeddings.  All four embedding tables are frozen afterwards.

    Side effect: ``self.config['latent_dim']`` is overwritten with
    ``latent_dim_mf`` before the GMF model is constructed.
    """
    config = self.config
    use_cuda = config['use_cuda'] is True

    mlp_model = MLP(config)
    # -1 is what resume_checkpoint receives when CUDA is not in use.
    device_id = -1
    if use_cuda:
        mlp_model.cuda()
        device_id = config['device_id']
    resume_checkpoint(mlp_model,
                      model_dir=config['pretrain_mlp'],
                      device_id=device_id)

    self.embedding_account_mlp.weight.data = \
        mlp_model.embedding_account.weight.data
    self.embedding_location_mlp.weight.data = \
        mlp_model.embedding_location.weight.data
    # Only the weights are copied; biases keep their current values.
    for idx, layer in enumerate(self.fc_layers):
        layer.weight.data = mlp_model.fc_layers[idx].weight.data

    config['latent_dim'] = config['latent_dim_mf']
    gmf_model = GMF(config)
    if use_cuda:
        gmf_model.cuda()
    resume_checkpoint(gmf_model,
                      model_dir=config['pretrain_mf'],
                      device_id=device_id)

    self.embedding_account_mf.weight.data = \
        gmf_model.embedding_account.weight.data
    self.embedding_location_mf.weight.data = \
        gmf_model.embedding_location.weight.data

    # Freeze the pre-trained embeddings.  The previous code assigned a
    # ``require`` attribute on the modules, which autograd never reads, so
    # the embeddings silently kept training.
    frozen = (
        self.embedding_account_mlp,
        self.embedding_location_mlp,
        self.embedding_account_mf,
        self.embedding_location_mf,
    )
    for embedding in frozen:
        embedding.weight.requires_grad = False
def create_brain():
    """Build an MLP, run it through ``load_training`` and save the result.

    Returns:
        The object returned by ``load_training`` for ``data/train.csv``,
        after ``saveNetwork()`` has been called on it.
    """
    layer_sizes = [24, 48, 24, 12, 1]
    network = load_training('data/train.csv', MLP(layer_sizes))
    network.saveNetwork()
    return network
def wine_test(eta=0.1, alpha=0, max_iter=500, train_size=0.7):
    """Train and evaluate an MLP classifier on the ``.data`` file in ``datasets``.

    The first column of the parsed file is used as the class label, the
    remaining columns as features.  Features are min-max scaled per column
    and the labels are converted with ``class_ind`` before ``MLP.run`` is
    called in ``'C'`` mode.

    Args:
        eta, alpha, max_iter, train_size: forwarded unchanged to ``MLP.run``.

    Returns:
        Whatever ``MLP.run`` returns, or ``None`` when the directory holds
        no ``.data`` file.
    """
    for filename in os.listdir("datasets"):
        if not filename.endswith('.data'):
            continue
        print('\nfile: ', filename)

        # Pre-process the 'wine' dataset.  The context manager closes the
        # file handle, which the previous bare open().read() leaked.
        with open(os.path.join("datasets", filename)) as handle:
            data = handle.read()
        # ``float`` replaces the ``np.float`` alias removed from NumPy.
        X = np.array(dados_in(data)).astype(float)
        Y = X[:, 0]
        X = X[:, 1:X.shape[1]]

        # Normalise X: min-max scale every feature column.
        col_min = X.min(axis=0)
        col_max = X.max(axis=0)
        X = (X - col_min) / (col_max - col_min)

        # Binarise the output classes.
        Y = class_ind(Y)
        print('Processamento do wine')
        mlp = MLP()
        # NOTE(review): returning here means only the first matching file is
        # processed - confirm this matches the intended behaviour.
        return mlp.run(X, Y, 'C', alpha=alpha, max_iter=max_iter, eta=eta,
                       train_size=train_size)
def music_geo_test(eta=0.1, alpha=0.5, max_iter=500, train_size=0.7):
    """Train and evaluate an MLP regressor on the ``.txt`` file in ``datasets``.

    The last two columns of the parsed file are the regression targets, the
    remaining columns the features.  Both are min-max scaled per column
    before ``MLP.run`` is called in ``'R'`` mode.

    Args:
        eta, alpha, max_iter, train_size: forwarded unchanged to ``MLP.run``.

    Returns:
        Whatever ``MLP.run`` returns, or ``None`` when the directory holds
        no ``.txt`` file.
    """
    mlp = MLP()
    for filename in os.listdir("datasets"):
        if not filename.endswith('.txt'):
            continue
        print('\nfile: ', filename)

        # Initial processing of the Music file.  The context manager closes
        # the file handle, which the previous bare open().read() leaked.
        with open(os.path.join('datasets', filename)) as handle:
            data = handle.read()
        # ``float`` replaces the ``np.float`` alias removed from NumPy.
        matrix = np.array(dados_in(data)).astype(float)
        n_cols = matrix.shape[1]
        Y = matrix[:, n_cols - 2:n_cols]
        X = matrix[:, 0:n_cols - 2]

        # Min-max scale every feature column and every target column.
        X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
        Y = (Y - Y.min(axis=0)) / (Y.max(axis=0) - Y.min(axis=0))

        print('Processando do Music ')
        # NOTE(review): returning here means only the first matching file is
        # processed - confirm this matches the intended behaviour.
        return mlp.run(X, Y, 'R', alpha=alpha, max_iter=max_iter, eta=eta,
                       train_size=train_size)
def __init__(self, dim_x, dim_y, embed_size=16, hidden_layer_size=96):
    """Create the sub-modules of the network.

    Args:
        dim_x, dim_y: grid dimensions; their product is stored as
            ``max_digit`` and they are passed to ``rrn.determine_edges``.
        embed_size: output width of the linear embedding layer.
        hidden_layer_size: width used by every MLP and by the LSTM.
    """
    super(NewRRN, self).__init__()
    self.max_digit = dim_x * dim_y
    self.embed_size = embed_size
    self.hidden_layer_size = hidden_layer_size
    self.edges = rrn.determine_edges(dim_x, dim_y)

    hidden = hidden_layer_size
    n_symbols = self.max_digit + 1

    # Sub-modules are created in the same order as before so that random
    # initialisation draws are unchanged.
    self.embed_layer = nn.Linear(n_symbols, embed_size)
    self.input_mlp = MLP([embed_size] + [hidden] * 3)
    self.f = MLP([2 * hidden] + [hidden] * 3)
    self.g_mlp = MLP([2 * hidden] + [hidden] * 3)
    self.g_lstm = nn.LSTM(hidden, hidden)
    self.r = MLP([hidden, hidden, hidden, n_symbols])
def main():
    """Train an MLP, plot its accuracy and loss curves, then print test accuracy.

    .. todo::

        * TODO: Make stratified train/test split
        * TODO: Stochastic gradient descent and mini batch
        * TODO: Adam solver
        * TODO: Learning rate change during training
    """
    name_of_labels = get_dict_labels()
    train_data, train_labels, test_data, test_labels = get_train_and_test()
    show_example_image(train_data, train_labels, name_of_labels)

    mlp = MLP(verbose=False, restore=True)
    params_values, cost_history, accuracy_history = mlp.train(
        np.transpose(train_data),
        train_labels,
        epochs=100,
        learning_rate=0.03,
    )

    # One figure per history: accuracy first, then loss.
    for history, y_label in ((accuracy_history, 'acc'), (cost_history, 'loss')):
        plt.plot(history)
        plt.ylabel(y_label)
        plt.xlabel('epochs')
        plt.show()

    acc = mlp.test(np.transpose(test_data), test_labels)
    print(acc)
def __init__(self, input_dim, hidden_dim, num_layers, output_dim,
             window_size, gpu=False):
    """Build a 1-D convolution optionally followed by an MLP.

    With ``num_layers <= 1`` the convolution maps straight to
    ``output_dim`` and no MLP is created (``self.mlp`` is ``None``).
    Otherwise the convolution maps to ``hidden_dim`` and an MLP with
    ``num_layers - 1`` layers maps on to ``output_dim``.
    """
    super(Cnn, self).__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.output_dim = output_dim
    self.window_size = window_size
    self.gpu = gpu

    if num_layers <= 1:
        self.cnn = Conv1d(input_dim, output_dim, window_size)
        self.mlp = None
    else:
        self.cnn = Conv1d(input_dim, hidden_dim, window_size)
        self.mlp = MLP(hidden_dim, hidden_dim, num_layers - 1, output_dim)
def __init__(self, window_size, num_cnn_layers, cnn_hidden_dim,
             num_mlp_layers, mlp_hidden_dim, num_classes, embeddings,
             pooling=max_pool_seq, gpu=False):
    """Assemble a CNN encoder and an MLP classifier head.

    The CNN input width is the length of the first embedding vector; its
    hidden and output widths are both ``cnn_hidden_dim``.  The total number
    of parameter elements is printed once construction is finished.
    """
    super(PooledCnnClassifier, self).__init__()
    self.window_size = window_size
    self.hidden_dim = cnn_hidden_dim
    self.num_cnn_layers = num_cnn_layers
    self.num_mlp_layers = num_mlp_layers
    self.num_classes = num_classes
    self.embeddings = embeddings
    self.pooling = pooling

    embedding_dim = len(embeddings[0])
    self.cnn = Cnn(embedding_dim, cnn_hidden_dim, num_cnn_layers,
                   cnn_hidden_dim, window_size, gpu=gpu)
    self.mlp = MLP(cnn_hidden_dim, mlp_hidden_dim, num_mlp_layers,
                   num_classes)
    self.to_cuda = to_cuda(gpu)

    param_count = sum(p.nelement() for p in self.parameters())
    print("# params:", param_count)
def __init__(self, dim_x, dim_y, embed_size=16, hidden_layer_size=96):
    """Create the sub-modules of the network.

    Args:
        dim_x, dim_y: grid dimensions; their product is stored as
            ``max_digit`` and they are passed to
            ``sudoku_model_utils.determine_edges``.
        embed_size: embedding dimension.
        hidden_layer_size: width used by every MLP and by the LSTM.
    """
    super(RRN, self).__init__()
    self.max_digit = dim_x * dim_y
    self.embed_size = embed_size
    self.hidden_layer_size = hidden_layer_size
    self.edges = sudoku_model_utils.determine_edges(dim_x, dim_y)

    hidden = hidden_layer_size

    # Sub-modules are created in the same order as before so that random
    # initialisation draws are unchanged.
    # The embedding table has max_digit + 1 rows; the read-out MLP below
    # has max_digit outputs.
    self.embed_layer = nn.Embedding(self.max_digit + 1, embed_size)
    self.input_mlp = MLP([embed_size] + [hidden] * 3)
    self.f = MLP([2 * hidden] + [hidden] * 3)
    self.g_mlp = MLP([2 * hidden] + [hidden] * 3)
    self.g_lstm = nn.LSTM(hidden, hidden)
    self.r = MLP([hidden, hidden, hidden, self.max_digit])
def language_dependent(dataset, classification_threshold, report_name,
                       mlp_params, hidden_layer_sizes=None):
    """Run the baseline experiment for one dataset unless its report exists.

    Args:
        dataset: dataset name; selects
            ``<FEATURES>/<dataset>/<dataset>_complete.csv``.
        classification_threshold: forwarded to ``fit_and_score``.
        report_name: forwarded to ``fit_and_score``.
        mlp_params: mapping read for ``test_size``, ``alpha``, ``max_iter``,
            ``activation`` and ``solver``; also passed to ``MLP``.
        hidden_layer_sizes: passed to ``MLP``.  When ``None`` the experiment
            is named ``"baseline"`` and the number of CSV columns is used;
            otherwise the experiment is named ``"baseline_200"``.

    Returns:
        ``None``.  Returns early, before reading the CSV, when the report
        already exists.
    """
    csv_file = '{}/{}/{}_complete.csv'.format(FEATURES, dataset, dataset)
    # Identity test: ``== None`` would be evaluated element-wise (and fail
    # in a boolean context) if an array-like were passed.
    experiment = "baseline" if hidden_layer_sizes is None else "baseline_200"
    report_path = get_report_path(experiment, dataset,
                                  mlp_params["test_size"],
                                  mlp_params["alpha"],
                                  mlp_params["max_iter"],
                                  mlp_params["activation"],
                                  mlp_params["solver"])
    if report_already_exists(report_path):
        return

    frame = pd.read_csv(csv_file, sep=";")
    # Default hidden layer size is features + 1, which equals the column
    # count (features + label).
    if hidden_layer_sizes is None:
        hidden_layer_sizes = len(frame.columns)

    mlp = MLP(hidden_layer_sizes, mlp_params=mlp_params, dataset=dataset,
              experiment=experiment)
    train, test = mlp.split_train_test(frame,
                                       test_size=mlp_params["test_size"])
    fit_and_score(mlp, train, test, classification_threshold, report_path,
                  report_name)
def __init__(self, input_size=1, output_size=1, hidden_units=4,
             activations=None, learning_rate=0.01, max_epoch=200,
             random_state=0, feedback_coef=1):
    """
    Constructor of Jordan class

    :param input_size: number of features
    :param output_size: size of output vector for a sample
    :param hidden_units: number of hidden layer units
    :param activations: activation functions for different layers;
        defaults to ``['tanh', 'tanh']`` when omitted
    :param learning_rate: learning rate
    :param max_epoch: forwarded to the MLP and stored on the instance
    :param random_state: forwarded to the MLP
    :param feedback_coef: coefficient of feedback signal
    """
    # A fresh list per instance: the former list default was stored on
    # ``self.activations`` and therefore shared by every instance that
    # relied on the default.
    if activations is None:
        activations = ['tanh', 'tanh']

    # create MLP; its input layer has one unit more than ``input_size``
    n = [input_size + 1, hidden_units, output_size]
    self.mlp = MLP(n=n,
                   activations=activations,
                   type_of_cost='MSE',
                   learning_rate=learning_rate,
                   max_epoch=max_epoch,
                   mode='stochastic',
                   random_state=random_state)
    self.__max_epochs = max_epoch
    self.feedback_coef = feedback_coef
    self.shape = [input_size, hidden_units, output_size]
    self.activations = activations
def __init__(self, input_dim, hidden_dim, layers, dropout, device):
    '''
    layers: number of (MLP, RES) module pairs to create
    input_dim: input width of the first pair
    hidden_dim: output width of every pair and input width of all later pairs
    dropout: stored on the instance as ``self.dropout``
    device: stored on the instance as ``self.device``
    '''
    super(GraphCNN, self).__init__()
    self.device = device
    self.hidden_dim = hidden_dim
    self.layers = layers

    ### Parallel lists of MLPs and RES modules, one pair per layer
    self.mlps = nn.ModuleList()
    self.res = nn.ModuleList()
    for layer in range(self.layers):
        # Only the first pair sees the raw input width.
        in_dim = input_dim if layer == 0 else hidden_dim
        self.mlps.append(MLP(in_dim, hidden_dim))
        self.res.append(RES(in_dim, hidden_dim))

    self.dropout = dropout
def testMLP(self):
    '''
    Using MLP of one hidden layer and one softmax layer

    Builds the network from ``./snippet_mlp.conf``, trains it for
    ``configer.nepoch`` epochs on the snippet training set, and reports
    build time, per-epoch cost/accuracy, training time and test accuracy.
    '''
    conf_filename = './snippet_mlp.conf'
    start_time = time.time()
    configer = MLPConfiger(conf_filename)
    mlpnet = MLP(configer, verbose=True)
    end_time = time.time()
    pprint('Time used to build the architecture of MLP: %f seconds' %
           (end_time - start_time))

    # Training
    start_time = time.time()
    # ``range`` instead of ``xrange``: same iteration on Python 2, and
    # defined on Python 3.
    for i in range(configer.nepoch):
        cost, accuracy = mlpnet.train(self.snippet_train_set,
                                      self.snippet_train_label)
        pprint('epoch %d, cost = %f, accuracy = %f' % (i, cost, accuracy))
    end_time = time.time()
    pprint(
        'Time used for training MLP network on Snippet task: %f minutes' %
        ((end_time - start_time) / 60))

    # Test
    test_size = self.snippet_test_label.shape[0]
    prediction = mlpnet.predict(self.snippet_test_set)
    accuracy = np.sum(
        prediction == self.snippet_test_label) / float(test_size)
    pprint('Test accuracy: %f' % accuracy)
def testMLP(self):
    '''
    Sentiment analysis task for sentence representation using MLP,
    with one hidden layer and one softmax layer.

    Builds the network from ``./sentiment_mlp.conf``, trains it for
    ``configer.nepoch`` epochs with an epoch-dependent rate, and reports
    build time, per-epoch cost/accuracy, training time and test accuracy.
    '''
    conf_filename = './sentiment_mlp.conf'
    start_time = time.time()
    configer = MLPConfiger(conf_filename)
    mlpnet = MLP(configer, verbose=True)
    end_time = time.time()
    pprint('Time used to build the architecture of MLP: %f seconds.' %
           (end_time - start_time))

    # Training
    start_time = time.time()
    # ``range`` instead of ``xrange``: same iteration on Python 2, and
    # defined on Python 3.
    for i in range(configer.nepoch):
        # ``//`` pins the integer division that ``i / 500`` performed under
        # Python 2, so the schedule is the same on every interpreter.
        # NOTE(review): with floor division the rate stays at 2.0 for the
        # first 500 epochs - confirm a smooth decay was not intended.
        rate = 2.0 / ((1.0 + i // 500) ** 2)
        cost, accuracy = mlpnet.train(self.senti_train_set,
                                      self.senti_train_label, rate)
        pprint('epoch %d, cost = %f, accuracy = %f' % (i, cost, accuracy))
    end_time = time.time()
    pprint(
        'Time used for training MLP network on Sentiment analysis task: %f minutes.'
        % ((end_time - start_time) / 60))

    # Test
    prediction = mlpnet.predict(self.senti_test_set)
    accuracy = np.sum(prediction == self.senti_test_label) / float(
        self.test_size)
    pprint('Test accuracy: %f' % accuracy)
def __init__(self, args, num_users, num_items):
    """Initialise the base model and build the GMF and MLP sub-models.

    ``args.layers`` and ``args.reg_layers`` are strings that are evaluated
    into Python objects; ``args.num_factors`` is stored as-is.
    """
    BaseModel.__init__(self, args, num_users, num_items)

    # SECURITY NOTE(review): eval() executes arbitrary code.  If these
    # values can come from untrusted input, ast.literal_eval would be a
    # safer replacement - confirm the expected format before switching.
    layer_spec = eval(args.layers)
    reg_spec = eval(args.reg_layers)

    self.layers = layer_spec
    self.lambda_layers = reg_spec
    self.num_factors = args.num_factors

    self.model_GMF = GMF(args, num_users, num_items)
    self.model_MLP = MLP(args, num_users, num_items)
def load_pretrain_weights(self):
    """Initialise this model from pre-trained MLP and GMF checkpoints.

    The MLP checkpoint supplies the user/item MLP embeddings and the
    weights of every layer in ``self.fc_layers``; the GMF checkpoint
    supplies the user/item MF embeddings.  The output layer is set to half
    of the concatenated MLP and GMF output weights, with half of the summed
    biases.

    Side effect: ``self.config['latent_dim']`` is overwritten twice and is
    left at ``latent_dim_mf``.
    """
    config = self.config
    on_gpu = config['use_cuda'] is True

    # --- MLP part -------------------------------------------------------
    config['latent_dim'] = config['latent_dim_mlp']
    mlp_model = MLP(config)
    if on_gpu:
        mlp_model.cuda()
    resume_checkpoint(mlp_model,
                      model_dir=config['pretrain_mlp'],
                      device_id=config['device_id'])

    self.embedding_user_mlp.weight.data = mlp_model.embedding_user.weight.data
    self.embedding_item_mlp.weight.data = mlp_model.embedding_item.weight.data
    # Only the weights are copied; biases keep their current values.
    for idx, layer in enumerate(self.fc_layers):
        layer.weight.data = mlp_model.fc_layers[idx].weight.data

    # --- GMF part -------------------------------------------------------
    config['latent_dim'] = config['latent_dim_mf']
    gmf_model = GMF(config)
    if on_gpu:
        gmf_model.cuda()
    resume_checkpoint(gmf_model,
                      model_dir=config['pretrain_mf'],
                      device_id=config['device_id'])

    self.embedding_user_mf.weight.data = gmf_model.embedding_user.weight.data
    self.embedding_item_mf.weight.data = gmf_model.embedding_item.weight.data

    # --- Output layer: equal-weight blend of both heads -----------------
    mlp_out = mlp_model.affine_output
    gmf_out = gmf_model.affine_output
    joined_weights = torch.cat([mlp_out.weight.data, gmf_out.weight.data],
                               dim=-1)
    self.affine_output.weight.data = 0.5 * joined_weights
    self.affine_output.bias.data = 0.5 * (mlp_out.bias.data +
                                          gmf_out.bias.data)
class DBN(object):
    """A stack of RBMs whose weights are used to initialise an MLP."""

    def __init__(self, layers, n_labels):
        """Create one RBM per consecutive pair in ``layers`` plus an MLP.

        The MLP is built for ``layers + [n_labels]``, so ``layers`` must
        support concatenation with a list.
        """
        self.rbms = [
            RBM(n_visible, n_hidden, epochs=10, lr=0.1)
            for n_visible, n_hidden in zip(layers[:-1], layers[1:])
        ]
        self.n_labels = n_labels
        self.mlp = MLP(act_type='Sigmoid',
                       opt_type='Adam',
                       layers=layers + [n_labels],
                       epochs=20,
                       learning_rate=0.01,
                       lmbda=1e-2)

    def pretrain(self, x):
        """Fit each RBM in turn on the ``marginal_h`` output of the previous one."""
        activations = x
        for rbm in self.rbms:
            rbm.fit(activations)
            activations = rbm.marginal_h(activations)

    def finetuning(self, x, labels):
        """Copy RBM parameters into the MLP, add a random last layer, and fit."""
        last_width = self.rbms[-1].w.shape[1]

        # assign weights (the random draw for w precedes the one for b)
        weights = [rbm.w for rbm in self.rbms]
        weights.append(np.random.randn(last_width, self.n_labels))
        self.mlp.w = weights

        biases = [rbm.b for rbm in self.rbms]
        biases.append(np.random.randn(1, self.n_labels))
        self.mlp.b = biases

        self.mlp.fit(x, labels)

    def fit(self, x, y):
        """Run ``pretrain`` on ``x`` and then ``finetuning`` on ``(x, y)``."""
        self.pretrain(x)
        self.finetuning(x, y)

    def predict(self, x):
        """Return the MLP's prediction for ``x``."""
        return self.mlp.predict(x)
def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
             output_dim, final_drop_out, learn_eps,
             neighbor_aggregating_type, graph_pooling_type, device):
    """Create the per-layer MLPs, batch norms and linear read-out layers.

    ``num_layers - 1`` MLP/BatchNorm pairs and ``num_layers`` linear layers
    are created.  Only the first MLP and the first linear layer take
    ``input_dim`` as input width; all others take ``hidden_dim``.
    """
    super(GraphIsomorphismNetwork, self).__init__()
    self.num_layers = num_layers
    self.final_drop_out = final_drop_out
    self.neighbor_aggregating_type = neighbor_aggregating_type
    self.graph_pooling_type = graph_pooling_type
    self.learn_eps = learn_eps
    self.device = device

    self.mlps = nn.ModuleList()
    self.batch_norms = torch.nn.ModuleList()
    self.eps = nn.Parameter(torch.zeros(num_layers - 1))

    for layer in range(num_layers - 1):
        in_dim = input_dim if layer == 0 else hidden_dim
        self.mlps.append(MLP(num_mlp_layers, in_dim, hidden_dim, hidden_dim))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

    self.linears = nn.ModuleList()
    for layer in range(num_layers):
        in_dim = input_dim if layer == 0 else hidden_dim
        self.linears.append(nn.Linear(in_dim, output_dim))
def __init__(self, env, optim=Adam, policy_lr=0.001, value_lr=0.001,
             policy_hidden_size=[32], value_hidden_size=[32], gamma=0.95,
             policy_lambda=0.9, value_lambda=0.9, batch_size=3000,
             epochs=15, update_every=50, render=False):
    """Store hyper-parameters and build the policy and value networks.

    The policy network is a ``CategoricalMLP`` from the environment's
    observation size to its action size; the value network is an ``MLP``
    from the observation size to a single output.  Each gets its own
    optimiser created by ``optim``.

    The hidden-size lists are only concatenated into new lists here; they
    are neither stored nor mutated.
    """
    self.env = env
    self.batch_size = batch_size
    self.render = render
    self.epochs = epochs
    self.gamma = gamma
    self.policy_lambda = policy_lambda
    self.value_lambda = value_lambda
    self.update_every = update_every
    self.writer_count = 0

    obs_size = env.obs_space_size
    action_size = env.action_space_size

    policy_layout = [obs_size] + policy_hidden_size + [action_size]
    self.policy_mlp = CategoricalMLP(policy_layout)
    self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)

    value_layout = [obs_size] + value_hidden_size + [1]
    self.value_mlp = MLP(value_layout)
    self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)
def __init__(self, config):
    """Create the MLP model, its optimiser and an MSE loss.

    ``config`` is kept as the model configuration; when
    ``config['use_cuda']`` is exactly ``True`` the model is moved to CUDA
    before the optimiser is created.
    """
    self.config = config  # model configuration

    model = MLP(config)
    self.model = model
    if config['use_cuda'] is True:
        model.cuda()

    self.opt = use_optimizer(model, config)
    self.crit = torch.nn.MSELoss()
def __init__(self, input_shape, hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.tanh, learning_rate=3e-4,
             batch_size=1000):
    """Build a single-output MLP and an Adam op minimising squared error.

    ``self.x`` is the MLP input layer, ``self.y`` its flattened output and
    ``self.z`` the placeholder for target values.  ``self.sess`` starts as
    ``None``.
    """
    self.input_shape = input_shape
    self.hidden_sizes = hidden_sizes
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.sess = None

    # NOTE(review): the whole graph, optimiser included, is built inside
    # the variable scope - confirm against the original layout.
    with tf.variable_scope("mlp_fitting"):
        self.mlp = MLP(
            input_shape=input_shape,
            output_size=1,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=None,
            name='value',
        )
        self.x = self.mlp.get_input_layer()
        raw_output = self.mlp.get_output_layer()
        self.y = tf.reshape(raw_output, shape=(-1,))
        self.params = self.mlp.get_params()

        self.z = tf.placeholder(dtype=tf.float32, shape=(None,), name='z')
        loss = tf.reduce_mean(tf.square(self.z - self.y))
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.minimize(loss, var_list=self.params)
def test_XOR(self):
    """Fit an MLP on XOR, plot its decision surface, and check predictions.

    The final assertion thresholds the predictions for the four XOR inputs
    at 0.5 and compares them with the targets.
    """
    mlp = MLP(dims=[2, 5, 1], eta=0.1, activation='sigmoid',
              max_epochs=4000, alpha=0.55)
    X = np.array([[0, 0],
                  [0, 1],
                  [1, 0],
                  [1, 1]])
    T = np.array([[0],
                  [1],
                  [1],
                  [0]])
    mlp.fit(X, T)

    ## VISUALISATION ##
    # The grid has its own names.  It used to be stored in ``X``, so the
    # assertion below predicted on the 100x100 grid instead of the four
    # XOR inputs.
    axis_x = np.linspace(-0.5, 1.5, 100)
    axis_y = np.linspace(-0.5, 1.5, 100)
    grid_x, grid_y = np.meshgrid(axis_x, axis_y)

    def F(x, y):
        return mlp.predict(np.array([[x, y]]))

    Z = np.vectorize(F)(grid_x, grid_y)
    plt.pcolor(grid_x, grid_y, Z, cmap='RdBu')
    plt.colorbar()
    cntr = plt.contour(grid_x, grid_y, Z, levels=[0.5])
    plt.clabel(cntr, inline=1, fontsize=10)
    plt.scatter([0, 1], [0, 1], s=500, c='r')
    plt.scatter([1, 0], [0, 1], s=500, marker='v')
    plt.grid()
    plt.show()
    ###################

    prediction = mlp.predict(X)
    self.assertTrue(np.all((prediction > 0.5) == T))
def __init__(self, input_shape, output_size, hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.tanh):
    """Build a softmax-output MLP with a ``Categorical`` distribution.

    ``self.x`` is the MLP input layer and ``self.prob`` its output layer.
    ``self.locals`` captures the constructor's local variables.
    """
    self.hidden_sizes = hidden_sizes
    self.output_size = output_size
    self.input_shape = input_shape
    # No new local names are introduced in this method, so the captured
    # mapping holds only ``self`` and the constructor arguments.
    self.locals = locals()
    self.distribution = Categorical(output_size)
    self.params = []

    with tf.variable_scope("policy"):
        # Probability network: softmax over ``output_size`` outputs
        self.prob_mlp = MLP(
            input_shape=input_shape,
            output_size=output_size,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=tf.nn.softmax,
            name='prob',
        )
        self.x = self.prob_mlp.get_input_layer()
        self.prob = self.prob_mlp.get_output_layer()
        self.params.extend(self.prob_mlp.get_params())
def _perform(self):
    """Build an MLP from ``self.params[1]`` and train it.

    ``self.params[2]`` and ``self.params[3]`` are passed to ``train`` as
    the third and fourth positional arguments.

    Returns:
        A ``(mlp, train_result)`` tuple.
    """
    network = MLP(self.params[1])
    outcome = network.train(
        self.X_train,
        self.y_train,
        self.params[2],
        self.params[3],
    )
    return network, outcome
def __init__(self, config):
    """Create two MLP models, a shared layer, linear layers and optimisers.

    Four square linear layers of size ``config['latent_dim']`` are created
    (two "share", two "metric").  Models A and B use ``use_optimizer``; the
    shared layer and the metric layers each get plain SGD with lr 0.1.
    """
    self.config = config  # model configuration
    dim = config['latent_dim']

    # Creation order is unchanged so random initialisation is unaffected.
    self.share_layer_A = torch.nn.Linear(dim, dim)
    self.share_layer_B = torch.nn.Linear(dim, dim)
    self.metric_layer_A = torch.nn.Linear(dim, dim)
    self.metric_layer_B = torch.nn.Linear(dim, dim)
    self.modelA = MLP(config)
    self.modelB = MLP(config)
    self.sharelayer = ShareLayer(config)

    # NOTE(review): only the three modules below are moved to CUDA; the
    # four linear layers above stay where they were created - confirm.
    if config['use_cuda'] is True:
        for module in (self.modelA, self.modelB, self.sharelayer):
            module.cuda()

    self.optA = use_optimizer(self.modelA, config)
    self.optB = use_optimizer(self.modelB, config)
    self.optshare = torch.optim.SGD(self.sharelayer.parameters(), lr=1e-1)
    self.optmetric_A = torch.optim.SGD(self.metric_layer_A.parameters(),
                                       lr=1e-1)
    self.optmetric_B = torch.optim.SGD(self.metric_layer_B.parameters(),
                                       lr=1e-1)
    self.crit = torch.nn.MSELoss()
def set_params(self):
    """Reset ``self.params`` and fill in any missing default components.

    A missing prior becomes ``Binomial(dim_h)``.  A missing posterior or
    conditional becomes a binomial MLP with no hidden layers (``dim_in`` to
    ``dim_h`` and ``dim_h`` to ``dim_in`` respectively).  The posterior and
    conditional are then renamed after this object.

    Raises:
        ValueError: if a posterior is supplied and it is a ``DARN``.
    """
    def default_mlp(n_in, n_out):
        # A fresh ``dim_hs`` list is created for each network.
        return MLP(n_in, n_out, dim_hs=[], rng=self.rng, trng=self.trng,
                   distribution='binomial')

    self.params = OrderedDict()

    if self.prior is None:
        self.prior = Binomial(self.dim_h)

    if self.posterior is None:
        self.posterior = default_mlp(self.dim_in, self.dim_h)
    elif isinstance(self.posterior, DARN):
        raise ValueError('DARN posterior not supported ATM')

    if self.conditional is None:
        self.conditional = default_mlp(self.dim_h, self.dim_in)

    self.posterior.name = self.name + '_posterior'
    self.conditional.name = self.name + '_conditional'
def create_brain():
    """Create an MLP, pass it through ``load_training``, and save it.

    Returns:
        The object returned by ``load_training`` for ``data/train.csv``,
        after its ``saveNetwork()`` method has been called.
    """
    untrained = MLP([24, 48, 24, 12, 1])
    trained = load_training('data/train.csv', untrained)
    trained.saveNetwork()
    return trained
def main():
    """Compare four MLP configurations on the digits dataset.

    The data is split 70/30 with stratification and standardised using
    statistics from the training split.  For every configuration the test
    accuracy is printed, misclassified test images are shown, and the
    learning history is displayed.
    """
    # prepare sample data and target variable
    X, y = load_digits(return_X_y=True)

    # split sample data into training data and test data and standardize them
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1, stratify=y)
    scaler = StandardScaler().fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_test_std = scaler.transform(X_test)

    # (n_hidden, l2, eta) for each classifier; everything else is shared
    variants = [
        (100, 0.01, 0.0005),
        (100, 0.01, 0.01),
        (100, 1.0, 0.0005),
        (10, 0.01, 0.0005),
    ]
    classifiers = [
        MLP(n_hidden=n_hidden, l2=l2, epochs=200, eta=eta,
            minibatch_size=100, shuffle=True, seed=1)
        for n_hidden, l2, eta in variants
    ]

    for classifier in classifiers:
        # fit classifier
        classifier.fit(X_train_std, y_train)

        # show accuracy
        y_pred = classifier.predict(X_test_std)
        print('test accuracy: {}'.format(accuracy_score(y_test, y_pred)))

        # show some misclassified images (unscaled pixels are displayed)
        wrong = (y_test != y_pred)
        show_images(X_test[wrong], y_test[wrong], y_pred[wrong])

        # show learning history
        show_learning_history(classifier)
def __init__(self, ndata=1000, n_hidden=10, L1_reg=0.00, L2_reg=0.0001):
    """Compile Theano functions for the cost/gradient and the test error.

    The first ``ndata`` training samples from ``get_data()`` are placed in
    shared variables and an MLP with ``n_hidden`` hidden units is built on
    them.

    Sets:
        theano_cost_gradient: no-argument function returning
            ``[cost] + gradients`` (one gradient per classifier parameter).
        theano_testset_errors: no-argument function returning the
            classifier errors with the test set substituted in.
        nparams, param_sizes, param_shapes: sizes and shapes of the
            classifier parameters.
    """
    train_x, train_t, test_x, test_t = get_data()
    train_x = train_x[:ndata, :]
    train_t = np.asarray(train_t[:ndata], dtype="int32")

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    # Single pre-formatted argument: prints the same text as the former
    # Python 2 statement ``print "...: ", set(train_t)`` and is also valid
    # on Python 3.
    print("range of target values:  %s" % (set(train_t),))

    # allocate symbolic variables for the data.
    # Make it shared so it can be passed only once
    x = theano.shared(value=train_x, name='x')  # rasterized images
    t = theano.shared(value=train_t, name='t')  # 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                     n_out=10)
    self.classifier = classifier

    # the cost we minimize during training is the negative log likelihood
    # of the model plus the regularization terms (L1 and L2); cost is
    # expressed here symbolically
    cost = (classifier.negative_log_likelihood(t)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compute the gradient of cost with respect to theta (stored in
    # params); the resulting gradients are stored in the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]
    outputs = [cost] + gparams
    self.theano_cost_gradient = theano.function(inputs=(), outputs=outputs)

    # compute the errors applied to test set
    self.theano_testset_errors = theano.function(
        inputs=(),
        outputs=self.classifier.errors(t),
        givens={
            x: test_x,
            t: test_t
        })

    self.param_sizes = [p.get_value().size for p in classifier.params]
    self.nparams = sum(self.param_sizes)
    self.param_shapes = [p.get_value().shape for p in classifier.params]
def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
             output_dim, final_dropout, learn_eps, graph_pooling_type,
             neighbor_pooling_type, random, node_classification, device):
    '''
    num_layers: number of layers in the neural networks (INCLUDING the input layer)
    num_mlp_layers: number of layers in mlps (EXCLUDING the input layer)
    input_dim: dimensionality of input features
    hidden_dim: dimensionality of hidden units at ALL layers
    output_dim: number of classes for prediction
    final_dropout: dropout ratio on the final linear layer
    learn_eps: If True, learn epsilon to distinguish center nodes from neighboring nodes. If False, aggregate neighbors and center nodes altogether.
    neighbor_pooling_type: how to aggregate neighbors (mean, average, or max)
    graph_pooling_type: how to aggregate entire nodes in a graph (mean, average)
    random: when truthy, input_dim is increased by one before any layer is built
    node_classification: stored on the instance unchanged
    device: which device to use
    '''
    super(GraphCNN, self).__init__()

    if random:
        input_dim += 1

    self.final_dropout = final_dropout
    self.device = device
    self.num_layers = num_layers
    self.graph_pooling_type = graph_pooling_type
    self.neighbor_pooling_type = neighbor_pooling_type
    self.learn_eps = learn_eps
    self.eps = nn.Parameter(torch.zeros(self.num_layers - 1))
    self.random = random
    self.node_classification = node_classification

    ### List of MLPs
    self.mlps = torch.nn.ModuleList()
    ### List of batchnorms applied to the output of MLP
    ### (input of the final prediction linear layer)
    self.batch_norms = torch.nn.ModuleList()
    for layer in range(self.num_layers - 1):
        # Only the first MLP sees the (possibly widened) input features.
        in_dim = input_dim if layer == 0 else hidden_dim
        self.mlps.append(MLP(num_mlp_layers, in_dim, hidden_dim, hidden_dim))
        self.batch_norms.append(nn.BatchNorm1d(hidden_dim))

    # Linear functions that map the hidden representation at different
    # layers into a prediction score
    self.linears_prediction = torch.nn.ModuleList()
    for layer in range(num_layers):
        in_dim = input_dim if layer == 0 else hidden_dim
        self.linears_prediction.append(nn.Linear(in_dim, output_dim))

    # additional linear layer
    self.fc1 = nn.Linear(hidden_dim, output_dim)
def train_ceae(dataloader, **kwargs):
    """Train two CEAE autoencoders and a transmitter MLP jointly.

    :param dataloader: iterable of batches passed to ``ceae_train_step``
    :param kwargs: read for ``p_input_dim``, ``t_input_dim``, ``device``,
        ``lr``, ``train_num_epochs`` and ``model_save_folder``
    :return: ``(encoder, (train_history, test_history))`` where ``encoder``
        wraps the ``t`` autoencoder's encoder and the transmitter; the test
        history is returned as created (this function never fills it)
    """
    device = kwargs['device']
    p_autoencoder = CEAE(input_dim=kwargs['p_input_dim'],
                         latent_dim=50).to(device)
    t_autoencoder = CEAE(input_dim=kwargs['t_input_dim'],
                         latent_dim=50).to(device)

    # construct transmitter
    transmitter = MLP(input_dim=50, output_dim=50,
                      hidden_dims=[50]).to(device)

    ae_eval_train_history = defaultdict(list)
    ae_eval_test_history = defaultdict(list)

    all_params = chain(p_autoencoder.parameters(),
                       t_autoencoder.parameters(),
                       transmitter.parameters())
    ceae_optimizer = torch.optim.AdamW(all_params, lr=kwargs['lr'])

    def save_checkpoint(state, epoch, tag):
        # The save folder is looked up only when a checkpoint is written.
        filename = f'train_epoch_{epoch}_{tag}.pt'
        torch.save(state, os.path.join(kwargs['model_save_folder'], filename))

    # start autoencoder pretraining
    for epoch in range(int(kwargs['train_num_epochs'])):
        for step, batch in enumerate(dataloader):
            ae_eval_train_history = ceae_train_step(
                p_ae=p_autoencoder,
                t_ae=t_autoencoder,
                transmitter=transmitter,
                batch=batch,
                device=device,
                optimizer=ceae_optimizer,
                history=ae_eval_train_history)

        # NOTE(review): checkpoints are assumed to be written every 50th
        # epoch, after that epoch's batches - confirm.
        if epoch % 50 == 0:
            print(f'----CE Autoencoder Training Epoch {epoch} ----')
            save_checkpoint(p_autoencoder.encoder.state_dict(), epoch,
                            'p_encoder')
            save_checkpoint(t_autoencoder.encoder.state_dict(), epoch,
                            't_encoder')
            save_checkpoint(transmitter.state_dict(), epoch, 'transmitter')

    encoder = EncoderDecoder(encoder=t_autoencoder.encoder,
                             decoder=transmitter).to(device)

    return encoder, (ae_eval_train_history, ae_eval_test_history)
def train(self, X, Y, learning_rate=0.1, n_epochs=100, report_frequency=10,
          lambda_l2=0.0):
    """Train an MLP on the full data set with plain gradient descent.

    Every epoch is a single update computed on all of ``X``/``Y``.  The
    cost is printed every ``report_frequency`` epochs.  The compiled
    prediction function is stored as ``self.predict_model``.

    Args:
        X, Y: inputs and targets, copied into Theano shared variables.
        learning_rate: step size of the parameter update.
        n_epochs: number of updates to perform.
        report_frequency: print interval in epochs.
        lambda_l2: weight of the ``mlp.l2`` term; the term is only added
            when this is greater than zero.
    """
    self.report_frequency = report_frequency

    # allocate symbolic variables for the data
    x = T.matrix('x')
    y = T.matrix('y')

    # put the data in shared memory
    float_type = theano.config.floatX
    self.shared_x = theano.shared(numpy.asarray(X, dtype=float_type))
    self.shared_y = theano.shared(numpy.asarray(Y, dtype=float_type))

    rng = numpy.random.RandomState(1234)

    # initialize the mlp
    mlp = MLP(rng=rng, input=x, n_in=self.n_in, n_out=self.n_out,
              n_hidden=self.n_hidden, activation=self.activation)

    # define the cost function, possibly with regularizing term
    cost = mlp.cost(y)
    if lambda_l2 > 0.0:
        cost = cost + lambda_l2 * mlp.l2

    # gradient of the cost with respect to every parameter, and the
    # matching gradient-descent update for each of them
    updates = []
    for param in mlp.params:
        gparam = T.grad(cost, param)
        updates.append((param, param - learning_rate * gparam))

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[],
        outputs=cost,
        updates=updates,
        givens={x: self.shared_x, y: self.shared_y},
    )

    # define function that returns model prediction
    self.predict_model = theano.function(inputs=[mlp.input],
                                         outputs=mlp.y_pred)

    ###############
    # TRAIN MODEL #
    ###############
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        epoch_cost = train_model()
        if epoch % self.report_frequency == 0:
            print("epoch: %d  cost: %f" % (epoch, epoch_cost))
def fit_model(self, X, Y, num_classes):
    """Create and train a classifier of the configured model type.

    An ``MLP`` is built when ``self.modeltype`` is ``"mlp"``, otherwise an
    ``RNN``.  The training function is applied to every ``(x, y)`` pair,
    ``self.max_iter`` times over.

    Returns:
        The trained classifier.
    """
    # NOTE(review): the MLP branch reads ``self.hidden_sizes`` while the
    # RNN branch reads ``self.hidden_size`` - confirm both attributes exist.
    if self.modeltype != "mlp":
        classifier = RNN(self.input_size, self.hidden_size, num_classes)
    else:
        classifier = MLP(self.input_size, self.hidden_sizes, num_classes)

    train_func = classifier.get_train_func(self.learning_rate)
    for _ in range(self.max_iter):
        for sample, target in zip(X, Y):
            train_func(sample, target)
    return classifier
def load_nn_dwl(paramFileName):
    """Rebuild an MLP classifier from parameters stored in a NumPy archive.

    The archive entry ``'arr_0'`` must unpack into exactly four items
    ``W1, b1, W2, b2``.  Layer sizes are derived from the weight matrices:
    ``len(W1)`` inputs, ``len(W2)`` hidden units and ``len(W2[0])`` outputs.

    Args:
        paramFileName: path of the archive to read.

    Returns:
        An ``MLP`` on which ``load_model_params`` has been called with the
        stored parameters.
    """
    # Binary mode plus a context manager: the file used to be opened in
    # text mode ('r') and was never closed.  The entry is read inside the
    # ``with`` block because the archive loads it from the open file.
    # NOTE(review): if 'arr_0' was saved as an object array, recent NumPy
    # versions need allow_pickle=True here - confirm how it was written.
    with open(paramFileName, 'rb') as param_file:
        params = numpy.load(param_file)['arr_0']

    W1, _b1, W2, _b2 = params
    n_input = len(W1)
    n_hidden = len(W2)
    n_out = len(W2[0])

    x = T.matrix('x')
    rng = numpy.random.RandomState(1234)
    classifier = MLP(rng=rng, input=x, n_in=n_input, n_hidden=n_hidden,
                     n_out=n_out)
    classifier.load_model_params(params)
    return classifier
def __init__(self, n_ins, hidden_layers_sizes, n_outs, numpy_rng=None,
             theano_rng=None):
    """Initialise the base MLP and add the symbolic training cost.

    The cost is the sum of squared differences between ``self.y`` and the
    output-layer value selected for each row by the index in ``self.a``.
    """
    MLP.__init__(self, n_ins, hidden_layers_sizes, n_outs, numpy_rng,
                 theano_rng)

    # labels (used for minibatch sgd during RL)
    self.y = T.vector('y')

    # actions: for each label there is a corresponding number here giving
    # the output node whose value it is compared to during SGD
    self.a = T.ivector('a')

    # The training error
    row_index = T.arange(self.a.shape[0])
    selected_output = self.outLayer.output[row_index, self.a]
    self.training_cost = T.sum(T.sqr(selected_output - self.y))
def main():
    """Train a 2-3-2 sigmoid MLP on a four-sample dataset and print results.

    For every sample the index, the input, the encoded arg-max of the
    network output and the expected target are printed.
    """
    dataset = [
        ((0, 0), (0, 1)),
        ((0, 1), (1, 0)),
        ((1, 0), (1, 0)),
        ((1, 1), (0, 1)),
    ]

    # Derivative of the sigmoid, expressed in terms of its output ``o``.
    dsigm = lambda o: o * (1 - o)

    # One vectorised activation / derivative per non-input layer.
    activation_functions = tuple(np.vectorize(sigmoid) for _ in range(2))
    derivation_functions = tuple(np.vectorize(dsigm) for _ in range(2))

    m = MLP((2, 3, 2), activation_functions, derivation_functions)
    m.train(dataset, epsilon=0, alpha=0.9, eta=.25, epochs=2500)

    for i, (inputs, expected) in enumerate(dataset):
        o = m.feedForward(inputs)
        print(i, inputs, encode(o.argmax(), len(o)), ' (expected ', expected,
              ')')
def setUp(self):
    """Create a 2-2-1 network with the XOR patterns attached."""
    network = MLP()
    for size in (2, 2, 1):
        network.add_layer(Layer(size))
    network.init_network()

    network.patterns = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    self.xor = network
def test_xor(self):
    """Train a 2-2-1 network on XOR and check every pattern's output."""
    network = MLP()
    for size in (2, 2, 1):
        network.add_layer(Layer(size))
    network.init_network()

    truth_table = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    network.train(truth_table)

    for inputs, expected in truth_table:
        self.assertEqual(network.run(inputs), expected)
class CWS:
    """Sequence labeller built on a windowed MLP with four output classes."""

    def __init__(self, s):
        """Build the MLP from the settings mapping ``s``.

        ``s`` is read for ``ne``, ``de``, ``win``, ``nh``, ``L2_reg`` and
        ``seed`` here; ``fit`` additionally reads ``valid_size``, ``lr``,
        ``n_epochs`` and ``verbose``.
        """
        self.mlp = MLP(s['ne'], s['de'], s['win'], s['nh'], 4, s['L2_reg'],
                       np.random.RandomState(s['seed']))
        self.s = s

    def fit(self, lex, label):
        """Train for ``s['n_epochs']`` epochs and report P/R/F every epoch.

        The first ``1 - valid_size`` fraction of the (shuffled) sentences is
        used for training, the rest for validation.  Sentences of length 2
        are skipped.

        Side effect: sets ``s['clr']`` to ``s['lr']``.
        """
        s = self.s
        n_sentences = len(lex)
        n_train = int(n_sentences * (1. - s['valid_size']))
        s['clr'] = s['lr']

        # ``range`` instead of ``xrange``: same iteration on Python 2, and
        # defined on Python 3.
        for e in range(s['n_epochs']):
            shuffle([lex, label], s['seed'])
            train_lex, valid_lex = lex[:n_train], lex[n_train:]
            train_label, valid_label = label[:n_train], label[n_train:]
            tic = time.time()
            cost = 0
            for i in range(n_train):
                if len(train_lex[i]) == 2:
                    continue
                words = np.asarray(contextwin(train_lex[i], s['win']),
                                   dtype='int32')
                labels = [0] + train_label[i] + [0]
                y_pred = self.mlp.predict(words)
                cost += self.mlp.fit(words, [0] + y_pred, [0] + labels,
                                     s['clr'])
                self.mlp.normalize()
                if s['verbose']:
                    # Progress line without newline; the trailing '\r'
                    # returns the cursor so the next update overwrites it.
                    # Written directly to stdout so it behaves the same on
                    # Python 2 and 3 (replaces ``print a, b,``).
                    progress = '[learning] epoch %i >> %2.2f%%' % (
                        e + 1, (i + 1) * 100. / n_train)
                    elapsed = 'completed in %s << \r' % time_format(
                        time.time() - tic)
                    sys.stdout.write(progress + ' ' + elapsed)
                    sys.stdout.flush()

            summary = '[learning] epoch %i >> cost = %f' % (e + 1,
                                                           cost / n_train)
            used = ', %s used' % time_format(time.time() - tic)
            print(summary + ' ' + used)

            pred_y = self.predict(valid_lex)
            p, r, f = evaluate(pred_y, valid_label)
            print(' P: %2.2f%% R: %2.2f%% F: %2.2f%%' %
                  (p * 100., r * 100., f * 100.))
            # Saving the model whenever F improves is currently disabled.

    def predict(self, lex):
        """Return per-sentence predictions with the first and last item dropped."""
        s = self.s
        y = [
            self.mlp.predict(
                np.asarray(contextwin(x, s['win'])).astype('int32'))[1:-1]
            for x in lex
        ]
        return y

    def save(self):
        """Create the ``params`` directory if needed and save the MLP."""
        if not os.path.exists('params'):
            os.mkdir('params')
        self.mlp.save()

    def load(self):
        """Load the MLP parameters."""
        self.mlp.load()
def main():
    """Train a one-hidden-layer MLP for 50 epochs and report loss and error.

    Each epoch shuffles the training data, trains on its first ten samples,
    reports error and loss on the whole training set, and then sets the
    learning rate to ``A / (epoch / n_epochs + 1)``.  The error on the dev
    set is reported at the end.
    """
    training, dev = get_data()

    window_size = 5
    n_input = window_size
    n_hidden = 100
    n_output = 1
    A = 1
    num_hidden_layers = 1
    mlp = MLP(n_input, num_hidden_layers, n_hidden, n_output)

    n_epochs = 50
    step = False
    # ``//`` keeps the integer result that ``window_size/2`` produced under
    # Python 2.
    half_window = window_size // 2
    # Last argument passed to train/error/loss inside the loop.
    stride = half_window if step else 1

    # The initial loss always uses the half-window value, whatever ``step`` is.
    l = loss(mlp, training, window_size, half_window)
    # print() with one pre-built string gives identical output on Python 2
    # and 3 (replaces the Python 2 print statements).
    print("initial loss: " + str(l))

    for j in range(0, n_epochs):
        print("epoch " + str(j))
        random.shuffle(training)

        # Only the first ten shuffled samples are used per epoch.
        for c, (xs, y) in enumerate(training):
            if c == 10:
                break
            train(mlp, xs, y, window_size, stride)

        error(mlp, training, window_size, stride)
        l = loss(mlp, training, window_size, stride)
        print("loss: " + str(l))

        eta = A / float(j / float(n_epochs) + 1)
        mlp.eta = eta
        print("lr: %s" % (mlp.eta,))

    print("Getting Dev Accuracy...")
    error(mlp, dev, window_size, stride)
class MLP_VAD(object):
    """Frame-wise classifier built on a pre-trained Theano ``MLP``.

    The network has 200 inputs, 180 hidden units and 2 outputs; its
    parameters are read from ``model_file`` at construction time.
    """

    def __init__(self, model_file):
        """Build the symbolic MLP and load its weights from ``model_file``."""
        self.x = T.matrix('x')
        self.classifier = MLP(
            rng=np.random.RandomState(1234),
            input=self.x,
            n_in=200,
            n_hidden=180,
            n_out=2
        )
        self.classifier.load_model(model_file)

    def classify(self, fs, sig):
        """Return the predicted class of every frame of ``sig``.

        ``sig`` is resampled via ``downsample`` when ``fs`` differs from
        ``SAMPLE_RATE``, cut into non-overlapping frames of
        ``WINDOW_SIZE * SAMPLE_RATE`` samples (a trailing partial frame is
        dropped), Hamming-windowed and converted to an FFT magnitude
        spectrum before being fed to the network one frame at a time.
        """
        if fs != SAMPLE_RATE:
            sig = downsample(fs, sig)

        frame_len = int(WINDOW_SIZE * SAMPLE_RATE)
        num_frames = len(sig)/frame_len
        frames = sig[0:num_frames*frame_len].reshape((num_frames, frame_len))
        frames = frames * np.hamming(frame_len)
        # NOTE(review): the spectrum keeps frame_len bins and the network
        # expects 200 inputs -- presumably frame_len == 200; confirm.
        spectrum = np.abs(np.fft.fft(frames))

        shared_x = theano.shared(
            np.asarray(spectrum, dtype=theano.config.floatX), borrow=True)
        index = T.lscalar()  # row of the frame to classify
        predict_model = theano.function(
            inputs=[index],
            outputs=self.classifier.y_pred,
            givens={
                self.x: shared_x[index:index + 1],
            }
        )

        predicted_values = []
        for frame_idx in xrange(num_frames):
            predicted_values.append(predict_model(frame_idx)[0])
        return np.asarray(predicted_values)
def __init__(self,input_size,output_size,n_hidden=500,learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,batch_size=20):
    """Store the hyper-parameters and build the symbolic MLP.

    ``input_size`` and ``output_size`` become the MLP's ``n_in`` and
    ``n_out``; every other argument is kept as an attribute of the same
    name for later use.
    """
    # Optimisation settings.
    self.learning_rate = learning_rate
    self.n_epochs = n_epochs
    self.batch_size = batch_size

    # Regularisation weights.
    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    # Network definition: one symbolic input matrix feeding the MLP.
    self.n_hidden = n_hidden
    self.x = T.matrix('x')
    self.mlp = MLP(
        input=self.x,
        n_in=input_size,
        n_hidden=n_hidden,
        n_out=output_size,
    )
def testMLP(self):
    '''
    Build an MLP (one hidden layer, one softmax layer) from
    ./snippet_mlp.conf, train it on the snippet training set for
    configer.nepoch epochs and report the accuracy on the test set.
    '''
    conf_filename = './snippet_mlp.conf'

    # Build
    build_began = time.time()
    configer = MLPConfiger(conf_filename)
    mlpnet = MLP(configer, verbose=True)
    build_seconds = time.time() - build_began
    pprint('Time used to build the architecture of MLP: %f seconds' % build_seconds)

    # Training
    train_began = time.time()
    for epoch in xrange(configer.nepoch):
        cost, accuracy = mlpnet.train(self.snippet_train_set,
                                      self.snippet_train_label)
        pprint('epoch %d, cost = %f, accuracy = %f' % (epoch, cost, accuracy))
    train_minutes = (time.time() - train_began) / 60
    pprint('Time used for training MLP network on Snippet task: %f minutes' % train_minutes)

    # Test
    expected = self.snippet_test_label
    test_size = expected.shape[0]
    prediction = mlpnet.predict(self.snippet_test_set)
    n_correct = np.sum(prediction == expected)
    accuracy = n_correct / float(test_size)
    pprint('Test accuracy: %f' % accuracy)
def test_add_layer(self):
    """add_layer rejects non-Layer input and appends valid layers."""
    net = MLP()

    # A non-Layer argument must trip the assertion inside add_layer.
    with self.assertRaises(AssertionError):
        net.add_layer('')

    for size in (1, 2, 3):
        net.add_layer(Layer(size))

    self.assertEqual(len(net.layers), 3)
    for layer in net.layers:
        self.assertIsInstance(layer, Layer)
def testMLP(self):
    '''
    Sentiment analysis task for sentence representation using MLP,
    with one hidden layer and one softmax layer.

    Builds the network from ./sentiment_mlp.conf, trains it for
    configer.nepoch epochs with an annealed learning rate and reports
    the accuracy on the sentiment test set.
    '''
    conf_filename = './sentiment_mlp.conf'
    start_time = time.time()
    configer = MLPConfiger(conf_filename)
    mlpnet = MLP(configer, verbose=True)
    end_time = time.time()
    pprint('Time used to build the architecture of MLP: %f seconds.' % (end_time-start_time))
    # Training
    start_time = time.time()
    for i in xrange(configer.nepoch):
        # Anneal the rate as 2 / (1 + i/500)^2.  The divisor must be a
        # float: with integer operands Python 2 floors i/500, which kept
        # the rate stuck at 2.0 for the first 500 epochs.
        rate = 2.0 / ((1.0 + i / 500.0) ** 2)
        cost, accuracy = mlpnet.train(self.senti_train_set, self.senti_train_label, rate)
        pprint('epoch %d, cost = %f, accuracy = %f' % (i, cost, accuracy))
    end_time = time.time()
    pprint('Time used for training MLP network on Sentiment analysis task: %f minutes.' % ((end_time-start_time)/60))
    # Test
    prediction = mlpnet.predict(self.senti_test_set)
    accuracy = np.sum(prediction == self.senti_test_label) / float(self.test_size)
    pprint('Test accuracy: %f' % accuracy)
def __init__(self, model_file):
    """Build a 200-180-2 MLP on a symbolic input and load its weights.

    ``model_file`` is handed unchanged to ``MLP.load_model``.
    """
    self.x = T.matrix('x')
    self.classifier = MLP(
        rng=np.random.RandomState(1234),
        input=self.x,
        n_in=200,
        n_hidden=180,
        n_out=2
    )
    self.classifier.load_model(model_file)
def test_activate(self):
    """Forward activation of a 3 -> 2 network with hand-set weights.

    With all inputs at 1, the first output sees a net weight of +1 and
    the second a net weight of +0.4; the test expects them on opposite
    sides of 0.5.
    """
    net = MLP()
    net.add_layer(Layer(3))
    net.add_layer(Layer(2))
    net.init_network()

    input_layer = net.layers[0]
    input_layer.values = [1, 1, 1]

    # fixed_weights[src][dst]: weight from input unit `src` to output `dst`.
    fixed_weights = (
        (1, -0.1),
        (-1, -0.5),
        (1, 1),
    )
    for src, row in enumerate(fixed_weights):
        for dst, weight in enumerate(row):
            input_layer.weights[src][dst] = weight

    net._activate()

    outputs = net.layers[1].values
    self.assertGreater(outputs[0], 0.5)
    self.assertLess(outputs[1], 0.5)
def test_init_network(self):
    """init_network links neighbouring layers and allocates weights.

    The first layer has no predecessor, the last layer has neither
    weights nor successor, and the middle layer has all three.
    """
    net = MLP()
    for size in (1, 2, 3):
        net.add_layer(Layer(size))
    net.init_network()

    first = net.layers[0]
    self.assertIsNone(first.prev)
    self.assertIsNotNone(first.weights)
    self.assertIsNotNone(first.next)

    middle = net.layers[1]
    self.assertIsNotNone(middle.prev)
    self.assertIsNotNone(middle.weights)
    self.assertIsNotNone(middle.next)

    last = net.layers[2]
    self.assertIsNotNone(last.prev)
    self.assertIsNone(last.weights)
    self.assertIsNone(last.next)
def __init__(self, k, nb_epochs, H1, H2, nu, mu, batchsize, data):
    """Store the run settings, build the MLP and allocate error histories.

    The MLP is created with a fixed third argument of 576 (presumably the
    input dimension -- confirm).  Four zero-filled arrays of length
    ``nb_epochs + 1`` are allocated for the per-epoch error curves.
    """
    self.k = k
    self.data = data
    self.H1 = H1
    self.H2 = H2
    self.mu = mu
    self.nu = nu
    self.batchsize = batchsize

    self.mlp = MLP(H1, H2, 576, nu, mu, batchsize, self.k)
    self.error = Error()

    self.NUM_EPOCH = nb_epochs
    history_len = self.NUM_EPOCH + 1
    for history_name in ('validation_error', 'misclassified_val',
                         'training_error', 'misclassified_train'):
        setattr(self, history_name, sp.zeros(history_len))
def test_mlp(dataset, hyper):
    """Train an MLP with minibatch SGD and early stopping, printing progress.

    ``dataset`` supplies ``sharedTrain``/``sharedValid``/``sharedTest``
    (pairs of Theano shared variables) plus ``n_in`` and ``n_out``.
    ``hyper`` supplies ``batchSize``, ``patience``, ``patienceIncrease``,
    ``improvementThreshold``, ``nHidden1``, ``L1Reg``, ``L2Reg``,
    ``learningRate`` and ``numberEpochs``.

    Nothing is returned; validation/test errors go to stdout and the total
    running time to stderr.
    """
    train_set_x, train_set_y = dataset.sharedTrain
    valid_set_x, valid_set_y = dataset.sharedValid
    test_set_x, test_set_y = dataset.sharedTest

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / hyper.batchSize
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / hyper.batchSize
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / hyper.batchSize

    # Validate at least once per epoch, or more often if patience is small.
    validationFrequency = min(n_train_batches, hyper.patience / 2)

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=dataset.n_in,
                     n_hidden=hyper.nHidden1, n_out=dataset.n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
        + hyper.L1Reg * classifier.L1 \
        + hyper.L2Reg * classifier.L2_sqr

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * hyper.batchSize:(index + 1) * hyper.batchSize],
            y: test_set_y[index * hyper.batchSize:(index + 1) * hyper.batchSize]})

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * hyper.batchSize:(index + 1) * hyper.batchSize],
            y: valid_set_y[index * hyper.batchSize:(index + 1) * hyper.batchSize]})

    # gradient of the cost with respect to every parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [(param, param - hyper.learningRate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * hyper.batchSize:(index + 1) * hyper.batchSize],
            y: train_set_y[index * hyper.batchSize:(index + 1) * hyper.batchSize]})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.time()

    epoch = 0
    done_looping = False
    patience = hyper.patience

    while (epoch < hyper.numberEpochs) and (not done_looping):
        epoch = epoch + 1
        # Elapsed time must use the same clock as start_time; mixing
        # time.clock() with a time.time() origin produced garbage values.
        print('epoch %i, time %0.2fm' %
              (epoch, (time.time() - start_time) / 60.0))
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)

            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validationFrequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            hyper.improvementThreshold:
                        patience = max(patience,
                                       iteration * hyper.patienceIncrease)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.time()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
import numpy
import cPickle
import pickle
import gzip
from LR import Logisticlayer
from mlp import MLP

# Debug driver: rebuild a small MLP from pickled parameters and load the
# pickled data / reference values that go with it.
if __name__=="__main__":
    numpy.set_printoptions(threshold=numpy.nan)

    input_dim = 4
    output_dim = 3
    sample_size = 100
    #X=numpy.random.normal(0,1,(sample_size,input_dim))
    #temp,Y=numpy.nonzero(numpy.random.multinomial(1,[1.0/output_dim]*output_dim,size=sample_size))

    mlp = MLP(4,3,[10,10])

    # Initial parameters: flatten every stored array and concatenate them
    # into the single vector that packParam takes.
    with open('debug_nnet.pickle') as f:
        init_param = pickle.load(f)
    flat_parts = []
    for i in init_param:
        flat_parts.append(i.flatten())
    init_param = numpy.concatenate(flat_parts)
    mlp.packParam(init_param)

    # Inputs and targets.
    with open('debug_data.pickle') as f:
        data = pickle.load(f)
    X, Y = data[0], data[1]

    # Reference values loaded for comparison (presumably a Hessian/Jacobian
    # vector product computed with Theano -- confirm).
    with open('HJv.pickle') as f:
        HJv_theano = pickle.load(f)

    num_param = numpy.sum(mlp.sizes)
    batch_size = 100
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=500,
             batch_size=20, n_hidden=3):
    """Train an MLP on an XOR-like 2-D data set and animate its hidden layer.

    Four clusters of ``N`` points are generated around the corners of the
    unit square; opposite corners share a class.  After a 2-D scatter plot
    of the raw data, the network is trained with minibatch SGD and, every
    10 epochs, the hidden-layer activations plus the separating plane of
    the output layer are drawn in 3-D.  The frames are written to
    ``im.mp4`` through the ffmpeg writer.

    The 3-D plot and ``calc_z`` index three hidden units, so ``n_hidden``
    must be 3 for the visualisation to work.
    """
    numpy.random.seed(1)
    rng = numpy.random.RandomState(1234)

    # Number of points per cluster (two clusters of the same colour form
    # one class).
    N = 100

    # Explanatory variables: cluster centres (0,0), (1,1), (0,1), (1,0)
    # plus uniform noise in [0, 0.5).
    x_values = numpy.matrix(
        [[0] * N + [1] * N + [0] * N + [1] * N,
         [0] * N + [1] * N + [1] * N + [0] * N],
        dtype=numpy.float32).T
    x_values += numpy.random.rand(N * 4, 2) / 2

    # Target variable.
    y_values = numpy.array([0] * N * 2 + [1] * N * 2, dtype=numpy.int32)

    # 2-D plot of the raw data.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ['red'] * N * 2 + ['blue'] * N * 2
    ax.scatter(x_values[:, 0], x_values[:, 1], color=colors)
    plt.show()

    # Declare the data as Theano shared variables.
    x_data = theano.shared(value=x_values, name='x', borrow=True)
    y_data = theano.shared(value=y_values, name='y', borrow=True)

    n_train_batches = x_data.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Create the MLP instance.
    classifier = MLP(rng=rng, input=x, n_in=2, n_hidden=n_hidden, n_out=2)

    # Loss function.
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # SGD updates for every weight matrix and bias.
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: x_data[index * batch_size: (index + 1) * batch_size],
            y: y_data[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Function returning the hidden-layer output.
    apply_hidden = theano.function(inputs=[x],
                                   outputs=classifier.hiddenLayer.output)

    # 3-D plot.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Display range / camera angle.
    ax.set_xlabel('x0')
    ax.set_xlim(-1, 1.5)
    ax.set_ylabel('x1')
    ax.set_ylim(-0.5, 1.5)
    ax.set_zlabel('z')
    ax.set_zlim(-1, 1)
    ax.view_init(azim=30, elev=30)

    def calc_z(classifier, x0, x1):
        """Return the z coordinate of the separating plane at (x0, x1)."""
        w = classifier.logRegressionLayer.W.get_value()
        b = classifier.logRegressionLayer.b.get_value()
        z = ((w[0, 0] - w[0, 1]) * x0 + (w[1, 0] - w[1, 1]) * x1
             + b[0] - b[1]) / (w[2, 1] - w[2, 0])
        return z

    objs = []

    for epoch in range(n_epochs):
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)

        # Draw one frame every 10 epochs.
        if epoch % 10 == 0:
            z_data = apply_hidden(x_data.get_value())
            s = ax.scatter(z_data[:, 0], z_data[:, 1], z_data[:, 2],
                           color=colors)

            zx0_min = z_data[:, 0].min()
            zx0_max = z_data[:, 0].max()
            zx1_min = z_data[:, 1].min()
            zx1_max = z_data[:, 1].max()

            # Rectangle corners in order: (min,min), (min,max), (max,max),
            # (max,min).  The last x1 value must be zx1_min; it previously
            # used zx0_min, skewing the drawn plane.
            bx0 = numpy.array([zx0_min, zx0_min, zx0_max, zx0_max])
            bx1 = numpy.array([zx1_min, zx1_max, zx1_max, zx1_min])
            bz = calc_z(classifier, bx0, bx1)

            # Separating plane.
            tri = mplot3d.art3d.Poly3DCollection([list(zip(bx0, bx1, bz))],
                                                 facecolor='gray', alpha=0.5)
            ax.add_collection3d(tri)
            objs.append((s, tri))

    # Build the animation and write it out.
    ani = animation.ArtistAnimation(fig, objs, interval=40, repeat=False)
    Writer = animation.writers['ffmpeg']
    writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
    ani.save('im.mp4', writer=writer)
} if not os.path.exists(dest_dir): os.makedirs(dest_dir) num_trials = 10 n_tried = 0 best_valid, best_test, best_conf = None, None, None while n_tried < num_trials: # choose randomly a configuration for the paramters try_this = [np.random.randint(len(p)) for p in params.values()] try_params = OrderedDict([(k, v[try_this[i]]) for i, (k, v) in enumerate(params.items())]) model = MLP( n_classes=10, optim='adagrad', n_inputs=train_x.shape[1], activation='relu', layers=[256, 128]) model.set_params(**try_params) fname = os.path.join(dest_dir, 'mlp_{}_{}_l{}_lr{}_m{}_di{}_dh{}.npz'.format( model.optimization, model.activation, '-'.join(map(str, model.layers)), model.learning_rate, model.momentum, model.dropout_p_input, model.dropout_p_hidden)) # check if this configuration has already been tried
######################
# BUILD ACTUAL MODEL #
######################
print('... building the model')

# Symbolic placeholders for one minibatch.
index = T.lscalar()  # minibatch index
x = T.matrix('x')    # inputs, one rasterized image per row
y = T.ivector('y')   # integer class labels

rng = numpy.random.RandomState(1234)

# 784-500-10 network matching the stored MNIST parameters.
classifier = MLP(
    rng=rng,
    input=x,
    n_in=28 * 28,
    n_hidden=500,
    n_out=10,
)

# Overwrite the freshly initialised parameters with the trained ones.
params = numpy.load("mlp_mnist.npz")
for shared_var, key in (
        (classifier.hiddenLayer.W, 'hidden_W'),
        (classifier.hiddenLayer.b, 'hidden_b'),
        (classifier.logRegressionLayer.W, 'logreg_W'),
        (classifier.logRegressionLayer.b, 'logreg_b')):
    shared_var.set_value(params[key])

# Error rate of the loaded model on one training minibatch.
train_loss = theano.function(
    inputs=[index],
    outputs=classifier.errors(y),
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size],
    },
)
def sgd_optimization_mnist_mlp(learning_rate=0.01, L1_reg=0.0, L2_reg=0.0001,
                               n_epochs=1000, dataset='mnist.pkl.gz',
                               batch_size=20, n_hidden=500):
    """Train an MLP on MNIST with minibatch SGD and early stopping.

    The data comes from ``load_data(dataset)`` as three (inputs, labels)
    pairs of Theano shared variables.  The cost is the classifier loss plus
    L1 and L2 penalties, and the updates come from ``simple_sgd``.
    Validation runs every ``validation_frequency`` minibatches; whenever it
    improves, the test error is measured as well.  Progress and a final
    summary are printed; nothing is returned.
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    def count_batches(shared_inputs):
        # borrow=True avoids deep-copying the data just to read its shape.
        return shared_inputs.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    print("... Building the model")

    index = T.lscalar()  # index to a mini-batch
    # Symbolic variables for input and output for a batch
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)

    # Images in MNIST are 28*28, there are 10 output classes
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28*28,
        n_hidden=n_hidden,
        n_out=10)

    # Cost to minimize
    cost = (
        classifier.loss(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sq
    )

    def batch_givens(set_x, set_y):
        # Substitute x/y by the `index`-th minibatch of the given set.
        start = index * batch_size
        stop = (index + 1) * batch_size
        return [(x, set_x[start:stop]), (y, set_y[start:stop])]

    # Functions that measure the 0-1 loss on a test / validation minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(test_set_x, test_set_y)
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y)
    )

    # Stochastic Gradient descent
    updates = simple_sgd(cost, classifier.params, learning_rate)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y)
    )

    ################
    # TRAIN MODEL  #
    ################
    print("... Training the model")

    # Early stopping parameters
    patience = 10000               # minimum number of minibatches to process
    patience_increase = 2          # patience multiplier on a new best score
    improvement_threshold = 0.995  # minimum significant improvement
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # Global count of minibatches processed so far (zero-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Average 0-1 loss over the validation set
                validation_losses = []
                for i in range(n_valid_batches):
                    validation_losses.append(validate_model(i))
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                if this_validation_loss < best_validation_loss:
                    # Extend patience only when the gain is significant
                    significant = (
                        this_validation_loss
                        < best_validation_loss * improvement_threshold)
                    if significant:
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss

                    # Test error of the new best model
                    test_losses = []
                    for i in range(n_test_batches):
                        test_losses.append(test_model(i))
                    test_score = numpy.mean(test_losses)
                    print(
                        'epoch %i, minibatch %i/%i, test error of'
                        ' best model %f %%' % (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
                    # Save the best model
                    #with open(script_path + '/best_model_mlp.pkl', 'wb') as f:
                        #cPickle.dump(classifier, f)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation error of %f %%,'
            'with test error of %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print ('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
def run_mlp(dataset_path, neurons):
    """Train an MLP on one cross-validation split and plot its neurons.

    The text file at ``dataset_path`` is loaded with ``np.loadtxt``; its
    last column is printed as the output and the rest as the input.  One
    training/validation split is taken from ``SCV(dataset_input, 5)``.
    The network gets ``neurons`` hidden units and is trained with the
    training inputs as both input and target (likewise for validation).
    """
    dataset_input = np.loadtxt(dataset_path)
    dataset_output = dataset_input[:, - 1]
    features = dataset_input[:, :-1]
    print(features)
    print(dataset_output)

    scv = SCV(dataset_input, 5)
    training, training_out, validation, validation_out = \
        scv.select_fold_combination()
    for caption, value in (("training ", training),
                           ("training out ", training_out),
                           ("validation ", validation),
                           ("validation_out ", validation_out)):
        print(caption, value)

    hide = np.array([int(neurons)])
    n_features = training.shape[1]
    print(n_features)
    print(hide[0])

    # Input and output widths are both the feature count.
    ann = MLP(n_features, n_features, hide)
    ann.set_learningRate(0.95)
    ann.set_learningDescent(0.5)
    ann.set_momentum(0.02)
    ann.set_erro(0.005)
    ann.validation_set(validation, validation)
    ann.train_mlp(training, training)

    print("Validation Error: ", ann.get_validationError())
    print("Training Error: ", ann.get_trainingError())

    title = str(neurons) + " Neurons"
    #ann.plot_learning_curve(title)
    ann.plot_neurons(title)
def fun_mlp(shared_args, private_args, this_queue, that_queue):
    '''
    Train one replica of an MLP on one GPU, averaging weights with a peer.

    shared_args contains neural network parameters; the keys read are
    'learning_rate', 'n_epochs', 'dataset', 'batch_size', 'L1_reg',
    'L2_reg' and 'n_hidden'.

    private_args contains parameters for the process run on each gpu; the
    keys read are 'ind_gpu' (pycuda device index), 'gpu' (device passed to
    theano.sandbox.cuda.use), 'flag_client' (connect to, rather than bind,
    the zmq PAIR socket on port 5000), 'mod' (which parity of minibatch
    index this process trains on) and 'verbose'.

    this_queue and that_queue are used for synchronization between
    processes: every barrier is a put on this_queue followed by a get on
    that_queue (presumably the peer holds the two queues swapped --
    confirm against the caller).
    '''
    learning_rate = shared_args['learning_rate']
    n_epochs = shared_args['n_epochs']
    dataset = shared_args['dataset']
    batch_size = shared_args['batch_size']
    L1_reg = shared_args['L1_reg']
    L2_reg = shared_args['L2_reg']
    n_hidden = shared_args['n_hidden']

    ####
    # pycuda and zmq environment
    drv.init()
    dev = drv.Device(private_args['ind_gpu'])
    ctx = dev.make_context()
    sock = zmq.Context().socket(zmq.PAIR)

    # One process connects and the other binds, forming the PAIR channel.
    if private_args['flag_client']:
        sock.connect('tcp://localhost:5000')
    else:
        sock.bind('tcp://*:5000')
    ####

    ####
    # import theano related
    # Theano is imported here, after the pycuda context is created, and the
    # device is selected before the main `import theano`.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(private_args['gpu'])

    import theano
    import theano.tensor as T

    from logistic_sgd import load_data
    from mlp import MLP

    import theano.misc.pycuda_init
    import theano.misc.pycuda_utils
    ####

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]  # loaded but not used below

    # compute number of minibatches for training and validation
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = np.random.RandomState(1234)

    classifier = MLP(rng=rng, input=x, n_in=28 * 28,
                     n_hidden=n_hidden, n_out=10)

    # negative log likelihood plus L1 and L2 regularization terms
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # error on the index-th validation minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]}
    )

    # plain SGD: param <- param - learning_rate * d(cost)/d(param)
    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ####
    # setting pycuda and
    # pass handles, only done once
    param_ga_list = []
    # a list of pycuda gpuarrays which point to value of theano shared
    # variable on this gpu

    param_other_list = []
    # a list of theano shared variables that are used to store values of
    # theano shared variable from the other gpu

    param_ga_other_list = []
    # a list of pycuda gpuarrays which point to theano shared variables in
    # param_other_list

    h_list = []
    # a list of pycuda IPC handles

    shape_list = []
    # a list containing shapes of variables in param_ga_list

    dtype_list = []
    # a list containing dtypes of variables in param_ga_list

    average_fun_list = []
    # a list containing theano functions for averaging parameters

    for param in classifier.params:
        # local staging copy that will receive the peer's value of `param`
        param_other = theano.shared(param.get_value())
        param_ga = \
            theano.misc.pycuda_utils.to_gpuarray(param.container.value)
        param_ga_other = \
            theano.misc.pycuda_utils.to_gpuarray(
                param_other.container.value)
        # IPC handle for this process's copy of the parameter
        h = drv.mem_get_ipc_handle(param_ga.ptr)
        # replaces param by the mean of param and the staged peer value
        average_fun = \
            theano.function([], updates=[(param,
                                          (param + param_other) / 2.)])

        param_other_list.append(param_other)
        param_ga_list.append(param_ga)
        param_ga_other_list.append(param_ga_other)
        h_list.append(h)
        shape_list.append(param_ga.shape)
        dtype_list.append(param_ga.dtype)
        average_fun_list.append(average_fun)

    # pass shape, dtype and handles
    sock.send_pyobj((shape_list, dtype_list, h_list))
    shape_other_list, dtype_other_list, h_other_list = sock.recv_pyobj()

    param_ga_remote_list = []

    # create gpuarray point to the other gpu use the passed information
    for shape_other, dtype_other, h_other in zip(shape_other_list,
                                                 dtype_other_list,
                                                 h_other_list):
        param_ga_remote = \
            gpuarray.GPUArray(shape_other, dtype_other,
                              gpudata=drv.IPCMemoryHandle(h_other))

        param_ga_remote_list.append(param_ga_remote)
    ####

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # barrier: both processes are set up before the timer starts
    this_queue.put('')
    that_queue.get()
    start_time = time.time()

    epoch = 0

    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            # each process only handles minibatches of its own parity
            if minibatch_index % 2 == private_args['mod']:
                train_model(minibatch_index)

                # barrier before reading the peer's parameters
                this_queue.put('')
                that_queue.get()

                # exchanging weights
                for param_ga, param_ga_other, param_ga_remote in \
                        zip(param_ga_list, param_ga_other_list,
                            param_ga_remote_list):

                    # copy the peer's parameter into the local staging array
                    drv.memcpy_peer(param_ga_other.ptr,
                                    param_ga_remote.ptr,
                                    param_ga_remote.dtype.itemsize *
                                    param_ga_remote.size,
                                    ctx, ctx)

                ctx.synchronize()

                # barrier: both copies are done before either side averages
                this_queue.put('')
                that_queue.get()

                for average_fun in average_fun_list:
                    average_fun()

        if private_args['verbose']:
            validation_losses = [validate_model(i) for i
                                 in xrange(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)

            print('epoch %i, minibatch %i/%i, validation error %f %%' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100.))

    end_time = time.time()

    # final barrier before reporting
    this_queue.put('')
    that_queue.get()

    if private_args['verbose']:
        print 'The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.1fs' % ((end_time - start_time)))
def convert(image_file, text_file=None):
    """Recognise the text in ``image_file`` and write or print it.

    The image is segmented into lines, words and characters through the
    ``ip`` helpers, and each character is decoded by ``convert_char``.
    Characters are joined without separator, words with a space and lines
    with a newline.  The text is written to ``text_file`` when one is
    given (truthy); otherwise it is printed to stdout.
    """
    img = ip.get_image(image_file)
    lines = []
    for line in ip.get_lines(img):
        words = []
        for word in ip.get_words(img, line):
            chars = [convert_char(img, char)
                     for char in ip.get_chars(img, word)]
            words.append(''.join(chars))
        lines.append(' '.join(words))

    text = '\n'.join(lines)
    if text_file:
        # `with` closes the file even if the write raises.
        with open(text_file, 'w') as f:
            f.write(text)
    else:
        print(text)


def convert_char(img, char):
    """Return the decoded network output for one character region."""
    c = ip.process_char(img, char)
    return decode(network.activate(c))


# Trained network, loaded once at import time and shared by convert_char.
network = MLP.load('lower2.dmp')

if __name__ == '__main__':
    convert('./samples/otra_prueba.png')
# Hyper-parameters for this run.
learning_rate = 0.01
L1_reg = 0.00
L2_reg = 0.0001
n_epochs = 1000
dataset = 'train.mat'
batch_size = 1000
n_hidden = 50

# Symbolic inputs: one sample per row of x, one integer label per entry of y.
x = T.matrix('x')
y = T.ivector('y')

rng = numpy.random.RandomState(1234)

# 3000 inputs -> n_hidden hidden units -> 2 classes.
classifier = MLP(
    rng=rng,
    input=x,
    n_in=3000,
    n_hidden=n_hidden,
    n_out=2
)

# Negative log likelihood plus L1 and L2 penalties.
cost = (
    classifier.negative_log_likelihood(y)
    + L1_reg * classifier.L1
    + L2_reg * classifier.L2_sqr
)

# Takes the data and labels directly instead of a minibatch index.
validate_model = theano.function(
    inputs=[x, y],
    outputs=classifier.errors(y)
)
def main():
    """Train a 2-2-1 network on XOR and print the result for each pattern."""
    xor = MLP()
    cnf = lambda: 0

    # Input, hidden (built with `cnf` as second argument) and output layers.
    for layer in (Layer(2), Layer(2, cnf), Layer(1)):
        xor.add_layer(layer)
    xor.add_bias()
    xor.init_network()

    xor.patterns = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]

    print(xor.train(xor.patterns))

    tolerance = 0.1  # not used by the report below
    for inp, target in xor.patterns:
        computed = xor.forward(inp)
        error = abs(computed[0] - target[0])
        report = 'input: %s target: %s, output: %s, error: %.4f' % (
            inp, target, computed, error)
        print(report)