# Numerical gradient check: compare backprop gradients against central differences.
def gradient_check(conv=True):
    if conv:
        layera = Conv(in_shape=[16, 32, 28], k_num=12, k_size=3)
        layerb = Conv(in_shape=[16, 32, 28], k_num=12, k_size=3)
    else:
        layera = FullyConnect(in_shape=[16, 32, 28], out_dim=12)
        layerb = FullyConnect(in_shape=[16, 32, 28], out_dim=12)
    act_layer = Activation(act_type='Tanh')
    layerb.w = layera.w.copy()
    layerb.b = layera.b.copy()
    eps = 1e-4
    x = np.random.randn(10, 16, 32, 28) * 10
    for i in range(100):
        idxes = tuple((np.random.uniform(0, 1, 4) * x.shape).astype(int))
        x_a = x.copy()
        x_b = x.copy()
        x_a[idxes] += eps
        x_b[idxes] -= eps
        out = act_layer.forward(layera.forward(x))
        gradient = layera.gradient(act_layer.gradient(np.ones(out.shape)))
        delta_out = (act_layer.forward(layera.forward(x_a)) -
                     act_layer.forward(layerb.forward(x_b))).sum()
        # the output should be in the order of eps*eps
        print(idxes, (delta_out / eps / 2 - gradient[idxes]) / eps / eps)
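# Minimal usage sketch (assumption: Conv, FullyConnect, Activation and numpy
# are already imported in this module). Run the check once per layer type and
# inspect the printed residuals.
if __name__ == '__main__':
    np.random.seed(0)
    gradient_check(conv=True)   # check the Conv layer
    gradient_check(conv=False)  # check the FullyConnect layer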
def __init__(self, eps=1):
    self.n_episodes = 1000
    self.batch_size = 32
    self.n_epochs = 300
    self.training_size = self.n_epochs * self.batch_size
    self.gamma = 0.95
    self.eps = eps
    self.eps_decay = 0.99
    lr = 0.01
    self.policy_net, self.target_net = [
        NN([
            Conv((2, n_size, n_size), k_size=n_connect, k_num=16, optimizer='RMSProp'),
            Activation(act_type='ReLU'),
            FullyConnect([16, n_size - n_connect + 1, n_size - n_connect + 1], [16],
                         lr=lr, optimizer='RMSProp'),
            Activation(act_type='ReLU'),
            FullyConnect([16], [n_size * n_size], lr=lr, optimizer='RMSProp'),
        ]) for _ in range(2)
    ]
    # experience replay storage: one row per stored transition
    self.states = np.zeros((0, 2, n_size, n_size))
    self.next_states = np.zeros((0, 2, n_size, n_size))
    self.actions = np.zeros(0).astype(int)
    self.rewards = np.zeros(0)
    self.unfinish_mask = np.zeros(0)
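# Hedged sketch, not necessarily this class's own update rule: the replay arrays
# above (rewards, unfinish_mask, next_states) are typically combined with the
# target network's Q-values into a one-step TD target. Plain NumPy, for
# illustration only; q_next would come from self.target_net.
import numpy as np

def dqn_targets(q_next, rewards, unfinish_mask, gamma=0.95):
    # q_next: (batch, n_actions) Q-values of next_states from the target net;
    # terminal transitions (unfinish_mask == 0) contribute only their reward.
    return rewards + unfinish_mask * gamma * q_next.max(axis=1)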
def vanilla_gan(self):
    gen_lr, dis_lr = 2e-3, 5e-4
    self.generator = NN([
        FullyConnect([self.gen_input], [256], lr=gen_lr),
        BatchNormalization([256], lr=gen_lr),
        Activation(act_type='ReLU'),
        FullyConnect([256], [512], lr=gen_lr),
        BatchNormalization([512], lr=gen_lr),
        Activation(act_type='ReLU'),
        FullyConnect([512], [1024], lr=gen_lr),
        BatchNormalization([1024], lr=gen_lr),
        Activation(act_type='ReLU'),
        FullyConnect([1024], [1, 28, 28], lr=gen_lr),
        Activation(act_type='Tanh')
    ])
    self.discriminator = NN([
        FullyConnect([1, 28, 28], [1024], lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        FullyConnect([1024], [512], lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        FullyConnect([512], [256], lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        FullyConnect([256], [1], lr=dis_lr),
        Activation(act_type='Sigmoid')
    ])
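# Hedged illustration, independent of the NN API above: the vanilla GAN
# objectives these two networks are usually trained against. Plain NumPy;
# d_real / d_fake are the discriminator's sigmoid outputs on real and
# generated batches.
import numpy as np

def discriminator_loss(d_real, d_fake, eps=1e-8):
    # maximise log D(x) + log(1 - D(G(z))), i.e. minimise the negative
    return -(np.log(d_real + eps) + np.log(1.0 - d_fake + eps)).mean()

def generator_loss(d_fake, eps=1e-8):
    # non-saturating generator objective: minimise -log D(G(z))
    return -np.log(d_fake + eps).mean()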
def dc_gan(self):
    gen_lr, dis_lr = 2e-3, 5e-4
    tconv1 = TrasposedConv((128, 7, 7), k_size=4, k_num=128, stride=2, padding=1, lr=gen_lr)
    tconv2 = TrasposedConv(tconv1.out_shape, k_size=4, k_num=128, stride=2, padding=1, lr=gen_lr)
    tconv3 = TrasposedConv(tconv2.out_shape, k_size=7, k_num=1, stride=1, padding=3, lr=gen_lr)
    self.generator = NN([
        FullyConnect([self.gen_input], tconv1.in_shape, lr=gen_lr),
        BatchNormalization(tconv1.in_shape, lr=gen_lr),
        Activation(act_type='ReLU'),
        tconv1,
        BatchNormalization(tconv1.out_shape, lr=gen_lr),
        Activation(act_type='ReLU'),
        tconv2,
        BatchNormalization(tconv2.out_shape, lr=gen_lr),
        Activation(act_type='ReLU'),
        tconv3,
        BatchNormalization(tconv3.out_shape, lr=gen_lr),
        Activation(act_type='Tanh')
    ])
    conv1 = Conv((1, 28, 28), k_size=7, k_num=128, stride=1, padding=3, lr=dis_lr)
    conv2 = Conv(conv1.out_shape, k_size=4, k_num=128, stride=2, padding=1, lr=dis_lr)
    conv3 = Conv(conv2.out_shape, k_size=4, k_num=128, stride=2, padding=1, lr=dis_lr)
    self.discriminator = NN([
        conv1,
        Activation(act_type='LeakyReLU'),
        conv2,
        BatchNormalization(conv2.out_shape, lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        conv3,
        BatchNormalization(conv3.out_shape, lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        FullyConnect(conv3.out_shape, [1], lr=dis_lr),
        Activation(act_type='Sigmoid')
    ])
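# Shape check for the hard-coded DCGAN above, assuming the usual formulas
# out = (in - 1) * stride - 2 * padding + k_size for transposed conv and
# out = (in + 2 * padding - k_size) // stride + 1 for conv:
#   generator:      7 -> 14 -> 28 -> 28, i.e. a 1x28x28 image
#   discriminator: 28 -> 28 -> 14 ->  7, flattened into the final FullyConnect
def tconv_out(size, k_size, stride, padding):
    return (size - 1) * stride - 2 * padding + k_size

def conv_out(size, k_size, stride, padding):
    return (size + 2 * padding - k_size) // stride + 1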
def train(self, x, y):
    lr = self.lr
    conv1 = Conv(in_shape=x.shape[1:4], k_num=6, k_size=5, lr=lr)
    bn1 = BatchNormalization(in_shape=conv1.out_shape, lr=lr)
    relu1 = Activation(act_type="ReLU")
    pool1 = MaxPooling(in_shape=conv1.out_shape, k_size=2)
    conv2 = Conv(in_shape=pool1.out_shape, k_num=16, k_size=3, lr=lr)
    bn2 = BatchNormalization(in_shape=conv2.out_shape, lr=lr)
    relu2 = Activation(act_type="ReLU")
    pool2 = MaxPooling(in_shape=conv2.out_shape, k_size=2)
    fc = FullyConnect(pool2.out_shape, [self.n_labels], lr=lr)
    softmax = Softmax()
    nn = NN([
        conv1, bn1, relu1, pool1,
        conv2, bn2, relu2, pool2,
        fc, softmax
    ])
    nn.fit(x, y)
    return nn
def __init__(self, x_shape, label_num):
    self.batch_size, lr = 32, 1e-3
    # Conv > Normalization > Activation > Dropout > Pooling
    conv1 = Conv(in_shape=x_shape, k_num=6, k_size=5, lr=lr)
    bn1 = BatchNormalization(in_shape=conv1.out_shape, lr=lr)
    relu1 = Activation(act_type="ReLU")
    pool1 = MaxPooling(in_shape=conv1.out_shape, k_size=2)
    conv2 = Conv(in_shape=pool1.out_shape, k_num=16, k_size=3, lr=lr)
    bn2 = BatchNormalization(in_shape=conv2.out_shape, lr=lr)
    relu2 = Activation(act_type="ReLU")
    pool2 = MaxPooling(in_shape=conv2.out_shape, k_size=2)
    fc1 = FullyConnect(pool2.out_shape, [120], lr=lr)
    bn3 = BatchNormalization(in_shape=[120], lr=lr)
    relu3 = Activation(act_type="ReLU")
    fc2 = FullyConnect([120], [label_num], lr=lr)
    softmax = Softmax()
    self.layers = [
        conv1, bn1, relu1, pool1,
        conv2, bn2, relu2, pool2,
        fc1, bn3, relu3, fc2, softmax
    ]
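# Feature-map sizes for this LeNet-style stack, assuming a 1x28x28 (MNIST)
# input and that Conv defaults to stride 1 with no padding:
#   conv1 (k=5): 6x24x24 -> pool1: 6x12x12
#   conv2 (k=3): 16x10x10 -> pool2: 16x5x5 -> fc1: 120 -> fc2: label_num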
class VAE(object):
    def __init__(self, dim_in, dim_hidden, dim_z):
        self.n_epochs, self.batch_size = 10, 32
        self.C = 1  # trade-off between reconstruction and KL divergence
        # architecture is hard-coded
        self.encoder_hidden = FullyConnect([dim_in], [dim_hidden], lr=1e-2)
        self.encoder_act = Activation(act_type='ReLU')
        self.encoder_mu = FullyConnect([dim_hidden], [dim_z], lr=1e-2)
        self.encoder_log_sigma = FullyConnect([dim_hidden], [dim_z], lr=1e-2)
        self.decoder_hidden = FullyConnect([dim_z], [dim_hidden], lr=1e-2)
        self.decoder_act_hidden = Activation(act_type='ReLU')
        self.decoder_out = FullyConnect([dim_hidden], [dim_in], lr=1e-2)
        self.decoder_act_out = Activation(act_type='Sigmoid')

    def fit(self, x):
        for epoch in range(self.n_epochs):
            permut = np.random.permutation(
                x.shape[0] // self.batch_size * self.batch_size).reshape(
                    [-1, self.batch_size])
            for b_idx in range(permut.shape[0]):
                x_batch = x[permut[b_idx, :]]
                mu, log_sigma = self.encoder_forward(x_batch)
                z = self.sampling(mu, log_sigma)
                out = self.decoder_forward(z)
                # reconstruction gradient, backpropagated through the decoder
                recon_grad = self.C * (out - x_batch)
                grad_d_act_out = self.decoder_act_out.gradient(recon_grad)
                grad_d_out = self.decoder_out.gradient(grad_d_act_out)
                grad_d_act_hidden = self.decoder_act_hidden.gradient(grad_d_out)
                grad_z = self.decoder_hidden.gradient(grad_d_act_hidden)
                # analytic gradients of the KL term w.r.t. mu and log_sigma
                kl_mu_grad = mu
                kl_sigma_grad = np.exp(2 * log_sigma) - 1
                grad_mu = self.encoder_mu.gradient(grad_z + kl_mu_grad)
                grad_log_sigma = self.encoder_log_sigma.gradient(grad_z + kl_sigma_grad)
                grad_e_act = self.encoder_act.gradient(grad_mu + grad_log_sigma)
                grad_e_hidden = self.encoder_hidden.gradient(grad_e_act)
                self.backward()
            print('epoch: {}, log loss: {}, kl loss: {}'.format(
                epoch, self.log_loss(out, x_batch), self.kl_loss(mu, log_sigma)))

    def encoder_forward(self, x):
        hidden = self.encoder_hidden.forward(x)
        hidden = self.encoder_act.forward(hidden)
        mu = self.encoder_mu.forward(hidden)
        log_sigma = self.encoder_log_sigma.forward(hidden)
        return mu, log_sigma

    def sampling(self, mu, log_sigma):
        # reparameterization trick: z = mu + sigma * noise
        noise = np.random.randn(mu.shape[0], mu.shape[1])
        return mu + noise * np.exp(log_sigma)

    def decoder_forward(self, z):
        hidden = self.decoder_hidden.forward(z)
        hidden = self.decoder_act_hidden.forward(hidden)
        out = self.decoder_out.forward(hidden)
        out = self.decoder_act_out.forward(out)
        return out

    def backward(self):
        self.decoder_act_out.backward()
        self.decoder_out.backward()
        self.decoder_act_hidden.backward()
        self.decoder_hidden.backward()
        self.encoder_mu.backward()
        self.encoder_log_sigma.backward()
        self.encoder_act.backward()
        self.encoder_hidden.backward()

    def log_loss(self, pred, x):
        return 0.5 * self.C * np.square(pred - x).mean()

    def kl_loss(self, mu, log_sigma):
        return 0.5 * (-2 * log_sigma + np.exp(2 * log_sigma) + np.square(mu) - 1).mean()
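# Hedged note on the KL terms used in VAE.fit above (standard result for a
# diagonal Gaussian posterior N(mu, exp(log_sigma)^2) against a N(0, I) prior):
#   KL (per dim)   = 0.5 * (mu^2 + exp(2*log_sigma) - 2*log_sigma - 1)
#   dKL/dmu        = mu                       -> kl_mu_grad
#   dKL/dlog_sigma = exp(2*log_sigma) - 1     -> kl_sigma_grad
import numpy as np

def kl_per_dim(mu, log_sigma):
    # matches VAE.kl_loss up to the mean over batch and latent dimensions
    return 0.5 * (np.square(mu) + np.exp(2 * log_sigma) - 2 * log_sigma - 1)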
        )
        # Test after train
        trained_decision_tree.test(test_file_path="test_file.txt")
    else:
        # Test
        trained_decision_tree = load_model_from_pickle(file_name="tree.pkl")
        trained_decision_tree.test(test_file_path="test_file.txt")
elif model == "nnet":
    if train_or_test == "train":
        trained_neural_network = start(
            layers=[
                Dense(300),
                Activation("relu"),
                Dropout(0.2),
                Dense(300),
                Activation("relu"),
                Dropout(0.2),
                Dense(4),
                Activation("softmax"),
            ],
            epochs=35,
            learning_rate=0.001,
            rho=0.9,
            number_of_folds=5,
        )
        save_model_to_pickle(obj=trained_neural_network, file_name="nnet.pkl")