def init_gen_model(state):
    """Assemble the fully connected generator network.

    The model consists of two hidden stages, each a Linear layer followed
    by BatchNorm and a ReLU expression, and a final Linear projection.
    Every Linear layer receives ``state['init_g']`` as its ``weight``
    argument.

    Args:
        state: mapping that provides ``noise_size``, ``hidden_size``,
            ``input_size`` and ``init_g``.

    Returns:
        The populated ``nn.Sequential`` container.
    """
    gen_model = nn.Sequential()
    hidden = state['hidden_size']

    # First stage consumes the noise vector, second stage is hidden -> hidden.
    for fan_in in (state['noise_size'], hidden):
        gen_model.add(nn.Linear(fan_in, hidden, weight=state['init_g']))
        gen_model.add(nn.BatchNorm(hidden))
        gen_model.add(nn.Expression(T.nnet.relu))

    # Output projection; no normalisation or nonlinearity is appended.
    gen_model.add(
        nn.Linear(hidden, state['input_size'], weight=state['init_g']))
    return gen_model
def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1):
    """Create the encoder/decoder stacks and the RGB input/output layers.

    Args:
        img_size: stored on the instance; also sets the initial channel
            width (``2**14 // img_size``) and the number of resampling
            blocks (``int(log2(img_size)) - 4``, plus one when
            ``w_hpf > 0``).
        style_dim: passed to every ``AdainResBlk``.
        max_conv_dim: upper bound on the channel width, which doubles at
            each resampling block.
        w_hpf: passed to every ``AdainResBlk``; when positive a
            ``HighPass`` module is also created as ``self.hpf``.
    """
    super().__init__()
    base_width = 2**14 // img_size
    self.img_size = img_size
    self.from_rgb = nn.Conv2d(3, base_width,
                              kernel_size=3, stride=1, padding=1)
    self.encode = nn.ModuleList()
    self.decode = nn.ModuleList()
    self.to_rgb = nn.Sequential(
        nn.InstanceNorm2d(base_width, affine=True),
        nn.LeakyReLU(0.2),
        nn.Conv2d(base_width, 3, kernel_size=1, stride=1, padding=0))

    use_hpf = w_hpf > 0
    num_resample = int(np.log2(img_size)) - 4 + (1 if use_hpf else 0)

    # Down/up-sampling blocks. Decoder blocks are inserted at the front so
    # that the decoder list ends up in the reverse order of the encoder.
    width_in = base_width
    for _ in range(num_resample):
        width_out = min(width_in * 2, max_conv_dim)
        self.encode.append(
            ResBlk(width_in, width_out, normalize=True, downsample=True))
        self.decode.insert(
            0,
            AdainResBlk(width_out, width_in, style_dim,
                        w_hpf=w_hpf, upsample=True))
        width_in = width_out

    # Bottleneck blocks keep the final channel width unchanged.
    for _ in range(2):
        self.encode.append(ResBlk(width_out, width_out, normalize=True))
        self.decode.insert(
            0, AdainResBlk(width_out, width_out, style_dim, w_hpf=w_hpf))

    if use_hpf:
        device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.hpf = HighPass(w_hpf, device)
def init_gen_model(state):
    """Assemble the deconvolutional generator with a 7x7 seed feature map.

    Layout: Linear -> BatchNorm -> ReLU -> reshape to
    ``(batch, g_num_filters*4, 7, 7)``, two stride-2 4x4 deconvolution
    stages (each followed by BatchNorm and ReLU), and a final stride-1 3x3
    deconvolution with bias. No output nonlinearity is added (the original
    left a sigmoid disabled).

    Args:
        state: mapping that provides ``noise_size``, ``g_num_filters``,
            ``input_channels``, ``g_init`` and ``g_conv_init``.

    Returns:
        The populated ``nn.Sequential`` container.
    """
    gen_model = nn.Sequential()

    def add_upsampling_stage(num_filters, num_channels):
        # 4x4 deconvolution with step 2, then BatchNorm sized by
        # num_channels, then ReLU.
        gen_model.add(nn.Deconvolutional(filter_size=(4, 4),
                                         num_filters=num_filters,
                                         num_channels=num_channels,
                                         step=(2, 2),
                                         border_mode=(1, 1),
                                         use_bias=False,
                                         weight=state['g_conv_init']))
        gen_model.add(nn.BatchNorm(num_channels))
        gen_model.add(nn.Expression(T.nnet.relu))

    # Dense projection of the noise vector, reshaped into a 7x7 map.
    flat_units = state['g_num_filters'] * 4 * 7 * 7
    gen_model.add(nn.Linear(state['noise_size'], flat_units,
                            weight=state['g_init'], use_bias=False))
    gen_model.add(nn.BatchNorm(flat_units))
    gen_model.add(nn.Expression(T.nnet.relu))
    # The lambda reads `state` when it is called, not when it is defined.
    gen_model.add(nn.Expression(
        lambda x: T.reshape(x, (x.shape[0], state['g_num_filters']*4, 7, 7))))

    # Original annotation: spatial size 14x14 after this stage.
    add_upsampling_stage(state['g_num_filters'] * 4,
                         state['g_num_filters'] * 2)
    # Original annotation: spatial size 28x28 after this stage.
    add_upsampling_stage(state['g_num_filters'] * 2,
                         state['g_num_filters'])

    # Final layer: step 1, bias enabled, num_channels == input_channels.
    gen_model.add(nn.Deconvolutional(filter_size=(3, 3),
                                     num_filters=state['g_num_filters'],
                                     num_channels=state['input_channels'],
                                     step=(1, 1),
                                     border_mode=(1, 1),
                                     use_bias=True,
                                     weight=state['g_conv_init']))
    return gen_model
def __init__(self, x_dim, z1_dim, z2_dim):
    """Create the two input projections and the Softplus MLP trunk.

    Args:
        x_dim: width of the final Linear layer's output.
        z1_dim: input width of ``self.fc`` (Linear with bias).
        z2_dim: input width of ``self.fu`` (Linear without bias).

    Side effects:
        Prints the module and stores a list of all parameters in
        ``self.faster_parameters``.
    """
    super(G, self).__init__()
    self.z1_dim = z1_dim
    self.z2_dim = z2_dim
    self.x_dim = x_dim

    # Hidden width is at least 250 and at least twice z1_dim.
    hid_d = max(250, 2*z1_dim)

    self.fc = nn.Linear(self.z1_dim, hid_d)
    self.fu = nn.Linear(self.z2_dim, hid_d, bias=False)

    # Four Softplus -> Linear(hid_d, hid_d) pairs, then Softplus and the
    # output projection to x_dim.
    trunk = []
    for _ in range(4):
        trunk.append(nn.Softplus())
        trunk.append(nn.Linear(hid_d, hid_d))
    trunk.append(nn.Softplus())
    trunk.append(nn.Linear(hid_d, self.x_dim))
    self.main = nn.Sequential(*trunk)

    print(self)
    self.faster_parameters = list(self.parameters())
def mnist(layers,  # pylint: disable=invalid-name
          activation="sigmoid",
          batch_size=128,
          mode="train"):
    """Mnist classification with a multi-layer perceptron.

    Args:
        layers: iterable of hidden-layer sizes; a 10-unit output layer is
            appended to it.
        activation: either "sigmoid" or "relu".
        batch_size: number of examples gathered per call of the returned
            function.
        mode: name of the attribute read from the loaded dataset object.

    Returns:
        A zero-argument function that gathers a random batch and returns
        ``_xent_loss`` of the network output against the batch labels.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """
    if activation == "relu":
        activation_op = tf.nn.relu
    elif activation == "sigmoid":
        activation_op = tf.sigmoid
    else:
        raise ValueError("{} activation not supported".format(activation))

    # Data: the selected split is held in the graph as constants.
    data = getattr(mnist_dataset.load_mnist(), mode)
    images = tf.reshape(
        tf.constant(data.images, dtype=tf.float32, name="MNIST_images"),
        [-1, 28, 28, 1])
    labels = tf.constant(data.labels, dtype=tf.int64, name="MNIST_labels")

    # Network: flatten each image, then apply the MLP.
    layer_sizes = list(layers) + [10]
    mlp = nn.MLP(layer_sizes,
                 activation=activation_op,
                 initializers=_nn_initializers)
    network = nn.Sequential([nn.BatchFlatten(), mlp])

    def build():
        # Indices are drawn uniformly from [0, data.num_examples).
        picks = tf.random_uniform([batch_size], 0, data.num_examples,
                                  tf.int64)
        sampled_images = tf.gather(images, picks)
        sampled_labels = tf.gather(labels, picks)
        return _xent_loss(network(sampled_images), sampled_labels)

    return build
def main():
    """Run a gradient check, then train a Linear + LogSoftMax model on MNIST.

    NOTE(review): the source this was recovered from had lost its line
    structure, so the extent of the training loop body below is a best
    guess -- confirm which statements belong inside the loop.
    """
    # do gradient check
    # Random 20x10 batch; the meaning of the two tolerances is defined by
    # grad_check_all (not visible here).
    batch = np.random.rand(20, 10)
    grad_check_all(batch, 1e-8, 1e-6)
    # load data
    mndata = MNIST("./../../../datasets/mnist")  # using python-mnist 3.0 package
    inputs,targets = mndata.load_training()
    # init
    # The input dimensionality is taken from the first training example.
    dim = len(inputs[0])
    model = nn.Sequential(dim, 1)
    model.add(nn.Linear(dim, dim))
    model.add(nn.LogSoftMax(dim))
    model.add(nn.CrossEntropy(dim))
    print(len(inputs), len(targets))
    N = len(inputs)
    for i in xrange(N):
        # Each example is passed as a column vector together with its target.
        model.forward(np.atleast_2d(inputs[i]).T, targets[i])
        model.backward([1])
        # NOTE(review): forward() is called without arguments here; what it
        # returns depends on this project's nn.Sequential -- confirm.
        loss = model.forward()
        print("Loss: ", loss)
def __init__(self, img_size=256, style_dim=64, num_domains=2,
             max_conv_dim=512):
    """Create the shared convolutional trunk and one linear head per domain.

    Args:
        img_size: sets the initial channel width (``2**14 // img_size``)
            and the number of downsampling blocks
            (``int(log2(img_size)) - 2``).
        style_dim: output width of every per-domain Linear head.
        num_domains: number of Linear heads placed in ``self.unshared``.
        max_conv_dim: upper bound on the channel width, which doubles at
            each downsampling block.
    """
    super().__init__()
    dim_in = 2**14 // img_size

    layers = [nn.Conv2d(3, dim_in, kernel_size=3, stride=1, padding=1)]
    for _ in range(int(np.log2(img_size)) - 2):
        dim_out = min(dim_in * 2, max_conv_dim)
        layers.append(ResBlk(dim_in, dim_out, downsample=True))
        dim_in = dim_out

    # Tail of the trunk: activation, 4x4 convolution without padding,
    # activation.
    layers.append(nn.LeakyReLU(0.2))
    layers.append(
        nn.Conv2d(dim_out, dim_out, kernel_size=4, stride=1, padding=0))
    layers.append(nn.LeakyReLU(0.2))
    self.shared = nn.Sequential(*layers)

    heads = [nn.Linear(dim_out, style_dim) for _ in range(num_domains)]
    self.unshared = nn.ModuleList(heads)
def init_gen_model(state):
    """Assemble the deconvolutional generator with a 4x4 seed feature map.

    Layout: Linear -> BatchNorm -> ReLU -> reshape to
    ``(batch, g_num_filters*4, 4, 4)``, three stride-2 4x4 deconvolution
    stages (each followed by BatchNorm and ReLU), and a final stride-2 4x4
    deconvolution with bias followed by tanh.

    Args:
        state: mapping that provides ``noise_size``, ``g_num_filters``,
            ``input_channels``, ``g_init`` and ``g_conv_init``.

    Returns:
        The populated ``nn.Sequential`` container.
    """
    gen_model = nn.Sequential()

    # Dense projection of the noise vector, reshaped into a 4x4 map.
    flat_units = state['g_num_filters'] * 4 * 4 * 4
    gen_model.add(
        nn.Linear(state['noise_size'], flat_units,
                  weight=state['g_init'], use_bias=False))
    gen_model.add(nn.BatchNorm(flat_units))
    gen_model.add(nn.Expression(T.nnet.relu))
    # The lambda reads `state` when it is called, not when it is defined.
    gen_model.add(
        nn.Expression(
            lambda x: T.reshape(
                x, (x.shape[0], state['g_num_filters'] * 4, 4, 4))))

    # (num_filters multiplier, num_channels multiplier) for each stage.
    # Spatial sizes annotated in the original: 8x8, 16x16, 32x32; the
    # BatchNorm widths are g_num_filters*4, *2 and *1 respectively.
    stage_multipliers = ((4, 4), (4, 2), (2, 1))
    for filters_mult, channels_mult in stage_multipliers:
        stage_channels = state['g_num_filters'] * channels_mult
        gen_model.add(
            nn.Deconvolutional(
                filter_size=(4, 4),
                num_filters=state['g_num_filters'] * filters_mult,
                num_channels=stage_channels,
                step=(2, 2),
                border_mode=(1, 1),
                use_bias=False,
                weight=state['g_conv_init']))
        gen_model.add(nn.BatchNorm(stage_channels))
        gen_model.add(nn.Expression(T.nnet.relu))

    # Output layer (annotated 64x64 in the original): bias enabled, tanh.
    gen_model.add(
        nn.Deconvolutional(filter_size=(4, 4),
                           num_filters=state['g_num_filters'],
                           num_channels=state['input_channels'],
                           step=(2, 2),
                           border_mode=(1, 1),
                           use_bias=True,
                           weight=state['g_conv_init']))
    gen_model.add(nn.Expression(T.tanh))
    return gen_model
def disc_shared_structure(state):
    """Assemble the shared convolutional trunk of the discriminator.

    Layout: optional Dropout (only when ``state['dropout'] > 0``), four
    stride-2 4x4 convolution stages without bias (each followed by
    BatchNorm and ReLU), and a final flatten to two dimensions. Only the
    first two convolutions are given explicit names.

    Args:
        state: mapping that provides ``dropout``, ``d_num_filters``,
            ``input_channels`` and ``d_conv_init``.

    Returns:
        The populated ``nn.Sequential`` container.
    """
    # inp_shape == (b, input_channels, 64, 64)
    model = nn.Sequential()
    if state['dropout'] > 0:
        model.add(nn.Dropout(state['dropout']))

    width = state['d_num_filters']
    # (num_filters, num_channels, extra keyword arguments) per stage.
    # Spatial sizes annotated in the original: 32, 16, 8, 4.
    stages = (
        (width, state['input_channels'], {'name': 'd_conv1'}),
        (width * 2, width, {'name': 'd_conv2'}),
        (width * 4, width * 2, {}),
        (width * 4, width * 4, {}),
    )
    for num_filters, num_channels, extra in stages:
        model.add(
            nn.Convolutional(filter_size=(4, 4),
                             num_filters=num_filters,
                             num_channels=num_channels,
                             step=(2, 2),
                             border_mode=(1, 1),
                             weight=state['d_conv_init'],
                             use_bias=False,
                             **extra))
        model.add(nn.BatchNorm(num_filters))
        # Plain ReLU is used; the original kept a LeakyRectify alternative
        # disabled.
        model.add(nn.Expression(T.nnet.relu))

    model.add(nn.Expression(lambda x: T.flatten(x, 2)))
    return model
def cifar10(path,  # pylint: disable=invalid-name
            conv_channels=None,
            linear_layers=None,
            batch_norm=True,
            batch_size=128,
            num_threads=4,
            min_queue_examples=1000,
            mode="train"):
    """Cifar10 classification with a convolutional network.

    Args:
        path: directory handed to ``_maybe_download_cifar10``; the binary
            batches are read from ``path/CIFAR10_FOLDER``.
        conv_channels: output channels for the convolutional layers.
        linear_layers: iterable of hidden sizes for the MLP; a 10-unit
            output layer is appended to it.
        batch_norm: whether to use batch normalisation in the convolutional
            and linear layers.
        batch_size: number of examples dequeued per call of the returned
            function.
        num_threads: number of enqueue ops registered with the queue runner.
        min_queue_examples: ``min_after_dequeue`` of the shuffle queue; its
            capacity is this value plus ``3 * batch_size``.
        mode: "train" or "test".

    Returns:
        A zero-argument function that dequeues a batch and returns
        ``_xent_loss`` of the network output against the batch labels.

    Raises:
        ValueError: if ``mode`` is neither "train" nor "test".
    """
    # Data.
    _maybe_download_cifar10(path)

    # Read images and labels from disk.
    data_dir = os.path.join(path, CIFAR10_FOLDER)
    if mode == "train":
        filenames = [
            os.path.join(data_dir, "data_batch_{}.bin".format(i))
            for i in xrange(1, 6)
        ]
    elif mode == "test":
        # The test batch ships in the same extracted folder as the training
        # batches, so it must be resolved relative to that folder too.
        filenames = [os.path.join(data_dir, "test_batch.bin")]
    else:
        raise ValueError("Mode {} not recognised".format(mode))

    # Each record is one label byte followed by depth*height*width image
    # bytes.
    depth = 3
    height = 32
    width = 32
    label_bytes = 1
    image_bytes = depth * height * width
    record_bytes = label_bytes + image_bytes
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, record = reader.read(tf.train.string_input_producer(filenames))
    record_data = tf.decode_raw(record, tf.uint8)

    label = tf.cast(tf.slice(record_data, [0], [label_bytes]), tf.int32)
    raw_image = tf.slice(record_data, [label_bytes], [image_bytes])
    image = tf.cast(tf.reshape(raw_image, [depth, height, width]),
                    tf.float32)
    # height x width x depth.
    image = tf.transpose(image, [1, 2, 0])
    image = tf.div(image, 255)

    queue = tf.RandomShuffleQueue(
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples,
        dtypes=[tf.float32, tf.int32],
        shapes=[image.get_shape(), label.get_shape()])
    enqueue_ops = [queue.enqueue([image, label])
                   for _ in xrange(num_threads)]
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))

    # Network.
    def _conv_activation(x):  # pylint: disable=invalid-name
        # ReLU followed by 2x2 max pooling with stride 2.
        return tf.nn.max_pool(tf.nn.relu(x),
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding="SAME")

    conv = nn.ConvNet2D(output_channels=conv_channels,
                        kernel_shapes=[5],
                        strides=[1],
                        paddings=[nn.SAME],
                        activation=_conv_activation,
                        activate_final=True,
                        initializers=_nn_initializers,
                        use_batch_norm=batch_norm)

    if batch_norm:
        linear_activation = lambda x: tf.nn.relu(nn.BatchNorm()(x))
    else:
        linear_activation = tf.nn.relu

    mlp = nn.MLP(list(linear_layers) + [10],
                 activation=linear_activation,
                 initializers=_nn_initializers)
    network = nn.Sequential([conv, nn.BatchFlatten(), mlp])

    def build():
        image_batch, label_batch = queue.dequeue_many(batch_size)
        # Labels are dequeued with a trailing unit dimension; flatten it.
        label_batch = tf.reshape(label_batch, [batch_size])

        output = network(image_batch)
        return _xent_loss(output, label_batch)

    return build
# Training script fragment for the qm7 dataset. `seed`, `split` and `mb` are
# defined outside the visible part of the script, and the training loop
# below may continue beyond what is shown here.
numpy.random.seed(seed)
# Download the dataset into the working directory on first use.
if not os.path.exists('qm7.mat'):
    os.system('wget http://www.quantum-machine.org/data/qm7.mat')
dataset = scipy.io.loadmat('qm7.mat')

# --------------------------------------------
# Extract training data
# --------------------------------------------
# Rows 0..4 of 'P' except row `split` are used and flattened into one index
# vector. NOTE: concatenating two range() results with `+` requires range()
# to return a list (Python 2).
P = dataset['P'][range(0,split)+range(split+1,5)].flatten()
X = dataset['X'][P]
T = dataset['T'][0,P]

# --------------------------------------------
# Create a neural network
# --------------------------------------------
I,O = nn.Input(X),nn.Output(T)
nnsgd = nn.Sequential([I,nn.Linear(I.nbout,400),nn.Sigmoid(),nn.Linear(400,100),nn.Sigmoid(),nn.Linear(100,O.nbinp),O])
# modules[-2] is the last Linear layer; its weights start at zero.
nnsgd.modules[-2].W *= 0
# Independent copy of the freshly initialised network (how it is used is
# not visible in this fragment).
nnavg = copy.deepcopy(nnsgd)

# --------------------------------------------
# Train the neural network
# --------------------------------------------
for i in range(1,1000001):

    # Step-wise schedule: the learning rate rises as the iteration count
    # passes 500, 2500 and 12500.
    if i > 0:
        lr = 0.001  # learning rate
    if i > 500:
        lr = 0.0025
    if i > 2500:
        lr = 0.005
    if i > 12500:
        lr = 0.01

    # Draw `mb` random row indices (with repetition) and run a forward pass.
    r = numpy.random.randint(0,len(X),[mb])
    Y = nnsgd.forward(X[r])
def main(X_train, y_train, X_test, y_test, X_val, y_val):
    """Train a single Linear(784, 10) layer with a cross-entropy criterion.

    Prints the initial training accuracy, then for each of ``NEPOCH``
    epochs runs ``NITER`` minibatch updates and prints the average loss,
    the last element of the gradient check, and train/validation accuracy.
    Finally prints the test accuracy.

    The learning rate starts at 1e-3 and is halved at epoch ``NEPOCH // 2``
    and again at epoch ``3 * NEPOCH // 4``.

    Args:
        X_train, y_train: training inputs and targets.
        X_test, y_test: test inputs and targets.
        X_val, y_val: validation inputs and targets.

    Returns:
        True.
    """
    model = nn.Sequential()
    model.add(nn.Linear(784, 10))
    model.add(nn.CrossEntropyCriterion())

    def evaluate(features, labels):
        # Mark the per-row maximum of the model output with 1 (0 elsewhere)
        # and score that indicator matrix against the labels.
        scores = model.forward(inputs=features, target=labels,
                               return_loss=False)
        row_max = np.amax(scores, axis=1).reshape((-1, 1))
        return accuracy_score((scores == row_max).astype(int), labels)

    print('Prediction accuracy at start: %-5.5f' % evaluate(X_train, y_train))

    learning_rate = 1e-3
    for epoch in xrange(NEPOCH):
        X, y = shuffle(X_train, y_train)

        if epoch in (NEPOCH // 2, 3 * NEPOCH // 4):
            learning_rate /= 2

        avg_loss = 0
        for it in xrange(NITER):
            # NOTE(review): each window starts at row `it`, so consecutive
            # minibatches overlap in all but one row -- confirm intended.
            inputs = X[it:it + BSIZE, :]
            target = y[it:it + BSIZE, :]
            avg_loss += model.forward(inputs=inputs, target=target)
            model.backward(learning_rate)
        avg_loss /= NITER

        # Gradient check on the last minibatch of the epoch.
        gcheck = model.gradient_check('Linear', inputs, target)[-1]

        accuracy_train = evaluate(X_train, y_train)
        accuracy_val = evaluate(X_val, y_val)
        print(
            'Epoch: %-5dLoss: %-10.5fGrads: %-10.5fAccuracy (train): %-10.5fAccuracy (val): %-10.5f'
            % (epoch, avg_loss, gcheck, accuracy_train, accuracy_val))

    print('Final accuracy on test_set: %-10.5f' % evaluate(X_test, y_test))
    return True
else: import glob for files in ['%s/*' % d for d in dirs]: for f in glob.glob(files): os.remove(f) pkl.dump(state, file('%s/state.pkl' % saveto, 'w')) np.random.seed(12345) ############################ # Init model & parameters ############################ # 1) Eneregy discriminator disc_model = nn.Sequential() disc_model.add(nn.Convolutional(filter_size=(3, 3), num_filters=state['d_num_filters'], num_channels=state['input_channels'], step=(1, 1), border_mode=(1, 1), weight=state['d_init'], use_bias=False, name='d_conv1')) disc_model.add(nn.BatchNorm(state['d_num_filters'], name='d_bn1')) disc_model.add(nn.Expression(T.nnet.relu)) # out_shape == (b, num_filters, 32, 32) disc_model.add(nn.Convolutional(filter_size=(4, 4), num_filters=state['d_num_filters']*2, num_channels=state['d_num_filters'], step=(2, 2), border_mode=(1, 1), weight=state['d_init'], use_bias=False,
def make_agent(env, **kwargs):
    """Build an A3C agent with small policy and critic networks for `env`.

    Args:
        env: environment exposing `action_space.n` and
            `observation_space.sample()`.
        **kwargs: forwarded to the `A3C` constructor. Because
            `gradients_queue` and `parameters_queue` have no defaults they
            must be supplied here.

    Returns:
        The constructed `A3C` agent.
    """
    # See https://stackoverflow.com/a/42506478
    import nn

    class A3C(Agent):
        """Agent that computes gradients locally and exchanges them, and the
        resulting parameters, through queues."""

        def __init__(self,
                     policy,
                     critic,
                     gradients_queue,
                     parameters_queue,
                     index=None,
                     t_max=5,
                     optimizer=None,
                     transitions=-1,
                     **kwargs):
            """Store the networks, queues and hyper-parameters.

            `optimizer` defaults to `nn.Adam()`. `t_max` is the number of
            stored transitions that triggers a learning step. `index`
            selects this agent's entry in `parameters_queue`.
            """
            super(A3C, self).__init__(transitions=transitions, **kwargs)
            self.policy = policy
            self.critic = critic
            self.optimizer = optimizer or nn.Adam()
            self.t_max = t_max
            self.gradients_queue = gradients_queue
            self.parameters_queue = parameters_queue
            self.index = index

        def act(self, state):
            """Sample an action index from the policy's output for `state`."""
            # state[None] adds a leading axis; [0] removes it from the output.
            probs = self.policy(state[None])[0].numpy()
            action = np.random.choice(len(probs), p=probs)
            return action

        def on_step_end(self):
            """Learn once exactly `t_max` transitions are stored."""
            if len(self.transitions) == self.t_max:
                self.learn()

        def on_episode_end(self):
            """Learn from whatever transitions remain at episode end."""
            if len(self.transitions) > 0:
                self.learn()

        def learn(self):
            """Compute gradients from the stored transitions and exchange them.

            Empties the transition store, builds the loss
            (`-policy_objective + critic_loss`), sends its gradients through
            the gradients queue, and then blocks on receiving parameters.
            """
            batch_size = len(self.transitions)
            data = self.transitions.get()
            self.transitions.reset()
            S, A, R, Snext, dones = data
            A = A.reshape([-1, 1])
            batch_shape = (batch_size, )
            gamma, policy, critic = self.gamma, self.policy, self.critic
            # If last state is not terminal then bootstrap from it
            if not dones[-1]:
                R[-1] += gamma * critic(
                    Snext[-1:])[0][0].numpy()  # handle batching
            G = self.compute_returns(R)
            # detach() keeps the critic out of the gradient of `deltas`.
            deltas = G - critic(S).detach().flatten()
            U.check_shape(deltas, batch_shape)
            with nn.GradientTape() as tape:
                # Policy Objective
                # Probability the policy assigns to each action taken.
                probs = policy(S).gather(A, batch_dims=1).flatten()
                U.check_shape(probs, batch_shape)
                policy_objective = deltas * probs.log()
                U.check_shape(policy_objective, batch_shape)
                policy_objective = policy_objective.mean()
                U.check_shape(policy_objective, ())
                # Critic Loss
                V = critic(S).flatten()
                U.check_shape(V, batch_shape)
                critic_loss = (G - V).pow(2).mean()
                U.check_shape(critic_loss, ())
                # Total Loss
                loss = -policy_objective + critic_loss
            grads = tape.gradient(loss, self.parameters)
            self.send_gradients(grads)
            self.receive_parameters()

        def send_gradients(self, grads):
            """Put `(index, grads)` on the gradients queue."""
            self.gradients_queue.put((self.index, grads))

        def receive_gradients(self):
            """Take one `(index, grads)` pair off the queue and apply it.

            Gradients that are `None` are not applied. Returns the pair.
            """
            i, grads = self.gradients_queue.get()
            if grads is not None:
                self.apply_gradients(grads)
            return i, grads

        def apply_gradients(self, grads):
            """Apply `grads` to this agent's parameters via the optimizer."""
            self.optimizer.apply_gradients(zip(grads, self.parameters))

        def get_weights(self):
            """Return `(policy_weights, critic_weights)`."""
            return self.policy.get_weights(), self.critic.get_weights()

        def set_weights(self, weights):
            """Load a `(policy_weights, critic_weights)` pair."""
            policy_weights, critic_weights = weights
            self.policy.set_weights(policy_weights)
            self.critic.set_weights(critic_weights)

        def send_parameters(self, i=None):
            """Put the current weights on every parameters queue, or only on
            queue `i` when `i` is given."""
            params = self.get_weights()
            if i is None:
                queues = self.parameters_queue
            else:
                # One-element slice so the loop below handles both cases.
                queues = self.parameters_queue[i:i + 1]
            for q in queues:
                q.put(params)

        def receive_parameters(self):
            """Take weights from this agent's own queue and load them."""
            params = self.parameters_queue[self.index].get()
            self.set_weights(params)

        @property
        def parameters(self):
            """Trainable variables of policy and critic, computed on first use.

            NOTE(review): relies on `self._parameters` already existing
            (presumably set to None by the `Agent` base class) -- confirm.
            """
            if self._parameters is None:
                params = self.policy.trainable_variables + self.critic.trainable_variables
                params = U.unique(params)
                self._parameters = params
            return self._parameters

    n_actions = env.action_space.n
    hidden = 16
    # Policy: one hidden ReLU layer, softmax over the actions.
    policy = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(n_actions, activation='softmax'),
    ])
    # Critic: one hidden ReLU layer, single linear output.
    critic = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(1),
    ])
    # initialize weights
    # One forward pass on a sampled observation for each network.
    state = env.observation_space.sample()
    policy(state[None])
    critic(state[None])
    agent = A3C(policy=policy, critic=critic, env=env, **kwargs)
    return agent
import os os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' train_data = scipy.io.loadmat('../data/nist36_train_set1.mat') train_x, train_y = train_data['train_data'], train_data['train_labels'] input_length = len(train_x[0]) batch_size = 30 num_epochs = 100 batches = get_random_batches(train_x, train_y, batch_size) model = nn.Sequential(nn.Linear(1024, 64), nn.Sigmoid(), nn.Linear(64, 36), nn.Softmax()) criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) loss_overall = [] accuracy_overall = [] for epoch in range(num_epochs): total_loss = 0 total_acc = 0 for xb, yb in batches: ## Converting np array to torch tensor
def __init__(self, in_channels, out_channels=1):
    """Create the head: dropout followed by a bias-free 1x1 convolution.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
    """
    super().__init__()
    dropout = nn.Dropout(0.5)
    pointwise = nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=1, padding=0, bias=False)
    self.main = nn.Sequential(dropout, pointwise)