def build(self, input_shape=(28, 28, 1), classes=10):
    inputs = keras.Input(shape=input_shape)
    outputs = conv2d(filters=6, kernel_size=(6, 6))(inputs)
    outputs = max_pooling2d(pool_size=(2, 2), strides=(2, 2))(outputs)
    outputs = sigmoid()(outputs)
    # Was applied to `inputs`, which silently discarded the first conv block.
    outputs = conv2d(filters=16, kernel_size=(6, 6))(outputs)
    outputs = max_pooling2d(pool_size=(2, 2), strides=(2, 2))(outputs)
    outputs = sigmoid()(outputs)
    outputs = flatten()(outputs)
    outputs = dense(120)(outputs)
    outputs = sigmoid()(outputs)
    outputs = dense(64)(outputs)  # note: classic LeNet-5 uses 84 units here
    outputs = sigmoid()(outputs)
    outputs = dense(classes)(outputs)
    outputs = softmax()(outputs)
    model = keras.Model(inputs, outputs)
    model.summary()
    return model
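# Assumed aliases (not in the original snippet): one plausible way the
# lowercase layer constructors used above could map onto stock Keras layers.
# With these in scope, build() runs as written.
from tensorflow import keras

conv2d = keras.layers.Conv2D
max_pooling2d = keras.layers.MaxPooling2D
flatten = keras.layers.Flatten
dense = keras.layers.Dense

def sigmoid():
    return keras.layers.Activation("sigmoid")

def softmax():
    return keras.layers.Activation("softmax")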
def __call__(self, input, prev_read):
    layer = self.layers  # alias
    daddress = self.daddress  # unused in this simplified path
    dmemory = self.dmemory    # unused in this simplified path
    I = layer['INPUT'](input)
    P = layer['PREVIOUS_READ'](prev_read)
    C = layer['CONTROL_KEY'](np.concatenate([I, P]))
    # Simplified addressing: the first two control values are used directly as
    # read/write addresses, erase is fixed to 1, and the add vector is the
    # embedded input.
    address_r = C[0]
    address_w = C[1]
    erase = 1
    add = I
    output = sigmoid(layer['OUTPUT'](C[2:2 + self.doutput]))
    # Fuller gated/content-based addressing, kept for reference
    # (unbalanced parentheses in the first two lines fixed):
    # content_r = layer['CONTENT_KEY_R'](C)
    # content_w = layer['CONTENT_KEY_W'](C)
    # loc_r = layer['LOCATION_R'](C)
    # loc_w = layer['LOCATION_W'](C)
    # g_r = sigmoid(layer['GATE_R'](C))
    # g_w = sigmoid(layer['GATE_W'](C))
    # address_r = loc_r * g_r + (1 - g_r) * layer['HASH'](content_r)
    # address_w = loc_w * g_w + (1 - g_w) * layer['HASH'](content_w)
    # erase = sigmoid(layer['ERASE'](C))
    # add = layer['ADD'](C)
    # output = sigmoid(layer['OUTPUT'](C))
    return address_r, address_w, erase, add, output
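# A minimal sketch (an assumption, not part of the original class) of how the
# five values returned by __call__ would drive an NTM-style memory step.
# Assumed shapes: `memory` is (daddress, dmemory); address_r and address_w are
# soft attention weights over the daddress slots; erase and add are vectors of
# width dmemory (erase may also be the scalar 1, as returned above).
import numpy as np

def memory_step(memory, address_r, address_w, erase, add):
    erase = np.broadcast_to(erase, (memory.shape[1],))  # scalar 1 -> full-width vector
    # Write: scale each slot by how strongly it is addressed and erased,
    # then blend in the new content.
    memory = memory * (1 - np.outer(address_w, erase)) + np.outer(address_w, add)
    # Read: the attention-weighted mix of slots becomes the next prev_read.
    read = address_r @ memory
    return memory, read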
def activate(self, param):
    inp = self.result
    with tf.name_scope('activation_' + str(self.layernum)):
        if param == 0:
            res = L.relu(inp, name='relu_' + str(self.layernum))
        elif param == 1:
            res = L.lrelu(inp, name='lrelu_' + str(self.layernum))
        elif param == 2:
            res = L.elu(inp, name='elu_' + str(self.layernum))
        elif param == 3:
            res = L.tanh(inp, name='tanh_' + str(self.layernum))
        elif param == 4:
            # Max-Feature-Map halves the channel dimension
            self.inpsize[-1] = self.inpsize[-1] // 2
            res = L.MFM(inp, self.inpsize[-1], name='mfm_' + str(self.layernum))
        elif param == 5:
            # Fully connected variant of MFM, likewise halves the width
            self.inpsize[-1] = self.inpsize[-1] // 2
            res = L.MFMfc(inp, self.inpsize[-1], name='mfm_' + str(self.layernum))
        elif param == 6:
            res = L.sigmoid(inp, name='sigmoid_' + str(self.layernum))
        else:
            res = inp
        self.result = res
    return self.result
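# Usage note for activate(): the integer code selects the activation applied
# to self.result. Summarizing the dispatch above:
#   0 -> relu    1 -> leaky relu    2 -> elu    3 -> tanh
#   4 -> MFM (conv, halves channels)    5 -> MFM (fully connected)
#   6 -> sigmoid    anything else -> identity (pass-through)
# e.g. (hypothetical instance) net.activate(6) wraps the current tensor in a
# sigmoid and stores the result back into net.result.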
def glu(input, dim=-1):
    """
    The Gated Linear Units (GLU) layer, composed of a split, a sigmoid
    activation, and an element-wise multiplication. Specifically, it splits
    the input into two equal-sized parts, :math:`a` and :math:`b`, along the
    given dimension, and then computes:

    .. math::

        {GLU}(a, b) = a \otimes \sigma(b)

    Refer to `Language Modeling with Gated Convolutional Networks
    <https://arxiv.org/pdf/1612.08083.pdf>`_.

    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int): The dimension along which to split. If :math:`dim < 0`,
            the dimension to split along is :math:`rank(input) + dim`.
            Default -1.

    Returns:
        Variable: Variable with half the size of the input along ``dim``.

    Examples:
        .. code-block:: python

            data = fluid.layers.data(name="words", shape=[3, 6, 9], dtype="float32")
            output = fluid.nets.glu(input=data, dim=1)
            # shape of output: [3, 3, 9]
    """
    a, b = layers.split(input, num_or_sections=2, dim=dim)
    act_b = layers.sigmoid(x=b)
    out = layers.elementwise_mul(x=a, y=act_b)
    return out
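# For intuition, a NumPy re-statement of the same GLU computation (an
# illustration, not part of the fluid API):
import numpy as np

def glu_numpy(x, axis=-1):
    a, b = np.split(x, 2, axis=axis)        # two equal halves along `axis`
    return a * (1.0 / (1.0 + np.exp(-b)))   # a * sigmoid(b)

# e.g. an input of shape (3, 6, 9) split along axis 1 yields shape (3, 3, 9),
# matching the docstring example above.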
def build(self, input_shape=(28, 28, 1), classes=10):
    inputs = keras.Input(shape=input_shape)
    outputs = flatten()(inputs)
    outputs = dense(300)(outputs)
    outputs = sigmoid()(outputs)
    outputs = dense(100)(outputs)
    outputs = sigmoid()(outputs)
    outputs = dense(classes)(outputs)  # was hard-coded dense(10), ignoring `classes`
    outputs = softmax()(outputs)
    model = keras.Model(inputs, outputs)
    model.summary()
    return model
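# The two training snippets below (denseTest and listTest) map a module-level
# function `test` over the data to produce labels. It is not defined in this
# file; a hypothetical stand-in so the snippets can run (purely illustrative,
# not the original labeling rule):
def test(datum):
    # Label a 2-D point by whether its coordinates sum past 1.
    return 1.0 if datum[0] + datum[1] > 1.0 else 0.0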
def denseTest():
    data = np.random.random((100, 2))
    labels = np.array(list(map(test, data)))
    dense = layers.denseLayer(2, 1)
    sig = layers.sigmoid()
    cost = layers.sumSquareError()
    for i in range(1000):
        itError = 0
        for datum, label in zip(data, labels):
            # Forward pass through the single dense + sigmoid stack
            value_pre = dense.eval(datum)
            value = sig.eval(value_pre)
            error = cost.eval(value, label)
            itError += error
            # Backward pass: propagate the cost derivative layer by layer
            Derror = cost.setUpdate(value, label)
            Derror = sig.setUpdate(value_pre, Derror)
            dense.setUpdate(datum, Derror)
        print("Iteration ", i, " Error : ", itError)
        dense.doUpdate()
    print(dense.weights)
    print(dense.bias)
def listTest():
    data = np.random.random((100, 2))
    labels = np.array(list(map(test, data)))
    lays = []
    for i in range(5):
        lays.append(layers.denseLayer(2, 2))
        lays.append(layers.sigmoid())
    out = layers.sumSquareError()
    for iteration in range(100):
        IterationError = 0
        for datum, label in zip(data, labels):
            # res[i] holds the input to lays[i]; res[-1] is the network output.
            # Was res = [lays[0].eval(datum)], which ran the first layer twice.
            res = [datum]
            for lay in lays:
                res.append(lay.eval(res[-1]))
            E = out.eval(res[-1], label)
            IterationError += E
            dE = out.inputDerivatives(res[-1], label)
            for i in range(len(lays) - 1, -1, -1):
                dE = lays[i].update(res[i], dE)
        print(IterationError)
def main(N=300, K=3, D=2, nodes=100, lr=1e-3, reg=1e-8):
    """Main"""
    # Generate and plot data set
    X, Y = gen_data(N, K, D)
    print("Plotting data...")
    col_levels = np.array(list(range(K + 1)), dtype=float) - 0.5  # np.float is removed in modern NumPy
    col_cmap = plt.cm.gist_rainbow
    col_norm = col.BoundaryNorm(col_levels, col_cmap.N)
    plt.ion()
    plt.subplot(1, 1, 1)
    # vmin/vmax dropped: they conflict with an explicit norm in modern matplotlib
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=col_cmap, norm=col_norm)
    plt.draw()
    input("Press <ENTER> to continue.")

    # Set up layers
    layers = []
    layers += [L.input(X)]
    layers += [L.fc(layers[-1].Y, nodes)]
    layers += [L.sigmoid(layers[-1].Y)]
    layers += [L.dropout(layers[-1].Y, 0.25)]
    layers += [L.fc(layers[-1].Y, nodes)]
    layers += [L.sigmoid(layers[-1].Y)]
    layers += [L.dropout(layers[-1].Y, 0.25)]
    layers += [L.fc(layers[-1].Y, K)]
    layers += [L.softmax(layers[-1].Y)]
    layers += [L.loss(layers[-1].Y, Y)]
    nlayers = len(layers)

    # TODO (architecture): Instead of calling fwd on each layer, connect layers
    # with "pointers" and call fwd only on the first layer
    try:
        itx = 1
        while True:
            # Forward propagation
            for i, layer in enumerate(layers):
                if i == 0:
                    layer.X = X
                else:
                    layer.reshape(layers[i - 1].Y.shape)
                    layer.X = layers[i - 1].Y
                layer.fwd()
            if np.isnan(layers[-1].Y[0, 0]):
                pdb.set_trace()
            # np.asscalar was removed in NumPy 1.23; .item() is the replacement
            print("Iteration {}, Loss = {:.4f}".format(
                itx, layers[-1].Y.item()), end='\r')
            if itx % 1000 == 0:
                print("")

            # Backprop
            for i in list(range(nlayers))[::-1]:
                if i == nlayers - 1:
                    layers[i].dy = 1
                else:
                    layers[i].dy = layers[i + 1].dx
                layers[i].bck()
                if itx % 5000 == 0:
                    # Gradient check: compare the analytic gradient at a random
                    # entry of this layer's input against a central difference
                    if np.all(layers[i].dx == 0):
                        continue
                    r, c = [np.random.choice(layers[i].X.shape[j]) for j in (0, 1)]
                    h = 1e-4
                    if abs(layers[i].dx[r, c]) < 1e-5:
                        continue
                    print("Checking gradient on {}...".format(layers[i]), end=' ')
                    X_store = layers[i].X
                    Y_ = []
                    for X_ in [layers[i].X[r, c] + s * h for s in (-1, 1)]:
                        layers[i].X[r, c] = X_
                        for j in range(i, nlayers):
                            if j > i:
                                layers[j].X = layers[j - 1].Y
                            # Disable stochastic behavior (e.g. dropout) so the
                            # two perturbed passes are comparable
                            stochastic_store = layers[j].stochastic
                            layers[j].stochastic = False
                            layers[j].fwd()
                            layers[j].stochastic = stochastic_store
                        Y_.append(layers[-1].Y.item())
                    layers[i].X = X_store
                    dx = layers[i].dx[r, c]
                    ndx = (Y_[1] - Y_[0]) / (2 * h)
                    diff = abs(ndx - dx) / max(abs(ndx), abs(dx), 1e-10)
                    print("Diff: {:.8f}".format(diff))
                    if diff > 1e-2:
                        pdb.set_trace()

            for layer in layers:
                layer.step(lr, reg)

            # Periodically plot the decision boundary over a dense grid
            if itx % 1000 == 0:
                range_ = [np.max(X[:, i]) - np.min(X[:, i]) for i in (0, 1)]
                x, y = [np.linspace(np.min(X[:, i]) - range_[i] / 2,
                                    np.max(X[:, i]) + range_[i] / 2, 400)
                        for i in (0, 1)]
                xx, yy = np.meshgrid(x, y)
                X_ = np.c_[xx.flatten(), yy.flatten()]
                for i, layer in enumerate(layers[:-1]):
                    if i == 0:
                        layer.X = X_
                    else:
                        layer.reshape(layers[i - 1].Y.shape)
                        layer.X = layers[i - 1].Y
                    temp = layer.stochastic
                    layer.stochastic = False
                    layer.fwd()
                    layer.stochastic = temp
                z = np.argmax(layers[-2].Y, axis=1).reshape(xx.shape)
                plt.clf()
                plt.contourf(xx, yy, z, levels=col_levels, cmap=col_cmap, norm=col_norm)
                plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=col_cmap, norm=col_norm)
                plt.draw()
                plt.pause(1e-10)
            itx += 1
    except KeyboardInterrupt:
        # print(layers[-2].Y)
        pass
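# Entry-point sketch: run the training loop with its defaults; Ctrl-C raises
# KeyboardInterrupt, which the handler above uses to exit cleanly.
if __name__ == "__main__":
    main()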