Exemplo n.º 1
0
    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        """
        Create the non-linearity and the three linear maps of the cell.

        :param n_inputs: dimension of inputs
        :param n_hidden: dimension of hidden layer
        :param n_output: dimension of output (token)
        :param activation: name of the non-linearity, 'sigmoid' or 'tanh'
        :raises Exception: when ``activation`` is neither of the two names
        """
        super().__init__()
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        # Reject unknown names first, then choose between the two known ones.
        if activation not in ('sigmoid', 'tanh'):
            raise Exception("Non-linearity not found")
        self.activation = Sigmoid() if activation == 'sigmoid' else Tanh()

        # input->hidden, hidden->hidden and hidden->output projections
        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)

        # Register the parameters of every projection on this layer.
        for projection in (self.w_ih, self.w_hh, self.w_ho):
            self.parameters += projection.get_parameters()
Exemplo n.º 2
0
    def _init_params(self):
        """Reset the derived-variable cache and instantiate every sub-layer."""
        self._dv = {}

        def passthrough():
            # Each component receives its own Affine(slope=1, intercept=0).
            return Affine(slope=1, intercept=0)

        # Causally padded, dilated convolution (kernel width 2).
        dilated_kwargs = dict(
            stride=1,
            pad="causal",
            init=self.init,
            kernel_width=2,
            dilation=self.dilation,
            out_ch=self.ch_dilation,
            optimizer=self.optimizer,
            act_fn=passthrough(),
        )
        self.conv_dilation = Conv1D(**dilated_kwargs)

        # Gate: tanh branch and sigmoid branch combined by a Multiply layer.
        self.tanh = Tanh()
        self.sigm = Sigmoid()
        self.multiply_gate = Multiply(act_fn=passthrough())

        # Width-1 convolution applied to the gated signal.
        pointwise_kwargs = dict(
            stride=1,
            pad="same",
            dilation=0,
            init=self.init,
            kernel_width=1,
            out_ch=self.ch_residual,
            optimizer=self.optimizer,
            act_fn=passthrough(),
        )
        self.conv_1x1 = Conv1D(**pointwise_kwargs)

        # Summation layers for the residual (main) path and the skip path.
        self.add_residual = Add(act_fn=passthrough())
        self.add_skip = Add(act_fn=passthrough())
Exemplo n.º 3
0
    def set_params(self, summary_dict):
        """
        Restore parameters and hyperparameters from a summary dictionary.

        Parameters
        ----------
        summary_dict : dict
            Must contain the keys "parameters" and "hyperparameters". In
            either sub-dict, an entry named "components" maps component ids
            to per-component dicts; each one whose id is listed in
            ``self.hyperparameters["component_ids"]`` is forwarded to that
            component's own `set_params`. Every other entry is copied only if
            its key already exists on this object.

        Notes
        -----
        An "act_fn" hyperparameter given as a string ("ReLU", "Sigmoid",
        "Tanh", "Affine(slope=..., intercept=...)" or
        "Leaky ReLU(alpha=...)") is converted to the matching activation
        object. Any other value is stored unchanged.
        """
        cids = self.hyperparameters["component_ids"]

        for k, v in summary_dict["parameters"].items():
            if k == "components":
                for c, cd in v.items():
                    if c in cids:
                        getattr(self, c).set_params(cd)

            elif k in self.parameters:
                self.parameters[k] = v

        for k, v in summary_dict["hyperparameters"].items():
            if k == "components":
                for c, cd in v.items():
                    if c in cids:
                        getattr(self, c).set_params(cd)

            if k not in self.hyperparameters:
                continue

            # Only string-valued "act_fn" entries need to be parsed. The
            # original compared `v` (not `k`) against "act_fn" for every
            # branch after ReLU, so those branches could never be taken.
            if k != "act_fn" or not isinstance(v, str):
                self.hyperparameters[k] = v
            elif v == "ReLU":
                self.hyperparameters[k] = ReLU()
            elif v == "Sigmoid":
                self.hyperparameters[k] = Sigmoid()
            elif v == "Tanh":
                self.hyperparameters[k] = Tanh()
            elif "Affine" in v:
                r = r"Affine\(slope=(.*), intercept=(.*)\)"
                slope, intercept = re.match(r, v).groups()
                self.hyperparameters[k] = Affine(float(slope),
                                                 float(intercept))
            elif "Leaky ReLU" in v:
                r = r"Leaky ReLU\(alpha=(.*)\)"
                alpha = re.match(r, v).groups()[0]
                self.hyperparameters[k] = LeakyReLU(float(alpha))
            else:
                self.hyperparameters[k] = v
Exemplo n.º 4
0
def test_tanh_grad(N=None):
    """
    Check `Tanh.grad` against the gradient of `torch.tanh` on random inputs.

    Runs `N` random trials. When `N` is None the trial limit is infinite, so
    the loop only stops if an assertion fails.
    """
    from activations import Tanh

    trial_limit = N if N is not None else np.inf

    ours = Tanh()
    reference = torch_gradient_generator(torch.tanh)

    completed = 0
    while completed < trial_limit:
        # draw the number of examples first, then the number of dimensions
        shape = (np.random.randint(1, 100), np.random.randint(1, 100))
        z = random_tensor(shape)
        assert_almost_equal(ours.grad(z), reference(z))
        print("PASSED")
        completed += 1
Exemplo n.º 5
0
def build_model():
    """Return a `Sequential` model with an MSE criterion and a 2-25-25-25-2 layer stack."""
    model = Sequential(MSE(), input_size=2)

    # (layer class, constructor arguments), in the order they are stacked
    layer_specs = [
        (Linear, (2, 25)),
        (ReLU, (25,)),
        (Linear, (25, 25)),
        (ReLU, (25,)),
        (Linear, (25, 25)),
        (Tanh, (25,)),
        (Linear, (25, 2)),
    ]
    for layer_cls, ctor_args in layer_specs:
        model.add_layer(layer_cls(*ctor_args))

    return model
Exemplo n.º 6
0
    def __init__(self) -> None:
        """Create the Tanh activation and three Linear layers (784-16-16-10)."""
        super().__init__()
        self.activation = Tanh()

        # A Conv2D -> MaxPool -> Conv2D front end (layers 1-3) is disabled;
        # only the fully connected layers 4-6 are instantiated.
        self.layer4, self.layer5, self.layer6 = (
            Linear(784, 16),
            Linear(16, 16),
            Linear(16, 10),
        )
Exemplo n.º 7
0
def main():
    """Train a dense network on the `ParityBits(8)` data, print a result table and plot the loss."""
    # Load the data and build the model. The input dimension is the length
    # of one sample; because the dataset is small, the batch size is simply
    # the total number of samples.
    x, y = ParityBits(8).load_data()
    batch_size, input_dim = x.shape
    layers = [
        Dense(64, activation=ReLU()),
        Dense(32, activation=Tanh()),
        Dense(16, activation=Tanh()),
        Dense(4, activation=None),
        Dense(1, activation=Sigmoid()),
    ]
    # Gradient descent is the optimizer and MSE is the loss function.
    model = Sequential(
        layers,
        input_dim=input_dim,
        optimizer=GradientDescent(learning_rate=0.01, momentum=0.0),
        loss=MeanSquaredError())

    # Train for the configured number of epochs; training returns the
    # predictions together with the loss of every epoch.
    y_pred, losses = model.train(
        x, y, batch_size=batch_size, epochs=200, verbose_step=10)

    # The answers are the integers 0 or 1, so the final result is the
    # prediction rounded to the nearest integer.
    result = np.around(y_pred).astype(int)
    # Element-wise difference between the answers and the rounded result.
    diff = np.subtract(y, result)

    table = {
        # Each input row such as `[0 0 0 0 0 0 0 0]` is converted to a
        # string, because a DataFrame cell cannot hold an array.
        "Data": [np.array_str(row) for row in x],
        "Answer": y[:, 0],
        "Prediction": [f'{v:.8f}' for v in y_pred[:, 0]],
        "Result": result[:, 0],
        # A difference of 0 means the prediction is correct.
        "Correct": [bool(v == 0) for v in diff[:, 0]],
    }
    row_labels = np.arange(1, len(x) + 1)
    print(pd.DataFrame(table, index=row_labels).to_string())

    # Report the final loss and how many predictions differ from the
    # answers, then plot the loss of every epoch.
    print(f'loss: {losses[-1]:.8f}, difference: {np.count_nonzero(diff)}')
    plt.figure(figsize=(8, 4))
    plt.plot(losses)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
Exemplo n.º 8
0
def main():
    """Load the MNIST CSV files, then train, plot, test and save an MLP."""
    print('Loading data')
    dset = Dataset("mnist_train.csv", "mnist_test.csv", 0.10, 10).create_dataset()
    print('Data loaded')

    side = 28  # width and height of an MNIST image
    n_classes = 10  # digits 0, 1, ..., 9
    n_pixels = side * side

    # One entry per hidden layer; each number is that layer's neuron count.
    # This gives three hidden layers of size 128, 128 and 64.
    hidden_sizes = [128, 128, 64]

    # One activation per layer, i.e. len(hidden_sizes) + 1 entries. Because
    # cross entropy is used, the last activation should be the identity.
    layer_activations = [Sigmoid(), Tanh(), Sigmoid(), Identity()]

    learning_rate = 0.1
    n_epochs = 100
    samples_per_batch = 784

    # Build the MLP model.
    network = MLP(
        input_size=n_pixels,
        output_size=n_classes,
        hiddens=hidden_sizes,
        activations=layer_activations,
        criterion=SoftmaxCrossEntropy(),
        lr=learning_rate,
    )

    # Train, plot, evaluate, then persist the trained weights.
    runner = train_test(network, dset, n_epochs, samples_per_batch)
    runner.train_network()
    runner.plots()
    runner.test_network()
    runner.save('weights.npz')
Exemplo n.º 9
0
class RNNCell(Layer):
    """
    Vanilla RNN implementation
    Hidden(t) = Activation(Linear(Hidden(t-1)) + Linear(Input(t)))
    Output(t) = Linear(Hidden(t))
    """
    def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
        """
        Create the non-linearity and the three linear maps of the cell.

        :param n_inputs: dimension of inputs
        :param n_hidden: dimension of hidden layer
        :param n_output: dimension of output (token)
        :param activation: name of the non-linearity, 'sigmoid' or 'tanh'
        :raises Exception: when ``activation`` is neither of the two names
        """
        super().__init__()
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        # Reject unknown names first, then choose between the two known ones.
        if activation not in ('sigmoid', 'tanh'):
            raise Exception("Non-linearity not found")
        self.activation = Sigmoid() if activation == 'sigmoid' else Tanh()

        # input->hidden, hidden->hidden and hidden->output projections
        self.w_ih = Linear(n_inputs, n_hidden)
        self.w_hh = Linear(n_hidden, n_hidden)
        self.w_ho = Linear(n_hidden, n_output)

        # Register the parameters of every projection on this layer.
        for projection in (self.w_ih, self.w_hh, self.w_ho):
            self.parameters += projection.get_parameters()

    def forward(self, input_tensor, hidden):
        """ Run one step and return ``(output, new_hidden)`` """
        # The recurrent term is evaluated before the input term.
        recurrent_term = self.w_hh.forward(hidden)
        pre_activation = self.w_ih.forward(input_tensor) + recurrent_term
        new_hidden = self.activation.forward(pre_activation)
        return self.w_ho.forward(new_hidden), new_hidden

    def init_hidden(self, batch_size=1):
        """ Return an all-zero initial hidden state of shape (batch_size, n_hidden) """
        zeros = np.zeros((batch_size, self.n_hidden))
        return Tensor(zeros, autograd=True)
Exemplo n.º 10
0
    def __init__(
        self,
        n_out,
        act_fn=None,
        gate_fn=None,
        merge_mode="concat",
        init="glorot_uniform",
        optimizer=None,
    ):
        """
        A single bidirectional long short-term memory (LSTM) layer.

        Parameters
        ----------
        n_out : int
            The dimension of a single hidden state / output on a given timestep
        act_fn : `activations.Activation` instance (default: None)
            The activation function for computing A[t]. A `Tanh` instance is
            created when this is None.
        gate_fn : `activations.Activation` instance (default: None)
            The gate function for computing the update, forget, and output
            gates. A `Sigmoid` instance is created when this is None.
        merge_mode : str (default: "concat")
            Mode by which outputs of the forward and backward LSTMs will be
            combined. Valid values are {"sum", "multiply", "concat", "average"}.
        init : str (default: 'glorot_uniform')
            The weight initialization strategy. Valid entries are
            {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
        optimizer : str or `OptimizerBase` instance (default: None)
            The optimization strategy to use when performing gradient updates
            within the `update` method.  If `None`, use the `SGD` optimizer with
            default parameters.
        """
        super().__init__()

        # Substitute the default activations where none was supplied.
        if act_fn is None:
            act_fn = Tanh()
        if gate_fn is None:
            gate_fn = Sigmoid()

        self.init = init
        self.n_in = None  # no input dimension is recorded at construction
        self.n_out = n_out
        self.optimizer = optimizer
        self.merge_mode = merge_mode
        self.act_fn = act_fn
        self.gate_fn = gate_fn

        self._init_params()
Exemplo n.º 11
0
 def init_from_str(self, act_str):
     """
     Build an activation object from its (case-insensitive) string name.

     Recognised forms are "relu", "tanh", "sigmoid",
     "affine(slope=<float>, intercept=<float>)" and
     "leaky relu(alpha=<float>)".

     Parameters
     ----------
     act_str : str
         Textual description of the activation.

     Returns
     -------
     The activation object matching ``act_str``.

     Raises
     ------
     ValueError
         If the name is unknown, or if an "affine" / "leaky relu" string
         does not follow the parenthesised form above.
     """
     act_str = act_str.lower()
     if act_str == "relu":
         act_fn = ReLU()
     elif act_str == "tanh":
         act_fn = Tanh()
     elif act_str == "sigmoid":
         act_fn = Sigmoid()
     elif "affine" in act_str:
         r = r"affine\(slope=(.*), intercept=(.*)\)"
         match = re.match(r, act_str)
         # re.match returns None for a malformed string; report that
         # explicitly instead of failing on `None.groups()`.
         if match is None:
             raise ValueError(
                 "Malformed affine activation: {}".format(act_str))
         slope, intercept = match.groups()
         act_fn = Affine(float(slope), float(intercept))
     elif "leaky relu" in act_str:
         r = r"leaky relu\(alpha=(.*)\)"
         match = re.match(r, act_str)
         if match is None:
             raise ValueError(
                 "Malformed leaky relu activation: {}".format(act_str))
         alpha = match.groups()[0]
         act_fn = LeakyReLU(float(alpha))
     else:
         raise ValueError("Unknown activation: {}".format(act_str))
     return act_fn
Exemplo n.º 12
0
def plot_activations():
    """Plot six activations with their first and second derivatives and save the figure to plot.png."""
    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True)
    fns = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU()]

    for panel, fn in zip(axes.flatten(), fns):
        X = np.linspace(-3, 3, 100).astype(float).reshape(100, 1)

        # the function itself, then its first and second derivative
        curves = (
            (fn, r"$y$"),
            (fn.grad, r"$\frac{dy}{dx}$"),
            (fn.grad2, r"$\frac{d^2 y}{dx^2}$"),
        )
        for curve, legend_label in curves:
            panel.plot(X, curve(X), label=legend_label, alpha=0.7)

        # dashed reference axes through the origin
        panel.hlines(0, -3, 3, lw=1, linestyles="dashed", color="k")
        panel.vlines(0, -1.2, 1.2, lw=1, linestyles="dashed", color="k")

        panel.set_ylim(-1.1, 1.1)
        panel.set_xlim(-3, 3)
        panel.set_xticks([])
        panel.set_yticks([-1, 0, 1])
        panel.xaxis.set_visible(False)
        panel.set_title("{}".format(fn))
        panel.legend(frameon=False)
        sns.despine(left=True, bottom=True)

    fig.set_size_inches(8, 5)
    plt.tight_layout()
    plt.savefig("plot.png", dpi=300)
    plt.close("all")
Exemplo n.º 13
0
def cross_val_results(verbose=True):
    """
    Function for generating the accuracy results of four models presented in the report with their best parameters,
    averaged over 10 runs and using different combinations of the available optimizers and loss

    Ten (train, test) dataset pairs are generated once. Every
    (Optimizer, Loss, Model) combination is then trained on a fresh deep copy
    of the model for each pair, and its test accuracy is recorded.

    :param verbose: whether to print average results for each (Model, Optimizer, Loss) combination,
                    boolean, optional, default is True

    :returns: list of tuples containing (mean, std) of each (Model, Optimizer, Loss) combination, each tuple in [0, 1]^2
    """

    datasets = [(generate_disc_set(1000), generate_disc_set(1000))
                for _ in range(10)]

    relu_model = Sequential(Linear(2, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 2), xavier_init=True)

    leaky_relu_model = Sequential(Linear(2, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 2), xavier_init=True)

    tanh_model = Sequential(Linear(2, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 2), xavier_init=True)

    sigmoid_model = Sequential(Linear(2, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 2))

    models = [relu_model, leaky_relu_model, tanh_model, sigmoid_model]

    final_scores = []

    optimizers_names = ["SGD", "Adam"]
    models_names = ["ReLU", "Leaky", "Tanh", "Sigmoid"]

    losses_names = ["MSE", "CrossEntropy"]
    losses = [LossMSE(), LossCrossEntropy()]

    adam_params = {"ReLU": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08},
                   "Leaky": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08},
                   "Tanh": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08},
                   "Sigmoid": {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08}}

    sgd_params = {"ReLU": {"lr": 0.001},
                  "Leaky": {"lr": 0.001},
                  "Tanh": {"lr": 0.001},
                  "Sigmoid": {"lr": 0.01}}

    for optim_name in optimizers_names:
        for loss_name, loss in zip(losses_names, losses):
            for model_name, model in zip(models_names, models):
                if verbose:
                    print("Validating model {} with {} and {} loss...".format(model_name, optim_name, loss_name),
                          end='')
                scores = []

                if optim_name == "Adam":
                    params = adam_params[model_name]
                    optim = Adam(model, criterion=loss, nb_epochs=50, mini_batch_size=10, lr=params["lr"],
                                 b1=params["b1"], b2=params["b2"], epsilon=params["epsilon"])
                else:
                    params = sgd_params[model_name]
                    # Build the optimizer from the model under evaluation,
                    # as the Adam branch does (this was `relu_model`).
                    optim = SGD(model, criterion=loss, nb_epochs=50, mini_batch_size=10, lr=params["lr"])

                for ((train_input, train_target), (test_input, test_target)) in datasets:
                    # Train an untouched copy so runs do not share weights.
                    optim.model = copy.deepcopy(model)

                    optim.train(train_input, train_target, verbose=False)

                    evaluator = Evaluator(optim.model)
                    accuracy = evaluator.compute_accuracy(test_input, test_target)

                    scores.append(accuracy)

                scores = torch.FloatTensor(scores)
                scores_mean = torch.mean(scores).item()
                scores_std = torch.std(scores).item()

                if verbose:
                    print("Score : {0:.3f} (+/- {1:.3f}) ".format(scores_mean, scores_std))

                final_scores.append((scores_mean, scores_std))

    return final_scores
Exemplo n.º 14
0
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = np_utils.to_categorical(y_train)
y_train = y_train.reshape(y_train.shape[0], 10, 1)

# same for test data: 10000 samples
x_test = x_test.reshape(x_test.shape[0], 28 * 28, 1)
x_test = x_test.astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)
y_test = y_test.reshape(y_test.shape[0], 10, 1)

# neural network
network = [Dense(28 * 28, 40), Tanh(), Dense(40, 10), Tanh()]

epochs = 100
learning_rate = 0.1

# train
for e in range(epochs):
    error = 0
    # train on 1000 samples, since we're not training on GPU...
    for x, y in zip(x_train[:1000], y_train[:1000]):
        # forward
        output = x
        for layer in network:
            output = layer.forward(output)

        # error
import matplotlib.pyplot as plt

from activations import Sigmoid, ReLU, Tanh, Exponential
from fullnetwork import onelayer, fullnetwork
from mpl_toolkits.mplot3d import Axes3D

#number of hidden layers#
L = 3
#network size for each hidden layer n[0]=n_1, ..., n[L-1]=n_L#
n = np.random.randint(1, 10, size=L)
#training set size#
training_size = 1
#(N, N) meshgrid#
N = 3
#activation function#
sigma = Tanh()

#set the network#
network = fullnetwork(L=L, n=n, activation=sigma)
#set the initial weight and bias#
weight, bias = network.setparameter()

#choose one layer from [1, L-1]#
weightindex_startlayer = np.random.randint(1, L, size=None)
#its next layer#
weightindex_nextlayer = weightindex_startlayer + 1
#the two weights are taken by randomly sampling two neurons from each of the above layers, from [1, width of that layer]#
weightindex_neuron_startlayer = np.random.randint(
    1, n[weightindex_startlayer - 1] + 1, size=2)
weightindex_neuron_nextlayer = np.random.randint(1,
                                                 n[weightindex_nextlayer - 1] +
Exemplo n.º 16
0
    ]
    #trainingIn = [trainingIn0]

    trainingOut0 = (np.cumsum(trainingIn0, axis=0) % 2)[:, 0, :]
    trainingOut1 = (np.cumsum(trainingIn1, axis=0) % 2)[:, 0, :]
    trainingOut2 = (np.cumsum(trainingIn2, axis=0) % 2)[:, 0, :]
    trainingOut3 = (np.cumsum(trainingIn3, axis=0) % 2)[:, 0, :]
    trainingOut4 = (np.cumsum(trainingIn4, axis=0) % 2)[:, 0, :]
    trainingOut5 = (np.cumsum(trainingIn5, axis=0) % 2)[:, 0, :]

    trainingOut = [
        trainingOut1, trainingOut2, trainingOut3, trainingOut4, trainingOut5
    ]
    #trainingOut = [trainingOut0]

    f, g, h = Logistic(), Logistic(), Tanh()
    lstm_layer1 = LSTMLayer(2, 4, f, g, h)
    lstm_layer2 = NNLayer(4, 4, Tanh(), usebias=False)
    lstm_layer3 = NNLayer(4, 1, Tanh(), usebias=False)
    #lstm_layer1 = LSTMLayerWeights(2, 4, f, g, h)
    #lstm_layer2 = NNLayer(4, 1, h)
    #d_weight1 = [np.zeros(w.shape) for w in lstm_layer1.to_weights_array()]
    #d_weight2 = [np.zeros(w.shape) for w in lstm_layer2.to_weights_array()]
    #d_weights = [d_weight1, d_weight2]
    #d_weights = [d_weight1]

    lstm = LSTMNetwork([lstm_layer1, lstm_layer2, lstm_layer3], loss=Squared())
    #lstm = LSTMNetwork([lstm_layer1, nn_layer1])
    """
    for trial in range(200):
        #import pdb; pdb.set_trace()
Exemplo n.º 17
0
val = df.drop(train.index)

yr = train.iloc[:, 0].to_numpy()
X_train, y_train = train.iloc[:, 1:].to_numpy() / 255.0, onehotcode(yr)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))
print(X_train.shape, y_train.shape)

X_val, y_val = val.iloc[:, 1:].to_numpy() / 255.0, val.iloc[:, 0].to_numpy()

X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
print(X_val.shape, y_val.shape)

nn = NN()
nn.sequential(network=[784, 128, 10],
              activation=[Tanh(), Softmax()],
              loss=Cross_entropy(),
              regu=Ridge(n=X_train.shape[0], lmda=5),
              weight_type='glorot_normal')
nn.load_model('tanL2128')
#nn.fit(X_train,y_train,X_val,y_val,32,2)
#nn.save_model('tanL2128')
nn.weight_heatmap()

df2 = pd.read_csv('mnist_test.csv')
X_test, y_test = df2.iloc[:, 1:].to_numpy() / 255.0, df2.iloc[:, 0].to_numpy()
X_test = X_test.reshape((X_test.shape[0], 28, 28))
print(X_test.shape, y_test.shape)
nn.annote_test(X_test[:100], 10, 10)
test_input.sub_(mean).div_(std)
#Convert to Labels so that we can train
train_target_hot = conv_to_one_hot(train_target)
test_target_hot = conv_to_one_hot(test_target)

### Build the Network
hidden_layers = 3

layers = []
linear = Linear(2, 25, bias_init=True)
layers.append(linear)
layers.append(Relu())
for i in range(hidden_layers - 1):
    layers.append(Linear(25, 25, bias_init=True))
    layers.append(Relu())
layers.append(Tanh())
layers.append(Linear(25, 2, bias_init=True))
model = Sequential(layers)

#print model summary
print("Model Summary:")
print(model)

### Select Parameters to train the model
criterion = MSE()
lr = 0.05
nb_epochs = 250
print_step = 25
mini_batch_size = 100

loss_at_print = []
Exemplo n.º 19
0
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=256,
              outputDim=512,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=512,
              outputDim=1024,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=1024,
              outputDim=28 * 28,
              activation=Tanh(),
              optimizer=optimizer))

    discriminator = MLP()
    discriminator.addLayer(
        Dense(inputDim=28 * 28,
              outputDim=1024,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    discriminator.addLayer(
        Dense(inputDim=1024,
              outputDim=512,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    discriminator.addLayer(
        Dense(inputDim=512,
Exemplo n.º 20
0
from dense import Dense
from activations import Tanh
from losses import mse, mse_prime

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
Y = np.reshape([[0], [1], [1], [0]], (4, 1, 1))

epochs = 10000
learning_rate = 0.1

network = [Dense(2, 3), Tanh(), Dense(3, 1), Tanh()]

# train
for e in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        # forward
        output = x
        for layer in network:
            output = layer.forward(output)

        # error
        error += mse(y, output)

        # backward
        grad = mse_prime(y, output)
        for layer in reversed(network):
Exemplo n.º 21
0
class WavenetResidualModule(ModuleBase):
    def __init__(
        self,
        ch_residual,
        ch_dilation,
        dilation,
        kernel_width,
        optimizer=None,
        init="glorot_uniform",
    ):
        """
        A WaveNet-like residual block with causal dilated convolutions.

        *Skip path in* >-------------------------------------------> + --------> *Skip path out*
                          Causal      |--> Tanh --|                  |
        *Main    |--> Dilated Conv1D -|           * --> 1x1 Conv1D --|
         path >--|                    |--> Sigm --|                  |
         in*     |-------------------------------------------------> + --------> *Main path out*
                                     *Residual path*

        On the final block, the output of the skip path is further processed to
        produce the network predictions.

        See van den Oord et al. (2016) at https://arxiv.org/pdf/1609.03499.pdf
        for further details.

        Parameters
        ----------
        ch_residual : int
            The number of output channels for the 1x1 Conv1D layer in the main
            path
        ch_dilation : int
            The number of output channels for the causal dilated Conv1D layer
            in the main path
        dilation : int
            The dilation rate for the causal dilated Conv1D layer in the main
            path
        kernel_width : int
            The width of the causal dilated Conv1D kernel in the main path
        init : str (default: 'glorot_uniform')
            The weight initialization strategy. Valid entries are
            {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
        optimizer : str or `OptimizerBase` instance (default: None)
            The optimization strategy to use when performing gradient updates
            within the `update` method.  If `None`, use the `SGD` optimizer with
            default parameters.
        """
        super().__init__()

        self.init = init
        self.dilation = dilation
        self.optimizer = optimizer
        self.ch_residual = ch_residual
        self.ch_dilation = ch_dilation
        self.kernel_width = kernel_width

        self._init_params()

    def _init_params(self):
        """Reset the derived-variable cache and instantiate every sub-layer."""
        self._dv = {}

        # Honour the `kernel_width` constructor argument. The width was
        # previously hard-coded to 2, silently ignoring the documented
        # parameter that `hyperparameters` reports.
        self.conv_dilation = Conv1D(
            stride=1,
            pad="causal",
            init=self.init,
            kernel_width=self.kernel_width,
            dilation=self.dilation,
            out_ch=self.ch_dilation,
            optimizer=self.optimizer,
            act_fn=Affine(slope=1, intercept=0),
        )

        self.tanh = Tanh()
        self.sigm = Sigmoid()
        self.multiply_gate = Multiply(act_fn=Affine(slope=1, intercept=0))

        self.conv_1x1 = Conv1D(
            stride=1,
            pad="same",
            dilation=0,
            init=self.init,
            kernel_width=1,
            out_ch=self.ch_residual,
            optimizer=self.optimizer,
            act_fn=Affine(slope=1, intercept=0),
        )

        self.add_residual = Add(act_fn=Affine(slope=1, intercept=0))
        self.add_skip = Add(act_fn=Affine(slope=1, intercept=0))

    @property
    def parameters(self):
        """Parameters of every component layer, keyed by component id."""
        return {
            "components": {
                "conv_1x1": self.conv_1x1.parameters,
                "add_skip": self.add_skip.parameters,
                "add_residual": self.add_residual.parameters,
                "conv_dilation": self.conv_dilation.parameters,
                "multiply_gate": self.multiply_gate.parameters,
            }
        }

    @property
    def hyperparameters(self):
        """Constructor settings plus the hyperparameters of every component."""
        return {
            "layer": "WavenetResidualModule",
            "init": self.init,
            "dilation": self.dilation,
            "optimizer": self.optimizer,
            "ch_residual": self.ch_residual,
            "ch_dilation": self.ch_dilation,
            "kernel_width": self.kernel_width,
            "component_ids": [
                "conv_1x1",
                "add_skip",
                "add_residual",
                "conv_dilation",
                "multiply_gate",
            ],
            "components": {
                "conv_1x1": self.conv_1x1.hyperparameters,
                "add_skip": self.add_skip.hyperparameters,
                "add_residual": self.add_residual.hyperparameters,
                "conv_dilation": self.conv_dilation.hyperparameters,
                "multiply_gate": self.multiply_gate.hyperparameters,
            },
        }

    @property
    def derived_variables(self):
        """
        Intermediate values cached by `forward` / `backward`.

        The three ``*_out`` entries are None until `forward` has stored them;
        anything recorded in ``self._dv`` overrides these defaults.
        """
        dv = {
            "conv_1x1_out": None,
            "conv_dilation_out": None,
            "multiply_gate_out": None,
            "components": {
                "conv_1x1": self.conv_1x1.derived_variables,
                "add_skip": self.add_skip.derived_variables,
                "add_residual": self.add_residual.derived_variables,
                "conv_dilation": self.conv_dilation.derived_variables,
                "multiply_gate": self.multiply_gate.derived_variables,
            },
        }
        dv.update(self._dv)
        return dv

    @property
    def gradients(self):
        """Gradients of every component layer, keyed by component id."""
        return {
            "components": {
                "conv_1x1": self.conv_1x1.gradients,
                "add_skip": self.add_skip.gradients,
                "add_residual": self.add_residual.gradients,
                "conv_dilation": self.conv_dilation.gradients,
                "multiply_gate": self.multiply_gate.gradients,
            }
        }

    def forward(self, X_main, X_skip=None):
        """
        Compute the forward pass of the block.

        Parameters
        ----------
        X_main
            Input on the main path. It is fed to the dilated convolution and
            also added to the 1x1 convolution output (residual connection).
        X_skip (default: None)
            Running sum on the skip path. None marks the first block, in
            which case zeros shaped like the 1x1 convolution output are used.

        Returns
        -------
        Y_main, Y_skip
            Outputs of the main (residual) path and of the skip path.
        """
        self.X_main = X_main
        conv_dilation_out = self.conv_dilation.forward(X_main)

        # gated activation: tanh branch multiplied by sigmoid branch
        tanh_gate = self.tanh.fn(conv_dilation_out)
        sigm_gate = self.sigm.fn(conv_dilation_out)

        multiply_gate_out = self.multiply_gate.forward([tanh_gate, sigm_gate])
        conv_1x1_out = self.conv_1x1.forward(multiply_gate_out)

        # If this is the first wavenet block, initialize the "previous" skip
        # connection sum to 0. The zero array (not the original None) must be
        # what reaches `add_skip`.
        if X_skip is None:
            X_skip = np.zeros_like(conv_1x1_out)
        self.X_skip = X_skip

        Y_skip = self.add_skip.forward([X_skip, conv_1x1_out])
        Y_main = self.add_residual.forward([X_main, conv_1x1_out])

        self._dv["tanh_out"] = tanh_gate
        self._dv["sigm_out"] = sigm_gate
        self._dv["conv_dilation_out"] = conv_dilation_out
        self._dv["multiply_gate_out"] = multiply_gate_out
        self._dv["conv_1x1_out"] = conv_1x1_out
        return Y_main, Y_skip

    def backward(self, dY_skip, dY_main=None):
        """
        Backpropagate through the block.

        Parameters
        ----------
        dY_skip
            Gradient of the loss with respect to the skip-path output.
        dY_main (default: None)
            Gradient of the loss with respect to the main-path output. None
            marks the last block, where only the skip path contributes.

        Returns
        -------
        dX_main, dX_skip
            Gradients with respect to the main-path and skip-path inputs.
        """
        dX_skip, dConv_1x1_out = self.add_skip.backward(dY_skip)

        # if this is the last wavenet block, dY_main will be None. if not,
        # calculate the error contribution from dY_main and add it to the
        # contribution from the skip path
        dX_main = np.zeros_like(self.X_main)
        if dY_main is not None:
            dX_main, dConv_1x1_main = self.add_residual.backward(dY_main)
            # Out-of-place sum: never mutate an array handed back by a
            # sub-layer, which may be shared with another returned gradient.
            dConv_1x1_out = dConv_1x1_out + dConv_1x1_main

        dMultiply_out = self.conv_1x1.backward(dConv_1x1_out)
        dTanh_out, dSigm_out = self.multiply_gate.backward(dMultiply_out)

        # chain rule through the two gate non-linearities, which both read
        # the dilated convolution output cached by `forward`
        conv_dilation_out = self.derived_variables["conv_dilation_out"]
        dTanh_in = dTanh_out * self.tanh.grad(conv_dilation_out)
        dSigm_in = dSigm_out * self.sigm.grad(conv_dilation_out)
        dDilation_out = dTanh_in + dSigm_in

        conv_back = self.conv_dilation.backward(dDilation_out)
        dX_main = dX_main + conv_back

        self._dv["dLdTanh"] = dTanh_out
        self._dv["dLdSigmoid"] = dSigm_out
        self._dv["dLdConv_1x1"] = dConv_1x1_out
        self._dv["dLdMultiply"] = dMultiply_out
        self._dv["dLdConv_dilation"] = dDilation_out
        return dX_main, dX_skip
Exemplo n.º 22
0
def main():
    """
    Function containing the main code definition, display all functionalities
    provided by the framework.

    Steps performed:

    1. Build four fully connected networks (2 -> 25 -> 25 -> 25 -> 2) that
       differ in their activation (ReLU, LeakyReLU, Tanh, Sigmoid). Xavier
       initialization is requested for all of them except the Sigmoid one.
    2. Generate a train and a test set with ``generate_disc_set(1000)``,
       train the LeakyReLU network with ``SGDCV`` for 25 epochs and print its
       train/test accuracy.
    3. For every network, call ``cross_validate`` on an ``SGDCV`` or
       ``AdamCV`` optimizer, either over a parameter grid or over fixed
       per-model parameters, and print the best parameters and score.

    Reads the module-level ``args`` object: ``args.CE`` selects the
    cross-entropy loss instead of MSE, ``args.Adam`` selects Adam instead of
    SGD, and ``args.cross_val`` selects the full parameter grids.
    """

    def build_model(activation, xavier_init):
        # Layers are created in the same left-to-right order for every model
        # so that any random initialization happens in a fixed sequence.
        return Sequential(Linear(2, 25),
                          activation(),
                          Linear(25, 25),
                          activation(),
                          Linear(25, 25),
                          activation(),
                          Linear(25, 2),
                          xavier_init=xavier_init)

    # Different activation functions and setting of automatic Xavier parameter initialization
    relu_model = build_model(ReLU, True)
    leaky_relu_model = build_model(LeakyReLU, True)
    tanh_model = build_model(Tanh, True)
    sigmoid_model = build_model(Sigmoid, False)

    # (display name, model) pairs; a tuple keeps the iteration order explicit.
    named_models = (
        ("ReLU", relu_model),
        ("Leaky", leaky_relu_model),
        ("Tanh", tanh_model),
        ("Sigmoid", sigmoid_model),
    )

    train_input, train_target = generate_disc_set(1000)
    test_input, test_target = generate_disc_set(1000)

    # Model training without cross-validation of the optimizer parameters
    optimizer = SGDCV(leaky_relu_model, nb_epochs=25)
    optimizer.train(train_input, train_target)

    evaluator = Evaluator(leaky_relu_model)

    print("Train accuracy using LeakyReLU: {:.1f}%".format(
        (evaluator.compute_accuracy(train_input, train_target) * 100).item()))
    print("Test accuracy using LeakyReLU: {:.1f}%".format(
        (evaluator.compute_accuracy(test_input, test_target) * 100).item()))

    # Grids explored when cross-validation is requested
    sgd_cross_val_param_grid = {"lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]}
    adam_cross_val_param_grid = {
        "lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        "b1": [0.9, 0.8],
        "b2": [0.999, 0.888],
        "epsilon": [1e-8, 1e-7, 1e-6]
    }

    # Fixed per-model parameters used when no grid search is requested.
    # Every model gets its own fresh dict/lists (nothing is shared).
    adam_params = {
        name: {
            "lr": [0.001],
            "b1": [0.9],
            "b2": [0.999],
            "epsilon": [1e-08]
        }
        for name, _ in named_models
    }
    sgd_params = {
        "ReLU": {"lr": [0.001]},
        "Leaky": {"lr": [0.001]},
        "Tanh": {"lr": [0.001]},
        "Sigmoid": {"lr": [0.01]}
    }

    mse_loss = not args.CE
    optimizer_sgd = not args.Adam
    cross_validate = args.cross_val

    # Different loss functions
    if mse_loss:
        criterion = LossMSE()
    else:
        criterion = LossCrossEntropy()

    for name, model in named_models:
        if optimizer_sgd:
            # SGD optimizer parameter cross-validation
            optimizer = SGDCV(model, mini_batch_size=10, criterion=criterion)
            params = (sgd_cross_val_param_grid
                      if cross_validate else sgd_params[name])

            _, best_params_score = optimizer.cross_validate(values=params)

            # "{:g}" keeps small learning rates readable; the previous
            # "{:.3f}" rendered 1e-4, 1e-5 and 1e-6 all as "0.000".
            print("Best params for model using {} : (lr={:g})".format(
                name, best_params_score["lr"]))
        else:
            # Adam optimizer parameter cross-validation
            optimizer = AdamCV(model, mini_batch_size=10, criterion=criterion)
            params = (adam_cross_val_param_grid
                      if cross_validate else adam_params[name])

            _, best_params_score = optimizer.cross_validate(values=params)

            print(
                "Best params for model using {} : (lr={:g}, b1={:.3f}, b2={:.3f}, epsilon={:.1e})"
                .format(name, best_params_score["lr"], best_params_score["b1"],
                        best_params_score["b2"], best_params_score["epsilon"]))

        print("Best score for model using {} : {:.3f} (+/- {:.3f})".format(
            name, best_params_score["mean"], best_params_score["std"]))
Exemplo n.º 23
0
import numpy as np
from tensor import Tensor
from layers import Sequential, Linear
from activations import Tanh, Sigmoid
from optimizers import SGD
from losses import MSELoss

# Seed NumPy's global RNG so that anything drawing from it is repeatable
# from run to run.
np.random.seed(0)

# Four two-feature samples and one target value per sample, each wrapped in
# a Tensor created with autograd=True.
raw_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
raw_targets = np.array([[0], [1], [0], [1]])
data = Tensor(raw_inputs, autograd=True)
target = Tensor(raw_targets, autograd=True)

# Layer stack: Linear(2, 3) -> Tanh -> Linear(3, 1) -> Sigmoid.
layer_stack = [Linear(2, 3), Tanh(), Linear(3, 1), Sigmoid()]
model = Sequential(layer_stack)
criterion = MSELoss()

# NOTE(review): alpha is presumably the SGD step size -- confirm in optimizers.
optim = SGD(parameters=model.get_parameters(), alpha=1)

n_iterations = 10
for i in range(n_iterations):
    # Forward pass, then loss against the targets.
    pred = model.forward(data)
    loss = criterion.forward(pred, target)

    # Backward pass, followed by one optimizer update.
    loss.backward()
    optim.step()

    print(loss)