Example #1
    def forward_propagation(self, model, X, Y, hyper_dic):
        activation_str = hyper_dic["activation"]
        model = self._init_model(hyper_dic, model)
        W1 = model["W1"]
        b1 = model["b1"]
        W2 = model["W2"]
        b2 = model["b2"]

        X = self._normalization(X)
        Z1 = np.dot(X, W1) + b1
        a1 = func.activation(Z1, activation_str)
        logits = np.dot(a1, W2) + b2
        prob = func.softmax(logits)

        correct_probs = prob[range(X.shape[0]), np.argmax(Y, axis=1)]
        correct_logprobs = -func.log(correct_probs)

        data_loss = np.sum(correct_logprobs)
        loss = 1. / X.shape[0] * data_loss

        pre_Y = np.argmax(prob, axis=1)
        comp = pre_Y == np.argmax(Y, axis=1)
        accuracy = len(np.flatnonzero(comp)) / Y.shape[0]

        return model, prob, a1, Z1, loss, accuracy, comp
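
Note: all of the examples on this page call into a project-local func module for softmax, activation, log, and similar helpers. As a point of reference only, a minimal numerically stable softmax (an assumed stand-in for func.softmax, not any of the projects' actual code) looks like this:

import numpy as np

def softmax(z):
    # subtract the row-wise max before exponentiating to avoid overflow
    z = z - np.max(z, axis=-1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=-1, keepdims=True)
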
Example #2
    def predict(self, x):
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, W1) + b1
        z1 = func.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = func.softmax(a2)

        return y
Example #3
File: c3.py Project: o93/aizero
def predict(network, x):
    W1, W2, W3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, W1) + b1
    z1 = func.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = func.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = func.softmax(a3)

    return y
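
A usage sketch for the three-layer predict above; the layer sizes (784-50-100-10) and the random initialization are illustrative assumptions, not values taken from o93/aizero, and func.sigmoid / func.softmax are assumed to be importable as in the snippet:

import numpy as np

np.random.seed(0)
network = {
    "W1": 0.01 * np.random.randn(784, 50),  "b1": np.zeros(50),
    "W2": 0.01 * np.random.randn(50, 100),  "b2": np.zeros(100),
    "W3": 0.01 * np.random.randn(100, 10),  "b3": np.zeros(10),
}
x = np.random.randn(5, 784)   # batch of 5 flattened inputs
y = predict(network, x)       # shape (5, 10); each row sums to 1 after softmax
print(y.shape, y.sum(axis=1))
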
Example #4
    def forward_propagation(self, model, X, Y, hyper_dic):
        activation_str = hyper_dic["activation"]
        architecture = hyper_dic["architecture"]
        epsilon = hyper_dic["epsilon"]
        model = self._init_model(hyper_dic, model)
        weight_lt = model["weight_lt"]
        bias_lt = model["bias_lt"]

        linear_output_lt = []
        activation_output_lt = []
        model["linear_output_lt"] = linear_output_lt
        model["activation_output_lt"] = activation_output_lt
        batch_size = hyper_dic["batch_size"]
        activation_output_lt.append(X)

        a = X
        Z = None

        for i in range(len(architecture) - 2):
            Z = np.dot(a, weight_lt[i]) + bias_lt[i]
            model["linear_output_lt"].append(Z)
            a = func.activation(Z, activation_str)
            model["activation_output_lt"].append(a)

        Z = np.dot(a, weight_lt[len(architecture) - 2]) + bias_lt[len(architecture) - 2]
        model["linear_output_lt"].append(Z)

        prob = func.softmax(Z)

        correct_probs = prob[range(batch_size), np.argmax(Y, axis=1)]
        correct_logprobs = -func.log(correct_probs)

        data_loss = np.sum(correct_logprobs)
        loss = 1. / batch_size * data_loss

        pre_Y = np.argmax(prob, axis=1)
        comp = pre_Y == np.argmax(Y, axis=1)
        accuracy = len(np.flatnonzero(comp)) / Y.shape[0]

        return model, prob, loss, accuracy, comp
Example #5
 def fit(self, x, y, learning_rate=0.1, epochs: int = 10, L2=0.1):
     # y must be 2-D: a shape of (m,) is error-prone, so reshape (m,) targets to (m, 1) before calling fit
     if y.ndim < 2:
         raise Exception("y dims should be greater than 1")
     # Initialize w, then per epoch: evaluate the objective, compute the gradient,
     # add the regularization term, and update w. w has shape (n, k), one column per class.
     (m, k) = y.shape
     n = x.shape[1]
     np.random.seed(42)
     self.w = np.random.random((n, k))
     # track the loss at each epoch
     err = []
     for i in range(epochs):
         target = softmax(x, self.w)
         delta_regular = L2 * self.w
         # vectorized gradient: np.dot(x.T, (y - target))
         grad = -np.dot(x.T, (y - target)) / m
         grad = grad + delta_regular
         self.w = self.w - learning_rate * grad
         self.w[0, :] = np.sum(y - target, axis=0) / m
         err.append(loss(y, target, self.w))
     return self.w, err
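
Example #5 relies on module-level helpers softmax(x, w) and loss(y, target, w) that are not shown. Plausible definitions consistent with the fit loop (assumptions for illustration, not the original module) are:

import numpy as np

def softmax(x, w):
    # class probabilities of a linear model: normalize exp(x @ w) row-wise
    z = np.dot(x, w)
    z -= z.max(axis=1, keepdims=True)  # numerical stability
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def loss(y, target, w, L2=0.1):
    # mean cross-entropy between one-hot labels y and predictions target, plus an L2 penalty
    m = y.shape[0]
    ce = -np.sum(y * np.log(target + 1e-12)) / m
    return ce + 0.5 * L2 * np.sum(w ** 2)
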
Example #6
    def gradient(self, x, t):
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        grads = {}

        batch_num = x.shape[0]

        a1 = np.dot(x, W1) + b1
        z1 = func.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = func.softmax(a2)

        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = func.sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads
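
The dy = (y - t) / batch_num step above uses the standard identity that the gradient of mean cross-entropy taken through a softmax is the prediction minus the one-hot target, divided by the batch size. A self-contained finite-difference check of that identity (the helpers here are assumptions, not the snippet's func module):

import numpy as np

def softmax(z):
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def cross_entropy(y, t):
    return -np.sum(t * np.log(y + 1e-12)) / y.shape[0]

rng = np.random.default_rng(0)
a2 = rng.normal(size=(4, 3))                # pre-softmax scores
t = np.eye(3)[rng.integers(0, 3, size=4)]   # one-hot targets

analytic = (softmax(a2) - t) / a2.shape[0]  # the (y - t) / batch_num rule

numeric = np.zeros_like(a2)
eps = 1e-6
for i in range(a2.shape[0]):
    for j in range(a2.shape[1]):
        a2[i, j] += eps
        lp = cross_entropy(softmax(a2), t)
        a2[i, j] -= 2 * eps
        lm = cross_entropy(softmax(a2), t)
        a2[i, j] += eps
        numeric[i, j] = (lp - lm) / (2 * eps)

print(np.abs(analytic - numeric).max())     # agreement to roughly 1e-9
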
Example #7
    def forward_propagation(self, model, X, Y, hyper_dic):
        activation_str = hyper_dic["activation"]
        architecture = hyper_dic["architecture"]
        epsilon = hyper_dic["epsilon"]
        model = self._init_model(hyper_dic, model)
        weight_lt = model["weight_lt"]
        bias_lt = model["bias_lt"]

        linear_output_lt = []
        activation_output_lt = []
        model["linear_output_lt"] = linear_output_lt
        model["activation_output_lt"] = activation_output_lt
        batch_size = hyper_dic["batch_size"]
        activation_output_lt.append(X)

        a = X
        Z = None

        for i in range(len(architecture) - 2):
            Z = func.dot(a, weight_lt[i]) + bias_lt[i]
            model["linear_output_lt"].append(Z)
            a = func.activation(Z, activation_str)
            model["activation_output_lt"].append(a)

        Z = func.dot(a, weight_lt[len(architecture) - 2]) + bias_lt[len(architecture) - 2]
        model["linear_output_lt"].append(Z)

        prob = func.softmax(Z)
        #print(prob)
        #raise
        loss = func.get_loss(prob, Y)

        comp = func.get_comp(prob, Y)
        accuracy = func.get_accuracy(comp, Y)

        return model, prob, loss, accuracy, comp
Example #8
  def forward(self, x, t):
    self.t = t
    self.y = func.softmax(x)
    self.loss = func.cross_entropy_error(self.y, self.t)

    return self.loss
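
Example #8 shows only the forward pass of a softmax-with-loss layer. A typical backward pass, sketched here under the assumption that t is one-hot (this method is not part of the original snippet), propagates (y - t) scaled by the batch size:

  def backward(self, dout=1):
    batch_size = self.t.shape[0]
    # gradient of the mean cross-entropy loss w.r.t. the pre-softmax input x
    dx = (self.y - self.t) / batch_size
    return dx * dout
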
Example #9
    def __init__(self,
                 layers,
                 Ws=None,
                 Whs=None,
                 bs=None,
                 batch_size=1,
                 momentum_type="None",
                 act_type="ReLU",
                 cost_type="CE"):
        # test parameter definitions (these should become constructor inputs later)
        self.layers = layers
        self.batch_size = batch_size

        l_rate = T.scalar(dtype='float32')  # np.float32(0.0001)
        init = np.float32(0.1)
        rms_alpha = T.scalar(dtype='float32')  # np.float32(0.9)
        clip_range = T.scalar(dtype='float32')
        momentum = T.scalar(dtype='float32')
        # validation.
        if Ws is not None and bs is not None and Whs is not None:
            assert len(layers) == len(Ws) and len(layers) == len(bs) and len(
                layers) == len(Whs)

    # train input
        x_seq = T.tensor3(dtype='float32')
        y_hat = T.tensor3(dtype='float32')
        mask = T.tensor3(dtype='float32')

        # train parameter initialization
        self.W = [None]
        self.Wh = [None]
        self.b = [None]

        a_seq = [x_seq]
        ls = [None]

        for idx in range(len(self.layers) - 1):
            # init b , Wh , W
            #self.b.append ( theano.shared(np.asarray (np.random.uniform(-init , init , size = ( self.layers[idx+1] )) , 'float32')))
            self.b.append(
                theano.shared(
                    np.asarray(np.zeros(self.layers[idx + 1]), 'float32')))
            self.Wh.append(
                theano.shared(
                    np.asarray(
                        np.cast['float32'](0.1) *
                        np.identity(self.layers[idx + 1]), 'float32')))
            self.W.append(
                theano.shared(
                    np.asarray(
                        np.random.uniform(-init,
                                          init,
                                          size=(self.layers[idx],
                                                self.layers[idx + 1])),
                        'float32')))
            # import the  model from outside
            if Ws is not None:
                self.W[idx + 1].set_value(Ws[idx + 1].get_value())
            if bs is not None:
                self.b[idx + 1].set_value(bs[idx + 1].get_value())
            if Whs is not None:
                self.Wh[idx + 1].set_value(Whs[idx + 1].get_value())

            # declaration a RNN layer
            if idx == 0:  #means it's the first layer
                temp_layers = RNN_first_layer(self.W[idx + 1],
                                              self.Wh[idx + 1],
                                              self.b[idx + 1],
                                              self.layers[idx + 1], a_seq[idx],
                                              self.batch_size, act_type)
            elif idx == len(self.layers) - 2:  # Last Layer
                temp_layers = RNN_last_layer(self.W[idx + 1], self.b[idx + 1],
                                             a_seq[idx])
            else:
                temp_layers = RNN_layers(self.W[idx + 1], self.Wh[idx + 1],
                                         self.b[idx + 1], self.layers[idx + 1],
                                         a_seq[idx], self.batch_size, act_type)

            ls.append(temp_layers)
            # output the 'a' of RNN layers
            a_seq.append(temp_layers.layer_out)

    # define parameters
        parameters = self.W[1:] + self.Wh[1:-1] + self.b[1:]

        # define what are outputs.
        y_seq = a_seq[-1]
        y_out = y_seq * T.addbroadcast(mask, 2)

        # define cost
        # softmax is applied to the masked output regardless of cost_type
        y_out_a = F.softmax(y_out)
        cost = F.cost_func(y_out_a, y_hat, cost_type)
        # compute gradient

        gradients = T.grad(cost, parameters)
        gradient = []
        for idx in range(len(gradients)):
            gradient.append(T.clip(gradients[idx], -clip_range, clip_range))
        #
        pre_parameters = []
        for param in parameters:
            pre_parameters.append(
                theano.shared(
                    np.asarray(np.zeros(param.get_value().shape), 'float32')))
        # for rmsprop
        sq_sum_grad = []
        for param in parameters:
            sq_sum_grad.append(
                theano.shared(
                    np.asarray(np.zeros(param.get_value().shape), 'float32')))
        # for NAG
        pre_update = []
        for param in parameters:
            pre_update.append(
                theano.shared(
                    np.asarray(np.zeros(param.get_value().shape), 'float32')))

        def update(parameters, gradients):
            if momentum_type == "rmsprop":
                parameter_updates = [ (p, p - l_rate * g / T.sqrt(ssg) )
                    if ssg.get_value().sum() != 0 else (p, p-l_rate*g) \
                    for p,g,ssg in izip(parameters,gradient,sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                           for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "NAG":
                parameter_updates = [ ( pre_p , pre_p + momentum*v - l_rate*g )\
                    for pre_p , g , v in izip(pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g ) ) \
                    for p , pre_p , g , v in izip(parameters, pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( v , -l_rate*g + momentum*v )\
                    for g , v in izip(gradient , pre_update) ]
                return parameter_updates
            elif momentum_type == "rms+NAG":
                parameter_updates =  [ ( pre_p , pre_p + momentum*v - l_rate*g/T.sqrt(ssg) ) \
                    if ssg.get_value().sum() != 0 else (pre_p , pre_p - l_rate*g + momentum*v ) \
                    for pre_p , g , v , ssg in izip(pre_parameters, gradient, pre_update,sq_sum_grad) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g/T.sqrt(ssg) ) ) \
                    if ssg.get_value().sum() != 0 else ( p , pre_p + 2*( -l_rate*g + momentum*v) ) \
                    for p , pre_p , g , v ,ssg in izip(parameters, pre_parameters, gradient, pre_update , sq_sum_grad) ]
                parameter_updates += [ ( v , -l_rate*g/T.sqrt(ssg) + momentum*v )\
                    if ssg.get_value().sum() != 0 else ( v  , - l_rate*g + momentum*v ) \
                    for g , v , ssg in izip(gradient , pre_update , sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                    for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "None":
                parameter_updates = [ ( p, p - l_rate*g) \
                    for p , g in izip(parameters , gradient ) ]
                return parameter_updates

    # define theano.functions

        self.train = theano.function(
            inputs=[
                x_seq, y_hat, mask, l_rate, rms_alpha, clip_range, momentum
            ],
            updates=update(parameters, gradient),
            outputs=cost,
        )

        self.test = theano.function(inputs=[x_seq, mask], outputs=y_out)
        self.test_sof = theano.function(inputs=[x_seq, mask], outputs=y_out_a)
Example #10
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax2 = ax1.twinx()

    ln1 = ax1.plot(epochs, valid_costs, label='Valid Cost', color='blue')
    ln2 = ax2.plot(epochs,
                   valid_accuracies,
                   label='Valid Accuracy',
                   color='red')

    ax2.set_ylim([0, 1])
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1 + h2, l1 + l2, loc='center right')

    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Cost')
    ax1.grid(True)
    ax2.set_ylabel('Accuracy')

    plt.show()

    y_test_pred = f.softmax(np.matmul(x_test, W) + b)
    submission = pd.Series(y_test_pred.argmax(axis=1), name='label')
    submission.to_csv('../data/submission_pred.csv',
                      header=True,
                      index_label='id')

    print("fin")
Example #11
    def __init__( self, layers, Ws = None, Wis = None, Wfs = None, Wos = None, bs = None, bis = None, bfs = None, bos = None, \
                batch_size = 1, momentum_type = "None", act_type = "ReLU" , cost_type = "EU" ):

        self.layers = layers
        self.batch_size = batch_size

        l_rate = T.scalar(dtype='float32')
        init = np.float32(0.1)
        rms_alpha = T.scalar(dtype='float32')  # np.float32(0.9)
        clip_range = T.scalar(dtype='float32')
        momentum = T.scalar(dtype='float32')

        x_seq = T.tensor3(dtype='float32')
        y_h_seq = T.tensor3(dtype='float32')
        mask = T.tensor3(dtype='float32')

        self.W = [None]
        self.Wi = [None]
        self.Wf = [None]
        self.Wo = [None]
        self.b = [None]
        self.bi = [None]
        self.bf = [None]
        self.bo = [None]

        a_seq = [x_seq]
        lstm_layers = [None]

        parameters = [
            self.W, self.Wi, self.Wf, self.Wo, self.b, self.bi, self.bf,
            self.bo
        ]

        for idx in xrange(1, len(layers)):
            # Initializing model parameters.
            for i, p in enumerate(parameters):
                if i < 4:  # Weight Matrices
                    if idx == len(layers) - 1:
                        p.append( theano.shared( np.random.uniform( -init, init, \
                                size=(layers[idx-1],layers[idx]) ).astype("float32") ))
                    else:
                        p.append( theano.shared( np.random.uniform( -init, init, \
                                size=(layers[idx-1]+2*layers[idx],layers[idx]) ).astype("float32") ))

                else:  # bias vectors
                    p.append( theano.shared( np.random.uniform( -init, init, \
                                size = (layers[idx]) ).astype('float32') ))

            # Create LSTM layers and pass in the corresponding parameters.
            if Ws and Wis and Wfs and Wos and bs and bis and bfs and bos:
                layer_params = (Ws[idx], Wis[idx], Wfs[idx], Wos[idx], bs[idx],
                                bis[idx], bfs[idx], bos[idx])
            else:
                if idx == len(layers) - 1:
                    layer_params = [parameters[0][idx]] + [None] * 3 + [
                        parameters[4][idx]
                    ] + [None] * 3
                else:
                    layer_params = [p[idx] for p in parameters]

            if idx == len(layers) - 1:
                lstm = LSTM_last_layer(layer_params[0], layer_params[4],
                                       a_seq[idx - 1], act_type)
            else:
                lstm = LSTMLayer(batch_size, layers[idx - 1], layers[idx],
                                 a_seq[idx - 1], layer_params, act_type)

            a_seq.append(lstm.y_seq)
            lstm_layers.append(lstm)

        y_seq = a_seq[-1]
        y_out = y_seq * T.addbroadcast(mask, 2)

        if (cost_type == "CE"):
            y_out = F.softmax(y_out)

        cost = F.cost_func(y_out, y_h_seq, cost_type)

        if Ws and Wis and Wfs and Wos and bs and bis and bfs and bos:
            parameters = Ws[1:] + Wis[1:-1] + Wfs[1:-1] + Wos[1:-1] + \
       bs[1:] + bis[1:-1] + bfs[1:-1] + bos[1:-1]
        else:
            parameters = self.W[1:] + self.Wi[1:-1] + self.Wf[1:-1] + self.Wo[1:-1]+ \
                     self.b[1:] + self.bi[1:-1] + self.bf[1:-1] + self.bo[1:-1]

        gradients = T.grad(cost, parameters)

        gradient = []
        for idx in range(len(gradients)):
            gradient.append(T.clip(gradients[idx], -clip_range, clip_range))

        pre_parameters = []
        for param in parameters:
            pre_parameters.append(
                theano.shared(
                    np.asarray(np.zeros(param.get_value().shape), 'float32')))
        # for rmsprop
        sq_sum_grad = []
        for param in parameters:
            sq_sum_grad.append(
                theano.shared(
                    np.asarray(np.zeros(param.get_value().shape), 'float32')))
        # for NAG
        pre_update = []
        for param in parameters:
            pre_update.append(
                theano.shared(
                    np.asarray(np.zeros(param.get_value().shape), 'float32')))

        def update(parameters, gradients):
            if momentum_type == "rmsprop":
                parameter_updates = [ (p, p - l_rate * g / T.sqrt(ssg) )
                    if ssg.get_value().sum() != 0 else (p, p-l_rate*g) \
                    for p,g,ssg in izip(parameters,gradient,sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                           for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "NAG":
                parameter_updates = [ ( pre_p , pre_p + momentum*v - l_rate*g )\
                    for pre_p , g , v in izip(pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g ) ) \
                    for p , pre_p , g , v in izip(parameters, pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( v , -l_rate*g + momentum*v )\
                    for g , v in izip(gradient , pre_update) ]
                return parameter_updates
            elif momentum_type == "rms+NAG":
                parameter_updates =  [ ( pre_p , pre_p + momentum*v - l_rate*g/T.sqrt(ssg) ) \
                    if ssg.get_value().sum() != 0 else (pre_p , pre_p - l_rate*g + momentum*v ) \
                    for pre_p , g , v , ssg in izip(pre_parameters, gradient, pre_update,sq_sum_grad) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g/T.sqrt(ssg) ) ) \
                    if ssg.get_value().sum() != 0 else ( p , pre_p + 2*( -l_rate*g + momentum*v) ) \
                    for p , pre_p , g , v ,ssg in izip(parameters, pre_parameters, gradient, pre_update , sq_sum_grad) ]
                parameter_updates += [ ( v , -l_rate*g/T.sqrt(ssg) + momentum*v )\
                    if ssg.get_value().sum() != 0 else ( v  , - l_rate*g + momentum*v ) \
                    for g , v , ssg in izip(gradient , pre_update , sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                    for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "None":
                parameter_updates = [ ( p, p - l_rate*g) \
                    for p , g in izip(parameters , gradient ) ]
                return parameter_updates

        self.train = theano.function(inputs=[
            x_seq, y_h_seq, mask, l_rate, rms_alpha, clip_range, momentum
        ],
                                     outputs=cost,
                                     updates=update(parameters, gradients),
                                     allow_input_downcast=True)

        self.test = theano.function(inputs=[x_seq, mask],
                                    outputs=y_out,
                                    allow_input_downcast=True)
Example #12
    def __init__(self , layers , Ws = None , Whs = None , bs = None , batch_size = 1 ,
                    momentum_type = "None" , act_type = "ReLU" , cost_type = "CE"  ):
        # test parameter definitions (these should become constructor inputs later)
        self.layers        = layers
        self.batch_size    = batch_size

        l_rate             = T.scalar(dtype='float32') # np.float32(0.0001)
        init               = np.float32(0.1)
        rms_alpha          = T.scalar(dtype='float32') # np.float32(0.9)
        clip_range         = T.scalar(dtype='float32')
        momentum           = T.scalar(dtype='float32')
       # validation.
        if Ws is not None and bs is not None and Whs is not None:
            assert len(layers) == len(Ws) and len(layers) == len(bs) and len(layers) == len(Whs)

       # train input
        x_seq = T.tensor3(dtype='float32')
        y_hat = T.tensor3(dtype='float32')
        mask  = T.tensor3(dtype='float32')

       # train parameter initialization
        self.W  =  [ None ]
        self.Wh =  [ None ]
        self.b  =  [ None ]

        a_seq = [ x_seq ]
        ls    = [ None ]

        for idx in range( len(self.layers)-1 ):
            # init b , Wh , W
            #self.b.append ( theano.shared(np.asarray (np.random.uniform(-init , init , size = ( self.layers[idx+1] )) , 'float32')))
            self.b.append ( theano.shared(np.asarray ( np.zeros( self.layers[idx+1] ) , 'float32')) )
            self.Wh.append (theano.shared(np.asarray ( np.cast['float32'](0.1)*np.identity(self.layers[idx+1]), 'float32')) )
            self.W.append(theano.shared(np.asarray ( np.random.uniform(-init , init , size = ( self.layers[idx] , self.layers[idx+1] )), 'float32'  )  ))
            # import the  model from outside
            if Ws is not None:
                self.W[idx+1].set_value( Ws[idx+1].get_value() )
            if bs is not None:
                self.b[idx+1].set_value( bs[idx+1].get_value() )
            if Whs is not None:
                self.Wh[idx+1].set_value( Whs[idx+1].get_value() )

            # declaration a RNN layer
            if idx==0 : #means it's the first layer
                temp_layers = RNN_first_layer(self.W[idx+1] , self.Wh[idx+1] , self.b[idx+1] , self.layers[idx+1] , a_seq[idx] , self.batch_size  , act_type)
            elif idx == len(self.layers)-2: # Last Layer
                temp_layers = RNN_last_layer(self.W[idx+1]  , self.b[idx+1] , a_seq[idx] )
            else:
                temp_layers = RNN_layers(self.W[idx+1] , self.Wh[idx+1] , self.b[idx+1] , self.layers[idx+1] , a_seq[idx] , self.batch_size  , act_type)

            ls.append(temp_layers)
            # output the 'a' of RNN layers
            a_seq.append(temp_layers.layer_out)
       
       # define parameters 
        parameters = self.W[1:] + self.Wh[1:-1] + self.b[1:] 
 
       # define what are outputs.
        y_seq = a_seq[-1]
        y_out = y_seq * T.addbroadcast( mask , 2  )

       # define cost
        # softmax is applied to the masked output regardless of cost_type
        y_out_a = F.softmax(y_out)
        cost = F.cost_func( y_out_a , y_hat , cost_type )
       # compute gradient

        gradients = T.grad(cost , parameters )
        gradient = [ ]
        for idx in range(len(gradients)):
            gradient.append(T.clip(gradients[idx] , -clip_range , clip_range) )
        #
        pre_parameters = []
        for param in parameters:
            pre_parameters.append( theano.shared(
                np.asarray(
                    np.zeros(param.get_value().shape) , 'float32' )
            ))
        # for rmsprop
        sq_sum_grad = []
        for param in parameters:
            sq_sum_grad.append( theano.shared(
                np.asarray(
                    np.zeros(param.get_value().shape) , 'float32' )
            ))
        # for NAG
        pre_update = []
        for param in parameters:
            pre_update.append( theano.shared(
                np.asarray(
                    np.zeros(param.get_value().shape) , 'float32' )
            ))

        def update(parameters , gradients ):
            if momentum_type == "rmsprop":
                parameter_updates = [ (p, p - l_rate * g / T.sqrt(ssg) )
                    if ssg.get_value().sum() != 0 else (p, p-l_rate*g) \
                    for p,g,ssg in izip(parameters,gradient,sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                           for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "NAG":
                parameter_updates = [ ( pre_p , pre_p + momentum*v - l_rate*g )\
                    for pre_p , g , v in izip(pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g ) ) \
                    for p , pre_p , g , v in izip(parameters, pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( v , -l_rate*g + momentum*v )\
                    for g , v in izip(gradient , pre_update) ]
                return parameter_updates
            elif momentum_type == "rms+NAG":
                parameter_updates =  [ ( pre_p , pre_p + momentum*v - l_rate*g/T.sqrt(ssg) ) \
                    if ssg.get_value().sum() != 0 else (pre_p , pre_p - l_rate*g + momentum*v ) \
                    for pre_p , g , v , ssg in izip(pre_parameters, gradient, pre_update,sq_sum_grad) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g/T.sqrt(ssg) ) ) \
                    if ssg.get_value().sum() != 0 else ( p , pre_p + 2*( -l_rate*g + momentum*v) ) \
                    for p , pre_p , g , v ,ssg in izip(parameters, pre_parameters, gradient, pre_update , sq_sum_grad) ]
                parameter_updates += [ ( v , -l_rate*g/T.sqrt(ssg) + momentum*v )\
                    if ssg.get_value().sum() != 0 else ( v  , - l_rate*g + momentum*v ) \
                    for g , v , ssg in izip(gradient , pre_update , sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                    for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "None":
                parameter_updates = [ ( p, p - l_rate*g) \
                    for p , g in izip(parameters , gradient ) ]
                return parameter_updates



       # define theano.functions
        self.train = theano.function( inputs = [ x_seq , y_hat , mask ,
                                                l_rate ,
                                                rms_alpha ,
                                                clip_range ,
                                                momentum
                                                ] ,
                                        updates = update(parameters , gradient) ,
                                        outputs = cost,
                                         )


        self.test  = theano.function( inputs = [x_seq , mask ]  , outputs =  y_out  )
        self.test_sof  = theano.function( inputs = [x_seq , mask ]  , outputs =  y_out_a  )
Example #13
    def __init__( self, layers, Ws = None, Wis = None, Wfs = None, Wos = None, bs = None, bis = None, bfs = None, bos = None, \
                batch_size = 1, momentum_type = "None", act_type = "ReLU" , cost_type = "EU" ):

        self.layers       = layers
        self.batch_size   = batch_size

        l_rate            = T.scalar(dtype='float32')
        init              = np.float32(0.1)
        rms_alpha         = T.scalar(dtype='float32') # np.float32(0.9)
        clip_range        = T.scalar(dtype='float32')
        momentum          = T.scalar(dtype='float32')


        x_seq   = T.tensor3(dtype='float32')
        y_h_seq = T.tensor3(dtype='float32')
        mask    = T.tensor3(dtype='float32')

        self.W  = [ None ]
        self.Wi = [ None ]
        self.Wf = [ None ]
        self.Wo = [ None ]
        self.b  = [ None ]
        self.bi = [ None ]
        self.bf = [ None ]
        self.bo = [ None ]

        a_seq       = [ x_seq ]
        lstm_layers = [ None ]

        parameters = [ self.W, self.Wi, self.Wf, self.Wo,
                       self.b, self.bi, self.bf, self.bo ]

        for idx in xrange(1,len(layers)):
            # Initializing model parameters.
            for i,p in enumerate(parameters):
                if i < 4: # Weight Matrices
                    if idx == len(layers) - 1:
                        p.append( theano.shared( np.random.uniform( -init, init, \
                                size=(layers[idx-1],layers[idx]) ).astype("float32") ))
                    else:
                        p.append( theano.shared( np.random.uniform( -init, init, \
                                size=(layers[idx-1]+2*layers[idx],layers[idx]) ).astype("float32") ))

                else: # bias vectors
                    p.append( theano.shared( np.random.uniform( -init, init, \
                                size = (layers[idx]) ).astype('float32') ))

            # Create LSTM layers and pass in the corresponding parameters.
            if Ws and Wis and Wfs and Wos and bs and bis and bfs and bos:
                layer_params = ( Ws[idx],Wis[idx],Wfs[idx],Wos[idx],bs[idx],bis[idx],bfs[idx],bos[idx] )
            else:
                if idx == len(layers) - 1:
                    layer_params = [ parameters[0][idx] ] + [ None ] * 3 + [ parameters[4][idx] ] + [ None ] * 3
                else:
                    layer_params = [ p[idx] for p in parameters ]

            if idx == len(layers) - 1:
                lstm = LSTM_last_layer( layer_params[0], layer_params[4], a_seq[idx-1], act_type )
            else:
                lstm = LSTMLayer( batch_size, layers[idx-1], layers[idx], a_seq[idx-1], layer_params, act_type )

            a_seq.append( lstm.y_seq )
            lstm_layers.append( lstm )

        y_seq = a_seq[-1]
        y_out = y_seq * T.addbroadcast( mask , 2  )

        if( cost_type == "CE" ):
            y_out = F.softmax(y_out)

        cost = F.cost_func( y_out , y_h_seq , cost_type )

        if Ws and Wis and Wfs and Wos and bs and bis and bfs and bos:
            parameters = Ws[1:] + Wis[1:-1] + Wfs[1:-1] + Wos[1:-1] + \
                bs[1:] + bis[1:-1] + bfs[1:-1] + bos[1:-1]
        else:
            parameters = self.W[1:] + self.Wi[1:-1] + self.Wf[1:-1] + self.Wo[1:-1]+ \
                     self.b[1:] + self.bi[1:-1] + self.bf[1:-1] + self.bo[1:-1]

        gradients = T.grad(cost , parameters )

        gradient = [ ]
        for idx in range(len(gradients)):
            gradient.append(T.clip(gradients[idx] , -clip_range , clip_range) )

        pre_parameters = []
        for param in parameters:
            pre_parameters.append( theano.shared(
                np.asarray(
                    np.zeros(param.get_value().shape) , 'float32' )
            ))
        # for rmsprop
        sq_sum_grad = []
        for param in parameters:
            sq_sum_grad.append( theano.shared(
                np.asarray(
                    np.zeros(param.get_value().shape) , 'float32' )
            ))
        # for NAG
        pre_update = []
        for param in parameters:
            pre_update.append( theano.shared(
                np.asarray(
                    np.zeros( param.get_value().shape ) , 'float32' )
            ))

        def update(parameters , gradients ):
            if momentum_type == "rmsprop":
                parameter_updates = [ (p, p - l_rate * g / T.sqrt(ssg) )
                    if ssg.get_value().sum() != 0 else (p, p-l_rate*g) \
                    for p,g,ssg in izip(parameters,gradient,sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                           for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "NAG":
                parameter_updates = [ ( pre_p , pre_p + momentum*v - l_rate*g )\
                    for pre_p , g , v in izip(pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g ) ) \
                    for p , pre_p , g , v in izip(parameters, pre_parameters, gradient, pre_update) ]
                parameter_updates += [ ( v , -l_rate*g + momentum*v )\
                    for g , v in izip(gradient , pre_update) ]
                return parameter_updates
            elif momentum_type == "rms+NAG":
                parameter_updates =  [ ( pre_p , pre_p + momentum*v - l_rate*g/T.sqrt(ssg) ) \
                    if ssg.get_value().sum() != 0 else (pre_p , pre_p - l_rate*g + momentum*v ) \
                    for pre_p , g , v , ssg in izip(pre_parameters, gradient, pre_update,sq_sum_grad) ]
                parameter_updates += [ ( p , pre_p + 2*( momentum*v - l_rate*g/T.sqrt(ssg) ) ) \
                    if ssg.get_value().sum() != 0 else ( p , pre_p + 2*( -l_rate*g + momentum*v) ) \
                    for p , pre_p , g , v ,ssg in izip(parameters, pre_parameters, gradient, pre_update , sq_sum_grad) ]
                parameter_updates += [ ( v , -l_rate*g/T.sqrt(ssg) + momentum*v )\
                    if ssg.get_value().sum() != 0 else ( v  , - l_rate*g + momentum*v ) \
                    for g , v , ssg in izip(gradient , pre_update , sq_sum_grad) ]
                parameter_updates += [ (ssg, rms_alpha*ssg + (np.cast['float32'](1.0)-rms_alpha)*(g**2)  ) \
                    for g , ssg in izip( gradient , sq_sum_grad) ]
                return parameter_updates
            elif momentum_type == "None":
                parameter_updates = [ ( p, p - l_rate*g) \
                    for p , g in izip(parameters , gradient ) ]
                return parameter_updates

        self.train = theano.function(
                        inputs  = [ x_seq, y_h_seq, mask, l_rate, rms_alpha ,clip_range, momentum ],
                        outputs = cost,
                        updates = update( parameters, gradients),
                        allow_input_downcast=True
        )

        self.test  = theano.function(
                        inputs = [ x_seq, mask ],
                        outputs = y_out,
                        allow_input_downcast=True
        )
Example #14
 def predict(self, x):
     pred = softmax(x, self.w)
     return pred