Example #1
    def __init__(self, n_voc, trainset, testset, dataname, classes, prefix):
        if prefix is not None:
            prefix += '/'
        self.trainset = trainset
        self.testset = testset

        docs = T.imatrix()
        label = T.ivector()
        length = T.fvector()
        sentencenum = T.fvector()
        wordmask = T.fmatrix()
        sentencemask = T.fmatrix()
        maxsentencenum = T.iscalar()
        isTrain = T.iscalar()

        rng = numpy.random

        layers = []
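        # Hierarchical pipeline: word embedding -> word-level LSTM ->
        # mean-pool words into sentence vectors -> regroup sentences by
        # document -> sentence-level LSTM -> mean-pool sentences into a
        # document vector -> fully connected layer -> softmax classifier.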
        layers.append(EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix))
        layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 200, 200, 'wordlstmlayer', prefix)) 
        layers.append(MeanPoolLayer(layers[-1].output, length))
        layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
        layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
        layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
        layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
        layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix, activation=T.nnet.softmax))
        self.layers = layers
        
        # Mean negative log-likelihood of the gold labels (accumulated in float32)
        cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
        # Count of correct argmax predictions and squared-error sum over the batch
        correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
        err = T.argmax(layers[-1].output, axis=1) - label
        mse = T.sum(err * err)
        
        params = []
        for layer in layers:
            params += layer.params
        L2_rate = numpy.float32(1e-5)
        # L2-regularize every parameter except the embedding matrix (params[0])
        for param in params[1:]:
            cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
        gparams = [T.grad(cost, param) for param in params]

        # AdaUpdates builds the per-parameter update rules (an AdaDelta-style
        # rule, judging by the rho=0.95 and epsilon=1e-6 arguments)
        updates = AdaUpdates(params, gparams, 0.95, 1e-6)

        self.train_model = theano.function(
            inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
            outputs=cost,
            updates=updates,
        )

        self.test_model = theano.function(
            inputs=[docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum],
            outputs=[correct, mse],
        )
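
A minimal sketch of how the two compiled functions might be driven. Everything below is an assumption: the batch container and its attribute names (docs, label, length, sentencenum, wordmask, sentencemask, maxsentencenum) are hypothetical and depend on how trainset and testset are prepared.

for epoch in range(10):
    for batch in model.trainset:          # hypothetical iterable of batches
        loss = model.train_model(batch.docs, batch.label, batch.length,
                                 batch.sentencenum, batch.wordmask,
                                 batch.sentencemask, batch.maxsentencenum)
    correct_total, mse_total, n = 0, 0.0, 0
    for batch in model.testset:
        correct, mse = model.test_model(batch.docs, batch.label, batch.length,
                                        batch.sentencenum, batch.wordmask,
                                        batch.sentencemask, batch.maxsentencenum)
        correct_total += correct
        mse_total += mse
        n += len(batch.label)
    print('epoch %d: acc=%.4f rmse=%.4f' % (
        epoch, correct_total / float(n), (mse_total / n) ** 0.5))
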
Example #2
    def __init__(self, n_voc, trainset, testset, em_path, classes, prefix):
        if prefix is not None:
            prefix += '/'
        self.trainset = trainset
        self.testset = testset

        docs = T.imatrix()
        label = T.ivector()
        length = T.fvector()
        sentencenum = T.fvector()
        wordmask = T.fmatrix()
        sentencemask = T.fmatrix()
        maxsentencenum = T.iscalar()
        isTrain = T.iscalar()

        rng = numpy.random

        layers = []
        layers.append(
            EmbLayer(em_path, rng, docs, n_voc, 200, "emblayer", prefix))
        layers.append(
            LSTMLayer(rng, layers[-1].output, wordmask, 200, 200,
                      'wordlstmlayer', prefix))
        layers.append(MeanPoolLayer(layers[-1].output, length))
        layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
        layers.append(
            LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200,
                      'sentencelstmlayer', prefix))
        layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
        layers.append(
            HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
        # Save this layer's output; when the compiled function runs it
        # evaluates to a numpy.ndarray
        feature_vector = layers[-1].output
        layers.append(
            HiddenLayer(rng,
                        layers[-1].output,
                        200,
                        int(classes),
                        'softmaxlayer',
                        prefix,
                        activation=T.nnet.softmax))
        # The softmax layer outputs n rows of 2-dimensional vectors (two
        # classes): the probability of each class for each example, with
        # each row summing to 1
        predict_probability = layers[-1].output
        self.layers = layers

        cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]),
                                                label],
                       acc_dtype='float32')
        correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label),
                        acc_dtype='int32')
        err = T.argmax(layers[-1].output, axis=1) - label
        mse = T.sum(err * err)
        # Predicted class (0 or 1) for each document
        predict_label = T.argmax(layers[-1].output, axis=1)

        params = []
        for layer in layers:
            params += layer.params
        L2_rate = numpy.float32(1e-5)
        for param in params[1:]:
            cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
        gparams = [T.grad(cost, param) for param in params]

        updates = AdaUpdates(params, gparams, 0.95, 1e-6)

        self.train_model = theano.function(inputs=[
            docs, label, length, sentencenum, wordmask, sentencemask,
            maxsentencenum
        ],
                                           outputs=[cost, feature_vector],
                                           updates=updates,
                                           mode='FAST_RUN')
        self.test_model = theano.function(inputs=[
            docs, label, length, sentencenum, wordmask, sentencemask,
            maxsentencenum
        ],
                                          outputs=[
                                              correct, mse, predict_label,
                                              feature_vector,
                                              predict_probability
                                          ],
                                          mode='FAST_RUN')
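
A minimal sketch of pulling the extra outputs out of this variant at test time. The unpacking order follows the outputs list declared above; the batch attribute names are hypothetical.

correct, mse, predict_label, feature_vector, predict_probability = \
    model.test_model(batch.docs, batch.label, batch.length, batch.sentencenum,
                     batch.wordmask, batch.sentencemask, batch.maxsentencenum)
# feature_vector: one 200-dimensional row per document (the 'fulllayer' output)
# predict_probability: per-class probabilities; each row sums to 1
# predict_label: argmax of the probabilities, i.e. 0 or 1 per document
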
Example #3
    def __init__(self, n_voc, n_usr, n_prd, trainset, testset, dataname, classes, prefix):
        if prefix is not None:
            prefix += '/'
        self.trainset = trainset
        self.testset = testset

        docs = T.imatrix()
        label = T.ivector()
        usr = T.ivector()
        prd = T.ivector()
        wordmask = T.fmatrix()
        sentencemask = T.fmatrix()
        maxsentencenum = T.iscalar()
        isTrain = T.iscalar()

        rng = numpy.random

        layers = []
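        # Same hierarchical pipeline as above, but the mean-pooling steps are
        # replaced by attention layers conditioned on user (Uemb) and product
        # (Pemb) embeddings at both the word and sentence levels.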
        docsemb = EmbLayer(rng, docs, n_voc, 200, 'emblayer', dataname, prefix)
        Uemb = UsrEmbLayer(rng, n_usr, 200, 'usremblayer', prefix)
        Pemb = PrdEmbLayer(rng, n_prd, 200, 'prdemblayer', prefix)
        layers.append(docsemb)
        layers.append(Uemb)
        layers.append(Pemb)
        layers.append(LSTMLayer(rng, docsemb.output, wordmask, 200, 200, 'wordlstmlayer', prefix)) 
        uemb_sentence = GetuEmbLayer(usr, Uemb.output, maxsentencenum, 'uemb_sentence', prefix)
        pemb_sentence = GetpEmbLayer(prd, Pemb.output, maxsentencenum, 'pemb_sentence', prefix)
        layers.append(AttentionLayer(rng, layers[-1].output, uemb_sentence.output, pemb_sentence.output, wordmask, 200, 200, 200, 200, 'wordattentionLayer', prefix))
        layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum, prefix))
        layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 200, 200, 'sentencelstmlayer', prefix))
        uemb_doc = GetuEmbLayer(usr, Uemb.output, maxsentencenum, 'uemb_doc', prefix)
        pemb_doc = GetpEmbLayer(prd, Pemb.output, maxsentencenum, 'pemb_doc', prefix)
        layers.append(AttentionLayer(rng, layers[-1].output, uemb_doc.output, pemb_doc.output, sentencemask, 200, 200, 200, 200, 'sentenceattentionLayer', prefix))
        layers.append(HiddenLayer(rng, layers[-1].output, 200, 200, 'fulllayer', prefix))
        layers.append(HiddenLayer(rng, layers[-1].output, 200, int(classes), 'softmaxlayer', prefix, activation=T.nnet.softmax))
        self.layers = layers
        
        cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
        correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
        err = T.argmax(layers[-1].output, axis=1) - label
        mse = T.sum(err * err)
        
        params = []
        for layer in layers:
            params += layer.params
        L2_rate = numpy.float32(1e-5)
        # L2-regularize everything except the first three parameters
        # (the word, user, and product embedding tables)
        for param in params[3:]:
            cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
        gparams = [T.grad(cost, param) for param in params]

        updates = AdaUpdates(params, gparams, 0.95, 1e-6)

        self.train_model = theano.function(
            inputs=[docs, label, usr, prd, wordmask, sentencemask, maxsentencenum],
            outputs=cost,
            updates=updates,
        )

        self.test_model = theano.function(
            inputs=[docs, label, usr, prd, wordmask, sentencemask, maxsentencenum],
            outputs=[correct, mse],
        )
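
A hypothetical call into this variant; compared with Example #1 it takes the user and product index vectors that drive the two attention layers, instead of the length and sentencenum vectors. The batch attribute names are assumptions.

loss = model.train_model(batch.docs, batch.label, batch.usr, batch.prd,
                         batch.wordmask, batch.sentencemask,
                         batch.maxsentencenum)
correct, mse = model.test_model(batch.docs, batch.label, batch.usr, batch.prd,
                                batch.wordmask, batch.sentencemask,
                                batch.maxsentencenum)
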
Example #4
    def __init__(self, n_voc, trainset, testset, dataname, classes, prefix):
        if prefix is not None:
            prefix += '/'
        self.trainset = trainset
        self.testset = testset
        self.classes = int(classes)

        docs = T.imatrix()
        label = T.imatrix()
        length = T.fvector()
        wordmask = T.fmatrix()
        sentencemask = T.fmatrix()
        maxsentencenum = T.iscalar()
        sentencenum = T.fvector()
        isTrain = T.iscalar()

        rng = numpy.random

        # layers = []
        # layers.append(EmbLayer(rng, docs, n_voc, 50, 'emblayer', dataname, prefix))
        # layers.append(LSTMLayer(rng, layers[-1].output, wordmask, 50, 50, 'wordlstmlayer', prefix))
        # layers.append(SimpleAttentionLayer(rng, layers[-1].output, wordmask,50, 50, 'wordattentionlayer', prefix))
        # layers.append(SentenceSortLayer(layers[-1].output,maxsentencenum,prefix))
        # layers.append(LSTMLayer(rng, layers[-1].output, sentencemask, 50, 50, 'sentencelstmlayer', prefix))
        # layers.append(SimpleAttentionLayer(rng, layers[-1].output, sentencemask,50, 50, 'sentenceattentionlayer', prefix))
        # layers.append(HiddenLayer(rng, layers[-1].output, 50, 50, 'fulllayer', prefix))
        # layers.append(HiddenLayer(rng, layers[-1].output, 50, int(classes), 'softmaxlayer', prefix, activation=T.nnet.sigmoid))
        # self.layers = layers
        layers = []
        layers.append(
            EmbLayer(rng, docs, n_voc, 50, 'emblayer', dataname, prefix))
        layers.append(
            LSTMLayer(rng, layers[-1].output, wordmask, 50, 50,
                      'wordlstmlayer', prefix))
        layers.append(MeanPoolLayer(layers[-1].output, length))
        layers.append(SentenceSortLayer(layers[-1].output, maxsentencenum))
        layers.append(
            LSTMLayer(rng, layers[-1].output, sentencemask, 50, 50,
                      'sentencelstmlayer', prefix))
        layers.append(MeanPoolLayer(layers[-1].output, sentencenum))
        layers.append(
            HiddenLayer(rng, layers[-1].output, 50, 50, 'fulllayer', prefix))
        layers.append(
            HiddenLayer(rng,
                        layers[-1].output,
                        50,
                        int(classes),
                        'softmaxlayer',
                        prefix,
                        activation=T.nnet.sigmoid))
        self.layers = layers

        predict = layers[-1].output
        # Multi-label objective: label is an n x classes binary matrix, and
        # the cost is the per-document sum of binary cross-entropies,
        # averaged over the batch
        cost = T.nnet.binary_crossentropy(layers[-1].output, label).sum(1)
        cost = cost.mean()
        # Modified correctness metrics for the multi-label case (kept for
        # reference, commented out):
        # predicted_value = ((layers[-1].output) >= EVALUTION_THRESHOLD_FOR_MULTI_LABEL).astype(int)
        # predicted_value = predicted_value.astype(bool)
        # true_value = label.astype(bool)
        # equal = true_value == predicted_value
        # match = np.sum(equal, axis=1) == np.size(equal, axis=1)
        # # value 1 match_ratio
        # exact_match_ratio = np.sum(match) / np.size(match)
        # true_and_predict = np.sum(true_value & predicted_value, axis=1)
        # true_or_predict = np.sum(true_value | predicted_value, axis=1)
        # # value 2 accuracy
        # accuracy = np.mean(true_and_predict / true_or_predict)
        # # value 3 precision
        # precision = np.mean(true_and_predict / (np.sum(predicted_value, axis=1) + 1e-9))
        # # value 4 recall
        # recall = np.mean(true_and_predict / np.sum(true_value, axis=1))
        # # f1_Measure
        # F1_Measure = np.mean((true_and_predict * 2) / (np.sum(true_value, axis=1) + np.sum(predicted_value, axis=1)))
        # # HammingLoss
        # HammingLoss = np.mean(true_value ^ predicted_value)
        # TP
        # TP = np.sum(true_value & predicted_value,axis=0,dtype=np.int32)
        # FP = np.sum((~true_value) & predicted_value,axis=0,dtype=np.int32)
        # FN = np.sum(true_value & (~predicted_value),axis=0,dtype=np.int32)
        # _P = np.sum(TP) / (np.sum(TP) + np.sum(FP)  + 1e-9 )
        # _R = np.sum(TP) / (np.sum(TP) + np.sum(FN)  + 1e-9 )
        # Micro_F1 = (2 * _P *_R) / (_P + _R)
        # _P_t = TP / (TP + FP + 1e-9)
        # _R_t = TP / (TP + FN + 1e-9)
        # Macro_F1 = np.mean((2 * _P_t * _R_t) / (_P_t + _R_t + 1e-9))
        # Single-label cost/metrics from the earlier examples, replaced above
        # by the multi-label binary cross-entropy:
        # cost = -T.mean(T.log(layers[-1].output)[T.arange(label.shape[0]), label], acc_dtype='float32')
        # correct = T.sum(T.eq(T.argmax(layers[-1].output, axis=1), label), acc_dtype='int32')
        # err = T.argmax(layers[-1].output, axis=1) - label
        # mse = T.sum(err * err)

        params = []
        for layer in layers:
            params += layer.params
        L2_rate = numpy.float32(1e-5)
        for param in params[1:]:
            cost += T.sum(L2_rate * (param * param), acc_dtype='float32')
        gparams = [T.grad(cost, param) for param in params]

        updates = AdaUpdates(params, gparams, 0.95, 1e-6)

        self.train_model = theano.function(
            inputs=[
                docs, label, length, sentencenum, wordmask, sentencemask,
                maxsentencenum
            ],
            outputs=cost,
            updates=updates,
        )

        self.test_model = theano.function(
            inputs=[
                docs, length, sentencenum, wordmask, sentencemask,
                maxsentencenum
            ],
            outputs=predict,
        )
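
A minimal sketch of evaluating the multi-label model outside the graph, mirroring the metrics in the commented block above. The 0.5 decision threshold and the batch attribute names are assumptions; batch.label is taken to be the n x classes binary indicator matrix.

import numpy as np

probs = model.test_model(batch.docs, batch.length, batch.sentencenum,
                         batch.wordmask, batch.sentencemask,
                         batch.maxsentencenum)
predicted = probs >= 0.5
true = batch.label.astype(bool)
tp = np.sum(true & predicted, axis=1).astype(np.float64)
precision = np.mean(tp / (np.sum(predicted, axis=1) + 1e-9))
recall = np.mean(tp / (np.sum(true, axis=1) + 1e-9))
f1 = np.mean(2 * tp / (np.sum(true, axis=1) + np.sum(predicted, axis=1) + 1e-9))
hamming_loss = np.mean(true ^ predicted)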