Example #1
def SegPredic_api(inputtext):

    material = inputtext
    #material = 'data/24s/*'
    #material = "data/sjw/A05*"
    filename = 'model'
    charstop = True  # True means label attributes to previous char
    crfmethod = "lbfgs"  # {'lbfgs', 'l2sgd', 'ap', 'pa', 'arow'}
    # convert the input text from JSON
    rawalldata = json.loads(material)
    testdata = testdataconvert(rawalldata['testdata'])
    trainidx = []
    testidx = []
    text_score = []  # records the uncertainty of each block

    print(datetime.datetime.now())
    modelname = filename.replace('/', '').replace('*',
                                                  '') + str(charstop) + ".m"

    tagger = pycrfsuite.Tagger()
    #modelname = 'modelTrue1.m'
    print(modelname)

    tagger.open(modelname)
    print(datetime.datetime.now())
    print("Start testing...")

    results = []
    lines = []
    Spp = []
    Npp = []
    all_len = 0

    #while testdata:
    x, yref = testdata.pop()
    yout = tagger.tag(x)
    #print(yout)
    #pr = tagger.probability(yref)
    results.append(util.eval(yref, yout, "S"))

    tp, fp, fn, tn = zip(*results)
    tp, fp, fn, tn = sum(tp), sum(fp), sum(fn), sum(tn)
    #print(tp, fp, fn, tn)
    if tp == 0:  # precision/recall are 0 and the divisions below would fail
        p = 0
        r = 0
        f_score = 0
    else:
        p, r = tp / (tp + fp), tp / (tp + fn)
        f_score = 2 * p * r / (p + r)

    print("Total tokens in Test Set:", tp + fp + fn + tn)
    print("Total S in REF:", tp + fn)
    print("Total S in OUT:", tp + fp)
    print("Presicion:", p)
    print("Recall:", r)
    print("F1-score:", f_score)

    return yout
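
Note: util.eval is not shown in this listing. Judging from the unpack tp, fp, fn, tn = zip(*results) and the "S" argument, it appears to return per-sequence confusion counts for the target label. A minimal sketch under that assumption (eval_counts is a hypothetical stand-in, not the actual util.eval):

def eval_counts(yref, yout, target="S"):
    # Compare the reference and predicted label sequences position by
    # position and count the confusion cells for the target label.
    tp = sum(1 for r, o in zip(yref, yout) if r == target and o == target)
    fp = sum(1 for r, o in zip(yref, yout) if r != target and o == target)
    fn = sum(1 for r, o in zip(yref, yout) if r == target and o != target)
    tn = sum(1 for r, o in zip(yref, yout) if r != target and o != target)
    return tp, fp, fn, tn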
Example #2
def solve(input_path):
  result = 0
  with open(input_path) as f:  # ensure the file is closed on exit
    for line in f:
      e = util.eval(line)
      print('Line result', e, line)
      result += e
  return result
Example #3
 def advance_batch():
   print("Step for model(%s) is %s"%(model.name, u.eval(model.step)))
   sess = u.get_default_session()
   # TODO: get rid of _sampled_labels
   sessrun([sampled_labels.initializer, _sampled_labels.initializer])
   if args.advance_batch:
     with u.timeit("advance_batch"):
       sessrun(advance_batch_op)
   sessrun(advance_step_op)
Example #4
    def evaluate(self,
                 ann_file,
                 res_file,
                 style_name,
                 return_img_scores=False):
        metrics, img_scores = util.eval(ann_file, res_file, True)

        with open(res_file, 'r') as f:
            d = json.load(f)
            sent_list = [i['caption'] for i in d]

        if self.use_lm:
            lm_score, _ = lm_kenlm.eval_sentences(self.lm[style_name],
                                                  sent_list)
            for i, score in enumerate(img_scores):
                score['lm'] = _[i]
            metrics['lm'] = lm_score

        if self.use_clf:
            clf_score, _ = clf_path.eval_sentences(self.clf[style_name],
                                                   sent_list)
            for i, score in enumerate(img_scores):
                score['clf'] = int(_[i])
            metrics['clf'] = clf_score

        if self.use_srilm:
            lm_score, _ = lm_srilm.eval_sentences(self.srilm[style_name],
                                                  sent_list)
            for i, score in enumerate(img_scores):
                score['srilm'] = _[i]
            metrics['srilm'] = lm_score

        if self.use_nnclf:
            nnclf_score, _ = self.nn_clf[style_name].evaluate(
                ann_file, res_file)
            for i, score in enumerate(img_scores):
                score['nnclf'] = _[i]
            metrics['nnclf'] = nnclf_score

        if return_img_scores:
            return metrics, img_scores
        else:
            return metrics
Example #5
                yref.append(a)
        # conversion needed for the LSTM input
        x_test_seq = token.texts_to_sequences(x)
        x_test = sequence.pad_sequences(x_test_seq, maxlen=MAX_LEN_OF_TOKEN)

        yout = model.predict_classes(x_test)
        #pr = tagger.probability(yref)
        p_1 = 0
        p_2 = 0
        prob = model.predict_proba(x_test)
        for i in range(len(yout)):
            p_1 = prob[i, 0]
            Spp.append(p_1)  # probability of the S label
            p_2 = prob[i, 1]
            Npp.append(p_2)  # probability of the N label
        results.append(util.eval(yref, yout, "S"))

        score_array = []
        All_u_score = 0
        p_Scount = 0
        p_Ncount = 0

        for i in range(len(Spp)):
            _s = 0
            if Spp[i] > Npp[i]:
                _s = Spp[i]
            else:
                _s = Npp[i]
            #_s = (_s - 0.5) * 10
            _s = (1 - _s)
            #U_score = U_score + _s
Example #6
 def test_eval(self):
   self.assertEqual(51, util.eval('1 + (2 * 3) + (4 * (5 + 6))'))
   self.assertEqual(26, util.eval('2 * 3 + (4 * 5)'))
   self.assertEqual(437, util.eval('5 + (8 * 3 + 9 + 3 * 4 * 3)'))
   self.assertEqual(12240, util.eval('5 * 9 * (7 * 3 * 3 + 9 * 3 + (8 + 6 * 4))'))
   self.assertEqual(13632, util.eval('((2 + 4 * 9) * (6 + 9 * 8 + 6) + 6) + 2 + 4 * 2'))
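
The expected values above are consistent with evaluating + and * at equal precedence, strictly left to right, with parentheses recursing (the Advent of Code 2020 day 18 part 1 rules). A minimal sketch of such an evaluator, assuming that is what this util.eval implements:

def eval_ltr(expr):
    # Evaluate +/* at equal precedence, left to right; '(' recurses.
    def parse(tokens, i):
        total, op = 0, '+'
        while i < len(tokens) and tokens[i] != ')':
            t = tokens[i]
            if t in '+*':
                op = t
            else:
                if t == '(':
                    val, i = parse(tokens, i + 1)  # i lands on the ')'
                else:
                    val = int(t)
                total = total + val if op == '+' else total * val
            i += 1
        return total, i

    return parse(expr.replace('(', '( ').replace(')', ' )').split(), 0)[0]

assert eval_ltr('2 * 3 + (4 * 5)') == 26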
Example #7
def predic_unscore_api(inputtext):
    charstop = True  # True means label attributes to previous char
    features = 3  # 1=discrete; 2=vectors; 3=both
    dictfile = 'vector/24scbow50.txt'
    modelname = 'datalunyu5001.m'
    vdict = util.readvec(dictfile)
    #li = [line for line in util.text_to_lines(inputtext)]
    li = util.text_to_lines(inputtext)

    print(li)
    data = []
    for line in li:
        x, y = util.line_toseq(line, charstop)
        print(x)
        if features == 1:
            d = crf.x_seq_to_features_discrete(x, charstop), y
        elif features == 2:
            d = crf.x_seq_to_features_vector(x, vdict, charstop), y
        elif features == 3:
            d = crf.x_seq_to_features_both(x, vdict, charstop), y
        data.append(d)

    tagger = pycrfsuite.Tagger()
    tagger.open(modelname)
    print("Start testing...")
    results = []
    lines = []
    Spp = []
    Npp = []
    out = []
    #while data:
    for index in range(len(data)):
        print(len(data))
        xseq, yref = data.pop(0)
        yout = tagger.tag(xseq)
        sp = 0
        np = 0
        for i in range(len(yout)):
            sp = tagger.marginal('S', i)
            Spp.append(sp)  # probability of the S label
            print(sp)
            np = tagger.marginal('N', i)
            Npp.append(np)  # probability of the N label
            print(np)
        results.append(util.eval(yref, yout, "S"))
        lines.append(
            util.seq_to_line([x['gs0'] for x in xseq], yout, charstop, Spp,
                             Npp))
        #print(util.seq_to_line([x['gs0'] for x in xseq], (str(sp) +'/'+ str(np)),charstop))
        out.append(yout)

    tp, fp, fn, tn = zip(*results)
    tp, fp, fn, tn = sum(tp), sum(fp), sum(fn), sum(tn)

    p, r = tp / (tp + fp), tp / (tp + fn)
    score = ''
    score = score + '<br>' + "Total tokens in Test Set:" + repr(tp + fp + fn +
                                                                tn)
    score = score + '<br>' + "Total S in REF:" + repr(tp + fn)
    score = score + '<br>' + "Total S in OUT:" + repr(tp + fp)
    score = score + '<br>' + "Presicion:" + repr(p)
    score = score + '<br>' + "Recall:" + repr(r)
    score = score + '<br>' + "*******************F1-score:" + repr(2 * p * r /
                                                                   (p + r))

    output = ''
    print(lines)

    for line in lines:
        #line = unquote(line)
        print("output:")
        print(line.encode('utf8'))
        #output = output + '<br>' + line
        output += line
        print(line)
    output = score + '<br>' + output

    #output = jsonify({'str': output})

    return (out)
Example #8
def predic():
    charstop = True  # True means label attributes to previous char
    features = 3  # 1=discrete; 2=vectors; 3=both
    dictfile = 'vector/24scbow50.txt'
    modelname = 'datalunyu5001.m'
    vdict = util.readvec(dictfile)
    inputtext = request.form.get('input_text', '')
    #li = [line for line in util.text_to_lines(inputtext)]
    li = util.text_to_lines(inputtext)

    print(li)
    data = []
    for line in li:
        x, y = util.line_toseq(line, charstop)
        print(x)
        if features == 1:
            d = crf.x_seq_to_features_discrete(x, charstop), y
        elif features == 2:
            d = crf.x_seq_to_features_vector(x, vdict, charstop), y
        elif features == 3:
            d = crf.x_seq_to_features_both(x, vdict, charstop), y
        data.append(d)

    tagger = pycrfsuite.Tagger()
    tagger.open(modelname)
    print("Start testing...")
    results = []
    lines = []
    Spp = []
    Npp = []
    #while data:
    for index in range(len(data)):
        print(len(data))
        xseq, yref = data.pop(0)
        yout = tagger.tag(xseq)
        sp = 0
        np = 0
        for i in range(len(yout)):
            sp = tagger.marginal('S', i)
            Spp.append(sp)  # probability of the S label
            print(sp)
            np = tagger.marginal('N', i)
            Npp.append(np)  # probability of the N label
            print(np)
        results.append(util.eval(yref, yout, "S"))
        lines.append(
            util.seq_to_line([x['gs0'] for x in xseq], yout, charstop, Spp,
                             Npp))
        #print(util.seq_to_line([x['gs0'] for x in xseq], (str(sp) +'/'+ str(np)),charstop))

    U_score = 0
    p_Scount = 0
    p_Ncount = 0
    for i in range(len(Spp)):
        _s = 0
        if Spp[i] > Npp[i]:
            _s = Spp[i]
        else:
            _s = Npp[i]
        _s = (_s - 0.5) * 10
        U_score = U_score + _s
        p_Scount = p_Scount + Spp[i]
        p_Ncount = p_Ncount + Npp[i]

    tp, fp, fn, tn = zip(*results)
    tp, fp, fn, tn = sum(tp), sum(fp), sum(fn), sum(tn)

    p, r = tp / (tp + fp), tp / (tp + fn)
    score = ''
    score = score + '<br>' + "Total tokens in Test Set:" + repr(tp + fp + fn +
                                                                tn)
    score = score + '<br>' + "Total S in REF:" + repr(tp + fn)
    score = score + '<br>' + "Total S in OUT:" + repr(tp + fp)
    score = score + '<br>' + "Presicion:" + repr(p)
    score = score + '<br>' + "Recall:" + repr(r)
    score = score + '<br>' + "*******************F1-score:" + repr(2 * p * r /
                                                                   (p + r))
    score = score + '<br>' + "======================="
    score = score + '<br>' + "character count:" + str(len(Spp))
    score = score + '<br>' + "block uncertain rate:" + str(
        (U_score / len(Spp)))

    output = ''
    key = 0
    for line in lines:
        #print (line.encode('utf8'))

        output = output + '<br>' + line
        #print (line)
        key = key + 1

    #for index_m in ypp:
    #  output = output + '<br>' + line

    output = score + '<br>' + output

    return (output)
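
A compact restatement of the per-character score used in predic() above, assuming Spp and Npp hold the tagger.marginal values for 'S' and 'N'. Note that _s = (max - 0.5) * 10 grows with model confidence, so the "block uncertain rate" printed above is really a mean confidence margin, each term lying in [0, 5]:

def mean_confidence_margin(spp, npp):
    # Winning marginal per character, shifted and scaled as in predic().
    margins = [(max(s, n) - 0.5) * 10 for s, n in zip(spp, npp)]
    return sum(margins) / len(margins)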
Example #9
def buildCrf(inputtext):
    material = inputtext
    #material = 'data/24s/*'
    #material = "data/sjw/A05*"
    filename = 'model'
    size = 80
    trainportion = 0.9
    dictfile = 'data/vector/24scbow300.txt'
    crfmethod = "l2sgd"  # {'lbfgs', 'l2sgd', 'ap', 'pa', 'arow'}
    charstop = True  # True means label attributes to previous char
    features = 1  # 1=discrete; 2=vectors; 3=both
    random.seed(101)

    # example command-line invocation:
    "python runcrf.py 'data/sjw/*' 80 data/vector/vectors300.txt 1 1"
    args = sys.argv
    '''
    if len(args)>1:
        material = args[1]
        size = int(args[2])
        dictfile = args[3]
        features = int(args[4])
        charstop = int(args[5])
    '''
    cut = int(size * trainportion)

    # name of the trained model file
    modelname = filename.replace('/', '').replace(
        '*', '') + str(size) + str(charstop) + ".m"
    print(modelname)
    print("Material:", material)
    print("Size:", size, "entries,", trainportion, "as training")

    print(datetime.datetime.now())

    # Prepare li: list of random lines
    if features > 1:
        vdict = util.readvec(dictfile)  # load the vector dictionary first
        print("Dict:", dictfile)
    li = [line for line in util.file_to_lines(glob.glob(material))]  # already split into a list of lines
    random.shuffle(li)  # shuffle for random sampling
    li = li[:size]

    # Prepare data: list of x(char), y(label) sequences
    data = []

    for line in li:
        x, y = util.line_toseq(line, charstop)
        #print(x)
        #print(y[:5])

        # build the n-gram features for the text here
        if features == 1:
            d = crf.x_seq_to_features_discrete(x, charstop), y
        elif features == 2:
            d = crf.x_seq_to_features_vector(x, vdict, charstop), y
        elif features == 3:
            d = crf.x_seq_to_features_both(x, vdict, charstop), y
        data.append(d)

    traindata = data[:cut]
    testdata = data[cut:]
    #print(traindata)

    trainer = pycrfsuite.Trainer()
    #print trainer.params()
    #print(traindata[0])
    for t in traindata:
        x, y = t

        trainer.append(x, y)

    trainer.select(crfmethod)  # choose the training algorithm
    trainer.set('max_iterations', 10)  # iteration cap while testing
    #trainer.set('delta',0)
    #print ("!!!!before train", datetime.datetime.now())
    trainer.train(modelname)
    #print ("!!!!after train", datetime.datetime.now())

    tagger = pycrfsuite.Tagger()
    # load the trained model file and dump a readable copy
    tagger.open(modelname)
    tagger.dump(modelname + ".txt")

    print(datetime.datetime.now())
    print("Start closed testing...")
    results = []
    print(traindata)
    while traindata:
        x, yref = traindata.pop()
        yout = tagger.tag(x)
        pr = tagger.marginal('S', 0)
        pp = tagger.probability(yout)
        results.append(util.eval(yref, yout, "S"))

    tp, fp, fn, tn = zip(*results)
    tp, fp, fn, tn = sum(tp), sum(fp), sum(fn), sum(tn)

    p, r = tp / (tp + fp), tp / (tp + fn)
    print("Total tokens in Train Set:", tp + fp + fn + tn)
    print("Total S in REF:", tp + fn)
    print("Total S in OUT:", tp + fp)
    print("Presicion:", p)
    print("Recall:", r)
    print("*******************F1-score:", 2 * p * r / (p + r))
    print("*******************:", pr)
    print("*******************:", pp)
    print("*******************:", yout)
    print(datetime.datetime.now())

    return (modelname)
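
For reference, a minimal pycrfsuite round trip mirroring the train/tag flow in buildCrf. The toy feature dicts, labels, and model file name are illustrative only:

import pycrfsuite

xseq = [{'char': c} for c in "ABCDE"]  # one feature dict per character
yseq = ['N', 'N', 'N', 'N', 'S']       # 'S' marks a segment boundary

trainer = pycrfsuite.Trainer(verbose=False)
trainer.append(xseq, yseq)
trainer.select('l2sgd')
trainer.set('max_iterations', 10)
trainer.train('toy.m')

tagger = pycrfsuite.Tagger()
tagger.open('toy.m')
print(tagger.tag(xseq))         # predicted label sequence
print(tagger.marginal('S', 4))  # P(label at position 4 == 'S')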
Example #10
def trainAndpredic_api(inputtext):

    material = inputtext
    #material = 'data/24s/*'
    #material = "data/sjw/A05*"
    filename = 'model'
    charstop = True  # True means label attributes to previous char
    crfmethod = "lbfgs"  # {'lbfgs', 'l2sgd', 'ap', 'pa', 'arow'}
    # convert the input text from JSON
    rawalldata = json.loads(material)
    print(rawalldata)
    traindata = traindataconvert(rawalldata['traindata'])
    testdata = testdataconvert(rawalldata['testdata'])
    trainidx = []
    testidx = []
    text_obj = {}
    text_score = []  # records the uncertainty of each block
    f = open('UserRES.txt', 'w')

    # organize the information for the whole corpus
    for i in rawalldata['testdata']:
        testidx.append(i)
        text_obj[i] = ([len(rawalldata['testdata'][i]['text']), 0])

    for i in rawalldata['traindata']:
        trainidx.append(i)
    print('info:', text_obj)
    print(datetime.datetime.now())
    modelname = filename.replace('/', '').replace('*',
                                                  '') + str(charstop) + ".m"
    print(modelname)
    trainer = pycrfsuite.Trainer()
    #trainer.clear()
    #print trainer.params()
    #print(traindata[0])
    #for t in traindata:
    #    x, y = t
    #    trainer.append(x, y)

    trainer.append(traindata[0], traindata[1])
    trainer.select(crfmethod)  # choose the training algorithm
    trainer.set('max_iterations', 30)  # iteration cap while testing
    trainer.train(modelname)

    tagger = pycrfsuite.Tagger()
    #modelname = 'modelTrue1.m'
    # load the trained model file
    tagger.open(modelname)
    #tagger.dump(modelname+".txt")

    print(datetime.datetime.now())
    print("Start testing...")

    results = []
    lines = []
    Spp = []
    Npp = []
    all_len = 0
    ftt = open('reslog.txt', 'w')
    while testdata:
        x, yref = testdata.pop(0)
        ftt.write(str(x))
        ftt.write(str(yref))
        yout = tagger.tag(x)
        ftt.write(str(yout))
        #print(yout)
        #pr = tagger.probability(yref)
        sp = 0
        np = 0
        for i in range(len(yout)):
            sp = tagger.marginal('S', i)
            Spp.append(sp)  # probability of the S label
            #print(sp)
            np = tagger.marginal('N', i)
            Npp.append(np)  # probability of the N label
            #print(np)
        results.append(util.eval(yref, yout, "S"))

        score_array = []
        All_u_score = 0
        p_Scount = 0
        p_Ncount = 0

        for i in range(len(Spp)):
            _s = 0
            if Spp[i] > Npp[i]:
                _s = Spp[i]
            else:
                _s = Npp[i]
            #_s = (_s - 0.5) * 10
            _s = (1 - _s)
            #U_score = U_score + _s
            p_Scount = p_Scount + Spp[i]
            p_Ncount = p_Ncount + Npp[i]
            score_array.append(_s)
    end = 0
    for i in range(len(testidx)):
        U_score = 0  # uncertainty value of this text block
        start = end  # blocks are laid out consecutively in score_array
        end = start + text_obj[testidx[i]][0]
        text_count = text_obj[testidx[i]][0]  # character count
        #print(text_obj[testidx[i]])
        #print(len(score_array), end)
        for a in range(start, end):
            U_score += score_array[a]
        print('text_count:', text_count)
        print('U_score:', U_score)
        U_score = U_score / text_count
        text_obj[testidx[i]][1] = U_score
        All_u_score += U_score
        text_score.append([str(testidx[i]), U_score])

    tp, fp, fn, tn = zip(*results)
    tp, fp, fn, tn = sum(tp), sum(fp), sum(fn), sum(tn)
    #print(tp, fp, fn, tn)
    if tp == 0:  # precision/recall are 0 and the divisions below would fail
        p = 0
        r = 0
        f_score = 0
    else:
        p, r = tp / (tp + fp), tp / (tp + fn)
        f_score = 2 * p * r / (p + r)

    print("Total tokens in Test Set:", tp + fp + fn + tn)
    print("Total S in REF:", tp + fn)
    print("Total S in OUT:", tp + fp)
    print("Presicion:", p)
    print("Recall:", r)
    print("F1-score:", f_score)
    print(text_score)
    log_text = ''
    log_text += "----Doc Result-----" + "\n"
    log_text += "Total tokens in Test Set:" + str(tp + fp + fn + tn) + '\n'
    log_text += "Total S in REF:" + str(tp + fn) + '\n'
    log_text += "Total S in OUT:" + str(tp + fp) + '\n'
    log_text += "Presicion:" + str(p) + '\n'
    log_text += "Recall:" + str(r) + '\n'
    log_text += "F1-Score:" + str(f_score) + '\n'
    log_text += '\n' + "=============" + '\n'
    log_text += 'End Time:' + str(datetime.datetime.now()) + '\n'
    log_text += '\n'

    f.write(str(log_text))
    f.close()
    ftt.close()
    return text_score
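
The returned text_score pairs each test block with its mean per-character uncertainty, 1 - max(P_S, P_N) averaged over the block, so higher scores mark blocks the model is least sure about. A hypothetical helper for consuming that output (rank_blocks is not part of the original code):

def rank_blocks(text_score):
    # Sort [block_id, U_score] pairs, most uncertain block first,
    # e.g. to pick the next candidates for manual annotation.
    return sorted(text_score, key=lambda pair: pair[1], reverse=True)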
Example #11
    def fit(self, X_train, y_train, len_train, pos_train, length_train, position_train,
            X_validation, y_validation, len_validation, pos_validation, length_validation, position_validation,
            name, print_log=True):
        # ---------------------------------------forward computation--------------------------------------------#
        y_train_pw = y_train[0]
        y_train_pph = y_train[1]
        #y_train_iph = y_train[2]

        y_validation_pw = y_validation[0]
        y_validation_pph = y_validation[1]
        #y_validation_iph = y_validation[2]
        # ---------------------------------------define graph---------------------------------------------#
        with self.graph.as_default():
            # data place holder
            self.X_p = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.max_sentence_size),
                name="input_placeholder"
            )

            # pos info placeholder
            self.pos_p = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.max_sentence_size),
                name="pos_placeholder"
            )

            # length info placeholder
            self.length_p = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.max_sentence_size),
                name="length_placeholder"
            )

            # position info placeholder
            self.position_p = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.max_sentence_size),
                name="length_placeholder"
            )

            self.y_p_pw = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.max_sentence_size),
                name="label_placeholder_pw"
            )
            self.y_p_pph = tf.placeholder(
                dtype=tf.int32,
                shape=(None, self.max_sentence_size),
                name="label_placeholder_pph"
            )

            #self.y_p_iph = tf.placeholder(
            #    dtype=tf.int32,
            #    shape=(None, self.max_sentence_size),
            #    name="label_placeholder_iph"
            #)
            # dropout placeholders
            self.keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="keep_prob_p")
            self.input_keep_prob_p = tf.placeholder(dtype=tf.float32, shape=[], name="input_keep_prob_p")
            self.output_keep_prob_p=tf.placeholder(dtype=tf.float32, shape=[], name="output_keep_prob_p")

            # placeholder for the lengths of the corresponding sequences
            self.seq_len_p = tf.placeholder(
                dtype=tf.int32,
                shape=(None,),
                name="seq_len"
            )

            # mask used to strip off the padding
            self.mask = tf.sequence_mask(
                lengths=self.seq_len_p,
                maxlen=self.max_sentence_size,
                name="mask"
            )

            # labels after the padding is removed
            y_p_pw_masked = tf.boolean_mask(                #shape[seq_len1+seq_len2+....+,]
                tensor=self.y_p_pw,
                mask=self.mask,
                name="y_p_pw_masked"
            )

            y_p_pph_masked = tf.boolean_mask(               # shape[seq_len1+seq_len2+....+,]
                tensor=self.y_p_pph,
                mask=self.mask,
                name="y_p_pph_masked"
            )

            #y_p_iph_masked = tf.boolean_mask(               # shape[seq_len1+seq_len2+....+,]
            #    tensor=self.y_p_iph,
            #    mask=self.mask,
            #    name="y_p_iph_masked"
            #)

            # embeddings
            #self.embeddings = tf.Variable(
            #    initial_value=tf.zeros(shape=(self.vocab_size, self.embedding_size), dtype=tf.float32),
            #    name="embeddings"
            #)

            self.word_embeddings=tf.Variable(
                initial_value=util.getCWE(
                        word_embed_file="../data/embeddings/word_vec.txt",
                        char_embed_file="../data/embeddings/char_vec.txt"
                    ),
                name="word_embeddings"
            )

            print("word_embeddings.shape",self.word_embeddings.shape)

            # pos one-hot
            self.pos_one_hot = tf.one_hot(
                indices=self.pos_p,
                depth=self.pos_num,
                name="pos_one_hot"
            )
            print("shape of pos_one_hot:", self.pos_one_hot.shape)

            # length one-hot
            self.length_one_hot = tf.one_hot(
                indices=self.length_p,
                depth=self.length_num,
                name="pos_one_hot"
            )
            print("shape of length_one_hot:", self.length_one_hot.shape)

            # position one-hot
            self.position_one_hot = tf.one_hot(
                indices=self.position_p,
                depth=self.max_sentence_size,
                name="pos_one_hot"
            )
            print("shape of position_one_hot:", self.position_one_hot.shape)

            # -------------------------------------PW-----------------------------------------------------
            # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size]
            inputs_pw = tf.nn.embedding_lookup(params=self.word_embeddings, ids=self.X_p, name="embeded_input_pw")
            print("shape of inputs_pw:",inputs_pw.shape)
            #concat all information
            inputs_pw = tf.concat(
                values=[inputs_pw, self.pos_one_hot, self.length_one_hot, self.position_one_hot],
                axis=2,
                name="input_pw"
            )
            print("shape of cancated inputs_pw:", inputs_pw.shape)

            # forward part
            en_lstm_forward1_pw = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            en_lstm_forward2_pw=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            en_lstm_forward_pw=rnn.MultiRNNCell(cells=[en_lstm_forward1_pw,en_lstm_forward2_pw])
            #dropout
            en_lstm_forward_pw=rnn.DropoutWrapper(
                cell=en_lstm_forward_pw,
                input_keep_prob=self.input_keep_prob_p,
                output_keep_prob=self.output_keep_prob_p
            )

            # backward part
            en_lstm_backward1_pw = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            en_lstm_backward2_pw=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            en_lstm_backward_pw=rnn.MultiRNNCell(cells=[en_lstm_backward1_pw,en_lstm_backward2_pw])
            #dropout
            en_lstm_backward_pw=rnn.DropoutWrapper(
                cell=en_lstm_backward_pw,
                input_keep_prob=self.input_keep_prob_p,
                output_keep_prob=self.output_keep_prob_p
            )

            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=en_lstm_forward_pw,
                cell_bw=en_lstm_backward_pw,
                inputs=inputs_pw,
                sequence_length=self.seq_len_p,
                dtype=tf.float32,
                scope="pw"
            )

            outputs_forward_pw = outputs[0]                 # shape [batch_size, max_time, cell_fw.output_size]
            outputs_backward_pw = outputs[1]                # shape [batch_size, max_time, cell_bw.output_size]
            # concat final outputs [batch_size, max_time, cell_fw.output_size*2]
            h_pw = tf.concat(values=[outputs_forward_pw, outputs_backward_pw], axis=2)
            h_pw=tf.reshape(tensor=h_pw,shape=(-1,self.hidden_units_num*2),name="h_pw")
            print("h_pw.shape",h_pw.shape)

            # fully-connected dropout
            h_pw = tf.nn.dropout(x=h_pw, keep_prob=self.keep_prob_p, name="dropout_h_pw")

            # fully connect layer(projection)
            w_pw = tf.Variable(
                initial_value=tf.random_normal(shape=(self.hidden_units_num*2, self.class_num)),
                name="weights_pw"
            )
            b_pw = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num,)),
                name="bias_pw"
            )
            #logits
            logits_pw = tf.matmul(h_pw, w_pw) + b_pw        #logits_pw:[batch_size*max_time, 2]
            logits_normal_pw=tf.reshape(                    #logits in an normal way:[batch_size,max_time_stpes,2]
                tensor=logits_pw,
                shape=(-1,self.max_sentence_size,self.class_num),
                name="logits_normal_pw"
            )
            logits_pw_masked = tf.boolean_mask(             # logits_pw_masked [seq_len1+seq_len2+....+,3]
                tensor=logits_normal_pw,
                mask=self.mask,
                name="logits_pw_masked"
            )

            # prediction
            pred_pw = tf.cast(tf.argmax(logits_pw, 1), tf.int32, name="pred_pw")   # pred_pw:[batch_size*max_time,]
            pred_normal_pw = tf.reshape(                    # pred in an normal way,[batch_size, max_time]
                tensor=pred_pw,
                shape=(-1, self.max_sentence_size),
                name="pred_normal_pw"
            )

            pred_pw_masked = tf.boolean_mask(               # logits_pw_masked [seq_len1+seq_len2+....+,]
                tensor=pred_normal_pw,
                mask=self.mask,
                name="pred_pw_masked"
            )

            pred_normal_one_hot_pw = tf.one_hot(            # one-hot the pred_normal:[batch_size, max_time,class_num]
                indices=pred_normal_pw,
                depth=self.class_num,
                name="pred_normal_one_hot_pw"
            )

            # loss
            self.loss_pw = tf.losses.sparse_softmax_cross_entropy(
                labels=y_p_pw_masked,
                logits=logits_pw_masked
            )+tf.contrib.layers.l2_regularizer(self.lambda_pw)(w_pw)
            # ---------------------------------------------------------------------------------------

            # ----------------------------------PPH--------------------------------------------------
            # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size]
            inputs_pph = tf.nn.embedding_lookup(params=self.word_embeddings, ids=self.X_p, name="embeded_input_pph")
            print("shape of input_pph:", inputs_pph.shape)
            # concat all information
            inputs_pph = tf.concat(
                values=[inputs_pph, self.pos_one_hot, self.length_one_hot, self.position_one_hot,
                        pred_normal_one_hot_pw],
                axis=2,
                name="inputs_pph"
            )
            print("shape of input_pph:", inputs_pph.shape)

            # forward part
            en_lstm_forward1_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            en_lstm_forward2_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            en_lstm_forward_pph = rnn.MultiRNNCell(cells=[en_lstm_forward1_pph, en_lstm_forward2_pph])
            #dropout
            en_lstm_forward_pph=rnn.DropoutWrapper(
                cell=en_lstm_forward_pph,
                input_keep_prob=self.input_keep_prob_p,
                output_keep_prob=self.output_keep_prob_p
            )

            # backward part
            en_lstm_backward1_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            en_lstm_backward2_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            en_lstm_backward_pph = rnn.MultiRNNCell(cells=[en_lstm_backward1_pph, en_lstm_backward2_pph])
            #dropout
            en_lstm_backward_pph=rnn.DropoutWrapper(
                cell=en_lstm_backward_pph,
                input_keep_prob=self.input_keep_prob_p,
                output_keep_prob=self.output_keep_prob_p
            )

            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=en_lstm_forward_pph,
                cell_bw=en_lstm_backward_pph,
                inputs=inputs_pph,
                sequence_length=self.seq_len_p,
                dtype=tf.float32,
                scope="pph"
            )

            outputs_forward_pph = outputs[0]  # shape [batch_size, max_time, cell_fw.output_size]
            outputs_backward_pph = outputs[1]  # shape [batch_size, max_time, cell_bw.output_size]
            # concat final outputs [batch_size, max_time, cell_fw.output_size*2]
            h_pph = tf.concat(values=[outputs_forward_pph, outputs_backward_pph], axis=2)
            h_pph = tf.reshape(tensor=h_pph, shape=(-1, self.hidden_units_num * 2), name="h_pph")

            # fully-connected dropout
            h_pph = tf.nn.dropout(x=h_pph, keep_prob=self.keep_prob_p, name="dropout_h_pph")

            # fully connect layer(projection)
            w_pph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.hidden_units_num*2, self.class_num)),
                name="weights_pph"
            )
            b_pph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num,)),
                name="bias_pph"
            )
            # logits
            logits_pph = tf.matmul(h_pph, w_pph) + b_pph  # shape of logits:[batch_size*max_time, 2]
            logits_normal_pph = tf.reshape(                 # logits in an normal way:[batch_size,max_time_stpes,2]
                tensor=logits_pph,
                shape=(-1, self.max_sentence_size, self.class_num),
                name="logits_normal_pph"
            )
            logits_pph_masked = tf.boolean_mask(            # [seq_len1+seq_len2+....+,3]
                tensor=logits_normal_pph,
                mask=self.mask,
                name="logits_pph_masked"
            )

            # prediction
            pred_pph = tf.cast(tf.argmax(logits_pph, 1), tf.int32, name="pred_pph")  # pred_pph:[batch_size*max_time,]
            pred_normal_pph = tf.reshape(                       # pred in an normal way,[batch_size, max_time]
                tensor=pred_pph,
                shape=(-1, self.max_sentence_size),
                name="pred_normal_pph"
            )
            pred_pph_masked = tf.boolean_mask(                  # logits_pph_masked [seq_len1+seq_len2+....+,]
                tensor=pred_normal_pph,
                mask=self.mask,
                name="pred_pph_masked"
            )
            pred_normal_one_hot_pph = tf.one_hot(               # one-hot the pred_normal:[batch_size, max_time,class_num]
                indices=pred_normal_pph,
                depth=self.class_num,
                name="pred_normal_one_hot_pph"
            )

            # loss
            self.loss_pph = tf.losses.sparse_softmax_cross_entropy(
                labels=y_p_pph_masked,
                logits=logits_pph_masked
            )+tf.contrib.layers.l2_regularizer(self.lambda_pph)(w_pph)
            # ------------------------------------------------------------------------------------

            '''
            # ---------------------------------------IPH------------------------------------------
            # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size]
            inputs_iph = tf.nn.embedding_lookup(params=self.embeddings, ids=self.X_p, name="embeded_input_iph")
            # shape of inputs[batch_size,max_time_stpes,embeddings_dims+class_num]
            inputs_iph = tf.concat(values=[inputs_iph, pred_normal_one_hot_pph], axis=2, name="inputs_pph")
            # print("shape of input_pph:", inputs_pph.shape)
            # encoder cells
            # forward part
            en_lstm_forward1_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num*2)

            # encode
            encoder_outputs_iph, encoder_states_iph = self.encoder(
                cell_forward=en_lstm_forward1_iph,
                cell_backward=en_lstm_backward1_iph,
                inputs=inputs_iph,
                seq_length=self.seq_len_p,
                scope_name="en_lstm_iph"
            )
            # shape of h is [batch*time_steps,hidden_units*2]
            h_iph = self.decoder(
                cell=de_lstm_iph,
                initial_state=encoder_states_iph,
                inputs=encoder_outputs_iph,
                scope_name="de_lstm_iph"
            )

            # fully connect layer(projection)
            w_iph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.hidden_units_num*2, self.class_num)),
                name="weights_iph"
            )
            b_iph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num,)),
                name="bias_iph"
            )
            # logits
            logits_iph = tf.matmul(h_iph, w_iph) + b_iph  # shape of logits:[batch_size*max_time, 3]
            logits_normal_iph = tf.reshape(                # logits in an normal way:[batch_size,max_time_stpes,3]
                tensor=logits_iph,
                shape=(-1, self.max_sentence_size, 3),
                name="logits_normal_iph"
            )
            logits_iph_masked = tf.boolean_mask(  # [seq_len1+seq_len2+....+,3]
                tensor=logits_normal_iph,
                mask=self.mask,
                name="logits_iph_masked"
            )

            # prediction
            pred_iph = tf.cast(tf.argmax(logits_iph, 1), tf.int32, name="pred_iph")  # pred_iph:[batch_size*max_time,]
            pred_normal_iph = tf.reshape(  # pred in an normal way,[batch_size, max_time]
                tensor=pred_iph,
                shape=(-1, self.max_sentence_size),
                name="pred_normal_iph"
            )
            pred_iph_masked = tf.boolean_mask(  # logits_iph_masked [seq_len1+seq_len2+....+,]
                tensor=pred_normal_iph,
                mask=self.mask,
                name="pred_iph_masked"
            )
            pred_normal_one_hot_iph = tf.one_hot(  # one-hot the pred_normal:[batch_size, max_time,class_num]
                indices=pred_normal_iph,
                depth=self.class_num,
                name="pred_normal_one_hot_iph"
            )
            # loss
            self.loss_iph = tf.losses.sparse_softmax_cross_entropy(
                labels=y_p_iph_masked,
                logits=logits_iph_masked
            )+tf.contrib.layers.l2_regularizer(self.lambda_iph)(w_iph)

            # ---------------------------------------------------------------------------------------
            '''
            # adjust learning rate
            global_step = tf.Variable(initial_value=1, trainable=False)
            start_learning_rate = self.learning_rate
            learning_rate = tf.train.exponential_decay(
                learning_rate=start_learning_rate,
                global_step=global_step,
                decay_steps=(X_train.shape[0] // self.batch_size) + 1,
                decay_rate=self.decay_rate,
                staircase=True,
                name="decay_learning_rate"
            )

            # loss
            self.loss = self.loss_pw + self.loss_pph


            # optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss,global_step)
            self.init_op = tf.global_variables_initializer()
            self.init_local_op = tf.local_variables_initializer()

        # ------------------------------------Session-----------------------------------------
        with self.session as sess:
            print("Training Start")
            sess.run(self.init_op)                  # initialize all variables
            sess.run(self.init_local_op)

            train_Size = X_train.shape[0]
            validation_Size = X_validation.shape[0]
            self.best_validation_loss = 1000  # best validation loss in the training process

            # epoch
            for epoch in range(1, self.max_epoch + 1):
                print("Epoch:", epoch)
                start_time = time.time()  # time evaluation
                # training loss/accuracy in every mini-batch
                self.train_losses = []
                self.train_accus_pw = []
                self.train_accus_pph = []
                #self.train_accus_iph = []

                self.c1_f_pw = []
                self.c2_f_pw = []  # each class's f1 score
                self.c1_f_pph = []
                self.c2_f_pph = []
                #self.c1_f_iph = []
                #self.c2_f_iph = []

                lrs = []

                # mini batch
                for i in range(0, (train_Size // self.batch_size)):
                    # note: everything fetched here is the post-mask value
                    _, train_loss, y_train_pw_masked,y_train_pph_masked,\
                    train_pred_pw, train_pred_pph,lr = sess.run(
                        fetches=[self.optimizer, self.loss,
                                 y_p_pw_masked,y_p_pph_masked,
                                 pred_pw_masked, pred_pph_masked,learning_rate],
                        feed_dict={
                            self.X_p: X_train[i * self.batch_size:(i + 1) * self.batch_size],
                            self.y_p_pw: y_train_pw[i * self.batch_size:(i + 1) * self.batch_size],
                            self.y_p_pph: y_train_pph[i * self.batch_size:(i + 1) * self.batch_size],
                            self.seq_len_p: len_train[i * self.batch_size:(i + 1) * self.batch_size],
                            self.pos_p: pos_train[i * self.batch_size:(i + 1) * self.batch_size],
                            self.length_p: length_train[i * self.batch_size:(i + 1) * self.batch_size],
                            self.position_p: position_train[i * self.batch_size:(i + 1) * self.batch_size],
                            self.keep_prob_p: self.keep_prob,
                            self.input_keep_prob_p:self.input_keep_prob,
                            self.output_keep_prob_p:self.output_keep_prob
                        }
                    )
                    lrs.append(lr)

                    # loss
                    self.train_losses.append(train_loss)

                    # metrics
                    accuracy_pw, f1_pw= util.eval(y_true=y_train_pw_masked,y_pred=train_pred_pw)       # pw
                    accuracy_pph, f1_pph= util.eval(y_true=y_train_pph_masked,y_pred=train_pred_pph)   # pph
                    #accuracy_iph, f1_1_iph, f1_2_iph = util.eval(y_true=y_train_iph_masked,y_pred=train_pred_iph)   # iph

                    self.train_accus_pw.append(accuracy_pw)
                    self.train_accus_pph.append(accuracy_pph)
                    #self.train_accus_iph.append(accuracy_iph)
                    # F1-score
                    self.c1_f_pw.append(f1_pw[0])
                    self.c2_f_pw.append(f1_pw[1])
                    self.c1_f_pph.append(f1_pph[0])
                    self.c2_f_pph.append(f1_pph[1])
                    #self.c1_f_iph.append(f1_1_iph)
                    #self.c2_f_iph.append(f1_2_iph)

                print("learning rate:", sum(lrs) / len(lrs))
                # validation in every epoch
                self.validation_loss, y_valid_pw_masked,y_valid_pph_masked,\
                valid_pred_pw, valid_pred_pph = sess.run(
                    fetches=[self.loss, y_p_pw_masked,y_p_pph_masked,
                             pred_pw_masked, pred_pph_masked],
                    feed_dict={
                        self.X_p: X_validation,
                        self.y_p_pw: y_validation_pw,
                        self.y_p_pph: y_validation_pph,
                        self.seq_len_p: len_validation,
                        self.pos_p: pos_validation,
                        self.length_p: length_validation,
                        self.position_p: position_validation,
                        self.keep_prob_p: 1.0,
                        self.input_keep_prob_p:1.0,
                        self.output_keep_prob_p:1.0
                    }
                )
                # print("valid_pred_pw.shape:",valid_pred_pw.shape)
                # print("valid_pred_pph.shape:",valid_pred_pph.shape)
                # print("valid_pred_iph.shape:",valid_pred_iph.shape)

                # metrics
                self.valid_accuracy_pw, self.valid_f1_pw = util.eval(y_true=y_valid_pw_masked,y_pred=valid_pred_pw)
                self.valid_accuracy_pph, self.valid_f1_pph = util.eval(y_true=y_valid_pph_masked,y_pred=valid_pred_pph)

                #self.valid_accuracy_iph, self.valid_f1_1_iph, self.valid_f1_2_iph = util.eval(y_true=y_valid_iph_masked,y_pred=valid_pred_iph)
                print("Epoch ", epoch, " finished.", "spend ", round((time.time() - start_time) / 60, 2), " mins")
                self.showInfo(type="training")
                self.showInfo(type="validation")


                # when we get a new best validation loss, we store the model
                if self.validation_loss < self.best_validation_loss:
                    self.best_validation_loss = self.validation_loss
                    print("New Best loss ", self.best_validation_loss, " On Validation set! ")
                    print("Saving Models......\n\n")
                    # exist ./models folder?
                    if not os.path.exists("./models/"):
                        os.mkdir(path="./models/")
                    if not os.path.exists("./models/" + name):
                        os.mkdir(path="./models/" + name)
                    if not os.path.exists("./models/" + name + "/bilstm"):
                        os.mkdir(path="./models/" + name + "/bilstm")
                    # create saver
                    saver = tf.train.Saver()
                    saver.save(sess, "./models/" + name + "/bilstm/my-model-10000")
                    # Generates MetaGraphDef.
                    saver.export_meta_graph("./models/" + name + "/bilstm/my-model-10000.meta")
                print("\n\n")

                # test:using X_validation_pw
                test_pred_pw, test_pred_pph = sess.run(
                    fetches=[pred_pw, pred_pph],
                    feed_dict={
                        self.X_p: X_validation,
                        self.seq_len_p: len_validation,
                        self.pos_p: pos_validation,
                        self.length_p: length_validation,
                        self.position_p: position_validation,
                        self.keep_prob_p: 1.0,
                        self.input_keep_prob_p:1.0,
                        self.output_keep_prob_p:1.0
                    }
                )

                # recover to original corpus txt
                # shape of valid_pred_pw,valid_pred_pw,valid_pred_pw:[corpus_size*time_stpes]
                util.recover2(
                    X=X_validation,
                    preds_pw=test_pred_pw,
                    preds_pph=test_pred_pph,
                    filename="../result/bilstm_cwe/recover_epoch_" + str(epoch) + ".txt"
                )
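
The core trick in this fit() is the masking pattern: build a boolean mask from the true sequence lengths, then flatten both labels and logits so padded positions never reach the loss. A standalone restatement (TF 1.x; shapes and names are illustrative):

import tensorflow as tf

max_len, num_classes = 5, 2
labels = tf.placeholder(tf.int32, (None, max_len))
logits = tf.placeholder(tf.float32, (None, max_len, num_classes))
seq_len = tf.placeholder(tf.int32, (None,))

mask = tf.sequence_mask(seq_len, maxlen=max_len)  # [batch, max_len], bool
labels_masked = tf.boolean_mask(labels, mask)     # [sum(seq_len)]
logits_masked = tf.boolean_mask(logits, mask)     # [sum(seq_len), num_classes]
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels_masked,
                                              logits=logits_masked)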
Example #12
    def fit(self,
            X_train,
            y_train,
            len_train,
            X_validation,
            y_validation,
            len_validation,
            name,
            print_log=True):
        # ---------------------------------------forward computation--------------------------------------------#
        y_train_pw = y_train[0]
        y_train_pph = y_train[1]
        y_train_iph = y_train[2]

        y_validation_pw = y_validation[0]
        y_validation_pph = y_validation[1]
        y_validation_iph = y_validation[2]
        # ---------------------------------------define graph---------------------------------------------#
        with self.graph.as_default():
            # data place holder
            self.X_p = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.max_sentence_size),
                                      name="input_placeholder")

            self.y_p_pw = tf.placeholder(dtype=tf.int32,
                                         shape=(None, self.max_sentence_size),
                                         name="label_placeholder_pw")
            self.y_p_pph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="label_placeholder_pph")
            self.y_p_iph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="label_placeholder_iph")

            # placeholder for the lengths of the corresponding sequences
            self.seq_len_p = tf.placeholder(dtype=tf.int32,
                                            shape=(None, ),
                                            name="seq_len")

            # mask used to strip off the padding
            self.mask = tf.sequence_mask(lengths=self.seq_len_p,
                                         maxlen=self.max_sentence_size,
                                         name="mask")

            # labels after the padding is removed
            y_p_pw_masked = tf.boolean_mask(  #shape[seq_len1+seq_len2+....+,]
                tensor=self.y_p_pw,
                mask=self.mask,
                name="y_p_pw_masked")
            y_p_pph_masked = tf.boolean_mask(  # shape[seq_len1+seq_len2+....+,]
                tensor=self.y_p_pph,
                mask=self.mask,
                name="y_p_pph_masked")
            y_p_iph_masked = tf.boolean_mask(  # shape[seq_len1+seq_len2+....+,]
                tensor=self.y_p_iph,
                mask=self.mask,
                name="y_p_iph_masked")

            # embeddings
            self.embeddings = tf.Variable(initial_value=tf.zeros(
                shape=(self.vocab_size, self.embedding_size),
                dtype=tf.float32),
                                          name="embeddings")

            # -------------------------------------PW-----------------------------------------------------
            # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size]
            inputs_pw = tf.nn.embedding_lookup(params=self.embeddings,
                                               ids=self.X_p,
                                               name="embeded_input_pw")

            # encoder cells
            # forward part
            en_lstm_forward1_pw = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_pw = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_pw = rnn.BasicLSTMCell(num_units=self.hidden_units_num * 2)

            # encode
            encoder_outputs_pw, encoder_states_pw = self.encoder(
                cell_forward=en_lstm_forward1_pw,
                cell_backward=en_lstm_backward1_pw,
                inputs=inputs_pw,
                seq_length=self.seq_len_p,
                scope_name="en_lstm_pw")
            # decode
            h_pw = self.decoder(  # shape of h is [batch*time_steps,hidden_units*2]
                cell=de_lstm_pw,
                initial_state=encoder_states_pw,
                inputs=encoder_outputs_pw,
                scope_name="de_lstm_pw")

            # fully connect layer(projection)
            w_pw = tf.Variable(initial_value=tf.random_normal(
                shape=(self.hidden_units_num * 2, self.class_num)),
                               name="weights_pw")
            b_pw = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num, )),
                name="bias_pw")
            #logits
            logits_pw = tf.matmul(
                h_pw, w_pw) + b_pw  #logits_pw:[batch_size*max_time, 3]
            logits_normal_pw = tf.reshape(  #logits in an normal way:[batch_size,max_time_stpes,3]
                tensor=logits_pw,
                shape=(-1, self.max_sentence_size, 3),
                name="logits_normal_pw")
            logits_pw_masked = tf.boolean_mask(  # logits_pw_masked [seq_len1+seq_len2+....+,3]
                tensor=logits_normal_pw,
                mask=self.mask,
                name="logits_pw_masked")

            # prediction
            pred_pw = tf.cast(tf.argmax(logits_pw, 1),
                              tf.int32,
                              name="pred_pw")  # pred_pw:[batch_size*max_time,]
            pred_normal_pw = tf.reshape(  # pred in an normal way,[batch_size, max_time]
                tensor=pred_pw,
                shape=(-1, self.max_sentence_size),
                name="pred_normal_pw")

            pred_pw_masked = tf.boolean_mask(  # logits_pw_masked [seq_len1+seq_len2+....+,]
                tensor=pred_normal_pw,
                mask=self.mask,
                name="pred_pw_masked")

            pred_normal_one_hot_pw = tf.one_hot(  # one-hot the pred_normal:[batch_size, max_time,class_num]
                indices=pred_normal_pw,
                depth=self.class_num,
                name="pred_normal_one_hot_pw")

            # loss
            self.loss_pw = tf.losses.sparse_softmax_cross_entropy(
                labels=y_p_pw_masked, logits=logits_pw_masked)
            # ---------------------------------------------------------------------------------------

            # ----------------------------------PPH--------------------------------------------------
            # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size]
            inputs_pph = tf.nn.embedding_lookup(params=self.embeddings,
                                                ids=self.X_p,
                                                name="embeded_input_pph")
            # shape of inputs[batch_size,max_time_stpes,embeddings_dims+class_num]
            inputs_pph = tf.concat(values=[inputs_pph, pred_normal_one_hot_pw],
                                   axis=2,
                                   name="inputs_pph")
            # print("shape of input_pph:", inputs_pph.shape)

            # encoder cells
            # forward part
            en_lstm_forward1_pph = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_pph = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num *
                                            2)

            # encode
            encoder_outputs_pph, encoder_states_pph = self.encoder(
                cell_forward=en_lstm_forward1_pph,
                cell_backward=en_lstm_backward1_pph,
                inputs=inputs_pph,
                seq_length=self.seq_len_p,
                scope_name="en_lstm_pph")
            # shape of h is [batch*time_steps,hidden_units*2]
            h_pph = self.decoder(cell=de_lstm_pph,
                                 initial_state=encoder_states_pph,
                                 inputs=encoder_outputs_pph,
                                 scope_name="de_lstm_pph")

            # fully connect layer(projection)
            w_pph = tf.Variable(initial_value=tf.random_normal(
                shape=(self.hidden_units_num * 2, self.class_num)),
                                name="weights_pph")
            b_pph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num, )),
                name="bias_pph")
            # logits
            logits_pph = tf.matmul(
                h_pph,
                w_pph) + b_pph  # shape of logits:[batch_size*max_time, 3]
            logits_normal_pph = tf.reshape(  # logits in an normal way:[batch_size,max_time_stpes,3]
                tensor=logits_pph,
                shape=(-1, self.max_sentence_size, 3),
                name="logits_normal_pph")
            logits_pph_masked = tf.boolean_mask(  # [seq_len1+seq_len2+....+,3]
                tensor=logits_normal_pph,
                mask=self.mask,
                name="logits_pph_masked")

            # prediction
            pred_pph = tf.cast(
                tf.argmax(logits_pph, 1), tf.int32,
                name="pred_pph")  # pred_pph:[batch_size*max_time,]
            pred_normal_pph = tf.reshape(  # pred in an normal way,[batch_size, max_time]
                tensor=pred_pph,
                shape=(-1, self.max_sentence_size),
                name="pred_normal_pph")
            pred_pph_masked = tf.boolean_mask(  # logits_pph_masked [seq_len1+seq_len2+....+,]
                tensor=pred_normal_pph,
                mask=self.mask,
                name="pred_pph_masked")
            pred_normal_one_hot_pph = tf.one_hot(  # one-hot the pred_normal:[batch_size, max_time,class_num]
                indices=pred_normal_pph,
                depth=self.class_num,
                name="pred_normal_one_hot_pph")

            # loss
            self.loss_pph = tf.losses.sparse_softmax_cross_entropy(
                labels=y_p_pph_masked, logits=logits_pph_masked)
            # ------------------------------------------------------------------------------------

            # ---------------------------------------IPH------------------------------------------
            # embeded inputs:[batch_size,MAX_TIME_STPES,embedding_size]
            inputs_iph = tf.nn.embedding_lookup(params=self.embeddings,
                                                ids=self.X_p,
                                                name="embeded_input_iph")
            # shape of inputs: [batch_size, max_time_steps, embedding_dims+class_num]
            inputs_iph = tf.concat(
                values=[inputs_iph, pred_normal_one_hot_pph],
                axis=2,
                name="inputs_pph")
            # print("shape of input_pph:", inputs_pph.shape)
            # encoder cells
            # forward part
            en_lstm_forward1_iph = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_iph = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num *
                                            2)

            # encode
            encoder_outputs_iph, encoder_states_iph = self.encoder(
                cell_forward=en_lstm_forward1_iph,
                cell_backward=en_lstm_backward1_iph,
                inputs=inputs_iph,
                seq_length=self.seq_len_p,
                scope_name="en_lstm_iph")
            # shape of h is [batch*time_steps,hidden_units*2]
            h_iph = self.decoder(cell=de_lstm_iph,
                                 initial_state=encoder_states_iph,
                                 inputs=encoder_outputs_iph,
                                 scope_name="de_lstm_iph")

            # fully connect layer(projection)
            w_iph = tf.Variable(initial_value=tf.random_normal(
                shape=(self.hidden_units_num * 2, self.class_num)),
                                name="weights_iph")
            b_iph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num, )),
                name="bias_iph")
            # logits
            logits_iph = tf.matmul(
                h_iph,
                w_iph) + b_iph  # shape of logits:[batch_size*max_time, 3]
            logits_normal_iph = tf.reshape(  # logits in the normal layout: [batch_size, max_time_steps, 3]
                tensor=logits_iph,
                shape=(-1, self.max_sentence_size, 3),
                name="logits_normal_iph")
            logits_iph_masked = tf.boolean_mask(  # [seq_len1+seq_len2+....+,3]
                tensor=logits_normal_iph,
                mask=self.mask,
                name="logits_iph_masked")

            # prediction
            pred_iph = tf.cast(
                tf.argmax(logits_iph, 1), tf.int32,
                name="pred_iph")  # pred_iph:[batch_size*max_time,]
            pred_normal_iph = tf.reshape(  # pred in the normal layout: [batch_size, max_time]
                tensor=pred_iph,
                shape=(-1, self.max_sentence_size),
                name="pred_normal_iph")
            pred_iph_masked = tf.boolean_mask(  # shape: [seq_len1+seq_len2+...,]
                tensor=pred_normal_iph,
                mask=self.mask,
                name="pred_iph_masked")
            pred_normal_one_hot_iph = tf.one_hot(  # one-hot the pred_normal:[batch_size, max_time,class_num]
                indices=pred_normal_iph,
                depth=self.class_num,
                name="pred_normal_one_hot_iph")
            # loss
            self.loss_iph = tf.losses.sparse_softmax_cross_entropy(
                labels=y_p_iph_masked, logits=logits_iph_masked)

            # ---------------------------------------------------------------------------------------
            # loss
            self.loss = self.loss_pw + self.loss_pph + self.loss_iph
            # optimizer
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(self.loss)
            self.init_op = tf.global_variables_initializer()
            self.init_local_op = tf.local_variables_initializer()

        # ------------------------------------Session-----------------------------------------
        with self.session as sess:
            print("Training Start")
            sess.run(self.init_op)  # initialize all variables
            sess.run(self.init_local_op)

            train_Size = X_train.shape[0]
            validation_Size = X_validation.shape[0]
            best_validation_loss = 1000  # lowest validation loss seen so far

            # epoch
            for epoch in range(1, self.max_epoch + 1):
                print("Epoch:", epoch)
                start_time = time.time()  # time evaluation
                # training loss/accuracy in every mini-batch
                train_losses = []
                train_accus_pw = []
                train_accus_pph = []
                train_accus_iph = []

                c1_f_pw = []
                c2_f_pw = []  # each class's f1 score
                c1_f_pph = []
                c2_f_pph = []
                c1_f_iph = []
                c2_f_iph = []

                # mini batch
                for i in range(0, (train_Size // self.batch_size)):
                    # Note: all values fetched here are post-mask (padding removed)
                    _, train_loss, y_train_pw_masked,y_train_pph_masked,y_train_iph_masked,\
                    train_pred_pw, train_pred_pph, train_pred_iph = sess.run(
                        fetches=[self.optimizer, self.loss,
                                 y_p_pw_masked,y_p_pph_masked,y_p_iph_masked,
                                 pred_pw_masked, pred_pph_masked, pred_iph_masked],
                        feed_dict={
                            self.X_p: X_train[i * self.batch_size:(i + 1) * self.batch_size],
                            self.y_p_pw: y_train_pw[i * self.batch_size:(i + 1) * self.batch_size],
                            self.y_p_pph: y_train_pph[i * self.batch_size:(i + 1) * self.batch_size],
                            self.y_p_iph: y_train_iph[i * self.batch_size:(i + 1) * self.batch_size],
                            self.seq_len_p: len_train[i * self.batch_size:(i + 1) * self.batch_size]
                        }
                    )

                    # loss
                    train_losses.append(train_loss)
                    # metrics

                    accuracy_pw, f1_1_pw, f1_2_pw = util.eval(
                        y_true=y_train_pw_masked, y_pred=train_pred_pw)  # pw
                    accuracy_pph, f1_1_pph, f1_2_pph = util.eval(
                        y_true=y_train_pph_masked,
                        y_pred=train_pred_pph)  # pph
                    accuracy_iph, f1_1_iph, f1_2_iph = util.eval(
                        y_true=y_train_iph_masked,
                        y_pred=train_pred_iph)  # iph

                    train_accus_pw.append(accuracy_pw)
                    train_accus_pph.append(accuracy_pph)
                    train_accus_iph.append(accuracy_iph)
                    # F1-score
                    c1_f_pw.append(f1_1_pw)
                    c2_f_pw.append(f1_2_pw)
                    c1_f_pph.append(f1_1_pph)
                    c2_f_pph.append(f1_2_pph)
                    c1_f_iph.append(f1_1_iph)
                    c2_f_iph.append(f1_2_iph)

                # validation in every epoch
                validation_loss, y_valid_pw_masked,y_valid_pph_masked,y_valid_iph_masked,\
                valid_pred_pw, valid_pred_pph, valid_pred_iph = sess.run(
                    fetches=[self.loss, y_p_pw_masked,y_p_pph_masked,y_p_iph_masked,
                             pred_pw_masked, pred_pph_masked, pred_iph_masked],
                    feed_dict={
                        self.X_p: X_validation,
                        self.y_p_pw: y_validation_pw,
                        self.y_p_pph: y_validation_pph,
                        self.y_p_iph: y_validation_iph,
                        self.seq_len_p: len_validation
                    }
                )
                # print("valid_pred_pw.shape:",valid_pred_pw.shape)
                # print("valid_pred_pph.shape:",valid_pred_pph.shape)
                # print("valid_pred_iph.shape:",valid_pred_iph.shape)

                # metrics
                valid_accuracy_pw, valid_f1_1_pw, valid_f1_2_pw = util.eval(
                    y_true=y_valid_pw_masked, y_pred=valid_pred_pw)
                valid_accuracy_pph, valid_f1_1_pph, valid_f1_2_pph = util.eval(
                    y_true=y_valid_pph_masked, y_pred=valid_pred_pph)
                valid_accuracy_iph, valid_f1_1_iph, valid_f1_2_iph = util.eval(
                    y_true=y_valid_iph_masked, y_pred=valid_pred_iph)

                # show information
                print("Epoch ", epoch, " finished, took ",
                      round((time.time() - start_time) / 60, 2), " mins")
                print("                             /**Training info**/")
                print("----average training loss:",
                      sum(train_losses) / len(train_losses))
                print("PW:")
                print("----average accuracy:",
                      sum(train_accus_pw) / len(train_accus_pw))
                print("----average f1-score of N:",
                      sum(c1_f_pw) / len(c1_f_pw))
                print("----average f1-score of B:",
                      sum(c2_f_pw) / len(c2_f_pw))
                print("PPH:")
                print("----average accuracy:",
                      sum(train_accus_pph) / len(train_accus_pph))
                print("----average f1-score of N:",
                      sum(c1_f_pph) / len(c1_f_pph))
                print("----average f1-score of B:",
                      sum(c2_f_pph) / len(c2_f_pph))
                print("IPH:")
                print("----average accuracy:",
                      sum(train_accus_iph) / len(train_accus_iph))
                print("----average f1-score of N:",
                      sum(c1_f_iph) / len(c1_f_iph))
                print("----average f1-score of B:",
                      sum(c2_f_iph) / len(c2_f_iph))

                print("                             /**Validation info**/")
                print("----validation loss:", validation_loss)
                print("PW:")
                print("----accuracy:", valid_accuracy_pw)
                print("----f1-score of N:", valid_f1_1_pw)
                print("----f1-score of B:", valid_f1_2_pw)
                print("PPH:")
                print("----accuracy:", valid_accuracy_pph)
                print("----f1-score of N:", valid_f1_1_pph)
                print("----f1-score of B:", valid_f1_2_pph)
                print("IPH:")
                print("----accuracy:", valid_accuracy_iph)
                print("----f1-score of N:", valid_f1_1_iph)
                print("----f1-score of B:", valid_f1_2_iph)

                # when we reach a new best (lowest) validation loss, store the model
                if validation_loss < best_validation_loss:
                    best_validation_loss = validation_loss
                    print("New Best loss ", best_validation_loss,
                          " On Validation set! ")
                    print("Saving Models......\n\n")
                    # exist ./models folder?
                    if not os.path.exists("./models/"):
                        os.mkdir(path="./models/")
                    if not os.path.exists("./models/" + name):
                        os.mkdir(path="./models/" + name)
                    if not os.path.exists("./models/" + name + "/bilstm"):
                        os.mkdir(path="./models/" + name + "/bilstm")
                    # create saver
                    saver = tf.train.Saver()
                    saver.save(sess,
                               "./models/" + name + "/bilstm/my-model-10000")
                    # Generates MetaGraphDef.
                    saver.export_meta_graph("./models/" + name +
                                            "/bilstm/my-model-10000.meta")
                print("\n\n")
                # test:using X_validation_pw
                test_pred_pw, test_pred_pph, test_pred_iph = sess.run(
                    fetches=[pred_pw, pred_pph, pred_iph],
                    feed_dict={
                        self.X_p: X_validation,
                        self.seq_len_p: len_validation
                    })
                # recover to original corpus txt
                # shape of test_pred_pw, test_pred_pph, test_pred_iph: [corpus_size*time_steps]
                util.recover(X=X_validation,
                             preds_pw=test_pred_pw,
                             preds_pph=test_pred_pph,
                             preds_iph=test_pred_iph,
                             filename="recover_epoch_" + str(epoch) + ".txt")
예제 #13
0
# traindata shape: [[(x,y),(x,y), ...],[],[],...]
# testdata shape: [([x1, x2, ...],[y1,y2,...]),([],[])]

stt = datetime.datetime.now()
print "Start training...", stt
hmmtagger = nt.hmm.HiddenMarkovModelTagger.train(traindata)


print "################# Training took:", datetime.datetime.now()-stt
results = []
for line in testdata:
    x, yref = util.line_toseq(line, charstop)
    out = hmmtagger.tag(x)
    _, yout = zip(*out)
    results.append(util.eval(yref, yout, "S"))
tp, fp, fn, tn = zip(*results)
tp, fp, fn, tn = sum(tp), sum(fp), sum(fn), sum(tn)

p, r = tp / (tp + fp), tp / (tp + fn)
print("Total tokens in Test Set:", tp + fp + fn + tn)
print("Total S in REF:", tp + fn)
print("Total S in OUT:", tp + fp)
print("Precision:", p)
print("Recall:", r)
print("F1-score:", 2 * p * r / (p + r))



print "Start close testing...", datetime.datetime.now()
results = []
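util.eval is not shown in these snippets; judging from how its return value is
unpacked and summed above, a plausible stand-in (the name, signature, and
behavior here are assumptions, not the actual library code) counts per-token
outcomes for one label of interest:

def eval_counts(yref, yout, positive="S"):
    # Count (tp, fp, fn, tn) for one label over a pair of tag sequences.
    tp = fp = fn = tn = 0
    for ref, out in zip(yref, yout):
        if ref == positive and out == positive:
            tp += 1
        elif ref != positive and out == positive:
            fp += 1
        elif ref == positive and out != positive:
            fn += 1
        else:
            tn += 1
    return tp, fp, fn, tn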
예제 #14
0
    def fit(self,
            X_train,
            y_train,
            len_train,
            pos_train,
            length_train,
            position_train,
            X_valid,
            y_valid,
            len_valid,
            pos_valid,
            length_valid,
            position_valid,
            X_test,
            y_test,
            len_test,
            pos_test,
            length_test,
            position_test,
            name,
            print_log=True):
        # handle data
        y_train_pw = y_train[0]
        y_train_pph = y_train[1]
        # y_train_iph = y_train[2]

        y_valid_pw = y_valid[0]
        y_valid_pph = y_valid[1]
        # y_valid_iph = y_valid[2]

        y_test_pw = y_test[0]
        y_test_pph = y_test[1]
        # y_test_iph = y_test[2]

        # ------------------------------------------define graph---------------------------------------------#
        with self.graph.as_default():
            #***********************Dataset API****************************
            # create dataset_train object
            dataset_train = tf.data.Dataset.from_tensor_slices(
                tensors=(X_train, y_train_pw, y_train_pph, len_train,
                         pos_train, length_train,
                         position_train)).repeat().batch(
                             batch_size=self.batch_size).shuffle(buffer_size=2)
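            # Note: shuffle() is applied after batch() here, so it reorders whole
            # batches rather than examples, and buffer_size=2 gives almost no
            # shuffling; the conventional order is shuffle(large_buffer) -> batch().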

            # create iterator_train object
            iterator_train = dataset_train.make_one_shot_iterator()

            # get batch
            batch_train = iterator_train.get_next()
            #print("batch_train:", batch_train)

            # dataset_valid=
            # dataset_test=
            #***************************************************************

            #****************** data place holder***************************
            self.X_p = tf.placeholder(dtype=tf.int32,
                                      shape=(None, self.max_sentence_size),
                                      name="input_p")
            self.y_p_pw = tf.placeholder(dtype=tf.int32,
                                         shape=(None, self.max_sentence_size),
                                         name="label_p_pw")
            self.y_p_pph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="label_p_pph")
            #self.y_p_iph = tf.placeholder(dtype=tf.int32,shape=(None, self.max_sentence_size),name="label_p_iph")

            # placeholder for the true length of each sequence
            self.seq_len_p = tf.placeholder(dtype=tf.int32,
                                            shape=(None, ),
                                            name="seq_len")

            # mask used to strip the padding positions
            self.mask = tf.sequence_mask(lengths=self.seq_len_p,
                                         maxlen=self.max_sentence_size,
                                         name="mask")

            # labels with padding removed, shape: [seq_len1+seq_len2+...,]
            y_p_pw_masked = tf.boolean_mask(tensor=self.y_p_pw,
                                            mask=self.mask,
                                            name="y_p_pw_masked")
            y_p_pph_masked = tf.boolean_mask(tensor=self.y_p_pph,
                                             mask=self.mask,
                                             name="y_p_pph_masked")
            # y_p_iph_masked = tf.boolean_mask(tensor=self.y_p_iph,mask=self.mask,name="y_p_iph_masked")

            # pos info placeholder
            self.pos_p = tf.placeholder(dtype=tf.int32,
                                        shape=(None, self.max_sentence_size),
                                        name="pos_p")
            self.pos_one_hot = tf.one_hot(indices=self.pos_p,
                                          depth=self.pos_num,
                                          name="pos_one_hot")
            #print("shape of pos_one_hot:", self.pos_one_hot.shape)

            # length info placeholder
            self.length_p = tf.placeholder(dtype=tf.int32,
                                           shape=(None,
                                                  self.max_sentence_size),
                                           name="length_p")
            self.length_one_hot = tf.one_hot(indices=self.length_p,
                                             depth=self.length_num,
                                             name="length_one_hot")
            #print("shape of length_one_hot:", self.length_one_hot.shape)

            # position info placeholder
            self.position_p = tf.placeholder(dtype=tf.int32,
                                             shape=(None,
                                                    self.max_sentence_size),
                                             name="position_p")
            self.position_one_hot = tf.one_hot(indices=self.position_p,
                                               depth=self.max_sentence_size,
                                               name="position_one_hot")
            #print("shape of position_one_hot:", self.position_one_hot.shape)

            # dropout placeholders
            self.keep_prob_p = tf.placeholder(dtype=tf.float32,
                                              shape=[],
                                              name="keep_prob_p")
            self.input_keep_prob_p = tf.placeholder(dtype=tf.float32,
                                                    shape=[],
                                                    name="input_keep_prob_p")
            self.output_keep_prob_p = tf.placeholder(dtype=tf.float32,
                                                     shape=[],
                                                     name="output_keep_prob_p")

            # word embeddings
            self.word_embeddings = tf.Variable(
                initial_value=util.readEmbeddings(
                    file="../data/embeddings/word_vec.txt"),
                trainable=False,
                name="word_embeddings")
            print("wordembedding.shape", self.word_embeddings.shape)

            # -------------------------------------PW-----------------------------------------------------
            # embedded inputs: [batch_size, MAX_TIME_STEPS, embedding_size]
            inputs_pw = tf.nn.embedding_lookup(params=self.word_embeddings,
                                               ids=self.X_p,
                                               name="embeded_input_pw")
            print("shape of inputs_pw:", inputs_pw.shape)
            inputs_pw = tf.concat(values=[
                inputs_pw, self.pos_one_hot, self.length_one_hot,
                self.position_one_hot
            ],
                                  axis=2,
                                  name="input_pw")
            print("shape of cancated inputs_pw:", inputs_pw.shape)
            self.loss_pw, prob_pw_masked, pred_pw, pred_pw_masked, pred_normal_one_hot_pw = self.hierarchy(
                inputs=inputs_pw,
                y_masked=y_p_pw_masked,
                seq_length=self.seq_len_p,
                scope_name="pw")

            # ----------------------------------PPH--------------------------------------------------
            # embedded inputs: [batch_size, MAX_TIME_STEPS, embedding_size]
            inputs_pph = tf.nn.embedding_lookup(params=self.word_embeddings,
                                                ids=self.X_p,
                                                name="embeded_input_pph")
            print("input_pph.shape", inputs_pph.shape)
            # concat all information
            inputs_pph = tf.concat(values=[
                inputs_pph, self.pos_one_hot, self.length_one_hot,
                self.position_one_hot, pred_normal_one_hot_pw
            ],
                                   axis=2,
                                   name="inputs_pph")
            print("shape of input_pph:", inputs_pph.shape)

            self.loss_pph, prob_pph_masked, pred_pph, pred_pph_masked, pred_normal_one_hot_pph = self.hierarchy(
                inputs=inputs_pph,
                y_masked=y_p_pph_masked,
                seq_length=self.seq_len_p,
                scope_name="pph")

            # adjust learning rate
            global_step = tf.Variable(initial_value=1, trainable=False)
            start_learning_rate = self.learning_rate
            learning_rate = tf.train.exponential_decay(
                learning_rate=start_learning_rate,
                global_step=global_step,
                decay_steps=(X_train.shape[0] // self.batch_size) + 1,
                decay_rate=self.decay_rate,
                staircase=True,
                name="decay_learning_rate")

            # loss
            self.loss = self.loss_pw + self.loss_pph

            # optimizer
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(self.loss,
                                                      global_step=global_step)
            self.init_op = tf.global_variables_initializer()
            self.init_local_op = tf.local_variables_initializer()

        # --------------------------------------------Session-------------------------------------------------
        with self.session as sess:
            print("Training Start")
            sess.run(self.init_op)  # initialize all variables
            sess.run(self.init_local_op)

            train_Size = X_train.shape[0]
            validation_Size = X_valid.shape[0]
            test_Size = X_test.shape[0]

            self.best_validation_loss = 1000  # lowest validation loss seen so far
            # store result
            if not os.path.exists("../result/bilstm/"):
                os.mkdir("../result/bilstm/")

            # epoch
            for epoch in range(1, self.max_epoch + 1):
                print("Epoch:", epoch)
                start_time = time.time()  # time evaluation
                # training loss/accuracy in every mini-batch
                self.train_losses = []
                self.train_accus_pw = []
                self.train_accus_pph = []
                # self.train_accus_iph = []

                self.c1_f_pw = []
                self.c2_f_pw = []  # each class's f1 score
                self.c1_f_pph = []
                self.c2_f_pph = []
                # self.c1_f_iph = [];
                # self.c2_f_iph = []
                lrs = []

                # mini batch
                for i in range(0, (train_Size // self.batch_size)):
                    elements = sess.run(batch_train)
                    # Note: all values fetched here are post-mask (padding removed)
                    _, train_loss, lr,y_train_pw_masked, y_train_pph_masked, \
                    train_pred_pw, train_pred_pph,  \
                    train_prob_pw_masked, train_prob_pph_masked = sess.run(
                        fetches=[self.optimizer, self.loss,learning_rate,y_p_pw_masked, y_p_pph_masked,
                                 pred_pw_masked, pred_pph_masked, prob_pw_masked, prob_pph_masked ],
                        feed_dict={
                            self.X_p: elements[0],
                            self.y_p_pw: elements[1],
                            self.y_p_pph: elements[2],
                            self.seq_len_p: elements[3],
                            self.pos_p: elements[4],
                            self.length_p: elements[5],
                            self.position_p: elements[6],
                            self.keep_prob_p: self.keep_prob,
                            self.input_keep_prob_p: self.input_keep_prob,
                            self.output_keep_prob_p: self.output_keep_prob
                        }
                    )

                    # write the prob to files
                    util.writeProb(
                        prob_pw=train_prob_pw_masked,
                        prob_pph=train_prob_pph_masked,
                        outFile="../result/bilstm/bilstm_prob_train_epoch" +
                        str(epoch) + ".txt")

                    lrs.append(lr)
                    # loss
                    self.train_losses.append(train_loss)
                    # metrics
                    accuracy_pw, f1_pw = util.eval(y_true=y_train_pw_masked,
                                                   y_pred=train_pred_pw)  # pw
                    accuracy_pph, f1_pph = util.eval(
                        y_true=y_train_pph_masked,
                        y_pred=train_pred_pph)  # pph
                    # accuracy_iph, f1_1_iph, f1_2_iph = util.eval(y_true=y_train_iph_masked,y_pred=train_pred_iph)   # iph

                    self.train_accus_pw.append(accuracy_pw)
                    self.train_accus_pph.append(accuracy_pph)
                    # self.train_accus_iph.append(accuracy_iph)
                    # F1-score
                    self.c1_f_pw.append(f1_pw[0])
                    self.c2_f_pw.append(f1_pw[1])
                    self.c1_f_pph.append(f1_pph[0])
                    self.c2_f_pph.append(f1_pph[1])
                    # self.c1_f_iph.append(f1_1_iph);
                    # self.c2_f_iph.append(f1_2_iph)

                # ----------------------------------validation in every epoch----------------------------------
                self.valid_loss, y_valid_pw_masked, y_valid_pph_masked, \
                valid_pred_pw_masked, valid_pred_pph_masked, valid_pred_pw, valid_pred_pph, \
                valid_prob_pw_masked, valid_prob_pph_masked = sess.run(
                    fetches=[self.loss, y_p_pw_masked, y_p_pph_masked,
                             pred_pw_masked, pred_pph_masked, pred_pw, pred_pph,
                             prob_pw_masked, prob_pph_masked
                             ],
                    feed_dict={
                        self.X_p: X_valid,
                        self.y_p_pw: y_valid_pw,
                        self.y_p_pph: y_valid_pph,
                        self.seq_len_p: len_valid,
                        self.pos_p: pos_valid,
                        self.length_p: length_valid,
                        self.position_p: position_valid,
                        self.keep_prob_p: 1.0,
                        self.input_keep_prob_p: 1.0,
                        self.output_keep_prob_p: 1.0
                    }
                )
                # write the prob to files
                util.writeProb(
                    prob_pw=valid_prob_pw_masked,
                    prob_pph=valid_prob_pph_masked,
                    outFile="../result/bilstm/bilstm_prob_valid_epoch" +
                    str(epoch) + ".txt")

                # metrics
                self.valid_accuracy_pw, self.valid_f1_pw = util.eval(
                    y_true=y_valid_pw_masked, y_pred=valid_pred_pw_masked)
                self.valid_accuracy_pph, self.valid_f1_pph = util.eval(
                    y_true=y_valid_pph_masked, y_pred=valid_pred_pph_masked)
                # recover to original corpus txt
                # shape of valid_pred_pw, valid_pred_pph: [corpus_size*time_steps]
                util.recover2(
                    X=X_valid,
                    preds_pw=valid_pred_pw,
                    preds_pph=valid_pred_pph,
                    filename="../result/bilstm/valid_recover_epoch_" +
                    str(epoch) + ".txt")
                # ----------------------------------------------------------------------------------------

                # ----------------------------------test in every epoch----------------------------------
                self.test_loss, y_test_pw_masked, y_test_pph_masked, \
                test_pred_pw_masked, test_pred_pph_masked, test_pred_pw, test_pred_pph, \
                test_prob_pw_masked, test_prob_pph_masked = sess.run(
                    fetches=[self.loss, y_p_pw_masked, y_p_pph_masked,
                             pred_pw_masked, pred_pph_masked, pred_pw, pred_pph,
                             prob_pw_masked, prob_pph_masked
                             ],
                    feed_dict={
                        self.X_p: X_test,
                        self.y_p_pw: y_test_pw,
                        self.y_p_pph: y_test_pph,
                        self.seq_len_p: len_test,
                        self.pos_p: pos_test,
                        self.length_p: length_test,
                        self.position_p: position_test,
                        self.keep_prob_p: 1.0,
                        self.input_keep_prob_p: 1.0,
                        self.output_keep_prob_p: 1.0
                    }
                )
                # write the prob to files
                util.writeProb(
                    prob_pw=test_prob_pw_masked,
                    prob_pph=test_prob_pph_masked,
                    outFile="../result/bilstm/bilstm_prob_test_epoch" +
                    str(epoch) + ".txt")

                # metrics
                self.test_accuracy_pw, self.test_f1_pw = util.eval(
                    y_true=y_test_pw_masked, y_pred=test_pred_pw_masked)
                self.test_accuracy_pph, self.test_f1_pph = util.eval(
                    y_true=y_test_pph_masked, y_pred=test_pred_pph_masked)
                # recover to original corpus txt
                # shape of test_pred_pw, test_pred_pph: [corpus_size*time_steps]
                util.recover2(X=X_test,
                              preds_pw=test_pred_pw,
                              preds_pph=test_pred_pph,
                              filename="../result/bilstm/test_recover_epoch_" +
                              str(epoch) + ".txt")
                # -----------------------------------------------------------------------------------

                # self.valid_accuracy_iph, self.valid_f1_1_iph, self.valid_f1_2_iph = util.eval(y_true=y_valid_iph_masked,y_pred=valid_pred_iph)

                # show information
                print("Epoch ", epoch, " finished.", "spend ",
                      round((time.time() - start_time) / 60, 2), " mins")
                print("learning rate:", sum(lrs) / len(lrs))
                self.showInfo(type="training")
                self.showInfo(type="validation")
                self.showInfo(type="test")

                # when we reach a new best (lowest) validation loss, store the model
                if self.valid_loss < self.best_validation_loss:
                    self.best_validation_loss = self.valid_loss
                    print("New Best loss ", self.best_validation_loss,
                          " On Validation set! ")
                    print("Saving Models......\n\n")
                    # exist ./models folder?
                    if not os.path.exists("./models/"):
                        os.mkdir(path="./models/")
                    if not os.path.exists("./models/" + name):
                        os.mkdir(path="./models/" + name)
                    if not os.path.exists("./models/" + name + "/bilstm"):
                        os.mkdir(path="./models/" + name + "/bilstm")
                    # create saver
                    saver = tf.train.Saver()
                    saver.save(sess,
                               "./models/" + name + "/bilstm/my-model-10000")
                    # Generates MetaGraphDef.
                    saver.export_meta_graph("./models/" + name +
                                            "/bilstm/my-model-10000.meta")
                print("\n\n")
예제 #15
0
    def fit(self,
            X_train,
            y_train,
            X_validation,
            y_validation,
            name,
            print_log=True):
        #---------------------------------------forward computation--------------------------------------------#
        X_train_pw = X_train[0]
        X_train_pph = X_train[1]
        X_train_iph = X_train[2]
        y_train_pw = y_train[0]
        y_train_pph = y_train[1]
        y_train_iph = y_train[2]

        X_validation_pw = X_validation[0]
        X_validation_pph = X_validation[1]
        X_validation_iph = X_validation[2]
        y_validation_pw = y_validation[0]
        y_validation_pph = y_validation[1]
        y_validation_iph = y_validation[2]

        #---------------------------------------define graph---------------------------------------------#
        with self.graph.as_default():
            # data place holder
            self.X_p_pw = tf.placeholder(dtype=tf.int32,
                                         shape=(None, self.max_sentence_size),
                                         name="input_placeholder_pw")
            self.y_p_pw = tf.placeholder(dtype=tf.int32,
                                         shape=(None, self.max_sentence_size),
                                         name="label_placeholder_pw")

            self.X_p_pph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="input_placeholder_pph")

            self.y_p_pph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="label_placeholder_pph")
            self.X_p_iph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="input_placeholder_iph")

            self.y_p_iph = tf.placeholder(dtype=tf.int32,
                                          shape=(None, self.max_sentence_size),
                                          name="label_placeholder_iph")

            #attention variables
            self.attention_W = tf.Variable(tf.random_uniform(
                [self.hidden_units_num, self.hidden_units_num], 0.0, 1.0),
                                           name="attention_W")
            self.attention_U = tf.Variable(tf.random_uniform(
                [self.hidden_units_num * 2, self.hidden_units_num], 0.0, 1.0),
                                           name="attention_U")

            self.attention_V = tf.Variable(tf.random_uniform(
                [self.hidden_units_num, 1], 0.0, 1.0),
                                           name="attention_V")

            #embeddings
            self.embeddings = tf.Variable(initial_value=tf.zeros(
                shape=(self.vocab_size, self.embedding_size),
                dtype=tf.float32),
                                          name="embeddings")

            #-------------------------------------PW-----------------------------------------------------
            # embedded inputs: [batch_size, MAX_TIME_STEPS, embedding_size]
            inputs_pw = tf.nn.embedding_lookup(params=self.embeddings,
                                               ids=self.X_p_pw,
                                               name="embeded_input_pw")

            # encoder cells
            # forward part
            en_lstm_forward1_pw = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_pw = rnn.BasicLSTMCell(
                num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_pw = rnn.BasicLSTMCell(num_units=self.hidden_units_num,
                                           reuse=tf.AUTO_REUSE)

            # encode
            encoder_outputs_pw, encoder_states_pw = self.encoder(
                cell_forward=en_lstm_forward1_pw,
                cell_backward=en_lstm_backward1_pw,
                inputs=inputs_pw,
                scope_name="en_lstm_pw")
            #print("shape of encoder_outputs:",encoder_outputs_pw.shape)
            #print("shape encoder_states_pw.h",encoder_states_pw.h.shape)
            #print("shape encoder_states_pw.c",encoder_states_pw.c.shape)

            #attention test
            #self.attention(prev_state=encoder_states_pw,enc_outputs=encoder_outputs_pw)

            #decode test
            h_pw = self.decode(cell=de_lstm_pw,
                               init_state=encoder_states_pw,
                               enc_outputs=encoder_outputs_pw)
            #h_pw = self.decode(self.dec_lstm_cell, enc_state, enc_outputs)
            #h_pw = self.decoder(
            #    cell=de_lstm_pw,
            #    initial_state=encoder_states_pw,
            #    inputs=encoder_outputs_pw,
            #    scope_name="de_lstm_pw"
            #)
            # Disabled draft of a train/inference switch for the decoder:
            # if is_training:
            #     self.dec_outputs = self.decode(self.dec_lstm_cell, enc_state, enc_outputs)
            # else:
            #     self.dec_outputs = self.decode(self.dec_lstm_cell, enc_state, enc_outputs, self.loop_function)
            # shape of h is [batch*time_steps, hidden_units]
            # fully connect layer(projection)
            w_pw = tf.Variable(
                initial_value=tf.random_normal(shape=(self.hidden_units_num2,
                                                      self.class_num)),
                name="weights_pw")
            b_pw = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num, )),
                name="bias_pw")
            logits_pw = tf.matmul(
                h_pw, w_pw) + b_pw  # shape of logits:[batch_size*max_time, 3]

            # prediction
            # shape of pred[batch_size*max_time, 1]
            pred_pw = tf.cast(tf.argmax(logits_pw, 1),
                              tf.int32,
                              name="pred_pw")

            # pred in the normal layout, shape: [batch_size, max_time]
            pred_normal_pw = tf.reshape(tensor=pred_pw,
                                        shape=(-1, self.max_sentence_size),
                                        name="pred_normal_pw")

            # one-hot the pred_normal:[batch_size, max_time,class_num]
            pred_normal_one_hot_pw = tf.one_hot(indices=pred_normal_pw,
                                                depth=self.class_num,
                                                name="pred_normal_one_hot_pw")

            # loss
            self.loss_pw = tf.losses.sparse_softmax_cross_entropy(
                labels=tf.reshape(self.y_p_pw, shape=[-1]), logits=logits_pw)
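            # Note: unlike the masked variants in the earlier examples, this flattens
            # the labels with a plain reshape, so padded positions also enter the loss.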
            #---------------------------------------------------------------------------------------
            '''
            #----------------------------------PPH--------------------------------------------------
            # embedded inputs: [batch_size, MAX_TIME_STEPS, embedding_size]
            inputs_pph = tf.nn.embedding_lookup(params=self.embeddings, ids=self.X_p_pph, name="embeded_input_pph")
            # shape of inputs: [batch_size, max_time_steps, embedding_dims+class_num]
            inputs_pph = tf.concat(values=[inputs_pph, pred_normal_one_hot_pw], axis=2, name="inputs_pph")
            print("shape of inputs_pph:", inputs_pph.shape)

            # encoder cells
            # forward part
            en_lstm_forward1_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_pph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)

            # encode
            encoder_outputs_pph, encoder_states_pph = self.encoder(
                cell_forward=en_lstm_forward1_pph,
                cell_backward=en_lstm_backward1_pph,
                inputs=inputs_pph,
                scope_name="en_lstm_pph"
            )
            # shape of h is [batch*time_steps,hidden_units]
            h_pph = self.decoder(
                cell=de_lstm_pph,
                initial_state=encoder_states_pph,
                inputs=encoder_outputs_pph,
                scope_name="de_lstm_pph"
            )

            # fully connect layer(projection)
            w_pph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.hidden_units_num2, self.class_num)),
                name="weights_pph"
            )
            b_pph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num,)),
                name="bias_pph"
            )
            logits_pph = tf.matmul(h_pph, w_pph) + b_pph  # shape of logits:[batch_size*max_time, 5]

            # prediction
            # shape of pred[batch_size*max_time, 1]
            pred_pph = tf.cast(tf.argmax(logits_pph, 1), tf.int32, name="pred_pph")

            # pred in the normal layout, shape: [batch_size, max_time]
            pred_normal_pph = tf.reshape(
                tensor=pred_pph,
                shape=(-1, self.max_sentence_size),
                name="pred_normal"
            )
            # one-hot the pred_normal:[batch_size, max_time,class_num]
            pred_normal_one_hot_pph = tf.one_hot(
                indices=pred_normal_pph,
                depth=self.class_num,
                name="pred_normal_one_hot_pph"
            )

            # loss
            self.loss_pph = tf.losses.sparse_softmax_cross_entropy(
                labels=tf.reshape(self.y_p_pph, shape=[-1]),
                logits=logits_pph
            )
            #------------------------------------------------------------------------------------

            #---------------------------------------IPH------------------------------------------
            # embedded inputs: [batch_size, MAX_TIME_STEPS, embedding_size]
            inputs_iph = tf.nn.embedding_lookup(params=self.embeddings, ids=self.X_p_iph, name="embeded_input_iph")
            # shape of inputs: [batch_size, max_time_steps, embedding_dims+class_num]
            inputs_iph = tf.concat(values=[inputs_iph, pred_normal_one_hot_pph], axis=2, name="inputs_iph")
            print("shape of inputs_iph:", inputs_iph.shape)
            # encoder cells
            # forward part
            en_lstm_forward1_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # en_lstm_forward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_forward=rnn.MultiRNNCell(cells=[en_lstm_forward1,en_lstm_forward2])

            # backward part
            en_lstm_backward1_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # en_lstm_backward2=rnn.BasicLSTMCell(num_units=self.hidden_units_num2)
            # en_lstm_backward=rnn.MultiRNNCell(cells=[en_lstm_backward1,en_lstm_backward2])

            # decoder cells
            de_lstm_iph = rnn.BasicLSTMCell(num_units=self.hidden_units_num)

            # encode
            encoder_outputs_iph, encoder_states_iph = self.encoder(
                cell_forward=en_lstm_forward1_iph,
                cell_backward=en_lstm_backward1_iph,
                inputs=inputs_iph,
                scope_name="en_lstm_iph"
            )
            # shape of h is [batch*time_steps,hidden_units]
            h_iph = self.decoder(
                cell=de_lstm_iph,
                initial_state=encoder_states_iph,
                inputs=encoder_outputs_iph,
                scope_name="de_lstm_iph"
            )

            # fully connect layer(projection)
            w_iph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.hidden_units_num2, self.class_num)),
                name="weights_iph"
            )
            b_iph = tf.Variable(
                initial_value=tf.random_normal(shape=(self.class_num,)),
                name="bias_iph"
            )
            logits_iph = tf.matmul(h_iph, w_iph) + b_iph  # shape of logits:[batch_size*max_time, 5]

            # prediction
            # shape of pred[batch_size*max_time, 1]
            pred_iph = tf.cast(tf.argmax(logits_iph, 1), tf.int32, name="pred_iph")

            # pred in the normal layout, shape: [batch_size, max_time]
            pred_normal_iph = tf.reshape(
                tensor=pred_iph,
                shape=(-1, self.max_sentence_size),
                name="pred_normal"
            )

            # one-hot the pred_normal:[batch_size, max_time,class_num]
            pred_normal_one_hot_iph = tf.one_hot(
                indices=pred_normal_iph,
                depth=self.class_num,
                name="pred_normal_one_hot_iph"
            )

            # loss
            self.loss_iph = tf.losses.sparse_softmax_cross_entropy(
                labels=tf.reshape(self.y_p_iph, shape=[-1]),
                logits=logits_iph
            )

            #---------------------------------------------------------------------------------------
            '''
            #loss
            self.loss = self.loss_pw  #+self.loss_pph+self.loss_iph
            #optimizer
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(self.loss)
            self.init_op = tf.global_variables_initializer()
            self.init_local_op = tf.local_variables_initializer()

        #------------------------------------Session-----------------------------------------
        with self.session as sess:
            print("Training Start")
            sess.run(self.init_op)  # initialize all variables
            sess.run(self.init_local_op)

            train_Size = X_train_pw.shape[0]
            validation_Size = X_validation_pw.shape[0]
            best_validation_loss = 1000  # lowest validation loss seen so far

            #epoch
            for epoch in range(1, self.max_epoch + 1):
                print("Epoch:", epoch)
                start_time = time.time()  # time evaluation
                # training loss/accuracy in every mini-batch
                train_losses = []
                train_accus_pw = []
                train_accus_pph = []
                train_accus_iph = []

                c1_f_pw = []
                c2_f_pw = []  # each class's f1 score
                c1_f_pph = []
                c2_f_pph = []
                c1_f_iph = []
                c2_f_iph = []

                # mini batch
                for i in range(0, (train_Size // self.batch_size)):
                    _, train_loss, train_pred_pw = sess.run(
                        fetches=[self.optimizer, self.loss, pred_pw],
                        feed_dict={
                            self.X_p_pw:
                            X_train_pw[i * self.batch_size:(i + 1) *
                                       self.batch_size],
                            self.y_p_pw:
                            y_train_pw[i * self.batch_size:(i + 1) *
                                       self.batch_size],
                        })

                    # loss
                    train_losses.append(train_loss)
                    # metrics
                    # pw
                    accuracy_pw, f1_1_pw, f1_2_pw = util.eval(
                        y_true=np.reshape(
                            y_train_pw[i * self.batch_size:(i + 1) *
                                       self.batch_size], [-1]),
                        y_pred=train_pred_pw)
                    print("f1_score of N:", f1_1_pw)
                    print("f1_score of B:", f1_2_pw)
                    print()

                    #c1_f_pw.append(f1_1_pw);
                    #c2_f_pw.append(f1_2_pw)
                '''