Example #1
0
    def __classifict(self, feature, sentences, incr=False):
        """Predict an emotion label for every sentence.

        A two-stage pipeline: an objective/subjective classifier first
        decides whether each sentence is subjective ("Y"); only subjective
        sentences are passed to the emotion classifier. Sentences whose
        key-word extraction failed (reported via the "danger" index lists)
        get a placeholder label instead.

        :param feature: feature extractor; its ``subjective`` flag is
            toggled to obtain the two classifiers.
        :param sentences: a single sentence or a list of sentences.
        :param incr: forwarded to ``get_emotion_classification`` (incremental mode).
        :return: list of predicted labels, one per input sentence.
        """
        if isinstance(sentences, basestring):
            sentences = [sentences]

        # Stage 1: objective/subjective classifier.
        feature.subjective = False
        objective_clf = get_objective_classification(feature)

        obj_datas, obj_true, obj_danger = feature.get_key_words(sentences)
        if sp.issparse(obj_datas):
            obj_test = obj_datas
        else:
            obj_test = feature.cal_weight_improve(obj_datas, obj_true)
        obj_pred = objective_clf.predict(obj_test)

        # Stage 2: emotion classifier.
        feature.subjective = True
        emotion_clf = get_emotion_classification(feature, incr=incr)

        emo_datas, emo_true, emo_danger = feature.get_key_words(sentences)
        if sp.issparse(emo_datas):
            emo_test = emo_datas
        else:
            emo_test = feature.cal_weight_improve(emo_datas, emo_true)

        labels = []
        for idx in range(len(sentences)):
            if idx in obj_danger or idx in emo_danger:
                # Key-word extraction failed for this sentence.
                labels.append("none(insufficient key_words)")
                continue
            # The prediction arrays skip "danger" sentences, so shift the
            # index left by the number of skipped entries before idx.
            obj_offset = np.sum(np.asarray(obj_danger) < idx)
            emo_offset = np.sum(np.asarray(emo_danger) < idx)
            objective_label = obj_pred[idx - obj_offset]
            if objective_label == "Y":
                # Subjective sentence: ask the emotion classifier.
                labels.append(emotion_clf.predict(emo_test[idx - emo_offset])[0])
            else:
                # Objective sentence: keep the objective label as-is.
                labels.append(objective_label)

        return labels
Example #2
0
    def __classifict(self, feature, sentences, incr=False):
        """Predict an emotion label for each sentence via a two-stage pipeline.

        First an objective/subjective classifier labels each sentence; only
        sentences labeled "Y" (subjective) are run through the emotion
        classifier. Sentences whose key-word extraction failed (listed in
        the "danger" index arrays) receive a placeholder label.

        :param feature: feature extractor; its ``subjective`` flag is toggled
            to select which classifier to build.
        :param sentences: a single sentence (string) or a list of sentences.
        :param incr: forwarded to ``get_emotion_classification``
            (presumably enables incremental training — confirm with callee).
        :return: list of predicted labels, one per input sentence.
        """
        if isinstance(sentences, basestring):
            sentences = [sentences]

        # Obtain the objective/subjective classifier.
        feature.subjective = False
        objective_clf = get_objective_classification(feature)

        # Objective/subjective part: extract key-word features.
        # danger_index_objective lists indices of sentences that could not
        # be featurized and are therefore absent from test_datas_objective.
        test_datas_objective, c_true_objective, danger_index_objective = feature.get_key_words(sentences)

        test_objective = test_datas_objective
        if not sp.issparse(test_datas_objective):
            # Dense features still need TF-IDF-style weighting applied.
            test_objective = feature.cal_weight_improve(test_datas_objective, c_true_objective)

        c_pred_objective = objective_clf.predict(test_objective)

        # Obtain the emotion classifier.
        feature.subjective = True
        emotion_clf = get_emotion_classification(feature, incr=incr)

        # Emotion part: extract features again with subjective=True.
        test_datas, c_true, danger_index = feature.get_key_words(sentences)

        test = test_datas
        if not sp.issparse(test_datas):
            test = feature.cal_weight_improve(test_datas, c_true)

        c_pred = []
        for i in range(len(sentences)):
            if i not in danger_index_objective and i not in danger_index:
                # Prediction arrays skip "danger" sentences, so shift the
                # sentence index left by the number of skipped entries before i.
                before_i_in_danger_obj = np.sum(np.asarray(danger_index_objective) < i)
                before_i_in_danger_ = np.sum(np.asarray(danger_index) < i)

                # Subjective ("Y") sentences go to the emotion classifier;
                # otherwise the objective label is returned unchanged.
                c = emotion_clf.predict(test[i - before_i_in_danger_])[0] if c_pred_objective[i - before_i_in_danger_obj] == "Y" \
                    else c_pred_objective[i - before_i_in_danger_obj]
                c_pred.append(c)
            else:
                # Featurization failed for this sentence; emit a placeholder.
                c_pred.append("none(insufficient key_words)")

        return c_pred
Example #3
0
def classifict(feature, sentences, incr=False, out=False):
    if isinstance(sentences, basestring):
        sentences = [sentences]

    # 获得主客观分类器
    feature.subjective = False
    objective_clf = get_objective_classification(feature)

    # 测试集
    # 主客观部分
    test_datas_objective, c_true_objective, danger_index_objective = feature.get_key_words(sentences)

    test_objective = test_datas_objective
    if not sp.issparse(test_datas_objective):
        test_objective = feature.cal_weight_improve(test_datas_objective, c_true_objective)

    c_pred_objective = objective_clf.predict(test_objective)

    # 获得情绪分类器
    feature.subjective = True
    emotion_clf = get_emotion_classification(feature, incr=incr)

    # 测试集
    # 情绪部分
    test_datas, c_true, danger_index = feature.get_key_words(sentences)

    test = test_datas
    if not sp.issparse(test_datas):
        test = feature.cal_weight_improve(test_datas, c_true)

    c_pred = []
    for i in range(len(sentences)):
        if i not in danger_index_objective and i not in danger_index:
            before_i_in_danger_obj = np.sum(np.asarray(danger_index_objective) < i)
            before_i_in_danger_ = np.sum(np.asarray(danger_index) < i)

            c = emotion_clf.predict(test[i - before_i_in_danger_])[0] if c_pred_objective[i - before_i_in_danger_obj] == "Y"\
                else c_pred_objective[i - before_i_in_danger_obj]
            c_pred.append(c)

    if out:
        dir_ = os.path.join(OUT_BASE_URL, "out0")
        FileUtil.mkdirs(dir_)
        current = time.strftime('%Y-%m-%d %H:%M:%S')
        o = os.path.join(dir_, current + ".xml")

        with open(o, "w") as fp:
            for i, s in enumerate(sentences):
                if i not in danger_index_objective and i not in danger_index:
                    before_i_in_danger_obj = np.sum(np.asarray(danger_index_objective) < i)
                    before_i_in_danger_ = np.sum(np.asarray(danger_index) < i)
                    fp.write(
                        """<weibo emotion-type="%s">
    <sentence emotion-1-type="%s" emotion-2-type="none" emotion-tag="%s">
        %s
    </sentence>
</weibo>
""" % (c_pred[i - before_i_in_danger_], c_pred[i - before_i_in_danger_], "N" if c_pred_objective[i - before_i_in_danger_obj] == "N" else "Y", s))
                else:
                    fp.write(
                        """<weibo emotion-type="%s">
    <sentence emotion-1-type="%s" emotion-2-type="none" emotion-tag="%s">
        %s
    </sentence>
</weibo>
""" % ("None", "None", "N", s + "\n Can't recognize because it has insufficient key_words"))

    else:
        print c_pred