Exemplo n.º 1
0
#: 载入案由配置以及地区配置
from configure import anyou_replace, Area_duct, case_mode, OFFICE
#from StringIO import StringIO
#from hashlib import md5
from configure import PROXY
#: 载入基础模块
import time
import json
import re
import arrow
import sys
import requests

# Python 2 only: site.py removes sys.setdefaultencoding at interpreter
# start-up; reload(sys) brings it back so the process-wide default codec for
# implicit str <-> unicode conversion can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Module-level values unpacked from anyou_replace('Judgment').
# NOTE(review): presumably cause-of-action ("anyou") ids, names, replacement
# rules and aliases -- confirm against configure.anyou_replace.
actions_id, actions, replace, anyou_alias = anyou_replace('Judgment')
# Module-level Area_duct instance (imported from configure).
area = Area_duct()


class JudgmentAnalysis(Process):

    def to_ot_rawdata_judgement_court_gov_cn_old(self, old, todat):

        new = ot_rawdata_judgement_court_gov_cn_old()
        new.url = old.url
        new.referer = old.url

        analy = Analyse()
        try:
            raw_html = XPath(old.source_data).execute(
                '//*[@id="ws"]/table')[0].to_html()
Exemplo n.º 2
0
#: from pybamboo import Bamboo

#: 载入基础模块
import time
import json
import sys
import arrow
import re

# Python 2 only: site.py removes sys.setdefaultencoding at interpreter
# start-up; reload(sys) brings it back so the process-wide default codec for
# implicit str <-> unicode conversion can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')


#: Module-level globals
# NOTE(review): Area_duct and anyou_replace are not imported in the visible
# part of this snippet -- confirm they are brought into scope elsewhere.
AREA = Area_duct()
ACTION_ID, ACTIONS, REPLACE, ANYOU_ALIAS = anyou_replace('Judgment')


class JudgmentAnalysis(Process):

    """继承程序运行模块,开始对任务进行分析


    """

    def fuzzy_analyse(self, old_data):
        "分析方法,开始对数据进行分析"
        analy = Analyse()
        analy.text = old_data.content_all

        _header, part, content, case_sign = analy.split_to_four_parts()
Exemplo n.º 3
0
def _as_paragraphs(text):
    """Wrap every newline-separated line of *text* in its own <p>...</p>."""
    return "<p>" + "</p><p>".join(text.split("\n")) + "</p>"


def _longest_match(candidates):
    """Return the longest string in *candidates*, or None when it is empty.

    On a length tie the later candidate wins.
    """
    best = None
    for candidate in candidates:
        if best is None or len(candidate) >= len(best):
            best = candidate
    return best


def _pick_anyou(case_kind, title, first_line, actions):
    """Select the cause of action ("anyou") for a document, or None.

    Two sources are searched for any entry of *actions* occurring as a
    substring: the title (longest match wins) and the first content line
    (first match in *actions* order wins).  Administrative cases consult the
    title first, every other kind consults the first content line first; the
    second source is only used when the first yields nothing.
    """
    def from_title():
        return _longest_match([name for name in actions if name in title])

    def from_first_line():
        hits = [name for name in actions if name in first_line]
        return hits[0] if hits else None

    if case_kind == u"行政":
        lookups = (from_title, from_first_line)
    else:
        lookups = (from_first_line, from_title)
    for lookup in lookups:
        found = lookup()
        if found is not None:
            return found
    return None


def insert_base(old, Update_id=0):
    """Convert a parsed judgment record into an ot_judge_base row and store it.

    This is Python 2 code (it relies on the ``print >> stream`` statement).

    Args:
        old: parsed source record.  The attributes read here are content,
            content_all, case_sign, case_type, case_number, title, department,
            chief_judge, judge, acting_judges, clerk, clients_attr,
            lawyers_attr, procedure, end_date, referer, url, replace_data
            and id.
        Update_id: when non-zero, the row with this id is updated (and the
            row's url is taken from ``old.url``); when 0 a new row is
            inserted.

    Returns:
        None.  The record is skipped (after printing a hint to ``__output__``)
        when a required field is None, no known case mode occurs in
        ``old.case_type``, the content is empty, no cause of action can be
        identified, or the judge signature / law-firm information is missing.

    Raises:
        KeyError: if the selected cause of action has no entry in the id
            table, or ``clients_attr`` / ``lawyers_attr`` lack the plaintiff
            or defendant key.
    """
    area = Area_duct()
    # Only the id table and the name list are needed here; the remaining two
    # values returned by anyou_replace are ignored.
    actions_id, actions, _, _ = anyou_replace("Judgment")
    new = ot_judge_base()

    for attr in ("content", "case_sign", "case_type", "department", "end_date"):
        if getattr(old, attr) is None:
            print >> __output__, u"【提示】%s 字段为空,请检查数据" % attr
            return

    # Normalise the case type to a known mode; when several modes occur in
    # old.case_type the last one in case_mode order is kept.
    for mode in case_mode:
        if mode in old.case_type:
            new.case_type = mode
    if not new.case_type:
        print >> __output__, u"【提示】文书字号为空"
        return

    if old.content == "":
        # Fixed: the unicode prefix used to sit inside the literal, and the
        # empty document was still processed and stored afterwards.
        print >> __output__, u"你所访问的数据为空"
        return

    first_line = old.content.split("\n")[0]

    new.content = _as_paragraphs(old.content)
    new.content_md5 = md5(new.content.encode("utf8")).hexdigest()
    new.case_sign = _as_paragraphs(old.case_sign)
    new.case_number = old.case_number

    # The case kind is the case type without its last three characters.
    new.type = new.case_type[:-3]
    # Arbitration is filed under civil cases.
    if new.type == u"仲裁":
        new.type = u"民事"

    new.title = old.title
    if not new.title:
        new.title = old.content_all.split("\n")[0]

    # Resolve the cause of action.
    anyou = _pick_anyou(new.type, new.title, first_line, actions)
    if anyou is None:
        # Fixed: an unmatched document used to crash with AttributeError
        # because .strip() was called on an empty list.
        print >> __output__, u"【提示】未识别到案由"
        return
    new.anyou_id = actions_id[anyou.strip()]
    new.anyou = anyou

    new.department = old.department
    new.chief_judge = old.chief_judge
    new.judge = old.judge
    # "无" is stored as a placeholder when there are no acting judges.
    if old.acting_judges:
        new.acting_judges = old.acting_judges
    else:
        new.acting_judges = "无"
    new.clerk = old.clerk

    new.plaintiff = ";".join(
        u"%s:%s:%s" % client for client in old.clients_attr[u"原告"])
    new.plaintiff_lawyers = ";".join(
        u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u"原告"])
    print >> __plai_people__, u"原告:%s, 律师:%s" % (
        new.plaintiff, new.plaintiff_lawyers)
    new.defendant = ";".join(
        u"%s:%s:%s" % client for client in old.clients_attr[u"被告"])
    new.defendant_lawyers = ";".join(
        u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u"被告"])
    print >> __defan_people__, u"被告:%s, 律师:%s" % (
        new.defendant, new.defendant_lawyers)
    new.procedure = old.procedure

    # NOTE(review): the meaning of the second positional argument of
    # arrow.get depends on the type of old.end_date -- confirm that it is
    # treated as a timezone here.
    new.end_date = arrow.get(old.end_date, "Asia/Shanghai").timestamp

    # Region lookup based on the court name.
    area_item = area.ident(new.department.encode("gbk"))
    # NOTE(review): the key "staut" and the value "timed out" are whatever
    # Area_duct.ident returns; taking the area code only on "timed out" looks
    # inverted -- confirm against Area_duct before changing it.
    if area_item and area_item.get("staut") == "timed out":
        new.areacode = area_item["areano"]

    new.url = old.referer

    print >> __infomation__, old.case_sign
    new.replace_data = json.dumps(old.replace_data)
    new.input_time = arrow.now().timestamp

    # Fixed: the signature test reads old.acting_judges.  new.acting_judges
    # carries the "无" placeholder, which made the test unable to detect
    # missing acting judges.
    has_signature = bool(
        new.chief_judge
        or new.judge
        or (old.acting_judges and old.acting_judges.strip())
    )
    has_law_firm = (
        u"事务所" in new.plaintiff_lawyers
        or u"事务所" in new.defendant_lawyers
    )
    if not (has_signature and has_law_firm):
        print >> __output__, u"不存在事务所或者署名信息"
        return

    # Progress trace on stdout; the parenthesised form prints the same text
    # under the Python 2 print statement.
    print(old.id)

    is_update = Update_id != 0
    if is_update:
        new.id = Update_id
        new.url = old.url
    point = insert_database("Judgment", tablename=ot_judge_base, editor=new)
    if is_update:
        point.update()
    else:
        point.insert()