Python IdentArea примеры использования

Язык программирования: Python

Пространство имен/Пакет: wytool.utils.area

Класс/Тип: IdentArea

Примеров на hotexamples.com: 2

Python IdentArea - 2 примера найдено. Это лучшие примеры Python кода для wytool.utils.area.IdentArea, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ident(1)

set_area(1)

Пример #1

Показать файл

Файл: merge.py Проект: SimpleAnalysis/devel

    def on_init(self):
        """Load lookup tables from the database and set up area recognition.

        Populates the following attributes:

        * ``self.cause_of_action_id`` -- cause-of-action name -> id. Rows
          from ``ot_judge_anyou_old`` are loaded first and rows from
          ``ot_judge_anyou`` second, so the new table wins on duplicate
          names.
        * ``self.cause_of_actions`` -- names from the new table followed by
          names from the old table, each group ordered longest name first.
        * ``self.replace_keywords`` -- keyword -> replacement, read from
          ``ot_judge_keyword_filter``.
        * ``self.area_parse`` -- an ``IdentArea`` instance that has been fed
          every row of ``uc_area`` through ``set_area``.
        """

        self.cause_of_action_id = {}
        self.cause_of_actions = []
        engine = create_engine(DB_URL, echo=False)

        with engine.connect() as conn:
            old_anyou = conn.execute(
                "select anyou_name,new_id,max(level) from ot_judge_anyou_old group by anyou_name").fetchall()

            new_anyou = conn.execute(
                "select anyou_name,id,max(level) from ot_judge_anyou group by anyou_name").fetchall()

            self.replace_keywords = dict(conn.execute(
                "select keyword,keyword_replace from ot_judge_keyword_filter").fetchall()
            )

        # Old rows first so that ids from the new table override them.
        for rows in (old_anyou, new_anyou):
            self.cause_of_action_id.update((row[0], row[1]) for row in rows)

        # New names first, then old names; longest first within each group.
        # key=/reverse= gives the same stable descending-length order as a
        # cmp function would, and also works on Python 3 where the
        # positional cmp argument of sorted() no longer exists.
        for rows in (new_anyou, old_anyou):
            self.cause_of_actions.extend(
                sorted(set(row[0] for row in rows), key=len, reverse=True))

        # Area recognition module.
        # NOTE(review): host, port and DB settings are hard-coded here.
        self.area_parse = IdentArea('192.168.3.234', 7779, 'area')

        dbarg = {"name": "uc_area", "sql_host": "192.168.1.216", "sql_user": "******",
                 "sql_db": "user_cloud_db", "sql_pass": "******", "sql_port": 57789}

        sql = 'select id,  province, city,country, grade from uc_area'

        _res, area_rows = dbtool.exec_sql(dbarg, sql)

        # Register each area under the name matching its grade, together
        # with the name one level above it (empty for grade 0).
        for row in area_rows:
            grade = row['grade']
            if grade == 2:
                self.area_parse.set_area(
                    row['country'], row['id'], 2, row['city'])
            elif grade == 1:
                self.area_parse.set_area(
                    row['city'], row['id'], 1, row['province'])
            elif grade == 0:
                self.area_parse.set_area(
                    row['province'], row['id'], 0, '')

Пример #2

Показать файл

Файл: merge.py Проект: SimpleAnalysis/devel

class CourtGovCnMergeProcesserLawyer(JudgmentProcesser):

    """Extract judgments that involve listed lawyers into the main table.

    Raw rows come from ``ot_rawdata_judgement_court_gov_cn_old`` and the
    records built by ``to_ot_judge_base`` go to ``ot_judge_base``. A row is
    only converted when ``check_lawyer`` finds one of the names in
    ``DATAS[0]`` in it.
    """

    from_warehouses = (ot_rawdata_judgement_court_gov_cn_old,)
    to_warehouses = (ot_judge_base,)

    sql_ml = 'select parent_id from ot_judge_base where come_from="CourtGovCnMergeProcesserLawyer" order by parent_id desc limit 1'

    startid = DATAS

    # Document types handled by this processor; rows with any other
    # ``case_type`` are skipped in ``to_ot_judge_base``.
    case_mode = (u'民事判决书', u'民事调解书', u'仲裁裁决书', u'仲裁调解书', u'刑事判决书', u'行政判决书',
                 u"民事裁定书", u"国家赔偿决定书",  u"刑事裁定书", u"刑事附带民事判决书", u"刑事附带民事裁定书",
                 u"刑事附带民事调解书", u"刑事再审判决书", u"强制医疗决定书", u"行政裁定书", u"行政附带民事判决书",
                 u"行政附带民事调解书", u"行政赔偿调解书")

    def on_init(self):
        """Load lookup tables from the database and set up area recognition.

        Populates the following attributes:

        * ``self.cause_of_action_id`` -- cause-of-action name -> id. Rows
          from ``ot_judge_anyou_old`` are loaded first and rows from
          ``ot_judge_anyou`` second, so the new table wins on duplicate
          names.
        * ``self.cause_of_actions`` -- names from the new table followed by
          names from the old table, each group ordered longest name first,
          so longer names are tried before shorter ones when matching.
        * ``self.replace_keywords`` -- keyword -> replacement, read from
          ``ot_judge_keyword_filter``.
        * ``self.area_parse`` -- an ``IdentArea`` instance that has been fed
          every row of ``uc_area`` through ``set_area``.
        """

        self.cause_of_action_id = {}
        self.cause_of_actions = []
        engine = create_engine(DB_URL, echo=False)

        with engine.connect() as conn:
            old_anyou = conn.execute(
                "select anyou_name,new_id,max(level) from ot_judge_anyou_old group by anyou_name").fetchall()

            new_anyou = conn.execute(
                "select anyou_name,id,max(level) from ot_judge_anyou group by anyou_name").fetchall()

            self.replace_keywords = dict(conn.execute(
                "select keyword,keyword_replace from ot_judge_keyword_filter").fetchall()
            )

        # Old rows first so that ids from the new table override them.
        for rows in (old_anyou, new_anyou):
            self.cause_of_action_id.update((row[0], row[1]) for row in rows)

        # New names first, then old names; longest first within each group.
        # key=/reverse= gives the same stable descending-length order as a
        # cmp function would, and also works on Python 3 where the
        # positional cmp argument of sorted() no longer exists.
        for rows in (new_anyou, old_anyou):
            self.cause_of_actions.extend(
                sorted(set(row[0] for row in rows), key=len, reverse=True))

        # Area recognition module.
        # NOTE(review): host, port and DB settings are hard-coded here.
        self.area_parse = IdentArea('192.168.3.234', 7779, 'area')

        dbarg = {"name": "uc_area", "sql_host": "192.168.1.216", "sql_user": "******",
                 "sql_db": "user_cloud_db", "sql_pass": "******", "sql_port": 57789}

        sql = 'select id,  province, city,country, grade from uc_area'

        _res, area_rows = dbtool.exec_sql(dbarg, sql)

        # Register each area under the name matching its grade, together
        # with the name one level above it (empty for grade 0).
        for row in area_rows:
            grade = row['grade']
            if grade == 2:
                self.area_parse.set_area(
                    row['country'], row['id'], 2, row['city'])
            elif grade == 1:
                self.area_parse.set_area(
                    row['city'], row['id'], 1, row['province'])
            elif grade == 0:
                self.area_parse.set_area(
                    row['province'], row['id'], 0, '')

    def check_lawyer(self,  old):
        """Check whether the judgment involves one of the listed lawyers.

        The structured ``old.lawyers_attr`` entries are checked first. If
        none of them is in ``DATAS[0]``, the lines of ``old.content`` are
        scanned for a listed name that has "律师" within the two characters
        directly before or after it, or that directly follows one of the
        ``LAWYER_TYPE`` markers. Only the first occurrence of the name (and
        of the marker) in a line is examined.

        Args:
            old: raw judgment row; ``content`` and ``lawyers_attr`` are read.

        Returns:
            A ``(matched, name)`` tuple; ``(False, "")`` when nothing matched.
        """

        # Names of all people mentioned in this judgment.
        peoples = bamboo.people_name(old.content)

        # lawyers_attr looks like {plaintiff: [(name, firm), ...], defendant: ...}
        for entries in old.lawyers_attr.values():
            for entry in entries or ():
                if entry[0] in DATAS[0]:
                    print(entry[0].encode("utf8"))
                    return (True, entry[0])

        content_lines = old.content.split('\n')
        for lawyer in DATAS[0]:
            # Cheap whole-document check first; only scan line by line when
            # the name was recognised somewhere in the text.
            if lawyer not in peoples:
                continue
            for line in content_lines:
                if lawyer not in line:
                    continue
                start = line.index(lawyer)
                end = start + len(lawyer)
                # "律师 xx" or "xx 律师" directly adjacent to the name.
                if u"律师" in line[max(start - 2, 0):start] or u"律师" in line[end:end + 2]:
                    print(lawyer.encode("utf8"))
                    return (True, lawyer)
                for item in LAWYER_TYPE:
                    if item not in line:
                        continue
                    # Name must start right after the marker.
                    after = line.index(item) + len(item)
                    if lawyer in line[after:after + len(lawyer)]:
                        print(lawyer.encode("utf8"))
                        return (True, lawyer)
        return (False, "")

    def _find_cause_of_action(self, title, content):
        """Return the first known cause of action found in title or content.

        ``title`` is searched first, then ``content``; candidates are tried
        in ``self.cause_of_actions`` order. Returns ``None`` when no known
        name occurs in either text.
        """
        for ca in self.cause_of_actions:
            if ca in title:
                return ca
        for ca in self.cause_of_actions:
            if ca in content:
                return ca
        else:
            # for/else: give up only after every candidate has been tried,
            # not as soon as the first candidate fails to match.
            return None

    def to_ot_judge_base(self, old):
        """Build an ``ot_judge_base`` record from a raw judgment row.

        Side effects: writes progress to ``./keyword/id.json``, records
        accepted ids in ``OLD_ID`` / ``./keyword/oldid.json``, and (unless
        ``MODLE`` is ``"CourtGovCnMake"``) issues an update of the matched
        lawyer's counter through ``local_lawyers``.

        Args:
            old: raw judgment row.

        Returns:
            The populated ``ot_judge_base`` instance, or ``None`` when the
            row is skipped: already extracted, a required field is ``None``,
            unsupported ``case_type``, no listed lawyer found, or no known
            cause of action found.
        """

        print(old.url)

        if MODLE == "CourtGovCnMergeProcesserLawyer":
            data = {"max": old.id, "up": DATAS[2]}
            id_or_name(fname="./keyword/id.json", mode="write", indata=data)

        if MODLE == "CourtGovCnMergeProcesserLawyerUp":
            if DATAS[1] < old.id:
                print("更新模式父id已大于普通模式下的父id, 请使用非更新模式进行抽取!")
                os._exit(0)
            else:
                data = {"max": DATAS[1], "up": old.id}
                id_or_name(
                    fname="./keyword/id.json", mode="write", indata=data)

        # Already extracted: nothing to do.
        if old.id in OLD_ID:
            return

        required = ('content', 'case_sign', 'case_type', 'department', 'end_date')
        if any(getattr(old, attr) is None for attr in required):
            return

        # Only the document types listed in case_mode are analysed.
        if old.case_type not in self.case_mode:
            return

        # Skip rows without a matching lawyer.
        matched, lawyer_name = self.check_lawyer(old)
        if not matched:
            return
        OLD_ID.add(old.id)
        id_or_name(fname="./keyword/oldid.json", mode="write",
                   indata={"id": list(OLD_ID)})

        new = ot_judge_base()

        # Wrap every line of the text in its own <p> element.
        new.content = '<p>' + '</p><p>'.join(old.content.split('\n')) + '</p>'
        new.content_md5 = md5(new.content.encode('utf8')).hexdigest()
        new.case_sign = '<p>' + \
            '</p><p>'.join(old.case_sign.split('\n')) + '</p>'

        new.case_type = old.case_type

        for item in TYPE_ID:
            if item["name"] == new.case_type.encode("gbk"):
                new.case_type_id = item["value"]
                break

        # The case type minus its last three characters.
        new.type = new.case_type[:-3]

        # Arbitration is filed under civil.
        if new.type == u'仲裁':
            new.type = u'民事'

        new.case_number = old.case_number

        # The title is the first line of the full text.
        new.title = re.split("\n", old.content_all)[0].strip()

        # Remove document-type wording from the title before matching.
        title = new.title
        for item in self.case_mode:
            if item in title:
                title = title.replace(item, "")

        anyou = self._find_cause_of_action(title, old.content_all)
        if anyou is None:
            return
        new.anyou = anyou
        new.anyou_id = self.cause_of_action_id[new.anyou]

        new.department = old.department

        new.chief_judge = old.chief_judge

        new.judge = old.judge

        new.acting_judges = old.acting_judges

        new.clerk = old.clerk

        new.plaintiff = ';'.join(
            u"%s:%s:%s" % client for client in old.clients_attr[u'原告'])
        new.plaintiff_lawyers = ';'.join(
            u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u'原告'])

        new.defendant = ';'.join(
            u"%s:%s:%s" % client for client in old.clients_attr[u'被告'])
        new.defendant_lawyers = ';'.join(
            u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u'被告'])

        new.procedure = old.procedure

        new.end_date = arrow.get(old.end_date, 'Asia/Shanghai').timestamp

        # Area recognition, based on the court/department name.
        area = self.area_parse.ident(new.department.encode('gbk'))
        if area:
            new.areacode = area['areano']

        new.url = old.referer

        # Drop replacement entries whose key contains 某, X, x or *.
        masked_key = re.compile(u".*(某|X|x|\\*).*")
        kept = dict((k, v) for k, v in old.replace_data.items()
                    if not masked_key.match(k))
        new.replace_data = json.dumps(kept)

        new.input_time = arrow.now().timestamp
        new._MASK_SYNC_V2 = datetime.now()
        new.from_host = urlparse(old.referer).hostname

        print("OK")
        # Bump the judgment counter of the matched lawyer.
        if MODLE != "CourtGovCnMake":
            # NOTE(review): the SQL is assembled by string formatting; a name
            # containing a quote would break the statement. Switch to a
            # parameterised query if local_lawyers supports one.
            sql_up = "update user_lawyer_main set count=count+1, update_datetime=%d where name='%s'" \
                % (arrow.now().timestamp, lawyer_name.encode("utf8"))
            local_lawyers("insert", sql_up)

        return new