예제 #1
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the content of the first ``<div class=TRS_Editor>``
    element (up to the first closing ``</div>``).  Province-wide figures are
    taken from the first matching phrase for each field.  City figures are
    searched in the text that starts at the last occurrence of "确诊病例中".

    Args:
        raw: HTML source of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the ``ProvinceData`` object built from
        ``provinceName``/``provinceKey`` (names defined outside this
        function) and a dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the ``TRS_Editor`` container is not found in
            ``raw`` (the search returns ``None``).
    """
    content = re.search(r"(?s)<div class=TRS_Editor>(.*?)<\/div>", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Each field is only assigned when its phrase is present in the body.
    province_fields = (
        ("Confirmed", r"累计.*?确诊病例.*?>(\d+)<.*?例"),
        ("Healed", r"治愈出院.*?>(\d+)<.*?例"),
    )
    for field, regex in province_fields:
        found = re.search(regex, content)
        if found is not None:
            setattr(province, field, int(found.group(1)))

    city = {}
    # NOTE: rfind returns -1 when the marker is absent, in which case the
    # slice below is just the final character and nothing can match.
    breakdown = content[content.rfind("确诊病例中"):]
    for hit in re.finditer(r"[,、]([\u4E00-\u9FA5]+).*?>(\d+)<.*?例", breakdown):
        raw_name, count = hit.groups()
        name = utils.remove_preposition(raw_name)
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            # Only the first figure seen for a city is kept.
            continue
        entry = CityData(name, city_id)
        entry.Confirmed = int(count)
        city[city_id] = entry
    return province, city
예제 #2
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is everything between the ``<!--content begin -->`` and
    ``<!--content end -->`` comments (greedy match).  Province-wide confirmed,
    healed and dead figures are taken from the first matching phrase for each
    field.  City figures are searched in the text that starts at the last
    occurrence of "累计确诊".

    Args:
        raw: HTML source of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the ``ProvinceData`` object built from
        ``provinceName``/``provinceKey`` (names defined outside this
        function) and a dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the content markers are not found in ``raw``
            (the search returns ``None``).
    """
    content = re.search(
        r"(?s)<!--content begin -->(.*)<!--content end -->", raw
    ).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Each field is only assigned when its phrase is present in the body.
    province_fields = (
        ("Confirmed", r"累计确诊.*?(\d+)例"),
        ("Healed", r"出院(\d+)例"),
        ("Dead", r"死亡(\d+)例"),
    )
    for field, regex in province_fields:
        found = re.search(regex, content)
        if found is not None:
            setattr(province, field, int(found.group(1)))

    city = {}
    # NOTE: rfind returns -1 when the marker is absent, in which case the
    # slice below is just the final character and nothing can match.
    breakdown = content[content.rfind("累计确诊"):]
    for hit in re.finditer(r"[。,、]([\u4E00-\u9FA5]+)(\d+)例", breakdown):
        raw_name, count = hit.groups()
        name = utils.remove_preposition(raw_name)
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            # Only the first figure seen for a city is kept.
            continue
        entry = CityData(name, city_id)
        entry.Confirmed = int(count)
        city[city_id] = entry
    return province, city
예제 #3
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is the content of the ``<div class="ze-art"
    style="width: 100%;">`` element (greedy match up to the last ``</div>``).
    Province-wide confirmed and healed figures are taken from the first
    matching phrase for each field.  City figures are searched between the
    last occurrence of "累计报告" and the last occurrence of "累计治愈".

    Args:
        raw: HTML source of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the ``ProvinceData`` object built from
        ``provinceName``/``provinceKey`` (names defined outside this
        function) and a dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the ``ze-art`` container is not found in ``raw``
            (the search returns ``None``).
    """
    container = re.search(
        r"(?s)<div class=\"ze-art\" style=\"width: 100%;\">(.*)<\/div>", raw
    )
    content = container.group(1)

    province = ProvinceData(provinceName, provinceKey)
    confirmed = re.search(r"累计.*?确诊.*?(\d+)例", content)
    if confirmed is not None:
        province.Confirmed = int(confirmed.group(1))
    healed = re.search(r"累计治愈出院(\d+)例", content)
    if healed is not None:
        province.Healed = int(healed.group(1))

    city = {}
    # str.rfind signals "not found" with -1; using that value directly as a
    # slice bound would silently mean "last character", so handle it here.
    start = content.rfind("累计报告")
    if start == -1:
        # No per-city section in this bulletin.
        return province, city
    end = content.rfind("累计治愈")
    if end == -1:
        # No terminating marker: scan to the end of the body.
        end = len(content)

    for hit in re.finditer(r"([\u4E00-\u9FA5]+)(\d+)例", content[start:end]):
        raw_name, count = hit.groups()
        name = utils.remove_preposition(raw_name)
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            # Only the first figure seen for a city is kept.
            continue
        entry = CityData(name, city_id)
        entry.Confirmed = int(count)
        city[city_id] = entry
    return province, city
예제 #4
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The article body is everything between the "mian开始" and "责任编辑相关"
    HTML comments (greedy match).  The province-wide confirmed figure comes
    from the first matching phrase; the healed figure comes from the last
    ",出院病例N例" phrase in the body.  City figures are searched between the
    last occurrence of "累计报告" and the last occurrence of "重症病例", and
    repeated figures for the same city are summed.

    Args:
        raw: HTML source of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the ``ProvinceData`` object built from
        ``provinceName``/``provinceKey`` (names defined outside this
        function) and a dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if the content markers are not found in ``raw``
            (the search returns ``None``).
    """
    container = re.search(
        r"(?s)<!------------------------- mian开始 ------------------------->(.*)<!--------责任编辑相关---------->",
        raw,
    )
    content = container.group(1)

    province = ProvinceData(provinceName, provinceKey)
    confirmed = re.search(r"累计.*?确诊.*?(\d+)例", content)
    if confirmed is not None:
        province.Confirmed = int(confirmed.group(1))
    # Every match overwrites the previous one, so the last match wins.
    for healed in re.finditer(r",出院病例(\d+)例", content):
        province.Healed = int(healed.group(1))

    city = {}
    # str.rfind signals "not found" with -1; using that value directly as a
    # slice bound would silently mean "last character", so handle it here.
    start = content.rfind("累计报告")
    if start == -1:
        # No per-city section in this bulletin.
        return province, city
    end = content.rfind("重症病例")
    if end == -1:
        # No terminating marker: scan to the end of the body.
        end = len(content)

    for hit in re.finditer(r"[\.\u200b]([\u4E00-\u9FA5]+)(\d+)例", content[start:end]):
        raw_name, count = hit.groups()
        name = utils.remove_preposition(raw_name)
        # Cut the name after the first '盟' / '市' when that character occurs
        # before the final position, discarding whatever follows it.
        for suffix in ('盟', '市'):
            if suffix in name[:-1]:
                name = name[:name.find(suffix) + 1]
        if name not in cities:
            continue
        city_id = cities[name]
        if city_id in city:
            city[city_id].Confirmed += int(count)
        else:
            entry = CityData(name, city_id)
            entry.Confirmed = int(count)
            city[city_id] = entry
    return province, city
예제 #5
0
def parse_content_html(raw):
    """Parse a bulletin page into province totals and per-city confirmed counts.

    The text analysed is the first ``<p>`` element (on a single line) whose
    content contains "累计".  Province-wide confirmed, healed and dead figures
    are taken from the first matching phrase for each field.  City figures
    are collected from the whole paragraph; names are passed through
    ``utils.remove_preposition`` and then ``alia_cities`` before being looked
    up in ``cities``, and repeated figures for the same city are summed.

    Special handling: the run of Chinese characters that directly follows
    "死亡N例" is treated as a place name, and the death count is added to
    that place's confirmed tally (the reason is not evident from this code).

    Args:
        raw: HTML source of the bulletin page.

    Returns:
        A ``(province, city)`` tuple: the ``ProvinceData`` object built from
        ``provinceName``/``provinceKey`` (names defined outside this
        function) and a dict mapping city id to ``CityData``.

    Raises:
        AttributeError: if no matching ``<p>`` element is found in ``raw``
            (the search returns ``None``).
    """
    content = re.search(r"<p>(.*?累计.*?)<\/p>", raw).group(1)

    province = ProvinceData(provinceName, provinceKey)
    # Each field is only assigned when its phrase is present in the text.
    province_fields = (
        ("Confirmed", r"确诊病例(\d+)例"),
        ("Healed", r"治愈出院(\d+)例"),
    )
    for field, regex in province_fields:
        found = re.search(regex, content)
        if found is not None:
            setattr(province, field, int(found.group(1)))

    death = re.search(r"死亡(\d+)例(([\u4E00-\u9FA5]+))", content)
    deaths_by_place = {}
    if death is not None:
        province.Dead = int(death.group(1))
        deaths_by_place[death.group(2)] = int(death.group(1))  # SPECIAL HANDLE

    city = {}

    def add_confirmed(place, count):
        # Create the city's entry on first sight, otherwise accumulate.
        if place not in cities:
            return
        city_id = cities[place]
        if city_id in city:
            city[city_id].Confirmed += int(count)
        else:
            entry = CityData(place, city_id)
            entry.Confirmed = int(count)
            city[city_id] = entry

    for hit in re.finditer(r"[(,、。]([\u4E00-\u9FA5]+)(\d+)例", content):
        name = utils.remove_preposition(hit.group(1))
        if name in alia_cities:
            name = alia_cities[name]
        add_confirmed(name, hit.group(2))

    # SPECIAL HANDLE
    for place, count in deaths_by_place.items():
        add_confirmed(place, count)
    return province, city
예제 #6
0
 def test_remove_preposition(self):
     """Check that remove_preposition strips the leading '其中' from '其中广州市'."""
     expected = "广州市"
     actual = utils.remove_preposition('其中广州市')
     self.assertEqual(actual, expected)