示例#1
0
def analyzeInfo_one(item):
    """Build a rainstorm record from one list entry and store it.

    The link is taken from the first ``<a>`` in ``item`` (prefixed with the
    www.cibeicn.com host); source, article text and release time are fetched
    through ``get_source``, ``get_original`` and ``get_releaseTime``.  The
    record is then written with ``postgreCommand.insertData`` under the
    table key ``rainstorm_ZH001``.

    Args:
        item: Parsed HTML node exposing ``find_all`` / ``find``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.

    Raises:
        IndexError: If ``item`` has no ``<a>`` or the source text contains
            no ``来源:`` marker.  Errors from the date parser and the helper
            modules propagate as well; only ``insertData`` errors are caught.
    """
    result = {}
    a_title = item.find_all('a')
    result['disasterid'] = '10107'  # category: rainstorm
    result['link'] = 'http://www.cibeicn.com' + a_title[0]['href']  # news link
    source = get_source(result['link'])
    result['source'] = re.findall(r'来源:(.+)', source)[0]  # news source
    result['originalText'] = get_original(result['link'])  # article text
    release = get_releaseTime(result['link'])
    # Strip every non-digit so the parser receives a compact digit string.
    # Raw string: "\D" in a normal literal is an invalid escape sequence.
    time_str1 = re.sub(r"\D", "", release)
    datetime_struct1 = parser.parse(time_str1)
    releaseTime = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')
    result['releaseTime'] = releaseTime  # release time
    # Prefer the <strong> text as title; fall back to the first link's text.
    strong_info_list = item.find('strong')
    if strong_info_list is None:
        result['title'] = a_title[0].get_text().strip()  # title
    else:
        result['title'] = strong_info_list.get_text().strip()
    # Title and body are analysed together by the extraction helpers.
    originalText = result['title'] + ',' + result['originalText']
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['pictures'] = ''  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of occurrence
    result['current_website'] = '防灾网'  # website the record came from
    result['isreleasetime'] = '1'  # whether occurTime is replaced by the release time
    # NOTE(review): no 'isrellonandlat' key is set on this record -- confirm
    # whether the target table expects one.

    # Secondary payload carrying only the textual fields.
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']

    try:
        title = 'rainstorm_ZH001'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported, not propagated.
        print("插入数据失败", str(e))
示例#2
0
def analyzeInfo(item):
    """Build a storm-surge record from one Baidu News result and store it.

    Link and title come from the first ``<a>`` inside the first ``<h3>``;
    the release time is taken from the 2nd and 3rd whitespace-separated
    tokens of the summary paragraph.  Source, article text and pictures are
    items 0, 1 and 2 of ``get_original(link)``.  The record is written with
    ``postgreCommand.insertData`` under the table key ``stormSurge_ZH002``.

    Args:
        item: Parsed HTML node exposing ``find_all`` / ``find``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.

    Raises:
        IndexError / AttributeError: If the expected elements are missing.
            Only ``insertData`` errors are caught.
    """
    result = {}
    h3_list = item.find_all('h3', limit=1)
    a_list = h3_list[0].find_all('a', limit=1)
    # The class value (including its trailing space) must match the page markup.
    div_list = item.find('div', attrs={'class': 'c-summary c-row '})
    p_list = div_list.find('p').get_text().split()
    # Join the date and time tokens and keep digits only for the parser.
    # Raw string: "\D" in a normal literal is an invalid escape sequence.
    time_str = re.sub(r"\D", "", p_list[1] + p_list[2])
    datetime_struct1 = parser.parse(time_str)
    releaseTime = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')
    result['link'] = a_list[0]['href']  # news link
    result['title'] = a_list[0].get_text().strip()  # news title
    result['releaseTime'] = releaseTime  # release time
    result['disasterid'] = '10201'  # disaster type
    originalList = get_original(result['link'])
    result['source'] = originalList[0]  # news source
    result['originalText'] = originalList[1]  # article text
    result['pictures'] = originalList[2]  # news pictures
    # Title and body are analysed together by the extraction helpers.
    originalText = result['title'] + ',' + result['originalText']
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of occurrence
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    result['more'] = ''  # special field
    result['regional'] = '国内'  # region where the news was published
    result['current_website'] = '百度新闻'  # website the record came from
    result['isreleasetime'] = '1'  # whether occurTime is replaced by the release time
    result['isrellonandlat'] = '0'
    # Secondary payload carrying only the textual fields.
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']
    try:
        title = 'stormSurge_ZH002'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported, not propagated.
        print("插入数据失败", str(e))
示例#3
0
def analyzeInfo(item):
    """Build a storm-surge record from one CCTV search result and store it.

    The title is the text of the first ``<a>``; the link is read from the
    ``lanmu1`` attribute of the first ``<span>`` inside ``<h3 class="tit">``;
    the release time comes from ``<span class="tim">`` inside
    ``<div class="src-tim">``.  ``analyzeInfoSun(link)`` supplies article
    text, source and pictures as items 0, 1 and 2.  The record is written
    with ``postgreCommand.insertData`` under the key ``stormSurge_ZH005``.

    Args:
        item: Parsed HTML node exposing ``find_all``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.

    Raises:
        IndexError / KeyError: If the expected elements or the ``lanmu1``
            attribute are missing.  Only ``insertData`` errors are caught.
    """
    result = {}
    a_list = item.find_all('a', limit=1)
    h3_list = item.find_all('h3', attrs={'class': 'tit'}, limit=1)
    span_link_list = h3_list[0].find_all('span', limit=1)
    div_list = item.find_all('div', attrs={'class': 'src-tim'}, limit=1)
    span2_list = div_list[0].find_all('span', attrs={'class': 'tim'}, limit=1)
    # Keep digits only so the parser receives a compact digit string.
    # Raw string: '\D' in a normal literal is an invalid escape sequence.
    time_str = re.sub(r'\D', "", span2_list[0].get_text().strip())
    datetime_struct = parser.parse(time_str)
    releaseTime = datetime_struct.strftime('%Y-%m-%d %H:%M:%S')
    result['disasterid'] = '10201'  # news category
    result['link'] = span_link_list[0]['lanmu1']  # news link
    result['title'] = a_list[0].get_text().strip()  # news title
    result['releaseTime'] = releaseTime  # release time
    analyze = analyzeInfoSun(result['link'])
    result['source'] = analyze[1]  # news source
    result['originalText'] = analyze[0]  # article text
    # Title and body are analysed together by the extraction helpers.
    originalText = result['title'] + ',' + result['originalText']
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['pictures'] = analyze[2]  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of occurrence
    result['current_website'] = '央视网'  # website the record came from
    result['isreleasetime'] = '1'  # whether occurTime is replaced by the release time
    result['isrellonandlat'] = '0'
    # Secondary payload carrying only the textual fields.
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']
    try:
        title = 'stormSurge_ZH005'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported, not propagated.
        print("插入数据失败", str(e))
示例#4
0
def analyzeInfo_One(item):
    """Build a rainstorm record from one www.qxkp.net list entry and store it.

    The first ``<div>`` of ``item`` holds the headline link, the second one
    the release date.  ``analyzeInfo_Two(link)`` supplies a mapping with the
    ``source``, ``originalText`` and ``pictures`` of the article.  The record
    is written with ``postgreCommand.insertData`` under the table key
    ``rainstorm_ZH002``.

    Args:
        item: Parsed HTML node exposing ``find_all``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.

    Raises:
        IndexError / AttributeError / TypeError: If the expected ``<div>``
            or ``<a>`` elements are missing.  Only ``insertData`` errors
            are caught.
    """
    result = {}
    divs = item.find_all('div')
    # Look the headline anchor up once and reuse it for text and href.
    anchor = divs[0].find('a')
    headline = anchor.get_text().strip()
    link = 'http://www.qxkp.net' + anchor['href']
    # Keep digits only so the parser receives a compact digit string.
    # Raw string: "\D" in a normal literal is an invalid escape sequence.
    time_str1 = re.sub(r"\D", "", divs[1].get_text())
    datetime_struct1 = parser.parse(time_str1)
    releaseTime = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')
    result['disasterid'] = '10107'  # category: rainstorm
    result['link'] = link  # news link
    article = analyzeInfo_Two(link)
    result['source'] = article['source']  # news source
    result['originalText'] = article['originalText']  # article text
    result['releaseTime'] = releaseTime  # release time
    result['title'] = headline  # title
    # Title and body are analysed together by the extraction helpers.
    originalText = result['title'] + ',' + result['originalText']
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = result['releaseTime']  # occurrence time
    death = toYc.death(originalText)
    injured = toYc.Injured(originalText)
    lossNumber = toYc.loss(originalText)
    result['loss'] = str(lossNumber)  # economic loss
    result['injured'] = str(injured)  # number of injured
    result['death'] = str(death)  # number of deaths
    result['pictures'] = article['pictures']  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of occurrence
    result['current_website'] = '气象科普网'  # website the record came from
    result['isreleasetime'] = '1'  # whether occurTime is replaced by the release time
    result['isrellonandlat'] = '0'
    # Secondary payload carrying only the textual fields.
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']

    try:
        table = 'rainstorm_ZH002'
        res = postgreCommand.insertData(result, resultSun, table)
        if res == 1:
            print(table, '数据插入成功!')
        elif res == 0:
            print(table, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported, not propagated.
        print("插入数据失败", str(e))
示例#5
0
def analyzeInfo(item):
    """Build a rainstorm record from one list entry and store it.

    Link and title come from the first ``<a>`` inside the first ``<h4>``;
    the release time is the text of the first ``<i>`` with its first three
    characters dropped.  ``get_original(link)`` supplies article text,
    source and pictures as items 0, 1 and 2.  The record is written with
    ``postgreCommand.insertData`` under the table key ``rainstorm_ZH006``.

    Args:
        item: Parsed HTML node exposing ``find_all``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.
    """
    anchors = item.find_all('h4', limit=1)[0].find_all('a', limit=1)
    stamp_text = item.find_all('i', limit=1)[0].get_text().strip()
    # Skip the three leading characters that precede the timestamp.
    published = parser.parse(stamp_text[3:]).strftime('%Y-%m-%d %H:%M:%S')

    record = {
        'link': anchors[0]['href'],  # news link
        'title': anchors[0].get_text().strip(),  # news title
        'releaseTime': published,  # release time
    }

    fetched = get_original(record['link'])
    record['source'] = fetched[1]  # news source
    record['originalText'] = fetched[0]  # article text

    # Title and body are analysed together by the extraction helpers.
    combined = record['title'] + ',' + record['originalText']
    located = address.placeMany(combined)
    record.update({
        'disasterid': '10107',  # disaster type
        'place': located[0],  # place of occurrence
        'longitude': str(located[1]),  # longitude of the place
        'latitude': str(located[2]),  # latitude of the place
        'strength': '',
        'occurTime': record['releaseTime'],
    })

    deaths = toYc.death(combined)
    wounded = toYc.Injured(combined)
    losses = toYc.loss(combined)
    record.update({
        'loss': str(losses),  # economic loss
        'injured': str(wounded),  # number of injured
        'death': str(deaths),  # number of deaths
        'pictures': fetched[2],  # multiple paths are separated by semicolons
        'more': '',  # special field
        'regional': '国内',  # region where the news was published
        'province': located[3],  # first-level administrative division
        'country': located[4],  # country of occurrence
        'current_website': '天气网',  # website the record came from
        'isreleasetime': '1',  # whether occurTime is replaced by the release time
        'isrellonandlat': '0',
    })

    # Secondary payload carrying only the textual fields.
    detail = {key: record[key] for key in ('title', 'originalText', 'pictures')}

    table = 'rainstorm_ZH006'
    try:
        status = postgreCommand.insertData(record, detail, table)
        if status == 1:
            print(table, '数据插入成功!')
        elif status == 0:
            print(table, '数据更新成功!')
    except Exception as err:
        print("插入数据失败", str(err))
示例#6
0
def analyzeInfo(item):
    """Build a storm-surge record from one Sina list entry and store it.

    Link and title come from the first ``<a>`` inside the first ``<h2>``;
    the release time is the 2nd and 3rd whitespace-separated tokens of the
    first ``<span>`` in that heading, joined by a space.
    ``get_original(link)`` supplies source, article text and pictures as
    items 0, 1 and 2; when its item 3 is falsy nothing is stored.
    Otherwise the record is written with ``postgreCommand.insertData``
    under the table key ``stormSurge_ZH004``.

    Args:
        item: Parsed HTML node exposing ``find_all``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.
    """
    heading = item.find_all('h2', limit=1)[0]
    anchors = heading.find_all('a')
    stamp_tokens = heading.find_all('span')[0].get_text().strip().split()

    record = {}
    record['link'] = anchors[0]['href']  # news link
    record['title'] = anchors[0].get_text().strip()  # news title
    record['releaseTime'] = stamp_tokens[1] + ' ' + stamp_tokens[2]  # release time

    fetched = get_original(record['link'])
    if not fetched[3]:
        # The fetch helper flagged this article as not to be stored.
        return

    record.update({
        'source': fetched[0],  # news source
        'originalText': fetched[1],  # article text
        'pictures': fetched[2],  # news pictures
        'disasterid': '10201',  # disaster type
    })

    # Title and body are analysed together by the extraction helpers.
    combined = record['title'] + ',' + record['originalText']
    located = address.placeMany(combined)
    record.update({
        'place': located[0],  # place of occurrence
        'longitude': str(located[1]),  # longitude of the place
        'latitude': str(located[2]),  # latitude of the place
    })

    deaths = toYc.death(combined)
    wounded = toYc.Injured(combined)
    losses = toYc.loss(combined)
    record.update({
        'loss': str(losses),  # economic loss
        'injured': str(wounded),  # number of injured
        'death': str(deaths),  # number of deaths
        'province': located[3],  # first-level administrative division
        'country': located[4],  # country of occurrence
        'strength': '',
        'occurTime': record['releaseTime'],
        'more': '',  # special field
        'regional': '国内',  # region where the news was published
        'current_website': '新浪网',  # website the record came from
        'isreleasetime': '1',  # whether occurTime is replaced by the release time
        'isrellonandlat': '0',
    })

    # Secondary payload carrying only the textual fields.
    detail = {key: record[key] for key in ('title', 'originalText', 'pictures')}

    table = 'stormSurge_ZH004'
    try:
        status = postgreCommand.insertData(record, detail, table)
        if status == 1:
            print(table, '数据插入成功!')
        elif status == 0:
            print(table, '数据更新成功!')
    except Exception as err:
        print("插入数据失败", str(err))
示例#7
0
def analyzeInfo(item):
    """Build a storm-surge record from one www.oceanguide.org.cn entry and store it.

    Link and title come from the first ``<a>`` in ``item``; the release time
    is parsed from the digits of the first ``<p>``.  Article text and source
    are fetched through ``get_original`` and ``get_source``.  Injured, death
    and loss figures are fixed to ``'0'`` here.  The record is written with
    ``postgreCommand.insertData`` under the table key ``stormSurge_ZH001``.

    Args:
        item: Parsed HTML node exposing ``find_all`` / ``find``
            (presumably a BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.  The outcome of the insert is only printed.

    Raises:
        IndexError / AttributeError: If the expected ``<a>`` or ``<p>``
            elements are missing.  Only ``insertData`` errors are caught.
    """
    result = {}
    a_title = item.find_all('a')

    result['disasterid'] = '10201'  # news category: storm surge
    result['link'] = 'http://www.oceanguide.org.cn' + a_title[0]['href']  # news link
    result['title'] = a_title[0].get_text().strip()  # news title
    # Keep digits only so the parser receives a compact digit string.
    # Raw string: "\D" in a normal literal is an invalid escape sequence.
    time_str1 = re.sub(r"\D", "", item.find('p').get_text().strip())
    datetime_struct1 = parser.parse(time_str1)
    releaseTime = datetime_struct1.strftime('%Y-%m-%d %H:%M:%S')
    result['releaseTime'] = releaseTime  # release time
    result['originalText'] = get_original(result['link'])  # article text
    result['source'] = get_source(result['link'])  # news source
    # Body first, then title, is the text handed to the place lookup.
    originalText = result['originalText'] + ',' + result['title']
    latlngadd_tuple = address.placeMany(originalText)
    result['place'] = latlngadd_tuple[0]  # place of occurrence
    result['longitude'] = str(latlngadd_tuple[1])  # longitude of the place
    result['latitude'] = str(latlngadd_tuple[2])  # latitude of the place
    result['strength'] = ''  # disaster strength
    result['occurTime'] = releaseTime  # occurrence time
    result['injured'] = '0'  # number of injured
    result['death'] = '0'  # number of deaths
    result['loss'] = '0'  # economic loss
    result['pictures'] = ''  # multiple paths are separated by semicolons
    result['more'] = ''  # special field
    result['regional'] = '国内'
    result['province'] = latlngadd_tuple[3]  # first-level administrative division
    result['country'] = latlngadd_tuple[4]  # country of occurrence
    result['current_website'] = '中国海洋预报网'  # website the record came from
    # NOTE(review): occurTime above is copied from the release time, yet this
    # flag says it is not a substitute -- confirm the intended value.
    result['isreleasetime'] = '0'  # whether occurTime is replaced by the release time
    result['isrellonandlat'] = '0'
    # Secondary payload carrying only the textual fields.
    resultSun = {}
    resultSun['title'] = result['title']
    resultSun['originalText'] = result['originalText']
    resultSun['pictures'] = result['pictures']

    try:
        title = 'stormSurge_ZH001'
        res = postgreCommand.insertData(result, resultSun, title)
        if res == 1:
            print(title, '数据插入成功!')
        elif res == 0:
            print(title, '数据更新成功!')
    except Exception as e:
        # Best effort: a failed insert is reported, not propagated.
        print("插入数据失败", str(e))