Пример #1
0
def get_citylist_to_mongo(citylist_xml):
    """Store the city entries found in *citylist_xml* in the 'citylist' collection.

    The markup is parsed with BeautifulSoup's 'lxml' parser and every ``<d>``
    element is visited in document order.  For each element the attributes
    ``d4``, ``d2``, ``d1`` and ``d3`` are saved as ``province``, ``city_name``,
    ``city_code`` and ``city_piny`` respectively.

    Processing stops at the first element whose ``d4`` attribute equals
    '韩国' (South Korea); that element and everything after it are not stored.

    Args:
        citylist_xml: Markup text of the city list.
    """
    store = DB_Utils()
    parsed = BeautifulSoup(citylist_xml, 'lxml')
    for node in parsed.select('d'):
        province = node.get('d4')
        # Sentinel entry: nothing from here on is imported.
        if province == '韩国':
            break
        city_name = node.get('d2')
        record = {
            'province': province,
            'city_name': city_name,
            'city_code': node.get('d1'),
            'city_piny': node.get('d3'),
        }
        store.save_one_to_mongo('citylist', record)
        print('正在将城市列表存储到mongodb: #', province, city_name)
Пример #2
0
def get_1day_weather_data(web_data):
    """Extract the 'hour3data' entries of a weather page and store them.

    The page is parsed twice: with lxml's ``etree`` to enumerate the
    ``<script>`` elements, and with BeautifulSoup to read the breadcrumb text
    (``.ctop .crumbs``) that is used as the city label.  For every script
    whose text contains ``hour3data``, the text after the last ``=`` is
    decoded as JSON and each comma-separated entry under its ``'1d'`` key is
    reduced to fields 0, 2, 3, 4 and 5 (field 1 is dropped).  The result is
    saved to the '24hour_weather' collection as
    ``{'city': ..., '24hour_weather': [[...], ...]}``.

    This is a best-effort operation: any exception, including a failure to
    parse the page, is printed and swallowed so that the caller can continue.

    Args:
        web_data: HTML markup of the weather page.
    """
    try:
        # Parsing is done inside the try block so that an unparsable page is
        # reported like any other failure instead of propagating to the caller.
        soup = BeautifulSoup(web_data, 'lxml')
        html = etree.HTML(web_data)
        scripts = html.xpath('//script')
        crumbs = soup.select('.ctop .crumbs')
        db_utils = DB_Utils()
        for script in scripts:
            script_text = script.text
            # Scripts without inline text (text is None) cannot hold the data.
            if script_text is None or 'hour3data' not in script_text:
                continue
            print(script_text)
            # The JSON literal is whatever follows the last '=' in the script.
            payload = json.loads(script_text.split('=')[-1])
            city = crumbs[0].text.replace('\n', '').replace(' ', '')
            print(city, '实时天气:')
            hourly_rows = []
            for entry in payload['1d']:
                fields = entry.split(',')
                row = [fields[0], fields[2], fields[3], fields[4], fields[5]]
                print(row)
                hourly_rows.append(row)
            weather_data = {'city': city, '24hour_weather': hourly_rows}
            db_utils.save_one_to_mongo('24hour_weather', weather_data)
            print('save to mongo')
    except Exception as e:
        print(e)
Пример #3
0
def get_1day_weather_data(web_data):
    """Extract the 'hour3data' entries of a weather page and store them.

    The page is parsed with BeautifulSoup ('lxml' parser).  For every
    ``<script>`` tag whose markup contains ``hour3data``, the text after the
    last ``=`` is decoded as JSON, and each comma-separated entry under its
    ``'1d'`` key is reduced to fields 0, 2, 3, 4 and 5 (field 1 is dropped).
    The rows are saved to the '24hour_weather' collection together with the
    city label taken from the first ``.ctop .crumbs`` element (newlines and
    spaces removed).

    No exception is handled here; parsing, JSON or indexing errors propagate
    to the caller.

    Args:
        web_data: HTML markup of the weather page.
    """
    page = BeautifulSoup(web_data, 'lxml')
    script_tags = page.select('script')
    crumbs = page.select('.ctop .crumbs')
    store = DB_Utils()
    for script in script_tags:
        if 'hour3data' not in str(script):
            continue
        # The JSON literal is whatever follows the last '=' in the script.
        payload = json.loads(script.text.split('=')[-1])
        city = crumbs[0].text.replace('\n', '').replace(' ', '')
        print(city, '实时天气:')
        hourly_rows = []
        for entry in payload['1d']:
            fields = entry.split(',')
            row = [fields[k] for k in (0, 2, 3, 4, 5)]
            print(row)
            hourly_rows.append(row)
        store.save_one_to_mongo(
            '24hour_weather',
            {'city': city, '24hour_weather': hourly_rows},
        )
        print('save to mongo')
Пример #4
0
        f.write('}')
        f.close()


# Parse the city-list XML with BeautifulSoup and store the extracted city
# information in MongoDB.
def get_citylist_to_mongo(citylist_xml):
    """Save every ``<d>`` entry of *citylist_xml* to the 'citylist' collection.

    Each stored record maps ``province``, ``city_name``, ``city_code`` and
    ``city_piny`` to the element's ``d4``, ``d2``, ``d1`` and ``d3``
    attributes.  Iteration ends, without storing, at the first element whose
    ``d4`` attribute is '韩国' (South Korea).

    Args:
        citylist_xml: Markup text of the city list.
    """
    # (record key, source attribute) pairs, in the order the keys are stored.
    field_sources = (
        ('province', 'd4'),
        ('city_name', 'd2'),
        ('city_code', 'd1'),
        ('city_piny', 'd3'),
    )
    mongo = DB_Utils()
    for entry in BeautifulSoup(citylist_xml, 'lxml').select('d'):
        row = {field: entry.get(attr) for field, attr in field_sources}
        if row['province'] == '韩国':
            break
        mongo.save_one_to_mongo('citylist', row)
        print('正在将城市列表存储到mongodb: #', entry.get('d4'), entry.get('d2'))


# Script entry point: rebuild the 'citylist' collection from the remote list.
if __name__ == "__main__":
    #url = CITYLIST_URL
    # drop_collection('citylist') runs before the import; presumably this
    # empties the collection so entries are not duplicated — confirm in DB_Utils.
    db_utils = DB_Utils()
    db_utils.drop_collection('citylist')
    url = 'http://mobile.weather.com.cn/js/citylist.xml'
    # get_citylist_xml is defined outside this view; its result is passed
    # straight to get_citylist_to_mongo as the markup to parse.
    citylist_xml = get_citylist_xml(url)
    get_citylist_to_mongo(citylist_xml)
Пример #5
0
'''
author : DannyWu
site   : www.idannywu.com
'''
from db import DB_Utils
from get_weather import *
# Ad-hoc script: query the '1day_weather_data' collection for records whose
# city_name is '蚌埠' (Bengbu) and print them with pretty_print.
db1 = DB_Utils()
arg = {'city_name': '蚌埠'}
result = db1.query_of_arg('1day_weather_data', arg)
# Column labels passed to pretty_print for the '24hour_weather' rows.
header = ['1', '2', '3', '4', '5']
for i in result:
    #print(i)
    #print(i.get('24hour_weather'))
    print(i.get('city'))
    # Two rows: the record's 'day_wea' value followed by its 'night_wea' value.
    weather = [i.get('day_wea'), i.get('night_wea')]

    # pretty_print is not defined in this view (presumably provided by the
    # star import from get_weather — confirm); it receives rows and labels.
    pretty_print(weather, ['1', '2', '3', '4'])
    print('24小时实时天气:')
    pretty_print(i.get('24hour_weather'), header)
#url = get_city_weather_url('蚌埠')
#web_data = get_weather_html(url)
#print(web_data)
#get_weather_data(web_data)
Пример #6
0
                #print(i.split(',')[4], end=' ')
                #print(i.split(',')[5])
                data3 = [
                    i.split(',')[0],
                    i.split(',')[2],
                    i.split(',')[3],
                    i.split(',')[4],
                    i.split(',')[5]
                ]
                print(data3)
                data4.append(data3)
            weather_data = {'city': city, '24hour_weather': data4}
            db_utils.save_one_to_mongo('24hour_weather', weather_data)
            print('save to mongo')


# Script entry point: re-crawl the weather page of every key in `cities`.
if __name__ == "__main__":
    # drop_collection('24hour_weather') runs before the crawl; presumably this
    # clears earlier results — confirm in DB_Utils.
    db_util = DB_Utils()
    db_util.drop_collection('24hour_weather')
    # Only referenced by the commented-out pool code below.
    web_data_list = []
    # `cities`, get_city_weather_url and get_weather_html are defined outside
    # this view. Pages are fetched and processed one at a time.
    for key in cities:
        url = get_city_weather_url(key)
        web_data = get_weather_html(url)
        get_1day_weather_data(web_data)
        #web_data_list.append(web_data)

    #get_weather_data(web_data)
    #pool = Pool(1)
    #pool.map(get_1day_weather_data,web_data_list)
    #pool.close()
Пример #7
0
def get_1day_weather_data(province, city_name, city_code, city_piny, web_data):
    """Parse one city's weather page and store the result in MongoDB.

    From the page (parsed with BeautifulSoup, 'lxml' parser) the function
    reads:

    * the first and second matches of ``.t .clearfix .wea``, ``.tem`` and
      ``.win span``, used as the day and night values respectively (for the
      wind element the ``title`` attribute is prepended to the text);
    * the first ``.sunUp`` and ``.sunDown`` texts;
    * every ``<script>`` tag whose markup contains ``hour3data``: the text
      after the last ``=`` is decoded as JSON and each comma-separated entry
      under ``'1d'`` is reduced to fields 0, 2, 3, 4 and 5.

    One record per matching script is saved to '1day_weather_data'.  If
    anything fails, the city is recorded in the 'error' collection instead
    and the function returns normally.

    Args:
        province: Stored verbatim in the saved record (and the error record).
        city_name: Stored verbatim in the saved record (and the error record).
        city_code: Stored verbatim in the saved record (and the error record).
        city_piny: Stored verbatim in the saved record.
        web_data: HTML markup of the city's weather page.
    """
    try:
        soup = BeautifulSoup(web_data, 'lxml')
        datas = soup.select('script')
        address = soup.select('.ctop .crumbs')

        # Run each selector once; index 0 is the day value, index 1 the night.
        wea = soup.select('.t .clearfix .wea')
        tem = soup.select('.t .clearfix .tem')
        win = soup.select('.t .clearfix .win span')
        day_wea = wea[0].text
        night_wea = wea[1].text
        day_tem = tem[0].text.replace('\n', '')
        night_tem = tem[1].text.replace('\n', '')
        day_win = win[0].get('title') + win[0].text
        night_win = win[1].get('title') + win[1].text
        sun_up = soup.select('.t .clearfix .sunUp')[0].text.replace('\n', '')
        sun_down = soup.select('.t .clearfix .sunDown')[0].text.replace(
            '\n', '')

        day_wea_list = [day_wea, day_tem, day_win, sun_up]
        night_wea_list = [night_wea, night_tem, night_win, sun_down]
        print(day_wea_list)
        print(night_wea_list)

        db_utils = DB_Utils()
        for data in datas:
            if 'hour3data' not in str(data):
                continue
            # The JSON literal is whatever follows the last '=' in the script.
            payload = json.loads(data.text.split('=')[-1])
            city = address[0].text.replace('\n', '').replace(' ', '')
            print(city, '实时天气:')
            hourly_rows = []
            for entry in payload['1d']:
                fields = entry.split(',')
                row = [fields[0], fields[2], fields[3], fields[4], fields[5]]
                print(row)
                hourly_rows.append(row)
            weather_data_of_1day = {
                'province': province,
                'city_name': city_name,
                'city_code': city_code,
                'city_piny': city_piny,
                'city': city,
                'day_wea': day_wea_list,
                'night_wea': night_wea_list,
                '24hour_weather': hourly_rows
            }
            db_utils.save_one_to_mongo('1day_weather_data',
                                       weather_data_of_1day)
            print('save to mongo')
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still stop the crawl instead of being logged as a bad city.
        print(province, city_name, city_code)
        print('error:', repr(exc))
        db_utils1 = DB_Utils()
        error_city = {
            'province': province,
            'city_name': city_name,
            'city_code': city_code,
        }
        db_utils1.save_one_to_mongo('error', error_city)
        print('save error to mongo')
Пример #8
0
                                           weather_data_of_1day)
                print('save to mongo')
    except:
        print(province, city_name, city_code)
        db_utils1 = DB_Utils()
        error_city = {
            'province': province,
            'city_name': city_name,
            'city_code': city_code,
        }
        db_utils1.save_one_to_mongo('error', error_city)
        print('save error to mongo')
        pass


# Script entry point: rebuild '1day_weather_data' from every stored city.
if __name__ == "__main__":
    # drop_collection('1day_weather_data') runs before the crawl; presumably
    # this clears earlier results — confirm in DB_Utils.
    db_util = DB_Utils()
    db_util.drop_collection('1day_weather_data')
    # Each item of the 'citylist' query result is read with .get() for the
    # keys 'province', 'city_name', 'city_code' and 'city_piny'.
    data = db_util.query_all('citylist')
    for i in data:
        # city_code_to_url and get_weather_html are defined outside this view.
        url = city_code_to_url(i.get('city_code'))
        province = i.get('province')
        city_name = i.get('city_name')
        city_code = i.get('city_code')
        city_piny = i.get('city_piny')
        # Progress output only; `info` is not used after being printed.
        info = [city_name, city_code, city_piny]
        print(info)
        web_data = get_weather_html(url)
        get_1day_weather_data(province, city_name, city_code, city_piny,
                              web_data)