        # print(i.split(',')[4], end=' ')
        # print(i.split(',')[5])
        # Keep a subset of the comma-separated hourly fields (indices 0, 2-5).
        data3 = [
            i.split(',')[0],
            i.split(',')[2],
            i.split(',')[3],
            i.split(',')[4],
            i.split(',')[5]
        ]
        print(data3)
        data4.append(data3)

    # One document per city: the city name plus its 24 hourly records.
    weather_data = {'city': city, '24hour_weather': data4}
    db_utils.save_one_to_mongo('24hour_weather', weather_data)
    print('save to mongo')


if __name__ == "__main__":
    db_util = DB_Utils()
    db_util.drop_collection('24hour_weather')  # start from an empty collection
    web_data_list = []
    for key in cities:
        url = get_city_weather_url(key)
        web_data = get_weather_html(url)
        get_1day_weather_data(web_data)
        # web_data_list.append(web_data)

    # get_weather_data(web_data)
    # pool = Pool(1)
    # pool.map(get_1day_weather_data, web_data_list)
    # pool.close()
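# The DB_Utils helper used throughout these scripts is defined elsewhere in the
# project. Below is a minimal sketch, assuming pymongo against a local MongoDB;
# the host, port and database name ('weather') are assumptions, and only the
# three methods actually called above (save_one_to_mongo, drop_collection,
# query_all) are shown.
import pymongo


class DB_Utils:
    def __init__(self, host='localhost', port=27017, db_name='weather'):
        self.client = pymongo.MongoClient(host, port)
        self.db = self.client[db_name]

    def save_one_to_mongo(self, collection, document):
        # Insert a single document into the named collection.
        self.db[collection].insert_one(document)

    def drop_collection(self, collection):
        # Drop the whole collection so a fresh crawl starts empty.
        self.db[collection].drop()

    def query_all(self, collection):
        # Return a cursor over every document in the collection.
        return self.db[collection].find()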
    f.write('}')
    f.close()


# Parse the city-list XML with BeautifulSoup, extract each city's info and store it in MongoDB.
def get_citylist_to_mongo(citylist_xml):
    save_to_db = DB_Utils()
    soup = BeautifulSoup(citylist_xml, 'lxml')
    # print(soup.prettify())
    city_info = soup.select('d')
    for i in city_info:
        data = {
            'province': i.get('d4'),
            'city_name': i.get('d2'),
            'city_code': i.get('d1'),
            'city_piny': i.get('d3')
        }
        # Entries from '韩国' (South Korea) onwards are foreign cities; stop there.
        if i.get('d4') == '韩国':
            break
        save_to_db.save_one_to_mongo('citylist', data)
        print('saving city list to mongodb: #', i.get('d4'), i.get('d2'))


if __name__ == "__main__":
    # url = CITYLIST_URL
    db_utils = DB_Utils()
    db_utils.drop_collection('citylist')
    url = 'http://mobile.weather.com.cn/js/citylist.xml'
    citylist_xml = get_citylist_xml(url)
    get_citylist_to_mongo(citylist_xml)
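# get_citylist_xml is not shown in this section; a minimal sketch is given
# below, assuming the requests library. The UTF-8 encoding is an assumption
# about the city-list file; the real implementation may differ.
import requests


def get_citylist_xml(url):
    # Fetch the city-list XML and return it as text for BeautifulSoup.
    response = requests.get(url)
    response.encoding = 'utf-8'
    return response.text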
                                    weather_data_of_1day)
        print('save to mongo')
    except Exception:
        # Record cities whose page failed to parse so they can be retried later.
        print(province, city_name, city_code)
        db_utils1 = DB_Utils()
        error_city = {
            'province': province,
            'city_name': city_name,
            'city_code': city_code,
        }
        db_utils1.save_one_to_mongo('error', error_city)
        print('save error to mongo')


if __name__ == "__main__":
    db_util = DB_Utils()
    db_util.drop_collection('1day_weather_data')
    data = db_util.query_all('citylist')
    for i in data:
        url = city_code_to_url(i.get('city_code'))
        province = i.get('province')
        city_name = i.get('city_name')
        city_code = i.get('city_code')
        city_piny = i.get('city_piny')
        info = [city_name, city_code, city_piny]
        print(info)
        web_data = get_weather_html(url)
        get_1day_weather_data(province, city_name, city_code, city_piny, web_data)
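# city_code_to_url and get_weather_html are defined elsewhere in the project.
# The sketches below are assumptions: the URL pattern is a guess at how a
# city_code maps to its 1-day forecast page on www.weather.com.cn, and the
# UTF-8 encoding may need to be adjusted for the real pages.
import requests


def city_code_to_url(city_code):
    # e.g. '101010100' (Beijing) -> 'http://www.weather.com.cn/weather1d/101010100.shtml'
    return 'http://www.weather.com.cn/weather1d/{}.shtml'.format(city_code)


def get_weather_html(url):
    # Download one forecast page and return its HTML for parsing.
    response = requests.get(url)
    response.encoding = 'utf-8'
    return response.text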