def _crawl_AQI(self):
    """Crawl Baidu's search page for this city's AQI and return a result row.

    Returns a 7-element list:
        [city_code, city_name, day, hour, aqi_index, aqi_grade, aqi_time]
    The last three entries stay '' when the page lacks the AQI spans or
    when the request fails; failures are logged and recorded via ErrorLog.
    """
    now = datetime.datetime.now()
    day = now.strftime("%Y-%m-%d")
    hour = now.strftime("%H:%M:%S")
    # self.city is expected to be "<name>,<code>" -- TODO confirm with caller
    city_name, city_code = self.city.split(',')
    result = [city_code, city_name, day, hour, '', '', '']
    url_template = 'http://www.baidu.com/s?wd=%s空气质量指数'
    try:
        logger.info('city %s start crawling' % city_code)
        response = requests.get(url_template % city_name,
                                timeout=self.request_timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        aqi_index = soup.find('span', class_='op_pm25_graexp')
        # FIX: only dereference next_sibling when the index node exists.
        # The original accessed aqi_index.next_sibling unconditionally, so a
        # missing AQI span raised AttributeError, logged a spurious error,
        # and also threw away aqi_time even though the guards below show
        # missing nodes were meant to degrade to ''.
        aqi_grade = aqi_index.next_sibling if aqi_index else None
        aqi_time = soup.find('span', class_='op_pm25_date')
        result[-3:] = [
            aqi_index.text.encode('utf8') if aqi_index else '',
            aqi_grade.text.encode('utf8') if aqi_grade else '',
            aqi_time.text.encode('utf8') if aqi_time else '',
        ]
    except Exception as e:
        message = 'city %s failed with cause: %s' % (city_code, str(e))
        logger.exception(message)
        ErrorLog.create(message)
    return result
def migrate_file(path, data_type):
    """Migrate a legacy file at *path* into the database.

    data_type == 'data':
        CSV with a header row; column 0 is a city name. Each row is
        prefixed with the city code looked up in city_dict ('' if unknown)
        and the rows are bulk-inserted via CityAQI.create_bulk.
    data_type == 'log':
        Plain-text log file; every line containing 'ERROR' is re-created
        as an ErrorLog entry carrying its original (GMT+8) timestamp.
    Any other data_type is silently ignored.
    """
    if data_type == 'data':
        records = []
        # FIX: open in text mode -- Python 3's csv.reader requires str
        # lines, and 'rb' would hand it bytes.
        with open(path, 'r') as f:
            reader = csv.reader(f)
            next(reader)  # skip the header row
            for line in reader:
                record = line[:]
                record.insert(0, city_dict.get(line[0], ''))
                records.append(record)
        CityAQI.create_bulk(records)
    elif data_type == 'log':
        # FIX: text mode so 'ERROR' in line compares str against str
        # (bytes lines from 'rb' never match a str needle in Python 3).
        with open(path, 'r') as f:
            for line in f:
                if 'ERROR' not in line:
                    continue
                # maxsplit=1 guards against 'ERROR :' also appearing
                # inside the message text (unbounded split would raise).
                times, message = line.strip().split('ERROR :', 1)
                date, time = times.split(' ')[:2]
                year, month, day = date.split('-')
                # NOTE(review): assumes the time field is HH:MM:SS with no
                # millisecond suffix -- TODO confirm against the log format.
                hour, minute, second = time.split(':')
                # FIX: qualify as datetime.datetime -- the sibling crawler
                # uses datetime.datetime.now(), so the module is imported as
                # `import datetime` and the original bare datetime(...) call
                # would raise "module object is not callable".
                date_time = datetime.datetime(
                    int(year), int(month), int(day),
                    int(hour), int(minute), int(second),
                    tzinfo=GMT8())
                ErrorLog.create(message.strip(), date_time=date_time)
                print('migrate one error log, log time is %s' % date_time)