def crawlarea():
    """Crawl area records from Tencent, geocode non-country rows via Baidu,
    and persist them one by one.

    Returns:
        bool: True when every row was written, False when nothing was
        fetched or a write raised.
    """
    dataList = readnAreaFromTencent()
    if not dataList:
        logger.warning('没有采集到数据')
        return False
    try:
        logger.info('开始写入数据...')
        for item in dataList:
            # Country-level rows keep their existing coordinates; only
            # province/city rows are geocoded.
            if item.level != 'country':
                pos = readPositionFromBaidu(item.name, item.parentName)
                if pos:
                    item.longitude = pos["lng"]
                    item.latitude = pos["lat"]
            db.session.add(item)
            # Commit per row so a late failure keeps earlier rows.
            db.session.commit()
        logger.info('写入数据完成...')
        return True
    except BaseException as e:
        logger.error('抓取发生异常,' + str(e))
        return False
def readnCoVFromTencent():
    """Fetch the latest epidemic statistics from the Tencent API.

    Returns:
        dict: {'data': records, 'updateTime': ...}; 'data' is empty when
        the remote data is not newer than the stored timestamp or a
        request/conversion step fails.
    """
    logger.info('开始抓取疫情数据')
    fallback = {'data': [], 'updateTime': datetime(2020, 1, 1)}
    try:
        lastTime = getLastestUpdateTime()
        url = 'https://service-f9fjwngp-1252021671.bj.apigw.tencentcs.com/release/pneumonia'
        response = requests.get(url, timeout=10)
        payload = response.json()['data']
        totalData = convertTotalData(payload['statistics'])
        # Nothing new arrived — skip the write path entirely.
        if totalData.updateTime <= lastTime:
            logger.warning('数据未更新, ' + str(totalData.updateTime))
            return fallback
        records = [totalData]
        records.extend(convertProvinceList(payload['listByArea'], totalData.updateTime))
        records.extend(convertOtherCountryList(payload['listByOther'], totalData.updateTime))
        return {'data': records, 'updateTime': totalData.updateTime}
    except Exception as e:
        logger.error('readnCoVFromTencent error, ' + str(e))
        return fallback
def cachedata(startDate=None, endDate=None):
    """Re-store day-level data for each day in [startDate, endDate).

    Generalizes the previously hard-coded one-day window; both bounds
    default to the original values so existing callers are unchanged.

    Args:
        startDate (datetime | None): first day to transfer (inclusive);
            defaults to 2020-02-03.
        endDate (datetime | None): stop day (exclusive);
            defaults to 2020-02-04.
    """
    if startDate is None:
        startDate = datetime(2020, 2, 3, 0, 0, 0, 0)
    if endDate is None:
        endDate = datetime(2020, 2, 4, 0, 0, 0, 0)
    oneDay = timedelta(1)
    while startDate < endDate:
        logger.info('转存%s数据', startDate)
        queryOneDay(startDate)
        startDate = startDate + oneDay
def realtime(level, name):
    """Return the most recent data log for the given area as JSON.

    Args:
        level: area level understood by getDatalogsQuery.
        name: area name to look up.

    Returns:
        flask JSON response: code -1 on bad params, else code 0 with the
        newest record (by updateTime) converted to a dict.
    """
    logger.info('level=%s, name=%s', level, name)
    # (removed unused `dataList = []` local)
    query = getDatalogsQuery(level, name)
    if not query:
        return jsonify(code=-1, msg="param error")
    # Newest first; take the single latest row.
    datalog = query.order_by(DataLogs.updateTime.desc()).first()
    return jsonify(code=0, data=dataLogToDict(datalog))
def datalogs(level, name):
    """Return every data log for the given area, oldest first, as JSON.

    Args:
        level: area level understood by getDatalogsQuery.
        name: area name to look up.

    Returns:
        flask JSON response: code -1 on bad params, else code 0 with all
        records converted to dicts in ascending updateTime order.
    """
    logger.info('level=%s, name=%s', level, name)
    query = getDatalogsQuery(level, name)
    if not query:
        return jsonify(code=-1, msg="param error")
    # (removed redundant `dataList = []` that was immediately overwritten)
    dataList = query.order_by(DataLogs.updateTime).all()
    result = [dataLogToDict(item) for item in dataList]
    return jsonify(code=0, data=result)
def readOverallDataFromIsaaclin():
    """Fetch the latest global (overall) statistics from the Isaaclin API.

    Returns:
        The converted overall record, or [] on any error.
        NOTE(review): the empty-list error sentinel has no `.updateTime`;
        callers must handle it (see readnCovFromIsasclin).
    """
    logger.info('抓取全局疫情数据')
    try:
        url = 'https://lab.isaaclin.cn/nCoV/api/overall?latest=1'
        r = requests.get(url, timeout=10)
        dataList = r.json()['results']
        return convertOverallDataList(dataList[0])
    except Exception as e:
        # Fixed: the log message previously named a non-existent function
        # ('readnOverallDataFromIsaaclin').
        logger.error('readOverallDataFromIsaaclin error,' + str(e))
        return []
def updateToDayCaches(datalogList):
    """Write every record in *datalogList* into the day-cache table.

    Returns:
        bool: True on success, False when any single write raised.
    """
    logger.info('开始更新到缓存表...')
    try:
        for entry in datalogList:
            updateOneDayCachesLog(entry)
    except BaseException as e:
        logger.error('写入缓存表发生异常' + str(e))
        return False
    logger.info('完成缓存表更新')
    return True
def queryOneDay(theday):
    """Snapshot one day's newest DataLogs rows into the DayCaches table.

    For the 24-hour window starting at *theday*, takes the latest log per
    (country, province, city) and stores a copy stamped with *theday*.
    """
    dayStart = theday.strftime('%Y-%m-%d')
    dayEnd = (theday + timedelta(1)).strftime('%Y-%m-%d')
    logger.info('queryOneDay, %s, %s', dayStart, dayEnd)
    areaCols = (DataLogs.countryName, DataLogs.provinceName, DataLogs.cityName)
    rows = (DataLogs.query
            .distinct(*areaCols)
            .filter(and_(DataLogs.updateTime > dayStart,
                         DataLogs.updateTime < dayEnd))
            .order_by(*areaCols, DataLogs.updateTime.desc())
            .all())
    logger.info('queryOneDay, count %d', len(rows))
    for row in rows:
        cached = DayCaches(countryName=row.countryName,
                           provinceName=row.provinceName,
                           cityName=row.cityName,
                           confirmedCount=row.confirmedCount,
                           suspectedCount=row.suspectedCount,
                           curedCount=row.curedCount,
                           deadCount=row.deadCount)
        # Stamp the cache row with the day being snapshotted.
        cached.updateTime = theday
        db.session.add(cached)
        db.session.commit()
def readProvinceDataFromIsaaclin(updateTime=None):
    """Fetch the full province-level history from the Isaaclin API.

    Args:
        updateTime: accepted for compatibility — readnCovFromIsasclin calls
            this function with the overall update time, which previously
            raised TypeError. Currently unused here.
            TODO(review): consider forwarding it to convertProvinceList.

    Returns:
        list: converted province records, or [] on any error.
    """
    logger.info('开始抓取疫情数据')
    try:
        url = 'https://lab.isaaclin.cn/nCoV/api/area?latest=0'
        r = requests.get(url, timeout=10)
        dataList = r.json()['results']
        return convertProvinceList(dataList)
    except Exception as e:
        # Fixed: the log message previously named a non-existent function
        # ('readnProvinceDataFromIsaaclin').
        logger.error('readProvinceDataFromIsaaclin error,' + str(e))
        return []
def testsql():
    """Ad-hoc check of a raw-text IN clause against the Area table."""
    names = ["'武汉'", "'深圳'"]
    clause = f"name in ({','.join(names)})"
    rows = Area.query.filter(text(clause)).all()
    logger.info('count is %d', len(rows))
    for row in rows:
        logger.info(row.to_json())
def readnAreaFromTencent():
    """Fetch province and other-country area records from the Tencent API.

    Returns:
        list: converted area records (provinces first, then other
        countries), or [] on any error.
    """
    logger.info('开始抓取区域数据')
    try:
        url = 'https://service-f9fjwngp-1252021671.bj.apigw.tencentcs.com/release/pneumonia'
        payload = requests.get(url, timeout=10).json()['data']
        areas = [*convertProvinceList(payload['listByArea']),
                 *convertOtherCountryList(payload['listByOther'])]
        return areas
    except Exception as e:
        logger.error('readnAreaFromTencent error, ' + str(e))
        return []
def crawlprovincehistory():
    """Download the province history from Isaaclin and persist every record.

    Returns:
        bool: True when all rows were written, False otherwise.
    """
    records = readProvinceDataFromIsaaclin()
    if not records:
        logger.warning('没有采集到数据')
        return False
    try:
        logger.info('开始写入数据...')
        for record in records:
            db.session.add(record)
            # Commit per row so a late failure keeps earlier rows.
            db.session.commit()
        logger.info('写入数据完成...')
        return True
    except BaseException as e:
        logger.error('抓取发生异常, ' + str(e))
        return False
def readnCovFromIsasclin():
    """Fetch overall + province epidemic data from the Isaaclin API.

    Returns:
        dict: {'data': records, 'updateTime': ...}; 'data' is empty when
        the remote data is stale or any step fails.
    """
    logger.info('开始抓取疫情数据')
    result = {'data': [], 'updateTime': datetime(2020, 1, 1)}
    try:
        lastTime = getLastestUpdateTime()
        totalData = readOverallDataFromIsaaclin()
        # readOverallDataFromIsaaclin returns [] on failure; bail out
        # explicitly instead of relying on the AttributeError below being
        # caught by the broad except.
        if not totalData:
            return result
        if totalData.updateTime <= lastTime:
            logger.warning('数据未更新, ' + str(totalData.updateTime))
            return result
        dataList = [totalData]
        time.sleep(1)  # be polite between the two API calls
        dataList.extend(readProvinceDataFromIsaaclin(totalData.updateTime))
        return {'data': dataList, 'updateTime': totalData.updateTime}
    except Exception as e:
        # Fixed: the log message was copy-pasted from the Tencent reader
        # ('readnCoVFromTencent error').
        logger.error('readnCovFromIsasclin error, ' + str(e))
        return result
def readPositionFromBaidu(name, parent):
    """Geocode *name* (within *parent*) via the Baidu geocoding API.

    Returns:
        The {'lng': ..., 'lat': ...} location dict, or None when the
        request fails, the API reports an error status, or the resolved
        level is coarser than country/province/city/district.
    """
    logger.info('读取%s %s经纬度', parent, name)
    try:
        url = f'http://api.map.baidu.com/geocoding/v3/?address={name}&city={parent}&output=json&ak=<your key>'
        response = requests.get(url, timeout=10)
        payload = response.json()
        if payload["status"] != 0:
            logger.error('读取%s %s经纬度信息失败, %s', parent, name, payload)
            return None
        geo = payload['result']
        # Reject matches resolved at a level too coarse/fine to be useful.
        if geo["level"] not in ("国家", "省份", "城市", "区县"):
            logger.error('读取%s经纬度信息失败,返回数据级别不对, level=%s', name, geo["level"])
            return None
        return geo["location"]
    except Exception as e:
        logger.error('readPositionFromBaidu error, %s, %s,', name, str(e))
        return None
def beforeRequest():
    """Flask before-request hook: record each API call in the ApiLog table.

    Expects paths shaped like /<api>/<param>/...; skips malformed paths
    and calls to the apilog endpoint itself (avoids self-logging).
    Best-effort: any failure is logged and swallowed so the request
    still proceeds.
    """
    try:
        paths = request.path.split('/')
        if len(paths) <= 2:
            # Fixed typo in the log message: was 'illigle'.
            logger.info('beforeRequest, illegal path, %s', paths)
            return
        # Hoisted above the field building: no work needed for apilog calls.
        if paths[1] == 'apilog':
            return
        aLog = ApiLog(logTime=datetime.now(),
                      api=paths[1],
                      params=' '.join(paths[2:]),
                      remoteAddr=request.remote_addr)
        db.session.add(aLog)
        db.session.commit()
    except BaseException as e:
        logger.error('beforeRequest 异常, %s, %s', request, str(e))
        return
def incrementlogs(level, name):
    """Return per-day increments (deltas versus the previous day) as JSON.

    The first day keeps its absolute counts and serves as the baseline.
    """
    logger.info('level=%s, name=%s', level, name)
    if level not in ['country', 'province', 'city']:
        return jsonify(code=-1, msg="not supported level")
    dayCountList = queryDayLogs(level, name)
    fields = ('confirmedCount', 'suspectedCount', 'curedCount', 'deadCount')
    increments = []
    prev = None
    for day in dayCountList:
        entry = day.copy()
        if prev is not None:
            # Delta against the previous day's absolute counts.
            for field in fields:
                entry[field] = day[field] - prev[field]
        increments.append(entry)
        prev = day
    return jsonify(code=0, data=increments)
def do_crawl():
    """Crawl the latest epidemic data and persist it plus the day caches.

    Returns:
        bool: True when new data was written, False otherwise.
    """
    crawled = readnCovFromIsasclin()
    records = crawled['data']
    if not records:
        logger.warning('没有采集到数据')
        return False
    try:
        logger.info('开始写入数据...')
        # The cache table must be updated before the raw logs commit.
        if not updateToDayCaches(records):
            return False
        for record in records:
            db.session.add(record)
            db.session.commit()
        updateUpdateTime(crawled['updateTime'])
        logger.info('写入数据完成...')
        return True
    except BaseException as e:
        logger.error('抓取发生异常' + str(e))
        return False
def daylogs(level, name):
    """Return the daily aggregated logs for an area as JSON."""
    logger.info('level=%s, name=%s', level, name)
    if level not in ('country', 'province', 'city'):
        return jsonify(code=-1, msg="not supported level")
    return jsonify(code=0, data=queryDayLogs(level, name))
def testupdate():
    """Ad-hoc check: bump the confirmed count on one cached day row."""
    condition = and_(DayCaches.countryName == '全球',
                     DayCaches.updateTime == '2020-02-02')
    dayLog = DayCaches.query.filter(condition).first()
    logger.info(dayLog.to_json())
    dayLog.confirmedCount = 14411
    db.session.commit()
def allAreaData(level, date):
    """Return cached per-area counts for *date* with coordinates attached.

    Args:
        level: 'city' for a flat list of cities, 'province' for provinces
            with nested city children; anything else is rejected.
        date: 'YYYY-MM-DD' string; future dates are rejected.

    Returns:
        flask JSON response containing only areas with a known position;
        areas without coordinates are dropped.
    """

    def convertCity(data):
        # Flatten a city cache row to a plain dict.
        return {
            "name": data.cityName,
            "confirmedCount": data.confirmedCount,
            "suspectedCount": data.suspectedCount,
            "curedCount": data.curedCount,
            "deadCount": data.deadCount
        }

    def convertProvinceList(dataList):
        # Group rows by province: a row without a cityName carries the
        # province totals; rows with one become that province's children.
        provinceDict = {}
        provinceList = []
        for item in dataList:
            if not item.provinceName:
                continue
            if item.provinceName not in provinceDict:
                province = {'children': []}
                provinceDict[item.provinceName] = province
                provinceList.append(province)
            provinceObj = provinceDict[item.provinceName]
            if not item.cityName:
                provinceObj['name'] = item.provinceName
                provinceObj['confirmedCount'] = item.confirmedCount
                provinceObj['suspectedCount'] = item.suspectedCount
                provinceObj['curedCount'] = item.curedCount
                provinceObj['deadCount'] = item.deadCount
            else:
                provinceObj['children'].append(convertCity(item))
        return provinceList

    # (removed unused inner helper convertProvince and unused locals
    # startDate/endDate — the query filters on the exact date string)
    logger.info('allAreaData %s %s', level, date)
    if strToDatetime(date) > datetime.now():
        return jsonify(code=-1, msg="not supported error.")
    dataList = DayCaches.query.filter(DayCaches.updateTime == date).all()
    if level == 'city':
        dataList = [convertCity(item) for item in dataList if item.cityName]
    elif level == 'province':
        dataList = convertProvinceList(dataList)
    else:
        return jsonify(code=-1, msg="not supported error.")
    posList = {}
    if len(dataList):
        posList = getPositionList(
            ['\'' + item["name"] + '\'' for item in dataList])
    result = []
    for data in dataList:
        if data["name"] not in posList:
            # Fixed: log message was truncated ('pos not ').
            logger.info('pos not found, %s', data["name"])
            continue
        data["lng"] = posList[data["name"]].longitude
        data["lat"] = posList[data["name"]].latitude
        result.append(data)
    return jsonify(code=0, data=result)