def get(self):
    """Flight infection-risk prediction.

    Query args:
        flynum: flight number (optional)
        area:   area/place name (optional)
    At least one of the two must be supplied, otherwise PARAM_ERROR.

    :return: pretty_result payload; on success `forcast` is the rounded
             risk, plus dep/arr cities when the lookup was by flight number.
    """
    self.parser.add_argument('flynum', type=str, location='args', required=False, default=None)
    self.parser.add_argument('area', type=str, location='args', required=False, default=None)
    args = self.parser.parse_args()
    try:
        if not args.flynum and not args.area:
            return pretty_result(Code.PARAM_ERROR)
        risk_result = flight_risk(args.flynum, args.area)
        if risk_result:
            # isinstance is the idiomatic type check (was: type(...) == tuple)
            if isinstance(risk_result, tuple):
                # flight-number lookup also yields departure/arrival cities
                depCity, arrCity, risk = risk_result
                ret = {
                    'depCity': depCity,
                    'arrCity': arrCity,
                    'forcast': round(risk, 2)
                }
            else:
                ret = {'forcast': round(risk_result, 2)}
            return pretty_result(Code.OK, data=ret)
        return pretty_result(Code.CUSTOM_ERROR, msg='航班或地点暂未收录或不存在')
    except Exception as e:
        logRecord(f'when request /pneumonia/flight-risk: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
def request_html(self, query):
    """Fetch the HTML page for *query* from the configured source.

    Retries up to 5 times on request failure, sleeping 1s between
    attempts.

    :param query: value interpolated into the source URL template
    :return: response body text on HTTP 200, otherwise None
    """
    url = self.SOURCES.get(self.source)
    headers = self.HEADERS.get(self.source)
    assert url
    count = 0
    while count < 5:
        try:
            resp = requests.get(url.format(query), headers=headers, timeout=2)
            break
        # was a bare `except:` — that also swallowed KeyboardInterrupt /
        # SystemExit; RequestException covers timeouts and connection errors
        except requests.RequestException:
            logRecord(f'{self.source} request timeout!', 'error')
            time.sleep(1)
            count += 1
    else:
        # all 5 attempts failed
        return None
    if resp.status_code == 200:
        return resp.text
    logRecord(f'ERROR! {resp.status_code}!', 'error')
    return None
def inner(*args, **kwargs):
    """Invoke the wrapped function and log its wall-clock duration."""
    started = time.time()
    result = f(*args, **kwargs)
    elapsed = round(time.time() - started, 2)
    logRecord('[{0}] {1}() called, time delta: {2}s'.format(
        ctime(), f.__name__, elapsed))
    return result
def get(self):
    """Return the latest update information from the interface layer."""
    args = self.parser.parse_args()
    try:
        result = interface.show_update()
    except Exception as e:
        logRecord(f'when GET /pneumonia/hit: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=result)
def get(self):
    """Run the epidemic prediction model and return its output."""
    args = self.parser.parse_args()
    try:
        prediction = predict()
    except Exception as e:
        logRecord(f'when request /pneumonia/prediction: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=prediction)
def get(self):
    """Answer a news query via the interface layer.

    Query args:
        news: free-text news string (whitespace-trimmed)
    """
    self.parser.add_argument('news', type=str, location='args', trim=True)
    args = self.parser.parse_args()
    try:
        ret = interface.get_answer(args.news)
        return pretty_result(Code.OK, data=ret)
    except Exception as e:
        # was logged as "PUT" — this handler serves GET
        logRecord(f'when GET /pneumonia/entitylink: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
def get(self):
    """Answer a natural-language question through the QA interface."""
    self.parser.add_argument('question', type=str, required=True, trim=True, location='args')
    args = self.parser.parse_args()
    try:
        answer = interface.QA(args.question)
    except Exception as e:
        # NOTE(review): log path says /pneumonia/hit but this handler does QA —
        # looks copy-pasted; confirm the actual route before changing it.
        logRecord(f'when GET /pneumonia/hit: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=answer)
def wrapper(*args, **kwargs):
    """Run func under a SIGALRM-based timeout of *interval* seconds.

    Returns func's result, or None when the timeout fires (the
    TimeoutError is logged, not propagated).
    """
    try:
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(interval)  # deliver SIGALRM after `interval` seconds
        try:
            result = func(*args, **kwargs)
        finally:
            # was only cancelled on the success path — a non-TimeoutError
            # raised by func previously left the alarm armed, so SIGALRM
            # could fire later in unrelated code
            signal.alarm(0)
        return result
    except TimeoutError as e:
        logRecord(e.msg, level='error')
def get(self):
    """Re-import the labelled entity sheet into MongoDB."""
    # NOTE(review): 'nums' is parsed but never used below — confirm intent.
    self.parser.add_argument('nums', type=int, location='args', default=0)
    args = self.parser.parse_args()
    try:
        data_path = f'{file_path}/entities_labeled.xls'
        result = interface.update_mongo(data_path, file_path)
    except Exception as e:
        logRecord(f'when GET /pneumonia/hit: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=result)
def get(self):
    """Extract entity relations from the page at the given URL."""
    for arg_name in ('url', 'lang'):
        self.parser.add_argument(arg_name, type=str, location='args', required=True, trim=True)
    args = self.parser.parse_args()
    try:
        relations = interface.get_entity_relation(args.url, args.lang)
    except Exception as e:
        logRecord(f'when GET /pneumonia/entity: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=relations)
def get(self):
    """List feedback records, sorted by the requested field and order."""
    self.parser.add_argument('cla', type=str, location='args', required=False,
                             default='Covid19_Datasets_dingYue')
    self.parser.add_argument('sortby', type=str, location='args', required=False,
                             default='date')
    self.parser.add_argument('order', type=int, location='args', required=False,
                             default=-1)
    args = self.parser.parse_args()
    try:
        records = query_feedback(args.cla, args.sortby, args.order)
    except Exception as e:
        logRecord(f'when GET /feedback: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.DB_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=records)
def get(self):
    """Look up a single entity; `mini` toggles a reduced payload."""
    self.parser.add_argument('entity', type=str, location='args', required=True, trim=True)
    self.parser.add_argument('mini', type=int, location='args', default=0)
    args = self.parser.parse_args()
    try:
        found = interface.query_entity(args.entity, args.mini)
    except Exception as e:
        logRecord(f'when GET /pneumonia/entityquery: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=found)
def delete(self, id):
    """Delete the dataset identified by *id*."""
    args = self.parser.parse_args()
    try:
        del_dataset_by_id(id)
    except Exception as e:
        logRecord(f'when DELETE /pneumonia/dataset: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.DB_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=[])
def post(self): self.parser.add_argument("text", type=str, location='json', required=True, trim=True) args = self.parser.parse_args() try: ret = magi_selenium.run(args.get('text')) return pretty_result(Code.OK, data=ret) except Exception as e: logRecord(f'when request /mg/: {str(e)}', 'error', args.debug) return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
def put(self, id):
    """Edit the dataset identified by *id* with the posted payload."""
    self.parser.add_argument("data", type=dict, location="json", required=True)
    args = self.parser.parse_args()
    try:
        updated = edit_dataset_by_id(id, args.data)
    except Exception as e:
        logRecord(f'when PUT /pneumonia/dataset: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.DB_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=updated)
def get(self):
    """List every dataset, sorted by the requested field and order."""
    self.parser.add_argument('sortby', type=str, location='args', required=False, default='Time')
    self.parser.add_argument('order', type=int, location='args', required=False, default=1)
    args = self.parser.parse_args()
    try:
        datasets = query_all_dataset(args.sortby, args.order)
    except Exception as e:
        logRecord(f'when GET /pneumonia/dataset: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.DB_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=datasets)
def crawlNoKeywordNews(name: str, conditions: list) -> list:
    """Search news with expert name + each of [org, tag, academic_type],
    stopping at the first condition that yields results.

    :param name: expert name
    :param conditions: candidate extra keywords; entries may be None
    :return: list of news items, or an empty list when nothing matched
    """
    nc = NewsCrawler()
    for condition in conditions:
        if condition is None:
            continue
        ret = nc.experts_news(name, condition)
        if ret:
            logRecord(f'Search condition: {name} {condition}')
            return ret
    # was an implicit None; [] honours the declared return type and is
    # still falsy for callers that test `if ret:`
    return []
def get(self):
    """Return epidemic data for the requested scope.

    Query arg `type`: {"china": 0, "world": 1, "inflows": 2}
    """
    self.parser.add_argument('type', type=int, location='args', required=False, default=0)
    args = self.parser.parse_args()
    try:
        scope = self.type_dic.get(args.type, 'china')
        handler = getattr(interface, f"ncov_{scope}")
        payload = handler()
    except Exception as e:
        logRecord(f'when request /pneumonia/data: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=payload)
def get(self): self.parser.add_argument("flynum", type=str, location="args", required=True, trim=True) self.parser.add_argument("flydate", type=str, location="args", required=True, trim=True) args = self.parser.parse_args() try: data = FlightInfo(args.flynum, args.flydate).crawl() return pretty_result(Code.OK, data=data) except Exception as e: logRecord(f'when request /flight: {str(e)}', 'error', args.debug) return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
def post(self): self.parser.add_argument("ids", type=list, location="json", required=True, trim=True) self.parser.add_argument("en_kw", type=str, location="json", required=False, trim=True, default=None) args = self.parser.parse_args() try: ret = crawlExpertsEnNews(args.ids, args.get('en_kw')) return pretty_result(Code.OK, data=ret) except Exception as e: logRecord(f'when request /experts/news-en: {str(e)}', 'error', args.debug) return pretty_result(Code.UNKNOWN_ERROR, msg=e, debug=args.debug)
def put(self, id):
    """Change the state of one feedback record.

    option: {0: confirm, 1: delete, 2: un-confirm, 3: un-delete}

    :param id: feedback record id
    """
    self.parser.add_argument('option', type=int, location='json', required=False, default=0)
    args = self.parser.parse_args()
    try:
        updated = edit_feedback(id, args.option)
    except Exception as e:
        logRecord(f'when PUT /feedback: {str(e)}', level='error', debug=args.debug)
        return pretty_result(Code.DB_ERROR, msg=e, debug=args.debug)
    return pretty_result(Code.OK, data=updated)
def crawler(self, crawl=False):
    """Crawl overall and per-area nCoV statistics from 3g.dxy.cn.

    :param crawl: when False, return today's cached result from MongoDB
                  if one exists instead of hitting the site.
    :return: (overall, area) dicts; both empty when every attempt failed.
    """
    if not crawl:
        today = datetime.datetime.now().strftime('%m.%d')
        data = nCoV_dxy.find_one({'date': today})
        if data:
            logRecord('overall and area from db')
            return data['overall_new'], data['area_new']
    overall, area = {}, {}
    count = 0
    # bounded retry loop (was `while True` with a manual count == 20 break)
    while count < 20:
        self.crawl_timestamp = int(
            datetime.datetime.timestamp(datetime.datetime.now()) * 1000)
        try:
            headers = {'user-agent': random.choice(USER_AGENTS)}
            r = requests.get(url='https://3g.dxy.cn/newh5/view/pneumonia',
                             headers=headers, timeout=10)
        except Exception:
            count += 1
            time.sleep(1)
            logRecord('请求链接: https://3g.dxy.cn/newh5/view/pneumonia 失败', 'error')
            continue
        soup = BeautifulSoup(r.content, 'html.parser')
        # both payloads are embedded as inline <script> JSON blobs
        overall_information = re.search(
            r'\{("id".*?)\}\}',
            str(soup.find('script', attrs={'id': 'getStatisticsService'})))
        area_information = re.search(
            r'\[(.*)\]',
            str(soup.find('script', attrs={'id': 'getAreaStat'})))
        if not overall_information or not area_information:
            count += 1
            # NOTE(review): stdout print, unlike the logRecord calls above —
            # consider routing through logRecord for consistency
            print('dxy没抓到')
            time.sleep(2)
            continue
        overall = self.overall_parser(overall_information=overall_information)
        area = self.area_parser(area_information=area_information)
        break
    logRecord('Successfully crawled.')
    return overall, area