def search(self, keyword: str):
    logger.info(f"Searching for: {keyword}")
    ret = []
    resp = self.get(self._search_api, params={"kw": keyword, "per_page": 100, "page": 1})  # take the first 100 results
    if resp.status_code != 200 or resp.json()["code"] != 1:
        logger.warning(f"Response error: {resp.status_code} {self._search_api}")
        return ret
    data = resp.json()
    anime_meta_list = data.get("data").get("data") if data else []
    for meta in anime_meta_list:
        anime = AnimeMetaInfo()
        anime.title = meta["name"]
        anime.cover_url = meta["pic"]
        anime.category = meta["type"]
        anime.detail_page_url = str(meta["vid"])
        anime.desc = meta["label"]
        ret.append(anime)
    return ret

def get_detail(self, detail_page_url: str):
    url = self._base_url + detail_page_url
    logger.info(f"Parsing detail page: {url}")
    resp = self.get(url)
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {url}")
        return AnimeDetailInfo()
    body = self.xpath(resp.text, '//div[@class="fire l"]')[0]
    anime_detail = AnimeDetailInfo()
    anime_detail.title = body.xpath("./div/h1/text()")[0]
    anime_detail.category = " ".join(body.xpath('.//div[@class="sinfo"]/span[3]/a/text()'))
    anime_detail.desc = body.xpath('.//div[@class="info"]/text()')[0].replace("\r\n", "").strip()
    anime_detail.cover_url = body.xpath('.//div[@class="thumb l"]/img/@src')[0]
    vc = VideoCollection()
    vc.name = "播放列表"
    video_blocks = body.xpath('.//div[@class="movurl"]//li')
    for block in video_blocks:
        video = Video()
        video.name = block.xpath("./a/text()")[0]
        video.raw_url = block.xpath("./a/@href")[0]  # '/v/3849-162.html'
        video.handler = "YHDMVideoHandler"
        vc.append(video)
    anime_detail.append(vc)
    return anime_detail

def disable_danmaku(self, danmaku: str) -> bool:
    """Disable a danmaku engine."""
    if danmaku in self.get_all_danmaku():
        logger.warning(f"Danmaku {danmaku} disabled")
        self._dict["danmaku"][danmaku] = False
        self._save()
        return True
    return False

def clean_db():
    if request.method == 'GET':
        result = show_db()
        return result
    if request.method == 'POST':
        logger.warning("{} requested a database cleanup, waiting for the server to process it".format(request.remote_addr))
        clean_status['clean_db'] = "waiting process"
        return "start clean db"

def disable_engine(self, engine: str) -> bool:
    """Disable an engine."""
    if engine in self.get_all_engines():
        logger.warning(f"Engine {engine} disabled")
        self._dict["engines"][engine] = False
        self._save()
        return True
    return False

def get_video_data(hash_key: str):
    """Proxy access through the API to fetch the video data stream."""
    video = self._anime_db.fetch(hash_key)
    if not video:
        return "URL Invalid"
    if not video.real_url:
        logger.warning("Real url not resolved yet, resolving it now")
        real_url = self._engine_mgr.get_video_url(video)
        video.real_url = real_url
        self._anime_db.update(hash_key, video)
    return self._engine_mgr.make_response_for(video)

def del_client_rules():
    if request.method == 'POST':
        del_info = request.get_data().decode('utf-8')
        del_id = json.loads(del_info)['id']
        del_result = api.web.del_rules(del_id)
        logger.warning("Rule {} deleted".format(del_id))
        return del_result
    if request.method == 'DELETE':
        del_result = api.web.del_rules("all")
        logger.warning("Defense rules reset")
        return del_result

def head(url: str, params=None, allow_redirects=True, **kwargs) -> requests.Response:
    """Wrapper around the HEAD method; follows 302 redirects by default, used to resolve the target's direct link."""
    try:
        logger.debug(f"url: {url}, params: {params}, allow_redirects: {allow_redirects}")
        kwargs.setdefault("timeout", 10)
        kwargs.setdefault("headers", HtmlParseHelper._headers)
        return requests.head(url, params=params, verify=False, allow_redirects=allow_redirects, **kwargs)
    except requests.Timeout as e:
        logger.warning(e)
        return requests.Response()
    except requests.RequestException:
        return requests.Response()

def get_city(ip, language="en", location=False):
    original_ip = ip  # keep the original address for the returned info, even if we fall back below
    try:
        response_data = GeoipDatabase.city(ip)
    except geoip2.errors.AddressNotFoundError:
        # Address not in the GeoIP database (a private address): geolocate the configured public IP instead.
        try:
            response_data = GeoipDatabase.city(config['client_ip'])
            logger.warning("The remote client's intranet is under attack")
        except Exception:
            response_data = GeoipDatabase.city(config['ip'][0])
            logger.warning("The local client's intranet is under attack")
    ip_info = {"ip": original_ip}
    try:
        ip_info["country"] = response_data.country.names[language]
        ip_info["city"] = response_data.city.names[language]
    except KeyError:
        # The requested language is not available for the city name: fall back to the default name,
        # and to the country name if the city has no name at all.
        ip_info["country"] = response_data.country.names[language]
        ip_info["city"] = response_data.city.name
        if ip_info["city"] is None:
            ip_info["city"] = ip_info["country"]
    if location:
        ip_info["longitude"] = response_data.location.longitude
        ip_info["latitude"] = response_data.location.latitude
    return ip_info

def post(url: str, data=None, html_encoding="utf-8", **kwargs) -> requests.Response:
    """Wrapper around the POST method; the page encoding defaults to utf-8."""
    try:
        logger.debug(f"url: {url}, data: {data}")
        kwargs.setdefault("timeout", 5)
        kwargs.setdefault("headers", HtmlParseHelper._headers)
        ret = requests.post(url, data, verify=False, **kwargs)
        ret.encoding = html_encoding
        return ret
    except requests.Timeout as e:
        logger.warning(e)
        return requests.Response()
    except requests.RequestException:
        return requests.Response()

def clean_mongo():
    clean_status['last_clean'] = int(time.time())
    src_iplist = []
    myclient = pymongo.MongoClient(config['mongo_url'], connect=False)
    mydb = myclient["mariodb"]
    alert_info = mydb['alert']
    coll_names = mydb.list_collection_names(session=None)
    for alert in alert_info.find():
        src_iplist.append(str(alert['src_ip']))
    for coll in coll_names:
        db = mydb[coll]
        del_id = []
        del_count = 0
        if coll == "fileinfo":
            del_count += db.find({"fileinfo.filename": {"$regex": "eve_.*json"}}).count()
            db.delete_many({"fileinfo.filename": {"$regex": "eve_.*json"}})
            del_count += db.find({"fileinfo.filename": {"$regex": "/api/"}}).count()
            db.delete_many({"fileinfo.filename": {"$regex": "/api/"}})
            del_count += db.find({"fileinfo.filename": {"$regex": "local.rules"}}).count()
            db.delete_many({"fileinfo.filename": {"$regex": "local.rules"}})
        if coll in ['alert', 'flow', 'stats']:
            continue
        logger.info("Cleaning collection {}".format(coll))
        for item in list(db.find().batch_size(500)[:]):
            try:
                if item['src_ip'] not in src_iplist:
                    del_id.append(item['_id'])
                else:
                    continue
            except Exception:
                del_id.append(item['_id'])
                continue
        for data_id in del_id:
            query = {"_id": data_id}
            delresult = db.delete_one(query)
            del_count += delresult.deleted_count
        logger.warning("Removed {} records".format(del_count))
    myclient.close()

def get(url: str, params=None, html_encoding="utf-8", **kwargs) -> requests.Response:
    """Wrapper around the GET method; the page encoding defaults to utf-8."""
    try:
        logger.debug(f"url: {url}, params: {params}")
        kwargs.setdefault("timeout", 5)
        kwargs.setdefault("headers", HtmlParseHelper._headers)
        ret = requests.get(url, params, verify=False, **kwargs)
        ret.encoding = html_encoding  # some pages still use encodings such as gb2312/gb18030, which must be set explicitly
        return ret
    except requests.Timeout as e:
        logger.warning(e)
        return requests.Response()
    except requests.RequestException:
        return requests.Response()

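# A hedged usage sketch (not part of the original project): the head/get/post
# wrappers above swallow network errors and return an empty requests.Response()
# (whose status_code is None), so callers are expected to check status_code
# before parsing. The function name and URL below are illustrative assumptions.
def example_fetch_search_json(keyword: str) -> dict:
    resp = get("https://example.com/api/search", params={"kw": keyword})
    if resp.status_code != 200:
        return {}  # network error or non-200 response: the wrapper already logged it
    return resp.json()
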
def search(self, keyword: str):
    logger.info(f"Searching for: {keyword}")
    resp = self.post(self._search_api, data={"userid": "", "key": keyword})
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {self._search_api}")
        return []
    anime_meta_list = resp.json().get("data")
    ret = []
    for meta in anime_meta_list:
        anime = AnimeMetaInfo()
        anime.title = meta["videoName"]
        anime.cover_url = meta["videoImg"]
        anime.category = meta["videoClass"]
        anime.detail_page_url = meta["videoId"]
        ret.append(anime)
    return ret

def get_dbfile():
    ftphost = settings.GO_FTPHOST
    ftpuser = settings.GO_FTPUSER
    ftppass = settings.GO_FTPPASS
    dbpass = settings.GO_DBPASS
    if ftphost is None or ftpuser is None or ftppass is None:
        if os.path.exists('URLs.mdb'):
            logger.info('No credentials in env, using local MDB database file')
            logger.warning('If this occurs outside development, contact an administrator')
            return 'URLs.mdb'
        else:
            raise Exception('FTP credentials not provided (GO_FTPHOST, GO_FTPUSER, GO_FTPPASS)')
    if dbpass is None:
        raise Exception('Database encryption password not provided (GO_DBPASS)')

    logger.info('Attempting connection to FTP')
    ftp = FTP(ftphost)
    ftp.login(user=ftpuser, passwd=ftppass)
    ftp.cwd('/dmis/')
    data = []
    ftp.dir('-t', data.append)
    filename = data[-1].split()[3]

    # check if we already have this file
    files = glob('URLs*zip')
    if filename in files and os.path.exists('URLs.mdb'):
        ftp.quit()
        return 'URLs.mdb'

    # clean up old files
    for f in files:
        os.remove(f)

    logger.info('Fetching %s' % filename)
    with open(filename, 'wb') as f:
        ftp.retrbinary('RETR ' + filename, f.write, 2014)
    ftp.quit()

    logger.info('Unzipping database file')
    zp = ZipFile(filename)
    zp.extractall('./', pwd=dbpass.encode('cp850', 'replace'))
    return 'URLs.mdb'

def get_real_url(self):
    """Get the video link via the video's play_id."""
    play_api = "http://service-agbhuggw-1259251677.gz.apigw.tencentcs.com/android/video/newplay"
    play_id = self.get_raw_url()
    secret_key = "zandroidzz"
    now = int(time.time() * 1000)  # 13-digit (millisecond) timestamp
    sing = secret_key + str(now)
    sing = md5(sing.encode("utf-8")).hexdigest()
    logger.info(f"Parsing real url for {play_id}")
    payload = {"playid": play_id, "userid": "", "apptoken": "", "sing": sing, "map": now}
    resp = self.post(play_api, data=payload)
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {play_api}")
        logger.debug(f"POST params: {payload}")
        return "error"
    real_url = resp.json()["data"]["videoplayurl"]
    logger.info(f"Video real url: {real_url}")
    return real_url

def get_detail(self, detail_page_url: str):
    resp = self.get(self._detail_api, params={"userid": "", "videoId": detail_page_url})
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {self._detail_api}")
        return AnimeDetailInfo()
    detail = resp.json().get("data")  # video detail info
    anime_detail = AnimeDetailInfo()
    anime_detail.title = detail["videoName"]
    anime_detail.cover_url = detail["videoImg"]
    anime_detail.desc = detail["videoDoc"].replace("\r\n", "")  # the full description
    anime_detail.category = detail["videoClass"]
    for play_list in detail["videoSets"]:
        vc = VideoCollection()  # the anime's video collection
        vc.name = play_list["load"]  # collection name, e.g. Line I, Line II
        for video in play_list["list"]:
            vc.append(Video(video["ji"], video["playid"], "ZZFunVideoHandler"))
        anime_detail.append(vc)
    return anime_detail

def get_real_url(self) -> str:
    url = "http://www.yhdm.tv/" + self.get_raw_url()
    logger.info(f"Parsing real url for {url}")
    resp = self.get(url)
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {url}")
        return "error"
    video_url = self.xpath(resp.text, '//div[@id="playbox"]/@data-vid')[0]  # "url$format"
    video_url = video_url.split("$")[0]  # "http://quan.qq.com/video/1098_ae4be38407bf9d8227748e145a8f97a5"
    if not video_url.startswith("http"):  # occasionally some videos are invalid
        logger.warning(f"This video is not valid: {video_url}")
        return "error"
    logger.debug(f"Redirect for {video_url}")
    resp = self.head(video_url, allow_redirects=True)  # resolving the direct link redirects twice
    logger.info(f"Video real url: {resp.url}")
    return resp.url  # the direct video link after the redirects

def parse_one_page(self, keyword: str, page: int):
    logger.info(f"Searching for {keyword}, page {page}")
    resp = self.get(self._search_api + "/" + keyword, params={"page": page})
    if resp.status_code != 200:
        logger.warning(f"Response error: {resp.status_code} {self._search_api}")
        return [], ""
    anime_meta_list = self.xpath(resp.text, '//div[@class="lpic"]//li')
    ret = []
    for meta in anime_meta_list:
        anime = AnimeMetaInfo()
        anime.title = " ".join(meta.xpath(".//h2/a/@title"))
        anime.cover_url = meta.xpath("./a/img/@src")[0]
        anime.category = " ".join(meta.xpath("./span[2]/a/text()"))
        anime.desc = meta.xpath("./p/text()")[0]
        anime.detail_page_url = meta.xpath("./a/@href")[0]  # /show/5031.html
        ret.append(anime)
    return ret, resp.text

def get_detail(self, detail_page_url: str):
    resp = self.get(self._detail_api, params={"vid": detail_page_url})
    if resp.status_code != 200 or resp.json()["code"] != 1:
        logger.warning(f"Response error: {resp.status_code} {self._detail_api}")
        return AnimeDetailInfo()
    detail = resp.json().get("data")  # video detail info
    anime_detail = AnimeDetailInfo()
    anime_detail.title = detail["name"]
    anime_detail.cover_url = detail["pic"]
    anime_detail.desc = detail["label"]
    anime_detail.category = detail["type"]
    vc = VideoCollection()
    vc.name = "视频列表"
    video_set = dict(detail["playUrl"])
    for name, url in video_set.items():
        vc.append(Video(name, url))
    anime_detail.append(vc)
    return anime_detail

def get_go_event(tags):
    '''
        Returns a GO Event object, by looking for a tag like `OP-<event_id>`
        or None if there is not a valid OP- tag on the Position
    '''
    event = None
    for tag in tags:
        if tag['name'].startswith('OP-'):
            event_id = tag['name'].replace('OP-', '').strip()
            try:
                event_id_int = int(event_id)
            except ValueError:
                logger.warning('%s tag is not a valid OP- tag' % event_id)
                continue
            try:
                event = Event.objects.get(id=event_id_int)
            except Exception:
                logger.warning('Emergency with ID %d not found' % event_id_int)
                continue
            return event
    return event

def delete_es_index(instance):
    ''' instance needs an es_id() '''
    if ES_CLIENT and ES_PAGE_NAME:  # To make sure it doesn't run for tests
        if hasattr(instance, 'es_id'):
            try:
                deleted, errors = bulk(
                    client=ES_CLIENT,
                    actions=[{
                        '_op_type': 'delete',
                        '_index': ES_PAGE_NAME,
                        '_type': 'page',
                        '_id': instance.es_id(),
                    }],
                )
                logger.info(f'Deleted {deleted} records')
                log_errors(errors)
            except Exception:
                logger.error('Could not reach Elasticsearch server or index was already missing.')
        else:
            logger.warning('instance does not have an es_id() method')

def parse_country(self, gec_code, country_name):
    # If gec_code has a mapping then we use that Country straight
    gec = GEC_CODES.filter(code=gec_code).first()
    if gec:
        return gec.country
    # Otherwise gec_code must be an ISO code, but we're using country_name as a backup check
    if len(gec_code) == 2:
        # Filter for 'Country' types only
        country = Country.objects.filter(iso__iexact=gec_code, record_type=1).first()
        if country is None:
            country = Country.objects.filter(name__iexact=country_name).first()
    else:
        country = Country.objects.filter(name__iexact=country_name).first()
    if not country:
        logger.warning(f'Could not find Country with: {gec_code} OR {country_name}')
    return country

def show_ioc():
    try:
        config['mongo_url']
    except Exception:
        start()
    myclient = pymongo.MongoClient(config['mongo_url'], connect=False)
    try:
        mydb = myclient["azkaban"]
    except Exception:
        logger.warning("No IOC plugin found")
        return "no ioc plug"
    coll_names = mydb.list_collection_names(session=None)
    db_info = {}
    db_info['data'] = []
    db_info['sum'] = 0
    for coll in coll_names:
        db = mydb[coll]
        info = {}
        info['name'] = coll
        info['count'] = db.find().count()
        db_info['sum'] += info['count']
        db_info['data'].append(info)
    return json.dumps(db_info)

def change_setting(settings):
    with open('./ThirPath/marioips/bin/senteve.sh', 'r') as script_senteve:
        old_base_settings = script_senteve.read()
    max_logfile_num = re.findall(r'-ge (.*?) ]', old_base_settings, re.S)[0]
    heartbeat_time = re.findall(r'sleep (.*?);', old_base_settings, re.S)[0]
    new_base_settings = old_base_settings.replace(
        max_logfile_num, settings['max_logfile_num']).replace(heartbeat_time, settings['heartbeat_time'])
    with open('./ThirPath/marioips/bin/senteve.sh', 'w') as script_senteve:
        script_senteve.write(new_base_settings)
    # with open('./ThirPath/marioips/marioips.yaml', 'r') as marioips_yaml:
    #     old_mario_setting = marioips_yaml.read()
    # save_pcap = re.findall(r'pcap-log:.+?enabled: (.*?) #setting save_pcap', old_mario_setting, re.DOTALL)[0]
    # pcap_size = re.findall(r'\slimit: (.*?b)', old_mario_setting)[0]
    # save_file = re.findall(r'file-store:.+?enabled: (.*?) #setting save_file', old_mario_setting, re.DOTALL)[0]
    # new_mario_settings = old_mario_setting.replace(save_pcap, settings['save_pcap']).replace(pcap_size, settings['pcap_size']).replace(save_file, settings['save_file'])
    # with open('./ThirPath/marioips/marioips.yaml', 'w') as marioips_yaml:
    #     marioips_yaml.write(new_mario_settings)
    logger.warning("Configuration file modified: {}".format(settings))
    config['update_setting_time'] = int(time.time())
    return "修改成功"

def detect_video_format(self) -> str:
    """Determine the real format of the video; the url may not carry a file extension."""
    # try to extract the extension from the url
    url = self._get_real_url()
    try:
        ext = url.split("?")[0].split(".")[-1].lower()
        if ext in ["mp4", "flv"]:
            return ext
        if ext == "m3u8":
            return "hls"
    except (IndexError, AttributeError):
        pass
    # The video's metadata carries its format information: look for hex signatures
    # at the start of the stream to infer the format.
    format_hex = {
        "mp4": ["69736F6D", "70617663", "6D703432", "4D50454734", "4C617666"],
        "flv": ["464C56"],
        "hls": ["4558544D3355"],
    }
    _, data_iter = self._get_stream_from_server(0, 512)
    if not data_iter:
        logger.warning("Could not get video stream from server")
        return "unknown"
    logger.debug("Detecting video format from binary stream")
    video_meta = next(data_iter).hex().upper()
    for format_, hex_list in format_hex.items():
        for hex_sign in hex_list:
            if hex_sign in video_meta:
                logger.debug(f"Video format: {format_}")
                return format_
    logger.error("Could not detect video format from stream")
    logger.debug("Video raw binary stream (512byte):")
    logger.debug(video_meta)
    return "unknown"

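# A hedged illustration (not from the original project) of how the signature
# table in detect_video_format matches a stream prefix: an FLV file begins with
# the bytes b"FLV" (0x46 0x4C 0x56), so its uppercase hex dump contains the
# "464C56" signature listed under "flv" above.
def example_signature_match() -> str:
    sample = b"FLV\x01\x05\x00\x00\x00\x09"  # a typical FLV header prefix
    return "flv" if "464C56" in sample.hex().upper() else "unknown"
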
def handle(self, *args, **options):
    # get latest
    filename = get_dbfile()

    # numeric details records
    details_rc = extract_table(filename, 'EW_Report_NumericDetails')
    # check for 1 record for each field report
    fids = [r['ReportID'] for r in details_rc]
    if len(set(fids)) != len(fids):
        raise Exception('More than one NumericDetails record for a field report')

    # numeric details records
    details_gov = extract_table(filename, 'EW_Report_NumericDetails_GOV')
    # check for 1 record for each field report
    fids = [r['ReportID'] for r in details_gov]
    if len(set(fids)) != len(fids):
        raise Exception('More than one NumericDetails record for a field report')

    # information
    info_table = extract_table(filename, 'EW_Report_InformationManagement')
    fids = [r['ReportID'] for r in info_table]
    if len(set(fids)) != len(fids):
        raise Exception('More than one InformationManagement record for a field report')

    # ## many-to-many

    # actions taken
    actions_national = extract_table(filename, 'EW_Report_ActionTakenByRedCross')
    actions_foreign = extract_table(filename, 'EW_Report_ActionTakenByPnsRC')
    actions_federation = extract_table(filename, 'EW_Report_ActionTakenByFederationRC')

    # source types
    source_types = extract_table(filename, 'EW_lofSources')
    for s in source_types:
        SourceType.objects.get_or_create(pk=s['SourceID'], defaults={'name': s['SourceName']})
    source_table = extract_table(filename, 'EW_Reports_Sources')

    # disaster response
    dr_table = extract_table(filename, 'EW_DisasterResponseTools')
    # check for 1 record for each field report
    fids = [r['ReportID'] for r in dr_table]
    if len(set(fids)) != len(fids):
        raise Exception('More than one DisasterResponseTools record for a field report')

    # contacts
    contacts = extract_table(filename, 'EW_Report_Contacts')

    # field report
    reports = extract_table(filename, 'EW_Reports')
    rids = [r.rid for r in FieldReport.objects.all()]
    num_reports_created = 0
    logger.info('%s reports in database' % len(reports))
    for i, report in enumerate(reports):
        # Skip reports that we've already ingested.
        # We don't have to update them because field reports can't be updated in DMIS.
        rid = report['ReportID']
        if rid in rids:
            continue

        report_name = report['Summary']
        report_description = report['BriefSummary']
        report_dtype = DisasterType.objects.get(pk=PK_MAP[report['DisasterTypeID']])
        record = {
            'rid': rid,
            'summary': report_name,
            'description': report_description,
            'dtype': report_dtype,
            'status': report['StatusID'],
            'request_assistance': report['GovRequestsInternAssistance'],
            'actions_others': report['ActionTakenByOthers'],
            'report_date': datetime.strptime(report['Inserted'], REPORT_DATE_FORMAT).replace(tzinfo=pytz.utc),
        }

        details = fetch_relation(details_rc, report['ReportID'])
        assert (len(details) <= 1)
        if len(details) > 0:
            details = details[0]
            record.update({
                'num_injured': details['NumberOfInjured'],
                'num_dead': details['NumberOfCasualties'],
                'num_missing': details['NumberOfMissing'],
                'num_affected': details['NumberOfAffected'],
                'num_displaced': details['NumberOfDisplaced'],
                'num_assisted': details['NumberOfAssistedByRC'],
                'num_localstaff': details['NumberOfLocalStaffInvolved'],
                'num_volunteers': details['NumberOfVolunteersInvolved'],
                'num_expats_delegates': details['NumberOfExpatsDelegates'],
            })

        details = fetch_relation(details_gov, report['ReportID'])
        assert (len(details) <= 1)
        if len(details) > 0:
            details = details[0]
            record.update({
                'gov_num_injured': details['NumberOfInjured_GOV'],
                'gov_num_dead': details['NumberOfDead_GOV'],
                'gov_num_missing': details['NumberOfMissing_GOV'],
                'gov_num_affected': details['NumberOfAffected_GOV'],
                'gov_num_displaced': details['NumberOfDisplaced_GOV'],
                'gov_num_assisted': details['NumberOfAssistedByGov_GOV'],
            })

        info = fetch_relation(info_table, report['ReportID'])
        if len(info) > 0:
            info = {k: '' if v is None else v for k, v in info[0].items()}
            record.update({
                'bulletin': {'': 0, 'None': 0, 'Planned': 2, 'Published': 3}[info['InformationBulletin']],
                'dref': {'': 0, 'No': 0, 'Planned': 2, 'Yes': 3}[info['DREFRequested']],
                'dref_amount': 0 if info['DREFRequestedAmount'] == '' else float(info['DREFRequestedAmount']),
                'appeal': {'': 0, 'Planned': 2, 'Yes': 3, 'NB': 0, 'No': 0, 'YES': 3}[info['EmergencyAppeal']],
                'appeal_amount': 0 if info['EmergencyAppealAmount'] == '' else float(info['EmergencyAppealAmount']),
            })

        # disaster response
        response = fetch_relation(dr_table, report['ReportID'])
        if len(response) > 0:
            response = {k: '' if v is None else v for k, v in response[0].items()}
            record.update({
                'rdrt': {'': 0, 'No': 0, 'Yes': 3, 'Planned/Requested': 2}[response['RDRT']],
                'fact': {'': 0, 'No': 0, 'Yes': 3, 'Planned/Requested': 2}[response['FACT']],
                'eru_relief': {'': 0, 'Yes': 3, 'Planned/Requested': 2, 'No': 0}[response['ERU']],
            })

        field_report = FieldReport(**record)

        # Create an associated event object
        event_record = {
            'name': report_name if len(report_name) else report_dtype.name,
            'summary': report_description,
            'dtype': report_dtype,
            'disaster_start_date': datetime.utcnow().replace(tzinfo=timezone.utc),
            'auto_generated': True,
            'auto_generated_source': SOURCES['report_ingest'],
        }
        event = Event(**event_record)
        event.save()

        field_report.event = event
        field_report.save()
        num_reports_created = num_reports_created + 1

        try:
            country = Country.objects.select_related().get(pk=report['CountryID'])
        except ObjectDoesNotExist:
            logger.warning('Could not find a matching country for %s' % report['CountryID'])
            country = None

        if country is not None:
            field_report.countries.add(country)
            event.countries.add(country)
            if country.region is not None:
                # No need to add a field report region, as that happens through a trigger.
                field_report.regions.add(country.region)
                event.regions.add(country.region)

        # ## add items with foreignkeys to report

        # national red cross actions
        actions = fetch_relation(actions_national, report['ReportID'])
        if len(actions) > 0:
            txt = ' '.join([a['Value'] for a in actions if a['Value'] is not None])
            act = ActionsTaken(organization='NTLS', summary=txt, field_report=field_report)
            act.save()
            for pk in [a['ActionTakenByRedCrossID'] for a in actions]:
                act.actions.add(*Action.objects.filter(pk=pk))

        # foreign red cross actions
        actions = fetch_relation(actions_foreign, report['ReportID'])
        if len(actions) > 0:
            txt = ' '.join([a['Value'] for a in actions if a['Value'] is not None])
            act = ActionsTaken(organization='PNS', summary=txt, field_report=field_report)
            act.save()
            for pk in [a['ActionTakenByRedCrossID'] for a in actions]:
                act.actions.add(*Action.objects.filter(pk=pk))

        # federation red cross actions
        actions = fetch_relation(actions_federation, report['ReportID'])
        if len(actions) > 0:
            txt = ' '.join([a['Value'] for a in actions if a['Value'] is not None])
            act = ActionsTaken(organization='FDRN', summary=txt, field_report=field_report)
            act.save()
            for pk in [a['ActionTakenByRedCrossID'] for a in actions]:
                act.actions.add(*Action.objects.filter(pk=pk))

        # sources
        sources = fetch_relation(source_table, report['ReportID'])
        for s in sources:
            spec = '' if s['Specification'] is None else s['Specification']
            Source.objects.create(
                stype=SourceType.objects.get(pk=s['SourceID']),
                spec=spec,
                field_report=field_report,
            )

        # disaster response
        response = fetch_relation(dr_table, report['ReportID'])

        # contacts
        contact = fetch_relation(contacts, report['ReportID'])
        if len(contact) > 0:
            # make sure just one contacts record
            assert (len(contact) == 1)
            contact = contact[0]
            fields = ['Originator', 'Primary', 'Federation', 'NationalSociety', 'MediaNationalSociety', 'Media']
            for f in fields:
                if contact_is_valid(contact, f):
                    FieldReportContact.objects.create(
                        ctype=f,
                        name=contact['%sName' % f],
                        title=contact['%sFunction' % f],
                        email=contact['%sContact' % f],
                        field_report=field_report,
                    )

    total_reports = FieldReport.objects.all()
    logger.info('%s reports created' % num_reports_created)
    logger.info('%s reports in database' % total_reports.count())

    # org type mapping
    org_types = {
        '1': 'NTLS',
        '2': 'DLGN',
        '3': 'SCRT',
        '4': 'ICRC',
    }
    last_login_threshold = timezone.now() - timedelta(days=365)

    # add users
    user_records = extract_table(filename, 'DMISUsers')
    processed_users = 0
    for i, user_data in enumerate(user_records):
        if user_data['LoginLastSuccess'] == '':
            continue
        last_login = datetime.strptime(
            user_data['LoginLastSuccess'],
            REPORT_DATE_FORMAT,
        )
        last_login = pytz.UTC.localize(last_login)
        # skip users who haven't logged in for a year
        if last_login < last_login_threshold:
            continue

        try:
            user = User.objects.get(username=user_data['UserName'])
        except ObjectDoesNotExist:
            user = None

        if user is None:
            name = user_data['RealName'].split()
            first_name = name[0]
            last_name = ' '.join(name[1:]) if len(name) > 1 else ''
            user = User.objects.create(
                username=user_data['UserName'],
                first_name=first_name if len(first_name) <= 30 else '',
                last_name=last_name if len(last_name) <= 30 else '',
                email=user_data['EmailAddress'],
                last_login=last_login,
            )
            user.set_password(user_data['Password'])

        user.is_staff = True if user_data['UserIsSysAdm'] == '1' else False

        # set user profile info
        user.profile.org = user_data['OrgTypeSpec'] if len(user_data['OrgTypeSpec']) <= 100 else ''
        user.profile.org_type = org_types.get(user_data['OrgTypeID'])
        # print(i, user_data['CountryID'])  # for debug purposes. Check go-api/data/Countries.csv for details.
        if user_data['CountryID'] in ['275', '281']:
            user_data['CountryID'] = '47'   # Hong Kong or Taiwan should be handled as China. Macao (279) is other case.
        elif user_data['CountryID'] in ['284']:
            user_data['CountryID'] = '292'  # Zone Central and West Africa -> Central Africa Country Cluster
        user.profile.country = Country.objects.get(pk=user_data['CountryID'])
        user.profile.city = user_data['City'] if len(user_data['City']) <= 100 else ''
        user.profile.department = user_data['Department'] if len(user_data['Department']) <= 100 else ''
        user.profile.position = user_data['Position'] if len(user_data['Position']) <= 100 else ''
        user.profile.phone_number = user_data['PhoneNumberProf'] if len(user_data['PhoneNumberProf']) <= 100 else ''
        user.save()
        processed_users = processed_users + 1
    logger.info('%s updated active user records' % processed_users)

def sync_open_positions(molnix_positions, molnix_api, countries):
    molnix_ids = [p['id'] for p in molnix_positions]
    warnings = []
    messages = []
    successful_creates = 0
    successful_updates = 0
    for position in molnix_positions:
        event = get_go_event(position['tags'])
        country = get_go_country(countries, position['country_id'])
        if not country:
            warning = 'Position id %d does not have a valid Country' % (position['id'])
            logger.warning(warning)
            warnings.append(warning)
            continue
        # If no valid GO Emergency tag is found, skip Position
        if not event:
            warning = 'Position id %d does not have a valid Emergency tag.' % position['id']
            logger.warning(warning)
            warnings.append(warning)
            continue
        go_alert, created = SurgeAlert.objects.get_or_create(molnix_id=position['id'])
        # We set all Alerts coming from Molnix to RR / Alert
        go_alert.atype = SurgeAlertType.RAPID_RESPONSE
        go_alert.category = SurgeAlertCategory.ALERT
        # print(json.dumps(position, indent=2))
        go_alert.molnix_id = position['id']
        go_alert.message = position['name']
        go_alert.molnix_status = position['status']
        go_alert.event = event
        go_alert.country = country
        go_alert.opens = get_datetime(position['opens'])
        go_alert.closes = get_datetime(position['closes'])
        go_alert.start = get_datetime(position['start'])
        go_alert.end = get_datetime(position['end'])
        go_alert.is_active = True
        go_alert.save()
        add_tags_to_obj(go_alert, position['tags'])
        if created:
            successful_creates += 1
        else:
            successful_updates += 1

    # Find existing active alerts that are not in the current list from Molnix
    existing_alerts = SurgeAlert.objects.filter(is_active=True).exclude(molnix_id__isnull=True)
    existing_alert_ids = [e.molnix_id for e in existing_alerts]
    inactive_alerts = list(set(existing_alert_ids) - set(molnix_ids))

    # Mark alerts that are no longer in Molnix as inactive
    for alert in SurgeAlert.objects.filter(molnix_id__in=inactive_alerts):
        # We need to check the position ID in Molnix.
        # If the status is "unfilled", we don't mark the position as inactive,
        # just set status to unfilled
        position = molnix_api.get_position(alert.molnix_id)
        if not position:
            warnings.append('Position id %d not found in Molnix API' % alert.molnix_id)
        if position and position['status'] == 'unfilled':
            alert.molnix_status = position['status']
        else:
            alert.is_active = False
        alert.save()

    marked_inactive = len(inactive_alerts)
    messages = [
        'Successfully created: %d' % successful_creates,
        'Successfully updated: %d' % successful_updates,
        'Marked inactive: %d' % marked_inactive,
        'No of Warnings: %d' % len(warnings),
    ]
    return messages, warnings, successful_creates

def handle(self, *args, **options):
    logger.info('Starting appeal document ingest')

    # v smoke test
    baseurl = 'https://www.ifrc.org/appeals/'  # no more ...en/publications-and-reports...
    http = PoolManager()  # stackoverflow.com/questions/36516183/what-should-i-use-to-open-a-url-instead-of-urlopen-in-urllib3
    smoke_response = http.request('GET', baseurl)
    joy_to_the_world = False
    if smoke_response.status == 200:
        joy_to_the_world = True  # We log the success later, when we know the numeric results.
    else:
        body = {
            "name": "ingest_appeal_docs",
            "message": f'Error ingesting appeals_docs on url: {baseurl}, error_code: {smoke_response.status}',
            "status": CronJobStatus.ERRONEOUS,
        }
        CronJob.sync_cron(body)
    # ^ smoke test

    if options['fullscan']:
        # If the `--fullscan` option is passed (at the end of command), check ALL appeals. Runs an hour!
        print('Doing a full scan of all Appeals')
        qset = Appeal.objects.all()
    else:
        # By default, only check appeals from the past 6 months
        now = datetime.now().replace(tzinfo=timezone.utc)
        six_months_ago = now - relativedelta(months=6)
        # This was the original qset, but it wouldn't get newer docs for the same Appeals:
        # qset = Appeal.objects.filter(appealdocument__isnull=True).filter(end_date__gt=six_months_ago)
        qset = Appeal.objects.filter(end_date__gt=six_months_ago)
        # qset = Appeal.objects.filter(code='Something')  # could help debug

    # First get all Appeal Codes
    appeal_codes = [a.code for a in qset]

    # Modify code taken from https://pastebin.com/ieMe9yPc to scrape `publications-and-reports`
    # and find Documents for each appeal code
    output = []
    page_not_found = []
    for code in appeal_codes:
        code = code.replace(' ', '')
        docs_url = f'{baseurl}?appeal_code={code}'  # no more ac={code}&at=0&c=&co=&dt=1&f=&re=&t=&ti=&zo=
        try:
            http = PoolManager()
            response = http.request('GET', docs_url)
        except Exception:
            # if we get an error fetching page for an appeal, we ignore it
            page_not_found.append(code)
            continue
        soup = BeautifulSoup(response.data, "lxml")
        div = soup.find('div', class_='row appeals-view__row')
        for t in div.findAll('tbody'):
            output = output + self.makelist(t)

    # Once we have all Documents in output, we add all missing Documents to the associated Appeal
    not_found = []
    existing = []
    created = []

    acodes = list(set([a['appealcode'] for a in output]))
    for code in acodes:
        try:
            appeal = Appeal.objects.get(code=code)
        except ObjectDoesNotExist:
            not_found.append(code)
            continue

        existing_docs = list(appeal.appealdocument_set.all())
        docs = [a for a in output if code == a['appealcode']]
        for doc in docs:
            if doc['url'].startswith('/'):  # can be /docs or /sites also
                # href only contains the relative path when the document is hosted on the ifrc.org site
                doc['url'] = f'https://www.ifrc.org{doc["url"]}'
            exists = len([a for a in existing_docs if a.document_url == doc['url']]) > 0
            if exists:
                existing.append(doc['url'])
            else:
                try:
                    created_at = self.parse_date(doc['date'])
                except Exception:
                    created_at = None

                AppealDocument.objects.create(
                    document_url=doc['url'],
                    name=doc['appealtype'],  # not ['name'], because this is the appeal's name
                    created_at=created_at,
                    appeal=appeal,
                )
                created.append(doc['url'])

    text_to_log = []
    text_to_log.append('%s appeal documents created' % len(created))
    text_to_log.append('%s existing appeal documents' % len(existing))
    text_to_log.append('%s pages not found for appeal' % len(page_not_found))
    for t in text_to_log:
        logger.info(t)
        # body = { "name": "ingest_appeal_docs", "message": t, "status": CronJobStatus.SUCCESSFUL }
        # CronJob.sync_cron(body)

    if len(not_found):
        t = '%s documents without appeals in system' % len(not_found)
        logger.warning(t)
        body = {
            "name": "ingest_appeal_docs",
            "message": t,
            "num_result": len(not_found),
            "status": CronJobStatus.WARNED,
        }
        CronJob.sync_cron(body)

    if joy_to_the_world:
        body = {
            "name": "ingest_appeal_docs",
            "message": (
                f'Done ingesting appeals_docs on url {baseurl},'
                f' {len(created)} appeal document(s) were created,'
                f' {len(existing)} already exist,'
                f' {len(page_not_found)} not found'
            ),
            "num_result": len(created),
            "status": CronJobStatus.SUCCESSFUL,
        }
        CronJob.sync_cron(body)

def handle(self, *args, **options):
    logger.info('Starting Deployment ingest')
    # url = 'https://proxy.hxlstandard.org/data.json?url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F1CBvledFYc_uwlvHTvJE0SYS7_mPGU2L-zhrqbB4KNIA%2Fedit%23gid%3D0&header-row=1'  # not enough.
    url = 'https://proxy.hxlstandard.org/data.json?tagger-match-all=on&' \
        + 'tagger-01-header=year&' \
        + 'tagger-01-tag=%23a1&' \
        + 'tagger-02-header=%2Aappeal+code&' \
        + 'tagger-02-tag=%23a2&' \
        + 'tagger-03-header=region&' \
        + 'tagger-03-tag=%23a3&' \
        + 'tagger-04-header=country&' \
        + 'tagger-04-tag=%23a4&' \
        + 'tagger-05-header=location&' \
        + 'tagger-05-tag=%23a5&' \
        + 'tagger-06-header=disaster+type&' \
        + 'tagger-06-tag=%23a6&' \
        + 'tagger-07-header=%2Adisaster+name&' \
        + 'tagger-07-tag=%23a7&' \
        + 'tagger-08-header=%2Aname&' \
        + 'tagger-08-tag=%23a8&' \
        + 'tagger-09-header=%2Adeploying+ns+%2F+ifrc+office&' \
        + 'tagger-09-tag=%23a9&' \
        + 'tagger-10-header=%2Agender&' \
        + 'tagger-10-tag=%23b1&' \
        + 'tagger-11-header=language&' \
        + 'tagger-11-tag=%23b2&' \
        + 'tagger-12-header=%2Aposition&' \
        + 'tagger-12-tag=%23b3&' \
        + 'tagger-13-header=%2Atype&' \
        + 'tagger-13-tag=%23b4&' \
        + 'tagger-14-header=supported+by+ns&' \
        + 'tagger-14-tag=%23b5&' \
        + 'tagger-15-header=availability&' \
        + 'tagger-15-tag=%23b6&' \
        + 'tagger-16-header=%2Aexp+start+date&' \
        + 'tagger-16-tag=%23b7&' \
        + 'tagger-17-header=%2Aexp+duration&' \
        + 'tagger-17-tag=%23b8&' \
        + 'tagger-18-header=%2Aalert&' \
        + 'tagger-18-tag=%23b9&' \
        + 'tagger-19-header=deployment+message&' \
        + 'tagger-19-tag=%23c1&' \
        + 'tagger-20-header=%2Astart+of+mission&' \
        + 'tagger-20-tag=%23c2&' \
        + 'tagger-21-header=%2Aend+of+mission&' \
        + 'tagger-21-tag=%23c3&' \
        + 'tagger-22-header=deployment+duration&' \
        + 'tagger-22-tag=%23c4&' \
        + 'tagger-23-header=deployed&' \
        + 'tagger-23-tag=%23c5&' \
        + 'tagger-24-header=rotation&' \
        + 'tagger-24-tag=%23c6&' \
        + 'tagger-25-header=comments&' \
        + 'tagger-25-tag=%23c7&' \
        + 'url=https%3A%2F%2Fdocs.google.com%2Fspreadsheets%2Fd%2F1CBvledFYc_uwlvHTvJE0SYS7_mPGU2L-zhrqbB4KNIA%2Fedit%23gid%3D0&' \
        + 'header-row=1'
    response = requests.get(url)
    if response.status_code != 200:
        logger.error('Error querying Deployment HXL API')
        raise Exception('Error querying Deployment HXL API')
    records = response.json()

    # some logging variables
    not_found = []
    existing = []
    created = []

    columns = [a.replace('*', '').replace(' ', '') for a in records[0]]
    # ['Year', 'AppealCode', 'Region', 'Country', 'Location', 'Disastertype', 'Disastername', 'Name',
    #  'DeployingNS/IFRCOffice', 'Gender', 'Language', 'Position', 'Type', 'SupportedbyNS', 'Availability',
    #  'Expstartdate', 'expduration', 'Alert', 'Deploymentmessage', 'Startofmission', 'Endofmission',
    #  'DeploymentDuration', 'Deployed', 'Rotation', 'Comments']
    # (column indices 0-24)
    # if empty name -> Alert, otherwise -> Deployment

    # OBSOLETE:
    # # group records by appeal code
    # acodes = list(set([a[2] for a in records[2:]]))
    # for code in acodes:
    #     try:
    #         appeal = Appeal.objects.get(code=code)
    #     except ObjectDoesNotExist:
    #         not_found.append(code)
    #         continue
    #
    #     existing_docs = list(appeal.appealdocument_set.all())
    #     docs = [a for a in records if a[2] == code]
    #     for doc in docs:
    #         exists = len([a for a in existing_docs if a.document_url == doc[0]]) > 0
    #         if exists:
    #             existing.append(doc[0])
    #         else:
    #             try:
    #                 created_at = self.parse_date(doc[5])
    #             except:
    #                 created_at = None
    #
    #             AppealDocument.objects.create(
    #                 document_url=doc[0],
    #                 name=doc[4],
    #                 created_at=created_at,
    #                 appeal=appeal,
    #             )
    #             created.append(doc[0])

    logger.info('%s Deployments created' % len(created))
    logger.info('%s existing Deployments' % len(existing))
    logger.warning('%s documents without appeals in system' % len(not_found))

def clear(self):
    logger.warning(f"{self.__class__.__name__} cleaning, objects in total: {len(self._db)}")
    self._db.clear()