def downLoadStory(storyno, urls):
    requests.adapters.DEFAULT_RETRIES = 5
    s = requests.session()
    s.keep_alive = False
    stroy_text = {}
    for url in urls:
        logging.info(url)
        flag = True
        while flag:
            try:
                user_agent = user_Agent()
                res = s.get(url, headers=user_agent)
                flag = False
                # print(res.headers["User-Agent"])
                # logging.info(res.headers["User-Agent"])
            except Exception as e:
                logging.info("- - 连接失败,正在重连- ")
                logging.error(e)
                continue
        text_reg = re.compile(
            r'<div class="articlecon font-large"><p>(.+)<br/><br/></p></div>')
        result = text_reg.findall(res.text)
        new_result = result[0].replace("<br/>", "")
        new_result = new_result.lstrip()
        new_result = re.sub(' +', '\n ', new_result)
        db.insertStory(url, new_result, storyno)
    return stroy_text
def flush_data():
    global last_flush_time
    db_session = db_session_mk()
    time_now = datetime.now().strftime("%Y-%m-%d 00:00:00")
    add_ls = []
    for name in data_dct:
        record = db_session.query(AMZTaskStatistic)\
                .filter(and_(AMZTaskStatistic.name==name, AMZTaskStatistic.time==time_now))\
                .first()
        if not record:
            amz_stats = AMZTaskStatistic()
            amz_stats.name = name
            amz_stats.count = data_dct[name]
            amz_stats.time = time_now
            add_ls.append(amz_stats)
        else:
            record.count += data_dct[name]
    if add_ls:
        db_session.bulk_save_objects(add_ls)
    try:
        db_session.commit()
    except:
        logger.error("flush db error")
        db_session.rollback()
    else:
        last_flush_time = time.time()
    finally:
        data_dct.clear()
        db_session.close()
def start():
    manager = Mymanager()
    manager.start()
    model_filename = 'model_data/mars-small128.pb'
    CreateBoxEncoder = manager.CreateBoxEncoder(model_filename, batch_size=1)
    yolo = manager.YOLO()
    video_mes = VIDEO_NAME
    if ENVIRO and os.environ[DOCKER_ID]:
        video_mes = video_mes[
            int(os.environ[DOCKER_ID]) * PROCESS_NUM - PROCESS_NUM:
            int(os.environ[DOCKER_ID]) * PROCESS_NUM]
    logger.debug('video_url size: {} Total is {}'.format(len(video_mes), video_mes))
    gpu_proccess = PROCESS_NUM
    if 0 < len(video_mes) < gpu_proccess:
        gpu_proccess = len(video_mes)
    logger.debug('proccess num {}'.format(gpu_proccess))
    if len(video_mes) > 0:
        # urls = [video_mes[i:i + step] for i in range(0, len(video_mes), step)]
        logger.debug('proccess loading')
        urls = video_mes
        with ProcessPoolExecutor(max_workers=gpu_proccess) as pool:
            for url in urls:
                pool.submit(rec_start, url, yolo, CreateBoxEncoder)
    else:
        logger.error('No stream was read')
    logger.error('game over')
def event_LOCATION(self, msg):
    # Need log to record location information of users
    openid = msg['FromUserName']
    Latitude = msg['Latitude']
    Longitude = msg['Longitude']
    # print Latitude,Longitude
    user = self.db.get_openid(openid)
    if not user:
        user = self.get_subscribed_user(openid)
        # insert info to mysql
        self.db.insert_item(user)
    if user:
        # self.db.execute("update user set latitude=%s,longitude = %s"
        #                 "where openid = %s"
        #                 , Latitude, Longitude, openid)
        valueDict = {}
        valueDict['latitude'] = Latitude
        valueDict['longitude'] = Longitude
        self.db.update_item(openid, valueDict)
        logger.info(
            "update location of user, openid = " + openid + ", latitude = " +
            Latitude + ", Longitude = " + Longitude)
    else:
        logger.error("get user from weixin error, openid = " + openid)
def producer(url, q):
    while True:
        i = 0
        logger.debug('rtmp: {} read+'.format(url))
        video_capture = cv2.VideoCapture(url)
        ret_val, image = video_capture.read()
        if False is video_capture.isOpened() or False is ret_val:
            logger.warning('{} url is: {} {}'.format(url, video_capture.isOpened(), ret_val))
            continue
        logger.debug('rtmp: {} load finish'.format(url))
        while True:
            i += 1
            ret, frame = video_capture.read()
            if not ret:
                break
            if i % TIMES != 0 or image is None:
                continue
            if not FX == FY == 1:
                try:
                    logger.debug('{}: {} fps image resize'.format(url, i))
                    frame = cv2.resize(frame, (0, 0), fx=FX, fy=FY)
                except Exception as e:
                    logger.error(e)
                    logger.error('image is bad')
                    break
            if q.full():
                q.get()
            q.put(frame)
            logger.info('{} image save to queue {}'.format(i, q.qsize()))
def getStoryContentUrl(storyno, url):
    storyContentNum = config.STORYNUM
    requests.adapters.DEFAULT_RETRIES = 5
    s = requests.session()
    s.keep_alive = False
    flag = True
    # logging.info(url)
    while flag:
        try:
            user_agent = user_Agent()
            res = requests.get(url, headers=user_agent)
            flag = False
            # res.request.headers holds the User-Agent that was set
        except Exception as e:
            logging.error(e)
            continue
    reg = re.compile(r'http://m.xsqishu.com(.+).html')
    identical = reg.findall(url)[0]  # URL prefix shared by all chapters of this novel
    storyurlreg = re.compile(r'<a href=(%s/\d+).html><li>' % (identical))  # chapter page URLs
    storyUrls = storyurlreg.findall(res.text)
    newstoryUrls = []
    if storyContentNum == False:
        storyContentNum = len(storyUrls)
    for i in storyUrls[0:storyContentNum - 1]:
        reg = re.compile(r'%s/(\d+)' % (identical))
        chapter_num = reg.findall(i)[0]
        url = "http://m.xsqishu.com" + i + ".html"
        new_chapter_num = storyno + str(chapter_num.zfill(5))
        newstoryUrls.append(url)
        print(storyno, new_chapter_num, url)
        db.insetStoryContentUrl(storyno, new_chapter_num, url)
    return newstoryUrls
def post_text(self, msg):
    try:
        openid = msg['FromUserName']
        keyword = msg['Content'].strip().encode('utf-8')
        logger.info("get text: user openid = " + str(openid) + ";msg = " + keyword)
        if keyword.startswith("bu"):
            # backfill mode: record data for a past date
            today = keyword[2:12]
            month = keyword[2:9]
            info = content_ope.record_msg(openid, keyword[12:], month, today)
        elif not keyword.startswith("hi"):
            info = "每日记录要以hi开头,我才记录哦"
        else:
            today = self.curr_date
            month = self.curr_month
            info = content_ope.record_msg(openid, keyword[2:], month, today)
        url = Const.TRACK_URL % (openid, month)
        info += "\n\n" + "<a href='" + url + "'>本月记录</a>"
        return info
    except Exception as e:
        logger.error(e)
        return "不好意思,发生了一点错误,请联系我的主人"
def get_page_url():
    url = config.url
    db = MySQL()
    requests.adapters.DEFAULT_RETRIES = 5
    s = requests.session()
    s.keep_alive = False
    flag = True
    while flag:
        try:
            user_agent = user_Agent()
            res = requests.get(url, headers=user_agent)
            flag = False
            # res.request.headers holds the User-Agent that was set
        except Exception as e:
            logging.error(e)
            continue
    max_index_reg = re.compile(r'<a id="pt_mulu">\d+/(\d+)</a>')
    max_index = max_index_reg.findall(res.text)[0]
    logging.info(max_index)
    already_index_count = db.getStoryPageIndexCount()
    if already_index_count < int(max_index):
        for i in range(already_index_count + 1, int(max_index) + 1):
            if i == 1:
                page_url = "http://m.xsqishu.com/newbook/index.html"
            else:
                page_url = "http://m.xsqishu.com/newbook/index_" + str(i) + ".html"
            db.inertStoryPageIndex(i, page_url)
            msg = "新增第" + str(i) + "页"
            logging.info(msg)
    else:
        logging.info("当前总页数未更新")
def run(self):
    try:
        # Fetch info of the book waiting to be converted
        wait_converts_info = self.db.convert_get(self.convert_id)
        if not wait_converts_info:
            raise Exception(u'未找到待转换的书籍信息')
        # Fetch images required by the book
        book_img_info = self.db.book_img_get(self.book_id)
        if book_img_info:
            # Update local paths of the book images
            self.db.book_img_update_local_path(self.book_id, self.book_images_task.get())
        # Fetch the book record
        book_info = self.db.book_get_by_book_id(self.book_id)
        # Run the conversion
        out_file_path = proc_helper.convert(
            str(wait_converts_info['book_html_local_path']), self.out_dir,
            book_info['book_author'], book_info['book_cover_local_path'])
        if out_file_path is None:
            # Conversion failed
            self.db.convert_update_status(globals.STATUS.get('error'), self.convert_id)
            raise Exception(u'rai转换html to mobi失败')
        # Conversion succeeded: update status and record the output path
        self.db.convert_update_status(globals.STATUS.get('complete'), self.convert_id, out_file_path)
        # Update the book file path
        self.db.book_update_file_path(self.book_id, out_file_path)
        # Update the attachment of the pending email
        self.db.email_update_attach_file(self.email_id, out_file_path)
        # Fetch the pending email
        wait_email_info = self.db.email_get(self.email_id)
        if not wait_email_info:
            raise Exception(u'未找到待发送邮件信息,邮件ID:%s' % self.email_id)
        # Send the email
        Api.send_mail(self.email_id, wait_email_info['email_attach_file'],
                      str(wait_email_info['email_to_user']),
                      str(wait_email_info['email_title']),
                      str(wait_email_info['email_auth']))
        # Also send to the personal mailbox (to_private_email) if configured
        if self.to_private_email:
            private_email_id = RandomUtil.random32Str()
            self.db.email_add_full(private_email_id, self.to_private_email,
                                   str(wait_email_info['email_title']),
                                   str(wait_email_info['email_auth']),
                                   wait_email_info['email_attach_file'])
            MailTask.send.delay(private_email_id,
                                wait_email_info['email_attach_file'],
                                self.to_private_email,
                                str(wait_email_info['email_title']),
                                str(wait_email_info['email_auth']))
    except Exception as err:
        logger.error(u'异步线程出错,转换ID:%s,错误信息:%s', self.convert_id, err)
        exit(-1)
def on_failure(self, exc, task_id, args, kwargs, einfo):
    try:
        logger.error(u'发送邮件失败,celery task id: %s, 参数:%s, 错误信息:%s'
                     % (task_id, str(args), str(exc)))
        db = Database()
        db.email_update_status(str(args[0]), globals.STATUS.get('error'))
    except Exception as e:
        logger.error(u'更新发送邮件状态异常,错误:%s,参数:%s' % (str(e), str(args)))
async def pub_to_nsq(address, topic, msg):
    url = "http://{}/pub".format(address)
    logger.info(url)
    async with ClientSession() as session:
        async with session.post(url, params="topic="+topic, json=msg) as resp:
            if resp.status != 200:
                logger.error("[pub to nsq error] topic: {}".format(topic))
            return resp.status
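# Hedged usage sketch for pub_to_nsq above (not part of the original source):
# the nsqd address, topic, and payload below are made-up placeholders.
import asyncio

async def _demo_pub():
    status = await pub_to_nsq("127.0.0.1:4151", "demo_topic", {"id": 1})
    print(status)

# asyncio.run(_demo_pub())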
def on_success(self, retval, task_id, args, kwargs):
    try:
        logger.info(u'发送邮件成功,参数:%s' % str(args))
        # Update the email's send status
        db = Database()
        db.email_update_status(str(args[0]), globals.STATUS.get('complete'))
    except Exception as e:
        logger.error(u'更新发送邮件状态异常,错误:%s,参数:%s' % (str(e), str(args)))
def get_privacy(self, tag_list):
    api_names = []
    api_descriptions = []
    api2signature = {}
    tp = 0
    fp = 0
    pkg_name = ""
    classname = ""
    for i in range(0, len(tag_list)):
        tag = tag_list[i]
        if tag.name == 'h2':
            pkg_name = tag_list[i - 1].getText()
            classname = tag.getText().split(" ")[-1]
        is_method_section = False
        if tag.name == 'h3':
            des_text = tag.getText()
            if "Public Member Functions" in des_text or "Method Detail" in des_text \
                    or "方法详细资料" in des_text or "メソッドの詳細" in des_text:
                is_method_section = True
        if not is_method_section:
            continue
        for j in range(i + 1, len(tag_list)):
            next_tag = tag_list[j]
            if next_tag.name == 'h4':
                if j + 1 >= len(tag_list):
                    break
                api_name = next_tag.getText()
                pre_tag = tag_list[j + 1]
                if pre_tag.name == "pre":
                    signature = pre_tag.getText()
                else:
                    continue
                api_names.append(api_name)
                self.apis.append(api_name)
                api2signature[api_name] = signature
                if j + 2 < len(tag_list) and tag_list[j + 2].name == "div":
                    description = tag_list[j + 2].getText()
                    # print("*******************")
                    # print(api_name)
                    # print(description)
                    # print("===================")
                    api_descriptions.append(description)
                else:
                    api_descriptions.append("")
    for i in range(0, len(api_names)):
        api_name = api_names[i]
        api_description = api_descriptions[i]
        is_sensitive, privacy_item = check_api_by_class(classname, api_name)
        if is_sensitive:
            api_description = api_description.replace("\n", " ")
            self.sensitive_apis.append((pkg_name + "." + classname, api_name,
                                        privacy_item, api_description))
            if pkg_name == "":
                logger.error(classname)
                fp = fp + 1
                continue
            tp = tp + 1
    return tp, fp
def get_story_urls(urls):
    downloadnum = config.DOWNLOADNUM
    db = MySQL()
    stroy_urls = {}
    downloadstoryno = []
    for url in urls:
        requests.adapters.DEFAULT_RETRIES = 5
        s = requests.session()
        s.keep_alive = False
        flag = True
        while flag:
            try:
                user_agent = user_Agent()
                res = requests.get(url, headers=user_agent)
                flag = False
                # res.request.headers holds the User-Agent that was set
            except Exception as e:
                logging.error(e)
                continue
        url_reg = re.compile(r'<a href="/txt/(\d+).html">')
        allUrl = url_reg.findall(res.text)
        if downloadnum == False:
            downloadnum = len(allUrl)
        for i in allUrl[0:downloadnum]:
            story_url = "http://m.xsqishu.com/txt/" + i + ".html"
            stroy_urls[i] = story_url
            downloadstoryno.append(i)
    for num, compileurl in stroy_urls.items():
        requests.adapters.DEFAULT_RETRIES = 5
        s = requests.session()
        s.keep_alive = False
        flag = True
        while flag:
            try:
                user_agent = user_Agent()
                res = requests.get(compileurl, headers=user_agent)
                res.encoding = "gbk"
                flag = False
                # res.request.headers holds the User-Agent that was set
            except Exception as e:
                logging.error(e)
                continue
        reg = re.compile(r'<a href="/book/(.+).html" class="bdbtn greenBtn">')
        url = reg.findall(res.text)
        story_title_reg = re.compile(r'<h1 class="title">(.+)</h1>')
        title = story_title_reg.findall(res.text)[0]
        download_url = "http://m.xsqishu.com/book/" + url[0] + ".html"
        if db.isExistStory(num):
            msg = "小说---" + title + "---已入库"
            logging.info(msg)
        else:
            db.inertStoryUrl(num, title, download_url)
    return downloadstoryno
def calculate_interpretation(status):
    if status == "known":
        return "Pathogenic"
    if status == "likely":
        return "Likely pathogenic"
    if status == "unknown":
        return "Uncertain significance"
    if status == "ambiguous":
        return "other"
    logger.error("Failed to resolve interpretation: %s", status)
    return ""
def _handle(self, data: bytes, source_addr: tuple) -> None:
    """Handle the received data.

    Parse the received data into a DNS message, look up the requested domain
    name in the local database or a foreign DNS server, and send the result
    back to the user.

    Args:
        data: Received data.
        source_addr: Source host addr.
    """
    message = Message.from_bytes(data)
    question = message.Questions[0]
    if question is None:
        logger.error('{addr}Format error'.format(addr=source_addr))
        return
    if question.QTYPE == RR_type.A:
        rdata = self._cache.get(question.get_QNAME(), 'A')
        if rdata is not None:
            if rdata != b'\x00\x00\x00\x00':
                logger.info('{addr}Found A of {name}'.format(
                    name=question.get_QNAME(), addr=source_addr))
                header = message.Header
                header.RA = b'\x01'
                header.QDCOUNT = b'\x00\x01'
                header.ANCOUNT = b'\x00\x01'
                header.QR = b'\x01'
                ttl = self._cache.get_ttl(question.get_QNAME()) \
                    if self._cache.get_ttl(question.get_QNAME()) != -1 else 0
                answer = ResourceRecord(b'\xc0\x0c', RR_type.A, RR_class.IN, ttl, 4,
                                        self._cache.get(question.get_QNAME(), 'A'))
                response = Message(header, [question], [answer], [], []).get_bytes()
            else:
                logger.warning('{addr}Blocked {name}'.format(
                    name=question.get_QNAME(), addr=source_addr))
                header = message.Header
                header.RA = b'\x01'
                header.QDCOUNT = b'\x00\x00'
                header.ANCOUNT = b'\x00\x00'
                header.QR = b'\x01'
                header.RCODE = b'\x03'
                response = Message(header, [], [], [], []).get_bytes()
        else:
            logger.debug('{addr}Forward to remote DNS server: {name}'.format(
                name=question.get_QNAME(), addr=source_addr))
            response = self._forward(data)
    else:
        logger.debug('{addr}Forward to remote DNS server: {name}'.format(
            name=question.get_QNAME(), addr=source_addr))
        response = self._forward(data)
    self._socket.sendto(response, source_addr)
def redis_execute_wrapper(*args, **kwargs):
    while True:
        try:
            return func(*args, **kwargs)
        except redis.ConnectionError as e:
            logger.error('Redis ConnectionError')
            redis_client.connection_pool.disconnect()
            continue
        except redis.TimeoutError as e:
            logger.error('Redis TimeoutError')
            redis_client.connection_pool.disconnect()
            continue
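# The wrapper above references the free variables `func` and `redis_client`,
# so it presumably lives inside a retry decorator. A minimal sketch of such an
# enclosing decorator (the name `redis_execute` and the module-level
# `redis_client` are assumptions, not the original code):
import functools

def redis_execute(func):
    @functools.wraps(func)
    def redis_execute_wrapper(*args, **kwargs):
        while True:
            try:
                return func(*args, **kwargs)
            except (redis.ConnectionError, redis.TimeoutError) as e:
                # Drop broken connections and retry the wrapped call
                logger.error(e)
                redis_client.connection_pool.disconnect()
    return redis_execute_wrapper

# Hypothetical usage:
# @redis_execute
# def pop_task(key):
#     return redis_client.lpop(key)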
async def initialize(self):
    while True:
        try:
            self._connection = await aio_pika.connect_robust(**self._conf)
            self._channel = await self._connection.channel()
            self._queue = await self._channel.declare_queue(
                self._queue_name, durable=True)
        except Exception as exc:
            logger.error("Connect error")
            logger.error(exc)
        else:
            break
def calculate_status(equivocal, copy_type):
    if copy_type == "amplification":
        if equivocal == "true":
            return "gain"
        return "amplification"
    if copy_type == "loss":
        if equivocal == "true":
            return "partial_loss"
        return "loss"
    if copy_type == "partial amplification":
        return "gain"
    logger.error("Failed to resolve copy type: %s, equivocal: %s", copy_type, equivocal)
    return ""
async def initialize(self):
    while True:
        try:
            self._connection = await aio_pika.connect_robust(**self._conf)
            self._channel = await self._connection.channel()
            await self._channel.set_qos(prefetch_count=self._qos)
            self._queue = await self._channel.declare_queue(
                self._queue_name, durable=True)
        except Exception as exc:
            logger.error("Connect error")
            logger.error(exc)
        else:
            await self._queue.consume(self._callback)
            break
def post_image(self, msg):
    # image message
    try:
        openid = msg['FromUserName']
        url = msg['PicUrl']
        logger.info("get a photo: user openid = " + str(openid) + ";photo = " + url)
        today = self.curr_date
        month = self.curr_month
        info = content_ope.record_photo(openid, url, month, today)
        return info
    except Exception as e:
        logger.error(e)
        return "不好意思,发生了一点错误,请联系我的主人"
def post_default(self):
    msg = self.get_msg()
    # print msg
    try:
        msg_type = msg['MsgType']
        if self.verification():
            # add cookie
            self.set_secure_cookie("openid", msg["FromUserName"])
            info = getattr(self, 'post_' + msg_type)(msg)
        else:
            info = u"Message verification failed"
        if info:
            logger.info("send a message to weixin, info = " + info)
            self.write(self.rep_text(msg, info))
    except Exception as e:
        logger.error(e)
def get_story_url(urls):
    stroy_urls = []
    download_urls = {}
    for url in urls:
        requests.adapters.DEFAULT_RETRIES = 5
        s = requests.session()
        s.keep_alive = False
        flag = True
        while flag:
            try:
                user_agent = user_Agent()
                res = requests.get(url, headers=user_agent)
                flag = False
                # res.request.headers holds the User-Agent that was set
            except Exception as e:
                logging.error(e)
                continue
        url_reg = re.compile(r'<a href="/txt/(\d+).html">')
        allUrl = url_reg.findall(res.text)
        for i in allUrl:
            story_url = "http://m.xsqishu.com/txt/" + i + ".html"
            stroy_urls.append(story_url)
    for compileurl in stroy_urls:
        requests.adapters.DEFAULT_RETRIES = 5
        s = requests.session()
        s.keep_alive = False
        flag = True
        while flag:
            try:
                user_agent = user_Agent()
                res = requests.get(compileurl, headers=user_agent)
                res.encoding = "gbk"
                flag = False
                # res.request.headers holds the User-Agent that was set
            except Exception as e:
                logging.error(e)
                continue
        reg = re.compile(r'<a href="/book/(.+).html" class="bdbtn greenBtn">')
        url = reg.findall(res.text)
        story_title_reg = re.compile(r'<h1 class="title">(.+)</h1>')
        title = story_title_reg.findall(res.text)[0]
        download_url = "http://m.xsqishu.com/book/" + url[0] + ".html"
        download_urls[title] = download_url
        msg = title + ":" + download_url
        logging.info(msg)
    return download_urls
def _proxy_used(self, proxy_id, is_succ, speed):
    """ Report proxy usage feedback. """
    req = requests.post(
        self.used_url,
        json.dumps({
            'id': proxy_id,
            'is_succ': str(is_succ),
            'speed': speed,
            'use_flag': self.use_flag
        }))
    if not req:
        logger.error('used post error, %s' % req)
    elif req.text.find('error') != -1:
        logger.error('Used post return error.')
def read_netcdf_mpi(fpath, nc_dict, root, format='NETCDF4'):
    data_dict = dict()
    if myrank == root:
        for key in nc_dict.keys():
            if key not in ['dimension', 'variable']:
                logger.error("Error: wrong keyword of nc_dict, '%s'"%(key))
        ncf = nc.Dataset(fpath, 'r', format=format)
        for dname in nc_dict.get('dimension', []):
            data_dict[dname] = len( ncf.dimensions[dname] )
        for vname in nc_dict.get('variable', []):
            data_dict[vname] = ncf.variables[vname][:]
    return comm.bcast(data_dict, root=root)
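# Hedged usage sketch for read_netcdf_mpi (the file name and the dimension/
# variable names below are made-up placeholders): nc_dict is expected to hold
# only the keys 'dimension' and 'variable', listing what the root rank should
# read and broadcast to the other ranks.
nc_dict = {'dimension': ['ncol'], 'variable': ['lats', 'lons']}
# data = read_netcdf_mpi('grid.nc', nc_dict, root=0)
# data['ncol'] -> dimension length, data['lats'] -> variable array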
def get_url(url):
    requests.adapters.DEFAULT_RETRIES = 5
    s = requests.session()
    s.keep_alive = False
    flag = True
    while flag:
        try:
            user_agent = user_Agent()
            res = requests.get(url, headers=user_agent)
            flag = False
            # res.request.headers holds the User-Agent that was set
        except Exception as e:
            logging.error(e)
            continue
    url_reg = re.compile(r'<a href=(/book/22/69708/\d+).html><li>')
    allUrl = url_reg.findall(res.text)
    return allUrl
def event_subscribe(self, msg):
    openid = msg["FromUserName"]
    # get userinfo from weixin
    userFromWeixin = self.get_subscribed_user(openid)
    if not userFromWeixin:
        logger.error("cannot get userInfo of a subscribed user, openid = " + openid)
    else:
        user = self.db.get_openid(openid)
        if user:
            # update
            self.db.update_item(openid, userFromWeixin)
            logger.info("update a userInfo, user= " + str(userFromWeixin))
        else:
            # insert
            self.db.insert_item(userFromWeixin)
            logger.info("insert a userInfo, user= " + str(userFromWeixin))
    return self.rep_follow(msg)
async def handle_worker(group, task):
    """Handle callback task
    """
    tp = TaskProtocal(task)
    task_dct = tp.get_data()
    if 'extra' in task_dct and 'cb' in task_dct['extra']:
        url = task_dct['extra']['cb'].get('url')
        async with aiohttp.ClientSession(conn_timeout=7) as session:
            try:
                async with session.post(
                        url, timeout=TIME_OUT,
                        data=zlib.compress(
                            json.dumps(task_dct).encode('utf-8'))) as resp:
                    html = await resp.read()
                    if resp.status != 200:
                        logger.error('[%d] %s' % (resp.status, url))
            except Exception as exc:
                logger.error('Request page fail : %s' % exc)
def post_default(self):
    msg = self.get_msg()
    # print msg
    try:
        msg_type = msg['MsgType']
        if self.verification():
            # add cookie
            self.set_secure_cookie("openid", msg["FromUserName"])
            info = getattr(self, 'post_' + msg_type)(msg)
        else:
            info = u"Message verification failed"
        if info:
            logger.info("send a message to weixin, info = " + info)
            self.write(self.rep_text(msg, info))
        else:
            self.write(self.rep_text(msg, u"不好意思,没处理好你的信息,请联系我的主人~"))
    except Exception as e:
        logger.error(e)
def get_illus(self, cxt_data):
    self.page.div()  # class_='section'
    # Get the largest (orig) image info
    orig = cxt_data.get('size').get('orig')
    # Get the medium image info
    medium = self.get_cxt_pic(cxt_data)
    if medium is None:
        logger.error(u'获取图片信息失败')
    # image src
    medium_src = str(medium.get('src'))
    # image path (format: base dir/author/book title/book size/image name)
    cxt_image_path = '%s/%s' % (self.file_dir, medium_src[medium_src.rfind('/')+1:])
    self.page.img(width=orig.get('width'), height=orig.get('height'), src=cxt_image_path)
    # Add the image caption
    legend = cxt_data.get('legend')
    legend_type = type(legend)
    if legend_type == str:
        self.page.label(str(legend), style='color:#555; font-size:.75em; line-height:1.5;')
    elif legend_type == dict:
        legend_data = legend.get('data')
        legend_data_paragraphs = legend_data.get('paragraphs')  # []
        for legend_data_paragraph in legend_data_paragraphs:
            desc_text = self.get_legend_paragraph_text(legend_data_paragraph)
            self.page.label(desc_text, style='color:#555; font-size:.75em; line-height:1.5;')
    elif legend_type == list:
        for legend_data in legend:
            l_type = legend_data.get('type')
            l_data = legend_data.get('data')
            if l_type == 'paragraph':
                desc_text = l_data.get('text')
                desc_text_type = type(desc_text)
                if desc_text_type == str:
                    self.page.label(str(desc_text),
                                    style='color:#555; font-size:.75em; line-height:1.5;')
                elif desc_text_type == list:
                    for desc_text_data in desc_text:
                        self.page.label(str(desc_text_data.get('content')),
                                        style='font-weight:bold;font-size: .75em; color: #555;line-height: 1;font-style: normal;')
                else:
                    logger.unknown(u'未知的内容:%s, 类型:%s' % (str(desc_text), str(desc_text_type)))
            else:
                logger.unknown(u'未知的内容:%s, 类型:%s' % (str(l_data), str(l_type)))
    self.page.div.close()
    return medium_src
def get(url: str, timeout=default_timeout, **kwargs):
    """
    Send a GET request.
    :param url: request URL
    :param timeout: timeout in seconds
    :param kwargs: extra parameters
    :return: Response object; raises HttpRequestException when the request fails
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout,
                                params=kwargs.get('params'))
        if not response.status_code == 200:
            raise HttpRequestException('url={},status_code {} 状态异常'.format(
                url, response.status_code))
        return response
    except Exception as e:
        logger.error('发送GET请求失败,url={},{}'.format(url, e.args))
        raise HttpRequestException(e)
def post(url: str, data: dict, timeout=default_timeout):
    """
    Send a POST request.
    :param url: request URL
    :param data: form data
    :param timeout: timeout in seconds
    :return: Response object; raises HttpRequestException when the request fails
    """
    try:
        response = requests.post(url, headers=headers, data=data, timeout=timeout)
        if not response.status_code == 200:
            raise HttpRequestException('url={},status_code {} 状态异常'.format(
                url, response.status_code))
        return response
    except Exception as e:
        logger.error('发送POST请求失败,url={},{}'.format(url, e.args))
        raise HttpRequestException(e)
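# Hedged usage sketch for the get/post helpers above (the URL and query
# parameters are made-up placeholders; the exception handling mirrors the
# documented failure behavior):
def _demo_request():
    try:
        resp = get('http://example.com/api/items', params={'page': 1})
        return resp.json()
    except HttpRequestException as e:
        logger.error(e)
        return None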
async def handle_worker(group, task):
    tp = TaskProtocal(task)
    f = tp.get_from()
    tid = tp.get_tid()
    step = tp.get_step()
    logger.info("ep: %s, tid: %s, step: %s" % (f, tid, step))
    if tid not in flow_conf[FLOW_TASK_CONF]:
        logger.error("Task ID [%s] error" % tid)
        return
    task_ls = []
    task_data = tp.get_data()
    if step+1 < len(flow_conf[FLOW_TASK_CONF][tid]):
        endpoint_name = flow_conf[FLOW_TASK_CONF][tid][step+1]['name']
        next_tp = tp.new_task(task_data, next_step=True)
        next_tp.set_to(endpoint_name)
        task_ls.append(next_tp)
    for f_tid in flow_conf[FLOW_TASK_CONF][tid][step].get('fork', []):
        endpoint_name = flow_conf[FLOW_TASK_CONF][f_tid][0]['name']
        fork_tp = tp.new_task(task_data, tid=f_tid)
        fork_tp.set_to(endpoint_name)
        task_ls.append(fork_tp)
    return task_ls
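# The routing above assumes flow_conf[FLOW_TASK_CONF][tid] is an ordered list
# of step configs, each with a 'name' endpoint and an optional 'fork' list of
# other task ids. A minimal illustrative shape (task ids and endpoint names
# are made-up assumptions, not taken from the original config):
# flow_conf = {
#     FLOW_TASK_CONF: {
#         'crawl_item': [
#             {'name': 'fetcher'},
#             {'name': 'parser', 'fork': ['store_raw']},
#             {'name': 'writer'},
#         ],
#         'store_raw': [{'name': 'archiver'}],
#     }
# }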
def get_index_url(indexnum, indexurl):
    db = MySQL()
    requests.adapters.DEFAULT_RETRIES = 5
    s = requests.session()
    s.keep_alive = False
    flag = True
    while flag:
        try:
            user_agent = user_Agent()
            res = requests.get(indexurl, headers=user_agent)
            flag = False
            # res.request.headers holds the User-Agent that was set
        except Exception as e:
            logging.error(e)
            continue
    max_index_reg = re.compile(r'<a id="pt_mulu">\d+/(\d+)</a>')
    max_index = max_index_reg.findall(res.text)[0]
    if indexnum == 0:
        logging.info("---索引下载中,请等待---")
        for i in range(1, int(max_index) + 1):
            if i == 1:
                page_url = "http://m.xsqishu.com/newbook/index.html"
            else:
                page_url = "http://m.xsqishu.com/newbook/index_" + str(i) + ".html"
            db.inertStoryPageIndex(i, page_url)
            msg = "下载第" + str(i) + "页"
            logging.info(msg)
    elif indexnum == int(max_index):
        logging.info("----当前已是最新索引,无需更新----")
    else:
        logging.info("----索引更新中,请等待----")
        for i in range(indexnum + 1, int(max_index) + 1):
            page_url = "http://m.xsqishu.com/newbook/index_" + str(i) + ".html"
            db.inertStoryPageIndex(i, page_url)
            msg = "更新第" + str(i) + "页"
            logging.info(msg)
def make_mpi_tables(self):
    '''
    Destination, source, weight from the sparse matrix
    Generate the meta index grouped by rank
    local_group: {dst:[(src,wgt),...]}
    send_group: {rank:{dst:[(src,wgt),...]},...}
    recv_group: {rank:[dst,...],...}
    All dictionaries are OrderedDicts.
    '''
    logger.debug('Make MPI tables')
    lids = self.lids
    arr_dict = self.arr_dict
    self.spmat_size = arr_dict['spmat_size']

    #---------------------------------------
    # local_group
    #---------------------------------------
    local_dsts = arr_dict['local_dsts']
    local_srcs = arr_dict['local_srcs']
    local_wgts = arr_dict['local_wgts']

    dsw_list = [(d,s,w) for d,s,w in zip(local_dsts,local_srcs,local_wgts)]
    local_group = OrderedDict([(dst, [(s,w) for (d,s,w) in val]) \
            for (dst, val) in groupby(dsw_list, lambda x:x[0])])
    local_src_size = len(dsw_list)
    local_buf_size = len(local_group)

    #---------------------------------------
    # send_group
    #---------------------------------------
    send_ranks = arr_dict['send_ranks']
    send_dsts = arr_dict['send_dsts']
    send_srcs = arr_dict['send_srcs']
    send_wgts = arr_dict['send_wgts']

    rdsw_list = [(r,d,s,w) for r,d,s,w in \
            zip(send_ranks,send_dsts,send_srcs,send_wgts)]
    sorted_rdsw_list = sorted(rdsw_list, key=lambda x:x[0])
    send_group_tmp = OrderedDict([(rank, [(d,s,w) for (r,d,s,w) in val]) \
            for (rank, val) in groupby(sorted_rdsw_list, lambda x:x[0])])
    send_group = OrderedDict()
    for rank, dsw_list in send_group_tmp.items():
        send_group[rank] = OrderedDict([(dst, [(s,w) for (d,s,w) in val]) \
                for (dst, val) in groupby(dsw_list, lambda x:x[0])])

    #---------------------------------------
    # recv_group
    #---------------------------------------
    recv_ranks = arr_dict['recv_ranks']
    recv_dsts = arr_dict['recv_dsts']

    rd_list = [(r,d) for r,d in zip(recv_ranks,recv_dsts)]
    sorted_rd_list = sorted(rd_list, key=lambda x:x[0])
    recv_group = OrderedDict([(rank, np.unique([d for (r,d) in val])) \
            for (rank, val) in groupby(sorted_rd_list, lambda x:x[0])])

    #-----------------------------------------------------
    # Make the send_schedule, send_dsts, send_srcs, send_wgts
    #-----------------------------------------------------
    logger.debug('Make the send_schedule, send_dsts, send_srcs, send_wgts')

    #---------------------------------------
    # size and allocation
    #---------------------------------------
    send_sche_size = len(send_group)
    send_buf_size = np.unique(send_dsts).size
    send_map_size = local_dsts.size + send_dsts.size

    send_schedule = np.zeros((send_sche_size,3), 'i4')  # (rank,start,size)
    send_dsts = np.zeros(send_map_size, 'i4')
    send_srcs = np.zeros(send_map_size, 'i4')
    send_wgts = np.zeros(send_map_size, 'f8')
    send_buf = np.zeros(send_buf_size, 'i4')    # global dst index

    #---------------------------------------
    # send_schedule
    #---------------------------------------
    send_buf_seq = 0
    for seq, rank in enumerate( send_group.keys() ):
        start = send_buf_seq
        size = len(send_group[rank])
        send_schedule[seq][:] = (rank, start, size)
        send_buf_seq += size

    if send_buf_size != send_buf_seq:
        logger.error("Error: send_buf_size(%d) != send_buf_seq(%d)"%(send_buf_size, send_buf_seq))
        raise SystemError

    #---------------------------------------
    # send local indices in myrank
    # directly go to the recv_buf, not to the send_buf
    #---------------------------------------
    seq = 0
    recv_buf_seq = 0
    for dst, sw_list in local_group.items():
        for src, wgt in sw_list:
            send_dsts[seq] = recv_buf_seq
            send_srcs[seq] = lids[src]
            send_wgts[seq] = wgt
            seq += 1
        recv_buf_seq += 1

    #---------------------------------------
    # send indices for the other ranks
    #---------------------------------------
    send_buf_seq = 0
    for rank, dst_dict in send_group.items():
        for dst, sw_list in dst_dict.items():
            for src, wgt in sw_list:
                send_dsts[seq] = send_buf_seq
                send_srcs[seq] = lids[src]
                send_wgts[seq] = wgt
                seq += 1
            send_buf[send_buf_seq] = dst    # for diagnostics
            send_buf_seq += 1

    if seq != send_map_size:
        logger.error("Error: seq(%d) != send_map_size(%d)"%(seq, send_map_size))
        raise SystemError

    if send_buf_seq != send_buf_size:
        logger.error("Error: send_buf_seq(%d) != send_buf_size(%d)"%(send_buf_seq, send_buf_size))
        raise SystemError

    #-----------------------------------------------------
    # Make the recv_schedule, recv_dsts, recv_srcs
    #-----------------------------------------------------
    logger.debug('Make the recv_schedule, recv_dsts, recv_srcs')

    #---------------------------------------
    # size and allocation
    #---------------------------------------
    recv_sche_size = len(recv_group)
    recv_buf_size = local_buf_size \
            + np.sum([d_unique.size for d_unique in recv_group.values()])
    recv_map_size = recv_buf_size

    recv_schedule = np.zeros((recv_sche_size,3), 'i4')  # (rank,start,size)
    recv_dsts = np.zeros(recv_map_size, 'i4')
    recv_srcs = np.zeros(recv_map_size, 'i4')

    #---------------------------------------
    # recv_schedule
    #---------------------------------------
    recv_buf_seq = local_buf_size
    for seq, (rank, d_unique) in enumerate( recv_group.items() ):
        start = recv_buf_seq
        size = d_unique.size
        recv_schedule[seq][:] = (rank, start, size)
        recv_buf_seq += size

    #---------------------------------------
    # recv indices
    #---------------------------------------
    recv_buf_list = list(local_group.keys())    # destinations
    for rank, d_unique in recv_group.items():
        recv_buf_list.extend(d_unique)
    recv_buf = np.array(recv_buf_list, 'i4')

    unique_dsts = np.unique(recv_buf)
    seq = 0
    for dst in unique_dsts:
        for bsrc in np.where(recv_buf==dst)[0]:
            recv_dsts[seq] = lids[dst]  # local index
            recv_srcs[seq] = bsrc       # buffer index
            seq += 1

    #-----------------------------------------------------
    # Public variables for diagnostic
    #-----------------------------------------------------
    self.local_group = local_group
    self.send_group = send_group
    self.recv_group = recv_group
    self.send_buf = send_buf    # global dst index
    self.recv_buf = recv_buf    # global dst index

    #-----------------------------------------------------
    # Public variables
    #-----------------------------------------------------
    self.local_src_size = local_src_size
    self.send_buf_size = send_buf_size
    self.recv_buf_size = recv_buf_size

    self.send_schedule = send_schedule          # (rank,start,size)
    self.send_dsts = np.array(send_dsts, 'i4')  # to buffer
    self.send_srcs = np.array(send_srcs, 'i4')  # from local
    self.send_wgts = np.array(send_wgts, 'f8')

    self.recv_schedule = recv_schedule          # (rank,start,size)
    self.recv_dsts = np.array(recv_dsts, 'i4')  # to local
    self.recv_srcs = np.array(recv_srcs, 'i4')  # from buffer
def __init__(self, cubegrid, method):
    self.cubegrid = cubegrid
    self.method = method        # method represented by the sparse matrix

    self.ne = ne = cubegrid.ne
    self.ngq = ngq = cubegrid.ngq
    self.nproc = nproc = cubegrid.nproc
    self.myrank = myrank = cubegrid.myrank
    self.ranks = ranks = cubegrid.ranks
    self.lids = lids = cubegrid.lids

    #-----------------------------------------------------
    # Read the sparse matrix
    #-----------------------------------------------------
    if method.upper() == 'AVG':
        # Average the boundary of elements for the Spectral Element Method
        spmat_fpath = fdir + 'spmat_avg_ne%dngq%d.nc'%(ne, ngq)
    elif method.upper() == 'COPY':
        # Copy from UP to EPs at the boundary of elements
        spmat_fpath = fdir + 'spmat_copy_ne%dngq%d.nc'%(ne, ngq)
    elif method.upper() == 'IMPVIS':
        # Implicit Viscosity
        # High-Order Elliptic Filter
        spmat_fpath = fdir + 'spmat_impvis_ne%dngq%d.nc'%(ne, ngq)
    else:
        raise ValueError("The method must be one of 'AVG', 'COPY', 'IMPVIS'")

    spmat_ncf = nc.Dataset(spmat_fpath, 'r', format='NETCDF4')
    spmat_size = len( spmat_ncf.dimensions['spmat_size'] )
    dsts = spmat_ncf.variables['dsts'][:]
    srcs = spmat_ncf.variables['srcs'][:]
    wgts = spmat_ncf.variables['weights'][:]

    #-----------------------------------------------------
    # Destination, source, weight from the sparse matrix
    # Generate the meta index grouped by rank
    # local_group: {dst:[(src,wgt),...]}
    # send_group: {rank:{dst:[(src,wgt),...]},...}
    # recv_group: {rank:{dst:[src,...]},...}
    # All dictionaries are OrderedDicts.
    #-----------------------------------------------------
    logger.debug('Generate the meta index grouped by rank')

    rank_dsts = ranks[dsts]                 # rank number of destinations
    rank_srcs = ranks[srcs]                 # rank number of sources
    myrank_dsts = (rank_dsts == myrank)     # bool type array
    myrank_srcs = (rank_srcs == myrank)

    local_idxs = np.where( myrank_dsts * myrank_srcs )[0]
    send_idxs = np.where( np.invert(myrank_dsts) * myrank_srcs )[0]
    recv_idxs = np.where( myrank_dsts * np.invert(myrank_srcs) )[0]

    #---------------------------------------
    # local_group
    #---------------------------------------
    local_dsts = dsts[local_idxs]
    local_srcs = srcs[local_idxs]
    local_wgts = wgts[local_idxs]
    '''
    dsw_list = [(dsts[i],srcs[i],wgts[i]) for i in local_idxs]
    local_group = OrderedDict([(dst, [(s,w) for (d,s,w) in val]) \
            for (dst, val) in groupby(dsw_list, lambda x:x[0])])
    local_src_size = len(dsw_list)
    local_buf_size = len(local_group)
    '''

    #---------------------------------------
    # send_group
    #---------------------------------------
    send_ranks = rank_dsts[send_idxs]
    send_dsts = dsts[send_idxs]
    send_srcs = srcs[send_idxs]
    send_wgts = wgts[send_idxs]
    '''
    rdsw_list = [(rank_dsts[i],dsts[i],srcs[i],wgts[i]) for i in send_idxs]
    sorted_rdsw_list = sorted(rdsw_list, key=lambda x:x[0])
    send_group_tmp = OrderedDict([(rank, [(d,s,w) for (r,d,s,w) in val]) \
            for (rank, val) in groupby(sorted_rdsw_list, lambda x:x[0])])
    send_group = OrderedDict()
    for rank, dsw_list in send_group_tmp.items():
        send_group[rank] = OrderedDict([(dst, [(s,w) for (d,s,w) in val]) \
                for (dst, val) in groupby(dsw_list, lambda x:x[0])])
    '''

    #---------------------------------------
    # recv_group
    #---------------------------------------
    recv_ranks = rank_srcs[recv_idxs]
    recv_dsts = dsts[recv_idxs]
    recv_srcs = srcs[recv_idxs]
    '''
    rds_list = [(rank_srcs[i],dsts[i],srcs[i]) for i in recv_idxs]
    sorted_rds_list = sorted(rds_list, key=lambda x:x[0])
    recv_group_tmp = OrderedDict([(rank, [(d,s) for (r,d,s) in val]) \
            for (rank, val) in groupby(sorted_rds_list, lambda x:x[0])])
    recv_group = OrderedDict()
    for rank, ds_list in recv_group_tmp.items():
        recv_group[rank] = OrderedDict([(dst, [s for (d,s) in val]) \
                for (dst, val) in groupby(ds_list, lambda x:x[0])])
    '''

    #-----------------------------------------------------
    # Make the send_schedule, send_dsts, send_srcs, send_wgts
    #-----------------------------------------------------
    logger.debug('Make the send_schedule, send_dsts, send_srcs, send_wgts')

    #---------------------------------------
    # size and allocation
    #---------------------------------------
    r_uniques, r_indices, r_counts = \
            np.unique(send_ranks, return_index=True, return_counts=True)
    send_schedule_size = r_uniques.size
    send_buf_size = np.unique(send_dsts).size
    send_map_size = local_dsts.size + send_dsts.size

    send_schedule = np.zeros((send_schedule_size,3), 'i4')  # (rank,start,size)
    send_dsts = np.zeros(send_map_size, 'i4')
    send_srcs = np.zeros(send_map_size, 'i4')
    send_wgts = np.zeros(send_map_size, 'f8')
    send_buf = np.zeros(send_buf_size, 'i4')    # global dst index

    #---------------------------------------
    # send_schedule
    #---------------------------------------
    send_buf_seq = 0
    for seq, (rank, r_start, r_size) in enumerate( zip(r_uniques, r_indices, r_counts) ):
        r_end = r_start + r_size
        start = send_buf_seq
        size = np.unique(send_dsts[r_start:r_end]).size
        send_schedule[seq][:] = (rank, start, size)
        send_buf_seq += size

    if send_buf_size != send_buf_seq:
        logger.error("Error: send_buf_size(%d) != send_buf_seq(%d)"%(send_buf_size, send_buf_seq))

    #---------------------------------------
    # send local indices in myrank
    # directly go to the recv_buf, not to the send_buf
    #---------------------------------------
    d_uniques, d_indices, d_counts = \
            np.unique(local_dsts, return_index=True, return_counts=True)

    seq = 0
    recv_buf_seq = 0
    for d_start, d_size in zip(d_indices, d_counts):
        d_end = d_start + d_size
        send_dsts[seq:seq+d_size] = recv_buf_seq
        send_srcs[seq:seq+d_size] = lids[local_srcs[d_start:d_end]]
        send_wgts[seq:seq+d_size] = local_wgts[d_start:d_end]
        seq += d_size
        recv_buf_seq += 1

    #---------------------------------------
    # send indices for the other ranks
    #---------------------------------------
    send_buf_seq = 0
    for r_start, r_size in zip(r_indices, r_counts):
        r_end = r_start + r_size
        d_uniques, d_indices, d_counts = \
                np.unique(send_dsts[r_start:r_end], \
                          return_index=True, return_counts=True)
        for dst, d_start, d_size in zip(d_uniques, d_indices, d_counts):
            d_end = d_start + d_size
            send_dsts[seq:seq+d_size] = send_buf_seq
            send_srcs[seq:seq+d_size] = lids[send_srcs[d_start:d_end]]
            send_wgts[seq:seq+d_size] = send_wgts[d_start:d_end]
            send_buf[send_buf_seq] = dst    # for diagnostics
            seq += d_size
            send_buf_seq += 1

    if seq != send_map_size:
        logger.error("Error: seq(%d) != send_map_size(%d)"%(seq, send_map_size))
    if send_buf_seq != send_buf_size:
        logger.error("Error: send_buf_seq(%d) != send_buf_size(%d)"%(send_buf_seq, send_buf_size))

    #-----------------------------------------------------
    # Make the recv_schedule, recv_dsts, recv_srcs
    #-----------------------------------------------------
    logger.debug('Make the recv_schedule, recv_dsts, recv_srcs')

    #---------------------------------------
    # sorting
    #---------------------------------------
    sort_idx = np.argsort(recv_ranks)
    recv_ranks = recv_ranks[sort_idx]
    recv_dsts = recv_dsts[sort_idx]
    recv_srcs = recv_srcs[sort_idx]

    #---------------------------------------
    # size and allocation
    #---------------------------------------
    r_uniques, r_indices, r_counts = \
            np.unique(recv_ranks, return_index=True, return_counts=True)
    recv_schedule_size = r_uniques.size
    unique_local_dsts = np.unique(local_dsts)
    recv_buf_local_size = unique_local_dsts.size
    recv_buf_size = recv_buf_local_size + np.unique(recv_dsts).size
    recv_map_size = recv_dsts.size

    recv_schedule = np.zeros((recv_schedule_size,3), 'i4')  # (rank,start,size)
    recv_dsts = np.zeros(recv_map_size, 'i4')
    recv_srcs = np.zeros(recv_map_size, 'i4')
    recv_buf = np.zeros(recv_buf_size, 'i4')

    #---------------------------------------
    # recv_schedule
    #---------------------------------------
    recv_buf_seq = 0
    for seq, (rank, r_start, r_size) in enumerate( zip(r_uniques, r_indices, r_counts) ):
        r_end = r_start + r_size
        start = recv_buf_seq
        size = np.unique(recv_dsts[r_start:r_end]).size
        recv_schedule[seq][:] = (rank, start, size)
        recv_buf_seq += size

    if recv_buf_size != recv_buf_seq:
        logger.error("Error: recv_buf_size(%d) != recv_buf_seq(%d)"%(recv_buf_size, recv_buf_seq))

    #---------------------------------------
    # recv indices
    #---------------------------------------
    recv_buf[:recv_buf_local_size] = unique_local_dsts[:]   # destinations

    for rank, r_start, r_size in zip(r_uniques, r_indices, r_counts):
        r_end = r_start + r_size
        sort_idx = np.argsort(recv_dsts[r_start:r_end])
        recv_dsts = recv_dsts[r_start:r_end][sort_idx]
        recv_srcs = recv_srcs[r_start:r_end][sort_idx]
        d_uniques, d_indices, d_counts = \
                np.unique(recv_dsts, return_index=True, return_counts=True)
        for dst, d_start, d_size in zip(d_uniques, d_indices, d_counts):
            d_end = d_start + d_size

    for rank in recv_group.keys():
        recv_buf_list.extend( recv_group[rank].keys() )
    recv_buf = np.array(recv_buf_list, 'i4')
    equal(recv_buf_size, len(recv_buf))

    unique_dsts = np.unique(recv_buf)
    recv_dsts, recv_srcs = [], []
    for dst in unique_dsts:
        for bsrc in np.where(recv_buf==dst)[0]:
            recv_dsts.append(lids[dst])     # local index
            recv_srcs.append(bsrc)          # buffer index

    #-----------------------------------------------------
    # Public variables for diagnostic
    #-----------------------------------------------------
    self.send_buf = send_buf    # global dst index
    self.recv_buf = recv_buf    # global dst index

    #-----------------------------------------------------
    # Public variables
    #-----------------------------------------------------
    self.spmat_size = spmat_size
    self.local_gids = cubegrid.local_gids
    self.local_src_size = local_src_size
    self.send_buf_size = send_buf_size
    self.recv_buf_size = recv_buf_size

    self.send_schedule = send_schedule          # (rank,start,size)
    self.send_dsts = np.array(send_dsts, 'i4')  # to buffer
    self.send_srcs = np.array(send_srcs, 'i4')  # from local
    self.send_wgts = np.array(send_wgts, 'f8')

    self.recv_schedule = recv_schedule          # (rank,start,size)
    self.recv_dsts = np.array(recv_dsts, 'i4')  # to local
    self.recv_srcs = np.array(recv_srcs, 'i4')  # from buffer