def listener2(host):
    if host not in hosts:
        log.error('MM:00 ERROR: LISTENER: unknown host: ' + host)
        return
    h = hosts[host]
    for p in h.tcp:
        listener3(host, h.bind, p)
def fn():
    # response = session.get(url=request_url, headers=headers, cookies=cookie, proxies=proxies)
    response = requests.get(
        request_url,
        headers=headers,
        cookies=cookie,
        proxies=proxies,
        timeout=10,
    )
    # update cookies after first visit
    # cookie = response.cookies
    # cookie = dict(cookies_are='')
    if response.status_code == 200:
        try:
            items = response.json()['content']['data']['page']['result']
            if len(items) > 0:
                for each_item in items:
                    if "今天" in each_item['createTime']:
                        each_item['createTime'] = re.sub(
                            "今天.*",
                            str(datetime.date.today()),
                            each_item['createTime'],
                        )
                    elif "昨天" in each_item['createTime']:
                        today = datetime.date.today()
                        oneday = datetime.timedelta(days=1)
                        yesterday = today - oneday
                        each_item['createTime'] = re.sub(
                            "昨天.*", str(yesterday), each_item['createTime'])
                    JOB_DATA.append([
                        each_item['positionId'],
                        each_item['positionName'],
                        each_item['city'],
                        each_item['createTime'],
                        each_item['salary'],
                        each_item['companyId'],
                        each_item['companyName'],
                        each_item['companyFullName'],
                    ])
                    print(each_item)
                print('crawling page %d done...' % i)
                time.sleep(random.randint(6, 10))
                return True
            else:
                return False
        except Exception:
            print(
                'Error occurs during visiting Lagou. The ERROR_CODE is {0}. Return: {1}'
                .format(response.status_code, response.text))
            pass
    elif response.status_code == 403:
        log.error('request is forbidden by the server...')
        return True
    else:
        log.error(response.status_code)
def genericObjNW(host, label, cmd):
    if host not in participants:
        log.error('MM:' + host + ' ERROR: ' + label + ': Can only send to a participant: ' + host)
        return None
    try:
        hostdata = hosts[host]
    except:
        try:
            hostdata = bgprouters[host]
        except:
            hostdata = participants[host]
    #print 'MM:' + host + ' INFO: ' + why + ': Connecting to ' + host + ' at ' + hostdata.host + ':' + str(hostdata.port)
    try:
        if hostdata.port is None:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)  # @UndefinedVariable
            s.connect(hostdata.host)
        else:
            s = Client((hostdata.host, int(hostdata.port)))
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + label + ': ' + repr(e))
        return None
def generic(host, label, cmd):
    if host in participants:
        log.error('MM:' + host + ' ERROR: ' + label + ': Cannot send to a participant: ' + host)
        return None
    s = connect(host, label)
    if s == None:
        return None
    alldata = ''
    try:
        s.send(cmd)
        while True:
            data = s.recv(1024)
            if len(data) == 0:
                break
            alldata += data
            #sys.stdout.write(data)
        s.close()
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + label + ': ' + repr(e))
        try:
            s.close()
        except:
            pass
        return None
def connect(host, why):
    # should be either a listener host or a router host (edge-router)
    if host not in bgprouters and host not in hosts and host not in participants:
        log.error('MM:' + host + ' ERROR: ' + why + ': Unknown host: ' + host)
        return None
    try:
        hostdata = hosts[host]
    except:
        try:
            hostdata = bgprouters[host]
        except:
            hostdata = participants[host]
    #print 'MM:' + host + ' INFO: ' + why + ': Connecting to ' + host + ' at ' + hostdata.host + ':' + str(hostdata.port)
    try:
        if hostdata.port is None:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)  # @UndefinedVariable
            s.connect(hostdata.host)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((hostdata.host, int(hostdata.port)))
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + why + ': ' + repr(e))
        return None
def connect(host, why):
    # should be either a listener host or a router host (edge-router)
    try:
        hostdata = hosts[host]
    except:
        if host not in bgprouters:
            log.error('MM:' + host + ' ERROR: ' + why + ': Unknown host: ' + host)
            return None
    try:
        cmdifc = hostdata['cmdifc']
        cmdport = hostdata['cmdport']
    except:
        if platform.system() == 'Windows':
            cmdifc = '127.0.0.1'
            cmdport = base36(host)
        else:
            cmdifc = '/tmp/' + host
            cmdport = 0
    #print 'MM:' + host + ' INFO: ' + why + ': Connecting to ' + host + ' at ' + cmdifc + ':' + str(cmdport)
    try:
        if cmdifc.find('/') >= 0:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)  # @UndefinedVariable
            s.connect(cmdifc)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((cmdifc, cmdport))
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + why + ': ' + repr(e))
        return None
def _on_connection_success_item(self, connection_item, stream):
    self._off_connection_timeout_handler()
    log.debug(u"Connection Success {}".format(self.client_config.address_str))
    try:
        self.stream = stream
        self.stream.set_close_callback(self._on_connection_close)
        self.stream.set_nodelay(True)
        #: send message
        self._sending_connection_item(connection_item)
        #: fetch message
        read_status = yield self._read_message(connection_item)
        if read_status:
            connection_item.callback(RPCMessage(
                CONNECTION_TYPE_IN_RESPONSE, self._message.topic, self._message.body))
        else:
            log.error("Malformed Client Request")
    except Exception as e:
        log.error(e)
        traceback.print_exc()
    finally:
        self.close()
def initialize_segments(config_segments: Union[dict, list]) -> List[Segment]:
    seen_segment_names: Set[str] = set()
    ret = []

    for i, seg_yaml in enumerate(config_segments):
        # rompos marker
        if isinstance(seg_yaml, list) and len(seg_yaml) == 1:
            continue

        seg_type = Segment.parse_segment_type(seg_yaml)
        segment_class = Segment.get_class_for_type(seg_type)

        this_start = Segment.parse_segment_start(seg_yaml)
        next_start = Segment.parse_segment_start(config_segments[i + 1])

        segment: Segment = Segment.from_yaml(segment_class, seg_yaml, this_start, next_start)

        if segment.require_unique_name:
            if segment.name in seen_segment_names:
                log.error(f"segment name '{segment.name}' is not unique")
            seen_segment_names.add(segment.name)

        ret.append(segment)

    return ret
def build_local_confidence_loss(output_list, mask_list, weights, loss_name,
                                scale=False, regularizer_weight=1e-2):
    log.warn('Build {} loss: len(output_list): {}'.format(
        loss_name, len(output_list)))
    # l1 loss
    l1_loss = 0
    for i in range(len(output_list)):
        normalized_mask = tf.reshape(
            tf.nn.l2_normalize(tf.reshape(mask_list[i], [self.batch_size, -1]), dim=-1),
            [self.batch_size, self.input_height, self.input_width])
        current_weight = weights[i % num_scale] if scale else 1
        img = output_list[i]
        loss_map = tf.reduce_mean(tf.abs(img - tf.image.resize_images(
            target_image, [int(img.get_shape()[1]), int(img.get_shape()[2])])),
            axis=-1)
        log.error('loss map shape: {}'.format(loss_map.get_shape().as_list()))
        l1_loss += tf.reduce_mean(loss_map * normalized_mask) * current_weight / \
            (int(img.get_shape()[1]) * int(img.get_shape()[2])) * regularizer_weight
    l1_loss = l1_loss / len(output_list)
    return l1_loss
def crawl_company_stage(company_id):
    req_url = 'https://m.lagou.com/gongsi/%s.html' % str(company_id)
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Host': 'm.lagou.com',
        'Referer': 'https://m.lagou.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
    }
    response = requests.get(req_url, headers=headers, cookies=m_lagou_spider.init_cookies(), timeout=20)
    print(response.url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html5lib')
        company_desc = soup.find_all(class_="desc")[0].get_text().strip()
        industryField = company_desc.split('/')[0].strip()
        financeStage = company_desc.split('/')[1].strip()
        staffNum = company_desc.split('/')[2].strip()
    elif response.status_code == 403:
        log.error('403 forbidden...')
    else:
        log.error(response.status_code)
    time.sleep(random.randint(3, 6))
    return [company_id, industryField, financeStage, staffNum]
def get_max_pageNo(positionName):
    """ return the max page number of a specific job """
    request_url = 'https://m.lagou.com/search.json?city=%E5%85%A8%E5%9B%BD&positionName=' + parse.quote(
        positionName) + '&pageNo=1&pageSize=15'
    headers = {
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Host': 'm.lagou.com',
        'Referer': 'https://m.lagou.com/search.html',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) '
                      'Version/8.0 Mobile/12A4345d Safari/600.1.4',
        'X-Requested-With': 'XMLHttpRequest',
        'Connection': 'keep-alive'
    }
    response = requests.get(request_url, headers=headers, cookies=init_cookies(), timeout=10)
    print("Getting data from %s successfully. URL: " % positionName + request_url)
    if response.status_code == 200:
        max_page_no = int(int(response.json()['content']['data']['page']['totalCount']) / 15 + 1)
        return max_page_no
    elif response.status_code == 403:
        log.error('request is forbidden by the server...')
        return 0
    else:
        log.error(response.status_code)
        return 0
def crawl_company_stage(company_id):
    # request URL
    req_url = 'https://m.lagou.com/gongsi/%s.html' % str(company_id)
    # request headers
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Host': 'm.lagou.com',
        'Referer': 'https://m.lagou.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
    }
    # send the request; the cookies come from m_lagou_spider, i.e. the cookies obtained at first login
    response = requests.get(req_url, headers=headers, cookies=m_lagou_spider.get_cookies(), timeout=20)
    # print the response URL
    print(response.url)
    # on 200, parse each field with BeautifulSoup
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html5lib')
        company_desc = soup.find_all(class_="desc")[0].get_text().strip()
        industryField = company_desc.split('/')[0].strip()
        financeStage = company_desc.split('/')[1].strip()
        staffNum = company_desc.split('/')[2].strip()
    # on 403, log the message below
    elif response.status_code == 403:
        log.error('403 forbidden...')
    else:
        log.error(response.status_code)
    # sleep 2s
    time.sleep(config.TIME_SLEEP)
    # return the result as a list
    return [company_id, industryField, financeStage, staffNum]
def __init__(self, rom_start, rom_end, type, name, vram_start, extract, given_subalign,
             given_is_overlay, given_dir, args, yaml):
    super().__init__(rom_start, rom_end, type, name, vram_start, extract, given_subalign,
                     given_is_overlay, given_dir, args, yaml)

    self.raster: 'Optional[Raster]' = None

    # palette segments must be named as one of the following:
    #  1) same as the relevant raster segment name (max. 1 palette)
    #  2) relevant raster segment name + "." + unique palette name
    #  3) unique, referencing the relevant raster segment using `raster_name`
    self.raster_name = yaml.get("raster_name", self.name.split(".")[0]) if isinstance(
        yaml, dict) else self.name.split(".")[0]

    if self.extract:
        if self.rom_end == "auto":
            log.error(
                f"segment {self.name} needs to know where it ends; add a position marker [0xDEADBEEF] after it"
            )

        if self.max_length() and isinstance(self.rom_end, int):
            expected_len = int(self.max_length())
            actual_len = self.rom_end - self.rom_start
            if actual_len > expected_len and actual_len - expected_len > self.subalign:
                log.error(
                    f"Error: {self.name} should end at 0x{self.rom_start + expected_len:X}, but it ends at 0x{self.rom_end:X}\n(hint: add a 'bin' segment after it)"
                )
def disassemble_data(self, rom_bytes):
    vertex_data = rom_bytes[self.rom_start:self.rom_end]
    segment_length = len(vertex_data)
    if (segment_length) % 16 != 0:
        error(
            f"Error: Vtx segment {self.name} length ({segment_length}) is not a multiple of 16!"
        )

    lines = []

    preamble = options.get_generated_c_premble()
    lines.append(preamble)
    lines.append("")

    vertex_count = segment_length // 16
    cname = re.sub(r"[^0-9a-zA-Z_]", "_", self.name)

    lines.append(f"Vtx {cname}[{vertex_count}] = {{")

    for vtx in struct.iter_unpack(">hhhHhhBBBB", vertex_data):
        x, y, z, flg, t, c, r, g, b, a = vtx
        vtx_string = f"    {{{{{{ {x:5}, {y:5}, {z:5} }}, {flg}, {{ {t:5}, {c:5} }}, {{ {r:3}, {g:3}, {b:3}, {a:3} }}}}}},"
        if flg != 0:
            self.warn(f"Non-zero flag found in vertex data {self.name}!")
        lines.append(vtx_string)

    lines.append("};")

    # enforce newline at end of file
    lines.append("")
    return "\n".join(lines)
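# --- Example (not from the source): a minimal, self-contained sketch of how the
# --- ">hhhHhhBBBB" format above maps one 16-byte Vtx record to position, flag,
# --- texture coordinates and color. Uses only the standard library; the sample
# --- values are made up.
import struct

raw = struct.pack(">hhhHhhBBBB", 10, -20, 30, 0, 512, 1024, 255, 128, 64, 255)
x, y, z, flg, t, c, r, g, b, a = struct.unpack(">hhhHhhBBBB", raw)
print(x, y, z, flg, t, c, r, g, b, a)  # 10 -20 30 0 512 1024 255 128 64 255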
def parse(line):
    global cmdfuncs

    tokens = line.split()
    n = len(tokens)
    if n == 0:
        return
    cmd = tokens[0]
    if cmd[0] == '#':
        return

    # search for the command
    # if there is a dictionary in the list, iterate over all the elements or all the arguments
    # else pass the entire set of arguments to the command
    foo = cmdfuncs.get(cmd)
    if foo != None:
        func, jset, _ = foo
        if jset is None:
            func(tokens)
            return
        if n == 1:
            for h in sorted(jset):
                func(h)
        else:
            for i in range(1, n):
                func(tokens[i])
        return

    log.error('MM:00 ERROR: unknown command: ' + cmd)
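# --- Example (hypothetical, not from the source): the shape of a cmdfuncs entry that
# --- parse() above dispatches on: (handler, host-set-or-None, description). The command
# --- name, handler, and host names here are placeholders.
def show(token):
    print('show called for ' + token)

cmdfuncs = {
    'show': (show, {'h1', 'h2'}, 'show per-host state'),
}

parse('show h2')   # dispatches show('h2')
parse('show')      # no arguments: calls show() once per host in the sorted set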
def get_max_pageNo(positionName):
    """return the max page number of a specific job"""
    cookies = get_cookies()
    request_url = 'https://m.lagou.com/search.json?city=%E5%85%A8%E5%9B%BD&positionName=' + parse.quote(
        positionName) + '&pageNo=1&pageSize=15'
    headers = {
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Host': 'm.lagou.com',
        'Referer': 'https://m.lagou.com/search.html',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) '
                      'Version/8.0 Mobile/12A4345d Safari/600.1.4',
        'X-Requested-With': 'XMLHttpRequest',
        'Connection': 'keep-alive'
    }
    response = requests.get(request_url, headers=headers, cookies=cookies, timeout=10)
    print("Getting data from %s successfully~" % positionName + request_url)
    if response.status_code == 200:
        max_page_no = int(int(response.json()['content']['data']['page']['totalCount']) / 15 + 1)
        return max_page_no
    elif response.status_code == 403:
        log.error('request is forbidden by the server...')
        return 0
    else:
        log.error(response.status_code)
        return 0
def _on_connection_success_item(self, connection_item, stream):
    self._off_connection_timeout_handler()
    log.debug(u"Connection Success {}".format(self.client_config.address_str))
    try:
        self.stream = stream
        self.stream.set_close_callback(self._on_connection_close)
        self.stream.set_nodelay(True)
        #: send message
        self._sending_connection_item(connection_item)
        #: fetch message
        read_status = yield self._read_message(connection_item)
        if read_status:
            connection_item.callback(
                RPCMessage(CONNECTION_TYPE_IN_RESPONSE, self._message.topic, self._message.body))
        else:
            log.error("Malformed Client Request")
    except Exception as e:
        log.error(e)
        traceback.print_exc()
    finally:
        self.close()
def connect(host, why):
    # should be either a listener host or a router host (edge-router)
    try:
        hostdata = hosts[host]
    except:
        if host not in bgprouters:
            log.error('MM:' + host + ' ERROR: ' + why + ': Unknown host: ' + host)
            return None
    try:
        cmdifc = hostdata['cmdifc']
        cmdport = hostdata['cmdport']
    except:
        if platform.system() == 'Windows':
            cmdifc = '127.0.0.1'
            cmdport = base36(host)
        else:
            cmdifc = '/tmp/' + host
            cmdport = 0
    #print 'MM:' + host + ' INFO: ' + why + ': Connecting to ' + host + ' at ' + cmdifc + ':' + str(cmdport)
    try:
        if cmdifc.find('/') >= 0:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)  # @UndefinedVariable
            s.connect(cmdifc)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((cmdifc, cmdport))
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + why + ': ' + repr(e))
        return None
def execute_fetchone(self, query, args=None):
    try:
        self.execute(query, args)
        return self.fetchone()
    except Exception as e:
        error('execute_fetchone$$sql:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(e))
        return None
def delay(args):
    if len(args) == 1:
        try:
            log.info('MM:00: DELAY ' + args[0])
            time.sleep(float(args[0]))
        except Exception, e:
            log.error('MM:00 ERROR: DELAY: exception: ' + repr(e))
def connect(host, why):
    # should be either a listener host or a router host (edge-router)
    if host not in bgprouters and host not in hosts and host not in participants:
        log.error('MM:' + host + ' ERROR: ' + why + ': Unknown host: ' + host)
        return None
    try:
        hostdata = hosts[host]
    except:
        try:
            hostdata = bgprouters[host]
        except:
            hostdata = participants[host]
    #print 'MM:' + host + ' INFO: ' + why + ': Connecting to ' + host + ' at ' + hostdata.host + ':' + str(hostdata.port)
    try:
        if hostdata.port is None:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)  # @UndefinedVariable
            s.connect(hostdata.host)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((hostdata.host, int(hostdata.port)))
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + why + ': ' + repr(e))
        return None
def crawl_jobs(positionName, detail=False):
    """crawl the job info from lagou H5 web pages"""
    # joblist = list()
    JOB_DATA = list()
    max_page_number = get_max_pageNo(positionName)
    log.info("%s: %s pages of records, roughly %s records in total",
             positionName, max_page_number, max_page_number * 15)
    cookies = get_cookies()
    for i in range(1, max_page_number + 1):
        request_url = 'https://m.lagou.com/search.json?city=%E5%85%A8%E5%9B%BD&positionName=' + parse.quote(
            positionName) + '&pageNo=' + str(i) + '&pageSize=15'
        response = requests.get(request_url, headers=headers, cookies=cookies)
        if response.status_code == 200:
            for each_item in response.json()['content']['data']['page']['result']:
                # print each_item
                JOB_DATA.append([each_item.get('positionId', 0),
                                 each_item.get('positionName', None),
                                 each_item.get('city', None),
                                 each_item.get('createTime', None),
                                 each_item.get('salary', None),
                                 each_item.get('companyId', 0),
                                 each_item.get('companyName', None),
                                 each_item.get('companyFullName', None)])
                if detail:
                    for _ in range(LOOP_TIMES):
                        status = crawl_job_detail(each_item['positionId'], positionName)
                        if status or _ > LOOP_TIMES:
                            break
            print('crawling page %d done...' % i)
            time.sleep(TIME_SLEEP)
        elif response.status_code == 403:
            log.error('request is forbidden by the server...')
        else:
            log.error(response.status_code)
    return JOB_DATA
def listener(host):
    interfaces = config["hosts"][host]['interfaces']
    for name in sorted(interfaces):
        s = connect(host, 'LISTENER')
        if s == None:
            return
        try:
            interface = interfaces[name]
            addr = interface['bind']
            port = interface['port']
        except:
            log.error('MM:' + host + ' ERROR: Bad interface spec ' + name)
            continue
        try:
            s.send('listener ' + name + ' ' + addr + ' ' + str(port) + '\n')
            while True:
                data = s.recv(1024)
                if len(data) == 0:
                    break
                sys.stdout.write(data)
            s.close()
        except Exception, e:
            log.error('MM:00 ERROR: ' + repr(e))
def test(tn):
    global config

    rand = str(random.randint(1000000000, 9999999999))  # must be 10 characters

    try:
        src = config["tests"][tn]['src']
        baddr = config["tests"][tn]['baddr']
        daddr = config["tests"][tn]['daddr']
        dport = config["tests"][tn]['dport']
        xifc = config["tests"][tn]['xifc']
        xdst = config["tests"][tn]['xdst']
    except:
        log.error('MM:00 ERROR: TEST FAILED unknown or poorly specified test: ' + tn)
        return

    s = connect(src, 'TEST')
    if s == None:
        return
    try:
        s.send('test ' + rand + ' ' + baddr + ' ' + daddr + ' ' + str(dport) + '\n')
        alldata = ''
        while True:
            data = s.recv(1024)
            if len(data) == 0:
                break
            alldata += data
            #sys.stdout.write(data)
        s.close()
    except Exception, e:
        log.error('MM:' + src + ' ERROR: TEST FAILED ' + repr(e))
        return
def run(args):
    if len(args) < 2:
        print 'MM:00 EXEC: ERROR usage: exec cmd cmd ...'
        print 'Commands are:'
        for c in sorted(commands):
            print '    ' + c + ': ' + commands[c].get('cmd', '<CMD>')
        return
    for i in range(1, len(args)):
        cmdname = args[i]
        try:
            c = commands[cmdname]['cmd']
        except:
            log.error('MM:00 ERROR: EXEC FAILED unknown or poorly specified cmd: ' + cmdname)
            continue
        log.info('MM:00 EXEC: ' + cmdname + ' cmd = ' + c)
        ca = c.split()
        try:
            p = subprocess.Popen(ca, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = p.communicate()
        except Exception, e:
            out = ''
            err = 'Command Failed: ' + repr(e)
        r = out + err
        log.debug('MM:00 EXEC: ' + cmdname + ' output = \n' + r.strip())
def connect(host, why):
    global config

    try:
        hostdata = config['hosts'][host]
    except:
        log.error('MM:' + host + ' ERROR: ' + why + ': Unknown host: ' + host)
        return None
    try:
        cmdifc = hostdata['cmdifc']
        cmdport = hostdata['cmdport']
    except:
        if platform.system() == 'Windows':
            cmdifc = '127.0.0.1'
            cmdport = base36(host)
        else:
            cmdifc = '/tmp/' + host
            cmdport = 0
    #print 'MM:' + host + ' INFO: ' + why + ': Connecting to ' + host + ' at ' + cmdifc + ':' + str(cmdport)
    try:
        if cmdifc.find('/') >= 0:
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)  # @UndefinedVariable
            s.connect(cmdifc)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((cmdifc, cmdport))
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + why + ': ' + repr(e))
        return None
def query_for_list(self, query, args=None):
    try:
        self.execute(query, args)
        for row in self.cursor:
            yield row
    except Exception as e:
        error('query_for_list$$sql:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(e))
def generic(host, label, cmd):
    if host in participants:
        log.error('MM:' + host + ' ERROR: ' + label + ': Cannot send to a participant: ' + host)
        return None
    s = connect(host, label)
    if s == None:
        return None
    alldata = ''
    try:
        s.send(cmd)
        while True:
            data = s.recv(1024)
            if len(data) == 0:
                break
            alldata += data
            #sys.stdout.write(data)
        s.close()
    except Exception, e:
        log.error('MM:' + host + ' ERROR: ' + label + ': ' + repr(e))
        try:
            s.close()
        except:
            pass
        return None
def _get_item_texture(self, item):
    item_config = G.res_mgr.get_item_config_by_name(item.get_name())
    assert item_config
    tex = G.res_mgr.get_item_texture(item_config['texture'])
    if not tex:
        log.error("texture %s not found", item_config['texture'])
    return tex
def delay(args):
    if len(args) == 1:
        try:
            log.info('MM:00 DELAY ' + args[0])
            time.sleep(float(args[0]))
        except Exception, e:
            log.error('MM:00 ERROR: DELAY: exception: ' + repr(e))
def get_max_page_no(company_id):
    """
    return the max page number of interviewees' comments based on particular company
    :param company_id:
    :return:
    """
    request_url = 'https://www.lagou.com/gongsi/searchInterviewExperiences.json'
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Host': 'www.lagou.com',
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0'
                      ' Mobile/13B143 Safari/601.1',
        'Referer': 'https://www.lagou.com/gongsi/interviewExperiences.html?companyId=%s' % company_id
    }
    params = {
        'companyId': company_id,
        'positionType': '',
        'pageSize': '10',
        'pageNo': '1'
    }
    response = requests.post(request_url, headers=headers, params=params, cookies=init_cookies())
    if response.status_code == 200:
        maxpage = int(response.json()['content']['data']['page']['totalCount'])
    else:
        log.error('Error code is ' + str(response.status_code))
        maxpage = 0
    return int(maxpage / 10) + 1
def post_check():
    log.info("Check post validation for all posts")
    postid_list = get_postid_list()
    status, obj = check_post_list(postid_list)
    if status is False:
        log.error("post check fail, msg: %s" % obj)
        return False
def load_model_checkpoint(self, checkpoint_dir):
    log.info(" [Checkpoint] Loading checkpoints ...")

    checkpoint_dir = os.path.join(checkpoint_dir, "model")
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint and checkpoint.model_checkpoint_path:
        log.info(" [Checkpoint] Checkpoint path : %s ..." % checkpoint.model_checkpoint_path)
        #checkpoint_name = os.path.basename(checkpoint.model_checkpoint_path)
        self.load_model_from_checkpoint_file(checkpoint.model_checkpoint_path)
    else:
        return False

    # in particular, the learning rate can be overridden
    # NOTE that (self.learning_rate) might not be a variable,
    # but a tensor (e.g. exponential decay or scheduled scalar)!!
    if isinstance(self.learning_rate, tf.Variable):
        learning_rate_reassign_op = self.learning_rate.assign(self.config.initial_learning_rate)
        self.session.run(learning_rate_reassign_op)
    else:
        log.error(
            "Seems a learning rate decay scheme is in use (the learning rate is not a variable).\n"
            "  Restoring the learning rate is not implemented for this case,\n"
            "  which might mean the learning rate has been INCREASED!!!!"
        )
        log.error("self.config.initial_learning_rate = %f", self.config.initial_learning_rate)

    return True
def _equip(self, equip_item):
    """
    Equip the item into its matching slot. The item that was previously equipped may be returned.
    :param equip_item:
    :return:
    """
    equippable = equip_item.get_component(ItemEquippable)
    if not equippable:
        return consts.BAG_PUT_FAILED
    target_slots = equippable.get_slots()
    if not target_slots:
        return consts.BAG_PUT_FAILED
    for target_slot in target_slots:  # priority.
        idx = self._name2index.get(target_slot, -1)
        if idx < 0:
            log.error('unexpected equipment slot `%s`', target_slot)
            continue
        res = self._bag.put_item_at(idx, equip_item)
        if res == consts.PUT_INTO_EMPTY:
            return consts.BAG_PUT_TOTALLY, target_slot
        elif res == consts.PUT_SWITCH:
            # get old weapon with get_switched_equipment()
            return consts.BAG_PUT_TOTALLY, target_slot
        elif res == consts.PUT_FORBIDDEN:
            return consts.BAG_PUT_FAILED, None
        else:
            raise RuntimeError("unexpected put-item result %s" % res)
    return consts.BAG_PUT_FAILED, None
def merge_configs(main_config, additional_config):
    # Merge rules are simple
    # For each key in the dictionary
    #  - If list then append to list
    #  - If a dictionary then repeat merge on sub dictionary entries
    #  - Else assume string or number and replace entry
    for curkey in additional_config:
        if curkey not in main_config:
            main_config[curkey] = additional_config[curkey]
        elif type(main_config[curkey]) != type(additional_config[curkey]):
            log.error(f"Type for key {curkey} in configs does not match")
        else:
            # keys exist and match, see if a list to append
            if type(main_config[curkey]) == list:
                main_config[curkey] += additional_config[curkey]
            elif type(main_config[curkey]) == dict:
                # need to merge sub areas
                main_config[curkey] = merge_configs(main_config[curkey], additional_config[curkey])
            else:
                # not a list or dictionary, must be a number or string, overwrite
                main_config[curkey] = additional_config[curkey]

    return main_config
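# --- Example (not from the source): the merge rules above in action. Assumes
# --- merge_configs() from this file is in scope; the config keys and values are made up.
base = {"segments": ["boot"], "options": {"platform": "n64"}, "name": "base"}
extra = {"segments": ["code"], "options": {"compiler": "gcc"}, "name": "patched"}
merged = merge_configs(base, extra)
# lists are appended, dicts merged recursively, scalars overwritten:
# {'segments': ['boot', 'code'], 'options': {'platform': 'n64', 'compiler': 'gcc'}, 'name': 'patched'}
print(merged)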
def crawl_jobs(positionName):
    """crawl the job info from lagou H5 web pages"""
    JOB_DATA = list()
    max_page_number = get_max_pageNo(positionName)
    log.info("%s: %s pages of records, roughly %s records in total",
             positionName, max_page_number, max_page_number * 15)
    cookies = get_cookies()
    for i in range(1, max_page_number + 1):
        request_url = 'https://m.lagou.com/search.json?city=%E5%85%A8%E5%9B%BD&positionName=' + parse.quote(
            positionName) + '&pageNo=' + str(i) + '&pageSize=15'
        headers = {
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Host': 'm.lagou.com',
            'Referer': 'https://m.lagou.com/search.html',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, '
                          'like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        response = requests.get(request_url, headers=headers, cookies=cookies)
        if response.status_code == 200:
            for each_item in response.json()['content']['data']['page']['result']:
                JOB_DATA.append([each_item['positionId'],
                                 each_item['positionName'],
                                 each_item['city'],
                                 each_item['createTime'],
                                 each_item['salary'],
                                 each_item['companyId'],
                                 each_item['companyName'],
                                 each_item['companyFullName']])
                crawl_job_detail(each_item['positionId'], positionName)
            print('crawling page %d done...' % i)
            time.sleep(TIME_SLEEP)
        elif response.status_code == 403:
            log.error('request is forbidden by the server...')
        else:
            log.error(response.status_code)
    return JOB_DATA
def __init__(self, rom_start, rom_end, type, name, vram_start, extract, given_subalign,
             given_is_overlay, given_dir, args=[], yaml={}):
    super().__init__(rom_start, rom_end, type, name, vram_start, extract, given_subalign,
                     given_is_overlay, given_dir, args, yaml)

    if len(self.args) > 0:
        self.subtype = self.args[0]

        if self.subtype in ("i4", "rgba16"):
            if len(self.args) < 3:
                log.error(
                    f"Error: {self.name} is missing 'width' and 'height' parameters"
                )
            self.width = self.args[1]
            self.height = self.args[2]

            # append type to filename
            self.name += f".{self.subtype}"
    else:
        self.subtype = None

    # append .rnc
    self.name += ".rnc"
def request_ice(args):
    try:
        data = json.loads(args.get('Input'))
    except Exception, e:
        print e
        log.error(e)
def listener3(host, bind, port):
    if host not in hosts:
        log.error('MM:00 ERROR: LISTENER: unknown host: ' + host)
        return
    #print 'listener ' + host + ' ' + bind + ' ' + port
    r = generic(host, 'LISTENER', 'listener ' + bind + ' ' + port + '\n')
    if r is not None and len(r) > 0:
        log.info('MM:' + host + ' LISTENER: ' + r.strip())
def new_execute(self, query, args, kwargs):
    try:
        return self.cursor.execute(query, args or kwargs)
    except OperationalError:
        error("Error connecting to MySQL")
        warn('Failed to execute sql:%s', query)
        self.close()
        raise
def update_many(self, query, args=None):
    try:
        self.executemany(query, args)
        if self.cursor.rowcount > 0:
            return True
    except Exception as e:
        error('update_many:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(e))
    return False
def insert(self, query, args=None):
    try:
        self.execute(query, args)
        if self.cursor.rowcount > 0:
            return True
    except Exception as e:
        error('insert$$sql:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(e))
    return False
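# --- Example (hypothetical, not from the source): the same wrapper pattern as insert()
# --- above (execute, check rowcount, log and swallow errors), made self-contained on
# --- sqlite3 instead of the MySQL cursor the original methods assume. Class and table
# --- names are placeholders.
import logging
import sqlite3

log = logging.getLogger(__name__)

class TinyDb:
    def __init__(self, path=':memory:'):
        self.conn = sqlite3.connect(path)
        self.cursor = self.conn.cursor()

    def insert(self, query, args=None):
        try:
            self.cursor.execute(query, args or ())
            self.conn.commit()
            if self.cursor.rowcount > 0:
                return True
        except Exception as e:
            log.error('insert$$sql:%s$$args:%s$$error:%s', query, args, e)
        return False

db = TinyDb()
db.cursor.execute('CREATE TABLE job (id INTEGER, name TEXT)')
print(db.insert('INSERT INTO job VALUES (?, ?)', (1, 'backend engineer')))  # True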
def init(file):
    '''Parse test cases from the excel file'''
    log.info('Parsing data from the excel file')
    cases_list = []
    try:
        module_list = open_excel.excel_table_by_index(file)
    except Exception, e:
        print e
        log.error('Failed to open file %s' % file)
def blackholing(args):
    if len(args) < 3:
        log.error('MM:00 EXEC: ERROR usage: blackholing participant_id remove/insert id[,id...]')
        return

    part_id = args[0]      # participant id
    part_action = args[1]  # action insert or remove
    rule_ids = []
    for policy_id in args[2].split(','):  # rule ids
        rule_ids.append(int(policy_id) + 2**12)  # additional 4096 for cookie id

    client_path = '/home/vagrant/endeavour/pclnt/participant_client.py'
    config_file = 'participant_' + part_id + '_bh.cfg'

    cmd = ''
    for arg in args:
        cmd += arg + ' '
    log.info('MM:00 BLACKHOLING: ' + cmd + config_file)

    policy_path = os.path.abspath(os.path.join(os.path.realpath(sys.argv[1]), "..", "..", "policies"))
    config_path = os.path.join(policy_path, config_file)

    part_info = config.participants[str(part_id)]
    part_url = 'http://' + str(part_info["EH_SOCKET"][0]) + ':' + str(part_info["EH_SOCKET"][1]) + '/bh/inbound/'
    content_header = {'Content-Type': 'application/json'}

    # prepare to insert a blackholing policy
    if part_action == 'insert':
        new_policy = []
        # open file and parse
        with open(config_path, 'r') as f:
            policies = json.load(f)
            for policy in policies['inbound']:
                if int(policy['cookie']) in rule_ids:
                    new_policy.append(policy)
        # insert only inbound policies
        data = {}
        data['inbound'] = new_policy
        data = json.dumps(data)
        # post to participant api
        r = requests.post(part_url, data=data, headers=content_header)

    # prepare to remove each blackholing policy separately
    elif part_action == 'remove':
        for rule_id in rule_ids:
            new_url = part_url + str(rule_id)
            # delete via participant api
            r = requests.delete(new_url, headers=content_header)

    else:
        log.error('MM:00 EXEC: ERROR usage: error in blackholing - wrong action')
def parse(line):
    global cmdfuncs

    tokens = line.partition('#')[0].split()
    try:
        tokens = shlex.split(line.partition('#')[0])
    except Exception, e:
        log.error('MM:00 PARSE ERROR: ' + repr(e))
        return
def crawl_company(havemark=0):
    # company info is collected into a list
    COMPANY_LIST = list()
    # request URL
    req_url = 'https://www.lagou.com/gongsi/0-0-0.json?havemark=%d' % havemark
    # request headers
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'www.lagou.com',
        'Origin': 'https://www.lagou.com',
        'Referer': 'https://www.lagou.com/gongsi/0-0-0?havemark=0',
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 '
                      'Mobile/13B143 Safari/601.1'
    }
    # loop over the result pages
    for pn in range(20):
        params = {
            'first': 'false',
            'pn': str(pn),
            'sortField': '0',
            'havemark': str(havemark)
        }
        response = requests.post(req_url, headers=headers, params=params,
                                 cookies=m_lagou_spider.get_cookies(), timeout=10)
        print(response.url)
        if response.status_code == 200:
            company_list_per_page = response.json()['result']
            for company in company_list_per_page:
                COMPANY_LIST.append([
                    company['companyId'], company['companyShortName'], company['city'],
                    company['companyFeatures'], company['companyFullName'],
                    company['financeStage'], company['industryField'],
                    company['interviewRemarkNum'], company['positionNum'],
                    company['processRate']
                ])
            log.info('page %d has been crawled down~' % (pn + 1))
        elif response.status_code == 403:
            log.error('403 forbidden...')
        else:
            log.error(response.status_code)
        # sleep
        time.sleep(config.TIME_SLEEP)
    return COMPANY_LIST
def get_linker_entries(self):
    from segtypes.linker_entry import LinkerEntry

    if self.sibling:
        path = self.sibling.out_path()
    else:
        log.error("Unlinked bss sections currently unsupported")

    return [LinkerEntry(self, [path], path, self.get_linker_section())]
def get(url, target, overwrite=False, tag=None):
    '''
    Get the file or directory tree at the given URL and place it at the
    given target path.  If overwrite is True and target is preexisting
    it will be overwritten (updated).

    The URL is expected to be in a standard form:

    SCHEME://HOSTNAME/PATH

    The following URL schemes are supported:

    http: - the file given in PATH via HTTP
    ftp: - the file given in PATH via anonymous FTP
    git+TRANSPORT: - git-clone a repository.  TRANSPORT can be http,
        rsync, ssh or empty to use the native git protocol (the '+' can
        be omitted).  For a local repository, use "git+file:///path/to/git".
        See git-clone(1) for details.  If overwriting, a git-pull is done.
    svn+TRANSPORT: - perform "svn co" using the remaining URL with
        'svn+' removed.  If overwriting, an "svn update" is done.
    cvs+TRANSPORT: - perform "cvs co" using the remaining URL with
        'cvs+' removed.  If overwriting, a "cvs update" is done.
    '''
    log.info('Getting url "%s" --> "%s"' % (url, target))

    urlp = uriparse(url)

    if urlp[0] == 'http' or urlp[0] == 'ftp':
        return get_http_ftp(urlp[0], url, target, overwrite)

    scheme = urlp[0].split('+')
    #print 'scheme=',scheme
    print urlp, scheme
    if urlp[0] == 'git' or scheme[0] == 'git':
        return get_git(scheme, url, target, overwrite, tag)

    if scheme[0] == 'svn':
        return get_svn(scheme[1] + '://' + urlp[1] + '/' + urlp[2] + '/' + tag, target, overwrite)

    if scheme[0] == 'cvs':
        # get_cvs(url,module,tag,target,overwrite):
        #print 'CVS: "%s", "%s", "%s"'%(urlp[0],urlp[1],urlp[2])
        url = ':%s:%s:%s' % (scheme[1], urlp[1], '/'.join(urlp[2].split('/')[:-1]))
        module = urlp[2].split('/')[-1]
        #print 'url=%s, module=%s'%(url,module)
        print 'Note: getting from CVS, if this appears to hang, it is waiting for a password'
        return get_cvs(url, module, tag, target, overwrite)

    msg = 'Unhandled URL: "%s"' % url
    log.error(msg)
    raise ValueError, msg
def bgp(args):
    if len(args) != 1:
        log.error('MM:XX' + ' ERROR: usage: bgp bgp_router')
        return
    host = args[0]
    if host not in bgprouters:
        log.error('MM:' + host + ' ERROR: ' + 'BGP' + ' ' + host + ' : must be a BGP router')
        return
    log.info('MM:' + host + ' BGP')
    r = generic(host, 'BGP', 'bgp\n')
    if r is not None and len(r) > 0:
        log.info('MM:' + host + ' BGP: ' + r.strip())
def crawl_interviewee_comments(company_id):
    request_url = 'https://www.lagou.com/gongsi/searchInterviewExperiences.json'
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Host': 'www.lagou.com',
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0'
                      ' Mobile/13B143 Safari/601.1',
        'Referer': 'https://www.lagou.com/gongsi/interviewExperiences.html?companyId=%s' % company_id
    }
    maxpage_no = get_max_page_no(company_id)
    if maxpage_no > 0:
        for pn in range(maxpage_no):
            params = {
                'companyId': company_id,
                'positionType': '',
                'pageSize': '10',
                'pageNo': str(pn + 1)
            }
            response = requests.post(request_url, headers=headers, params=params, cookies=init_cookies())
            log.info('Crawl page %s successfully~' % response.url)
            if response.status_code == 200:
                comment_list = response.json()['content']['data']['page']['result']
                for comment in comment_list:
                    insert_item(comment)
                    log.info('insert one item successfully~')
                    """
                    intervieweeComment = IntervieweeComment()
                    intervieweeComment.id = comment['id']
                    intervieweeComment.companyId = comment['companyId']
                    intervieweeComment.companyName = comment['companyName']
                    intervieweeComment.companyScore = comment['companyScore']
                    intervieweeComment.comprehensiveScore = comment['comprehensiveScore']
                    intervieweeComment.interviewerScore = comment['interviewerScore']
                    intervieweeComment.describeScore = comment['describeScore']
                    intervieweeComment.myScore = comment['myScore']
                    intervieweeComment.content = comment['content']
                    intervieweeComment.createTime = comment['createTime']
                    intervieweeComment.hrId = comment['hrId']
                    intervieweeComment.positionId = comment['positionId']
                    intervieweeComment.positionName = comment['positionName']
                    intervieweeComment.positionStatus = comment['positionStatus']
                    intervieweeComment.positionType = comment['positionType']
                    intervieweeComment.tagArray = comment['tagArray']
                    intervieweeComment.usefulCount = comment['usefulCount']
                    insert_item(intervieweeComment)
                    """
            else:
                log.error('Error code is ' + str(response.status_code))
            time.sleep(random.randint(3, 6))
def regress(rtest):
    global regressions

    try:
        r = regressions[rtest]
    except:
        log.error('MM:00 ERROR: REGRESSION TEST FAILED unknown or poorly specified cmd: ' + rtest)
        return
    log.info('MM:00 INFO: REGRESSION TEST: ' + rtest + ": " + r)
    for l in shlex.split(r):
        parse(l)
def fs_check():
    log.info("Check the validation of post files")
    status, obj = check_duplicate_file()
    if status is False:
        log.error("There are some duplicate files, msg: %s" % obj)
        return False
    status, obj = check_posts_in_db()
    if status is False:
        log.error("Some files are not imported to db, msg: %s" % obj)
        return False
def main(argv):
    global config, bgprouters, hosts, tests, cmdfuncs

    if len(argv) < 2:
        log.error('usage: tmgr config.spec [ commands ]')
        exit()

    try:
        config = tlib.parser(argv[1])
    except Exception, e:
        log.error('Bad configuration: ' + repr(e))
        exit()
def killp(args):
    if len(args) < 2:
        log.error('MM:00 EXEC: ERROR: usage: killp anynode ID ...')
        return
    host = args[0]
    del args[0]
    cmd = ''
    for arg in args:
        cmd += arg + ' '
    log.info('MM:' + host + ' KILLP: ' + cmd)
    r = generic(host, 'KILLP', 'killp ' + cmd + '\n')
    if r is not None:
        log.debug('MM:' + host + ' KILLP: output = \n' + r.strip())
def test(args):
    if len(args) == 0:
        print json.dumps(tests, indent=4, sort_keys=True)
        # log.error('MM:00 ERROR: TEST: usage: test test_name ...')
        return
    for arg in args:
        if arg not in tests:
            log.error('MM:00 ERROR: TEST: undefined test: ' + arg)
            return
    for arg in args:
        log.info('MM:00 INFO: TEST: ' + arg)
        for l in tests[arg]:
            parse(l)
def remote(args):
    if len(args) < 2:
        log.error('MM:00 EXEC: ERROR: usage: exec anynode cmd arg ...')
        return
    host = args[0]
    del args[0]
    cmd = ''
    for arg in args:
        cmd += arg + ' '
    log.info('MM:' + host + ' REXEC: ' + cmd)
    r = generic(host, 'REXEC', 'exec ' + cmd + '\n')
    if r is not None:
        log.debug('MM:' + host + ' REXEC: output = \n' + r.strip())
def verify(args):
    if len(args) != 3:
        log.error('MM:00 ERROR: VERIFY: usage: verify src dst port')
        return
    src = args[0]
    xdst = args[1]
    dport = args[2]
    rand = str(random.randint(1000000000, 9999999999))  # must be 10 characters
    try:
        baddr, daddr = config.verifycheck(src, xdst, dport)
    except Exception, e:
        log.error('MM:00 ERROR: VERIFY: ' + repr(e))
        return