def _post_LoginValidate_OA(self):
    """Validate the OA user name and password.

    Posts the credentials and MAC address as query parameters to
    ``/LoginValidate/Servlet/LoginCheck2``.

    :return: True only when the response passes ``response_status`` and its
        body is exactly the text ``true``; False otherwise.
    """
    endpoint = self.oaHost + '/LoginValidate/Servlet/LoginCheck2'
    query = {
        'userid': self.userName,
        'passwd': self.passWord,
        'macAddr': self.macAddr,
        'isSendSMS': '1',
    }
    request_headers = {
        'User-Agent': self.spider_session.get_user_agent(),
        'Referer': self.oaHost + '/',
    }
    resp = self.session.post(url=endpoint, headers=request_headers,
                             params=query)

    # A bad HTTP response and a body other than 'true' are reported the same
    # way; the body is only read once the response itself is acceptable.
    if response_status(resp) and resp.text == 'true':
        logger.info('【OA登录】用户名密码验证成功')
        return True

    logger.error('【OA登录】用户名密码验证失败')
    return False
def booking_room_parser(taskcontent):
    """Split a task string of the form ``hotel_id&hotel_name&check_in``.

    :param taskcontent: ``&``-separated task description.
    :return: ``[]`` when the first three fields cannot be extracted. The
        visible part of the function ends after the extraction, so nothing
        is returned on success.
    """
    try:
        # Split once instead of re-splitting the string for every field.
        fields = taskcontent.strip().split('&')
        hotel_id, url_hotel_name, check_in_temp = \
            fields[0], fields[1], fields[2]
    except Exception as e:
        logger.error('Parse taskcontent failed with ' + str(e))
        return []
def elong_request_parser(content):
    """Parse an eLong flight request string.

    The content is ``flight_info|time_info|ticketsource`` where
    ``flight_info`` is ``flight_no-dept-dest`` and ``time_info`` is
    ``YYYYMMDD_HH:MM``.

    :return: ``-1`` when the content cannot be parsed (including city codes
        missing from ``cities_dict``). The visible part of the function ends
        after the parsing step.
    """
    result = -1
    try:
        infos = content.split('|')
        flight_info = infos[0].strip()
        time_info = infos[1].strip()
        ticketsource = infos[2].strip()

        flight_parts = flight_info.split('-')
        flight_no = flight_parts[0]
        dept_id, dest_id = flight_parts[1], flight_parts[2]

        # time_info example: 20140510_09:10
        time_parts = time_info.split('_')
        dept_day, dept_hour = time_parts[0], time_parts[1]
        # 20140510 -> 2014-05-10
        dept_date = dept_day[0:4] + '-' + dept_day[4:6] + '-' + dept_day[6:]
        dept_time = dept_date + 'T' + dept_hour + ':00'

        dept_id = cities_dict[dept_id]
        dest_id = cities_dict[dest_id]
        location = dept_id + '-' + dest_id

        # int() replaces string.atoi(), which no longer exists on Python 3.
        origday = datetime.datetime(int(dept_date[0:4]),
                                    int(dept_date[5:7]),
                                    int(dept_date[8:]))
        # Whole days between now and the departure date.
        urlday = (origday - datetime.datetime.today()).days
    except Exception as e:
        logger.error(str(e))
        logger.error('Content Error: Wrong content format with %s' % content)
        return result
def test_plan_tasks(self, case):
    """Run one data-driven test case and record the outcome in Excel.

    Sends the request described by ``case``, compares the ``success`` field
    of the JSON response with ``case.expected`` and writes PASS/FAIL to the
    result sheet. When the response carries a ``homeworkId`` it is stored on
    ``Context`` for later cases.

    :param case: test case record (title, data, method, url, expected,
        case_id).
    :raises AssertionError: when the response does not match the expectation.
    :raises ValueError: when ``case.expected`` is not a valid literal.
    """
    logger.info('开始测试{}'.format(case.title))
    case.data = context.replace(case.data)
    print("case.data:", case.data)
    resp = self.http_request.http_request(case.method, case.url, case.data)
    results = json.loads(resp.text)
    print('返回结果:', results)
    try:
        self.assertEqual(ast.literal_eval(case.expected), results["success"])
        self.excel.writer_result(case.case_id + 1, resp.text, "PASS")
        # After a successful call the homeworkId may be present in the
        # response; "data" can be missing or null, hence both exceptions.
        try:
            homeworkId = results["data"]["homeworkId"]
        except (KeyError, TypeError):
            homeworkId = None
        if homeworkId:
            print('homeworkId:', homeworkId)
            # Keep it as a class attribute so later cases can reuse it.
            setattr(Context, "homeworkId", homeworkId)
    except (AssertionError, ValueError) as e:
        # assertEqual raises AssertionError; catching only ValueError meant a
        # failed comparison was never written to the sheet as FAIL.
        self.excel.writer_result(case.case_id + 1, resp.text, "FAIL")
        logger.error('报错了{}'.format(e))
        raise
    logger.info('测试结束:{}'.format(case.title))
def pinch(self, page_name):
    """Perform a two-finger pinch towards the centre of the page.

    Both fingers start near opposite corners and move to just either side of
    the centre point.

    :param page_name: page label used in log messages.
    :return: self, to allow call chaining.
    :raises exceptions.WebDriverException: re-raised after logging.
    """
    try:
        size = self.get_size(page_name)
        gesture = MultiAction(self.driver)
        upper_finger = TouchAction(self.driver)
        lower_finger = TouchAction(self.driver)

        # Upper-left finger moves down/right to just before the centre.
        (upper_finger
         .press(x=size["width"] * 0.2, y=size["height"] * 0.1)
         .wait(200)
         .move_to(x=size["width"] * 0.5 - 1, y=size["height"] * 0.5 - 1)
         .wait(200)
         .release())
        # Lower-right finger moves up/left to just past the centre.
        (lower_finger
         .press(x=size["width"] * 0.8, y=size["height"] * 0.7)
         .wait(200)
         .move_to(x=size["width"] * 0.5 + 1, y=size["height"] * 0.5 + 1)
         .wait(200)
         .release())

        gesture.add(upper_finger, lower_finger)
        gesture.perform()
    except exceptions.WebDriverException as e:
        logger.error(f"Page: {page_name} Action:[zoom_out↘↖]\n"
                     f"Msg: zoom_out operation failed! Track:{e}")
        raise e
    logger.info(f"Page: {page_name} Action:[zoom_out↘↖]\n"
                f"Msg: zoom_out operation successful!")
    return self
def switch_page_handle(self, page_name, keyword):
    """Switch to the first window handle whose page source contains keyword.

    :param page_name: page label used in log messages.
    :param keyword: text that must appear in the target page source.
    :return: self, to allow call chaining.
    :raises exceptions.NoSuchElementException: when no handle's page source
        contains the keyword (a screenshot is captured first).
    """
    prefix = f"Page: {page_name} Action:[switch_page_handle]\n"
    handles = self.get_handles
    try:
        for candidate in handles:
            self.driver.switch_to.window(candidate)
            logger.debug(prefix + f"Msg: Switch to handle:{candidate}")
            if keyword in self.get_page_source:
                break
        # Re-check the page we ended on: the loop may have run out of
        # handles without ever finding the keyword.
        if keyword not in self.get_page_source:
            raise exceptions.NoSuchElementException(
                f"No such keyword:{keyword} in page source:"
                f"{self.get_page_source}")
    except exceptions.NoSuchElementException as e:
        current = self.get_handle
        self.capture_screen_shot(page_name)
        logger.error(
            prefix +
            f"Msg: Switch to handle:{current}, keyword not found! Track:{e}")
        raise e
    current = self.get_handle
    logger.info(prefix + f"Msg: Switch to handle:{current} keyword found")
    return self
def get_size(self, page_name, *loc, timeout=30, polling=0.5):
    """Return the size of an element, or of the whole window.

    :param page_name: page label used in log messages.
    :param loc: optional locator; when omitted the window size is returned.
    :param timeout: passed through to ``find_ele``.
    :param polling: passed through to ``find_ele``.
    :return: the size object reported by the driver.
    :raises exceptions.WebDriverException: re-raised after logging and
        capturing a screenshot.
    """
    try:
        size = (self.find_ele(page_name, loc, timeout, polling).size
                if loc else self.driver.get_window_size())
    except exceptions.WebDriverException as e:
        failure_msg = (
            f"Page: {page_name} Action:[get_size] Element: {loc}\n"
            f"Msg:get size failed! Track:{e}"
            if loc else
            f"Page: {page_name} Action:[get_all_size]\n"
            f"Msg:Get full window size failed! Track:{e}")
        logger.error(failure_msg)
        self.capture_screen_shot(page_name)
        raise e

    if loc:
        logger.info(f"Page: {page_name} Action:[get_size] "
                    f"Element: {loc} Get size: {size}")
    else:
        logger.info(f"Page: {page_name} Action:[get_all_size] "
                    f"Get window size: {size}")
    return size
def biyi_room_task_parser(taskcontent):
    """Parse a Biyi hotel room task string.

    Expected fields, separated by ``&``: hotel id, hotel name, map info,
    city name (zh), city name (en), country name (zh), check-in day
    (``YYYYMMDD``). The check-out day is the day after check-in.

    :return: ``{'para': None, 'error': TASK_ERROR}`` when the content is
        malformed. The visible part of the function ends after parsing.
    """
    result = {'para': None, 'error': 0}
    taskcontent = taskcontent.encode('utf-8')
    try:
        # Split once instead of once per field.
        fields = taskcontent.strip().split('&')
        hotel_id = fields[0]
        hotel_name = fields[1]
        map_info = fields[2]
        city_name_zh = fields[3]
        city_name_en = fields[4]
        country_name_zh = fields[5]
        check_in_day_temp = fields[6]

        # 20140510 -> 2014-05-10
        check_in_day = (check_in_day_temp[:4] + '-' +
                        check_in_day_temp[4:6] + '-' +
                        check_in_day_temp[6:])
        check_out_day_temp = datetime.datetime(int(check_in_day_temp[:4]),
                                               int(check_in_day_temp[4:6]),
                                               int(check_in_day_temp[6:]))
        check_out_day = str(check_out_day_temp +
                            datetime.timedelta(days=1))[:10]
    except Exception:
        logger.error('biyiHotel: Wrong Content Format with %s' % taskcontent)
        result['error'] = TASK_ERROR
        return result
def elong_room_task_parser(taskcontent):
    """Parse an eLong hotel room task string.

    Expected fields, separated by ``&&``: city name (zh), hotel id
    (``prefix_id``), hotel name, country name, city id, city name,
    check-in day (``YYYYMMDD``). The check-out day is the day after check-in.

    :return: ``{'para': None, 'error': TASK_ERROR}`` when the content is
        malformed. The visible part of the function ends after parsing.
    """
    room_list = []
    result = {'para': None, 'error': 0}
    try:
        taskcontent = taskcontent.strip()
        # Split once instead of once per field.
        fields = taskcontent.split('&&')
        city_name_zh, hotel_id, hotel_name = fields[0], fields[1], fields[2]
        country_name, city_id, city_name_temp = \
            fields[3], fields[4], fields[5]
        check_in_temp = fields[6]

        # 20140510 -> 2014-05-10
        check_in = (check_in_temp[:4] + '-' + check_in_temp[4:6] + '-' +
                    check_in_temp[6:])
        check_out_temp = datetime.datetime(int(check_in_temp[:4]),
                                           int(check_in_temp[4:6]),
                                           int(check_in_temp[6:]))
        check_out = str(check_out_temp + datetime.timedelta(days=1))[:10]
        hotel_id_temp = hotel_id.split('_')[1]
    except Exception:
        logger.error('elongHotelParser: Wrong Content Format with %s' %
                     taskcontent)
        result['error'] = TASK_ERROR
        return result
def get_glow():
    """Visit the live room, then query the backpack for glow sticks.

    Sets the module globals ``Own`` (jsonpath result for item id 268) and
    ``Bags`` (1) when the backpack contains items.

    :return: the backpack response object.
    """
    global Bags, Own
    # The room has to be visited once before the item is granted.
    logger.info("------正在获取荧光棒------")
    go_room()
    glow_url = "/japi/prop/backpack/web/v1?rid=12306"
    glow_res = dyreq.request("get", glow_url)
    logger.info("------背包检查开始------")

    # Explicit checks instead of ``assert``: assertions are removed when
    # Python runs with -O, which would skip this validation entirely.
    if glow_res.status_code == 200 and glow_res.json()['msg'] == "success":
        # Guard against an empty backpack.
        if glow_res.json()['data']['list']:
            Own = jsonpath(glow_res.json(), '$..list[?(@.id == 268)].count')
            logger.info("当前拥有荧光棒%s个,给你喜欢的主播进行赠送吧" % Own)
            Bags = 1
        else:
            logger.warning("当前背包中没有任何道具")
        logger.info("------背包检查结束------")
    else:
        if glow_res.json()['msg'] == '请登录':
            logger.error("请更新COOKIE")
        else:
            logger.error("领取荧光棒时发生错误")
        logger.info("------背包检查结束------")
    return glow_res
def get_post_data(content, dept_day):
    """Decode the flight JSON used to build the POST data.

    :param content: JSON text describing the flights.
    :param dept_day: departure day; not used in the visible part.
    :return: an empty dict when ``content`` is not valid JSON. The visible
        part of the function ends after decoding.
    """
    post_data = {}
    try:
        flight_json = json.loads(content)
    except Exception as e:
        logger.error(('Loading flight json failed error: ' + str(e)))
        return post_data
def get_asir_loadnames(mode):
    """Return up to 30 recent AIRSCALE load names for a release prefix.

    :param mode: release prefix, e.g. ``FL`` (ASIR FDD) or ``TL`` (ASIR TDD).
    :return: list of load names, or None when the query fails (the error is
        logged).
    """
    crt_type = str(mode) + '%'
    logger.debug('Type is: %s', mode)
    # NOTE(review): the query is built by string concatenation; ``mode`` must
    # come from a trusted source unless get_DB gains parameter support.
    sql_str = '''
        select enb_build
        from test_results
        where enb_build !='Null' and enb_build !=''
          and enb_build not like '%MF%'
          and crt_type='CRT1_DB'
          and enb_release like("''' + crt_type + '''")
          and enb_hw_type='AIRSCALE'
        GROUP BY enb_build
        order by time_epoch_start desc
        limit 30
    '''
    try:
        data = mysqldb.get_DB(sql_str)
        return [row[0] for row in data]
    except Exception as e:
        # The message previously named get_loadnames, a different function.
        logger.error('error: get_asir_loadnames %s', e)
def write_default_log_2_csv(service):
    """Periodically dump the collected results to one CSV file per device.

    Runs while ``service.can_write`` is true, rewriting every non-empty
    entry of ``result_map`` and then sleeping for ten seconds.

    :type service: audio_identify.identify.AudioIdentify
    """
    while service.can_write:
        for com, com_result_d in result_map.items():
            if not len(com_result_d):
                continue
            started = format_time(
                parse_time(corpus_conf.start_time, format_2), format_3)
            file_name = '{0}_{1}_{2}.csv'.format(
                corpus_conf.log_name_by_serial.get(com),
                corpus_conf.wav_count_one_cmder,
                started)
            target = os.path.join(corpus_conf.output_path, file_name)
            try:
                with open(target, 'w+', encoding='utf-8') as wf:
                    # Snapshot the values before writing.
                    cmds_result = list(dict(com_result_d).values())
                    row_format = write_csv_header(cmds_result, wf)
                    for cmd_result in cmds_result:
                        write_one_cmd_log(cmd_result, row_format, wf)
            except Exception as e:
                logger.error('Failed to write test result, err: %s, %s' %
                             (e, traceback.format_exc()))
        sleep(10)
    else:
        logger.info('write csv thread finish...')
def getpostdata(dept_city, dest_city, year, month, day, nump, tp, ages):
    """Build the Air France search POST body.

    :param dept_city: departure airport code.
    :param dest_city: arrival airport code.
    :param year: departure year.
    :param month: departure month (concatenated directly after the year).
    :param day: departure day of month.
    :param nump: total number of passengers.
    :param tp: cabin code.
    :param ages: passenger type codes; ``'INF'`` is an infant, ``'CHD'`` a
        child, anything else an adult.
    :return: the URL-style POST body string.
    """
    numOfInfant = 0
    numOfAdult = 0
    numOfChild = 0
    try:
        for age in ages:
            if age == 'INF':
                numOfInfant += 1
            elif age == 'CHD':
                # Must be elif: with a plain ``if`` every infant fell through
                # to the else branch and was counted as an adult too.
                numOfChild += 1
            else:
                numOfAdult += 1
    except Exception:
        logger.error('airfranceFlight :: content format error')
        numOfAdult = 1

    return ''.join([
        'arrivalType=AIRP&cabin=', tp,
        '&plusOptions=&nbEnfants=', str(numOfChild),
        '&nbBebes=', str(numOfInfant),
        '&nbPassenger=', str(nump),
        '&standardMandatory=true&calendarSearch=0&nbAdults=',
        str(numOfAdult),
        '&optionalUM=false&familyTrip=NON&flyingBlueMember=false'
        '&subCabin=MCHER&selectCabin=on&yearMonthDate=',
        str(year), str(month),
        '&corporateMode=false&isUM=false&dayDate=', str(day),
        '&departure=', dept_city,
        '&mandatoryUM=true&paxTypoList=ADT&departureType=AIRP'
        '&typeTrip=1&arrival=', dest_city,
        '&partnerRequest=false&nbChildren=', str(numOfChild),
        '&haul=LH',
    ])
def is_element_present(self, how, what):
    """Tell whether the driver can locate an element.

    :param how: locator strategy, passed as ``by``.
    :param what: locator value.
    :return: True when the element is found, False (after logging) when the
        driver raises NoSuchElementException.
    """
    try:
        self.driver.find_element(by=how, value=what)
    except NoSuchElementException as e:
        logger.error("Element is not present. %s" % e)
        return False
    else:
        return True
def get_failed_count(self):
    """Count CRT1_DB test cases of this load that never passed.

    A case is counted when it has a valid non-'Passed' record for
    ``self.loadname`` and no valid 'Passed' record for the same load.

    :return: the count, or None when the query fails (the error is logged).
    """
    # NOTE(review): the query is built by string concatenation;
    # ``self.loadname`` must come from a trusted source.
    sql_str = '''
        select count(*) from
            (select test_case_name from test_results
             where crt_type = 'CRT1_DB' and record_valid = 1
               and test_status != 'Passed'
               and enb_build = "''' + self.loadname + '''"
               and test_case_name not in(select test_case_name
                   from test_results
                   where enb_build = "''' + self.loadname + '''"
                     and crt_type = 'CRT1_DB' and record_valid = 1
                     and test_status = 'Passed'
                   group by test_case_name)
             group by test_case_name order by robot_ip) as t;
    '''
    try:
        data = mysqldb.get_DB(sql_str)
        return data[0][0]
    except Exception as e:
        logger.error('error: get_failed_count %s', e)
def elong_request_parser(content):
    """Parse an eLong flight request string.

    The content is ``flight_info|time_info|ticketsource`` where
    ``flight_info`` is ``flight_no-dept-dest`` and ``time_info`` is
    ``YYYYMMDD_HH:MM``.

    :return: ``-1`` when the content cannot be parsed (including city codes
        missing from ``cities_dict``). The visible part of the function ends
        after the parsing step.
    """
    result = -1
    try:
        infos = content.split('|')
        flight_info = infos[0].strip()
        time_info = infos[1].strip()
        ticketsource = infos[2].strip()

        flight_parts = flight_info.split('-')
        flight_no = flight_parts[0]
        dept_id, dest_id = flight_parts[1], flight_parts[2]

        # time_info example: 20140510_09:10
        time_parts = time_info.split('_')
        dept_day, dept_hour = time_parts[0], time_parts[1]
        # 20140510 -> 2014-05-10
        dept_date = dept_day[0:4] + '-' + dept_day[4:6] + '-' + dept_day[6:]
        dept_time = dept_date + 'T' + dept_hour + ':00'

        dept_id = cities_dict[dept_id]
        dest_id = cities_dict[dest_id]
        location = dept_id + '-' + dest_id

        # int() replaces string.atoi(), which no longer exists on Python 3.
        origday = datetime.datetime(int(dept_date[0:4]),
                                    int(dept_date[5:7]),
                                    int(dept_date[8:]))
        # Whole days between now and the departure date.
        urlday = (origday - datetime.datetime.today()).days
    except Exception as e:
        logger.error(str(e))
        logger.error('Content Error: Wrong content format with %s' % content)
        return result
def openAppium(self, port='4723'):
    """Start an Appium server on ``port`` unless the port is already in use.

    The port is probed with a TCP connect to 127.0.0.1. When it is free,
    Appium is launched through ``cmd /c start`` with the bootstrap port set
    to ``port + 1`` and its log written to ``lib/logs/AppiumLog.log``.

    :param port: TCP port for the Appium server.
    """
    self.port = port
    self.caps = {}
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(('127.0.0.1', int(port)))
        s.shutdown(2)
        logger.error('port %s is uesd !' % port)
        portstatus = False
    except Exception:
        # Any failure to connect is treated as "nothing is listening".
        logger.info('port %s is available!' % port)
        portstatus = True
    finally:
        # The probe socket was previously never closed.
        s.close()

    bootstrap_port = str(int(port) + 1)
    dir_path = dirname(dirname(abspath(__file__)))
    logPath = dir_path + "/lib/logs/AppiumLog.log"
    if not portstatus:
        return

    # Built before the try so the failure log below can always reference it.
    cmd = ('cmd /c start appium -a ' + '127.0.0.1 -p ' + str(port) +
           ' --bootstrap-port ' + str(bootstrap_port) +
           " --log " + logPath + " --log-timestamp --local-timezone")
    try:
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        logger.info('运行cmd命令:' + str('start /b ' + cmd) + '成功')
        p.wait()
    except Exception as e:
        logger.info('运行cmd命令:' + str('start /b ' + cmd) + '失败')
        logger.error(str(e))
def tongcheng_request_parser(content):
    """Parse a Tongcheng flight request string.

    The content is ``flight_info|time_info|source`` where ``flight_info`` is
    ``flight_no-dept-dest`` and ``time_info`` is ``YYYYMMDD_HH:MM``.

    :return: ``-1`` when the content cannot be parsed or a city code is
        unknown. The visible part of the function ends after parsing.
    """
    result = -1
    # NOTE(review): the encoded value is discarded, so this call only has an
    # effect when it raises; kept as-is pending confirmation of the intent.
    content.encode('utf-8')
    try:
        info_list = content.split('|')
        flight_info = info_list[0]
        time_info = info_list[1]
        source = info_list[2]

        flight_parts = flight_info.split('-')
        flight_no = flight_parts[0]
        dept_id = flight_parts[1]
        dest_id = flight_parts[2]

        time_parts = time_info.split('_')
        dept_day = time_parts[0]
        dept_minute = time_parts[1] + ':00'

        dept_city = city_dict[dept_id]
        dest_city = city_dict[dest_id]
        dept_city_cn = city_dict_cn[dept_id].encode('utf-8')
        dest_city_cn = city_dict_cn[dest_id].encode('utf-8')

        # 20140510 -> 2014-05-10
        dept_date = dept_day[:4] + '-' + dept_day[4:6] + '-' + dept_day[6:]
        dept_time = dept_date + 'T' + dept_minute
    except Exception as e:
        logger.error('tongchengFlight: Wrong Content Format with %s' %
                     content + str(e))
        return result
def get_debug_asir_result(self):
    """Look up the debug-case result of this ASIR load in the Jenkins data.

    :return: ``'Yes'`` for PASS, ``'No'`` for FAIL, the raw result for any
        other value, ``'NULL'`` when there is no Jenkins data or no debug
        case, and None when an exception occurs (it is logged).
    """
    loadname = 'ASIR_' + self.loadname
    logger.debug('loadname is: %s ', loadname)
    try:
        dic = get_jenkins_data(loadname)
        if not dic:
            return 'NULL'

        list_temp = []
        dic_pci = self.get_target_value('pci', dic, list_temp)
        list_temp2 = []
        dic_children = self.get_target_value(
            'children', self.get_key_value('name', 'crt', dic_pci),
            list_temp2)
        dic_debug = self.get_key_value('name', 'debug', dic_children)
        if not dic_debug.get('cases'):
            return 'NULL'

        # Only the first recorded case is considered.
        debug_status = dic_debug['cases'][0]['result']
        if debug_status == 'PASS':
            debug_status = 'Yes'
        elif debug_status == 'FAIL':
            debug_status = 'No'
        return debug_status
    except Exception as e:
        logger.error('error:%s', e)
def _report(self):
    """Submit the report through the alternate form (``project_add.asp``).

    Does nothing when the error flag is already set. The record id is taken
    from the ``id`` query parameter of ``self._navigation_url``.

    :return: the response body decoded as UTF-8, or ``""`` when the error
        flag is set.
    """
    if self._error == 1:
        logger.debug(f"The error flag: {self._error}. Exit the function.")
        return ""
    logger.info("Try to report in the alternate method.")
    param = parse.parse_qs(parse.urlparse(str(self._navigation_url)).query)
    url = f"{self._host}/project_add.asp"
    payload = {
        "id": param["id"][0],
        "province": "四川省",
        "city": "成都市",
        "area": "龙泉驿区",
        "wuhan": "否",
        "fare": "否",
        "wls": "否",
        "kesou": "否",
        "zhengduan": "",
        "Submit": "提交",
        "action": "add",
        "adds": "undefined",
        "addsxy": "undefined"
    }
    res = self._session.post(url=url, headers=self._headers, data=payload)
    logger.debug(
        f"URL:{url}. Payload:{payload}. Status code:{res.status_code}")
    if res.status_code != 200:
        # This is a POST; the message used to say GET.
        logger.error(
            f"Failed:POST request. URL:{url}. Status code:{res.status_code}"
        )
    res.encoding = "utf-8"
    return res.text
def booking_room_request_parser(taskcontent):
    """Split a task string of the form ``hotel_id&hotel_name&check_in``.

    :param taskcontent: ``&``-separated task description.
    :return: ``[]`` when the first three fields cannot be extracted. The
        visible part of the function ends after the extraction, so nothing
        is returned on success.
    """
    try:
        # Split once instead of re-splitting the string for every field.
        fields = taskcontent.strip().split('&')
        hotel_id, url_hotel_name, check_in_temp = \
            fields[0], fields[1], fields[2]
    except Exception as e:
        logger.error('Parse taskcontent failed with ' + str(e))
        return []
def get_unexecuted_count(self):
    """Count release test cases with no valid CRT1_DB result for this load.

    :return: the count, or None when the query fails (the error is logged).
    :raises ValueError: when ``get_release()`` returns None.
    """
    branch = self.get_release()
    if branch is None:
        raise ValueError("Invalid branch: None. Load name: %s" %
                         (self.loadname, ))
    logger.debug('branch is %s :', branch)
    logger.debug('loadname is %s :', self.loadname)
    # NOTE(review): the query is built by string concatenation; ``branch``
    # and ``self.loadname`` must come from a trusted source.
    sql_str = '''
        SELECT count(*)
        FROM
          (SELECT *
           FROM
             (SELECT crt_testcase_name.casename
              FROM crt_testcase_release
              INNER JOIN crt_testcase_name
                ON crt_testcase_name.id = crt_testcase_release.case_id
              WHERE crt_testcase_release.release_id = "''' + branch + '''") AS t1
           WHERE t1.casename NOT IN
               (SELECT test_case_name
                FROM test_results
                WHERE enb_build = "''' + self.loadname + '''"
                  AND record_valid = 1
                  AND crt_type = 'CRT1_DB')) AS t2
    '''
    try:
        data = mysqldb.get_DB(sql_str)
        return data[0][0]
    except Exception as e:
        logger.error('error: get_unexecuted_count %s', e)
def _get_login_page_EOMS(self):
    """Follow the "electronic operations" (EOMS) link from the OA portal.

    :return: True when the response passes ``response_status``, else False.
    """
    endpoint = self.oaHost + "/uniworkSomeLinks/updateSomeLinkClick.do"
    query = {
        'uid': self.userName,
        'someLinkID': 256,
        'someLinkURL': '0510171C494049040216170D1F410E1D43070B051D0E0B1B0F0D0F095D0C09194213060E001C095B3E372C42191C164B1F010E03070A27041D2D07511706071A170D14091A0713',
    }
    request_headers = {
        'User-Agent': self.spider_session.get_user_agent(),
        'Referer': self.oaHost + '/uniworkSomeLinks/getVaryLink.do?issgs=true',
    }
    resp = self.session.get(url=endpoint, headers=request_headers,
                            params=query)
    # Read but not used in this method.
    location_url = resp.headers.get('Location')

    succeeded = bool(response_status(resp))
    if succeeded:
        logger.info('第一步:点击电子运维链接成功')
    else:
        logger.error('第一步:点击电子运维链接失败')
    return succeeded
def find_one(self, sql, expected=None, times=None) -> tuple:
    """Poll a query until it yields a row (optionally a specific value).

    The statement is executed up to ``times`` times. After every attempt
    that produced no row, or a row whose first column differs from
    ``expected``, the method sleeps six seconds and commits before retrying.

    :param sql: database expression to execute -> str
    :param expected: value the first column must equal; any row is accepted
        when this is falsy.
    :param times: number of attempts; defaults to 20 when falsy.
    :return: the first matching row, otherwise the last row fetched (None
        when no row was ever returned).
    :raises pymysql.err.Error, TypeError: re-raised after a rollback. An
        InterfaceError instead triggers a reconnect and a fresh call.
    """
    res = None
    times = times or 20
    try:
        logger.info(f"Model: fetchone, SQL: 【{sql}】")
        for _ in range(times):
            row = self.cur.execute(sql)
            logger.debug(f"row: {row}")
            if row:
                res = self.cur.fetchone()
                logger.info(f"result: {res}")
                if not expected or res[0] == expected:
                    return res
            # Nothing usable yet: wait, then commit before the next attempt.
            time.sleep(6)
            self.con.commit()
        return res
    except pymysql.err.InterfaceError as e:
        self.con.ping(reconnect=True)
        logger.warning(f"Database connection failed: {e}")
        return self.find_one(sql, expected, times)
    except (pymysql.err.Error, TypeError) as e:
        logger.error(f"Database error rolling back: {e}")
        self.con.rollback()
        raise e
def main(self, uid, password):
    """Log in and submit the report, falling back to the alternate form.

    :param uid: user id.
    :param password: user password.
    :return: ``(status, errno)`` where status is 0 when a report already
        exists, 1 when the submission succeeded and 2 when both methods
        failed.
    """
    self._error = 0
    self._errno = 0
    self._session = requests.Session()
    self._host = ("https://xsswzx.cdu.edu.cn/" +
                  security.get_random_host() + "/com_user")
    self._headers = {"User-Agent": security.get_random_useragent()}

    self._get_captcha_code()
    self._login(uid, password)
    self._get_navigation_url("疫情信息登记")

    # First attempt: default method.
    ret = self._report_default_method()
    if "已存在" in ret:
        logger.info(f"The report is already existed. ID:{uid}")
        return 0, self._errno
    if "提交成功" in ret:
        logger.info(f"Successful to report. ID:{uid}")
        return 1, self._errno
    logger.error("Failed to report in the default method.")

    # Second attempt: alternate method.
    ret = self._report()
    if "已存在" in ret:
        logger.info(f"The report is already existed. ID:{uid}")
        return 0, self._errno
    if "提交成功" in ret:
        logger.info(f"Successful to report. ID:{uid}")
        return 1, self._errno

    logger.error(f"Failed to report. ID:{uid}")
    if self._errno == 0:
        self._set_error(7, self._error)
    return 2, self._errno
def _login(self, uid, password):
    """Log in to the ISP with the previously fetched captcha code.

    Does nothing when the error flag is already set. On a non-200 response
    or when the page contains ``alert`` the error state is set through
    ``_set_error(2, 1)``.

    :param uid: user name.
    :param password: user password.
    """
    if self._error == 1:
        logger.debug(f"The error flag: {self._error}. Exit the function.")
        return
    url = f"{self._host}/weblogin.asp"
    payload = {
        "username": uid,
        "userpwd": password,
        "code": self._captcha_code,
        "login": "******",
        "checkcode": "1",
        "rank": "0",
        "action": "login",
        "m5": "1",
    }
    res = self._session.post(url=url, headers=self._headers, data=payload)
    # Never write the plaintext password to the log.
    logged_payload = dict(payload, userpwd="******")
    logger.debug(
        f"URL:{url}. Payload:{logged_payload}. "
        f"Status code:{res.status_code}")
    res.encoding = "utf-8"
    if res.status_code != 200:
        logger.error(
            f"Failed:POST request. URL:{url}. Status code:{res.status_code}"
        )
        self._set_error(2, 1)
    elif "alert" in res.text:
        logger.error(
            "Failed to login the ISP.[Incorrect username, password or captcha code]"
        )
        self._set_error(2, 1)
    else:
        logger.info("Successful to login the ISP.")
def get_loadnames(mode):
    """Return up to 30 recent load names for a release prefix.

    :param mode: release prefix: ``FLF`` (FZM FDD), ``TLF`` (FZM TDD),
        ``FLC`` (CFZC FDD) or ``TLC`` (CFZC TDD).
    :return: list of load names, or None when the query fails (the error is
        logged).

    example: get_loadnames('TLF')
    """
    crt_type = str(mode) + '%'
    logger.debug('Type is: %s', mode)
    # NOTE(review): the query is built by string concatenation; ``mode`` must
    # come from a trusted source unless get_DB gains parameter support.
    sql_str = '''
        select enb_build
        from test_results
        where enb_build !='Null' and enb_build !=''
          and enb_build not like '%MF%'
          and crt_type='CRT1_DB'
          and enb_release like("''' + crt_type + '''")
        GROUP BY enb_build
        order by time_epoch_start desc
        limit 30
    '''
    try:
        data = mysqldb.get_DB(sql_str)
        return [row[0] for row in data]
    except Exception as e:
        logger.error('error: get_loadnames %s', e)
def wego_request_parser(content):
    """Parse a Wego flight request string.

    The content is ``flight_info|time_info|ticketsource`` where
    ``flight_info`` is ``flight_no-dept-arr`` and ``time_info`` is
    ``YYYYMMDD_HH:MM``.

    :return: ``-1`` when the content cannot be parsed. The visible part of
        the function ends after parsing.
    """
    result = -1
    try:
        infos = content.split('|')
        flight_info = infos[0].strip()
        time_info = infos[1].strip()
        ticketsource = infos[2].strip()

        flight_parts = flight_info.split('-')
        flight_no = flight_parts[0]
        dept_id, arr_id = flight_parts[1], flight_parts[2]

        # time_info example: 20140510_09:10
        time_parts = time_info.split('_')
        dept_day, dept_hour = time_parts[0], time_parts[1]
        # 20140510 -> 2014-05-10
        dept_date = dept_day[0:4] + '-' + dept_day[4:6] + '-' + dept_day[6:]

        # Minutes since midnight, 30 minutes before departure, floored at 0.
        clock = dept_hour.split(':')
        dept_min = max(60 * int(clock[0]) + int(clock[1]) - 30, 0)
        dept_time = dept_date + 'T' + dept_hour + ':00'
    except Exception:
        logger.error(
            'wegoFlight Content Error: cannot extract information from %s' %
            content)
        return result
def work():
    """Fetch one task from the workload and run the parser for its source.

    :return: None when no task is available; ``PARSER_ERROR`` when no parser
        is registered for the task source. The visible part of the function
        ends after the parser call, where ``error`` holds the parser result
        or ``SLAVE_ERROR`` if the parser raised.
    """
    task = workload.assign_workload()
    if task is None:
        time.sleep(1)
        return

    # Pick the parser that matches the task source.
    if task.source not in parsers:
        logger.error("no parser for the source %s" % task.source)
        time.sleep(1)
        return PARSER_ERROR

    info.process_task_num += 1
    parser = parsers[task.source]
    error = 0
    try:
        error = parser.parse(task)
    except Exception as e:
        logger.error("Parser Exception: task_id: %s %s" % (task.id, str(e)))
        error = SLAVE_ERROR
def copy_open(self, srcfile, dstfile):
    """Copy an Excel file and open the copy.

    Working on a copy protects the original test-case file from accidental
    modification.

    :param srcfile: path of the source workbook.
    :param dstfile: path of the copy to create; an existing file is
        overwritten after a warning.
    """
    # Nothing to do when the source file does not exist.
    if not os.path.isfile(srcfile):
        logger.error(f'{srcfile}不存在!!')
        return

    # Warn when the destination already exists.
    if os.path.isfile(dstfile):
        # Logger.warn() is a deprecated alias of warning().
        logger.warning(dstfile + " 文件已存在!")

    # Remember where results are to be saved.
    self.df = dstfile
    # Load the source workbook into memory.
    self.workbook = openpyxl.load_workbook(filename=srcfile)
    # Copy the file on disk, then open the copy.
    copyfile(srcfile, dstfile)
    self.wb = openpyxl.load_workbook(filename=dstfile)
def tongcheng_request_parser(taskcontent):
    """Parse a Tongcheng flight task string.

    The content is ``flight_info|time_info|source`` where ``flight_info`` is
    ``flight_no-dept-dest`` and ``time_info`` is ``YYYYMMDD_HH:MM``.

    :return: ``-1`` when the content cannot be parsed or a city code is
        unknown. The visible part of the function ends after parsing.
    """
    result = -1
    # NOTE(review): the encoded value is discarded, so this call only has an
    # effect when it raises; kept as-is pending confirmation of the intent.
    taskcontent.encode('utf-8')
    try:
        info_list = taskcontent.split('|')
        flight_info = info_list[0]
        time_info = info_list[1]
        source = info_list[2]

        flight_parts = flight_info.split('-')
        flight_no = flight_parts[0]
        dept_id = flight_parts[1]
        dest_id = flight_parts[2]

        time_parts = time_info.split('_')
        dept_day = time_parts[0]  # 20140510
        dept_minute = time_parts[1] + ':00'  # 18:30:00

        dept_city = city_dict[dept_id]
        dest_city = city_dict[dest_id]
        dept_city_cn = city_dict_cn[dept_id].encode('utf-8')
        dest_city_cn = city_dict_cn[dest_id].encode('utf-8')
    except Exception as e:
        logger.error('tongchengFlight: wrong content format with %s' %
                     taskcontent + str(e))
        return -1
def get_post_data(content, dept_day, dest_day):
    """Decode the flight JSON used to build the POST data.

    :param content: JSON text describing the flights.
    :param dept_day: departure day; not used in the visible part.
    :param dest_day: return day; not used in the visible part.
    :return: an empty dict when ``content`` is not valid JSON. The visible
        part of the function ends after decoding.
    """
    post_data = {}
    try:
        flight_json = json.loads(content)
    except Exception as e:
        logger.error(('Loading flight json failed error: ' + str(e)))
        return post_data
def parsePrice(content):
    """Decode the price JSON.

    :param content: JSON text with price information.
    :return: an empty dict when ``content`` is not valid JSON. The visible
        part of the function ends after decoding.
    """
    price_dict = {}
    try:
        price_json = json.loads(content)
    except Exception as e:
        logger.error('Loading price failed with error: ' + str(e))
        return price_dict
def formatMessage(self, data):
    """Merge the ``header`` and ``body`` sections of a query message.

    On failure ``self.errorCode`` is set to 1 and the error is logged. The
    visible part of the method ends after the merge.

    :param data: mapping with ``header`` and ``body`` sub-mappings.
    """
    try:
        # list() keeps the concatenation working on Python 3, where items()
        # returns views that do not support ``+``. Body keys win on clashes.
        infoDict = dict(list(data.get('header').items()) +
                        list(data.get('body').items()))
    except Exception as e:
        self.errorCode = 1
        logger.error(
            'QueryMessage: error occured in generating message dict! (%s)' % e)
        return
def vueling_task_parser(taskcontent):
    """Split a Vueling task string of the form ``dept&dest&dept_date``.

    :return: None when the first three fields cannot be extracted (the
        error is logged). The visible part of the function ends after the
        extraction.
    """
    try:
        # Split once instead of once per field; each field is trimmed.
        fields = taskcontent.split('&')
        dept_id, dest_id, dept_date_temp = \
            fields[0].strip(), fields[1].strip(), fields[2].strip()
    except Exception as e:
        logger.error('VuelingFlight: Content Error wrong content format' +
                     str(e))
        return None
def feifan_task_parser(taskcontent):
    """Extract departure code, destination code and date from a task string.

    The content has the form ``dept_code&dest_code&dept_date``.

    :return: ``[]`` when the first three fields cannot be extracted. The
        visible part of the function ends after the extraction.
    """
    try:
        # Split once instead of once per field.
        fields = taskcontent.strip().split('&')
        dept_code, dest_code, dept_date = fields[0], fields[1], fields[2]
    except Exception:
        logger.error('feifanFlight: wrong content format with %s' %
                     taskcontent)
        return []
def data_writer(room_list, taskcontent):
    """Insert the parsed rooms into storage and log the outcome.

    :param room_list: parsed rooms; nothing is written when it is None or
        an empty list.
    :param taskcontent: task description, used in log messages only.
    """
    if room_list is None or room_list == []:
        logger.error('No hotel parsed')
        return
    try:
        InsertHotel_room(room_list)
        logger.info('Insert hotel [success] ' + taskcontent)
    except Exception:
        # Best effort: the failure is logged and not propagated.
        logger.error('Insert hotel [failed] ' + taskcontent)
        return
def wego_task_parser(content):
    """Split a Wego task string of the form ``dept&arr&YYYYMMDD``.

    :return: None when the fields cannot be extracted (the error is
        logged). The visible part of the function ends after the extraction.
    """
    content = content.encode('UTF-8').strip()
    try:
        info = content.split('&')
        dept_id = info[0]
        arr_id = info[1]
        # 20140510 -> 2014-05-10
        raw_date = info[2]
        dept_date = raw_date[:4] + '-' + raw_date[4:6] + '-' + raw_date[6:]
    except Exception:
        logger.error(
            'wegoFlight Content Error: cannot extract information from %s' %
            content)
        return None
def ryanair_task_parser(content):
    """Split a Ryanair task string of the form ``dept&dest&YYYYMMDD``.

    :return: None when the fields cannot be extracted or the date is
        invalid (the error is logged). The visible part of the function ends
        after the extraction.
    """
    try:
        contents = content.split('&')
        dept_id = contents[0]
        dest_id = contents[1]
        # 20140510 -> 2014-05-10
        raw_date = contents[2]
        dept_date = raw_date[:4] + '-' + raw_date[4:6] + '-' + raw_date[6:]
        # Ten days after departure; the original author marked this value
        # as not to be used, but computing it also validates the date.
        ret_date = str(
            datetime.datetime.strptime(dept_date[2:], '%y-%m-%d') +
            datetime.timedelta(10)).split(' ')[0].strip()
    except Exception:
        logger.error('ryanairFlight: wrong content format with %s' % content)
        return None
def haoding_room_task_parser(taskcontent_temp):
    """Split a Haoding hotel task string.

    Expected fields, separated by ``&&``: city name (zh), country name (zh),
    city id, hotel id, check-in day.

    :return: ``{'para': [], 'error': TASK_ERROR}`` when the first five
        fields cannot be extracted. The visible part of the function ends
        after the extraction.
    """
    taskcontent = taskcontent_temp.encode('utf8')
    try:
        # Split once instead of once per field.
        fields = taskcontent.split('&&')
        city_name_zh, country_name_zh, city_id = \
            fields[0], fields[1], fields[2]
        hotel_id, check_in_temp = fields[3], fields[4]
    except Exception as e:
        logger.error('haodingHotel::Cannot parse task content with error: ' +
                     str(e))
        return {'para': [], 'error': TASK_ERROR}
def parseRoom(content,hotel_id,hotel_name,city,check_in,check_out):
    """Locate the per-room sections in a hotel page.

    :param content: page text to search.
    :return: an empty list when the room section cannot be found. The
        visible part of the function ends after the sections are located;
        the remaining parameters are not used in it.
    """
    rooms = []
    try:
        all_info = all_room_info_pat.findall(content)[0]
        each_room_info_list = each_room_info_pat.findall(all_info)
        # Parenthesised so the statement parses on Python 2 and 3 alike.
        print(len(each_room_info_list))
        # NOTE(review): fixed three-second pause kept from the original;
        # presumably throttling or a debugging leftover, confirm.
        time.sleep(3)
    except Exception as e:
        logger.error('Can not parse rooms info!' + str(e))
        return rooms
def data_writer(room_list,taskcontent):
    """Insert the parsed rooms into storage and log the outcome.

    :param room_list: parsed rooms; nothing is written for an empty list.
    :param taskcontent: task description, used in log messages only.
    """
    if room_list == []:
        logger.error('room_list.size == 0')
        return
    try:
        InsertHotel_room(room_list)
        logger.info(taskcontent + ' [success]')
        logger.info('with ' + str(len(room_list)) + ' values!')
    except Exception as e:
        # A failed insert is an error, not routine information.
        logger.error(taskcontent + ' [failed]')
        logger.error(str(e))
def parseRoom(content,city_name_zh,country_name_zh,hotel_id,check_in,check_out):
    """Locate the rooms section in a Haoding hotel page.

    :param content: page text to search.
    :param hotel_id: hotel id, used in the error message.
    :return: an empty list when the section is missing or empty. The visible
        part of the function ends after the section is located.
    """
    room_list = []
    try:
        rooms_content = rooms_content_pat.findall(content)[0]
        if len(rooms_content) == 0:
            return room_list
    except Exception as e:
        logger.error('haodingHotel::Cannot parse rooms of this hotel [' +
                     hotel_id + ']')
        logger.error('haodingHotel::' + str(e))
        return room_list
def formatMessage(self, data):
    """Merge the sections of a create message into one dict.

    Combines ``header``, ``body`` and ``extendInfo`` and adds the wrapped
    ``deliveryMessage``. On failure ``self.errorCode`` is set to 1 and the
    error is logged. The visible part of the method ends after the merge.

    :param data: mapping with ``header``, ``body``, ``extendInfo`` and
        ``deliveryMessage`` entries.
    """
    try:
        # Wrap the 'deliveryMessage' section first.
        tempDeliveryMessage = self._addDeliveryMessage(
            data.get('deliveryMessage'))
        # list() keeps the concatenation working on Python 3, where items()
        # returns views that do not support ``+``. Later sections win on
        # key clashes.
        infoDict = dict(list(data.get('header').items()) +
                        list(data.get('body').items()) +
                        list(data.get('extendInfo').items()))
        infoDict['deliveryMessage'] = tempDeliveryMessage
    except Exception as e:
        self.errorCode = 1
        logger.error(
            "CreateMessage: error occured in generating message dict! (%s)" % e)
        return
def pageParser(content):
    """Extract the page-count text from a listing page.

    :param content: page text; anything shorter than 200 characters is
        treated as an invalid page.
    :return: 0 when the content is too short or the page pattern does not
        match. The visible part of the function ends after the match.
    """
    if len(content) < 200:
        logger.error('[Error in PageParser] [content.len = ' +
                     str(len(content)) + ']')
        return 0

    page_num_temp = ''
    page_num = 0
    try:
        page_num_temp = page_pat.findall(content)[0]
    except Exception as e:
        logger.error("page_num_temp Error: %s" % str(e))
        return page_num
def elong_room_request_parser(taskcontent):
    """Split an eLong hotel room request string.

    Expected fields, separated by ``&&``: city name (zh), hotel id, hotel
    name, country name, city id, city name, check-in day.

    :return: an empty list when the first seven fields cannot be extracted.
        The visible part of the function ends after the extraction.
    """
    room_list = []
    try:
        taskcontent = taskcontent.strip()
        # Split once instead of once per field.
        fields = taskcontent.split('&&')
        city_name_zh, hotel_id, hotel_name = fields[0], fields[1], fields[2]
        country_name, city_id, city_name_temp = \
            fields[3], fields[4], fields[5]
        check_in_temp = fields[6]
    except Exception as e:
        logger.error('Cannot parse tackcontent!' + str(e))
        return room_list
def smartfares_task_parser(taskcontent):
    """Crawl and parse Smartfares flights for one task.

    :param taskcontent: ``dept_id&dest_id&dept_day``.
    :return: dict with ``para`` (``{'flight': ..., 'ticket': ...}``) and
        ``error``: 0 on success, ``TASK_ERROR`` for malformed content,
        ``PROXY_NONE`` when no proxy is available, ``PROXY_INVALID`` when
        the search id cannot be obtained, ``DATA_NONE`` when the result page
        is too short.
    """
    result = {}
    flights = {}
    tickets = []
    result['para'] = {'flight': flights, 'ticket': tickets}
    result['error'] = 0

    taskcontent = taskcontent.encode('utf-8')
    try:
        fields = taskcontent.strip().split('&')
        dept_id, dest_id, dept_day = fields[0], fields[1], fields[2]
    except Exception:
        # ``except Exception`` rather than a bare except, so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        logger.error('smartfaresFlight::Wrong Content Format with %s' %
                     taskcontent)
        result['error'] = TASK_ERROR
        return result

    p = get_proxy(source='smartfaresFlight')
    if p is None:
        result['error'] = PROXY_NONE
        return result

    # Install a cookie-aware opener for the following requests.
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

    try:
        search_url = get_search_url(dept_day, dept_id, dest_id)
        content = crawl_single_page(search_url, proxy=p, referer=HOST)
        search_id = get_search_id(content)
    except Exception:
        logger.error('smartfares::Parse search id failed')
        result['error'] = PROXY_INVALID
        return result

    url_real = URL % search_id
    # Up to three attempts until the page is long enough.
    i = 0
    content_len = 0
    while i < 3 and content_len < CONTENT_LEN:
        content_real = crawl_single_page(url=url_real, proxy=p,
                                         referer=search_url)
        content_len = len(content_real)
        i += 1

    if len(content_real) > 100:
        parser_result = parsePage(content_real)
        tickets = parser_result['ticket']
        flights = parser_result['flight']
        result['para'] = {'flight': flights, 'ticket': tickets}
        return result

    result['error'] = DATA_NONE
    return result
def easyjet_task_parser(taskcontent):
    """Crawl and parse easyJet flights for one task.

    :param taskcontent: ``dept_id&dest_id&dept_day``.
    :return: dict with ``para`` (``{'flight': ..., 'ticket': ...}``) and
        ``error``: 0 on success, ``TASK_ERROR`` for malformed content,
        ``PROXY_NONE`` when no proxy is available, ``PROXY_INVALID`` when no
        usable page could be fetched, ``DATA_NONE`` when nothing was parsed.
    """
    result = {}
    flights = {}
    tickets = []
    result['para'] = {'flight': flights, 'ticket': tickets}
    result['error'] = 0

    try:
        fields = taskcontent.strip().split('&')
        dept_id, dest_id, dept_day_temp = fields[0], fields[1], fields[2]
    except Exception:
        # ``except Exception`` rather than a bare except, so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        logger.error('easyjet::Wrong Content Format with %s' % taskcontent)
        result['error'] = TASK_ERROR
        return result

    search_url = get_search_url(dept_id, dest_id, dept_day_temp)
    p = get_proxy(source='easyjet')
    time_zone_A = airport[dept_id]
    time_zone_B = airport[dest_id]
    if p is None:
        result['error'] = PROXY_NONE
        return result

    # Install a cookie-aware opener for the following requests.
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

    # Up to three attempts until the page is long enough.
    content = None
    i = 0
    content_len = 0
    while i < 3 and content_len < CONTENT_LEN:
        content = crawl_single_page(url=search_url, proxy=p, n=1,
                                    referer=HOST)
        # The check below allows for a None page, so len() must not be
        # called on it here; a missing page counts as length 0.
        content_len = len(content) if content else 0
        i += 1

    if not content or len(content) < CONTENT_LEN:
        result['error'] = PROXY_INVALID
        return result

    para = parsePage(content, p, time_zone_A, time_zone_B)
    if para == {'flight': {}, 'ticket': []}:
        result['error'] = DATA_NONE
        return result

    flights = para['flight']
    tickets = para['ticket']
    result['para'] = {'ticket': tickets, 'flight': flights}
    return result
def biyi_room_task_parser(taskcontent):
    """Split a Biyi hotel room task string.

    Expected fields, separated by ``&``: hotel id, hotel name, map info,
    city name (zh), city name (en), country name (zh), check-in day.

    :return: ``[]`` when the first seven fields cannot be extracted. The
        visible part of the function ends after the extraction.
    """
    taskcontent = taskcontent.encode('utf-8')
    try:
        # Split once instead of once per field.
        fields = taskcontent.strip().split('&')
        hotel_id = fields[0]
        hotel_name = fields[1]
        map_info = fields[2]
        city_name_zh = fields[3]
        city_name_en = fields[4]
        country_name_zh = fields[5]
        check_in_day_temp = fields[6]
    except Exception:
        logger.error('Can not parse taskcontent!')
        return []
def parse(self,task):
    """Run the ceair flight task, retrying once, and store the tickets.

    :param task: task object; only ``task.content`` is used here.
    :return: the error code from the parser result after a successful
        insert or when the result is malformed; ``DB_ERR_CODE`` when the
        insert fails. The visible part of the method ends after the retry
        loop without a further return.
    """
    content = task.content

    # Task start time, used for the duration log.
    stime = time.time()
    update_time = time.strftime('%Y-%m-%dT%H:%M:%S',
                                time.localtime(time.time()))
    logger.info('ceairFlight: start a new task @ %s' % str(update_time))

    para = []
    error = ERR_CODE

    # Two attempts in total when no ticket is returned.
    for i in range(2):
        # Expected result shape: {'para': {...}, 'error': 0}
        result = ceair_task_parser(content)
        try:
            para = result['para']
            error = result['error']
            ticket = para['ticket']
            flight = para['flight']
        except Exception as e:
            logger.error('ceairFlight error: Wrong Result Format %s' % str(e))
            return error

        if ticket == None or ticket == []:
            logger.info('ceairFlight: task failed with %s for %sth time' %
                        (content, i))
            time.sleep(random.randint(1, 2))
            continue

        try:
            InsertFlight(ticket)
            etime = time.time()  # task completion time
            dur = int((etime - stime) * 1000)
            logger.info('ceairFlight: task finish with %s using %d ms' %
                        (content, dur))
            return error
        except Exception as e:
            logger.error('ceairFlight: Insertation Error: %s' % str(e))
            error = DB_ERR_CODE
            return error
def parseRoom(content, check_in, check_out, hotel_id):
    """Locate the per-room-type sections of a hotel page.

    Newlines are stripped from ``content`` and the text is passed through
    ``replace_char`` before matching ``each_room_type_pat`` against it.

    Returns an empty list when the cleaned page is empty or shorter than
    CONTENT_LEN, when the pattern finds nothing, or when matching raises
    (the error is logged in that case).
    """
    rooms = []
    room = Room()

    page = replace_char(content.replace('\n', ''))
    if page == '' or len(page) < CONTENT_LEN:
        return rooms

    try:
        sections = each_room_type_pat.findall(page)
        if len(sections) == 0:
            return rooms
    except Exception as err:
        logger.error('Cannot parse this hotel with error: ' + str(err))
        return rooms
    # NOTE(review): the matched sections, ``room`` and the check_in/check_out/
    # hotel_id arguments are not used any further in this block.
def ExecuteSQLs(sql, args = None):
    '''
    Execute one SQL statement for many parameter sets and commit.

    sql  -- the statement passed to cursor.executemany
    args -- the sequence of parameter sets passed to cursor.executemany

    Returns whatever executemany returns (the affected row count) on
    success, or False when a MySQLdb.Error is raised. Because 0 == False,
    callers that need to tell "no rows" from "failed" should test
    ``result is False``.
    '''
    conn = None
    cur = None
    try:
        conn = GetConnection()
        cur = conn.cursor()
        ret = cur.executemany(sql, args)
        conn.commit()
        return ret
    except MySQLdb.Error as e:
        logger.error("ExecuteSQLs error: %s" %str(e))
        return False
    finally:
        # Release the cursor and then the connection on every path; the
        # previous code closed the cursor twice and never the connection.
        for handle in (cur, conn):
            if handle is None:
                continue
            try:
                handle.close()
            except MySQLdb.Error as e:
                logger.error("ExecuteSQLs close error: %s" %str(e))
def QueryBySQL(sql, args = None, size = None):
    '''
    Run a query and return all rows.

    sql  -- the statement passed to cursor.execute
    args -- the parameters passed to cursor.execute
    size -- accepted for interface compatibility; this function does not
            use it and always fetches every row

    Returns a list of rows (fetched through a DictCursor) on success, or
    None when a MySQLdb.Error is raised.
    '''
    conn = None
    cur = None
    try:
        conn = GetConnection()
        cur = conn.cursor(cursorclass = DictCursor)
        cur.execute(sql, args)
        # Copy the fetched rows into a fresh list for the caller.
        return list(cur.fetchall())
    except MySQLdb.Error as e:
        logger.error("QueryBySQL error: %s" %str(e))
        return None
    finally:
        # Release the cursor and then the connection on every path.
        for handle in (cur, conn):
            if handle is None:
                continue
            try:
                handle.close()
            except MySQLdb.Error as e:
                logger.error("QueryBySQL close error: %s" %str(e))
def youzhan_request_parser(content):
    """Split a youzhan hotel request into its fields.

    Layout:  hotelid|city_cn_name|roominfo(room_type)|date|source
    Example: 107634|<Chinese city name>|<room type>|20140419-20140520|youzhan::Booking.com

    The city name is looked up in ``cities_dict``; an unknown city, like any
    other malformed field, is logged and makes the function return -1.
    """
    content = content.strip().encode('utf-8')
    try:
        infos = content.split('|')
        hotel_id = infos[0]
        city_name = infos[1]
        # ``content`` is already UTF-8 bytes. Encoding the city name a second
        # time made Python 2 decode it as ASCII first, which raised
        # UnicodeDecodeError for every non-ASCII (e.g. Chinese) city name.
        ipathid = cities_dict[city_name]
        room_type = infos[2]
        stay_dates = infos[3].split('-')
        checkin_date = stay_dates[0]   # format: 20140419
        checkout_date = stay_dates[1]  # format: 20140520
        real_source = infos[4].split('::')[-1]
    except Exception:
        logger.error('youzhanHotel: Wrong Content Format with %s'%content)
        return -1
    # NOTE(review): the parsed fields are not used any further in this block.
def get_url(dept_city, dest_city, dept_date, dept_id, dest_id, proxy = None):
    """Ask ly.com's SEARCHURL endpoint for the search page of a flight query.

    Builds the request URL from the departure city, arrival city and
    departure date, fetches it through ``crawl_single_page`` and decodes the
    JSONP reply (the JSON between the first '(' and the last ')').

    Returns '' when the reply's 'state' is not 100, or when the reply cannot
    be decoded (an error is logged in that case).
    """
    parser_url = ''

    # Placeholder block for the unused multi-leg form fields; the endpoint
    # URL carries it six times.
    multi_leg_fields = '&iOrgPortMult=城市名&iArvPortMult=城市名&idtGoDateMult=时间/日期'
    url_temp = (
        'http://www.ly.com/iflight/flightinterajax.aspx?action=SEARCHURL&airplaneInternatType=1&iOrgPort='
        + dept_city
        + '&iArvPort=' + dest_city
        + '&idtGoDate=' + dept_date
        + '&idtBackDate=时间/日期&sel_inCabinType=Y&sel_inPassengersType=1&sel_inAdult=1&sel_inChild=0'
        + multi_leg_fields * 6
        + '&callback=tc10805565235'
    )

    page1 = crawl_single_page(
        url_temp,
        proxy=proxy,
        n=1,
        Accept='text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    )

    try:
        # Strip the JSONP wrapper: callback( ...json... )
        open_paren = page1.find('(')
        close_paren = page1.rfind(')')
        reply = json.loads(page1[open_paren+1:close_paren])
        if reply['state'] != 100:
            return parser_url
        url_temp1 = reply['href']
    except Exception:
        logger.error('Can not get url temp 1!')
        return parser_url
    # NOTE(review): ``url_temp1``, ``dept_id`` and ``dest_id`` are not used
    # any further in this block.
def booking_room_task_parser(taskcontent):
    """Parse a booking room task of the form hotel_id&url_hotel_name&YYYYMMDD.

    Derives the check-in date as 'YYYY-MM-DD' and the check-out date as the
    following day.

    Returns {'para': None, 'error': TASK_ERROR} (after logging) when the
    task string cannot be parsed.
    """
    result = {'para': None, 'error': 0}

    try:
        fields = taskcontent.strip().split('&')
        hotel_id, url_hotel_name, check_in_temp = fields[0], fields[1], fields[2]

        year, month, day = check_in_temp[:4], check_in_temp[4:6], check_in_temp[6:]
        check_in = year + '-' + month + '-' + day

        # Check-out is one night after check-in; keep only the date part.
        check_in_dt = datetime.datetime(int(year), int(month), int(day))
        check_out = str(check_in_dt + datetime.timedelta(days=1))[:10]
    except Exception:
        logger.error('bookingHotel: Wrong Content Format with %s'%taskcontent)
        result['error'] = TASK_ERROR
        return result
    # NOTE(review): the parsed fields are not used any further in this block.
def wego_task_parser(content):
    """Parse a wego flight task of the form dept_id&arr_id&YYYYMMDD.

    The task string is encoded to UTF-8 and stripped before splitting; the
    date field is reformatted as 'YYYY-MM-DD'.

    Returns {'para': None, 'error': TASK_ERROR} (after logging) when the
    fields cannot be extracted.
    """
    # Default result
    result = {'para': None, 'error': 0}

    # Split the task string
    content = content.encode('UTF-8').strip()
    try:
        info = content.split('&')
        dept_id, arr_id = info[0], info[1]
        raw_date = info[2]
        dept_date = raw_date[:4] + '-' + raw_date[4:6] + '-' + raw_date[6:]
    except Exception:
        logger.error('wegoFlight Content Error: cannot extract information from %s'%content)
        result['error'] = TASK_ERROR
        return result
    # NOTE(review): the parsed fields are not used any further in this block.
def jijitong_task_parser(taskcontent):
    """Parse a jijitong flight task.

    Layout: dept_city_zh&dept_city_en&dest_city_zh&dest_city_en&YYYYMMDD
    The departure day is reformatted as 'YYYY-MM-DD'.

    Returns a result dict with 'error' set to TASK_ERROR (after logging)
    when the task string cannot be parsed; 'para' then holds an empty ticket
    list and an empty flight dict.
    """
    result = {'para': {'ticket': [], 'flight': {}}, 'error': 0}

    # The former ``taskcontent.encode('utf-8')`` statement threw its result
    # away, so it could only fail (outside the try block) and never changed
    # the value being parsed. It has been removed.
    try:
        # Split once instead of re-splitting the string for every field.
        fields = taskcontent.strip().split('&')
        dept_city_zh = fields[0]
        dept_city_en = fields[1]
        dest_city_zh = fields[2]
        dest_city_en = fields[3]
        dept_day_temp = fields[4]
        dept_day = dept_day_temp[:4] + '-' + dept_day_temp[4:6] + '-' + dept_day_temp[6:]
    except Exception:
        logger.error('jijitongFlight:Wrong Content Format with %s'%taskcontent)
        result['error'] = TASK_ERROR
        return result
    # NOTE(review): the parsed fields are not used any further in this block.
def parse(self,task):
    """Run one elong hotel-room task: crawl, validate the result, store rooms.

    ``task.content`` is handed to ``elong_room_task_parser``; the crawl is
    tried at most twice, sleeping 1-2 seconds after an attempt that yields
    no rooms.

    Returns an error code in every case:
      * the parser's own 'error' value after a successful insert, or after
        both attempts came back empty;
      * DB_ERR_CODE when inserting the rooms raised;
      * the most recent error code (initially ERR_CODE) when the parser
        result is malformed.
    """
    content = task.content

    # Task start time, used for the duration log line below.
    stime = time.time()
    update_time = time.strftime('%Y-%m-%dT%H:%M:%S',time.localtime(time.time()))
    logger.info ('elongHotelHotel: start a new task @ %s'%str(update_time))

    error = ERR_CODE

    # Number of crawl attempts.
    for i in range(2):
        result = elong_room_task_parser(content)
        try:
            para = result['para']
            error = result['error']
        except Exception as e:
            # Previously swallowed silently; log it like the sibling parsers do.
            logger.error('elongHotelHotel error: Wrong Result Format %s'%str(e))
            return error

        if para is None or para == []:
            logger.error ('elongHotelHotel: task failed with %s for %sth time'%(content,i))
            time.sleep(random.randint(1,2))
            continue

        try:
            InsertHotel_room(para)
            etime = time.time()  # task finish time
            dur = int((etime - stime) * 1000)
            logger.info("elongHotelHotel: task %s finish using %d ms"%(content, dur))
            return error
        except Exception as e:
            logger.error('elongHotelHotel: Insertation Error: %s'%str(e))
            error = DB_ERR_CODE
            return error

    # Every attempt came back empty: report the last parser error code
    # instead of falling off the end and returning None.
    return error