def do_refresh_line(self, line):
    # Delegates to the newer list-based refresh; the legacy per-schedule
    # implementation below is kept but unreachable.
    return self.do_refresh_line_new(line)

    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    detail_url = "http://www.changtu.com/chepiao/queryOneSch.htm"
    params = dict(id=line.extra_info["id"],
                  stationMapId=line.extra_info["stationMapId"],
                  getModel=line.extra_info["getModel"],
                  ticketType=line.extra_info["ticketTypeStr"].split(",")[0],
                  schSource=0)
    headers = {"User-Agent": random.choice(BROWSER_USER_AGENT)}
    rebot = ChangtuWebRebot.get_one()
    try:
        r = rebot.http_get("%s?%s" % (detail_url, urllib.urlencode(params)),
                           headers=headers)
    except:
        result_info.update(result_msg="exception_ok",
                           update_attrs={
                               "left_tickets": 2,
                               "refresh_datetime": dte.now()
                           })
        line_log.info("%s\n%s", "".join(traceback.format_exc()), locals())
        return result_info
    res = r.json()
    if res["flag"] != "true":
        result_info.update(result_msg="flag is false",
                           update_attrs={"left_tickets": 0})
        return result_info
    result_msg = "ok"
    full_price = float(res["ticketMoney"])
    left_tickets = int(res["seatAmount"])
    confirm_url = "http://www.changtu.com/chepiao/confirmSch.htm"
    sta_city_id, s_pinyin = line.s_city_id.split("|")
    params = dict(
        id=line.extra_info["id"],
        stationMapId=line.extra_info["stationMapId"],
        schSource=0,
        drvDate=line.drv_date,
        cityId=sta_city_id,
    )
    r = rebot.http_get("%s?%s" % (confirm_url, urllib.urlencode(params)),
                       headers=headers)
    res = r.json()
    if res["flag"] != "true":
        result_msg = res["msg"]
    result_info.update(result_msg=result_msg,
                       update_attrs={
                           "full_price": full_price,
                           "left_tickets": left_tickets,
                       })
    return result_info
def close_line(self, line, reason=""): """ 关闭线路 """ if not hasattr(line, "line_id"): return line_log.info("[close] line:%s %s, reason:%s", line.crawl_source, line.line_id, reason) now = dte.now() line.modify(left_tickets=0, update_datetime=now, refresh_datetime=now)
def refresh_line(self, line, force=False):
    """Main refresh flow for line info; subclasses should not override it."""
    line_log.info("[refresh-start] city: %s, line:%s %s, left_tickets:%s ",
                  line.s_city_name, line.crawl_source, line.line_id,
                  line.left_tickets)
    if not self.valid_line(line):
        line.modify(left_tickets=0, refresh_datetime=dte.now())
        line_log.info("[refresh-result] line:%s %s, invalid line",
                      line.crawl_source, line.line_id)
        return
    if not self.need_refresh_line(line, force=force):
        line_log.info("[refresh-result] line:%s %s, no need to refresh",
                      line.crawl_source, line.line_id)
        return
    ret = self.do_refresh_line(line)
    update = ret["update_attrs"]
    now = dte.now()
    if update:
        if "refresh_datetime" not in update:
            update["refresh_datetime"] = now
        if update.get("left_tickets", 10) < 2:  # fewer than 2 tickets counts as sold out
            update["left_tickets"] = 0
        line.modify(**update)
    line_log.info("[refresh-result] line:%s %s, result: %s, update: %s",
                  line.crawl_source, line.line_id, ret["result_msg"],
                  str(update))
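# Illustrative only: refresh_line above is a template method, so a concrete flow
# only has to supply do_refresh_line and return the {"result_msg", "update_attrs"}
# dict it expects. A minimal sketch, assuming the base class here is named Flow;
# DummyFlow and fetch_left_tickets are hypothetical, not part of the project.
class DummyFlow(Flow):

    def do_refresh_line(self, line):
        now = dte.now()
        try:
            left = self.fetch_left_tickets(line)  # hypothetical source query
        except Exception:
            # Convention used by the flows in this module: on errors, keep the
            # line sellable with a small ticket count and report "exception_ok".
            return {"result_msg": "exception_ok",
                    "update_attrs": {"left_tickets": 2, "refresh_datetime": now}}
        return {"result_msg": "ok",
                "update_attrs": {"left_tickets": left, "refresh_datetime": now}}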
def do_refresh_line_by_app(self, line):
    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    line_url = "http://www.vchepiao.cn/mb/base/bus/queryBusSKY"
    params = {
        "fromCity": "深圳",
        "stationCode": line.s_sta_id,
        "dstNode": line.d_city_name,
        "schDate": line.drv_date.replace('-', ''),
    }
    headers = {
        "User-Agent": random.choice(BROWSER_USER_AGENT),
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "accept": "application/json",
    }
    now = dte.now()
    try:
        r = requests.post(line_url, data=urllib.urlencode(params), headers=headers)
        res = r.json()
    except:
        result_info.update(result_msg="exception_ok",
                           update_attrs={
                               "left_tickets": 5,
                               "refresh_datetime": now
                           })
        line_log.info("%s\n%s", "".join(traceback.format_exc()), locals())
        return result_info
    if not res["success"]:
        result_info.update(result_msg="error response",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
        return result_info
    update_attrs = {}
    for d in res["data"]["list"]:
        if d['SchStat'] == '1':
            drv_datetime = dte.strptime(
                "%s %s" % (d["SchDate"], d["orderbytime"]), "%Y-%m-%d %H:%M")
            line_id_args = {
                "s_city_name": line.s_city_name,
                "d_city_name": line.d_city_name,
                "s_sta_name": d["SchWaitStName"],
                "d_sta_name": d["SchNodeName"],
                "crawl_source": line.crawl_source,
                "drv_datetime": drv_datetime,
            }
            line_id = md5(
                "%(s_city_name)s-%(d_city_name)s-%(drv_datetime)s-%(s_sta_name)s-%(d_sta_name)s-%(crawl_source)s"
                % line_id_args)
            try:
                obj = Line.objects.get(line_id=line_id)
            except Line.DoesNotExist:
                continue
            info = {
                "full_price": float(d["SchStdPrice"]),
                "fee": 0,
                "left_tickets": int(d["SchSeatCount"]),
                "refresh_datetime": now,
                "extra_info": {"raw_info": d},
            }
            if line_id == line.line_id:
                update_attrs = info
            else:
                if obj.left_tickets == 0:
                    continue
                obj.update(**info)
    if not update_attrs:
        result_info.update(result_msg="no line info",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
    else:
        result_info.update(result_msg="ok", update_attrs=update_attrs)
    return result_info
def do_refresh_line(self, line):
    rebot = SzkyWebRebot.get_one()
    headers = {"User-Agent": rebot.user_agent}
    cookies = {}
    for i in range(20):
        try:
            res = rebot.query_code(headers)
        except:
            continue
        if res.get('status', '') == 0:
            cookies = res.get('cookies')
            valid_code = res.get('valid_code')
            break
    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    now = dte.now()
    if cookies:
        data = {
            "DstNode": line.d_sta_name,
            "OpAddress": "-1",
            "OpStation": "-1",
            "OperMode": '',
            "SchCode": '',
            "SchDate": line.drv_date,
            "SchTime": '',
            'SeatType': '',
            'StartStation': line.s_sta_id,
            'WaitStationCode': line.extra_info['raw_info']['SchWaitStCode'],
            'cmd': "MQCenterGetClass",
            'txtImgCode': valid_code,
        }
        line_url = 'http://211.162.125.225/UserData/MQCenterSale.aspx'
        headers.update({
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Referer": "http://211.162.125.225/User/Default.aspx",
            "X-Requested-With": "XMLHttpRequest",
        })
        try:
            r = rebot.http_post(line_url,
                                data=urllib.urlencode(data),
                                headers=headers,
                                cookies=cookies)
            res = json.loads(trans_js_str(r.content))
        except:
            result_info.update(result_msg="exception_ok",
                               update_attrs={
                                   "left_tickets": 5,
                                   "refresh_datetime": now
                               })
            line_log.info("%s\n%s", "".join(traceback.format_exc()), locals())
            return result_info
    else:
        return self.do_refresh_line_by_app(line)
    update_attrs = {}
    try:
        for d in res["data"]:
            if d['SchStat'] == '1':
                drv_datetime = dte.strptime(
                    "%s %s" % (d["SchDate"], d["orderbytime"]), "%Y-%m-%d %H:%M")
                line_id_args = {
                    "s_city_name": line.s_city_name,
                    "d_city_name": line.d_city_name,
                    "s_sta_name": d["SchWaitStName"],
                    "d_sta_name": d["SchNodeName"],
                    "crawl_source": line.crawl_source,
                    "drv_datetime": drv_datetime,
                }
                line_id = md5(
                    "%(s_city_name)s-%(d_city_name)s-%(drv_datetime)s-%(s_sta_name)s-%(d_sta_name)s-%(crawl_source)s"
                    % line_id_args)
                try:
                    obj = Line.objects.get(line_id=line_id)
                except Line.DoesNotExist:
                    continue
                info = {
                    "full_price": float(d["SchStdPrice"]),
                    "fee": 0,
                    "left_tickets": int(d["SchTicketCount"]),
                    "refresh_datetime": now,
                    "extra_info": {"raw_info": d},
                }
                if line_id == line.line_id:
                    update_attrs = info
                else:
                    if obj.left_tickets == 0:
                        continue
                    obj.update(**info)
    except:
        result_info.update(result_msg="exception_ok",
                           update_attrs={
                               "left_tickets": 5,
                               "refresh_datetime": now
                           })
        line_log.info("%s\n%s", "".join(traceback.format_exc()), locals())
        return result_info
    if not update_attrs:
        result_info.update(result_msg="no line info",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
    else:
        result_info.update(result_msg="ok", update_attrs=update_attrs)
    return result_info
def do_refresh_line_new(self, line):
    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    now = dte.now()
    line_url = "http://www.changtu.com/chepiao/newQuerySchList.htm"
    sta_city_id, s_pinyin = line.s_city_id.split("|")
    d_end_type, d_pinyin, end_city_id = line.d_city_id.split("|")
    params = dict(
        endTypeId=d_end_type,
        endId=end_city_id,
        planDate=line.drv_date,
        startCityUrl=s_pinyin,
        endCityUrl=d_pinyin,
        querySch=0,
        startCityId=sta_city_id,
        endCityId=end_city_id,
    )
    url = "%s?%s" % (line_url, urllib.urlencode(params))
    headers = {"User-Agent": random.choice(BROWSER_USER_AGENT)}
    rebot = ChangtuWebRebot.get_one()
    try:
        r = rebot.http_get(url, headers=headers)
        res = r.json()
    except:
        result_info.update(result_msg="exception_ok",
                           update_attrs={
                               "left_tickets": 2,
                               "refresh_datetime": dte.now()
                           })
        line_log.info("%s\n%s", "".join(traceback.format_exc()), locals())
        return result_info
    update_attrs = {}
    for d in res["schList"]:
        if int(d["bookFlag"]) != 2:
            continue
        drv_datetime = dte.strptime("%s %s" % (d["drvDate"], d["drvTime"]),
                                    "%Y-%m-%d %H:%M")
        line_id_args = {
            "s_city_name": line.s_city_name,
            "d_city_name": line.d_city_name,
            "s_sta_name": d["localCarrayStaName"],
            "d_sta_name": d["stopName"],
            "crawl_source": line.crawl_source,
            "drv_datetime": drv_datetime,
        }
        line_id = md5(
            "%(s_city_name)s-%(d_city_name)s-%(drv_datetime)s-%(s_sta_name)s-%(d_sta_name)s-%(crawl_source)s"
            % line_id_args)
        try:
            obj = Line.objects.get(line_id=line_id)
        except Line.DoesNotExist:
            continue
        extra_info = {
            "id": d["id"],
            "getModel": d["getModel"],
            "ticketTypeStr": d["ticketTypeStr"],
            "stationMapId": d["stationMapId"],
        }
        info = {
            "full_price": float(d["fullPrice"]),
            "fee": 0,
            "left_tickets": int(d["seatAmount"]),
            "refresh_datetime": now,
            "extra_info": extra_info,
        }
        if line_id == line.line_id:
            update_attrs = info
        else:
            obj.update(**info)
    if not update_attrs:
        result_info.update(result_msg="no line info",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
    else:
        result_info.update(result_msg="ok", update_attrs=update_attrs)
    return result_info
def query_line_detail():
    """
    Query detailed line info; this endpoint fetches the latest data from the
    source website.

    Input:
    {
        "line_id": "1111"   # line ID
    }
    """
    req_data = request.get_data()
    post = json.loads(req_data)
    rds = get_redis("line")
    today_str = dte.now().strftime("%Y-%m-%d")
    rds.incr(RK_DAY_LINE_STAT % (today_str, "total"), 1)
    try:
        line = Line.objects.get(line_id=post["line_id"])
    except Line.DoesNotExist:
        line_log.info("[fail-detail] line not found %s", post["line_id"])
        return jsonify({"code": RET_LINE_404, "message": "线路不存在", "data": ""})
    open_city = line.get_open_city()
    if not open_city or not open_city.is_active:
        line_log.info("[fail-detail] open city not found or not active %s", line.line_id)
        return jsonify({
            "code": RET_CITY_NOT_OPEN,
            "message": "%s is not open" % line.s_city_name,
            "data": ""
        })
    open_station = open_city.get_open_station(line.s_sta_name)
    if not open_station:
        line_log.info("[fail-detail] open station not found %s %s %s",
                      line.line_id, line.s_city_name, line.s_sta_name)
        return jsonify({
            "code": RET_CITY_NOT_OPEN,
            "message": "%s is not open" % line.s_sta_name,
            "data": ""
        })
    if open_station.close_status & STATION_CLOSE_YZCX:
        data = line.get_json()
        data["left_tickets"] = 0
        line_log.info("[fail-detail] ticket query disabled for this station %s %s, %s",
                      line.line_id, line.s_city_name, line.s_sta_name)
        return jsonify({
            "code": RET_OK,
            "message": "%s 余票查询已关闭" % line.s_sta_name,
            "data": data
        })
    now_time = dte.now().strftime("%H:%M")
    if now_time > open_station.end_time or now_time < open_station.open_time:
        data = line.get_json()
        data["left_tickets"] = 0
        line_log.info("[fail-detail] outside selling hours %s %s, %s",
                      line.line_id, line.s_city_name, line.s_sta_name)
        return jsonify({
            "code": RET_OK,
            "message": "售票时间是%s~%s" % (open_station.open_time, open_station.end_time),
            "data": data
        })
    flow, new_line = get_compatible_flow(line)
    if not flow:
        data = line.get_json()
        data["left_tickets"] = 0
        line_log.info("[fail-detail] no matching flow %s %s, %s",
                      line.line_id, line.s_city_name, line.s_sta_name)
        return jsonify({"code": RET_OK, "message": "没找到对应flow", "data": data})
    flow.refresh_line(new_line)
    data = new_line.get_json()
    data["line_id"] = line.line_id
    if data["left_tickets"] > 0:
        rds.incr(RK_DAY_LINE_STAT % (today_str, "succ"), 1)
    return jsonify({"code": RET_OK, "message": "OK", "data": data})
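# Illustrative only: a client-side sketch of calling query_line_detail. The URL
# rule is not shown in this listing, so /line/query_line_detail and the local
# host/port are assumptions; the body is the raw JSON the view reads via
# request.get_data().
import json
import requests

resp = requests.post("http://localhost:5000/line/query_line_detail",
                     data=json.dumps({"line_id": "1111"}))
print(resp.json())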
def do_refresh_line(self, line):
    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    rebot = None
    for i in Bus100Rebot.objects.filter(
            is_active=True).order_by('-last_login_time')[0:5]:
        if i.test_login_status():
            rebot = i
            break
    if not rebot:
        rebot = Bus100Rebot.get_random_rebot()
        data = {
            "loginType": 0,
            "backUrl": '',
            "mobile": rebot.telephone,
            "password": rebot.password,
            "validateCode": '1234'
        }
        r = requests.post("http://84100.com/doLogin/ajax", data=data)
        if r.json().get('flag', '') == '0':
            ua = rebot.user_agent
            if not ua:
                ua = random.choice(BROWSER_USER_AGENT)
            rebot.modify(cookies=dict(r.cookies),
                         is_active=True,
                         last_login_time=dte.now(),
                         user_agent=ua)
            if not rebot.test_login_status():
                return result_info
        else:
            return result_info
    now = dte.now()
    line = Line.objects.get(line_id=line.line_id)
    if line.shift_id == "0" or not line.extra_info.get('flag', 0):
        line_log.info("[refresh-result] no left_tickets line:%s %s ",
                      line.crawl_source, line.line_id)
        result_info.update(result_msg="ok",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
        return result_info
    rebot.recrawl_shiftid(line)
    line.reload()
    url = "http://www.84100.com/getTrainInfo/ajax"
    payload = {
        "shiftId": line.shift_id,
        "startId": line.s_sta_id,
        "startName": line.s_sta_name,
        "ttsId": ''
    }
    trainInfo = requests.post(url, data=payload, cookies=rebot.cookies)
    if trainInfo.status_code == 404:
        line_log.info("[refresh-result] request 404 line:%s,%s %s ",
                      line.crawl_source, line.s_city_name, line.line_id)
        result_info.update(result_msg="ok",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
    else:
        trainInfo = trainInfo.json()
        if str(trainInfo['flag']) == '0':
            sel = etree.HTML(trainInfo['msg'])
            full_price = sel.xpath(
                '//div[@class="order_detail"]/div[@class="left"]/p[@class="price"]/em/text()')
            if full_price:
                full_price = float(full_price[0])
            try:
                ticket_info = sel.xpath(
                    '//div[@class="order_detail"]/div[@class="right"]/p[3]/a/text()')[0]
                p = re.compile(r'\d+')
                left_ticketObj = p.findall(ticket_info)
                left_tickets = 0
                if left_ticketObj:
                    left_tickets = int(left_ticketObj[0])
                if int(trainInfo['maxSellNum']) < 3:
                    left_tickets = 0
            except Exception as e:
                line_log.info("[refresh-result] request error line:%s,%s %s,%s ",
                              line.crawl_source, line.s_city_name, line.line_id, e)
                left_tickets = 0
            result_info.update(result_msg="ok",
                               update_attrs={
                                   "left_tickets": left_tickets,
                                   "refresh_datetime": now,
                                   'full_price': full_price
                               })
        elif str(trainInfo['flag']) == '1':
            line_log.info("[refresh-result] no left_tickets line:%s,%s %s,result:%s ",
                          line.crawl_source, line.s_city_name, line.line_id, trainInfo)
            result_info.update(result_msg="ok",
                               update_attrs={
                                   "left_tickets": 0,
                                   "refresh_datetime": now
                               })
    return result_info
def do_refresh_line(self, line):
    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    line_url = "http://www.96096kp.cn/UserData/MQCenterSale.aspx"
    params = {
        "StartStation": line.s_city_name,
        "WaitStationCode": "",
        "OpStation": -1,
        "OpAddress": -1,
        "SchDate": line.drv_date,
        "DstNode": line.d_city_name,
        "SeatType": "",
        "SchTime": "",
        "OperMode": "",
        "SchCode": "",
        "txtImgCode": "",
        "cmd": "MQCenterGetClass",
        "isCheck": "false",
    }
    headers = {
        "User-Agent": random.choice(BROWSER_USER_AGENT),
        "Referer": "http://www.96096kp.cn",
        "Origin": "http://www.96096kp.cn",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    rebot = CqkyWebRebot.get_one()
    now = dte.now()
    try:
        r = rebot.http_post(line_url, data=urllib.urlencode(params), headers=headers)
        content = r.content
        # The response is a JS object literal with unquoted keys; quote them so
        # json.loads can parse it.
        for k in set(re.findall("([A-Za-z]+):", content)):
            content = re.sub(r"\b%s\b" % k, '"%s"' % k, content)
        res = json.loads(content)
    except:
        result_info.update(result_msg="exception_ok",
                           update_attrs={
                               "left_tickets": 5,
                               "refresh_datetime": now
                           })
        line_log.info("%s\n%s", "".join(traceback.format_exc()), locals())
        return result_info
    if res["success"] != "true":
        result_info.update(result_msg="error response",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
        return result_info
    update_attrs = {}
    for d in res["data"]:
        drv_datetime = dte.strptime("%s %s" % (d["SchDate"], d["SchTime"]),
                                    "%Y-%m-%d %H:%M")
        line_id_args = {
            "s_city_name": line.s_city_name,
            "d_city_name": line.d_city_name,
            "s_sta_name": d["SchStationName"],
            "d_sta_name": d["SchDstNodeName"],
            "crawl_source": line.crawl_source,
            "drv_datetime": drv_datetime,
        }
        line_id = md5(
            "%(s_city_name)s-%(d_city_name)s-%(drv_datetime)s-%(s_sta_name)s-%(d_sta_name)s-%(crawl_source)s"
            % line_id_args)
        try:
            obj = Line.objects.get(line_id=line_id)
        except Line.DoesNotExist:
            continue
        left = int(d["SchTicketCount"])
        if left <= 1:
            left = 0
        info = {
            "full_price": float(d["SchPrice"]),
            "fee": 0,
            "left_tickets": left,
            "refresh_datetime": now,
            "extra_info": {"raw_info": d},
        }
        if line_id == line.line_id:
            update_attrs = info
        else:
            obj.update(**info)
    if not update_attrs:
        result_info.update(result_msg="no line info",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
    else:
        result_info.update(result_msg="ok", update_attrs=update_attrs)
    return result_info
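# Illustrative only: the Cqky endpoint above returns a JS object literal with
# unquoted keys, and the loop in do_refresh_line quotes every bare key so the
# text becomes strict JSON. A minimal standalone check of that normalization,
# using a made-up response body:
import json
import re

raw = '{success:"true",msg:"",data:[{SchDate:"2016-05-01",SchTime:"08:30"}]}'
content = raw
for k in set(re.findall(r"([A-Za-z]+):", content)):
    content = re.sub(r"\b%s\b" % k, '"%s"' % k, content)

parsed = json.loads(content)
assert parsed["success"] == "true"
assert parsed["data"][0]["SchTime"] == "08:30"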
def do_refresh_line(self, line):
    result_info = {
        "result_msg": "",
        "update_attrs": {},
    }
    rebot = XinTuYunWebRebot.get_one()
    if not rebot.test_login_status():
        rebot.login()
    now = dte.now()
    if line.shift_id == "0" or not line.extra_info.get('flag', 0):
        line_log.info("[refresh-result] no left_tickets line:%s %s ",
                      line.crawl_source, line.line_id)
        result_info.update(result_msg="no left_tickets line",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
        return result_info
    is_exist = False
    try:
        is_exist = rebot.recrawl_shiftid(line)
    except:
        result_info.update(result_msg="no line info",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
        return result_info
    line.reload()
    if not is_exist:
        line.modify(left_tickets=0)
        result_info.update(result_msg="ok",
                           update_attrs={
                               "left_tickets": line.left_tickets,
                               "refresh_datetime": now,
                               'full_price': line.full_price
                           })
        return result_info
    url = "http://www.xintuyun.cn/getTrainInfo/ajax"
    payload = {
        "shiftId": line.shift_id,
        "startId": line.s_sta_id,
        "startName": line.s_sta_name,
        "ttsId": ''
    }
    headers = rebot.http_header()
    cookies = json.loads(rebot.cookies)
    try:
        trainInfo = rebot.http_post(url, data=payload, headers=headers,
                                    cookies=cookies, timeout=20)
    except:
        result_info.update(result_msg="xintuyun timeout",
                           update_attrs={
                               "left_tickets": line.left_tickets,
                               "refresh_datetime": now
                           })
        return result_info
    if trainInfo.status_code == 404:
        line_log.info("[refresh-result] request 404 line:%s,%s %s ",
                      line.crawl_source, line.s_city_name, line.line_id)
        result_info.update(result_msg="request 404",
                           update_attrs={
                               "left_tickets": 0,
                               "refresh_datetime": now
                           })
    else:
        trainInfo = trainInfo.json()
        if str(trainInfo['flag']) == '0':
            content = trainInfo['msg']
            if not isinstance(content, unicode):
                content = content.decode('utf-8')
            sel = etree.HTML(content)
            full_price = sel.xpath(
                '//div[@class="order_detail"]/div[@class="left"]/p[@class="price"]/em/text()')
            if full_price:
                full_price = float(full_price[0])
            try:
                ticket_info = sel.xpath(
                    '//div[@class="order_detail"]/div[@class="right"]/p[3]/a/text()')[0]
                p = re.compile(r'\d+')
                left_ticketObj = p.findall(ticket_info)
                left_tickets = 0
                if left_ticketObj:
                    left_tickets = int(left_ticketObj[0])
                if int(trainInfo['maxSellNum']) < 3:
                    left_tickets = 0
            except Exception as e:
                line_log.info("[refresh-result] request error line:%s,%s %s,%s ",
                              line.crawl_source, line.s_city_name, line.line_id, e)
                left_tickets = 0
            result_info.update(result_msg="ok",
                               update_attrs={
                                   "left_tickets": left_tickets,
                                   "refresh_datetime": now,
                                   'full_price': full_price
                               })
        elif str(trainInfo['flag']) == '1':
            line_log.info("[refresh-result] no left_tickets line:%s,%s %s,result:%s ",
                          line.crawl_source, line.s_city_name, line.line_id, trainInfo)
            result_info.update(result_msg="ok",
                               update_attrs={
                                   "left_tickets": 0,
                                   "refresh_datetime": now
                               })
    return result_info