def update_price_code(cls):
    """Push every merge code onto the price rows of its source train.

    Each row returned by ``DataService.select_merge_codes()`` is read by
    position: index 0 is used as the train code and index 1 as the merge
    code.  Every pair is logged and then handed to
    ``DataService.update_price_code``.  Errors raised by the data service
    are not handled here and propagate to the caller.
    """
    for code_row in DataService.select_merge_codes():
        train_code, merge_code = code_row[0], code_row[1]
        log.info('update price code ,%s,%s'% (train_code,merge_code))
        # The data service takes the merge code first, then the train code.
        DataService.update_price_code(merge_code, train_code)
def parse_content(self, content, link_job):
    """Parse a JSONP train-listing response and save one Price per train.

    ``content`` is expected to look like ``callback({...})``: the text
    between the first ``(`` and the last ``)`` is decoded as JSON and the
    list under ``['data']['trains']`` is processed entry by entry.

    Any exception raised while handling a single train is logged and that
    train is skipped, so one bad entry does not abort the rest.  Errors
    raised while unwrapping or decoding ``content`` itself (for example a
    ``ValueError`` when no parenthesis is present) propagate to the caller.

    :param content: raw JSONP response text.
    :param link_job: not used by this method.
    """
    # Strip the JSONP wrapper: keep only what sits between the first '('
    # and the last ')'.
    payload = content[content.index('(') + 1:content.rindex(')')]
    trains = json.loads(payload)['data']['trains']

    for train in trains:
        try:
            train_code = train['trainNum']
            start_station = train['fromCity']
            end_station = train['toCity']
            origin = train['beginPlace']
            terminal = train['endPlace']
            depart_time = train['fromTime']
            arrive_time = train['toTime']
            # presumably minutes converted to seconds -- TODO confirm units
            duration = int(train['usedTimeInt']) * 60
            note = train['note']

            # Looked up once; every seat-class price is extracted from it.
            ticket_state = train['ticketState']
            price_hard_seat = self.get_price('hardseat', ticket_state)
            price_soft_seat = self.get_price('softseat', ticket_state)
            price_hard_sleeper = self.get_price('hardsleepermid', ticket_state)
            price_soft_sleeper = self.get_price('softsleeperdown', ticket_state)
            price_adv_soft_sleeper = self.get_price('advancedsoftsleeper',
                                                    ticket_state)
            price_business_seat = self.get_price('businessseat', ticket_state)
            price_second_seat = self.get_price('secondseat', ticket_state)
            price_first_seat = self.get_price('firstseat', ticket_state)
            price_special_seat = self.get_price('specialseat', ticket_state)

            # Defaults used when the stop records cannot be resolved.
            sequence = 0
            days = 0
            stay_time = 0
            grade = ''
            state = 0
            train_no = ''

            exist = DataService.check_traincode_exist(train_code)

            # is_correct -- 0: erroneous information, 1: correct
            is_correct = 0 if len(ticket_state) == 0 else 1

            # state -- 0: normal
            #          1: this train code is not in our records
            #          2: the train is recorded but this station has been
            #             cancelled
            if not exist:
                state = 1
            else:
                station_s = DataService.find_station(train_code, start_station)
                station_e = DataService.find_station(train_code, end_station)
                if len(station_e) != 0 and len(station_s) != 0:
                    start_row = station_s[0]
                    end_row = station_e[0]
                    days_s = int(start_row[6])
                    # When compareSS() on columns 4 and 8 of the start row
                    # is negative, the start-day offset is bumped by one
                    # (presumably departure rolls past midnight -- TODO
                    # confirm against compareSS).
                    if self.compareSS(start_row[4], start_row[8]) < 0:
                        days_s += 1
                    sequence = end_row[3]
                    # NOTE(review): end_row[6] is used without int() while
                    # start_row[6] is converted above -- confirm the column
                    # type is numeric.
                    days = end_row[6] - days_s
                    stay_time = end_row[5]
                    grade = end_row[9]
                    train_no = end_row[10]
                else:
                    state = 2

            # NOTE(review): end_station is passed before start_station;
            # kept exactly as in the original call -- verify against the
            # Price constructor signature.
            price = Price(train_code, end_station, start_station,
                          depart_time, arrive_time, duration,
                          price_hard_seat, price_soft_seat,
                          price_hard_sleeper, price_soft_sleeper,
                          price_second_seat, price_first_seat,
                          price_adv_soft_sleeper, price_business_seat,
                          grade, days, price_special_seat,
                          origin, terminal, sequence, train_no, stay_time,
                          is_correct, state, note)
            DataService.save_train_price(price)
        except Exception:
            # Deliberate best effort: log and move on to the next train.
            # ``Exception`` (not a bare ``except``) so that SystemExit and
            # KeyboardInterrupt still stop the crawler.
            t, v, tb = sys.exc_info()
            log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
def update_price_code(cls):
    """Apply every merge code to the price rows of its source train.

    Each row from ``DataService.select_merge_codes()`` is read by position
    (index 0 is used as the train code, index 1 as the merge code), logged,
    and passed to ``DataService.update_price_code``.  A failure on one pair
    is logged and the loop continues with the next pair.
    """
    for item in DataService.select_merge_codes():
        train_code = item[0]
        merge_code = item[1]
        log.info('update price code ,%s,%s' % (train_code, merge_code))
        try:
            DataService.update_price_code(merge_code, train_code)
        except Exception:
            # Best effort per row.  ``Exception`` rather than a bare
            # ``except`` so SystemExit / KeyboardInterrupt are not swallowed.
            t, v, tb = sys.exc_info()
            log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
def parse_content(self, content, link_job):
    """Parse a JSON shop listing and save one record per shop.

    ``content`` is decoded as JSON and the sequence under its ``'list'``
    key is walked.  For every shop the ``categoryId``, ``cityId`` and ``id``
    fields are read, the shop URL is built from ``self.shop_url_pattern``
    and the record is stored through ``DataService.save_dp_shop``.

    A failure on a single shop is logged and that shop is skipped; errors
    while decoding ``content`` or reading ``'list'`` propagate to the caller.

    :param content: raw JSON response text.
    :param link_job: not used by this method.
    """
    # Named ``shops`` so the builtin ``list`` is not shadowed.
    shops = json.loads(content)['list']
    for shop in shops:
        try:
            category_id = shop['categoryId']
            city_id = shop['cityId']
            shop_id = shop['id']
            url = self.shop_url_pattern % shop_id
            DataService.save_dp_shop(url, shop_id, city_id, category_id)
        except Exception:
            # Best effort per shop.  ``Exception`` rather than a bare
            # ``except`` so SystemExit / KeyboardInterrupt are not swallowed.
            t, v, tb = sys.exc_info()
            log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
def add_merge_train_code(cls):
    """Re-save the stops of every source train under its merge code.

    Rows from ``DataService.select_merge_codes()`` are read by position:
    index 0 is used as the train code, index 1 as the merge code.  For each
    pair, the stops returned by ``DataService.get_stops(train_code)`` are
    turned into ``TrainStop`` objects whose first argument is the merge
    code and whose remaining arguments are columns 2 through 10 of the stop
    row, and each one is stored with ``DataService.save_train_stop``.
    """
    for code_row in DataService.select_merge_codes():
        source_code, target_code = code_row[0], code_row[1]
        # NOTE: a guard that skipped merge codes already known to
        # DataService.check_traincode_exist() is disabled in this version,
        # so stops are inserted for every pair unconditionally.
        source_stops = DataService.get_stops(source_code)
        log.info("insert merge_train_stop:%s" % target_code)
        for stop_row in source_stops:
            # Columns 2..10 of the stop row, in order.
            stop_fields = [stop_row[i] for i in range(2, 11)]
            merged_stop = TrainStop(target_code, *stop_fields)
            DataService.save_train_stop(merged_stop)
def start_merge(cls):
    """Group train codes by the trimmed core of their train number.

    For every ``(train_code, train_no)`` row from
    ``DataService.get_stop_no()`` the first two and last two characters of
    ``train_no`` are dropped and leading zeros are removed from what is
    left.  The row is inserted with ``DataService.insert_merge_stop`` and
    logged.  Train codes that share the same trimmed key are chained
    together with ``/`` in encounter order, and finally each key is written
    back with ``DataService.update_merge_stop(joined_codes, key)``.
    """
    leading_zeros = re.compile(r'^(0+)')
    codes_by_key = {}

    for row in DataService.get_stop_no():
        train_code, train_no = row[0], row[1]
        # Drop the 2-character prefix and suffix, then strip leading zeros.
        key = leading_zeros.sub("", train_no[2:-2])
        DataService.insert_merge_stop(train_code, train_no, key)
        log.info("insert:%s,%s,%s" % (train_code, train_no, key))

        if key not in codes_by_key:
            codes_by_key[key] = train_code
        else:
            codes_by_key[key] = "%s/%s" % (codes_by_key[key], train_code)

    for key, joined_codes in codes_by_key.items():
        DataService.update_merge_stop(joined_codes, key)
def __check_proxy_queue(self):
    """Load stored proxies and enqueue them as checked proxy jobs.

    The rows are fetched with ``DataService.select_proxy()`` first, then
    ``DataService.update_proxys()`` is called, and only afterwards is each
    fetched row wrapped in a ``ProxyJob`` (built from columns 1 and 2 of
    the row) and handed to ``self.crawler.proxy_queue.put_checked_proxy``.
    """
    stored_rows = DataService.select_proxy()
    # Must run after the select and before the rows are queued, as in the
    # original ordering.
    DataService.update_proxys()
    for row in stored_rows:
        job = ProxyJob(row[1], row[2])
        self.crawler.proxy_queue.put_checked_proxy(job)