def get_abbreviation(cls, station_name):
    """Return the abbreviation stored for ``station_name``.

    The query is ``get_abbreviation_sql`` with ``station_name`` interpolated
    into it, executed through ``DataBaseUtil.select``.

    Returns the first column of the first row, or ``''`` when the query
    yields no rows.
    """
    # NOTE(review): the SQL text is built with ``%`` interpolation; if
    # station_name can come from outside the program this should be
    # parameterized -- confirm what DataBaseUtil.select supports.
    rows = DataBaseUtil.select(get_abbreviation_sql % station_name)
    return '' if len(rows) == 0 else rows[0][0]
def get_alia_by_station(cls, station_name):
    """Return the alias stored for ``station_name``.

    The query is ``get_alia_station`` with ``station_name`` interpolated into
    it, executed through ``DataBaseUtil.select``.

    Returns the first column of the first row, or ``''`` when the query
    yields no rows. (Presumably the alias is the station's pinyin spelling --
    confirm against the SQL definition.)
    """
    alias_rows = DataBaseUtil.select(get_alia_station % station_name)
    if len(alias_rows) == 0:
        return ''
    first_row = alias_rows[0]
    return first_row[0]
def main():
    """Save every ordered station-to-station pair for a fixed set of trains.

    For each train code in the hard-coded list, the stop names are read from
    ``train_line_stop`` ordered by ``sequence``. Every ordered pair of stops
    taken from two different positions is stored through
    ``DataService.save_s2s`` together with an alias for each station.

    A failure while handling one pair is printed to stderr with its traceback
    and does not stop the remaining pairs.
    """
    import traceback

    def _station_alias(station):
        # Prefer the alias stored in the database; fall back to the value
        # produced by py_util.hanzi2pinyin_split when none is stored.
        alias = DataService.get_alia_by_station(station)
        if alias == '':
            alias = py_util.hanzi2pinyin_split(string=station, split="", firstcode=False)
        return alias

    # Save station-to-station pairs.
    temp_trains = [
        'C6903', 'C6905', 'C6907', 'C6909', 'C6911', 'C6913', 'C6915', 'C6917',
        'C6919', 'C6921', 'C6923', 'C6925', 'C6953', 'C6955', 'C6957', 'C6959',
        'C6961', 'C6963', 'C6965', 'C6967', 'C6969', 'C6971', 'C6973', 'C6975',
        'C6902', 'C6904', 'C6906', 'C6908', 'C6910', 'C6912', 'C6918', 'C6920',
        'C6922', 'C6924', 'C6926', 'C6952', 'C6954', 'C6956', 'C6958', 'C6960',
        'C6962', 'C6966', 'C6968', 'C6970', 'C6972', 'C6974', 'C6976', 'C6929',
        'C6914', 'C6928', 'C6930', 'C6978', 'C6964', 'C6980', 'C6977', 'C6979',
        'C6981',
    ]

    for code in temp_trains:
        names = DataBaseUtil.select(
            "select name from train_line_stop where train_code = '%s' order by sequence" % code)
        if len(names) == 0:
            continue

        # A list keeps the stops in query order; positions (not names) decide
        # which pairs are skipped, so a name that occurs at two different
        # positions is still paired with itself, as before.
        stations = [row[0] for row in names]

        for i, start_station in enumerate(stations):
            for j, end_station in enumerate(stations):
                if i == j:
                    continue
                try:
                    start_py = _station_alias(start_station)
                    end_py = _station_alias(end_station)
                    DataService.save_s2s(start_station, start_py, end_station, end_py)
                except Exception:
                    # Best effort per pair: report the failure instead of
                    # dropping it silently, then carry on with the next pair.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    traceback.print_exc()
def add_train_job(cls, crawler, min_link_size):
    """Enqueue train tasks when the crawler's link queue is at or below a threshold.

    Nothing happens unless ``crawler.links_queue.qsize <= min_link_size``.
    Otherwise every row returned for ``train_code_sql`` is first flagged with
    ``cls.update_train_state(1, row[0])``. Each row is then turned into a
    ``TrainTask``, wrapped in a ``LinkJob`` and passed to
    ``crawler.links_queue.put_link``. If building or enqueueing a row raises,
    its flag is set back with ``cls.update_train_state(0, row[0])``.
    """
    # NOTE(review): ``qsize`` is read without being called; this only works
    # if links_queue exposes it as an attribute/property rather than a
    # method such as queue.Queue.qsize() -- confirm.
    if not (crawler.links_queue.qsize <= min_link_size):
        return

    rows = DataBaseUtil.select(train_code_sql)

    # Flag every fetched row before any of them is enqueued.
    for row in rows:
        cls.update_train_state(1, row[0])

    for row in rows:
        try:
            # Column 8 must offer strftime(); it is passed on as a
            # 'YYYY-MM-DD' string in the fourth position.
            task_args = (
                row[1], row[2], row[3], row[8].strftime('%Y-%m-%d'),
                row[4], row[0], row[5], row[6], row[7], row[9],
            )
            crawler.links_queue.put_link(LinkJob(TrainTask(*task_args)))
        except:
            # NOTE(review): bare except also traps KeyboardInterrupt and
            # SystemExit -- confirm that is intended.
            cls.update_train_state(0, row[0])
def add_station_job(cls, crawler, min_link_size):
    """Enqueue station tasks when the crawler's link queue is at or below a threshold.

    Nothing happens unless ``crawler.links_queue.qsize <= min_link_size``.
    Otherwise the rows returned for ``sql_job % BATCH_ADD_LINKS_SIZE`` are
    handled one at a time: the row is flagged with
    ``cls.update_task_selected(1, row[0])``, its first ten columns become a
    ``StationTask``, and the task is wrapped in a ``LinkJob`` and passed to
    ``crawler.links_queue.put_link``. If any of that raises, the flag is set
    back with ``cls.update_task_selected(0, row[0])``.
    """
    # NOTE(review): ``qsize`` is read without being called; this only works
    # if links_queue exposes it as an attribute/property -- confirm.
    if not (crawler.links_queue.qsize <= min_link_size):
        return

    for row in DataBaseUtil.select(sql_job % BATCH_ADD_LINKS_SIZE):
        try:
            cls.update_task_selected(1, row[0])
            # Columns 0..9, in order, are the StationTask arguments.
            columns = [row[idx] for idx in range(10)]
            crawler.links_queue.put_link(LinkJob(StationTask(*columns)))
        except:
            # Enqueueing failed: release the task again.
            cls.update_task_selected(0, row[0])
def add_dp_list_job(cls, crawler, min_link_size):
    """Enqueue DP-list tasks when the crawler's link queue is at or below a threshold.

    Nothing happens unless ``crawler.links_queue.qsize <= min_link_size``.
    Otherwise the rows returned for ``dq_list_job % BATCH_ADD_LINKS_SIZE``
    are all flagged by executing ``upt_dp_list_selected % (1, row[0])``.
    Each row's first six columns then become a ``DPListTask``, which is
    wrapped in a ``LinkJob`` and passed to ``crawler.links_queue.put_link``.
    If building or enqueueing a row raises, ``upt_dp_list_selected %
    (0, row[0])`` is executed for it.
    """
    # NOTE(review): ``qsize`` is read without being called; this only works
    # if links_queue exposes it as an attribute/property -- confirm.
    if not (crawler.links_queue.qsize <= min_link_size):
        return

    rows = DataBaseUtil.select(dq_list_job % BATCH_ADD_LINKS_SIZE)

    # Update the selected state right after the rows have been fetched.
    for row in rows:
        DataBaseUtil.execute(upt_dp_list_selected % (1, row[0]))

    for row in rows:
        try:
            # Columns 0..5, in order, are the DPListTask arguments.
            columns = [row[idx] for idx in range(6)]
            crawler.links_queue.put_link(LinkJob(DPListTask(*columns)))
        except:
            # Enqueueing failed: release the row again.
            DataBaseUtil.execute(upt_dp_list_selected % (0, row[0]))
def check_traincode_exist(cls, train_code):
    """Tell whether the lookup for ``train_code`` reports a non-zero value.

    Runs ``check_train_code_exist`` with ``train_code`` interpolated into it
    and reads the first column of the first row.

    Returns ``False`` when that value equals 0, ``True`` otherwise. An
    ``IndexError`` is raised if the query yields no rows.
    """
    rows = DataBaseUtil.select(check_train_code_exist % train_code)
    hits = rows[0][0]
    return not (hits == 0)
def find_station(cls, train_code, name):
    """Return the rows selected by ``find_station_sql`` for a train code and name.

    ``train_code`` and ``name`` are interpolated, in that order, into
    ``find_station_sql``; the result of ``DataBaseUtil.select`` is returned
    unchanged.
    """
    query = find_station_sql % (train_code, name)
    return DataBaseUtil.select(query)
def select_proxy(cls):
    """Return whatever ``DataBaseUtil.select`` yields for ``sql_proxy``."""
    return DataBaseUtil.select(sql_proxy)
def get_train_code_list(cls):
    """Return whatever ``DataBaseUtil.select`` yields for ``train_code_sql``."""
    return DataBaseUtil.select(train_code_sql)
def get_stops(cls, train_code):
    """Return the rows selected for ``train_code`` by the ``get_stops`` query.

    ``train_code`` is interpolated into the template and the result of
    ``DataBaseUtil.select`` is returned unchanged.
    """
    # NOTE(review): ``get_stops`` below is resolved as a global, so it has to
    # name a module-level SQL template that is distinct from this method --
    # confirm.
    query = get_stops % train_code
    return DataBaseUtil.select(query)
def select_merge_codes(cls):
    """Return whatever ``DataBaseUtil.select`` yields for ``merge_codes``."""
    return DataBaseUtil.select(merge_codes)
def get_stop_no(cls):
    """Return whatever ``DataBaseUtil.select`` yields for ``sql_stop_no``."""
    return DataBaseUtil.select(sql_stop_no)