def parse_stop_route(self):
    """Record the routes listed on this stop page and their timetable links.

    Reads the transport type from the ``transportType`` heading (lowercased),
    then walks every route ``<li>`` under ``lineInformationInner``.  For each
    route, ``db.update_table('route', ...)`` is called with the route name
    and the transport type.  Every ``<ul>`` nested directly under a route
    item is treated as one timetable entry and passed to
    ``db.update_table('timetable_index', ...)``.

    ``self.id`` is used as the stop id.  Returns ``None``; the results are
    the ``db.update_table`` calls and one progress line printed per entry.
    """
    # NOTE(review): this method arrived with its indentation flattened onto
    # a single line.  The nesting below (one timetable_index write per nested
    # <ul>) is a reconstruction -- confirm against version history.
    stop_id = self.id
    # Named transport_type rather than `type` to avoid shadowing the builtin;
    # the keyword passed to db.update_table is still `type`.
    transport_type = self.get_node_text(
        self.root, "//div[@class='transportType']/h3").lower()

    # Get route info in this stop.
    route_nodes = self.root.xpath(
        "//div[@class='lineInformationInner']/div/ul/li")
    for route_node in route_nodes:
        route_name = self.get_node_text(route_node, 'a')
        # The route id is the last path segment of the route link.
        route_id = self.get_node(route_node, 'a').get('href').split('/')[-1]
        db.update_table('route', 'id', route_id,
                        name=route_name, type=transport_type)

        for timetable_node in route_node.xpath('ul'):
            title = self.get_node_text(timetable_node, 'li')
            link = self.get_node(timetable_node, 'li/ul/li/a').get('href')

            # line_id is decided afresh for every entry.  Previously a link
            # that did not match the pattern silently reused the line_id of
            # the preceding entry, producing the same tt_id and overwriting
            # that entry's row with this one's title/link.
            matches = self.timetable_url_pattern.findall(link)
            # First element of the first match; None when nothing matched.
            line_id = matches[0][0] if matches else None

            tt_id = utils.get_timetable_id(route_id, stop_id, line_id)
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3 (two leading spaces, then the message).
            print("  parsed timetable '%s' %s" % (title, tt_id))
            db.update_table('timetable_index', 'id', tt_id,
                            route_id=route_id, stop_id=stop_id,
                            line_id=line_id, title=title, link=link)
def parse_timetable(self):
    """Index every timetable link listed on this route page.

    Looks up the ``<ul>`` inside ``timetablesInner`` and visits each ``<li>``
    below it.  For every item whose anchor carries a non-empty ``href``, the
    line id is taken from the first match of ``self.timetable_lineid_pattern``
    against the link, a timetable id is built with
    ``utils.get_timetable_id``, and the entry is passed to
    ``db.update_table('timetable_index', ...)``.

    ``self.id`` is used as the route id.  Returns ``None``; the results are
    the ``db.update_table`` calls and one progress line printed per entry.

    Raises:
        IndexError: if a non-empty link yields no match from
            ``self.timetable_lineid_pattern.findall`` (behaviour unchanged).
    """
    tt_node = self.get_node(self.root, "//div[@class='timetablesInner']/ul")
    for item in tt_node.xpath(".//li"):
        title = self.get_node_text(item, "./a")
        link = self.get_node(item, "./a").get('href')
        # Skip anchors without a usable link.  The truthiness test covers
        # both an empty href and a missing one; the former `link == ""`
        # check let a missing href through to findall().
        # NOTE(review): assumes .get() returns None for an absent attribute,
        # as lxml elements do -- confirm what get_node returns.
        if not link:
            continue

        line_id = self.timetable_lineid_pattern.findall(link)[0]
        tt_id = utils.get_timetable_id(route_id=self.id, line_id=line_id)
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3 (two leading spaces, then the message).
        print("  parsed timetable '%s' %s" % (title, tt_id))
        db.update_table('timetable_index', 'id', tt_id,
                        route_id=self.id, line_id=line_id,
                        link=link, title=title)