def schedule(destination, now):
    """Return the Rodalies timetable from Mataro (79500) to *destination*.

    Queries the gencat.cat timetable search page for trains in the one-hour
    window ending at *now* and scrapes the result rows.

    Args:
        destination: station code string (e.g. '78805' for Pl. Catalunya).
        now: hour of day, int or numeric string.

    Returns:
        dict mapping result-row id -> {'departureTime', 'arrivalTime',
        'tripTime'} text values. Rows missing either time are skipped.
    """
    # The site wants an hour range: query from one hour before `now` up to
    # `now` itself (`later` equals the caller's original `now`, since `now`
    # was already decremented).
    now = int(now) - 1
    later = now + 1
    # Take the local time once so day/month/year cannot straddle midnight
    # between separate localtime() reads.
    current = time.localtime()
    day = time.strftime("%d", current)
    month = time.strftime("%m", current)
    year = time.strftime("%Y", current)
    # pl catalunya:78805, mataro:79500, sants:71801, sant adria:79403,
    # (horariDesde, horariFins + nomes hora)
    values = {'day': day, 'month': month, 'year': year,
              'sourceCode': '79500', 'destinationCode': destination,
              'fromtime': now, 'totime': later}
    data = urlencode(values)
    url = "http://www14.gencat.cat/mobi_rodalies/AppJava/pages/horaris/ResultatCerca.htm"
    page = get_page(url, data)
    p = WebParser()
    p.feed(page)
    timetables = p.dom.get_element_by_id('timetablesTable')
    schedules = timetables.get_elements_by_tag('li')
    time_table = {}
    for schedule_item in schedules:
        item_id = schedule_item.get_id()
        departure = schedule_item.get_elements_by_class('departureTime')
        arrival = schedule_item.get_elements_by_class('arrivalTime')
        triptime = schedule_item.get_element_by_id('tripTimeText')
        # BUG FIX: the original tested `departure != [] or arrival != []`,
        # which raised IndexError below whenever exactly one of the two
        # lists was empty. Both are indexed with [0], so require both.
        if departure and arrival:
            time_table[item_id] = {'departureTime': departure[0].get_text(),
                                   'arrivalTime': arrival[0].get_text(),
                                   'tripTime': triptime.get_text()}
    return time_table
def collect_links_and_data(self, page_url):
    """Fetch *page_url*, parse it when it is HTML, and return linked URLs.

    Side effects:
        - Stores the parsed tag/data list on ``self.data_list`` (only for
          HTML responses).
        - May install an unverified HTTPS context process-wide (see note).
        - Calls ``sys.exit()`` on any fetch/parse error.

    Args:
        page_url: absolute URL to download.

    Returns:
        list of page URLs collected by the parser; empty when the response
        was not HTML.
    """
    # Fixes ssl issue for some mac users.
    # NOTE(review): this disables HTTPS certificate verification for the
    # whole process, not just this request.
    if (not os.environ.get('PYTHONHTTPSVERIFY', '')
            and getattr(ssl, '_create_unverified_context', None)):
        ssl._create_default_https_context = ssl._create_unverified_context
    # BUG FIX: bind `parser` before the try block. The original only
    # assigned it inside the text/html branch, so the unconditional
    # `return parser.get_page_urls()` raised UnboundLocalError whenever
    # the response was not HTML.
    parser = WebParser(self.base_url)  # Initialise custom webparser
    try:
        response = urlopen(page_url)
        if "text/html" in response.getheader(
                "Content-Type"):  # Check to see if HTML response
            html_bytes = response.read()  # Read the bytestream in response
            html_string = html_bytes.decode(
                "utf-8")  # Decode bytestream as utf-8
            parser.feed(html_string)  # Execute parser
            self.data_list = parser.get_data_with_tags(
            )  # Retrieve datalist from parser
    except Exception as e:
        print("Error: " + str(e))
        print("Program will terminate")
        sys.exit()
    return parser.get_page_urls()
def stations():
    """Return a mapping of R1 station id -> station name.

    Scrapes the gencat.cat line-detail page for commuter line R1 and pulls
    each station entry out of the 'RodaliesList' element.
    """
    query = urlencode({'lineId': 'R1'})
    url = "http://www14.gencat.cat/mobi_rodalies/AppJava/pages/linies/Detall.htm"
    parser = WebParser()
    parser.feed(get_page(url, query))
    listing = parser.dom.get_elements_by_class('RodaliesList')[0]
    stations_by_id = {}
    for entry in listing.get_elements_by_tag('li'):
        # The element id carries a fixed prefix; the station code starts
        # at character 9.
        code = entry.get_id()[9:]
        name_node = entry.get_elements_by_class('stationName')[0]
        name_node = name_node.get_elements_by_tag('xml-fragment')[0]
        stations_by_id[code] = name_node.get_text()
    return stations_by_id
def collect_links_and_data(self, page_url):
    """Fetch *page_url*, parse it when it is HTML, and return linked URLs.

    Side effects:
        - Stores the parsed tag/data list on ``self.data_list`` (only for
          HTML responses).
        - Calls ``sys.exit()`` on any fetch/parse error.

    Args:
        page_url: absolute URL to download.

    Returns:
        list of page URLs collected by the parser; empty when the response
        was not HTML.
    """
    # BUG FIX: bind `parser` before the try block. The original only
    # assigned it inside the text/html branch, so the unconditional
    # `return parser.get_page_urls()` raised UnboundLocalError whenever
    # the response was not HTML.
    parser = WebParser(self.base_url)  # Initialise custom webparser
    try:
        response = urlopen(page_url)
        if "text/html" in response.getheader(
                "Content-Type"):  # Check to see if HTML response
            html_bytes = response.read()  # Read the bytestream in response
            html_string = html_bytes.decode(
                "utf-8")  # Decode bytestream as utf-8
            parser.feed(html_string)  # Execute parser
            self.data_list = parser.get_data_with_tags(
            )  # Retrieve datalist from parser
    except Exception as e:
        print("Error: " + str(e))
        print("Program will terminate")
        sys.exit()
    return parser.get_page_urls()