def parse_schedules_trolley(trolley_list):
    """Download, parse and save the schedule of every trolley in the list.

    Each element of ``trolley_list`` is read through its ``"id"``,
    ``"name"``, ``"post_data"`` and ``"workdays"`` keys.  For every trolley
    the schedule page is fetched with ``parser_utils.download_page``, parsed
    with ``ScheduleParser.parse_html`` and the resulting dict is written with
    ``parser_utils.save_json_file`` to
    ``parser_configs.directories["TROLLEYS_DIR"] + <id> + ".json"``.

    The function returns nothing.
    """
    for item in trolley_list:
        trolley_id = item["id"]
        logging.info("Parsing schedule for trolley with id: %s", trolley_id)
        # Formatted before printing so the line is emitted identically by the
        # Python 2 print statement and by the print function.
        print("%s %s" % ("Current trolley_id is:", trolley_id))

        page = parser_utils.download_page(
            TransportParser.TRANSPORT_PAGE_URL, item["post_data"]
        )
        parsed = ScheduleParser.parse_html(page)

        record = {
            'type': 'trolley',
            'id': trolley_id,
            'name': item["name"],
            'workdays': False,
            'weekend': False,
            'everyday': False,
        }

        # in trolley case ['workdays'] always True
        if item['workdays'] is True:
            # True when the parsed page describes a workdays-only trolley.
            workdays_only = parsed['workdays'] is True
            if workdays_only:
                print("Workdays only")

            # The first page always supplies the workdays schedule.
            record['workdays'] = True
            record['schedule_workdays'] = parsed['schedule_table']
            record['stations_workdays'] = parsed['stations_list']

            # Regular case: a separate weekend page is linked from the
            # workdays page and has to be fetched and parsed as well.
            if not workdays_only and parsed['weekend'] is True:
                record['weekend'] = True
                weekend_page = parser_utils.download_page(
                    parsed['weekend_link']
                )
                weekend = ScheduleParser.parse_html(weekend_page)
                record['schedule_weekend'] = weekend['schedule_table']
                record['stations_weekend'] = weekend['stations_list']

        parser_utils.save_json_file(
            parser_configs.directories["TROLLEYS_DIR"] + trolley_id + ".json",
            record
        )
def parse_schedules_suburban():
    """Download the suburban transport page and save all buses as one JSON.

    The page at ``SuburbanParser.SUBURBAN_TRANSPORT_PAGE_URL`` is parsed into
    a list of bus dicts.  For each bus the ``from_city`` / ``to_city`` values
    (when present and truthy) are handed to
    ``SuburbanScheduleParser.parse_schedule`` and the results stored under
    ``schedule_from_city`` / ``schedule_to_city``.  An ``id`` of the form
    ``sub_<number>_<station>`` is then added, and the whole list is written
    to ``parser_configs.directories["JSON_DIR"] + "suburban_transport.json"``.

    The function takes no arguments and returns nothing.
    """
    page = parser_utils.download_page(
        SuburbanParser.SUBURBAN_TRANSPORT_PAGE_URL
    )
    suburban_buses = SuburbanParser.parse_html(page)

    # (source key, key under which the parsed schedule is stored)
    directions = (
        ('from_city', 'schedule_from_city'),
        ('to_city', 'schedule_to_city'),
    )
    # Substitutions applied, in this order, to the transliterated station.
    station_substitutions = (
        ('/', '_'),
        ('.', ''),
        ('\'', ''),
        (' ', '_'),
    )

    for bus in suburban_buses:
        for source_key, schedule_key in directions:
            raw = in_dict(bus, source_key)
            if not raw:
                continue
            parsed = SuburbanScheduleParser.parse_schedule(
                no_whitespaces(raw)
            )
            bus[schedule_key] = parsed
            # Debug output kept from the original for route 159.
            if bus['number'] == '159':
                print(parsed)

        number = no_whitespaces(in_dict(bus, 'number')).replace(',', '_')

        suffix = unidecode(bus['station']).lower()
        for old, new in station_substitutions:
            suffix = suffix.replace(old, new)

        bus['id'] = "sub_" + unidecode(number) + '_' + suffix

    parser_utils.save_json_file(
        parser_configs.directories["JSON_DIR"] + "suburban_transport.json",
        suburban_buses
    )
# Split every entry of station_schedule into its first and second element.
# Presumably the entries are (bus id, time) pairs like the ones zipped
# together further down -- TODO confirm against the code above this point.
station_schedule_buses = [x[0] for x in station_schedule]
station_schedule_times = [x[1] for x in station_schedule]
# The weekend variables are plain aliases of the lists built just above.
station_schedule_weekend_buses = station_schedule_buses
station_schedule_weekend_times = station_schedule_times
station_schedule_weekend = station_schedule
# NOTE(review): station_schedule_workdays_buses / _times are not assigned in
# this part of the file; they are expected to come from earlier code.
# From here on station_schedule is rebound to the dict that gets saved.
station_schedule = {
    "name": u"Балтийский банк",
    "schedule_workdays_buses": station_schedule_workdays_buses,
    "schedule_workdays_times": station_schedule_workdays_times,
    "schedule_weekend_buses": station_schedule_weekend_buses,
    "schedule_weekend_times": station_schedule_weekend_times,
}
save_json_file("station_balt_bank.json", station_schedule)

# The bare string below is a section marker, not a docstring.
""" DERZHAVINA"""
# Buses considered for this station; derzhavina_position[i] is the index
# used to pick one entry out of the "schedule_workdays" list of bus i.
derzhavina_bus_list = ["bus_20", "bus_4", "bus_19"]
derzhavina_position = [1, 0, 0]
# Load the JSON of every listed bus, keyed by bus id.
derzhavina_bus_dict = {}
for bus_id in derzhavina_bus_list:
    derzhavina_bus_dict[bus_id] = load_json_file(transport_dir + bus_id + ".json")
station_schedule = []
for i in range(len(derzhavina_bus_list)):
    bus_id = derzhavina_bus_list[i]
    # Select this bus's schedule entry at its configured position.
    sch = derzhavina_bus_dict[bus_id]["schedule_workdays"][derzhavina_position[i]]
    # Pair every item of that entry with the id of the bus it belongs to.
    bus_id_list = [bus_id] * len(sch)
    sch = zip(bus_id_list, sch)
    # NOTE(review): nothing is appended to station_schedule within this
    # view; the loop body may continue past the visible source.
# print station['name'] + "|" # if 'stations_weekend' in transport: # for station in transport['stations_weekend']: # print station['name'] + "|" do_replace(transport, replace) if "stations_weekend" in transport and "stations_workdays" in transport: if transport["stations_weekend"] == transport["stations_workdays"]: transport["stations"] = transport["stations_weekend"] transport.pop("stations_workdays") transport.pop("stations_weekend") print "stations is the same" print "##############################" # serialization of web version JSON parser_utils.save_json_file(filename, transport) # transpose schedules, serializating phone version JSON if "schedule_workdays" in transport: schedule = [list(i) for i in zip(*transport["schedule_workdays"])] transport["schedule_workdays"] = schedule if "schedule_weekend" in transport: schedule = [list(i) for i in zip(*transport["schedule_weekend"])] transport["schedule_weekend"] = schedule if not os.path.exists(IPHONE_DIR): os.makedirs(IPHONE_DIR) filename = IPHONE_DIR + transport["id"] + ".json" parser_utils.save_json_file(filename, transport)
logging.info("Parsing trolleys schedules started.")
# Fetch and save the per-trolley schedules, then the suburban schedules.
parse_schedules_trolley(transport["trolley_list"])
parse_schedules_suburban()
# NOTE(review): buses_trolleys is not used in the visible part of this code.
buses_trolleys = transport["trolley_list"] + transport["bus_list"]
# Partition the bus list by membership of the bus id in MIX_LIST.
regular_buses = [
    elem for elem in transport["bus_list"] if elem['id'] not in MIX_LIST
]
mixed_buses = [
    elem for elem in transport["bus_list"] if elem['id'] in MIX_LIST
]
# Build one summary per category; get_transport_json is defined elsewhere.
buses = get_transport_json(regular_buses, 'bus')
trolleys = get_transport_json(transport["trolley_list"], 'trolley')
mixed = get_transport_json(mixed_buses, 'mixed')
transport_dict = {
    'buses': buses,
    'trolleys': trolleys,
    'mixed': mixed
}
# Write the combined index to <JSON_DIR>transport.json.
parser_utils.save_json_file(
    directories["JSON_DIR"] + "transport.json", transport_dict
)
def parse_schedules_bus(bus_list):
    """Download, parse and save the schedule of every bus in the list.

    Each element of ``bus_list`` is read through its ``"id"``, ``"name"``,
    ``"post_data"`` and ``"weekend"`` keys.  The schedule page is fetched
    with ``parser_utils.download_page`` and parsed with
    ``ScheduleParser.parse_html``; the resulting dict is written with
    ``parser_utils.save_json_file`` to
    ``parser_configs.directories["BUSES_DIR"] + <id> + ".json"``.

    The ``type`` field is ``'mixed'`` for ids found in ``MIX_LIST`` and
    ``'bus'`` otherwise.  The function returns nothing.
    """
    for entry in bus_list:
        bus_id = entry["id"]
        logging.info("Parsing schedule for bus with id: %s", bus_id)
        # Formatted before printing so the line is emitted identically by the
        # Python 2 print statement and by the print function.
        print("%s %s" % ("Current bus_id is:", bus_id))

        page = parser_utils.download_page(
            TransportParser.TRANSPORT_PAGE_URL, entry["post_data"]
        )
        parsed = ScheduleParser.parse_html(page)

        record = {
            'type': 'mixed' if bus_id in MIX_LIST else 'bus',
            'id': bus_id,
            'name': entry["name"],
            'everyday': False,
        }

        if entry['weekend'] is True:
            # Bus that has only a weekend schedule.
            record['schedule_weekend'] = parsed['schedule_table']
            record['stations_weekend'] = parsed['stations_list']
        elif parsed['everyday'] is True:
            # Case of '7a' bus (everyday)
            # @TODO: could be others buses like this
            # NOTE(review): record['everyday'] stays False on this branch;
            # confirm whether consumers expect it to be set to True.
            record['schedule_everyday'] = parsed['schedule_table']
            record['stations'] = parsed['stations_list']
        else:
            # Bus with a workdays schedule and, possibly, a weekend one
            # that lives on a separately linked page.
            record['schedule_workdays'] = parsed['schedule_table']
            record['stations_workdays'] = parsed['stations_list']
            if parsed['weekend'] is True:
                weekend_page = parser_utils.download_page(
                    parsed['weekend_link']
                )
                weekend = ScheduleParser.parse_html(weekend_page)
                record['schedule_weekend'] = weekend['schedule_table']
                record['stations_weekend'] = weekend['stations_list']

        # Collapse the two station lists into one when they are equal.
        has_both_lists = all(
            key in record for key in ('stations_weekend', 'stations_workdays')
        )
        if has_both_lists:
            if record['stations_weekend'] == record['stations_workdays']:
                record['stations'] = record['stations_weekend']
                record.pop('stations_workdays')
                record.pop('stations_weekend')

        parser_utils.save_json_file(
            parser_configs.directories["BUSES_DIR"] + bus_id + ".json",
            record
        )