Exemplo n.º 1
0
def _parse_timetable_cell(text):
    """Convert the text of one timetable cell into a Python value.

    Returns ``None`` for a cell holding a single space, a ``datetime.time``
    for colon-separated integers such as ``'7:05'``, and the unmodified
    text for anything else.
    """
    if text == ' ':
        return None
    if ':' not in text:
        return text
    try:
        return datetime.time(*[int(part) for part in text.strip().split(':')])
    except (ValueError, TypeError, OverflowError):
        # Not a valid clock time (non-numeric parts, out-of-range values,
        # too many fields): keep the original text instead of failing.
        return text


def fetch_timetable(browser, link):
    """Fetch a timetable page and return its contents as a Dataset.

    The first ``<table>`` of the page is scanned row by row. Only rows
    whose first ``<td>`` contains a ``<span>`` are kept; the text of that
    cell is the row title and the remaining cells are the row values.
    Titled rows that come before the first row whose title contains
    ``'TRAIN NO.'`` are discarded. That row supplies the headers and every
    later titled row becomes one data row tagged with its title.

    Args:
        browser: Object whose ``follow_link(link)`` returns a response
            providing ``read()``.
        link: The link handed to ``browser.follow_link``.

    Returns:
        A ``Dataset`` with the train numbers as headers, one row per
        place, and every column headed ``'TRAIN NO.'`` removed.

    Raises:
        IndexError: If no row title contains ``'TRAIN NO.'``.
    """
    if debug:
        puts('Fetching timetable from %s' % link)
    response = browser.follow_link(link)
    soup = BeautifulSoup(response.read())
    # NOTE(review): a page without any <table> presumably leaves `table`
    # as None and fails on the next attribute access - confirm with callers.
    table = soup.find('table')

    timetable = []
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        # Skip rows that have no <td> at all (e.g. <th>-only rows) as well
        # as rows whose first cell carries no <span> title marker.
        if not cells or not cells[0].find('span'):
            continue
        title = cells[0].getText()
        values = [_parse_timetable_cell(cell.getText()) for cell in cells[1:]]
        timetable.append((title, values))

    # Locate the header row; everything before it is ignored.
    for header_index, (title, _values) in enumerate(timetable):
        if 'TRAIN NO.' in title:
            break
    else:
        # Same exception type the unguarded timetable[0] lookup used to
        # raise, but with a message that explains what went wrong.
        raise IndexError('no TRAIN NO. row found in timetable')

    train_nums = timetable[header_index]
    data = Dataset()
    data.headers = train_nums[1]
    if debug:
        puts(repr(data.headers))
    for place, times in timetable[header_index + 1:]:
        if debug:
            puts(repr((place, times)))
        # title() upper-cases the letter after a backtick; presumably the
        # pages use a backtick as an apostrophe, so restore a possessive 's.
        data.rpush(times, tags=[place.title().replace('`S', "'s")])

    # Strip out TRAIN NO. columns: the label can occur more than once in
    # the header row, so delete until none is left.
    while 'TRAIN NO.' in (data.headers or ()):
        del data['TRAIN NO.']

    return data