Exemplo n.º 1
0
def download(coords, number):
    """Fetch one results page for a search location and save it to disk.

    The page URL comes from ``lib.page(coords, number)`` and the file is
    written as ``<number>.html`` inside the directory returned by
    ``get_dirname(coords)``.

    Returns True when the saved page is not flagged by ``is_last`` (so the
    caller may go on to the next page). Otherwise the file is renamed to
    ``.last.html`` (a path relative to the current working directory), an
    explanation is printed, and False is returned.
    """
    target = os.path.join(get_dirname(coords), '%d.html' % number)
    urlretrieve(lib.page(coords, number), filename=target)

    # Common case first: an ordinary page was stored, keep paginating.
    if not is_last(target):
        print(('Downloaded (%f, %f), ' % coords) + 'page %d' % number)
        return True

    # Terminal page: keep it around under a fixed name so it can be inspected.
    os.rename(target, '.last.html')
    print('When searching for (%f, %f),' % coords)
    print('I stopped at page %d. If the script worked properly, this is because' % number)
    print('page %d was the last page or there were no meetings for this search.' % (number - 1))
    print('Check .last.html to make sure.')
    return False
Exemplo n.º 2
0
def main_json():
    """Print the table data of the search results page as a JSON document.

    The URL is built by ``page`` from the arguments returned by
    ``search_args()``, parsed with ``lxml.html.parse``, reduced to plain data
    by ``table_data`` and written to standard output.
    """
    import json

    document = lxml.html.parse(page(*search_args()))
    print(json.dumps(table_data(document)))
Exemplo n.º 3
0
    else:
        print(('Downloaded (%f, %f), ' % coords) + 'page %d' % number)
        return True

if __name__ == '__main__':
    # Script entry point: for every coordinate pair offered by
    # lib.choose_coords(), download result pages one at a time until
    # download() returns a falsy value, then leave an empty 'done' marker in
    # that search's directory so a later run skips it.
    for coords in lib.choose_coords():
        dirname = get_dirname(coords)

        # Check if the search is done, or make a directory
        if os.path.isdir(dirname):
            if os.path.isfile(os.path.join(dirname, 'done')):
                continue
        else:
            os.makedirs(dirname)

        # Skip pages that are already finished: resume at the first page
        # number whose '<n>.html' file is not yet in the directory.
        pages_so_far = set(os.listdir(dirname))
        number = 1
        while ('%d.html' % number) in pages_so_far:
            number += 1

        # Run the search.
        while download(coords, number):
            # print() with a single argument behaves the same on Python 2 and
            # Python 3; the bare print statement is a SyntaxError on Python 3.
            print(lib.page(coords, number))
            number += 1
            # Randomised pause between requests. A normal sample can in
            # principle be negative, which sleep() rejects, so clamp at zero.
            sleep(max(0.0, normalvariate(1.17, 0.2)))

        # Mark as done. The context manager closes the marker file right away
        # instead of relying on garbage collection to release the handle.
        with open(os.path.join(dirname, 'done'), 'w') as marker:
            marker.write('')