def download(coords, number):
    """Fetch result page *number* for the search at *coords* and save it.

    Saves the page as ``<dirname>/<number>.html``. Returns ``True`` when a
    further page should be fetched, ``False`` once the just-downloaded page
    is detected as the final/empty one (in which case the file is moved to
    ``.last.html`` for manual inspection).
    """
    target = os.path.join(get_dirname(coords), '%d.html' % number)
    urlretrieve(lib.page(coords, number), filename=target)

    if not is_last(target):
        print(('Downloaded (%f, %f), ' % coords) + 'page %d' % number)
        return True

    # Final (or empty) page: stash it in the working directory so the
    # operator can eyeball it, then tell them why we stopped.
    os.rename(target, '.last.html')
    print('When searching for (%f, %f),' % coords)
    print('I stopped at page %d. If the script worked properly, this is because' % number)
    print('page %d was the last page or there were no meetings for this search.' % (number - 1))
    print('Check .last.html to make sure.')
    return False
def main_json():
    """Run the search given on the command line and dump its table as JSON."""
    import json
    parsed = lxml.html.parse(page(*search_args()))
    print(json.dumps(table_data(parsed)))
if __name__ == '__main__':
    for coords in lib.choose_coords():
        dirname = get_dirname(coords)

        # Skip searches already marked complete; create the directory the
        # first time this coordinate pair is seen.
        if os.path.isdir(dirname):
            if os.path.isfile(os.path.join(dirname, 'done')):
                continue
        else:
            os.makedirs(dirname)

        # Resume after the highest consecutively-numbered page already
        # downloaded into this search's directory.
        pages_so_far = set(os.listdir(dirname))
        number = 1
        while ('%d.html' % number) in pages_so_far:
            number += 1

        # Run the search, pausing a randomized ~1.2s between requests.
        while download(coords, number):
            # BUG FIX: was the Python 2 statement "print lib.page(...)",
            # a SyntaxError under Python 3 (the rest of the file already
            # uses print() as a function).
            print(lib.page(coords, number))
            number += 1
            # Clamp at 0: normalvariate can (rarely) go negative, and
            # time.sleep raises ValueError on a negative argument.
            sleep(max(0, normalvariate(1.17, 0.2)))

        # Mark the search as done. Use a context manager so the file
        # handle is closed deterministically (was open(...).write('')).
        with open(os.path.join(dirname, 'done'), 'w') as marker:
            marker.write('')