def find_next(url_u, web, handler=None):
    """Fetch the page at *url_u* and return the next URL chosen from it.

    The URL is first encoded with ``decoder.encode``.  If *handler* is
    truthy it is called with the original *url_u* before the page is
    fetched.  The page is then fetched with ``get_page``, candidate URLs
    are gathered by ``find_urls_in_page`` and one is selected by
    ``pick_url`` using the encoding reported by
    ``decoder.detect_encoding`` for the fetched page.

    Args:
        url_u: URL of the page to visit (presumably a unicode string,
            judging by the ``_u`` suffix -- confirm against callers).
        web: object passed through to ``find_urls_in_page``.
        handler: optional callable invoked with *url_u*.

    Returns:
        Whatever ``pick_url`` returns for the candidates found.
    """
    encoded_url = decoder.encode(url_u)

    if handler:
        io.output("Running handler with page: %s" % encoded_url)
        handler(url_u)

    io.output("Spidering page: %s" % encoded_url)
    page_bytes = get_page(encoded_url)
    link_candidates = find_urls_in_page(web, page_bytes, url_u, encoded_url)
    # Encoding detection stays after link extraction, as in the original order.
    return pick_url(link_candidates,
                    encoding=decoder.detect_encoding(page_bytes))
def play(menu):
    """Show *menu* until the user makes a valid choice and return its value.

    On every pass the header is printed, followed by all options joined
    with ``menu.delimiter`` and terminated by ``menu.footer``.  Each
    option's key is shown in parentheses; the key of the default option is
    upper-cased.

    Input is read with ``io.input(None)`` when ``menu.confirm`` is truthy
    (an empty string selects the default), otherwise a single key is read
    with ``getch()`` (``"\\r"`` selects the default).

    Args:
        menu: object exposing ``header``, ``options`` (each with ``key``
            and ``text``), ``default``, ``delimiter``, ``footer``,
            ``confirm`` and ``keys`` (a mapping from key to an object with
            a ``value`` attribute).

    Returns:
        The ``value`` of the selected entry in ``menu.keys``.
    """
    while True:
        io.output("\n" + menu.header + "\n")

        choices = []
        for option in menu.options:
            # The default option is advertised with an upper-case key.
            if option.key == menu.default:
                pkey = option.key.upper()
            else:
                pkey = option.key
            if option.key in option.text:
                # Mark the first occurrence of the key inside the label.
                choices.append(
                    option.text.replace(option.key, "(%s)" % pkey, 1))
            else:
                choices.append("(%s) %s" % (pkey, option.text))
        io.output(menu.delimiter.join(choices) + menu.footer)

        if menu.confirm:
            choice = io.input(None)
            default = choice == ""
        else:
            choice = getch()
            default = choice == "\r"

        if default:
            if not menu.default:
                # The menu has no default to fall back on: ask again.
                # Previously this fell through to ``return value`` with
                # ``value`` unbound and raised UnboundLocalError.
                io.output("invalid choice\n")
                continue
            return menu.keys[menu.default].value

        try:
            return menu.keys[choice.lower()].value
        except KeyError:
            io.output("invalid choice\n")
def pause():
    """Announce and then sleep for a random 15 to 60 seconds (inclusive)."""
    seconds = random.randint(15, 60)
    message = "Pausing for %s seconds..." % seconds
    io.output(message)
    time.sleep(seconds)
txt_byte = get_page(url_byte) candidates_byte = find_urls_in_page(web, txt_byte, url_u, url_byte) encoding = decoder.detect_encoding(txt_byte) chosen_u = pick_url(candidates_byte, encoding=encoding) return chosen_u if __name__ == '__main__': url_byte = 'http://en.wikipedia.org/wiki/Main_Page' url_byte = 'http://ar.wikipedia.org/wiki/الصفحة_الرئيسية' url_byte = 'http://pt.wikipedia.org/wiki/Casa_da_Cascata' url_byte = 'http://it.wikipedia.org/wiki/Special:Random' web = web.Web() web.add_url(url_byte, []) url_u = decoder.decode(url_byte, 'utf-8') depth = -1 while depth != 0: # easy way to set depth as infinite depth -= 1 try: url_u = find_next(url_u, web, handler=url_handler) except: io.output("Recovering from exception:") io.output(traceback.format_exc()) url_u = pick_url(web.urls()) pause() # less hammer
def test_basic(self):
    """Smoke test: create a ConsoleIo and call break_pressed() and output().

    No assertions are made; the test passes as long as nothing raises.
    """
    console = console_io.ConsoleIo(None)
    console.break_pressed()
    console.output("line1", "line2")
def textoutput(self, text):
    """Pass *text* to ``io.output`` and return whatever that call returns."""
    result = io.output(text)
    return result