Пример #1
0
def find_next(url_u, web, handler=None):
    """Spider the page at ``url_u`` and return the URL picked to visit next.

    When ``handler`` is truthy it is invoked with ``url_u`` before the page
    is fetched. The page is then downloaded with ``get_page``, candidate
    URLs are collected by ``find_urls_in_page``, and ``pick_url`` selects
    one of them using the encoding detected from the page content.
    """
    encoded_url = decoder.encode(url_u)

    if handler:
        io.output("Running handler with page: %s" % encoded_url)
        handler(url_u)

    io.output("Spidering page: %s" % encoded_url)
    page = get_page(encoded_url)

    links = find_urls_in_page(web, page, url_u, encoded_url)
    return pick_url(links, encoding=decoder.detect_encoding(page))
Пример #2
0
def find_next(url_u, web, handler=None):
    """Fetch the page for ``url_u`` and choose the next URL to follow.

    ``handler``, if given and truthy, is called with ``url_u`` first. The
    return value is whatever ``pick_url`` selects from the URLs that
    ``find_urls_in_page`` extracts from the downloaded page.
    """
    raw_url = decoder.encode(url_u)

    if handler:
        io.output("Running handler with page: %s" % raw_url)
        handler(url_u)

    io.output("Spidering page: %s" % raw_url)
    body = get_page(raw_url)

    found = find_urls_in_page(web, body, url_u, raw_url)
    page_encoding = decoder.detect_encoding(body)
    return pick_url(found, encoding=page_encoding)
Пример #3
0
def play(menu):
    """Display ``menu`` until the user makes a valid choice; return its value.

    Each pass prints the menu header, then every option joined by
    ``menu.delimiter`` and followed by ``menu.footer``. An option's key is
    shown in parentheses, upper-cased when it equals ``menu.default``.

    Input is read with ``io.input`` when ``menu.confirm`` is truthy (an empty
    string selects the default), otherwise with ``getch`` (a carriage return
    selects the default). Invalid input prints "invalid choice" and the menu
    is shown again.

    Returns:
        The ``value`` attribute of the entry in ``menu.keys`` matching the
        chosen key (lower-cased) or ``menu.default``.
    """
    while True:
        io.output("\n" + menu.header + "\n")
        choices = []
        for option in menu.options:
            # The default option is advertised by upper-casing its key.
            pkey = option.key.upper() if option.key == menu.default else option.key

            if option.key in option.text:
                # Highlight the first occurrence of the key inside the label.
                choices.append(option.text.replace(option.key, "(%s)" % pkey, 1))
            else:
                choices.append("(%s) %s" % (pkey, option.text))
        io.output(menu.delimiter.join(choices) + menu.footer)

        if menu.confirm:
            choice = io.input(None)
            default = choice == ""
        else:
            choice = getch()
            default = choice == "\r"

        if default:
            if not menu.default:
                # No default configured: ask again instead of falling through
                # to a return with no value bound.
                io.output("invalid choice\n")
                continue
            return menu.keys[menu.default].value

        try:
            return menu.keys[choice.lower()].value
        except KeyError:
            io.output("invalid choice\n")
Пример #4
0
def pause():
    """Sleep for a random whole number of seconds between 15 and 60.

    The chosen duration is reported through ``io.output`` before sleeping.
    """
    seconds = random.randint(15, 60)
    message = "Pausing for %s seconds..." % seconds
    io.output(message)
    time.sleep(seconds)
Пример #5
0
    txt_byte = get_page(url_byte)

    candidates_byte = find_urls_in_page(web, txt_byte, url_u, url_byte)
    encoding = decoder.detect_encoding(txt_byte)
    chosen_u = pick_url(candidates_byte, encoding=encoding)

    return chosen_u


if __name__ == '__main__':
    # Candidate start pages. Each assignment overrides the previous one, so
    # only the last URL is actually used.
    url_byte = 'http://en.wikipedia.org/wiki/Main_Page'
    url_byte = 'http://ar.wikipedia.org/wiki/الصفحة_الرئيسية'
    url_byte = 'http://pt.wikipedia.org/wiki/Casa_da_Cascata'
    url_byte = 'http://it.wikipedia.org/wiki/Special:Random'

    # NOTE: this rebinds the name ``web`` to the new ``Web`` instance.
    web = web.Web()
    web.add_url(url_byte, [])

    url_u = decoder.decode(url_byte, 'utf-8')
    # depth starts at -1 and is only ever decremented, so it never reaches 0
    # and the loop runs until interrupted.
    depth = -1
    while depth != 0:  # easy way to set depth as infinite
        depth -= 1
        try:
            url_u = find_next(url_u, web, handler=url_handler)
        except Exception:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still stop the crawl; any other failure is logged
            # and the crawl resumes from a URL picked out of the known set.
            io.output("Recovering from exception:")
            io.output(traceback.format_exc())
            url_u = pick_url(web.urls())

        pause()  # less hammer
Пример #6
0
 def test_basic(self):
     """Build a ConsoleIo with None, call break_pressed, then output two lines."""
     console = console_io.ConsoleIo(None)
     console.break_pressed()
     console.output("line1", "line2")
Пример #7
0
def pause():
    """Announce and then wait out a random delay of 15 to 60 seconds."""
    wait_seconds = random.randint(15, 60)
    notice = "Pausing for %s seconds..." % wait_seconds
    io.output(notice)
    time.sleep(wait_seconds)
Пример #8
0
    txt_byte = get_page(url_byte)

    candidates_byte = find_urls_in_page(web, txt_byte, url_u, url_byte)
    encoding = decoder.detect_encoding(txt_byte)
    chosen_u = pick_url(candidates_byte, encoding=encoding)

    return chosen_u


if __name__ == '__main__':
    # Candidate start pages. Each assignment overrides the previous one, so
    # only the last URL is actually used.
    url_byte = 'http://en.wikipedia.org/wiki/Main_Page'
    url_byte = 'http://ar.wikipedia.org/wiki/الصفحة_الرئيسية'
    url_byte = 'http://pt.wikipedia.org/wiki/Casa_da_Cascata'
    url_byte = 'http://it.wikipedia.org/wiki/Special:Random'

    # NOTE: this rebinds the name ``web`` to the new ``Web`` instance.
    web = web.Web()
    web.add_url(url_byte, [])

    url_u = decoder.decode(url_byte, 'utf-8')
    # depth starts at -1 and is only ever decremented, so it never reaches 0
    # and the loop runs until interrupted.
    depth = -1
    while depth != 0:  # easy way to set depth as infinite
        depth -= 1
        try:
            url_u = find_next(url_u, web, handler=url_handler)
        except Exception:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still stop the crawl; any other failure is logged
            # and the crawl resumes from a URL picked out of the known set.
            io.output("Recovering from exception:")
            io.output(traceback.format_exc())
            url_u = pick_url(web.urls())

        pause()  # less hammer
Пример #9
0
 def test_basic(self):
     """Exercise ConsoleIo: construct it, call break_pressed, output two lines."""
     console = console_io.ConsoleIo(None)
     console.break_pressed()
     console.output("line1", "line2")
Пример #10
0
 def textoutput(self, text):
     """Pass ``text`` to ``io.output`` and return whatever it returns."""
     outcome = io.output(text)
     return outcome