Пример #1
0
    def ScrapeResult(url, proc, wnd, result):
        """Log the result text, then capture the window and save it to disk.

        Args:
          url: URL that was loaded; used to derive the output filename
          proc: not referenced in this function
          wnd: window passed to windowing.ScrapeWindow for capture
          result: text written to log_file when log_file is truthy
        """
        # log_file and command are taken from the enclosing scope, which is
        # not visible from this function alone.
        if log_file:
            log_file.write(result)

        # Grab the window contents, then store them as a .bmp whose name is
        # derived from the URL and the --outdir command option.
        snapshot = windowing.ScrapeWindow(wnd)
        target = windowing.URLtoFilename(url, command["--outdir"], ".bmp")
        snapshot.save(target)
Пример #2
0
def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
    """Invoke a browser, send it to a series of URLs, and save its output.

    Args:
      urls: list of URLs to scrape, or a single URL string
      outdir: directory to place output
      size: size of browser window to use
      pos: position of browser window
      timeout: amount of time to wait for page to load
      kwargs: miscellaneous keyword args. Keys read here:
        path: browser executable to launch; a missing or falsy value
          selects the default Internet Explorer path
        filename: output filename, or a callable that maps a URL to one;
          when absent the name is derived from the URL and outdir

    Returns:
      None if success, else an error string ("timeout")
    """
    default_path = r"c:\program files\internet explorer\iexplore.exe"
    path = kwargs.get("path") or default_path

    (iewnd, ieproc, address_bar, render_pane,
     tab_window) = InvokeBrowser(path)

    timedout = False

    # The browser is running from here on, so make sure the process is
    # always ended, even if driving it raises.
    try:
        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(iewnd, pos, size, render_pane)

        # Accept a bare URL string as well as a list. isinstance (rather
        # than an exact type match) also covers string subclasses, which
        # would otherwise be iterated character by character.
        if isinstance(urls, types.StringTypes):
            urls = [urls]

        # Visit each URL we're given
        for url in urls:
            # Double-click in the address bar, type the name, and press Enter
            mouse.DoubleClickInWindow(address_bar)
            keyboard.TypeString(url)
            keyboard.TypeString("\n")

            # Wait for the page to finish loading; a negative load time is
            # treated as a timeout and stops the run.
            load_time = windowing.WaitForThrobber(
                tab_window, (6, 8, 22, 24), timeout)
            timedout = load_time < 0
            if timedout:
                break

            # Scrape the page
            image = windowing.ScrapeWindow(render_pane)

            # Save to disk; an explicit "filename" kwarg overrides the
            # name derived from the URL.
            if "filename" in kwargs:
                if callable(kwargs["filename"]):
                    filename = kwargs["filename"](url)
                else:
                    filename = kwargs["filename"]
            else:
                filename = windowing.URLtoFilename(url, outdir, ".bmp")
            image.save(filename)
    finally:
        windowing.EndProcess(ieproc)

    if timedout:
        return "timeout"

    return None
Пример #3
0
def Scrape(urls, outdir, size, pos, timeout=20, **kwargs):
    """Invoke a browser, send it to a series of URLs, and save its output.

    Args:
      urls: list of URLs to scrape, or a single URL string
      outdir: directory to place output
      size: size of browser window to use
      pos: position of browser window
      timeout: amount of time to wait for page to load
      kwargs: miscellaneous keyword args. Keys read here:
        path: browser executable to launch; a missing or falsy value
          selects DEFAULT_PATH
        filename: output filename, or a callable that maps a URL to one;
          when absent the name is derived from the URL and outdir

    Returns:
      None if success, else an error string ("timeout")
    """
    path = kwargs.get("path") or DEFAULT_PATH

    (wnd, proc, render_pane) = InvokeBrowser(path)

    timedout = False

    # The browser is running from here on, so make sure the tab-closing
    # cleanup below always runs, even if driving the browser raises.
    try:
        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

        time.sleep(3)

        # Firefox is a bit of a pain: it doesn't use standard edit controls,
        # and it doesn't display a throbber when there's no tab. Let's make
        # sure there's at least one tab, then select the first one
        mouse.ClickInWindow(wnd)
        keyboard.TypeString("[t]", True)
        mouse.ClickInWindow(wnd, (30, 115))
        time.sleep(2)

        # Accept a bare URL string as well as a list. isinstance (rather
        # than an exact type match) also covers string subclasses, which
        # would otherwise be iterated character by character.
        if isinstance(urls, types.StringTypes):
            urls = [urls]

        # Visit each URL we're given
        for url in urls:
            # Use keyboard shortcuts (presumably "{d}" focuses the location
            # bar -- confirm against the keyboard module), then type the URL
            # and press Enter
            keyboard.TypeString("{d}", True)
            keyboard.TypeString(url)
            keyboard.TypeString("\n")

            # Wait for the page to finish loading; a negative load time is
            # treated as a timeout and stops the run.
            load_time = windowing.WaitForThrobber(
                wnd, (10, 96, 26, 112), timeout)
            timedout = load_time < 0
            if timedout:
                break

            # Scrape the page
            image = windowing.ScrapeWindow(render_pane)

            # Save to disk; an explicit "filename" kwarg overrides the
            # name derived from the URL.
            if "filename" in kwargs:
                if callable(kwargs["filename"]):
                    filename = kwargs["filename"](url)
                else:
                    filename = kwargs["filename"]
            else:
                filename = windowing.URLtoFilename(url, outdir, ".bmp")
            image.save(filename)
    finally:
        # Close all the tabs, cheesily: keep sending the close shortcut
        # until no Mozilla UI windows remain.
        mouse.ClickInWindow(wnd)

        while windowing.FindChildWindows(0, "MozillaUIWindowClass"):
            keyboard.TypeString("[w]", True)
            time.sleep(1)

    if timedout:
        return "timeout"

    return None
Пример #4
0
def Scrape(urls, outdir, size, pos, timeout, kwargs):
    """Invoke a browser, send it to a series of URLs, and save its output.

    Args:
      urls: list of URLs to scrape, or a single URL string
      outdir: directory to place output
      size: size of browser window to use
      pos: position of browser window
      timeout: amount of time to wait for page to load; also passed to the
        wait for the browser process to exit
      kwargs: mapping of miscellaneous options (passed positionally, not as
        **kwargs). Keys read here:
        path: browser executable to launch; a missing or falsy value
          selects DEFAULT_PATH
        filename: output filename, or a callable that maps a URL to one;
          when absent the name is derived from the URL and outdir

    Returns:
      None if success, else an error string: "crashed" if the browser did
      not exit after being asked to close and had to be ended, otherwise
      "timeout" if a page load timed out.
    """
    if "path" in kwargs and kwargs["path"]:
        path = kwargs["path"]
    else:
        path = DEFAULT_PATH

    (wnd, proc, address_bar, render_pane) = InvokeBrowser(path)

    timedout = False
    crashed = False

    # The browser is running from here on, so make sure the shutdown below
    # always runs, even if driving the browser raises.
    try:
        # Resize and reposition the frame
        windowing.MoveAndSizeWindow(wnd, pos, size, render_pane)

        # Accept a bare URL string as well as a list. isinstance (rather
        # than an exact type match) also covers string subclasses, which
        # would otherwise be iterated character by character.
        if isinstance(urls, types.StringTypes):
            urls = [urls]

        # Visit each URL we're given
        for url in urls:
            # Click in the address bar, type the name, and press Enter
            mouse.ClickInWindow(address_bar)
            keyboard.TypeString(url, 0.1)
            keyboard.TypeString("\n")

            # Wait for the page to finish loading; a negative load time is
            # treated as a timeout and stops the run.
            load_time = windowing.WaitForThrobber(
                wnd, (20, 16, 36, 32), timeout)
            timedout = load_time < 0
            if timedout:
                break

            # Scrape the page
            image = windowing.ScrapeWindow(render_pane)

            # Save to disk; an explicit "filename" entry overrides the
            # name derived from the URL.
            if "filename" in kwargs:
                if callable(kwargs["filename"]):
                    filename = kwargs["filename"](url)
                else:
                    filename = kwargs["filename"]
            else:
                filename = windowing.URLtoFilename(url, outdir, ".bmp")
            image.save(filename)
    finally:
        if proc:
            windowing.SetForegroundWindow(wnd)

            # Send Alt-F4, then wait for process to end
            keyboard.TypeString(r"{\4}", use_modifiers=True)
            if not windowing.WaitForProcessExit(proc, timeout):
                windowing.EndProcess(proc)
                # Record the failure instead of returning here: a return
                # inside finally would swallow an in-flight exception.
                crashed = True

    # A failed shutdown takes priority over a page-load timeout.
    if crashed:
        return "crashed"

    if timedout:
        return "timeout"

    return None