Example No. 1
from http.cookiejar import CookieJar

def request_from_url(url, settings):
    """
    request_from_url(str, dict)

    Fetches url and returns the requests Response object,
    or None if the connection times out.
    """
    cookies = settings["cookies"]
    if cookies["firefox"]:
        cj = web.browser_cookie3.firefox()
    elif cookies["chrome"]:
        cj = web.browser_cookie3.chrome()
    elif cookies["opera"]:
        cj = web.browser_cookie3.opera()
    elif cookies["edge"]:
        cj = web.browser_cookie3.edge()
    else:
        cj = CookieJar()
    try:
        r = web.requests.get(url,
                             cookies=cj,
                             headers={"User-Agent": web.FIREFOX_USER_AGENT},
                             timeout=settings["connection_timeout"])
    except web.requests.ReadTimeout:
        Debug.log_file("ReadTimeout", "request_from_url",
                       f"Connection timed out on {url}")
        r = None
    return r
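A minimal usage sketch; the settings keys and the example URL below are assumptions inferred from the lookups inside request_from_url:

# hypothetical settings dict; keys mirror the lookups above
settings = {
    "cookies": {"firefox": False, "chrome": False,
                "opera": False, "edge": False},
    "connection_timeout": 10,
}
r = request_from_url("https://example.com/gallery", settings)
if r is not None:
    print(r.status_code, r.headers.get("Content-Type"))
    r.close()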
Example No. 2
def _on_delete_button(self, icon_button):
    # create a new list for the stored location settings
    saved_coords = []
    settings = load_Settings()
    # remove the widget from the MDList
    self.location_list.remove_widget(icon_button.listitem)
    # loop through the remaining widgets, appending each location_settings
    for widget in self.location_list.children:
        saved_coords.append(widget.location_settings)
    # store the new location list and save it to file
    settings["saved_coords"] = saved_coords
    save_settings(settings)
    Debug.log_file("Saved Settings", "on_delete_button dialogs.py",
                   "Saved settings to settings.json")
Example No. 3
def download_image(filename, response, settings):
    """
    download_image(str, str, object)

    path should be the file path, filename should be the name of the file
    os.path.join is used to append path to filename
    response is the response returned from requests.get
    """
    # read from socket
    # store in memory
    # images shouldnt be too large
    byte_stream = BytesIO()
    for buff in response.iter_content(1000):
        byte_stream.write(buff)
    # load image from buffer io
    try:
        image = Image.open(byte_stream)
    except UnidentifiedImageError as err:
        image = None
        Debug.log("IMAGE_OPEN_ERROR",
                  err,
                  url=response.url,
                  error=err.__str__())
        Debug.log_file("ImageOpenError", "download_image",
                       f"Error opening image from {response.url}")
    if image:
        width, height = image.size
        # save only if the image meets the minimum size requirement
        if width > 200 and height > 200:
            # the lock stops two threads from trying to
            # create the same folder at the same time
            with Threads.new_folder_lock:
                if not os.path.exists(settings["save_path"]):
                    os.mkdir(settings["save_path"])
                if settings["unique_pathname"]["enabled"]:
                    path = os.path.join(settings["save_path"],
                                        settings["unique_pathname"]["name"])
                    if not os.path.exists(path):
                        os.mkdir(path)
                else:
                    path = settings["save_path"]
            ImageFile.write_to_file(path, filename, byte_stream)
        image.close()
    byte_stream.close()
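A usage sketch, assuming the requests library for the HTTP call; the URL and settings values below are placeholders, and Threads.new_folder_lock and ImageFile.write_to_file must exist as in the project:

import requests

settings = {
    "save_path": "./downloads",  # assumed value
    "unique_pathname": {"enabled": False, "name": ""},
}
resp = requests.get("https://example.com/image.jpg",
                    stream=True, timeout=10)
download_image("image.jpg", resp, settings)
resp.close()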
Example No. 4
def _on_mock_error(self, status, err):
    if status == "permission-denied":
        Debug.log_file("Error", "_on_mock_error main.py",
                       "MOCK LOCATION Permission denied")
    elif status == "provider-exists":
        Debug.log_file("Error", "_on_mock_error main.py",
                       "Provider exists")
    else:
        Debug.log_file("Error", "_on_mock_error main.py",
                       str(err))
Example No. 5
def on_fake_entries(self, *args):
    for x in range(10):
        Debug.log_file(f"Test{x}", "on_fake_entries", f"Test number {x}")
    log = Debug.getlogfromfile()
    if log:
        self.log = log
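Debug.log_file and Debug.getlogfromfile are called throughout these examples but their implementation is not shown; a minimal sketch of what such helpers might look like, purely as an assumption:

import os
import threading
import time

class Debug:
    _lock = threading.Lock()
    LOG_PATH = "debug.log"  # assumed location

    @staticmethod
    def log_file(tag, source, message):
        # append one timestamped line per event; the lock keeps
        # concurrent threads from interleaving writes
        line = f"{time.asctime()} [{tag}] ({source}) {message}\n"
        with Debug._lock:
            with open(Debug.LOG_PATH, "a", encoding="utf-8") as fp:
                fp.write(line)

    @staticmethod
    def getlogfromfile():
        # return the whole log as a string, or None if no log exists yet
        if not os.path.exists(Debug.LOG_PATH):
            return None
        with open(Debug.LOG_PATH, "r", encoding="utf-8") as fp:
            return fp.read()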
Example No. 6
def commander_thread(callback):
    """
    Main handler thread: takes in a filepath or url
    and passes it on to captain_thread for parsing.

    Level 1 parser and image-finder thread;
    creates grunt threads for any links found on the url.
    """
    quit_flag = False
    grunts = []
    _task_running = False
    callback(
        Message(
            thread="commander",
            type="message",
            data={"message": "Commander thread has loaded. Waiting to scan"}))
    # partial application keeps the Message constructor calls short
    MessageMain = functools.partial(Message,
                                    thread="commander",
                                    type="message")
    # settings dict will contain the settings at start of scraping
    settings = {}
    scanned_urls = []
    while not quit_flag:
        try:
            # get the next Message from the global queue;
            # pass timeout as a keyword, otherwise 0.5 is taken
            # as the block argument and the call never times out
            r = Threads.commander_queue.get(timeout=0.5)
            if r.thread == "main":
                if r.type == "quit":
                    Threads.cancel.set()
                    callback(Message(thread="commander", type="quit"))
                    quit_flag = True
                elif r.type == "start":
                    if not _task_running:
                        grunts = []
                        _task_running = True

                        # load the settings from file and copy them,
                        # so the values can't change while we are
                        # downloading and saving to file
                        settings = dict(Settings.load())

                        # set the maximum number of connections
                        max_connections = int(settings["max_connections"])
                        Threads.semaphore = threading.Semaphore(
                            max_connections)
                        Debug.log_file(
                            "SETTINGS", "commander.run",
                            f"Max Connections set to {max_connections}")

                        callback(
                            MessageMain(
                                data={"message": "Starting Threads..."}))
                        for thread_index, url in enumerate(scanned_urls):
                            grunts.append(Grunt(thread_index, url, settings))
                        for _grunt in grunts:
                            _grunt.start()

                elif r.type == "fetch":
                    if not _task_running:
                        # Load settings
                        callback(
                            Message(thread="commander",
                                    type="fetch",
                                    status="started"))
                        settings = Settings.load()
                        callback(
                            MessageMain(
                                data={
                                    "message":
                                    "Initializing the global search filter..."
                                }))
                        # compile the global filter regex so that only
                        # matches from the filter list are added
                        web.compile_regex_global_filter()
                        # get the document from the URL
                        callback(
                            MessageMain(
                                data={
                                    "message": f"Connecting to {r.data['url']}"
                                }))
                        webreq = request_from_url(r.data["url"], settings)
                        if webreq:
                            # make sure it is a text document we can parse
                            ext = web.is_valid_content_type(
                                r.data["url"], webreq.headers["Content-type"],
                                settings["images_to_search"])
                            if ext == ".html":
                                html_doc = webreq.text
                                # get the url title
                                _assign_unique_name(r.data["url"], html_doc)
                                callback(
                                    MessageMain(data={
                                        "message":
                                        "Parsing HTML Document..."
                                    }))
                                # scrape links and images from document
                                scanned_urls = []
                                if web.parse_html(url=r.data["url"],
                                                  html=html_doc,
                                                  urls=scanned_urls,
                                                  images_only=False,
                                                  thumbnails_only=True) > 0:
                                    # send the scanned urls to the main thread for processing
                                    callback(
                                        MessageMain(
                                            data={
                                                "message":
                                                f"Parsing succesful. Found {len(scanned_urls)} links"
                                            }))
                                    data = {"urls": scanned_urls}
                                    reqmsg = Message(thread="commander",
                                                     type="fetch",
                                                     status="finished",
                                                     data=data)
                                    callback(reqmsg)
                                else:
                                    # Nothing found notify main thread
                                    callback(
                                        MessageMain(
                                            data={
                                                "message": "No links found :("
                                            }))
                            webreq.close()
                    else:
                        callback(
                            MessageMain(
                                data={
                                    "message":
                                    "Still scanning for images please press cancel to start a new scan"
                                }))

                elif r.type == "cancel":
                    Threads.cancel.set()

            elif r.thread == "grunt":
                callback(r)

            elif r.thread == "settings":
                callback(MessageMain(data=r.data))

        except queue.Empty:
            # no message arrived within the timeout;
            # fall through to the cleanup check below
            pass

        finally:
            if _task_running:
                # if all grunts have finished, clean up
                # and notify the main thread
                if len(grunts_alive(grunts)) == 0:
                    Threads.cancel.clear()
                    grunts = []
                    _task_running = False
                    Urls.clear()
                    callback(Message(thread="commander", type="complete"))