Пример #1
0
    def download_page(self, page, training_stage):
        """Open ``page`` in the driven browser and save it by simulating keystrokes.

        The page is loaded through ``self.program.driver``, the first browser
        window is focused, and ``pyautogui`` then sends the key presses that
        confirm the save dialog and type the destination path.

        :param page: URL handed to ``driver.load_url``.
        :param training_stage: not used by the current implementation; kept so
            existing callers keep working.
        """

        def caps_lock_enabled():
            # `xset q` prints the keyboard LED mask; the shell pipeline keeps
            # only its last hex digit. getoutput() returns *text*, so it has to
            # be parsed before it can be compared with a number.
            led_digit = commands.getoutput(
                "xset q | grep LED | awk '{print $10}' | cut -b 8").strip()
            try:
                # Bit 0 of the LED mask is Caps Lock; testing the bit (instead
                # of comparing with 1) still works when Num Lock is lit too.
                return bool(int(led_digit, 16) & 1)
            except ValueError:
                # xset missing / unexpected output: assume Caps Lock is off.
                return False

        print_warning("[TRAINER] Opening page...")
        self.program.driver.load_url(page)
        self.program.driver.switch_to.window(self.program.driver.window_handles[0])

        print_warning("[TRAINER] Downloading page...")
        pyautogui.hotkey('CTRL', '0')
        # TODO try to find a more efficient way to do it...
        sleep(16)

        # The path typed below would come out wrong with Caps Lock active, so
        # switch it off first and report if that did not work.
        if caps_lock_enabled():
            print("CAPS Enabled")
            pyautogui.press('CAPSLOCK')
            if caps_lock_enabled():
                print("CAPS STILL ENABLED")
        pyautogui.hotkey('ENTER')
        sleep(1)
        # Types <shift+7>tmp<shift+7>dump.
        # NOTE(review): shift+7 is presumably "/" on the operator's keyboard
        # layout, giving "/tmp/dump" - confirm.
        for path_component in ("tmp", "dump"):
            pyautogui.keyDown('shift')
            pyautogui.typewrite('7')
            pyautogui.keyUp('shift')
            pyautogui.typewrite(path_component)
        pyautogui.hotkey('ENTER')
        sleep(2)
        print_successful_status("[TRAINER] ...done!")
Пример #2
0
 def should_i_take_a_break(self):
     """Randomly pause the crawler.

     A break is taken when ``random.random()`` exceeds 0.97; its length is
     whatever ``self.take_a_break()`` returns. Otherwise only a message is
     printed.
     """
     break_is_due = random.random() > 0.97
     if not break_is_due:
         print_warning("[CRAWLER] Not taking any break. Need to work!")
         return
     print_warning(
         "[CRAWLER] Caronte needs to smoke a cigarette. Will work in a while.")
     pause_length = self.take_a_break()
     sleep(pause_length)
Пример #3
0
    def dump_procedure(self):
        """Visit every stored forum of the landing page and crawl its threads.

        For each forum XPath returned by the website DAO the landing page is
        reloaded, the forum element is located and its ``href`` is read. When
        the DAO reports no subforums for that forum, the forum is opened and
        its threads are iterated directly; otherwise each subforum is opened
        in turn (reloading the forum page before each one) and its threads are
        iterated.
        """
        print_warning("[CRAWLER] Beginning dump procedure.")
        print_warning("[CRAWLER] Reaching forum landing page.")
        sleep(1)
        # Design note: it could be interesting to make this flexible with
        # respect to XPaths, ids and partial names. One option is to work with
        # "generic finders" that specialize according to the information stored
        # in the database; the DB would then qualify the exposed property so
        # that the code can invoke the correct function.
        #
        # Moreover, a proper try/except structure would help both with changes
        # in page structure and with testing during the training phase.

        # TODO replace all load_url with click or different actions. load_url doesn't provide referral link.
        # Iterate forums
        for forum_xpath in self.website_dao.get_forum_xpaths_by_url(
                self.landing_page):
            wait_simple_pause()
            self.program.driver.load_url(self.landing_page)
            # Retrieving forum link for navigation
            forum_element = self.program.driver.find_element_by_xpath(
                forum_xpath)
            forum_link = forum_element.get_attribute("href")

            subforums = self.website_dao.get_subforum_xpaths_by_url_and_parent_xpath(
                self.landing_page, forum_xpath)
            # Subforums might be absent, with threads listed directly in the
            # forum. `not subforums` also covers an empty list, which would
            # otherwise raise IndexError on `subforums[0]`.
            if not subforums or subforums[0] in ('', None):
                wait_simple_pause()
                self.program.driver.click(forum_element)
                self.iterate_threads(forum_link)
                continue

            # Iterate subforums
            for subforum_xpath in subforums:
                wait_simple_pause()
                self.program.driver.load_url(forum_link)
                # Retrieving subforum link for navigation
                subforum_element = self.program.driver.find_element_by_xpath(
                    subforum_xpath)
                subforum_link = subforum_element.get_attribute("href")
                wait_simple_pause()
                self.program.driver.click(subforum_element)
                self.iterate_threads(subforum_link)
Пример #4
0
    def begin_training(self, forum_landing_page, login_page):
        """Learn the structure of a forum unless the website DAO already knows it.

        :param forum_landing_page: URL used to look the forum up in the DAO and
            passed to every learning step.
        :param login_page: URL passed to the login-structure step and to
            ``CrawlBootstrapper``.
        :return: whatever ``website_dao.get_website_id_by_url`` returns for
            ``forum_landing_page`` once the (optional) learning phase is over.
        """
        forum_is_unknown = self.website_dao.get_website_id_by_url(
            forum_landing_page) is None

        if forum_is_unknown:
            print_warning(
                "[TRAINER] The forum is not known. Proceeding to the learning phase."
            )
            self.get_login_page_structure(login_page)

            # Executing login
            CrawlBootstrapper(self.program, self.db, login_page)

            # The remaining learning steps all take the landing page URL.
            for learning_step in (self.get_relevant_forums,
                                  self.get_subforum_structure,
                                  self.get_thread_structure):
                learning_step(forum_landing_page)
        else:
            print_successful_status(
                "[TRAINER] The forum is known. It is possible to proceed to the crawling."
            )

        # Looked up again on purpose: the id only exists after learning.
        return self.website_dao.get_website_id_by_url(forum_landing_page)
Пример #5
0
 def remove_existing_js(self):
     """Copy /tmp/dump.html to /tmp/no_js_dump.html without <script> elements.

     The input is processed line by line with a small state machine:

     * while inside a multi-line script, lines are dropped until one contains
       the closing tag; only what follows that tag is kept;
     * scripts that open and close on the same line are cut out;
     * an opening tag left without its closing tag starts a multi-line script
       and everything from the tag to the end of the line is dropped.

     Lines that were not touched are copied verbatim; lines that were touched
     are written only if something other than whitespace is left.
     """
     print_warning("[TRAINER] Purging JS content from page...")
     # Compiled once instead of once per line. Non-greedy, so markup sitting
     # between two scripts on the same line survives; case-insensitive so
     # upper-case tags are handled as well.
     inline_script = re.compile(r"<script.*?</script>", re.IGNORECASE)
     script_tail = re.compile(r".*?</script>", re.IGNORECASE)
     script_head = re.compile(r"<script.*", re.IGNORECASE)

     with open("/tmp/dump.html") as oldfile, open("/tmp/no_js_dump.html", 'w') as newfile:
         script_open_tag = False
         for line in oldfile:
             cleaned = line
             if script_open_tag:
                 if not script_tail.search(cleaned):
                     # Still in the body of a multi-line script: skip it.
                     continue
                 # End of the multi-line script: drop the script remainder
                 # up to and including the first closing tag.
                 cleaned = script_tail.sub("", cleaned, count=1)
                 script_open_tag = False

             # Scripts fully contained in this line (searched anywhere in the
             # line, so indented tags are found too).
             cleaned = inline_script.sub("", cleaned)

             # An opening tag still present here has no closing tag on this
             # line: a multi-line script begins. Keep only what precedes it.
             if script_head.search(cleaned):
                 script_open_tag = True
                 cleaned = script_head.sub("", cleaned)

             # Compare by value, never with `is`: string identity is an
             # implementation detail.
             if cleaned == line or cleaned.strip():
                 newfile.write(cleaned)
Пример #6
0
 def to_work_or_not_to_work(self):
     """Start the current work session or schedule the next one.

     ``self.now`` is compared with the morning, afternoon and evening windows
     of ``self.work_schedule`` (a session whose start time is ``None`` is
     skipped):

     * before a session starts, ``schedule_job_and_close`` is called with the
       session's start hour and minute;
     * inside a session, ``setup_end_job_handler`` is called with the seconds
       left until the session ends;
     * after the last session, a schedule for tomorrow is generated and its
       afternoon start (tomorrow is Monday-Friday) or morning start (weekend)
       is passed to ``schedule_job_and_close``.

     Afterwards the schedule is printed, a random delay of 1-60 seconds is
     applied and the breaks handler is set up.
     """
     schedule = self.work_schedule

     if schedule.start_time_morning is not None and self.now < schedule.start_time_morning:
         # Too early to work, schedule morning
         self.schedule_job_and_close(schedule.start_time_morning.hour,
                                     schedule.start_time_morning.minute,
                                     False)
     elif schedule.start_time_morning is not None and self.now < schedule.end_time_morning:
         # Morning work session
         self.setup_end_job_handler(
             (schedule.end_time_morning - self.now).seconds,
             WorkSession.MORNING)
     elif schedule.start_time_afternoon is not None and self.now < schedule.start_time_afternoon:
         # Too early to work, schedule afternoon
         self.schedule_job_and_close(schedule.start_time_afternoon.hour,
                                     schedule.start_time_afternoon.minute,
                                     False)
     elif schedule.start_time_afternoon is not None and self.now < schedule.end_time_afternoon:
         # Afternoon work session
         self.setup_end_job_handler(
             (schedule.end_time_afternoon - self.now).seconds,
             WorkSession.AFTERNOON)
     elif schedule.start_time_evening is not None and self.now < schedule.start_time_evening:
         # Too early to work, schedule evening
         self.schedule_job_and_close(schedule.start_time_evening.hour,
                                     schedule.start_time_evening.minute,
                                     False)
     elif schedule.start_time_evening is not None and self.now < schedule.end_time_evening:
         # Evening work session
         self.setup_end_job_handler(
             (schedule.end_time_evening - self.now).seconds,
             WorkSession.EVENING)
     else:
         # Too late to work tonight. Let's go with tomorrow.
         tomorrow = (self.weekday + 1) % 7
         if 0 <= tomorrow <= 4:
             # I'll work tomorrow afternoon, since it is a weekday
             wanted_start = "start_time_afternoon"
         else:
             # I'll work tomorrow morning, since it is weekend
             wanted_start = "start_time_morning"

         tomorrow_work_schedule = WorkSchedule(tomorrow)
         # Regenerate until the wanted session exists. The retry uses
         # tomorrow's weekday as well; previously it rebuilt the schedule
         # for *today* (self.weekday).
         # NOTE(review): this assumes WorkSchedule(day) can yield different
         # schedules on repeated calls - confirm, otherwise this never ends.
         while getattr(tomorrow_work_schedule, wanted_start) is None:
             tomorrow_work_schedule = WorkSchedule(tomorrow)

         start_time = getattr(tomorrow_work_schedule, wanted_start)
         self.schedule_job_and_close(start_time.hour, start_time.minute,
                                     True)

     sleep(1)  # Yeah, just for waiting SnoozeAlarm to print its stdout...
     self.print_work_schedule()
     # Linux's cron doesn't allow expressing seconds. Without a random offset
     # an analysis could notice that, in good network conditions, all the
     # sessions start at the same second of the minute.
     seconds_to_wait = random.randint(1, 60)
     print_warning("[BOOTSTRAP] Delaying start time of " +
                   str(seconds_to_wait) + " seconds...")
     sleep(seconds_to_wait)
     self.setup_breaks_handler()
Пример #7
0
 def print_work_schedule(self):
     """Print the current time (local and Europe/Moscow) and today's sessions.

     For each of the morning, afternoon and evening sessions the start time
     and the actual duration in minutes stored on ``self.work_schedule`` are
     printed.
     """
     separator = "==================="

     print_warning(separator)
     local_now = get_localzone().localize(datetime.now())
     print_warning("Time now here: " + str(local_now))
     moscow_now = datetime.now(pytz.timezone('Europe/Moscow'))
     print_warning("Time now in Moscow: " + str(moscow_now))
     print_warning(separator)

     print_warning("Work schedule (in Europe/Moscow timezone): ")
     for session in ("morning", "afternoon", "evening"):
         start = getattr(self.work_schedule, "start_time_" + session)
         duration = getattr(self.work_schedule,
                            "actual_duration_minutes_" + session)
         print_warning("====> Start " + session + ": " + str(start) +
                       "; duration: " + str(duration))
     print_warning(separator)
Пример #8
0
    def inject_js_listeners(self, training_stage):
        """Write /tmp/clean_dump.html: the JS-free dump plus the trainer's scripts.

        /tmp/no_js_dump.html is copied line by line. Right after every line
        containing an opening ``<head>`` tag a block of scripts is written:

        * for the click-tracking stages, a stage-specific script (click
          listeners that POST the clicked element's path to localhost:8080,
          plus an alert with instructions for the operator);
        * for every stage, a ``beforeunload`` listener that calls
          http://localhost:8080/closed, followed by the bundled jQuery.

        For the click-tracking stages the opening ``<body>`` tag additionally
        receives ``onload="setupListeners()"``.

        :param training_stage: ``TrainingStage`` member selecting the script.
        """

        def has_open_tag(text, tag):
            # Same four spellings the page is probed for: lower/upper case,
            # with or without attributes.
            return ("<" + tag + " " in text or "<" + tag.upper() + " " in text
                    or "<" + tag + ">" in text
                    or "<" + tag.upper() + ">" in text)

        print_warning("[TRAINER] Injecting JS content...")
        # Read jQuery through a context manager so the handle is closed
        # instead of being left to the garbage collector.
        jquery_path = os.path.join(self.ROOT_DIR, "resources",
                                   "jquery-3.3.1.min.js")
        with open(jquery_path, "r") as jquery_file:
            jquery_content = jquery_file.read()

        # Call-on-close listener + jQuery, injected for every stage.
        close_listener = (
            '<script>window.addEventListener("beforeunload",function(e){$.get("'
            'http://localhost:8080/closed",function(e,o){console.log("should really '
            'be triggered?")});return(e||window.event).returnValue="o/","o/"});'
            '</script><script>' + jquery_content + '</script>')

        # Stages that get click-and-trigger-HTTP-request listeners.
        track_clicks = training_stage in (
            TrainingStage.FORUM, TrainingStage.SUBFORUM,
            TrainingStage.THREAD_POOL, TrainingStage.POST_POOL,
            TrainingStage.THREAD_NEXT_PAGE, TrainingStage.SUBFORUM_NEXT_PAGE,
            TrainingStage.LOGIN_PAGE)

        injected_content = ''
        if track_clicks and training_stage == TrainingStage.LOGIN_PAGE:
            # Beautified snippet lives elsewhere; this is the minified form.
            # NOTE(review): the literal looks truncated/redacted right after
            # '$.post("http://' - it is reproduced unchanged here; confirm
            # against the original JS.
            injected_content = (
                '<script>function setupListeners(){var '
                'Anchors=document.getElementsByTagName("a");for(var '
                'i=0;i<Anchors.length;i+=1){Anchors[i].removeAttribute("href")}var '
                'forms=document.getElementsByTagName("form");for(var '
                'i=0;i<forms.length;i+=1){forms[i].removeAttribute("action");forms['
                'i].removeAttribute("method")}}</script><script>var username_collected'
                '=false;var is_next_button=false;document.onclick=function(event){ '
                'console.log(event); var target=\'target\'in event?event.target:event.'
                'srcElement;console.log(target); if('
                'target.tagName===\'INPUT\' || target.tagName===\'BUTTON\' ){console.'
                'log(target.tagName);console.log(\'My thoughts against JS are censored'
                '. Why? Because removing any of these 4 prints up there causes it to '
                'fail to send the last HTTP request.\');console.log(\'Please leave '
                'this piece of black magic where it is. I hate doing things in this '
                'way, but Im done debugging JS. Well never be friends.\'); var '
                'root=document.compatMode===\'CSS1Compat\'?document.documentElement'
                ':document.body;var path=getPathTo(target);if(!username_collected){'
                '$.post("http://*****:*****@id=\'"+element.id+"\']"}if(element===document.body){return '
                'element.tagName.toLowerCase()}var ix=0;var '
                'siblings=element.parentNode.childNodes;for(var '
                'i=0;i<siblings.length;i+=1){var sibling=siblings[i];if('
                'sibling===element){return getPathTo('
                'element.parentNode)+\'/\'+element.tagName.toLowerCase()+\'[\'+('
                'ix+1)+\']\'}if(sibling.nodeType===1&&sibling.tagName===element.'
                'tagName){ix+=1}}};alert("Please click on username field.")</script>')
        elif track_clicks:
            # (stage, endpoint the click is POSTed to, instruction shown).
            # NOTE(review): POST_POOL has no row, so for that stage neither
            # an endpoint nor an alert text is appended, exactly as before;
            # the resulting script looks incomplete - confirm the intent.
            stage_table = (
                (TrainingStage.FORUM, 'localhost:8080/forum_name',
                 'alert("Click forums to crawl.'),
                (TrainingStage.SUBFORUM, 'localhost:8080/subforum_name',
                 'alert("Click subforums to crawl.'),
                (TrainingStage.SUBFORUM_NEXT_PAGE,
                 'localhost:8080/subforum_next_page',
                 'alert("Click the next page button.'),
                (TrainingStage.THREAD_POOL, 'localhost:8080/thread_name',
                 'alert("Click exactly five thread titles.'),
                (TrainingStage.THREAD_NEXT_PAGE,
                 'localhost:8080/thread_next_page',
                 'alert("Click the next page button.'),
            )
            injected_content = (
                '<script>function setupListeners(){for(var e=document.'
                'getElementsByTagName("a"),n=0;n<e.length;n++)e[n].removeAttribute'
                '("href"),e[n].addEventListener("click",function(e){name_found='
                'getPathTo(this),$.post("http://')
            for stage, endpoint, _ in stage_table:
                if training_stage == stage:
                    injected_content += endpoint
            injected_content += (
                '",{name:name_found},function(e,n){console.log("error while posting'
                '..?")})},!1)}</script><script>')
            for stage, _, instruction in stage_table:
                if training_stage == stage:
                    injected_content += instruction
            injected_content += (
                ' After you close the next page, put again Tor Browser on foreground,'
                ' if it isn\'t.");function '
                'getPathTo(element){if(element.id!==\'\'){return "//*['
                '@id=\'"+element.id+"\']"}if(element===document.body){return '
                'element.tagName.toLowerCase()}var ix=0;var '
                'siblings=element.parentNode.childNodes;for(var '
                'i=0;i<siblings.length;i+=1){var sibling=siblings[i];if('
                'sibling===element){return getPathTo('
                'element.parentNode)+\'/\'+element.tagName.toLowerCase()+\'[\'+('
                'ix+1)+\']\'}if(sibling.nodeType===1&&sibling.tagName===element.'
                'tagName){ix+=1}}}</script>')
        injected_content += close_listener

        with open("/tmp/no_js_dump.html") as oldfile, open("/tmp/clean_dump.html", 'w') as newfile:
            for line in oldfile:
                # Pre-editing for correcting body tag
                if track_clicks and has_open_tag(line, "body"):
                    index = line.find('>')
                    line = line[:index] + " onload=\"setupListeners()\"" + line[index:]
                newfile.write(line)
                # After writing head tag, let's inject the scripts
                if has_open_tag(line, "head"):
                    newfile.write(injected_content)
        print_successful_status("[TRAINER] ...done!")
Пример #9
0
 def run(self):
     """Sleep for the length of the work session, then schedule a restart and stop.

     ``self.zzz`` is the session length in seconds. After sleeping that long
     the next session is scheduled through ``schedule_restart`` and SIGTERM
     is sent to the current process.
     """
     # Floor division keeps the message in whole hours/minutes regardless of
     # whether `/` performs true division in the running interpreter.
     hours, leftover_seconds = divmod(self.zzz, 3600)
     minutes = leftover_seconds // 60
     print_warning("[CRAWLER] I will work for " + str(hours) + " hours and " + str(minutes) + " minutes.")
     time.sleep(self.zzz)
     print_warning("[CRAWLER] Stop working for now. Scheduling next work session...")
     self.schedule_restart()
     os.kill(os.getpid(), signal.SIGTERM)
Пример #10
0
    def iterate_threads(self, subforum_link):
        """Crawl every thread of the current (sub)forum, page after page.

        Threads of the page shown by the driver are visited one at a time.
        A thread is crawled when the thread-list DAO has never seen it or when
        the post count shown on the page differs from the stored one; after a
        crawl the (sub)forum page is reloaded and the scan restarts, skipping
        the threads already handled. When every thread of the page has been
        handled, the next page is opened; the method returns when no next
        page can be found.

        :param subforum_link: URL of the (sub)forum page currently shown; it
            is replaced by the next page's link while paginating and is the
            page reloaded after each crawled thread.
        """

        def resolve_next_page_link(next_page_element):
            # The href normally sits on the element itself...
            link = next_page_element.get_attribute("href")
            if link is not None:
                return link
            # ...otherwise look for it on the descendants, taking the first
            # one that actually carries a link.
            for child in next_page_element.find_elements_by_xpath(".//*"):
                link = child.get_attribute("href")
                if link:
                    return link
            # No descendant has a link: there is a problem with this button.
            raise NoSuchElementException

        print_warning("[CRAWLER] Starting to read threads on page...")
        scanned_threads = []
        first_page = True
        while True:
            # If all threads of the page have been scanned, move to next page
            thread_pool = self.get_forum_subelements(ForumField.THREAD_POOL,
                                                     self.program.driver)
            if len(scanned_threads) == len(thread_pool):
                scanned_threads = []
                # Try to reach next page, if any.
                try:
                    # vBulletin handles next pages differently compared to other forums. In particular, uses the same
                    # tag and class to define the button of the next page, plus XPATH is not exploitable due the fact
                    # it varies depending on thread length.
                    if self.website_dao.get_is_vbullettin_by_website_id(
                            self.website_id):
                        prev_next_elements = self.get_forum_subelements(
                            ForumField.SUBFORUM_NEXT_PAGE, self.program.driver)
                        try:
                            # Kept as a method call: a None result raises
                            # AttributeError, which is translated below.
                            button_count = prev_next_elements.__len__()
                            # If there are 4 navigation buttons (2 on top, 2 on bottom), or 2 on the first page,
                            # then there's a next page; else it is a last (or only) page.
                            if not (button_count == 4 or
                                    (button_count == 2 and first_page)):
                                raise NoSuchElementException
                            first_page = False
                            next_button = prev_next_elements[1]
                            subforum_link = resolve_next_page_link(next_button)
                            self.program.driver.click(next_button)
                        except AttributeError:
                            raise NoSuchElementException
                    else:
                        first_page = False
                        subforum_next_page = self.get_forum_subelement(
                            ForumField.SUBFORUM_NEXT_PAGE, self.program.driver)
                        if subforum_next_page is None:
                            raise NoSuchElementException
                        subforum_link = resolve_next_page_link(
                            subforum_next_page)
                        self.program.driver.click(subforum_next_page)
                except NoSuchElementException:
                    # No (usable) next page: the subforum is done.
                    break
            else:
                # Scan next thread
                for thread, post_count_element in izip(
                        self.get_forum_subelements(ForumField.THREAD_POOL,
                                                   self.program.driver),
                        self.get_forum_subelements(
                            ForumField.THREAD_POST_COUNT,
                            self.program.driver)):
                    post_count_text = post_count_element.get_attribute("text")
                    if post_count_text is None:
                        post_count_text = post_count_element.text
                    # Keep only the digits of the displayed post count.
                    thread_post_count_by_browser = filter(
                        unicode.isdigit, post_count_text)
                    thread_link = thread.get_attribute("href")
                    if thread_link in scanned_threads:
                        continue

                    # The thread will count as scanned, whether it will be carved or not.
                    scanned_threads.append(thread_link)
                    do_i_have_to_scan_it = True
                    has_ever_been_scanned = self.thread_list_dao.has_ever_been_scanned(
                        thread_link)
                    if has_ever_been_scanned:
                        thread_post_count_by_db = str(
                            self.thread_list_dao.
                            get_post_count_by_thread_url(thread_link))
                        if thread_post_count_by_db == thread_post_count_by_browser:
                            do_i_have_to_scan_it = False
                            print_warning(
                                "[CRAWLER] I already know thread " +
                                thread_link + " and I've collected "
                                "all the " + thread_post_count_by_browser +
                                " posts in it. Skipped.")
                    # The condition to satisfy is: has never been scanned or it has been scanned but different
                    # post count.
                    if not do_i_have_to_scan_it:
                        continue

                    wait_simple_pause()
                    self.program.driver.click(thread)
                    # Begin thread crawling
                    self.crawl_current_thread(thread_link, False)
                    # Taking note of current amount of posts
                    if has_ever_been_scanned:
                        self.thread_list_dao.update_post_count(
                            thread_link, thread_post_count_by_browser)
                    else:
                        self.thread_list_dao.insert_post_count(
                            thread_link, thread_post_count_by_browser)

                    # Done carving current thread, move on next going back to subforum
                    self.program.driver.load_url(subforum_link)
                    break  # I need to exit the for loop since elements aren't anymore connected to the DOM
Пример #11
0
    def crawl_current_thread(self, thread_link, is_second_try):
        """Store every post of the thread shown by the driver, following its pages.

        For each post of the current page the text, date, author and post
        count are read and, unless the post-dumps DAO already holds an
        identical record, inserted (followed by a reading pause). The next
        page of the thread is then opened; crawling stops when there is none.

        A stale element, an ``IndexError`` or a ``TypeError`` triggers one
        retry of the whole method; on the retry a stale element makes the
        thread page reload and the other two are only reported.

        :param thread_link: URL of the thread; stored with each post and used
            to reload the page.
        :param is_second_try: ``True`` when this call is itself the retry, so
            no further retry is started.
        """
        # Thread crawl
        # first_page is needed for carving vbulletin threads, in order to tell first and last pages apart.
        print_warning("[CRAWLER] Starting to read posts on page...")
        try:
            # TODO use is_second_try for refreshing the page and continue the parsing from there
            first_page = True
            thread_name = self.get_element_text(
                self.get_thread_subelements(ThreadField.THREAD_TITLE,
                                            self.program.driver)[0])
            while True:
                for thread_post in self.get_thread_subelements(
                        ThreadField.POST_POOL, self.program.driver):
                    # Post carving
                    try:
                        post_text = self.get_element_text(
                            self.get_thread_subelement(ThreadField.POST_TEXT,
                                                       thread_post))
                        date = self.get_element_text(
                            self.get_thread_subelement(ThreadField.DATE,
                                                       thread_post))
                        author = self.get_element_text(
                            self.get_thread_subelement(ThreadField.AUTHOR,
                                                       thread_post))
                        post_count = self.get_element_text(
                            self.get_thread_subelement(ThreadField.POST_COUNT,
                                                       thread_post))
                        if not self.post_dumps_dao.check_if_post_dump_exists(
                                self.website_id, thread_link, thread_name,
                                author, date, post_count, post_text):
                            # TODO wait longer for the first post (50wpm)
                            self.post_dumps_dao.insert_post_dump(
                                self.website_id, thread_link, thread_name,
                                author, date, post_count, post_text)
                            wait_to_read(post_text)

                    except StaleElementReferenceException as e:
                        if not is_second_try:
                            sleep(1)
                            self.crawl_current_thread(thread_link, True)
                            # The retry has walked the thread to its end;
                            # going on here would hit the same stale elements
                            # and start a new retry for each remaining post.
                            return
                        # This could be more worrying, but could also depend from a problem in the DOM of the page.
                        print_error_and_exception(
                            "[CRAWLER] Exception thrown. Might be a potential failure of "
                            "the carving.", e)
                        print_warning(
                            "[CRAWLER] I'm reloading the page...")
                        self.program.driver.load_url(thread_link)
                        # The elements still to be iterated belong to the page
                        # that was just replaced; leave the loop instead of
                        # reloading once per remaining post.
                        break

                wait_simple_pause()
                # Try to reach next page of the thread, if any
                try:
                    # vBulletin handles next pages differently compared to other forums. In particular, uses the same
                    # tag and class to define the button of the next page, plus XPATH is not exploitable due the fact
                    # it varies depending on thread length.
                    if self.website_dao.get_is_vbullettin_by_website_id(
                            self.website_id):
                        prev_next_elements = self.get_thread_subelements(
                            ThreadField.THREAD_NEXT_PAGE, self.program.driver)
                        # Kept as a method call: on a None result this raises
                        # AttributeError rather than TypeError, so it is not
                        # swallowed by the retry handler at the bottom.
                        button_count = prev_next_elements.__len__()
                        # If there are 4 navigation buttons (2 on top, 2 on bottom), or 2 on the first page,
                        # then there's a next page; else it is a last (or only) page.
                        if not (button_count == 4 or
                                (button_count == 2 and first_page)):
                            raise NoSuchElementException
                        first_page = False
                        self.program.driver.click(prev_next_elements[1])
                    else:
                        first_page = False
                        thread_next_page = self.get_thread_subelement(
                            ThreadField.THREAD_NEXT_PAGE, self.program.driver)
                        if thread_next_page is None:
                            raise NoSuchElementException
                        try:
                            self.program.driver.click(thread_next_page)
                        # TODO workaround for Offensive Community: there is a
                        # hidden element that contains incorrect links, so
                        # fall back to the second match.
                        except ElementNotInteractableException:
                            thread_next_page = self.get_thread_subelements(
                                ThreadField.THREAD_NEXT_PAGE,
                                self.program.driver)[1]
                            self.program.driver.click(thread_next_page)
                except NoSuchElementException:
                    break

            sleep(1)
        except (IndexError, TypeError) as e:
            if not is_second_try:
                sleep(1)
                self.crawl_current_thread(thread_link, True)
            else:
                print_error_and_exception(
                    "[CRAWLER] Exception thrown. Might be a potential failure of the carving.",
                    e)