Пример #1
0
 def __init__(self,
              id,
              url=None,
              html=None,
              cookiesjar=None,
              depth=None,
              generator=None,
              parent_id=None,
              delta_depth=None,
              base_url=None):
     """Set up the WebPage base state and store the generation bookkeeping.

     ``id``, ``url``, ``html``, ``cookiesjar``, ``depth`` and ``base_url``
     are handed to ``WebPage.__init__``. ``generator``, ``parent_id`` and
     ``delta_depth`` are kept on the instance as given, and
     ``generator_requests`` starts out as an empty list.
     """
     WebPage.__init__(self, id, url, html, cookiesjar, depth,
                      base_url=base_url)
     # Fields that exist only on this subclass.
     self.parent_id = parent_id
     self.delta_depth = delta_depth
     self.generator = generator
     self.generator_requests = []
Пример #2
0
 def test_webpage(self):
     """A stored page must read back identically by id and by URL."""
     link_clickable = Clickable(
         "click", "a", "body/div/div/a",
         id="Test1", html_class="Test2",
         clickable_depth=243, function_id="Test3")
     stored_page = WebPage(
         1, url=TEST_URL1, html=TEST_HTML, cookiesjar=None,
         depth=24, base_url=TEST_URL2)
     stored_page.clickables.extend([link_clickable])
     self.database.insert_page_into_db(SESSION, stored_page)

     # Each lookup is fetched first and compared afterwards, one at a time.
     lookups = (
         (self.database.get_webpage_to_id_from_db, 1),
         (self.database.get_webpage_to_url_from_db, TEST_URL1),
     )
     for fetch, key in lookups:
         fetched_page = fetch(SESSION, key)
         self.assertEqual(stored_page.toString(), fetched_page.toString())
Пример #3
0
 def _get_web_page_from_db(self, current_session, page_id=None, url=None, page=None):
     """Build a WebPage from a stored page document.

     When ``page`` is not supplied, the document is looked up in
     ``self.pages`` by ``page_id`` (preferred) or by ``url`` within
     ``current_session``.

     Returns the populated WebPage, or None when the lookup finds nothing.
     Raises AttributeError when neither ``page``, ``page_id`` nor ``url``
     is given.
     """
     if page is None:
         if page_id is None and url is None:
             raise AttributeError("You must specifies either page_id or url")
         query = {"session": current_session}
         if page_id is not None:
             query["web_page_id"] = page_id
         else:
             query["url"] = url
         page = self.pages.find_one(query)
         if page is None:
             return None

     web_page_id = page['web_page_id']
     clickables = self.get_all_clickables_to_page_id_from_db(current_session, web_page_id)
     forms = self.get_all_forms_to_page_id_from_db(current_session, web_page_id)
     result = WebPage(web_page_id, page['url'], page['html'], None, page['current_depth'], page['base_url'])
     result.clickables = clickables
     result.forms = forms
     result.links = [self._parse_link_from_db(stored_link) for stored_link in page['links']]
     # The document stores request ids; resolve each one to its request object.
     result.timing_requests = [
         self.get_asyncrequest_to_id(current_session, request_id)
         for request_id in page['timing_requests']
     ]
     result.ajax_requests = [
         self.get_asyncrequest_to_id(current_session, request_id)
         for request_id in page['ajax_requests']
     ]
     return result
Пример #4
0
    def replay(self, url, click=None, preclicks=None, timeout=60, delay=20):
        """Load ``url`` and replay an optional click on the resulting page.

        Args:
            url: address to load through ``self.requestor``.
            click: element to click after loading, or None for no click.
            preclicks: elements passed to the executor as ``pre_clicks``;
                defaults to a fresh empty list.
            timeout: forwarded to both the requestor and the event executor.
            delay: forwarded to the requestor.

        Returns:
            ``(errorcode, deltapage)`` as produced by the event executor, or
            ``(EventResult.ErrorWhileInitialLoading, None)`` when the
            requestor reports an empty URL after loading.
        """
        # A list default would be shared between calls; create one per call.
        if preclicks is None:
            preclicks = []

        pagehtml, newurl = self.requestor.get(QUrl(url), delay=delay, timeout=timeout)

        self._event_executor.stopLogging()
        self._event_executor.setLoggedNetworkData(self.requestor.getLoggedNetworkData())

        logging.debug("Requestor is at {}".format(newurl))
        if newurl == "":
            # couldn't load
            return EventResult.ErrorWhileInitialLoading, None

        webpage = WebPage(0, newurl, pagehtml)
        errorcode, deltapage = self._event_executor.execute(
            webpage,
            element_to_click=click,
            pre_clicks=preclicks,
            xhr_options=XHRBehavior.ObserveXHR,
            timeout=timeout)
        # Identity checks: "!= None" / "== None" would call a user-defined
        # __eq__ on the clickable or page object.
        if click is not None and deltapage is None:
            logging.info("Replay failed!")
        return errorcode, deltapage
Пример #5
0
 def _get_web_page_from_db(self,
                           current_session,
                           page_id=None,
                           url=None,
                           page=None):
     """Turn a stored page document into a fully populated WebPage.

     If ``page`` is None the document is fetched from ``self.pages`` for
     ``current_session``, keyed by ``page_id`` when given, otherwise by
     ``url``. Clickables, forms, links, timing requests and ajax requests
     are then loaded and attached to the new WebPage.

     Returns None when no document matches; raises AttributeError when
     there is nothing to look the page up by.
     """
     if page is None:
         if page_id is not None:
             selector = {"session": current_session, "web_page_id": page_id}
         elif url is not None:
             selector = {"session": current_session, "url": url}
         else:
             raise AttributeError(
                 "You must specifies either page_id or url")
         page = self.pages.find_one(selector)
         if page is None:
             return None

     stored_id = page['web_page_id']
     page_clickables = self.get_all_clickables_to_page_id_from_db(
         current_session, stored_id)
     page_forms = self.get_all_forms_to_page_id_from_db(
         current_session, stored_id)
     result = WebPage(stored_id, page['url'], page['html'], None,
                      page['current_depth'], page['base_url'])
     result.clickables = page_clickables
     result.forms = page_forms
     result.links = [
         self._parse_link_from_db(entry) for entry in page['links']
     ]

     def resolve_requests(field):
         # The document holds request ids under ``field``; load each one.
         return [
             self.get_asyncrequest_to_id(current_session, request_id)
             for request_id in page[field]
         ]

     result.timing_requests = resolve_requests('timing_requests')
     result.ajax_requests = resolve_requests('ajax_requests')
     return result
Пример #6
0
 def _login_and_return_webpage(self,
                               login_form,
                               page_with_login_form=None,
                               login_data=None,
                               login_clickable=None):
     """Submit ``login_form`` and return the page seen afterwards.

     ``page_with_login_form`` defaults to
     ``self._page_with_loginform_logged_out``. When ``login_clickable`` is
     given it is executed on a deep copy of that page first (retried once
     after an initial-loading error) and the resulting page is the one the
     form is submitted on.

     Returns the post-login WebPage, or None when the click fails twice or
     a ValueError is raised while clicking or submitting.
     """
     if page_with_login_form is None:
         page_with_login_form = self._page_with_loginform_logged_out
     try:
         if login_clickable is not None:
             page_copy = deepcopy(page_with_login_form)
             for attempt in (1, 2):
                 event_state, page_with_login_form = self._event_executor.execute(
                     page_copy, element_to_click=login_clickable)
                 if event_state != EventResult.ErrorWhileInitialLoading:
                     break
                 if attempt == 1:
                     # NOTE(review): if this is time.sleep, the pause is
                     # 2000 seconds - confirm the intended unit.
                     sleep(2000)
             else:
                 logging.debug(
                     "Two time executing fails.. stop crawling")
                 return None
             self.domain_handler.complete_urls_in_page(page_with_login_form)
             self.domain_handler.analyze_urls(page_with_login_form)
             self.async_request_handler.handle_requests(
                 page_with_login_form)
         logging.debug("Start submitting login form...")
         # Unpacking stays inside the try so a wrongly sized result is also
         # reported as None through the ValueError handler.
         (_response_code, html_after_timeouts, new_clickables, found_forms,
          found_links, timing_requests) = self._form_handler.submit_form(
              login_form, page_with_login_form, login_data)
     except ValueError:
         return None
     #TODO: Put building of Webpage inside submit function
     page_after_login = WebPage(-1, page_with_login_form.url,
                                html_after_timeouts)
     page_after_login.clickables = new_clickables
     page_after_login.links = found_links
     page_after_login.timing_requests = timing_requests
     page_after_login.forms = found_forms
     self.domain_handler.complete_urls_in_page(page_after_login)
     self.domain_handler.analyze_urls(page_after_login)
     self.async_request_handler.handle_requests(page_after_login)
     return page_after_login
Пример #7
0
    def analyze(self, url_to_request, timeout=10, current_depth=None, method="GET", data=None):
        """Load a URL in the main frame and collect what the page exposes.

        The URL is loaded with GET, or as a form-encoded POST for any other
        ``method``. The method waits for loading and for queued timing
        events, then extracts links, clickables and forms from the frame.

        Args:
            url_to_request: URL string, or an object offering ``toString()``.
            timeout: upper bound checked by both waiting loops.
            current_depth: accepted for interface compatibility; unused here.
            method: "GET" loads the URL directly; anything else posts ``data``.
            data: form data given to ``post_data_to_array``; an empty dict is
                used when omitted.

        Returns:
            ``(response_code, current_page)``. The response code is the one
            recorded for ``url_to_request``, or 200 when none was recorded.
        """
        try:
            url_to_request = url_to_request.toString()
        except AttributeError:
            # No toString(): the value is used unchanged.
            pass

        logging.debug("Start analyzing the url {}...".format(url_to_request))
        # Reset the per-run state.
        self._timing_requests = []
        self._new_clickables = []
        self._timeming_events = []
        self._current_timeming_event = None
        self._loading_complete = False
        self._analyzing_finished = False
        self.response_code = {}
        if method == "GET":
            self.mainFrame().load(QUrl(url_to_request))
        else:
            request = self.make_request(url_to_request)
            # A dict default would be shared between calls; create one here.
            post_body = self.post_data_to_array({} if data is None else data)
            request.setRawHeader("Content-Type", QByteArray("application/x-www-form-urlencoded"))
            self.mainFrame().load(request, QNetworkAccessManager.PostOperation, post_body)

        t = 0
        while not self._loading_complete and t < timeout:  # Waiting for finish processing
            self._wait(self.wait_for_processing)
            t += self.wait_for_processing

        videos = self.mainFrame().findAllElements("video")
        if len(videos) > 0:
            logging.debug("{} videos found... removing them".format(len(videos)))
            for video in videos:
                video.removeFromDocument()

        overall_waiting_time = t
        buffer_ms = 250
        while len(self._timeming_events) > 0 and overall_waiting_time < timeout:
            # Take the first queued event and remember its type.
            self._current_timeming_event = self._timeming_events.pop(0)
            self._waiting_for = self._current_timeming_event["event_type"]
            # NOTE(review): event["time"] is presumably in milliseconds while
            # overall_waiting_time accumulates the values passed to _wait;
            # confirm the units are meant to be mixed like this.
            wait_time = self._current_timeming_event["time"] - overall_waiting_time
            # Add the buffer, then scale by 1/1000 before waiting.
            wait_time = (wait_time + buffer_ms) / 1000.0
            if wait_time < 0.0:
                wait_time = 0
            self._wait(wait_time)
            overall_waiting_time += wait_time
        if overall_waiting_time < 0.5:
            # Make sure at least 0.5 has been waited in total.
            self._wait(0.5 - overall_waiting_time)

        base_url = self.mainFrame().findFirstElement("base")
        if base_url is not None:
            base_url = base_url.attribute("href")

        links, clickables = extract_links(self.mainFrame(), url_to_request)
        forms = extract_forms(self.mainFrame())
        elements_with_event_properties = property_helper(self.mainFrame())
        self.mainFrame().evaluateJavaScript(self._property_obs_js)
        self._wait(0.1)

        self._analyzing_finished = True
        html_after_timeouts = self.mainFrame().toHtml()
        response_url = self.mainFrame().url().toString()

        self.mainFrame().setHtml(None)
        self._new_clickables.extend(clickables)
        self._new_clickables.extend(elements_with_event_properties)
        self._new_clickables = purge_dublicates(self._new_clickables)

        # Missing or None entries both fall back to 200.
        response_code = self.response_code.get(url_to_request)
        if response_code is None:
            response_code = 200

        # Only the id lookup is guarded, so an AttributeError raised while
        # building the page is not mistaken for a missing parent.
        try:
            page_id = self.parent().get_next_page_id()
        except AttributeError:  # Attacker doesn't need this function...
            page_id = 42
        current_page = WebPage(page_id, response_url, html_after_timeouts)
        current_page.timing_requests = self._timing_requests
        current_page.clickables = self._new_clickables
        current_page.links = links
        current_page.forms = forms
        if base_url is not None and base_url != "":
            current_page.base_url = base_url
        return response_code, current_page
    def analyze(self,
                url_to_request,
                timeout=10,
                current_depth=None,
                method="GET",
                data=None):
        """Load a URL in the main frame and collect what the page exposes.

        The URL is loaded with GET, or as a form-encoded POST for any other
        ``method``. The method waits for loading and for queued timing
        events, then extracts links, clickables and forms from the frame.

        Args:
            url_to_request: URL string, or an object offering ``toString()``.
            timeout: upper bound checked by both waiting loops.
            current_depth: accepted for interface compatibility; unused here.
            method: "GET" loads the URL directly; anything else posts ``data``.
            data: form data given to ``post_data_to_array``; an empty dict is
                used when omitted.

        Returns:
            ``(response_code, current_page)``. The response code is the one
            recorded for ``url_to_request``, or 200 when none was recorded.
        """
        try:
            url_to_request = url_to_request.toString()
        except AttributeError:
            # No toString(): the value is used unchanged.
            pass

        logging.debug("Start analyzing the url {}...".format(url_to_request))
        # Reset the per-run state.
        self._timing_requests = []
        self._new_clickables = []
        self._timeming_events = []
        self._current_timeming_event = None
        self._loading_complete = False
        self._analyzing_finished = False
        self.response_code = {}
        if method == "GET":
            self.mainFrame().load(QUrl(url_to_request))
        else:
            request = self.make_request(url_to_request)
            # A dict default would be shared between calls; create one here.
            post_body = self.post_data_to_array(
                {} if data is None else data)
            request.setRawHeader(
                'Content-Type',
                QByteArray('application/x-www-form-urlencoded'))
            self.mainFrame().load(request, QNetworkAccessManager.PostOperation,
                                  post_body)

        t = 0
        while (not self._loading_complete
               and t < timeout):  # Waiting for finish processing
            self._wait(self.wait_for_processing)
            t += self.wait_for_processing

        videos = self.mainFrame().findAllElements("video")
        if len(videos) > 0:
            logging.debug(
                "{} videos found... removing them".format(len(videos)))
            for video in videos:
                video.removeFromDocument()

        overall_waiting_time = t
        buffer_ms = 250
        while (len(self._timeming_events) > 0
               and overall_waiting_time < timeout):
            # Take the first queued event and remember its type.
            self._current_timeming_event = self._timeming_events.pop(0)
            self._waiting_for = self._current_timeming_event['event_type']
            # NOTE(review): event["time"] is presumably in milliseconds while
            # overall_waiting_time accumulates the values passed to _wait;
            # confirm the units are meant to be mixed like this.
            wait_time = (
                self._current_timeming_event["time"] - overall_waiting_time)
            # Add the buffer, then scale by 1/1000 before waiting.
            wait_time = (wait_time + buffer_ms) / 1000.0
            if wait_time < 0.0:
                wait_time = 0
            self._wait(wait_time)
            overall_waiting_time += wait_time
        if overall_waiting_time < 0.5:
            # Make sure at least 0.5 has been waited in total.
            self._wait(0.5 - overall_waiting_time)

        base_url = self.mainFrame().findFirstElement("base")
        if base_url is not None:
            base_url = base_url.attribute("href")

        links, clickables = extract_links(self.mainFrame(), url_to_request)
        forms = extract_forms(self.mainFrame())
        elements_with_event_properties = property_helper(self.mainFrame())
        self.mainFrame().evaluateJavaScript(self._property_obs_js)
        self._wait(0.1)

        self._analyzing_finished = True
        html_after_timeouts = self.mainFrame().toHtml()
        response_url = self.mainFrame().url().toString()

        self.mainFrame().setHtml(None)
        self._new_clickables.extend(clickables)
        self._new_clickables.extend(elements_with_event_properties)
        self._new_clickables = purge_dublicates(self._new_clickables)

        # Missing or None entries both fall back to 200.
        response_code = self.response_code.get(url_to_request)
        if response_code is None:
            response_code = 200

        # Only the id lookup is guarded, so an AttributeError raised while
        # building the page is not mistaken for a missing parent.
        try:
            page_id = self.parent().get_next_page_id()
        except AttributeError:  # Attacker doesn't need this function...
            page_id = 42
        current_page = WebPage(page_id, response_url, html_after_timeouts)
        current_page.timing_requests = self._timing_requests
        current_page.clickables = self._new_clickables
        current_page.links = links
        current_page.forms = forms
        if base_url is not None and base_url != "":
            current_page.base_url = base_url
        return response_code, current_page
Пример #9
0
 def __init__(
         self,
         id,
         url=None,
         html=None,
         cookiesjar=None,
         depth=None,
         generator=None,
         parent_id=None,
         delta_depth=None,
         base_url=None):
     """Initialise the WebPage part, then keep the extra fields.

     The page data (``id``, ``url``, ``html``, ``cookiesjar``, ``depth``,
     ``base_url``) goes to ``WebPage.__init__``. ``generator``,
     ``parent_id`` and ``delta_depth`` are stored unchanged, and
     ``generator_requests`` begins as an empty list.
     """
     WebPage.__init__(
         self, id, url, html, cookiesjar, depth, base_url=base_url)
     self.generator_requests = []
     self.generator = generator
     self.delta_depth = delta_depth
     self.parent_id = parent_id
Пример #10
0
# Shared fixtures for the database tests below.
SESSION = 12345
WEBPAGE_ID = 99

TEST_URL1 = "http://example.com"
TEST_URL2 = "http://example.com/exmaple.php"
TEST_HTML = "<html><head></head><body></body></html>"

# A sample clickable with every optional field filled in.
CLICKABLE = Clickable(
    "click", "a", "body/div/div/a",
    id="Test1", html_class="Test2",
    clickable_depth=243, function_id="Test3")

# A sample page with id 1 whose base URL differs from its own URL.
WEBPAGE = WebPage(
    1, url=TEST_URL1, html=TEST_HTML, cookiesjar=None,
    depth=24, base_url=TEST_URL2)

# A sample GET request to TEST_URL1 that references the clickable above.
AJAXREQUEST = AjaxRequest(
    "GET", TEST_URL1, CLICKABLE, parameters=["test=Test"])


class DataBaseTests(unittest.TestCase):
    def setUp(self):
        """Create the Database handle used by each test.

        unittest calls this before every test method, so each test gets
        its own ``Database("DataBaseUnit")`` instance.
        """
        self.database = Database("DataBaseUnit")

    def test_url_set_and_get(self):
        url = Url(TEST_URL1, depth_of_finding=3)
        self.database.insert_url_into_db(SESSION, url)