Пример #1
0
class DataBaseTests(unittest.TestCase):
    """Insert-then-read tests for ``Database``.

    Covers URLs, clickables, web pages, forms and AJAX requests. Each test
    works on the ``Database("DataBaseUnit")`` handle built in ``setUp`` and
    on the module-level fixtures (``SESSION``, ``WEBPAGE_ID``, ``TEST_URL1``,
    ...).
    """

    def setUp(self):
        # unittest calls this before every test method, so each test gets
        # its own Database handle.
        self.database = Database("DataBaseUnit")

    @staticmethod
    def _sample_clickable():
        """Build the anchor clickable shared by the clickable/page tests."""
        return Clickable("click",
                         "a",
                         "body/div/div/a",
                         id="Test1",
                         html_class="Test2",
                         clickable_depth=243,
                         function_id="Test3")

    @staticmethod
    def _text_input(name, values):
        """Build a text ``INPUT`` field with the given name and values."""
        return FormInput("INPUT", name, input_type="text", values=values)

    @staticmethod
    def _post_form(*inputs):
        """Build a POST form targeting ``TEST_URL1`` from the given inputs."""
        return HtmlForm(list(inputs), TEST_URL1, "POST", dom_address=None)

    def test_url_set_and_get(self):
        # An inserted URL is handed back as the next crawl candidate and
        # keeps its depth_of_finding.
        stored = Url(TEST_URL1, depth_of_finding=3)
        self.database.insert_url_into_db(SESSION, stored)
        fetched = self.database.get_next_url_for_crawling(SESSION)
        self.assertEqual(stored, fetched)
        self.assertEqual(fetched.depth_of_finding, 3)

    def test_url_visit(self):
        # After the first candidate is marked visited, the second inserted
        # URL becomes the next candidate.
        first = Url(TEST_URL1, depth_of_finding=3)
        second = Url(TEST_URL2, depth_of_finding=25)
        for candidate in (first, second):
            self.database.insert_url_into_db(SESSION, candidate)

        before_visit = self.database.get_next_url_for_crawling(SESSION)
        self.database.visit_url(SESSION, before_visit, 25, 200)
        after_visit = self.database.get_next_url_for_crawling(SESSION)

        self.assertEqual(first, before_visit)
        self.assertEqual(second, after_visit)

    def test_url_set(self):
        # Inserting the same URL twice must not add a second entry.
        first = Url(TEST_URL1, depth_of_finding=3)
        second = Url(TEST_URL2, depth_of_finding=25)

        # (url to insert, expected number of stored urls afterwards)
        steps = ((first, 1), (first, 1), (second, 2))
        for candidate, expected_total in steps:
            self.database.insert_url_into_db(SESSION, candidate)
            self.assertEqual(self.database.urls.count(), expected_total)

    def test_clickables(self):
        # A stored clickable is read back unchanged, and marking it as
        # clicked updates the stored copy accordingly.
        anchor = self._sample_clickable()
        self.database._insert_clickable_into_db(SESSION, WEBPAGE_ID, anchor)

        stored = self.database.get_all_clickables_to_page_id_from_db(
            SESSION, WEBPAGE_ID)
        self.assertEqual(len(stored), 1)
        self.assertEqual(anchor, stored[0])

        self.database.set_clickable_clicked(
            SESSION,
            WEBPAGE_ID,
            anchor.dom_address,
            anchor.event,
            clickable_depth=243,
            clickable_type=ClickableType.CreatesNewNavigatables)

        stored = self.database.get_all_clickables_to_page_id_from_db(
            SESSION, WEBPAGE_ID)
        self.assertEqual(len(stored), 1)
        # Mirror the expected database-side changes on the local object
        # before comparing.
        anchor.clicked = True
        anchor.clickable_type = ClickableType.CreatesNewNavigatables
        self.assertEqual(anchor, stored[0])

    def test_webpage(self):
        # A stored page can be fetched both by id and by url.
        anchor = self._sample_clickable()
        page = WebPage(1,
                       url=TEST_URL1,
                       html=TEST_HTML,
                       cookiesjar=None,
                       depth=24,
                       base_url=TEST_URL2)
        page.clickables.extend([anchor])
        self.database.insert_page_into_db(SESSION, page)

        by_id = self.database.get_webpage_to_id_from_db(SESSION, 1)
        self.assertEqual(page.toString(), by_id.toString())

        by_url = self.database.get_webpage_to_url_from_db(SESSION, TEST_URL1)
        self.assertEqual(page.toString(), by_url.toString())

    def test_form1(self):
        # A username/password form is stored once and read back equal.
        login_form = self._post_form(
            FormInput("INPUT", "Username", input_type="text", values=None),
            FormInput("INPUT", "Password", input_type="password",
                      values=None))

        self.database.insert_form(SESSION, login_form, WEBPAGE_ID)
        self.assertEqual(self.database.forms.count(), 1)

        stored_forms = self.database.get_all_forms_to_page_id_from_db(
            SESSION, WEBPAGE_ID)
        self.assertEqual(login_form, stored_forms[0])
        self.assertEqual(login_form.toString(), stored_forms[0].toString())

    def test_similar_forms(self):
        # Forms that differ only in the values of their inputs must all end
        # up in a single stored form.
        for first_values in (["Thomas"], ["Edgar"]):
            variant = self._post_form(
                self._text_input("Test1", first_values),
                self._text_input("Test2", ["Mueller"]))
            self.database.insert_form(SESSION, variant, WEBPAGE_ID)
            self.assertEqual(self.database.forms.count(), 1)

        # The inputs of the last inserted form are reused for the expected
        # form below, so keep references to them.
        last_inputs = (self._text_input("Test1", ["Thomas, Edgar"]),
                       self._text_input("Test2", ["Mueller"]))
        self.database.insert_form(SESSION, self._post_form(*last_inputs),
                                  WEBPAGE_ID)
        self.assertEqual(self.database.forms.count(), 1)

        expected_form = self._post_form(*last_inputs)
        stored = self.database.get_all_forms_to_page_id_from_db(
            SESSION, WEBPAGE_ID)[0]
        self.assertEqual(stored.toString(), expected_form.toString())

    def test_not_similar_forms(self):
        # Forms whose input names differ must be stored separately.
        # (values of first input, name of second input, expected form count)
        steps = ((["Thomas"], "Test3", 1),
                 (["Edgar"], "Test2", 2))
        for first_values, second_name, expected_total in steps:
            variant = self._post_form(
                self._text_input("Test1", first_values),
                self._text_input(second_name, ["Mueller"]))
            self.database.insert_form(SESSION, variant, WEBPAGE_ID)
            self.assertEqual(self.database.forms.count(), expected_total)

    def test_web_page_extend_ajax(self):
        # AJAX requests attached after a page was stored must show up when
        # the page is loaded again.
        page = deepcopy(WEBPAGE)
        clickable_copy = deepcopy(CLICKABLE)
        page.clickables.extend([clickable_copy])
        self.database.insert_page_into_db(SESSION, page)

        request = deepcopy(AJAXREQUEST)
        self.database.extend_ajax_requests_to_webpage(SESSION, page,
                                                      [request])

        page.ajax_requests = [request]
        reloaded = self.database.get_webpage_to_url_from_db(SESSION, page.url)
        self.assertEqual(page.toString(), reloaded.toString())
        self.assertEqual(page.ajax_requests[0], request)
class DatabaseManager(object):
    """Session-bound front end to ``Database`` with optional page caches.

    Every public method forwards to the wrapped ``Database`` and supplies
    the session taken from the ``user`` passed to the constructor.

    ``MAX_CACHE_SIZE`` bounds the two in-memory caches (web pages and delta
    pages). It is set to ``0`` by the constructor, which disables caching;
    assign a positive integer to enable it.
    """

    def __init__(self, user, dropping=True):
        """Open the database for ``user`` and register the user in it.

        :param user: object providing ``username`` and ``session``.
        :param dropping: forwarded unchanged to ``Database`` (presumably
            controls whether existing data is dropped -- confirm against
            ``Database``).
        """
        self._database = Database(user.username, dropping)
        self._database.insert_user_into_db(user)
        # Most recently stored pages first; see _remember().
        self._web_page_cache = []
        self._deltapage_cache = []
        self.MAX_CACHE_SIZE = 0
        self._current_session = user.session

    def _remember(self, cache, page):
        """Put ``page`` at the front of ``cache``, honouring MAX_CACHE_SIZE."""
        limit = self.MAX_CACHE_SIZE
        if limit > 0:
            # Drop everything beyond limit - 1 entries so that the list holds
            # at most ``limit`` pages after the insert, even when the limit
            # was lowered after entries had already been cached.
            del cache[limit - 1:]
            cache.insert(0, page)

    def return_session_id_to_username(self, username):
        """Return the result of ``Database.get_user_to_username(username)``."""
        return self._database.get_user_to_username(username)

    def store_web_page(self, web_page):
        """Cache ``web_page`` (if caching is enabled) and persist it."""
        self._remember(self._web_page_cache, web_page)
        self._database.insert_page_into_db(self._current_session, web_page)

    def get_page_to_id(self, page_id):
        """Return the web page with ``page_id``, else the delta page, else None."""
        page = self.get_web_page_to_id(page_id)
        if page is None:
            page = self.get_delta_page_to_id(page_id)
        return page

    def store_delta_page(self, delta_page):
        """Cache ``delta_page`` (if caching is enabled) and persist it."""
        self._remember(self._deltapage_cache, delta_page)
        self._database.insert_delta_page_into_db(self._current_session, delta_page)

    def get_page_to_url(self, url):
        """Look up the stored web page for ``url``.

        :param url: either an object offering ``toString()`` or a value that
            is passed to the database as is (e.g. a plain string).
        """
        # Only a *missing* toString attribute is tolerated. An AttributeError
        # raised inside toString() itself is a real error and must not be
        # hidden by silently querying with the unconverted object.
        to_string = getattr(url, "toString", None)
        if to_string is not None:
            url = to_string()
        return self._database.get_webpage_to_url_from_db(self._current_session, url)

    def get_web_page_to_id(self, page_id):
        """Return the web page with ``page_id`` from the cache or the database."""
        for page in self._web_page_cache:
            if page_id == page.id:
                return page
        return self._database.get_webpage_to_id_from_db(self._current_session, page_id)

    def get_delta_page_to_id(self, delta_page_id):
        """Return the delta page with ``delta_page_id`` from cache or database."""
        for page in self._deltapage_cache:
            if delta_page_id == page.id:
                return page
        return self._database.get_delta_page_to_id(self._current_session, delta_page_id)

    def url_exists(self, url):
        """Forward to ``Database.url_exists`` for the current session."""
        return self._database.url_exists(self._current_session, url)

    def get_next_url_for_crawling(self):
        """Forward to ``Database.get_next_url_for_crawling``."""
        return self._database.get_next_url_for_crawling(self._current_session)

    def get_all_unvisited_urls_sorted_by_hash(self):
        """Forward to ``Database.get_all_unvisited_urls_sorted_by_hash``."""
        return self._database.get_all_unvisited_urls_sorted_by_hash(self._current_session)

    def insert_url_into_db(self, url):
        """Store ``url`` and return what ``Database.insert_url_into_db`` returns."""
        return self._database.insert_url_into_db(self._current_session, url)

    def insert_redirected_url(self, url):
        """Store ``url`` with ``is_redirected_url=True``."""
        return self._database.insert_url_into_db(self._current_session, url, is_redirected_url=True)

    def visit_url(self, url, webpage_id, response_code, redirected_to=None):
        """Forward the visit of ``url`` and its outcome to ``Database.visit_url``."""
        self._database.visit_url(self._current_session, url, webpage_id, response_code, redirected_to)

    def extend_ajax_requests_to_webpage(self, webpage, ajax_reuqests):
        """Attach additional AJAX requests to an already stored ``webpage``.

        The misspelled parameter name ``ajax_reuqests`` is kept on purpose so
        that callers passing it by keyword keep working.
        """
        self._database.extend_ajax_requests_to_webpage(self._current_session, webpage, ajax_reuqests)

    def get_all_crawled_delta_pages(self, url=None):
        """Forward to ``Database.get_all_crawled_deltapages_to_url_from_db``."""
        return self._database.get_all_crawled_deltapages_to_url_from_db(self._current_session, url)

    def update_clickable(self, web_page_id, clickable):
        """Persist the state of ``clickable`` on page ``web_page_id``.

        Clickables typed ``IgnoredByCrawler`` or ``UnsupportedEvent`` are
        passed to ``set_clickable_ignored``; all others are passed to
        ``set_clickable_clicked`` together with their ``links_to`` value.
        """
        ignored_types = (ClickableType.IgnoredByCrawler, ClickableType.UnsupportedEvent)
        if clickable.clickable_type in ignored_types:
            self._database.set_clickable_ignored(
                self._current_session, web_page_id, clickable.dom_address,
                clickable.event, clickable.clickable_depth,
                clickable.clickable_type)
        else:
            self._database.set_clickable_clicked(
                self._current_session, web_page_id, clickable.dom_address,
                clickable.event, clickable.clickable_depth,
                clickable.clickable_type, clickable.links_to)

    def get_url_structure(self, hash):
        """Forward to ``Database.get_url_structure_from_db`` for ``hash``."""
        return self._database.get_url_structure_from_db(self._current_session, hash)

    def insert_url_structure(self, url_description):
        """Forward to ``Database.insert_url_structure_into_db``."""
        self._database.insert_url_structure_into_db(self._current_session, url_description)

    def get_all_pages(self):
        """Forward to ``Database.get_all_pages``."""
        return self._database.get_all_pages(self._current_session)

    def get_url_structure_to_hash(self, url_hash):
        """Same database call as ``get_url_structure``, under another name."""
        return self._database.get_url_structure_from_db(self._current_session, url_hash)

    def insert_url_structure_into_db(self, url_description):
        """Same database call as ``insert_url_structure``, under another name."""
        self._database.insert_url_structure_into_db(self._current_session, url_description)

    def get_url_to_id(self, id):
        """Forward to ``Database.get_url_to_id`` for ``id``."""
        return self._database.get_url_to_id(self._current_session, id)

    def write_clusters(self, url_hash, clusters):
        """Forward to ``Database.write_cluster`` for ``url_hash``."""
        self._database.write_cluster(self._current_session, url_hash, clusters)

    def get_clusters(self, url_hash):
        """Forward to ``Database.get_clusters`` for ``url_hash``."""
        return self._database.get_clusters(self._current_session, url_hash)

    def count_visited_url_per_hash(self, url_hash):
        """Forward to ``Database.count_visited_urls_per_hash``."""
        return self._database.count_visited_urls_per_hash(self._current_session, url_hash)

    def get_all_url_structures(self):
        """Forward to ``Database.get_all_url_structures``."""
        return self._database.get_all_url_structures(self._current_session)

    def get_all_visited_urls(self):
        """Forward to ``Database.get_all_successfully_visited_urls``."""
        return self._database.get_all_successfully_visited_urls(self._current_session)

    def get_one_visited_url_per_structure(self):
        """Forward to ``Database.get_one_visited_url_per_structure``."""
        return self._database.get_one_visited_url_per_structure(self._current_session)

    def insert_attack_result(self, result, attack_url):
        """Forward to ``Database.insert_attack_result``."""
        self._database.insert_attack_result(self._current_session, result, attack_url)

    def get_asyncrequest_structure(self, structure_hash=None):
        """Forward to ``Database.get_asyncrequest_structure``."""
        return self._database.get_asyncrequest_structure(self._current_session, structure_hash)

    def get_all_get_forms(self):
        """Forward to ``Database.get_all_get_forms``."""
        return self._database.get_all_get_forms(self._current_session)

    def get_one_form_per_destination(self):
        """Forward to ``Database.get_one_form_per_destination``."""
        return self._database.get_one_form_per_destination(self._current_session)

    def num_of_ignored_urls(self, url_hash):
        """Forward to ``Database.num_of_ignored_urls`` for ``url_hash``."""
        return self._database.num_of_ignored_urls(self._current_session, url_hash)

    def url_visited(self, url):
        """Forward to ``Database.url_visited`` for ``url``."""
        return self._database.url_visited(self._current_session, url)

    def get_id_to_url(self, url):
        """Forward to ``Database.get_id_to_url`` for ``url``."""
        return self._database.get_id_to_url(self._current_session, url)

    def get_all_urls_to_domain(self, domain):
        """Forward to ``Database.get_all_urls_to_domain`` for ``domain``."""
        return self._database.get_all_urls_to_domain(self._current_session, domain)