示例#1
0
    def select(self):
        """Load every ``localStorage`` row joined with its crawl and task.

        Each result row is mapped onto a ``LocalStorageModel`` whose
        ``crawl_id`` attribute holds a populated ``CrawlModel``; that model's
        ``task_id`` attribute in turn holds a populated ``TaskModel`` (the
        nested objects are stored in place of the raw foreign-key values).

        Returns:
            list: one ``LocalStorageModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
                SELECT         
                localStorage.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                localStorage.page_url,
                localStorage.scope,
                localStorage.KEY,
                localStorage.value
                FROM localStorage
                INNER JOIN crawl ON localStorage.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id                        
                """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # local storage model (crawl_id carries the CrawlModel)
                    local_storage_model = LocalStorageModel()
                    local_storage_model.id = row[0]
                    local_storage_model.crawl_id = crawl_model
                    local_storage_model.page_url = row[12]
                    local_storage_model.scope = row[13]
                    local_storage_model.KEY = row[14]
                    local_storage_model.value = row[15]

                    model_list.append(local_storage_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#2
0
    def select(self):
        """Load every ``CrawlHistory`` row joined with its crawl and task.

        Each result row becomes a ``CrawlHistoryModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel``; that model's ``task_id``
        attribute holds a populated ``TaskModel``. The ``dtg`` column is
        stored on the model as ``timestamp``.

        Returns:
            list: one ``CrawlHistoryModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
        SELECT         
        crawl.crawl_id,
        task.task_id,
        task.start_time,
        task.manager_params,
        task.openwpm_version,
        task.browser_version,
        crawl.browser_params,
        crawl.screen_res,
        crawl.ua_string,
        crawl.finished,
        crawl.start_time,
        CrawlHistory.command,
        CrawlHistory.arguments,
        CrawlHistory.bool_success,
        CrawlHistory.dtg
        FROM CrawlHistory
        INNER JOIN crawl ON CrawlHistory.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id        
        """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[1]
                    task_model.start_time = row[2]
                    task_model.manager_params = row[3]
                    task_model.openwpm_version = row[4]
                    task_model.browser_version = row[5]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[0]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[6]
                    crawl_model.screen_res = row[7]
                    crawl_model.ua_string = row[8]
                    crawl_model.finished = row[9]
                    crawl_model.start_time = row[10]

                    # crawl history model (crawl_id carries the CrawlModel)
                    crawl_history_model = CrawlHistoryModel()
                    crawl_history_model.crawl_id = crawl_model
                    crawl_history_model.command = row[11]
                    crawl_history_model.arguments = row[12]
                    crawl_history_model.bool_success = row[13]
                    crawl_history_model.timestamp = row[14]
                    model_list.append(crawl_history_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#3
0
    def select(self):
        """Load every ``site_visits`` row joined with its crawl and task.

        Each result row becomes a ``SiteVisitsModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel``; that model's ``task_id``
        attribute holds a populated ``TaskModel``.

        Returns:
            list: one ``SiteVisitsModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
        SELECT         
        site_visits.visit_id,
        crawl.crawl_id,
        task.task_id,
        task.start_time,
        task.manager_params,
        task.openwpm_version,
        task.browser_version,
        crawl.browser_params,
        crawl.screen_res,
        crawl.ua_string,
        crawl.finished,
        crawl.start_time,
        site_visits.site_url
        FROM site_visits
        INNER JOIN crawl ON site_visits.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id        
        """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # site visits model (crawl_id carries the CrawlModel)
                    site_visits_model = SiteVisitsModel()
                    site_visits_model.visit_id = row[0]
                    site_visits_model.crawl_id = crawl_model
                    site_visits_model.site_url = row[12]
                    model_list.append(site_visits_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#4
0
 def select(self):
     """Load every row of the ``task`` table as a ``TaskModel``.

     Returns:
         list: one ``TaskModel`` per row, in the order the cursor returned
         them; empty when the table has no rows.

     Raises:
         Any exception raised while querying or building the models
         propagates unchanged, but only after the cursor has been closed
         and ``self.disconnect()`` has been called.
     """
     model_list = []
     # NOTE(review): ``SELECT *`` makes the indices below depend on the
     # table's physical column order; presumably task_id, start_time,
     # manager_params, openwpm_version, browser_version — confirm against
     # the schema.
     query = "SELECT * FROM task"
     self.connect()
     # try/finally guarantees the connection and cursor are released even
     # when the query or the row mapping fails.
     try:
         cursor = self.connection.cursor()
         try:
             cursor.execute(query)
             for row in cursor.fetchall():
                 model = TaskModel()
                 model.task_id = row[0]
                 model.start_time = row[1]
                 model.manager_params = row[2]
                 model.openwpm_version = row[3]
                 model.browser_version = row[4]
                 model_list.append(model)
         finally:
             cursor.close()
     finally:
         self.disconnect()
     return model_list
示例#5
0
    def select(self):
        """Load every ``crawl`` row joined with its task.

        Each result row becomes a ``CrawlModel`` whose ``task_id`` attribute
        holds a populated ``TaskModel`` (stored in place of the raw
        foreign-key value).

        Returns:
            list: one ``CrawlModel`` per joined row, in the order the cursor
            returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
        SELECT 
        crawl.crawl_id,
        task.task_id,
        task.start_time,
        task.manager_params,
        task.openwpm_version,
        task.browser_version,
        crawl.browser_params,
        crawl.screen_res,
        crawl.ua_string,
        crawl.finished,
        crawl.start_time
        FROM crawl
        INNER JOIN task on crawl.task_id = task.task_id
        """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[1]
                    task_model.start_time = row[2]
                    task_model.manager_params = row[3]
                    task_model.openwpm_version = row[4]
                    task_model.browser_version = row[5]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[0]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[6]
                    crawl_model.screen_res = row[7]
                    crawl_model.ua_string = row[8]
                    crawl_model.finished = row[9]
                    crawl_model.start_time = row[10]
                    model_list.append(crawl_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#6
0
    def select(self):
        """Load every ``profile_cookies`` row with its crawl, task and visit.

        Each result row becomes a ``ProfileCookiesModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel`` (itself carrying a
        ``TaskModel`` in ``task_id``) and whose ``visit_id`` attribute holds
        a populated ``SiteVisitsModel`` that shares the same ``CrawlModel``.

        Returns:
            list: one ``ProfileCookiesModel`` per joined row, in the order
            the cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
                SELECT         
                profile_cookies.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                profile_cookies.baseDomain,
                profile_cookies.name,
                profile_cookies.value,
                profile_cookies.host,
                profile_cookies.path,
                profile_cookies.expiry,
                profile_cookies.accessed,
                profile_cookies.creationTime,
                profile_cookies.isSecure,
                profile_cookies.isHttpOnly
                FROM profile_cookies
                INNER JOIN crawl ON profile_cookies.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON profile_cookies.visit_id = site_visits.visit_id        
                """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # site visits model
                    site_visits_model = SiteVisitsModel()
                    site_visits_model.visit_id = row[12]
                    site_visits_model.crawl_id = crawl_model
                    site_visits_model.site_url = row[13]

                    # profile cookies model
                    profile_cookies_model = ProfileCookiesModel()
                    profile_cookies_model.id = row[0]
                    profile_cookies_model.crawl_id = crawl_model
                    profile_cookies_model.visit_id = site_visits_model
                    profile_cookies_model.baseDomain = row[14]
                    profile_cookies_model.name = row[15]
                    profile_cookies_model.value = row[16]
                    profile_cookies_model.host = row[17]
                    profile_cookies_model.path = row[18]
                    profile_cookies_model.expiry = row[19]
                    profile_cookies_model.accessed = row[20]
                    profile_cookies_model.creationTime = row[21]
                    profile_cookies_model.isSecure = row[22]
                    profile_cookies_model.isHttpOnly = row[23]

                    model_list.append(profile_cookies_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#7
0
    def select(self):
        """Load every ``javascript`` row with its crawl, task and visit.

        Each result row becomes a ``JavascriptModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel`` (itself carrying a
        ``TaskModel`` in ``task_id``) and whose ``visit_id`` attribute holds
        a populated ``SiteVisitsModel`` that shares the same ``CrawlModel``.

        Returns:
            list: one ``JavascriptModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
                SELECT         
                javascript.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                javascript.script_url,
                javascript.script_line,
                javascript.script_col,
                javascript.func_name,
                javascript.script_loc_eval,
                javascript.call_stack,
                javascript.symbol,
                javascript.operation,
                javascript.value,
                javascript.arguments,
                javascript.time_stamp
                FROM javascript
                INNER JOIN crawl ON javascript.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON javascript.visit_id = site_visits.visit_id        
                """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # site visits model
                    site_visits_model = SiteVisitsModel()
                    site_visits_model.visit_id = row[12]
                    site_visits_model.crawl_id = crawl_model
                    site_visits_model.site_url = row[13]

                    # javascript model
                    javascript_model = JavascriptModel()
                    javascript_model.id = row[0]
                    javascript_model.crawl_id = crawl_model
                    javascript_model.visit_id = site_visits_model
                    javascript_model.script_url = row[14]
                    javascript_model.script_line = row[15]
                    javascript_model.script_col = row[16]
                    javascript_model.func_name = row[17]
                    javascript_model.script_loc_eval = row[18]
                    javascript_model.call_stack = row[19]
                    javascript_model.symbol = row[20]
                    javascript_model.operation = row[21]
                    javascript_model.value = row[22]
                    javascript_model.arguments = row[23]
                    javascript_model.time_stamp = row[24]

                    model_list.append(javascript_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#8
0
    def select(self):
        """Load every ``http_requests`` row with its crawl, task and visit.

        Each result row becomes an ``HttpRequestsModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel`` (itself carrying a
        ``TaskModel`` in ``task_id``) and whose ``visit_id`` attribute holds
        a populated ``SiteVisitsModel`` that shares the same ``CrawlModel``.

        Returns:
            list: one ``HttpRequestsModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
                SELECT         
                http_requests.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                http_requests.url,
                http_requests.top_level_url,
                http_requests.method,
                http_requests.referrer,
                http_requests.headers,
                http_requests.channel_id,
                http_requests.is_XHR,
                http_requests.is_frame_load,
                http_requests.is_full_page,
                http_requests.is_third_party_channel,
                http_requests.is_third_party_window,
                http_requests.triggering_origin,
                http_requests.loading_origin,
                http_requests.loading_href,
                http_requests.req_call_stack,
                http_requests.content_policy_type,
                http_requests.post_body,
                http_requests.time_stamp
                FROM http_requests
                INNER JOIN crawl ON http_requests.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON http_requests.visit_id = site_visits.visit_id        
                """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # site visits model
                    site_visits_model = SiteVisitsModel()
                    site_visits_model.visit_id = row[12]
                    site_visits_model.crawl_id = crawl_model
                    site_visits_model.site_url = row[13]

                    # http requests model
                    http_requests_model = HttpRequestsModel()
                    http_requests_model.id = row[0]
                    http_requests_model.crawl_id = crawl_model
                    http_requests_model.visit_id = site_visits_model
                    http_requests_model.url = row[14]
                    http_requests_model.top_level_url = row[15]
                    http_requests_model.method = row[16]
                    http_requests_model.referrer = row[17]
                    http_requests_model.headers = row[18]
                    http_requests_model.channel_id = row[19]
                    http_requests_model.is_XHR = row[20]
                    http_requests_model.is_frame_load = row[21]
                    http_requests_model.is_full_page = row[22]
                    http_requests_model.is_third_party_channel = row[23]
                    http_requests_model.is_third_party_window = row[24]
                    http_requests_model.triggering_origin = row[25]
                    http_requests_model.loading_origin = row[26]
                    http_requests_model.loading_href = row[27]
                    http_requests_model.req_call_stack = row[28]
                    http_requests_model.content_policy_type = row[29]
                    http_requests_model.post_body = row[30]
                    http_requests_model.time_stamp = row[31]

                    model_list.append(http_requests_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#9
0
    def select(self):
        """Load every ``flash_cookies`` row with its crawl, task and visit.

        Each result row becomes a ``FlashCookiesModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel`` (itself carrying a
        ``TaskModel`` in ``task_id``) and whose ``visit_id`` attribute holds
        a populated ``SiteVisitsModel`` that shares the same ``CrawlModel``.

        Returns:
            list: one ``FlashCookiesModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
                SELECT         
                flash_cookies.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                flash_cookies.domain,
                flash_cookies.filename,
                flash_cookies.local_path,
                flash_cookies.key,
                flash_cookies.content
                FROM flash_cookies
                INNER JOIN crawl ON flash_cookies.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON flash_cookies.visit_id = site_visits.visit_id        
                """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # site visits model
                    site_visits_model = SiteVisitsModel()
                    site_visits_model.visit_id = row[12]
                    site_visits_model.crawl_id = crawl_model
                    site_visits_model.site_url = row[13]

                    # flash cookie model
                    flash_cookie_model = FlashCookiesModel()
                    flash_cookie_model.id = row[0]
                    flash_cookie_model.crawl_id = crawl_model
                    flash_cookie_model.visit_id = site_visits_model
                    flash_cookie_model.domain = row[14]
                    flash_cookie_model.filename = row[15]
                    flash_cookie_model.local_path = row[16]
                    flash_cookie_model.key = row[17]
                    flash_cookie_model.content = row[18]

                    model_list.append(flash_cookie_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list
示例#10
0
    def select(self):
        """Load every ``http_responses`` row with its crawl, task and visit.

        Each result row becomes an ``HttpResponsesModel`` whose ``crawl_id``
        attribute holds a populated ``CrawlModel`` (itself carrying a
        ``TaskModel`` in ``task_id``) and whose ``visit_id`` attribute holds
        a populated ``SiteVisitsModel`` that shares the same ``CrawlModel``.

        Returns:
            list: one ``HttpResponsesModel`` per joined row, in the order the
            cursor returned them; empty when the query yields no rows.

        Raises:
            Any exception raised while querying or building the models
            propagates unchanged, but only after the cursor has been closed
            and ``self.disconnect()`` has been called.
        """
        model_list = []
        query = """
                SELECT         
                http_responses.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                http_responses.url,
                http_responses.method,
                http_responses.referrer,
                http_responses.response_status,
                http_responses.response_status_text,
                http_responses.is_cached,
                http_responses.headers,
                http_responses.channel_id,
                http_responses.location,
                http_responses.time_stamp,
                http_responses.content_hash
                FROM http_responses
                INNER JOIN crawl ON http_responses.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON http_responses.visit_id = site_visits.visit_id        
                """
        self.connect()
        # try/finally guarantees the connection and cursor are released even
        # when the query or the row mapping fails.
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                # Row indices below follow the column order of the SELECT list.
                for row in cursor.fetchall():
                    # task model
                    task_model = TaskModel()
                    task_model.task_id = row[2]
                    task_model.start_time = row[3]
                    task_model.manager_params = row[4]
                    task_model.openwpm_version = row[5]
                    task_model.browser_version = row[6]

                    # crawl model (task_id carries the TaskModel object)
                    crawl_model = CrawlModel()
                    crawl_model.crawl_id = row[1]
                    crawl_model.task_id = task_model
                    crawl_model.browser_params = row[7]
                    crawl_model.screen_res = row[8]
                    crawl_model.ua_string = row[9]
                    crawl_model.finished = row[10]
                    crawl_model.start_time = row[11]

                    # site visits model
                    site_visits_model = SiteVisitsModel()
                    site_visits_model.visit_id = row[12]
                    site_visits_model.crawl_id = crawl_model
                    site_visits_model.site_url = row[13]

                    # http responses model
                    http_responses_model = HttpResponsesModel()
                    http_responses_model.id = row[0]
                    http_responses_model.crawl_id = crawl_model
                    http_responses_model.visit_id = site_visits_model
                    http_responses_model.url = row[14]
                    http_responses_model.method = row[15]
                    http_responses_model.referrer = row[16]
                    http_responses_model.response_status = row[17]
                    http_responses_model.response_status_text = row[18]
                    http_responses_model.is_cached = row[19]
                    http_responses_model.headers = row[20]
                    http_responses_model.channel_id = row[21]
                    http_responses_model.location = row[22]
                    http_responses_model.time_stamp = row[23]
                    http_responses_model.content_hash = row[24]

                    model_list.append(http_responses_model)
            finally:
                cursor.close()
        finally:
            self.disconnect()
        return model_list