Example #1
    def process_uri(self, uri):
        sql_driver = MySQLDriver(self.db_name)
        output_store = OutputStore(self.db_name)
        phantom_driver = PhantomDriver(
            '--ignore-ssl-errors=true --ssl-protocol=any', 'wbxr_logger.js')

        # the timeout can be higher or lower depending on network load;
        # generally, 90 seconds seems to be fine, so stick with it
        try:
            phantom_output = phantom_driver.execute(uri, 90)
        except Exception:
            print("\t\t%-50s Phantomjs Did Not Return." % uri[:50])
            sql_driver.log_error(uri, "FAIL: Phantomjs Did Not Return.")
            # close the db connections before bailing out so we don't leak them
            sql_driver.close()
            output_store.close()
            return

        if re.match('^FAIL.+', phantom_output):
            print("\t\t%-50s Phantom Error\n\t%s" % (uri[:50], phantom_output))
            sql_driver.log_error(uri, phantom_output)
        else:
            print("\t\t%-50s %s" %
                  (uri[:50], output_store.store(uri, phantom_output)))

        # closes our db connections
        sql_driver.close()
        output_store.close()
        return
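The call phantom_driver.execute(uri, 90) above is the step that can raise, which is why it sits in a try/except. Below is a minimal sketch of what such a wrapper could look like, assuming PhantomDriver simply shells out to the phantomjs binary with a hard timeout; the class body and argument handling are illustrative, not the actual webxray implementation.

import subprocess

class PhantomDriver:
    def __init__(self, phantom_args, script_name):
        # phantom_args: extra CLI flags, e.g. '--ignore-ssl-errors=true --ssl-protocol=any'
        # script_name: the phantomjs logging script, e.g. 'wbxr_logger.js'
        self.phantom_args = phantom_args.split()
        self.script_name = script_name

    def execute(self, uri, timeout):
        # run phantomjs against the uri; raises subprocess.TimeoutExpired or
        # subprocess.CalledProcessError on failure, which the caller catches
        completed = subprocess.run(
            ['phantomjs'] + self.phantom_args + [self.script_name, uri],
            capture_output=True, text=True, timeout=timeout, check=True)
        return completed.stdout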
Example #2
    def store_result(self, params):
        """
		Handles storing task_result and removing jobs
			from the task_queue.
		"""

        # unpack params
        target = params['target']
        task = params['task']
        task_result = params['task_result']
        client_id = params['client_id']

        # client_ip is optional
        if 'client_ip' in params:
            client_ip = params['client_ip']
        else:
            client_ip = None

        # if db_name is specified we are running in server mode and we
        #	connect to the db which corresponds to the result being
        #	processed.  otherwise, we use the global db_name as we are
        #	running in non-server mode.
        if 'db_name' in params:
            if self.db_engine == 'sqlite':
                from webxray.SQLiteDriver import SQLiteDriver
                sql_driver = SQLiteDriver(params['db_name'])
            elif self.db_engine == 'postgres':
                from webxray.PostgreSQLDriver import PostgreSQLDriver
                sql_driver = PostgreSQLDriver(params['db_name'])
            else:
                print('INVALID DB ENGINE FOR %s, QUITTING!' % self.db_engine)
                quit()
            output_store = OutputStore(params['db_name'], self.db_engine)
        else:
            if self.db_engine == 'sqlite':
                from webxray.SQLiteDriver import SQLiteDriver
                sql_driver = SQLiteDriver(self.db_name)
            elif self.db_engine == 'postgres':
                from webxray.PostgreSQLDriver import PostgreSQLDriver
                sql_driver = PostgreSQLDriver(self.db_name)
            else:
                print('INVALID DB ENGINE FOR %s, QUITTING!' % self.db_engine)
                quit()

            output_store = OutputStore(self.db_name, self.db_engine)

        if task == 'get_policy':
            store_result = output_store.store_policy(task_result,
                                                     client_id,
                                                     client_ip=client_ip)
            # we never retry policies
            sql_driver.remove_task_from_queue(target, task)
            if store_result['success']:
                result = {'success': True}
            else:
                # log error
                sql_driver.log_error({
                    'client_id': client_id,
                    'task': task,
                    'target': target,
                    'msg': 'output_store fail on ' + store_result['result']
                })
                result = {'success': False, 'result': store_result['result']}
        # everything else is a scan or crawl ('get_scan', 'get_crawl', 'get_random_crawl')
        else:
            all_crawls_ok = True

            # We want to be able to re-run random crawls, and to do so we make sure
            #	the crawl_id will match
            if task == 'get_crawl' or task == 'get_scan':
                crawl_id = target
            elif task == 'get_random_crawl':
                crawl_id = []
                for result in task_result:
                    crawl_id.append(result['start_url'])
                crawl_id = json.dumps(crawl_id)

            # tweak to account for differences between scans/crawls
            if task == 'get_scan': task_result = [task_result]

            # keep track of domains
            all_3p_cookie_domains = set()
            all_3p_dom_storage_domains = set()
            all_3p_request_domains = set()
            all_3p_response_domains = set()
            all_3p_websocket_domains = set()

            # When we store a crawl we add optional fields in the page table
            #	that allow us to connect the page loads into a single crawl.
            #	the crawl_id is a hash of the target (which is a json string
            #	derived from the url_list), and the crawl_timestamp which is the
            #	first accessed time from the crawl.
            for crawl_sequence, result in enumerate(task_result):
                store_result = output_store.store_scan({
                    'browser_output': result,
                    'client_id': client_id,
                    'crawl_id': crawl_id,
                    'crawl_timestamp': task_result[0]['accessed'],
                    'crawl_sequence': crawl_sequence,
                    'client_ip': client_ip
                })

                if not store_result['success']:
                    all_crawls_ok = False
                else:
                    # we are successful, create entries in page_lookup table
                    page_lookup_table = self.build_lookup_table(
                        'page', store_result['page_id'], {
                            'requests': store_result['page_3p_request_domains'],
                            'responses': store_result['page_3p_response_domains'],
                            'websockets': store_result['page_3p_websocket_domains'],
                            'dom_storage': store_result['page_3p_dom_storage_domains'],
                            'cookies': store_result['page_3p_cookie_domains']
                        })

                    for lookup_item in page_lookup_table:
                        sql_driver.add_page_id_domain_lookup_item(
                            page_lookup_table[lookup_item])

                    # we are also making a lookup table for the crawl, keep joining the
                    #	sets as we go along
                    all_3p_request_domains.update(
                        store_result['page_3p_request_domains'])
                    all_3p_response_domains.update(
                        store_result['page_3p_response_domains'])
                    all_3p_websocket_domains.update(
                        store_result['page_3p_websocket_domains'])
                    all_3p_dom_storage_domains.update(
                        store_result['page_3p_dom_storage_domains'])
                    all_3p_cookie_domains.update(
                        store_result['page_3p_cookie_domains'])

            if all_crawls_ok:
                sql_driver.remove_task_from_queue(target, task)
                result = {'success': True}

                # build crawl lookup table
                crawl_lookup_table = self.build_lookup_table(
                    'crawl', crawl_id, {
                        'requests': all_3p_request_domains,
                        'responses': all_3p_response_domains,
                        'websockets': all_3p_websocket_domains,
                        'dom_storage': all_3p_dom_storage_domains,
                        'cookies': all_3p_cookie_domains
                    })

                # write the crawl lookup table to the db
                for lookup_item in crawl_lookup_table:
                    sql_driver.add_crawl_id_domain_lookup_item(
                        crawl_lookup_table[lookup_item])

            else:
                sql_driver.unlock_task_in_queue(target, task)
                # log error
                sql_driver.log_error({
                    'client_id': client_id,
                    'task': task,
                    'target': target,
                    'msg': 'output_store fail to store all scans for crawl_id_target ' + target
                })
                result = {
                    'success': False,
                    'result': 'unable to store all crawl loads'
                }

        # tidy up
        output_store.close()
        sql_driver.close()

        # done
        return result
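For reference, here is a hedged sketch of the params dict that store_result() expects, inferred from the unpacking at the top of the method; the concrete values, the collector object, and the browser_output variable are hypothetical and only meant to show the shape of a call.

# illustrative only: the keys mirror what store_result() unpacks above,
# the values and the calling context are hypothetical
params = {
    'target': 'https://example.com',   # scan target (or crawl id / url list for crawls)
    'task': 'get_scan',                # or 'get_crawl', 'get_random_crawl', 'get_policy'
    'task_result': browser_output,     # browser output dict (a list of them for crawls)
    'client_id': 'local_client',
    'client_ip': None,                 # optional, may be omitted
    # 'db_name': 'wbxr_demo',          # only set when running in server mode
}

result = collector.store_result(params)
if not result['success']:
    print('store failed: %s' % result['result'])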