Пример #1
0
 def test_add_crawl_job_to_db(self):
     """Round-trip the fixture crawl job through the DB and compare columns.

     The job is stored with ``dbu.add_crawl_job_to_db``, fetched back by the
     id that call returned, and every checked column of the first fetched
     row is compared against the matching attribute of ``self.crawl_job``.
     """
     conn = self.db_conn
     job = self.crawl_job

     inserted_id = dbu.add_crawl_job_to_db(job, conn)  # insert mock obj to db
     matches = dbu.get_crawl_job_from_db(conn, by='id', value=inserted_id)
     row = matches[0]  # retrieve inserted obj

     agent = job.crawl_agent
     # (column name, value the fixture object says it should hold)
     expected_columns = (
         ('id', inserted_id),
         ('num_crawl_urls', job.num_crawl_urls),
         ('job_dir_path', job.job_dir),
         ('browser_user_agent', agent.user_agent_str),
         ('browser_type', agent.type),
         ('max_parallel_procs', job.max_parallel_procs),
         ('desc', job.desc),
     )
     for column, expected in expected_columns:
         self.assert_db_val_equal(row, column, expected)
Пример #2
0
 def test_add_crawl_job_to_db(self):
     """Check that a stored crawl job reads back with the values it was given.

     Inserts ``self.crawl_job`` through ``dbu.add_crawl_job_to_db``, looks the
     row up again by the returned id, and asserts column by column that the
     first returned row agrees with the in-memory job object.
     """
     job = self.crawl_job
     db = self.db_conn

     job_id = dbu.add_crawl_job_to_db(job, db)  # insert mock obj to db
     stored = dbu.get_crawl_job_from_db(db, by='id', value=job_id)[0]  # retrieve inserted obj

     browser = job.crawl_agent
     # Pair each DB column with the value expected from the fixture.
     checks = [
         ('id', job_id),
         ('num_crawl_urls', job.num_crawl_urls),
         ('job_dir_path', job.job_dir),
         ('browser_user_agent', browser.user_agent_str),
         ('browser_type', browser.type),
         ('max_parallel_procs', job.max_parallel_procs),
         ('desc', job.desc),
     ]
     for col_name, want in checks:
         self.assert_db_val_equal(stored, col_name, want)
Пример #3
0
 def __init__(self, agent):
     """Create a crawl job bound to ``agent`` and record it in the database.

     A job folder is created, the bookkeeping fields start out empty, the
     job and the agent are linked to each other, and the job is passed to
     ``dbu.add_crawl_job_to_db``; the id that call returns is stored on
     both the job and the agent.

     Args:
         agent: crawl agent object; it receives ``crawl_job``, ``job_dir``
             and ``crawl_id`` attributes from this constructor.
     """
     job_dir = create_job_folder()
     self.job_dir = job_dir

     # Bookkeeping fields; nothing in this constructor fills them in.
     self.desc = ''
     self.urls, self.url_tuples = [], []
     self.num_crawl_urls = 0
     self.max_parallel_procs = MAX_PARALLEL_PROCESSES

     # Link job and agent in both directions.
     self.crawl_agent = agent
     agent.crawl_job = self
     agent.job_dir = job_dir  # for passing to multiprocessing worker - should find a better way
     self.index_html_log = os.path.join(agent.job_dir, 'index.html')

     # The insert is handed this object, so it stays after the assignments
     # above (presumably it reads those attributes - verify in dbu).
     connection = dbu.mysql_init_db('fp_detective')
     self.db_conn = connection
     crawl_id = dbu.add_crawl_job_to_db(self, connection)
     self.crawl_id = crawl_id
     agent.crawl_id = crawl_id
Пример #4
0
 def __init__(self, agent):
     """Set up a new crawl job for ``agent`` and register it via ``dbu``.

     Creates the job folder, initialises empty bookkeeping fields, links
     the agent and the job to each other, opens a connection with
     ``dbu.mysql_init_db('fp_detective')`` and stores the id returned by
     ``dbu.add_crawl_job_to_db`` on both the job and the agent.

     Args:
         agent: crawl agent object; this constructor sets its
             ``crawl_job``, ``job_dir`` and ``crawl_id`` attributes.
     """
     folder = create_job_folder()
     self.job_dir = folder

     # Empty defaults; this constructor does not populate them further.
     self.desc = ''
     self.urls = []
     self.url_tuples = []
     self.num_crawl_urls = 0
     self.max_parallel_procs = MAX_PARALLEL_PROCESSES

     # Two-way link between the job and its agent.
     self.crawl_agent = agent
     agent.crawl_job = self
     agent.job_dir = folder  # for passing to multiprocessing worker - should find a better way
     self.index_html_log = os.path.join(agent.job_dir, 'index.html')

     # Register the job last: the insert call receives this object
     # (presumably to read the fields set above - verify in dbu).
     db = dbu.mysql_init_db('fp_detective')
     self.db_conn = db
     new_crawl_id = dbu.add_crawl_job_to_db(self, db)
     self.crawl_id = new_crawl_id
     agent.crawl_id = new_crawl_id