def test_add_crawl_job_to_db(self):
    # insert the mock crawl job object into the database
    crawl_job_id = dbu.add_crawl_job_to_db(self.crawl_job, self.db_conn)
    # retrieve the row that was just inserted
    crawl_job_row = dbu.get_crawl_job_from_db(self.db_conn, by='id', value=crawl_job_id)[0]
    # every stored column should match the corresponding attribute of the mock object
    self.assert_db_val_equal(crawl_job_row, 'id', crawl_job_id)
    self.assert_db_val_equal(crawl_job_row, 'num_crawl_urls', self.crawl_job.num_crawl_urls)
    self.assert_db_val_equal(crawl_job_row, 'job_dir_path', self.crawl_job.job_dir)
    self.assert_db_val_equal(crawl_job_row, 'browser_user_agent', self.crawl_job.crawl_agent.user_agent_str)
    self.assert_db_val_equal(crawl_job_row, 'browser_type', self.crawl_job.crawl_agent.type)
    self.assert_db_val_equal(crawl_job_row, 'max_parallel_procs', self.crawl_job.max_parallel_procs)
    self.assert_db_val_equal(crawl_job_row, 'desc', self.crawl_job.desc)
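# A minimal sketch of the assert_db_val_equal helper the test above relies on;
# the name comes from the test itself, but this body is an assumption. It
# presumes rows come back as column-name-indexed mappings (e.g. via a
# MySQLdb DictCursor) and would live on the same TestCase class.
def assert_db_val_equal(self, row, column, expected):
    # compare the stored column value against the expected Python value
    self.assertEqual(row[column], expected,
                     "column %r: expected %r, got %r" % (column, expected, row[column]))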
def __init__(self, agent):
    self.job_dir = create_job_folder()
    self.desc = ''
    self.urls = []
    self.url_tuples = []
    self.num_crawl_urls = 0
    self.max_parallel_procs = MAX_PARALLEL_PROCESSES
    # wire the agent and the job to each other so either side can reach the other
    self.crawl_agent = agent
    self.crawl_agent.crawl_job = self
    # duplicated on the agent for passing to the multiprocessing worker - should find a better way
    self.crawl_agent.job_dir = self.job_dir
    self.index_html_log = os.path.join(self.crawl_agent.job_dir, 'index.html')
    # register this job in the database and record the id it was assigned
    self.db_conn = dbu.mysql_init_db('fp_detective')
    self.crawl_id = dbu.add_crawl_job_to_db(self, self.db_conn)
    self.crawl_agent.crawl_id = self.crawl_id
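# A minimal sketch of what dbu.add_crawl_job_to_db is assumed to do: insert one
# row describing the job and return the auto-generated primary key that becomes
# self.crawl_id above. The table and column names are inferred from the test;
# the SQL itself is an assumption, not the project's confirmed implementation.
def add_crawl_job_to_db(crawl_job, db_conn):
    cursor = db_conn.cursor()
    cursor.execute(
        "INSERT INTO crawl_job"
        " (num_crawl_urls, job_dir_path, browser_user_agent,"
        "  browser_type, max_parallel_procs, `desc`)"  # `desc` is a reserved word in MySQL
        " VALUES (%s, %s, %s, %s, %s, %s)",
        (crawl_job.num_crawl_urls, crawl_job.job_dir,
         crawl_job.crawl_agent.user_agent_str, crawl_job.crawl_agent.type,
         crawl_job.max_parallel_procs, crawl_job.desc))
    db_conn.commit()          # make the insert visible to other connections
    return cursor.lastrowid   # MySQL auto-increment id of the new row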