def test_api_crawl_contents(self):
    """ Test to get crawl contents using an API end-point. """
    with patch('api_app.views.get_google_cloud_manifest_contents') as mock_cloud:
        # Stub out the Google Cloud manifest fetch.
        mock_cloud.return_value = "Test"
        user = User.objects.create_user('testUser2', 'test@example.com',
                                        'testpassword')
        crawl_request = CrawlRequest(user=user)
        crawl_request.storage_location = "abc/def"
        crawl_request.save()
        job_id = crawl_request.id

        # Authenticate as the user created above and attach the JWT.
        client = APIClient()
        response_auth = client.post(reverse('authenticate_user'), {
            'username': 'testUser2',
            'password': 'testpassword'
        }, format="json")
        access_token = response_auth.data['token']
        self.assertNotEqual('', access_token)
        client.credentials(HTTP_AUTHORIZATION='Bearer ' + access_token)

        params = {'JOB_ID': job_id, 'complete_crawl': 0}
        response_get = client.get(reverse('api_crawl_contents'), params)
        self.assertNotEqual('', response_get.data["crawl_contents"])
def test_crawl_complete(self):
    """ Test for the crawl complete API. """
    # Patch requests.post so the view's outbound HTTP call never hits the network.
    with patch('main_app.views.requests.post') as mock_post:
        user = User.objects.create_user('testUser4', 'test@example.com',
                                        'testpassword')
        client = APIClient()
        response_auth = client.post(reverse('authenticate_user'), {
            'username': 'testUser4',
            'password': 'testpassword'
        }, format="json")
        access_token = response_auth.data['token']
        self.assertNotEqual('', access_token)
        client.credentials(HTTP_AUTHORIZATION='Bearer ' + access_token)

        crawl_request = CrawlRequest(user=user)
        crawl_request.urls = "http://abc.com"
        crawl_request.save()
        User.objects.create_user('admin' + str(crawl_request.id),
                                 'test@example.com', 'testpassword')

        data = {
            'job_id': crawl_request.id,
            'manifest': 'http://abc.com',
            'csv': 'http://def.com',
            'resources_count': 20,
            'uploaded_pages': 20,
            'time_taken': 150
        }
        response = client.post(reverse('api_complete_crawl'), data,
                               format="json")
        payload = json.loads(response.content)
        self.assertEqual("done", payload["CrawlComplete"])
def test_register_crawler_manager(self):
    """ Test to check registration of the crawler manager. """
    user = User.objects.create_user('testUser3', 'test@example.com',
                                    'testpassword')
    client = APIClient()
    response_auth = client.post(reverse('authenticate_user'), {
        'username': 'testUser3',
        'password': 'testpassword'
    }, format="json")
    access_token = response_auth.data['token']
    self.assertNotEqual('', access_token)
    client.credentials(HTTP_AUTHORIZATION='Bearer ' + access_token)

    crawl_request = CrawlRequest(user=user)
    crawl_request.urls = "http://abc.com"
    crawl_request.save()

    data = {'job_id': crawl_request.id, 'endpoint': 'http://def.com'}
    response = client.post(reverse('api_register_crawler_manager'), data,
                           format="json")
    payload = json.loads(response.content)
    self.assertEqual("http://abc.com", payload["urls"][0])
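# The create-user / authenticate / attach-Bearer-token sequence above is
# repeated in each authenticated test. Below is a minimal helper sketch that
# factors it out; `_authenticated_client` is a hypothetical name, and the
# sketch assumes only the same `authenticate_user` route and DRF APIClient
# already used in these tests.
def _authenticated_client(self, username, password='testpassword'):
    """Create a user, obtain a JWT from the authenticate_user endpoint,
    and return the user plus an APIClient carrying the Bearer token."""
    user = User.objects.create_user(username, 'test@example.com', password)
    client = APIClient()
    response_auth = client.post(reverse('authenticate_user'), {
        'username': username,
        'password': password
    }, format="json")
    client.credentials(
        HTTP_AUTHORIZATION='Bearer ' + response_auth.data['token'])
    return user, client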
def test_job_details(self):
    """ Test to get the job details. """
    user = User.objects.create_user('testJobDetails', 'test@example.com',
                                    'testpassword')
    crawl_request = CrawlRequest(user=user)
    crawl_request.urls = "http://abc.com"
    crawl_request.save()

    client = Client()
    client.login(username='testJobDetails', password='testpassword')
    response = client.get(
        reverse('mainapp_jobdetails', kwargs={'job_id': crawl_request.id}))
    self.assertEqual(response.status_code, 200)
    self.assertContains(response, 'Job Details for crawl job')
def test_crawl_contents(self):
    """ Test to get the crawl contents. """
    with patch('main_app.views.get_google_cloud_manifest_contents') as mock_cloud:
        mock_cloud.return_value = b'Test'
        user = User.objects.create_user('testCrawlContents', 'test@example.com',
                                        'testpassword')
        crawl_request = CrawlRequest(user=user)
        crawl_request.storage_location = "abc/def"
        crawl_request.save()

        client = Client()
        client.login(username='testCrawlContents', password='testpassword')
        response = client.get(
            reverse('mainapp_crawlcontents',
                    kwargs={'job_id': crawl_request.id}))
        self.assertEqual(response.status_code, 200)
def api_create_crawl(request):
    """ External API to submit a crawl request. """
    logger.info('In api new job')
    username = request.data['username']
    user_obj = User.objects.get(username=username)

    crawl_request = CrawlRequest(user=user_obj)
    crawl_request.name = request.data['name']
    crawl_request.domain = request.data['domain']
    crawl_request.urls = request.data['urls']
    crawl_request.save()
    logger.info('NewJob created: %s', crawl_request.id)
    logger.info('Received urls: %s', crawl_request.urls)

    launch_crawler_manager(crawl_request, crawl_request.id)

    payload = {'jobId': crawl_request.id}
    return Response(payload, status=status.HTTP_200_OK)
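# For illustration only -- a hedged sketch of how an external service might
# call this endpoint; it is not part of the codebase. The base URL and route
# path are assumptions, and any required auth headers are omitted. Only the
# request fields ('username', 'name', 'domain', 'urls') and the 'jobId'
# response key come from the view above.
def submit_crawl_example():
    import requests  # assumed available in the calling service

    resp = requests.post(
        'http://localhost:8000/api/create_crawl/',  # hypothetical URL/route
        json={
            'username': 'testUser2',  # must be an existing user's username
            'name': 'example crawl',
            'domain': 'http://abc.com',
            'urls': 'http://abc.com',
        },
    )
    return resp.json()['jobId']  # the view responds with {'jobId': <id>}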
def test_api_get_job_status(self):
    """ Test to get the status of a job using an API end-point. """
    user = User.objects.create_user('testStatusUser', 'test@example.com',
                                    'testpassword')
    client = APIClient()
    crawl_request = CrawlRequest(user=user)
    crawl_request.name = 'test'
    crawl_request.urls = "http://abc.com"
    crawl_request.save()

    data = {'job_id': crawl_request.id}
    response = client.post(reverse('api_job_status'), data, format="json")
    payload = json.loads(response.content)
    self.assertEqual("test", payload["name"])
def test_update_job_status(self):
    """ Test for fetching the status of a job from the crawler manager and
    updating it in SQL. """
    with patch('main_app.utilities.requests.get') as mock_get:
        user = User.objects.create_user('testUpdateStatus', 'test@example.com',
                                        'testpassword')
        crawl_request = CrawlRequest(user=user)
        crawl_request.crawler_manager_endpoint = "http://abc.com"
        crawl_request.name = 'test_update_status'
        crawl_request.save()

        # Mock the crawler manager's status response for the first job.
        mock_response = mock.Mock()
        mock_response.text = ('{"job_id": ' + str(crawl_request.id) +
                              ', "processed_count": 10}')
        mock_get.return_value = mock_response
        utilities.update_job_status(crawl_request)

        # Update a second job; the mocked response still carries the first
        # job's id.
        crawl_request2 = CrawlRequest(user=user)
        crawl_request2.crawler_manager_endpoint = "http://abc.com"
        crawl_request2.name = 'test_update_status_second'
        crawl_request2.save()
        utilities.update_job_status(crawl_request2)
def test_crawl_request_model(self):
    """ Test for the CrawlRequest model. """
    user = User.objects.create_user('testCrawlRequestUser', 'test@example.com',
                                    'testpassword')
    crawl_instance = CrawlRequest(user=user)
    crawl_instance.name = 'crawl_model_test'
    crawl_instance.type = 1
    crawl_instance.domain = 'http://abc.com'
    crawl_instance.urls = 'http://url.com'
    crawl_instance.description = 'Test Crawl Request'
    crawl_instance.docs_all = True
    crawl_instance.docs_html = False
    crawl_instance.docs_docx = False
    crawl_instance.docs_pdf = False
    crawl_instance.docs_collected = 10
    crawl_instance.status = 1
    crawl_instance.storage_location = 'http://storage.com'
    crawl_instance.crawler_manager_endpoint = 'http://end.com'
    crawl_instance.manifest = 'http://manifest.com'
    crawl_instance.num_crawlers = 1
    crawl_instance.save()
    crawl_instance.get_absolute_url()

    crawl_instances = CrawlRequest.objects.filter(name='crawl_model_test')
    self.assertEqual(1, len(crawl_instances))
    self.assertEqual("1 crawl_model_test", str(crawl_instances[0]))