Пример #1
0
    def testVerifyFromTagasauris(self):
        # Posts three urls to the tagasauris sample-add endpoint of a job that
        # has 'google.com' as its only gold sample and same_domain_allowed=2,
        # and checks the reported result, the 'all' flag and the number of
        # matching Sample rows after each request.
        job = Job.objects.create_active(
            account=self.user.get_profile(),
            gold_samples=json.dumps([{'url': 'google.com', 'label': LABEL_YES}]),
            same_domain_allowed=2,
            no_of_urls=10,
        )

        worker_id = '1234'

        def submit(url):
            # POST `url` as a JSON body. Every request in this test is
            # expected to answer 200 with both 'result' and 'all' present in
            # the decoded body, which is returned to the caller.
            resp = self.c.post(
                '%ssample/add/tagasauris/%s/?format=json'
                % (self.api_url, job.id),
                json.dumps({'url': url, 'worker_id': worker_id}),
                "text/json")
            self.assertEqual(resp.status_code, 200)

            resp_dict = json.loads(resp.content)

            # assertIn shows the decoded body when a key is missing, unlike
            # assertTrue(key in resp_dict.keys()).
            self.assertIn('result', resp_dict)
            self.assertIn('all', resp_dict)
            return resp_dict

        def stored(url):
            # Number of this job's Sample rows saved under the sanitized url.
            return Sample.objects.filter(
                job=job, url=Sample.sanitize_url(url)).count()

        # Verifying first url (and adding)
        resp_dict = submit('google.com/1')
        self.assertEqual('added', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        self.assertEqual(stored('google.com/1'), 1)

        # This time verification should fail because of too many urls from
        # the same domain
        resp_dict = submit('google.com/2')
        self.assertEqual('domain duplicate', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        self.assertEqual(stored('google.com/2'), 0)

        # This time verification should fail because of duplicated url (look
        # at golden sample)
        resp_dict = submit('google.com')
        self.assertEqual('duplicate', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        # Exactly one row is expected for this url (presumably the gold
        # sample's own row -- confirm against Job.objects.create_active).
        self.assertEqual(stored('google.com'), 1)
Пример #2
0
    def testVerifyFromTagasaurisErrors(self):
        # Sends four invalid requests to the tagasauris sample-add endpoint
        # and checks that each one gets a non-200 answer whose decoded body
        # has an 'error' key, and that no Sample is stored for the posted url.
        job = Job.objects.create_active(
            account=self.user.get_profile(),
            gold_samples=json.dumps([{'url': 'google.com', 'label': LABEL_YES}]),
            same_domain_allowed=2,
            no_of_urls=10,
        )

        worker_id = '1234'
        # Every scenario below uses the same url.
        newest_url = 'google.com/1'

        def assert_rejected(job_id, *post_args):
            # `post_args` are forwarded to the test client's post() after the
            # endpoint url: the request body and, optionally, a content type.
            resp = self.c.post(
                '%ssample/add/tagasauris/%s/?format=json'
                % (self.api_url, job_id),
                *post_args)
            self.assertNotEqual(resp.status_code, 200)

            resp_dict = json.loads(resp.content)

            # assertIn shows the decoded body when the key is missing.
            self.assertIn('error', resp_dict)
            # No job filter on purpose: the url must not be stored at all.
            self.assertEqual(Sample.objects.filter(
                url=Sample.sanitize_url(newest_url)).count(), 0)

        # Error on not existing job.
        assert_rejected(
            1234567,
            json.dumps({'url': newest_url, 'worker_id': worker_id}),
            "text/json")

        # Error on wrong post data (not json): the dict is handed to the
        # client as-is, without serialising it or naming a content type.
        assert_rejected(job.id, {'url': newest_url, 'worker_id': worker_id})

        # Error on wrong post data (parameters errors): 'url' is missing.
        assert_rejected(
            job.id, json.dumps({'worker_id': worker_id}), "text/json")

        # Error on wrong post data (parameters errors): 'worker_id' is
        # missing.
        assert_rejected(
            job.id, json.dumps({'url': newest_url}), "text/json")
Пример #3
0
    def testVerifyFromTagasaurisLimit(self):
        # Posts three urls to the tagasauris sample-add endpoint of a job
        # created with no_of_urls=2 and checks the reported result, the 'all'
        # flag and the stored Sample rows as the limit is reached and then
        # exceeded.
        job = Job.objects.create_active(
            account=self.user.get_profile(),
            gold_samples=json.dumps([{'url': 'google.com', 'label': LABEL_YES}]),
            same_domain_allowed=20,
            no_of_urls=2,
        )

        worker_id = '1234'

        def submit(url):
            # POST `url` as a JSON body. Every request in this test is
            # expected to answer 200 with both 'result' and 'all' present in
            # the decoded body, which is returned to the caller.
            resp = self.c.post(
                '%ssample/add/tagasauris/%s/?format=json'
                % (self.api_url, job.id),
                json.dumps({'url': url, 'worker_id': worker_id}),
                "text/json")
            self.assertEqual(resp.status_code, 200)

            resp_dict = json.loads(resp.content)

            # assertIn shows the decoded body when a key is missing, unlike
            # assertTrue(key in resp_dict.keys()).
            self.assertIn('result', resp_dict)
            self.assertIn('all', resp_dict)
            return resp_dict

        def stored(url):
            # Number of this job's Sample rows saved under the sanitized url.
            return Sample.objects.filter(
                job=job, url=Sample.sanitize_url(url)).count()

        # Verifying first url (and adding). We need one more.
        resp_dict = submit('google.com/1')
        self.assertEqual('added', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        self.assertEqual(stored('google.com/1'), 1)

        # Verifying second url. Gathering should be completed.
        resp_dict = submit('google.com/2')
        self.assertEqual('added', resp_dict['result'])
        self.assertEqual(True, resp_dict['all'])
        self.assertEqual(stored('google.com/2'), 1)

        self.assertEqual(job.get_urls_collected(), job.no_of_urls)

        # Verifying third url. Gathering should be completed but url won't be
        # added.
        resp_dict = submit('google.com/3')
        self.assertEqual('', resp_dict['result'])
        self.assertEqual(True, resp_dict['all'])
        self.assertEqual(stored('google.com/3'), 0)

        # The collected count must not have moved past the limit.
        self.assertEqual(job.get_urls_collected(), job.no_of_urls)
Пример #4
0
    def testVerifyFromTagasauris(self):
        # Posts three urls to the tagasauris sample-add endpoint of a job that
        # has 'google.com' as its only gold sample and same_domain_allowed=2,
        # and checks the reported result, the 'all' flag and the number of
        # matching Sample rows after each request.
        job = Job.objects.create_active(
            account=self.user.get_profile(),
            gold_samples=json.dumps([{
                'url': 'google.com',
                'label': LABEL_YES
            }]),
            same_domain_allowed=2,
            no_of_urls=10,
        )

        worker_id = '1234'

        def submit(url):
            # POST `url` as a JSON body. Every request in this test is
            # expected to answer 200 with both 'result' and 'all' present in
            # the decoded body, which is returned to the caller.
            resp = self.c.post(
                '%ssample/add/tagasauris/%s/?format=json' %
                (self.api_url, job.id),
                json.dumps({
                    'url': url,
                    'worker_id': worker_id,
                }), "text/json")
            self.assertEqual(resp.status_code, 200)

            resp_dict = json.loads(resp.content)

            # assertIn shows the decoded body when a key is missing, unlike
            # assertTrue(key in resp_dict.keys()).
            self.assertIn('result', resp_dict)
            self.assertIn('all', resp_dict)
            return resp_dict

        def stored(url):
            # Number of this job's Sample rows saved under the sanitized url.
            return Sample.objects.filter(
                job=job, url=Sample.sanitize_url(url)).count()

        # Verifying first url (and adding)
        resp_dict = submit('google.com/1')
        self.assertEqual('added', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        self.assertEqual(stored('google.com/1'), 1)

        # This time verification should fail because of too many urls from
        # the same domain
        resp_dict = submit('google.com/2')
        self.assertEqual('domain duplicate', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        self.assertEqual(stored('google.com/2'), 0)

        # This time verification should fail because of duplicated url (look
        # at golden sample)
        resp_dict = submit('google.com')
        self.assertEqual('duplicate', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        # Exactly one row is expected for this url (presumably the gold
        # sample's own row -- confirm against Job.objects.create_active).
        self.assertEqual(stored('google.com'), 1)
Пример #5
0
    def testVerifyFromTagasaurisErrors(self):
        # Sends four invalid requests to the tagasauris sample-add endpoint
        # and checks that each one gets a non-200 answer whose decoded body
        # has an 'error' key, and that no Sample is stored for the posted url.
        job = Job.objects.create_active(
            account=self.user.get_profile(),
            gold_samples=json.dumps([{
                'url': 'google.com',
                'label': LABEL_YES
            }]),
            same_domain_allowed=2,
            no_of_urls=10,
        )

        worker_id = '1234'
        # Every scenario below uses the same url.
        newest_url = 'google.com/1'

        def assert_rejected(job_id, *post_args):
            # `post_args` are forwarded to the test client's post() after the
            # endpoint url: the request body and, optionally, a content type.
            resp = self.c.post(
                '%ssample/add/tagasauris/%s/?format=json' %
                (self.api_url, job_id), *post_args)
            self.assertNotEqual(resp.status_code, 200)

            resp_dict = json.loads(resp.content)

            # assertIn shows the decoded body when the key is missing.
            self.assertIn('error', resp_dict)
            # No job filter on purpose: the url must not be stored at all.
            self.assertEqual(
                Sample.objects.filter(
                    url=Sample.sanitize_url(newest_url)).count(),
                0)

        # Error on not existing job.
        assert_rejected(
            1234567,
            json.dumps({
                'url': newest_url,
                'worker_id': worker_id,
            }), "text/json")

        # Error on wrong post data (not json): the dict is handed to the
        # client as-is, without serialising it or naming a content type.
        assert_rejected(job.id, {
            'url': newest_url,
            'worker_id': worker_id,
        })

        # Error on wrong post data (parameters errors): 'url' is missing.
        assert_rejected(
            job.id, json.dumps({
                'worker_id': worker_id,
            }), "text/json")

        # Error on wrong post data (parameters errors): 'worker_id' is
        # missing.
        assert_rejected(
            job.id, json.dumps({
                'url': newest_url,
            }), "text/json")
Пример #6
0
    def testVerifyFromTagasaurisLimit(self):
        # Posts three urls to the tagasauris sample-add endpoint of a job
        # created with no_of_urls=2 and checks the reported result, the 'all'
        # flag and the stored Sample rows as the limit is reached and then
        # exceeded.
        job = Job.objects.create_active(
            account=self.user.get_profile(),
            gold_samples=json.dumps([{
                'url': 'google.com',
                'label': LABEL_YES
            }]),
            same_domain_allowed=20,
            no_of_urls=2,
        )

        worker_id = '1234'

        def submit(url):
            # POST `url` as a JSON body. Every request in this test is
            # expected to answer 200 with both 'result' and 'all' present in
            # the decoded body, which is returned to the caller.
            resp = self.c.post(
                '%ssample/add/tagasauris/%s/?format=json' %
                (self.api_url, job.id),
                json.dumps({
                    'url': url,
                    'worker_id': worker_id,
                }), "text/json")
            self.assertEqual(resp.status_code, 200)

            resp_dict = json.loads(resp.content)

            # assertIn shows the decoded body when a key is missing, unlike
            # assertTrue(key in resp_dict.keys()).
            self.assertIn('result', resp_dict)
            self.assertIn('all', resp_dict)
            return resp_dict

        def stored(url):
            # Number of this job's Sample rows saved under the sanitized url.
            return Sample.objects.filter(
                job=job, url=Sample.sanitize_url(url)).count()

        # Verifying first url (and adding). We need one more.
        resp_dict = submit('google.com/1')
        self.assertEqual('added', resp_dict['result'])
        self.assertEqual(False, resp_dict['all'])
        self.assertEqual(stored('google.com/1'), 1)

        # Verifying second url. Gathering should be completed.
        resp_dict = submit('google.com/2')
        self.assertEqual('added', resp_dict['result'])
        self.assertEqual(True, resp_dict['all'])
        self.assertEqual(stored('google.com/2'), 1)

        self.assertEqual(job.get_urls_collected(), job.no_of_urls)

        # Verifying third url. Gathering should be completed but url won't be
        # added.
        resp_dict = submit('google.com/3')
        self.assertEqual('', resp_dict['result'])
        self.assertEqual(True, resp_dict['all'])
        self.assertEqual(stored('google.com/3'), 0)

        # The collected count must not have moved past the limit.
        self.assertEqual(job.get_urls_collected(), job.no_of_urls)