示例#1
0
    def test_handle_task(self, get_preps, collect):
        """Verify the arguments ``handle_task`` passes on to ``collect``.

        ``get_preps`` and ``collect`` are mock objects handed in by the
        caller (presumably patch decorators not visible here -- confirm).
        The test expects the handler to look up preprocessors for the task
        target, call ``collect`` exactly once with the configured output
        directory, and return whatever ``collect`` returned.
        """
        task = Task.create(self.job_id, self.target)
        task_options = {'javascript': True, 'extract': True}

        preprocessors = ['prep1']
        collected = {'meta': 'so meta'}
        get_preps.return_value = preprocessors
        collect.return_value = collected

        settings = {'artexin.out_dir': '/test/out'}

        handler = FetchableHandler()
        with mock_bottle_config('artexinweb.settings.BOTTLE_CONFIG',
                                settings):
            outcome = handler.handle_task(task, task_options)

        # The handler hands back the collector's return value.
        assert outcome == collected

        get_preps.assert_called_once_with(task.target)

        # Note that the 'extract' option is expected to arrive at collect()
        # under the keyword name 'do_extract'.
        expected_kwargs = {
            'prep': preprocessors,
            'base_dir': settings['artexin.out_dir'],
            'javascript': True,
            'do_extract': True,
            'meta': {},
        }
        collect.assert_called_once_with(task.target, **expected_kwargs)
示例#2
0
    def test_mark_queued(self):
        """Verify that ``mark_queued`` returns a failed task to queued."""
        requeued = Task.create(self.job_id, self.task_target)

        # New tasks start out queued, so push this one into the failed state
        # first; otherwise the last assertion could pass without
        # mark_queued() doing anything.
        requeued.mark_failed("error")
        assert requeued.is_queued is False

        requeued.mark_queued()
        assert requeued.is_queued is True
示例#3
0
    def test_mark_finished(self):
        """Verify that ``mark_finished`` sets the flag and empties notes."""
        done = Task.create(self.job_id, self.task_target)

        # Persist a non-empty note so the final check shows it was reset.
        done.notes = 'test'
        done.save()
        assert done.is_finished is False

        done.mark_finished()

        assert done.is_finished is True
        assert done.notes == ''
示例#4
0
    def test_process_task_invalid_target(self, mark_failed, handle_task):
        """Verify that an invalid target fails the task without handling it.

        ``mark_failed`` and ``handle_task`` are mock objects handed in by the
        caller (presumably patch decorators not visible here -- confirm).
        """
        task = Task.create(self.job_id, self.targets[0])
        handler = BaseJobHandler()

        # Force the handler to reject the target.
        reject_target = mock.patch.object(handler,
                                          'is_valid_target',
                                          return_value=False)
        with reject_target:
            handler.process_task(task, {})

            assert mark_failed.call_count == 1
            assert not handle_task.called
示例#5
0
    def test_handle_task_result_failure(self, mark_failed, mark_finished):
        """Verify that a result carrying an ``error`` key fails the task.

        ``mark_failed`` and ``mark_finished`` are mock objects handed in by
        the caller (presumably patch decorators not visible here -- confirm).
        """
        task = Task.create(self.job_id, self.target)
        failure = {'error': 'something went wrong'}

        FetchableHandler().handle_task_result(task, failure, {})

        # The task must be failed exactly once and never finished.
        assert not mark_finished.called
        assert mark_failed.call_count == 1
    def test_handle_task_override_title(self, extract_target, read_title,
                                        count_images, create_zipball,
                                        shutil_rmtree):
        """Check that a title given in ``options['meta']`` is used as-is.

        The mocked title reader is primed with a different title and must
        not be called at all; the metadata passed to ``create_zipball`` and
        the value returned by ``handle_task`` must carry the overriding
        title.

        All five parameters are mock objects handed in by the caller
        (presumably patch decorators not visible here -- confirm).
        """
        options = {'origin': self.origin, 'meta': {'title': 'overridden'}}
        task = Task.create(self.job_id, self.target)

        handler = StandaloneHandler()

        expected_meta = {
            'title': 'overridden',
            'images': 4,
            'url': self.origin,
            'domain': urllib.parse.urlparse(self.origin).netloc,
        }

        extract_target.return_value = self.temp_dir
        read_title.return_value = 'title actually found'
        count_images.return_value = expected_meta['images']

        def mocked_create_zipball(*args, **kwargs):
            # Echo the ``meta`` keyword back so that the value returned by
            # handle_task() can be inspected below.
            return kwargs['meta']

        create_zipball.side_effect = mocked_create_zipball

        mock_settings = {'artexin.out_dir': '/test/out'}
        with mock_bottle_config('artexinweb.settings.BOTTLE_CONFIG',
                                mock_settings):
            result = handler.handle_task(task, options)

        # ``call_args`` is an ``(args, kwargs)`` pair. Unpack it directly
        # instead of looping over it and filtering by type, so it is obvious
        # that the keyword-argument checks always run.
        _, zipball_kwargs = create_zipball.call_args
        assert zipball_kwargs['src_dir'] == self.temp_dir
        assert zipball_kwargs['out_dir'] == mock_settings['artexin.out_dir']
        for key, value in expected_meta.items():
            assert zipball_kwargs['meta'][key] == value

        assert isinstance(zipball_kwargs['meta']['timestamp'],
                          datetime.datetime)

        extract_target.assert_called_once_with(task.target)
        # The title was supplied by the caller, so it must not be read.
        assert read_title.called is False
        count_images.assert_called_once_with(self.temp_dir)
        shutil_rmtree.assert_called_once_with(self.temp_dir)

        # The result holds the expected keys plus ``timestamp``.
        assert len(result) == len(expected_meta) + 1

        for key, value in expected_meta.items():
            assert result[key] == value

        assert isinstance(result['timestamp'], datetime.datetime)
示例#7
0
    def test_process_task_handle_task_failure(self, mark_failed, handle_task,
                                              handle_task_result):
        """Verify that an exception in ``handle_task`` fails the task.

        ``mark_failed``, ``handle_task`` and ``handle_task_result`` are mock
        objects handed in by the caller (presumably patch decorators not
        visible here -- confirm).
        """
        task = Task.create(self.job_id, self.targets[0])
        task_options = {}

        # Make the mocked handle_task blow up when it is invoked.
        handle_task.side_effect = Exception()

        handler = BaseJobHandler()
        accept_target = mock.patch.object(handler,
                                          'is_valid_target',
                                          return_value=True)
        with accept_target:
            handler.process_task(task, task_options)

            handle_task.assert_called_once_with(task, task_options)
            # No result exists, so result handling must be skipped.
            assert not handle_task_result.called
            assert mark_failed.call_count == 1
示例#8
0
    def test_is_valid_task(self):
        """Verify ``is_valid_task`` for each task status in turn.

        Queued, processing and failed tasks are expected to be valid; a
        finished task is expected to be rejected.
        """
        task = Task.create(self.job_id, self.targets[0])
        handler = BaseJobHandler()

        # A freshly created task is queued.
        assert handler.is_valid_task(task) is True

        # Moving to processing and then to failed keeps the task valid.
        still_valid_transitions = (
            (task.mark_processing, ()),
            (task.mark_failed, ("failed",)),
        )
        for transition, args in still_valid_transitions:
            transition(*args)
            assert handler.is_valid_task(task) is True

        # Only finishing the task makes it invalid.
        task.mark_finished()
        assert handler.is_valid_task(task) is False
示例#9
0
    def test_process_task_success(self, mark_failed, handle_task,
                                  handle_task_result):
        """Verify that a handled task has its result passed on.

        ``mark_failed``, ``handle_task`` and ``handle_task_result`` are mock
        objects handed in by the caller (presumably patch decorators not
        visible here -- confirm).
        """
        task = Task.create(self.job_id, self.targets[0])
        task_options = {}
        outcome = {'result': 'OK'}

        handle_task.return_value = outcome

        handler = BaseJobHandler()
        accept_target = mock.patch.object(handler,
                                          'is_valid_target',
                                          return_value=True)
        with accept_target:
            handler.process_task(task, task_options)

            handle_task.assert_called_once_with(task, task_options)
            # Whatever handle_task returned goes to handle_task_result.
            handle_task_result.assert_called_once_with(
                task, outcome, task_options)
            assert not mark_failed.called
    def test_handle_task_result(self, mark_finished):
        """Verify that ``handle_task_result`` stores the result on the task.

        ``mark_finished`` is a mock object handed in by the caller
        (presumably a patch decorator not visible here -- confirm). The task
        is also expected to be marked finished exactly once.
        """
        task = Task.create(self.job_id, self.temp_dir)

        outcome = dict(
            size=1234,
            hash='a' * 32,
            title='page title',
            images=12,
            timestamp=datetime.datetime.utcnow(),
        )

        StandaloneHandler().handle_task_result(task, outcome, {})

        mark_finished.assert_called_once_with()

        # (task attribute, result key) pairs; 'hash' is expected to be
        # stored under the task's 'md5' attribute.
        mirrored = (
            ('size', 'size'),
            ('md5', 'hash'),
            ('title', 'title'),
            ('images', 'images'),
            ('timestamp', 'timestamp'),
        )
        for attribute, key in mirrored:
            assert getattr(task, attribute) == outcome[key]
示例#11
0
    def test_handle_task_result_success(self, mark_failed, mark_finished):
        """Verify that an error-free result finishes the task.

        ``mark_failed`` and ``mark_finished`` are mock objects handed in by
        the caller (presumably patch decorators not visible here -- confirm).
        The values of the result are expected to be stored on the task.
        """
        task = Task.create(self.job_id, self.target)
        payload = dict(
            size=1024,
            hash=self.job_id,
            title='Target title',
            images=3,
            timestamp=datetime.datetime.utcnow(),
        )

        FetchableHandler().handle_task_result(task, payload, {})

        assert not mark_failed.called
        mark_finished.assert_called_once_with()

        # 'hash' from the result is expected under the task's 'md5'.
        assert task.size == payload['size']
        assert task.md5 == payload['hash']
        assert task.title == payload['title']
        assert task.images == payload['images']
        assert task.timestamp == payload['timestamp']
示例#12
0
    def test_mark_failed(self):
        """Verify that ``mark_failed`` switches ``is_failed`` on."""
        failing = Task.create(self.job_id, self.task_target)
        # A freshly created task must not already count as failed.
        assert failing.is_failed is False

        failing.mark_failed("error")

        assert failing.is_failed is True
示例#13
0
    def test_mark_processing(self):
        """Verify that ``mark_processing`` switches ``is_processing`` on."""
        in_progress = Task.create(self.job_id, self.task_target)
        # A freshly created task must not already count as processing.
        assert in_progress.is_processing is False

        in_progress.mark_processing()

        assert in_progress.is_processing is True
示例#14
0
    def test_create(self):
        """Verify the fields of a task returned by ``Task.create``."""
        created = Task.create(self.job_id, self.task_target)

        # The constructor arguments are stored unchanged ...
        assert created.job_id == self.job_id
        assert created.target == self.task_target
        # ... and the initial status is QUEUED.
        assert created.status == Task.QUEUED