Example 1
    def test_simple_csv(self):
        """Load a plain CSV and check the xloader_hook callback at the end."""
        self.register_urls()
        data = {
            "api_key": self.api_key,
            "job_type": "xloader_to_datastore",
            "result_url": self.callback_url,
            "metadata": {
                "ckan_url": "http://%s/" % self.host,
                "resource_id": self.resource_id,
            },
        }
        job_id = "test{}".format(random.randint(0, 1e5))

        # Tests invoke the job synchronously (no rq), so get_current_job()
        # has to be mocked to return a job carrying our id.
        with mock.patch("ckanext.xloader.jobs.set_datastore_active_flag") as \
                mocked_set_datastore_active_flag:
            with mock.patch("ckanext.xloader.jobs.get_current_job",
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None

        # The final HTTP call must be the success report to xloader_hook
        assert (responses.calls[-1].request.url ==
                "http://www.ckan.org/api/3/action/xloader_hook")
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict["status"] == u"complete", job_dict
        assert job_dict == {
            u"status": u"complete",
            u"metadata": {
                u"ckan_url": u"http://www.ckan.org/",
                u"resource_id": u"foo-bar-42",
            },
        }

        # Inspect what was actually loaded into the datastore table
        data = self.get_datastore_table()
        assert data["headers"] == [
            "_id", "_full_text", "date", "temperature", "place"]
        # every column is expected as TEXT here
        # (not TIMESTAMP WITHOUT TIME ZONE / NUMERIC)
        assert data["header_dict"]["date"] == "TEXT"
        assert data["header_dict"]["temperature"] == "TEXT"
        assert data["header_dict"]["place"] == "TEXT"
        assert data["num_rows"] == 6
        assert data["rows"][0][2:] == (u"2011-01-01", u"1", u"Galway")

        # The job should have flagged the resource datastore_active=True
        mocked_set_datastore_active_flag.assert_called_once()
        assert mocked_set_datastore_active_flag.call_args[1]["data_dict"] == {
            "ckan_url": "http://www.ckan.org/",
            "resource_id": "foo-bar-42",
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()
Example 2
    def test_umlaut_and_extra_comma(self):
        """CSV with an extra comma plus an umlaut still loads successfully.

        The extra comma makes the direct COPY raise a psycopg2.DataError and
        the umlaut can trip up logging of that error; the loader must fall
        back to messytables and finish the job cleanly.
        """
        self.register_urls(filename="umlaut_and_extra_comma.csv")
        data = {
            "api_key": self.api_key,
            "job_type": "xloader_to_datastore",
            "result_url": self.callback_url,
            "metadata": {
                "ckan_url": "http://%s/" % self.host,
                "resource_id": self.resource_id,
            },
        }
        job_id = "test{}".format(random.randint(0, 1e5))

        with mock.patch("ckanext.xloader.jobs.set_resource_metadata"):
            # in tests we call jobs directly, rather than use rq, so mock
            # get_current_job()
            with mock.patch(
                "ckanext.xloader.jobs.get_current_job",
                return_value=mock.Mock(id=job_id),
            ):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)["error"]["message"]

        # Check it said it was successful
        assert (
            responses.calls[-1].request.url
            == "http://www.ckan.org/api/3/action/xloader_hook"
        )
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict["status"] == u"complete", job_dict
        assert job_dict == {
            u"metadata": {
                u"datastore_contains_all_records_of_source_file": True,
                u"datastore_active": True,
                u"ckan_url": u"http://www.ckan.org/",
                u"resource_id": u"foo-bar-42",
            },
            u"status": u"complete",
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        assert job["status"] == u"complete"
        # identity comparison for None per PEP 8; `== None` was a lint smell
        assert job["error"] is None
Example 3
    def test_umlaut_and_extra_comma(self):
        """CSV with an extra comma plus an umlaut still loads successfully.

        The extra comma makes the direct COPY raise a psycopg2.DataError and
        the umlaut can trip up logging of that error; the loader must fall
        back to messytables and finish the job cleanly.
        """
        self.register_urls(filename='umlaut_and_extra_comma.csv')
        data = {
            'api_key': self.api_key,
            'job_type': 'xloader_to_datastore',
            'result_url': self.callback_url,
            'metadata': {
                'ckan_url': 'http://%s/' % self.host,
                'resource_id': self.resource_id
            }
        }
        job_id = 'test{}'.format(random.randint(0, 1e5))

        # set_resource_metadata is patched only to suppress its side effects;
        # the mock itself is never inspected, so no name is bound to it.
        with mock.patch('ckanext.xloader.jobs.set_resource_metadata'):
            # in tests we call jobs directly, rather than use rq, so mock
            # get_current_job()
            with mock.patch('ckanext.xloader.jobs.get_current_job',
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)['error']['message']

        # Check it said it was successful
        eq_(responses.calls[-1].request.url,
            'http://www.ckan.org/api/3/action/xloader_hook')
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict['status'] == u'complete', job_dict
        eq_(
            job_dict, {
                u'metadata': {
                    u'datastore_contains_all_records_of_source_file': True,
                    u'datastore_active': True,
                    u'ckan_url': u'http://www.ckan.org/',
                    u'resource_id': u'foo-bar-42'
                },
                u'status': u'complete'
            })

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        eq_(job['status'], u'complete')
        eq_(job['error'], None)
Example 4
    def test_too_large_xls(self):
        """An oversized XLS must fail the load and report the error."""
        self.register_urls(filename='simple-large.xls')
        data = {
            'api_key': self.api_key,
            'job_type': 'xloader_to_datastore',
            'result_url': self.callback_url,
            'metadata': {
                'ckan_url': 'http://%s/' % self.host,
                'resource_id': self.resource_id
            }
        }
        job_id = 'test{}'.format(random.randint(0, 1e5))

        # set_resource_metadata is patched only to suppress its side effects;
        # the mock itself is never inspected, so no name is bound to it.
        with mock.patch('ckanext.xloader.jobs.set_resource_metadata'):
            # in tests we call jobs directly, rather than use rq, so mock
            # get_current_job()
            with mock.patch('ckanext.xloader.jobs.get_current_job',
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is not None, jobs_db.get_job(job_id)['error']['message']

        # Check the failure was reported back to xloader_hook
        # (the old comment claimed "successful", but this is the error path)
        eq_(responses.calls[-1].request.url,
            'http://www.ckan.org/api/3/action/xloader_hook')
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict['status'] == u'error', job_dict
        eq_(
            job_dict, {
                u'status': u'error',
                u'metadata': {
                    u'ckan_url': u'http://www.ckan.org/',
                    u'datastore_contains_all_records_of_source_file': False,
                    u'resource_id': u'foo-bar-42'
                },
                u'error':
                u'Loading file raised an error: array index out of range'
            })

        job = jobs_db.get_job(job_id)
        eq_(job['status'], u'error')
        eq_(job['error'], {
            u'message':
            u'Loading file raised an error: array index out of range'
        })
Example 5
    def test_too_large_xls(self):
        """An oversized XLS must fail the load and report the error."""
        self.register_urls(filename="simple-large.xls")
        data = {
            "api_key": self.api_key,
            "job_type": "xloader_to_datastore",
            "result_url": self.callback_url,
            "metadata": {
                "ckan_url": "http://%s/" % self.host,
                "resource_id": self.resource_id,
            },
        }
        job_id = "test{}".format(random.randint(0, 1e5))

        # Jobs run synchronously in tests (no rq), so get_current_job() is
        # mocked; set_resource_metadata is patched to neutralise side effects.
        with mock.patch("ckanext.xloader.jobs.set_resource_metadata"):
            with mock.patch("ckanext.xloader.jobs.get_current_job",
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is not None, jobs_db.get_job(job_id)["error"]["message"]

        # The failure must have been reported back to xloader_hook
        hook_url = "http://www.ckan.org/api/3/action/xloader_hook"
        assert responses.calls[-1].request.url == hook_url
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict["status"] == u"error", job_dict
        expected_error = \
            u"Loading file raised an error: array index out of range"
        assert job_dict == {
            u"status": u"error",
            u"metadata": {
                u"ckan_url": u"http://www.ckan.org/",
                u"datastore_contains_all_records_of_source_file": False,
                u"resource_id": u"foo-bar-42",
            },
            u"error": expected_error,
        }

        job = jobs_db.get_job(job_id)
        assert job["status"] == u"error"
        assert job["error"] == {u"message": expected_error}
Example 6
    def test_invalid_byte_sequence(self):
        """XLSX containing a NUL byte still loads via the fallback path.

        The file triggers 'invalid byte sequence for encoding "UTF8": 0x00'
        on INSERT, so the COPY raises a psycopg2.DataError; umlauts in the
        file can also break logging of that error.  The loader must recover
        (reverting to messytables) and complete the job.
        """
        self.register_urls(filename='go-realtime.xlsx')
        data = {
            'api_key': self.api_key,
            'job_type': 'xloader_to_datastore',
            'result_url': self.callback_url,
            'metadata': {
                'ckan_url': 'http://%s/' % self.host,
                'resource_id': self.resource_id
            }
        }
        job_id = 'test{}'.format(random.randint(0, 1e5))

        # Jobs run synchronously in tests (no rq), hence the mocks
        with mock.patch('ckanext.xloader.jobs.set_datastore_active_flag'):
            with mock.patch('ckanext.xloader.jobs.get_current_job',
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)['error']['message']

        # Success must have been reported to xloader_hook
        hook_url = 'http://www.ckan.org/api/3/action/xloader_hook'
        assert responses.calls[-1].request.url == hook_url
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict['status'] == u'complete', job_dict
        assert job_dict == {
            u'metadata': {
                u'ckan_url': u'http://www.ckan.org/',
                u'resource_id': u'foo-bar-42'
            },
            u'status': u'complete'
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        assert job['status'] == u'complete'
        assert job['error'] is None
Example 7
    def test_first_request_is_202_pending_response(self):
        """A 202 'still processing' first response must not break the load.

        Mirrors servers such as
        https://data-cdfw.opendata.arcgis.com/datasets which answer the first
        GET with a 202 JSON body; later GETs return the file normally.
        """
        responses.add(
            responses.GET,
            SOURCE_URL,
            status=202,
            body='{"processingTime":"8.716 seconds","status":"Processing","generating":{}}',
            content_type='application/json')
        # subsequent GETs of the CSV work fine
        self.register_urls()
        data = {
            'api_key': self.api_key,
            'job_type': 'xloader_to_datastore',
            'result_url': self.callback_url,
            'metadata': {
                'ckan_url': 'http://%s/' % self.host,
                'resource_id': self.resource_id
            }
        }
        job_id = 'test{}'.format(random.randint(0, 1e5))

        # Jobs run synchronously in tests (no rq), hence the mocks
        with mock.patch('ckanext.xloader.jobs.set_resource_metadata') as \
                mocked_set_resource_metadata:
            with mock.patch('ckanext.xloader.jobs.get_current_job',
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)['error']['message']

        # Success must have been reported to xloader_hook
        hook_url = 'http://www.ckan.org/api/3/action/xloader_hook'
        assert responses.calls[-1].request.url == hook_url
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict['status'] == u'complete', job_dict
        assert job_dict == {
            u'metadata': {
                u'ckan_url': u'http://www.ckan.org/',
                u'datastore_contains_all_records_of_source_file': True,
                u'datastore_active': True,
                u'resource_id': u'foo-bar-42'
            },
            u'status': u'complete'
        }

        # Inspect what was loaded into the datastore table
        data = self.get_datastore_table()
        assert data['headers'] == [
            '_id', '_full_text', 'date', 'temperature', 'place']
        # every column is expected as TEXT here
        # (not TIMESTAMP WITHOUT TIME ZONE / NUMERIC)
        assert data['header_dict']['date'] == 'TEXT'
        assert data['header_dict']['temperature'] == 'TEXT'
        assert data['header_dict']['place'] == 'TEXT'
        assert data['num_rows'] == 6
        assert data['rows'][0][2:] == (u'2011-01-01', u'1', u'Galway')

        # The resource metadata should have been flagged active/complete
        mocked_set_resource_metadata.assert_called_once()
        assert mocked_set_resource_metadata.call_args[1]['update_dict'] == {
            'datastore_contains_all_records_of_source_file': True,
            'datastore_active': True,
            'ckan_url': 'http://www.ckan.org/',
            'resource_id': 'foo-bar-42'
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        assert job['status'] == u'complete'
        assert job['error'] is None
Example 8
    def test_messytables(self):
        """XLS can't go through xloader's COPY, so messytables must load it."""
        self.register_urls(filename='simple.xls',
                           content_type='application/vnd.ms-excel')
        data = {
            'api_key': self.api_key,
            'job_type': 'xloader_to_datastore',
            'result_url': self.callback_url,
            'metadata': {
                'ckan_url': 'http://%s/' % self.host,
                'resource_id': self.resource_id
            }
        }
        job_id = 'test{}'.format(random.randint(0, 1e5))

        with mock.patch('ckanext.xloader.jobs.set_resource_metadata') \
                as mocked_set_resource_metadata:
            # in tests we call jobs directly, rather than use rq, so mock
            # get_current_job()
            with mock.patch('ckanext.xloader.jobs.get_current_job',
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        eq_(result, None)

        # Check it said it was successful
        eq_(responses.calls[-1].request.url,
            'http://www.ckan.org/api/3/action/xloader_hook')
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict['status'] == u'complete', job_dict
        eq_(
            job_dict, {
                u'metadata': {
                    u'datastore_contains_all_records_of_source_file': True,
                    u'datastore_active': True,
                    u'ckan_url': u'http://www.ckan.org/',
                    u'resource_id': u'foo-bar-42'
                },
                u'status': u'complete'
            })

        # Check the load: this path produces typed (non-TEXT) columns
        data = self.get_datastore_table()
        eq_(data['headers'],
            ['_id', '_full_text', 'date', 'temperature', 'place'])
        eq_(data['header_dict']['date'], 'TIMESTAMP WITHOUT TIME ZONE')
        eq_(data['header_dict']['temperature'], 'NUMERIC')
        eq_(data['header_dict']['place'], 'TEXT')
        eq_(data['num_rows'], 6)
        eq_(data['rows'][0][2:], (datetime.datetime(2011, 1, 1), 1, u'Galway'))

        # Check it wanted to set the datastore_active=True
        mocked_set_resource_metadata.assert_called_once()
        eq_(
            mocked_set_resource_metadata.call_args[1]['update_dict'], {
                'ckan_url': 'http://www.ckan.org/',
                'datastore_contains_all_records_of_source_file': True,
                'datastore_active': True,
                'resource_id': 'foo-bar-42'
            })

        # check logs have the error doing the COPY
        logs = self.get_load_logs(job_id)
        copy_error_index = None
        for i, log in enumerate(logs):
            if log[0] == 'WARNING' and log[1].startswith(
                    'Load using COPY failed: Error during the load into PostgreSQL'
            ):
                copy_error_index = i
                break
        # compare against None: a bare truthiness test would wrongly fail
        # when the COPY warning is the very first log entry (index 0)
        assert copy_error_index is not None, 'Missing COPY error'

        # check messytable portion of the logs
        logs = Logs(logs[copy_error_index + 1:])
        eq_(logs[0], (u'INFO', u'Trying again with messytables'))
        logs.assert_no_errors()

        # Check ANALYZE was run
        last_analyze = self.get_time_of_last_analyze()
        assert last_analyze
Example 9
    def test_too_large_csv(self):
        """A too-large CSV is only partially loaded yet the job completes.

        The hook reports success but flags that not all source records made
        it into the datastore.
        """
        self.register_urls(filename='simple-large.csv')
        data = {
            'api_key': self.api_key,
            'job_type': 'xloader_to_datastore',
            'result_url': self.callback_url,
            'metadata': {
                'ckan_url': 'http://%s/' % self.host,
                'resource_id': self.resource_id
            }
        }
        job_id = 'test{}'.format(random.randint(0, 1e5))

        # Jobs run synchronously in tests (no rq), hence the mocks
        with mock.patch('ckanext.xloader.jobs.set_resource_metadata') as \
                mocked_set_resource_metadata:
            with mock.patch('ckanext.xloader.jobs.get_current_job',
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)['error']['message']

        # Success must have been reported to xloader_hook
        hook_url = 'http://www.ckan.org/api/3/action/xloader_hook'
        assert responses.calls[-1].request.url == hook_url
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict['status'] == u'complete', job_dict
        assert job_dict == {
            u'metadata': {
                u'datastore_contains_all_records_of_source_file': False,
                u'datastore_active': True,
                u'ckan_url': u'http://www.ckan.org/',
                u'resource_id': u'foo-bar-42'
            },
            u'status': u'complete'
        }

        # Inspect the (truncated) load
        data = self.get_datastore_table()
        assert data['headers'] == ['_id', '_full_text', 'id', 'text']
        assert data['header_dict']['id'] == 'TEXT'
        assert data['header_dict']['text'] == 'TEXT'
        assert 0 < data['num_rows'] <= 100
        assert data['rows'][0][2:] == (u'1', u'a')

        # The resource metadata should record the partial load
        mocked_set_resource_metadata.assert_called_once()
        assert mocked_set_resource_metadata.call_args[1]['update_dict'] == {
            'datastore_contains_all_records_of_source_file': False,
            'datastore_active': True,
            'ckan_url': 'http://www.ckan.org/',
            'resource_id': 'foo-bar-42'
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        assert job['status'] == u'complete'
        assert job['error'] is None

        # Check ANALYZE was run
        last_analyze = self.get_time_of_last_analyze()
        assert last_analyze
Example 10
    def test_first_request_is_202_pending_response(self):
        """A 202 'still processing' first response must not break the load.

        Mirrors servers such as
        https://data-cdfw.opendata.arcgis.com/datasets which answer the first
        GET with a 202 JSON body; later GETs return the file normally.
        """
        pending_body = (
            '{"processingTime":"8.716 seconds","status":"Processing",'
            '"generating":{}}'
        )
        responses.add(
            responses.GET,
            SOURCE_URL,
            status=202,
            body=pending_body,
            content_type="application/json",
        )
        # subsequent GETs of the CSV work fine
        self.register_urls()
        data = {
            "api_key": self.api_key,
            "job_type": "xloader_to_datastore",
            "result_url": self.callback_url,
            "metadata": {
                "ckan_url": "http://%s/" % self.host,
                "resource_id": self.resource_id,
            },
        }
        job_id = "test{}".format(random.randint(0, 1e5))

        # Jobs run synchronously in tests (no rq), hence the mocks
        with mock.patch(
                "ckanext.xloader.jobs.set_resource_metadata"
        ) as mocked_set_resource_metadata:
            with mock.patch(
                    "ckanext.xloader.jobs.get_current_job",
                    return_value=mock.Mock(id=job_id),
            ):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)["error"]["message"]

        # Success must have been reported to xloader_hook
        hook_url = "http://www.ckan.org/api/3/action/xloader_hook"
        assert responses.calls[-1].request.url == hook_url
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict["status"] == u"complete", job_dict
        assert job_dict == {
            u"status": u"complete",
            u"metadata": {
                u"ckan_url": u"http://www.ckan.org/",
                u"datastore_contains_all_records_of_source_file": True,
                u"datastore_active": True,
                u"resource_id": u"foo-bar-42",
            },
        }

        # Inspect what was loaded into the datastore table
        data = self.get_datastore_table()
        assert data["headers"] == [
            "_id", "_full_text", "date", "temperature", "place"]
        # every column is expected as TEXT here
        # (not TIMESTAMP WITHOUT TIME ZONE / NUMERIC)
        for column in ("date", "temperature", "place"):
            assert data["header_dict"][column] == "TEXT"
        assert data["num_rows"] == 6
        assert data["rows"][0][2:] == (u"2011-01-01", u"1", u"Galway")

        # The resource metadata should have been flagged active/complete
        mocked_set_resource_metadata.assert_called_once()
        assert mocked_set_resource_metadata.call_args[1]["update_dict"] == {
            "datastore_contains_all_records_of_source_file": True,
            "datastore_active": True,
            "ckan_url": "http://www.ckan.org/",
            "resource_id": "foo-bar-42",
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        assert job["status"] == u"complete"
        assert job["error"] is None
Example 11
    def test_messytables(self):
        """XLS can't go through xloader's COPY, so messytables must load it."""
        self.register_urls(filename="simple.xls",
                           content_type="application/vnd.ms-excel")
        data = {
            "api_key": self.api_key,
            "job_type": "xloader_to_datastore",
            "result_url": self.callback_url,
            "metadata": {
                "ckan_url": "http://%s/" % self.host,
                "resource_id": self.resource_id,
            },
        }
        job_id = "test{}".format(random.randint(0, 1e5))

        with mock.patch("ckanext.xloader.jobs.set_resource_metadata"
                        ) as mocked_set_resource_metadata:
            # in tests we call jobs directly, rather than use rq, so mock
            # get_current_job()
            with mock.patch(
                    "ckanext.xloader.jobs.get_current_job",
                    return_value=mock.Mock(id=job_id),
            ):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None

        # Check it said it was successful
        assert (responses.calls[-1].request.url ==
                "http://www.ckan.org/api/3/action/xloader_hook")
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict["status"] == u"complete", job_dict
        assert job_dict == {
            u"metadata": {
                u"datastore_contains_all_records_of_source_file": True,
                u"datastore_active": True,
                u"ckan_url": u"http://www.ckan.org/",
                u"resource_id": u"foo-bar-42",
            },
            u"status": u"complete",
        }

        # Check the load: this path produces typed (non-TEXT) columns
        data = self.get_datastore_table()
        assert data["headers"] == [
            "_id",
            "_full_text",
            "date",
            "temperature",
            "place",
        ]
        assert data["header_dict"]["date"] == "TIMESTAMP WITHOUT TIME ZONE"
        assert data["header_dict"]["temperature"] == "NUMERIC"
        assert data["header_dict"]["place"] == "TEXT"
        assert data["num_rows"] == 6
        assert data["rows"][0][2:] == (
            datetime.datetime(2011, 1, 1),
            1,
            u"Galway",
        )

        # Check it wanted to set the datastore_active=True
        mocked_set_resource_metadata.assert_called_once()
        assert mocked_set_resource_metadata.call_args[1]["update_dict"] == {
            "ckan_url": "http://www.ckan.org/",
            "datastore_contains_all_records_of_source_file": True,
            "datastore_active": True,
            "resource_id": "foo-bar-42",
        }

        # check logs have the error doing the COPY
        logs = self.get_load_logs(job_id)
        copy_error_index = None
        for i, log in enumerate(logs):
            if log[0] == "WARNING" and log[1].startswith(
                    "Load using COPY failed: Error during the load into PostgreSQL"
            ):
                copy_error_index = i
                break
        # compare against None: a bare truthiness test would wrongly fail
        # when the COPY warning is the very first log entry (index 0)
        assert copy_error_index is not None, "Missing COPY error"

        # check messytable portion of the logs
        logs = Logs(logs[copy_error_index + 1:])
        assert logs[0] == (u"INFO", u"Trying again with messytables")
        logs.assert_no_errors()

        # Check ANALYZE was run
        last_analyze = self.get_time_of_last_analyze()
        assert last_analyze
Example 12
    def test_too_large_csv(self):
        """A too-large CSV is only partially loaded yet the job completes.

        The hook reports success but flags that not all source records made
        it into the datastore.
        """
        self.register_urls(filename="simple-large.csv")
        data = {
            "api_key": self.api_key,
            "job_type": "xloader_to_datastore",
            "result_url": self.callback_url,
            "metadata": {
                "ckan_url": "http://%s/" % self.host,
                "resource_id": self.resource_id,
            },
        }
        job_id = "test{}".format(random.randint(0, 1e5))

        # Jobs run synchronously in tests (no rq), hence the mocks
        with mock.patch("ckanext.xloader.jobs.set_resource_metadata") as \
                mocked_set_resource_metadata:
            with mock.patch("ckanext.xloader.jobs.get_current_job",
                            return_value=mock.Mock(id=job_id)):
                result = jobs.xloader_data_into_datastore(data)
        assert result is None, jobs_db.get_job(job_id)["error"]["message"]

        # Success must have been reported to xloader_hook
        hook_url = "http://www.ckan.org/api/3/action/xloader_hook"
        assert responses.calls[-1].request.url == hook_url
        job_dict = json.loads(responses.calls[-1].request.body)
        assert job_dict["status"] == u"complete", job_dict
        assert job_dict == {
            u"status": u"complete",
            u"metadata": {
                u"datastore_contains_all_records_of_source_file": False,
                u"datastore_active": True,
                u"ckan_url": u"http://www.ckan.org/",
                u"resource_id": u"foo-bar-42",
            },
        }

        # Inspect the (truncated) load
        data = self.get_datastore_table()
        assert data["headers"] == ["_id", "_full_text", "id", "text"]
        assert data["header_dict"]["id"] == "TEXT"
        assert data["header_dict"]["text"] == "TEXT"
        assert 0 < data["num_rows"] <= 100
        assert data["rows"][0][2:] == (u"1", u"a")

        # The resource metadata should record the partial load
        mocked_set_resource_metadata.assert_called_once()
        assert mocked_set_resource_metadata.call_args[1]["update_dict"] == {
            "datastore_contains_all_records_of_source_file": False,
            "datastore_active": True,
            "ckan_url": "http://www.ckan.org/",
            "resource_id": "foo-bar-42",
        }

        logs = self.get_load_logs(job_id)
        logs.assert_no_errors()

        job = jobs_db.get_job(job_id)
        assert job["status"] == u"complete"
        assert job["error"] is None

        # Check ANALYZE was run
        last_analyze = self.get_time_of_last_analyze()
        assert last_analyze