Пример #1
0
    def command(self):
        """Download ONS data for the requested period and load it.

        Runs both parent ``command`` implementations, converts the supplied
        ``days`` / ``start_date`` / ``end_date`` options in place, defaults
        the cache directory to ``ONS_DEFAULT_CACHE_PATH``, downloads the
        matching files and passes the imported package dicts to
        ``OnsLoader``. ``self.parser.error`` is called when no time period
        option is set.
        """
        ApiCommand.command(self)
        XmlRpcCommand.command(self)

        opts = self.options

        # Convert only the options that were actually supplied.
        if opts.days:
            opts.days = int(opts.days)
        if opts.start_date:
            opts.start_date = self.parse_date(opts.start_date)
        if opts.end_date:
            opts.end_date = self.parse_date(opts.end_date)
        if not opts.ons_cache_dir:
            opts.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

        if opts.days or opts.start_date or opts.end_date:
            data_filepaths = OnsData.download_flexible(
                days=opts.days,
                start_date=opts.start_date,
                end_date=opts.end_date,
                ons_cache_dir=opts.ons_cache_dir)
        elif opts.all_time:
            data_filepaths = OnsData.download_all()
        else:
            self.parser.error('Please specify a time period')

        package_importer = OnsImporter(filepaths=data_filepaths,
                                       xmlrpc_settings=self.xmlrpc_settings)
        OnsLoader(self.client).load_packages(package_importer.pkg_dict())
Пример #2
0
    def command(self):
        """Download ONS data for the requested period and load it.

        Converts the supplied period options (``days``, ``start_date``,
        ``end_date``, ``month``, ``months_since``) in place, defaults the
        cache directory to ``ONS_DEFAULT_CACHE_PATH``, downloads the matching
        files, imports them (optionally filtered by ``publisher``) and loads
        the resulting package dicts. A summary from ``StatsList.report()`` is
        logged at the end. ``self.parser.error`` is called when no time
        period option is set.

        Any ``Exception`` raised along the way is logged with its traceback
        and re-raised.
        """
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)
        log = logging.getLogger(__name__)

        try:
            opts = self.options

            # Convert only the options that were actually supplied.
            if opts.days:
                opts.days = int(opts.days)
            if opts.start_date:
                opts.start_date = self.parse_date(opts.start_date)
            if opts.end_date:
                opts.end_date = self.parse_date(opts.end_date)
            if opts.month:
                opts.month = self.parse_month(opts.month)
            if opts.months_since:
                opts.months_since = self.parse_month(opts.months_since)
            if not opts.ons_cache_dir:
                opts.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

            if opts.days or opts.start_date or opts.end_date:
                data_filepaths = OnsData.download_flexible(
                    days=opts.days,
                    start_date=opts.start_date,
                    end_date=opts.end_date,
                    ons_cache_dir=opts.ons_cache_dir)
            elif opts.month:
                data_filepaths = OnsData.download_month(
                    year=opts.month.year,
                    month=opts.month.month)
            elif opts.months_since:
                data_filepaths = OnsData.download_months_since(
                    year=opts.months_since.year,
                    month=opts.months_since.month,
                    force_download=opts.force_download)
            elif opts.all_time:
                data_filepaths = OnsData.download_all(
                    force_download=opts.force_download)
            else:
                self.parser.error('Please specify a time period')

            filter_ = {}
            if opts.publisher:
                filter_['publisher'] = opts.publisher

            stats = StatsList()
            importer = OnsImporter(filepaths=data_filepaths,
                                   ckanclient=self.client, stats=stats,
                                   filter_=filter_)
            loader = OnsLoader(self.client, stats)

            loader.load_packages(importer.pkg_dict())
            # Lazy %-args: the message is only formatted if it is emitted.
            log.info('Summary:\n%s', stats.report())
        except Exception:
            # Log genuine failures only. A bare ``except`` would also trap
            # KeyboardInterrupt and SystemExit (presumably what
            # self.parser.error raises for a usage error - confirm) and
            # report them as loader exceptions.
            log.exception('ONS Loader exception')
            raise
Пример #3
0
    def command(self):
        """Download ONS data for the requested period and load it.

        Runs both parent ``command`` implementations, converts the supplied
        ``days`` / ``start_date`` / ``end_date`` options in place, downloads
        the matching files and passes the imported package dicts to
        ``OnsLoader``. ``self.parser.error`` is called when no time period
        option is set.
        """
        ApiCommand.command(self)
        XmlRpcCommand.command(self)

        opts = self.options

        if opts.days:
            opts.days = int(opts.days)
        # Both date options go through the same parser, start date first.
        for date_field in ('start_date', 'end_date'):
            raw_date = getattr(opts, date_field)
            if raw_date:
                setattr(opts, date_field, self.parse_date(raw_date))

        if opts.days or opts.start_date or opts.end_date:
            data_filepaths = OnsData.download_flexible(
                days=opts.days,
                start_date=opts.start_date,
                end_date=opts.end_date,
                ons_cache_dir=opts.ons_cache_dir,
            )
        elif opts.all_time:
            data_filepaths = OnsData.download_all()
        else:
            self.parser.error("Please specify a time period")

        package_importer = OnsImporter(
            filepaths=data_filepaths,
            xmlrpc_settings=self.xmlrpc_settings,
        )
        OnsLoader(self.client).load_packages(package_importer.pkg_dict())
Пример #4
0
    def setup_class(self):
        """Load sample files '4a' and '4b' and record the loaded resource ids.

        Each file is imported with ``OnsImporter``; every package dict is
        checked (title, groups, release and update dates) and the batch is
        loaded with ``OnsLoader``. Resource ids of every loaded package that
        can be fetched are accumulated in
        ``TestOnsLoadSeries.initial_resources``. On any failure
        ``MockDrupalCase.teardown_class()`` and ``model.repo.rebuild_db()``
        are called before the exception is re-raised.
        """
        super(TestOnsLoadSeries, self).setup_class()
        TestOnsLoadSeries.initial_resources = set()

        try:
            sample_paths = [sample_filepath('4a'), sample_filepath('4b')]
            for sample_path in sample_paths:
                ons_importer = importer.OnsImporter(sample_path, self.testclient)
                pkg_dicts = list(ons_importer.pkg_dict())
                for pkg in pkg_dicts:
                    assert pkg['title'] == 'Regional Labour Market Statistics', pkg
                    assert_equal(pkg['groups'],
                                 ['office-for-national-statistics'])
                    assert '2010-08-' in pkg['extras']['date_released'], pkg
                    assert pkg['extras']['date_updated'] == '', pkg
                result = OnsLoader(self.testclient).load_packages(pkg_dicts)

                for pkg_id in result['pkg_ids']:
                    package = model.Package.get(pkg_id)
                    if package:
                        TestOnsLoadSeries.initial_resources = \
                            TestOnsLoadSeries.initial_resources.union(
                                resource.id for resource in package.resources)
                assert result['num_errors'] == 0, result
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #5
0
    def setup_class(self):
        """Import and load samples '4a' and '4b', tracking resource ids.

        For each sample file the imported package dicts are validated (title,
        groups, ``date_released`` and ``date_updated`` extras) and then
        loaded. The ids of the resources belonging to each loaded package are
        merged into ``TestOnsLoadSeries.initial_resources``. If anything
        raises, ``MockDrupalCase.teardown_class()`` and
        ``model.repo.rebuild_db()`` run before the exception propagates.
        """
        super(TestOnsLoadSeries, self).setup_class()
        TestOnsLoadSeries.initial_resources = set()

        try:
            series_files = [sample_filepath('4a'), sample_filepath('4b')]
            for series_file in series_files:
                series_importer = importer.OnsImporter(series_file,
                                                       self.testclient)
                pkg_dicts = list(series_importer.pkg_dict())
                for candidate in pkg_dicts:
                    assert candidate['title'] == 'Regional Labour Market Statistics', candidate
                    assert_equal(candidate['groups'],
                                 ['office-for-national-statistics'])
                    assert '2010-08-' in candidate['extras']['date_released'], candidate
                    assert candidate['extras']['date_updated'] == '', candidate
                series_loader = OnsLoader(self.testclient)
                load_result = series_loader.load_packages(pkg_dicts)

                for loaded_id in load_result['pkg_ids']:
                    loaded_pkg = model.Package.get(loaded_id)
                    if loaded_pkg:
                        resource_ids = set(res.id for res in loaded_pkg.resources)
                        TestOnsLoadSeries.initial_resources = \
                            TestOnsLoadSeries.initial_resources | resource_ids
                assert load_result['num_errors'] == 0, load_result
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #6
0
    def command(self):
        """Download ONS data for the requested period and load it.

        Converts the supplied period options (``days``, ``start_date``,
        ``end_date``, ``month``, ``months_since``) in place, defaults the
        cache directory to ``ONS_DEFAULT_CACHE_PATH``, downloads the matching
        files and passes the imported package dicts to ``OnsLoader``.
        ``self.parser.error`` is called when no time period option is set.
        """
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)

        opts = self.options

        if opts.days:
            opts.days = int(opts.days)
        # Same conversion order as the option list: dates first, then months.
        for field, convert in (('start_date', self.parse_date),
                               ('end_date', self.parse_date),
                               ('month', self.parse_month),
                               ('months_since', self.parse_month)):
            raw_value = getattr(opts, field)
            if raw_value:
                setattr(opts, field, convert(raw_value))
        if not opts.ons_cache_dir:
            opts.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

        if opts.days or opts.start_date or opts.end_date:
            data_filepaths = OnsData.download_flexible(
                days=opts.days,
                start_date=opts.start_date,
                end_date=opts.end_date,
                ons_cache_dir=opts.ons_cache_dir)
        elif opts.month:
            data_filepaths = OnsData.download_month(
                year=opts.month.year,
                month=opts.month.month)
        elif opts.months_since:
            data_filepaths = OnsData.download_months_since(
                year=opts.months_since.year,
                month=opts.months_since.month,
                force_download=opts.force_download)
        elif opts.all_time:
            data_filepaths = OnsData.download_all(
                force_download=opts.force_download)
        else:
            self.parser.error('Please specify a time period')

        package_importer = OnsImporter(filepaths=data_filepaths,
                                       ckanclient=self.client)
        OnsLoader(self.client).load_packages(package_importer.pkg_dict())
Пример #7
0
 def setup_class(self):
     """Load two sample files in turn, requiring an error-free load each time."""
     super(TestOnsLoadTwice, self).setup_class()
     # sample_filepath(2) has the same packages as 1, but slightly updated
     sample_paths = [sample_filepath(''), sample_filepath(2)]
     for sample_path in sample_paths:
         pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
         result = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert result['num_errors'] == 0, result
Пример #8
0
 def setup_class(self):
     """Load sample file '10' twice; each pass must yield one package and no errors."""
     super(TestReloadUnknownPublisher, self).setup_class()
     # The same sample is listed twice so that the second pass is a reload.
     sample_paths = (sample_filepath('10'), sample_filepath('10'))
     for sample_path in sample_paths:
         pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
         assert_equal(len(pkg_dicts), 1)
         result = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert result['num_errors'] == 0, result
Пример #9
0
 def setup_class(self):
     """Import and load two sample files one after the other without errors."""
     super(TestOnsLoadTwice, self).setup_class()
     # sample_filepath(2) has the same packages as 1, but slightly updated
     first_path = sample_filepath('')
     updated_path = sample_filepath(2)
     for current_path in (first_path, updated_path):
         ons_importer = importer.OnsImporter(current_path)
         pkg_dicts = list(ons_importer.pkg_dict())
         ons_loader = OnsLoader(self.testclient)
         load_result = ons_loader.load_packages(pkg_dicts)
         assert load_result['num_errors'] == 0, load_result
Пример #10
0
 def setup_class(self):
     """Import and load sample '10' two times, checking each pass.

     Every pass must produce exactly one package dict and a load result
     with ``num_errors`` equal to 0.
     """
     super(TestReloadUnknownPublisher, self).setup_class()
     first_path = sample_filepath('10')
     second_path = sample_filepath('10')
     for current_path in (first_path, second_path):
         ons_importer = importer.OnsImporter(current_path)
         pkg_dicts = list(ons_importer.pkg_dict())
         assert_equal(len(pkg_dicts), 1)
         ons_loader = OnsLoader(self.testclient)
         load_result = ons_loader.load_packages(pkg_dicts)
         assert load_result['num_errors'] == 0, load_result
Пример #11
0
 def setup_class(self):
     """Load sample files '3a', '3b' and '3c' in order, each without errors.

     The result of the most recent load is kept in ``self.res``.
     """
     super(TestOnsLoadClashTitle, self).setup_class()
     # ons items have been split into 3 files, because search needs to
     # do indexing in between
     for part in 'abc':
         part_importer = importer.OnsImporter(sample_filepath('3' + part))
         pkg_dicts = list(part_importer.pkg_dict())
         self.res = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert self.res['num_errors'] == 0, self.res
Пример #12
0
 def setup_class(self):
     """Import and load the three '3x' sample files sequentially.

     ``self.res`` holds the latest load result, which must report no errors.
     """
     super(TestOnsLoadClashTitle, self).setup_class()
     # ons items have been split into 3 files, because search needs to
     # do indexing in between
     for letter in 'abc':
         sample_path = sample_filepath('3' + letter)
         ons_importer = importer.OnsImporter(sample_path)
         pkg_dicts = list(ons_importer.pkg_dict())
         ons_loader = OnsLoader(self.testclient)
         self.res = ons_loader.load_packages(pkg_dicts)
         assert self.res['num_errors'] == 0, self.res
Пример #13
0
 def setup_class(self):
     """Load sample file '10', whose single package must have no groups.

     The one imported package dict is checked for its title and an empty
     ``groups`` list before it is loaded without errors.
     """
     super(TestOnsUnknownPublisher, self).setup_class()
     sample_paths = (sample_filepath('10'),)
     for sample_path in sample_paths:
         pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
         assert_equal(len(pkg_dicts), 1)
         only_pkg = pkg_dicts[0]
         assert_equal(only_pkg['title'], 'NHS Cancer Waiting Times in Wales')
         assert_equal(only_pkg['groups'], [])
         result = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert result['num_errors'] == 0, result
Пример #14
0
 def setup_class(self):
     """Import sample '10', verify its single package and load it.

     The package dict must carry the expected title and an empty
     ``groups`` list; the load must report ``num_errors`` of 0.
     """
     super(TestOnsUnknownPublisher, self).setup_class()
     sample_paths = (sample_filepath('10'), )
     for current_path in sample_paths:
         ons_importer = importer.OnsImporter(current_path)
         pkg_dicts = list(ons_importer.pkg_dict())
         assert_equal(len(pkg_dicts), 1)
         first_pkg = pkg_dicts[0]
         assert_equal(first_pkg['title'],
                      'NHS Cancer Waiting Times in Wales')
         assert_equal(first_pkg['groups'], [])
         ons_loader = OnsLoader(self.testclient)
         load_result = ons_loader.load_packages(pkg_dicts)
         assert load_result['num_errors'] == 0, load_result
Пример #15
0
 def setup_class(self):
     """Load sample file 6, whose package dicts must all share one name.

     Every imported package dict is checked against ``self.name``, the
     expected title and the ONS group before the batch is loaded without
     errors.
     """
     super(TestNationalParkDuplicate, self).setup_class()
     sample_path = sample_filepath(6)
     pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     expected_title = 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)'
     for pkg in pkg_dicts:
         assert pkg['name'] == self.name, pkg['name']
         assert pkg['title'] == expected_title, pkg
         assert_equal(pkg['groups'], ['office-for-national-statistics'])
     result = OnsLoader(self.testclient).load_packages(pkg_dicts)
     assert result['num_errors'] == 0, result
Пример #16
0
 def setup_class(self):
     """Load sample files '4a' and '4b', validating each package first.

     Every package dict must have the expected title, the ONS group, a
     ``date_released`` containing '2010-08-' and an empty ``date_updated``.
     Each load must finish without errors.
     """
     super(TestOnsLoadSeries, self).setup_class()
     sample_paths = [sample_filepath('4a'), sample_filepath('4b')]
     for sample_path in sample_paths:
         pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
         for pkg in pkg_dicts:
             extras = pkg['extras']
             assert pkg['title'] == 'Regional Labour Market Statistics', pkg
             assert_equal(pkg['groups'],
                          ['office-for-national-statistics'])
             assert '2010-08-' in extras['date_released'], pkg
             assert extras['date_updated'] == '', pkg
         result = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert result['num_errors'] == 0, result
Пример #17
0
 def setup_class(self):
     """Load sample files '4a' and '4b', validating each package first.

     Every package dict must have the expected title, a ``published_by``
     extra that matches the ONS publisher once passed through
     ``strip_organisation_id``, a ``date_released`` containing '2010-08-'
     and an empty ``date_updated``. Each load must finish without errors.
     """
     super(TestOnsLoadSeries, self).setup_class()
     sample_paths = [sample_filepath('4a'), sample_filepath('4b')]
     for sample_path in sample_paths:
         pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
         for pkg in pkg_dicts:
             assert pkg['title'] == 'Regional Labour Market Statistics', pkg
             publisher = strip_organisation_id(pkg['extras']['published_by'])
             assert_equal(publisher,
                          'Office for National Statistics [some_number]')
             assert '2010-08-' in pkg['extras']['date_released'], pkg
             assert pkg['extras']['date_updated'] == '', pkg
         result = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert result['num_errors'] == 0, result
Пример #18
0
 def setup_class(self):
     """Load sample file 6, whose package dicts must all share one name.

     Every imported package dict is checked against ``self.name``, the
     expected title, the ONS publisher (after ``strip_organisation_id``)
     and an empty ``published_via`` before the batch is loaded without
     errors.
     """
     super(TestNationalParkDuplicate, self).setup_class()
     sample_path = sample_filepath(6)
     pkg_dicts = list(importer.OnsImporter(sample_path).pkg_dict())
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     expected_title = 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)'
     for pkg in pkg_dicts:
         assert pkg['name'] == self.name, pkg['name']
         assert pkg['title'] == expected_title, pkg
         publisher = strip_organisation_id(pkg['extras']['published_by'])
         assert_equal(publisher, 'Office for National Statistics [some_number]')
         assert pkg['extras']['published_via'] == '', pkg
     result = OnsLoader(self.testclient).load_packages(pkg_dicts)
     assert result['num_errors'] == 0, result
Пример #19
0
 def setup_class(self):
     """Load two sample files in turn, cleaning up if anything fails.

     Each load must report no errors. On any failure
     ``MockDrupalCase.teardown_class()`` and ``model.repo.rebuild_db()``
     are called before the exception is re-raised.
     """
     super(TestOnsLoadTwice, self).setup_class()
     try:
         # sample_filepath(2) has the same packages as 1, but slightly updated
         sample_paths = [sample_filepath(''), sample_filepath(2)]
         for sample_path in sample_paths:
             ons_importer = importer.OnsImporter(sample_path, self.testclient)
             pkg_dicts = list(ons_importer.pkg_dict())
             result = OnsLoader(self.testclient).load_packages(pkg_dicts)
             assert result['num_errors'] == 0, result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #20
0
 def setup_class(self):
     """Import and load two sample files back to back.

     Both loads must finish with ``num_errors`` equal to 0. If anything
     raises, ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` run before the exception propagates.
     """
     super(TestOnsLoadTwice, self).setup_class()
     try:
         # sample_filepath(2) has the same packages as 1, but slightly updated
         first_path = sample_filepath('')
         updated_path = sample_filepath(2)
         for current_path in (first_path, updated_path):
             current_importer = importer.OnsImporter(current_path,
                                                     self.testclient)
             pkg_dicts = list(current_importer.pkg_dict())
             ons_loader = OnsLoader(self.testclient)
             load_result = ons_loader.load_packages(pkg_dicts)
             assert load_result['num_errors'] == 0, load_result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #21
0
    def command(self):
        """Download ONS data for the requested period and load it.

        Runs both parent ``command`` implementations, converts the supplied
        ``days`` / ``start_date`` / ``end_date`` options in place, defaults
        the cache directory to ``ONS_DEFAULT_CACHE_PATH``, downloads the
        matching files and passes the imported package dicts to
        ``OnsLoader``. ``self.parser.error`` is called when no time period
        option is set.
        """
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)
        XmlRpcCommand.command(self)

        options = self.options

        # Convert only the options that were actually supplied.
        if options.days:
            options.days = int(options.days)
        if options.start_date:
            options.start_date = self.parse_date(options.start_date)
        if options.end_date:
            options.end_date = self.parse_date(options.end_date)
        if not options.ons_cache_dir:
            options.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

        if options.days or options.start_date or options.end_date:
            download_kwargs = dict(
                days=options.days,
                start_date=options.start_date,
                end_date=options.end_date,
                ons_cache_dir=options.ons_cache_dir)
            data_filepaths = OnsData.download_flexible(**download_kwargs)
        elif options.all_time:
            data_filepaths = OnsData.download_all()
        else:
            self.parser.error('Please specify a time period')

        package_importer = OnsImporter(filepaths=data_filepaths,
                                       xmlrpc_settings=self.xmlrpc_settings)
        package_loader = OnsLoader(self.client)
        package_loader.load_packages(package_importer.pkg_dict())
Пример #22
0
 def setup_class(self):
     """Load sample file '10' twice, cleaning up if anything fails.

     Each pass must yield exactly one package dict and an error-free load.
     On any failure ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` are called before the exception is
     re-raised.
     """
     super(TestReloadUnknownPublisher, self).setup_class()
     try:
         # The same sample is listed twice so that the second pass is a reload.
         sample_paths = (sample_filepath('10'), sample_filepath('10'))
         for sample_path in sample_paths:
             ons_importer = importer.OnsImporter(sample_path, self.testclient)
             pkg_dicts = list(ons_importer.pkg_dict())
             assert_equal(len(pkg_dicts), 1)
             result = OnsLoader(self.testclient).load_packages(pkg_dicts)
             assert result['num_errors'] == 0, result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #23
0
 def setup_class(self):
     """Import sample file 6, validate every package dict and load the batch.

     All package dicts must carry the name stored in ``self.name``, the
     expected title and the ONS group; the load must report no errors.
     """
     super(TestNationalParkDuplicate, self).setup_class()
     park_path = sample_filepath(6)
     park_importer = importer.OnsImporter(park_path)
     pkg_dicts = list(park_importer.pkg_dict())
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     wanted_title = 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)'
     for candidate in pkg_dicts:
         assert candidate['name'] == self.name, candidate['name']
         assert candidate['title'] == wanted_title, candidate
         assert_equal(candidate['groups'],
                      ['office-for-national-statistics'])
     park_loader = OnsLoader(self.testclient)
     load_result = park_loader.load_packages(pkg_dicts)
     assert load_result['num_errors'] == 0, load_result
Пример #24
0
 def setup_class(self):
     """Import and load sample '10' two times with clean-up on failure.

     Every pass must produce a single package dict and a load result with
     ``num_errors`` equal to 0. If anything raises,
     ``MockDrupalCase.teardown_class()`` and ``model.repo.rebuild_db()``
     run before the exception propagates.
     """
     super(TestReloadUnknownPublisher, self).setup_class()
     try:
         first_path = sample_filepath('10')
         second_path = sample_filepath('10')
         for current_path in (first_path, second_path):
             current_importer = importer.OnsImporter(current_path,
                                                     self.testclient)
             pkg_dicts = list(current_importer.pkg_dict())
             assert_equal(len(pkg_dicts), 1)
             ons_loader = OnsLoader(self.testclient)
             load_result = ons_loader.load_packages(pkg_dicts)
             assert load_result['num_errors'] == 0, load_result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #25
0
 def setup_class(self):
     """Load sample files '3a', '3b' and '3c' in order, cleaning up on failure.

     The most recent load result is kept in ``self.res`` and must report
     no errors. On any failure ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` are called before the exception is
     re-raised.
     """
     super(TestOnsLoadClashTitle, self).setup_class()
     try:
         # ons items have been split into 3 files, because search needs to
         # do indexing in between
         for part in 'abc':
             part_importer = importer.OnsImporter(sample_filepath('3' + part),
                                                  self.testclient)
             pkg_dicts = list(part_importer.pkg_dict())
             self.res = OnsLoader(self.testclient).load_packages(pkg_dicts)
             assert self.res['num_errors'] == 0, self.res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #26
0
 def setup_class(self):
     """Import samples '4a' and '4b', check each package and load them.

     For every package dict the title, the ONS group, the ``date_released``
     extra (must contain '2010-08-') and the ``date_updated`` extra (must be
     empty) are asserted. Each file's load must report no errors.
     """
     super(TestOnsLoadSeries, self).setup_class()
     series_files = [sample_filepath('4a'), sample_filepath('4b')]
     for series_file in series_files:
         series_importer = importer.OnsImporter(series_file)
         pkg_dicts = list(series_importer.pkg_dict())
         for candidate in pkg_dicts:
             assert candidate['title'] == 'Regional Labour Market Statistics', candidate
             assert_equal(candidate['groups'],
                          ['office-for-national-statistics'])
             assert '2010-08-' in candidate['extras']['date_released'], candidate
             assert candidate['extras']['date_updated'] == '', candidate
         series_loader = OnsLoader(self.testclient)
         load_result = series_loader.load_packages(pkg_dicts)
         assert load_result['num_errors'] == 0, load_result
Пример #27
0
 def setup_class(self):
     """Import and load the three '3x' sample files with clean-up on failure.

     ``self.res`` holds the latest load result, which must report no errors.
     If anything raises, ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` run before the exception propagates.
     """
     super(TestOnsLoadClashTitle, self).setup_class()
     try:
         # ons items have been split into 3 files, because search needs to
         # do indexing in between
         for letter in 'abc':
             sample_path = sample_filepath('3' + letter)
             current_importer = importer.OnsImporter(sample_path,
                                                     self.testclient)
             pkg_dicts = list(current_importer.pkg_dict())
             ons_loader = OnsLoader(self.testclient)
             self.res = ons_loader.load_packages(pkg_dicts)
             assert self.res['num_errors'] == 0, self.res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #28
0
 def setup_class(self):
     """Import sample file 6, validate every package dict and load the batch.

     All package dicts must carry the name stored in ``self.name``, the
     expected title, the ONS publisher (after ``strip_organisation_id``)
     and an empty ``published_via`` extra; the load must report no errors.
     """
     super(TestNationalParkDuplicate, self).setup_class()
     park_path = sample_filepath(6)
     park_importer = importer.OnsImporter(park_path)
     pkg_dicts = list(park_importer.pkg_dict())
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     wanted_title = 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)'
     for candidate in pkg_dicts:
         extras = candidate['extras']
         assert candidate['name'] == self.name, candidate['name']
         assert candidate['title'] == wanted_title, candidate
         assert_equal(strip_organisation_id(extras['published_by']),
                      'Office for National Statistics [some_number]')
         assert extras['published_via'] == '', candidate
     park_loader = OnsLoader(self.testclient)
     load_result = park_loader.load_packages(pkg_dicts)
     assert load_result['num_errors'] == 0, load_result
Пример #29
0
 def setup_class(self):
     """Import samples '4a' and '4b', check each package and load them.

     For every package dict the title, the ONS publisher (after
     ``strip_organisation_id``), the ``date_released`` extra (must contain
     '2010-08-') and the ``date_updated`` extra (must be empty) are
     asserted. Each file's load must report no errors.
     """
     super(TestOnsLoadSeries, self).setup_class()
     series_files = [sample_filepath('4a'), sample_filepath('4b')]
     for series_file in series_files:
         series_importer = importer.OnsImporter(series_file)
         pkg_dicts = list(series_importer.pkg_dict())
         for candidate in pkg_dicts:
             assert candidate['title'] == 'Regional Labour Market Statistics', candidate
             stripped = strip_organisation_id(
                 candidate['extras']['published_by'])
             assert_equal(stripped,
                          'Office for National Statistics [some_number]')
             assert '2010-08-' in candidate['extras']['date_released'], candidate
             assert candidate['extras']['date_updated'] == '', candidate
         series_loader = OnsLoader(self.testclient)
         load_result = series_loader.load_packages(pkg_dicts)
         assert load_result['num_errors'] == 0, load_result
Пример #30
0
 def setup_class(self):
     """Load sample file '10', cleaning up if anything fails.

     The single imported package dict is checked for its title and an
     empty ``groups`` list before it is loaded without errors. On any
     failure ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` are called before the exception is
     re-raised.
     """
     super(TestOnsUnknownPublisher, self).setup_class()
     try:
         sample_paths = (sample_filepath('10'),)
         for sample_path in sample_paths:
             ons_importer = importer.OnsImporter(sample_path, self.testclient)
             pkg_dicts = list(ons_importer.pkg_dict())
             assert_equal(len(pkg_dicts), 1)
             only_pkg = pkg_dicts[0]
             assert_equal(only_pkg['title'], 'NHS Cancer Waiting Times in Wales')
             assert_equal(only_pkg['groups'], [])
             result = OnsLoader(self.testclient).load_packages(pkg_dicts)
             assert result['num_errors'] == 0, result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #31
0
    def setup_class(self):
        """Create a package with a clashing name, then load the base sample.

        A package named ``self.clash_name`` is created through
        ``CreateTestData.create_arbitrary`` before the sample file is
        imported and loaded. The load result is kept in ``self.res`` and
        must report no errors.
        """
        super(TestOnsLoadClashSource, self).setup_class()

        self.clash_name = u'cereals_and_oilseeds_production_harvest'
        clashing_package = {
            'name': self.clash_name,
            'title': 'Test clash',
            'groups': ['department-for-environment-food-and-rural-affairs'],
            'extras': {
                'import_source': 'DECC-Jan-09',
            },
        }
        CreateTestData.create_arbitrary([clashing_package])

        pkg_dicts = list(importer.OnsImporter(sample_filepath('')).pkg_dict())
        self.res = OnsLoader(self.testclient).load_packages(pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #32
0
    def setup_class(self):
        """Create a package with a clashing name, then load the base sample.

        A package named ``self.clash_name`` with ``published_by`` and
        ``import_source`` extras is created through
        ``CreateTestData.create_arbitrary`` before the sample file is
        imported and loaded. The load result is kept in ``self.res`` and
        must report no errors.
        """
        super(TestOnsLoadClashSource, self).setup_class()

        self.clash_name = u'cereals_and_oilseeds_production_harvest'
        clash_extras = {
            'published_by':
            'Department for Environment, Food and Rural Affairs',
            'import_source': 'DECC-Jan-09',
        }
        clashing_package = {
            'name': self.clash_name,
            'title': 'Test clash',
            'extras': clash_extras,
        }
        CreateTestData.create_arbitrary([clashing_package])

        pkg_dicts = list(importer.OnsImporter(sample_filepath('')).pkg_dict())
        self.res = OnsLoader(self.testclient).load_packages(pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #33
0
 def setup_class(self):
     """Import sample '10', verify its single package and load it.

     The package dict must carry the expected title and an empty
     ``groups`` list; the load must report ``num_errors`` of 0. If
     anything raises, ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` run before the exception propagates.
     """
     super(TestOnsUnknownPublisher, self).setup_class()
     try:
         sample_paths = (sample_filepath('10'),)
         for current_path in sample_paths:
             current_importer = importer.OnsImporter(current_path,
                                                     self.testclient)
             pkg_dicts = list(current_importer.pkg_dict())
             assert_equal(len(pkg_dicts), 1)
             first_pkg = pkg_dicts[0]
             assert_equal(first_pkg['title'], 'NHS Cancer Waiting Times in Wales')
             assert_equal(first_pkg['groups'], [])
             ons_loader = OnsLoader(self.testclient)
             load_result = ons_loader.load_packages(pkg_dicts)
             assert load_result['num_errors'] == 0, load_result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #34
0
 def setup_class(self):
     """Load sample file 6, cleaning up if anything fails.

     Every imported package dict is checked against ``self.name``, the
     expected title and the ONS group before the batch is loaded without
     errors. On any failure ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` are called before the exception is
     re-raised.
     """
     super(TestNationalParkDuplicate, self).setup_class()
     try:
         sample_path = sample_filepath(6)
         ons_importer = importer.OnsImporter(sample_path, self.testclient)
         pkg_dicts = list(ons_importer.pkg_dict())
         self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
         expected_title = 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)'
         for pkg in pkg_dicts:
             assert pkg['name'] == self.name, pkg['name']
             assert pkg['title'] == expected_title, pkg
             assert_equal(pkg['groups'], ['office-for-national-statistics'])
         result = OnsLoader(self.testclient).load_packages(pkg_dicts)
         assert result['num_errors'] == 0, result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #35
0
 def setup_class(self):
     """Import sample file 6, validate every package dict and load the batch.

     All package dicts must carry the name stored in ``self.name``, the
     expected title and the ONS group; the load must report no errors. If
     anything raises, ``MockDrupalCase.teardown_class()`` and
     ``model.repo.rebuild_db()`` run before the exception propagates.
     """
     super(TestNationalParkDuplicate, self).setup_class()
     try:
         park_path = sample_filepath(6)
         park_importer = importer.OnsImporter(park_path, self.testclient)
         pkg_dicts = list(park_importer.pkg_dict())
         self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
         wanted_title = 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)'
         for candidate in pkg_dicts:
             assert candidate['name'] == self.name, candidate['name']
             assert candidate['title'] == wanted_title, candidate
             assert_equal(candidate['groups'],
                          ['office-for-national-statistics'])
         park_loader = OnsLoader(self.testclient)
         load_result = park_loader.load_packages(pkg_dicts)
         assert load_result['num_errors'] == 0, load_result
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #36
0
    def setup_class(self):
        """Create a clashing package, load the base sample, clean up on failure.

        A package named ``self.clash_name`` is created through
        ``CreateTestData.create_arbitrary`` before the sample file is
        imported and loaded. The load result is kept in ``self.res`` and
        must report no errors. On any failure
        ``MockDrupalCase.teardown_class()`` and ``model.repo.rebuild_db()``
        are called before the exception is re-raised.
        """
        super(TestOnsLoadClashSource, self).setup_class()

        try:
            self.clash_name = u'cereals_and_oilseeds_production_harvest'
            clashing_package = {
                'name': self.clash_name,
                'title': 'Test clash',
                'groups': ['department-for-environment-food-and-rural-affairs'],
                'extras': {
                    'import_source': 'DECC-Jan-09',
                },
            }
            CreateTestData.create_arbitrary([clashing_package])

            ons_importer = importer.OnsImporter(sample_filepath(''),
                                                self.testclient)
            pkg_dicts = list(ons_importer.pkg_dict())
            self.res = OnsLoader(self.testclient).load_packages(pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #37
0
    def setup_class(self):
        """Seed a package whose name clashes, then import and load the sample.

        ``CreateTestData.create_arbitrary`` creates the package named
        ``self.clash_name`` first. The sample is then imported and loaded;
        ``self.res`` stores the load result, which must report no errors.
        If anything raises, ``MockDrupalCase.teardown_class()`` and
        ``model.repo.rebuild_db()`` run before the exception propagates.
        """
        super(TestOnsLoadClashSource, self).setup_class()

        try:
            self.clash_name = u'cereals_and_oilseeds_production_harvest'
            seed_extras = {'import_source': 'DECC-Jan-09'}
            seed_groups = ['department-for-environment-food-and-rural-affairs']
            CreateTestData.create_arbitrary([{
                'name': self.clash_name,
                'title': 'Test clash',
                'groups': seed_groups,
                'extras': seed_extras,
            }])

            sample_importer = importer.OnsImporter(sample_filepath(''),
                                                   self.testclient)
            pkg_dicts = list(sample_importer.pkg_dict())
            sample_loader = OnsLoader(self.testclient)
            self.res = sample_loader.load_packages(pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #38
0
class TestOnsLoadBasic(OnsLoaderBase):
    """Load the ONS sample data once and check what ends up in the model.

    ``setup_class`` runs the sample file through ``OnsImporter`` and
    ``OnsLoader`` as user ``annafan``; the test methods then exercise a few
    loader helpers and inspect the packages stored in the database.
    """
    # Flag read outside this class (presumably by OnsLoaderBase) -- TODO confirm.
    lots_of_publishers = True

    @classmethod
    def setup_class(cls):
        """Import and load the sample data, keeping the results on the class.

        Stores ``pkg_dicts`` (the imported package dicts), ``loader`` and
        ``res`` (the return value of ``load_packages``) as class attributes.
        On failure, ``MockDrupalCase`` is torn down and the database rebuilt
        before the exception is re-raised.
        """
        super(TestOnsLoadBasic, cls).setup_class()
        try:
            user = model.User.by_name(u'annafan')
            assert user
            test_ckan_client = WsgiCkanClient(cls.app, api_key=user.apikey)
            importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
            cls.pkg_dicts = list(importer_.pkg_dict())

            cls.loader = OnsLoader(test_ckan_client)
            cls.res = cls.loader.load_packages(cls.pkg_dicts)
            assert cls.res['num_errors'] == 0, cls.res
        except Exception:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise

    def test_0_search_options(self):
        # A single-group package should yield exactly one search-option dict,
        # with the group given as a plain string rather than a list.
        field_keys = ['title', 'groups']

        pkg_dict = {'title':'titleA',
                    'groups':['Department for Children, Schools and Families']}
        opts = self.loader._get_search_options(field_keys, pkg_dict)
        assert_equal(opts, [{'groups': 'Department for Children, Schools and Families', 'title': 'titleA'}])

    def test_1_hub_id_extraction(self):
        # _get_hub_id should return the resource's 'hub-id' value.
        def assert_id(hub_id_value, expected_id):
            resource = {'description':'Some description',
                        'hub-id':hub_id_value}
            result = self.loader._get_hub_id(resource)
            assert_equal(result, expected_id)
        assert_id("119-46440",
                  "119-46440")

    def test_2_date_choose(self):
        # expected_date_index is 1 for date1, 2 for date2, or None when the
        # result itself is expected to be None.
        def assert_id(date1, date2, earlier_or_later, expected_date_index):
            dates = (date1, date2)
            pkg0 = {'extras':{'date': date1}}
            result = self.loader._choose_date(pkg0, date2,
                                              earlier_or_later,
                                              'date')
            if not expected_date_index:
                assert_equal(result, expected_date_index)
            else:
                assert_equal(result, dates[expected_date_index - 1])
        assert_id('2010-12-01', '2010-12-02', 'earlier', 1)
        assert_id('2010-12-01', '2010-12-02', 'later', 2)
        assert_id('2010-12-02', '2010-12-01', 'earlier', 2)
        assert_id('2010-12-02', '2010-12-01', 'later', 1)
        assert_id('', '2010-12-02', 'earlier', 2)
        assert_id('2010-12-01', '', 'later', 1)
        assert_id('', '', 'earlier', None)

    def test_fields(self):
        # Spot-check the fields of several packages created by the load.
        q = model.Session.query(model.Package)
        # All package names, used only as context in failure messages.
        names = [pkg.name for pkg in q.all()]
        pkg1 = model.Package.by_name(u'uk_official_holdings_of_international_reserves')
        cereals = model.Package.by_name(u'cereals_and_oilseeds_production_harvest')
        custody = model.Package.by_name(u'end_of_custody_licence_release_and_recalls')
        probation = model.Package.by_name(u'probation_statistics_brief')
        assert pkg1, names
        assert cereals, names
        assert custody, names
        assert probation, names
        assert pkg1.title == 'UK Official Holdings of International Reserves', pkg1.title
        assert pkg1.notes.startswith("Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."), pkg1.notes
        assert len(pkg1.resources) == 1, pkg1.resources
        assert pkg1.resources[0].url == 'http://www.hm-treasury.gov.uk/national_statistics.htm', pkg1.resources[0]
        assert_equal(pkg1.resources[0].description, 'December 2009')
        assert_equal(pkg1.resources[0].extras['hub-id'], '119-36345')
        assert len(custody.resources) == 2, custody.resources
        assert custody.resources[0].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
        assert_equal(custody.resources[0].description, 'November 2009')
        assert_equal(custody.resources[0].extras['hub-id'], '119-36836')
        # Report the resource actually being checked if this fails.
        assert custody.resources[1].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[1]
        assert_equal(custody.resources[1].description, 'December 2009')
        assert_equal(custody.resources[1].extras['hub-id'], '119-36838')
        assert pkg1.extras['date_released'] == u'2010-01-06', pkg1.extras['date_released']
        assert probation.extras['date_released'] == u'2010-01-04', probation.extras['date_released']
        assert_equal(group_names(pkg1), [u"her-majestys-treasury"])
        assert_equal(group_names(cereals), [u"department-for-environment-food-and-rural-affairs"])
        assert_equal(group_names(custody), [u"department-of-justice"])
        assert u"Source agency: HM Treasury" in pkg1.notes, pkg1.notes
        # The message must index the same key as the check, otherwise a
        # failure would surface as a KeyError instead of the actual value.
        assert pkg1.extras['categories'] == 'Economy', pkg1.extras['categories']
        assert_equal(pkg1.extras['geographic_coverage'], '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)')
        assert pkg1.extras['national_statistic'] == 'no', pkg1.extras['national_statistic']
        assert cereals.extras['national_statistic'] == 'yes', cereals.extras['national_statistic']
        assert custody.extras['national_statistic'] == 'no', custody.extras['national_statistic']
        assert 'Designation: Official Statistics not designated as National Statistics' in custody.notes
        assert_equal(pkg1.extras['geographic_granularity'], 'UK and GB')
        assert 'Language: English' in pkg1.notes, pkg1.notes

        def check_tags(pkg, tags_list):
            # Every expected tag must be present; extra tags are allowed.
            pkg_tags = [tag.name for tag in pkg.get_tags()]
            for tag in tags_list:
                assert tag in pkg_tags, "Couldn't find tag '%s' in tags: %s" % (tag, pkg_tags)
        check_tags(pkg1, ('economics-and-finance', 'reserves', 'currency', 'assets', 'liabilities', 'gold', 'economy', 'government-receipts-and-expenditure', 'public-sector-finance'))
        check_tags(cereals, ('environment', 'farming'))
        check_tags(custody, ('public-order-justice-and-rights', 'justice-system', 'prisons'))
        assert 'Alternative title: UK Reserves' in pkg1.notes, pkg1.notes

        assert pkg1.extras['external_reference'] == u'ONSHUB', pkg1.extras['external_reference']
        # Check the licence on pkg1 explicitly: no deliberately bound `pkg`
        # exists at this point in the method.
        assert 'Open Government Licence' in pkg1.license.title, pkg1.license.title
        assert pkg1.extras['update_frequency'] == u'monthly', pkg1.extras['update_frequency']
        assert custody.extras['update_frequency'] == u'monthly', custody.extras['update_frequency']

        for pkg in (pkg1, cereals, custody):
            assert pkg.extras['import_source'].startswith('ONS'), '%s %s' % (pkg.name, pkg.extras['import_source'])
Пример #39
0
    def command(self):
        """Download ONS data for the requested period and load it.

        Option values on ``self.options`` are converted in place: ``days``
        via ``int``, ``start_date``/``end_date`` via ``self.parse_date`` and
        ``month``/``months_since`` via ``self.parse_month``.  An unset
        ``ons_cache_dir`` falls back to ``ONS_DEFAULT_CACHE_PATH``.

        The first period option that is set -- checked in the order
        days/start_date/end_date, month, months_since, all_time -- selects
        the ``OnsData`` download call; if none is set ``self.parser.error``
        is invoked.  The resulting files are passed through ``OnsImporter``
        into ``OnsLoader.load_packages`` and the ``StatsList`` report is
        logged at info level.

        Anything raised inside the ``try`` is logged with
        ``log.exception`` and then re-raised.
        """
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)
        log = logging.getLogger(__name__)

        try:
            opts = self.options

            # Convert raw option values in place; options that are unset
            # (falsy) are left as they are.
            option_converters = (
                ('days', int),
                ('start_date', self.parse_date),
                ('end_date', self.parse_date),
                ('month', self.parse_month),
                ('months_since', self.parse_month),
            )
            for option_name, convert in option_converters:
                raw_value = getattr(opts, option_name)
                if raw_value:
                    setattr(opts, option_name, convert(raw_value))
            if not opts.ons_cache_dir:
                opts.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

            # Choose the download strategy from the first period option set.
            if opts.days or opts.start_date or opts.end_date:
                data_filepaths = OnsData.download_flexible(
                    days=opts.days,
                    start_date=opts.start_date,
                    end_date=opts.end_date,
                    ons_cache_dir=opts.ons_cache_dir)
            elif opts.month:
                data_filepaths = OnsData.download_month(
                    year=opts.month.year,
                    month=opts.month.month)
            elif opts.months_since:
                data_filepaths = OnsData.download_months_since(
                    year=opts.months_since.year,
                    month=opts.months_since.month,
                    force_download=opts.force_download)
            elif opts.all_time:
                data_filepaths = OnsData.download_all(
                    force_download=opts.force_download)
            else:
                self.parser.error('Please specify a time period')

            # Only pass a publisher filter to the importer when one was given.
            filter_ = {'publisher': opts.publisher} if opts.publisher else {}

            # The same StatsList is handed to both importer and loader.
            stats = StatsList()
            importer = OnsImporter(filepaths=data_filepaths,
                                   ckanclient=self.client,
                                   stats=stats,
                                   filter_=filter_)
            loader = OnsLoader(self.client, stats)

            loader.load_packages(importer.pkg_dict())
            summary = stats.report()
            log.info('Summary:\n' + summary)
        except:
            # Any problem, make sure it gets logged
            log.exception('ONS Loader exception')
            raise
Пример #40
0
class TestOnsLoadBasic(OnsLoaderBase):
    """Load the ONS sample data once and check what ends up in the model.

    ``setup_class`` runs the sample file through ``OnsImporter`` and
    ``OnsLoader`` as user ``annafan``; the test methods then exercise a few
    loader helpers and inspect the packages stored in the database.
    """
    # Flag read outside this class (presumably by OnsLoaderBase) -- TODO confirm.
    lots_of_publishers = True

    @classmethod
    def setup_class(cls):
        """Import and load the sample data, keeping the results on the class.

        Stores ``pkg_dicts`` (the imported package dicts), ``loader`` and
        ``res`` (the return value of ``load_packages``) as class attributes.
        On failure, ``MockDrupalCase`` is torn down and the database rebuilt
        before the exception is re-raised.
        """
        super(TestOnsLoadBasic, cls).setup_class()
        try:
            user = model.User.by_name(u'annafan')
            assert user
            test_ckan_client = WsgiCkanClient(cls.app, api_key=user.apikey)
            importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
            cls.pkg_dicts = list(importer_.pkg_dict())

            cls.loader = OnsLoader(test_ckan_client)
            cls.res = cls.loader.load_packages(cls.pkg_dicts)
            assert cls.res['num_errors'] == 0, cls.res
        except Exception:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise

    def test_0_search_options(self):
        # A single-group package should yield exactly one search-option dict,
        # with the group given as a plain string rather than a list.
        field_keys = ['title', 'groups']

        pkg_dict = {'title':'titleA',
                    'groups':['Department for Children, Schools and Families']}
        opts = self.loader._get_search_options(field_keys, pkg_dict)
        assert_equal(opts, [{'groups': 'Department for Children, Schools and Families', 'title': 'titleA'}])

    def test_1_hub_id_extraction(self):
        # _get_hub_id should return the resource's 'hub-id' value.
        def assert_id(hub_id_value, expected_id):
            resource = {'description':'Some description',
                        'hub-id':hub_id_value}
            result = self.loader._get_hub_id(resource)
            assert_equal(result, expected_id)
        assert_id("119-46440",
                  "119-46440")

    def test_2_date_choose(self):
        # expected_date_index is 1 for date1, 2 for date2, or None when the
        # result itself is expected to be None.
        def assert_id(date1, date2, earlier_or_later, expected_date_index):
            dates = (date1, date2)
            pkg0 = {'extras':{'date': date1}}
            result = self.loader._choose_date(pkg0, date2,
                                              earlier_or_later,
                                              'date')
            if not expected_date_index:
                assert_equal(result, expected_date_index)
            else:
                assert_equal(result, dates[expected_date_index - 1])
        assert_id('2010-12-01', '2010-12-02', 'earlier', 1)
        assert_id('2010-12-01', '2010-12-02', 'later', 2)
        assert_id('2010-12-02', '2010-12-01', 'earlier', 2)
        assert_id('2010-12-02', '2010-12-01', 'later', 1)
        assert_id('', '2010-12-02', 'earlier', 2)
        assert_id('2010-12-01', '', 'later', 1)
        assert_id('', '', 'earlier', None)

    def test_fields(self):
        # Spot-check the fields of several packages created by the load.
        q = model.Session.query(model.Package)
        # All package names, used only as context in failure messages.
        names = [pkg.name for pkg in q.all()]
        pkg1 = model.Package.by_name(u'uk_official_holdings_of_international_reserves')
        cereals = model.Package.by_name(u'cereals_and_oilseeds_production_harvest')
        custody = model.Package.by_name(u'end_of_custody_licence_release_and_recalls')
        probation = model.Package.by_name(u'probation_statistics_brief')
        assert pkg1, names
        assert cereals, names
        assert custody, names
        assert probation, names
        assert pkg1.title == 'UK Official Holdings of International Reserves', pkg1.title
        assert pkg1.notes.startswith("Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."), pkg1.notes
        assert len(pkg1.resources) == 1, pkg1.resources
        assert pkg1.resources[0].url == 'http://www.hm-treasury.gov.uk/national_statistics.htm', pkg1.resources[0]
        assert_equal(pkg1.resources[0].description, 'December 2009')
        assert_equal(pkg1.resources[0].extras['hub-id'], '119-36345')
        assert len(custody.resources) == 2, custody.resources
        assert custody.resources[0].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
        assert_equal(custody.resources[0].description, 'November 2009')
        assert_equal(custody.resources[0].extras['hub-id'], '119-36836')
        # Report the resource actually being checked if this fails.
        assert custody.resources[1].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[1]
        assert_equal(custody.resources[1].description, 'December 2009')
        assert_equal(custody.resources[1].extras['hub-id'], '119-36838')
        assert pkg1.extras['date_released'] == u'2010-01-06', pkg1.extras['date_released']
        assert probation.extras['date_released'] == u'2010-01-04', probation.extras['date_released']
        assert_equal(group_names(pkg1), [u"her-majestys-treasury"])
        assert_equal(group_names(cereals), [u"department-for-environment-food-and-rural-affairs"])
        assert_equal(group_names(custody), [u"department-of-justice"])
        assert u"Source agency: HM Treasury" in pkg1.notes, pkg1.notes
        # The message must index the same key as the check, otherwise a
        # failure would surface as a KeyError instead of the actual value.
        assert pkg1.extras['categories'] == 'Economy', pkg1.extras['categories']
        assert_equal(pkg1.extras['geographic_coverage'], '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)')
        assert pkg1.extras['national_statistic'] == 'no', pkg1.extras['national_statistic']
        assert cereals.extras['national_statistic'] == 'yes', cereals.extras['national_statistic']
        assert custody.extras['national_statistic'] == 'no', custody.extras['national_statistic']
        assert 'Designation: Official Statistics not designated as National Statistics' in custody.notes
        assert_equal(pkg1.extras['geographic_granularity'], 'UK and GB')
        assert 'Language: English' in pkg1.notes, pkg1.notes

        def check_tags(pkg, tags_list):
            # Every expected tag must be present; extra tags are allowed.
            pkg_tags = [tag.name for tag in pkg.get_tags()]
            for tag in tags_list:
                assert tag in pkg_tags, "Couldn't find tag '%s' in tags: %s" % (tag, pkg_tags)
        check_tags(pkg1, ('economics-and-finance', 'reserves', 'currency', 'assets', 'liabilities', 'gold', 'economy', 'government-receipts-and-expenditure', 'public-sector-finance'))
        check_tags(cereals, ('environment', 'farming'))
        check_tags(custody, ('public-order-justice-and-rights', 'justice-system', 'prisons'))
        assert 'Alternative title: UK Reserves' in pkg1.notes, pkg1.notes

        assert pkg1.extras['external_reference'] == u'ONSHUB', pkg1.extras['external_reference']
        # Check the licence on pkg1 explicitly: no deliberately bound `pkg`
        # exists at this point in the method.
        assert 'Open Government Licence' in pkg1.license.title, pkg1.license.title
        assert pkg1.extras['update_frequency'] == u'monthly', pkg1.extras['update_frequency']
        assert custody.extras['update_frequency'] == u'monthly', custody.extras['update_frequency']

        for pkg in (pkg1, cereals, custody):
            assert pkg.extras['import_source'].startswith('ONS'), '%s %s' % (pkg.name, pkg.extras['import_source'])