Пример #1
0
    def command(self):
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)
        log = logging.getLogger(__name__)

        try:
            if self.options.days:
                self.options.days = int(self.options.days)
            if self.options.start_date:
                self.options.start_date = self.parse_date(self.options.start_date)
            if self.options.end_date:
                self.options.end_date = self.parse_date(self.options.end_date)
            if self.options.month:
                self.options.month = self.parse_month(self.options.month)
            if self.options.months_since:
                self.options.months_since = self.parse_month(self.options.months_since)
            if not self.options.ons_cache_dir:
                self.options.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

            if self.options.days or \
                self.options.start_date or \
                self.options.end_date:
                data_filepaths = OnsData.download_flexible(
                    days=self.options.days,
                    start_date=self.options.start_date,
                    end_date=self.options.end_date,
                    ons_cache_dir=self.options.ons_cache_dir)

            elif self.options.month:
                data_filepaths = OnsData.download_month(year=self.options.month.year,
                                                        month=self.options.month.month)
            elif self.options.months_since:
                data_filepaths = OnsData.download_months_since(
                    year=self.options.months_since.year,
                    month=self.options.months_since.month,
                    force_download=self.options.force_download)
            elif self.options.all_time:
                data_filepaths = OnsData.download_all(force_download=self.options.force_download)
            else:
                self.parser.error('Please specify a time period')

            filter_ = {}
            if self.options.publisher:
                filter_['publisher'] = self.options.publisher

            stats = StatsList()
            importer = OnsImporter(filepaths=data_filepaths,
                                   ckanclient=self.client, stats=stats,
                                   filter_=filter_)
            loader = OnsLoader(self.client, stats)

            loader.load_packages(importer.pkg_dict())
            log.info('Summary:\n' + stats.report())
        except:
            # Any problem, make sure it gets logged
            log.exception('ONS Loader exception')
            raise
Пример #2
0
    def setup_class(self):
        super(TestOnsLoadMissingDept, self).setup_class()

        try:
            self.orig_pkg_dict = {
                 "name": u"measuring_subjective_wellbeing_in_the_uk",
                 "title": "Measuring Subjective Wellbeing in the UK",
                 "notes": "This report reviews:\n\nWhat is subjective wellbeing and why should we measure it?\n\nHow subjective wellbeing is currently measured in the UK - what subjective wellbeing questions are already being asked on major social surveys in the UK\n\nThe potential uses of subjective wellbeing data collected via these surveys\n\n\nIt concludes that subjective wellbeing is a valid construct that can be measured reliably. This is the first output of ONS' work on subjective wellbeing.\n\nSource agency: Office for National Statistics\n\nDesignation: Supporting material\n\nLanguage: English\n\nAlternative title: Working Paper: Measuring Subjective Wellbeing in the UK",
                 "license_id": "ukcrown-withrights",
                 "tags": ["communities", "health-well-being-and-care", "people-and-places", "societal-wellbeing", "subjective-wellbeing-subjective-well-being-objective-measures-subjective-measures", "well-being"],
                 "groups": ['office-for-national-statistics'],
                 "extras": {"geographic_coverage": "111100: United Kingdom (England, Scotland, Wales, Northern Ireland)", "geographic_granularity": "UK and GB", "external_reference": "ONSHUB", "temporal_granularity": "", "date_updated": "", "precision": "", "temporal_coverage_to": "", "temporal_coverage_from": "", "national_statistic": "no", "import_source": "ONS-ons_data_7_days_to_2010-09-17", "update_frequency": "", "date_released": "2010-09-14", "categories": "People and Places"},
                "resources": [{"url": "http://www.ons.gov.uk/about-statistics/measuring-equality/wellbeing/news-and-events/index.html", "format": "", "description": "2010", "extras":{"hub-id":"77-31166"}}],
                 }
            CreateTestData.create_arbitrary([self.orig_pkg_dict])

            # same data is imported, but should find record and add department
            importer_ = importer.OnsImporter(sample_filepath(5), self.testclient)
            self.pkg_dict = [pkg_dict for pkg_dict in importer_.pkg_dict()][0]
            loader = OnsLoader(self.testclient)
            self.res = loader.load_package(self.pkg_dict)
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #3
0
    def command(self):
        ApiCommand.command(self)
        XmlRpcCommand.command(self)

        if self.options.days:
            self.options.days = int(self.options.days)
        if self.options.start_date:
            self.options.start_date = self.parse_date(self.options.start_date)
        if self.options.end_date:
            self.options.end_date = self.parse_date(self.options.end_date)

        if self.options.days or self.options.start_date or self.options.end_date:
            data_filepaths = OnsData.download_flexible(
                days=self.options.days,
                start_date=self.options.start_date,
                end_date=self.options.end_date,
                ons_cache_dir=self.options.ons_cache_dir,
            )

        elif self.options.all_time:
            data_filepaths = OnsData.download_all()
        else:
            self.parser.error("Please specify a time period")

        importer = OnsImporter(filepaths=data_filepaths, xmlrpc_settings=self.xmlrpc_settings)
        loader = OnsLoader(self.client)

        loader.load_packages(importer.pkg_dict())
Пример #4
0
    def command(self):
        ApiCommand.command(self)
        XmlRpcCommand.command(self)

        if self.options.days:
            self.options.days = int(self.options.days)
        if self.options.start_date:
            self.options.start_date = self.parse_date(self.options.start_date)
        if self.options.end_date:
            self.options.end_date = self.parse_date(self.options.end_date)
        if not self.options.ons_cache_dir:
            self.options.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

        if self.options.days or \
               self.options.start_date or \
               self.options.end_date:
            data_filepaths = OnsData.download_flexible(
                days=self.options.days,
                start_date=self.options.start_date,
                end_date=self.options.end_date,
                ons_cache_dir=self.options.ons_cache_dir)

        elif self.options.all_time:
            data_filepaths = OnsData.download_all()
        else:
            self.parser.error('Please specify a time period')

        importer = OnsImporter(filepaths=data_filepaths,
                               xmlrpc_settings=self.xmlrpc_settings)
        loader = OnsLoader(self.client)

        loader.load_packages(importer.pkg_dict())
Пример #5
0
    def setup_class(self):
        super(TestOnsLoadMissingDept, self).setup_class()

        try:
            self.orig_pkg_dict = {
                 "name": u"measuring_subjective_wellbeing_in_the_uk",
                 "title": "Measuring Subjective Wellbeing in the UK",
                 "notes": "This report reviews:\n\nWhat is subjective wellbeing and why should we measure it?\n\nHow subjective wellbeing is currently measured in the UK - what subjective wellbeing questions are already being asked on major social surveys in the UK\n\nThe potential uses of subjective wellbeing data collected via these surveys\n\n\nIt concludes that subjective wellbeing is a valid construct that can be measured reliably. This is the first output of ONS' work on subjective wellbeing.\n\nSource agency: Office for National Statistics\n\nDesignation: Supporting material\n\nLanguage: English\n\nAlternative title: Working Paper: Measuring Subjective Wellbeing in the UK",
                 "license_id": "ukcrown-withrights",
                 "tags": ["communities", "health-well-being-and-care", "people-and-places", "societal-wellbeing", "subjective-wellbeing-subjective-well-being-objective-measures-subjective-measures", "well-being"],
                 "groups": ['office-for-national-statistics'],
                 "extras": {"geographic_coverage": "111100: United Kingdom (England, Scotland, Wales, Northern Ireland)", "geographic_granularity": "UK and GB", "external_reference": "ONSHUB", "temporal_granularity": "", "date_updated": "", "precision": "", "temporal_coverage_to": "", "temporal_coverage_from": "", "national_statistic": "no", "import_source": "ONS-ons_data_7_days_to_2010-09-17", "update_frequency": "", "date_released": "2010-09-14", "categories": "People and Places"},
                "resources": [{"url": "http://www.ons.gov.uk/about-statistics/measuring-equality/wellbeing/news-and-events/index.html", "format": "", "description": "2010", "extras":{"hub-id":"77-31166"}}],
                 }
            CreateTestData.create_arbitrary([self.orig_pkg_dict])

            # same data is imported, but should find record and add department
            importer_ = importer.OnsImporter(sample_filepath(5), self.testclient)
            self.pkg_dict = [pkg_dict for pkg_dict in importer_.pkg_dict()][0]
            loader = OnsLoader(self.testclient)
            self.res = loader.load_package(self.pkg_dict)
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #6
0
    def setup_class(self):
        super(TestOnsLoadSeries, self).setup_class()
        TestOnsLoadSeries.initial_resources = set()

        try:
            for filepath in [sample_filepath('4a'), sample_filepath('4b')]:
                importer_ = importer.OnsImporter(filepath, self.testclient)
                pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
                for pkg_dict in pkg_dicts:
                    assert pkg_dict['title'] == 'Regional Labour Market Statistics', pkg_dict
                    assert_equal(pkg_dict['groups'],
                                 ['office-for-national-statistics'])
                    assert '2010-08-' in pkg_dict['extras']['date_released'], pkg_dict
                    assert pkg_dict['extras']['date_updated'] == '', pkg_dict
                loader = OnsLoader(self.testclient)
                res = loader.load_packages(pkg_dicts)

                for pid in res['pkg_ids']:
                    p = model.Package.get(pid)
                    if p:
                        TestOnsLoadSeries.initial_resources = \
                            TestOnsLoadSeries.initial_resources | set([d.id for d in p.resources])
                assert res['num_errors'] == 0, res
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #7
0
    def setup_class(self):
        super(TestOnsLoadSeries, self).setup_class()
        TestOnsLoadSeries.initial_resources = set()

        try:
            for filepath in [sample_filepath('4a'), sample_filepath('4b')]:
                importer_ = importer.OnsImporter(filepath, self.testclient)
                pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
                for pkg_dict in pkg_dicts:
                    assert pkg_dict['title'] == 'Regional Labour Market Statistics', pkg_dict
                    assert_equal(pkg_dict['groups'],
                                 ['office-for-national-statistics'])
                    assert '2010-08-' in pkg_dict['extras']['date_released'], pkg_dict
                    assert pkg_dict['extras']['date_updated'] == '', pkg_dict
                loader = OnsLoader(self.testclient)
                res = loader.load_packages(pkg_dicts)

                for pid in res['pkg_ids']:
                    p = model.Package.get(pid)
                    if p:
                        TestOnsLoadSeries.initial_resources = \
                            TestOnsLoadSeries.initial_resources | set([d.id for d in p.resources])
                assert res['num_errors'] == 0, res
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #8
0
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        importer_ = importer.OnsImporter(sample_filepath(''))
        self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

        self.loader = OnsLoader(self.testclient)
        self.res = self.loader.load_packages(self.pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #9
0
 def setup_class(self):
     super(TestReloadUnknownPublisher, self).setup_class()
     for filepath in (sample_filepath('10'), sample_filepath('10')):
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         assert_equal(len(pkg_dicts), 1)
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #10
0
 def setup_class(self):
     super(TestOnsLoadTwice, self).setup_class()
     # sample_filepath(2 has the same packages as 1, but slightly updated
     for filepath in [sample_filepath(''), sample_filepath(2)]:
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #11
0
 def setup_class(self):
     super(TestOnsLoadTwice, self).setup_class()
     # sample_filepath(2 has the same packages as 1, but slightly updated
     for filepath in [sample_filepath(''), sample_filepath(2)]:
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #12
0
 def setup_class(self):
     super(TestReloadUnknownPublisher, self).setup_class()
     for filepath in (sample_filepath('10'), sample_filepath('10')):
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         assert_equal(len(pkg_dicts), 1)
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #13
0
 def setup_class(self):
     super(TestOnsLoadClashTitle, self).setup_class()
     # ons items have been split into 3 files, because search needs to
     # do indexing in between
     for suffix in 'abc':
         importer_ = importer.OnsImporter(sample_filepath('3' + suffix))
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         loader = OnsLoader(self.testclient)
         self.res = loader.load_packages(pkg_dicts)
         assert self.res['num_errors'] == 0, self.res
Пример #14
0
 def setup_class(self):
     super(TestOnsLoadClashTitle, self).setup_class()
     # ons items have been split into 3 files, because search needs to
     # do indexing in between
     for suffix in 'abc':
         importer_ = importer.OnsImporter(sample_filepath('3' + suffix))
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         loader = OnsLoader(self.testclient)
         self.res = loader.load_packages(pkg_dicts)
         assert self.res['num_errors'] == 0, self.res
Пример #15
0
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        user = model.User.by_name(u'annafan')
        assert user
        test_ckan_client = WsgiCkanClient(self.app, api_key=user.apikey)
        importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
        self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

        self.loader = OnsLoader(self.testclient)
        self.res = self.loader.load_packages(self.pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #16
0
 def setup_class(self):
     super(TestOnsUnknownPublisher, self).setup_class()
     for filepath in (sample_filepath('10'),):
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         assert_equal(len(pkg_dicts), 1)
         pkg_dict = pkg_dicts[0]
         assert_equal(pkg_dict['title'], 'NHS Cancer Waiting Times in Wales')
         assert_equal(pkg_dict['groups'], [])
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #17
0
    def setup_class(self):
        super(TestAgencyFind, self).setup_class()
        try:
            self.orig_pkg_dict = {
                "name": u"national_child_measurement_programme",
                "title": "National Child Measurement Programme",
                "version": None, "url": None, "author": None, "author_email": None, "maintainer": None, "maintainer_email": None,
                "notes": "The National Child Measurement Programme weighs and measures primary school children.\r\nThis publication was formerly announced as \"National Child Measurement Programme - Statistics on Child Obesity 2008-09\" but the title has been amended to reflect suggestions from the UKSA Assessments Board.\r\nSource agency: Information Centre for Health and Social Care\r\nDesignation: National Statistics\r\nLanguage: English\r\nAlternative title: National Child Measurement Programme",
                "license_id": "uk-ogl",
                "tags": ["health", "health-and-social-care", "health-of-the-population", "lifestyles-and-behaviours", "nhs", "well-being-and-care"],
                "groups": ['nhs-information-centre-for-health-and-social-care'],
                "extras": {
                    "geographic_coverage": "100000: England",
                    "geographic_granularity": "Country",
                    "external_reference": "ONSHUB",
                    "temporal_coverage-from": "",
                    "temporal_granularity": "",
                    "date_updated": "",
                    "precision": "",
                    "geographic_granularity": "",
                    "temporal_coverage_to": "",
                    "temporal_coverage_from": "",
                    "taxonomy_url": "",
                    "import_source": "ONS-ons_data_2009-12",
                    "date_released": "2009-12-10",
                    "temporal_coverage-to": "",
                    "update_frequency": "",
                    "national_statistic": "yes",
                    "categories": "Health and Social Care"},
                "resources": [{"url": "http://www.ic.nhs.uk/ncmp", "format": "", "description": "England, 2008/09 School Year", "extras":{"hub-id":"119-37085", "publish-date":"2008-01-01"}},
                              {"url": "http://www.dh.gov.uk/en/Publichealth/Healthimprovement/Healthyliving/DH_073787", "format": "", "description": "2008", "extras":{"hub-id":"119-31792", "publish-date":"2007-01-01"}},
                              {"url": "http://www.ic.nhs.uk/ncmp", "format": "", "description": "Statistics on child obesity 2007-08", "extras":{"hub-id":"119-31784", "publish-date":"2009-01-01"}}],
                }

            CreateTestData.create_arbitrary([self.orig_pkg_dict])

            # same data is imported, but should find record and add department
            importer_ = importer.OnsImporter(sample_filepath(8), self.testclient)
            self.pkg_dict = [pkg_dict for pkg_dict in importer_.pkg_dict()][0]
            assert self.pkg_dict['groups'][0].startswith('nhs-information')
            loader = OnsLoader(self.testclient)
            print self.pkg_dict
            # load package twice, to ensure reload works too
            self.res = loader.load_package(self.pkg_dict)
            self.res = loader.load_package(self.pkg_dict)
            self.name = self.orig_pkg_dict['name']
            self.num_resources_originally = len(self.orig_pkg_dict['resources'])
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #18
0
    def setup_class(self):
        super(TestAgencyFind, self).setup_class()
        try:
            self.orig_pkg_dict = {
                "name": u"national_child_measurement_programme",
                "title": "National Child Measurement Programme",
                "version": None, "url": None, "author": None, "author_email": None, "maintainer": None, "maintainer_email": None,
                "notes": "The National Child Measurement Programme weighs and measures primary school children.\r\nThis publication was formerly announced as \"National Child Measurement Programme - Statistics on Child Obesity 2008-09\" but the title has been amended to reflect suggestions from the UKSA Assessments Board.\r\nSource agency: Information Centre for Health and Social Care\r\nDesignation: National Statistics\r\nLanguage: English\r\nAlternative title: National Child Measurement Programme",
                "license_id": "uk-ogl",
                "tags": ["health", "health-and-social-care", "health-of-the-population", "lifestyles-and-behaviours", "nhs", "well-being-and-care"],
                "groups": ['nhs-information-centre-for-health-and-social-care'],
                "extras": {
                    "geographic_coverage": "100000: England",
                    "geographic_granularity": "Country",
                    "external_reference": "ONSHUB",
                    "temporal_coverage-from": "",
                    "temporal_granularity": "",
                    "date_updated": "",
                    "precision": "",
                    "geographic_granularity": "",
                    "temporal_coverage_to": "",
                    "temporal_coverage_from": "",
                    "taxonomy_url": "",
                    "import_source": "ONS-ons_data_2009-12",
                    "date_released": "2009-12-10",
                    "temporal_coverage-to": "",
                    "update_frequency": "",
                    "national_statistic": "yes",
                    "categories": "Health and Social Care"},
                "resources": [{"url": "http://www.ic.nhs.uk/ncmp", "format": "", "description": "England, 2008/09 School Year", "extras":{"hub-id":"119-37085", "publish-date":"2008-01-01"}},
                              {"url": "http://www.dh.gov.uk/en/Publichealth/Healthimprovement/Healthyliving/DH_073787", "format": "", "description": "2008", "extras":{"hub-id":"119-31792", "publish-date":"2007-01-01"}},
                              {"url": "http://www.ic.nhs.uk/ncmp", "format": "", "description": "Statistics on child obesity 2007-08", "extras":{"hub-id":"119-31784", "publish-date":"2009-01-01"}}],
                }

            CreateTestData.create_arbitrary([self.orig_pkg_dict])

            # same data is imported, but should find record and add department
            importer_ = importer.OnsImporter(sample_filepath(8), self.testclient)
            self.pkg_dict = [pkg_dict for pkg_dict in importer_.pkg_dict()][0]
            assert self.pkg_dict['groups'][0].startswith('nhs-information')
            loader = OnsLoader(self.testclient)
            print self.pkg_dict
            # load package twice, to ensure reload works too
            self.res = loader.load_package(self.pkg_dict)
            self.res = loader.load_package(self.pkg_dict)
            self.name = self.orig_pkg_dict['name']
            self.num_resources_originally = len(self.orig_pkg_dict['resources'])
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #19
0
 def setup_class(self):
     super(TestOnsUnknownPublisher, self).setup_class()
     for filepath in (sample_filepath('10'), ):
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         assert_equal(len(pkg_dicts), 1)
         pkg_dict = pkg_dicts[0]
         assert_equal(pkg_dict['title'],
                      'NHS Cancer Waiting Times in Wales')
         assert_equal(pkg_dict['groups'], [])
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #20
0
 def setup_class(self):
     super(TestNationalParkDuplicate, self).setup_class()
     filepath = sample_filepath(6)
     importer_ = importer.OnsImporter(filepath)
     pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     for pkg_dict in pkg_dicts:
         assert pkg_dict['name'] == self.name, pkg_dict['name']
         assert pkg_dict['title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
         assert_equal(pkg_dict['groups'], ['office-for-national-statistics'])
     loader = OnsLoader(self.testclient)
     res = loader.load_packages(pkg_dicts)
     assert res['num_errors'] == 0, res
Пример #21
0
 def setup_class(self):
     super(TestNationalParkDuplicate, self).setup_class()
     filepath = sample_filepath(6)
     importer_ = importer.OnsImporter(filepath)
     pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     for pkg_dict in pkg_dicts:
         assert pkg_dict['name'] == self.name, pkg_dict['name']
         assert pkg_dict['title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
         assert_equal(strip_organisation_id(pkg_dict['extras']['published_by']), 'Office for National Statistics [some_number]')
         assert pkg_dict['extras']['published_via'] == '', pkg_dict
     loader = OnsLoader(self.testclient)
     res = loader.load_packages(pkg_dicts)
     assert res['num_errors'] == 0, res
Пример #22
0
 def setup_class(self):
     super(TestOnsLoadSeries, self).setup_class()
     for filepath in [sample_filepath('4a'), sample_filepath('4b')]:
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         for pkg_dict in pkg_dicts:
             assert pkg_dict['title'] == 'Regional Labour Market Statistics', pkg_dict
             assert_equal(pkg_dict['groups'],
                          ['office-for-national-statistics'])
             assert '2010-08-' in pkg_dict['extras']['date_released'], pkg_dict
             assert pkg_dict['extras']['date_updated'] == '', pkg_dict
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #23
0
 def setup_class(self):
     super(TestOnsLoadSeries, self).setup_class()
     for filepath in [sample_filepath('4a'), sample_filepath('4b')]:
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         for pkg_dict in pkg_dicts:
             assert pkg_dict['title'] == 'Regional Labour Market Statistics', pkg_dict
             assert_equal(strip_organisation_id(pkg_dict['extras']['published_by']),
                          'Office for National Statistics [some_number]')
             assert '2010-08-' in pkg_dict['extras']['date_released'], pkg_dict
             assert pkg_dict['extras']['date_updated'] == '', pkg_dict
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #24
0
 def setup_class(self):
     super(TestOnsLoadTwice, self).setup_class()
     try:
         # sample_filepath(2 has the same packages as 1, but slightly updated
         for filepath in [sample_filepath(''), sample_filepath(2)]:
             importer_ = importer.OnsImporter(filepath, self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             loader = OnsLoader(self.testclient)
             res = loader.load_packages(pkg_dicts)
             assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #25
0
 def setup_class(self):
     super(TestOnsLoadTwice, self).setup_class()
     try:
         # sample_filepath(2 has the same packages as 1, but slightly updated
         for filepath in [sample_filepath(''), sample_filepath(2)]:
             importer_ = importer.OnsImporter(filepath, self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             loader = OnsLoader(self.testclient)
             res = loader.load_packages(pkg_dicts)
             assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #26
0
 def setup_class(self):
     super(TestNationalParkDuplicate, self).setup_class()
     filepath = sample_filepath(6)
     importer_ = importer.OnsImporter(filepath)
     pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     for pkg_dict in pkg_dicts:
         assert pkg_dict['name'] == self.name, pkg_dict['name']
         assert pkg_dict[
             'title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
         assert_equal(pkg_dict['groups'],
                      ['office-for-national-statistics'])
     loader = OnsLoader(self.testclient)
     res = loader.load_packages(pkg_dicts)
     assert res['num_errors'] == 0, res
Пример #27
0
 def setup_class(self):
     super(TestReloadUnknownPublisher, self).setup_class()
     try:
         for filepath in (sample_filepath('10'), sample_filepath('10')):
             importer_ = importer.OnsImporter(filepath, self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             assert_equal(len(pkg_dicts), 1)
             loader = OnsLoader(self.testclient)
             res = loader.load_packages(pkg_dicts)
             assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #28
0
 def setup_class(self):
     super(TestReloadUnknownPublisher, self).setup_class()
     try:
         for filepath in (sample_filepath('10'), sample_filepath('10')):
             importer_ = importer.OnsImporter(filepath, self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             assert_equal(len(pkg_dicts), 1)
             loader = OnsLoader(self.testclient)
             res = loader.load_packages(pkg_dicts)
             assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #29
0
    def setup_class(self):
        super(TestDeathsOverwrite, self).setup_class()
        try:
            self.orig_pkg_dict = {
                "name": u"weekly_provisional_figures_on_deaths_registered_in_england_and_wales",
                "title": "Weekly provisional figures on deaths registered in England and Wales",
                "version": None, "url": None, "author": "Office for National Statistics", "author_email": None, "maintainer": None, "maintainer_email": None,
                "notes": "Weekly death figures provide provisional counts of the number of deaths registered in England and Wales in the latest four weeks for which data are available up to the end of 2009. From week one 2010 the latest eight weeks for which data are available will be published.\n\nSource agency: Office for National Statistics\n\nDesignation: National Statistics\n\nLanguage: English\n\nAlternative title: Weekly deaths",
                "license_id": "ukcrown-withrights",
                "tags": ["death", "deaths", "life-events", "life-in-the-community", "mortality-rates", "population", "weekly-deaths"],
                "groups": ['office-for-national-statistics'],
                "extras": {
                    "geographic_coverage": "101000: England, Wales",
                    "geographic_granularity": "Country",
                    "external_reference": "ONSHUB",
                    "temporal_coverage-from": "",
                    "temporal_granularity": "",
                    "date_updated": "",
                    "series": "Weekly provisional figures on deaths registered in England and Wales",
                    "precision": "",
                    "geographic_granularity": "",
                    "temporal_coverage_to": "",
                    "temporal_coverage_from": "",
                    "taxonomy_url": "",
                    "import_source": "ONS-ons_data_60_days_to_2010-09-22",
                    "date_released": "2010-08-03",
                    "temporal_coverage-to": "",
                    "update_frequency": "",
                    "national_statistic": "yes",
                    "categories": "Population"},
                "resources": [
                    {"url": "http://www.statistics.gov.uk/StatBase/Prep/9684.asp", "format": "", "description": "17/07/2009", "hash": "", "extras": {"hub-id": "77-27942"} }],
                }

            CreateTestData.create_arbitrary([self.orig_pkg_dict])

            # same data is imported, but should find record and add department
            importer_ = importer.OnsImporter(sample_filepath(7), self.testclient)
            self.pkg_dict = [pkg_dict for pkg_dict in importer_.pkg_dict()][0]
            loader = OnsLoader(self.testclient)
            print self.pkg_dict
            self.res = loader.load_package(self.pkg_dict)
            self.name = self.orig_pkg_dict['name']
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #30
0
    def setup_class(self):
        super(TestDeathsOverwrite, self).setup_class()
        try:
            self.orig_pkg_dict = {
                "name": u"weekly_provisional_figures_on_deaths_registered_in_england_and_wales",
                "title": "Weekly provisional figures on deaths registered in England and Wales",
                "version": None, "url": None, "author": "Office for National Statistics", "author_email": None, "maintainer": None, "maintainer_email": None,
                "notes": "Weekly death figures provide provisional counts of the number of deaths registered in England and Wales in the latest four weeks for which data are available up to the end of 2009. From week one 2010 the latest eight weeks for which data are available will be published.\n\nSource agency: Office for National Statistics\n\nDesignation: National Statistics\n\nLanguage: English\n\nAlternative title: Weekly deaths",
                "license_id": "ukcrown-withrights",
                "tags": ["death", "deaths", "life-events", "life-in-the-community", "mortality-rates", "population", "weekly-deaths"],
                "groups": ['office-for-national-statistics'],
                "extras": {
                    "geographic_coverage": "101000: England, Wales",
                    "geographic_granularity": "Country",
                    "external_reference": "ONSHUB",
                    "temporal_coverage-from": "",
                    "temporal_granularity": "",
                    "date_updated": "",
                    "series": "Weekly provisional figures on deaths registered in England and Wales",
                    "precision": "",
                    "geographic_granularity": "",
                    "temporal_coverage_to": "",
                    "temporal_coverage_from": "",
                    "taxonomy_url": "",
                    "import_source": "ONS-ons_data_60_days_to_2010-09-22",
                    "date_released": "2010-08-03",
                    "temporal_coverage-to": "",
                    "update_frequency": "",
                    "national_statistic": "yes",
                    "categories": "Population"},
                "resources": [
                    {"url": "http://www.statistics.gov.uk/StatBase/Prep/9684.asp", "format": "", "description": "17/07/2009", "hash": "", "extras": {"hub-id": "77-27942"} }],
                }

            CreateTestData.create_arbitrary([self.orig_pkg_dict])

            # same data is imported, but should find record and add department
            importer_ = importer.OnsImporter(sample_filepath(7), self.testclient)
            self.pkg_dict = [pkg_dict for pkg_dict in importer_.pkg_dict()][0]
            loader = OnsLoader(self.testclient)
            print self.pkg_dict
            self.res = loader.load_package(self.pkg_dict)
            self.name = self.orig_pkg_dict['name']
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #31
0
 def setup_class(self):
     super(TestOnsLoadSeries, self).setup_class()
     for filepath in [sample_filepath('4a'), sample_filepath('4b')]:
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         for pkg_dict in pkg_dicts:
             assert pkg_dict[
                 'title'] == 'Regional Labour Market Statistics', pkg_dict
             assert_equal(pkg_dict['groups'],
                          ['office-for-national-statistics'])
             assert '2010-08-' in pkg_dict['extras'][
                 'date_released'], pkg_dict
             assert pkg_dict['extras']['date_updated'] == '', pkg_dict
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #32
0
 def setup_class(self):
     super(TestOnsLoadClashTitle, self).setup_class()
     try:
         # ons items have been split into 3 files, because search needs to
         # do indexing in between
         for suffix in 'abc':
             importer_ = importer.OnsImporter(sample_filepath('3' + suffix), self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             loader = OnsLoader(self.testclient)
             self.res = loader.load_packages(pkg_dicts)
             assert self.res['num_errors'] == 0, self.res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #33
0
    def test_load(self):
        user = model.User.by_name(u'testsysadmin')
        assert user
        testclient_admin = WsgiCkanClient(self.app, api_key=user.apikey)
        loader = OnsLoader(testclient_admin)
        print self.pkg_dict
        self.res = loader.load_package(self.pkg_dict)
        self.name = self.orig_pkg_dict['name']
        self.decoy_name = self.deleted_decoy_pkg_dict['name']
        self.num_resources_originally = len(self.orig_pkg_dict['resources'])

        names = [pkg.name for pkg in model.Session.query(model.Package).all()]
        assert_equal(set(names), set((self.name, self.decoy_name)))
        pkg = model.Package.by_name(self.name)
        assert pkg
        assert_equal(len(pkg.resources), self.num_resources_originally + 1)
Пример #34
0
 def setup_class(self):
     super(TestOnsLoadClashTitle, self).setup_class()
     try:
         # ons items have been split into 3 files, because search needs to
         # do indexing in between
         for suffix in 'abc':
             importer_ = importer.OnsImporter(sample_filepath('3' + suffix), self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             loader = OnsLoader(self.testclient)
             self.res = loader.load_packages(pkg_dicts)
             assert self.res['num_errors'] == 0, self.res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #35
0
    def test_load(self):
        user = model.User.by_name(u'testsysadmin')
        assert user
        testclient_admin = WsgiCkanClient(self.app, api_key=user.apikey)
        loader = OnsLoader(testclient_admin)
        print self.pkg_dict
        self.res = loader.load_package(self.pkg_dict)
        self.name = self.orig_pkg_dict['name']
        self.decoy_name = self.deleted_decoy_pkg_dict['name']
        self.num_resources_originally = len(self.orig_pkg_dict['resources'])

        names = [pkg.name for pkg in model.Session.query(model.Package).all()]
        assert_equal(set(names), set((self.name, self.decoy_name)))
        pkg = model.Package.by_name(self.name)
        assert pkg
        assert_equal(len(pkg.resources), self.num_resources_originally + 1)
Пример #36
0
 def setup_class(self):
     super(TestOnsLoadSeries, self).setup_class()
     for filepath in [sample_filepath('4a'), sample_filepath('4b')]:
         importer_ = importer.OnsImporter(filepath)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         for pkg_dict in pkg_dicts:
             assert pkg_dict[
                 'title'] == 'Regional Labour Market Statistics', pkg_dict
             assert_equal(
                 strip_organisation_id(pkg_dict['extras']['published_by']),
                 'Office for National Statistics [some_number]')
             assert '2010-08-' in pkg_dict['extras'][
                 'date_released'], pkg_dict
             assert pkg_dict['extras']['date_updated'] == '', pkg_dict
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
Пример #37
0
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        try:
            user = model.User.by_name(u'annafan')
            assert user
            test_ckan_client = WsgiCkanClient(self.app, api_key=user.apikey)
            importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
            self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

            self.loader = OnsLoader(test_ckan_client)
            self.res = self.loader.load_packages(self.pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except Exception:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #38
0
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        importer_ = importer.OnsImporter(sample_filepath(''))
        self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

        self.loader = OnsLoader(self.testclient)
        self.res = self.loader.load_packages(self.pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #39
0
 def setup_class(self):
     super(TestNationalParkDuplicate, self).setup_class()
     filepath = sample_filepath(6)
     importer_ = importer.OnsImporter(filepath)
     pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
     self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
     for pkg_dict in pkg_dicts:
         assert pkg_dict['name'] == self.name, pkg_dict['name']
         assert pkg_dict[
             'title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
         assert_equal(
             strip_organisation_id(pkg_dict['extras']['published_by']),
             'Office for National Statistics [some_number]')
         assert pkg_dict['extras']['published_via'] == '', pkg_dict
     loader = OnsLoader(self.testclient)
     res = loader.load_packages(pkg_dicts)
     assert res['num_errors'] == 0, res
Пример #40
0
    def command(self):
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)

        if self.options.days:
            self.options.days = int(self.options.days)
        if self.options.start_date:
            self.options.start_date = self.parse_date(self.options.start_date)
        if self.options.end_date:
            self.options.end_date = self.parse_date(self.options.end_date)
        if self.options.month:
            self.options.month = self.parse_month(self.options.month)
        if self.options.months_since:
            self.options.months_since = self.parse_month(self.options.months_since)
        if not self.options.ons_cache_dir:
            self.options.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

        if self.options.days or \
               self.options.start_date or \
               self.options.end_date:
            data_filepaths = OnsData.download_flexible(
                days=self.options.days,
                start_date=self.options.start_date,
                end_date=self.options.end_date,
                ons_cache_dir=self.options.ons_cache_dir)

        elif self.options.month:
            data_filepaths = OnsData.download_month(year=self.options.month.year,
                                                    month=self.options.month.month)
        elif self.options.months_since:
            data_filepaths = OnsData.download_months_since(
                year=self.options.months_since.year,
                month=self.options.months_since.month,
                force_download=self.options.force_download)
        elif self.options.all_time:
            data_filepaths = OnsData.download_all(force_download=self.options.force_download)
        else:
            self.parser.error('Please specify a time period')

        importer = OnsImporter(filepaths=data_filepaths, ckanclient=self.client)
        loader = OnsLoader(self.client)

        loader.load_packages(importer.pkg_dict())
Пример #41
0
    def setup_class(self):
        super(TestOnsLoadClashSource, self).setup_class()

        self.clash_name = u'cereals_and_oilseeds_production_harvest'
        CreateTestData.create_arbitrary([
            {'name':self.clash_name,
             'title':'Test clash',
             'groups':['department-for-environment-food-and-rural-affairs'],
             'extras':{
                 'import_source':'DECC-Jan-09',
                 },
             }
            ])
        importer_ = importer.OnsImporter(sample_filepath(''))
        pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
        loader = OnsLoader(self.testclient)
        self.res = loader.load_packages(pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #42
0
    def setup_class(self):
        super(TestOnsLoadClashSource, self).setup_class()

        self.clash_name = u'cereals_and_oilseeds_production_harvest'
        CreateTestData.create_arbitrary([{
            'name': self.clash_name,
            'title': 'Test clash',
            'extras': {
                'published_by':
                'Department for Environment, Food and Rural Affairs',
                'import_source': 'DECC-Jan-09',
            },
        }])
        importer_ = importer.OnsImporter(sample_filepath(''))
        pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
        loader = OnsLoader(self.testclient)
        self.res = loader.load_packages(pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #43
0
 def setup_class(self):
     super(TestOnsUnknownPublisher, self).setup_class()
     try:
         for filepath in (sample_filepath('10'),):
             importer_ = importer.OnsImporter(filepath, self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             assert_equal(len(pkg_dicts), 1)
             pkg_dict = pkg_dicts[0]
             assert_equal(pkg_dict['title'], 'NHS Cancer Waiting Times in Wales')
             assert_equal(pkg_dict['groups'], [])
             loader = OnsLoader(self.testclient)
             res = loader.load_packages(pkg_dicts)
             assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #44
0
 def setup_class(self):
     super(TestOnsUnknownPublisher, self).setup_class()
     try:
         for filepath in (sample_filepath('10'),):
             importer_ = importer.OnsImporter(filepath, self.testclient)
             pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
             assert_equal(len(pkg_dicts), 1)
             pkg_dict = pkg_dicts[0]
             assert_equal(pkg_dict['title'], 'NHS Cancer Waiting Times in Wales')
             assert_equal(pkg_dict['groups'], [])
             loader = OnsLoader(self.testclient)
             res = loader.load_packages(pkg_dicts)
             assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #45
0
 def setup_class(self):
     super(TestNationalParkDuplicate, self).setup_class()
     try:
         filepath = sample_filepath(6)
         importer_ = importer.OnsImporter(filepath, self.testclient)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
         for pkg_dict in pkg_dicts:
             assert pkg_dict['name'] == self.name, pkg_dict['name']
             assert pkg_dict['title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
             assert_equal(pkg_dict['groups'], ['office-for-national-statistics'])
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #46
0
 def setup_class(self):
     super(TestNationalParkDuplicate, self).setup_class()
     try:
         filepath = sample_filepath(6)
         importer_ = importer.OnsImporter(filepath, self.testclient)
         pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
         self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
         for pkg_dict in pkg_dicts:
             assert pkg_dict['name'] == self.name, pkg_dict['name']
             assert pkg_dict['title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
             assert_equal(pkg_dict['groups'], ['office-for-national-statistics'])
         loader = OnsLoader(self.testclient)
         res = loader.load_packages(pkg_dicts)
         assert res['num_errors'] == 0, res
     except:
         # ensure that mock_drupal is destroyed
         MockDrupalCase.teardown_class()
         model.repo.rebuild_db()
         raise
Пример #47
0
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        user = model.User.by_name(u'annafan')
        assert user
        test_ckan_client = WsgiCkanClient(self.app, api_key=user.apikey)
        importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
        self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

        self.loader = OnsLoader(self.testclient)
        self.res = self.loader.load_packages(self.pkg_dicts)
        assert self.res['num_errors'] == 0, self.res
Пример #48
0
    def command(self):
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)
        XmlRpcCommand.command(self)

        if self.options.days:
            self.options.days = int(self.options.days)
        if self.options.start_date:
            self.options.start_date = self.parse_date(self.options.start_date)
        if self.options.end_date:
            self.options.end_date = self.parse_date(self.options.end_date)
        if not self.options.ons_cache_dir:
            self.options.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

        if self.options.days or \
               self.options.start_date or \
               self.options.end_date:
            data_filepaths = OnsData.download_flexible(
                days=self.options.days,
                start_date=self.options.start_date,
                end_date=self.options.end_date,
                ons_cache_dir=self.options.ons_cache_dir)

        elif self.options.all_time:
            data_filepaths = OnsData.download_all()
        else:
            self.parser.error('Please specify a time period')

        importer = OnsImporter(filepaths=data_filepaths,
                               xmlrpc_settings=self.xmlrpc_settings)
        loader = OnsLoader(self.client)

        loader.load_packages(importer.pkg_dict())
Пример #49
0
    def setup_class(self):
        super(TestOnsLoadClashSource, self).setup_class()

        try:
            self.clash_name = u'cereals_and_oilseeds_production_harvest'
            CreateTestData.create_arbitrary([
                {'name':self.clash_name,
                 'title':'Test clash',
                 'groups':['department-for-environment-food-and-rural-affairs'],
                 'extras':{
                     'import_source':'DECC-Jan-09',
                     },
                 }
                ])
            importer_ = importer.OnsImporter(sample_filepath(''), self.testclient)
            pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
            loader = OnsLoader(self.testclient)
            self.res = loader.load_packages(pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #50
0
    def setup_class(self):
        super(TestOnsLoadClashSource, self).setup_class()

        try:
            self.clash_name = u'cereals_and_oilseeds_production_harvest'
            CreateTestData.create_arbitrary([
                {'name':self.clash_name,
                 'title':'Test clash',
                 'groups':['department-for-environment-food-and-rural-affairs'],
                 'extras':{
                     'import_source':'DECC-Jan-09',
                     },
                 }
                ])
            importer_ = importer.OnsImporter(sample_filepath(''), self.testclient)
            pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]
            loader = OnsLoader(self.testclient)
            self.res = loader.load_packages(pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #51
0
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        try:
            user = model.User.by_name(u'annafan')
            assert user
            test_ckan_client = WsgiCkanClient(self.app, api_key=user.apikey)
            importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
            self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

            self.loader = OnsLoader(test_ckan_client)
            self.res = self.loader.load_packages(self.pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except Exception:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise
Пример #52
0
class TestOnsLoadBasic(OnsLoaderBase):
    lots_of_publishers = True
    
    @classmethod
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        try:
            user = model.User.by_name(u'annafan')
            assert user
            test_ckan_client = WsgiCkanClient(self.app, api_key=user.apikey)
            importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
            self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

            self.loader = OnsLoader(test_ckan_client)
            self.res = self.loader.load_packages(self.pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except Exception:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise

    def test_0_search_options(self):
        field_keys = ['title', 'groups']

        pkg_dict = {'title':'titleA',
                    'groups':['Department for Children, Schools and Families']}
        opts = self.loader._get_search_options(field_keys, pkg_dict)
        assert_equal(opts, [{'groups': 'Department for Children, Schools and Families', 'title': 'titleA'}])

    def test_1_hub_id_extraction(self):
        def assert_id(hub_id_value, expected_id):
            resource = {'description':'Some description',
                        'hub-id':hub_id_value}
            result = self.loader._get_hub_id(resource)
            assert_equal(result, expected_id)
        assert_id("119-46440",
                  "119-46440")

    def test_2_date_choose(self):
        def assert_id(date1, date2, earlier_or_later, expected_date_index):
            dates = (date1, date2)
            pkg0 = {'extras':{'date': date1}}
            result = self.loader._choose_date(pkg0, date2,
                                              earlier_or_later,
                                              'date')
            if not expected_date_index:
                assert_equal(result, expected_date_index)
            else:
                assert_equal(result, dates[expected_date_index - 1])
        assert_id('2010-12-01', '2010-12-02', 'earlier', 1)
        assert_id('2010-12-01', '2010-12-02', 'later', 2)
        assert_id('2010-12-02', '2010-12-01', 'earlier', 2)
        assert_id('2010-12-02', '2010-12-01', 'later', 1)
        assert_id('', '2010-12-02', 'earlier', 2)
        assert_id('2010-12-01', '', 'later', 1)
        assert_id('', '', 'earlier', None)

    def test_fields(self):
        q = model.Session.query(model.Package)
        names = [pkg.name for pkg in q.all()]
        pkg1 = model.Package.by_name(u'uk_official_holdings_of_international_reserves')
        cereals = model.Package.by_name(u'cereals_and_oilseeds_production_harvest')
        custody = model.Package.by_name(u'end_of_custody_licence_release_and_recalls')
        probation = model.Package.by_name(u'probation_statistics_brief')
        assert pkg1, names
        assert cereals, names
        assert custody, names
        assert probation, names
        assert pkg1.title == 'UK Official Holdings of International Reserves', pkg1.title
        assert pkg1.notes.startswith("Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."), pkg1.notes
        assert len(pkg1.resources) == 1, pkg1.resources
        assert pkg1.resources[0].url == 'http://www.hm-treasury.gov.uk/national_statistics.htm', pkg1.resources[0]
        assert_equal(pkg1.resources[0].description, 'December 2009')
        assert_equal(pkg1.resources[0].extras['hub-id'], '119-36345')
        assert len(custody.resources) == 2, custody.resources
        assert custody.resources[0].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
        assert_equal(custody.resources[0].description, 'November 2009')
        assert_equal(custody.resources[0].extras['hub-id'], '119-36836')
        assert custody.resources[1].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
        assert_equal(custody.resources[1].description, 'December 2009')
        assert_equal(custody.resources[1].extras['hub-id'], '119-36838')
        assert pkg1.extras['date_released'] == u'2010-01-06', pkg1.extras['date_released']
        assert probation.extras['date_released'] == u'2010-01-04', probation.extras['date_released']
        assert_equal(group_names(pkg1), [u"her-majestys-treasury"])
        assert_equal(group_names(cereals), [u"department-for-environment-food-and-rural-affairs"])
        assert_equal(group_names(custody), [u"department-of-justice"])
        assert u"Source agency: HM Treasury" in pkg1.notes, pkg1.notes
        assert pkg1.extras['categories'] == 'Economy', pkg1.extras['category']
        assert_equal(pkg1.extras['geographic_coverage'], '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)')
        assert pkg1.extras['national_statistic'] == 'no', pkg1.extras['national_statistic']
        assert cereals.extras['national_statistic'] == 'yes', cereals.extras['national_statistic']
        assert custody.extras['national_statistic'] == 'no', custody.extras['national_statistic']
        assert 'Designation: Official Statistics not designated as National Statistics' in custody.notes
        assert_equal(pkg1.extras['geographic_granularity'], 'UK and GB')
        assert 'Language: English' in pkg1.notes, pkg1.notes
        def check_tags(pkg, tags_list):            
            pkg_tags = [tag.name for tag in pkg.get_tags()]
            for tag in tags_list:
                assert tag in pkg_tags, "Couldn't find tag '%s' in tags: %s" % (tag, pkg_tags)
        check_tags(pkg1, ('economics-and-finance', 'reserves', 'currency', 'assets', 'liabilities', 'gold', 'economy', 'government-receipts-and-expenditure', 'public-sector-finance'))
        check_tags(cereals, ('environment', 'farming'))
        check_tags(custody, ('public-order-justice-and-rights', 'justice-system', 'prisons'))
        assert 'Alternative title: UK Reserves' in pkg1.notes, pkg1.notes
        
        assert pkg1.extras['external_reference'] == u'ONSHUB', pkg1.extras['external_reference']
        assert 'Open Government Licence' in pkg.license.title, pkg.license.title
        assert pkg1.extras['update_frequency'] == u'monthly', pkg1.extras['update_frequency']
        assert custody.extras['update_frequency'] == u'monthly', custody.extras['update_frequency']

        for pkg in (pkg1, cereals, custody):
            assert pkg.extras['import_source'].startswith('ONS'), '%s %s' % (pkg.name, pkg.extras['import_source'])
Пример #53
0
class TestOnsLoadBasic(OnsLoaderBase):
    lots_of_publishers = True
    
    @classmethod
    def setup_class(self):
        super(TestOnsLoadBasic, self).setup_class()
        try:
            user = model.User.by_name(u'annafan')
            assert user
            test_ckan_client = WsgiCkanClient(self.app, api_key=user.apikey)
            importer_ = importer.OnsImporter(sample_filepath(''), test_ckan_client)
            self.pkg_dicts = [pkg_dict for pkg_dict in importer_.pkg_dict()]

            self.loader = OnsLoader(test_ckan_client)
            self.res = self.loader.load_packages(self.pkg_dicts)
            assert self.res['num_errors'] == 0, self.res
        except Exception:
            # ensure that mock_drupal is destroyed
            MockDrupalCase.teardown_class()
            model.repo.rebuild_db()
            raise

    def test_0_search_options(self):
        field_keys = ['title', 'groups']

        pkg_dict = {'title':'titleA',
                    'groups':['Department for Children, Schools and Families']}
        opts = self.loader._get_search_options(field_keys, pkg_dict)
        assert_equal(opts, [{'groups': 'Department for Children, Schools and Families', 'title': 'titleA'}])

    def test_1_hub_id_extraction(self):
        def assert_id(hub_id_value, expected_id):
            resource = {'description':'Some description',
                        'hub-id':hub_id_value}
            result = self.loader._get_hub_id(resource)
            assert_equal(result, expected_id)
        assert_id("119-46440",
                  "119-46440")

    def test_2_date_choose(self):
        def assert_id(date1, date2, earlier_or_later, expected_date_index):
            dates = (date1, date2)
            pkg0 = {'extras':{'date': date1}}
            result = self.loader._choose_date(pkg0, date2,
                                              earlier_or_later,
                                              'date')
            if not expected_date_index:
                assert_equal(result, expected_date_index)
            else:
                assert_equal(result, dates[expected_date_index - 1])
        assert_id('2010-12-01', '2010-12-02', 'earlier', 1)
        assert_id('2010-12-01', '2010-12-02', 'later', 2)
        assert_id('2010-12-02', '2010-12-01', 'earlier', 2)
        assert_id('2010-12-02', '2010-12-01', 'later', 1)
        assert_id('', '2010-12-02', 'earlier', 2)
        assert_id('2010-12-01', '', 'later', 1)
        assert_id('', '', 'earlier', None)

    def test_fields(self):
        q = model.Session.query(model.Package)
        names = [pkg.name for pkg in q.all()]
        pkg1 = model.Package.by_name(u'uk_official_holdings_of_international_reserves')
        cereals = model.Package.by_name(u'cereals_and_oilseeds_production_harvest')
        custody = model.Package.by_name(u'end_of_custody_licence_release_and_recalls')
        probation = model.Package.by_name(u'probation_statistics_brief')
        assert pkg1, names
        assert cereals, names
        assert custody, names
        assert probation, names
        assert pkg1.title == 'UK Official Holdings of International Reserves', pkg1.title
        assert pkg1.notes.startswith("Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."), pkg1.notes
        assert len(pkg1.resources) == 1, pkg1.resources
        assert pkg1.resources[0].url == 'http://www.hm-treasury.gov.uk/national_statistics.htm', pkg1.resources[0]
        assert_equal(pkg1.resources[0].description, 'December 2009')
        assert_equal(pkg1.resources[0].extras['hub-id'], '119-36345')
        assert len(custody.resources) == 2, custody.resources
        assert custody.resources[0].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
        assert_equal(custody.resources[0].description, 'November 2009')
        assert_equal(custody.resources[0].extras['hub-id'], '119-36836')
        assert custody.resources[1].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
        assert_equal(custody.resources[1].description, 'December 2009')
        assert_equal(custody.resources[1].extras['hub-id'], '119-36838')
        assert pkg1.extras['date_released'] == u'2010-01-06', pkg1.extras['date_released']
        assert probation.extras['date_released'] == u'2010-01-04', probation.extras['date_released']
        assert_equal(group_names(pkg1), [u"her-majestys-treasury"])
        assert_equal(group_names(cereals), [u"department-for-environment-food-and-rural-affairs"])
        assert_equal(group_names(custody), [u"department-of-justice"])
        assert u"Source agency: HM Treasury" in pkg1.notes, pkg1.notes
        assert pkg1.extras['categories'] == 'Economy', pkg1.extras['category']
        assert_equal(pkg1.extras['geographic_coverage'], '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)')
        assert pkg1.extras['national_statistic'] == 'no', pkg1.extras['national_statistic']
        assert cereals.extras['national_statistic'] == 'yes', cereals.extras['national_statistic']
        assert custody.extras['national_statistic'] == 'no', custody.extras['national_statistic']
        assert 'Designation: Official Statistics not designated as National Statistics' in custody.notes
        assert_equal(pkg1.extras['geographic_granularity'], 'UK and GB')
        assert 'Language: English' in pkg1.notes, pkg1.notes
        def check_tags(pkg, tags_list):            
            pkg_tags = [tag.name for tag in pkg.get_tags()]
            for tag in tags_list:
                assert tag in pkg_tags, "Couldn't find tag '%s' in tags: %s" % (tag, pkg_tags)
        check_tags(pkg1, ('economics-and-finance', 'reserves', 'currency', 'assets', 'liabilities', 'gold', 'economy', 'government-receipts-and-expenditure', 'public-sector-finance'))
        check_tags(cereals, ('environment', 'farming'))
        check_tags(custody, ('public-order-justice-and-rights', 'justice-system', 'prisons'))
        assert 'Alternative title: UK Reserves' in pkg1.notes, pkg1.notes
        
        assert pkg1.extras['external_reference'] == u'ONSHUB', pkg1.extras['external_reference']
        assert 'Open Government Licence' in pkg.license.title, pkg.license.title
        assert pkg1.extras['update_frequency'] == u'monthly', pkg1.extras['update_frequency']
        assert custody.extras['update_frequency'] == u'monthly', custody.extras['update_frequency']

        for pkg in (pkg1, cereals, custody):
            assert pkg.extras['import_source'].startswith('ONS'), '%s %s' % (pkg.name, pkg.extras['import_source'])
Пример #54
0
    def command(self):
        from ckanext.dgu.ons.downloader import OnsData, ONS_DEFAULT_CACHE_PATH
        from ckanext.dgu.ons.importer import OnsImporter
        from ckanext.dgu.ons.loader import OnsLoader

        ApiCommand.command(self)
        log = logging.getLogger(__name__)

        try:
            if self.options.days:
                self.options.days = int(self.options.days)
            if self.options.start_date:
                self.options.start_date = self.parse_date(
                    self.options.start_date)
            if self.options.end_date:
                self.options.end_date = self.parse_date(self.options.end_date)
            if self.options.month:
                self.options.month = self.parse_month(self.options.month)
            if self.options.months_since:
                self.options.months_since = self.parse_month(
                    self.options.months_since)
            if not self.options.ons_cache_dir:
                self.options.ons_cache_dir = ONS_DEFAULT_CACHE_PATH

            if self.options.days or \
                self.options.start_date or \
                self.options.end_date:
                data_filepaths = OnsData.download_flexible(
                    days=self.options.days,
                    start_date=self.options.start_date,
                    end_date=self.options.end_date,
                    ons_cache_dir=self.options.ons_cache_dir)

            elif self.options.month:
                data_filepaths = OnsData.download_month(
                    year=self.options.month.year,
                    month=self.options.month.month)
            elif self.options.months_since:
                data_filepaths = OnsData.download_months_since(
                    year=self.options.months_since.year,
                    month=self.options.months_since.month,
                    force_download=self.options.force_download)
            elif self.options.all_time:
                data_filepaths = OnsData.download_all(
                    force_download=self.options.force_download)
            else:
                self.parser.error('Please specify a time period')

            filter_ = {}
            if self.options.publisher:
                filter_['publisher'] = self.options.publisher

            stats = StatsList()
            importer = OnsImporter(filepaths=data_filepaths,
                                   ckanclient=self.client,
                                   stats=stats,
                                   filter_=filter_)
            loader = OnsLoader(self.client, stats)

            loader.load_packages(importer.pkg_dict())
            log.info('Summary:\n' + stats.report())
        except:
            # Any problem, make sure it gets logged
            log.exception('ONS Loader exception')
            raise