Example #1
    def assertDataTree(self, dataset_code):
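        """Check the provider data tree: build it, upsert it to MongoDB, then
        compare the stored categories, datasets and roots against the expected
        DSD settings for this dataset."""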

        settings = self.DATASETS[dataset_code]
        dsd = settings["DSD"]
        
        data_tree = self.fetcher.build_data_tree()
        if self.is_debug:
            print("------ DATA TREE LOCAL ---------")
            pprint(data_tree)
        
        results = self.fetcher.upsert_data_tree()
        self.assertIsNotNone(results)
        
        data_tree = self.db[constants.COL_CATEGORIES].find({"provider_name": 
                                                            self.fetcher.provider_name})
        if self.is_debug:
            print("------ DATA TREE FROM DB -------")
            pprint(list(data_tree))

        datasets = self.fetcher.datasets_list()
        
        if self.is_debug:
            print("------DATASET LIST--------")
            pprint(datasets)

        self.assertEqual(datasets[0]["dataset_code"], self.DATASET_FIRST)
        self.assertEqual(datasets[-1]["dataset_code"], self.DATASET_LAST)
        
        category = Categories.search_category_for_dataset(self.fetcher.provider_name,
                                                          dataset_code, 
                                                          db=self.db)
        self.assertIsNotNone(category)
        self.assertEqual(category["category_code"], dsd["categories_key"])
        
        query = {"provider_name": self.fetcher.provider_name,
                 "datasets.dataset_code": dataset_code}
        
        dataset_category = self.db[constants.COL_CATEGORIES].find_one(query)
        self.assertIsNotNone(dataset_category)
        
        self.assertEqual(dataset_category["all_parents"], 
                         dsd["categories_parents"]) 
        
        roots = Categories.root_categories(self.fetcher.provider_name,
                                           db=self.db)
        self.assertEqual(roots.count(), 
                         len(dsd["categories_root"])) 
        
        root_codes = [r["category_code"] for r in roots]
        
        if self.is_debug:
            print("ROOTS : ", sorted(root_codes))
        
        self.assertEqual(sorted(root_codes),
                         dsd["categories_root"])
Example #2
    def assertDataTree(self, dataset_code):

        settings = self.DATASETS[dataset_code]
        dsd = settings["DSD"]

        data_tree = self.fetcher.build_data_tree()
        if self.is_debug:
            print("------ DATA TREE LOCAL ---------")
            pprint(data_tree)

        results = self.fetcher.upsert_data_tree(data_tree=data_tree,
                                                force_update=False)
        self.assertIsNotNone(results)

        data_tree = self.db[constants.COL_CATEGORIES].find(
            {"provider_name": self.fetcher.provider_name})
        if self.is_debug:
            print("------ DATA TREE FROM DB -------")
            pprint(list(data_tree))

        datasets = self.fetcher.datasets_list()

        if self.is_debug:
            print("------DATASET LIST--------")
            pprint(datasets)

        self.assertEqual(datasets[0]["dataset_code"], self.DATASET_FIRST)
        self.assertEqual(datasets[-1]["dataset_code"], self.DATASET_LAST)

        category = Categories.search_category_for_dataset(
            self.fetcher.provider_name, dataset_code, db=self.db)
        self.assertIsNotNone(category)
        self.assertEqual(category["category_code"], dsd["categories_key"])

        query = {
            "provider_name": self.fetcher.provider_name,
            "datasets.dataset_code": dataset_code
        }

        dataset_category = self.db[constants.COL_CATEGORIES].find_one(query)
        self.assertIsNotNone(dataset_category)

        self.assertEqual(dataset_category["all_parents"],
                         dsd["categories_parents"])

        roots = Categories.root_categories(self.fetcher.provider_name,
                                           db=self.db)

        root_codes = [r["category_code"] for r in roots]

        if self.is_debug:
            print("ROOTS : ", sorted(root_codes))

        self.assertEqual(sorted(root_codes), dsd["categories_root"])
Example #3
    def build_data_tree(self):
        """Build data_tree from ESRI site parsing
        """

        categories = []

        def make_node(data, parent_key=None):
            _category = {
                "name": data['name'],
                "category_code": data['category_code'],
                "parent": parent_key,
                "all_parents": [],
                "datasets": []
            }
            if parent_key:
                _category['category_code'] = "%s.%s" % (
                    parent_key, _category['category_code'])

            _category_key = _category['category_code']

            if 'children' in data:
                for c in data['children']:
                    make_node(c, _category_key)

            if 'datasets' in data:
                for d in data['datasets']:
                    _dataset = {
                        "dataset_code": d['dataset_code'],
                        "name": d['name'],
                        "last_update": d['release_date'],
                        "metadata": {
                            'url': d['url'],
                            'doc_href': d['doc_href']
                        }
                    }
                    _category["datasets"].append(_dataset)

            categories.append(_category)

        try:
            for data in parse_esri_site():
                make_node(data)
        except Exception as err:
            logger.error(err)
            raise

        # Index categories by their full code so iter_parent can resolve parent chains
        _categories = dict([(doc["category_code"], doc) for doc in categories])

        for c in categories:
            parents = Categories.iter_parent(c, _categories)
            c["all_parents"] = parents

        return categories
Example #4
    def build_data_tree(self):
        """Build data_tree from ESRI site parsing
        """

        categories = []
        
        def make_node(data, parent_key=None):
            _category = {
                "name": data['name'],
                "category_code": data['category_code'],
                "parent": parent_key,
                "all_parents": [],
                "datasets": []
            }
            if parent_key:
                _category['category_code'] = "%s.%s" % (parent_key, _category['category_code'])
            
            _category_key = _category['category_code']
            
            if 'children' in data:
                for c in data['children']:
                    make_node(c, _category_key)
            
            if 'datasets' in data:
                for d in data['datasets']:
                    _dataset = {
                        "dataset_code": d['dataset_code'],
                        "name": d['name'],
                        "last_update": d['release_date'],
                        "metadata": {
                            'url': d['url'], 
                            'doc_href': d['doc_href']
                        }
                    }                    
                    _category["datasets"].append(_dataset)
                    
            categories.append(_category)
        
        try:
            for data in parse_esri_site():
                make_node(data)
        except Exception as err:
            logger.error(err)   
            raise
        
        _categories = dict([(doc["category_code"], doc) for doc in categories])
        
        for c in categories:
            parents = Categories.iter_parent(c, _categories)
            c["all_parents"] = parents

        return categories
Example #5
    def test_upsert_dataset_nama_10_fcs(self):
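        """Full upsert cycle for nama_10_fcs: initial load, rejection when the
        update date is unchanged, then a forced refresh after bumping the
        catalog date."""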
        
        # nosetests -s -v dlstats.tests.fetchers.test_eurostat:FetcherTestCase.test_upsert_dataset_nama_10_fcs
        
        httpretty.enable()

        dataset_code = "nama_10_fcs"
        self.DATASETS[dataset_code]["DSD"].update(LOCAL_DATASETS_UPDATE[dataset_code])
        self._load_files_datatree(TOC_FP)
        self._load_files(dataset_code)
        
        self.assertProvider()
        self.assertDataset(dataset_code)
        self.assertSeries(dataset_code)
        
        '''Re-run upsert_dataset: expect rejection because the dataset is unchanged'''
        with self.assertRaises(RejectUpdatedDataset) as err:
            self.fetcher.upsert_dataset(dataset_code)
        self.assertEqual(err.exception.comments, 
                         "update-date[2015-10-26 00:00:00]")

        '''Verify last_update in category for this dataset'''
        category = Categories.search_category_for_dataset(self.fetcher.provider_name, 
                                                          dataset_code, self.db)
        self.assertIsNotNone(category)
        last_update = None
        for d in category["datasets"]:
            if d["dataset_code"] == dataset_code:
                last_update = d["last_update"]
        self.assertIsNotNone(last_update)
        self.assertEqual(str(last_update), "2015-10-26 00:00:00")
        last_update = None
        
        # Reset the HTTP mocks and reload the fixture files for the forced update
        httpretty.reset()
        httpretty.disable()
        httpretty.enable()
        self._load_files(dataset_code)

        '''Change last_update in catalog.xml to force a dataset update'''
        with open(TOC_FP, 'rb') as fp:
            toc = fp.read()   
        self.assertFalse(b'27.10.2015' in toc)
        toc = toc.replace(b'26.10.2015', b'27.10.2015')
        self.assertTrue(b'27.10.2015' in toc)
        self._load_files_datatree(toc=toc)
        self.fetcher.provider.metadata["creation_date"] = datetime.datetime(1900, 1, 1)
        results = self.fetcher.upsert_data_tree(force_update=True)
        self.assertIsNotNone(results)
        self.fetcher.get_selected_datasets(force=True)

        query = {
            'provider_name': self.fetcher.provider_name,
            "dataset_code": dataset_code
        }
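        # Delete one series document before re-running the dataset upsert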
        _id = self.db[constants.COL_SERIES].find_one()["_id"]
        deleted = self.db[constants.COL_SERIES].delete_one({"_id": _id})
        self.assertEqual(deleted.deleted_count, 1)

        dataset_settings = self.fetcher.selected_datasets[dataset_code]
        self.assertEqual(dataset_settings["last_update"],
                         datetime.datetime(2015, 10, 27, 0, 0))
        
        result = self.fetcher.upsert_dataset(dataset_code)
        self.assertIsNotNone(result)  # _id of the dataset
        dataset = self.db[constants.COL_DATASETS].find_one(query)
        self.assertIsNotNone(dataset)
        
        self.assertEqual(dataset["last_update"],
                         datetime.datetime(2015, 10, 27, 0, 0))

        #self.assertEqual(dataset["download_last"],
        #                 datetime.datetime(2015, 10, 27, 0, 0))

        httpretty.disable()
Example #6
    def test_upsert_dataset_nama_10_fcs(self):

        # nosetests -s -v dlstats.tests.fetchers.test_eurostat:FetcherTestCase.test_upsert_dataset_nama_10_fcs

        httpretty.enable()

        dataset_code = "nama_10_fcs"
        self.DATASETS[dataset_code]["DSD"].update(
            LOCAL_DATASETS_UPDATE[dataset_code])
        self._load_files_datatree(TOC_FP)
        self._load_files(dataset_code)

        self.assertProvider()
        self.assertDataset(dataset_code)
        self.assertSeries(dataset_code)
        '''Re-run upsert_dataset: expect rejection because the dataset is unchanged'''
        with self.assertRaises(RejectUpdatedDataset) as err:
            self.fetcher.upsert_dataset(dataset_code)
        self.assertEqual(err.exception.comments,
                         "update-date[2015-10-26 00:00:00]")
        '''Verify last_update in category for this dataset'''
        category = Categories.search_category_for_dataset(
            self.fetcher.provider_name, dataset_code, self.db)
        self.assertIsNotNone(category)
        last_update = None
        for d in category["datasets"]:
            if d["dataset_code"] == dataset_code:
                last_update = d["last_update"]
        self.assertIsNotNone(last_update)
        self.assertEqual(str(last_update), "2015-10-26 00:00:00")
        last_update = None

        httpretty.reset()
        httpretty.disable()
        httpretty.enable()
        self._load_files(dataset_code)
        '''Change last_update in catalog.xml to force a dataset update'''
        with open(TOC_FP, 'rb') as fp:
            toc = fp.read()
        self.assertFalse(b'27.10.2015' in toc)
        toc = toc.replace(b'26.10.2015', b'27.10.2015')
        self.assertTrue(b'27.10.2015' in toc)
        self._load_files_datatree(toc=toc)
        self.fetcher.provider.metadata["creation_date"] = datetime.datetime(
            1900, 1, 1)
        results = self.fetcher.upsert_data_tree(force_update=True)
        self.assertIsNotNone(results)
        self.fetcher.get_selected_datasets(force=True)

        query = {
            'provider_name': self.fetcher.provider_name,
            "dataset_code": dataset_code
        }
        _id = self.db[constants.COL_SERIES].find_one()["_id"]
        deleted = self.db[constants.COL_SERIES].delete_one({"_id": _id})
        self.assertEqual(deleted.deleted_count, 1)

        dataset_settings = self.fetcher.selected_datasets[dataset_code]
        self.assertEqual(dataset_settings["last_update"],
                         datetime.datetime(2015, 10, 27, 0, 0))

        result = self.fetcher.upsert_dataset(dataset_code)
        self.assertIsNotNone(result)  # _id of the dataset
        dataset = self.db[constants.COL_DATASETS].find_one(query)
        self.assertIsNotNone(dataset)

        self.assertEqual(dataset["last_update"],
                         datetime.datetime(2015, 10, 27, 0, 0))

        #self.assertEqual(dataset["download_last"],
        #                 datetime.datetime(2015, 10, 27, 0, 0))

        httpretty.disable()