def assertDataTree(self, dataset_code):
    """Verify the provider's data tree end to end.

    Builds the tree locally, upserts it to MongoDB, then checks the
    dataset list ordering, the category attached to *dataset_code*,
    its parent chain, and the set of root categories.
    """
    dsd = self.DATASETS[dataset_code]["DSD"]

    local_tree = self.fetcher.build_data_tree()
    if self.is_debug:
        print("------ DATA TREE LOCAL ---------")
        pprint(local_tree)

    # Persist the tree and make sure the upsert reported a result.
    self.assertIsNotNone(self.fetcher.upsert_data_tree())

    db_tree = self.db[constants.COL_CATEGORIES].find(
        {"provider_name": self.fetcher.provider_name})
    if self.is_debug:
        print("------ DATA TREE FROM DB -------")
        pprint(list(db_tree))

    dataset_docs = self.fetcher.datasets_list()
    if self.is_debug:
        print("------DATASET LIST--------")
        pprint(dataset_docs)

    # The list is expected sorted: check both extremities.
    self.assertEqual(dataset_docs[0]["dataset_code"], self.DATASET_FIRST)
    self.assertEqual(dataset_docs[-1]["dataset_code"], self.DATASET_LAST)

    category = Categories.search_category_for_dataset(
        self.fetcher.provider_name, dataset_code, db=self.db)
    self.assertIsNotNone(category)
    self.assertEqual(category["category_code"], dsd["categories_key"])

    dataset_category = self.db[constants.COL_CATEGORIES].find_one({
        "provider_name": self.fetcher.provider_name,
        "datasets.dataset_code": dataset_code,
    })
    self.assertIsNotNone(dataset_category)
    self.assertEqual(dataset_category["all_parents"],
                     dsd["categories_parents"])

    roots = Categories.root_categories(self.fetcher.provider_name,
                                       db=self.db)
    self.assertEqual(roots.count(), len(dsd["categories_root"]))
    root_codes = [doc["category_code"] for doc in roots]
    if self.is_debug:
        print("ROOTS : ", sorted(root_codes))
    self.assertEqual(sorted(root_codes), dsd["categories_root"])
def assertDataTree(self, dataset_code):
    """Verify the provider's data tree end to end.

    Builds the tree locally, upserts it (without forcing an update),
    then checks the dataset list ordering, the category attached to
    *dataset_code*, its parent chain, and the root categories.
    """
    dsd = self.DATASETS[dataset_code]["DSD"]

    local_tree = self.fetcher.build_data_tree()
    if self.is_debug:
        print("------ DATA TREE LOCAL ---------")
        pprint(local_tree)

    # Persist the locally built tree and check the upsert answered.
    upsert_result = self.fetcher.upsert_data_tree(data_tree=local_tree,
                                                  force_update=False)
    self.assertIsNotNone(upsert_result)

    db_tree = self.db[constants.COL_CATEGORIES].find(
        {"provider_name": self.fetcher.provider_name})
    if self.is_debug:
        print("------ DATA TREE FROM DB -------")
        pprint(list(db_tree))

    dataset_docs = self.fetcher.datasets_list()
    if self.is_debug:
        print("------DATASET LIST--------")
        pprint(dataset_docs)

    # The list is expected sorted: check both extremities.
    self.assertEqual(dataset_docs[0]["dataset_code"], self.DATASET_FIRST)
    self.assertEqual(dataset_docs[-1]["dataset_code"], self.DATASET_LAST)

    category = Categories.search_category_for_dataset(
        self.fetcher.provider_name, dataset_code, db=self.db)
    self.assertIsNotNone(category)
    self.assertEqual(category["category_code"], dsd["categories_key"])

    dataset_category = self.db[constants.COL_CATEGORIES].find_one({
        "provider_name": self.fetcher.provider_name,
        "datasets.dataset_code": dataset_code,
    })
    self.assertIsNotNone(dataset_category)
    self.assertEqual(dataset_category["all_parents"],
                     dsd["categories_parents"])

    roots = Categories.root_categories(self.fetcher.provider_name,
                                       db=self.db)
    root_codes = [doc["category_code"] for doc in roots]
    if self.is_debug:
        print("ROOTS : ", sorted(root_codes))
    self.assertEqual(sorted(root_codes), dsd["categories_root"])
def build_data_tree(self):
    """Build data_tree from ESRI site parsing.

    Walks the tree returned by :func:`parse_esri_site` depth-first and
    flattens it into a list of category dicts. Children are visited
    (and thus appended) before their parent, and a child's
    ``category_code`` is prefixed with the parent's full dotted code.

    Returns:
        list of category dicts with keys ``name``, ``category_code``,
        ``parent``, ``all_parents`` and ``datasets``.

    Raises:
        Exception: re-raised after logging when site parsing fails.
    """
    categories = []

    def make_node(data, parent_key=None):
        # One flat category document per tree node.
        _category = {
            "name": data['name'],
            "category_code": data['category_code'],
            "parent": parent_key,
            "all_parents": [],
            "datasets": []
        }
        # Make the code hierarchical: "<parent.full.code>.<own code>".
        if parent_key:
            _category['category_code'] = "%s.%s" % (
                parent_key, _category['category_code'])
        _category_key = _category['category_code']

        if 'children' in data:
            for child in data['children']:
                make_node(child, _category_key)

        if 'datasets' in data:
            for d in data['datasets']:
                _category["datasets"].append({
                    "dataset_code": d['dataset_code'],
                    "name": d['name'],
                    "last_update": d['release_date'],
                    "metadata": {
                        'url': d['url'],
                        'doc_href': d['doc_href']
                    }
                })

        categories.append(_category)

    try:
        for data in parse_esri_site():
            make_node(data)
    except Exception as err:
        # logger.exception keeps the traceback (logger.error dropped it).
        logger.exception(err)
        raise

    # Index by full category_code so parents can be resolved by code.
    _categories = {doc["category_code"]: doc for doc in categories}
    for c in categories:
        c["all_parents"] = Categories.iter_parent(c, _categories)

    return categories
def build_data_tree(self):
    """Build data_tree from ESRI site parsing.

    Flattens the tree produced by parse_esri_site() into a list of
    category dicts (children appended before their parent), then
    resolves each category's full parent chain.
    """
    nodes = []

    def _visit(entry, parent_code=None):
        # One flat category document per tree node.
        node = {
            "name": entry['name'],
            "category_code": entry['category_code'],
            "parent": parent_code,
            "all_parents": [],
            "datasets": []
        }
        # Hierarchical code: "<parent.full.code>.<own code>".
        if parent_code:
            node['category_code'] = "%s.%s" % (parent_code,
                                               node['category_code'])
        full_code = node['category_code']

        if 'children' in entry:
            for child in entry['children']:
                _visit(child, full_code)

        if 'datasets' in entry:
            for ds in entry['datasets']:
                node["datasets"].append({
                    "dataset_code": ds['dataset_code'],
                    "name": ds['name'],
                    "last_update": ds['release_date'],
                    "metadata": {
                        'url': ds['url'],
                        'doc_href': ds['doc_href']
                    }
                })

        nodes.append(node)

    try:
        for entry in parse_esri_site():
            _visit(entry)
    except Exception as err:
        logger.error(err)
        raise

    # Index by full code so each node's parent chain can be resolved.
    by_code = {doc["category_code"]: doc for doc in nodes}
    for node in nodes:
        node["all_parents"] = Categories.iter_parent(node, by_code)

    return nodes
def test_upsert_dataset_nama_10_fcs(self):
    """Upsert the nama_10_fcs dataset twice: first load succeeds, a
    reload with the same catalog date is rejected, then a forced
    catalog-date bump (26.10 -> 27.10.2015) makes the update run again.
    """
    # nosetests -s -v dlstats.tests.fetchers.test_eurostat:FetcherTestCase.test_upsert_dataset_nama_10_fcs
    httpretty.enable()
    dataset_code = "nama_10_fcs"
    self.DATASETS[dataset_code]["DSD"].update(LOCAL_DATASETS_UPDATE[dataset_code])
    # Register mocked HTTP responses: catalog (TOC) + dataset files.
    self._load_files_datatree(TOC_FP)
    self._load_files(dataset_code)
    # First, full load: provider, dataset and series must all pass.
    self.assertProvider()
    self.assertDataset(dataset_code)
    self.assertSeries(dataset_code)
    '''Reload upsert_dataset for normal fail'''
    # Same catalog date as the stored one -> update must be rejected.
    with self.assertRaises(RejectUpdatedDataset) as err:
        self.fetcher.upsert_dataset(dataset_code)
    self.assertEqual(err.exception.comments, "update-date[2015-10-26 00:00:00]")
    '''Verify last_update in category for this dataset'''
    category = Categories.search_category_for_dataset(self.fetcher.provider_name, dataset_code, self.db)
    self.assertIsNotNone(category)
    last_update = None
    for d in category["datasets"]:
        if d["dataset_code"] == dataset_code:
            last_update = d["last_update"]
    self.assertIsNotNone(last_update)
    self.assertEqual(str(last_update), "2015-10-26 00:00:00")
    last_update = None
    # Reset mocks before re-registering responses for the forced run.
    httpretty.reset()
    httpretty.disable()
    httpretty.enable()
    self._load_files(dataset_code)
    '''Change last_update in catalog.xml for force update dataset'''
    with open(TOC_FP, 'rb') as fp:
        toc = fp.read()
    # Bump the catalog date 26.10.2015 -> 27.10.2015 to trigger an update.
    self.assertFalse(b'27.10.2015' in toc)
    toc = toc.replace(b'26.10.2015', b'27.10.2015')
    self.assertTrue(b'27.10.2015' in toc)
    self._load_files_datatree(toc=toc)
    # Backdate provider creation so the data tree refresh is accepted.
    self.fetcher.provider.metadata["creation_date"] = datetime.datetime(1900, 1, 1)
    results = self.fetcher.upsert_data_tree(force_update=True)
    self.assertIsNotNone(results)
    self.fetcher.get_selected_datasets(force=True)
    query = {
        'provider_name': self.fetcher.provider_name,
        "dataset_code": dataset_code
    }
    # Remove one series so the re-run has something to restore.
    _id = self.db[constants.COL_SERIES].find_one()["_id"]
    deleted = self.db[constants.COL_SERIES].delete_one({"_id": _id})
    self.assertEqual(deleted.deleted_count, 1)
    dataset_settings = self.fetcher.selected_datasets[dataset_code]
    self.assertEqual(dataset_settings["last_update"],
                     datetime.datetime(2015, 10, 27, 0, 0))
    # Forced update must now succeed with the new catalog date.
    result = self.fetcher.upsert_dataset(dataset_code)
    self.assertIsNotNone(result)
    # dataset _id
    dataset = self.db[constants.COL_DATASETS].find_one(query)
    self.assertIsNotNone(dataset)
    self.assertEqual(dataset["last_update"],
                     datetime.datetime(2015, 10, 27, 0, 0))
    #self.assertEqual(dataset["download_last"],
    #                 datetime.datetime(2015, 10, 27, 0, 0))
    httpretty.disable()
def test_upsert_dataset_nama_10_fcs(self):
    """Upsert the nama_10_fcs dataset twice: first load succeeds, a
    reload with the same catalog date is rejected, then a forced
    catalog-date bump (26.10 -> 27.10.2015) makes the update run again.
    """
    # nosetests -s -v dlstats.tests.fetchers.test_eurostat:FetcherTestCase.test_upsert_dataset_nama_10_fcs
    httpretty.enable()
    dataset_code = "nama_10_fcs"
    self.DATASETS[dataset_code]["DSD"].update(
        LOCAL_DATASETS_UPDATE[dataset_code])
    # Register mocked HTTP responses: catalog (TOC) + dataset files.
    self._load_files_datatree(TOC_FP)
    self._load_files(dataset_code)
    # First, full load: provider, dataset and series must all pass.
    self.assertProvider()
    self.assertDataset(dataset_code)
    self.assertSeries(dataset_code)
    '''Reload upsert_dataset for normal fail'''
    # Same catalog date as the stored one -> update must be rejected.
    with self.assertRaises(RejectUpdatedDataset) as err:
        self.fetcher.upsert_dataset(dataset_code)
    self.assertEqual(err.exception.comments,
                     "update-date[2015-10-26 00:00:00]")
    '''Verify last_update in category for this dataset'''
    category = Categories.search_category_for_dataset(
        self.fetcher.provider_name, dataset_code, self.db)
    self.assertIsNotNone(category)
    last_update = None
    for d in category["datasets"]:
        if d["dataset_code"] == dataset_code:
            last_update = d["last_update"]
    self.assertIsNotNone(last_update)
    self.assertEqual(str(last_update), "2015-10-26 00:00:00")
    last_update = None
    # Reset mocks before re-registering responses for the forced run.
    httpretty.reset()
    httpretty.disable()
    httpretty.enable()
    self._load_files(dataset_code)
    '''Change last_update in catalog.xml for force update dataset'''
    with open(TOC_FP, 'rb') as fp:
        toc = fp.read()
    # Bump the catalog date 26.10.2015 -> 27.10.2015 to trigger an update.
    self.assertFalse(b'27.10.2015' in toc)
    toc = toc.replace(b'26.10.2015', b'27.10.2015')
    self.assertTrue(b'27.10.2015' in toc)
    self._load_files_datatree(toc=toc)
    # Backdate provider creation so the data tree refresh is accepted.
    self.fetcher.provider.metadata["creation_date"] = datetime.datetime(
        1900, 1, 1)
    results = self.fetcher.upsert_data_tree(force_update=True)
    self.assertIsNotNone(results)
    self.fetcher.get_selected_datasets(force=True)
    query = {
        'provider_name': self.fetcher.provider_name,
        "dataset_code": dataset_code
    }
    # Remove one series so the re-run has something to restore.
    _id = self.db[constants.COL_SERIES].find_one()["_id"]
    deleted = self.db[constants.COL_SERIES].delete_one({"_id": _id})
    self.assertEqual(deleted.deleted_count, 1)
    dataset_settings = self.fetcher.selected_datasets[dataset_code]
    self.assertEqual(dataset_settings["last_update"],
                     datetime.datetime(2015, 10, 27, 0, 0))
    # Forced update must now succeed with the new catalog date.
    result = self.fetcher.upsert_dataset(dataset_code)
    self.assertIsNotNone(result)
    # dataset _id
    dataset = self.db[constants.COL_DATASETS].find_one(query)
    self.assertIsNotNone(dataset)
    self.assertEqual(dataset["last_update"],
                     datetime.datetime(2015, 10, 27, 0, 0))
    #self.assertEqual(dataset["download_last"],
    #                 datetime.datetime(2015, 10, 27, 0, 0))
    httpretty.disable()