class EurostatData(SeriesIterator):
    """Iterate over the series of one Eurostat dataset.

    Downloads the dataset's zip archive, parses the DSD and SDMX data
    files it contains, and exposes the parsed rows via ``self.rows``.
    """

    def __init__(self, dataset):
        super().__init__(dataset)
        self.dataset_url = make_url(self.dataset_code)
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        # Reuse the concepts/codelists already collected by the fetcher.
        self.xml_dsd.concepts = self.fetcher._concepts
        self.xml_dsd.codelists = self.fetcher._codelists
        self.store_path = self.get_store_path()
        self._load()

    def _load(self):
        """Download and unpack the archive, then parse DSD and data."""
        downloader = Downloader(url=self.dataset_url,
                                filename="data-%s.zip" % self.dataset_code,
                                store_filepath=self.store_path,
                                use_existing_file=self.fetcher.use_existing_file)
        extracted = extract_zip_file(downloader.get_filepath())
        dsd_filepath = extracted[self.dataset_code + ".dsd.xml"]
        data_filepath = extracted[self.dataset_code + ".sdmx.xml"]
        # Both temporary files are scheduled for deletion by the fetcher.
        self.fetcher.for_delete.append(dsd_filepath)
        self.fetcher.for_delete.append(data_filepath)

        self.xml_dsd.process(dsd_filepath)
        self._set_dataset()

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                # TODO: frequencies_supported=FREQUENCIES_SUPPORTED
                                )
        self.rows = self.xml_data.process(data_filepath)

    def _set_dataset(self):
        """Copy structure metadata from the parsed DSD onto the dataset."""
        converted = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = converted["dimension_keys"]
        self.dataset.attribute_keys = converted["attribute_keys"]
        self.dataset.concepts = converted["concepts"]
        self.dataset.codelists = converted["codelists"]

    def build_series(self, bson):
        """Attach frequency and last-update metadata to one series."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update
        return bson
class EurostatData(SeriesIterator):
    """Series iterator over one Eurostat dataset archive."""

    def __init__(self, dataset):
        super().__init__(dataset)
        self.dataset_url = make_url(self.dataset_code)
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        self.store_path = self.get_store_path()
        self._load()

    def _load(self):
        """Fetch the zip archive and wire up DSD + data parsing."""
        zip_download = Downloader(
            url=self.dataset_url,
            filename="data-%s.zip" % self.dataset_code,
            store_filepath=self.store_path,
            use_existing_file=self.fetcher.use_existing_file)
        members = extract_zip_file(zip_download.get_filepath())
        path_dsd = members[self.dataset_code + ".dsd.xml"]
        path_data = members[self.dataset_code + ".sdmx.xml"]
        # Schedule both extracted files for cleanup by the fetcher.
        self.fetcher.for_delete.append(path_dsd)
        self.fetcher.for_delete.append(path_data)

        self.xml_dsd.process(path_dsd)
        self._set_dataset()

        self.xml_data = XMLData(
            provider_name=self.provider_name,
            dataset_code=self.dataset_code,
            xml_dsd=self.xml_dsd,
            dsd_id=self.dataset_code,
            # TODO: frequencies_supported=FREQUENCIES_SUPPORTED
        )
        self.rows = self.xml_data.process(path_data)

    def _set_dataset(self):
        """Propagate DSD structure info onto the dataset object."""
        meta = dataset_converter(self.xml_dsd, self.dataset_code)
        for field in ("dimension_keys", "attribute_keys",
                      "concepts", "codelists"):
            setattr(self.dataset, field, meta[field])

    def build_series(self, bson):
        """Finalize one series document before storage."""
        self.dataset.add_frequency(bson["frequency"])
        bson["last_update"] = self.dataset.last_update
        return bson
def __init__(self, dataset=None):
    """Load the DSD, check freshness, and build the data-row generator."""
    super().__init__(dataset)
    self.store_path = self.get_store_path()
    self.xml_dsd = XMLStructure(provider_name=self.provider_name)
    self._load_dsd()

    # Reject the update when the stored dataset is already newer than
    # the structure advertised by the provider.
    if (self.dataset.last_update and self.xml_dsd.last_update
            and self.dataset.last_update > self.xml_dsd.last_update):
        raise errors.RejectUpdatedDataset(
            provider_name=self.provider_name,
            dataset_code=self.dataset.dataset_code,
            comments="update-date[%s]" % self.xml_dsd.last_update)

    self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            dsd_id=self.dataset_code,
                            frequencies_supported=FREQUENCIES_SUPPORTED)
    self.rows = self._get_data_by_dimension()
def __init__(self, dataset=None):
    """Initialize structure, verify freshness, build the row generator."""
    super().__init__(dataset)
    self.store_path = self.get_store_path()
    self.xml_dsd = XMLStructure(provider_name=self.provider_name)
    self._load_dsd()

    local_update = self.dataset.last_update
    remote_update = self.xml_dsd.last_update
    if local_update and remote_update and local_update > remote_update:
        # Local data is newer than the provider's structure: reject.
        raise errors.RejectUpdatedDataset(
            provider_name=self.provider_name,
            dataset_code=self.dataset.dataset_code,
            comments="update-date[%s]" % remote_update)

    self.dataset.last_update = clean_datetime(remote_update)

    self.xml_data = XMLData(provider_name=self.provider_name,
                            dataset_code=self.dataset_code,
                            xml_dsd=self.xml_dsd,
                            dsd_id=self.dataset_code,
                            frequencies_supported=FREQUENCIES_SUPPORTED)
    self.rows = self._get_data_by_dimension()
def __init__(self, dataset):
    """Prepare download URL, DSD parser, and local store, then load."""
    super().__init__(dataset)
    self.store_path = self.get_store_path()
    self.dataset_url = make_url(self.dataset_code)
    self.xml_dsd = XMLStructure(provider_name=self.provider_name)
    self._load()
def __init__(self, dataset=None, sdmx_filter=None):
    """Store the SDMX filter, load the DSD, and build the row generator."""
    super().__init__(dataset)
    self.sdmx_filter = sdmx_filter
    # NOTE(review): keeps the original dataset code around — presumably
    # restored on each series later; confirm against build_series().
    self.real_dataset_code = self.dataset_code
    self.store_path = self.get_store_path()
    self.xml_dsd = XMLStructure(provider_name=self.provider_name)
    self._load_dsd()
    self.rows = self._get_data_by_dimension()
class OECD_Data(SeriesIterator):
    """Series iterator for OECD SDMX datasets.

    Loads the DSD, then streams data one dimension-value slice at a
    time, yielding (row, err) tuples terminated by (None, None).
    """

    def __init__(self, dataset=None, sdmx_filter=None):
        super().__init__(dataset)
        # Original code is restored on each bson in build_series().
        self.real_dataset_code = self.dataset_code
        self.sdmx_filter = sdmx_filter
        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        self._load_dsd()
        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """Return the GetDataStructure endpoint for this dataset.

        (Historical note: "EO" was once remapped to "EO95_LTB" here.)
        """
        return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        """Return the GetData endpoint for this dataset.

        (Historical note: "EO" was once remapped to "EO95_LTB" here,
        with manual Mongo bulkWrite cleanup of the old documents.)
        """
        return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/%s" % self.dataset_code

    def _load_dsd(self):
        """Download and parse the DSD, then update dataset metadata."""
        url = self._get_url_dsd()
        download = Downloader(store_filepath=self.store_path,
                              url=url,
                              filename="dsd-%s.xml" % self.dataset_code,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _set_dataset(self):
        """Copy structure metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (row, err) for every value of the selected dimension.

        One HTTP request is issued per value of the dimension chosen by
        select_dimension(..., choice="max"); 4xx slices are skipped,
        5xx aborts the iteration. A final (None, None) marks the end.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()
        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")
        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # Wildcard every dimension except the selected one.
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if 400 <= response.status_code < 500:
                # Slice unavailable from the provider: skip it.
                continue
            elif response.status_code >= 500:
                # BUGFIX: raise_for_status() raises HTTPError itself;
                # the old "raise response.raise_for_status()" would have
                # re-raised None (a TypeError) had it ever returned.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        # self.dataset.update_database(save_only=True)
        yield None, None

    def build_series(self, bson):
        """Stamp update date, real dataset code, and frequency."""
        bson["last_update"] = self.dataset.last_update
        bson["dataset_code"] = self.real_dataset_code
        self.dataset.add_frequency(bson["frequency"])
        return bson
class IMF_XML_Data(SeriesIterator):
    """Series iterator for IMF SDMX datasets (dataservices.imf.org).

    Loads the DSD, rejects stale updates, then streams data one
    dimension-value slice at a time via ``self.rows``.
    """

    def __init__(self, dataset=None):
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        self._load_dsd()

        # Reject the update when local data is already newer than the DSD.
        if self.dataset.last_update and self.xml_dsd.last_update:
            if self.dataset.last_update > self.xml_dsd.last_update:
                comments = "update-date[%s]" % self.xml_dsd.last_update
                raise errors.RejectUpdatedDataset(
                    provider_name=self.provider_name,
                    dataset_code=self.dataset.dataset_code,
                    comments=comments)

        self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)
        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """Return the DataStructure endpoint for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/DataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        """Return the CompactData endpoint for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/%s" % self.dataset_code

    def _load_dsd(self):
        """Download and parse the DSD, then update dataset metadata."""
        url = self._get_url_dsd()
        download = Downloader(store_filepath=self.store_path,
                              url=url,
                              filename="dsd-%s.xml" % self.dataset_code,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)
        filepath = download.get_filepath()
        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()

    def _set_dataset(self):
        """Copy structure metadata from the parsed DSD onto the dataset."""
        dataset = dataset_converter(self.xml_dsd, self.dataset_code)
        self.dataset.dimension_keys = dataset["dimension_keys"]
        self.dataset.attribute_keys = dataset["attribute_keys"]
        self.dataset.concepts = dataset["concepts"]
        self.dataset.codelists = dataset["codelists"]

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) extracted from the DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (row, err) per slice of the largest dimension.

        4xx slices are skipped, 5xx aborts the iteration. A final
        (None, None) marks the end of the stream.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()
        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice="max")
        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build a URL key
            # that wildcards every other dimension.
            local_count = 0
            sdmx_key = []
            for i in range(count_dimensions):
                if i == position:
                    sdmx_key.append(dimension_value)
                else:
                    sdmx_key.append(".")
            key = "".join(sdmx_key)

            url = "%s/%s" % (self._get_url_data(), key)
            filename = "data-%s-%s.xml" % (self.dataset_code,
                                           key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  client=self.fetcher.requests_client)
            filepath, response = download.get_filepath_and_response()

            if filepath:
                self.fetcher.for_delete.append(filepath)

            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                # BUGFIX: raise_for_status() raises HTTPError itself;
                # the old "raise response.raise_for_status()" would have
                # re-raised None (a TypeError) had it ever returned.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err
                local_count += 1
                # BUGFIX: warn once when crossing the threshold instead
                # of on every subsequent row (the old ">=" check flooded
                # the logs past 2999 series).
                if local_count == 2999:
                    logger.warning("TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]" % (self.dataset_code, key))

        # self.dataset.update_database(save_only=True)
        yield None, None

    def build_series(self, bson):
        """Stamp update date and frequency on one series document."""
        bson["last_update"] = self.dataset.last_update
        self.dataset.add_frequency(bson["frequency"])
        return bson
class IMF_XML_Data(SeriesIterator):
    """Series iterator over IMF SDMX datasets."""

    def __init__(self, dataset=None):
        super().__init__(dataset)
        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        self._load_dsd()

        # Refuse to re-import a dataset that is already up to date.
        if self.dataset.last_update and self.xml_dsd.last_update:
            if self.dataset.last_update > self.xml_dsd.last_update:
                raise errors.RejectUpdatedDataset(
                    provider_name=self.provider_name,
                    dataset_code=self.dataset.dataset_code,
                    comments="update-date[%s]" % self.xml_dsd.last_update)

        self.dataset.last_update = clean_datetime(self.xml_dsd.last_update)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)
        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """Build the DataStructure URL for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/DataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        """Build the CompactData URL for this dataset."""
        return "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/%s" % self.dataset_code

    def _load_dsd(self):
        """Fetch and parse the DSD, then push metadata to the dataset."""
        dsd_download = Downloader(store_filepath=self.store_path,
                                  url=self._get_url_dsd(),
                                  filename="dsd-%s.xml" % self.dataset_code,
                                  use_existing_file=self.fetcher.use_existing_file,
                                  client=self.fetcher.requests_client)
        dsd_path = dsd_download.get_filepath()
        self.fetcher.for_delete.append(dsd_path)
        self.xml_dsd.process(dsd_path)
        self._set_dataset()

    def _set_dataset(self):
        """Propagate DSD structure info onto the dataset object."""
        meta = dataset_converter(self.xml_dsd, self.dataset_code)
        for field in ("dimension_keys", "attribute_keys",
                      "concepts", "codelists"):
            setattr(self.dataset, field, meta[field])

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) from the parsed DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (row, err) slice by slice, then a final (None, None)."""
        dim_keys, dims = self._get_dimensions_from_dsd()
        position, _key, dim_values = select_dimension(dim_keys, dims,
                                                      choice="max")
        n_dims = len(dim_keys)

        for dim_value in dim_values:
            # For each value of the chosen dimension, build a URL key
            # that wildcards every other dimension.
            series_seen = 0
            key = "".join(dim_value if i == position else "."
                          for i in range(n_dims))

            data_download = Downloader(
                url="%s/%s" % (self._get_url_data(), key),
                filename="data-%s-%s.xml" % (self.dataset_code,
                                             key.replace(".", "_")),
                store_filepath=self.store_path,
                client=self.fetcher.requests_client)
            data_path, response = data_download.get_filepath_and_response()

            if data_path:
                self.fetcher.for_delete.append(data_path)

            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                raise response.raise_for_status()

            for row, err in self.xml_data.process(data_path):
                yield row, err
                series_seen += 1
                if series_seen >= 2999:
                    logger.warning("TODO: VRFY - series > 2999 for provider[IMF] - dataset[%s] - key[%s]" % (self.dataset_code, key))

        # self.dataset.update_database(save_only=True)
        yield None, None

    def build_series(self, bson):
        """Stamp update date and frequency on one series document."""
        bson["last_update"] = self.dataset.last_update
        self.dataset.add_frequency(bson["frequency"])
        return bson
class OECD_Data(SeriesIterator):
    """Series iterator over OECD SDMX datasets."""

    def __init__(self, dataset=None, sdmx_filter=None):
        super().__init__(dataset)
        self.sdmx_filter = sdmx_filter
        # Keep the original code: build_series() writes it back on each bson.
        self.real_dataset_code = self.dataset_code
        self.store_path = self.get_store_path()
        self.xml_dsd = XMLStructure(provider_name=self.provider_name)
        self._load_dsd()
        self.rows = self._get_data_by_dimension()

    def _get_url_dsd(self):
        """Build the GetDataStructure URL.

        (Historical note: "EO" was once remapped to "EO95_LTB" here.)
        """
        return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/%s" % self.dataset_code

    def _get_url_data(self):
        """Build the GetData URL.

        (Historical note: "EO" was once remapped to "EO95_LTB" here,
        with manual Mongo bulkWrite cleanup of the old documents.)
        """
        return "http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/%s" % self.dataset_code

    def _load_dsd(self):
        """Fetch and parse the DSD, then push metadata to the dataset."""
        dsd_download = Downloader(store_filepath=self.store_path,
                                  url=self._get_url_dsd(),
                                  filename="dsd-%s.xml" % self.dataset_code,
                                  use_existing_file=self.fetcher.use_existing_file,
                                  client=self.fetcher.requests_client)
        dsd_path = dsd_download.get_filepath()
        self.fetcher.for_delete.append(dsd_path)
        self.xml_dsd.process(dsd_path)
        self._set_dataset()

    def _set_dataset(self):
        """Propagate DSD structure info onto the dataset object."""
        meta = dataset_converter(self.xml_dsd, self.dataset_code)
        for field in ("dimension_keys", "attribute_keys",
                      "concepts", "codelists"):
            setattr(self.dataset, field, meta[field])

    def _get_dimensions_from_dsd(self):
        """Return (dimension_keys, dimensions) from the parsed DSD."""
        return get_dimensions_from_dsd(self.xml_dsd, self.provider_name,
                                       self.dataset_code)

    def _get_data_by_dimension(self):
        """Yield (row, err) slice by slice, then a final (None, None)."""
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dim_keys, dims = self._get_dimensions_from_dsd()
        position, _key, dim_values = select_dimension(dim_keys, dims,
                                                      choice="max")
        n_dims = len(dim_keys)

        for dim_value in dim_values:
            # Wildcard every dimension but the selected one.
            key = "".join(dim_value if i == position else "."
                          for i in range(n_dims))

            data_download = Downloader(
                url="%s/%s" % (self._get_url_data(), key),
                filename="data-%s-%s.xml" % (self.dataset_code,
                                             key.replace(".", "_")),
                store_filepath=self.store_path,
                client=self.fetcher.requests_client)
            data_path, response = data_download.get_filepath_and_response()

            if data_path:
                self.fetcher.for_delete.append(data_path)

            if 400 <= response.status_code < 500:
                continue
            elif response.status_code >= 500:
                raise response.raise_for_status()

            for row, err in self.xml_data.process(data_path):
                yield row, err

        # self.dataset.update_database(save_only=True)
        yield None, None

    def build_series(self, bson):
        """Stamp update date, real dataset code, and frequency."""
        bson["last_update"] = self.dataset.last_update
        bson["dataset_code"] = self.real_dataset_code
        self.dataset.add_frequency(bson["frequency"])
        return bson