Пример #1
0
    def upsert_dataset(self, dataset_code):
        """Refresh one selected dataset in the database.

        The stored ``last_update`` of ``dataset_code`` is compared with the
        ``last_update`` found in the selected-datasets settings.

        Raises ``errors.RejectUpdatedDataset`` when a stored document exists
        and its ``last_update`` is not older than the one in the settings.

        Returns the result of ``dataset.update_database()``.
        """
        self.get_selected_datasets()

        # Only the two fields needed for the freshness check are projected.
        query = {
            'provider_name': self.provider_name,
            'dataset_code': dataset_code,
        }
        projection = {'dataset_code': 1, 'last_update': 1}
        stored = self.db[constants.COL_DATASETS].find_one(query, projection)

        settings = self.selected_datasets[dataset_code]

        if stored and stored['last_update'] >= settings['last_update']:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=dataset_code,
                comments="update-date[%s]" % stored['last_update'])

        dataset = Datasets(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            name=settings["name"],
            doc_href=settings["metadata"].get("doc_href"),
            last_update=None,
            fetcher=self)
        # The date is assigned after construction rather than through the
        # constructor argument, which is passed as None above.
        dataset.last_update = settings["last_update"]
        dataset.series.data_iterator = EurostatData(dataset)

        return dataset.update_database()
Пример #2
0
    def upsert_dataset(self, dataset_code):
        """Load one BEA dataset from its spreadsheet and store it.

        Name, url, filename and sheet name are read from the entry of
        ``dataset_code`` in ``self._get_datasets_settings()``.

        Raises ``errors.RejectUpdatedDataset`` when the dataset already
        carries a ``last_update`` that is not older than the release date of
        the downloaded sheet, unless ``self.force_update`` is set.

        Returns the result of ``dataset.update_database()``.
        """
        settings = self._get_datasets_settings()[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           doc_href='http://www.bea.gov',
                           fetcher=self)

        metadata = settings["metadata"]
        url = metadata["url"]
        sheet = self._get_sheet(url, metadata["filename"],
                                metadata["sheet_name"])
        fetcher_data = BeaData(dataset, url=url, sheet=sheet)

        # Reject only when what we already hold is at least as recent as the
        # new release. The comparison used to be the other way round, which
        # rejected newer releases and let older ones through.
        # NOTE(review): presumably dataset.last_update is the date of the
        # previously stored version loaded by Datasets - confirm.
        already_current = (dataset.last_update
                           and dataset.last_update >= fetcher_data.release_date)
        if already_current and not self.force_update:
            comments = "update-date[%s]" % fetcher_data.release_date
            raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                              dataset_code=dataset_code,
                                              comments=comments)

        dataset.last_update = fetcher_data.release_date
        dataset.series.data_iterator = fetcher_data

        return dataset.update_database()
Пример #3
0
    def test_constructor(self):
        """Check Datasets construction and the fields of its ``bson`` view."""

        # nosetests -s -v dlstats.tests.fetchers.test__commons:DatasetTestCase.test_constructor

        # Passing only is_load_previous_version is expected to be rejected.
        self.assertRaises(ValueError, Datasets, is_load_previous_version=False)

        fetcher = Fetcher(provider_name="p1", is_indexes=False)

        dataset = Datasets(provider_name="p1",
                           dataset_code="d1",
                           name="d1 Name",
                           doc_href="http://www.example.com",
                           fetcher=fetcher,
                           is_load_previous_version=False)
        dataset.dimension_list.update_entry("country", "country", "country")

        self.assertIsInstance(dataset.series, Series)
        self.assertIsInstance(dataset.dimension_list, CodeDict)
        self.assertIsInstance(dataset.attribute_list, CodeDict)

        document = dataset.bson
        expected_fields = (
            ('provider_name', "p1"),
            ("dataset_code", "d1"),
            ("name", "d1 Name"),
            ("doc_href", "http://www.example.com"),
        )
        for key, value in expected_fields:
            self.assertEqual(document[key], value)
        self.assertIsInstance(document["dimension_list"], dict)
        self.assertIsInstance(document["attribute_list"], dict)
        self.assertIsNone(document["last_update"])
        self.assertEqual(document["slug"], "p1-d1")

        #TODO: last_update
        dataset.last_update = datetime.now()
Пример #4
0
    def upsert_dataset(self, dataset_code):
        """Update the database copy of one selected dataset.

        Raises ``errors.RejectUpdatedDataset`` if a stored document exists
        whose ``last_update`` is greater than or equal to the ``last_update``
        in the selected-datasets settings.

        Returns whatever ``dataset.update_database()`` returns.
        """
        self.get_selected_datasets()

        collection = self.db[constants.COL_DATASETS]
        existing = collection.find_one(
            {"provider_name": self.provider_name, "dataset_code": dataset_code},
            {"dataset_code": 1, "last_update": 1},
        )

        selected = self.selected_datasets[dataset_code]
        incoming_date = selected["last_update"]

        if existing and existing["last_update"] >= incoming_date:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=dataset_code,
                comments="update-date[%s]" % existing["last_update"],
            )

        dataset = Datasets(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            name=selected["name"],
            doc_href=selected["metadata"].get("doc_href"),
            last_update=None,
            fetcher=self,
        )
        # Set after construction; the constructor receives None above.
        dataset.last_update = selected["last_update"]
        dataset.series.data_iterator = EurostatData(dataset)

        return dataset.update_database()
Пример #5
0
    def upsert_dataset(self, dataset_code):
        """Load one BEA dataset from its spreadsheet and store it.

        The entry of ``dataset_code`` in ``self._get_datasets_settings()``
        supplies the dataset name and the url, filename and sheet name of the
        spreadsheet to read.

        Raises ``errors.RejectUpdatedDataset`` when ``dataset.last_update`` is
        set and is not older than the sheet's release date, unless
        ``self.force_update`` is set.

        Returns the result of ``dataset.update_database()``.
        """
        settings = self._get_datasets_settings()[dataset_code]

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings["name"],
                           doc_href='http://www.bea.gov',
                           fetcher=self)

        metadata = settings["metadata"]
        url = metadata["url"]
        sheet = self._get_sheet(url, metadata["filename"],
                                metadata["sheet_name"])
        fetcher_data = BeaData(dataset, url=url, sheet=sheet)

        # The dataset is up to date when the date we already hold is at least
        # as recent as the new release. The previous test compared the other
        # way round, so newer releases were rejected.
        # NOTE(review): presumably dataset.last_update comes from the
        # previously stored version loaded by Datasets - confirm.
        up_to_date = (dataset.last_update
                      and dataset.last_update >= fetcher_data.release_date)
        if up_to_date and not self.force_update:
            comments = "update-date[%s]" % fetcher_data.release_date
            raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                              dataset_code=dataset_code,
                                              comments=comments)

        dataset.last_update = fetcher_data.release_date
        dataset.series.data_iterator = fetcher_data

        return dataset.update_database()
Пример #6
0
 def upsert_sna(self, url, dataset_code):
     """Build the dataset for ``dataset_code`` from ``url`` and store it.

     The name comes from ``self.dataset_name`` and the update date from the
     ``release_date`` of the ``EsriData`` iterator. Nothing is returned.
     """
     dataset = Datasets(self.provider_name, dataset_code, fetcher=self)
     # The iterator is created before the descriptive fields are filled in.
     esri_iterator = EsriData(dataset, url)

     dataset.name = self.dataset_name[dataset_code]
     dataset.doc_href = 'http://www.esri.cao.go.jp/index-e.html'
     dataset.last_update = esri_iterator.release_date
     dataset.series.data_iterator = esri_iterator

     dataset.update_database()
Пример #7
0
 def upsert_gem(self, dataset_code):
     """Store the dataset described by ``DATASETS[dataset_code]``.

     Name, documentation link and source url are read from ``DATASETS``;
     the update date is the ``release_date`` of the ``GemData`` iterator.
     Nothing is returned.
     """
     settings = DATASETS[dataset_code]
     source_url = settings['url']

     dataset = Datasets(
         provider_name=self.provider_name,
         dataset_code=dataset_code,
         name=settings['name'],
         doc_href=settings['doc_href'],
         fetcher=self)

     gem_iterator = GemData(dataset, source_url)
     dataset.last_update = gem_iterator.release_date
     dataset.series.data_iterator = gem_iterator

     dataset.update_database()
Пример #8
0
    def upsert_dataset(self, dataset_code):
        """Store the dataset ``dataset_code`` using an ``ECB_Data`` iterator.

        The structure is loaded first via ``self._load_structure()``. The
        dataset is created without a name and stamped with
        ``utils.clean_datetime()`` as its update date.

        Returns the result of ``dataset.update_database()``.
        """
        self._load_structure()

        dataset = Datasets(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            name=None,
            doc_href=self.provider.website,
            fetcher=self)
        dataset.last_update = utils.clean_datetime()
        dataset.series.data_iterator = ECB_Data(dataset=dataset)

        return dataset.update_database()
Пример #9
0
 def upsert_dataset(self, dataset_code, sheet):
     """Store the BEA dataset read from ``sheet`` and log how long it took.

     The dataset is named after ``dataset_code`` and dated with the
     ``release_date`` of the ``BeaData`` iterator. After the database
     update, ``self.update_metas(dataset_code)`` is called. Nothing is
     returned.
     """
     started_at = time.time()
     logger.info("upsert dataset[%s] - START" % (dataset_code))

     dataset = Datasets(self.provider_name, dataset_code, fetcher=self)
     bea_iterator = BeaData(dataset, self.url, sheet)

     dataset.name = dataset_code
     dataset.doc_href = 'http://www.bea.gov/newsreleases/national/gdp/gdpnewsrelease.htm'
     dataset.last_update = bea_iterator.release_date
     dataset.series.data_iterator = bea_iterator

     dataset.update_database()
     self.update_metas(dataset_code)

     elapsed = time.time() - started_at
     logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))
Пример #10
0
    def upsert_dataset(self, dataset_code):
        """Load the structure, then store ``dataset_code`` from ``ECB_Data``.

        The dataset starts with no name, takes ``self.provider.website`` as
        its documentation link and ``utils.clean_datetime()`` as its update
        date.

        Returns the result of ``dataset.update_database()``.
        """
        self._load_structure()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None,
                           doc_href=self.provider.website,
                           fetcher=self)
        dataset.last_update = utils.clean_datetime()

        dataset.series.data_iterator = ECB_Data(dataset=dataset)
        return dataset.update_database()
Пример #11
0
    def upsert_dataset(self, dataset_code):
        """Store the dataset ``dataset_code`` using an ``OECD_Data`` iterator.

        Name, documentation link and SDMX filter are read from the entry of
        ``dataset_code`` in ``DATASETS``; the update date is
        ``clean_datetime()``.

        Raises ``Exception`` when ``DATASETS`` has no (non-empty) entry for
        ``dataset_code``.

        Returns the result of ``dataset.update_database()``.
        """
        settings = DATASETS.get(dataset_code)
        if not settings:
            # Formatting instead of "+" keeps the code readable in the
            # message and does not fail with TypeError on a non-str code.
            raise Exception("This dataset is unknown: %s" % (dataset_code,))

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=settings['name'],
                           doc_href=settings['doc_href'],
                           fetcher=self)
        dataset.last_update = clean_datetime()

        dataset.series.data_iterator = OECD_Data(
            dataset, sdmx_filter=settings['sdmx_filter'])

        return dataset.update_database()
Пример #12
0
    def upsert_dataset(self, dataset_code):
        """Store the dataset ``dataset_code`` using an ``INSEE_Data`` iterator.

        Dataflows and concepts are loaded first. The dataset is created
        without name or documentation link and dated with
        ``clean_datetime()``.

        Returns the result of ``dataset.update_database()``.
        """
        self._load_structure_dataflows()
        self._load_structure_concepts()

        dataset = Datasets(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            name=None,
            doc_href=None,
            fetcher=self)
        dataset.last_update = clean_datetime()
        dataset.series.data_iterator = INSEE_Data(dataset)

        return dataset.update_database()
Пример #13
0
 def upsert_dataset(self, dataset_code):
     """Store the dataset ``dataset_code`` using an ``OECD_Data`` iterator.

     Name, documentation link and SDMX filter come from the entry of
     ``dataset_code`` in ``DATASETS``; the update date is
     ``clean_datetime()``.

     Raises ``Exception`` when ``DATASETS`` has no (non-empty) entry for
     ``dataset_code``.

     Returns the result of ``dataset.update_database()``.
     """
     settings = DATASETS.get(dataset_code)
     if not settings:
         # Formatting instead of "+" separates the code from the text and
         # does not fail with TypeError when the code is not a str.
         raise Exception("This dataset is unknown: %s" % (dataset_code,))

     dataset = Datasets(provider_name=self.provider_name,
                        dataset_code=dataset_code,
                        name=settings['name'],
                        doc_href=settings['doc_href'],
                        fetcher=self)
     dataset.last_update = clean_datetime()

     dataset.series.data_iterator = OECD_Data(
         dataset, sdmx_filter=settings['sdmx_filter'])

     return dataset.update_database()
Пример #14
0
    def upsert_dataset(self, dataset_code):
        """Load dataflows and concepts, then store ``dataset_code``.

        The dataset has neither name nor documentation link at creation, is
        dated with ``clean_datetime()`` and reads its series from
        ``INSEE_Data``.

        Returns the result of ``dataset.update_database()``.
        """
        self._load_structure_dataflows()
        self._load_structure_concepts()

        dataset = Datasets(provider_name=self.provider_name,
                           dataset_code=dataset_code,
                           name=None, doc_href=None,
                           fetcher=self)
        dataset.last_update = clean_datetime()

        dataset.series.data_iterator = INSEE_Data(dataset)
        return dataset.update_database()
Пример #15
0
    def upsert_dataset(self, dataset_code):
        """Store one selected dataset, from an Excel file or from the API.

        Codes listed in ``DATASETS`` are read with ``ExcelData`` and get
        their documentation link from ``DATASETS``. All other codes are read
        with ``WorldBankAPIData`` and dated with ``clean_datetime()``.

        Returns the result of ``dataset.update_database()``.
        """
        self.get_selected_datasets()

        settings = self.selected_datasets[dataset_code]

        dataset = Datasets(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            name=settings["name"],
            fetcher=self,
        )

        if dataset_code not in DATASETS:
            dataset.last_update = clean_datetime()
            dataset.series.data_iterator = WorldBankAPIData(dataset, settings)
        else:
            excel_settings = DATASETS[dataset_code]
            dataset.series.data_iterator = ExcelData(dataset, excel_settings["url"])
            dataset.doc_href = excel_settings["doc_href"]

        return dataset.update_database()
Пример #16
0
 def upsert_weo_issue(self, url, dataset_code):
     """Store one WEO issue read from ``url`` as dataset ``dataset_code``.

     Name and documentation link come from ``DATASETS[dataset_code]``; the
     update date is the ``release_date`` of the ``WeoData`` iterator. The
     'flags' attribute gets an 'e' -> 'Estimated' entry.

     Failures of the database update or of ``self.update_metas`` are logged
     and not propagated, so the method returns None in every case.
     """
     settings = DATASETS[dataset_code]

     dataset = Datasets(provider_name=self.provider_name,
                        dataset_code=dataset_code,
                        name=settings['name'],
                        doc_href=settings['doc_href'],
                        fetcher=self)

     weo_data = WeoData(dataset, url)
     dataset.last_update = weo_data.release_date
     dataset.attribute_list.update_entry('flags', 'e', 'Estimated')
     dataset.series.data_iterator = weo_data
     try:
         dataset.update_database()
         self.update_metas(dataset_code)
     except Exception as err:
         # Still best-effort (no re-raise), but keep the traceback and say
         # which dataset failed; str(err) alone loses both.
         logger.exception("upsert dataset[%s] failed: %s", dataset_code, err)
Пример #17
0
    def _common_tests(self):
        """Load the fixture dataset through ``EsriData`` and check the result.

        Verifies that the fixture file exists, that the provider and dataset
        documents are stored, and that the dimension and series counts match
        the expectations recorded in ``DATASETS``.
        """
        self._collections_is_empty()

        self.filepath = get_filepath(self.dataset_code)
        self.assertTrue(os.path.exists(self.filepath))

        # provider.update_database
        self.fetcher.provider.update_database()
        provider_doc = self.db[constants.COL_PROVIDERS].find_one(
            {"name": self.fetcher.provider_name})
        self.assertIsNotNone(provider_doc)

        expected = DATASETS[self.dataset_code]

        dataset = Datasets(provider_name=self.fetcher.provider_name,
                           dataset_code=self.dataset_code,
                           name=expected["name"],
                           last_update=expected["last_update"],
                           fetcher=self.fetcher)

        # manual Data for iterator
        esri_iterator = esri.EsriData(dataset, make_url(self),
                                      filename=expected["filename"])
        dataset.series.data_iterator = esri_iterator
        dataset.last_update = expected["last_update"]
        dataset.update_database()

        selector = {"provider_name": self.fetcher.provider_name,
                    "dataset_code": self.dataset_code}

        self.dataset = self.db[constants.COL_DATASETS].find_one(dict(selector))
        self.assertIsNotNone(self.dataset)

        dimension_list = self.dataset["dimension_list"]
        self.assertEqual(len(dimension_list), expected["dimension_count"])
        # NOTE(review): "series_names" is read from the top level of DATASETS,
        # not from the entry of this dataset - confirm that is intended.
        for entry in dimension_list["concept"]:
            self.assertIn(entry[1], DATASETS["series_names"])

        stored_series = self.db[constants.COL_SERIES].find(dict(selector))
        self.assertEqual(stored_series.count(), expected["series_count"])
Пример #18
0
    def upsert_dataset(self, dataset_code):
        """Store one selected dataset and log the elapsed time.

        The settings of ``dataset_code`` are kept on
        ``self.dataset_settings`` before ``self.make_url()`` is called. Name,
        documentation link and update date are copied from those settings
        and the series are read with ``EsriData``.

        :dset: dataset_code
        :returns: None"""
        self.get_selected_datasets()

        started_at = time.time()
        logger.info("upsert dataset[%s] - START" % (dataset_code))

        self.dataset_settings = self.selected_datasets[dataset_code]
        source_url = self.make_url()

        dataset = Datasets(self.provider_name, dataset_code, fetcher=self)
        dataset.name = self.dataset_settings['name']
        dataset.doc_href = self.dataset_settings['metadata']['doc_href']
        dataset.last_update = self.dataset_settings['last_update']
        dataset.series.data_iterator = EsriData(dataset, source_url,
                                                filename=dataset_code)
        dataset.update_database()

        elapsed = time.time() - started_at
        logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, elapsed))
Пример #19
0
    def upsert_dataset(self, dataset_code):
        """Store one selected dataset, choosing the reader by its code.

        A code present in ``DATASETS`` is read with ``ExcelData`` from the
        configured url and takes its documentation link from ``DATASETS``.
        Any other code is read with ``WorldBankAPIData`` and dated with
        ``clean_datetime()``.

        Returns the result of ``dataset.update_database()``.
        """
        self.get_selected_datasets()

        selected = self.selected_datasets[dataset_code]

        dataset = Datasets(
            provider_name=self.provider_name,
            dataset_code=dataset_code,
            name=selected["name"],
            fetcher=self)

        if dataset_code not in DATASETS:
            dataset.last_update = clean_datetime()
            dataset.series.data_iterator = WorldBankAPIData(dataset, selected)
        else:
            excel_entry = DATASETS[dataset_code]
            dataset.series.data_iterator = ExcelData(dataset,
                                                     excel_entry["url"])
            dataset.doc_href = excel_entry["doc_href"]

        return dataset.update_database()