Exemplo n.º 1
0
 def test_parse_sample_selection(self):
     """
     Parse the sample-selection spreadsheet used by the tests and check
     both returned values against the expected per-country year lists.
     """
     fixture_path = "./foc/forecaster/tests/test_sample_selection.xls"
     parser = Input()
     crises_dates, normal_dates = parser.parse_sample_selection(fixture_path)
     # crisis years first, then the years marked as normal
     self.assertEqual(
         crises_dates,
         {"USA": [2009, 2001], "DEU": []},
     )
     self.assertEqual(
         normal_dates,
         {"USA": [1994, 1995, 1996], "DEU": [1994, 1995]},
     )
Exemplo n.º 2
0
 def __init__(self, db_location):
     '''
     Create the parser and load the crisis / normal data.

     db_location -- handed unchanged to Input.parse_sample_selection
     '''
     parser = Input()
     self.input = parser
     parsed = parser.parse_sample_selection(db_location)
     # the parser returns a pair: crises first, normals second
     self.crises, self.normals = parsed
Exemplo n.º 3
0
class CrisisSeer(object):
    '''
    Answers which years were crisis years for a given country,
    using the data parsed from the crisis database.
    '''
    def __init__(self, db_location):
        '''
        Parse the sample selection found at db_location and keep both
        results (crises and normals) on the instance.
        '''
        parser = Input()
        self.input = parser
        parsed = parser.parse_sample_selection(db_location)
        self.crises, self.normals = parsed

    def get_crisis_years(self, country_code):
        """
        Return the crisis years stored for country_code, or an empty
        list when the country has no entry.
        """
        try:
            return self.crises[country_code]
        except KeyError:
            # no crises are recorded for this country, so it is
            # treated as crisis-free
            return []
Exemplo n.º 4
0
class CrisisSeer(object):
    '''
    Reports the years in which a country went through a crisis,
    as recorded in the crisis database.
    '''

    def __init__(self, db_location):
        '''
        Load the crisis and normal data from the sample selection at
        db_location.
        '''
        reader = Input()
        self.input = reader
        loaded = reader.parse_sample_selection(db_location)
        self.crises, self.normals = loaded

    def get_crisis_years(self, country_code):
        """
        Look up the crisis years for country_code.

        A country without an entry yields an empty list.
        """
        try:
            return self.crises[country_code]
        except KeyError:
            # nothing recorded for this country: assume it had no crises
            return []
        
Exemplo n.º 5
0
    def build_from_crises_file(self, country_codes, feature_indicators,
                               test_percentage):
        """
        Build the crisis and normal sample sets and split them.

        The years belonging to each class are read from the crisis XLS
        file at self.t_loc; the indicator data is fetched through
        self.extractor.

        country_codes - codes of the countries to fetch; when the first
            entry is "EVERYTHING", every country present in both the
            extractor's country metadata and the samples definition is
            used instead
        feature_indicators - indicator codes requested for every country
        test_percentage - forwarded to self.divide

        Returns the result of self.divide on the collected samples.
        """
        # start from empty sample sets
        self.crisis_samples = []
        self.normal_samples = []

        # years classified as crises / normal periods, both keyed by
        # country and as plain lists
        selection = Input()
        crises_by_country, normal_by_country = (
            selection.parse_sample_selection(self.t_loc))
        all_crisis_years, all_normal_years = (
            selection.parse_sample_selection_to_list(self.t_loc))

        if country_codes[0] == "EVERYTHING":
            # take every country known to the extractor that also
            # appears in the samples definition
            known_codes = {
                entry.code
                for entry in self.extractor.grab_metadata("countries")
            }
            defined_codes = (set(crises_by_country.keys())
                             | set(normal_by_country.keys()))
            country_codes = sorted(known_codes & defined_codes)

        # a single fetch covering the whole span of years, extended
        # backwards by the configured look-back window
        first_year = (min(min(all_crisis_years), min(all_normal_years))
                      - conf.look_back_years)
        last_year = max(max(all_crisis_years), max(all_normal_years))
        request = self.extractor.arg()
        request["country_codes"] = country_codes
        request["indicator_codes"] = feature_indicators
        request["interval"] = (first_year, last_year)
        request["pause"] = conf.wb_pause
        countries = self.extractor.grab(request)

        if self.cache_enabled:
            if self.extractor.was_cached():
                print("Cache was hit, didn't have to query the World Bank API.")
            else:
                print("Data wasn't cached, queried the World Bank API.")

        # turn the fetched data into labelled samples
        for country in countries:
            indicators = [
                country.get_indicator(ind_code)
                for ind_code in feature_indicators
            ]
            try:
                crisis_years = crises_by_country[country.code]
            except KeyError:
                # no crisis entry for this country: skip it entirely
                continue
            crisis_batch = self.assign_samples(indicators, crisis_years,
                                               CRISIS_CLASS, country.code)
            self.crisis_samples.extend(crisis_batch)

            normal_years = normal_by_country[country.code]
            normal_batch = self.assign_samples(indicators, normal_years,
                                               NORMAL_CLASS, country.code)
            self.normal_samples.extend(normal_batch)

        return self.divide(self.crisis_samples, self.normal_samples,
                           test_percentage)
Exemplo n.º 6
0
 def build_from_crises_file(self, country_codes, feature_indicators, test_percentage):
     """
     Build the crisis and normal sample sets and split them.

     The years belonging to each class are read from the crisis XLS
     file at self.t_loc; the indicator data is fetched through
     self.extractor.

     country_codes - codes of the countries to fetch; when the first
         entry is "EVERYTHING", every country present in both the
         extractor's country metadata and the samples definition is
         used instead
     feature_indicators - indicator codes requested for every country
     test_percentage - forwarded to self.divide

     Returns the result of self.divide on the collected samples.
     """
     # start from empty sample sets
     self.crisis_samples = []
     self.normal_samples = []

     # years classified as crises / normal periods, both keyed by
     # country and as plain lists
     selection = Input()
     crises_by_country, normal_by_country = selection.parse_sample_selection(self.t_loc)
     all_crisis_years, all_normal_years = selection.parse_sample_selection_to_list(self.t_loc)

     if country_codes[0] == "EVERYTHING":
         # take every country known to the extractor that also
         # appears in the samples definition
         known_codes = {entry.code for entry in self.extractor.grab_metadata("countries")}
         defined_codes = set(crises_by_country.keys()) | set(normal_by_country.keys())
         country_codes = sorted(known_codes & defined_codes)

     # a single fetch covering the whole span of years, extended
     # backwards by the configured look-back window
     first_year = min(min(all_crisis_years), min(all_normal_years)) - conf.look_back_years
     last_year = max(max(all_crisis_years), max(all_normal_years))
     request = self.extractor.arg()
     request["country_codes"] = country_codes
     request["indicator_codes"] = feature_indicators
     request["interval"] = (first_year, last_year)
     request["pause"] = conf.wb_pause
     countries = self.extractor.grab(request)

     if self.cache_enabled:
         if self.extractor.was_cached():
             print("Cache was hit, didn't have to query the World Bank API.")
         else:
             print("Data wasn't cached, queried the World Bank API.")

     # turn the fetched data into labelled samples
     for country in countries:
         indicators = [country.get_indicator(ind_code) for ind_code in feature_indicators]
         try:
             crisis_years = crises_by_country[country.code]
         except KeyError:
             # no crisis entry for this country: skip it entirely
             continue
         crisis_batch = self.assign_samples(indicators, crisis_years, CRISIS_CLASS, country.code)
         self.crisis_samples.extend(crisis_batch)

         normal_years = normal_by_country[country.code]
         normal_batch = self.assign_samples(indicators, normal_years, NORMAL_CLASS, country.code)
         self.normal_samples.extend(normal_batch)

     return self.divide(self.crisis_samples, self.normal_samples, test_percentage)
Exemplo n.º 7
0
 def __init__(self, db_location):
     '''
     Set up the parser and store the crisis / normal data it returns.

     db_location -- handed unchanged to Input.parse_sample_selection
     '''
     reader = Input()
     self.input = reader
     loaded = reader.parse_sample_selection(db_location)
     # first element holds the crises, second the normal periods
     self.crises, self.normals = loaded