예제 #1
0
    def test_holidays_for_year(self):
        """Check the 2018 holiday data and the error raised for a year without data."""
        d = SchoolHolidayDates()

        res = d.holidays_for_year(2018)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(res), 151)

        # Only the per-date records are inspected, so iterate the values.
        for v in res.values():
            self.assertEqual(sorted(v.keys()), self.EXPECTED_KEYS)

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        # The call itself is expected to raise, so no assertion wraps it.
        with self.assertRaisesRegex(UnsupportedYearException,
                                    "No data for year: 2024"):
            d.holidays_for_year(2024)
    def test_holidays_for_year(self):
        """Check the 2018 holiday data and the error raised for a year without data."""
        d = SchoolHolidayDates()

        res = d.holidays_for_year(2018)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(res), 151)

        # Only the per-date records are inspected, so iterate the values.
        for v in res.values():
            self.assertEqual(sorted(v.keys()), self.EXPECTED_KEYS)

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias.
        # The call itself is expected to raise, so no assertion wraps it.
        with self.assertRaisesRegex(
            UnsupportedYearException, "No data for year: 2021"
        ):
            d.holidays_for_year(2021)
    def test_supported_holidays_are_complete(self):
        """Check that the 2019 data uses exactly the supported holiday names."""
        d = SchoolHolidayDates()

        res = d.holidays_for_year(2019)

        # Collect the distinct holiday names found in the 2019 records.
        names = {v["nom_vacances"] for v in res.values()}

        expected = set(SchoolHolidayDates.SUPPORTED_HOLIDAY_NAMES)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(names, expected)
예제 #4
0
    def test_supported_holidays_are_complete(self):
        """Check that the 2019 data uses exactly the supported holiday names."""
        d = SchoolHolidayDates()

        res = d.holidays_for_year(2019)

        # Collect the distinct holiday names found in the 2019 records.
        names = {v["nom_vacances"] for v in res.values()}

        expected = set(SchoolHolidayDates.SUPPORTED_HOLIDAY_NAMES)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(names, expected)
예제 #5
0
def get_encoded_calendar_df(data_final):
    """Add French school/bank holiday features and a month feature to ``data_final``.

    Args:
        data_final: DataFrame with a ``year`` column (years to load holiday
            data for) and a ``release_date`` column of strings whose
            characters 5-6 hold the month (the code reads ``x[5:7]``;
            presumably ``YYYY-MM-DD`` -- confirm against the caller).

    Returns:
        ``data_final`` left-merged with the holiday table on
        ``release_date`` == ``date``, with every missing value replaced by 0,
        plus a ``month`` column and whatever ``apply_cos`` adds (called with
        ``'cos_month'`` and 12; presumably a cyclical month encoding --
        confirm against its definition).

        Holiday columns: ``vacances_zone_a`` / ``vacances_zone_b`` /
        ``vacances_zone_c`` (1 if the zone is on school holiday, else 0),
        ``jour_ferie`` (1 on a bank holiday, else 0) and ``holiday`` (sum of
        the four flags).
    """
    # Compute the distinct years once; both holiday sources are loaded per year.
    years = set(data_final['year'])

    # Load school holidays for France: one frame per year, concatenated once
    # instead of growing a frame inside the loop.
    fr_holidays = SchoolHolidayDates()
    school_frames = [
        pd.DataFrame.from_dict(fr_holidays.holidays_for_year(year)).T
        for year in years
    ]
    df_vacances = pd.concat(school_frames) if school_frames else pd.DataFrame()

    # Load bank holidays for France as (date, name) records.
    bank_records = [
        {'date': day, 'jour_ferie': label}
        for year in years
        for day, label in sorted(holidays.FRA(years=year).items())
    ]
    df_jf = pd.DataFrame(bank_records)

    # Merge school and bank holidays; the outer join keeps dates that appear
    # in only one of the two sources (the other side's columns become NaN).
    df_holidays = pd.merge(df_vacances, df_jf, how='outer', on='date')

    # - 3 binary features for school holidays, taking 1 if the given zone is
    #   on holiday, else 0.
    dict_map_vac = {
        True: 1,
        False: 0
    }
    for zone_col in ('vacances_zone_a', 'vacances_zone_b', 'vacances_zone_c'):
        # Dates present only in the bank-holiday table have NaN here. They are
        # set to 0 now so the combined feature below is not turned into NaN
        # (and later into 0) for bank holidays outside school holidays.
        df_holidays[zone_col] = df_holidays[zone_col].map(dict_map_vac).fillna(0)

    # - 1 binary feature for bank holiday: "jour_ferie" holds either the name
    #   of the holiday or a missing value, so flag the non-missing entries.
    df_holidays['jour_ferie'] = df_holidays['jour_ferie'].notna().astype(int)

    # - Combined feature: number of zones on school holiday plus the bank
    #   holiday flag.
    df_holidays['holiday'] = (
        df_holidays['vacances_zone_a']
        + df_holidays['vacances_zone_b']
        + df_holidays['vacances_zone_c']
        + df_holidays['jour_ferie']
    )

    # Dates are compared as strings because release_date is a string column.
    df_holidays['date'] = df_holidays['date'].map(str)
    data_final_cal = pd.merge(
        data_final, df_holidays, how='left', left_on='release_date', right_on='date'
    ).fillna(0)

    data_final_cal['month'] = data_final_cal['release_date'].map(lambda x: int(x[5:7]))
    data_final_cal = apply_cos(data_final_cal, 'month', 'cos_month', 12)
    return data_final_cal