Пример #1
0
    def test_is_holiday(self):
        """Check ``SchoolHolidayDates.is_holiday`` on known dates.

        25 Dec 2017 must be reported as a holiday and 1 Dec 2017 must not;
        a date in 2005 must raise ``UnsupportedYearException``.
        """
        calendar = SchoolHolidayDates()
        holiday_day = datetime.date(2017, 12, 25)
        regular_day = datetime.date(2017, 12, 1)
        unsupported_day = datetime.date(2005, 2, 7)

        self.assertTrue(calendar.is_holiday(holiday_day))
        self.assertFalse(calendar.is_holiday(regular_day))

        # The error message must name the year that has no data.
        with self.assertRaisesRegexp(
            UnsupportedYearException, 'No data for year: 2005'
        ):
            calendar.is_holiday(unsupported_day)
def processing(dataInt):
    """Impute, enrich and trim the raw data frame.

    Steps, in order:

    1. impute the numeric columns with ``mice``;
    2. re-attach the object-typed columns and drop the location columns;
    3. call ``conv`` on the frame and parse ``timestamp`` into datetimes;
    4. derive ``season``, ``is_business_day`` and ``is_holiday``;
    5. drop the helper columns used to build those features.

    Args:
        dataInt: pandas DataFrame holding at least ``timestamp`` and the
            five ``loc_*`` columns listed below.

    Returns:
        A new DataFrame without ``rangeInYear``, ``datetime_perso``,
        ``date`` and ``timestamp``. ``dataInt`` itself is not modified
        (all work happens on a copy).
    """
    location_cols = ['loc_1', 'loc_2', 'loc_secondary_1',
                     'loc_secondary_2', 'loc_secondary_3']

    ## missing values
    df = dataInt.copy()
    df_num = df.drop(['timestamp'] + location_cols, axis=1)
    # Builtin ``object`` instead of the ``np.object`` alias, which was
    # removed from NumPy and raises AttributeError on current releases.
    df_non_num = df.select_dtypes(include=[object])
    imputed = mice(df_num.values)
    data_mice = pd.DataFrame(imputed, columns=df_num.columns,
                             index=list(df.index.values))

    ## drop the columns that are of no use downstream
    d_tr = data_mice.join(df_non_num).drop(location_cols, axis=1)

    ## create extra attributes
    # NOTE(review): conv() presumably adds 'datetime_perso' and 'date' in
    # place, since both are read/dropped below -- confirm against conv().
    conv(d_tr)
    d_tr['timestamp'] = pd.to_datetime(d_tr['timestamp'],
                                       format='%Y-%m-%dT%H:%M:%S.%f')

    ## create season and rangeInYear (day of the year)
    d_tr['rangeInYear'] = d_tr['timestamp'].dt.dayofyear
    d_tr['season'] = d_tr['rangeInYear'].apply(get_season)

    ## working-day flag
    d_tr['is_business_day'] = d_tr['datetime_perso'].apply(
        lambda moment: int(business_day(moment)))

    ## school-holiday flag
    # ``moment.date()`` works whether the file imported the ``datetime``
    # module or the ``datetime`` class; ``datetime.date(moment)`` only
    # works with the latter.
    school_holidays = SchoolHolidayDates()
    d_tr['is_holiday'] = d_tr['datetime_perso'].apply(
        lambda moment: int(school_holidays.is_holiday(moment.date())))

    return d_tr.drop(['rangeInYear', 'datetime_perso', 'date', 'timestamp'],
                     axis=1)
class OperationsStatsTransformer(BaseTransformer):
    """Add day-phase and holiday indicators to the operations data.

    ``transform`` reads the input through ``self.read_csv()``, adds the
    columns ``phase_journee``, ``est_jour_ferie`` and
    ``est_vacances_scolaires``, drops the coordinate and date columns and
    writes the result through ``self.to_csv()``.
    """

    DATE_COLUMNS = ["date_heure_reception_alerte_locale", "date_heure_reception_alerte"]

    def __init__(self, filepath):
        super(OperationsStatsTransformer, self).__init__(filepath)
        # Per-year cache: year -> frozenset of bank-holiday dates.
        self.bank_holidays = {}
        self.school_holiday = SchoolHolidayDates()

    def transform(self, output):
        """Compute the derived columns and write the frame to ``output``."""
        df = self.read_csv()

        df["phase_journee"] = df.apply(self.phase_journee, axis=1)
        df["est_jour_ferie"] = df.apply(self.est_jour_ferie, axis=1)
        df["est_vacances_scolaires"] = df.apply(
            self.est_vacances_scolaires, axis=1
        )

        # The inputs of the derived columns are not part of the output.
        df.drop(["latitude", "longitude"] + self.DATE_COLUMNS, axis=1, inplace=True)

        self.to_csv(df, output)

    def phase_journee(self, row):
        """Return the phase of the day for one row.

        Returns one of "nuit", "déjeuner", "matinée" or "après-midi", or
        ``np.nan`` when ``SunriseSunset`` raises ``ValueError``.

        Raises:
            ValueError: if the local hour matches none of the ranges.
        """
        heure_locale = row["date_heure_reception_alerte_locale"]
        heure_utc = row["date_heure_reception_alerte"]
        latitude, longitude = row["latitude"], row["longitude"]
        try:
            sunrise, sunset = SunriseSunset(heure_utc, latitude, longitude).calculate()
        except ValueError:
            return np.nan
        # sunset -> sunrise
        # NOTE(review): this only matches when calculate() returns a sunrise
        # that is later than the sunset -- confirm against the library.
        if sunset <= heure_utc <= sunrise:
            return "nuit"
        # 12:00 -> 13:59
        elif 12 <= heure_locale.hour <= 13:
            return "déjeuner"
        # sunrise -> 11:59
        elif heure_locale.hour <= 11:
            return "matinée"
        # 14:00 -> sunset
        elif heure_locale.hour >= 14:
            return "après-midi"
        else:
            raise ValueError("Date is invalid " + str(heure_locale))

    def est_jour_ferie(self, row):
        """Return True when the row's local date is a bank holiday."""
        date = row["date_heure_reception_alerte_locale"].date()
        year = date.year

        if year not in self.bank_holidays:
            # Store a frozenset so each row costs one hash lookup instead
            # of a linear scan over the year's holidays.
            self.bank_holidays[year] = frozenset(
                JoursFeries.for_year(year).values()
            )

        return date in self.bank_holidays[year]

    def est_vacances_scolaires(self, row):
        """Return whether the row's local date is a school holiday.

        Returns ``np.nan`` when ``is_holiday`` raises
        ``UnsupportedYearException``.
        """
        date = row["date_heure_reception_alerte_locale"].date()
        try:
            return self.school_holiday.is_holiday(date)
        except UnsupportedYearException:
            return np.nan
    def test_is_holiday(self):
        """Exercise ``is_holiday`` on valid, unsupported and mistyped input."""
        is_holiday = SchoolHolidayDates().is_holiday

        self.assertTrue(is_holiday(datetime.date(2017, 12, 25)))
        self.assertFalse(is_holiday(datetime.date(2017, 12, 1)))

        # A year without data must raise and name that year.
        unsupported_year = self.assertRaisesRegexp(
            UnsupportedYearException, "No data for year: 1985"
        )
        with unsupported_year:
            is_holiday(datetime.date(1985, 2, 7))

        # A datetime.datetime is rejected; only datetime.date is accepted.
        wrong_type = self.assertRaisesRegexp(
            ValueError, "date should be a datetime.date"
        )
        with wrong_type:
            is_holiday(datetime.datetime(2017, 12, 1, 2, 0))
Пример #5
0
    def test_is_holiday(self):
        """Verify ``SchoolHolidayDates.is_holiday`` results and errors.

        Covers a holiday date, a non-holiday date, a year that raises
        ``UnsupportedYearException`` and a ``datetime.datetime`` argument
        that raises ``ValueError``.
        """
        school_holidays = SchoolHolidayDates()
        during_holidays = datetime.date(2017, 12, 25)
        outside_holidays = datetime.date(2017, 12, 1)
        year_without_data = datetime.date(1985, 2, 7)
        not_a_plain_date = datetime.datetime(2017, 12, 1, 2, 0)

        self.assertTrue(school_holidays.is_holiday(during_holidays))
        self.assertFalse(school_holidays.is_holiday(outside_holidays))

        with self.assertRaisesRegexp(
            UnsupportedYearException, "No data for year: 1985"
        ):
            school_holidays.is_holiday(year_without_data)

        with self.assertRaisesRegexp(
            ValueError, "date should be a datetime.date"
        ):
            school_holidays.is_holiday(not_a_plain_date)
Пример #6
0
# Parse the timestamp strings into datetimes.
d_tr['timestamp'] = pd.to_datetime(d_tr['timestamp'],
                                   format='%Y-%m-%dT%H:%M:%S.%f')

## create season and rangeInYear (day of the year)
s = pd.to_datetime(d_tr['timestamp'])
d_tr['rangeInYear'] = s.dt.dayofyear
d_tr['season'] = d_tr['rangeInYear'].apply(get_season)

## working-day flag
d_tr['is_business_day'] = d_tr['datetime_perso'].apply(
    lambda e: int(business_day(e)))

## school-holiday flag
# ``f.date()`` works whether the file imported the ``datetime`` module or
# the ``datetime`` class; ``datetime.date(f)`` only works with the latter.
d = SchoolHolidayDates()
d_tr['is_holiday'] = d_tr['datetime_perso'].apply(
    lambda f: int(d.is_holiday(f.date())))

# Helper columns are no longer needed once the features exist.
d_tr = d_tr.drop(['rangeInYear', 'datetime_perso', 'date', 'timestamp'],
                 axis=1)

# Left join keeps every row of d_tr, matched to dataOut on 'ID'.
d_ready = pd.merge(d_tr, dataOut, on='ID', how='left')

## split on whether consumption_1 is present (train) or missing (test)
train_new = d_ready[d_ready['consumption_1'].notnull()]
train_new = train_new.drop(['ID'], axis=1)
test_new = d_ready[d_ready['consumption_1'].isnull()]
test_new = test_new.drop(['consumption_1', 'consumption_2', 'ID'], axis=1)

# Notebook-style inspection; these expressions have no effect in a script.
train_new.shape
test_new.shape
Пример #7
0
# Notebook-style inspection; these expressions have no effect in a script.
ts2.head()
df.tail()

## create season and rangeInYear (day of the year)
s = pd.to_datetime(dataInt['timestamp'])
# astype(int) keeps the integer dtype the column had before.
dataInt['rangeInYear'] = s.dt.dayofyear.astype(int)
dataInt['season'] = dataInt['rangeInYear'].apply(get_season)

## working-day flag
dataInt['is_business_day'] = dataInt['datetime_perso'].apply(
    lambda e: int(business_day(e)))

## school-holiday flag
# ``f.date()`` works whether the file imported the ``datetime`` module or
# the ``datetime`` class; ``datetime.date(f)`` only works with the latter.
d = SchoolHolidayDates()
dataInt['is_holiday'] = dataInt['datetime_perso'].apply(
    lambda f: int(d.is_holiday(f.date())))

## mean consumption per group, one bar chart per (grouping, target) pair
# Each entry is (grouping column, value column, chart title).
_bar_charts = [
    ('month', 'consumption_1', 'monthly consum 1'),
    ('month', 'consumption_2', 'monthly consum 2'),
    ('day of week', 'consumption_1', 'daily consum 1'),
    ('day of week', 'consumption_2', 'daily consum 2'),
    ('hour', 'consumption_1', 'hourly consum 1'),
    ('hour', 'consumption_2', 'hourly consum 2'),
    ('is_business_day', 'consumption_1', ' business day consum 1'),
    ('is_business_day', 'consumption_2', 'business day consum 2'),
    ('is_holiday', 'consumption_1', ' holiday day consum 1'),
    ('is_holiday', 'consumption_2', 'holiday day consum 2'),
]
for _group_col, _value_col, _title in _bar_charts:
    dataInt.groupby(_group_col)[_value_col].mean().plot.bar(
        fontsize=14, figsize=(10, 7), title=_title)