def setUp(self):
    """Build every UK-region calendar in turn.

    Each calendar is constructed in the listed order and bound to
    ``self.holidays``; the attribute therefore ends up holding the
    last one, ``holidays.UK()``.
    """
    region_names = (
        "England",
        "Wales",
        "Scotland",
        "IsleOfMan",
        "NorthernIreland",
        "UK",
    )
    for region_name in region_names:
        # Look the class up lazily so construction order matches the
        # sequence of plain assignments this loop stands for.
        self.holidays = getattr(holidays, region_name)()
def _get_england_holidays(start_date, number_of_days: int):
    """Return England's public holidays as midnight ``datetime`` objects.

    The calendar is populated for every year from ``start_date.year`` up
    to (but not including) the year reached by adding
    ``ceil(number_of_days / 365 + 1)`` years to ``start_date``.

    Args:
        start_date: Date-like object supporting ``.year`` and addition
            of a ``relativedelta``.
        number_of_days: Length of the period of interest, in days.

    Returns:
        A list of ``datetime.datetime`` values, one per holiday, in the
        order the calendar yields its keys.
    """
    years_ahead = math.ceil((number_of_days / 365) + 1)
    horizon = start_date + relativedelta(years=years_ahead)
    covered_years = range(start_date.year, horizon.year)

    england_calendar = holidays.England(years=covered_years)
    return [
        datetime.datetime(day.year, day.month, day.day)
        for day in england_calendar.keys()
    ]
def get_business_days(start_date, end_date):
    """Return the business days between two dates as a DatetimeIndex.

    Weekends are dropped by generating the range with a business-day
    frequency; England public holidays are then filtered out.
    """
    england_calendar = holidays.England()
    weekdays = pd.date_range(
        start=start_date,
        end=end_date,
        freq=pd.tseries.offsets.BDay(),
    )

    working_days = []
    for day in weekdays:
        if day in england_calendar:
            # public holiday: skip it
            continue
        working_days.append(day)

    return pd.to_datetime(working_days)
def uk_holiday(df, colm_dat, colm_reg):
    """Flag each row of *df* as a public holiday in its own UK region.

    Args:
        df: DataFrame containing a date column and a region column.
        colm_dat: Name of the column holding the date to test.
        colm_reg: Name of the column holding the region name.
            'England', 'Scotland', 'Wales' and 'Northern Ireland' use
            their own calendar; any other value falls back to the
            UK-wide calendar.

    Returns:
        A list of ints (1 = holiday, 0 = not a holiday), one per row of
        *df*, in row order.
    """
    # Build each calendar once instead of once per row.
    regional_calendars = {
        'England': holidays.England(),
        'Scotland': holidays.Scotland(),
        'Wales': holidays.Wales(),
        'Northern Ireland': holidays.NorthernIreland(),
    }
    fallback_calendar = holidays.UnitedKingdom()

    # Read the dates from the frame and column that were passed in
    # (the date used to come from a module-level frame and a fixed
    # 'DATE' column, ignoring both arguments). Iterating the columns
    # positionally also removes the reliance on a 0..n-1 index.
    return [
        int(day in regional_calendars.get(region, fallback_calendar))
        for day, region in zip(df[colm_dat], df[colm_reg])
    ]
def add_holiday(df_weather):
    """Add a 0/1 ``IsHoliday`` column to *df_weather* and return it.

    Each row's ``timestamp`` is checked against the public-holiday
    calendar of the country its ``site_id`` is mapped to below. Rows
    whose ``site_id`` is not listed stay 0.

    Args:
        df_weather: DataFrame with ``site_id`` and ``timestamp``
            columns. It is modified in place.

    Returns:
        The same DataFrame, with ``IsHoliday`` stored as ``uint8``.
    """
    site_calendars = (
        (holidays.England(), [1, 5]),
        (holidays.Ireland(), [12]),
        (holidays.Canada(), [7, 11]),
        (holidays.UnitedStates(),
         [0, 2, 3, 4, 6, 8, 9, 10, 13, 14, 15]),
    )

    site_ids = df_weather['site_id'].to_numpy()
    timestamps = df_weather['timestamp'].tolist()

    # Compute the flag directly as 0/1. Storing the holiday *name* in
    # the integer column first (and collapsing it afterwards) is an
    # incompatible-dtype assignment that pandas has deprecated.
    flags = np.zeros(len(df_weather), dtype=np.uint8)
    for calendar, sites in site_calendars:
        rows = np.flatnonzero(np.isin(site_ids, sites))
        if rows.size:
            flags[rows] = [timestamps[i] in calendar for i in rows]

    df_weather['IsHoliday'] = flags
    return df_weather
weather_df = weather_df.reset_index() weather_df = weather_df.drop(['datetime', 'day', 'week', 'month'], axis=1) return weather_df weather['timestamp'] = weather['timestamp'].astype(str) weather = fill_weather_dataset(weather) weather['timestamp'] = pd.to_datetime(weather['timestamp']) # holiday imformation import holidays en_holidays = holidays.England() ir_holidays = holidays.Ireland() ca_holidays = holidays.Canada() us_holidays = holidays.UnitedStates() en_idx = weather.query('site_id == 1 or site_id == 5').index ir_idx = weather.query('site_id == 12').index ca_idx = weather.query('site_id == 7 or site_id == 11').index us_idx = weather.query( 'site_id == 0 or site_id == 2 or site_id == 3 or site_id == 4 or site_id == 6 or site_id == 8 or site_id == 9 or site_id == 10 or site_id == 13 or site_id == 14 or site_id == 15' ).index weather['IsHoliday'] = 0 weather.loc[en_idx, 'IsHoliday'] = weather.loc[en_idx, 'timestamp'].apply( lambda x: en_holidays.get(x, default=0)) weather.loc[ir_idx, 'IsHoliday'] = weather.loc[ir_idx, 'timestamp'].apply(
def weather_feature_engineering(df):
    """Add derived weather features to *df* in place and return it.

    Which features are produced is driven by the dict returned from
    ``us.get_feature_settings()``:

    * ``do_humidity`` - add a ``humidity`` column computed from
      ``air_temperature`` and ``dew_temperature``.
    * ``weather_lag_vars`` / ``weather_lag_values`` - per-site shifted
      copies of each variable, named ``<var>_lag_<shift>``.
    * ``weather_average_vars`` / ``weather_average_window`` - per-site
      rolling mean and standard deviation, named
      ``<var>_mean_window_<w>`` and ``<var>_std_window_<w>``.
    * ``do_holidays`` - add a 0/1 ``is_holiday`` column based on the
      country each site belongs to (``c.SITE_COUNTRIES``).

    Args:
        df: DataFrame with ``site_id``, ``timestamp`` and the weather
            columns named in the feature settings.

    Returns:
        The same DataFrame with the new columns added.
    """
    fs = us.get_feature_settings()

    # Humidity
    if fs['do_humidity']:
        saturated_vapor_pressure = 6.11 * (10.0 ** (
            7.5 * df['air_temperature'] / (237.3 + df['air_temperature'])))
        actual_vapor_pressure = 6.11 * (10.0 ** (
            7.5 * df['dew_temperature'] / (237.3 + df['dew_temperature'])))
        df['humidity'] = (
            actual_vapor_pressure / saturated_vapor_pressure) * 100
        # The ``np.float`` alias was removed in NumPy 1.24; the builtin
        # ``float`` is the same 64-bit type.
        df['humidity'] = df['humidity'].astype(float)

    # lags
    feature_cols = fs['weather_lag_vars']
    lag_values = fs['weather_lag_values']
    for site_id in range(c.SITE_ID_RANGE):
        mask = df['site_id'] == site_id
        for feature in feature_cols:
            for shift in lag_values:
                df.loc[mask, f'{feature}_lag_{shift}'] = (
                    df.loc[mask, feature].shift(shift))

    # window_average
    feature_cols = fs['weather_average_vars']
    window = fs['weather_average_window']
    df_site = df.groupby('site_id')
    df_rolled = df_site[feature_cols].rolling(window=window, min_periods=0)
    df_mean = df_rolled.mean().reset_index().astype(np.float16)
    df_std = df_rolled.std().reset_index().astype(np.float16)
    # NOTE(review): after reset_index() these frames carry a fresh
    # 0..n-1 index in grouped order, and the assignments below align on
    # index labels. That presumably relies on df being sorted by
    # site_id with a default index - confirm against the caller.
    for feature in feature_cols:
        df[f'{feature}_mean_window_{window}'] = df_mean[feature]
        df[f'{feature}_std_window_{window}'] = df_std[feature]

    # holidays
    if fs['do_holidays']:
        country_calendars = (
            (holidays.England(), c.SITE_COUNTRIES.get('England')),
            (holidays.Ireland(), c.SITE_COUNTRIES.get('Ireland')),
            (holidays.Canada(), c.SITE_COUNTRIES.get('Canada')),
            (holidays.UnitedStates(),
             c.SITE_COUNTRIES.get('United_States')),
        )
        site_ids = df['site_id'].to_numpy()
        timestamps = df['timestamp'].tolist()

        # Build the 0/1 flag directly rather than writing holiday names
        # into an integer column and collapsing them afterwards (an
        # incompatible-dtype assignment that pandas has deprecated).
        flags = np.zeros(len(df), dtype=np.uint8)
        for calendar, sites in country_calendars:
            rows = np.flatnonzero(np.isin(site_ids, sites))
            if rows.size:
                flags[rows] = [timestamps[i] in calendar for i in rows]
        df['is_holiday'] = flags

    return df
def TellMarkets(self):
    """Work out which currency markets are open at the current UTC time.

    Side effects on the instance (both attributes must already exist):

    * ``self.open_markets`` - set to the string ``'None'`` during the
      weekend close (Friday 21:00 UTC until Sunday 21:00 UTC);
      otherwise the names of the open markets ('Australia', 'Japan',
      'Britian', 'US') are appended to it.
    * ``self.returned_items`` - receives a ``'<Country>-Holiday'``
      entry for every country whose market is closed today because of
      a holiday listed below.

    Returns:
        ``self``.
    """
    # NOTE(review): ``today`` is the machine-local date while the hour
    # checks use UTC - confirm that mix is intended.
    today = datetime.date.today()
    currentTime = datetime.datetime.now(pytz.utc)
    weekday = currentTime.weekday()
    hour = currentTime.hour

    # Weekend close: Friday from 21:00, all of Saturday, Sunday until
    # 21:00 (UTC).
    markets_closed = (
        (weekday == 4 and hour >= 21)
        or weekday == 5
        or (weekday == 6 and hour < 21)
    )
    if markets_closed:
        self.open_markets = 'None'
        return self

    # standard holidays for United States, Great Britain, Japan, and
    # Australia
    us_holidays = holidays.US()
    britian_holidays = holidays.England()
    japan_holidays = holidays.Japan()
    australia_holidays = holidays.Australia()

    # Exchange Holiday is not included in the standard holidays of Japan
    exchange_holiday = "{}-12-31".format(currentTime.year)
    japan_holidays.append({exchange_holiday: "Exchange Holiday"})

    # The holidays that close markets in the given countries. Unsure if
    # all of Japan's holidays close the currency markets.
    us_market_holidays = [
        "New Year's Day",
        "Martin Luther King, Jr. Day",
        "Presidents Day or Washington's Birthday",
        "Good Friday",
        "Memorial Day",
        "Independence Day",
        "Labor Day",
        "Thanksgiving Day",
        "Christmas Day",
    ]
    britian_market_holidays = [
        "New Year's Day",
        "Good Friday",
        "Easter Monday",
        "May Day",
        "Spring Bank Holiday",
        "Summer Bank Holiday",
        "Christmas Day",
        "Boxing Day",
        "Exchange Holiday",
    ]
    japan_market_holidays = [
        "New Year's Day",
        "Adult Day",
        "Foundation Day",
        "Vernal Equinox Day",
        "Showa Day",
        "Constitution Memorial Day",
        "Greenery Day",
        "Children's Day",
        "Sea Day",
        "Respect for the Aged Day",
        "Autumnal Equinox Day",
        "Health and Sports Day",
        "Culture Day",
        "Labor Thanksgiving Day",
        "The birth of the Emperor",
        "Exchange Holiday",
    ]
    australian_market_holidays = [
        "New Year's Day",
        "Australia Day",
        "Good Friday",
        "Easter Monday",
        "Anzac Day",
        "Queen's Birthday",
        "Christmas Day",
        "Boxing Day",
    ]

    us_has_holiday = False
    japan_has_holiday = False
    britian_has_holiday = False
    australia_has_holiday = False

    us_name = us_holidays.get(today)
    if us_name in us_market_holidays:
        self.returned_items['US-Holiday'] = us_name
        us_has_holiday = True

    britian_name = britian_holidays.get(today)
    if britian_name in britian_market_holidays:
        self.returned_items['British-Holiday'] = britian_name
        britian_has_holiday = True

    japan_name = japan_holidays.get(today)
    if japan_name is not None:
        # Translator needed because Japanese holidays are returned in
        # Japanese.
        holiday_in_english = Translator().translate(japan_name)
        if holiday_in_english.text in japan_market_holidays:
            # NOTE(review): this stores the translation result object,
            # not its ``.text`` - confirm which one consumers expect.
            self.returned_items['Japanese-Holiday'] = holiday_in_english
            japan_has_holiday = True

    # Compare the holiday *name*; comparing the calendar object itself
    # to a string can never match.
    australia_name = australia_holidays.get(today)
    if australia_name in australian_market_holidays:
        self.returned_items['Australian-Holiday'] = australia_name
        australia_has_holiday = True

    # checks to see if we are in daylight saving time
    # need to make dynamic for what is being returned
    # NOTE(review): the clock is in UTC, whose DST offset is always
    # zero, so the first branch is currently never taken. Pick the
    # reference timezone whose DST should drive this switch.
    if bool(currentTime.dst()):
        # NOTE(review): ``hour == 7`` looks like it may have been meant
        # as ``hour <= 7`` - confirm the intended session window.
        if hour >= 22 or hour == 7:
            if not australia_has_holiday:
                self.open_markets.append('Australia')
        if hour >= 23 or hour <= 8:
            if not japan_has_holiday:
                self.open_markets.append('Japan')
        if hour >= 7 and hour <= 16:
            if not britian_has_holiday:
                self.open_markets.append('Britian')
        if hour >= 13 and hour <= 21:
            if not us_has_holiday:
                self.open_markets.append('US')
    else:
        # The session wraps past midnight, so the two bounds must be
        # joined with ``or``; with ``and`` the test could never pass.
        if hour >= 20 or hour <= 5:
            if not australia_has_holiday:
                self.open_markets.append('Australia')
        # NOTE(review): ``hour == 7`` looks like it may have been meant
        # as ``hour <= 7`` - confirm the intended session window.
        if hour >= 22 or hour == 7:
            if not japan_has_holiday:
                self.open_markets.append('Japan')
        if hour >= 7 and hour <= 16:
            if not britian_has_holiday:
                self.open_markets.append('Britian')
        if hour >= 12 and hour <= 21:
            if not us_has_holiday:
                self.open_markets.append('US')

    return self
elif Table2['Weather symbol'][k] == 'Partly cloudy (night)': Table2['cld_ttl_amt_id'][k] = 2 elif Table2['Weather symbol'][k] == 'Sunny intervals': Table2['cld_ttl_amt_id'][k] = 2 Table2['wmo_hr_sun_dur'][k] = 0.5 elif Table2['Weather symbol'][k] == 'Cloudy': Table2['cld_ttl_amt_id'][k] = 6 else: Table2['cld_ttl_amt_id'][k] = 8 import holidays hols = [] for ptr in holidays.England(years=[2021]).items(): hols.append(ptr[0]) for hol in hols: for i in range(len(Table2.index)): if Table2.index.date[i] == hol: Table2['holiday'][i] = 1 if ((Table2.index.weekday[i] == 5) or (Table2.index.weekday[i] == 6)): Table2['weekend'][i] = 1 if ((Table2['sunrise'][i] <= Table2.index.hour[i]) and (Table2.index.hour[i] < Table2['sunset'][i])) == True: Table2['islight'][i] = 1 Table3 = df_.merge(Table2, on='ob_time', how='outer') Table3.fillna(method='pad', inplace=True) series = pd.Series(