示例#1
0
def update_balances():
    """Credit every user with 1% of the total size of the funds they own.

    Nothing happens on weekends or on Russian public holidays.  On any other
    day the ``balance`` and ``gain`` columns of each row in ``users`` are both
    increased by ``sum(size) * 0.01`` computed over that user's rows in
    ``funds``; the new values are rounded to 4 decimal places.

    All rows are updated inside a single transaction, so either every user
    is credited or, if an error occurs, none is.  The connection is always
    closed before returning.
    """
    from contextlib import closing

    import holidays

    # Bind the calendar to its own name instead of shadowing the module.
    ru_holidays = holidays.Russia()
    today = datetime.datetime.now()
    if today.strftime('%Y-%m-%d') in ru_holidays or today.weekday() >= 5:
        return

    with closing(sqlite3.connect(db_name)) as sqlite_connection:
        # Using the connection as a context manager commits on success and
        # rolls back if any statement raises.
        with sqlite_connection:
            cursor = sqlite_connection.cursor()
            records = cursor.execute("SELECT * from users").fetchall()
            for row in records:
                # Positional layout of ``users`` relied on by this function:
                # column 1 is matched against user_id, 5 is the balance and
                # 6 is the gain.
                user_id, balance, gain = row[1], row[5], row[6]
                fund_rows = cursor.execute(
                    "SELECT size FROM funds WHERE owner_id = ?",
                    (user_id,),
                ).fetchall()
                interest = sum(size for (size,) in fund_rows) * 0.01
                # Placeholders keep values out of the SQL text, so ids of
                # any type are passed safely.
                cursor.execute(
                    "UPDATE users SET balance = ?, gain = ? WHERE user_id = ?",
                    (round(balance + interest, 4),
                     round(gain + interest, 4),
                     user_id),
                )
import holidays

# Boundaries of the train / test periods.
# NOTE(review): the tuples look like (year, month, day) -- confirm against the code that consumes them.
FIRST_DAY_OF_TRAIN_PRD = (2013, 1, 1)

LAST_DAY_OF_TRAIN_PRD = (2015, 10, 31)

FIRST_DAY_OF_TEST_PRD = (2015, 11, 1)

# Russian public holidays in 2012, 2013, 2014, 2015, and 2016
PUBLIC_HOLIDAYS = holidays.Russia(years=[2012, 2013, 2014, 2015, 2016])
# Just the keys of the holiday calendar above, as a plain list.
PUBLIC_HOLIDAY_DTS = list(PUBLIC_HOLIDAYS.keys())

# Pairs of date strings for two 2014 sporting events.
# NOTE(review): presumably (first day, last day) written as M/D/YYYY -- confirm with the consumer.
OLYMPICS2014 = ("2/7/2014", "2/23/2014")

WORLDCUP2014 = ("6/12/2014", "7/13/2014")

# city populations as of 1/1/2020
# (source: https://rosstat.gov.ru/storage/mediabank/CcG8qBhP/mun_obr2020.rar, accessed 11/17/2020):
CITY_POP = [
    ("РостовНаДону", 1137904.0, "47°14′26″ с. ш. 39°42′38″ в. д.", "UTC+3"),
    ("Н.Новгород", 1252236.0, "56°19′37″ с. ш. 44°00′27″ в. д.", "UTC+3"),
    ("Казань", 1257391.0, "55°47′27″ с. ш. 49°06′52″ в. д.", "UTC+3"),
    ("Новосибирск", 1625631.0, "55°01′ с. ш. 82°55′ в. д.", "UTC+7"),
    ("Воронеж", 1058261.0, "51°40′18″ с. ш. 39°12′38″ в. д.", "UTC+3"),
    ("Красноярск", 1093771.0, "56°00′43″ с. ш. 92°52′17″ в. д.", "UTC+7"),
    ("Ярославль", 608353.0, "57°37′ с. ш. 39°51′ в. д.", "UTC+3"),
    ("Тюмень", 807271.0, "57°09′ с. ш. 65°32′ в. д.", "UTC+5"),
    ("Сургут", 380632.0, "61°15′00″ с. ш. 73°26′00″ в. д.", "UTC+5"),
    ("Омск", 1154507.0, "54°58′ с. ш. 73°23′ в. д.", "UTC+6"),
    ("Волжский", 323906.0, "48°47′ с. ш. 44°46′ в. д.", "UTC+4"),
    ("Уфа", 1128787.0, "54°44′ с. ш. 55°58′ в. д.", "UTC+5"),
示例#3
0
def app_data_preparation(file_list, lock_period, impute):
    '''
    Load, merge and enrich the source datasets into one time-indexed frame.

    file_list - data file names/paths in this exact order:
    1) icp das
    2) metering devices
    3) SVO
    4) VDNH
    5) COVID
    6) self-isolation index

    lock_period - None, or a tuple (start date, end date) describing an
    additional lockdown to flag on top of the built-in
    2020-03-30 .. 2020-06-08 one

    impute - if truthy, the merged frame is passed through
    app_imputing_data before being returned; otherwise it is returned as is

    Returns the merged DataFrame indexed by 'time', with calendar, holiday
    and lockdown columns added.
    '''
    # data processing and analysis
    import os
    import pandas as pd
    # module with information about holidays
    import holidays
    from app_processing import app_icp_preprocess, app_meter_preprocess
    from app_processing import app_svo_preprocess, app_vdnh_preprocess
    from app_processing import app_isolation_preprocessing, app_covid_preprocessing, app_imputing_data

    # -------------------------------------------------DATA-LOAD--------------------------------------------------------

    # icp das
    icp_features_url = os.path.join(os.getcwd(), 'data',
                                    'building_features.pickle')
    # metering device
    metering_features_url = os.path.join(os.getcwd(), 'data',
                                         'meter_features.pickle')

    # ---------------------------------------------FEATURE-SELECTION----------------------------------------------------

    # relevant icp_das features
    icp_das = app_icp_preprocess(file_list[0], icp_features_url)
    # relevant metering devices features
    meter_dev = app_meter_preprocess(file_list[1], metering_features_url)
    # weather columns 'T', 'U', 'c' (described by the original author as
    # temperature, atmospheric pressure and cloudiness)
    svo = app_svo_preprocess(file_list[2], ['T', 'U', 'c'])
    # precipitation
    vdnh = app_vdnh_preprocess(file_list[3])
    # covid cases
    cov = app_covid_preprocessing(file_list[4])
    # isolation index
    iso = app_isolation_preprocessing(file_list[5])

    # ---------------------------------------------MERGING-DATASETS-----------------------------------------------------

    def merge_data(*args):
        '''
        Left-merge every following frame onto the first one by 'time'.
        '''
        data = args[0]
        for other in args[1:]:
            data = data.merge(other, how='left', on='time')

        return data

    data = merge_data(icp_das, meter_dev, svo, vdnh, cov, iso)
    data = data.set_index('time')

    # ----------------------------------------------ADD-COVID-CASES-----------------------------------------------------

    # populating daily values: forward-fill within each calendar day
    data['covid_cases'] = data['covid_cases'].groupby(
        pd.Grouper(freq='D')).ffill()
    data['isolation_idx'] = data['isolation_idx'].groupby(
        pd.Grouper(freq='D')).ffill()
    # remaining gaps up to and including March 2020 are set to 0
    data.loc[:'2020-03', 'covid_cases'] = data.loc[:'2020-03',
                                                   'covid_cases'].fillna(0)
    data.loc[:'2020-03', 'isolation_idx'] = data.loc[:'2020-03',
                                                     'isolation_idx'].fillna(0)

    # ----------------------------------------SPECIFY-WEEKDAYS-AND-MONTHS-----------------------------------------------

    # add weekday
    data['weekday'] = data.index.weekday
    # add month
    data['month'] = data.index.month
    # add yearday
    data['yearday'] = data.index.dayofyear
    # add monthday
    data['monthday'] = data.index.to_series().dt.day

    # -----------------------------------------------ADD-HOLIDAYS-------------------------------------------------------
    # add holidays
    rus_holidays = holidays.Russia()

    def holidays_selector(df, holidays_list):
        '''
        Return a frame with 'time' (df's index) and a 0/1 'holiday' flag.
        '''
        flags = [int(t in holidays_list) for t in df.index]
        return pd.DataFrame({'time': df.index, 'holiday': flags})

    all_holidays = holidays_selector(data, rus_holidays)

    # -----------------------------------------------ADD-LOCKDOWN-------------------------------------------------------

    # set time of lockdown in Moscow
    lockdown = pd.DataFrame(pd.date_range(start='2020-03-30 00:00',
                                          end='2020-06-08 23:00',
                                          freq='H'),
                            columns=['time'])
    # set corresponding column to 1
    lockdown['lockdown'] = 1

    # in case of new lockdown
    if lock_period is not None:
        new_lockdown = pd.DataFrame(pd.date_range(start=lock_period[0],
                                                  end=lock_period[1],
                                                  freq='H'),
                                    columns=['time'])
        # the extra period must carry the flag too, otherwise it merges in
        # as NaN and is later filled with 0
        new_lockdown['lockdown'] = 1

        # DataFrame.append returned a new frame (and no longer exists in
        # pandas 2.x), so the result has to be assigned; overlapping hours
        # are dropped so the left merge below cannot duplicate rows
        lockdown = pd.concat([lockdown, new_lockdown], ignore_index=True)
        lockdown = lockdown.drop_duplicates(subset='time')

    # add lockdown periods
    data = merge_data(data, all_holidays, lockdown).set_index('time')

    # -----------------------------------------------FILL-NAs-----------------------------------------------------------

    # hours outside any lockdown / without precipitation records become 0
    data['lockdown'] = data['lockdown'].fillna(0)
    data['precipitation'] = data['precipitation'].fillna(0)

    if impute:
        # TODO: make user to decide which columns to impute
        data = app_imputing_data(data)

    return data
import pandas as pd
import datetime as dt
import holidays

start_date = dt.datetime(2013, 1, 1)
end_date = dt.datetime(2015, 11, 30)

# One entry per calendar day, both endpoints included.
dates = [
    start_date + dt.timedelta(days=offset)
    for offset in range((end_date - start_date).days + 1)
]

ru_holidays = holidays.Russia()

calendar = pd.DataFrame({"date": dates})

# Holiday name for the day (None when the day is not a holiday) and the ISO
# weekday number (Monday = 1 ... Sunday = 7).
calendar["bank_holiday"] = calendar["date"].apply(ru_holidays.get)
calendar["weekday"] = calendar["date"].apply(lambda day: day.isoweekday())

# In Russia, if the date of bank holiday observance falls on a weekend, the following Monday will be a day off in lieu
# of the holiday. I think the exception is New Year Holiday as it lasts from 1st to 8th January and additional day is
# not given.

in_lieu_mask = (
    calendar["bank_holiday"].notnull()
    & calendar["weekday"].isin([6, 7])
    & (calendar["bank_holiday"] != "Новый год")
)
days_in_lieu = calendar.loc[in_lieu_mask].copy()
# Saturday (6) moves forward by 2 days and Sunday (7) by 1, i.e. to the next Monday.
days_in_lieu["date"] = days_in_lieu.apply(
    lambda row: row["date"] + dt.timedelta(days=8 - row["weekday"]), axis=1)

calendar = calendar.merge(days_in_lieu,
                          how="left",
import pandas as pd
from dateutil.relativedelta import relativedelta
import holidays
from math import sqrt


# Module-wide Russian holiday calendar, created once and queried by get_holidays_count.
RU_HOLIDAYS = holidays.Russia()


def get_holidays_count(month_start):
    """Count the entries of ``RU_HOLIDAYS`` in the month starting at *month_start*.

    *month_start* is converted with ``pd.to_datetime``; the calendar is then
    sliced from that moment to the same moment one month later and the
    length of the slice is returned.
    """
    window_start = pd.to_datetime(month_start)
    window_end = window_start + relativedelta(months=1)
    holidays_in_window = RU_HOLIDAYS[window_start:window_end]
    return len(holidays_in_window)


def get_rmse(y_actual, y_predicted):
    """Return the square root of ``mean_squared_error(y_actual, y_predicted)``."""
    mse = mean_squared_error(y_actual, y_predicted)
    return sqrt(mse)
示例#6
0
# Renumber the rows 0..n-1, discarding the previous index.
df_sample = df_sample.reset_index(drop=True)

# Add time, week and holiday columns

# Two-hour bins over the planned hour: edges 0, 2, ..., 24 with labels
# '0-2', '2-4', ..., '22-24'.
hour_edges = np.arange(0, 26, 2)
labels = [f'{lo}-{hi}' for lo, hi in zip(hour_edges, hour_edges[1:])]
# NOTE(review): pd.cut bins are right-closed by default, so hour 2 lands in
# '0-2' and hour 4 in '2-4'; pass right=False if [lo, hi) was intended.
df_sample['time_bin'] = pd.cut(df_sample.time_plan_ts.dt.hour,
                               bins=hour_edges,
                               include_lowest=True,
                               labels=labels).astype(str)

# Weekday is stored as a string ('0' = Monday ... '6' = Sunday).
df_sample['weekday'] = df_sample.time_plan_ts.apply(
    lambda x: x.weekday()).astype(str)
holiday_dates = [str(h) for h in holidays.Russia(years=2020)]
df_sample['is_holiday'] = df_sample.date.apply(
    lambda x: x in holiday_dates).astype(int)
# 'weekday' holds strings, so it must be compared with '5'/'6'; comparing
# with the integers 5 and 6 never matched and flagged every row as 0.
df_sample['is_weekend'] = df_sample.weekday.isin({'5', '6'}).astype(int)

## To timeseries format

# Encode stops: every distinct 'latitude - longitude' string gets an integer id
stops = df_sample.apply(lambda x: str(x.latitude) + ' - ' + str(x.longitude),
                        axis=1)

ind2stop = dict(enumerate(stops.unique()))
stop2ind = {s: i for i, s in ind2stop.items()}

df_sample['stop_number'] = stops.map(stop2ind)
    def __generate(self):
        '''
        Build self.calendar: a dict with one entry per day of self.year
        (365 or 366 entries). The key is the day formatted as DD.MM, the
        value is the day type.

        There are 4 day types:
        0. working day
        1. weekend (Saturday or Sunday)
        2. pre-holiday working day (the working day is shortened by 1 hour)
        3. public holiday

        A weekend takes precedence over a holiday: a holiday that falls on
        Saturday or Sunday is reported as 1.
        '''
        rus_holidays = holidays.Russia()

        first_ordinal = date(self.year, 1, 1).toordinal()
        last_ordinal = date(self.year, 12, 31).toordinal()

        calendar = {}
        # Walking real dates gives the right month, month length and weekday
        # for any year, so no leap-year or weekday bookkeeping is needed.
        for ordinal in range(first_ordinal, last_ordinal + 1):
            day = date.fromordinal(ordinal)
            if day.weekday() >= 5:  # Monday is 0, so 5 and 6 are Sat/Sun
                day_type = 1
            elif day in rus_holidays:
                day_type = 3
            elif date.fromordinal(ordinal + 1) in rus_holidays:
                # A working day directly before a holiday (31 December is
                # checked against 1 January of the following year).
                day_type = 2
            else:
                day_type = 0

            # January and February keep the literal suffixes that were
            # always used for them; other months go through the same
            # formatter as the day part.
            month_part = {1: '01', 2: '02'}.get(day.month) or self.__nts(day.month)
            calendar[self.__nts(day.day) + '.' + month_part] = day_type

        self.calendar = calendar