Пример #1
0
def extract_bday_feats_n_heads(series, modality, field, stat_type, tr_type):
    """
    "Business day or not" conditioning feature extraction.

    The input is split by calendar date into a business-day part and a
    non-business-day part (weekends, plus dates the SouthKorea calendar reports
    as holidays). Basic features are extracted from each part and the two
    results are concatenated, business-day features first.

    :param series: datetime-indexed pandas object, or None / empty
    :param modality: forwarded unchanged to extract_basic_feats_n_heads
    :param field: forwarded unchanged to extract_basic_feats_n_heads
    :param stat_type: forwarded unchanged to extract_basic_feats_n_heads
    :param tr_type: forwarded unchanged to extract_basic_feats_n_heads
    :return: feature name list, feature value list
    """
    # Local import: only ``Series`` is known to be imported at file level.
    import pandas as pd

    # A part that receives no data stays None, which is what the basic
    # extractor is given for an empty input as well.
    b_day_series = None
    nb_day_series = None

    if series is not None and len(series) > 0:
        cal = SouthKorea()
        index_dates = series.index.date
        unique_dates = Series(series.index.tolist()).map(lambda x: x.date()).unique()

        # Collect the per-date slices and concatenate once at the end;
        # Series.append was removed in pandas 2.0 and re-copied the data on
        # every call.
        b_day_parts = []
        nb_day_parts = []
        for date in unique_dates:
            day_slice = series[index_dates == date]
            if cal.is_holiday(date) is False and date.weekday() < 5:
                b_day_parts.append(day_slice)
            else:
                nb_day_parts.append(day_slice)

        if b_day_parts:
            b_day_series = pd.concat(b_day_parts)
        if nb_day_parts:
            nb_day_series = pd.concat(nb_day_parts)

    b_day_heads, b_day_feats = extract_basic_feats_n_heads(b_day_series, modality, field, stat_type, tr_type)
    nb_day_heads, nb_day_feats = extract_basic_feats_n_heads(nb_day_series, modality, field, stat_type, tr_type)

    heads = ['%s_%s' % (feat.BSS_DAY, x) for x in b_day_heads] + \
            ['%s_%s' % (feat.NON_BSS_DAY, x) for x in nb_day_heads]
    values = b_day_feats + nb_day_feats
    return heads, values
Пример #2
0
 def workday_feature_genaration(self, data, country):
     """
     Add 'dayoff_grade' and 'worktime_grade' categorical columns to ``data``.

     :param data: frame whose index resets to a datetime column named
         'datetime'
     :param country: 'SouthKorea' selects the South Korean calendar; any
         other value selects the Italian calendar
     :return: the frame, re-indexed by 'datetime', with the two new columns
     """
     data = data.reset_index()

     # 1. dayoff_grade: weekends and public holidays are 'dayoff'
     data['dayofweek_grade'] = data['datetime'].dt.day_name()
     data['dayofweek_grade'] = data['dayofweek_grade'].astype('category')

     if country == 'SouthKorea':
         from workalendar.asia import SouthKorea
         calendar = SouthKorea()
     else:
         # The class is exported as ``Italy``; importing ``italy`` fails.
         from workalendar.europe import Italy
         calendar = Italy()

     # Look up holidays for exactly the years present in the data rather
     # than a fixed 2017-2020 window.
     years = sorted(data['datetime'].dt.year.dropna().unique())
     holiday_list = [
         day
         for year in years
         for day, _label in calendar.holidays(int(year))
     ]

     data['dayoff_grade'] = 'dayon'
     data.loc[data['dayofweek_grade'].isin(['Sunday', 'Saturday']), 'dayoff_grade'] = 'dayoff'
     data.loc[data['datetime'].dt.date.isin(holiday_list), 'dayoff_grade'] = 'dayoff'
     data['dayoff_grade'] = pd.Categorical(data['dayoff_grade'], categories=['dayon', 'dayoff'], ordered=True)

     # 2. worktime_grade: 'work' only between 09:00 and 18:59 on a 'dayon' day
     data['worktime_grade'] = 'work'
     data.loc[data['datetime'].dt.hour < 9, 'worktime_grade'] = 'notwork'
     data.loc[data['datetime'].dt.hour > 18, 'worktime_grade'] = 'notwork'
     data.loc[data['dayoff_grade'] == 'dayoff', 'worktime_grade'] = 'notwork'
     data['worktime_grade'] = pd.Categorical(data['worktime_grade'], categories=['work', 'notwork'], ordered=True)

     # The weekday name was only a helper for the day-off flag.
     data = data.drop('dayofweek_grade', axis=1)
     data = data.set_index('datetime')
     return data
def get_weekday_index(date):
    """
    Map a datetime to a day-type index.

    :param date: object exposing ``.date()`` (e.g. a datetime / Timestamp)
    :return: the weekday number (0=Monday ... 6=Sunday) for working days and
        for Saturdays/Sundays; 7 for a Monday-Friday date that the SouthKorea
        calendar does not consider a working day
    """
    from workalendar.asia import SouthKorea

    day = date.date()
    weekday = day.weekday()
    if SouthKorea().is_working_day(day) or weekday in [5, 6]:
        return weekday
    return 7
Пример #4
0
    def initUI(self):
        """
        Build the window: a calendar widget with public holidays drawn in
        red, a label showing the selected date and a message label, stacked
        vertically.
        """
        cal = QCalendarWidget(self)
        cal.setGridVisible(True)
        header_format = QCalendarWidget.VerticalHeaderFormat(
            QCalendarWidget.NoVerticalHeader)
        cal.setVerticalHeaderFormat(header_format)
        cal.clicked[QDate].connect(self.showDate)

        # South Korean holiday calendar; holidays of three years around the
        # current one are marked on the Qt calendar.
        wcal = SouthKorea()
        this_year = QDate.currentDate().year()

        # Text format applied to holiday cells.
        fm = QTextCharFormat()
        fm.setForeground(Qt.red)

        # Previous year, current year, next year - in that order.
        for year in (this_year - 1, this_year, this_year + 1):
            for one in wcal.holidays(year):
                print(one[0])
                cal.setDateTextFormat(one[0], fm)

        self.lbl = QLabel(self)
        date = cal.selectedDate()
        self.lbl.setText(date.toString())

        self.lblmsg = QLabel(self)
        self.lblmsg.setText("강의 일정 계산")
        # Both labels exist before the date handler is invoked for the
        # initially selected date.
        self.showDate(date)

        vbox = QVBoxLayout()
        for widget in (cal, self.lbl, self.lblmsg):
            vbox.addWidget(widget)

        self.setLayout(vbox)

        self.setWindowTitle('종강일 계산기')
        self.setGeometry(300, 300, 300, 300)
        self.show()
Пример #5
0
def check_holidays(start_yr, end_yr=-1):
    '''
    Return South Korean public-holiday dates as a pandas Series.

    start_yr alone     -> holidays of that single year
    start_yr ~ end_yr  -> holidays of every year in the inclusive range

    :param start_yr: first (or only) year
    :param end_yr: last year of the range; -1 means "start_yr only"
    :return: Series of datetime.date objects with a fresh 0..n-1 index
    :raises ValueError: if end_yr is given and is smaller than start_yr
    '''
    if end_yr == -1:
        end_yr = start_yr
    elif end_yr < start_yr:
        # The original printed this hint and then failed on an unbound local.
        raise ValueError('input : start~end')

    calendar = SouthKorea()
    # holidays(year) yields (date, label) pairs; only the dates are kept.
    days = [
        day
        for year in range(start_yr, end_yr + 1)
        for day, _label in calendar.holidays(year)
    ]
    return pd.Series(days)
Пример #6
0
def add_weekday_column(data):
    '''
    Append a 'weekday_index' column describing the day type of each row.

    The value is the weekday number (0=Monday ... 6=Sunday) for working days
    and for Saturdays/Sundays, and 7 for a Monday-Friday date that the
    SouthKorea calendar does not consider a working day.

    :param data: original data, indexed by objects exposing ``.date()``
    :return: appended data
    '''
    from workalendar.asia import SouthKorea
    ko_calendar = SouthKorea()
    index = data.index

    def _day_code(stamp):
        day = stamp.date()
        if ko_calendar.is_working_day(day) or day.weekday() in [5, 6]:
            return day.weekday()
        return 7

    codes = [_day_code(stamp) for stamp in index]
    weekday_frame = pd.DataFrame(codes, columns=['weekday_index'], index=index)
    return pd.concat([data, weekday_frame], axis=1)
Пример #7
0
    def calculate_date(self, start, selectDate):
        """
        Compute the last class day of the regular and the intensive course.

        Regular course: 8 working-day sessions held on two weekdays
        (Mon/Wed when start is 'mon', Tue/Thu when start is 'tue').
        Intensive course: 16 working-day sessions held Monday-Thursday.
        Days the SouthKorea calendar does not count as working days are
        skipped.

        :param start: 'mon' or 'tue' - weekday on which the regular course runs
        :param selectDate: first day to consider (date / datetime)
        :return: two-line message with both end dates
        :raises ValueError: if start is neither 'mon' nor 'tue'
        """
        # Validate first: previously an unexpected value left ``sd`` unbound
        # and failed later with UnboundLocalError.
        sd = {'mon': 0, 'tue': 1}.get(start)
        if sd is None:
            raise ValueError("start must be 'mon' or 'tue', got %r" % (start,))

        # workalendar country setting
        kcal = SouthKorea()

        def nth_session_day(sessions, class_weekdays):
            # Walk forward one day at a time from selectDate and return the
            # day on which the requested number of sessions is reached.
            cnt = 0
            day_cnt = 0
            while True:
                day = selectDate + timedelta(days=day_cnt)
                if day.weekday() in class_weekdays and kcal.is_working_day(day):
                    cnt += 1
                    if cnt == sessions:
                        return day
                day_cnt += 1

        # Regular course end date
        checkday = nth_session_day(8, (sd, sd + 2))
        # Intensive course end date: Monday through Thursday (0-3)
        fastcheckday = nth_session_day(16, (0, 1, 2, 3))

        msgEnd = "정규반 %s수강 시 종강일은 %s입니다." % (selectDate.strftime("%m-%d"),
                                             checkday.strftime("%m-%d"))
        msgEnd += "\n속성반 %s수강 시 종강일은 %s입니다." % (selectDate.strftime("%m-%d"),
                                                fastcheckday.strftime("%m-%d"))

        return msgEnd
Пример #8
0
def get_holidays_list():
    """
    Build one flat list of holidays for 2015-2019.

    Per-year holidays come from ``get_holidays`` using the SouthKorea
    calendar; a hard-coded set of additional dates is appended to the
    matching year before flattening (years in ascending order).

    :return: list of holidays, year by year
    """
    calendar_ko = SouthKorea()
    by_year = {year: get_holidays(calendar_ko, year) for year in range(2015, 2020)}

    # Temporary public holidays
    temp_holidays = ['2015, 4, 28', '2015, 5, 1', '2015, 8, 14', '2015, 9, 29',
                     '2016, 2, 10', '2016, 4, 13', '2016, 5, 1', '2016, 5, 6',
                     '2017, 1, 30', '2017, 5, 1','2017, 5, 9', '2017, 10, 2', '2017, 10, 6',
                     '2018, 5, 1', '2018, 5, 7', '2018, 6, 13', '2018, 9, 26',
                     '2019, 5, 6']
    for text in temp_holidays:
        extra_day = datetime.strptime(text, "%Y, %m, %d").date()
        by_year[extra_day.year].append(extra_day)

    holiday_list = []
    for days in by_year.values():
        holiday_list.extend(days)
    return holiday_list
Пример #9
0
import numpy as np
import pandas as pd
from workalendar.asia import SouthKorea

# Hourly timestamps from 2014-01-01 up to (excluding) 2018-11-01.
dates = pd.date_range(start='2014/01/01 00:00:00',
                      end='2018/11/01 00:00:00',
                      closed='left',
                      freq='1H')
# Weekday name of every timestamp, as a NumPy string array.
weekdays = np.array(dates.weekday_name.tolist())
# print weekdays
# print dates.weekday_name.tolist()

# South Korean holidays for 2014-2018; each entry is indexed with [0] below
# to extract the date.
cal = SouthKorea()
holidays = cal.holidays(2014) + cal.holidays(2015) + cal.holidays(
    2016) + cal.holidays(2017) + cal.holidays(2018)
result = []
for i in range(len(holidays)):
    result.append(holidays[i][0])

# Day-type score per timestamp: +1 for Saturday, +1 for Sunday, +100 for
# timestamps matched against the holiday list.
date_idx = np.zeros(len(dates), dtype=float)
date_idx[np.where(weekdays == 'Saturday')[0]] += 1
date_idx[np.where(weekdays == 'Sunday')[0]] += 1
# NOTE(review): `dates` holds hourly timestamps while `result` holds holiday
# dates - confirm that every hour of a holiday is matched, not only midnight.
date_idx[np.where(dates.isin(result))[0]] += 100

print len(
    pd.date_range(start='2014/01/01 00:00:00',
                  end='2018/01/01 00:00:00',
                  closed='left',
                  freq='1H'))  #35064
print len(
    pd.date_range(start='2014/01/01 00:00:00',
Пример #10
0
from datetime import datetime, date
from workalendar.asia import SouthKorea
from threading import Timer

# Manual check of the SouthKorea calendar: print the 2019 holidays and
# whether today is a holiday / a working day.
cal = SouthKorea()
print(cal.holidays(2019))
print(cal.is_holiday(datetime.today()))
print(cal.is_working_day(datetime.today()))
# x = datetime.today()
# y = x.replace(day=x.day+1, hour=0, minute=0, second=0, microsecond=0)
# delta_t = y-x
# print(x)
# print(y)
# print(delta_t)
# secs = delta_t.seconds+1
# print(secs)
#
# def hello_world():
#     print("hello world")
#
# t = Timer(secs, hello_world)
# t.start()
Пример #11
0
import os
import json
from datetime import datetime

from workalendar.asia import SouthKorea

from common.config import korea_timezone

# Module-level SouthKorea calendar instance.
cal = SouthKorea()


def is_semester(date_to_know=None):
    if not date_to_know:
        date_to_know = datetime.now(tz=korea_timezone)
    # 학기중, 계절학기, 방학 중인지 구별 코드
    # json 파일 로드
    current_dir = os.path.dirname(os.path.abspath(__file__))
    date_url = f'{current_dir}/timetable/date.json'
    with open(date_url, 'r') as raw_json:
        result = json.load(raw_json)
    term_result = -1
    for key in [
            x for x in list(result.keys()) if x not in ['holiday', 'halt']
    ]:
        for term in result[key]:
            start_time = datetime.strptime(
                term['start'], "%m/%d/%Y").replace(tzinfo=korea_timezone)
            end_time = datetime.strptime(
                term['end'], "%m/%d/%Y").replace(tzinfo=korea_timezone)
            start_time = start_time.replace(year=date_to_know.year)
            end_time = end_time.replace(year=date_to_know.year)
Пример #12
0
    def __init__(self,
                 input_width=7,
                 label_width=7,
                 shift=14,
                 label_columns=["Maximum_Power_This_Year"],
                 features=None):
        """
        Load the KPX load table, merge the requested auxiliary tables and
        calendar flags on "Date", split into train/validation frames and
        derive the sliding-window index bookkeeping.

        :param input_width: number of leading window steps used as input
        :param label_width: number of trailing window steps used as labels
        :param shift: added to input_width to give the total window size
        :param label_columns: label column names, or None; the list is copied
            so the shared default list is never aliased on the instance
        :param features: optional list selecting extra tables to merge; any
            of "meteo", "covid", "gas", "exchange"
        """
        ##############################################################
        # Raw data
        ##############################################################
        kpx_load = pd.read_csv("./data/preprocess/KPX_load.csv")
        self.data = kpx_load[[
            "Date", "Installed_Capacity", "Supply_Capacity",
            "Maximum_Power_Last_Year", "Maximum_Power_This_Year",
            "Supply_Reserve"
        ]]

        if isinstance(features, list):
            if "meteo" in features:
                meteorology = pd.read_csv("./data/preprocess/Meteorology.csv")
                meteorology = meteorology[[
                    "location",
                    "Date",
                    "avg_temp",
                    "min_temp",
                    "max_temp",  # temperatures
                    "max_rain_1h",  # rainfall
                    "avg_dew_point",  # dew point
                    "avg_relative_humidity",  # relative humidity
                    "sunshine_hr",  # sunshine duration
                    "avg_land_temp",  # ground temperature
                ]]
                # NOTE(review): rows are merged as-is (no per-Date
                # aggregation); if the file holds several locations per Date
                # the merge yields one row per (Date, location) - confirm.
                self.data = pd.merge(self.data, meteorology, on="Date")

            if "covid" in features:
                covid = pd.read_csv("./data/COVID/covid19.csv")
                covid["Date"] = covid["Date"].str.replace(" ", "")
                covid["Sum_diff"] = np.gradient(covid.Sum, 1)
                # Second difference = gradient of the first difference.
                # np.gradient(x, 2) only changes the sample spacing and
                # returned Sum_diff / 2.
                covid["Sum_diff2"] = np.gradient(covid.Sum_diff, 1)
                covid = covid[[
                    "Date",
                    "Sum_diff2",  # change of the day-over-day increase
                    "Sum_diff"  # day-over-day increase
                ]]
                self.data = pd.merge(self.data, covid, on="Date")

            if "gas" in features:
                gas = pd.read_csv("./data/preprocess/shell_price.csv")
                gas["gasoline_diff"] = np.gradient(gas.gasoline2, 1)
                gas["diesel_diff"] = np.gradient(gas.diesel, 1)
                gas = gas[[
                    "Date",
                    "gasoline2",  # regular gasoline price
                    "diesel",  # diesel price
                    "gasoline_diff",  # gasoline day-over-day change
                    "diesel_diff"  # diesel day-over-day change
                ]]
                self.data = pd.merge(self.data, gas, on="Date")

            if "exchange" in features:
                exchange = pd.read_csv("./data/preprocess/exchange.csv")
                exchange["Last_diff"] = np.gradient(exchange.Last, 1)
                exchange = exchange[[
                    "Date",
                    "Last",  # closing value
                    "Last_diff"  # closing value day-over-day change
                ]]
                self.data = pd.merge(self.data, exchange, on="Date")

        ##############################################################
        # Others : holiday, weekday information
        ##############################################################
        date = pd.date_range('2020.01.01', end='2020.11.24', freq='d')
        date = pd.DataFrame(columns=["Date"], data=date.astype(str).values)
        # Dates are kept as "YYYY.MM.DD" strings to match the CSV tables.
        date["Date"] = date["Date"].str.replace("-", ".")
        date["weekday"] = pd.to_datetime(date["Date"]).dt.weekday
        # Binary weekday flag: Monday, Saturday and Sunday map to 1.
        # NOTE(review): Monday is grouped with the weekend - confirm intended.
        week_dict = {0: 1, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 1}
        date["weekday"] = date["weekday"].map(week_dict)
        date["holiday"] = 0
        holiday_strings = pd.Series(
            np.array(SouthKorea().holidays(2020))[:, 0]).map(str).str.replace("-", ".")
        date.loc[date.Date.isin(holiday_strings), "holiday"] = 1

        self.data = pd.merge(self.data, date, on="Date", how='left')

        self.data.fillna(0, inplace=True)

        # Zero-padded "YYYY.MM.DD" strings compare in chronological order.
        self.train_df = self.data[self.data.Date < "2020.11.01"]
        self.val_df = self.data[self.data.Date >= "2020.11.01"]

        # Copy so the instance never shares the mutable default list.
        self.label_columns = list(label_columns) if label_columns is not None else None
        ##############################################################
        # Work out the label column indices.
        ##############################################################
        if label_columns is not None:
            self.label_columns_indices = {
                name: i
                for i, name in enumerate(self.label_columns)
            }
        self.column_indices = {
            name: i
            for i, name in enumerate(self.train_df.columns)
        }

        ##############################################################
        # Work out the window parameters
        ##############################################################
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = self.input_width + self.shift

        # Inputs are the first input_width positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(
            self.total_window_size)[self.input_slice]

        # Labels are the last label_width positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(
            self.total_window_size)[self.labels_slice]
def cal_working_day(start_date, end_date):
    """
    Return the SouthKorea calendar's working-day delta between the day
    before ``start_date`` and ``end_date``.

    The start is shifted back by one day before the delta is taken
    (presumably so that start_date itself is counted - confirm).
    """
    calendar = SouthKorea()
    day_before_start = start_date - timedelta(1)
    return calendar.get_working_days_delta(day_before_start, end_date)
Пример #14
0
    def prophet_kospi(self, model_kospi):
        """
        Fit a Prophet model on the KOSPI close series and predict direction.

        Reads ``self.pred_days`` (forecast horizon) and ``self.date`` (the
        date whose forecast is compared). Stores intermediate results on the
        instance (``df``, ``data``, ``model``, ``future``, ``forecast``, ...).

        :param model_kospi: frame with a date-like index and a 'Close' column
        :return: '1' if the forecast for ``self.date`` is above the last
            observed close, otherwise '0'
        """
        # 1) KOSPI: reshape to the ds/y layout Prophet expects
        self.model_kospi = model_kospi
        self.df = copy.deepcopy(self.model_kospi)
        self.df['date'] = pd.to_datetime(self.df.index)
        self.data = self.df[['date', 'Close']].reset_index(drop=True)
        self.data = self.data.rename(columns={'date': 'ds', 'Close': 'y'})

        # Hyper-parameters
        self.prop_model = Prophet(yearly_seasonality='auto',
                                  weekly_seasonality='auto',
                                  daily_seasonality='auto',
                                  changepoint_prior_scale=0.15,
                                  changepoint_range=0.9
                                  )

        self.model = self.prop_model
        self.model.add_country_holidays(country_name='KR')
        self.model.fit(self.data)

        # Holiday dates (datetime.date objects) for 2020 and 2021.
        self.kor_holidays = pd.concat([pd.Series(np.array(SouthKorea().holidays(2020))[:, 0]),
                                       pd.Series(np.array(SouthKorea().holidays(2021))[:, 0])]).reset_index(drop=True)
        # Kept for backward compatibility: the original loop used this
        # attribute as its loop variable and left the last holiday in it.
        self.kor_holiday = self.kor_holidays.iloc[-1]

        # Future frame without Saturdays, Sundays and the holidays above.
        self.future = self.model.make_future_dataframe(periods=self.pred_days)
        self.future = self.future[self.future.ds.dt.weekday != 5]
        self.future = self.future[self.future.ds.dt.weekday != 6]
        # Compare timestamps with timestamps: a datetime64 column compared
        # against datetime.date objects is treated as never equal by current
        # pandas, which silently disabled the holiday filter.
        holiday_stamps = pd.to_datetime(self.kor_holidays)
        self.future = self.future[~self.future.ds.dt.normalize().isin(holiday_stamps)]

        self.forecast = self.model.predict(self.future)

        # Last observed value
        self.actual_value = float(self.data[self.data['ds'] == self.data.iloc[-1].ds]['y'])
        # Forecast value for the requested date
        self.predict_value = float(self.forecast[self.forecast['ds'] == self.date]['yhat'])
        if self.actual_value < self.predict_value:
            return '1'
        else:
            return '0'
Пример #15
0
    def __init__(self,
                 input_width,
                 label_width,
                 shift,
                 batch_size,
                 label_columns=["Maximum_Power_This_Year"],
                 features=None,
                 aux1=False,
                 aux2=False):
        """
        Load the KPX load table, merge the requested auxiliary tables and
        one-hot calendar flags on "Date", split into train/val/test frames,
        min-max scale them and derive the sliding-window index bookkeeping.

        :param input_width: number of leading window steps used as input
        :param label_width: number of trailing window steps used as labels
        :param shift: added to input_width to give the total window size
        :param batch_size: stored on the instance
        :param label_columns: label column names, or None; the list is copied
            so the shared default list is never aliased on the instance
        :param features: optional list selecting extra tables to merge; any
            of "meteo", "covid", "gas", "exchange"
        :param aux1: stored on the instance
        :param aux2: stored on the instance
        """
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.batch_size = batch_size
        # Copy so the instance never shares the mutable default list.
        self.label_columns = list(label_columns) if label_columns is not None else None
        self.total_window_size = self.input_width + self.shift
        self.aux1 = aux1
        self.aux2 = aux2

        ##############################################################
        # Raw data
        ##############################################################
        kpx_load = pd.read_csv("./data/preprocess/KPX_load.csv")
        # Only the last 329 rows are used.
        self.kpx_load = kpx_load[["Date",
                                  # "Installed_Capacity",
                                  "Supply_Capacity",
                                  "Maximum_Power_Last_Year",
                                  "Maximum_Power_This_Year",
                                  "Supply_Reserve"
                                  ]][-329:].reset_index(drop=True)

        # Feature counts exclude the "Date" column.
        self.kpx_load_size = self.kpx_load.shape[1] - 1
        self.data = self.kpx_load
        self.internal_size = self.kpx_load_size
        self.external_size = 0
        if isinstance(features, list):
            if "meteo" in features:
                meteorology = pd.read_csv("./data/preprocess/Meteorology.csv")
                meteorology = meteorology[["location", "Date",
                                           "avg_temp", "min_temp", "max_temp",  # temperatures
                                           "max_rain_1h",  # rainfall
                                           "avg_dew_point",  # dew point
                                           "avg_relative_humidity",  # relative humidity
                                           "sunshine_hr",  # sunshine duration
                                           "avg_land_temp",  # ground temperature
                                           ]]
                # One row per Date: mean over all rows sharing that Date.
                meteorology = meteorology.fillna(0).groupby("Date").agg('mean')
                self.meteorology = meteorology.drop(["location"], axis=1)
                self.meteorology_size = self.meteorology.shape[1]

                self.internal_size += self.meteorology_size
                self.data = pd.merge(self.data, self.meteorology, on="Date")

            if "covid" in features:
                covid = pd.read_csv("./data/preprocess/covid19.csv")
                covid["Date"] = covid["Date"].str.replace(" ", "")
                covid["Sum_diff"] = np.gradient(covid.Sum, 1)
                # Second difference = gradient of the first difference.
                # np.gradient(x, 2) only changes the sample spacing and
                # returned Sum_diff / 2.
                covid["Sum_diff2"] = np.gradient(covid.Sum_diff, 1)
                self.covid = covid[["Date",
                                    "Sum_diff2",  # change of the day-over-day increase
                                    "Sum_diff",  # day-over-day increase
                                    # "Sum"
                                    ]]
                self.covid_size = self.covid.shape[1] - 1  # except Date
                self.external_size += self.covid_size
                self.data = pd.merge(self.data, self.covid, on="Date")

            if "gas" in features:
                gas = pd.read_csv("./data/preprocess/shell_price.csv")
                gas["gasoline_diff"] = np.gradient(gas.gasoline2, 1)
                gas["diesel_diff"] = np.gradient(gas.diesel, 1)
                self.gas = gas[["Date",
                                "gasoline2",  # regular gasoline price
                                "diesel",  # diesel price
                                "gasoline_diff",  # gasoline day-over-day change
                                "diesel_diff"  # diesel day-over-day change
                                ]]
                self.gas_size = self.gas.shape[1] - 1
                self.external_size += self.gas_size
                self.data = pd.merge(self.data, self.gas, on="Date")

            if "exchange" in features:
                exchange = pd.read_csv("./data/preprocess/exchange.csv")
                exchange["Last_diff"] = np.gradient(exchange.Last, 1)
                self.exchange = exchange[["Date",
                                          "Last",  # closing value
                                          "Last_diff"  # closing value day-over-day change
                                          ]]
                self.exchange_size = self.exchange.shape[1] - 1
                self.external_size += self.exchange_size
                self.data = pd.merge(self.data, self.exchange, on="Date")

        ##############################################################
        # Others : holiday, weekday information
        ##############################################################
        date = pd.date_range('2020.01.01', end='2020.11.24', freq='d')
        date = pd.DataFrame(columns=["Date"], data=date.astype(str).values)
        # Dates are kept as "YYYY.MM.DD" strings to match the CSV tables.
        date["Date"] = date["Date"].str.replace("-", ".")
        date["weekday"] = pd.to_datetime(date["Date"]).dt.weekday
        # week_dict = {0:0,1:1,2:1,3:1,4:1,5:2,6:2}
        # date["weekday"] = date["weekday"].map(week_dict)
        # Holiday code: 0 = none, 1 = public holiday, 2 = the two listed
        # multi-day periods (presumably Lunar New Year and Chuseok - confirm).
        date["holiday"] = 0
        holiday_strings = pd.Series(
            np.array(SouthKorea().holidays(2020))[:, 0]).map(str).str.replace("-", ".")
        date.loc[date.Date.isin(holiday_strings), "holiday"] = 1
        date.loc[date.Date.isin(["2020.01.24", "2020.01.25", "2020.01.26", "2020.01.27"]), "holiday"] = 2
        date.loc[date.Date.isin(["2020.09.30", "2020.10.01", "2020.10.02", "2020.10.03"]), "holiday"] = 2

        ##############################################################
        # dummy
        ##############################################################
        weekday_dum = pd.get_dummies(date.weekday, prefix="week")
        date = pd.concat([date, weekday_dum], axis=1)
        holiday_dum = pd.get_dummies(date.holiday, prefix="holiday")
        date = pd.concat([date, holiday_dum], axis=1)
        date.drop(["weekday", "holiday"], axis=1, inplace=True)

        self.date = date
        self.date_size = date.shape[1] - 1

        ################################################################
        # Post-process
        ################################################################
        self.data = pd.merge(self.data, self.date, on="Date", how='left')
        self.data.fillna(0, inplace=True)
        # Both boundaries point at the same date, so the validation frame
        # holds only the warm-up window; effectively train and test only.
        idx_val = self.data[self.data.Date == "2020.11.01"].index.values[0]
        idx_test = self.data[self.data.Date == "2020.11.01"].index.values[0]

        # Divide into train, val and test. val/test start one full window
        # earlier so their first sample has a complete input window.
        self.train_df = self.data[:idx_val]
        self.val_df = self.data[(idx_val - self.total_window_size):idx_test]
        self.test_df = self.data[(idx_val - self.total_window_size):]

        # Keep the dates (without the warm-up window) before dropping them.
        self.date_train_df = self.train_df.Date
        self.date_val_df = self.val_df.Date[self.total_window_size:]
        self.date_test_df = self.test_df.Date[self.total_window_size:]
        # Non-inplace drop: the frames are slices of self.data, and an
        # inplace drop on a slice triggers SettingWithCopyWarning.
        self.train_df = self.train_df.drop(["Date"], axis=1)
        self.val_df = self.val_df.drop(["Date"], axis=1)
        self.test_df = self.test_df.drop(["Date"], axis=1)

        ##############################################################
        # Scaler
        ##############################################################
        # Min-max scaling with statistics taken over the whole data set.
        self.data_min = self.data.drop(["Date"], axis=1).min()
        self.data_max = self.data.drop(["Date"], axis=1).max()
        data_range = self.data_max - self.data_min

        self.train_df = (self.train_df - self.data_min) / data_range  # Normalize
        self.val_df = (self.val_df - self.data_min) / data_range
        self.test_df = (self.test_df - self.data_min) / data_range

        ##############################################################
        # Work out the label column indices.
        ##############################################################
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in
                                          enumerate(self.label_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(self.train_df.columns)}

        ##############################################################
        # Work out the window parameters
        ##############################################################
        # Inputs are the first input_width positions of the window, labels
        # the last label_width positions.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
Пример #16
0
# Register one workalendar calendar instance per country in country_hols,
# keyed by country name. Each class is imported right before it is used.
country_hols['Italy'] = Italy()
from workalendar.europe import Portugal
country_hols['Portugal'] = Portugal()
from workalendar.europe import UnitedKingdom
country_hols['UnitedKingdom'] = UnitedKingdom()
from workalendar.europe import Ireland
country_hols['Ireland'] = Ireland()
from workalendar.europe import Netherlands
country_hols['Netherlands'] = Netherlands()

# Asia (note the key 'Korea' for the SouthKorea calendar)
from workalendar.asia import China
country_hols['China'] = China()
from workalendar.asia import Japan
country_hols['Japan'] = Japan()
from workalendar.asia import SouthKorea
country_hols['Korea'] = SouthKorea()
# from workalendar.asia import India
# country_hols['India'] = India()
# from workalendar.asia import Thailand
# country_hols['Thailand'] = Thailand()
# from workalendar.asia import Vietnam
# country_hols['Vietnam'] = Vietnam()
# from workalendar.asia import Indonesia
# country_hols['Indonesia'] = Indonesia()

# Oceania
from workalendar.oceania import Australia
country_hols['Australia'] = Australia()

# Americas
from workalendar.america import Brazil
country_hols['Brazil'] = Brazil()
from workalendar.america import Canada
Пример #17
0
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import calendar
from datetime import date, datetime
from workalendar.asia import SouthKorea

cal = SouthKorea()  # South Korean holiday calendar
today = datetime.today()
month = today.month  # current month

# "MM-DD" strings of every public holiday in the current year. The year was
# hard-coded to 2020 while the month came from today's date, which gives
# wrong days in any other year. The holiday list is fetched once instead of
# on every loop iteration.
holiday = [str(day)[5:] for day, _label in cal.holidays(today.year)]

safety_id = input("하영드리미 아이디 : ")
safety_pw = input("하영드리미 비밀번호 : ")

# Day-of-month numbers of the holidays that fall in the current month.
holiday_date = [int(mmdd[3:]) for mmdd in holiday if int(mmdd[:2]) == month]

# Chrome driver: open the site and wait one second before continuing.
driver = webdriver.Chrome('chromedriver')
driver.get("http://safety.jejunu.ac.kr/")
time.sleep(1)

# Log in to the safety site: locate the user-id input field.
search = driver.find_element_by_xpath('//*[@id="userId"]')
Пример #18
0
]
hd3 = [  # general elections
    '20160413',
    '20120411',
    '20080409',
    '20040415',
    '20000413',
]
hd4 = [  # local elections
    '20140604',
    '20100602',
    '20060531',
    '20020613',
]

# Calendar holidays for 2000-2018, followed by the hand-maintained extra
# dates from the hd0..hd4 tables.
cal = SouthKorea()
years = range(2000, 2019)
hds = [h[0] for y in years for h in cal.holidays(y)]
thds = list(
    map(lambda stamp: dt.strptime(stamp, '%Y%m%d').date(),
        (*hd0, *hd1, *hd2, *hd3, *hd4)))
hds += thds
# print(hds)

# 2014.09.10: Extended Holiday
'''
Extended Holidays:
[20140910, 20150929, 20160210, 20170130, 20171006, 20180507, 20180926]
https://namu.wiki/w/%EB%8C%80%EC%B2%B4%20%ED%9C%B4%EC%9D%BC%20%EC%A0%9C%EB%8F%84#s-4
Пример #19
0
# Advice text appended to the selected date once a prediction is available.
_BUY_MESSAGE = "주가 상승 예상 -> 매매 어드바이스 : 매수"
_SELL_MESSAGE = "주가 하락 예상 -> 매매 어드바이스 : 매도"

# Saved prediction models selectable in batch mode, keyed by (target, method).
_BATCH_MODELS = {
    ('KOSPI', 'AutoML_CLA'): 'deployment_kospi_20201029',
    ('KOSPI', 'AutoML_REG'): 'deployment_kospi_reg_20201029',
    ('YG', 'AutoML_CLA'): 'deployment_yg_20201029',
    ('YG', 'AutoML_REG'): 'deployment_yg_reg_20201029',
}

# Explicit Prophet changepoints used for the YG forecast.
_YG_CHANGEPOINTS = [
    '2018-10-29', '2018-11-19', '2018-12-11', '2019-01-04',
    '2019-01-29', '2019-02-22', '2019-03-19', '2019-04-10',
    '2019-05-03', '2019-05-27', '2019-06-19', '2019-07-10',
    '2019-08-01', '2019-08-26', '2019-09-20', '2019-10-15',
    '2019-11-07', '2019-11-29', '2019-12-26', '2020-01-20',
    '2020-02-13', '2020-03-05', '2020-03-30', '2020-04-21',
    '2020-05-18',
    '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-18',
    '2020-08-19', '2020-08-20', '2020-08-26', '2020-08-28',
    '2020-08-31', '2020-09-02', '2020-09-03', '2020-09-07',
    '2020-09-08',
]


def _show_classification_scores(model_train, test_data):
    """Score model_train on test_data and render the classification metrics."""
    scored = predict_model(model_train, data=test_data)
    actual, predicted = scored['Labeling'], scored['Label']

    score_model = pd.DataFrame(
        data={
            'ACC': [accuracy_score(actual, predicted)],
            'AUC': [roc_auc_score(actual, predicted)],
            'RECALL': [recall_score(actual, predicted)],
            'PREC': [precision_score(actual, predicted)],
            'F1': [f1_score(actual, predicted)],
        },
        columns=['ACC', 'AUC', 'RECALL', 'PREC', 'F1'])
    score_model.index.name = "Metrics Score"
    st.write("Test Data Metrics Score")
    st.table(score_model)


def _show_regression_scores(model_train, test_data):
    """Score model_train on test_data and render regression metrics + chart."""
    # Work on a copy: the 'Labeling' column is dropped before scoring and the
    # caller's frame must stay intact for the later prediction step.
    reg_data = copy.deepcopy(test_data)
    del reg_data['Labeling']
    scored = predict_model(model_train, data=reg_data)
    test_model = scored[['Close', 'Label']]
    actual, predicted = test_model['Close'], test_model['Label']

    mse_ = mean_squared_error(actual, predicted)
    score_model = pd.DataFrame(
        data={
            'MAE': [mean_absolute_error(actual, predicted)],
            'MSE': [mse_],
            # RMSE derived from MSE so that no `squared=False` keyword is
            # needed (that keyword is deprecated in recent scikit-learn).
            'RMSE': [np.sqrt(mse_)],
            'R2': [r2_score(actual, predicted)],
        },
        columns=['MAE', 'MSE', 'RMSE', 'R2'])
    score_model.index.name = "Metrics Score"

    st.write("Test Data Metrics Score")
    st.table(score_model)
    st.write("Forecast Data (Test Data)")
    st.line_chart(test_model)


def _prophet_history(price_df):
    """Return price_df's index and 'Close' column as a Prophet ds/y frame."""
    df_prophet = copy.deepcopy(price_df)
    df_prophet['date'] = pd.to_datetime(df_prophet.index)
    df_data = df_prophet[['date', 'Close']].reset_index(drop=True)
    return df_data.rename(columns={'date': 'ds', 'Close': 'y'})


def _show_prophet_forecast(prop_model, holiday_years, periods=10):
    """Forecast with the fitted prop_model and render the Prophet figures.

    Weekends and the South Korean holidays of holiday_years are removed from
    the future frame before predicting.
    """
    # Holidays are converted to timestamps: comparing the datetime64 `ds`
    # column against plain datetime.date objects does not match on current
    # pandas, which left every holiday in the frame.
    holidays = pd.to_datetime([
        entry[0] for year in holiday_years
        for entry in SouthKorea().holidays(year)
    ])

    prop_future = prop_model.make_future_dataframe(periods=periods)
    is_weekend = prop_future.ds.dt.weekday >= 5
    is_holiday = prop_future.ds.dt.normalize().isin(holidays)
    prop_future = prop_future[~(is_weekend | is_holiday)]

    prop_forecast = prop_model.predict(prop_future)
    fig1 = prop_model.plot(prop_forecast)
    fig2 = prop_model.plot_components(prop_forecast)

    st.write("Forecast Data")
    st.write(fig1)
    st.write("Component Wise Forecast")
    st.write(fig2)


def _advice_message(date, signal):
    """Return date followed by the buy advice (signal '1') or sell advice."""
    return date + (_BUY_MESSAGE if signal == '1' else _SELL_MESSAGE)


def run():
    """Render the Streamlit page for KOSPI / YG stock price prediction.

    Online mode: the user picks a date, a target and a method; the matching
    data is collected, evaluation metrics are shown for the AutoML methods,
    and pressing the predict button shows a buy/sell advice message.

    Batch mode: the user picks a target and an AutoML method, uploads a CSV
    file, and the predictions of the matching saved model are displayed.
    """
    from PIL import Image
    image = Image.open('logo.jpg')
    image_stock = Image.open('stock.jpg')

    st.image(image, use_column_width=False)

    add_selectbox = st.sidebar.selectbox("예측 방법 결정", ("Online", "Batch"))

    st.sidebar.info('프로젝트명 :' + '\n' + '자연어 처리 기반의 투자분석 및 예측시스템 개발')
    st.sidebar.success('★멘토님★ : 정좌연 PE')
    st.sidebar.info('팀명 : 턴어라운드')
    st.sidebar.success('팀원 : 이지훈, 이문형, 강민재, 구병진, 김서정')

    st.sidebar.image(image_stock)

    st.title("KOSPI 지수 및 YG 종목 주가 예측 모델")

    # User settings
    if add_selectbox == 'Online':
        date = str(
            st.number_input('Date',
                            min_value=20200101,
                            max_value=20201231,
                            value=20201027))
        target = st.selectbox('Target', ['KOSPI', 'YG'])
        method = st.selectbox(
            'Method',
            ['AutoML_CLA', 'AutoML_REG', 'ARIMA', 'Prophet', 'RL', 'NLP'])

        output = ""

        input_ = DataCollectionModel.DataCollection(date)
        prophet_input_ = ProphetModel.Prophet_(date)

        # KOSPI: collect data and evaluate the selected model
        if target == 'KOSPI':
            input_df = input_.kospi_collection()

            if method == 'AutoML_CLA':
                # prediction model, then the model used for evaluation
                model = load_model('deployment_kospi_20201029')
                model_train = load_model('deployment_kospi_train_20201029')
                _show_classification_scores(model_train,
                                            input_df[0].iloc[382:])

            elif method == 'AutoML_REG':
                model = load_model('deployment_kospi_reg_20201029')
                model_train = load_model('deployment_kospi_reg_train_20201029')
                _show_regression_scores(model_train, input_df[0].iloc[382:])

            # The remaining methods have no model to load yet; they only log
            # their name to stdout.
            elif method == 'ARIMA':
                print("ARIMA")

            elif method == 'Prophet':
                print("Prophet")

            elif method == 'RL':
                import main

                print("RL")

            elif method == 'NLP':
                print("NLP")

        # YG: collect data and evaluate the selected model
        else:
            input_df = input_.yg_collection()

            if method == 'AutoML_CLA':
                model = load_model('deployment_yg_20201029')
                model_train = load_model('deployment_yg_train_20201029')
                _show_classification_scores(model_train,
                                            input_df[0].iloc[341:])

            elif method == 'AutoML_REG':
                model = load_model('deployment_yg_reg_20201029')
                model_train = load_model('deployment_yg_reg_train_20201029')
                _show_regression_scores(model_train, input_df[0].iloc[341:])

            elif method == 'ARIMA':
                print("ARIMA")

            elif method == 'Prophet':
                print("prophet")

            elif method == 'RL':
                print("RL")

            elif method == 'NLP':
                print("NLP")

        # Run the prediction when the button is pressed
        if st.button("주가 예측"):
            if method == 'AutoML_CLA':
                output = _advice_message(
                    date, predict(model=model, input_df=input_df[0]))

            elif method == 'AutoML_REG':
                output = _advice_message(
                    date, predict_reg(model=model, input_df=input_df))

            elif method == 'ARIMA':
                print("ARIMA")

            elif method == 'Prophet':
                df_data = _prophet_history(input_df[0])

                if target == 'KOSPI':
                    prop_model = Prophet(yearly_seasonality='auto',
                                         weekly_seasonality='auto',
                                         daily_seasonality='auto',
                                         changepoint_prior_scale=0.15,
                                         changepoint_range=0.9)
                    prop_model.add_country_holidays(country_name='KR')
                    holiday_years = (2020, 2021)
                else:
                    prop_model = Prophet(yearly_seasonality='auto',
                                         weekly_seasonality='auto',
                                         daily_seasonality='auto',
                                         changepoints=_YG_CHANGEPOINTS,
                                         changepoint_range=0.85,
                                         changepoint_prior_scale=0.2)
                    holiday_years = (2019, 2020)

                prop_model.fit(df_data)
                _show_prophet_forecast(prop_model, holiday_years)

                # The figures above are for display only; the buy/sell signal
                # comes from the Prophet_ helper object.
                if target == 'KOSPI':
                    signal = prophet_input_.prophet_kospi(input_df[0])
                else:
                    signal = prophet_input_.prophet_yg(input_df[0])
                output = _advice_message(date, signal)

        # Only show the result box once there is a message to show.
        if output:
            st.success(output)

    if add_selectbox == 'Batch':
        # The model has to be chosen here: nothing from the Online branch is
        # bound in batch mode.
        batch_target = st.selectbox('Target', ['KOSPI', 'YG'])
        batch_method = st.selectbox('Method', ['AutoML_CLA', 'AutoML_REG'])

        file_upload = st.file_uploader("Upload csv file for predictions",
                                       type=["csv"])

        if file_upload is not None:
            data = pd.read_csv(file_upload)
            model = load_model(_BATCH_MODELS[(batch_target, batch_method)])
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
Пример #20
0
    def prophet_yg(self, model_yg):
        """Predict the direction of the YG price with a Prophet forecast.

        Fits a Prophet model on the 'Close' column of model_yg (dates taken
        from its index), forecasts self.pred_days further periods with
        weekends and South Korean holidays of 2019-2020 removed, and compares
        the forecast for self.date with the last observed close.

        Intermediate results (data, m, future, forecast, actual_value,
        predict_value, ...) are kept as attributes on self.

        :param model_yg: price DataFrame with a date-like index and a
            'Close' column
        :return: '1' if the forecast for self.date is above the last observed
            close, otherwise '0'
        :raises ValueError: if the forecast contains no row for self.date
            (for example when it falls on a removed weekend or holiday)
        """
        self.model_yg = model_yg
        self.df = copy.deepcopy(self.model_yg)
        self.df['date'] = pd.to_datetime(self.df.index)
        self.data = self.df[['date', 'Close']].reset_index(drop=True)
        self.data = self.data.rename(columns={'date': 'ds', 'Close': 'y'})

        # Explicit changepoints handed to Prophet.
        self.cp_spc = [
            '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-18',
            '2020-08-19', '2020-08-20', '2020-08-26', '2020-08-28',
            '2020-08-31', '2020-09-02', '2020-09-03', '2020-09-07',
            '2020-09-08',
        ]
        self.cp_default = [
            '2018-10-29', '2018-11-19', '2018-12-11', '2019-01-04',
            '2019-01-29', '2019-02-22', '2019-03-19', '2019-04-10',
            '2019-05-03', '2019-05-27', '2019-06-19', '2019-07-10',
            '2019-08-01', '2019-08-26', '2019-09-20', '2019-10-15',
            '2019-11-07', '2019-11-29', '2019-12-26', '2020-01-20',
            '2020-02-13', '2020-03-05', '2020-03-30', '2020-04-21',
            '2020-05-18',
        ]
        self.cp = self.cp_default + self.cp_spc

        # All Prophet hyperparameters not listed here keep their defaults.
        self.m = Prophet(yearly_seasonality='auto',
                         weekly_seasonality='auto',
                         daily_seasonality='auto',
                         changepoints=self.cp,
                         changepoint_range=0.8,
                         changepoint_prior_scale=0.1)
        self.m.fit(self.data)

        self.kor_holidays = pd.concat([
            pd.Series(np.array(SouthKorea().holidays(2019))[:, 0]),
            pd.Series(np.array(SouthKorea().holidays(2020))[:, 0]),
        ]).reset_index(drop=True)
        self.future = self.m.make_future_dataframe(periods=self.pred_days)

        # Keep trading days only.  The holidays are converted to timestamps
        # first: comparing the datetime64 `ds` column against plain
        # datetime.date objects does not match on current pandas, which left
        # every holiday in the frame.
        holiday_stamps = pd.to_datetime(self.kor_holidays)
        is_weekend = self.future.ds.dt.weekday >= 5
        is_holiday = self.future.ds.dt.normalize().isin(holiday_stamps)
        self.future = self.future[~(is_weekend | is_holiday)]

        self.forecast = self.m.predict(self.future)

        # Actual value: the most recent observed close.
        self.actual_value = float(self.data['y'].iloc[-1])

        # Predicted value: the forecast for the requested date.
        forecast_on_date = self.forecast.loc[
            self.forecast['ds'] == self.date, 'yhat']
        if forecast_on_date.empty:
            raise ValueError(
                'no forecast available for date %r' % (self.date,))
        self.predict_value = float(forecast_on_date.iloc[0])

        if self.actual_value < self.predict_value:
            return '1'
        else:
            return '0'