Example #1
0
    def on_finished(
        self,
        data: pd.DataFrame,
        args: Dict,
    ) -> None:
        # Persist the crawled cash-flow data for this stock code as CSV.
        filepath = os.path.join(DataLocationConfig.CASHFLOW_YFINANCE_BASEDIR,
                                f'{args["code"]}.csv')
        data.to_csv(filepath)
        Logger.i(TAG, f'Saved data to {filepath}')
Example #2
    def on_finished(
        self,
        data: pd.DataFrame,
        args: Dict,
    ) -> None:
        # Persist the crawled daily price data under a directory named by the start date.
        filepath = os.path.join(
            DataLocationConfig.STOCKPRICE_YFINANCE_DAILY_BASEDIR,
            args['start_dt'].strftime('%Y%m%d'), f'{args["code"]}.csv')
        data.to_csv(filepath)
        Logger.i(TAG, f'Saved data to {filepath}')
Example #3
0
    def on_finished(self, data, args):
        Logger.i('company_announcement_crawl : on_finished',
                 f'{args["start_dt"]} : {args["end_dt"]}')
        Logger.i('company_announcement_crawl : on_finished', len(data))

        # Download each announcement PDF to S3, keyed by its publication date (YYYYMMDD).
        for d in data:
            download_pdf_to_S3(d['document_url'],
                               d['pubdate'].split(' ')[0].replace('-', ''))
        print('=' * 100)
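download_pdf_to_S3 is called in several examples but not defined in this listing. Below is a minimal sketch of what it might look like, assuming a requests download and a boto3 upload; the bucket name fin-app is taken from the S3 paths elsewhere in the listing, and the announcements/<date>/<filename> key layout is purely an assumption.

import os
from urllib.parse import urlparse

import boto3
import requests


def download_pdf_to_S3(document_url, date_str):
    # Fetch the PDF (sketch only: no retry/backoff handling).
    response = requests.get(document_url, timeout=30)
    response.raise_for_status()

    # Bucket name and key layout are assumptions for illustration.
    filename = os.path.basename(urlparse(document_url).path)
    s3_key = f'announcements/{date_str}/{filename}'

    boto3.client('s3').put_object(
        Bucket='fin-app',
        Key=s3_key,
        Body=response.content,
        ContentType='application/pdf',
    )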
Example #4
    def on_finished(
        self,
        data: pd.DataFrame,
        args: Dict,
    ) -> None:
        print(args['tag'])
        print(data)
        # Persist the crawled economic-indicator data (Japanese series) as CSV.
        filepath = os.path.join(DataLocationConfig.ECONOMIC_INDICATOR_BASEDIR,
                                'ja', f'{args["name"]}.csv')
        data.to_csv(filepath)
        Logger.i(TAG, f'Saved data to {filepath}')
Example #5
0
    def on_finished(self, data, args):
        Logger.i('company_announcement_crawl : on_finished',
                 f'{args["start_dt"]} : {args["end_dt"]}')
        Logger.i('company_announcement_crawl : on_finished', len(data))

        # Normalize company codes to int before writing to DynamoDB.
        for d in data:
            d['company_code'] = int(d['company_code'])

        DynamoDB.put_items(
            AWSConfig.DYNAMODB_COMPANY_ANNOUNCEMENT_TABLE_NAME,
            data,
        )
        # Download each announcement PDF to S3, keyed by its publication date (YYYYMMDD).
        for d in data:
            download_pdf_to_S3(d['document_url'],
                               d['pubdate'].split(' ')[0].replace('-', ''))

        # Keep this batch so a later run can de-duplicate against it (see Example #7).
        global g_data
        g_data = data
        print('=' * 100)
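DynamoDB.put_items is used here and in Example #7 but is not part of this listing. A minimal boto3-based sketch is shown below, assuming the wrapper simply batch-writes dict items; only the call signature is taken from the examples above.

import boto3


class DynamoDB:

    @staticmethod
    def put_items(table_name, items):
        # Batch-write dict items into the given DynamoDB table (illustrative sketch).
        table = boto3.resource('dynamodb').Table(table_name)
        with table.batch_writer() as batch:
            for item in items:
                batch.put_item(Item=item)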
Example #6
0
def main():
    years = range(1983, 2020 + 1)
    df_stocklist = pd.read_csv(DataLocationConfig.STOCKLIST_FILE)
    codes = df_stocklist['銘柄コード'].unique()

    for code in codes:
        # Collect one DataFrame per year; years with no data file are skipped.
        df_list = []
        for year in years:
            filepath = os.path.join(DataLocationConfig.STOCKPRICE_BASEDIR,
                                    f'{year}/{code}.csv')
            try:
                df_list.append(pd.read_csv(filepath))
            except Exception:
                pass

        if len(df_list) == 0:
            continue

        df_concat = pd.concat(df_list, axis=0)

        if CALC_STOP_HIGH_LOW:
            df_concat['日付'] = pd.to_datetime(df_concat['日付'])
            df_concat.set_index('日付', inplace=True)
            df_concat.sort_index(inplace=True)
            # Flag limit-up / limit-down days against the previous close.
            df_concat['last_close'] = df_concat['終値'].shift(1)
            df_concat['stop_high_low'] = df_concat.apply(
                lambda x: check_stop_high_low(x['last_close'], x['高値'], x['安値']),
                axis=1)

        filepath = os.path.join(DataLocationConfig.STOCKPRICE_CONCAT_BASEDIR,
                                f'{code}.csv')
        df_concat.to_csv(filepath)
        Logger.i('concat_stockprice_data', f'Saved concat data to {filepath}')
        print('=' * 80)
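check_stop_high_low is called above (behind the CALC_STOP_HIGH_LOW flag) but not defined in this listing. The sketch below substitutes a fixed percentage threshold for the real exchange price-limit (値幅制限) table, which actually depends on the price band; it is illustrative only.

import math


def check_stop_high_low(last_close, high, low, limit_rate=0.3):
    # Simplified stand-in: the real limit is a per-price-band table, not one rate.
    if last_close is None or (isinstance(last_close, float) and math.isnan(last_close)):
        return None
    if high >= last_close * (1 + limit_rate):
        return 'stop_high'
    if low <= last_close * (1 - limit_rate):
        return 'stop_low'
    return None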
Example #7
0
    def on_finished(self, data, args):
        Logger.i('company_announcement_crawl : on_finished',
                 f'{args["start_dt"]} : {args["end_dt"]}')
        Logger.i('company_announcement_crawl : on_finished', len(data))

        # Normalize company codes to int before writing to DynamoDB.
        for d in data:
            d['company_code'] = int(d['company_code'])

        # Drop announcements already handled in the previous batch (see Example #5).
        global g_data
        ids = [d['id'] for d in g_data]
        filtered_data = [d for d in data if d['id'] not in ids]

        Logger.i('company_announcement_crawl : on_finished, filtered_data len => ',
                 len(filtered_data))
        DynamoDB.put_items(
            AWSConfig.DYNAMODB_COMPANY_ANNOUNCEMENT_TABLE_NAME,
            filtered_data,
        )
        # Download each new announcement PDF to S3, keyed by its publication date (YYYYMMDD).
        for d in filtered_data:
            download_pdf_to_S3(d['document_url'],
                               d['pubdate'].split(' ')[0].replace('-', ''))

        g_data = []
        print('=' * 100)
Example #8
def main():

    df_stocklist = pd.read_csv(
        DataLocationConfig.STOCKLIST_FILE
    )

    codes = df_stocklist['銘柄コード'].unique()
    Logger.d(TAG, codes)

    STOCKPRICE_FILEPATH_FMT = 's3://fin-app/stockprice_concat/{code}.csv'

    METADATA_LOCAL_FILEPATH = '/tmp/DAILY_WINDOW-120d_STRIDE-30d_WIDTH-0.5_stockprice_metadata.csv'
    METADATA_S3_FILEPATH = os.path.join(
        DataLocationConfig.STOCKPRICE_CANDLECHART_BASEDIR.replace('s3://fin-app/', ''),
        'metadata/DAILY_WINDOW-120d_STRIDE-30d_WIDTH-0.5/stockprice_metadata.csv'
    )

    s3_filepath_list = []
    start_dt_str_list = []
    end_dt_str_list = []
    code_list = []
    change_rate_list = []
    for code in tqdm(codes[:]):
        Logger.i(TAG, code)
        files = S3.get_filelist(
            basedir=os.path.join(
                DataLocationConfig.STOCKPRICE_CANDLECHART_BASEDIR.replace('s3://fin-app/', ''),
                # 'DAILY_WINDOW-120d_STRIDE-30d_WIDTH-0.5/1301'
                f'DAILY_WINDOW-120d_STRIDE-30d_WIDTH-0.5/{code}'
            )
        )
        # Chart file names follow the pattern '<start_dt>_<end_dt>.png'.
        start_dt_str = [file.split('/')[-1].replace('.png', '').split('_')[0] for file in files]
        end_dt_str = [file.split('/')[-1].replace('.png', '').split('_')[1] for file in files]

        s3_filepath_list += files
        start_dt_str_list += start_dt_str
        end_dt_str_list += end_dt_str
        code_list += [code]*len(files)

        Logger.i(TAG, f'len(files) : {len(files)}')
        Logger.i(TAG, f'len(s3_filepath_list) : {len(s3_filepath_list)}')

        try:
            df = pd.read_csv(
                STOCKPRICE_FILEPATH_FMT.format(code=code)
            )
        except Exception as e:
            Logger.e(TAG, f'failed to load csv file from s3 : {e}')
            change_rate_list += [None]*len(files)
            continue

        df['日付'] = pd.to_datetime(df['日付'])
        df = df.set_index('日付')
        df = df.rename(columns={
            '始値': 'open',
            '高値': 'high',
            '安値': 'low',
            '終値': 'close'
        })
        MAX_DT = df.index.max()

        for sds, eds in zip(start_dt_str, end_dt_str):
            if len(df[sds:eds]) == 0:
                change_rate_list.append(None)
                continue

            # Walk the window's end date back (up to 119 days) to the most recent
            # trading day actually present in the index.
            edt = datetime.strptime(eds, '%Y-%m-%d')
            for _ in range(119):
                if edt in df.index:
                    break
                edt -= timedelta(days=1)

            change_rate_start_dt = edt + timedelta(days=1)
            change_rate_end_dt = change_rate_start_dt + timedelta(days=30)
            if change_rate_end_dt > MAX_DT or len(df[change_rate_start_dt:change_rate_end_dt]) == 0:
                change_rate_list.append(None)
                continue

            # Mean close over the following 30 days, relative to the close at the window end.
            change_rate = \
                (df[change_rate_start_dt:change_rate_end_dt]['close'] - df.loc[edt]['close']).mean() / \
                df.loc[edt]['close']
            change_rate_list.append(change_rate)

        # Checkpoint the metadata locally whenever the stock code is divisible by 10.
        if code % 10 == 0:
            df_meta = pd.DataFrame({
                's3_filepath': s3_filepath_list,
                'code': code_list,
                'start_dt': start_dt_str_list,
                'end_dt': end_dt_str_list,
                'change_rate_30d': change_rate_list,
            })
            df_meta.to_csv(
                METADATA_LOCAL_FILEPATH,
                index=False,
            )
            Logger.i(TAG, f'len(df_meta) : {len(df_meta)}')

    # Write the complete metadata file and upload it to S3.
    df_meta = pd.DataFrame({
        's3_filepath': s3_filepath_list,
        'code': code_list,
        'start_dt': start_dt_str_list,
        'end_dt': end_dt_str_list,
        'change_rate_30d': change_rate_list,
    })
    df_meta.to_csv(
        METADATA_LOCAL_FILEPATH,
        index=False,
    )

    S3.save_file(
        local_filepath=METADATA_LOCAL_FILEPATH,
        s3_filepath=METADATA_S3_FILEPATH,
    )
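The S3 helper (S3.get_filelist / S3.save_file) used above is also not shown in this listing. A minimal boto3 sketch follows; the method names and signatures mirror the call sites, the bucket name fin-app comes from the s3:// paths above, and everything else is assumed.

import boto3


class S3:
    BUCKET = 'fin-app'  # taken from the s3:// paths above

    @staticmethod
    def get_filelist(basedir):
        # List object keys under the given prefix (illustrative sketch).
        client = boto3.client('s3')
        keys = []
        for page in client.get_paginator('list_objects_v2').paginate(
                Bucket=S3.BUCKET, Prefix=basedir):
            keys += [obj['Key'] for obj in page.get('Contents', [])]
        return keys

    @staticmethod
    def save_file(local_filepath, s3_filepath):
        # Upload a local file to the given key (illustrative sketch).
        boto3.client('s3').upload_file(local_filepath, S3.BUCKET, s3_filepath)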
Example #9
0
def main():
    df_stocklist = pd.read_csv(
        DataLocationConfig.STOCKLIST_FILE
    )

    Logger.d(TAG, df_stocklist['銘柄コード'].unique())

    codes = df_stocklist['銘柄コード'].unique()

    STOCKPRICE_FILEPATH_FMT = 's3://fin-app/stockprice_concat/{code}.csv'

    # Sliding-window parameters: 120-day chart windows advanced 30 days at a time.
    STRIDE_DAYS = 30
    WINDOW_DAYS = 30 * 4
    STRIDE_D_TD = timedelta(days=STRIDE_DAYS)
    WINDOW_D_TD = timedelta(days=WINDOW_DAYS)

    # Candle body width passed to the chart renderer.
    WIDTH = 0.5

    S3_CANDLECHART_FILEPATH_FMT = os.path.join(
        DataLocationConfig.STOCKPRICE_CANDLECHART_BASEDIR.replace('s3://fin-app/', ''),
        f'DAILY_WINDOW-{WINDOW_DAYS}d_STRIDE-{STRIDE_DAYS}d_WIDTH-{WIDTH}',
        '{code}',
        '{start_dt}_{end_dt}.png'
    )
    LOCAL_CANDLECHART_FILEPATH_FMT = os.path.join(
        '/tmp',
        f'WINDOW-{WINDOW_DAYS}d_STRIDE-{STRIDE_DAYS}d',
        '{code}',
        '{start_dt}_{end_dt}.png'
    )

    for code in codes[2:]:
        # if code < 1515:
        #     continue
        try:
            df = pd.read_csv(
                STOCKPRICE_FILEPATH_FMT.format(code=code)
            )
        except Exception as e:
            Logger.e(TAG, f'failed to load csv file from s3 : {e}')
            continue
        df['日付'] = pd.to_datetime(df['日付'])
        df = df.set_index('日付')
        df = df.rename(columns={
            '始値': 'open',
            '高値': 'high',
            '安値': 'low',
            '終値': 'close'
        })
        MIN_DT = df.index.min()
        MAX_DT = df.index.max()

        start_dt = MIN_DT
        end_dt = MIN_DT + WINDOW_D_TD

        try:
            # Slide a WINDOW_DAYS-wide window over the price history, rendering
            # one candle chart per window and uploading it to S3.
            while end_dt <= MAX_DT:
                start_dt_str = start_dt.strftime('%Y-%m-%d')
                end_dt_str = end_dt.strftime('%Y-%m-%d')

                df_sliced = df[start_dt_str:end_dt_str]

                s3_filepath = S3_CANDLECHART_FILEPATH_FMT.format(
                    code=code,
                    start_dt=start_dt_str,
                    end_dt=end_dt_str,
                )
                local_filepath = LOCAL_CANDLECHART_FILEPATH_FMT.format(
                    code=code,
                    start_dt=start_dt_str,
                    end_dt=end_dt_str,
                )
                os.makedirs(os.path.dirname(local_filepath), exist_ok=True)

                local_filepath = creaet_candle_chart(
                    opens=df_sliced.open,
                    closes=df_sliced.close,
                    highs=df_sliced.high,
                    lows=df_sliced.low,
                    width=WIDTH,
                    filepath=local_filepath
                )

                S3.save_file(
                    local_filepath=local_filepath,
                    s3_filepath=s3_filepath,
                )

                Logger.i(TAG, f'Saved candle chart image to {s3_filepath}')

                os.remove(local_filepath)

                start_dt += STRIDE_D_TD
                end_dt += STRIDE_D_TD
        except Exception as e:
            Logger.e(TAG, f'{e}')
            continue
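creaet_candle_chart (the name is kept as spelled at the call site) is not defined in this listing. A minimal matplotlib sketch follows; the signature mirrors the call in Example #9, while the colors, figure size, and styling are assumptions.

import matplotlib
matplotlib.use('Agg')  # render off-screen; no display needed
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle


def creaet_candle_chart(opens, closes, highs, lows, width, filepath):
    # Draw a simple candlestick chart and save it to filepath (illustrative sketch).
    fig, ax = plt.subplots(figsize=(6, 4))
    for i, (o, c, h, l) in enumerate(zip(opens, closes, highs, lows)):
        color = 'red' if c >= o else 'blue'
        # High-low wick.
        ax.plot([i, i], [l, h], color=color, linewidth=0.5)
        # Open-close body.
        ax.add_patch(Rectangle((i - width / 2, min(o, c)), width, abs(c - o),
                               facecolor=color, edgecolor=color))
    ax.set_xlim(-1, len(opens))
    ax.axis('off')
    fig.savefig(filepath, bbox_inches='tight')
    plt.close(fig)
    return filepath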