Exemplo n.º 1
0
class MarketData:
    """Read and write CSV-backed market data through file helper objects."""

    def __init__(self, broker=None):
        # `broker` is accepted but never read inside this class.
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()

    def get_symbols(self):
        """Return the ``C.SYMBOL`` column of the symbols CSV as a list."""
        cached = self.reader.load_csv(self.finder.get_symbols_path())
        return list(cached[C.SYMBOL])

    def get_dividends(self, symbol, timeframe=None):
        """Load the CSV stored at the dividends path of ``symbol``.

        ``timeframe`` is accepted but not used by this implementation.
        """
        dividends_path = self.finder.get_dividends_path(symbol)
        return self.reader.load_csv(dividends_path)

    def save_dividends(self, symbol, timeframe=None):
        """Get the dividends of ``symbol`` and write them to its CSV path."""
        # Forward `timeframe` only when it is truthy; otherwise let
        # get_dividends fall back to its own default.
        call_args = (symbol, timeframe) if timeframe else (symbol,)
        df = self.get_dividends(*call_args)
        self.writer.update_csv(self.finder.get_dividends_path(symbol), df)
Exemplo n.º 2
0
class MarketData:
    def __init__(self):
        """Load ``config.env`` via dotenv and create the helper objects."""
        # The env file is loaded before any helper is constructed.
        # NOTE(review): presumably the helpers read env vars -- confirm
        # before reordering these statements.
        load_dotenv(find_dotenv('config.env'))
        self.writer = FileWriter()
        self.reader = FileReader()
        self.finder = PathFinder()
        self.traveller = TimeTraveller()
        # Provider name handed to most PathFinder lookups in this class.
        self.provider = 'iexcloud'

    def try_again(self, func, **kwargs):
        retries = (kwargs['retries']
                   if 'retries' in kwargs else C.DEFAULT_RETRIES)
        delay = (kwargs['delay'] if 'delay' in kwargs else C.DEFAULT_DELAY)
        func_args = {
            k: v
            for k, v in kwargs.items() if k not in {'retries', 'delay'}
        }
        for retry in range(retries):
            try:
                return func(**func_args)
            except Exception as e:
                if retry == retries - 1:
                    raise e
                else:
                    sleep(delay)

    def get_symbols(self):
        """Return the ``C.SYMBOL`` column of the symbols CSV as a list."""
        cached = self.reader.load_csv(self.finder.get_symbols_path())
        return list(cached[C.SYMBOL])

    def get_dividends(self, symbol, timeframe='max'):
        """Load the dividends CSV of ``symbol`` for the current provider.

        The loaded frame is passed through ``reader.data_in_timeframe`` on
        the ``C.EX`` column with ``timeframe`` and the result is returned.
        """
        path = self.finder.get_dividends_path(symbol, self.provider)
        cached = self.reader.load_csv(path)
        return self.reader.data_in_timeframe(cached, C.EX, timeframe)

    def standardize(self, df, full_mapping, filename, columns, default):
        mapping = {k: v for k, v in full_mapping.items() if k in df}

        df = df[list(mapping)].rename(columns=mapping)
        time_col, val_cols = columns[0], columns[1:]

        if time_col in df and set(val_cols).issubset(df.columns):
            df = self.reader.update_df(filename, df,
                                       time_col).sort_values(by=[time_col])
            # since time col is pd.datetime,
            # consider converting to YYYY-MM-DD str format
            for val_col in val_cols:
                df[val_col] = df[val_col].apply(lambda val: float(val)
                                                if val else default)

        return df

    def standardize_dividends(self, symbol, df):
        """Run ``standardize`` with the dividend column mapping.

        ``C.EX`` is the time column, ``C.DIV`` the value column and missing
        values default to 0.
        """
        full_mapping = {
            'exDate': C.EX,
            'paymentDate': C.PAY,
            'declaredDate': C.DEC,
            'amount': C.DIV,
        }
        filename = self.finder.get_dividends_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.DIV], 0)

    def save_dividends(self, **kwargs):
        """Refresh the dividends CSV of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_dividends``.
        Returns the file path when the file exists afterwards, else None.
        """
        filename = self.finder.get_dividends_path(kwargs['symbol'],
                                                  self.provider)
        # The existing local file is deleted before the data is fetched.
        # NOTE(review): presumably so the reader does not reuse the stale
        # local copy -- confirm before changing the order.
        if os.path.exists(filename):
            os.remove(filename)
        fetched = self.get_dividends(**kwargs)
        merged = self.reader.update_df(filename, fetched, C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, merged)
        return filename if os.path.exists(filename) else None

    def get_splits(self, symbol, timeframe='max'):
        """Load the splits CSV of ``symbol`` for the current provider.

        The loaded frame is passed through ``reader.data_in_timeframe`` on
        the ``C.EX`` column with ``timeframe`` and the result is returned.
        """
        path = self.finder.get_splits_path(symbol, self.provider)
        cached = self.reader.load_csv(path)
        return self.reader.data_in_timeframe(cached, C.EX, timeframe)

    def standardize_splits(self, symbol, df):
        """Run ``standardize`` with the split column mapping.

        ``C.EX`` is the time column, ``C.RATIO`` the value column and
        missing values default to 1.
        """
        full_mapping = {
            'exDate': C.EX,
            'paymentDate': C.PAY,
            'declaredDate': C.DEC,
            'ratio': C.RATIO,
        }
        filename = self.finder.get_splits_path(symbol, self.provider)
        return self.standardize(df, full_mapping, filename, [C.EX, C.RATIO], 1)

    def save_splits(self, **kwargs):
        """Refresh the splits CSV of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_splits``.
        Returns the file path when the file exists afterwards, else None.
        """
        filename = self.finder.get_splits_path(kwargs['symbol'],
                                               self.provider)
        # The existing local file is deleted before the data is fetched.
        # NOTE(review): presumably so the reader does not reuse the stale
        # local copy -- confirm before changing the order.
        if os.path.exists(filename):
            os.remove(filename)
        fetched = self.get_splits(**kwargs)
        merged = self.reader.update_df(filename, fetched, C.EX, C.DATE_FMT)
        self.writer.update_csv(filename, merged)
        return filename if os.path.exists(filename) else None

    def standardize_ohlc(self, symbol, df, filename=None):
        """Run ``standardize`` with the OHLC mapping and fix count columns.

        ``filename`` falls back to the OHLC path of ``symbol`` when it is
        falsy. After standardizing, the ``C.VOL`` and ``C.TRADES`` columns
        (when present) are converted to int, with nulls becoming 0.
        """
        full_mapping = {
            'date': C.TIME,
            'open': C.OPEN,
            'high': C.HIGH,
            'low': C.LOW,
            'close': C.CLOSE,
            'volume': C.VOL,
            'average': C.AVG,
            'trades': C.TRADES,
        }

        if not filename:
            filename = self.finder.get_ohlc_path(symbol, self.provider)

        price_cols = [C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE]
        df = self.standardize(df, full_mapping, filename, price_cols, 0)

        def to_count(val):
            return 0 if pd.isnull(val) else int(val)

        for col in (C.VOL, C.TRADES):
            if col in df:
                df[col] = df[col].apply(to_count)

        return df

    def get_ohlc(self, symbol, timeframe='max'):
        """Load the OHLC CSV of ``symbol`` for the current provider.

        The loaded frame is passed through ``reader.data_in_timeframe`` on
        the ``C.TIME`` column with ``timeframe`` and the result is returned.
        """
        path = self.finder.get_ohlc_path(symbol, self.provider)
        cached = self.reader.load_csv(path)
        return self.reader.data_in_timeframe(cached, C.TIME, timeframe)

    def save_ohlc(self, **kwargs):
        """Refresh the OHLC CSV of ``kwargs['symbol']``.

        All keyword arguments are forwarded to ``get_ohlc``.
        Returns the file path when the file exists afterwards, else None.
        """
        filename = self.finder.get_ohlc_path(kwargs['symbol'], self.provider)
        # The existing local file is deleted before the data is fetched.
        # NOTE(review): presumably so the reader does not reuse the stale
        # local copy -- confirm before changing the order.
        if os.path.exists(filename):
            os.remove(filename)
        fetched = self.get_ohlc(**kwargs)
        merged = self.reader.update_df(filename, fetched, C.TIME, C.DATE_FMT)
        self.writer.update_csv(filename, merged)
        return filename if os.path.exists(filename) else None

    def get_social_sentiment(self, symbol, timeframe='max'):
        """Load the sentiment CSV of ``symbol`` and keep the sentiment columns.

        The frame is passed through ``reader.data_in_timeframe`` on
        ``C.TIME`` and then reduced to ``C.TIME``, ``C.POS`` and ``C.NEG``.
        """
        path = self.finder.get_sentiment_path(symbol)
        cached = self.reader.load_csv(path)
        in_range = self.reader.data_in_timeframe(cached, C.TIME, timeframe)
        return in_range[[C.TIME, C.POS, C.NEG]]

    def get_social_volume(self, symbol, timeframe='max'):
        """Load the sentiment CSV of ``symbol`` and keep the volume columns.

        The frame is passed through ``reader.data_in_timeframe`` on
        ``C.TIME`` and then reduced to ``C.TIME``, ``C.VOL`` and ``C.DELTA``.
        """
        path = self.finder.get_sentiment_path(symbol)
        cached = self.reader.load_csv(path)
        in_range = self.reader.data_in_timeframe(cached, C.TIME, timeframe)
        return in_range[[C.TIME, C.VOL, C.DELTA]]

    def save_social_sentiment(self, **kwargs):
        """Refresh the sentiment CSV of ``kwargs['symbol']``.

        Sentiment and volume frames are fetched separately (all keyword
        arguments are forwarded to ``get_social_sentiment`` and
        ``get_social_volume``), each is passed through ``reader.update_df``,
        and the non-empty ones are outer-merged on ``C.TIME`` before being
        written.

        Returns:
            The file path when the file exists after writing; None when
            both frames are empty or the file is absent afterwards.
        """
        symbol = kwargs['symbol']
        filename = self.finder.get_sentiment_path(symbol)

        # The existing local file is deleted before the data is fetched.
        # NOTE(review): presumably so the reader does not reuse the stale
        # local copy -- confirm before changing the order.
        if os.path.exists(filename):
            os.remove(filename)

        sen_df = self.reader.update_df(filename,
                                       self.get_social_sentiment(**kwargs),
                                       C.TIME)
        # Select with an ordered list: pandas >= 2.0 rejects set indexers
        # and a set gives no stable column order.
        sen_df = sen_df[[
            col for col in (C.TIME, C.POS, C.NEG) if col in sen_df.columns
        ]]

        vol_df = self.reader.update_df(filename,
                                       self.get_social_volume(**kwargs),
                                       C.TIME)
        vol_df = vol_df[[
            col for col in (C.TIME, C.VOL, C.DELTA) if col in vol_df.columns
        ]]

        if sen_df.empty and vol_df.empty:
            # Nothing to write.
            return
        if vol_df.empty:
            df = sen_df
        elif sen_df.empty:
            df = vol_df
        else:
            df = sen_df.merge(vol_df, how="outer", on=C.TIME)

        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename

    def standardize_sentiment(self, symbol, df):
        """Run ``standardize`` with the sentiment mapping.

        ``C.TIME`` is the time column, ``C.POS`` and ``C.NEG`` are the
        value columns and missing values default to 0.

        Returns:
            The standardized frame restricted to whichever of ``C.TIME``,
            ``C.POS`` and ``C.NEG`` it contains, in that order.
        """
        full_mapping = dict(
            zip(['timestamp', 'bullish', 'bearish'], [C.TIME, C.POS, C.NEG]))
        # NOTE(review): the provider is passed here while the get/save
        # sentiment methods call get_sentiment_path(symbol) without it --
        # confirm both resolve to the same file.
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.POS, C.NEG], 0)
        # Select with an ordered list: pandas >= 2.0 rejects set indexers
        # and a set gives no stable column order.
        return df[[col for col in (C.TIME, C.POS, C.NEG) if col in df.columns]]

    def standardize_volume(self, symbol, df):
        """Run ``standardize`` with the social-volume mapping.

        ``C.TIME`` is the time column, ``C.VOL`` and ``C.DELTA`` are the
        value columns and missing values default to 0.

        Returns:
            The standardized frame restricted to whichever of ``C.TIME``,
            ``C.VOL`` and ``C.DELTA`` it contains, in that order.
        """
        full_mapping = dict(
            zip(['timestamp', 'volume_score', 'volume_change'],
                [C.TIME, C.VOL, C.DELTA]))
        # NOTE(review): the provider is passed here while the get/save
        # sentiment methods call get_sentiment_path(symbol) without it --
        # confirm both resolve to the same file.
        filename = self.finder.get_sentiment_path(symbol, self.provider)
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.VOL, C.DELTA], 0)
        # Select with an ordered list: pandas >= 2.0 rejects set indexers
        # and a set gives no stable column order.
        return df[[
            col for col in (C.TIME, C.VOL, C.DELTA) if col in df.columns
        ]]

    def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=False):
        """Yield one intraday frame per date in ``timeframe``.

        For each date from ``traveller.dates_in_range(timeframe)`` the
        intraday CSV of ``symbol`` is loaded and passed through
        ``reader.data_in_timeframe`` on ``C.TIME``. ``min`` and
        ``extra_hrs`` are accepted but not used yet.
        """
        # TODO: implement a flexible way to turn the 1 min dataset into
        #  5, 30 or 60 min data
        # TODO: implement a way to only get market hours
        yield from (
            self.reader.data_in_timeframe(
                self.reader.load_csv(
                    self.finder.get_intraday_path(symbol, day,
                                                  self.provider)),
                C.TIME, timeframe)
            for day in self.traveller.dates_in_range(timeframe)
        )

    def save_intraday(self, **kwargs):
        """Write one intraday CSV per non-empty frame from ``get_intraday``.

        All keyword arguments are forwarded to ``get_intraday``. The date
        used for each file path is taken from the first ``C.TIME`` value of
        the frame.

        Returns:
            The list of file paths that exist after writing.
        """
        symbol = kwargs['symbol']
        # Loop-invariant timestamp format used when saving.
        save_fmt = f'{C.DATE_FMT} {C.TIME_FMT}'
        filenames = []

        for df in self.get_intraday(**kwargs):
            if df.empty:
                # No rows means no first timestamp to derive the date
                # from; indexing it would raise, so skip this frame.
                continue
            date = df[C.TIME].iloc[0].strftime(C.DATE_FMT)
            filename = self.finder.get_intraday_path(symbol, date,
                                                     self.provider)
            if os.path.exists(filename):
                os.remove(filename)
            df = self.reader.update_df(filename, df, C.TIME, save_fmt)
            self.writer.update_csv(filename, df)
            if os.path.exists(filename):
                filenames.append(filename)
        return filenames

    def get_unemployment_rate(self, timeframe='max'):
        """Load the unemployment CSV.

        The loaded frame is passed through ``reader.data_in_timeframe`` on
        the ``C.TIME`` column with ``timeframe`` and the result is returned.
        """
        cached = self.reader.load_csv(self.finder.get_unemployment_path())
        return self.reader.data_in_timeframe(cached, C.TIME, timeframe)

    def standardize_unemployment(self, df):
        """Run ``standardize`` with the unemployment mapping.

        ``C.TIME`` is the time column, ``C.UN_RATE`` the value column and
        missing values default to 0.
        """
        full_mapping = {'time': C.TIME, 'value': C.UN_RATE}
        filename = self.finder.get_unemployment_path()
        wanted = [C.TIME, C.UN_RATE]
        return self.standardize(df, full_mapping, filename, wanted, 0)

    def save_unemployment_rate(self, **kwargs):
        """Refresh the unemployment CSV.

        All keyword arguments are forwarded to ``get_unemployment_rate``.
        Returns the file path when the file exists afterwards, else None.
        """
        filename = self.finder.get_unemployment_path()
        # The existing local file is deleted before the data is fetched.
        # NOTE(review): presumably so the reader does not reuse the stale
        # local copy -- confirm before changing the order.
        if os.path.exists(filename):
            os.remove(filename)
        fetched = self.get_unemployment_rate(**kwargs)
        merged = self.reader.update_df(filename, fetched, C.TIME, '%Y-%m')
        self.writer.update_csv(filename, merged)
        return filename if os.path.exists(filename) else None

    def standardize_s2f_ratio(self, df):
        """Run ``standardize`` with the s2f ratio mapping.

        ``C.TIME`` is the time column, ``C.HALVING`` and ``C.RATIO`` are
        the value columns and missing values default to 0.

        Returns:
            The standardized frame restricted to whichever of ``C.TIME``,
            ``C.HALVING`` and ``C.RATIO`` it contains, in that order.
        """
        full_mapping = dict(
            zip(['t', 'o.daysTillHalving', 'o.ratio'],
                [C.TIME, C.HALVING, C.RATIO]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename,
                              [C.TIME, C.HALVING, C.RATIO], 0)
        # Select with an ordered list: pandas >= 2.0 rejects set indexers
        # and a set gives no stable column order.
        return df[[
            col for col in (C.TIME, C.HALVING, C.RATIO) if col in df.columns
        ]]

    def get_s2f_ratio(self, timeframe='max'):
        """Load the s2f CSV and keep the ratio columns.

        The frame is passed through ``reader.data_in_timeframe`` on
        ``C.TIME`` and then reduced to ``C.TIME``, ``C.HALVING`` and
        ``C.RATIO``.
        """
        cached = self.reader.load_csv(self.finder.get_s2f_path())
        in_range = self.reader.data_in_timeframe(cached, C.TIME, timeframe)
        return in_range[[C.TIME, C.HALVING, C.RATIO]]

    def standardize_s2f_deflection(self, df):
        """Run ``standardize`` with the s2f deflection mapping.

        ``C.TIME`` is the time column, ``C.VAL`` the value column and
        missing values default to 1.

        Returns:
            The standardized frame restricted to whichever of ``C.TIME``
            and ``C.VAL`` it contains, in that order.
        """
        full_mapping = dict(zip(['t', 'v'], [C.TIME, C.VAL]))
        filename = self.finder.get_s2f_path()
        df = self.standardize(df, full_mapping, filename, [C.TIME, C.VAL], 1)
        # Select with an ordered list: pandas >= 2.0 rejects set indexers
        # and a set gives no stable column order.
        return df[[col for col in (C.TIME, C.VAL) if col in df.columns]]

    def get_s2f_deflection(self, timeframe='max'):
        """Load the s2f CSV and keep the deflection columns.

        The frame is passed through ``reader.data_in_timeframe`` on
        ``C.TIME`` and then reduced to ``C.TIME`` and ``C.VAL``.
        """
        cached = self.reader.load_csv(self.finder.get_s2f_path())
        in_range = self.reader.data_in_timeframe(cached, C.TIME, timeframe)
        return in_range[[C.TIME, C.VAL]]

    def save_s2f(self, **kwargs):
        """Refresh the s2f CSV.

        Ratio and deflection frames are fetched separately (all keyword
        arguments are forwarded to ``get_s2f_ratio`` and
        ``get_s2f_deflection``), each is passed through
        ``reader.update_df``, and the non-empty ones are outer-merged on
        ``C.TIME`` before being written.

        Returns:
            The file path when the file exists after writing; None when
            both frames are empty or the file is absent afterwards.
        """
        filename = self.finder.get_s2f_path()

        # The existing local file is deleted before the data is fetched.
        # NOTE(review): presumably so the reader does not reuse the stale
        # local copy -- confirm before changing the order.
        if os.path.exists(filename):
            os.remove(filename)

        rat_df = self.reader.update_df(filename, self.get_s2f_ratio(**kwargs),
                                       C.TIME, C.DATE_FMT)
        # Select with an ordered list: pandas >= 2.0 rejects set indexers
        # and a set gives no stable column order.
        rat_df = rat_df[[
            col for col in (C.TIME, C.HALVING, C.RATIO)
            if col in rat_df.columns
        ]]

        def_df = self.reader.update_df(filename,
                                       self.get_s2f_deflection(**kwargs),
                                       C.TIME, C.DATE_FMT)
        def_df = def_df[[
            col for col in (C.TIME, C.VAL) if col in def_df.columns
        ]]

        if rat_df.empty and def_df.empty:
            # Nothing to write.
            return
        if def_df.empty:
            df = rat_df
        elif rat_df.empty:
            df = def_df
        else:
            df = rat_df.merge(def_df, how="outer", on=C.TIME)

        self.writer.update_csv(filename, df)
        if os.path.exists(filename):
            return filename