Пример #1
0
    def concat_data(cls, self, df):
        """Combine *df* with any previously written parquet file and store on self.

        Dedupes on 'symbol' when a prior file exists, then shrinks dtypes
        via dataTypes before assigning to self.df.
        """
        if not os.path.isfile(self.fpath):
            # No previous file — just minimize the new frame's dtypes.
            self.df = dataTypes(df).df.copy(deep=True)
            return

        previous = pd.read_parquet(self.fpath)
        combined = pd.concat([previous, df])
        combined = combined.drop_duplicates(subset='symbol').reset_index(drop=True)
        # Minimize size of data
        self.df = dataTypes(combined).df.copy(deep=True)
Пример #2
0
 def clean_concat_data(cls, self):
     """Concat self.df with the existing parquet file and minimize dtypes.

     Leaves self.df untouched when no prior file exists at self.fpath.
     """
     if os.path.isfile(self.fpath):
         previous = pd.read_parquet(self.fpath)
         combined = pd.concat([previous, self.df])
         # Minimize data size
         self.df = dataTypes(combined).df.copy(deep=True)
Пример #3
0
    def _convert_to_pandas(cls, self, sh_get):
        """Parse the raw HTTP response body into a dtype-minimized DataFrame."""
        buffer = BytesIO(sh_get.content)
        frame = pd.read_csv(
            buffer,
            escapechar='\n',
            delimiter=',',
            skipinitialspace=False,
        )
        # Shrink dtypes (object -> category, etc.) for parquet storage.
        frame = dataTypes(frame, parquet=True).df

        return frame
Пример #4
0
    def get_data(cls, self, base_url, headers):
        """Get Alpaca symbol reference data.

        On success (< 400) stores a dtype-minimized DataFrame on self.df;
        otherwise prints the raw response body for debugging.
        """
        get_ref = requests.get(f"{base_url}/assets", headers=headers)

        if get_ref.status_code >= 400:
            # Request failed — surface the raw response content.
            help_print_arg(get_ref.content)
        else:
            self.df = dataTypes(pd.DataFrame(get_ref.json())).df
Пример #5
0
    def get_full_hist(cls, self, sym_list):
        """Fetch full price history per symbol and write one parquet per year."""
        for sym in sym_list:
            url = f"{self.base_url}/stock/{sym}/chart/{self.date_range}"
            resp = requests.get(url, params=self.payload)
            df = pd.DataFrame(resp.json())

            df['year'] = pd.to_datetime(df['date']).dt.year
            # Write a separate file for each (year, symbol) bucket.
            for yr in df['year'].value_counts().index.tolist():
                yr_df = (df[df['year'] == yr]
                         .drop(columns=['year'])
                         .reset_index(drop=True))
                yr_df = dataTypes(yr_df).df
                fpath = f"{self.base_dir}/StockEOD/{yr}/{sym.lower()[0]}/_{sym}.parquet"
                write_to_parquet(yr_df, fpath)
Пример #6
0
    def symref_format(cls, self, df):
        """Format symbol reference data.

        Splits the trailing 15 characters of the OSI symbol
        (yymmdd expiry + C/P side + 8-digit strike) into side, strike,
        and expiration columns, then minimizes dtypes for parquet.

        Parameters: df — frame with an 'OSI Symbol' column.
        Returns the formatted frame.
        """
        df['sym_suf'] = df['OSI Symbol'].str[-15:]
        df['side'] = df['sym_suf'].str[6]
        # OSI strikes are 8 digits with an implied decimal point after the
        # 5th digit (5 integer + 3 decimal digits). The previous code took
        # only str[13] (the hundredths digit), silently dropping the tenths
        # digit at str[12] — e.g. a 150.500 strike became '00150.0'.
        df['strike'] = (df['sym_suf'].str[7:12] + '.' + df['sym_suf'].str[12:15])
        df['expirationDate'] = df['sym_suf'].str[0:6]
        # Columns may already be absent depending on the source file.
        try:
            df.drop(columns=['Closing Only', 'Matching Unit', 'sym_suf'],
                    inplace=True)
        except KeyError:
            pass

        df['yr'] = df['expirationDate'].str[0:2]
        df['mo'] = df['expirationDate'].str[2:4]
        df['day'] = df['expirationDate'].str[4:6]

        df.rename(columns={'Cboe Symbol': 'Symbol'}, inplace=True)
        # Minimize dtypes for parquet storage
        df = dataTypes(df, parquet=True).df.copy()

        return df
    def _get_clean_data(cls, self, sym, period, interval, proxy):
        """Request and clean data from yfinance.

        Downloads OHLCV for *sym*, renames columns to the project's
        f-prefixed schema, minimizes dtypes, and keeps only rows up to
        the latest IEX end-of-day date (stored on self.df_yf).
        """
        raw = yf.download(
            tickers=sym,
            period=period,
            interval=interval,
            group_by='ticker',
            auto_adjust=True,
            prepost=False,
            proxy=proxy,
        )

        df = raw.reset_index()
        df.insert(1, 'symbol', sym)
        rename_map = {
            'Date': 'date',
            'Open': 'fOpen',
            'High': 'fHigh',
            'Low': 'fLow',
            'Close': 'fClose',
            'Volume': 'fVolume',
        }
        df.rename(columns=rename_map, inplace=True)
        # Shrink dtypes before filtering.
        df = dataTypes(df, parquet=True).df

        latest = getDate.query('iex_eod')
        self.df_yf = df[df['date'].dt.date <= latest]
Пример #8
0
    def merge_dfs(cls, self):
        """Merge mmo and symref dataframes.

        Inner-joins on Symbol/Underlying (plus exchange when both frames
        have it), stamps the CBOE report date, and minimizes dtypes.
        Returns an empty DataFrame if the merge raises TypeError.
        """
        on_cols = ['Symbol', 'Underlying']
        both_have_exchange = ('exchange' in self.mmo_df.columns
                              and 'exchange' in self.sym_df.columns)
        if both_have_exchange:
            on_cols.append('exchange')

        try:
            df = self.mmo_df.merge(self.sym_df, on=on_cols, how='inner')
            df.reset_index(inplace=True, drop=True)
            df['rptDate'] = getDate.query('cboe')
            # Change data types to reduce file size
            df = dataTypes(df, parquet=True).df
        except TypeError:
            # Merge on incompatible key dtypes — return an empty frame.
            df = pd.DataFrame()
        return df
Пример #9
0
 def format_data(cls, self):
     """Format stocktwits data.

     Drops unused columns, minimizes dtypes, and stamps each row with
     the current US/Eastern timestamp.
     """
     unused_cols = ['is_following', 'title', 'aliases']
     self.rec_df.drop(columns=unused_cols, inplace=True)
     self.rec_df = dataTypes(self.rec_df).df
     self.rec_df['timestamp'] = pd.Timestamp.now('US/Eastern')
Пример #10
0
 def _conver_cols_nopop(cls, self):
     """Convert columns from not popular data frame."""
     # Minimize dtypes for parquet storage and store back on self.
     minimized = dataTypes(self.df, parquet=True).df
     self.df = minimized
Пример #11
0
def read_clean_combined_all(local: bool = False, dt=None, filter_syms: bool = True):
    """Read, clean, and add columns to StockEOD combined all.

    Parameters
    ----------
    local : bool
        If True, read the most recent local combined_all parquet file;
        otherwise pull current-year data via serverAPI and combine it
        with the 2015-2020 historical parquet files.
    dt : optional
        Object with a ``.year`` attribute used as the cutoff year;
        defaults to ``getDate.query('iex_eod')``.
    filter_syms : bool
        Server path only — when True, keep only symbols returned by
        ``remove_funds_spacs()``.

    Returns
    -------
    DataFrame with derived columns (fRange, vol/mil, fChangeP, gap,
    gRange, cumPerc, perc5, vol_avg_2m, fCP5, RSI, moving averages,
    fHighMax), sorted by symbol/date and dtype-minimized.
    """
    df_all = None

    if local:
        # Most recent locally-stored combined_all file.
        bpath = Path(baseDir().path, 'StockEOD/combined_all')
        fpath = get_most_recent_fpath(bpath)
        cols_to_read = [
            'date', 'symbol', 'fOpen', 'fHigh', 'fLow', 'fClose', 'fVolume'
        ]
        df_all = pd.read_parquet(fpath, columns=cols_to_read)
        # Parquet may store 'date' as object; normalize to datetime64.
        if df_all['date'].dtype == 'object':
            df_all['date'] = pd.to_datetime(df_all['date'])
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
    else:
        cols_to_read = [
            'date', 'symbol', 'fOpen', 'fHigh', 'fLow', 'fClose', 'fVolume'
        ]
        df_all = serverAPI('stock_close_cb_all').df
        df_all = df_all[cols_to_read]

        if filter_syms:
            # Keep only common-stock symbols (drop funds/SPACs).
            all_cs_syms = remove_funds_spacs()
            df_all = df_all[df_all['symbol'].isin(
                all_cs_syms['symbol'])].copy()

        df_all['date'] = pd.to_datetime(df_all['date'])

        # Define base bpath for 2015-2020 stock data
        bpath = Path(baseDir().path, 'historical/each_sym_all')
        # NOTE(review): 'each_sym_all' appears in bpath AND again in the
        # joinpath below, yielding .../each_sym_all/each_sym_all/combined_all.
        # Confirm the directory really nests twice.
        path = get_most_recent_fpath(
            bpath.joinpath('each_sym_all', 'combined_all'))
        df_hist = pd.read_parquet(path)
        # Combine 2015-2020 stock data with ytd
        df_all = pd.concat([df_hist, df_all]).copy()

        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
        df_all.reset_index(drop=True, inplace=True)

    if not dt:
        dt = getDate.query('iex_eod')
    # Exclude all dates from before this year
    # (comparing datetime64 against the year string keeps rows from
    # Jan 1 of dt.year onward); also drop rows with missing volume.
    df_all = (df_all[df_all['date'] >= str(dt.year)].dropna(
        subset=['fVolume']).copy())

    # Get rid of all symbols that only have 1 day of data
    df_vc = df_all['symbol'].value_counts()
    df_vc_1 = df_vc[df_vc == 1].index.tolist()
    df_all = (df_all[~df_all['symbol'].isin(df_vc_1)].reset_index(
        drop=True).copy())
    # Sort by symbol, date ascending
    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    df_all['fRange'] = (df_all['fHigh'] - df_all['fLow'])
    df_all['vol/mil'] = (df_all['fVolume'].div(1000000))
    # NOTE(review): these shifts cross symbol boundaries. Most derived
    # columns below guard with symbol == prev_symbol, but 'gRange' uses
    # prev_close unguarded, so the first row of each symbol compares
    # against the previous symbol's close — confirm this is acceptable.
    df_all['prev_close'] = df_all['fClose'].shift(periods=1, axis=0)
    df_all['prev_symbol'] = df_all['symbol'].shift(periods=1, axis=0)

    # Add fChangeP col
    print('Fib funcs: adding fChangeP column')
    df_all = add_fChangeP_col(df_all)

    # Merge with df_all and resume

    # Add gap column
    print('Fib funcs: adding gap column')
    df_all = add_gap_col(df_all)

    # Add range of gap: when yesterday's close is below today's low
    # (a gap up), measure from prev_close; otherwise use the day's range.
    df_all['gRange'] = (np.where(df_all['prev_close'] < df_all['fLow'],
                                 df_all['fHigh'] - df_all['prev_close'],
                                 df_all['fHigh'] - df_all['fLow']))

    # NOTE: np.NaN was removed in NumPy 2.0 (use np.nan if upgrading).
    df_all['cumPerc'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                 df_all['fChangeP'].cumsum(), np.NaN)

    # Forward 5-day cumulative percent change.
    df_all['perc5'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                               df_all['cumPerc'].shift(-5) - df_all['cumPerc'],
                               np.NaN)

    # 60-row rolling mean volume. NOTE(review): column name says '2m'
    # (2 months?) but 60 trading days is closer to 3 months — confirm.
    df_all['vol_avg_2m'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                    df_all['fVolume'].rolling(60).mean(),
                                    np.NaN)

    # Add cumulative sum of last 5 fChangeP rows
    df_all['fCP5'] = (np.where(
        df_all['symbol'] == df_all['prev_symbol'],
        df_all['fChangeP'].rolling(min_periods=1, window=5).sum(), 0))

    # Defragment the frame after the many column inserts above.
    df_all = df_all.copy()
    # Calc RSI and moving averages
    print('Fib Funcs: calc_rsi')
    df_all = calc_rsi(df_all)
    print('Fib Funcs: making_moving_averages')
    df_all = make_moving_averages(df_all)

    # fHighMax funcs
    print('Fib funcs: fHighMax')
    df_all = add_fHighMax_col(df_all).copy()

    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    # Round float32 columns through float64 to avoid float32 repr noise.
    float_32s = df_all.dtypes[df_all.dtypes == np.float32].index
    for col in float_32s:
        df_all[col] = df_all[col].astype(np.float64).round(3)

    # Final dtype minimization before returning.
    df_all = dataTypes(df_all, parquet=True).df.copy()

    return df_all