# Example #1
# 0
def get_df_prices(sym, start_str, end_str):
    '''Return dataframe with minute-level stock price data
    from start date to end date (inclusive).

    Prices are downloaded with yfinance (pre/post-market candles
    included), trimmed to the requested date range, stripped of
    timezone info and tagged with a regular-hours flag.

    Args:
        sym (str): Ticker symbol e.g. 'BYND'
        start_str (str): Start date string e.g. '2020-07-18'
        end_str (str): End date string e.g. '2020-07-18'
    Returns:
        df (pandas.Dataframe): Columns sym, datetime, open, high, low,
            adj_close, volume, is_reg_hours
    Raises:
        AssertionError: If start_str sorts after end_str
    '''
    assert start_str <= end_str, 'start_str must not be after end_str'
    # the download end is padded via add_days(end_str, 3); rows past
    # end_str are dropped again by the date filter below
    end_str_mod = add_days(end_str, 3)
    with suppress_stdout():
        df = yf.download(sym,
                         start=start_str,
                         end=end_str_mod,
                         interval='1m',
                         progress=0,
                         prepost=True).reset_index()
    # compare on 'YYYY-MM-DD' strings so the bounds are inclusive
    date_str = df['Datetime'].dt.date.astype('str')
    is_date_range = (date_str >= start_str) & (date_str <= end_str)
    # copy so the column assignments below act on an independent frame
    # rather than on a slice of the downloaded one
    df = df[is_date_range].copy()
    df['Datetime'] = df['Datetime'].dt.tz_localize(None)  #remove timezone
    # flag candles whose wall-clock time is within 09:30:00-15:59:00
    time_str = df['Datetime'].dt.time.astype('str')
    is_reg_hours = (time_str >= '09:30:00') & (time_str <= '15:59:00')
    df['is_reg_hours'] = np.where(is_reg_hours, 1, 0)
    df['sym'] = sym
    df = df.rename(
        columns={
            'Datetime': 'datetime',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Adj Close': 'adj_close',
            'Volume': 'volume'
        })
    ls_col = [
        'sym',
        'datetime',
        'open',
        'high',
        'low',
        'adj_close',
        'volume',
        'is_reg_hours',
    ]
    return df[ls_col]

###################
# Update prices_d #
###################
# Runs only when the UPDATE_PRICES_D flag is truthy: checks whether daily
# prices newer than what is stored in prices_d are available and, if so,
# loads the list of symbols from the stocks table.
if UPDATE_PRICES_D:
    print(MSG_PRICES_D_1)
    # get max date present (latest date stored in prices_d for IBM)
    # NOTE(review): IBM is presumably used as a reference ticker for the
    # whole table - confirm
    q = '''
        SELECT DATE(MAX(date))
          FROM prices_d
         WHERE sym='IBM'
    '''
    # single-cell result: first row, first column of the query output
    max_date_str = pd.read_sql(q, db.conn).iloc[0, 0]
    # check dates: download daily IBM candles from the stored max date onward
    end = add_days(datetime.datetime.today().strftime('%Y-%m-%d'),
                   3)  #today's date plus 3 days
    df = yf.download('IBM',
                     start=max_date_str,
                     end=end,
                     interval='1d',
                     progress=0).reset_index()
    # keep only rows strictly later than the stored max date (string compare)
    df = df[df['Date'].astype('str') > max_date_str]
    # only proceed when at least one newer daily row was returned
    if not df.empty:
        print(MSG_PRICES_D_2.format(max_date_str, end))
        # get ls_sym: every symbol in stocks whose sec column is not null
        q = '''
            SELECT sym
              FROM stocks
             WHERE sec IS NOT NULL
        '''
        ls_sym = pd.read_sql(q, db.conn)['sym'].to_list()
# Example #3
# 0
def get_df_i(sym, date_str, live_data, db, num_candles_min=200):
    '''Returns interim dataframe with price data and
    trading indicators for input symbol and date

    Minute candles are loaded for date_str and for the day returned by
    prev_weekday(date_str), indicators are computed over both days, and
    only the rows for date_str are returned.

    Args:
        sym (str): Ticker symbol
        date_str  (str): Date to build indicators for, 'YYYY-MM-DD'
        live_data (int): Truthy to download prices with yfinance,
            falsy to read them from the prices_m table
        db (Database object): Must expose `conn` for pd.read_sql
        num_candles_min (int): Minimum number of candles required for
            date_str; only enforced when live_data is falsy
    Returns:
        df_i (pandas.Dataframe)
    Raises:
        Exception: If there is no minute data for the previous day or
            for date_str, if date_str has fewer than num_candles_min
            candles (non-live only), or if any output column has nulls
    '''
    start_str = prev_weekday(
        date_str)  #start 1 day early to get prev day data for rsi etc
    end_str = add_days(date_str, 3)  #extend end date string due to bug
    if live_data:
        with suppress_stdout():
            df = yf.download(sym,
                             start=start_str,
                             end=end_str,
                             interval='1m',
                             prepost=False,
                             progress=0).reset_index()
        df['Datetime'] = df['Datetime'].dt.tz_localize(None)  #remove timezone
        df = df.rename(
            columns={
                'Adj Close': 'adj_close',
                'Datetime': 'datetime',
                'Open': 'open',
                'High': 'high',
                'Low': 'low',
                'Volume': 'volume'
            })
    else:
        # NOTE(review): values are interpolated straight into the SQL
        # text; switch to pd.read_sql(..., params=...) once the paramstyle
        # of db.conn is confirmed
        q = '''
            SELECT *
              FROM prices_m
             WHERE is_reg_hours = 1
               AND sym='{}'
               AND DATE(datetime)>='{}'
               AND DATE(datetime)<='{}'
             ORDER BY datetime
        '''.format(sym, start_str, date_str)
        df = pd.read_sql(q, db.conn)
        df['datetime'] = pd.to_datetime(df['datetime'])
    df['date_str'] = df['datetime'].dt.date.astype('str')
    if df[df['date_str'] == start_str].empty:
        raise Exception(ERROR_NO_MINUTE_DATA_YTD)
    num_candles_today = df[df['date_str'] == date_str].shape[0]
    if num_candles_today == 0:
        raise Exception(ERROR_NO_MINUTE_DATA_TDY)
    if num_candles_today < num_candles_min and not live_data:
        # the original message template was empty, so this error carried
        # no text at all
        raise Exception(
            'Only {} minute candles for today (minimum required: {}), '
            'skipping!'.format(num_candles_today, num_candles_min))
    # keep [start_str, date_str] only; copy so the indicator columns are
    # written to an independent frame rather than to a slice
    in_range = (df['date_str'] >= start_str) & (df['date_str'] <= date_str)
    df = df[in_range].copy()
    # moving averages; sma180 falls back to sma90 while it is undefined
    df['sma9'] = df['adj_close'].rolling(9).mean()
    df['sma90'] = df['adj_close'].rolling(90).mean()
    df['sma180'] = df['adj_close'].rolling(180).mean()
    df['sma180'] = df['sma180'].fillna(df['sma90'])
    df['sma9_var'] = (df['adj_close'] / df['sma9']) - 1
    df['sma180_var'] = (df['adj_close'] / df['sma180']) - 1
    df = add_rsi(df, 14)  # presumably adds the 'rsi14' column - confirm
    df['spread'] = ((df['adj_close'] / df['open']) - 1).abs()
    df['spread14_e'] = df['spread'].ewm(span=14).mean()
    df['volume14'] = df['volume'].rolling(14).mean()
    df['volume34'] = df['volume'].rolling(34).mean()
    df['volume14_34_var'] = (df['volume14'] / df['volume34']) - 1
    df['volume14_34_var'] = df['volume14_34_var'].fillna(0.0)
    # previous-day reference levels: last, lowest and highest adj_close
    prev_adj_close = df.loc[df['date_str'] == start_str, 'adj_close']
    prev_close = prev_adj_close.iloc[-1]
    prev_floor = prev_adj_close.min()
    prev_ceil = prev_adj_close.max()
    df['prev_close'] = prev_close
    df['prev_close_var'] = df['adj_close'] / prev_close - 1
    df['prev_floor_var'] = (df['adj_close'] / prev_floor) - 1
    df['prev_ceil_var'] = (df['adj_close'] / prev_ceil) - 1
    # per-candle return and its three lags
    df['candle_score'] = df['adj_close'] / df['open'] - 1
    df['prev1_candle_score'] = df['candle_score'].shift(1)
    df['prev2_candle_score'] = df['candle_score'].shift(2)
    df['prev3_candle_score'] = df['candle_score'].shift(3)
    # previous-day rows were only needed to warm up the indicators
    df = df[df['date_str'] == date_str].copy()
    df = add_vwap(df)  # presumably adds 'vwap' and 'vwap_var' - confirm
    df = df.rename(columns={'adj_close': 'close'})
    ls_col = [
        'datetime',
        'close',
        'sma9',
        'sma180',
        'rsi14',
        'vwap',
        'sma9_var',
        'sma180_var',
        'vwap_var',
        'spread14_e',
        'volume14_34_var',
        'prev_close',
        'prev_close_var',
        'prev_floor_var',
        'prev_ceil_var',
        'prev1_candle_score',
        'prev2_candle_score',
        'prev3_candle_score',
    ]
    df = df[ls_col]
    # refuse to return a frame with any null in the output columns
    ls_col_na = df.columns[df.isna().any()].tolist()
    if ls_col_na:
        raise Exception(ERROR_NULL_COL.format(ls_col_na))
    return df.reset_index(drop=True)
def get_df_i(sym, date_str, live_data, db, num_candles_min=200):
    '''Returns interim dataframe with price data and
    trading indicators for input symbol and date

    Minute candles are loaded for date_str and for the day returned by
    prev_weekday(date_str), indicators are computed over both days, and
    only the rows for date_str are returned.

    NOTE(review): get_df_i is defined more than once in this file; if
    both definitions live in the same module the later one wins.

    Args:
        sym (str): Ticker symbol
        date_str (str): Date to build indicators for, 'YYYY-MM-DD'
        live_data (int): Truthy to download prices with yfinance,
            falsy to read them from the prices_m table
        db (Database object): Must expose `conn` for pd.read_sql
        num_candles_min (int): Minimum number of candles required for
            date_str; only enforced when live_data is falsy
    Returns:
        df_i (pandas.Dataframe)
    Raises:
        Exception: If there is no minute data for the previous day or
            for date_str, if date_str has fewer than num_candles_min
            candles (non-live only), or if any output column has nulls
    '''
    start_str = prev_weekday(
        date_str)  #start 1 day early to get prev day data for rsi etc
    end_str = add_days(date_str, 3)  #extend end date string due to bug
    if live_data:
        with suppress_stdout():
            df = yf.download(sym,
                             start=start_str,
                             end=end_str,
                             interval='1m',
                             prepost=False,
                             progress=0).reset_index()
        df['Datetime'] = df['Datetime'].dt.tz_localize(None)  #remove timezone
        df = df.rename(
            columns={
                'Adj Close': 'adj_close',
                'Datetime': 'datetime',
                'Open': 'open',
                'High': 'high',
                'Low': 'low',
                'Volume': 'volume'
            })
    else:
        # NOTE(review): values are interpolated straight into the SQL
        # text; switch to pd.read_sql(..., params=...) once the paramstyle
        # of db.conn is confirmed
        q = '''
            SELECT *
              FROM prices_m
             WHERE is_reg_hours = 1
               AND sym='{}'
               AND DATE(datetime)>='{}'
               AND DATE(datetime)<='{}'
             ORDER BY datetime
        '''.format(sym, start_str, date_str)
        df = pd.read_sql(q, db.conn)
        df['datetime'] = pd.to_datetime(df['datetime'])
    df['date_str'] = df['datetime'].dt.date.astype('str')
    if df[df['date_str'] == start_str].empty:
        raise Exception('No intraday(minute interval) data for previous day!')
    num_candles_today = df[df['date_str'] == date_str].shape[0]
    if num_candles_today == 0:
        raise Exception('No intraday(minute interval) data for today!')
    if num_candles_today < num_candles_min and not live_data:
        raise Exception(
            f'Less than {num_candles_min} Data points for today ({num_candles_today}), skipping!'
        )
    # keep [start_str, date_str] only; copy so the indicator columns are
    # written to an independent frame rather than to a slice
    in_range = (df['date_str'] >= start_str) & (df['date_str'] <= date_str)
    df = df[in_range].copy()
    # moving averages; sma180 falls back to sma90 while it is undefined
    df['sma9'] = df['adj_close'].rolling(9).mean()
    df['sma90'] = df['adj_close'].rolling(90).mean()
    df['sma180'] = df['adj_close'].rolling(180).mean()
    df['sma180'] = df['sma180'].fillna(df['sma90'])
    df['sma9_var'] = (df['adj_close'] / df['sma9']) - 1
    df['sma180_var'] = (df['adj_close'] / df['sma180']) - 1
    df = add_rsi(df, 14)  # presumably adds the 'rsi14' column - confirm
    df['spread'] = ((df['adj_close'] / df['open']) - 1).abs()
    df['spread14_e'] = df['spread'].ewm(span=14).mean()
    df['volume14'] = df['volume'].rolling(14).mean()
    df['volume34'] = df['volume'].rolling(34).mean()
    df['volume14_34_var'] = (df['volume14'] / df['volume34']) - 1
    df['volume14_34_var'] = df['volume14_34_var'].fillna(0.0)
    # previous-day reference levels: last, lowest and highest adj_close
    prev_adj_close = df.loc[df['date_str'] == start_str, 'adj_close']
    prev_close = prev_adj_close.iloc[-1]
    prev_floor = prev_adj_close.min()
    prev_ceil = prev_adj_close.max()
    df['prev_close'] = prev_close
    df['prev_close_var'] = df['adj_close'] / prev_close - 1
    df['prev_floor_var'] = (df['adj_close'] / prev_floor) - 1
    df['prev_ceil_var'] = (df['adj_close'] / prev_ceil) - 1
    # per-candle return and its three lags
    df['candle_score'] = df['adj_close'] / df['open'] - 1
    df['prev1_candle_score'] = df['candle_score'].shift(1)
    df['prev2_candle_score'] = df['candle_score'].shift(2)
    df['prev3_candle_score'] = df['candle_score'].shift(3)
    # previous-day rows were only needed to warm up the indicators
    df = df[df['date_str'] == date_str].copy()
    df = add_vwap(df)  # presumably adds 'vwap' and 'vwap_var' - confirm
    df = df.rename(columns={'adj_close': 'close'})
    ls_col = [
        'datetime',
        'close',
        'sma9',
        'sma180',
        'rsi14',
        'vwap',
        'sma9_var',
        'sma180_var',
        'vwap_var',
        'spread14_e',
        'volume14_34_var',
        'prev_close',
        'prev_close_var',
        'prev_floor_var',
        'prev_ceil_var',
        'prev1_candle_score',
        'prev2_candle_score',
        'prev3_candle_score',
    ]
    df = df[ls_col]
    # refuse to return a frame with any null in the output columns
    ls_col_na = df.columns[df.isna().any()].tolist()
    if ls_col_na:
        raise Exception(f'Null found in df_i columns: {ls_col_na}, skipping!')
    return df.reset_index(drop=True)