Пример #1
0
def get_summary(**kwargs):

    symbol1 = kwargs['symbol1']
    symbol2 = kwargs['symbol2']
    report_date = kwargs['report_date']

    if 'get_diagnosticQ' in kwargs.keys():
        get_diagnosticQ = kwargs['get_diagnosticQ']
    else:
        get_diagnosticQ = False

    report_datetime = cu.convert_doubledate_2datetime(report_date)

    data1 = gsp.get_stock_price_preloaded(ticker=symbol1, data_source='iex', settle_date_to = report_date)
    data2 = gsp.get_stock_price_preloaded(ticker=symbol2, data_source='iex', settle_date_to = report_date)

    merged_data = pd.merge(data1[['close','settle_datetime']], data2[['close','settle_datetime']], how='inner', on='settle_datetime')
    split = int(len(merged_data) * .4)

    if split<200 or report_datetime!=merged_data['settle_datetime'].iloc[-1]:
        return {'price1': np.nan,'price2': np.nan, 'p_value_2': np.nan,'p_value_1': np.nan,
            'beta_1': np.nan, 'beta_2': np.nan,
            'corr': np.nan,
            'cagr1': np.nan, 'cagr2': np.nan,
            'kalpha': np.nan, 'kbeta': np.nan,
            'meanSpread': np.nan, 'stdSpread': np.nan,
            'zScore': np.nan}

    training_data = merged_data[:split]
    test_data = merged_data[split:]
    cointegration_output_2 = sm.tsa.stattools.coint(training_data['close_x'], training_data['close_y'])
    cointegration_output_1 = sm.tsa.stattools.coint(test_data['close_x'], test_data['close_y'])

    regress_output_1 = ss.get_regression_results({'y': test_data['close_y'].values, 'x': test_data['close_x'].values})
    regress_output_2 = ss.get_regression_results({'y': training_data['close_y'].values, 'x': training_data['close_x'].values})
    regress_output_3 = ss.get_regression_results({'y': test_data['close_y'].diff().values, 'x': test_data['close_x'].diff().values})

    merged_data.set_index('settle_datetime', drop=True, inplace=True)
    backtest_output_1 = backtest(merged_data[split:], 'close_x', 'close_y')
    backtest_output_2 = backtest(merged_data[:split], 'close_x', 'close_y')

    if get_diagnosticQ:
        return {'backtest_output': backtest_output_1, 'cagr2': backtest_output_2['cagr']}
    else:
        return {'price1': merged_data['close_x'].iloc[-1],'price2': merged_data['close_y'].iloc[-1],
            'p_value_2': cointegration_output_2[1],'p_value_1': cointegration_output_1[1],
            'beta_1': regress_output_1['beta'], 'beta_2': regress_output_2['beta'],
            'corr': np.sqrt(regress_output_3['rsquared']/100),
            'cagr1': backtest_output_1['cagr'], 'cagr2': backtest_output_2['cagr'],
            'kalpha': backtest_output_1['kalpha'], 'kbeta': backtest_output_1['kbeta'],
            'meanSpread': backtest_output_1['meanSpread'], 'stdSpread': backtest_output_1['stdSpread'],
            'zScore': backtest_output_1['zScore']}
def time_series_regression(**kwargs):

    data_frame_input = kwargs['data_frame_input']
    num_obs = kwargs['num_obs']
    data_frame_input = data_frame_input.iloc[-num_obs:]

    return stats.get_regression_results({
        'x':
        range(num_obs),
        'y':
        data_frame_input[kwargs['y_var_name']],
        'clean_num_obs':
        num_obs
    })
def get_time_series_based_return_forecast(**kwargs):

    data_frame_input = kwargs['data_frame_input']
    regression_window = kwargs['regression_window']
    forecast_window1 = kwargs['forecast_window1']
    forecast_window2 = kwargs['forecast_window2']

    regress_output_list = [
        stats.get_regression_results({
            'x':
            range(regression_window),
            'y':
            data_frame_input['close'].iloc[(i - regression_window +
                                            1):(i + 1)].values,
            'clean_num_obs':
            regression_window
        }) for i in range(regression_window - 1, len(data_frame_input.index))
    ]

    data_frame_input['alpha'] = np.nan
    data_frame_input['beta'] = np.nan

    data_frame_input['alpha'].iloc[(regression_window - 1):] = [
        x['alpha'] for x in regress_output_list
    ]
    data_frame_input['beta'].iloc[(regression_window - 1):] = [
        x['beta'] for x in regress_output_list
    ]

    data_frame_input['tsf_' + str(regression_window) + '_' + str(forecast_window1)] = \
        (100*(data_frame_input['alpha'] + data_frame_input['beta'] * (regression_window + forecast_window1))/(data_frame_input['close']))-100

    data_frame_input['tsf_' + str(regression_window) + '_' + str(forecast_window2)] = \
        (100*(data_frame_input['alpha'] + data_frame_input['beta'] * (regression_window + forecast_window2))/(data_frame_input['close']))-100

    return data_frame_input.drop(['alpha', 'beta'], 1, inplace=False)
def get_intraday_spread_signals(**kwargs):

    ticker_list = kwargs['ticker_list']
    date_to = kwargs['date_to']

    #print(ticker_list)

    ticker_list = [x for x in ticker_list if x is not None]
    ticker_head_list = [
        cmi.get_contract_specs(x)['ticker_head'] for x in ticker_list
    ]
    ticker_class_list = [cmi.ticker_class[x] for x in ticker_head_list]

    #print('-'.join(ticker_list))

    if 'tr_dte_list' in kwargs.keys():
        tr_dte_list = kwargs['tr_dte_list']
    else:
        tr_dte_list = [
            exp.get_days2_expiration(ticker=x,
                                     date_to=date_to,
                                     instrument='futures')['tr_dte']
            for x in ticker_list
        ]

    if 'aggregation_method' in kwargs.keys(
    ) and 'contracts_back' in kwargs.keys():
        aggregation_method = kwargs['aggregation_method']
        contracts_back = kwargs['contracts_back']
    else:

        amcb_output = [
            opUtil.get_aggregation_method_contracts_back(
                cmi.get_contract_specs(x)) for x in ticker_list
        ]
        aggregation_method = max(
            [x['aggregation_method'] for x in amcb_output])
        contracts_back = min([x['contracts_back'] for x in amcb_output])

    if 'futures_data_dictionary' in kwargs.keys():
        futures_data_dictionary = kwargs['futures_data_dictionary']
    else:
        futures_data_dictionary = {
            x: gfp.get_futures_price_preloaded(ticker_head=x)
            for x in list(set(ticker_head_list))
        }

    if 'use_last_as_current' in kwargs.keys():
        use_last_as_current = kwargs['use_last_as_current']
    else:
        use_last_as_current = True

    if 'datetime5_years_ago' in kwargs.keys():
        datetime5_years_ago = kwargs['datetime5_years_ago']
    else:
        date5_years_ago = cu.doubledate_shift(date_to, 5 * 365)
        datetime5_years_ago = cu.convert_doubledate_2datetime(date5_years_ago)

    if 'num_days_back_4intraday' in kwargs.keys():
        num_days_back_4intraday = kwargs['num_days_back_4intraday']
    else:
        num_days_back_4intraday = 10

    contract_multiplier_list = [
        cmi.contract_multiplier[x] for x in ticker_head_list
    ]

    aligned_output = opUtil.get_aligned_futures_data(
        contract_list=ticker_list,
        tr_dte_list=tr_dte_list,
        aggregation_method=aggregation_method,
        contracts_back=contracts_back,
        date_to=date_to,
        futures_data_dictionary=futures_data_dictionary,
        use_last_as_current=use_last_as_current)

    aligned_data = aligned_output['aligned_data']
    current_data = aligned_output['current_data']

    if ticker_head_list in fixed_weight_future_spread_list:
        weights_output = sutil.get_spread_weights_4contract_list(
            ticker_head_list=ticker_head_list)
        spread_weights = weights_output['spread_weights']
        portfolio_weights = weights_output['portfolio_weights']
    else:
        regress_output = stats.get_regression_results({
            'x':
            aligned_data['c2']['change_1'][-60:],
            'y':
            aligned_data['c1']['change_1'][-60:]
        })
        spread_weights = [1, -regress_output['beta']]
        portfolio_weights = [
            1, -regress_output['beta'] * contract_multiplier_list[0] /
            contract_multiplier_list[1]
        ]

    aligned_data['spread'] = 0
    aligned_data['spread_pnl_1'] = 0
    aligned_data['spread_pnl1'] = 0
    spread_settle = 0

    last5_years_indx = aligned_data['settle_date'] >= datetime5_years_ago

    num_contracts = len(ticker_list)

    for i in range(num_contracts):
        aligned_data['spread'] = aligned_data['spread'] + aligned_data[
            'c' + str(i + 1)]['close_price'] * spread_weights[i]
        spread_settle = spread_settle + current_data[
            'c' + str(i + 1)]['close_price'] * spread_weights[i]
        aligned_data[
            'spread_pnl_1'] = aligned_data['spread_pnl_1'] + aligned_data[
                'c' + str(i + 1)]['change_1'] * portfolio_weights[
                    i] * contract_multiplier_list[i]
        aligned_data[
            'spread_pnl1'] = aligned_data['spread_pnl1'] + aligned_data[
                'c' + str(i + 1)]['change1_instant'] * portfolio_weights[
                    i] * contract_multiplier_list[i]

    aligned_data['spread_normalized'] = aligned_data['spread'] / aligned_data[
        'c1']['close_price']

    data_last5_years = aligned_data[last5_years_indx]

    percentile_vector = stats.get_number_from_quantile(
        y=data_last5_years['spread_pnl_1'].values,
        quantile_list=[1, 15, 85, 99],
        clean_num_obs=max(100, round(3 * len(data_last5_years.index) / 4)))

    downside = (percentile_vector[0] + percentile_vector[1]) / 2
    upside = (percentile_vector[2] + percentile_vector[3]) / 2

    date_list = [
        exp.doubledate_shift_bus_days(double_date=date_to, shift_in_days=x)
        for x in reversed(range(1, num_days_back_4intraday))
    ]
    date_list.append(date_to)

    intraday_data = opUtil.get_aligned_futures_data_intraday(
        contract_list=ticker_list, date_list=date_list)

    if len(intraday_data.index) == 0:
        return {
            'downside': downside,
            'upside': upside,
            'intraday_data': intraday_data,
            'trading_data': intraday_data,
            'spread_weight': spread_weights[1],
            'portfolio_weight': portfolio_weights[1],
            'z': np.nan,
            'recent_trend': np.nan,
            'intraday_mean10': np.nan,
            'intraday_std10': np.nan,
            'intraday_mean5': np.nan,
            'intraday_std5': np.nan,
            'intraday_mean2': np.nan,
            'intraday_std2': np.nan,
            'intraday_mean1': np.nan,
            'intraday_std1': np.nan,
            'aligned_output': aligned_output,
            'spread_settle': spread_settle,
            'data_last5_years': data_last5_years,
            'ma_spread_lowL': np.nan,
            'ma_spread_highL': np.nan,
            'ma_spread_low': np.nan,
            'ma_spread_high': np.nan,
            'intraday_sharp': np.nan
        }

    intraday_data['time_stamp'] = [
        x.to_datetime() for x in intraday_data.index
    ]
    intraday_data['settle_date'] = intraday_data['time_stamp'].apply(
        lambda x: x.date())

    end_hour = min([cmi.last_trade_hour_minute[x] for x in ticker_head_list])
    start_hour = max(
        [cmi.first_trade_hour_minute[x] for x in ticker_head_list])

    trade_start_hour = dt.time(9, 30, 0, 0)

    if 'Ag' in ticker_class_list:
        start_hour1 = dt.time(0, 45, 0, 0)
        end_hour1 = dt.time(7, 45, 0, 0)
        selection_indx = [
            x for x in range(len(intraday_data.index))
            if ((intraday_data['time_stamp'].iloc[x].time() < end_hour1) and
                (intraday_data['time_stamp'].iloc[x].time() >= start_hour1)) or
            ((intraday_data['time_stamp'].iloc[x].time() < end_hour) and
             (intraday_data['time_stamp'].iloc[x].time() >= start_hour))
        ]

    else:
        selection_indx = [
            x for x in range(len(intraday_data.index))
            if (intraday_data.index[x].to_datetime().time() < end_hour) and (
                intraday_data.index[x].to_datetime().time() >= start_hour)
        ]

    intraday_data = intraday_data.iloc[selection_indx]

    intraday_data['spread'] = 0

    for i in range(num_contracts):
        intraday_data[
            'c' + str(i + 1),
            'mid_p'] = (intraday_data['c' + str(i + 1)]['best_bid_p'] +
                        intraday_data['c' + str(i + 1)]['best_ask_p']) / 2

        intraday_data['spread'] = intraday_data['spread'] + intraday_data[
            'c' + str(i + 1)]['mid_p'] * spread_weights[i]

    unique_settle_dates = intraday_data['settle_date'].unique()
    intraday_data['spread1'] = np.nan

    for i in range(len(unique_settle_dates) - 1):
        if (intraday_data['settle_date'] == unique_settle_dates[i]).sum() == \
                (intraday_data['settle_date'] == unique_settle_dates[i+1]).sum():
            intraday_data.loc[intraday_data['settle_date'] == unique_settle_dates[i],'spread1'] = \
                intraday_data['spread'][intraday_data['settle_date'] == unique_settle_dates[i+1]].values

    intraday_data = intraday_data[intraday_data['settle_date'].notnull()]

    intraday_mean10 = intraday_data['spread'].mean()
    intraday_std10 = intraday_data['spread'].std()

    intraday_data_last5days = intraday_data[
        intraday_data['settle_date'] >= cu.convert_doubledate_2datetime(
            date_list[-5]).date()]
    intraday_data_last2days = intraday_data[
        intraday_data['settle_date'] >= cu.convert_doubledate_2datetime(
            date_list[-2]).date()]
    intraday_data_yesterday = intraday_data[intraday_data['settle_date'] ==
                                            cu.convert_doubledate_2datetime(
                                                date_list[-1]).date()]

    intraday_mean5 = intraday_data_last5days['spread'].mean()
    intraday_std5 = intraday_data_last5days['spread'].std()

    intraday_mean2 = intraday_data_last2days['spread'].mean()
    intraday_std2 = intraday_data_last2days['spread'].std()

    intraday_mean1 = intraday_data_yesterday['spread'].mean()
    intraday_std1 = intraday_data_yesterday['spread'].std()

    intraday_z = (spread_settle - intraday_mean5) / intraday_std5

    num_obs_intraday = len(intraday_data.index)
    num_obs_intraday_half = round(num_obs_intraday / 2)
    intraday_tail = intraday_data.tail(num_obs_intraday_half)

    num_positives = sum(
        intraday_tail['spread'] > intraday_data['spread'].mean())
    num_negatives = sum(
        intraday_tail['spread'] < intraday_data['spread'].mean())

    if num_positives + num_negatives != 0:
        recent_trend = 100 * (num_positives - num_negatives) / (num_positives +
                                                                num_negatives)
    else:
        recent_trend = np.nan

    intraday_data_shifted = intraday_data.groupby('settle_date').shift(-60)
    intraday_data['spread_shifted'] = intraday_data_shifted['spread']
    intraday_data[
        'delta60'] = intraday_data['spread_shifted'] - intraday_data['spread']

    intraday_data['ewma10'] = pd.ewma(intraday_data['spread'], span=10)
    intraday_data['ewma50'] = pd.ewma(intraday_data['spread'], span=50)
    intraday_data['ewma200'] = pd.ewma(intraday_data['spread'], span=200)

    intraday_data['ma40'] = pd.rolling_mean(intraday_data['spread'], 40)

    intraday_data[
        'ewma50_spread'] = intraday_data['spread'] - intraday_data['ewma50']
    intraday_data[
        'ma40_spread'] = intraday_data['spread'] - intraday_data['ma40']

    selection_indx = [
        x for x in range(len(intraday_data.index))
        if (intraday_data['time_stamp'].iloc[x].time() > trade_start_hour)
    ]
    selected_data = intraday_data.iloc[selection_indx]
    selected_data['delta60Net'] = (contract_multiplier_list[0] *
                                   selected_data['delta60'] /
                                   spread_weights[0])

    selected_data.reset_index(drop=True, inplace=True)
    selected_data['proxy_pnl'] = 0

    t_cost = cmi.t_cost[ticker_head_list[0]]

    ma_spread_low = np.nan
    ma_spread_high = np.nan
    ma_spread_lowL = np.nan
    ma_spread_highL = np.nan
    intraday_sharp = np.nan

    if sum(selected_data['ma40_spread'].notnull()) > 30:
        quantile_list = selected_data['ma40_spread'].quantile([0.1, 0.9])

        down_indx = selected_data['ma40_spread'] < quantile_list[0.1]
        up_indx = selected_data['ma40_spread'] > quantile_list[0.9]

        up_data = selected_data[up_indx]
        down_data = selected_data[down_indx]

        ma_spread_lowL = quantile_list[0.1]
        ma_spread_highL = quantile_list[0.9]

        #return {'selected_data':selected_data,'up_data':up_data,'up_indx':up_indx}

        selected_data.loc[up_indx,
                          'proxy_pnl'] = (-up_data['delta60Net'] -
                                          2 * num_contracts * t_cost).values
        selected_data.loc[down_indx,
                          'proxy_pnl'] = (down_data['delta60Net'] -
                                          2 * num_contracts * t_cost).values

        short_term_data = selected_data[
            selected_data['settle_date'] >= cu.convert_doubledate_2datetime(
                date_list[-5]).date()]
        if sum(short_term_data['ma40_spread'].notnull()) > 30:
            quantile_list = short_term_data['ma40_spread'].quantile([0.1, 0.9])
            ma_spread_low = quantile_list[0.1]
            ma_spread_high = quantile_list[0.9]

        if selected_data['proxy_pnl'].std() != 0:
            intraday_sharp = selected_data['proxy_pnl'].mean(
            ) / selected_data['proxy_pnl'].std()

    return {
        'downside': downside,
        'upside': upside,
        'intraday_data': intraday_data,
        'trading_data': selected_data,
        'spread_weight': spread_weights[1],
        'portfolio_weight': portfolio_weights[1],
        'z': intraday_z,
        'recent_trend': recent_trend,
        'intraday_mean10': intraday_mean10,
        'intraday_std10': intraday_std10,
        'intraday_mean5': intraday_mean5,
        'intraday_std5': intraday_std5,
        'intraday_mean2': intraday_mean2,
        'intraday_std2': intraday_std2,
        'intraday_mean1': intraday_mean1,
        'intraday_std1': intraday_std1,
        'aligned_output': aligned_output,
        'spread_settle': spread_settle,
        'data_last5_years': data_last5_years,
        'ma_spread_lowL': ma_spread_lowL,
        'ma_spread_highL': ma_spread_highL,
        'ma_spread_low': ma_spread_low,
        'ma_spread_high': ma_spread_high,
        'intraday_sharp': intraday_sharp
    }
Пример #5
0
def get_futures_butterfly_signals(**kwargs):

    ticker_list = kwargs['ticker_list']
    date_to = kwargs['date_to']

    if 'tr_dte_list' in kwargs.keys():
        tr_dte_list = kwargs['tr_dte_list']
    else:
        tr_dte_list = [
            exp.get_futures_days2_expiration({
                'ticker': x,
                'date_to': date_to
            }) for x in ticker_list
        ]

    if 'aggregation_method' in kwargs.keys(
    ) and 'contracts_back' in kwargs.keys():
        aggregation_method = kwargs['aggregation_method']
        contracts_back = kwargs['contracts_back']
    else:
        amcb_output = opUtil.get_aggregation_method_contracts_back(
            cmi.get_contract_specs(ticker_list[0]))
        aggregation_method = amcb_output['aggregation_method']
        contracts_back = amcb_output['contracts_back']

    if 'use_last_as_current' in kwargs.keys():
        use_last_as_current = kwargs['use_last_as_current']
    else:
        use_last_as_current = False

    if 'futures_data_dictionary' in kwargs.keys():
        futures_data_dictionary = kwargs['futures_data_dictionary']
    else:
        futures_data_dictionary = {
            x: gfp.get_futures_price_preloaded(ticker_head=x)
            for x in [cmi.get_contract_specs(ticker_list[0])['ticker_head']]
        }

    if 'contract_multiplier' in kwargs.keys():
        contract_multiplier = kwargs['contract_multiplier']
    else:
        contract_multiplier = cmi.contract_multiplier[cmi.get_contract_specs(
            ticker_list[0])['ticker_head']]

    if 'datetime5_years_ago' in kwargs.keys():
        datetime5_years_ago = kwargs['datetime5_years_ago']
    else:
        date5_years_ago = cu.doubledate_shift(date_to, 5 * 365)
        datetime5_years_ago = cu.convert_doubledate_2datetime(date5_years_ago)

    if 'datetime2_months_ago' in kwargs.keys():
        datetime2_months_ago = kwargs['datetime2_months_ago']
    else:
        date2_months_ago = cu.doubledate_shift(date_to, 60)
        datetime2_months_ago = cu.convert_doubledate_2datetime(
            date2_months_ago)

    aligned_output = opUtil.get_aligned_futures_data(
        contract_list=ticker_list,
        tr_dte_list=tr_dte_list,
        aggregation_method=aggregation_method,
        contracts_back=contracts_back,
        date_to=date_to,
        futures_data_dictionary=futures_data_dictionary,
        use_last_as_current=use_last_as_current)
    if not aligned_output['success']:
        return {'success': False}

    current_data = aligned_output['current_data']
    aligned_data = aligned_output['aligned_data']

    month_diff_1 = 12 * (current_data['c1']['ticker_year'] -
                         current_data['c2']['ticker_year']) + (
                             current_data['c1']['ticker_month'] -
                             current_data['c2']['ticker_month'])
    month_diff_2 = 12 * (current_data['c2']['ticker_year'] -
                         current_data['c3']['ticker_year']) + (
                             current_data['c2']['ticker_month'] -
                             current_data['c3']['ticker_month'])

    weight_11 = 2 * month_diff_2 / (month_diff_1 + month_diff_1)
    weight_12 = -2
    weight_13 = 2 * month_diff_1 / (month_diff_1 + month_diff_1)

    price_1 = current_data['c1']['close_price']
    price_2 = current_data['c2']['close_price']
    price_3 = current_data['c3']['close_price']

    linear_interp_price2 = (weight_11 * aligned_data['c1']['close_price'] +
                            weight_13 * aligned_data['c3']['close_price']) / 2

    butterfly_price = aligned_data['c1']['close_price'] - 2 * aligned_data[
        'c2']['close_price'] + aligned_data['c3']['close_price']

    price_ratio = linear_interp_price2 / aligned_data['c2']['close_price']

    linear_interp_price2_current = (weight_11 * price_1 +
                                    weight_13 * price_3) / 2

    price_ratio_current = linear_interp_price2_current / price_2

    q = stats.get_quantile_from_number({
        'x':
        price_ratio_current,
        'y':
        price_ratio.values,
        'clean_num_obs':
        max(100, round(3 * len(price_ratio.values) / 4))
    })
    qf = stats.get_quantile_from_number({
        'x': price_ratio_current,
        'y': price_ratio.values[-40:],
        'clean_num_obs': 30
    })

    recent_quantile_list = [
        stats.get_quantile_from_number({
            'x': x,
            'y': price_ratio.values[-40:],
            'clean_num_obs': 30
        }) for x in price_ratio.values[-40:]
    ]

    weight1 = weight_11
    weight2 = weight_12
    weight3 = weight_13

    last5_years_indx = aligned_data['settle_date'] >= datetime5_years_ago
    last2_months_indx = aligned_data['settle_date'] >= datetime2_months_ago
    data_last5_years = aligned_data[last5_years_indx]

    yield1 = 100 * (
        aligned_data['c1']['close_price'] -
        aligned_data['c2']['close_price']) / aligned_data['c2']['close_price']
    yield2 = 100 * (
        aligned_data['c2']['close_price'] -
        aligned_data['c3']['close_price']) / aligned_data['c3']['close_price']

    yield1_last5_years = yield1[last5_years_indx]
    yield2_last5_years = yield2[last5_years_indx]

    yield1_current = 100 * (
        current_data['c1']['close_price'] -
        current_data['c2']['close_price']) / current_data['c2']['close_price']
    yield2_current = 100 * (
        current_data['c2']['close_price'] -
        current_data['c3']['close_price']) / current_data['c3']['close_price']

    butterfly_price_current = current_data['c1']['close_price']\
                            -2*current_data['c2']['close_price']\
                              +current_data['c3']['close_price']

    #return {'yield1': yield1, 'yield2': yield2, 'yield1_current':yield1_current, 'yield2_current': yield2_current}

    yield_regress_output = stats.get_regression_results({
        'x':
        yield2,
        'y':
        yield1,
        'x_current':
        yield2_current,
        'y_current':
        yield1_current,
        'clean_num_obs':
        max(100, round(3 * len(yield1.values) / 4))
    })

    yield_regress_output_last5_years = stats.get_regression_results({
        'x':
        yield2_last5_years,
        'y':
        yield1_last5_years,
        'x_current':
        yield2_current,
        'y_current':
        yield1_current,
        'clean_num_obs':
        max(100, round(3 * len(yield1_last5_years.values) / 4))
    })

    bf_qz_frame_short = pd.DataFrame()
    bf_qz_frame_long = pd.DataFrame()

    if (len(yield1) >= 40) & (len(yield2) >= 40):

        recent_zscore_list = [
            (yield1[-40 + i] - yield_regress_output['alpha'] -
             yield_regress_output['beta'] * yield2[-40 + i]) /
            yield_regress_output['residualstd'] for i in range(40)
        ]

        bf_qz_frame = pd.DataFrame.from_dict({
            'bf_price':
            butterfly_price.values[-40:],
            'q':
            recent_quantile_list,
            'zscore':
            recent_zscore_list
        })

        bf_qz_frame = np.round(bf_qz_frame, 8)
        bf_qz_frame.drop_duplicates(['bf_price'], keep='last', inplace=True)

        # return bf_qz_frame

        bf_qz_frame_short = bf_qz_frame[(bf_qz_frame['zscore'] >= 0.6)
                                        & (bf_qz_frame['q'] >= 85)]
        bf_qz_frame_long = bf_qz_frame[(bf_qz_frame['zscore'] <= -0.6)
                                       & (bf_qz_frame['q'] <= 12)]

    if bf_qz_frame_short.empty:
        short_price_limit = np.NAN
    else:
        short_price_limit = bf_qz_frame_short['bf_price'].min()

    if bf_qz_frame_long.empty:
        long_price_limit = np.NAN
    else:
        long_price_limit = bf_qz_frame_long['bf_price'].max()

    zscore1 = yield_regress_output['zscore']
    rsquared1 = yield_regress_output['rsquared']

    zscore2 = yield_regress_output_last5_years['zscore']
    rsquared2 = yield_regress_output_last5_years['rsquared']

    second_spread_weight_1 = yield_regress_output['beta']
    second_spread_weight_2 = yield_regress_output_last5_years['beta']

    butterfly_5_change = data_last5_years['c1']['change_5']\
                             - (1+second_spread_weight_1)*data_last5_years['c2']['change_5']\
                             + second_spread_weight_1*data_last5_years['c3']['change_5']

    butterfly_5_change_current = current_data['c1']['change_5']\
                             - (1+second_spread_weight_1)*current_data['c2']['change_5']\
                             + second_spread_weight_1*current_data['c3']['change_5']

    butterfly_1_change = data_last5_years['c1']['change_1']\
                             - (1+second_spread_weight_1)*data_last5_years['c2']['change_1']\
                             + second_spread_weight_1*data_last5_years['c3']['change_1']

    percentile_vector = stats.get_number_from_quantile(
        y=butterfly_5_change.values,
        quantile_list=[1, 15, 85, 99],
        clean_num_obs=max(100, round(3 * len(butterfly_5_change.values) / 4)))

    downside = contract_multiplier * (percentile_vector[0] +
                                      percentile_vector[1]) / 2
    upside = contract_multiplier * (percentile_vector[2] +
                                    percentile_vector[3]) / 2
    recent_5day_pnl = contract_multiplier * butterfly_5_change_current

    residuals = yield1 - yield_regress_output[
        'alpha'] - yield_regress_output['beta'] * yield2

    regime_change_ind = (residuals[last5_years_indx].mean() -
                         residuals.mean()) / residuals.std()

    seasonal_residuals = residuals[aligned_data['c1']['ticker_month'] ==
                                   current_data['c1']['ticker_month']]
    seasonal_clean_residuals = seasonal_residuals[np.isfinite(
        seasonal_residuals)]
    clean_residuals = residuals[np.isfinite(residuals)]

    contract_seasonality_ind = (
        seasonal_clean_residuals.mean() -
        clean_residuals.mean()) / clean_residuals.std()

    yield1_quantile_list = stats.get_number_from_quantile(
        y=yield1, quantile_list=[10, 90])
    yield2_quantile_list = stats.get_number_from_quantile(
        y=yield2, quantile_list=[10, 90])

    noise_ratio = (yield1_quantile_list[1] - yield1_quantile_list[0]) / (
        yield2_quantile_list[1] - yield2_quantile_list[0])

    daily_noise_recent = stats.get_stdev(x=butterfly_1_change.values[-20:],
                                         clean_num_obs=15)
    daily_noise_past = stats.get_stdev(
        x=butterfly_1_change.values,
        clean_num_obs=max(100, round(3 * len(butterfly_1_change.values) / 4)))

    recent_vol_ratio = daily_noise_recent / daily_noise_past

    alpha1 = yield_regress_output['alpha']

    residuals_last5_years = residuals[last5_years_indx]
    residuals_last2_months = residuals[last2_months_indx]

    residual_current = yield1_current - alpha1 - second_spread_weight_1 * yield2_current

    z3 = (residual_current - residuals_last5_years.mean()) / residuals.std()
    z4 = (residual_current - residuals_last2_months.mean()) / residuals.std()

    yield_change = (alpha1 + second_spread_weight_1 * yield2_current -
                    yield1_current) / (1 + second_spread_weight_1)

    new_yield1 = yield1_current + yield_change
    new_yield2 = yield2_current - yield_change

    price_change1 = 100 * (
        (price_2 * (new_yield1 + 100) / 100) - price_1) / (200 + new_yield1)
    price_change2 = 100 * (
        (price_3 * (new_yield2 + 100) / 100) - price_2) / (200 + new_yield2)

    theo_pnl = contract_multiplier * (
        2 * price_change1 - 2 * second_spread_weight_1 * price_change2)

    aligned_data['residuals'] = residuals
    aligned_output['aligned_data'] = aligned_data

    grouped = aligned_data.groupby(aligned_data['c1']['cont_indx'])
    aligned_data['shifted_residuals'] = grouped['residuals'].shift(-5)
    aligned_data['residual_change'] = aligned_data[
        'shifted_residuals'] - aligned_data['residuals']

    mean_reversion = stats.get_regression_results({
        'x':
        aligned_data['residuals'].values,
        'y':
        aligned_data['residual_change'].values,
        'clean_num_obs':
        max(100, round(3 * len(yield1.values) / 4))
    })

    theo_spread_move_output = su.calc_theo_spread_move_from_ratio_normalization(
        ratio_time_series=price_ratio.values[-40:],
        starting_quantile=qf,
        num_price=linear_interp_price2_current,
        den_price=current_data['c2']['close_price'],
        favorable_quantile_move_list=[5, 10, 15, 20, 25])

    theo_pnl_list = [
        x * contract_multiplier * 2
        for x in theo_spread_move_output['theo_spread_move_list']
    ]

    return {
        'success': True,
        'aligned_output': aligned_output,
        'q': q,
        'qf': qf,
        'theo_pnl_list': theo_pnl_list,
        'ratio_target_list': theo_spread_move_output['ratio_target_list'],
        'weight1': weight1,
        'weight2': weight2,
        'weight3': weight3,
        'zscore1': zscore1,
        'rsquared1': rsquared1,
        'zscore2': zscore2,
        'rsquared2': rsquared2,
        'zscore3': z3,
        'zscore4': z4,
        'zscore5': zscore1 - regime_change_ind,
        'zscore6': zscore1 - contract_seasonality_ind,
        'zscore7': zscore1 - regime_change_ind - contract_seasonality_ind,
        'theo_pnl': theo_pnl,
        'regime_change_ind': regime_change_ind,
        'contract_seasonality_ind': contract_seasonality_ind,
        'second_spread_weight_1': second_spread_weight_1,
        'second_spread_weight_2': second_spread_weight_2,
        'downside': downside,
        'upside': upside,
        'yield1': yield1,
        'yield2': yield2,
        'yield1_current': yield1_current,
        'yield2_current': yield2_current,
        'bf_price': butterfly_price_current,
        'short_price_limit': short_price_limit,
        'long_price_limit': long_price_limit,
        'noise_ratio': noise_ratio,
        'alpha1': alpha1,
        'alpha2': yield_regress_output_last5_years['alpha'],
        'residual_std1': yield_regress_output['residualstd'],
        'residual_std2': yield_regress_output_last5_years['residualstd'],
        'recent_vol_ratio': recent_vol_ratio,
        'recent_5day_pnl': recent_5day_pnl,
        'price_1': price_1,
        'price_2': price_2,
        'price_3': price_3,
        'last5_years_indx': last5_years_indx,
        'price_ratio': price_ratio,
        'mean_reversion_rsquared': mean_reversion['rsquared'],
        'mean_reversion_signif': (mean_reversion['conf_int'][1, :] < 0).all()
    }
def get_futures_butterfly_signals(**kwargs):

    ticker_list = kwargs['ticker_list']
    date_to = kwargs['date_to']

    if 'tr_dte_list' in kwargs.keys():
        tr_dte_list = kwargs['tr_dte_list']
    else:
        tr_dte_list = [exp.get_futures_days2_expiration({'ticker': x,'date_to': date_to}) for x in ticker_list]

    if 'aggregation_method' in kwargs.keys() and 'contracts_back' in kwargs.keys():
        aggregation_method = kwargs['aggregation_method']
        contracts_back = kwargs['contracts_back']
    else:
        amcb_output = opUtil.get_aggregation_method_contracts_back(cmi.get_contract_specs(ticker_list[0]))
        aggregation_method = amcb_output['aggregation_method']
        contracts_back = amcb_output['contracts_back']

    if 'use_last_as_current' in kwargs.keys():
        use_last_as_current = kwargs['use_last_as_current']
    else:
        use_last_as_current = False

    if 'futures_data_dictionary' in kwargs.keys():
        futures_data_dictionary = kwargs['futures_data_dictionary']
    else:
        futures_data_dictionary = {x: gfp.get_futures_price_preloaded(ticker_head=x) for x in [cmi.get_contract_specs(ticker_list[0])['ticker_head']]}

    if 'contract_multiplier' in kwargs.keys():
        contract_multiplier = kwargs['contract_multiplier']
    else:
        contract_multiplier = cmi.contract_multiplier[cmi.get_contract_specs(ticker_list[0])['ticker_head']]

    if 'datetime5_years_ago' in kwargs.keys():
        datetime5_years_ago = kwargs['datetime5_years_ago']
    else:
        date5_years_ago = cu.doubledate_shift(date_to,5*365)
        datetime5_years_ago = cu.convert_doubledate_2datetime(date5_years_ago)

    if 'datetime2_months_ago' in kwargs.keys():
        datetime2_months_ago = kwargs['datetime2_months_ago']
    else:
        date2_months_ago = cu.doubledate_shift(date_to,60)
        datetime2_months_ago = cu.convert_doubledate_2datetime(date2_months_ago)

    aligned_output = opUtil.get_aligned_futures_data(contract_list=ticker_list,
                                                          tr_dte_list=tr_dte_list,
                                                          aggregation_method=aggregation_method,
                                                          contracts_back=contracts_back,
                                                          date_to=date_to,
                                                          futures_data_dictionary=futures_data_dictionary,
                                                          use_last_as_current=use_last_as_current)
    current_data = aligned_output['current_data']
    aligned_data = aligned_output['aligned_data']

    month_diff_1 = 12*(current_data['c1']['ticker_year']-current_data['c2']['ticker_year'])+(current_data['c1']['ticker_month']-current_data['c2']['ticker_month'])
    month_diff_2 = 12*(current_data['c2']['ticker_year']-current_data['c3']['ticker_year'])+(current_data['c2']['ticker_month']-current_data['c3']['ticker_month'])

    weight_11 = 2*month_diff_2/(month_diff_1+month_diff_1)
    weight_12 = -2
    weight_13 = 2*month_diff_1/(month_diff_1+month_diff_1)

    price_1 = current_data['c1']['close_price']
    price_2 = current_data['c2']['close_price']
    price_3 = current_data['c3']['close_price']

    linear_interp_price2 = (weight_11*aligned_data['c1']['close_price']+weight_13*aligned_data['c3']['close_price'])/2

    butterfly_price = aligned_data['c1']['close_price']-2*aligned_data['c2']['close_price']+aligned_data['c3']['close_price']

    price_ratio = linear_interp_price2/aligned_data['c2']['close_price']

    linear_interp_price2_current = (weight_11*price_1+weight_13*price_3)/2

    price_ratio_current = linear_interp_price2_current/price_2

    q = stats.get_quantile_from_number({'x': price_ratio_current, 'y': price_ratio.values, 'clean_num_obs': max(100, round(3*len(price_ratio.values)/4))})
    qf = stats.get_quantile_from_number({'x': price_ratio_current, 'y': price_ratio.values[-40:], 'clean_num_obs': 30})

    recent_quantile_list = [stats.get_quantile_from_number({'x': x, 'y': price_ratio.values[-40:], 'clean_num_obs': 30}) for x in price_ratio.values[-40:]]

    weight1 = weight_11
    weight2 = weight_12
    weight3 = weight_13

    last5_years_indx = aligned_data['settle_date']>=datetime5_years_ago
    last2_months_indx = aligned_data['settle_date']>=datetime2_months_ago
    data_last5_years = aligned_data[last5_years_indx]

    yield1 = 100*(aligned_data['c1']['close_price']-aligned_data['c2']['close_price'])/aligned_data['c2']['close_price']
    yield2 = 100*(aligned_data['c2']['close_price']-aligned_data['c3']['close_price'])/aligned_data['c3']['close_price']

    yield1_last5_years = yield1[last5_years_indx]
    yield2_last5_years = yield2[last5_years_indx]

    yield1_current = 100*(current_data['c1']['close_price']-current_data['c2']['close_price'])/current_data['c2']['close_price']
    yield2_current = 100*(current_data['c2']['close_price']-current_data['c3']['close_price'])/current_data['c3']['close_price']

    butterfly_price_current = current_data['c1']['close_price']\
                            -2*current_data['c2']['close_price']\
                              +current_data['c3']['close_price']

    yield_regress_output = stats.get_regression_results({'x':yield2, 'y':yield1,'x_current': yield2_current, 'y_current': yield1_current,
                                                         'clean_num_obs': max(100, round(3*len(yield1.values)/4))})
    yield_regress_output_last5_years = stats.get_regression_results({'x':yield2_last5_years, 'y':yield1_last5_years,
                                                                     'x_current': yield2_current, 'y_current': yield1_current,
                                                                     'clean_num_obs': max(100, round(3*len(yield1_last5_years.values)/4))})

    bf_qz_frame_short = pd.DataFrame()
    bf_qz_frame_long = pd.DataFrame()

    if (len(yield1) >= 40)&(len(yield2) >= 40):

        recent_zscore_list = [(yield1[-40+i]-yield_regress_output['alpha']-yield_regress_output['beta']*yield2[-40+i])/yield_regress_output['residualstd'] for i in range(40)]

        bf_qz_frame = pd.DataFrame.from_items([('bf_price', butterfly_price.values[-40:]),
                                           ('q',recent_quantile_list),
                                           ('zscore', recent_zscore_list)])

        bf_qz_frame = np.round(bf_qz_frame, 8)
        bf_qz_frame.drop_duplicates(['bf_price'], take_last=True, inplace=True)

    # return bf_qz_frame

        bf_qz_frame_short = bf_qz_frame[(bf_qz_frame['zscore'] >= 0.6) & (bf_qz_frame['q'] >= 85)]
        bf_qz_frame_long = bf_qz_frame[(bf_qz_frame['zscore'] <= -0.6) & (bf_qz_frame['q'] <= 12)]

    if bf_qz_frame_short.empty:
        short_price_limit = np.NAN
    else:
        short_price_limit = bf_qz_frame_short['bf_price'].min()

    if bf_qz_frame_long.empty:
        long_price_limit = np.NAN
    else:
        long_price_limit = bf_qz_frame_long['bf_price'].max()

    zscore1= yield_regress_output['zscore']
    rsquared1= yield_regress_output['rsquared']

    zscore2= yield_regress_output_last5_years['zscore']
    rsquared2= yield_regress_output_last5_years['rsquared']

    second_spread_weight_1 = yield_regress_output['beta']
    second_spread_weight_2 = yield_regress_output_last5_years['beta']

    butterfly_5_change = data_last5_years['c1']['change_5']\
                             - (1+second_spread_weight_1)*data_last5_years['c2']['change_5']\
                             + second_spread_weight_1*data_last5_years['c3']['change_5']

    butterfly_5_change_current = current_data['c1']['change_5']\
                             - (1+second_spread_weight_1)*current_data['c2']['change_5']\
                             + second_spread_weight_1*current_data['c3']['change_5']

    butterfly_1_change = data_last5_years['c1']['change_1']\
                             - (1+second_spread_weight_1)*data_last5_years['c2']['change_1']\
                             + second_spread_weight_1*data_last5_years['c3']['change_1']

    percentile_vector = stats.get_number_from_quantile(y=butterfly_5_change.values,
                                                       quantile_list=[1, 15, 85, 99],
                                                       clean_num_obs=max(100, round(3*len(butterfly_5_change.values)/4)))

    downside = contract_multiplier*(percentile_vector[0]+percentile_vector[1])/2
    upside = contract_multiplier*(percentile_vector[2]+percentile_vector[3])/2
    recent_5day_pnl = contract_multiplier*butterfly_5_change_current

    residuals = yield1-yield_regress_output['alpha']-yield_regress_output['beta']*yield2

    regime_change_ind = (residuals[last5_years_indx].mean()-residuals.mean())/residuals.std()
    contract_seasonality_ind = (residuals[aligned_data['c1']['ticker_month'] == current_data['c1']['ticker_month']].mean()-residuals.mean())/residuals.std()

    yield1_quantile_list = stats.get_number_from_quantile(y=yield1, quantile_list=[10, 90])
    yield2_quantile_list = stats.get_number_from_quantile(y=yield2, quantile_list=[10, 90])

    noise_ratio = (yield1_quantile_list[1]-yield1_quantile_list[0])/(yield2_quantile_list[1]-yield2_quantile_list[0])

    daily_noise_recent = stats.get_stdev(x=butterfly_1_change.values[-20:], clean_num_obs=15)
    daily_noise_past = stats.get_stdev(x=butterfly_1_change.values, clean_num_obs=max(100, round(3*len(butterfly_1_change.values)/4)))

    recent_vol_ratio = daily_noise_recent/daily_noise_past

    alpha1 = yield_regress_output['alpha']

    residuals_last5_years = residuals[last5_years_indx]
    residuals_last2_months = residuals[last2_months_indx]

    residual_current = yield1_current-alpha1-second_spread_weight_1*yield2_current

    z3 = (residual_current-residuals_last5_years.mean())/residuals.std()
    z4 = (residual_current-residuals_last2_months.mean())/residuals.std()

    yield_change = (alpha1+second_spread_weight_1*yield2_current-yield1_current)/(1+second_spread_weight_1)

    new_yield1 = yield1_current + yield_change
    new_yield2 = yield2_current - yield_change

    price_change1 = 100*((price_2*(new_yield1+100)/100)-price_1)/(200+new_yield1)
    price_change2 = 100*((price_3*(new_yield2+100)/100)-price_2)/(200+new_yield2)

    theo_pnl = contract_multiplier*(2*price_change1-2*second_spread_weight_1*price_change2)

    aligned_data['residuals'] = residuals
    aligned_output['aligned_data'] = aligned_data

    grouped = aligned_data.groupby(aligned_data['c1']['cont_indx'])
    aligned_data['shifted_residuals'] = grouped['residuals'].shift(-5)
    aligned_data['residual_change'] = aligned_data['shifted_residuals']-aligned_data['residuals']

    mean_reversion = stats.get_regression_results({'x':aligned_data['residuals'].values,
                                                         'y':aligned_data['residual_change'].values,
                                                          'clean_num_obs': max(100, round(3*len(yield1.values)/4))})

    theo_spread_move_output = su.calc_theo_spread_move_from_ratio_normalization(ratio_time_series=price_ratio.values[-40:],
                                                  starting_quantile=qf,
                                                  num_price=linear_interp_price2_current,
                                                  den_price=current_data['c2']['close_price'],
                                                  favorable_quantile_move_list=[5, 10, 15, 20, 25])

    theo_pnl_list = [x*contract_multiplier*2  for x in theo_spread_move_output['theo_spread_move_list']]

    return {'aligned_output': aligned_output, 'q': q, 'qf': qf,
            'theo_pnl_list': theo_pnl_list,
            'ratio_target_list': theo_spread_move_output['ratio_target_list'],
            'weight1': weight1, 'weight2': weight2, 'weight3': weight3,
            'zscore1': zscore1, 'rsquared1': rsquared1, 'zscore2': zscore2, 'rsquared2': rsquared2,
            'zscore3': z3, 'zscore4': z4,
            'zscore5': zscore1-regime_change_ind,
            'zscore6': zscore1-contract_seasonality_ind,
            'zscore7': zscore1-regime_change_ind-contract_seasonality_ind,
            'theo_pnl': theo_pnl,
            'regime_change_ind' : regime_change_ind,'contract_seasonality_ind': contract_seasonality_ind,
            'second_spread_weight_1': second_spread_weight_1, 'second_spread_weight_2': second_spread_weight_2,
            'downside': downside, 'upside': upside,
             'yield1': yield1, 'yield2': yield2, 'yield1_current': yield1_current, 'yield2_current': yield2_current,
            'bf_price': butterfly_price_current, 'short_price_limit': short_price_limit,'long_price_limit':long_price_limit,
            'noise_ratio': noise_ratio,
            'alpha1': alpha1, 'alpha2': yield_regress_output_last5_years['alpha'],
            'residual_std1': yield_regress_output['residualstd'], 'residual_std2': yield_regress_output_last5_years['residualstd'],
            'recent_vol_ratio': recent_vol_ratio, 'recent_5day_pnl': recent_5day_pnl,
            'price_1': price_1, 'price_2': price_2, 'price_3': price_3, 'last5_years_indx': last5_years_indx,
            'price_ratio': price_ratio,
            'mean_reversion_rsquared': mean_reversion['rsquared'],
            'mean_reversion_signif' : (mean_reversion['conf_int'][1, :] < 0).all()}