def mc_distribution_to_distribution(mc_distribution,
                                    bins=10**4 + 1,
                                    to_file=False,
                                    file_name=None):
    """Bin Monte Carlo outcomes into a discrete distribution.

    The simulated values are histogrammed into ``bins`` buckets. Each bucket
    becomes one state: its relative price is the bucket centre and its
    probability is the share of simulations that fell into the bucket.

    Args:
        mc_distribution: Simulated values, anything ``np.histogram`` accepts.
            Bucket centres are used unscaled as relative prices, so the
            values are presumably already relative to a base of 1.0 --
            TODO confirm with callers.
        bins: Forwarded unchanged to ``np.histogram``.
        to_file: When exactly ``True``, the resulting ``distribution_df`` is
            also persisted through ``to_pickle_and_CSV``.
        file_name: Destination handed to ``to_pickle_and_CSV``.

    Returns:
        The object produced by ``distribution_info_to_distribution`` for the
        state / probability / move table.
    """
    mean_mc_price = np.mean(mc_distribution)

    counts, bin_edges = np.histogram(mc_distribution, bins)
    bin_centers = .5 * (bin_edges[1:] + bin_edges[:-1])

    # Total the histogram once: re-summing it for every bin made this
    # quadratic in the number of bins.
    total_count = counts.sum()
    probs = list(counts / total_count)

    # Centres are deliberately not rescaled by the simulated mean price.
    relative_moves = list(bin_centers)
    pct_moves = list(bin_centers - 1)

    distribution_info = {
        'State': np.arange(len(counts)),
        'Prob': probs,
        'Pct_Move': pct_moves,
        'Relative_Price': relative_moves
    }

    if to_file is True:
        distribution_df = distribution_info_to_distribution(
            distribution_info).distribution_df
        to_pickle_and_CSV(distribution_df, file_name)

    logger.info("Iterations: {:,}".format(total_count))
    logger.info("Total Prob: {:.2f}".format(sum(probs)))
    logger.info("Mean Stock Price: {}".format(mean_mc_price))

    return distribution_info_to_distribution(distribution_info)
def make_price_table(symbols: 'list',
                     start=dt.datetime(2016, 1, 1),
                     end=None,
                     file_name='default'):
    """Get prices from Yahoo's website for multiple symbols.

    Each symbol is queried on a small thread pool with a bounded number of
    retries. The per-symbol adjusted-close columns are then outer-joined,
    oriented and written out through ``to_pickle_and_CSV``.

    Args:
        symbols: Ticker symbols to download.
        start: First date requested.
        end: Last date requested. ``None`` (the default) means "today",
            resolved at call time rather than frozen at import time.
        file_name: Destination handed to ``to_pickle_and_CSV``.

    Returns:
        The joined price table as returned by ``orient_price_table``.
    """
    max_attempts = 9
    if end is None:
        end = dt.datetime.today()

    query_attempts = {}
    data_points = {}
    failed_symbols = []

    def get_prices(symbol):
        """Return a one-column adjusted-close frame, or None on failure."""
        print(symbol)
        for attempt in range(1, max_attempts + 1):
            print(attempt)
            try:
                df = web.get_data_yahoo(symbol, start,
                                        end).set_index('date').round(2)
            except Exception:
                # Any query failure is treated as transient: try again.
                continue

            logger.info("{}: Attempts: {}".format(symbol, attempt))
            query_attempts[symbol] = attempt
            try:
                price_table = df.loc[:, ['adjclose']].rename(
                    columns={'adjclose': symbol})
            except Exception:
                # A malformed frame will not improve on retry; give up on
                # this symbol but keep the batch alive.
                print("Fetch Yahoo prices reached the general Exception clause")
                logger.exception(
                    "{} returned an unusable frame".format(symbol))
                failed_symbols.append(symbol)
                return None
            data_points[symbol] = price_table.shape[0]
            return price_table

        # Only reached once every attempt has failed, so a symbol can no
        # longer be reported as failed and still return data.
        logger.error("{} failed the query".format(symbol))
        failed_symbols.append(symbol)
        query_attempts[symbol] = max_attempts
        return None

    # The context manager releases the worker threads once the map is done.
    with ThreadPool(4) as pool:
        price_tables = pool.map(get_prices, symbols)

    print('Query Attempts: ', query_attempts)
    print('Data Points: ', data_points)
    print('Failed Symbols: ', failed_symbols, end='\n')

    # Failed symbols come back as None and must not reach the join.
    fetched_tables = [table for table in price_tables if table is not None]
    price_table = outer_join_dfs_horizontally(fetched_tables)
    price_table = orient_price_table(price_table)
    to_pickle_and_CSV(price_table, file_name)
    return price_table
def fetch_price_table():
    """Download and persist prices for ``AllSymbols`` when run as a script.

    The work only happens when the defining module is executed as
    ``__main__``; when the module is merely imported the call is a no-op.

    Returns:
        None in every case.
    """
    if __name__ != '__main__':
        return

    file_name = '/Users/paulwainer/Paulthon/DataFiles/StockPrices/sp500_prices_paul'
    price_table = make_price_table(AllSymbols,
                                   start=dt.datetime(2015, 1, 1),
                                   end=dt.datetime.today(),
                                   file_name=file_name)
    # NOTE(review): make_price_table appears to save to file_name as well,
    # which would make this second write redundant -- confirm before removing.
    to_pickle_and_CSV(price_table, file_name)
def create_calls_and_puts_df(self, expiry, display=False):
    """Return the filtered calls-and-puts frame for this symbol, cached.

    On a cache miss the call and put frames are built with
    ``create_combined_df``, dumped unfiltered to 'calls' / 'puts', filtered
    on ``Delta`` (calls above .45, puts below -.55), stacked and cached
    under ``self.symbol``.

    Args:
        expiry: Forwarded to ``create_combined_df``.
        display: Forwarded to ``create_combined_df``.

    Returns:
        The stacked frame produced by ``append_dfs_vertically``.
    """
    print('Trying calls and puts cache')
    print(self.calls_and_puts_cache)
    # NOTE(review): the cache is keyed by symbol only, so a second call with
    # a different expiry returns the first expiry's frame -- confirm intent.
    try:
        return self.calls_and_puts_cache[self.symbol]
    except KeyError:
        # Cache miss only; any other error is a real failure and propagates.
        pass

    calls_df = self.create_combined_df(expiry, 'Call', display=display)
    puts_df = self.create_combined_df(expiry, 'Put', display=display)

    to_pickle_and_CSV(calls_df, 'calls')
    to_pickle_and_CSV(puts_df, 'puts')

    # Keep the matching rows; the comparison alone only yields a boolean mask.
    calls_df = calls_df[calls_df['Delta'] > .45]
    puts_df = puts_df[puts_df['Delta'] < -.55]

    calls_and_puts_df = append_dfs_vertically([calls_df, puts_df])
    self.calls_and_puts_cache[self.symbol] = calls_and_puts_df
    return calls_and_puts_df
def make_price_table(symbols: 'list',
                     start=dt.datetime(2016, 1, 1),
                     end=None,
                     file_name='default'):
    """Get prices from Yahoo's website for multiple symbols.

    Each symbol is queried on a small thread pool with a bounded number of
    retries. The per-symbol adjusted-close columns are concatenated side by
    side and written out through ``to_pickle_and_CSV``.

    Args:
        symbols: Ticker symbols to download.
        start: First date requested.
        end: Last date requested. ``None`` (the default) means "today",
            resolved at call time rather than frozen at import time.
        file_name: Destination handed to ``to_pickle_and_CSV``.

    Returns:
        DataFrame with one adjusted-close column per successfully fetched
        symbol.

    Raises:
        ValueError: From ``pd.concat`` when no symbol could be fetched.
    """
    max_attempts = 9
    if end is None:
        end = dt.datetime.today()

    query_attempts = []
    failed_symbols = []

    def get_prices(symbol):
        """Return a one-column adjusted-close frame, or None on failure."""
        print(symbol)
        for attempt in range(1, max_attempts + 1):
            print(attempt)
            try:
                df = web.get_data_yahoo(symbol, start,
                                        end).set_index('date').round(2)
            except Exception:
                # Any query failure is treated as transient: try again.
                continue

            logger.info("{}: Attempts: {}".format(symbol, attempt))
            query_attempts.append(attempt)
            try:
                return df.loc[:, ['adjclose']].rename(
                    columns={'adjclose': symbol})
            except Exception:
                # A malformed frame will not improve on retry; record it
                # instead of dropping the symbol silently.
                logger.exception(
                    "{} returned an unusable frame".format(symbol))
                failed_symbols.append(symbol)
                return None

        # Only reached once every attempt has failed, so a symbol can no
        # longer be reported as failed and still return data.
        logger.error("{} failed the query".format(symbol))
        failed_symbols.append(symbol)
        query_attempts.append(max_attempts)
        return None

    # The context manager releases the worker threads once the map is done.
    with ThreadPool(4) as pool:
        price_tables = pool.map(get_prices, symbols)

    # pd.concat drops None entries itself, so failed symbols are skipped.
    price_table = pd.concat(price_tables, axis=1)
    to_pickle_and_CSV(price_table, file_name)
    print(query_attempts, failed_symbols, price_table, end='\n')
    return price_table
def get_best_betas(stocks,
                   indices,
                   lookback,
                   sd_cutoff_params,
                   percentile_cutoff,
                   to_file=False,
                   file_name='default'):
    """Returns a DataFrame of best betas based on highest correlation for a set of symbols and ETFs.

    For every stock, the index whose (rounded) 'Corr' value is highest is
    selected and the beta information against that index is recomputed. The
    per-stock rows are stacked under a common 'Best' column level and sorted
    by index (ascending) then correlation (descending).

    Args:
        stocks: Stock symbols; assumed to be in the same order as the rows
            returned by ``get_betas_for_multiple_stocks_and_indices``.
        indices: Candidate index/ETF symbols.
        lookback: Forwarded to the beta helpers.
        sd_cutoff_params: Forwarded to the beta helpers.
        percentile_cutoff: Forwarded to the beta helpers.
        to_file: When truthy, persist the result via ``to_pickle_and_CSV``.
        file_name: Destination used when ``to_file`` is truthy.

    Returns:
        The sorted DataFrame, which is also printed rounded to two decimals.
    """
    all_betas = get_betas_for_multiple_stocks_and_indices(
        stocks, indices, lookback, sd_cutoff_params, percentile_cutoff)

    corr_columns = list(itertools.product(indices, ['Corr']))
    corr_df = all_betas.loc[:, corr_columns].round(2)

    # idxmax yields (index_symbol, 'Corr') column labels; keep the symbol.
    # Reading it straight from the Series avoids writing a scratch column
    # into the frame returned by the helper.
    best = [label[0] for label in corr_df.idxmax(axis=1).tolist()]

    info = []
    for stock, index in zip(stocks, best):
        stock_betas = get_betas_multiple_stocks([stock], index, lookback,
                                                sd_cutoff_params,
                                                percentile_cutoff)
        # set_levels returns a new MultiIndex; its inplace= argument was
        # removed in pandas 2.0, so assign the result back.
        stock_betas.columns = stock_betas.columns.set_levels(['Best'],
                                                             level=0)
        stock_betas[('Best', 'Index')] = index
        info.append(stock_betas)

    stacked = append_dfs_vertically(info)

    metrics = stacked.loc[:, [('Best', column) for column in [
        'Beta', 'Corr', 'Stock_Cutoff', 'Index_Cutoff', 'Index_Beta_to_SPY',
        'Beta_to_SPY'
    ]]]
    index_labels = stacked.loc[:, [('Best', 'Index')]]

    df = merge_dfs_horizontally([index_labels, metrics]).sort_values(
        [('Best', 'Index'), ('Best', 'Corr')],
        ascending=[True, False],
        inplace=False)

    print(df.round(2).to_string())

    if to_file:
        to_pickle_and_CSV(df, file_name)
    return df
def get_sp500_symbols_from_wiki():
    """Pull S&P 500 Symbols from Wikipedia (with the ability to add discretionary symbols).

    The first table on the Wikipedia constituents page is read and its first
    column is taken as the symbol list. The de-duplicated, sorted symbols are
    saved via ``to_pickle_and_CSV`` under 'current_symbols'.

    Returns:
        Sorted list of unique symbols.
    """
    # Pull symbols from Wikipedia table
    constituents = pd.read_html(
        'https://en.wikipedia.org/wiki/List_of_S&P_500_companies')[0]
    if 0 in constituents.columns:
        # Headerless parse: columns are numbered and row 0 holds the header
        # text, so skip it.
        sp500_symbols = constituents[0][1:].tolist()
    else:
        # Header row was inferred: the first column already holds only data.
        sp500_symbols = constituents.iloc[:, 0].tolist()

    # Add Discretionary Stock and Indices
    discretionary_stocks = HealthcareSymbols
    discretionary_indices = [
        'SPY', 'IWM', 'QQQ', 'IBB', 'XBI', 'XLP', 'XRT'
    ] + ['XLV', 'XLF', 'XLE', 'AMLP', 'VFH', 'GDX', 'XLU']
    discretionary_symbols = discretionary_stocks + discretionary_indices
    # NOTE(review): the broader list built above is overridden here, so only
    # SPY is actually added -- confirm whether this switch is intentional.
    discretionary_symbols = ['SPY']

    # All Symbols for Output (sorted() already orders them, no second sort)
    all_symbols = pd.Series(sorted(set(sp500_symbols + discretionary_symbols)))
    to_pickle_and_CSV(all_symbols, 'current_symbols')
    return all_symbols.tolist()
def fetch_price_table():
    """Download prices for ``all_symbols`` from 2014 onward and save them.

    Returns:
        The price table produced by ``make_price_table``.
    """
    universe = all_symbols
    destination = '/Users/paulwainer/Paulthon/DataFiles/StockPrices/stock_prices'

    table = make_price_table(
        universe,
        start=dt.datetime(2014, 1, 1),
        end=dt.datetime.today(),
        file_name=destination,
    )

    # Persist the table under the same destination before handing it back.
    to_pickle_and_CSV(table, destination)
    return table
def create_combined_df(self, expiry, option_type='Call', display=False):
    """Build the base and greeks frames for one expiry and merge them.

    Each intermediate frame, and the merged result, is printed in full and
    dumped via ``to_pickle_and_CSV`` under 'base', 'greeks' and 'combined'.

    Args:
        expiry: Forwarded to ``create_base_df`` and ``create_greeks_df``.
        option_type: Forwarded to both builders; defaults to 'Call'.
        display: Forwarded to both builders.

    Returns:
        The frame returned by ``merge_dfs_horizontally``.
    """

    def show_and_dump(frame, label):
        # Print first, then persist -- same order for every stage.
        print(frame.to_string())
        to_pickle_and_CSV(frame, label)
        return frame

    base = show_and_dump(
        self.create_base_df(expiry, option_type, display=display), 'base')
    greeks = show_and_dump(
        self.create_greeks_df(expiry, option_type, display=display), 'greeks')
    return show_and_dump(merge_dfs_horizontally([base, greeks]), 'combined')
def get_betas_over_iterable(stock,
                            index,
                            lookback=252,
                            stock_ceiling_params=DEFAULT_STOCK_CEILING_PARAMS,
                            index_floor_params=DEFAULT_INDEX_FLOOR_PARAMS,
                            best_fit_param=BEST_FIT_PERCENTILE,
                            save_to_file=False,
                            file_name='default'):
    """Tabulate beta information while one of stock/index/lookback varies.

    Exactly one of ``stock``, ``index`` and ``lookback`` may hold more than
    one value; that parameter becomes the row index of the result. A bare
    scalar (including a single symbol string or the integer default for
    ``lookback``) is treated as a one-element list.

    Args:
        stock: One stock symbol or a sequence of them.
        index: One index symbol or a sequence of them.
        lookback: One lookback or a sequence of them.
        stock_ceiling_params: Forwarded to ``get_scrub_params``.
        index_floor_params: Forwarded to ``get_scrub_params``.
        best_fit_param: Forwarded to ``get_scrub_params``.
        save_to_file: When truthy, persist the result via
            ``to_pickle_and_CSV``.
        file_name: Destination used when ``save_to_file`` is truthy.

    Returns:
        DataFrame with one row per value of the varying parameter and a
        two-level column index (fixed-parameter label, metric name).

    Raises:
        ValueError: If more than one of stock/index/lookback varies.
    """

    def _as_list(value):
        # A string is iterable but stands for a single symbol.
        if isinstance(value, str) or not hasattr(value, '__iter__'):
            return [value]
        return list(value)

    stock = _as_list(stock)
    index = _as_list(index)
    lookback = _as_list(lookback)

    # Decide up front which parameter labels the rows, so a bad call fails
    # before any expensive beta calculation.
    varying = [values for values in (stock, index, lookback)
               if len(values) > 1]
    if len(varying) > 1:
        raise ValueError(
            'Only one of stock, index and lookback may hold several values')
    # With nothing varying, fall back to the single stock as the row label.
    iterable = varying[0] if varying else stock

    if iterable is stock:
        not_iterable = index[0]
    elif iterable is index:
        not_iterable = stock[0]
    else:
        not_iterable = 'Stock: {}, Index {}'.format(stock[0], index[0])

    param_combos = list(
        itertools.product(stock, index, lookback, [stock_ceiling_params],
                          [index_floor_params], [best_fit_param]))
    stocks = [combo[0] for combo in param_combos]
    indices = [combo[1] for combo in param_combos]
    lookbacks = [combo[2] for combo in param_combos]
    runs = list(zip(stocks, indices, lookbacks))

    scrub_params_all = [get_scrub_params(*combo) for combo in param_combos]
    betas = [
        Beta(run_stock, run_index, run_lookback, scrub_params)
        for (run_stock, run_index, run_lookback), scrub_params in zip(
            runs, scrub_params_all)
    ]

    # OLS Info
    beta_values = [beta.beta_value for beta in betas]
    corrs = [beta.corr for beta in betas]

    # Scrubbing Info
    stock_cutoffs = [
        scrub_params.stock_cutoff for scrub_params in scrub_params_all
    ]
    index_cutoffs = [
        scrub_params.index_cutoff for scrub_params in scrub_params_all
    ]
    percentile_cutoffs = [
        scrub_params.percentile_cutoff for scrub_params in scrub_params_all
    ]
    percent_days_in_calc = [beta.percent_days_in_calculation for beta in betas]

    # Beta to SPY Info
    index_betas_to_SPY = [
        get_ETF_beta_to_SPY(run_index) for run_index in indices
    ]
    betas_to_SPY = [
        index_beta * beta_value
        for index_beta, beta_value in zip(index_betas_to_SPY, beta_values)
    ]

    # Returns Info
    stock_returns = [
        get_total_return(run_stock, run_lookback)
        for run_stock, _, run_lookback in runs
    ]
    index_returns = [
        get_total_return(run_index, run_lookback)
        for _, run_index, run_lookback in runs
    ]
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for stock_return, index_return, beta_value in zip(
            stock_returns, index_returns, beta_values)
    ]

    # Unadjusted OLS Info (no scrubbing applied)
    unadjusted_betas = [
        Beta(run_stock, run_index, run_lookback,
             ScrubParams(False, False, False))
        for run_stock, run_index, run_lookback in runs
    ]
    unadjusted_beta_values = [beta.beta_value for beta in unadjusted_betas]
    unadjusted_corrs = [beta.corr for beta in unadjusted_betas]

    # Prepare Information for the DataFrame in an Ordered Dictionary
    table_info_dict = OrderedDict([
        # Unadjusted OLS Info
        ('Unadj. Beta', unadjusted_beta_values),
        ('Unadj. Corr', unadjusted_corrs),
        # OLS Info
        ('Beta', beta_values),
        ('Corr', corrs),
        # Beta to SPY Info
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),
        # Returns Info
        ('Stock_Return', stock_returns),
        ('Index_Return', index_returns),
        ('Idio_return', idio_returns),
        # Scrubbing Info
        ('Stock_Cutoff', stock_cutoffs),
        ('Index_Cutoff', index_cutoffs),
        ('Percentile_Cutoff', percentile_cutoffs),
        ('Percent_Days', percent_days_in_calc)
    ])

    # Create DataFrame
    column_labels = list(table_info_dict.keys())
    table_info = list(zip(*table_info_dict.values()))
    index_row = pd.Index(iterable, name='Index')
    index_column = pd.MultiIndex.from_product([[not_iterable], column_labels],
                                              names=['Stock', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if save_to_file:
        to_pickle_and_CSV(df, file_name)
    return df
def get_betas_multiple_stocks(
        stocks: 'iterable of stocks',
        index: 'one index',
        lookback=252,
        stock_ceiling_params=DEFAULT_STOCK_CEILING_PARAMS,
        index_floor_params=DEFAULT_INDEX_FLOOR_PARAMS,
        best_fit_param=BEST_FIT_PERCENTILE,
        save_to_file=False,
        file_name='default'):
    """Tabulate beta information for several stocks against one index.

    Args:
        stocks: Stock symbols; one output row per symbol.
        index: The single index every stock is measured against.
        lookback: Forwarded to the scrub, beta and return helpers.
        stock_ceiling_params: Forwarded to ``get_scrub_params``.
        index_floor_params: Forwarded to ``get_scrub_params``.
        best_fit_param: Forwarded to ``get_scrub_params``.
        save_to_file: When truthy, persist the result via
            ``to_pickle_and_CSV``.
        file_name: Destination used when ``save_to_file`` is truthy.

    Returns:
        DataFrame indexed by stock with a two-level column index whose top
        level is the literal label 'Index'.
    """
    stocks = list(stocks)
    n_stocks = len(stocks)

    scrub_params_all = [
        get_scrub_params(stock, index, lookback, stock_ceiling_params,
                         index_floor_params, best_fit_param)
        for stock in stocks
    ]
    betas = [
        Beta(stock, index, lookback, scrub_params)
        for stock, scrub_params in zip(stocks, scrub_params_all)
    ]
    beta_values = [beta.beta_value for beta in betas]

    # Index-level figures are identical on every row: compute them once,
    # and not at all when there is nothing to tabulate.
    index_return = get_total_return(index, lookback) if n_stocks else None
    index_beta_to_SPY = get_ETF_beta_to_SPY(index) if n_stocks else None

    # Returns Info
    returns = [get_total_return(stock, lookback) for stock in stocks]
    index_returns = [index_return] * n_stocks
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for stock_return, beta_value in zip(returns, beta_values)
    ]

    # Beta to SPY Info: the index's beta to SPY scaled by the stock's beta
    # to that index (this was previously computed from the stock symbol).
    index_betas_to_SPY = [index_beta_to_SPY] * n_stocks
    betas_to_SPY = [
        index_beta_to_SPY * beta_value for beta_value in beta_values
    ]

    # Prepare Information for the DataFrame in an Ordered Dictionary
    info = OrderedDict([
        # Index Symbol
        ('Index', [index] * n_stocks),
        # OLS Info
        ('Beta', beta_values),
        ('Corr', [beta.corr for beta in betas]),
        # Beta to SPY Info
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),
        # Returns Info
        ('Stock_Return', returns),
        ('Index_Return', index_returns),
        ('Idio_Return', idio_returns),
        # Scrubbing Info
        ('Stock_Cutoff',
         [scrub_params.stock_cutoff for scrub_params in scrub_params_all]),
        ('Index_Cutoff',
         [scrub_params.index_cutoff for scrub_params in scrub_params_all]),
        ('Percentile_Cutoff',
         [scrub_params.percentile_cutoff
          for scrub_params in scrub_params_all]),
        ('Percent_Days', [beta.percent_days_in_calculation for beta in betas])
    ])

    # Create DataFrame
    column_labels = list(info.keys())
    # Unpack the columns so zip transposes them into one tuple per stock.
    table_info = list(zip(*info.values()))
    index_row = pd.Index(stocks, name='Stock')
    index_column = pd.MultiIndex.from_product([['Index'], column_labels],
                                              names=['Index', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if save_to_file:
        to_pickle_and_CSV(df, file_name)
    return df
def get_betas_multiple_indices(
        stock,
        indices: 'iterable of indices',
        lookback=252,
        stock_ceiling_params=DEFAULT_STOCK_CEILING_PARAMS,
        index_floor_params=DEFAULT_INDEX_FLOOR_PARAMS,
        best_fit_param=BEST_FIT_PERCENTILE,
        save_to_file=False,
        file_name='default'):
    """Tabulate beta information for one stock against several indices.

    Args:
        stock: The single stock being measured.
        indices: Index symbols; one output row per index.
        lookback: Forwarded to the scrub, beta and return helpers.
        stock_ceiling_params: Forwarded to ``get_scrub_params``.
        index_floor_params: Forwarded to ``get_scrub_params``.
        best_fit_param: Forwarded to ``get_scrub_params``.
        save_to_file: When truthy, persist the result via
            ``to_pickle_and_CSV``.
        file_name: Destination used when ``save_to_file`` is truthy.

    Returns:
        DataFrame indexed by index symbol with a two-level column index
        (stock symbol, metric name).
    """
    indices = list(indices)
    n_indices = len(indices)

    scrub_params_all = [
        get_scrub_params(stock, index, lookback, stock_ceiling_params,
                         index_floor_params, best_fit_param)
        for index in indices
    ]
    betas = [
        Beta(stock, index, lookback, scrub_params)
        for index, scrub_params in zip(indices, scrub_params_all)
    ]

    # OLS Info
    beta_values = [beta.beta_value for beta in betas]
    corrs = [beta.corr for beta in betas]

    # Scrubbing Info
    stock_cutoffs = [
        scrub_params.stock_cutoff for scrub_params in scrub_params_all
    ]
    index_cutoffs = [
        scrub_params.index_cutoff for scrub_params in scrub_params_all
    ]
    percentile_cutoffs = [
        scrub_params.percentile_cutoff for scrub_params in scrub_params_all
    ]
    percent_days_in_calc = [beta.percent_days_in_calculation for beta in betas]

    # Beta to SPY Info
    index_betas_to_SPY = [get_ETF_beta_to_SPY(index) for index in indices]
    betas_to_SPY = [
        index_beta * beta_value
        for index_beta, beta_value in zip(index_betas_to_SPY, beta_values)
    ]

    # Returns Info: the stock's return is the same on every row, so it is
    # computed once (and skipped when there are no indices).
    stock_return = get_total_return(stock, lookback) if n_indices else None
    returns = [stock_return] * n_indices
    index_returns = [get_total_return(index, lookback) for index in indices]
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for index_return, beta_value in zip(index_returns, beta_values)
    ]

    # Unadjusted OLS Info (no scrubbing applied)
    unadjusted_betas = [
        Beta(stock, index, lookback, ScrubParams(False, False, False))
        for index in indices
    ]
    unadjusted_beta_values = [beta.beta_value for beta in unadjusted_betas]
    unadjusted_corrs = [beta.corr for beta in unadjusted_betas]

    # Prepare Information for the DataFrame in an Ordered Dictionary
    table_info_dict = OrderedDict([
        # Unadjusted OLS Info
        ('Unadj. Beta', unadjusted_beta_values),
        ('Unadj. Corr', unadjusted_corrs),
        # OLS Info
        ('Beta', beta_values),
        ('Corr', corrs),
        # Beta to SPY Info
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),
        # Returns Info
        ('Stock_Return', returns),
        ('Index_Return', index_returns),
        ('Idio_return', idio_returns),
        # Scrubbing Info
        ('Stock_Cutoff', stock_cutoffs),
        ('Index_Cutoff', index_cutoffs),
        ('Percentile_Cutoff', percentile_cutoffs),
        ('Percent_Days', percent_days_in_calc)
    ])

    # Create DataFrame
    column_labels = list(table_info_dict.keys())
    table_info = list(zip(*table_info_dict.values()))
    index_row = pd.Index(indices, name='Index')
    index_column = pd.MultiIndex.from_product([[stock], column_labels],
                                              names=['Stock', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if save_to_file:
        to_pickle_and_CSV(df, file_name)
    return df
def get_betas_multiple_stocks(stocks,
                              index,
                              lookback,
                              sd_cutoff_params=sd_cutoff_params,
                              percentile_cutoff=.85,
                              to_file=False,
                              file_name='default'):
    """Tabulate beta information for several stocks against one index.

    Args:
        stocks: Sized collection of stock symbols; one output row per symbol.
        index: The single index every stock is measured against.
        lookback: Forwarded to the scrub, beta and return helpers.
        sd_cutoff_params: Forwarded to
            ``get_scrub_params_from_sd_cutoff_params``.
        percentile_cutoff: Forwarded to the scrub helper and repeated in the
            'Percentile_Cutoff' column.
        to_file: When truthy, persist the result via ``to_pickle_and_CSV``.
        file_name: Destination used when ``to_file`` is truthy.

    Returns:
        DataFrame indexed by stock with a two-level column index
        (index symbol, metric name).
    """
    n_stocks = len(stocks)

    # Calculate ScrubParams
    scrub_params_all = [
        get_scrub_params_from_sd_cutoff_params(stock, index, lookback,
                                               sd_cutoff_params,
                                               percentile_cutoff)
        for stock in stocks
    ]

    # Establish Table Info
    betas = [
        Beta_StepTwo(stock, index, lookback, scrub_params)
        for stock, scrub_params in zip(stocks, scrub_params_all)
    ]
    beta_values = [beta.beta_value for beta in betas]
    corrs = [beta.corr for beta in betas]
    stock_cutoffs = [
        scrub_params.stock_cutoff for scrub_params in scrub_params_all
    ]
    index_cutoffs = [
        scrub_params.index_cutoff for scrub_params in scrub_params_all
    ]
    percentile_cutoffs = [percentile_cutoff] * n_stocks

    # Index-level figures do not depend on the stock: compute each once
    # instead of once (or twice) per row, and skip them for empty input.
    index_beta_to_SPY = get_ETF_beta_to_SPY(index) if n_stocks else None
    index_return = get_total_return(index, lookback) if n_stocks else None

    index_betas_to_SPY = [index_beta_to_SPY] * n_stocks
    betas_to_SPY = [
        index_beta_to_SPY * beta_value for beta_value in beta_values
    ]

    returns = [get_total_return(stock, lookback) for stock in stocks]
    index_returns = [index_return] * n_stocks
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for stock_return, beta_value in zip(returns, beta_values)
    ]
    percent_days_in_calc = [beta.percent_days_in_calculation for beta in betas]

    # Create DataFrame
    table_info = list(
        zip(beta_values, corrs, stock_cutoffs, index_cutoffs,
            percentile_cutoffs, index_betas_to_SPY, betas_to_SPY, returns,
            index_returns, idio_returns, percent_days_in_calc))

    info_labels = [
        'Beta', 'Corr', 'Stock_Cutoff', 'Index_Cutoff', 'Percentile_Cutoff',
        'Index_Beta_to_SPY', 'Beta_to_SPY', 'Return', 'Index_Return',
        'Idio_Return', 'Percent_Days'
    ]

    index_row = pd.Index(stocks, name='Stock')
    index_column = pd.MultiIndex.from_product([[index], info_labels],
                                              names=['Index', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if to_file:
        to_pickle_and_CSV(df, file_name)
    return df