Python to_pickle_and_CSV 예제들, utility.general.to_pickle_and_CSV Python 예제들

예제 #1

0

파일 보기

def mc_distribution_to_distribution(mc_distribution,
                                    bins=10**4 + 1,
                                    to_file=False,
                                    file_name=None):
    """Bin Monte Carlo outcomes into a discrete distribution object.

    The samples are histogrammed into ``bins`` buckets. Each bucket becomes
    one state whose probability is its share of the samples and whose
    relative price is the bucket's center.

    Args:
        mc_distribution: Sample values, passed straight to ``np.histogram``.
        bins: Bin specification forwarded to ``np.histogram``.
        to_file: When truthy, the distribution's ``distribution_df`` is
            written with ``to_pickle_and_CSV``.
        file_name: Destination handed to ``to_pickle_and_CSV``.

    Returns:
        The object produced by ``distribution_info_to_distribution`` for the
        binned states.
    """
    mean_mc_price = np.mean(mc_distribution)

    counts, bin_edges = np.histogram(mc_distribution, bins)
    bin_centers = .5 * (bin_edges[1:] + bin_edges[:-1])

    # Total the histogram once; summing inside the comprehension made the
    # probability calculation quadratic in the number of bins.
    total_count = sum(counts)
    probs = [count / total_count for count in counts]

    # Bin centers are taken relative to a price of 1.0 (not to the mean of
    # the samples), so a center of 1.0 corresponds to a 0% move.
    relative_moves = [bin_center / 1.0 for bin_center in bin_centers]
    pct_moves = [relative_move - 1 for relative_move in relative_moves]

    distribution_info = {
        'State': np.array(range(len(counts))),
        'Prob': probs,
        'Pct_Move': pct_moves,
        'Relative_Price': relative_moves
    }

    # Build the distribution once and reuse it for both the file and the
    # return value.
    distribution = distribution_info_to_distribution(distribution_info)

    if to_file:
        to_pickle_and_CSV(distribution.distribution_df, file_name)

    logger.info("Iterations: {:,}".format(total_count))
    logger.info("Total Prob: {:.2f}".format(sum(probs)))
    logger.info("Mean Stock Price: {}".format(mean_mc_price))

    return distribution

예제 #2

0

파일 보기

def make_price_table(symbols: 'list',
                     start=dt.datetime(2016, 1, 1),
                     end=None,
                     file_name='default'):
    """Get prices from Yahoo's website for multiple symbols.

    Symbols are fetched on a pool of four worker threads. Each download is
    attempted up to nine times; a symbol that never succeeds is logged,
    listed under "Failed Symbols" and left out of the resulting table.

    Args:
        symbols: Ticker symbols to download.
        start: First date requested.
        end: Last date requested. ``None`` (the default) means today,
            resolved when the function is called rather than when the
            module is imported.
        file_name: Destination handed to ``to_pickle_and_CSV``.

    Returns:
        The value returned by ``orient_price_table`` for the outer join of
        the per-symbol ``adjclose`` columns.
    """
    if end is None:
        end = dt.datetime.today()

    max_attempts = 9
    query_attempts = {}
    data_points = {}
    failed_symbols = []

    def get_prices(symbol):
        """Return a one-column price frame for ``symbol``, or None."""
        print(symbol)
        for attempt in range(1, max_attempts + 1):
            print(attempt)
            try:
                df = web.get_data_yahoo(symbol, start,
                                        end).set_index('date').round(2)
            except Exception:
                # Best-effort download: any failure just triggers a retry.
                continue
            break
        else:
            # Only reached when every attempt raised, so a symbol is never
            # reported as failed and returned at the same time.
            logger.error("{} failed the query".format(symbol))
            failed_symbols.append(symbol)
            query_attempts[symbol] = max_attempts
            return None

        logger.info("{}: Attempts: {}".format(symbol, attempt))
        query_attempts[symbol] = attempt

        try:
            price_table = df.loc[:, ['adjclose']].rename(
                columns={'adjclose': symbol})
        except Exception:
            print("Fetch Yahoo prices reached the general Exception clause")
            failed_symbols.append(symbol)
            return None

        data_points[symbol] = price_table.shape[0]
        return price_table

    # The context manager shuts the worker threads down once map() returns.
    with ThreadPool(4) as pool:
        fetched = pool.map(get_prices, symbols)

    print('Query Attempts: ', query_attempts)
    print('Data Points: ', data_points)
    print('Failed Symbols: ', failed_symbols, end='\n')

    # Failed symbols yield None; keep only real frames for the join.
    price_tables = [table for table in fetched if table is not None]
    price_table = outer_join_dfs_horizontally(price_tables)
    price_table = orient_price_table(price_table)

    to_pickle_and_CSV(price_table, file_name)
    return price_table

예제 #3

0

파일 보기

def fetch_price_table():
    """Download prices for ``AllSymbols`` from 2015 onward and save them.

    The work only happens when the defining module runs as a script; when
    the module is imported the call does nothing and returns ``None``.
    """
    if __name__ != '__main__':
        return

    # The short hard-coded symbol list that used to be assigned first was
    # immediately replaced by AllSymbols, so only AllSymbols is used.
    destination = '/Users/paulwainer/Paulthon/DataFiles/StockPrices/sp500_prices_paul'
    table = make_price_table(AllSymbols,
                             start=dt.datetime(2015, 1, 1),
                             end=dt.datetime.today(),
                             file_name=destination)

    to_pickle_and_CSV(table, destination)

예제 #4

0

파일 보기

파일: barchart_spider.py 프로젝트: easternstarlinz/Paulthon

    def create_calls_and_puts_df(self, expiry, display=False):
        """Return the filtered calls-and-puts table for ``self.symbol``.

        A table already stored in ``self.calls_and_puts_cache`` under the
        symbol is returned as is. Otherwise the call and put tables are
        built, dumped to the 'calls' and 'puts' files, filtered by delta
        (calls above .45, puts below -.55), stacked, cached and returned.

        NOTE(review): the cache is keyed by symbol only, so a cached table
        is returned regardless of ``expiry`` -- confirm this is intended.

        Args:
            expiry: Expiry forwarded to ``create_combined_df``.
            display: Display flag forwarded to ``create_combined_df``.

        Returns:
            The cached or newly built calls-and-puts table.
        """
        try:
            print('Trying calls and puts cache')
            print(self.calls_and_puts_cache)
            return self.calls_and_puts_cache[self.symbol]
        except KeyError:
            # Cache miss: fall through and build the table.
            pass

        calls_df = self.create_combined_df(expiry, 'Call', display=display)
        puts_df = self.create_combined_df(expiry, 'Put', display=display)

        to_pickle_and_CSV(calls_df, 'calls')
        to_pickle_and_CSV(puts_df, 'puts')

        # Use the comparison as a row mask. Assigning the bare comparison
        # replaced each table with a boolean Series instead of filtering it.
        calls_df = calls_df[calls_df['Delta'] > .45]
        puts_df = puts_df[puts_df['Delta'] < -.55]

        calls_and_puts_df = append_dfs_vertically([calls_df, puts_df])
        self.calls_and_puts_cache[self.symbol] = calls_and_puts_df
        return calls_and_puts_df

예제 #5

0

파일 보기

def make_price_table(symbols: 'list',
                     start=dt.datetime(2016, 1, 1),
                     end=None,
                     file_name='default'):
    """Get prices from Yahoo's website for multiple symbols.

    Symbols are fetched on a pool of four worker threads, with up to nine
    download attempts each. The per-symbol ``adjclose`` columns are
    concatenated side by side, saved with ``to_pickle_and_CSV`` and
    returned.

    Args:
        symbols: Ticker symbols to download.
        start: First date requested.
        end: Last date requested. ``None`` (the default) means today,
            resolved when the function is called rather than when the
            module is imported.
        file_name: Destination handed to ``to_pickle_and_CSV``.

    Returns:
        A DataFrame with one column per successfully fetched symbol.

    Raises:
        ValueError: Raised by ``pd.concat`` when no symbol could be fetched.
    """
    if end is None:
        end = dt.datetime.today()

    max_attempts = 9
    query_attempts = []
    failed_symbols = []

    def get_prices(symbol):
        """Return a one-column price frame for ``symbol``, or None."""
        print(symbol)
        df = None
        attempts_made = 0
        while df is None and attempts_made < max_attempts:
            attempts_made += 1
            print(attempts_made)
            try:
                df = web.get_data_yahoo(symbol, start,
                                        end).set_index('date').round(2)
            except Exception:
                # Best-effort download: any failure just triggers a retry.
                df = None

        if df is None:
            # Report the failure only after the final attempt, so a symbol
            # cannot be both listed as failed and present in the table.
            logger.error("{} failed the query".format(symbol))
            failed_symbols.append(symbol)
            query_attempts.append(attempts_made)
            return None

        logger.info("{}: Attempts: {}".format(symbol, attempts_made))
        query_attempts.append(attempts_made)
        try:
            return df.loc[:, ['adjclose']].rename(
                columns={'adjclose': symbol})
        except Exception:
            # Unexpected frame layout: record the symbol instead of
            # dropping it without a trace.
            failed_symbols.append(symbol)
            return None

    # The context manager shuts the worker threads down once map() returns.
    with ThreadPool(4) as pool:
        price_tables = pool.map(get_prices, symbols)

    # pd.concat silently drops the None entries left by failed symbols.
    price_table = pd.concat(price_tables, axis=1)

    to_pickle_and_CSV(price_table, file_name)
    print(query_attempts, failed_symbols, price_table, end='\n')
    return price_table

예제 #6

0

파일 보기

파일: get_best_betas_2.py 프로젝트: easternstarlinz/Paulthon

def get_best_betas(stocks,
                   indices,
                   lookback,
                   sd_cutoff_params,
                   percentile_cutoff,
                   to_file=False,
                   file_name='default'):
    """Return a DataFrame of best betas based on highest correlation.

    For every stock the index with the highest (rounded) 'Corr' value is
    selected, the beta statistics against that index are recomputed, and
    the per-stock rows are stacked under a common 'Best' column level.
    The result is sorted by index (ascending) and correlation (descending)
    and printed.

    Args:
        stocks: Stock symbols to evaluate.
        indices: Candidate index/ETF symbols.
        lookback: Lookback forwarded to the beta helpers.
        sd_cutoff_params: Cutoff parameters forwarded to the beta helpers.
        percentile_cutoff: Percentile cutoff forwarded to the beta helpers.
        to_file: When truthy, save the result with ``to_pickle_and_CSV``.
        file_name: Destination used when ``to_file`` is set.

    Returns:
        The sorted best-beta DataFrame.
    """
    df = get_betas_for_multiple_stocks_and_indices(stocks, indices, lookback,
                                                   sd_cutoff_params,
                                                   percentile_cutoff)
    combinations = list(itertools.product(indices, ['Corr']))
    corr_df = df.loc[:, combinations].round(2)
    df['Best'] = corr_df.idxmax(axis=1)
    # idxmax yields (index, 'Corr') column labels; keep the index symbol.
    best = [i[0] for i in df.loc[:, 'Best'].tolist()]

    info = []
    for stock, index in zip(stocks, best):
        b = get_betas_multiple_stocks([stock], index, lookback,
                                      sd_cutoff_params, percentile_cutoff)
        # set_levels returns a new MultiIndex; its in-place form was
        # deprecated and later removed from pandas, so assign it back.
        b.columns = b.columns.set_levels(['Best'], level=0)
        b[('Best', 'Index')] = index
        info.append(b)

    df = append_dfs_vertically(info)
    stat_columns = df.loc[:, [('Best', column) for column in [
        'Beta', 'Corr', 'Stock_Cutoff', 'Index_Cutoff', 'Index_Beta_to_SPY',
        'Beta_to_SPY'
    ]]]
    index_column = df.loc[:, [('Best', 'Index')]]
    df = merge_dfs_horizontally([index_column, stat_columns]).sort_values(
        [('Best', 'Index'), ('Best', 'Corr')],
        ascending=[True, False],
        inplace=False)
    print(df.round(2).to_string())

    if to_file:
        to_pickle_and_CSV(df, file_name)
    return df

예제 #7

0

파일 보기

def get_sp500_symbols_from_wiki():
    """Pull S&P 500 symbols from Wikipedia and add discretionary symbols.

    The combined, de-duplicated and sorted symbols are saved under
    'current_symbols' with ``to_pickle_and_CSV`` and returned as a list.
    """
    # First table on the page, its first column, minus the first row
    # (presumably a header row -- confirm against the current page layout).
    wiki_tables = pd.read_html(
        'https://en.wikipedia.org/wiki/List_of_S&P_500_companies')
    first_column = wiki_tables[0][0]
    sp500_symbols = first_column[1:].reset_index(drop=True).tolist()

    # Discretionary stocks and indices.
    extra_stocks = HealthcareSymbols
    extra_indices = [
        'SPY', 'IWM', 'QQQ', 'IBB', 'XBI', 'XLP', 'XRT', 'XLV', 'XLF', 'XLE',
        'AMLP', 'VFH', 'GDX', 'XLU'
    ]
    extra_symbols = extra_stocks + extra_indices
    # NOTE(review): the combined list above is discarded; only SPY is
    # actually added to the S&P 500 symbols.
    extra_symbols = ['SPY']

    # All symbols for output.
    unique_symbols = set(sp500_symbols + extra_symbols)
    symbol_series = pd.Series(sorted(unique_symbols))
    symbol_series = symbol_series.sort_values().reset_index(drop=True)
    to_pickle_and_CSV(symbol_series, 'current_symbols')
    return symbol_series.values.tolist()

예제 #8

0

파일 보기

def fetch_price_table():
    """Download prices for ``all_symbols`` from 2014 onward and save them.

    Returns:
        The price table returned by ``make_price_table``.
    """
    tickers = all_symbols
    destination = '/Users/paulwainer/Paulthon/DataFiles/StockPrices/stock_prices'
    first_day = dt.datetime(2014, 1, 1)
    last_day = dt.datetime.today()

    table = make_price_table(tickers,
                             start=first_day,
                             end=last_day,
                             file_name=destination)

    # Written again here under the same destination passed to
    # make_price_table above.
    to_pickle_and_CSV(table, destination)
    return table

예제 #9

0

파일 보기

파일: barchart_spider.py 프로젝트: easternstarlinz/Paulthon

    def create_combined_df(self, expiry, option_type='Call', display=False):
        """Build the base and greeks tables and merge them side by side.

        Each of the three tables (base, greeks, combined) is printed in
        full and dumped with ``to_pickle_and_CSV`` under the names 'base',
        'greeks' and 'combined'.

        Returns:
            The merged table.
        """
        def show_and_save(frame, label):
            # Print the whole frame, dump it under `label`, hand it back.
            print(frame.to_string())
            to_pickle_and_CSV(frame, label)
            return frame

        base_df = show_and_save(
            self.create_base_df(expiry, option_type, display=display), 'base')
        greeks_df = show_and_save(
            self.create_greeks_df(expiry, option_type, display=display),
            'greeks')
        return show_and_save(merge_dfs_horizontally([base_df, greeks_df]),
                             'combined')

예제 #10

0

파일 보기

파일: get_best_betas_2.py 프로젝트: easternstarlinz/Paulthon

def get_betas_over_iterable(stock,
                            index,
                            lookback=252,
                            stock_ceiling_params=DEFAULT_STOCK_CEILING_PARAMS,
                            index_floor_params=DEFAULT_INDEX_FLOOR_PARAMS,
                            best_fit_param=BEST_FIT_PERCENTILE,
                            save_to_file=False,
                            file_name='default'):
    """Tabulate beta statistics while varying one of stock/index/lookback.

    One row is produced per combination of ``stock``, ``index`` and
    ``lookback``. The argument holding more than one value labels the rows;
    the fixed arguments label the top column level.

    Args:
        stock: Stock symbols (a bare string counts as one symbol).
        index: Index symbols (a bare string counts as one symbol).
        lookback: Lookbacks (a bare number counts as one lookback).
        stock_ceiling_params: Forwarded to ``get_scrub_params``.
        index_floor_params: Forwarded to ``get_scrub_params``.
        best_fit_param: Forwarded to ``get_scrub_params``.
        save_to_file: When truthy, save the table with
            ``to_pickle_and_CSV``.
        file_name: Destination used when ``save_to_file`` is set.

    Returns:
        A DataFrame indexed by the varying argument, with
        ('Stock', 'Beta_Info') column levels.

    Raises:
        ValueError: If none of ``stock``, ``index`` and ``lookback`` holds
            more than one value.
    """
    def as_sequence(value):
        # Scalars (including the default integer lookback) and bare
        # strings stand for a single value, not something to iterate over.
        if isinstance(value, str) or not hasattr(value, '__iter__'):
            return [value]
        return value

    stock = as_sequence(stock)
    index = as_sequence(index)
    lookback = as_sequence(lookback)

    # Identify the varying argument by position; when several arguments
    # hold more than one value the last one wins, as before.
    varying = None
    for position, candidate in enumerate((stock, index, lookback)):
        if len(candidate) > 1:
            varying = position
    if varying is None:
        raise ValueError(
            'one of stock, index or lookback must hold more than one value')

    if varying == 0:
        iterable = stock
        not_iterable = index[0]
    elif varying == 1:
        iterable = index
        not_iterable = stock[0]
    else:
        iterable = lookback
        not_iterable = 'Stock: {}, Index {}'.format(stock[0], index[0])

    param_combos = list(
        itertools.product(stock, index, lookback, [stock_ceiling_params],
                          [index_floor_params], [best_fit_param]))

    stocks = [combo[0] for combo in param_combos]
    indices = [combo[1] for combo in param_combos]
    lookbacks = [combo[2] for combo in param_combos]
    scrub_params_all = [get_scrub_params(*combo) for combo in param_combos]

    betas = [
        Beta(stock_, index_, lookback_, scrub_params)
        for stock_, index_, lookback_, scrub_params in zip(
            stocks, indices, lookbacks, scrub_params_all)
    ]

    # OLS Info
    beta_values = [beta.beta_value for beta in betas]
    corrs = [beta.corr for beta in betas]

    # Scrubbing Info
    stock_cutoffs = [
        scrub_params.stock_cutoff for scrub_params in scrub_params_all
    ]
    index_cutoffs = [
        scrub_params.index_cutoff for scrub_params in scrub_params_all
    ]
    percentile_cutoffs = [
        scrub_params.percentile_cutoff for scrub_params in scrub_params_all
    ]
    percent_days_in_calc = [beta.percent_days_in_calculation for beta in betas]

    # Beta to SPY Info
    index_betas_to_SPY = [get_ETF_beta_to_SPY(index_) for index_ in indices]
    betas_to_SPY = [
        index_beta * beta_value
        for index_beta, beta_value in zip(index_betas_to_SPY, beta_values)
    ]

    # Returns Info
    stock_returns = [
        get_total_return(stock_, lookback_)
        for stock_, lookback_ in zip(stocks, lookbacks)
    ]
    index_returns = [
        get_total_return(index_, lookback_)
        for index_, lookback_ in zip(indices, lookbacks)
    ]
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for stock_return, index_return, beta_value in zip(
            stock_returns, index_returns, beta_values)
    ]

    # Unadjusted OLS Info
    unadjusted_betas = [
        Beta(stock_, index_, lookback_, ScrubParams(False, False, False))
        for stock_, index_, lookback_ in zip(stocks, indices, lookbacks)
    ]
    unadjusted_beta_values = [beta.beta_value for beta in unadjusted_betas]
    unadjusted_corrs = [beta.corr for beta in unadjusted_betas]

    # Prepare Information for the DataFrame in an Ordered Dictionary
    table_info_dict = OrderedDict([
        # Unadjusted OLS Info
        ('Unadj. Beta', unadjusted_beta_values),
        ('Unadj. Corr', unadjusted_corrs),

        # OLS Info
        ('Beta', beta_values),
        ('Corr', corrs),

        # Beta to SPY Info
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),

        # Returns Info
        ('Stock_Return', stock_returns),
        ('Index_Return', index_returns),
        ('Idio_return', idio_returns),

        # Scrubbing Info
        ('Stock_Cutoff', stock_cutoffs),
        ('Index_Cutoff', index_cutoffs),
        ('Percentile_Cutoff', percentile_cutoffs),
        ('Percent_Days', percent_days_in_calc)
    ])

    # Create DataFrame: transpose the per-column lists into rows.
    column_labels = list(table_info_dict)
    table_info = list(zip(*table_info_dict.values()))

    index_row = pd.Index(iterable, name='Index')
    index_column = pd.MultiIndex.from_product([[not_iterable], column_labels],
                                              names=['Stock', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if save_to_file:
        to_pickle_and_CSV(df, file_name)

    return df

예제 #11

0

파일 보기

파일: get_best_betas_2.py 프로젝트: easternstarlinz/Paulthon

def get_betas_multiple_stocks(
        stocks: 'iterable of stocks',
        index: 'one index',
        lookback=252,
        stock_ceiling_params=DEFAULT_STOCK_CEILING_PARAMS,
        index_floor_params=DEFAULT_INDEX_FLOOR_PARAMS,
        best_fit_param=BEST_FIT_PERCENTILE,
        save_to_file=False,
        file_name='default'):
    """Tabulate beta statistics for several stocks against one index.

    Args:
        stocks: Stock symbols; one output row per stock.
        index: The single index every stock is measured against.
        lookback: Lookback forwarded to the beta and return helpers.
        stock_ceiling_params: Forwarded to ``get_scrub_params``.
        index_floor_params: Forwarded to ``get_scrub_params``.
        best_fit_param: Forwarded to ``get_scrub_params``.
        save_to_file: When truthy, save the table with
            ``to_pickle_and_CSV``.
        file_name: Destination used when ``save_to_file`` is set.

    Returns:
        A DataFrame indexed by stock with ('Index', 'Beta_Info') column
        levels.
    """
    scrub_params_all = [
        get_scrub_params(stock, index, lookback, stock_ceiling_params,
                         index_floor_params, best_fit_param)
        for stock in stocks
    ]

    betas = [
        Beta(stock, index, lookback, scrub_params)
        for stock, scrub_params in zip(stocks, scrub_params_all)
    ]
    beta_values = [beta.beta_value for beta in betas]

    # Index-level figures do not depend on the stock, so they are computed
    # once and repeated for every row (assumes the helpers are
    # deterministic for fixed arguments).
    index_return = get_total_return(index, lookback)
    # The index's own beta to SPY; this was previously computed from each
    # stock symbol, which mislabelled the 'Index_Beta_to_SPY' column.
    index_beta_to_SPY = get_ETF_beta_to_SPY(index)

    # Returns Info
    returns = [get_total_return(stock, lookback) for stock in stocks]
    index_returns = [index_return for _ in stocks]
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for stock_return, beta_value in zip(returns, beta_values)
    ]

    # Beta to SPY Info
    index_betas_to_SPY = [index_beta_to_SPY for _ in stocks]
    betas_to_SPY = [
        index_beta_to_SPY * beta_value for beta_value in beta_values
    ]

    # Prepare Information for the DataFrame in an Ordered Dictionary
    info = OrderedDict([
        # Index Symbol
        ('Index', [index for _ in stocks]),

        # OLS Info
        ('Beta', beta_values),
        ('Corr', [beta.corr for beta in betas]),

        # Beta to SPY Info
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),

        # Returns Info
        ('Stock_Return', returns),
        ('Index_Return', index_returns),
        ('Idio_Return', idio_returns),

        # Scrubbing Info
        ('Stock_Cutoff',
         [scrub_params.stock_cutoff for scrub_params in scrub_params_all]),
        ('Index_Cutoff',
         [scrub_params.index_cutoff for scrub_params in scrub_params_all]),
        ('Percentile_Cutoff',
         [scrub_params.percentile_cutoff
          for scrub_params in scrub_params_all]),
        ('Percent_Days', [beta.percent_days_in_calculation for beta in betas])
    ])

    # Create DataFrame. The column lists must be unpacked so zip transposes
    # them into one tuple per stock; zipping the un-starred values produced
    # one single-element tuple per column instead of rows.
    column_labels = list(info)
    table_info = list(zip(*info.values()))

    index_row = pd.Index(stocks, name='Stock')
    index_column = pd.MultiIndex.from_product([['Index'], column_labels],
                                              names=['Index', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if save_to_file:
        to_pickle_and_CSV(df, file_name)

    return df

예제 #12

0

파일 보기

파일: get_best_betas_2.py 프로젝트: easternstarlinz/Paulthon

def get_betas_multiple_indices(
        stock,
        indices: 'iterable of indices',
        lookback=252,
        stock_ceiling_params=DEFAULT_STOCK_CEILING_PARAMS,
        index_floor_params=DEFAULT_INDEX_FLOOR_PARAMS,
        best_fit_param=BEST_FIT_PERCENTILE,
        save_to_file=False,
        file_name='default'):
    """Tabulate beta statistics for one stock against several indices.

    Args:
        stock: The single stock symbol being measured.
        indices: Index symbols; one output row per index.
        lookback: Lookback forwarded to the beta and return helpers.
        stock_ceiling_params: Forwarded to ``get_scrub_params``.
        index_floor_params: Forwarded to ``get_scrub_params``.
        best_fit_param: Forwarded to ``get_scrub_params``.
        save_to_file: When truthy, save the table with
            ``to_pickle_and_CSV``.
        file_name: Destination used when ``save_to_file`` is set.

    Returns:
        A DataFrame indexed by index symbol whose top column level is the
        stock.
    """
    scrub_params_all = [
        get_scrub_params(stock, index, lookback, stock_ceiling_params,
                         index_floor_params, best_fit_param)
        for index in indices
    ]

    betas = [
        Beta(stock, index, lookback, scrub_params)
        for index, scrub_params in zip(indices, scrub_params_all)
    ]

    # OLS Info
    beta_values = [beta.beta_value for beta in betas]
    corrs = [beta.corr for beta in betas]

    # Scrubbing Info
    stock_cutoffs = [
        scrub_params.stock_cutoff for scrub_params in scrub_params_all
    ]
    index_cutoffs = [
        scrub_params.index_cutoff for scrub_params in scrub_params_all
    ]
    percentile_cutoffs = [
        scrub_params.percentile_cutoff for scrub_params in scrub_params_all
    ]
    percent_days_in_calc = [beta.percent_days_in_calculation for beta in betas]

    # Beta to SPY Info
    index_betas_to_SPY = [get_ETF_beta_to_SPY(index) for index in indices]
    betas_to_SPY = [
        index_beta * beta_value
        for index_beta, beta_value in zip(index_betas_to_SPY, beta_values)
    ]

    # Returns Info. The stock's return is the same for every index, so it
    # is computed once and repeated (assumes get_total_return is
    # deterministic for fixed arguments).
    stock_return = get_total_return(stock, lookback)
    returns = [stock_return for _ in indices]
    index_returns = [get_total_return(index, lookback) for index in indices]
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for index_return, beta_value in zip(index_returns, beta_values)
    ]

    # Unadjusted OLS Info
    unadjusted_betas = [
        Beta(stock, index, lookback, ScrubParams(False, False, False))
        for index in indices
    ]
    unadjusted_beta_values = [beta.beta_value for beta in unadjusted_betas]
    unadjusted_corrs = [beta.corr for beta in unadjusted_betas]

    # Prepare Information for the DataFrame in an Ordered Dictionary
    table_info_dict = OrderedDict([
        # Unadjusted OLS Info
        ('Unadj. Beta', unadjusted_beta_values),
        ('Unadj. Corr', unadjusted_corrs),

        # OLS Info
        ('Beta', beta_values),
        ('Corr', corrs),

        # Beta to SPY Info
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),

        # Returns Info
        ('Stock_Return', returns),
        ('Index_Return', index_returns),
        ('Idio_return', idio_returns),

        # Scrubbing Info
        ('Stock_Cutoff', stock_cutoffs),
        ('Index_Cutoff', index_cutoffs),
        ('Percentile_Cutoff', percentile_cutoffs),
        ('Percent_Days', percent_days_in_calc)
    ])

    # Create DataFrame: transpose the per-column lists into rows.
    column_labels = list(table_info_dict)
    table_info = list(zip(*table_info_dict.values()))

    index_row = pd.Index(indices, name='Index')
    index_column = pd.MultiIndex.from_product([[stock], column_labels],
                                              names=['Stock', 'Beta_Info'])
    df = pd.DataFrame(table_info, index=index_row, columns=index_column)

    if save_to_file:
        # The destination was previously an undefined name; it is now the
        # `file_name` parameter.
        to_pickle_and_CSV(df, file_name)

    return df

예제 #13

0

파일 보기

def get_betas_multiple_stocks(stocks,
                              index,
                              lookback,
                              sd_cutoff_params=sd_cutoff_params,
                              percentile_cutoff=.85,
                              to_file=False,
                              file_name='default'):
    """Tabulate beta statistics for several stocks against one index.

    Scrub parameters are derived per stock from the standard-deviation
    cutoff parameters, a beta is fitted for each stock, and the results
    are laid out with one row per stock under (index, statistic) columns.

    Args:
        stocks: Stock symbols; one output row per stock.
        index: The single index every stock is measured against.
        lookback: Lookback forwarded to the beta and return helpers.
        sd_cutoff_params: Forwarded to
            ``get_scrub_params_from_sd_cutoff_params``.
        percentile_cutoff: Forwarded to the scrub-parameter helper and
            repeated in the 'Percentile_Cutoff' column.
        to_file: When truthy, save the table with ``to_pickle_and_CSV``.
        file_name: Destination used when ``to_file`` is set.

    Returns:
        A DataFrame indexed by stock with ('Index', 'Beta_Info') column
        levels.
    """
    row_count = len(stocks)

    # Scrub parameters, one set per stock.
    scrub_params_all = []
    for symbol in stocks:
        scrub_params_all.append(
            get_scrub_params_from_sd_cutoff_params(symbol, index, lookback,
                                                   sd_cutoff_params,
                                                   percentile_cutoff))

    # Fitted betas and the values read from them.
    betas = [
        Beta_StepTwo(symbol, index, lookback, scrub_params_all[position])
        for position, symbol in enumerate(stocks)
    ]
    beta_values = [fitted.beta_value for fitted in betas]
    corrs = [fitted.corr for fitted in betas]
    stock_cutoffs = [params.stock_cutoff for params in scrub_params_all]
    index_cutoffs = [params.index_cutoff for params in scrub_params_all]
    percentile_cutoffs = [percentile_cutoff] * row_count

    # Beta-to-SPY figures (the index beta is looked up once per row).
    index_betas_to_SPY = [get_ETF_beta_to_SPY(index) for _ in stocks]
    betas_to_SPY = [
        get_ETF_beta_to_SPY(index) * beta_value for beta_value in beta_values
    ]

    # Returns.
    returns = [get_total_return(symbol, lookback) for symbol in stocks]
    index_returns = [get_total_return(index, lookback) for _ in stocks]
    idio_returns = [
        (1 + stock_return) / (1 + index_return * beta_value) - 1
        for stock_return, index_return, beta_value in zip(
            returns, index_returns, beta_values)
    ]
    percent_days_in_calc = [
        fitted.percent_days_in_calculation for fitted in betas
    ]

    # Pair each column label with its values, then transpose into rows.
    labelled_columns = [
        ('Beta', beta_values),
        ('Corr', corrs),
        ('Stock_Cutoff', stock_cutoffs),
        ('Index_Cutoff', index_cutoffs),
        ('Percentile_Cutoff', percentile_cutoffs),
        ('Index_Beta_to_SPY', index_betas_to_SPY),
        ('Beta_to_SPY', betas_to_SPY),
        ('Return', returns),
        ('Index_Return', index_returns),
        ('Idio_Return', idio_returns),
        ('Percent_Days', percent_days_in_calc),
    ]
    info_labels = [label for label, _ in labelled_columns]
    table_info = list(zip(*[values for _, values in labelled_columns]))

    column_index = pd.MultiIndex.from_product([[index], info_labels],
                                              names=['Index', 'Beta_Info'])
    df = pd.DataFrame(table_info,
                      index=pd.Index(stocks, name='Stock'),
                      columns=column_index)

    if to_file:
        to_pickle_and_CSV(df, file_name)

    return df