Example #1
from pandas_datareader.data import DataReader


def stockdata():

    start = '2017-10-25'
    end = '2017-10-27'

    # Collect the stock data (adjusted close)
    stocks = DataReader("AMZN", 'yahoo', start, end)['Adj Close']

    # Calculate daily returns
    yields = stocks / stocks.shift(1) - 1
    df_yields = yields.drop(yields.index[0])

    print(stocks)

    # stockdata_file = stocks.to_csv('stockdata_amzn.csv')

    return df_yields  # , stockdata_file
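The 'yahoo' endpoint of pandas-datareader has repeatedly broken as Yahoo changed its API, so the call above may fail on current library versions. A minimal alternative sketch using the yfinance package (an assumption; it is not part of the original example) that produces the same daily-return series:

import yfinance as yf

def stockdata_yf(ticker='AMZN', start='2017-10-25', end='2017-10-27'):
    # With auto_adjust=True, 'Close' already contains the adjusted close
    prices = yf.download(ticker, start=start, end=end, auto_adjust=True)['Close']
    returns = prices / prices.shift(1) - 1  # simple daily returns
    return returns.dropna()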
Example #2
import pprint

import pandas as pd

# 'df' is assumed to be an OHLC price DataFrame loaded earlier (the start of
# this example is truncated)
# Bollinger bands - rolling mean plus/minus 2 rolling standard deviations
df['Boll_Up_10_2'] = df['Close'].rolling(
    10).mean() + 2 * df['Close'].rolling(10).std()
df['Boll_Down_10_2'] = df['Close'].rolling(
    10).mean() - 2 * df['Close'].rolling(10).std()

# Donchian channels - rolling maximum and minimum prices during the same periods as moving avg
for channel_period in [5, 10, 20, 50, 100, 200]:
    up_name = 'Don_Ch_Up_%d' % (channel_period)
    down_name = 'Don_Ch_Down_%d' % (channel_period)
    df[up_name] = df['High'].rolling(channel_period).max()
    df[down_name] = df['Low'].rolling(channel_period).min()

# Shifted into time lags, 1-10 days prior
newdata = df['Close'].to_frame()
for lag in range(1, 11):
    shifted = df.shift(lag)
    shifted.columns = [
        '%s_shift_by_%d' % (column, lag) for column in shifted.columns
    ]
    newdata = pd.concat((newdata, shifted), axis=1)

# Future Days - target days to predict
forward_lag = 5
newdata['target'] = newdata['Close'].shift(-forward_lag)
newdata = newdata.drop('Close', axis=1)
newdata = newdata.dropna()
pprint.pprint(newdata, width=80)

X = newdata.drop('target', axis=1)
Y = newdata['target']
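With the features X and target Y assembled, a quick way to sanity-check the setup is to fit a simple model. A minimal sketch assuming scikit-learn is available (the model choice is illustrative and not part of the original example); note the chronological split, since shuffling time-series rows would leak future information:

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Keep rows in time order: train on the earlier 80%, test on the later 20%
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=False)

model = LinearRegression()
model.fit(X_train, Y_train)
print('R^2 on held-out data:', model.score(X_test, Y_test))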
Example #3
import datetime as dt
from collections import OrderedDict, defaultdict

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas_datareader.data import DataReader


def EventStudies():

    # Define list of stocks to conduct event analysis on.
    symbols_list = ['AES', 'AET', 'AFL', 'AVP', 'CLX', 'GM', '^GSPC']

    # Start and End dates
    dt_start = dt.datetime(2012, 1, 1)
    dt_end = dt.datetime(2015, 1, 1)

    # Download historical Adjusted Closing prices using Pandas downloader for Yahoo
    data = DataReader(symbols_list, 'yahoo', dt_start, dt_end)['Adj Close']

    # Create dataframe data_ret which includes returns
    data_ret = data / data.shift(1) - 1

    # Define event threshold variable daily_diff
    daily_diff = 0.03

    # Positive event if daily stock return > market return by daily_diff
    # Negative event if daily stock return < market return by daily_diff
    # otherwise no event has occurred.

    # Create an events data frame data_events, where columns = names of all stocks, and rows = daily dates
    events_col = symbols_list[:]  # Use [:] to deep copy the list
    events_col.remove('^GSPC')  # We don't need to create events for the S&P500
    events_index = data_ret.index  # Copy the date index from data_ret to the events data frame
    data_events = pd.DataFrame(index=events_index, columns=events_col)

    # Fill in data_events with 1 for positive events, -1 for negative events, and NA otherwise.
    for i in events_col:
        data_events[i] = np.where(
            (data_ret[i] - data_ret['^GSPC']) > daily_diff, 1,
            np.where((data_ret[i] - data_ret['^GSPC']) < -daily_diff, -1,
                     np.nan))

    # Calculate abnormal returns based on market model (R_it = a_i + B_i*R_mt + e_it)
    # Define estimation period L1: the longer it is, the more accurate the model
    L1 = 30

    # Define the length of the forward- and backward-looking window. It should be less than L1.
    window = 20

    # Create 2 dictionaries of dictionaries (for positive and negative events) to store the
    # abnormal returns (AR) values of each window day, for each stock.
    pos_dict = defaultdict(dict)
    neg_dict = defaultdict(dict)

    # For each stock, locate each event and calculate abnormal return for previous window days and future window days
    for s in events_col:
        pos_event_dates = data_events[s][data_events[s] == 1].index.tolist()
        neg_event_dates = data_events[s][data_events[s] == -1].index.tolist()

        # Create dictionary for each stock to store the AR values of each window day for each event
        pos_dict_s = defaultdict(dict)
        neg_dict_s = defaultdict(dict)

        for pos_event in pos_event_dates:
            date_loc = data_ret.index.get_loc(pos_event)
            # Go to beginning of backward window and calculate AR from backward till forward window.
            date_loc = date_loc - window

            if date_loc > L1 and date_loc <= len(data_ret) - (2 * window + 1):
                index_range = (2 * window) + 1

                # Create a dictionary to store the AR values for each day of this event
                pos_dict_s_event = OrderedDict()
                for d in range(index_range):
                    date_loc2 = date_loc + d

                    # Parameters to estimate market model
                    u_i = data_ret[s].iloc[date_loc2 - L1:date_loc2 - 1].mean()
                    u_m = data_ret['^GSPC'].iloc[date_loc2 - L1:date_loc2 - 1].mean()
                    R_i = data_ret[s].iloc[date_loc2]
                    R_m = data_ret['^GSPC'].iloc[date_loc2]
                    beta_i = ((R_i - u_i) * (R_m - u_m)) / (R_m - u_m)**2
                    alpha_i = u_i - (beta_i * u_m)
                    # Residual variance estimate (computed but not used below)
                    var_err = (1 / (L1 - 2)) * (R_i - alpha_i - (beta_i * R_m))**2
                    AR_i = R_i - alpha_i - (beta_i * R_m)

                    pos_dict_s_event[date_loc2] = AR_i

                pos_dict_s[pos_event] = pos_dict_s_event

        pos_dict[s] = pos_dict_s

        for neg_event in neg_event_dates:
            date_loc = data_ret.index.get_loc(neg_event)
            # Go to beginning of backward window and calculate AR from backward till forward window.
            date_loc = date_loc - window

            if date_loc > L1 and date_loc <= len(data_ret) - (2 * window + 1):
                index_range = (2 * window) + 1

                # Create a dictionary to store the AR values for each day of this event
                neg_dict_s_event = OrderedDict()
                for d in range(index_range):
                    date_loc2 = date_loc + d

                    # Parameters to estimate market model
                    u_i = data_ret[s].iloc[date_loc2 - L1:date_loc2 - 1].mean()
                    u_m = data_ret['^GSPC'].iloc[date_loc2 - L1:date_loc2 - 1].mean()
                    R_i = data_ret[s].iloc[date_loc2]
                    R_m = data_ret['^GSPC'].iloc[date_loc2]
                    beta_i = ((R_i - u_i) * (R_m - u_m)) / (R_m - u_m)**2
                    alpha_i = u_i - (beta_i * u_m)
                    # Residual variance estimate (computed but not used below)
                    var_err = (1 / (L1 - 2)) * (R_i - alpha_i - (beta_i * R_m))**2
                    AR_i = R_i - alpha_i - (beta_i * R_m)

                    neg_dict_s_event[date_loc2] = AR_i

                neg_dict_s[neg_event] = neg_dict_s_event

        neg_dict[s] = neg_dict_s

    # Create empty Abnormal Returns data frame
    abret_col = symbols_list[:]  # Use [:] to deep copy the list
    abret_col.remove('^GSPC')  # We don't need to calculate abnormal returns for the S&P500
    abret_index = range(-window, window + 1)
    pos_data_abret = pd.DataFrame(index=abret_index, columns=abret_col)
    neg_data_abret = pd.DataFrame(index=abret_index, columns=abret_col)

    for h in abret_col:
        if h in pos_dict.keys():
            for z in abret_index:
                pos_data_abret.loc[z, h] = np.mean(
                    [list(x.values())[z + window] for x in pos_dict[h].values()])

    for f in abret_col:
        if f in neg_dict.keys():
            for v in abret_index:
                neg_data_abret.loc[v, f] = np.mean(
                    [list(x.values())[v + window] for x in neg_dict[f].values()])

    # Create Cumulative Abnormal Return (CAR) Tables pos_CAR and neg_CAR
    pos_CAR = pos_data_abret.astype(float).cumsum()
    neg_CAR = neg_data_abret.astype(float).cumsum()

    # Plot pos_CAR and neg_CAR
    plt.clf()
    plt.plot(pos_CAR)
    plt.legend(pos_CAR.columns)
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('PositiveCAR_All.png', format='png')

    plt.clf()
    plt.plot(neg_CAR)
    plt.legend(neg_CAR.columns)
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('NegativeCAR_All.png', format='png')

    # Sum CAR for positive and negative events to plot only the aggregate CAR

    pos_CAR['SUM'] = pos_CAR.sum(axis=1)
    neg_CAR['SUM'] = neg_CAR.sum(axis=1)

    plt.clf()
    plt.plot(pos_CAR['SUM'])
    plt.legend(['SUM'])
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('PositiveCAR_SUM.png', format='png')

    plt.clf()
    plt.plot(neg_CAR['SUM'])
    plt.legend(['SUM'])
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('NegativeCAR_SUM.png', format='png')
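One statistical caveat: beta_i above is computed from a single day's observation, so the expression ((R_i - u_i) * (R_m - u_m)) / (R_m - u_m)**2 algebraically collapses to (R_i - u_i) / (R_m - u_m). A conventional market-model fit instead regresses the stock's returns on the market's over the full L1-day estimation window. A hedged sketch of that alternative estimator (it differs from what the code above does):

def market_model_params(stock_ret, mkt_ret):
    # OLS estimates over the estimation window:
    # beta = Cov(R_i, R_m) / Var(R_m), alpha = mean(R_i) - beta * mean(R_m)
    beta = stock_ret.cov(mkt_ret) / mkt_ret.var()
    alpha = stock_ret.mean() - beta * mkt_ret.mean()
    return alpha, beta

Called with stock_ret = data_ret[s].iloc[date_loc2 - L1:date_loc2 - 1] and the matching '^GSPC' slice, it would drop into the loop in place of the per-day alpha_i/beta_i lines.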
Example #4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from gurobipy import *
from pandas_datareader.data import DataReader
from datetime import datetime

stocks = ['ISP', 'CSV', 'RGC', 'WMS', 'GYB', 'KCC', 'BPL', 'WTW', 'GS', 'SPR']
ls_key = 'Adj Close'
start = datetime(2014, 1, 1)
end = datetime(2016, 12, 23)
# Fetch daily adjusted closes and drop NAs; WMS went public on 2014-07-25
price = DataReader(stocks, 'yahoo', start, end)[ls_key].dropna()[stocks]
# Calculate log return
rtn = np.log(price) - np.log(price.shift(1))
meanDailyReturns = rtn.mean()
covMatrix = rtn.cov()

# Annualized performance: return, volatility, and Sharpe ratio (252 trading days)
def performance(weights, meanReturns, covMatrix):
    portReturn = np.sum(meanReturns*weights)
    portStdDev = np.sqrt(np.dot(weights.T, np.dot(covMatrix, weights)))
    return portReturn * 252, portStdDev * np.sqrt(252), portReturn/portStdDev * np.sqrt(252)
# Visualize Efficient Frontier
numPortfolios = 10000
results = np.zeros((3, numPortfolios))
for i in range(numPortfolios):
    weights = np.random.random(len(stocks))
    weights /= np.sum(weights)
    results[0,i], results[1,i], results[2,i] = performance(weights, meanDailyReturns, covMatrix)
plt.scatter(results[1, :], results[0, :], c=results[2, :])
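Since row 2 of results holds each simulated portfolio's Sharpe ratio, the best portfolio can be highlighted directly on the scatter. A small follow-on sketch reusing the arrays above (the plotting choices are illustrative, not part of the original snippet):

best = results[2].argmax()  # index of the max-Sharpe portfolio
print('Max Sharpe %.2f: return %.1f%%, volatility %.1f%%' %
      (results[2, best], 100 * results[0, best], 100 * results[1, best]))
plt.scatter(results[1, best], results[0, best], marker='*', s=200, c='red')
plt.xlabel('Annualized volatility')
plt.ylabel('Annualized return')
plt.show()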
Example #5
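# 'amazon' is assumed to be a DataFrame of daily AMZN prices loaded in an
# earlier notebook cell (the start of this example is truncated)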
plt.legend(['price', 'quarter average'])

# %% shifting
fig, ax = plt.subplots(3, sharex=True)

amazon['Close'].plot(ax=ax[0])
amazon['Close'].shift(365).plot(ax=ax[1])
amazon['Close'].shift(-365).plot(ax=ax[2])

ax[0].legend(['input'])
ax[1].legend(['shift by 365'])
ax[2].legend(['shift by -365'])

# %% ROI
ROI = 100 * (amazon.shift(16) / amazon - 1)
ROI['Close'].plot()

# %% rolling windows
amazon = amazon.sort_index()
rolling = amazon['Close'].rolling(120)
df = pd.DataFrame({
    'input': amazon['Close'],
    'rolling_mean': rolling.mean(),
    'rolling_std': rolling.std()
})

fig, ax = plt.subplots(2, sharex=True)
amazon['Close'].plot(ax=ax[0])
amazon['Close'].rolling(120).mean().plot(ax=ax[0], logy=True)
amazon['Close'].rolling(120).std().plot(ax=ax[1], logy=True)
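The comparison frame df assembled above is not actually used by the plotting cell that follows it; an equivalent view plots it directly with pandas (same data, one call):

# %% plot the assembled frame directly
ax = df.plot(logy=True, title='AMZN close with 120-day rolling mean and std')
ax.set_ylabel('price (log scale)')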
Example #6
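# 'raw_uber' is assumed to be a DataFrame of daily prices for the UBER ticker
# loaded in an earlier notebook cell (the start of this example is truncated)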
resampled_uber = raw_uber.resample('BM').mean()

#%% Plot resampled and raw close data on one plot
raw_uber['Close'].plot()
resampled_uber['Close'].plot(style='--', color='green')

#%% Plot raw close values with shift on one plot
fig, ax = plt.subplots(3, sharex=True)
raw_uber['Close'].plot(ax=ax[0])
raw_uber['Close'].shift(100).plot(ax=ax[1])
raw_uber['Close'].shift(-100).plot(ax=ax[2])

ax[0].legend(['Input'])
ax[1].legend(['Shift by 100 days'])
ax[2].legend(['Shift by -100 days'])

#%% Calculate ROI index
ROI = 100 * (raw_uber.shift(15) / raw_uber - 1)

#%% Plot ROI index
ROI['Close'].plot()

#%% Plot close values with 30-day rolling mean, and rolling std of daily returns

fig, ax = plt.subplots(2, sharex=True)
raw_uber['Close'].plot(ax=ax[0])
raw_uber['Close'].rolling(window=30).mean().plot(ax=ax[0])
raw_uber['Close'].pct_change().rolling(16).std().plot(ax=ax[1])

ax[0].legend(['price', 'rolling mean'])
ax[1].legend(['rolling_std'])
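The 'BM' rule used at the top resamples to business-month ends; the rule string is the only thing that needs to change for other frequencies. A small variation (not in the original) adding weekly means to the same kind of plot:

#%% Weekly mean as an alternative resampling rule
weekly_uber = raw_uber.resample('W').mean()
weekly_uber['Close'].plot(style=':', color='orange')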