#!/usr/bin/env python
# coding: utf-8

# Fetch AAPL prices from the 'yahoo' source and compute a 100-row rolling
# mean of the 'Adj Close' column.

# In[2]:

import datetime

import pandas as pd
import pandas_datareader.data as web
from pandas import DataFrame, Series

start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2020, 6, 4)

df = web.DataReader("AAPL", 'yahoo', start, end)
df.tail()  # notebook cell output; has no visible effect when run as a script

# In[3]:

close_px = df['Adj Close']
mavg = close_px.rolling(100).mean()

# In[4]:

mavg  # notebook cell output
def test_iex_bad_symbol_list(self):
    """Requesting a symbol list that contains "BADTICKER" from "iex" must raise."""
    symbols = ["AAPL", "BADTICKER"]
    with pytest.raises(Exception):
        web.DataReader(symbols, "iex", self.start, self.end)
def test_single_symbol(self):
    """A single-symbol "iex" request yields the five expected columns and 476 rows."""
    expected_columns = ["open", "high", "low", "close", "volume"]
    frame = web.DataReader("AAPL", "iex", self.start, self.end)
    assert list(frame) == expected_columns
    assert len(frame) == 476
# Read every watchlist row, ordered alphabetically by ticker.
cur = conn.cursor(cursor_factory=RealDictCursor)
cur.execute("""
    SELECT * from mock_watchlist
    ORDER BY ticker ASC
    ;
    """)
res = cur.fetchall()
tickers = [row['ticker'] for row in res]
# Currently there are two tickers (BHP, CBA).
print(f"Tickers from db: {tickers}")

# For now, just query one stock, maybe NAB?
# Make the dataframe (build a custom function that takes a ticker plus start
# and end dates and returns the DF).
for tick in tickers:
    # For now just get 30 days.
    end = datetime.date.today()
    start = end - datetime.timedelta(days=30)

    wanted_columns = ['High', 'Low', 'Open', 'Adj Close']
    df = data.DataReader(
        f'{tick}.AX', start=start, end=end, data_source='yahoo'
    )[wanted_columns]

    adj_close = df['Adj Close']
    df['Up Today'] = adj_close > df['Open']
    df['% Change'] = adj_close.pct_change(periods=1) * 100

    # Add moving averages?
    # Add momentum indicator?
    # Compute here what kind of candlestick it is (maybe only last ten?)
    print(df)

# Build a function that takes a candlestick and classifies it (i.e. doji,
# etc.; use the Bedford book for definitions).
import datetime

import matplotlib.pyplot as plt
import pandas_datareader.data as web

# Date range for the AAPL request.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2016, 3, 19)
data = web.DataReader("AAPL", "yahoo", start, end)

# Plot 'Adj Close' against the frame's index.
adj_close = data['Adj Close']
plt.plot(data.index, adj_close)
plt.show()
import datetime as dt

import pandas_datareader.data as web

from machineTime import getMachineData

# Midnight at the start of the current day (only used by the disabled check below).
now = dt.datetime.combine(dt.datetime.now(), dt.time())
date = dt.datetime.fromtimestamp(1588341840)

# One-day window anchored on `date`; immediately superseded by the fixed
# range that follows.
start = date
end = date + dt.timedelta(days=1)  # dt.datetime.now()
start = dt.datetime(2015, 7, 1)
end = dt.datetime(2020, 7, 14)
# Disabled check: if(end > now): end = date

dfs = []
# NOTE(review): 'APPL' looks like a typo for 'AAPL' -- confirm before changing.
symbols = [
    "NBEV", "NEM", "AMD", "MKTX", "NVDA", "REGN", 'NLOK', 'HUM', 'VRTX',
    'RMD', 'APPL', 'ODFL', 'MSCI', 'CTXS', 'DVA', 'SBAC', 'TGT', 'DG',
    'MSFT', 'LDOS', 'ANSS',
]

for symbol in symbols:
    try:
        dfs.append(web.DataReader(symbol, 'yahoo', start, end))
    except Exception as exc:
        # Still best effort (one failing symbol must not abort the run), but
        # report it instead of silently dropping it. Catching Exception
        # rather than everything lets Ctrl-C stop the script.
        print("Skipping {}: {}".format(symbol, exc))

getMachineData(dfs)
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader.data as web

# Get GS data from Yahoo and keep only the rows with non-zero volume.
gs = web.DataReader("078930.KS", "yahoo", "2014-01-01", "2016-03-06")
new_gs = gs[gs['Volume'] != 0]

# Moving averages of the adjusted close over 5, 20, 60 and 120 rows.
adj_close = new_gs['Adj Close']
ma5 = adj_close.rolling(window=5).mean()
ma20 = adj_close.rolling(window=20).mean()
ma60 = adj_close.rolling(window=60).mean()
ma120 = adj_close.rolling(window=120).mean()

# Append each moving average as the new last column.
for label, series in (("MA5", ma5), ("MA20", ma20), ("MA60", ma60), ("MA120", ma120)):
    new_gs.insert(len(new_gs.columns), label, series)

# Plot the price and every moving average; each legend label is the column name.
for column in ("Adj Close", "MA5", "MA20", "MA60", "MA120"):
    plt.plot(new_gs.index, new_gs[column], label=column)

plt.legend(loc='best')
plt.grid()
plt.show()
import pandas as pd
import datetime
# Earlier import forms, kept for reference:
# import pandas.io.data as web
# from pandas_datareader import data, wb
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from matplotlib import style

style.use('ggplot')

# Date range for the XOM request.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2015, 1, 1)

df = web.DataReader("XOM", "yahoo", start, end)
print(df.head())

# Line plot of the 'Adj Close' column.
adj_close = df['Adj Close']
adj_close.plot()
plt.show()
import datetime as dt

import matplotlib.pyplot as plt
# `web` is used below but was never imported in this script.
import pandas_datareader.data as web

print(
    'This program will plot stock prices for you, just follow the prompts as they come...'
)

# Start and end dates are typed as YYYY-MM-DD and split into integer parts.
date_entry = input('Enter the start date for your plot YYYY-MM-DD format: ')
year, month, day = map(int, date_entry.split('-'))
start = dt.datetime(year, month, day)

date_entry2 = input('Enter the end date for your plot YYYY-MM-DD format: ')
year, month, day = map(int, date_entry2.split('-'))
end = dt.datetime(year, month, day)

ticker = input('Enter the ticker you wish to plot: ')

# Keep asking until the answer starts with 'y' or 'n' (case-insensitive).
while True:
    query = input('Do you want to compare it to the market index? ')
    # Slicing instead of indexing: an empty answer yields '' rather than
    # raising IndexError before the validity check runs.
    first_l = query[:1].lower()
    if first_l in ('y', 'n'):
        break
    print('Please answer with yes or no!')

# 'y' adds SPY next to the chosen ticker; 'n' plots the ticker alone.
symbols = [ticker, 'SPY'] if first_l == 'y' else [ticker]
df = web.DataReader(symbols, 'yahoo', start, end)
df['Adj Close'].plot()
plt.show()
import datetime as dt

import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web

style.use('ggplot')

# Request TSLA data for 2000-01-01 through 2016-12-31 and show the last six rows.
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2016, 12, 31)
df = web.DataReader('TSLA', 'yahoo', start, end)

last_rows = df.tail(6)
print(last_rows)
def reset(self):
    """Start a new episode and return its first state.

    Picks a random stock code from ``self.code_list`` and a random date
    window inside ``[self.min_date, self.max_date]``, downloads the data via
    ``data.DataReader`` (source ``'yahoo'``), derives rolling min-max,
    mean/std, SVD-whitened and log-return features, and resets the episode
    bookkeeping attributes.

    Returns:
        A float ``torch`` tensor built from row ``self.window_size`` of
        ``self.prep_data``.
    """
    stock_code = np.random.choice(self.code_list)
    s = pd.date_range(self.min_date, self.max_date, freq="D")
    # The sampled position leaves window_size days before it and `period` days after it.
    sample_idx = random.randrange(self.window_size, len(s) - self.period)
    # Keep only the first whitespace-separated token (the date) of each timestamp string.
    start_date = str(s[sample_idx - self.window_size]).split()[0]
    end_date = str(s[sample_idx + self.period]).split()[0]
    self.src_data = data.DataReader(stock_code, 'yahoo', start_date, end_date)
    # Preprocessing
    #data = source['Close']
    sdata = self.src_data
    #ret = data.pct_change(1)
    # Log of each value divided by the previous row's value.
    log_ret = np.log(sdata / sdata.shift(1))
    # NOTE(review): assumes the downloaded frame has exactly six columns in
    # high/low/open/close/volume/adj-close order -- confirm against the reader.
    log_ret.columns = [
        'log_h', 'log_l', 'log_o', 'log_c', 'log_v', 'log_adj'
    ]

    def min_max_norm(wdata):
        """Scale the window's last value by the window's min/max range."""
        return (wdata[-1] - wdata.min()) / (wdata.max() - wdata.min())

    def mean_std_norm(wdata):
        """Standardise the window's last value by the window's mean and std."""
        return (wdata[-1] - wdata.mean()) / wdata.std()

    def svd_whiten(X):
        """Return the last row of ``U @ Vt`` from the reduced SVD of ``X``."""
        # a = source[X]
        U, s, Vt = np.linalg.svd(X, full_matrices=False)
        # U and Vt are the singular matrices, and s contains the singular values.
        # Since the rows of both U and Vt are orthonormal vectors, then U * Vt
        # will be white
        X_white = np.dot(U, Vt)
        return X_white[-1]

    def rolling_whiten(src, window=20, min_periods=10):
        """Apply ``svd_whiten`` over a trailing window for every row of ``src``.

        Rows before ``min_periods - 1`` get zeros. The slices ``src[0:i]``
        and ``src[i - window:i]`` end just before row ``i``.
        """
        ret = []
        for i in range(src.__len__()):
            if i < min_periods - 1:
                ret.append([0 for i in range(src.columns.__len__())])
            elif i < window:
                ret.append(svd_whiten(src[0:i].values))
            else:
                ret.append(svd_whiten(src[i - window:i].values))
        pdata = pd.DataFrame(np.stack(ret, 0))
        pdata.columns = [
            'w' + str(i) for i in range(src.columns.__len__())
        ]
        pdata.index = src.index
        return pdata

    # Rolling features over window_size rows; missing values become 0.
    norm0 = sdata.rolling(window=self.window_size,
                          min_periods=10).apply(min_max_norm,
                                                raw=True).fillna(0)
    norm1 = sdata.rolling(window=self.window_size,
                          min_periods=10).apply(mean_std_norm,
                                                raw=True).fillna(0)
    norm2 = rolling_whiten(
        self.src_data, window=self.window_size, min_periods=10).fillna(
            0)  #pca whitening using close open min max vol
    norm0.columns = ['mm_h', 'mm_l', 'mm_o', 'mm_c', 'mm_v', 'mm_adj']
    norm1.columns = ['ms_h', 'ms_l', 'ms_o', 'ms_c', 'ms_v', 'ms_adj']
    # Raw data and all derived feature tables side by side.
    self.prep_data = pd.concat(
        [self.src_data, norm0, norm1, norm2, log_ret], axis=1)
    # Reset the episode bookkeeping.
    self.prev_action = 0
    self.count = self.window_size
    self.balance = self.init_money
    self.num_stocks = 0
    self.sum_action = 0
    # Split out the training data
    # features_training_data = [
    #     'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio',
    #     'close_lastclose_ratio', 'volume_lastvolume_ratio',
    #     'close_ma5_ratio', 'volume_ma5_ratio',
    #     'close_ma10_ratio', 'volume_ma10_ratio',
    #     'close_ma20_ratio', 'volume_ma20_ratio',
    #     'close_ma60_ratio', 'volume_ma60_ratio',
    #     'close_ma120_ratio', 'volume_ma120_ratio'
    # ]
    # The first state is the row at position self.count (== window_size).
    da0 = self.prep_data.iloc[self.count]
    state = torch.from_numpy(da0.values).float()
    # state = torch.cat([state, torch.Tensor([self.sum_action]).view(1,-1)],dim=1)
    return state
"""
Created on Mon Aug 6 21:39:41 2018

@author: kennedy
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pandas_datareader.data as web
from sklearn.model_selection import KFold, GridSearchCV

# Date range for the IBM request.
start_date = datetime(2016, 1, 1)
end_date = datetime(2018, 7, 16)
data = web.DataReader('IBM', "yahoo", start_date, end_date)

# Define the feature vector used to plot the regression.
# .copy() gives an independent frame, so adding a column below does not
# trigger pandas' chained-assignment warning on a slice of `data`.
df = data[['Open']].copy()
# Change in 'Open' from the previous row. The first row has no predecessor
# and is set to 0; previously fillna(0) was applied to the shifted series,
# which made the first value equal to the full opening price.
df['Volatility'] = df['Open'].diff().fillna(0)

# SVM model
from sklearn.svm import SVR

# x-axis used to draw the regression line: 1..len(df)
Xf1 = np.arange(1, len(df) + 1)
#Xf2 = (Xf1**2).astype(np.float64)
#Xf3 = (Xf1**3).astype(np.float64)
#Xf4 = (Xf1**4).astype(np.float64)
from sklearn import tree, svm, linear_model
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pandas as pd
import pandas_datareader.data as web

# Date range and symbol for the request.
start = dt.datetime(2015, 1, 1)
end = dt.datetime(2016, 12, 31)
ticker = 'RBC'

# Loading Data
df = web.DataReader(ticker, 'iex', start, end)
# Turn the index into an ordinary column.
df = df.reset_index()

# Per-field lists, all initially empty.
closing_price = []
highest_price = []
lowest_price = []
opening_price = []
volume_traded = []
dates = []
labels = []

# Iterating a one-column frame yields its column label, so `i` is 'close';
# the inner loop walks that column's values, rounded to 2 decimals.
for i in df[['close']]:
    for j in df[i]:
        closing_price.append(round(j, 2))
for i in df[['high']]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')  # Give nice white background with grid
from datetime import datetime

stocks = ['AAPL', 'TEF', 'MSFT', 'TSLA']

# Same month/day one year before now.
end = datetime.now()
start = datetime(end.year - 1, end.month, end.day)

main_df = web.DataReader(stocks, "google", start, end)['Close']

# Row-over-row percentage changes with missing rows dropped.
rets = main_df.pct_change()
rets = rets.dropna()

# Correlation matrix of the 'Close' columns.
corr = main_df.corr()

# Boolean mask hiding the upper triangle (diagonal included).
# The builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

cmap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(corr,
            mask=mask,
            cmap=cmap,
            vmax=1,
            center=0,
            annot=True,
            square=True,
            linewidths=.5,
            cbar_kws={"shrink": .5})
# Presumably accumulators for averages reported elsewhere -- they are only
# initialised to zero here; verify against the rest of the file.
avg_first = 0
avg_end_val = 0
avg_realised_return = 0
avg_discrep_perc = 0
avg_expected_value = 0
print("\nAlgorithm\n\n")
for year in time_frame:
    # The loop value is substituted into the first field of each date string.
    start_date = "{}/01/2015".format(year)
    end_date = "{}/01/2019".format(year)
    try:
        data = web.DataReader(stocks,
                              data_source="yahoo",
                              start=start_date,
                              end=end_date)['Adj Close']
        data = data.dropna()
        # Turn the index into a regular column (drop=False keeps it).
        data.reset_index(inplace=True, drop=False)
    except:
        # NOTE(review): a failed download only prints this message; the code
        # below then continues with whatever `data` was bound to before.
        print("Testing")
    # 'Total' accumulates each stock's column multiplied by w[i].
    data['Total'] = 0
    i = 0
    for tick in stocks:
        data['Total'] = data['Total'] + data[tick] * w[i]
        i += 1
    # Row-over-row percentage change of the weighted total.
    data['pct_change'] = data['Total'].pct_change()
"""
  Name     : c8_05_print_obs_from_Google.py
  Book     : Python for Finance (2nd ed.)
  Publisher: Packt Publishing Ltd.
  Author   : Yuxing Yan
  Date     : 6/6/2017
  email    : [email protected]
             [email protected]
"""
import datetime

import pandas_datareader.data as web

# Symbol and date range for the request.
ticker = 'WMT'
begdate = datetime.datetime(2010, 1, 1)
enddate = datetime.datetime(2015, 5, 9)

# Query the 'yahoo-actions' source and show the first rows.
x = web.DataReader(ticker, 'yahoo-actions', begdate, enddate)
first_rows = x.head()
print(first_rows)
# `python.pandas` is not an importable module; the package name is `pandas`.
import pandas
#import pandas_datareader as pdr
#import datetime
import matplotlib
from pandas_datareader import data, wb
from datetime import date

# Earlier variant using get_data_yahoo, kept for reference:
# aapl = pdr.get_data_yahoo('AAPL',
#                           start=datetime.datetime(2019, 1, 1),
#                           end=datetime.datetime(2019, 2, 19))
#
# print(aapl)

# Request GE data for 2019-04-01 through 2019-04-25 and show the first rows.
start = date(2019, 4, 1)
end = date(2019, 4, 25)
df = data.DataReader('GE', 'yahoo', start, end)
print(df.head())
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import data as wb

tickers = ['PG', 'BEI.DE']

# One 'Adj Close' column per ticker, from 2007-1-1 onwards.
sec_data = pd.DataFrame()
for t in tickers:
    prices = wb.DataReader(t, data_source='yahoo', start='2007-1-1')
    sec_data[t] = prices['Adj Close']

# We use the logarithmic rate of return.
# The standard deviation of a company's returns can also be called its risk
# or volatility.
# A stock that deviates widely from its mean is said to be more volatile.
previous = sec_data.shift(1)
sec_returns = np.log(sec_data / previous)

# Mean of the log rate of return.
sec_returns['PG'].mean()
# Annual mean of the log rate of return.
sec_returns['PG'].mean() * 250
# Standard deviation, computed with .std().
sec_returns['PG'].std()
sec_returns['PG'].std() * 250**0.5

# Mean of the log rate of return.
sec_returns['BEI.DE'].mean()
import pandas_datareader.data as web
import datetime
import pandas_datareader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import mplfinance as mpf
from matplotlib.dates import DateFormatter, date2num, WeekdayLocator, DateLocator, MONDAY

# Common date range for all three requests.
start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2019, 2, 1)

tesla = web.DataReader('TSLA', 'yahoo', start, end)
# Ford Motor Company trades as 'F'. The previous symbol 'FORD' belongs to
# Forward Industries, an unrelated company.
ford = web.DataReader('F', 'yahoo', start, end)
gm = web.DataReader('GM', 'yahoo', start, end)

# print(tesla.head())
# print(ford.head())
# print(gm.head())

# Plot based on opening prices
# tesla['Open'].plot(label='Tesla', figsize=(16, 8), title='Open Price')
# ford['Open'].plot(label='FORD')
# gm['Open'].plot(label='GM')
#
# plt.legend()
# plt.show()

# *************
# Plot based on closing prices
# tesla['Adj Close'].plot(label='Tesla', figsize=(16, 8),
import pandas as pd
from pandas_datareader import data, wb
import datetime
import matplotlib.pyplot as plt
from sklearn import linear_model

# List with stocks to analyze.
stocks = ['BCBA:BMA']

start = datetime.date(2017, 1, 1)
end = datetime.date(2017, 10, 6)

# Read 'OLHC' data from google finance.
# NOTE(review): this rebinds `data`, shadowing the imported pandas_datareader
# module. The name is kept because code outside this excerpt may rely on it.
data = data.DataReader(stocks[0], 'google', start, end)

# Linear regression for close price.
# Split train and test sets: the last 50 rows are the test set.
# `axis` is passed by keyword; the positional form was removed in pandas 2.0.
X = data.drop(['Close'], axis=1)
y = data['Close']

X_train = X[:-50]
# dropna() returns a new frame; calling it in place on a slice of X triggers
# pandas' chained-assignment warning.
X_test = X[-50:].dropna()

y_train = y[:-50]
# Keep the targets aligned with the feature rows that survived dropna().
y_test = y[-50:].loc[X_test.index]

reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)
import datetime
import pandas_datareader.data as web

# Request 600797.SS data for 2018 and print the frame's summary.
df_stockload = web.DataReader("600797.SS", "yahoo",
                              datetime.datetime(2018, 1, 1),
                              datetime.datetime(2019, 1, 1))
print(df_stockload.info())

# Replaces `import matplotlib.finance as mpf`; used to draw the candlestick chart
import mpl_finance as mpf  # replaces `import matplotlib.finance as mpf`
import matplotlib.pyplot as plt

# Create the figure object
fig = plt.figure(figsize=(8, 6), dpi=100, facecolor="white")
# Set the figure margins
fig.subplots_adjust(left=0.09,
                    bottom=0.20,
                    right=0.94,
                    top=0.90,
                    wspace=0.2,
                    hspace=0)
# Create the subplot
graph_KAV = fig.add_subplot(1, 1, 1)
# Draw the candlesticks
mpf.candlestick2_ochl(graph_KAV, df_stockload.Open,
import matplotlib.pyplot as plt
import pandas_datareader.data as web

sk_hynix = web.DataReader("000660.KS", "yahoo")

fig = plt.figure(figsize=(12, 8))

# 4x4 grid: the top panel spans three rows, the bottom panel the last row.
grid_shape = (4, 4)
top_axes = plt.subplot2grid(grid_shape, (0, 0), rowspan=3, colspan=4)
bottom_axes = plt.subplot2grid(grid_shape, (3, 0), rowspan=1, colspan=4)

# Turn off scientific notation on the bottom panel's y axis.
y_formatter = bottom_axes.get_yaxis().get_major_formatter()
y_formatter.set_scientific(False)

dates = sk_hynix.index
top_axes.plot(dates, sk_hynix['Adj Close'], label='Adjusted Close')
bottom_axes.plot(dates, sk_hynix['Volume'])

plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader.data as pdr

price = pdr.DataReader("^N225", 'yahoo', "1984/1/4", "2019/8/8")

# Select the first value of each month.
print(price.resample('M').first().tail())
# Select the last value of each month.
print(price.resample('M').last().tail())
# Offset the labels by one day. `loffset` was deprecated in pandas 1.1 and
# removed in 2.0; shifting the resulting index is the documented replacement.
month_end = price.resample('M').last()
month_end.index = month_end.index + pd.Timedelta(days=1)
print(month_end.tail())

# `resample('A').Close` is a resampler, not a method, so calling it raised
# TypeError. An aggregation is required.
# NOTE(review): `last()` (year-end close) is assumed here, matching the
# month-end examples above -- confirm the intended aggregation.
price['Close'].resample('A').last().plot(color='magenta')
plt.ylabel('N225 Index')
plt.show()
def analyzepair(self, pairlist, startdate, enddate, showplot):
    """Compute an expanding-window hedge ratio for a pair and run ADF on the residuals.

    NOTE: this method uses Python 2 print statements.

    Args:
        pairlist: Sequence whose first two items are the symbols to compare.
        startdate: Start date string in "%Y-%m-%d" format.
        enddate: End date string in "%Y-%m-%d" format.
        showplot: When True, the price-series and scatter plots are drawn
            via self.plot_price_series / self.plot_scatter_series.

    Returns:
        df_b: the two "Close" columns joined column-wise with the per-date
        'beta_hr' and 'actual' values, plus the derived 'a-b' and 'res' columns.
    """
    s1 = pairlist[0]
    s2 = pairlist[1]
    print '---------------------'
    print 'Doing', s1, s2
    #startdatetime = datetime.datetime(2015, 1, 1)
    #enddatetime = datetime.datetime(2017, 9, 30)
    startdatetime = datetime.datetime.strptime(startdate, "%Y-%m-%d")
    enddatetime = datetime.datetime.strptime(enddate, "%Y-%m-%d")
    prices1 = data.DataReader(s1, "yahoo", startdatetime, enddatetime)
    prices2 = data.DataReader(s2, "yahoo", startdatetime, enddatetime)
    # Both close series are placed on the first symbol's index.
    df_a = pd.DataFrame(index=prices1.index)
    df_a[s1] = prices1["Close"]
    df_a[s2] = prices2["Close"]
    # Keep only the rows where both symbols have a value.
    df = df_a.dropna(axis=0, how='any')
    if showplot == True:
        # Plot the two time series
        self.plot_price_series(df, s1, s2, startdatetime, enddatetime)
        # Display a scatter plot of the two time series
        self.plot_scatter_series(df, s1, s2)
    Y = df[s2].tolist()
    X = df[s1].tolist()
    # Calculate optimal hedge ratio "beta"
    # For every date an OLS of Y on X is fitted over all observations up to
    # and including that date, so the ratio at a date uses no later data.
    i0 = 0
    beta_hr_list = []
    for idx, rows in df.iterrows():
        if i0 >= 0:
            # NOTE(review): `sm` is presumably statsmodels.api -- confirm.
            res = sm.OLS(Y[:i0 + 1], X[:i0 + 1])
            beta_hr = res.fit().params[0]
            mydict = {
                'Date': idx,
                'beta_hr': beta_hr,
                'actual': Y[i0] / X[i0]
            }
            beta_hr_list.append(mydict)
        i0 += 1
    df_beta_hr = pd.DataFrame(beta_hr_list)
    df_beta_hr.set_index("Date", drop=True, inplace=True)
    # Joined with df_a (not the NaN-filtered df), so rows dropped above
    # reappear here without beta values.
    df_b = pd.concat([df_a, df_beta_hr], axis=1)
    df_b['a-b'] = df_b['actual'] - df_b['beta_hr']
    # Calculate the residuals of the linear combination
    df_b["res"] = df_b[s2] - df_b['beta_hr'] * df_b[s1]
    ## for idx, row in df_b.iterrows():
    ##     print idx,row['beta_hr'],round(row['a-b'],4)
    # Calculate and analyze the CADF test on the residuals
    # NOTE(review): `ts` is presumably statsmodels.tsa.stattools -- confirm.
    cadf = ts.adfuller(df_b["res"])
    test_null_hypothesis = cadf[0]
    five_percent_value = cadf[4]['5%']
    print ''
    print 'cadf test for cointegration result 5% value:', test_null_hypothesis, 'must be less than', five_percent_value
    return df_b
def test_iex_bad_symbol(self):
    """Requesting the unknown ticker "BADTICKER" from "iex" must raise."""
    with pytest.raises(Exception):
        # The source was spelled "iex," (stray comma inside the string), so
        # the call failed on the unknown data source and the bad-ticker path
        # was never exercised.
        web.DataReader("BADTICKER", "iex", self.start, self.end)
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

#%matplotlib inline

# Display options for printed frames.
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 8)
# Fully qualified name: the short form 'precision' matches more than one
# option in current pandas and raises OptionError.
pd.set_option('display.precision', 3)

from pandas_datareader import data as web

# Request MSFT and AAPL data for 2012-01-01 through 2012-12-30.
start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2012, 12, 30)
msft = web.DataReader("MSFT", "yahoo", start, end)
aapl = web.DataReader("AAPL", "yahoo", start, end)
def test_daily_invalid_date(self):
    """An "iex" request for 2000-01-05 through 2017-05-24 must raise."""
    date_range = (datetime(2000, 1, 5), datetime(2017, 5, 24))
    with pytest.raises(Exception):
        web.DataReader(["AAPL", "TSLA"], "iex", *date_range)
name: str = self["name"] category: str = self["category"] subcategories: dict = self["subcategories"] metatype: str = self["metatype"] submetatype: str = self["submetatype"] abbreviation: str = self["abbreviation"] jscat = self.main_helper.generate_hash(subcategories) return f"{name}:{category}:{jscat}:{metatype}:{submetatype}:{abbreviation}" if __name__ == "__main__": import pandas_datareader.data as web data_msft = web.DataReader( "MSFT", "yahoo", start="2010/1/1", end="2020/1/30" ).round(2) data_apple = web.DataReader( "AAPL", "yahoo", start="2010/1/1", end="2020/1/30" ).round(2) # print(data_apple) episode_id = uuid.uuid4().hex jambo = Jamboree() data_hander = DataHandler() data_hander.event = jambo data_hander.processor = jambo # The episode and live parameters are probably not good for the scenario. Will probably need to switch to something else to identify data data_hander.episode = episode_id data_hander.live = False data_hander["category"] = "markets" data_hander["subcategories"] = {
def test_multiple_symbols(self):
    """A multi-symbol "iex" request returns every symbol with the expected row count."""
    syms = ["AAPL", "MSFT", "TSLA"]
    df = web.DataReader(syms, "iex", self.start, self.end)
    assert sorted(list(df.columns.levels[1])) == syms
    for sym in syms:
        # The closing parenthesis used to sit after `== 578`, so the test
        # asserted len(<boolean frame>), which is truthy for any non-empty
        # result and checked nothing.
        # NOTE(review): confirm 578 is the right row count for this range;
        # it was never actually enforced before.
        assert len(df.xs(sym, level="Symbols", axis=1)) == 578
def basic():
    """Fetch PTT.BK data for January 2017 from 'yahoo', print the first rows and write ptt.csv."""
    request = dict(data_source="yahoo", start="2017-1-1", end="2017-1-31")
    df = data.DataReader("PTT.BK", **request)
    first_rows = df.head()
    print(first_rows)
    df.to_csv("ptt.csv")