Example #1
#!/usr/bin/env python
# coding: utf-8

# In[2]:


import pandas as pd
import datetime
import pandas_datareader.data as web
from pandas import Series, DataFrame


start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2020, 6, 4)

df = web.DataReader("AAPL", 'yahoo', start, end)
df.tail()


# In[3]:


close_px = df['Adj Close']
mavg = close_px.rolling(window=100).mean()


# In[4]:


mavg
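# In[5]:


# A possible follow-up cell (not part of the original notebook): plot the
# adjusted close against its 100-day moving average. Assumes matplotlib is
# installed; `close_px` and `mavg` come from the cells above.
import matplotlib.pyplot as plt

close_px.plot(label='AAPL Adj Close')
mavg.plot(label='100-day moving average')
plt.legend(loc='best')
plt.show()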
Example #2
    def test_iex_bad_symbol_list(self):
        with pytest.raises(Exception):
            web.DataReader(["AAPL", "BADTICKER"], "iex", self.start, self.end)
Example #3
    def test_single_symbol(self):
        df = web.DataReader("AAPL", "iex", self.start, self.end)
        assert list(df) == ["open", "high", "low", "close", "volume"]
        assert len(df) == 476
Example #4
# Assumes an open psycopg2 connection `conn` to the watchlist database.
import datetime

from pandas_datareader import data
from psycopg2.extras import RealDictCursor

cur = conn.cursor(cursor_factory=RealDictCursor)
cur.execute("""
        SELECT * from mock_watchlist
        ORDER BY ticker ASC
        ;
    """)
res = cur.fetchall()
tickers = [x['ticker'] for x in res]
# currently have two tickers (BHP, CBA)
print("Tickers from db: {}".format(tickers))

# for now, just query one stock maybe NAB?
# make dataframe (build custom function that will take ticker, and start and end, and return the DF)
for tick in tickers:
    # for now just get 30 days
    end = datetime.date.today()
    start = end - datetime.timedelta(days=30)
    df = data.DataReader('{}.AX'.format(tick),
                         start=start,
                         end=end,
                         data_source='yahoo')[[
                             'High', 'Low', 'Open', 'Adj Close'
                         ]]
    df['Up Today'] = df['Adj Close'] > df['Open']
    df['% Change'] = df['Adj Close'].pct_change(periods=1) * 100
    # Add moving averages?
    # Add momentum indicator?
    # Compute here what kind of candlestick it is (maybe only last ten?)
    print(df)
# build a function that will take a candlestick and classify it (i.e. doji, etc.; use the Bedford book for definitions) -- see the sketch below
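# A minimal sketch of such a classifier (not part of the original snippet);
# the thresholds are illustrative assumptions, not definitions taken from the
# Bedford book.
def classify_candle(open_, high, low, close, doji_threshold=0.1):
    """Return a rough label for a single OHLC bar."""
    full_range = high - low
    if full_range == 0:
        return 'flat'
    body = abs(close - open_)
    if body / full_range < doji_threshold:
        return 'doji'
    return 'bullish' if close > open_ else 'bearish'

# Example: label the most recent bar of the last DataFrame fetched above.
last = df.iloc[-1]
print(classify_candle(last['Open'], last['High'], last['Low'], last['Adj Close']))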
Example #5
import pandas_datareader.data as web
import datetime
import matplotlib.pyplot as plt

# data
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2016, 3, 19)
data = web.DataReader("AAPL", "yahoo", start, end)

plt.plot(data.index, data['Adj Close'])
plt.show()
Example #6
import datetime as dt
import pandas_datareader.data as web

from machineTime import getMachineData

now = dt.datetime.combine(dt.datetime.now(), dt.time())

date = dt.datetime.fromtimestamp(1588341840)

start = date
end = date + dt.timedelta(days=1)  # dt.datetime.now()

start = dt.datetime(2015, 7, 1)

end = dt.datetime(2020, 7, 14)
'''if(end > now):
    end = date'''
dfs = []
symbols = [
    "NBEV", "NEM", "AMD", "MKTX", "NVDA", "REGN", 'NLOK', 'HUM', 'VRTX', 'RMD',
    'APPL', 'ODFL', 'MSCI', 'CTXS', 'DVA', 'SBAC', 'TGT', 'DG', 'MSFT', 'LDOS',
    'ANSS'
]
for count, symbol in enumerate(symbols):
    try:
        dfs.append(web.DataReader(symbol, 'yahoo', start, end))
    except Exception:  # skip symbols that fail to download
        pass
getMachineData(dfs)
Example #7
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader.data as web

# Get GS Data from Yahoo
gs = web.DataReader("078930.KS", "yahoo", "2014-01-01", "2016-03-06")
new_gs = gs[gs['Volume'] != 0]

# Moving average
ma5 = new_gs['Adj Close'].rolling(window=5).mean()
ma20 = new_gs['Adj Close'].rolling(window=20).mean()
ma60 = new_gs['Adj Close'].rolling(window=60).mean()
ma120 = new_gs['Adj Close'].rolling(window=120).mean()

# Insert columns
new_gs.insert(len(new_gs.columns), "MA5", ma5)
new_gs.insert(len(new_gs.columns), "MA20", ma20)
new_gs.insert(len(new_gs.columns), "MA60", ma60)
new_gs.insert(len(new_gs.columns), "MA120", ma120)

# Plot
plt.plot(new_gs.index, new_gs['Adj Close'], label="Adj Close")
plt.plot(new_gs.index, new_gs['MA5'], label="MA5")
plt.plot(new_gs.index, new_gs['MA20'], label="MA20")
plt.plot(new_gs.index, new_gs['MA60'], label="MA60")
plt.plot(new_gs.index, new_gs['MA120'], label="MA120")

plt.legend(loc='best')
plt.grid()
plt.show()
Example #8
import pandas as pd
import datetime
# import pandas.io.data as web
# from pandas_datareader import data, wb
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from matplotlib import style

style.use('ggplot')

start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2015, 1, 1)

df = web.DataReader("XOM", "yahoo", start, end)

print(df.head())

df['Adj Close'].plot()

plt.show()
Example #9
import datetime as dt
import matplotlib.pyplot as plt
import pandas_datareader.data as web

print(
    'This program will plot stock prices for you, just follow the prompts as they come...'
)
date_entry = input('Enter the start date for your plot YYYY-MM-DD format: ')
year, month, day = map(int, date_entry.split('-'))
start = dt.datetime(year, month, day)
date_entry2 = input('Enter the end date for your plot YYYY-MM-DD format: ')
year, month, day = map(int, date_entry2.split('-'))
end = dt.datetime(year, month, day)

ticker = input('Enter the ticker you wish to plot: ')

while True:
    query = input('Do you want to compare it to the market index? ')
    first_l = query[:1].lower()
    if first_l not in ['y', 'n']:
        print('Please answer with yes or no!')
    else:
        break
if first_l == 'y':
    df = web.DataReader([ticker, 'SPY'], 'yahoo', start, end)
    df['Adj Close'].plot()
    plt.show()
if first_l == 'n':
    df = web.DataReader([ticker], 'yahoo', start, end)
    df['Adj Close'].plot()
    plt.show()
Example #10
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web

style.use('ggplot')

start = dt.datetime(2000, 1, 1)
end = dt.datetime(2016, 12, 31)

df = web.DataReader('TSLA', 'yahoo', start, end)
print(df.tail(6))
Example #11
    def reset(self):
        stock_code = np.random.choice(self.code_list)

        s = pd.date_range(self.min_date, self.max_date, freq="D")
        sample_idx = random.randrange(self.window_size, len(s) - self.period)
        start_date = str(s[sample_idx - self.window_size]).split()[0]
        end_date = str(s[sample_idx + self.period]).split()[0]

        self.src_data = data.DataReader(stock_code, 'yahoo', start_date,
                                        end_date)

        # preprocessing
        #data = source['Close']
        sdata = self.src_data

        #ret = data.pct_change(1)
        log_ret = np.log(sdata / sdata.shift(1))
        log_ret.columns = [
            'log_h', 'log_l', 'log_o', 'log_c', 'log_v', 'log_adj'
        ]

        def min_max_norm(wdata):
            return (wdata[-1] - wdata.min()) / (wdata.max() - wdata.min())

        def mean_std_norm(wdata):
            return (wdata[-1] - wdata.mean()) / wdata.std()

        def svd_whiten(X):
            #    a = source[X]
            U, s, Vt = np.linalg.svd(X, full_matrices=False)
            # U and Vt are the singular matrices, and s contains the singular values.
            # Since the rows of both U and Vt are orthonormal vectors, then U * Vt
            # will be white
            X_white = np.dot(U, Vt)
            return X_white[-1]

        def rolling_whiten(src, window=20, min_periods=10):
            ret = []
            for i in range(src.__len__()):
                if i < min_periods - 1:
                    ret.append([0 for i in range(src.columns.__len__())])
                elif i < window:
                    ret.append(svd_whiten(src[0:i].values))
                else:
                    ret.append(svd_whiten(src[i - window:i].values))
            pdata = pd.DataFrame(np.stack(ret, 0))
            pdata.columns = [
                'w' + str(i) for i in range(src.columns.__len__())
            ]
            pdata.index = src.index
            return pdata

        norm0 = sdata.rolling(window=self.window_size,
                              min_periods=10).apply(min_max_norm,
                                                    raw=True).fillna(0)

        norm1 = sdata.rolling(window=self.window_size,
                              min_periods=10).apply(mean_std_norm,
                                                    raw=True).fillna(0)
        norm2 = rolling_whiten(
            self.src_data, window=self.window_size, min_periods=10).fillna(
                0)  #pca whitening using close open min max vol

        norm0.columns = ['mm_h', 'mm_l', 'mm_o', 'mm_c', 'mm_v', 'mm_adj']
        norm1.columns = ['ms_h', 'ms_l', 'ms_o', 'ms_c', 'ms_v', 'ms_adj']

        self.prep_data = pd.concat(
            [self.src_data, norm0, norm1, norm2, log_ret], axis=1)

        self.prev_action = 0
        self.count = self.window_size
        self.balance = self.init_money
        self.num_stocks = 0
        self.sum_action = 0

        # Split out the training data (feature list kept for reference)
        #        features_training_data = [
        #            'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio',
        #            'close_lastclose_ratio', 'volume_lastvolume_ratio',
        #            'close_ma5_ratio', 'volume_ma5_ratio',
        #            'close_ma10_ratio', 'volume_ma10_ratio',
        #            'close_ma20_ratio', 'volume_ma20_ratio',
        #            'close_ma60_ratio', 'volume_ma60_ratio',
        #            'close_ma120_ratio', 'volume_ma120_ratio'
        #        ]

        da0 = self.prep_data.iloc[self.count]

        state = torch.from_numpy(da0.values).float()

        #        state = torch.cat([state, torch.Tensor([self.sum_action]).view(1,-1)],dim=1)
        return state
Example #12
"""
Created on Mon Aug  6 21:39:41 2018

@author: kennedy
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pandas_datareader.data as web
from sklearn.model_selection import KFold, GridSearchCV
#
start_date = datetime(2016, 1, 1)
end_date = datetime(2018, 7, 16)

data = web.DataReader('IBM', "yahoo", start_date, end_date)

#define the feature vector we would be using for
#to plot our regression
df = data[['Open']]

df['Volatility'] = df['Open'] - df['Open'].shift(1).fillna(0)

#SVM model
from sklearn.svm import SVR

#this we would be using to draw our regression line
Xf1 = np.arange(1, len(df) + 1)
#Xf2 = (Xf1**2).astype(np.float64)
#Xf3 = (Xf1**3).astype(np.float64)
#Xf4 = (Xf1**4).astype(np.float64)
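# The snippet stops before fitting anything; a minimal continuation (an
# assumption, not the author's original code) fits an SVR on the index
# feature and overlays the fitted curve on the Open price.
X = Xf1.reshape(-1, 1)
y = df['Open'].values

svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(X, y)

plt.plot(df.index, y, label='Open')
plt.plot(df.index, svr_rbf.predict(X), label='SVR (rbf) fit')
plt.legend()
plt.show()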
Example #13
from sklearn import tree, svm, linear_model
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pandas as pd
import pandas_datareader.data as web

start = dt.datetime(2015, 1, 1)
end = dt.datetime(2016, 12, 31)

ticker = 'RBC'

# Loading Data
df = web.DataReader(ticker, 'iex', start, end)
df = df.reset_index()

closing_price = []
highest_price = []
lowest_price = []
opening_price = []
volume_traded = []
dates = []

labels = []

for i in df[['close']]:
    for j in df[i]:
        closing_price.append(round(j, 2))

for i in df[['high']]:
Example #14
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')  # Give nice white background with grid
from datetime import datetime

stocks = ['AAPL', 'TEF', 'MSFT', 'TSLA']
end = datetime.now()

start = datetime(end.year - 1, end.month, end.day)
main_df = web.DataReader(stocks, "google", start, end)['Close']
rets = main_df.pct_change()
rets = rets.dropna()

corr = main_df.corr()
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
cmap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(corr,
            mask=mask,
            cmap=cmap,
            vmax=1,
            center=0,
            annot=True,
            square=True,
            linewidths=.5,
            cbar_kws={"shrink": .5})
Example #15
    avg_first = 0
    avg_end_val = 0
    avg_realised_return = 0
    avg_discrep_perc = 0
    avg_expected_value = 0

    print("\nAlgorithm\n\n")

    for year in time_frame:

        start_date = "{}/01/2015".format(year)
        end_date = "{}/01/2019".format(year)

        try:
            data = web.DataReader(stocks,
                                  data_source="yahoo",
                                  start=start_date,
                                  end=end_date)['Adj Close']
            data = data.dropna()
            data.reset_index(inplace=True, drop=False)

        except Exception:
            print("Testing")

        data['Total'] = 0
        i = 0

        for tick in stocks:
            data['Total'] = data['Total'] + data[tick] * w[i]
            i += 1

        data['pct_change'] = data['Total'].pct_change()
"""
  Name     : c8_05_print_obs_from_Google.py
  Book     : Python for Finance (2nd ed.)
  Publisher: Packt Publishing Ltd. 
  Author   : Yuxing Yan
  Date     : 6/6/2017
  email    : [email protected]
             [email protected]
"""



import pandas_datareader.data as web
import datetime
ticker = 'WMT'
begdate = datetime.datetime(2010, 1, 1)
enddate = datetime.datetime(2015, 5, 9)
x = web.DataReader(ticker, 'yahoo-actions', begdate, enddate)
print(x.head())
Example #17
import pandas

#import pandas_datareader as pdr
#import datetime

import matplotlib

from pandas_datareader import data, wb
from datetime import date

# aapl = pdr.get_data_yahoo('AAPL',
#                           start=datetime.datetime(2019, 1, 1),
#                           end=datetime.datetime(2019, 2, 19))
#
# print(aapl)

start = date(2019, 4, 1)
end = date(2019, 4, 25)
df = data.DataReader('GE', 'yahoo', start, end)

print(df.head())
Example #18
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import data as wb

tickers = ['PG', 'BEI.DE']

sec_data = pd.DataFrame()

for t in tickers:
    sec_data[t] = wb.DataReader(t, data_source='yahoo',
                                start='2007-1-1')['Adj Close']

# We will use the logarithmic rate of return
# The standard deviation of a company's returns is also called its risk or volatility
# A stock that shows a large deviation from its mean is said to be more volatile

sec_returns = np.log(sec_data / sec_data.shift(1))

# Now compute the mean of the log rate of return
sec_returns['PG'].mean()

# Now compute the annualized mean of the log rate of return
sec_returns['PG'].mean() * 250

# Now compute the standard deviation, using .std()
sec_returns['PG'].std()
sec_returns['PG'].std() * 250**0.5

# Now compute the mean of the log rate of return for BEI.DE
sec_returns['BEI.DE'].mean()
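# The original stops here; completing the same pattern for BEI.DE (a minimal
# continuation, assuming the 250-trading-day annualization used above):
sec_returns['BEI.DE'].mean() * 250
sec_returns['BEI.DE'].std()
sec_returns['BEI.DE'].std() * 250 ** 0.5

# Side-by-side annualized comparison of the two stocks
print(sec_returns[['PG', 'BEI.DE']].mean() * 250)
print(sec_returns[['PG', 'BEI.DE']].std() * 250 ** 0.5)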
Example #19
import pandas_datareader.data as web
import datetime
import pandas_datareader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import mplfinance as mpf
from matplotlib.dates import DateFormatter, date2num, WeekdayLocator, DateLocator, MONDAY

start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2019, 2, 1)

tesla = web.DataReader('TSLA', 'yahoo', start, end)
ford = web.DataReader('FORD', 'yahoo', start, end)  # note: 'FORD' is Forward Industries; Ford Motor Co. trades as 'F'
gm = web.DataReader('GM', 'yahoo', start, end)

# print(tesla.head())
# print(ford.head())
# print(gm.head())

# Plot based on opening prices
# tesla['Open'].plot(label='Tesla', figsize=(16, 8), title='Open Price')
# ford['Open'].plot(label='FORD')
# gm['Open'].plot(label='GM')
# # plt.legend()
# plt.show()
# *************

# Plot based on closing prices
# tesla['Adj Close'].plot(label='Tesla', figsize=(16, 8),
Example #20
import pandas as pd
from pandas_datareader import data, wb
import datetime
import matplotlib.pyplot as plt
from sklearn import linear_model

#List with stocks to analyze.
stocks =['BCBA:BMA']

start = datetime.date(2017,1,1)
end = datetime.date(2017,10,6)



#Read 'OLHC' data from google finance
data = data.DataReader(stocks[0],'google',start,end)

#Linear regression for close price.
#Split train and test sets
X = data.drop(['Close'], axis=1)
y = data['Close']

X_train = X[:-50]   
X_test = X[-50:]
X_test.dropna(inplace=True)
y_train = y[:-50]
y_test = y[-50:]

reg = linear_model.LinearRegression()
reg.fit(X_train,y_train)
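# The example ends after fitting; a natural follow-up (an assumption, not part
# of the original) scores the model and plots its out-of-sample predictions.
# Re-align y with X_test, since dropna() may have removed rows from X_test.
y_test_aligned = y.loc[X_test.index]

print('R^2 on train:', reg.score(X_train, y_train))
print('R^2 on test:', reg.score(X_test, y_test_aligned))

plt.plot(y_test_aligned.index, y_test_aligned, label='Actual close')
plt.plot(X_test.index, reg.predict(X_test), label='Predicted close')
plt.legend()
plt.show()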
Example #21
import datetime

import pandas_datareader.data as web

df_stockload = web.DataReader("600797.SS", "yahoo",
                              datetime.datetime(2018, 1, 1),
                              datetime.datetime(2019, 1, 1))
print(df_stockload.info())

# mpl_finance replaces the removed matplotlib.finance module for candlestick plotting
import mpl_finance as mpf  # replaces: import matplotlib.finance as mpf

import matplotlib.pyplot as plt

# Create the figure object
fig = plt.figure(figsize=(8, 6), dpi=100, facecolor="white")

# Set the figure margins
fig.subplots_adjust(left=0.09,
                    bottom=0.20,
                    right=0.94,
                    top=0.90,
                    wspace=0.2,
                    hspace=0)

# Create the subplot
graph_KAV = fig.add_subplot(1, 1, 1)

# Draw the candlesticks
mpf.candlestick2_ochl(graph_KAV,
                      df_stockload.Open,
Example #22
import matplotlib.pyplot as plt
import pandas_datareader.data as web

sk_hynix = web.DataReader("000660.KS", "yahoo")

fig = plt.figure(figsize=(12, 8))
top_axes = plt.subplot2grid((4, 4), (0, 0), rowspan=3, colspan=4)
bottom_axes = plt.subplot2grid((4, 4), (3, 0), rowspan=1, colspan=4)
bottom_axes.get_yaxis().get_major_formatter().set_scientific(False)

top_axes.plot(sk_hynix.index, sk_hynix['Adj Close'], label='Adjusted Close')
bottom_axes.plot(sk_hynix.index, sk_hynix['Volume'])

plt.tight_layout()
plt.show()
Example #23
import pandas_datareader.data as pdr
import matplotlib.pyplot as plt

price = pdr.DataReader("^N225", 'yahoo', "1984/1/4", "2019/8/8")

# Select the value at the start of each month
print(price.resample('M').first().tail())

# Select the value at the end of each month
print(price.resample('M').last().tail())

# Offset by one day
print(price.resample('M', loffset='1d').last().tail())

# Aggregate (here: last close of each year) before plotting the annual series
price['Close'].resample('A').last().plot(color='magenta')
plt.ylabel('N225 Index')
plt.show()
Example #24
    def analyzepair(self, pairlist, startdate, enddate, showplot):

        s1 = pairlist[0]
        s2 = pairlist[1]
        print('---------------------')
        print('Doing', s1, s2)

        #startdatetime = datetime.datetime(2015, 1, 1)
        #enddatetime = datetime.datetime(2017, 9, 30)
        startdatetime = datetime.datetime.strptime(startdate, "%Y-%m-%d")
        enddatetime = datetime.datetime.strptime(enddate, "%Y-%m-%d")
        #print startdatetime
        #print enddatetime
        #stop
        prices1 = data.DataReader(s1, "yahoo", startdatetime, enddatetime)
        prices2 = data.DataReader(s2, "yahoo", startdatetime, enddatetime)

        df_a = pd.DataFrame(index=prices1.index)
        df_a[s1] = prices1["Close"]
        df_a[s2] = prices2["Close"]
        df = df_a.dropna(axis=0, how='any')
        #print df
        if showplot == True:
            # Plot the two time series
            self.plot_price_series(df, s1, s2, startdatetime, enddatetime)

            # Display a scatter plot of the two time series
            self.plot_scatter_series(df, s1, s2)

        Y = df[s2].tolist()
        X = df[s1].tolist()

        #print Y[:5]
        #stop

        #print 'got here 1'
        # Calculate optimal hedge ratio "beta"
        #print Y
        #print X

        #print 'got here 3'
        #beta_hr = results
        #print 'beta_hr'
        #print beta_hr
        # Calculate the residuals of the linear combination

        i0 = 0
        beta_hr_list = []

        for idx, rows in df.iterrows():
            #print idx
            if i0 >= 0:
                res = sm.OLS(Y[:i0 + 1], X[:i0 + 1])
                beta_hr = res.fit().params[0]
                mydict = {
                    'Date': idx,
                    'beta_hr': beta_hr,
                    'actual': Y[i0] / X[i0]
                }
                beta_hr_list.append(mydict)
            i0 += 1
            #if i0 >= 10:
            #    stop
        df_beta_hr = pd.DataFrame(beta_hr_list)
        df_beta_hr.set_index("Date", drop=True, inplace=True)
        df_b = pd.concat([df_a, df_beta_hr], axis=1)

        df_b['a-b'] = df_b['actual'] - df_b['beta_hr']
        df_b["res"] = df_b[s2] - df_b['beta_hr'] * df_b[s1]

        ##        for idx, row in df_b.iterrows():
        ##            print idx,row['beta_hr'],round(row['a-b'],4)

        # Calculate and analyze the CADF test on the residuals
        cadf = ts.adfuller(df_b["res"])
        test_null_hypothesis = cadf[0]
        five_percent_value = cadf[4]['5%']
        print('')
        print('cadf test for cointegration result 5% value:', test_null_hypothesis, 'must be less than', five_percent_value)
        return df_b
Example #25
    def test_iex_bad_symbol(self):
        with pytest.raises(Exception):
            web.DataReader("BADTICKER", "iex", self.start, self.end)
Example #26
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
#%matplotlib inline
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 8)
pd.set_option('display.precision', 3)

from pandas_datareader import data as web

start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2012, 12, 30)

msft = web.DataReader("MSFT", "yahoo", start, end)
aapl = web.DataReader("AAPL", "yahoo", start, end)
Example #27
    def test_daily_invalid_date(self):
        start = datetime(2000, 1, 5)
        end = datetime(2017, 5, 24)
        with pytest.raises(Exception):
            web.DataReader(["AAPL", "TSLA"], "iex", start, end)
Example #28
        name: str = self["name"]
        category: str = self["category"]
        subcategories: dict = self["subcategories"]
        metatype: str = self["metatype"]
        submetatype: str = self["submetatype"]
        abbreviation: str = self["abbreviation"]

        jscat = self.main_helper.generate_hash(subcategories)
        return f"{name}:{category}:{jscat}:{metatype}:{submetatype}:{abbreviation}"


if __name__ == "__main__":
    import pandas_datareader.data as web

    data_msft = web.DataReader(
        "MSFT", "yahoo", start="2010/1/1", end="2020/1/30"
    ).round(2)
    data_apple = web.DataReader(
        "AAPL", "yahoo", start="2010/1/1", end="2020/1/30"
    ).round(2)
    # print(data_apple)
    episode_id = uuid.uuid4().hex
    jambo = Jamboree()
    data_hander = DataHandler()
    data_hander.event = jambo
    data_hander.processor = jambo
    # The episode and live parameters are probably not good for the scenario. Will probably need to switch to something else to identify data
    data_hander.episode = episode_id
    data_hander.live = False
    data_hander["category"] = "markets"
    data_hander["subcategories"] = {
Example #29
    def test_multiple_symbols(self):
        syms = ["AAPL", "MSFT", "TSLA"]
        df = web.DataReader(syms, "iex", self.start, self.end)
        assert sorted(list(df.columns.levels[1])) == syms
        for sym in syms:
            assert len(df.xs(sym, level="Symbols", axis=1)) == 578
Example #30
from pandas_datareader import data


def basic():
    df = data.DataReader("PTT.BK", data_source="yahoo",
                         start="2017-1-1", end="2017-1-31")
    print(df.head())
    df.to_csv("ptt.csv")
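# Hypothetical entry point (not in the original snippet): run the example when
# executed directly.
if __name__ == "__main__":
    basic()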