def get_single_dataset(stock_name, data_columns, append_intraday=False):
    """Download the full daily history of one instrument from Yahoo Finance.

    Args:
        stock_name: A friendly name (e.g. ``"Amazon"``) or a raw Yahoo
            ticker. Names without a known alias are passed to
            ``yf.download`` unchanged.
        data_columns: Columns to keep from the downloaded frame.
        append_intraday: When true and the daily history has no row dated
            today, append one synthetic row derived from today's 1-minute
            bars.

    Returns:
        The downloaded frame restricted to ``data_columns``, with every row
        that contains a zero removed, plus the optional synthetic row.
    """
    aliases = {
        "Amazon": "AMZN",
        "Google": "GOOG",
        "Apple": "AAPL",
        "Microsoft": "MSFT",
        "Tesla": "TSLA",
        "Bitcoin": "BTC-USD",
        "Ethereum": "ETH-USD",
        "Aixtron": "AIXA.DE",
        "Gaia": "GAIA",
        "SunOpta": "STKL",
        "Infineon": "IFX.DE",
    }
    # Only strings can be aliases; any other value (possibly unhashable) is
    # forwarded untouched, exactly like an unknown name.
    if isinstance(stock_name, str):
        ticker_name = aliases.get(stock_name, stock_name)
    else:
        ticker_name = stock_name

    dataset = yf.download(ticker_name, period='max').get(data_columns)
    # Drop rows in which any selected column is exactly zero.
    dataset = dataset[~(dataset == 0).any(axis=1)]

    if not append_intraday:
        return dataset

    today = dt.datetime.today()
    today_str = today.strftime("%Y-%m-%d")

    dataset_intraday = yf.download(
        ticker_name, period='1d', interval='1m').get(data_columns)
    dataset_intraday = dataset_intraday[
        ~(dataset_intraday == 0).any(axis=1)]

    # Boolean masks selecting the rows stamped with today's calendar date.
    mask = pd.to_datetime(
        dataset.index.values).strftime("%Y-%m-%d") == today_str
    mask_intraday = pd.to_datetime(
        dataset_intraday.index.values).strftime("%Y-%m-%d") == today_str

    if not mask.any() and mask_intraday.any():
        # Synthetic row: every column starts as the mean of the last 7 daily
        # rows; the first column is then replaced by the mean of the last 60
        # intraday rows of today.
        append_values = np.mean(dataset.values[-7:], axis=0)
        dataset_tmp = dataset_intraday[mask_intraday]
        append_values[0] = np.mean(dataset_tmp.values[-60:], axis=0)[0]
        data_tmp = pd.DataFrame(
            [append_values], index=[today], columns=data_columns)
        dataset = pd.concat([dataset, data_tmp])

    return dataset
def update_company(self, company):
    """Bring the cached price history of ``company`` up to ``self.today``.

    A company that is not cached yet is downloaded from ``self.start_date``.
    A cached company is extended with the rows between its newest cached
    date and ``self.today``.

    Args:
        company: Ticker symbol used both as cache key and download symbol.

    Returns:
        Always ``True``.
    """
    import pandas as pd  # local import keeps this block self-contained

    if company not in self.data:
        self.data[company] = yf.download(
            [company], start=self.start_date, end=self.today)
    else:
        cur_date = self.data[company].index.max().date()
        diff = (self.today - cur_date).days
        if 0 < diff < 3 and datetime.date.today().weekday() >= 5:
            # Gap of one or two days while today is Saturday/Sunday:
            # return without downloading or saving.
            return True
        if diff > 0:
            append_data = yf.download(
                [company], start=cur_date, end=self.today)
            # DataFrame.append returned a new frame (and no longer exists in
            # pandas 2.x), so the result must be stored back explicitly.
            combined = pd.concat([self.data[company], append_data])
            # The request starts at the newest cached date, so that day may
            # come back again; keep only the freshly downloaded copy.
            self.data[company] = combined[
                ~combined.index.duplicated(keep='last')]

    self.save_data()
    return True
def main():
    """Run the pair-trading example on NVDA versus AMD.

    Downloads both price histories for 2013-01-01 to 2014-12-31, builds the
    signals with ``signal_generation`` and ``cointegration``, then plots.
    """
    first_ticker, second_ticker = 'NVDA', 'AMD'
    window = {'start': '2013-01-01', 'end': '2014-12-31'}

    first_prices = yf.download(first_ticker, **window)
    second_prices = yf.download(second_ticker, **window)

    plot(
        signal_generation(first_prices, second_prices, cointegration),
        first_ticker,
        second_ticker,
    )
def download_data(tickers, startDate, endDate):
    """Download quotes for ``tickers`` from Yahoo Finance.

    Args:
        tickers: Ticker symbol(s) forwarded to ``yf.download``.
        startDate: Start of the requested range.
        endDate: End of the requested range.

    Returns:
        Whatever ``yf.download`` returns for the request.
    """
    result = yf.download(tickers, startDate, endDate)
    # Echo the request once the download has finished.
    print(tickers)
    return result
def get_data(symbol, start_date, end_date):
    """Download the history of ``symbol`` with the ``.SA`` suffix appended.

    Args:
        symbol: Base symbol; converted with ``str`` before the suffix is
            added.
        start_date: Range start; parsed with ``pd.to_datetime``.
        end_date: Range end; parsed with ``pd.to_datetime``.

    Returns:
        The result of ``yf.download`` for the suffixed ticker.
    """
    suffixed = str(symbol) + '.SA'
    period = {
        'start': pd.to_datetime(start_date),
        'end': pd.to_datetime(end_date),
    }
    return yf.download(suffixed, **period)
def main():
    """Interactive backtest comparing the awesome and MACD oscillators.

    Both oscillators use the same windows (5 short, 34 long) so the
    comparison is consistent. Prompts for the date range, the ticker and a
    slice offset, then plots and reports portfolio statistics.
    """
    short_window, long_window = 5, 34

    stdate = input('start date in format yyyy-mm-dd:')
    eddate = input('end date in format yyyy-mm-dd:')
    ticker = input('ticker:')
    prices = yf.download(ticker, start=stdate, end=eddate)

    # A large dataset makes the backtesting plot messy, so only the rows
    # from this offset onward are plotted.
    slicer = int(input('slicing:'))

    sig = awesome_signal_generation(
        signal_generation(prices, ewmacd, short_window, long_window),
        awesome_ma,
    )
    plot(sig[slicer:], ticker)

    # The portfolio is built from the full, unsliced signal frame.
    portfo = portfolio(sig)
    profit(portfo)
    stats(portfo)
def get_log_ret_data_from_yahoo(self, symbols, sd, ed, save=False):
    """Download adjusted closes and append their log returns.

    Args:
        symbols: One symbol or a list of symbols.
        sd: Start date string, checked with ``Validator.validate_attribute``.
        ed: End date string, checked with ``Validator.validate_attribute``.
        save: When true, also write the result to ``logged_fd.csv``.

    Returns:
        A frame with one price column per symbol and one log-return column
        per symbol named ``'L' + symbol`` (e.g. ``SPY`` -> ``LSPY``), with
        rows containing missing values dropped.

    Raises:
        ValueError: If ``symbols`` is ``None``.
    """
    if symbols is None:
        raise ValueError('The attribute "symbols" has to be provided')
    if isinstance(symbols, str):
        symbols = [symbols]

    Validator.validate_attribute(sd, str, True)
    Validator.validate_attribute(ed, str, True)

    # Raw adjusted close prices, one download per symbol.
    data = pd.DataFrame({
        sym: yf.download(tickers=sym, start=sd, end=ed)['Adj Close']
        for sym in symbols
    })

    # Log returns. Every column gets the 'L' prefix so that return columns
    # never collide with the price columns, whatever symbols are requested.
    lrets = np.log(data / data.shift(1)).dropna()
    lrets = lrets.add_prefix('L')

    data = pd.concat([data, lrets], axis=1).dropna()

    if save:
        data.to_csv('logged_fd.csv', index=True, encoding='utf-8')
    return data
def download_yf_data(self, symbol, start_date=None, end_date=None):
    """Download prices for ``symbol`` and write them to a CSV file.

    The file is written to ``<download_path>/<symbol>.csv``. Any failure is
    reported on stdout rather than raised.

    Args:
        symbol: Ticker symbol; also used to look up ``SymbolId`` via
            ``self.get_id`` and to name the output file.
        start_date: Range start forwarded to ``data.download``.
        end_date: Range end forwarded to ``data.download``.
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    download_path = r'E:\Projects\yahoo-data-download\Data'

    if self.start_date is None:
        self.start_date = date.today().strftime('%Y-%m-%d')
    if self.end_date is None:
        self.end_date = date.today().strftime('%Y-%m-%d')
    # NOTE(review): the defaults above are stored on the instance, but the
    # download below uses the start_date/end_date *arguments* -- confirm
    # which of the two is meant to drive the request.

    try:
        price = data.download(symbol, start=start_date, end=end_date)

        price['SymbolId'] = self.get_id(str(symbol))
        price['UpdateDate'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # Copies under the column names used in the exported file.
        price['AdjClose'] = price['Adj Close']
        price['Open_'] = price['Open']
        price['Close_'] = price['Close']
        price['PriceDate'] = price.index
        price.reset_index(level=0, inplace=True)

        to_df = price[[
            'PriceDate', 'Open_', 'Low', 'High', 'Close_', 'AdjClose',
            'Volume', 'UpdateDate', 'SymbolId'
        ]]
        to_df.to_csv(os.path.join(download_path, symbol + '.csv'))
        print(
            "Data downloaded successfully for symbol: {} ".format(symbol))
    except Exception as e:
        # Best effort: report the failure and carry on.
        print(
            "Data not available for download for symbol: {0}, and error: {1} "
            .format(symbol, e))
def retrieve_data(ticker, start_date, end_date):
    """Return the ``yf.download`` result for ``ticker`` over a date range.

    Example:
        ``retrieve_data("AAPL", "2012-01-01", "2019-01-01")``
    """
    return yf.download(ticker, start_date, end_date)
def DownloadData(symbols, start_date, end_date=0, dropna=1, source='yahoo', metric='Adj Close'):
    """Return the selected metric for the given tickers and dates.

    Args:
        symbols: List of tickers in Yahoo Finance format (TICK.EX).
        start_date: Start of the range.
        end_date: End of the range; ``0`` (the default) means today,
            formatted as ``YYYY-MM-DD``.
        dropna: ``1`` drops every row in which any value is missing.
        source: Forwarded to ``yf.download`` as ``data_source``.
        metric: Key selected from the download result.

    Returns:
        The ``metric`` selection of the download, optionally with
        incomplete rows removed.
    """
    import datetime
    import fix_yahoo_finance as yf
    yf.pdr_override()

    print("\nSelected portfolio:", symbols)
    print("Starting download protocol...")

    if end_date == 0:
        end_date = datetime.datetime.today().strftime('%Y-%m-%d')

    data = yf.download(symbols,
                       data_source=source,
                       start=start_date,
                       end=end_date,
                       retry_count=15)[metric]
    if dropna == 1:
        data = data.dropna()
    return data
def makeWebhookResult(req):
    """Build the webhook reply for a ``price.check`` request.

    Args:
        req: Parsed request body; ``req["result"]`` must hold ``action`` and
            ``parameters`` (with ``price_check`` naming the stock).

    Returns:
        A dict with ``speech``, ``displayText`` and ``source``. An empty
        dict is returned when the action is not ``price.check`` or the
        requested stock is not one of the known names.
    """
    result = req.get("result")
    if result.get("action") != "price.check":
        return {}

    parameters = result.get("parameters")
    stock = parameters.get("price_check")
    cost = {
        'chevron': 'cvx',
        'Exxon': 'XOM',
        'BP': 'BP',
        'TOTAL': 'TOT',
        'oil': 'CLF18.NYM'
    }
    # The name comes from the request body: treat anything that is not a
    # known string as "not handled" instead of failing with KeyError.
    symbol = cost.get(stock) if isinstance(stock, str) else None
    if symbol is None:
        return {}

    price = yf.download(symbol)
    # .iloc selects the last row by position; the index holds dates.
    last_close = price['Close'].iloc[-1]

    speech = "The price of " + stock + " is " + str(round(last_close)) + " dollar."
    print("Response:")
    print(speech)
    return {
        "speech": speech,
        "displayText": speech,
        "source": "Oili"
    }
def yahoo_finance_download(self):
    """Download adjusted closes since 2017-01-01 and store log returns.

    Iterates ``self.ticker_list``; tickers whose download has no rows are
    skipped. The log of each price over the previous row's price is stored
    in ``self.rets``.
    """
    adj_close = pd.DataFrame()
    for symbol in self.ticker_list:
        history = yf.download(symbol,
                              start=datetime(2017, 1, 1),
                              end=date.today())
        if len(history) == 0:
            continue
        adj_close[symbol] = history['Adj Close']

    previous = adj_close.shift(1)
    self.rets = np.log(adj_close / previous)
def get_market_indices(self, data):
    """Download adjusted closes for a set of market indices.

    The requested window ends today and starts ``365 * 25`` days earlier;
    both bounds are stored on ``self.startdate`` / ``self.enddate``.

    Args:
        data: Table with a ``CODE`` column (symbol to download) and an
            ``INDEX`` column (name of the resulting column).

    Returns:
        A DataFrame with one ``Adj Close`` column per entry of ``data``,
        named after the matching ``INDEX`` value.
    """
    # Sample the clock once so both bounds derive from the same instant.
    now = datetime.now()
    self.startdate = (now - timedelta(365 * 25)).date()
    self.enddate = now.date()

    market_indices = pd.DataFrame()
    # Walk the two columns in lockstep; this does not depend on how the
    # rows of ``data`` are labelled.
    for code, index_name in zip(data['CODE'], data['INDEX']):
        mdata = yf.download(code, start=self.startdate, end=self.enddate)
        market_indices[index_name] = mdata['Adj Close']

    return market_indices
def main():
    """Run the Heikin-Ashi backtest on NVDA and report the results.

    Downloads 2015-04-01 to 2018-02-15, generates signals, then plots and
    evaluates only the rows from offset 700 onward.
    """
    # Stop-loss cap: the maximum number of long positions. Without such a
    # cap the strategy keeps going long for as long as the signal triggers.
    stls = 3

    ticker = 'NVDA'
    stdate, eddate = '2015-04-01', '2018-02-15'

    # Plotting the whole multi-year dataset would be too dense for a figure.
    slicer = 700

    prices = yf.download(ticker, start=stdate, end=eddate)
    df1 = signal_generation(prices, heikin_ashi)
    tail = df1[slicer:]

    plot(tail, ticker)

    portfo = portfolio(tail)
    profit(portfo)
    stats(portfo, df1, stdate, eddate)
def get_yahoo_data(symbol_list, start_str, end_str):
    """Download data for ``symbol_list`` and return it unstacked.

    Args:
        symbol_list: Symbols forwarded to ``yf.download``.
        start_str: Start date string, e.g. ``"2017-09-15"``.
        end_str: End date string in the same format.

    Returns:
        ``download_result.to_frame().unstack()``.
    """
    downloaded = yf.download(symbol_list, start=start_str, end=end_str)
    # NOTE(review): this relies on the download result offering
    # ``to_frame()`` -- confirm against the installed library version.
    return downloaded.to_frame().unstack()
def main():
    """Interactive MACD backtest: prompt for settings, download, plot.

    The two moving-average periods, the date range, the ticker and the
    slice offset are read from stdin and published as module globals.
    """
    global ma1, ma2, stdate, eddate, ticker, slicer

    # Moving-average periods (the classic MACD uses 12 and 26).
    ma1 = int(input('ma1:'))
    ma2 = int(input('ma2:'))

    stdate = input('start date in format yyyy-mm-dd:')
    eddate = input('end date in format yyyy-mm-dd:')
    ticker = input('ticker:')

    # With a large dataset the markers cluster together, so only rows from
    # this offset onward are plotted.
    slicer = int(input('slicing:'))

    prices = yf.download(ticker, start=stdate, end=eddate)
    signals = signal_generation(prices, macd)
    plot(signals[slicer:], ticker)
def stock_linear_regression():
    """Fit ``Adj Close`` against ``Open`` and show the fit.

    The date range and symbol come from ``start_date()``, ``end_date()``
    and ``input_symbol()``. A scatter of actual versus predicted prices is
    shown together with the fitted line, and a short summary is printed.
    """
    s = start_date()
    e = end_date()
    sym = input_symbol()
    df = yf.download(sym, s, e)

    n = len(df.index)
    # One row per observation, as the regressor expects.
    X = np.array(df['Open']).reshape(n, -1)
    Y = np.array(df['Adj Close']).reshape(n, -1)

    lr = LinearRegression()
    lr.fit(X, Y)
    # Predict once and reuse the result for both plot layers.
    predicted = lr.predict(X)

    plt.figure(figsize=(12, 8))
    plt.scatter(df['Adj Close'], predicted)
    plt.plot(X, predicted, color='red')
    plt.xlabel('Prices')
    plt.ylabel('Predicted Prices')
    plt.grid()
    plt.title(sym + ' Prices vs Predicted Prices')
    plt.show()

    print('Summary:')
    print('Estimate intercept coefficient:', lr.intercept_)
    print('Number of coefficients:', len(lr.coef_))
    print('Accuracy Score:', lr.score(X, Y))
def stock_svr():
    """Fit three SVR models to ``Adj Close`` and plot them.

    The date range and symbol come from ``start_date()``, ``end_date()``
    and ``input_symbol()``. Linear, polynomial (degree 2) and RBF models
    are fitted, plotted against the data, and each model's prediction for
    the query point is printed.
    """
    s = start_date()
    e = end_date()
    sym = input_symbol()
    df = yf.download(sym, s, e)

    # One feature column with one row per observation (n x 1).
    dates = np.reshape(df.index, (len(df.index), 1))
    # Query point as a 1 x 1 array; a bare int has no len().
    # NOTE(review): the models are fitted on the raw index values while the
    # query is the plain number 31 -- confirm the intended feature encoding.
    x = np.reshape(31, (1, 1))
    prices = df['Adj Close']

    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

    # Fit regression models
    svr_lin.fit(dates, prices)
    svr_poly.fit(dates, prices)
    svr_rbf.fit(dates, prices)

    plt.figure(figsize=(12, 8))
    plt.scatter(dates, prices, c='k', label='Data')
    plt.plot(dates, svr_lin.predict(dates), c='g', label='Linear model')
    plt.plot(dates, svr_rbf.predict(dates), c='r', label='RBF model')
    plt.plot(dates, svr_poly.predict(dates), c='b', label='Polynomial model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    # Each label is paired with the model it names.
    print('Linear Model:', svr_lin.predict(x)[0])
    print('RBF Model:', svr_rbf.predict(x)[0])
    print('Polynomial Model:', svr_poly.predict(x)[0])
def getData(data_name):
    """Load ``../data/<data_name>.csv``, downloading it first if absent.

    When the CSV exists it is read with ``Date`` as a parsed index.
    Otherwise the data is downloaded from 2000-01-01 until today, reduced
    to the Open/High/Low/Close/Volume columns, saved to that path (creating
    ``../data`` when needed) and returned.

    Args:
        data_name: Symbol to download; also the CSV file stem.

    Returns:
        The loaded or freshly downloaded DataFrame.
    """
    print("Cargando datos...")
    path_data = '../data/' + data_name + '.csv'

    # Cached copy available: read it and stop here.
    if os.path.exists(path_data):
        print('Datos existentes en ../data.')
        cached = pd.read_csv(path_data, index_col="Date", parse_dates=True)
        print(path_data + ' cargado con éxito.')
        return cached

    # No cached copy: download, trim the columns and save.
    print('Datos no existentes en ../data.')
    print('Descargando datos..')
    from_date = '2000-01-01'
    today = datetime.datetime.now().strftime('%Y-%m-%d')

    downloaded = yf.download(data_name, from_date, today)
    downloaded = downloaded[['Open', 'High', 'Low', 'Close', 'Volume']]

    if not os.path.exists('../data'):
        os.makedirs('../data')
    downloaded.to_csv(path_data)
    print('Datos ' + path_data + ' guardados.')
    return downloaded
def chart():
    """Plot January 2018 closes for three symbols, then render the form.

    Returns:
        The rendered ``my-form.html`` template.
    """
    tickers = ['CORN', 'UGA', 'NDAQ']
    quotes = yf.download(tickers, '2018-01-01', '2018-01-31')

    closes = quotes.Close
    closes.plot()
    plt.show()

    return render_template("my-form.html")
def pullTicker(ticker, f, t, isETF=False):
    """Download adjusted quotes for ``ticker`` and store them row by row.

    Rows whose volume or close is below 1 are skipped. Each stored row also
    gets ``change``: the percentage move of its close relative to the
    previously stored close.

    Args:
        ticker: Symbol to download; also used as ``ticker_id``.
        f: Start of the range.
        t: End of the range.
        isETF: Store into ``ETFQuote`` instead of ``Quote``.

    Returns:
        The number of downloaded rows (including skipped ones).
    """
    df = yf.download(ticker, start=f, end=t, auto_adjust=True)
    print('[pullTickerYahoo]', ticker, f, t, ' size=', len(df.index))

    model = ETFQuote if isETF else Quote
    closes, highs, lows = df['Close'], df['High'], df['Low']
    opens, volumes = df['Open'], df['Volume']

    previous_close = None
    for stamp in df.index:
        quote_date = Helper.todate(stamp)
        close_px = float(closes[stamp])
        high_px = float(highs[stamp])
        low_px = float(lows[stamp])
        open_px = float(opens[stamp])
        volume = int(volumes[stamp])

        if volume < 1 or close_px < 1:
            continue

        doc, created = model.objects.get_or_create(ticker_id=ticker,
                                                   date=quote_date)
        doc.open = open_px
        doc.high = high_px
        doc.low = low_px
        doc.close = close_px
        # Prices were requested with auto_adjust=True, so the close is
        # stored as the adjusted close too.
        doc.adjclose = close_px
        doc.volume = volume
        if previous_close is not None:
            doc.change = ((close_px / previous_close) - 1) * 100  # as %
        doc.save()

        previous_close = close_px

    return len(df.index)
def getSingleStock(symbol, from_date, till_date):
    """Download weekly quotes for one symbol with lower-case column names.

    Args:
        symbol: Ticker symbol; an empty value short-circuits the call.
        from_date: Start of the range.
        till_date: End of the range.

    Returns:
        ``(data, message)``. On success ``data`` is the downloaded frame,
        sorted by its ``date`` index, and ``message`` is ``""``. On failure
        ``data`` is an empty DataFrame and ``message`` describes the error.
    """
    repeat_times = 1
    message = ""
    df = pd.DataFrame()

    if not symbol:
        return df, message

    for _ in range(repeat_times):
        try:
            data = yf.download(symbol,
                               start=from_date,
                               end=till_date,
                               interval='1wk')
            data = data.rename(
                columns={
                    'Date': 'date',
                    'Open': 'open',
                    'High': 'high',
                    'Low': 'low',
                    'Close': 'close',
                    "Adj Close": 'adj_close',
                    'Volume': 'volume'
                })
            data.index.name = 'date'
            # sort_index returns a new frame; keep it so the result is
            # actually sorted.
            data = data.sort_index()
            return data, ""
        except Exception as e:
            # Best effort: remember the error and try the next attempt.
            message = symbol + " fetch exception: " + str(e)
            continue

    return df, message
def get_data(stocks, source, metric="Close", start='2016-01-01', end=None):
    """Get price values from ``source``.

    Args:
        stocks: List of base symbols; ``".AX"`` is appended to each.
        source: Data source name; only ``"yahoo"`` (any case) is accepted.
        metric: Price field to select from the download.
        start: Start date string.
        end: End date string; defaults to today's date.

    Returns:
        A DataFrame indexed like the download, with ``(symbol, metric)``
        MultiIndex columns, all-empty rows dropped and rows sorted by index.

    Raises:
        ValueError: If ``source`` is not yahoo.
    """
    if source.lower() != "yahoo":
        msg = "The source needs to be yahoo."
        raise ValueError(msg)
    stocks = [item + ".AX" for item in stocks]

    print("Loading data from {}.".format(source))
    if not end:
        end = str(datetime.now().date())

    downloaded_data = yf.download(stocks, start=start, end=end)[metric]
    # A single symbol may come back as a plain Series, which has no
    # ``columns``; give it the one-column shape the loop below expects.
    if isinstance(downloaded_data, pd.Series) and len(stocks) == 1:
        downloaded_data = downloaded_data.to_frame(name=stocks[0])

    columns = pd.MultiIndex.from_product([stocks, [metric]])
    data = pd.DataFrame(columns=columns, index=downloaded_data.index)
    for share in downloaded_data.columns:
        data.loc[:, (share, metric)] = downloaded_data.loc[:, share].values

    data.dropna(how="all", inplace=True)
    data.sort_index(ascending=True, inplace=True)
    return data
def download_currency_quotes(self, startAt, currency='ILS'):
    """Fill ``self.Currencies`` with daily USD/``currency`` closes.

    Quotes for ``currency + "=X"`` are downloaded in windows of at most 100
    days, walking backwards from today until ``startAt`` is reached. For
    every quoted date both directions are stored:
    ``Currencies[date]['USD'][currency]`` holds the close and
    ``Currencies[date][currency]['USD']`` its reciprocal.

    Args:
        startAt: Earliest date to download (a ``datetime.date``).
        currency: Currency code quoted against USD.
    """
    endAt = datetime.date.today()
    delta = datetime.timedelta(days=100)

    while endAt > startAt:
        # Clamp the window so it never reaches back before startAt.
        startDate = max(endAt - delta, startAt)

        pandas_data = yf.download(currency + "=X", start=startDate, end=endAt)
        quotes = pandas_data['Close']

        # Pair each timestamp with its value instead of indexing by position.
        for stamp, quote_value in zip(quotes.index, quotes.values):
            day = self.Currencies.setdefault(stamp.date(), {})
            day.setdefault('USD', {})[currency] = quote_value
            day.setdefault(currency, {})['USD'] = 1 / quote_value

        endAt -= delta

    print('downloaded')
def main():
    """Run the parabolic SAR example on EA for 2016-01-01 to 2018-01-01."""
    ticker = 'EA'
    stdate, eddate = '2016-01-01', '2018-01-01'

    # Plot only the rows from this offset onward to keep the figure legible.
    slicer = 450

    df = yf.download(ticker, start=stdate, end=eddate)

    # These two columns are not needed by the signal generation.
    for unused in ('Adj Close', 'Volume'):
        del df[unused]

    # Work on a plain positional index instead of the timestamp index.
    df.reset_index(inplace=True)
    new = signal_generation(df, parabolic_sar)

    # Back to a date index so the plot gets a date x axis.
    new.set_index(new['date'], inplace=True)

    plot(new[slicer:], ticker)
def download_data_4_symbols(ticker):
    """Download data for ``ticker`` between ``yday`` and ``today``.

    Both module-level dates are formatted with ``dformat``.

    Returns:
        The download result, or ``None`` after pretty-printing the error
        when anything goes wrong.
    """
    try:
        return fy.download(ticker,
                           yday.strftime(dformat),
                           today.strftime(dformat))
    except Exception as err:
        pp.pprint(err)
        return None
def get_risk_free_rate(start_date: str, end_date: str) -> List:
    """Return per-day risk-free rates derived from the ``^IRX`` series.

    Each ``Adj Close`` value is divided by 100 and then by 252.
    NOTE(review): presumably the quote is an annual percentage and 252 is
    the number of trading days per year -- confirm.

    Args:
        start_date: Start date string.
        end_date: End date string.

    Returns:
        One rate per downloaded row (a NumPy array, taken from ``.values``).
    """
    data_rf = yf.download("^IRX", start=start_date, end=end_date)
    adj_close_rf = data_rf['Adj Close'].values / 100
    returns_rf = adj_close_rf / 252
    print('Successfully retrieved risk-free rate from ' + start_date +
          ' to ' + end_date + ': ' + str(len(returns_rf)) + ' trading days')
    return returns_rf
def Get_Yahoo_Finance_data(Ticker, Start=None):
    """Download the history of ``Ticker`` without a progress bar.

    Args:
        Ticker: Symbol(s) forwarded to ``yf.download``.
        Start: Start date; defaults to ``'1980-01-01'`` when omitted.

    Returns:
        The result of ``yf.download``.
    """
    if Start is None:
        Start = '1980-01-01'
    return yf.download(Ticker, start=Start, progress=False)
def get_yahoo_data(symbols, start_date, end_date):
    """Download adjusted closes for ``symbols``, aligned to SPY's dates.

    SPY is always downloaded first as a reference (even when it was not
    requested) and dates for which SPY has no value are dropped. The
    caller's ``symbols`` sequence is never modified.

    Args:
        symbols: Symbols to return, in the desired column order.
        start_date: Start of the range.
        end_date: End of the range.

    Returns:
        A DataFrame with one adjusted-close column per requested symbol.
    """
    requested = list(symbols)
    # Add SPY for reference, if absent, on a private copy of the list.
    to_fetch = requested if 'SPY' in requested else ['SPY'] + requested

    dates = pd.date_range(start_date, end_date)
    df = pd.DataFrame(index=dates)

    for symbol in to_fetch:
        print('getting data..' + symbol)
        df_yahoo = yf.download(symbol, start_date, end_date)
        adj_close = df_yahoo['Adj Close'].rename(symbol)
        df = df.join(adj_close)
        if symbol == 'SPY':
            # Drop dates SPY did not trade.
            df = df.dropna(subset=["SPY"])

    return df[requested]
def collect_data(source, tick='SPY', start='1980-01-01', end='2019-01-01'):
    """Load price data from one of the supported sources.

    Args:
        source: ``"yahoo"`` (download), ``"csv"`` (read ``GSPC.csv`` and
            slice it to ``start``..``end``) or ``"mongodb"`` (query the
            ``chart``/``Day`` collection).
        tick: Symbol to load (used by yahoo and mongodb).
        start: Start date string ``YYYY-MM-DD``.
        end: End date string ``YYYY-MM-DD``.

    Returns:
        A DataFrame with the loaded prices.

    Raises:
        ValueError: If ``source`` is not one of the supported names.
    """
    if source == "yahoo":
        import fix_yahoo_finance as yf
        data = yf.download(tick, start, end)
    elif source == 'csv':
        data = pd.read_csv('GSPC.csv', index_col="Date", parse_dates=True)
        data = data.loc[start:end, :]
    elif source == 'mongodb':
        from KafkaProcess import MongoDBConnect
        mongo = MongoDBConnect(IPAddress="192.168.110.116",
                               Port=27017,
                               dbName="chart",
                               Collection="Day")
        # The connector takes the dates split into their components.
        (stock_high, stock_low, stock_close, stock_open, stock_volume,
         stock_time) = mongo.mongodb_connect(tick, start.split('-'),
                                             end.split('-'))
        data = pd.DataFrame(
            {
                'High': stock_high,
                'Low': stock_low,
                'Open': stock_open,
                'Close': stock_close,
                'Volume': stock_volume
            },
            index=stock_time)
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(
            "Unknown source {!r}; expected 'yahoo', 'csv' or 'mongodb'."
            .format(source))
    return data
def yhGetHistory(myTicker, start, stop):
    """Helper used by ``getMeData``: download the history of one ticker.

    Args:
        myTicker: Ticker symbol string.
        start: Start date string, format ``"YYYY-M-D"``.
        stop: End date string, format ``"YYYY-M-D"``.

    Returns:
        The object returned by ``yf.download`` for the request, unchanged.
    """
    return yf.download(myTicker, start, stop)
def send_head(self):
    """Answer a quote request with CSV data downloaded via ``yf``.

    The query string of ``self.path`` supplies the symbol (``s``), the
    start date as month/day/year (``a``/``b``/``c``), the end date as
    month/day/year (``d``/``e``/``f``) and the request type (``g``).
    Type ``'d'`` returns the price columns; any other type requests
    ``actions='only'`` and returns the ``action``/``value`` columns.

    Sends the 200 status line and the Content-type/Content-Length headers.

    Returns:
        A file-like buffer positioned at the start of the CSV body.
    """
    print(self.path)
    request_path = self.path[1:]
    ctype = 'application/octet-stream'
    f = StringIO()

    params = parse_qs(urlparse(request_path).query)
    stock = params['s'][0]
    month_begin = params['a'][0]
    day_begin = params['b'][0]
    year_begin = params['c'][0]
    month_end = params['d'][0]
    day_end = params['e'][0]
    year_end = params['f'][0]
    request_type = params['g'][0]

    yf.pdr_override()

    data_begin_str = year_begin + "-" + month_begin + "-" + day_begin
    date_end_str = year_end + "-" + month_end + "-" + day_end

    daily = request_type == 'd'
    action_to_request = None if daily else 'only'

    # Single pre-formatted string: prints the same on Python 2 and 3.
    print("%s %s %s %s" % (stock, data_begin_str, date_end_str,
                           action_to_request))
    pandas_data = yf.download(stock,
                              start=data_begin_str,
                              end=date_end_str,
                              actions=action_to_request)
    print(pandas_data)

    if daily:
        columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
    else:
        columns = ['action', 'value']
    data = pandas_data.to_csv(columns=columns)

    f.write(data)
    self.send_response(200)
    # The write position after the body is used as its length.
    length = f.tell()
    f.seek(0)
    self.send_header("Content-type", ctype)
    self.send_header("Content-Length", str(length))
    self.end_headers()
    return f
def get_fixstock_quote(symbol, diff_time, current_time):
    """Return the latest close of ``symbol`` rounded to three decimals.

    Args:
        symbol: Ticker symbol; formatted into a string before the download.
        diff_time: Start of the range.
        current_time: End of the range.

    Returns:
        The last ``Close`` value as a float, or ``None`` when the download
        is empty.
    """
    data = yf.download("%s" % symbol, start=diff_time, end=current_time)
    if data.empty:
        return None

    last_close = data['Close'].values[-1]
    # Rounding goes through the "%5.3f" text form, then back to float.
    return float("%5.3f" % last_close)