def assert_stock_prices():
    '''Make sure that all market dates after the first data date for any stock
    have data for that stock. The scraper often leaves holes in the data. If a
    hole is found, look for manually downloaded data in the old Yahoo database
    and in ./manualdata/<ticker>.csv. If none exists, fill in the data the way
    Yahoo does and log an error.'''
    stocks = Database.get_stocks()
    last_assertion_day = StockData.createSDate("2017-07-25")
    market_dates = Database.get_market_dates()
    for stock in stocks:
        for date in market_dates:
            if last_assertion_day >= date:
                continue
            if date.day_number >= stock.first_data_date.day_number:
                if Database.get_dailydata(stock, date=date) is None:
                    dd = Scraper.scrape_dailydata(stock, date, date)  # try the scraper again
                    if dd is not None:
                        if len(dd) != 0:
                            dd = dd[0]  # take it out of the array
                        else:
                            dd = None  # no data found
                    if dd is None:  # try the old Yahoo database
                        dd = Database.get_Yahoo_dailydata(stock, date)
                    if dd is None:  # try manual csv's
                        dd = Scraper.get_manual_dailydata(stock, date)
                    if dd is None:  # nothing left to try, fill in pseudo values and log an error
                        # Copy the previous day's close into all price fields and set
                        # volume to 0. This is what Yahoo does.
                        prev = Database.get_dailydata(stock, date.getPrevious())
                        dd = StockData.SDailyData(stock, date, prev.close, prev.close,
                                                  prev.close, prev.close, 0)
                        Log.log_error(
                            "No data found for {} on {}. Added pseudo values copied from previous day. "
                            "Check manually to make sure daily data doesn't exist."
                            .format(stock, date))
                    Database.add_dailydata(dd)
def get_ticker_info(ticker):
    '''Returns a Stock object for the provided ticker. The ticker may refer to
    either a stock or an index fund.'''
    if not isinstance(ticker, str):
        raise TypeError("'ticker' must be of type 'str'")
    if __contains(__STOCKS_TABLE_NAME, "ticker='{}'".format(ticker)):
        res = __select_all(__STOCKS_TABLE_NAME, "ticker='{}'".format(ticker))[0]
        stock = StockData.Stock(ticker,
                                company=res[1],
                                indices=res[2],
                                on_watchlist=res[3],
                                in_portfolio=res[4],
                                first_data_date=res[5],
                                last_update=res[6])
    elif __contains(__INDEXFUNDS_TABLE_NAME, "ticker='{}'".format(ticker)):
        res = __select_all(__INDEXFUNDS_TABLE_NAME, "ticker='{}'".format(ticker))[0]
        stock = StockData.Stock(ticker,
                                on_watchlist=res[1],
                                in_portfolio=res[2],
                                first_data_date=StockData.createSDate(res[3]),
                                last_update=StockData.createSDate(res[4]))
    else:
        raise Exception(
            "{} not found in IndexFunds or Stocks tables".format(ticker))
    return stock
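# Usage sketch (illustrative, not from the original source). "AAPL" is a
# hypothetical ticker assumed to already be present in the Stocks or
# IndexFunds table; otherwise get_ticker_info raises an Exception.
def example_get_ticker_info():
    try:
        stock = get_ticker_info("AAPL")
    except Exception as err:
        print("ticker not tracked: {}".format(err))
        return None
    return stock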
def __format_stock_res(res):
    return StockData.Stock(res[0],
                           company=res[1],
                           indices=res[2],
                           on_watchlist=res[3],
                           in_portfolio=res[4],
                           first_data_date=StockData.createSDate(res[5]),
                           last_update=StockData.createSDate(res[6]))
def __av_scrape_dailydata(stock, start_date, end_date):
    time.sleep(2)  # to slow down our queries to the requested rate
    # Build URL
    function = "TIME_SERIES_DAILY"
    symbol = stock.ticker
    # "compact" covers roughly the last 100 data points; request "full" for longer ranges
    outputsize = "compact" if (
        end_date.getDayNumber() - start_date.getDayNumber() < 100) else "full"
    datatype = "csv"
    apikey = "KJJTVJT1GLZNVZ5I"
    url = "https://www.alphavantage.co/query?function={}&symbol={}&outputsize={}&datatype={}&apikey={}".format(
        function, symbol, outputsize, datatype, apikey)
    if __PRINT_URLS:
        print "URL: ", url
    # Get csv from URL
    '''
    page = requests.get(url)
    if "Response [4" in str(page):  # failed
        csvpath = __manual_csv_path(stock)
        if csvpath is None:
            Log.log_error("Error scraping Alpha Vantage for {} {} to {}, please add manual .csv for the stock.".format(stock, start_date, end_date), shutdown=True)
        f = open(csvpath, "r")
        page = f.readlines()
        f.close()
    print "PAGE.TEXT ", page.text
    prices = csv.reader(page.text.splitlines())
    '''
    response = urllib.urlopen(url)
    page = response.read()
    prices = list(csv.reader(page.splitlines(), delimiter=","))
    if not prices or "Error Message" in page:  # failed
        csvpath = __manual_csv_path(stock)
        if csvpath is None:
            Log.log_error(
                "Error scraping Alpha Vantage for {} {} to {}, please add manual .csv for the stock."
                .format(stock, start_date, end_date),
                shutdown=False)
            return []
        f = open(csvpath, "r")
        page = f.readlines()
        f.close()
    # Parse into dailydata list, skipping the csv header row
    skipfirst = True
    dailydata = []
    for p in prices:
        if skipfirst:
            skipfirst = False
            continue
        dailydata.append(
            StockData.SDailyData(stock, StockData.createSDate(p[0]),
                                 float(p[1]), float(p[2]), float(p[3]),
                                 float(p[4]), int(p[5])))
    dailydata.reverse()  # data comes descending by date
    return dailydata
def get_manual_dailydata(stock, date):
    '''Looks up daily data in a manually downloaded Yahoo Finance csv.
    Returns one SDailyData, or None if no matching row is found.'''
    file_path = __manual_csv_path(stock)
    if file_path is None:
        return None
    with open(file_path, "r") as csvfile:
        dailydata = csv.reader(csvfile.readlines())
        for dd in dailydata:
            if str(dd[0]) == str(date):
                return StockData.SDailyData(stock,
                                            StockData.createSDate(str(dd[0])),
                                            float(dd[1]), float(dd[2]),
                                            float(dd[3]), float(dd[4]),
                                            int(dd[6]))
    return None
def exchangeForStockList(stockList, startDate=None, endDate=None):
    if len(stockList) > 0:
        dfLastRecord = pd.DataFrame()
        dataPath = sd.getDataFilePath()
        filePath = dataPath + 'last_exchange_record.csv'
        for code in stockList:
            cyclicalStockExchangeStrategy = CyclicalStockExchangeStrategy(
                code, startDate=startDate, endDate=endDate)
            stockExchangeStrategy = Context(cyclicalStockExchangeStrategy)
            stockExchangeStrategy.doExchange()
            lastIndex = len(stockExchangeStrategy.strategy.exchangeDf) - 1
            if lastIndex >= 0:
                dfLastRecord = pd.concat([
                    dfLastRecord,
                    stockExchangeStrategy.strategy.exchangeDf.loc[[lastIndex]]
                ])
        dfLastRecord.reset_index(drop=True, inplace=True)
        dfLastRecord.to_csv(filePath,
                            index=0,
                            float_format=dp.FLOAT_FORMAT2,
                            encoding=sd.UTF_8)
        print(
            '######################### get the last exchange record has been done!#########################'
        )
def get_first_market_date():
    '''Returns the first SDate for which the market was open.'''
    res = __single(
        __select('date',
                 __MARKETDATES_TABLE_NAME,
                 restrictions='ORDER BY day_number ASC LIMIT 1'))
    return StockData.createSDate(res)
def __run_ticker_list(self, ticker_list, title):
    self.__header(title)
    for ticker in ticker_list:
        rsi = get_RSI(StockData.Stock(ticker, ""),
                      Database.get_last_market_date())
        if rsi is None:
            continue
        self.__check_rsi(ticker, rsi)
def scrape_misc():
    '''Builds a list of Stocks listed as 'misc' in MiscInfo.'''
    stocks = []
    for stock_info in MiscInfo.misc_stocks:
        ticker = stock_info[0]
        company_name = stock_info[1]
        stocks.append(StockData.Stock(ticker, company_name))
    return stocks
def get_market_dates():
    '''Returns a list of all market dates ordered by date ascending.'''
    dates = []
    res = __select('date',
                   __MARKETDATES_TABLE_NAME,
                   restrictions='ORDER BY day_number ASC')
    for entry in res:
        dates.append(StockData.createSDate(entry[0]))
    return dates
def __get_rsi_iterative(stock, date):
    first_data_date = stock.first_data_date
    if first_data_date is None or str(
            first_data_date) == "000-00-00":  # no data available
        return NULL_VALUE
    first_day = first_data_date.day_number
    for day_num in range(first_day, date.day_number):
        # fill in previous RSI values
        __get_rsi_recursive(stock, StockData.createSDate(day_num))
    return __get_rsi_recursive(stock, date)
def allMoneyScript(launchSpreadsheet=False):
    accountsCopiedFromMint = Utilities.getClipboard()
    #password = getpass.getpass(prompt="Enter Mint password: "******"*****@*****.**", password)
    accountsFromMint = Accounts()
    #accountsFromMint.getAccountsFromMintAccounts(mintConnection.getAccounts())
    try:
        accountsFromMint.getAccountsFromMintCopy(accountsCopiedFromMint)
    except ImproperlyFormattedMintData as err:
        print("Error: " + err.message)
        print("Quitting.")
        return
    print(accountsFromMint)
    allMoneySpreadsheet = AllMoneySpreadsheet()
    print("Connecting to spreadsheet...")
    allMoneySpreadsheet.connect()
    print("Preparing spreadsheet for new data...")
    rowNum = allMoneySpreadsheet.addNewRowForData()
    if launchSpreadsheet:
        webbrowser.open(allMoneySpreadsheet.getSpreadsheetUrl())
    mintAccountsNameMap = MintAccountsNameMap(allMoneySpreadsheet)
    print("Putting mint data into spreadsheet...")
    allMoneySpreadsheet.setAccountsData(accountsFromMint, mintAccountsNameMap, rowNum)
    print("Getting the DOW and S&P500...")
    try:
        dowValue = StockData.getStockPrice("DJI")
        allMoneySpreadsheet.setDow(rowNum, dowValue)
        spValue = StockData.getStockPrice("SPX")
        allMoneySpreadsheet.setSpIndex(rowNum, spValue)
    except Exception as err:
        print("Error getting stock values: " + str(err))
    print("Done.")
    return allMoneySpreadsheet
def update_indexfund_prices():
    '''Update the daily data for all index funds on all market dates. If an
    index fund is not listed in the database, it will be added.'''
    funds = []
    for ticker in MiscInfo.INDEXFUNDS:
        Database.add_indexfund(StockData.Stock(ticker, ""))
        fund = Database.get_ticker_info(ticker)
        Database.add_indexfund(fund)
        funds.append(fund)
        __update_prices([fund])
def __google_format_date(date_in):
    spl = date_in.split("-")
    date = spl[0]
    month = google_date_dict[spl[1]]
    year = spl[2]
    if int(year) < 50:
        year = "20{}".format(year)
    else:
        year = "19{}".format(year)
    strDate = "{}-{}-{}".format(year, month, date)
    return StockData.createSDate(strDate)
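# Worked example (illustrative, not from the original source). Assumes the
# scraped Google dates look like "26-Jul-17" and that google_date_dict maps
# month abbreviations to two-digit numbers ("Jul" -> "07"); two-digit years
# below 50 are expanded to 20xx, the rest to 19xx, so the call below would
# build "2017-07-26" before handing it to StockData.createSDate.
def example_google_format_date():
    return __google_format_date("26-Jul-17")  # -> SDate for 2017-07-26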
def scrape_NYSE():
    '''Scrapes a list of Stocks in the NYSE.'''
    page = requests.get(
        'http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NYSE&render=download'
    )
    reader = csv.reader(page.text.splitlines())
    stocks = []
    for entry in reader:
        ticker = entry[0]
        company_name = entry[1]
        stocks.append(StockData.Stock(ticker, company_name))
    return stocks
def generateMoreDataForAllPeriod(stockCode=None):
    if stockCode is not None:
        for period in PERIOD_LIST_ALL:
            stockData = sd.StockData(stockCode)
            stockObj = DataProcess(stockCode, stockData.getStockName(), period)
            stockObj.makeGenData()
            stockObj.saveAsGeneratedData()
            print("[Function:%s line:%s] Message: generate data for stock:%s of period:%s has been done!"
                  % (generateMoreDataForAllPeriod.__name__, sys._getframe().f_lineno, stockCode, period))
    else:
        print("[Function:%s line:%s] Error: Parameters should not be empty!"
              % (generateMoreDataForAllPeriod.__name__, sys._getframe().f_lineno))
        sys.exit()
def __get_market_dates(stock, start_date, end_date):
    threshold_date = StockData.createSDate("2016-01-01")
    # use manual data for older dates, and go online to scrape newer dates
    if threshold_date.day_number < start_date.day_number and threshold_date.day_number < end_date.day_number:
        return __online_market_dates(stock, start_date, end_date)
    elif threshold_date.day_number > start_date.day_number and threshold_date.day_number > end_date.day_number:
        return __manual_market_dates(stock, start_date, end_date)
    else:  # straddles the threshold
        dates = []
        dates.extend(__manual_market_dates(stock, start_date, threshold_date))
        dates.extend(__online_market_dates(stock, threshold_date, end_date))
        return dates
def scrape_market_dates(start_date=StockData.createSDate(
        MiscInfo.FIRST_MARKET_DATE)):
    '''Scrapes the SDates for which the stock market was open using several old
    reference stocks. Returns the list in ascending order.'''
    today = StockData.createSDate(time.strftime("%Y-%m-%d"))
    # populate dates with the first reference stock
    dates = __get_market_dates(
        StockData.Stock(MiscInfo.MARKET_DATE_REFERENCE_STOCKS[0], ""),
        start_date, today)
    # assert that the other stocks' dates agree
    for ticker in MiscInfo.MARKET_DATE_REFERENCE_STOCKS[1:]:
        stock = StockData.Stock(ticker, "")
        #days = __manual_market_dates(stock, start_date, today)
        days = __online_market_dates(stock, start_date, today)
        for day in days:
            if day not in dates:
                Log.log_error(
                    "market date {} not in agreement between reference stocks {}"
                    .format(day.date, MiscInfo.MARKET_DATE_REFERENCE_STOCKS),
                    shutdown=True)
    return dates
def main():
    tickers = ["^DJI", "^GSPC", "AAPL", "MSFT"]
    T = StockData.createImages(tickers)
    createDoc(tickers, T)
    files = ["DailyPrice.png", "DailyVolume.png", "Final.png"]
    cleanUpFiles(tickers, files)
    return
def scrape_DJI():
    '''Scrapes a list of Stocks in the DJI.'''
    page = requests.get(
        'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average')
    soup = BeautifulSoup(page.text, "lxml")
    table = soup.find_all('table')[1]
    stocks = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        ticker = cols[2].a.text
        company_name = cols[0].a.text
        stocks.append(StockData.Stock(ticker, company_name))
    return stocks
def scrape_SP500():
    '''Scrapes a list of Stocks in the S&P500.'''
    page = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = BeautifulSoup(page.text, "lxml")
    table = soup.find_all('table')[0]
    stocks = []
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        ticker = cols[0].a.text
        company_name = cols[1].a.text
        stocks.append(StockData.Stock(ticker, company_name))
    return stocks
def get_dailydata(stock, date=None):
    '''Returns a list of SDailyData for the provided Stock, ordered by date
    ascending. If a date is provided, only that date's SDailyData object is
    returned.'''
    if not isinstance(stock, StockData.Stock):
        raise TypeError("'stock' must be of type Stock")
    if date is not None and not isinstance(date, StockData.SDate):
        raise TypeError("'date' must be of type SDate")
    where = "ticker='{}' ".format(stock.ticker)
    if date is not None:
        where += "AND date='{}'".format(date)
    res = __select_all(__DAILYDATA_TABLE_NAME,
                       where=where,
                       restrictions="ORDER BY date ASC")
    if res is None:
        return None
    dd = []
    for entry in res:
        dd.append(
            StockData.SDailyData(get_stock(entry[0]),
                                 StockData.createSDate(entry[1]), entry[2],
                                 entry[3], entry[4], entry[5], entry[6]))
    if date is not None:
        return dd[0]
    return dd
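# Usage sketch (illustrative, not from the original source): pull a stock's
# full daily history, or a single day's record. The ticker and date below are
# hypothetical and assumed to exist in the DailyData table; get_dailydata
# returns None when the query matches nothing.
def example_get_dailydata():
    stock = get_ticker_info("AAPL")
    history = get_dailydata(stock)  # list of SDailyData, ascending by date
    one_day = get_dailydata(stock, date=StockData.createSDate("2017-07-25"))
    return history, one_day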
def get_Yahoo_dailydata(stock, date):
    '''Returns an SDailyData object for the provided Stock and date from the
    old Yahoo database.'''
    if not isinstance(stock, StockData.Stock):
        raise TypeError("'stock' must be of type Stock")
    if date is not None and not isinstance(date, StockData.SDate):
        raise TypeError("'date' must be of type SDate")
    where = "ticker='{}' ".format(stock.ticker)
    if date is not None:
        where += "AND date='{}'".format(date)
    res = yahoo_database.query(
        "SELECT * FROM DailyData WHERE {} ORDER BY date ASC".format(where))
    if res is None:
        return None
    dd = []
    for entry in res:
        dd.append(
            StockData.SDailyData(get_stock(entry[0]),
                                 StockData.createSDate(entry[1]), entry[2],
                                 entry[3], entry[4], entry[5], entry[6]))
    if len(dd) == 0:
        return None
    if date is not None:
        return dd[0]
    return dd
def rsi_get_defaults(stock):
    '''Return a list of all SDates whose RSI value is the default value for the
    provided stock, sorted ascending.'''
    if not isinstance(stock, StockData.Stock):
        raise TypeError("'stock' must be of type Stock")
    res = __select("date",
                   __RSI_TABLE_NAME,
                   where="RSI='{}' AND ticker='{}'".format(
                       RSI_DEFAULT_VALUE, stock.ticker),
                   restrictions="ORDER BY date ASC")
    if res is None:
        return res
    dates = []
    for date in res:
        dates.append(StockData.createSDate(date[0]))
    return dates
def get_RSIs(stock, start_date, end_date):
    '''Returns a list of RSI values for the provided stock and date range,
    inclusive.'''
    if not isinstance(stock, StockData.Stock):
        raise TypeError("'stock' must be of type Stock")
    if not isinstance(start_date, StockData.SDate):
        raise TypeError("'start_date' must be of type SDate")
    if not isinstance(end_date, StockData.SDate):
        raise TypeError("'end_date' must be of type SDate")
    start_day = start_date.day_number
    end_day = end_date.day_number
    rsis = []
    for day in range(start_day, end_day + 1):
        date = StockData.createSDate(day)
        rsis.append(get_RSI(stock, date))
    return rsis
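# Usage sketch (illustrative, not from the original source): collect a month of
# RSI readings for a hypothetical ticker. The ticker and the date range are
# assumptions for illustration only.
def example_get_RSIs():
    stock = get_ticker_info("MSFT")
    return get_RSIs(stock,
                    StockData.createSDate("2017-06-01"),
                    StockData.createSDate("2017-06-30"))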
def getCurrentTradeReportForAllPeriod(stockCode=None, dfData=None):
    if stockCode is not None and dfData is not None:
        for period in PERIOD_LIST_ALL:
            length = len(dfData)
            if length:
                stock = sd.StockData(stockCode)
                dfMerged = stock.mergeData(dfData, period)
                dfMerged = dfMerged.reset_index(drop=True)
                pStock = DataProcess(stockCode, stock.getStockName(), period)
                pStock.setDfData(dfMerged)
                pStock.addNormalIndicator()
                pStock.generateExchangeSignal(PERIOD_LIST_DEV,
                                              lastNPeriods=1,
                                              updateReportForCurrentTradeData=True)
                print("[Function:%s line:%s] Message: update generated data for stock:%s of Period:%s has been done!"
                      % (getCurrentTradeReportForAllPeriod.__name__, sys._getframe().f_lineno, stockCode, period))
    else:
        print("[Function:%s line:%s] Error: Parameters should not be empty!"
              % (getCurrentTradeReportForAllPeriod.__name__, sys._getframe().f_lineno))
        sys.exit()
def __init__(self, stock_list, start, end):
    self.data_pool = {}
    self.data_pool_normalized = {}
    self.data_set = []
    self.label_set = []
    self.sorted_data = []
    self.hist_length = 20
    self.future_length = 3
    self.n_feature = 5
    self.n_class = 7
    for symbol in stock_list:
        self.data_pool[symbol] = StockData.StockData(symbol, start, end)
        self.normalize_data()
        datas, labels = self.ConstructTrainingSet(
            self.data_pool_normalized[symbol].data)
        self.data_set.append(datas)
        self.label_set.append(labels)
def __init__(self, file_list=DEFAULT_FILE_LIST, rep=DEFAULT_REP):
    """Build the dataset: load a StockData object for each file in file_list,
    upgrade each one, store the resulting list and its size, and add the mean
    entry."""
    dataset = list()
    # creating the dataset of Data objects
    for fi in file_list:
        dataset.append(StockData(fi, rep))
    for i, _ in enumerate(dataset):
        dataset[i].upgrade()
    self.object_list = dataset
    self.size = len(self.object_list)
    self.add_mean()
def __manual_market_dates(stock, start_date, end_date):
    csvpath = __manual_csv_path(stock)
    if csvpath is None:
        Log.log_error(
            "Error scraping manually for {} {} to {}, please add manual .csv for the stock."
            .format(stock, start_date, end_date),
            shutdown=True)
    f = open(csvpath, "r")
    page = f.readlines()
    dates = []
    for line in page[1:]:
        p = line.split(",")
        date = StockData.createSDate(p[0])
        #if time.strptime(date,"%Y-%m-%d") <= time.strptime(end_date,"%Y-%m-%d") and time.strptime(date,"%Y-%m-%d") >= time.strptime(start_date,"%Y-%m-%d"):
        if date <= end_date and date >= start_date:
            dates.append(date)  # assuming YYYY-MM-DD format
    f.close()
    return dates
def updateGeneratedDataForAllPeriod(stockCode=None):
    if stockCode is not None:
        for period in PERIOD_LIST_ALL:
            stock = sd.StockData(stockCode)
            stock.updateKData(period)
            length = stock.getDataLenUpdated()
            if length > 0:
                pStock = DataProcess(stockCode, stock.getStockName(), period)
                pStock.readData()
                pStock.addNormalIndicator()
                pStock.saveAsGeneratedData()
                pStock.generateExchangeSignal(PERIOD_LIST_DEV,
                                              lastNPeriods=length,
                                              Update=True,
                                              updateReportForLatestData=True)
                print("[Function:%s line:%s] Message: update generated data for stock:%s of Period:%s has been done!"
                      % (updateGeneratedDataForAllPeriod.__name__, sys._getframe().f_lineno, stockCode, period))
            else:
                print("[Function:%s line:%s] Message: No updated data for stock:%s of Period:%s!"
                      % (updateGeneratedDataForAllPeriod.__name__, sys._getframe().f_lineno, stockCode, period))
    else:
        print("[Function:%s line:%s] Error: Parameters should not be empty!"
              % (updateGeneratedDataForAllPeriod.__name__, sys._getframe().f_lineno))
        sys.exit()