def collect_data(ticker):
    url = "https://finance.yahoo.com/quote/" + ticker
    try:
        # Back-off schedule (in seconds) used when Yahoo Finance stops responding
        retry_times = [5, 10, 20, 30, 40, 50, 60]
        retries = 0
        symbol_type = ticker_list[ticker].symbolTypeDisplay
        ticker = ticker.replace('*', '').replace('\\', '')

        if symbol_type not in os.listdir('Data'):
            os.mkdir(f'Data/{symbol_type}')

        if symbol_type in SKIP_LIST:
            # Store an empty placeholder so this ticker is not retried later
            with open(f"Data/{symbol_type}/{ticker}.pickle", 'wb') as handle:
                pickle.dump({}, handle, protocol=pickle.HIGHEST_PROTOCOL)
            print(f"{Fore.RED} Empty pickle saved for {ticker} due to it being a {symbol_type}.\n")
            return None

        if f"{ticker}.pickle" in os.listdir(f"Data/{symbol_type}"):
            print(f"{Fore.GREEN} {ticker} ({symbol_type}) already collected. \n")
            return None

        # Use AAPL as a health check and back off until Yahoo Finance responds again
        while not get_json("https://finance.yahoo.com/quote/AAPL"):
            print(f"{Fore.RED} Waiting {retry_times[retries]} seconds..")
            time.sleep(retry_times[retries])
            if retry_times[retries] != 60:
                retries += 1

        data = get_json(url)
    except Exception as error:
        print(f"{Fore.RED} Not able to find the data for {ticker} by checking the url {url} due to {error}. \n")
        return None

    try:
        with open(f"Data/{symbol_type}/{ticker}.pickle", 'wb') as handle:
            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        print(f"{Fore.BLUE} {ticker} downloaded and stored in {symbol_type}. \n")
    except Exception as error:
        print(f"{Fore.RED} Was not able to write to pickle due to: {error}. \n")
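# Hedged usage sketch (not in the original source): one way to drive collect_data
# over the module-level ticker_list it reads from. The tqdm progress bar and the
# bootstrap of the 'Data' folder are illustrative assumptions.
import os
from tqdm import tqdm

if 'Data' not in os.listdir('.'):
    os.mkdir('Data')

for ticker_symbol in tqdm(ticker_list):
    collect_data(ticker_symbol)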
def get_core_selection_data(username, password):
    degiro = degiroapi.DeGiro()
    degiro.login(username, password)

    data_set = {}
    print("Preparing dictionary..")
    for symbol in tqdm(isin):
        try:
            searcher = degiro.search_products(symbol)
            key = f"{searcher[0]['symbol']}.{isin[symbol]}"
            data_set[key] = {}
            data_set[key]['name'] = searcher[0]['name']
            data_set[key]['ISIN'] = symbol
        except Exception as e:
            print(f"Error while preparing the dictionary entry for {symbol}: {e}")

    data_set_with_data = {}
    print("Collecting data..")
    for symbol in tqdm(data_set):
        try:
            data_set_with_data[symbol] = get_json(
                "https://finance.yahoo.com/quote/" + symbol)
            data_set_with_data[symbol]['ISIN'] = data_set[symbol]['ISIN']
        except Exception as e:
            print(f"Error during data collection for {symbol}: {e}")

    dump_pickle(data_set_with_data, 'core_selection_degiro.pickle')

    return data_set_with_data
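# Hedged usage sketch (assumption): credentials would typically be read from the
# environment rather than hard-coded; the variable names below are illustrative.
import os

core_selection = get_core_selection_data(
    os.environ["DEGIRO_USERNAME"], os.environ["DEGIRO_PASSWORD"])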
def collectData(self, ticker):
    url = "https://finance.yahoo.com/quote/" + ticker
    data = get_json(url)

    fundPerformance = data['fundPerformance']
    topHoldings = data['topHoldings']
    defaultKeyStatistics = data['defaultKeyStatistics']
    summaryDetail = data['summaryDetail']

    self.tickerName = data['quoteType']['longName']
    self.businessSummary = data['assetProfile']['longBusinessSummary']

    # Sector weightings, formatted as percentages
    sectorData = topHoldings['sectorWeightings']
    self.sectorHoldings = {}
    for sector in sectorData:
        for key, value in sector.items():
            self.sectorHoldings[key] = str(round(value * 100, 2)) + '%'

    # Largest company holdings, formatted as percentages
    companyData = topHoldings['holdings']
    self.companyHoldings = {}
    for company in companyData:
        self.companyHoldings[company['holdingName']] = str(
            round(company['holdingPercent'] * 100, 2)) + '%'

    # Annual total returns for the six most recent years
    annualReturnsData = fundPerformance['annualTotalReturns']['returns'][:6]
    self.annualReturns = {}
    for returns in annualReturnsData:
        if returns['annualValue'] is None:
            self.annualReturns[returns['year']] = "N/A"
        else:
            self.annualReturns[returns['year']] = str(
                round(returns['annualValue'] * 100, 2)) + '%'

    # Risk statistics per period (alpha, beta, Sharpe ratio, etc.)
    riskStatistics = fundPerformance['riskOverviewStatistics']['riskStatistics']
    self.riskData = {}
    for risk in riskStatistics:
        self.riskData[risk['year']] = risk

    self.imageURL = data['fundProfile']['styleBoxUrl']

    # Selected key statistics; the inception date is converted from a Unix timestamp
    self.keyCharacteristics = {}
    for option in defaultKeyStatisticsChoices:
        if option == 'fundInceptionDate':
            self.keyCharacteristics[option] = datetime.fromtimestamp(
                defaultKeyStatistics[option]).strftime('%Y-%m-%d')
        else:
            self.keyCharacteristics[option] = defaultKeyStatistics[option]

    for option in defaultsummaryDetailChoices:
        self.keyCharacteristics[option] = summaryDetail[option]
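# Hedged sketch (assumption): collectData reads two module-level choice lists
# that are not shown here, and is assumed to be a method on a fund/ETF wrapper
# class. The values and the ETFReport class below are illustrative only.
defaultKeyStatisticsChoices = ['fundInceptionDate', 'totalAssets']
defaultsummaryDetailChoices = ['yield']

class ETFReport:
    collectData = collectData  # reuse the function defined above as a method

report = ETFReport()
report.collectData('SPY')
print(report.annualReturns)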
def scrape_json_value(self, key, detail_key=None):
    """Returns the value of a key-value pair from the Yahoo Finance scraping
    JSON. Defaults to summaryProfile."""
    try:
        scrape_url = 'https://finance.yahoo.com/quote'
        url = '%s/%s' % (scrape_url, self.holding)
        data = utils.get_json(url)
        return data[key][detail_key]
    except Exception:
        pass
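# Hedged usage sketch (assumption): scrape_json_value only needs an object with
# a `holding` attribute containing the ticker, so a SimpleNamespace stands in
# for the owning class here; 'MSFT' is an arbitrary example ticker.
from types import SimpleNamespace

sector = scrape_json_value(SimpleNamespace(holding='MSFT'), 'summaryProfile', 'sector')
print(sector)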
def mocked_get_json(url, _=None):
    ''' Mocks the get_json function '''
    if url not in url_map:
        return get_json(url)
    with open(data_path / url_map[url]) as json_file:
        data = json_file.read()
    return json.loads(data)
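# Hedged usage sketch (assumption): wiring mocked_get_json into a test with
# unittest.mock.patch. "mypackage.module.get_json" is a placeholder for wherever
# get_json is imported in the code under test; url_map and data_path are the
# fixtures defined alongside mocked_get_json.
from unittest.mock import patch

def test_uses_cached_json():
    with patch("mypackage.module.get_json", side_effect=mocked_get_json):
        ...  # call the code under test; known URLs are served from local files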
def fetch(self, max_minutes=99999999):
    print(f"start fetching info for {len(self.symbols)} symbols")
    start_time = datetime.now()
    info = YfDetail._new_dict()
    i = 0

    def dict_walker(d: dict, key):
        # Depth-first search for `key` anywhere inside a nested dictionary
        if key in d:
            return d[key]
        for k, v in d.items():
            if isinstance(v, dict):
                res = dict_walker(v, key)
                if res is not None:
                    return res
        return None

    while len(self.symbols) > 0:
        symbol = self.symbols[0]
        url = f"https://finance.yahoo.com/quote/{symbol}"
        try:
            data = get_json(url)
            data["symbol"] = symbol
            for k, v in info.items():
                v.append(dict_walker(data, k))
        except Exception as e:
            print(f"Not able to find the data for {url}", e)

        i += 1
        if i > 0 and (i % 10 == 0 or len(self.symbols) <= 1):
            print(url)
            df = pd.DataFrame(info)
            # start a new partition file once the current one reaches 50 MB
            self.datafile = get_next_partition_file(self.datafile, 50)
            csv_kwargs = dict(mode='a', header=False) if os.path.exists(
                self.datafile) else {}
            print(f"saving new info to {self.datafile}")
            df.set_index("symbol").to_csv(self.datafile, **csv_kwargs)
            info = YfDetail._new_dict()

        if (datetime.now() - start_time).total_seconds() / 60 > max_minutes:
            raise TimeoutError(f"{max_minutes} minutes reached")

        self.symbols.pop(0)
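# Hedged sketch (assumption): the two helpers referenced above are repo-internal
# and not shown; these illustrative stand-ins only mirror the behaviour fetch()
# appears to rely on. The column names and the "name.N.csv" partition naming
# convention are assumptions.
import os
import re

def _new_dict():
    # one list per field that dict_walker extracts, plus the symbol itself
    return {"symbol": [], "longName": [], "sector": [], "marketCap": []}

def get_next_partition_file(datafile, max_mb):
    # keep writing to the current partition until it exceeds max_mb megabytes
    if not os.path.exists(datafile) or os.path.getsize(datafile) < max_mb * 1024 ** 2:
        return datafile
    # otherwise bump the numeric suffix, e.g. details.0.csv -> details.1.csv
    base, idx = re.match(r"(.*)\.(\d+)\.csv$", datafile).groups()
    return f"{base}.{int(idx) + 1}.csv"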
import Searcher as fd  # Replace with FinanceDatabase if repo not cloned
from yfinance.utils import get_json
from yfinance import download
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import random

airlines_us = fd.select_equities(country='United States', industry='Airlines')

airlines_us_fundamentals = {}
for symbol in airlines_us:
    airlines_us_fundamentals[symbol] = get_json(
        "https://finance.yahoo.com/quote/" + symbol)

airlines_us_stock_data = download(list(airlines_us))

colors = list(mcolors.CSS4_COLORS.items())

for symbol in airlines_us_fundamentals:
    color = random.choice(colors)[1]
    quick_ratio = airlines_us_fundamentals[symbol]['financialData']['quickRatio']
    long_name = airlines_us_fundamentals[symbol]['quoteType']['longName']
    if quick_ratio is None:
        continue
    plt.barh(long_name, quick_ratio, color=color)

plt.tight_layout()
plt.show()
def get_FUT_chain(symbol: str):
    # Returns the list of futures contracts in the chain for `symbol`
    url = '{}/{}'.format(_url, symbol)
    fc = utils.get_json(url)['futuresChain']['futures']
    return fc
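# Hedged usage sketch (assumption): _url is expected to be the Yahoo Finance
# quote base URL defined elsewhere in this module, and 'ES=F' is an arbitrary
# example futures symbol.
_url = 'https://finance.yahoo.com/quote'

print(get_FUT_chain('ES=F'))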
# Add to temporary list
TemporaryList.append(ticker)

# Save everything periodically
if counter_for_saving == 100:
    counter_for_saving = 0
    for ticker in TemporaryList:
        DoneList.append(ticker)
    TemporaryList = []
    save_to_pickle()

# Increase counter by 1
counter_for_saving += 1

try:
    data = get_json("https://finance.yahoo.com/quote/" + ticker)
    symbol_type = ticker_list[ticker].symbolTypeDisplay
except Exception as e:
    print("Not able to find the data for {TICKER} by checking the url {URL} due to {ERROR}.".format(
        TICKER=ticker, URL="https://finance.yahoo.com/quote/" + ticker, ERROR=e))
    Errors[ticker] = "Not able to find the data for {TICKER} by checking the url {URL} due to {ERROR}.".format(
        TICKER=ticker, URL="https://finance.yahoo.com/quote/" + ticker, ERROR=e)
    continue

try:
    if symbol_type == 'Equity':
        Equities[ticker] = data
    elif symbol_type == "ETF":
        ETFs[ticker] = data
    elif symbol_type == "Fund":
        Funds[ticker] = data
import FinanceDatabase as fd
from yfinance.utils import get_json
import matplotlib.pyplot as plt

all_etfs = fd.select_etfs()
semiconductor_etfs = fd.search_products(all_etfs, 'semiconductor')

# Remove some unwanted ETFs
del semiconductor_etfs['DXSH.DE']
del semiconductor_etfs['DXSH.F']

semiconductor_etfs_fundamentals = {}
for symbol in semiconductor_etfs:
    semiconductor_etfs_fundamentals[symbol] = get_json(
        "https://finance.yahoo.com/quote/" + symbol)

for symbol in semiconductor_etfs_fundamentals:
    ytd_return = semiconductor_etfs_fundamentals[symbol]['fundPerformance'][
        'trailingReturns']['ytd']
    long_name = semiconductor_etfs_fundamentals[symbol]['quoteType']['longName']
    if ytd_return is None:
        continue
    plt.barh(long_name, ytd_return)

plt.tight_layout()
plt.xticks([-1, -0.5, 0, 0.5, 1], ['-100%', '-50%', '0%', '50%', '100%'])
plt.show()