def get_dividend_cal(date: str) -> pd.DataFrame:
    """Gets dividend calendar for given date. Date represents Ex-Dividend Date

    Parameters
    ----------
    date: str
        Date to get for in format YYYY-MM-DD

    Returns
    -------
    pd.DataFrame:
        Dataframe of dividend calendar
    """
    ag = get_user_agent()
    # Nasdaq API doesn't like this user agent, thus we always get one other than this particular one
    while (
        ag == "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:82.1) Gecko/20100101 Firefox/82.1"
    ):
        ag = get_user_agent()
    try:
        r = requests.get(
            f"https://api.nasdaq.com/api/calendar/dividends?date={date}",
            headers={"User-Agent": ag},
        )
        if r.status_code == 200:
            return pd.DataFrame(r.json()["data"]["calendar"]["rows"])
    except requests.exceptions.ReadTimeout:
        return pd.DataFrame()
    return pd.DataFrame()

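# A minimal usage sketch (the date is hypothetical, for illustration only;
# assumes the module-level imports used above are in place):
def _demo_dividend_cal() -> None:
    div_cal = get_dividend_cal("2022-01-03")
    if not div_cal.empty:
        print(div_cal.head())
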
def get_overall_withdrawal_fees(top: int = 100) -> pd.DataFrame:
    """Scrapes top coins withdrawal fees
    [Source: https://withdrawalfees.com/]

    Parameters
    ----------
    top: int
        Number of coins to search. By default n=100; one page has 100 coins, so 1 page is scraped.

    Returns
    -------
    pandas.DataFrame:
        Coin, Lowest, Average, Median, Highest, Exchanges Compared
    """
    COINS_PER_PAGE = 100
    withdrawal_fees_homepage = BeautifulSoup(
        requests.get(
            "https://withdrawalfees.com/",
            headers={"User-Agent": get_user_agent()},
        ).text,
        "lxml",
    )
    table = withdrawal_fees_homepage.find_all("table")
    tickers_html = withdrawal_fees_homepage.find_all("div", {"class": "name"})
    if table is None or tickers_html is None:
        return pd.DataFrame()

    df = pd.read_html(str(table))[0]
    df["Coin"] = [ticker.text for ticker in tickers_html]
    # Check isinstance before searching for "." so non-string cells don't raise
    df["Highest"] = df["Highest"].apply(
        lambda x: f'{x[:x.index(".")+3]} ({x[x.index(".")+3:]})'
        if isinstance(x, str) and "." in x
        else x
    )
    num_pages = int(math.ceil(top / COINS_PER_PAGE))
    if num_pages > 1:
        for idx in range(2, num_pages + 1):
            withdrawal_fees_homepage = BeautifulSoup(
                requests.get(
                    f"https://withdrawalfees.com/coins/page/{idx}",
                    headers={"User-Agent": get_user_agent()},
                ).text,
                "lxml",
            )
            table = withdrawal_fees_homepage.find_all("table")
            tickers_html = withdrawal_fees_homepage.find_all("div", {"class": "name"})
            if table is not None and tickers_html is not None:
                new_df = pd.read_html(str(table))[0]
                new_df["Highest"] = new_df["Highest"].apply(
                    lambda x: f'{x[:x.index(".")+3]} ({x[x.index(".")+3:]})'
                    if isinstance(x, str) and "." in x
                    else x
                )
                new_df["Coin"] = [ticker.text for ticker in tickers_html]
                df = pd.concat([df, new_df])
    df = df.fillna("")
    return df

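# Sketch of the pagination behaviour: any `top` above 100 triggers scraping of
# additional pages (the value here is illustrative):
def _demo_withdrawal_fees_paged() -> None:
    fees = get_overall_withdrawal_fees(top=250)  # ceil(250 / 100) = 3 pages
    print(len(fees), "coins scraped")
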
def low_float(l_args):
    parser = argparse.ArgumentParser(
        prog="low_float",
        description="""
            Print top stocks with lowest float. LowFloat.com provides a convenient
            sorted database of stocks which have a float of under 10 million shares.
            Additional key data such as the number of outstanding shares, short
            interest, and company industry is displayed. Data is presented for the
            Nasdaq Stock Market, the New York Stock Exchange, the American Stock
            Exchange, and the Over the Counter Bulletin Board.
            [Source: www.lowfloat.com]
        """,
    )
    parser.add_argument(
        "-n",
        "--num",
        action="store",
        dest="n_num",
        type=check_positive,
        default=10,
        help="Number of top stocks to print.",
    )
    ns_parser = parse_known_args_and_warn(parser, l_args)

    url_high_short_interested_stocks = "https://www.lowfloat.com"
    text_soup_low_float_stocks = BeautifulSoup(
        requests.get(
            url_high_short_interested_stocks,
            headers={"User-Agent": get_user_agent()},
        ).text,
        "lxml",
    )

    a_low_float_header = list()
    for low_float_header in text_soup_low_float_stocks.findAll(
        "td", {"class": "tblhdr"}
    ):
        a_low_float_header.append(low_float_header.text.strip("\n").split("\n")[0])
    df_low_float = pd.DataFrame(columns=a_low_float_header)
    df_low_float.loc[0] = ["", "", "", "", "", "", ""]

    stock_list_tr = text_soup_low_float_stocks.find_all("tr")

    low_float_data = list()
    for a_stock in stock_list_tr:
        a_stock_txt = a_stock.text
        if a_stock_txt == "":
            continue
        low_float_data = a_stock_txt.split("\n")
        if len(low_float_data) == 8:
            df_low_float.loc[len(df_low_float.index)] = low_float_data[:-1]
        low_float_data = list()

    # -1 is deprecated; None means "no truncation"
    pd.set_option("display.max_colwidth", None)
    print(df_low_float.head(n=ns_parser.n_num).to_string(index=False))
    print("")

def get_etf_holdings(symbol: str) -> pd.DataFrame:
    """Get ETF holdings

    Parameters
    ----------
    symbol: str
        Symbol to get holdings for

    Returns
    -------
    df: pd.DataFrame
        Dataframe of holdings
    """
    link = f"https://stockanalysis.com/etf/{symbol}/holdings/"
    r = requests.get(link, headers={"User-Agent": get_user_agent()})
    if r.status_code == 200:
        soup = bs(r.text, "html.parser")
        soup = soup.find("table")
        tds = soup.findAll("td")
        # Each holding row spans five <td> cells; slice out the columns of interest
        tickers = [i.text for i in tds[1::5]]
        percents = [i.text for i in tds[3::5]]
        shares = [i.text for i in tds[4::5]]
        df = pd.DataFrame(index=tickers)
        df["% Of Etf"] = percents
        df["Shares"] = shares
        return df
    return pd.DataFrame()

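# Usage sketch with a hypothetical ETF symbol:
def _demo_etf_holdings() -> None:
    holdings = get_etf_holdings("SPY")
    print(holdings.head(10))
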
def market_overview() -> pd.DataFrame:
    """Scrape data for market overview

    Returns
    -------
    overview: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B%7B%22symbol%22"
        "%3A%22INDEX%2FUS%2F%2FDJIA%22%2C%22name%22%3A%22DJIA%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FSPX%22%2C%22"
        "name%22%3A%22S%26P%20500%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FCOMP%22%2C%22name%22%3A%22Nasdaq%20"
        "Composite%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FJP%2F%2FNIK%22%2C%22name%22%3A%22Japan%3A%20Nikkei%20225%22%7D%"
        "2C%7B%22symbol%22%3A%22INDEX%2FUK%2F%2FUKX%22%2C%22name%22%3A%22UK%3A%20FTSE%20100%22%7D%2C%7B%22symbol%22%3A%"
        "22FUTURE%2FUS%2F%2FCRUDE%20OIL%20-%20ELECTRONIC%22%2C%22name%22%3A%22Crude%20Oil%20Futures%22%7D%2C%7B%22symbol"
        "%22%3A%22FUTURE%2FUS%2F%2FGOLD%22%2C%22name%22%3A%22Gold%20Futures%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2"
        "F%2FUSDJPY%22%2C%22name%22%3A%22Yen%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FEURUSD%22%2C%22name%22%3A%"
        "22Euro%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, last_price, net_change, percent_change = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        net_change.append(entry["priceChange"])
        percent_change.append(entry["percentChange"])

    overview = pd.DataFrame(
        {" ": name, "Price": last_price, "Chg": net_change, "%Chg": percent_change}
    )
    return overview

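# The long "id" query parameter in the WSJ endpoints above and below is just
# percent-encoded JSON. A standard-library sketch to inspect such a payload
# (the string below is a shortened, illustrative fragment):
from urllib.parse import unquote

def _demo_decode_wsj_id() -> None:
    encoded = (
        "%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B"
        "%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FDJIA%22%2C%22name%22%3A%22DJIA%22%7D%5D%7D"
    )
    print(unquote(encoded))
    # {"application":"WSJ","instruments":[{"symbol":"INDEX/US//DJIA","name":"DJIA"}]}
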
def global_currencies() -> pd.DataFrame:
    """Scrape data for global currencies

    Returns
    -------
    currencies: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5"
        "B%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FEURUSD%22%2C%22name%22%3A%22Euro%20(EUR%2FUSD)%22%7D%"
        "2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDJPY%22%2C%22name%22%3A%22Japanese%20Yen%20(USD%2F"
        "JPY)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FGBPUSD%22%2C%22name%22%3A%22U.K.%20Poun"
        "d%20(GBP%2FUSD)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDCHF%22%2C%22name%22%3A%22Sw"
        "iss%20Franc%20(USD%2FCHF)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDCNY%22%2C%22name%2"
        "2%3A%22Chinese%20Yuan%20(USD%2FCNY)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDCAD%22%2C"
        "%22name%22%3A%22Canadian%20%24%20(USD%2FCAD)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2F"
        "USDMXN%22%2C%22name%22%3A%22Mexican%20Peso%20(USD%2FMXN)%22%7D%2C%7B%22symbol%22%3A%22CRYPTO"
        "CURRENCY%2FUS%2F%2FBTCUSD%22%2C%22name%22%3A%22Bitcoin%20(BTC%2FUSD)%22%7D%2C%7B%22symbol%22%3A"
        "%22INDEX%2FXX%2F%2FBUXX%22%2C%22name%22%3A%22WSJ%20Dollar%20Index%22%7D%2C%7B%22symbol%22%3A%2"
        "2INDEX%2FUS%2F%2FDXY%22%2C%22name%22%3A%22U.S.%20Dollar%20Index%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, last_price, price_change, pct_change = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        price_change.append(entry["priceChange"])
        pct_change.append(entry["percentChange"])

    currencies = pd.DataFrame(
        {" ": name, "Last": last_price, "Chng": price_change, "%Chng": pct_change}
    )
    return currencies

def us_indices() -> pd.DataFrame:
    """Get the top US indices

    Returns
    -------
    indices: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data/stocks?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B%7B"
        "%22symbol%22%3A%22INDEX%2FUS%2F%2FDJIA%22%2C%22name%22%3A%22DJIA%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F"
        "%2FCOMP%22%2C%22name%22%3A%22Nasdaq%20Composite%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FSPX%22%2C%22name"
        "%22%3A%22S%26P%20500%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FDWCF%22%2C%22name%22%3A%22DJ%20Total%20Stock"
        "%20Market%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FRUT%22%2C%22name%22%3A%22Russell%202000%22%7D%2C%7B"
        "%22symbol%22%3A%22INDEX%2FUS%2F%2FNYA%22%2C%22name%22%3A%22NYSE%20Composite%22%7D%2C%7B%22symbol%22%3A%22INDEX"
        "%2FUS%2F%2FB400%22%2C%22name%22%3A%22Barron%27s%20400%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FVIX%22%2C%22"
        "name%22%3A%22CBOE%20Volatility%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FDJIA%20FUTURES%22%2C%22name%22%3A%"
        "22DJIA%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FS%26P%20500%20FUTURES%22%2C%22name%22%3A%22S%26P"
        "%20500%20Futures%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, last_price, net_change, percent_change = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        net_change.append(entry["priceChange"])
        percent_change.append(entry["percentChange"])

    indices = pd.DataFrame(
        {" ": name, "Price": last_price, "Chg": net_change, "%Chg": percent_change}
    )
    return indices

def us_bonds() -> pd.DataFrame:
    """Scrape data for us bonds

    Returns
    -------
    bonds: pd.DataFrame
        Dataframe containing name, coupon rate, yield and change in yield
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B"
        "%7B%22symbol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD30Y%22%2C%22name%22%3A%2230-Year%20Bond%22%7D%2C%7"
        "B%22symbol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD10Y%22%2C%22name%22%3A%2210-Year%20Note%22%7D%2C%7B%2"
        "2symbol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD07Y%22%2C%22name%22%3A%227-Year%20Note%22%7D%2C%7B%22sym"
        "bol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD05Y%22%2C%22name%22%3A%225-Year%20Note%22%7D%2C%7B%22symbol"
        "%22%3A%22BOND%2FBX%2F%2FTMUBMUSD03Y%22%2C%22name%22%3A%223-Year%20Note%22%7D%2C%7B%22symbol%22%"
        "3A%22BOND%2FBX%2F%2FTMUBMUSD02Y%22%2C%22name%22%3A%222-Year%20Note%22%7D%2C%7B%22symbol%22%3A%"
        "22BOND%2FBX%2F%2FTMUBMUSD01Y%22%2C%22name%22%3A%221-Year%20Bill%22%7D%2C%7B%22symbol%22%3A%22"
        "BOND%2FBX%2F%2FTMUBMUSD06M%22%2C%22name%22%3A%226-Month%20Bill%22%7D%2C%7B%22symbol%22%3A%22BON"
        "D%2FBX%2F%2FTMUBMUSD03M%22%2C%22name%22%3A%223-Month%20Bill%22%7D%2C%7B%22symbol%22%3A%22BOND%"
        "2FBX%2F%2FTMUBMUSD01M%22%2C%22name%22%3A%221-Month%20Bill%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, yield_pct, rate, yld_chng = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        yield_pct.append(entry["bond"]["yield"])
        rate.append(entry["bond"]["couponRate"])
        yld_chng.append(entry["bond"]["yieldChange"])

    bonds = pd.DataFrame(
        {" ": name, "Rate (%)": rate, "Yld (%)": yield_pct, "Yld Chg (%)": yld_chng}
    )
    return bonds

def top_commodities() -> pd.DataFrame:
    """Scrape data for top commodities

    Returns
    -------
    commodities: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data/commodities?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B%7"
        "B%22symbol%22%3A%22FUTURE%2FUS%2F%2FCRUDE%20OIL%20-%20ELECTRONIC%22%2C%22name%22%3A%22Crude%20Oil%20Futures"
        "%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUK%2F%2FBRENT%20CRUDE%22%2C%22name%22%3A%22Brent%20Crude%20Futures%22"
        "%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FGOLD%22%2C%22name%22%3A%22Gold%20Futures%22%7D%2C%7B%22symbol%22%"
        "3A%22FUTURE%2FUS%2F%2FSILVER%22%2C%22name%22%3A%22Silver%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F"
        "%2FNATURAL%20GAS%22%2C%22name%22%3A%22Natural%20Gas%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2"
        "FUNLEADED%20GASOLINE%22%2C%22name%22%3A%22Unleaded%20Gasoline%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%"
        "2FUS%2F%2FCOPPER%22%2C%22name%22%3A%22Copper%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FCORN%22%2"
        "C%22name%22%3A%22Corn%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FWHEAT%22%2C%22name%22%3A%22Wheat"
        "%20Futures%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FXX%2F%2FBCOM%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, last_price, net_change, percent_change = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        net_change.append(entry["priceChange"])
        percent_change.append(entry["percentChange"])

    commodities = pd.DataFrame(
        {" ": name, "Price": last_price, "Chg": net_change, "%Chg": percent_change}
    )
    return commodities

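# market_overview, us_indices and top_commodities all return the same four
# columns, so they compose naturally. A hedged sketch stacking them into one
# view (global_currencies and us_bonds label their columns differently, so
# they are left out here; the section labels are illustrative):
def _demo_wsj_dashboard() -> pd.DataFrame:
    frames = {
        "Overview": market_overview(),
        "Indices": us_indices(),
        "Commodities": top_commodities(),
    }
    return pd.concat(frames, names=["Section", None])
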
def scrape_gecko_data(url: str) -> BeautifulSoup:
    """Helper method that scrapes the CoinGecko site.

    Parameters
    ----------
    url : str
        CoinGecko url to scrape e.g: "https://www.coingecko.com/en/discover"

    Returns
    -------
    BeautifulSoup object
    """
    headers = {"User-Agent": get_user_agent()}
    session = _retry_session("https://www.coingecko.com")
    try:
        req = session.get(url, headers=headers, timeout=5)
    except Exception as error:
        console.print(error)
        raise RetryError(
            "Connection error. Couldn't connect to CoinGecko and scrape the data. "
            "Please visit CoinGecko site, and check if it's not under maintenance"
        ) from error

    if req.status_code >= 400:
        raise Exception(
            f"Couldn't connect to {url}. Status code: {req.status_code}. Reason: {req.reason}"
        )

    return BeautifulSoup(req.text, features="lxml")

def _make_request(url: str) -> Optional[dict]:
    """Helper method that handles dappradar api requests. [Source: https://dappradar.com/]

    Parameters
    ----------
    url: str
        endpoint url

    Returns
    -------
    dict:
        dictionary with response data
    """
    response = requests.get(
        url, headers={"Accept": "application/json", "User-Agent": get_user_agent()}
    )
    if not 200 <= response.status_code < 300:
        console.print(f"[red]dappradar api exception: {response.text}[/red]")
        return None
    try:
        return response.json()
    except Exception as e:  # noqa: F841
        logger.exception("Invalid Response: %s", str(e))
        console.print(f"[red]Invalid Response: {response.text}[/red]")
        return None

def get_ark_orders() -> DataFrame:
    """Returns ARK orders scraped from cathiesark.com in a DataFrame

    Returns
    -------
    DataFrame
        ARK orders data frame
    """
    url_orders = "https://cathiesark.com/ark-funds-combined/trades"
    raw_page = requests.get(url_orders, headers={"User-Agent": get_user_agent()}).text

    # Trades are embedded as JSON in the Next.js __NEXT_DATA__ script tag
    parsed_script = BeautifulSoup(raw_page, "lxml").find(
        "script", {"id": "__NEXT_DATA__"}
    )
    parsed_json = json.loads(parsed_script.string)

    df_orders = pd.json_normalize(parsed_json["props"]["pageProps"]["arkTrades"])
    df_orders.drop(
        [
            "everything",
            "everything.profile.customThumbnail",
            "hidden",
            "images.thumbnail",
        ],
        axis=1,
        inplace=True,
    )
    df_orders["date"] = pd.to_datetime(df_orders["date"], format="%Y-%m-%dZ").dt.date
    return df_orders

def get_article_data(article_id: int) -> dict:
    """Returns an article

    Parameters
    ----------
    article_id : int
        Article ID

    Returns
    -------
    dict
        Article data
    """
    article_url = f"https://seekingalpha.com/api/v3/news/{article_id}"
    response = requests.get(article_url, headers={"User-Agent": get_user_agent()})
    jdata = response.json()

    content = jdata["data"]["attributes"]["content"].replace("</li>", "</li>\n")
    content = BeautifulSoup(content, features="html.parser").get_text()

    article = {
        "title": jdata["data"]["attributes"]["title"],
        "publishedAt": jdata["data"]["attributes"]["lastModified"],
        "url": "https://seekingalpha.com" + jdata["data"]["links"]["self"],
        "content": content,
    }
    return article

def get_futures() -> dict:
    """Get futures data. [Source: Finviz]

    Returns
    -------
    futures : dict
        Indices, Energy, Metals, Meats, Grains, Softs, Bonds, Currencies
    """
    source = requests.get(
        "https://finviz.com/futures.ashx", headers={"User-Agent": get_user_agent()}
    ).text

    # The page embeds two JavaScript object literals; slice them out of the
    # source and parse them with ast.literal_eval
    slice_source = source[
        source.find("var groups = ") : source.find(
            "\r\n\r\n groups.forEach(function(group) "
        )
    ]
    groups = literal_eval(
        slice_source[: slice_source.find("\r\n var tiles = ") - 1].strip(
            "var groups = "
        )
    )
    titles = literal_eval(
        slice_source[slice_source.find("\r\n var tiles = ") : -1].strip(
            "\r\n var tiles = "
        )
    )

    d_futures: dict = {}
    for future in groups:
        d_futures[future["label"]] = []
        for ticker in future["contracts"]:
            d_futures[future["label"]].append(titles[ticker["ticker"]])
    return d_futures

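# Shape sketch for the returned dict: group labels map to lists of contract
# entries (the exact entries are whatever Finviz serves):
def _demo_futures_groups() -> None:
    d_futures = get_futures()
    for label, contracts in d_futures.items():
        print(f"{label}: {len(contracts)} contracts")
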
def get_treasury_yield(interval: str, maturity: str) -> pd.DataFrame:
    """Get historical yield for a given maturity

    Parameters
    ----------
    interval : str
        Interval for data. Can be "d", "w", "m" for daily, weekly or monthly
    maturity : str
        Maturity timeline. Can be "3m", "5y", "10y" or "30y"

    Returns
    -------
    pd.DataFrame
        Dataframe of historical yields
    """
    d_interval = {"d": "daily", "w": "weekly", "m": "monthly"}
    d_maturity = {"3m": "3month", "5y": "5year", "10y": "10year", "30y": "30year"}

    url = f"https://www.alphavantage.co/query?function=TREASURY_YIELD&interval={d_interval[interval]}&ma"
    url += f"turity={d_maturity[maturity]}&apikey={cfg.API_KEY_ALPHAVANTAGE}"
    r = requests.get(url, headers={"User-Agent": get_user_agent()})
    if r.status_code != 200:
        return pd.DataFrame()

    data = pd.DataFrame(r.json()["data"])
    data["date"] = pd.to_datetime(data["date"])
    data["Yield"] = data["value"].astype(float)
    data = data.drop(columns=["value"])
    return data

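# The accepted arguments are exactly the keys of the two lookup dicts above.
# A sketch that guards against typos before hitting the API (defaults are
# illustrative):
def _demo_treasury_yield(interval: str = "m", maturity: str = "10y") -> pd.DataFrame:
    if interval not in ("d", "w", "m") or maturity not in ("3m", "5y", "10y", "30y"):
        return pd.DataFrame()
    return get_treasury_yield(interval, maturity)
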
def get_news_html(news_type: str = "Top-News") -> dict:
    """Gets news. [Source: SeekingAlpha]

    Parameters
    ----------
    news_type : str
        From: Top-News, On-The-Move, Market-Pulse, Notable-Calls, Buybacks, Commodities,
        Crypto, Issuance, Global, Guidance, IPOs, SPACs, Politics, M-A, Consumer,
        Energy, Financials, Healthcare, MLPs, REITs, Technology

    Returns
    -------
    dict
        JSON response with the list of articles
    """
    sa_url = (
        f"http://seekingalpha.com/api/v3/news?filter%5Bcategory%5D=market-news%3A%3A{news_type}"
        "&filter%5Bsince%5D=0&filter%5Buntil%5D=0&include=author%2CprimaryTickers%2CsecondaryTickers"
        "&isMounting=true&page%5Bsize%5D=25&page%5Bnumber%5D=1"
    )
    articles_html = requests.get(
        sa_url, headers={"User-Agent": get_user_agent()}
    ).json()
    return articles_html

def get_gdp_capita() -> pd.DataFrame:
    """Real GDP per Capita for United States

    Returns
    -------
    pd.DataFrame
        DataFrame of GDP per Capita
    """
    url = f"https://www.alphavantage.co/query?function=REAL_GDP_PER_CAPITA&apikey={cfg.API_KEY_ALPHAVANTAGE}"
    r = requests.get(url, headers={"User-Agent": get_user_agent()})
    if r.status_code != 200:
        return pd.DataFrame()

    payload = r.json()
    data = pd.DataFrame()

    # Successful requests
    if "data" in payload:
        if payload["data"]:
            data = pd.DataFrame(payload["data"])
            data["date"] = pd.to_datetime(data["date"])
            data["GDP"] = data["value"].astype(float)
            data = data.drop(columns=["value"])
        else:
            console.print("No data found.\n")
    # Invalid API Keys
    elif "Error Message" in payload:
        console.print(payload["Error Message"])
    # Premium feature, API plan is not authorized
    elif "Information" in payload:
        console.print(payload["Information"])
    return data

def get_etf_overview(etf_symbol: str) -> pd.DataFrame:
    """Get overview data for selected etf

    Parameters
    ----------
    etf_symbol : str
        Etf symbol to get overview for

    Returns
    -------
    df : pd.DataFrame
        Dataframe of stock overview data
    """
    r = requests.get(
        f"https://stockanalysis.com/etf/{etf_symbol}",
        headers={"User-Agent": get_user_agent()},
    )
    soup = bs(r.text, "html.parser")

    tables = soup.findAll("table")
    texts = []
    for tab in tables[:2]:
        entries = tab.findAll("td")
        for ent in entries:
            texts.append(ent.get_text())

    # Labels sit at the listed offsets, with each value immediately after its label
    var_cols = [0, 2, 4, 6, 8, 10, 12, 18, 20, 22, 26, 28, 30, 32]
    vals = [idx + 1 for idx in var_cols]
    columns = [texts[idx] for idx in var_cols]
    data = [texts[idx] for idx in vals]
    df = pd.DataFrame(data, index=columns, columns=[etf_symbol.upper()])
    return df

def _make_request(url: str) -> Union[BeautifulSoup, None]:
    """Helper method to scrape

    Parameters
    ----------
    url : str
        url to scrape

    Returns
    -------
    BeautifulSoup object
    """
    headers = {"User-Agent": get_user_agent()}
    session = _retry_session("https://www.coingecko.com")
    try:
        req = session.get(url, headers=headers, timeout=5)
    except Exception as error:
        logger.exception(str(error))
        console.print(error)
        raise RetryError(
            "Connection error. Couldn't connect to CoinGecko and scrape the data. "
            "Please visit CoinGecko site, and check if it's not under maintenance"
        ) from error

    if req.status_code == 404:
        return None

    if req.status_code >= 400:
        raise Exception(
            f"Couldn't connect to {url}. Status code: {req.status_code}. Reason: {req.reason}"
        )

    return BeautifulSoup(req.text, features="lxml")

def get_overall_exchange_withdrawal_fees() -> pd.DataFrame:
    """Scrapes exchange withdrawal fees
    [Source: https://withdrawalfees.com/]

    Returns
    -------
    pandas.DataFrame:
        Exchange, Coins, Lowest, Average, Median, Highest
    """
    exchange_withdrawal_fees = BeautifulSoup(
        requests.get(
            "https://withdrawalfees.com/exchanges",
            headers={"User-Agent": get_user_agent()},
        ).text,
        "lxml",
    )
    table = exchange_withdrawal_fees.find_all("table")
    if table is None:
        return pd.DataFrame()
    df = pd.read_html(str(table))[0]
    df = df.fillna("")
    return df

def get_options_info(ticker: str) -> pd.DataFrame:
    """Scrape barchart for options info

    Parameters
    ----------
    ticker: str
        Stock ticker

    Returns
    -------
    df: pd.DataFrame
        Dataframe of information
    """
    page = f"https://www.barchart.com/stocks/quotes/{ticker}/overview"
    r = requests.get(page, headers={"User-Agent": get_user_agent()})
    soup = BeautifulSoup(r.text, "html.parser")
    tags = soup.find(
        "div",
        attrs={
            "class": "barchart-content-block symbol-fundamentals bc-cot-table-wrapper"
        },
    )
    data = tags.find_all("li")

    # Each <li> holds a label span followed by a value span
    labels = []
    values = []
    for row in data:
        labels.append(row.find_all("span")[0].getText())
        values.append(row.find_all("span")[1].getText())

    df = pd.DataFrame(data=[labels, values]).T
    return df

def get_cpi(interval: str) -> pd.DataFrame:
    """Get Consumer Price Index from Alpha Vantage

    Parameters
    ----------
    interval : str
        Interval for data. Either "m" or "s" for monthly or semiannual

    Returns
    -------
    pd.DataFrame
        Dataframe of CPI
    """
    s_interval = "semiannual" if interval == "s" else "monthly"
    url = f"https://www.alphavantage.co/query?function=CPI&interval={s_interval}&apikey={cfg.API_KEY_ALPHAVANTAGE}"
    r = requests.get(url, headers={"User-Agent": get_user_agent()})
    if r.status_code != 200:
        return pd.DataFrame()

    data = pd.DataFrame(r.json()["data"])
    data["date"] = pd.to_datetime(data["date"])
    data["CPI"] = data["value"].astype(float)
    data = data.drop(columns=["value"])
    return data

def global_bonds() -> pd.DataFrame:
    """Scrape data for global bonds

    Returns
    -------
    bonds: pd.DataFrame
        Dataframe containing name, coupon rate, yield and change in yield
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22bonds%22%3A%5"
        "B%7B%22symbol%22%3A%22TMUBMUSD10Y%22%2C%22name%22%3A%22U.S.%2010%20Year%22%7D%2C%7B%22symbol"
        "%22%3A%22TMBMKDE-10Y%22%2C%22name%22%3A%22Germany%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22TMB"
        "MKGB-10Y%22%2C%22name%22%3A%22U.K.%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22TMBMKJP-10Y%22%2C%"
        "22name%22%3A%22Japan%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22TMBMKAU-10Y%22%2C%22name%22%3A%2"
        "2Australia%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22AMBMKRM-10Y%22%2C%22name%22%3A%22China%2010"
        "%20Year%22%7D%5D%7D&type=mdc_governmentbonds",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, yield_pct, rate, yld_chng = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["djLegalName"])
        yield_pct.append(entry["yieldPercent"])
        rate.append(entry["couponPercent"])
        yld_chng.append(entry["yieldChange"])

    bonds = pd.DataFrame(
        {" ": name, "Rate (%)": rate, "Yld (%)": yield_pct, "Yld Chg (%)": yld_chng}
    )
    return bonds

def get_ark_orders() -> DataFrame: """Returns ARK orders in a Dataframe Returns ------- DataFrame ARK orders data frame with the following columns: ticker, date, shares, weight, fund, direction """ url_orders = "https://cathiesark.com/ark-funds-combined/trades" raw_page = requests.get(url_orders, headers={ "User-Agent": get_user_agent() }).text parsed_script = BeautifulSoup(raw_page, "lxml").find("script", {"id": "__NEXT_DATA__"}) parsed_json = json.loads(parsed_script.string) df_orders = pd.json_normalize( parsed_json["props"]["pageProps"]["arkTrades"]) if df_orders.empty: return pd.DataFrame() df_orders.drop( [ "hidden", "images.thumbnail", "cusip", "estimated_price", "updated_at", "created_at", "region", "country", "isADR", "companyName", "clinicalTrialsSearchHandle", "wasSPACBuy", "currencyMultiplier", "useRapidAPI", "description", "quandlTicker", "customThumbnail", "custom_thumbnail", "id", ], axis=1, inplace=True, ) df_orders["date"] = pd.to_datetime(df_orders["date"], format="%Y-%m-%d").dt.date return df_orders
def get_insider_activity(ticker: str) -> pd.DataFrame:
    """Get insider activity. [Source: Business Insider]

    Parameters
    ----------
    ticker : str
        Ticker to get insider activity data from

    Returns
    -------
    df_insider : pd.DataFrame
        Get insider activity data
    """
    url_market_business_insider = (
        f"https://markets.businessinsider.com/stocks/{ticker.lower()}-stock"
    )
    text_soup_market_business_insider = BeautifulSoup(
        requests.get(
            url_market_business_insider, headers={"User-Agent": get_user_agent()}
        ).text,
        "lxml",
    )

    d_insider = dict()
    l_insider_vals = list()
    for idx, insider_val in enumerate(
        text_soup_market_business_insider.findAll(
            "td", {"class": "table__td text-center"}
        )
    ):
        l_insider_vals.append(insider_val.text.strip())

        # Add value to dictionary
        if (idx + 1) % 6 == 0:
            # Check if we are still parsing insider trading activity
            if "/" not in l_insider_vals[0]:
                break
            d_insider[(idx + 1) // 6] = l_insider_vals
            l_insider_vals = list()

    df_insider = pd.DataFrame.from_dict(
        d_insider,
        orient="index",
        columns=["Date", "Shares Traded", "Shares Held", "Price", "Type", "Option"],
    )
    df_insider["Date"] = pd.to_datetime(df_insider["Date"])
    df_insider = df_insider.set_index("Date")
    df_insider = df_insider.sort_index(ascending=True)

    l_names = list()
    for s_name in text_soup_market_business_insider.findAll(
        "a", {"onclick": "silentTrackPI()"}
    ):
        l_names.append(s_name.text.strip())
    df_insider["Insider"] = l_names
    return df_insider

def get_sec_filings(ticker: str) -> pd.DataFrame:
    """Get SEC filings for a given stock ticker. [Source: Market Watch]

    Parameters
    ----------
    ticker : str
        Stock ticker

    Returns
    -------
    df_financials : pd.DataFrame
        SEC filings data
    """
    pd.set_option("display.max_colwidth", None)
    url_financials = (
        f"https://www.marketwatch.com/investing/stock/{ticker}/financials/secfilings"
    )
    text_soup_financials = BeautifulSoup(
        requests.get(url_financials, headers={"User-Agent": get_user_agent()}).text,
        "lxml",
    )

    df_financials = None
    b_ready_to_process_info = False
    soup_financials = text_soup_financials.findAll("tr", {"class": "table__row"})
    for financials_info in soup_financials:
        a_financials = financials_info.text.split("\n")

        # If header has been processed and dataframe created, ready to populate the SEC information
        if b_ready_to_process_info:
            l_financials_info = [a_financials[2]]
            l_financials_info.extend(a_financials[5:-1])
            l_financials_info.append(financials_info.a["href"])
            # Append data values to financials
            df_financials.loc[len(df_financials.index)] = l_financials_info  # type: ignore

        if "Filing Date" in a_financials:
            l_financials_header = [a_financials[2]]
            l_financials_header.extend(a_financials[5:-1])
            l_financials_header.append("Link")
            df_financials = pd.DataFrame(columns=l_financials_header)
            b_ready_to_process_info = True

    # Set Filing Date as index
    df_financials = df_financials.set_index("Filing Date")  # type: ignore
    return df_financials

def etf_movers(sort_type: str = "gainers") -> pd.DataFrame:
    """Scrape data for top etf movers.

    Parameters
    ----------
    sort_type: str
        Data to get. Can be "gainers", "decliners" or "active"

    Returns
    -------
    etfmovers: pd.DataFrame
        Dataframe containing the name, price, change and the volume of the etf
    """
    if sort_type == "gainers":
        url = (
            "https://www.wsj.com/market-data/mutualfunds-etfs/etfmovers?id=%7B%22application"
            "%22%3A%22WSJ%22%2C%22etfMover%22%3A%22leaders%22%2C%22count%22%3A25%7D&type=mdc_etfmovers"
        )
    elif sort_type == "decliners":
        url = (
            "https://www.wsj.com/market-data/mutualfunds-etfs/etfmovers?id=%7B%22application"
            "%22%3A%22WSJ%22%2C%22etfMover%22%3A%22laggards%22%2C%22count%22%3A25%7D&type=mdc_etfmovers"
        )
    elif sort_type == "active":
        url = (
            "https://www.wsj.com/market-data/mutualfunds-etfs/etfmovers?id=%7B%22application"
            "%22%3A%22WSJ%22%2C%22etfMover%22%3A%22most_active%22%2C%22count%22%3A25%7D&type=mdc_etfmovers"
        )
    else:
        url = ""

    if url:
        data = requests.get(url, headers={"User-Agent": get_user_agent()}).json()
        name, last_price, net_change, percent_change, volume = [], [], [], [], []
        for entry in data["data"]["instruments"]:
            name.append(entry["name"])
            last_price.append(entry["lastPrice"])
            net_change.append(entry["priceChange"])
            percent_change.append(entry["percentChange"])
            volume.append(entry["formattedVolume"])

        etfmovers = pd.DataFrame(
            {
                " ": name,
                "Price": last_price,
                "Chg": net_change,
                "%Chg": percent_change,
                "Vol": volume,
            }
        )
        return etfmovers
    return pd.DataFrame()

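# The three supported sort types map one-to-one onto the WSJ "etfMover" URL
# values; anything else falls through to an empty frame. A quick sketch:
def _demo_etf_movers() -> None:
    for sort_type in ("gainers", "decliners", "active"):
        movers = etf_movers(sort_type)
        print(sort_type, len(movers))
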
def get_today_hot_penny_stocks() -> DataFrame:
    """Returns today's hot penny stocks

    Returns
    -------
    DataFrame
        Today's hot penny stocks DataFrame with the following columns:
        Ticker, Price, Change, $ Volume, Volume, # Trades
    """
    url_penny_stock_stocks = "https://www.pennystockflow.com"
    text_soup_penny_stock_stocks = BeautifulSoup(
        requests.get(
            url_penny_stock_stocks, headers={"User-Agent": get_user_agent()}
        ).text,
        "lxml",
    )

    a_penny_stock_header = []
    for penny_stock_header in text_soup_penny_stock_stocks.findAll(
        "td", {"class": "tblhdr"}
    ):
        a_penny_stock_header.append(penny_stock_header.text)
    df_penny = pd.DataFrame(columns=a_penny_stock_header)

    # The first row is laid out differently: the ticker is the second <a> tag,
    # followed by five right-aligned cells
    first_penny = []
    for idx, penny_stock_header in enumerate(
        text_soup_penny_stock_stocks.findAll("a")
    ):
        if idx == 0:
            continue
        if idx > 1:
            break
        first_penny.append(penny_stock_header.text)

    for idx, first_penny_stock in enumerate(
        text_soup_penny_stock_stocks.findAll("td", {"align": "right"})
    ):
        first_penny.append(first_penny_stock.text)
        if idx > 3:
            break
    df_penny.loc[0] = first_penny

    # Remaining rows come in groups of six "tdi" cells
    a_penny_stock = []
    penny_idx = 1
    for idx, penny_stock in enumerate(
        text_soup_penny_stock_stocks.findAll("td", {"class": "tdi"})
    ):
        a_penny_stock.append(penny_stock.text)
        if (idx + 1) % 6 == 0:
            df_penny.loc[penny_idx] = a_penny_stock
            penny_idx += 1
            a_penny_stock = []
    return df_penny

def get_metadata(self, author: str) -> dict:
    """Loads metadata for a given author

    Parameters
    ----------
    author : str
        Author key - Marcus_Aurelius, Epictetus, Seneca, Marcus_Tullius_Cicero,
        Aristotle, Plato, Pythagoras

    Returns
    -------
    dict
        Metadata dictionary that includes number of quotes, number of pages
        and first 30 quotes
    """
    quotes_page = BeautifulSoup(
        requests.get(
            self.urls[author],
            headers={"User-Agent": get_user_agent()},
        ).text,
        "lxml",
    )

    find_navigation = quotes_page.find("em", {"class": "current"}).find_parent("div")
    page_count = []
    for a_page_ref in find_navigation.find_all("a", href=True):
        page_count.append(a_page_ref.text.strip("\n"))

    ret = {}
    ret["pages"] = page_count[-2]

    find_count = quotes_page.find(string=re.compile("Showing 1-30 of"))
    quote_count = re.search(r"Showing 1-30 of (?P<number>[\d,]+)", find_count)
    if quote_count:
        # Total number of quotes available for this author
        ret["quote_count"] = quote_count.group("number")

    all_quotes = quotes_page.find_all("div", {"class": "quote"})
    ret["quotes"] = []
    for a_quote in all_quotes:
        parsed_quote = a_quote.find("div", {"class": "quoteText"}).text
        ret["quotes"].append(parsed_quote)
    return ret

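# Usage sketch; `quotes_client` stands in for an instance of the (unshown)
# class this method belongs to, and the author key comes from the docstring:
def _demo_author_metadata(quotes_client) -> None:
    meta = quotes_client.get_metadata("Marcus_Aurelius")
    print(meta["pages"], meta["quote_count"], len(meta["quotes"]))
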
def get_yield_curve_year(year: str) -> DataFrame:
    """Returns a US Treasury Yield Curve for a given year

    Parameters
    ----------
    year : str
        Yield curve year

    Returns
    -------
    DataFrame
        US Treasury Yield Curve data frame with the following columns:
        Date, 1 mo, 2 mo, 3 mo, 6 mo, 1 yr, 2 yr, 3 yr, 5 yr, 7 yr, 10 yr, 20 yr, 30 yr
    """
    # pylint: disable=line-too-long
    yield_curve_url = "https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldYear&year={}"  # noqa: E501
    text_soup_yield_curve = BeautifulSoup(
        requests.get(
            yield_curve_url.format(year),
            headers={"User-Agent": get_user_agent()},
        ).text,
        "lxml",
    )

    yield_table = text_soup_yield_curve.find_all("table", {"class": "t-chart"})[0]
    a_yield_table_header = []
    for yield_table_header_col in yield_table.find("tr", {"class": None}).find_all(
        "th"
    ):
        a_yield_table_header.append(yield_table_header_col.text)
    df_yield_curve = pd.DataFrame(columns=a_yield_table_header)

    for yield_table_row in yield_table.find_all(
        "tr", {"class": ["oddrow", "evenrow"]}
    ):
        a_yield_row = []
        for idx, yield_table_col in enumerate(yield_table_row.find_all("td")):
            if idx == 0:
                a_yield_row.append(datetime.strptime(yield_table_col.text, "%m/%d/%y"))
            else:
                a_yield_row.append(float(yield_table_col.text))  # type: ignore
        df_yield_curve.loc[len(df_yield_curve)] = a_yield_row

    df_yield_curve = df_yield_curve.set_index(["Date"])
    return df_yield_curve

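# Usage sketch (the year value is illustrative):
def _demo_yield_curve() -> None:
    df_yield_curve = get_yield_curve_year("2021")
    # Most recent observation, transposed so each maturity is one row
    print(df_yield_curve.tail(1).T)
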