Example #1
def get_dividend_cal(date: str) -> pd.DataFrame:
    """Gets the dividend calendar for a given date, where the date is the ex-dividend date.

    Parameters
    ----------
    date: str
        Ex-dividend date to fetch, in YYYY-MM-DD format

    Returns
    -------
    pd.DataFrame:
        Dataframe of dividend calendar
    """
    ag = get_user_agent()
    # The Nasdaq API rejects this particular user agent, so keep drawing until we get a different one
    while (ag ==
           "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:82.1) Gecko/20100101 Firefox/82.1"
           ):
        ag = get_user_agent()
    try:
        r = requests.get(
            f"https://api.nasdaq.com/api/calendar/dividends?date={date}",
            headers={"User-Agent": ag},
            timeout=10,  # without a timeout, the ReadTimeout handler below can never fire
        )
        if r.status_code == 200:
            return pd.DataFrame(r.json()["data"]["calendar"]["rows"])
    except requests.exceptions.ReadTimeout:
        return pd.DataFrame()
    return pd.DataFrame()
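A minimal usage sketch, assuming pandas, requests and the module's get_user_agent helper are in scope; the column names depend on the Nasdaq payload, so they are printed rather than assumed:

div_cal = get_dividend_cal("2022-01-03")
if div_cal.empty:
    print("No dividend data returned (request failed or no rows for that date)")
else:
    print(div_cal.columns.tolist())
    print(div_cal.head())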
Example #2
def get_overall_withdrawal_fees(top: int = 100) -> pd.DataFrame:
    """Scrapes top coins withdrawal fees
    [Source: https://withdrawalfees.com/]

    Parameters
    ----------
    top: int
        Number of coins to fetch; each page holds 100 coins, so the default of 100 scrapes a single page.

    Returns
    -------
    pandas.DataFrame:
        Coin, Lowest, Average, Median, Highest, Exchanges Compared
    """

    COINS_PER_PAGE = 100
    withdrawal_fees_homepage = BeautifulSoup(
        requests.get(
            "https://withdrawalfees.com/",
            headers={
                "User-Agent": get_user_agent()
            },
        ).text,
        "lxml",
    )
    table = withdrawal_fees_homepage.find_all("table")
    tickers_html = withdrawal_fees_homepage.find_all("div", {"class": "name"})
    # find_all returns a (possibly empty) list, never None
    if not table or not tickers_html:
        return pd.DataFrame()
    df = pd.read_html(str(table))[0]

    df["Coin"] = [ticker.text for ticker in tickers_html]
    df["Highest"] = df["Highest"].apply(
        lambda x: f'{x[:x.index(".")+3]} ({x[x.index(".")+3:]})'
        if isinstance(x, str) and "." in x else x)

    num_pages = int(math.ceil(top / COINS_PER_PAGE))
    if num_pages > 1:
        for idx in range(2, num_pages + 1):
            withdrawal_fees_homepage = BeautifulSoup(
                requests.get(
                    f"https://withdrawalfees.com/coins/page/{idx}",
                    headers={
                        "User-Agent": get_user_agent()
                    },
                ).text,
                "lxml",
            )
            table = withdrawal_fees_homepage.find_all("table")
            tickers_html = withdrawal_fees_homepage.find_all(
                "div", {"class": "name"})
            if table and tickers_html:
                new_df = pd.read_html(str(table))[0]
                new_df["Highest"] = new_df["Highest"].apply(
                    lambda x: f'{x[:x.index(".")+3]} ({x[x.index(".")+3:]})'
                    if isinstance(x, str) and "." in x else x)
                new_df["Coin"] = [ticker.text for ticker in tickers_html]
                # DataFrame.append was removed in pandas 2.0; concat instead
                df = pd.concat([df, new_df], ignore_index=True)
    df = df.fillna("")
    return df
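Since each page holds 100 coins, asking for more than 100 triggers the paging loop. A usage sketch, with column names taken from the docstring above:

fees = get_overall_withdrawal_fees(top=150)  # ceil(150 / 100) = 2 pages scraped
print(fees[["Coin", "Lowest", "Average", "Median", "Highest"]].head())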
Example #3
def low_float(l_args):
    parser = argparse.ArgumentParser(
        prog="low_float",
        description="""
            Print top stocks with lowest float. LowFloat.com provides a convenient
            sorted database of stocks which have a float of under 10 million shares. Additional key
            data such as the number of outstanding shares, short interest, and company industry is
            displayed. Data is presented for the Nasdaq Stock Market, the New York Stock Exchange,
            the American Stock Exchange, and the Over the Counter Bulletin Board. [Source: www.lowfloat.com]
        """,
    )

    parser.add_argument(
        "-n",
        "--num",
        action="store",
        dest="n_num",
        type=check_positive,
        default=10,
        help="Number of top stocks to print.",
    )

    ns_parser = parse_known_args_and_warn(parser, l_args)

    url_low_float_stocks = "https://www.lowfloat.com"
    text_soup_low_float_stocks = BeautifulSoup(
        requests.get(url_low_float_stocks,
                     headers={
                         "User-Agent": get_user_agent()
                     }).text,
        "lxml",
    )

    a_low_float_header = list()
    for low_float_header in text_soup_low_float_stocks.findAll(
            "td", {"class": "tblhdr"}):
        a_low_float_header.append(
            low_float_header.text.strip("\n").split("\n")[0])
    df_low_float = pd.DataFrame(columns=a_low_float_header)
    df_low_float.loc[0] = ["", "", "", "", "", "", ""]

    stock_list_tr = text_soup_low_float_stocks.find_all("tr")

    low_float_data = list()
    for a_stock in stock_list_tr:
        a_stock_txt = a_stock.text

        if a_stock_txt == "":
            continue

        low_float_data = a_stock_txt.split("\n")

        if len(low_float_data) == 8:
            df_low_float.loc[len(df_low_float.index)] = low_float_data[:-1]

        low_float_data = list()

    pd.set_option("display.max_colwidth", None)  # -1 is no longer accepted by pandas
    print(df_low_float.head(n=ns_parser.n_num).to_string(index=False))
    print("")
Example #4
def get_etf_holdings(symbol: str) -> pd.DataFrame:
    """Get ETF holdings

    Parameters
    ----------
    symbol: str
        Symbol to get holdings for

    Returns
    -------
    df: pd.DataFrame
        Dataframe of holdings
    """

    link = f"https://stockanalysis.com/etf/{symbol}/holdings/"
    r = requests.get(link, headers={"User-Agent": get_user_agent()})
    if r.status_code == 200:
        soup = bs(r.text, "html.parser")
        table = soup.find("table")
        tds = table.findAll("td")
        # Each holding row spans five cells; offsets 1, 3 and 4 hold
        # the ticker, the % weight and the share count
        tickers = [td.text for td in tds[1::5]]
        percents = [td.text for td in tds[3::5]]
        shares = [td.text for td in tds[4::5]]
        df = pd.DataFrame(index=tickers)
        df["% Of Etf"] = percents
        df["Shares"] = shares
        return df
    return pd.DataFrame()
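A usage sketch; the returned frame is indexed by holding ticker, so head() alone shows the first rows of the table:

holdings = get_etf_holdings("SPY")
print(holdings.head(10))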
Example #5
def market_overview() -> pd.DataFrame:
    """Scrape data for market overview

    Returns
    -------
    overview: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B%7B%22symbol%22"
        "%3A%22INDEX%2FUS%2F%2FDJIA%22%2C%22name%22%3A%22DJIA%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FSPX%22%2C%22"
        "name%22%3A%22S%26P%20500%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FCOMP%22%2C%22name%22%3A%22Nasdaq%20"
        "Composite%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FJP%2F%2FNIK%22%2C%22name%22%3A%22Japan%3A%20Nikkei%20225%22%7D%"
        "2C%7B%22symbol%22%3A%22INDEX%2FUK%2F%2FUKX%22%2C%22name%22%3A%22UK%3A%20FTSE%20100%22%7D%2C%7B%22symbol%22%3A%"
        "22FUTURE%2FUS%2F%2FCRUDE%20OIL%20-%20ELECTRONIC%22%2C%22name%22%3A%22Crude%20Oil%20Futures%22%7D%2C%7B%22symbol"
        "%22%3A%22FUTURE%2FUS%2F%2FGOLD%22%2C%22name%22%3A%22Gold%20Futures%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2"
        "F%2FUSDJPY%22%2C%22name%22%3A%22Yen%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FEURUSD%22%2C%22name%22%3A%"
        "22Euro%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()
    name, last_price, net_change, percent_change = [], [], [], []

    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        net_change.append(entry["priceChange"])
        percent_change.append(entry["percentChange"])

    overview = pd.DataFrame(
        {" ": name, "Price": last_price, "Chg": net_change, "%Chg": percent_change}
    )

    return overview
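The long query string above is just a percent-encoded JSON document passed in the id parameter. A sketch of building it from a plain structure instead of hard-coding the encoded form (the payload layout is inferred from decoding the URL above, so treat it as an assumption):

import json
from urllib.parse import quote, urlencode

def build_wsj_quotes_url(instruments: list) -> str:
    """Build the WSJ market-data URL from plain {"symbol", "name"} dicts."""
    payload = {"application": "WSJ", "instruments": instruments}
    # Compact separators and quote (not quote_plus) reproduce the encoded
    # form seen above, with %20 for spaces and no whitespace in the JSON
    query = urlencode(
        {"id": json.dumps(payload, separators=(",", ":")), "type": "mdc_quotes"},
        quote_via=quote,
    )
    return f"https://www.wsj.com/market-data?{query}"

url = build_wsj_quotes_url([
    {"symbol": "INDEX/US//DJIA", "name": "DJIA"},
    {"symbol": "INDEX/US//SPX", "name": "S&P 500"},
])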
Example #6
def global_currencies() -> pd.DataFrame:
    """Scrape data for global currencies

    Returns
    -------
    currencies: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5"
        "B%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FEURUSD%22%2C%22name%22%3A%22Euro%20(EUR%2FUSD)%22%7D%"
        "2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDJPY%22%2C%22name%22%3A%22Japanese%20Yen%20(USD%2F"
        "JPY)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FGBPUSD%22%2C%22name%22%3A%22U.K.%20Poun"
        "d%20(GBP%2FUSD)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDCHF%22%2C%22name%22%3A%22Sw"
        "iss%20Franc%20(USD%2FCHF)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDCNY%22%2C%22name%2"
        "2%3A%22Chinese%20Yuan%20(USD%2FCNY)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2FUSDCAD%22%2C"
        "%22name%22%3A%22Canadian%20%24%20(USD%2FCAD)%22%7D%2C%7B%22symbol%22%3A%22CURRENCY%2FUS%2F%2F"
        "USDMXN%22%2C%22name%22%3A%22Mexican%20Peso%20(USD%2FMXN)%22%7D%2C%7B%22symbol%22%3A%22CRYPTO"
        "CURRENCY%2FUS%2F%2FBTCUSD%22%2C%22name%22%3A%22Bitcoin%20(BTC%2FUSD)%22%7D%2C%7B%22symbol%22%3A"
        "%22INDEX%2FXX%2F%2FBUXX%22%2C%22name%22%3A%22WSJ%20Dollar%20Index%22%7D%2C%7B%22symbol%22%3A%2"
        "2INDEX%2FUS%2F%2FDXY%22%2C%22name%22%3A%22U.S.%20Dollar%20Index%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, last_price, price_change, pct_change = [], [], [], []
    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        price_change.append(entry["priceChange"])
        pct_change.append(entry["percentChange"])

    currencies = pd.DataFrame(
        {" ": name, "Last": last_price, "Chng": price_change, "%Chng": pct_change}
    )
    return currencies
Example #7
def us_indices() -> pd.DataFrame:
    """Get the top US indices

    Returns
    -------
    indices: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data/stocks?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B%7B"
        "%22symbol%22%3A%22INDEX%2FUS%2F%2FDJIA%22%2C%22name%22%3A%22DJIA%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F"
        "%2FCOMP%22%2C%22name%22%3A%22Nasdaq%20Composite%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FSPX%22%2C%22name"
        "%22%3A%22S%26P%20500%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FDWCF%22%2C%22name%22%3A%22DJ%20Total%20Stock"
        "%20Market%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FRUT%22%2C%22name%22%3A%22Russell%202000%22%7D%2C%7B"
        "%22symbol%22%3A%22INDEX%2FUS%2F%2FNYA%22%2C%22name%22%3A%22NYSE%20Composite%22%7D%2C%7B%22symbol%22%3A%22INDEX"
        "%2FUS%2F%2FB400%22%2C%22name%22%3A%22Barron%27s%20400%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FUS%2F%2FVIX%22%2C%22"
        "name%22%3A%22CBOE%20Volatility%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FDJIA%20FUTURES%22%2C%22name%22%3A%"
        "22DJIA%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FS%26P%20500%20FUTURES%22%2C%22name%22%3A%22S%26P"
        "%20500%20Futures%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()

    name, last_price, net_change, percent_change = [], [], [], []

    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        net_change.append(entry["priceChange"])
        percent_change.append(entry["percentChange"])

    indices = pd.DataFrame(
        {" ": name, "Price": last_price, "Chg": net_change, "%Chg": percent_change}
    )

    return indices
Example #8
def us_bonds() -> pd.DataFrame:
    """Scrape data for us bonds

    Returns
    -------
    bonds: pd.DataFrame
        Dataframe containing name, coupon rate, yield and change in yield
    """

    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B"
        "%7B%22symbol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD30Y%22%2C%22name%22%3A%2230-Year%20Bond%22%7D%2C%7"
        "B%22symbol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD10Y%22%2C%22name%22%3A%2210-Year%20Note%22%7D%2C%7B%2"
        "2symbol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD07Y%22%2C%22name%22%3A%227-Year%20Note%22%7D%2C%7B%22sym"
        "bol%22%3A%22BOND%2FBX%2F%2FTMUBMUSD05Y%22%2C%22name%22%3A%225-Year%20Note%22%7D%2C%7B%22symbol"
        "%22%3A%22BOND%2FBX%2F%2FTMUBMUSD03Y%22%2C%22name%22%3A%223-Year%20Note%22%7D%2C%7B%22symbol%22%"
        "3A%22BOND%2FBX%2F%2FTMUBMUSD02Y%22%2C%22name%22%3A%222-Year%20Note%22%7D%2C%7B%22symbol%22%3A%"
        "22BOND%2FBX%2F%2FTMUBMUSD01Y%22%2C%22name%22%3A%221-Year%20Bill%22%7D%2C%7B%22symbol%22%3A%22"
        "BOND%2FBX%2F%2FTMUBMUSD06M%22%2C%22name%22%3A%226-Month%20Bill%22%7D%2C%7B%22symbol%22%3A%22BON"
        "D%2FBX%2F%2FTMUBMUSD03M%22%2C%22name%22%3A%223-Month%20Bill%22%7D%2C%7B%22symbol%22%3A%22BOND%"
        "2FBX%2F%2FTMUBMUSD01M%22%2C%22name%22%3A%221-Month%20Bill%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()
    name, yield_pct, rate, yld_chng = [], [], [], []

    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        yield_pct.append(entry["bond"]["yield"])
        rate.append(entry["bond"]["couponRate"])
        yld_chng.append(entry["bond"]["yieldChange"])

    bonds = pd.DataFrame(
        {" ": name, "Rate (%)": rate, "Yld (%)": yield_pct, "Yld Chg (%)": yld_chng}
    )
    return bonds
Example #9
def top_commodities() -> pd.DataFrame:
    """Scrape data for top commodities

    Returns
    -------
    commodities: pd.DataFrame
        Dataframe containing name, price, net change and percent change
    """
    data = requests.get(
        "https://www.wsj.com/market-data/commodities?id=%7B%22application%22%3A%22WSJ%22%2C%22instruments%22%3A%5B%7"
        "B%22symbol%22%3A%22FUTURE%2FUS%2F%2FCRUDE%20OIL%20-%20ELECTRONIC%22%2C%22name%22%3A%22Crude%20Oil%20Futures"
        "%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUK%2F%2FBRENT%20CRUDE%22%2C%22name%22%3A%22Brent%20Crude%20Futures%22"
        "%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FGOLD%22%2C%22name%22%3A%22Gold%20Futures%22%7D%2C%7B%22symbol%22%"
        "3A%22FUTURE%2FUS%2F%2FSILVER%22%2C%22name%22%3A%22Silver%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F"
        "%2FNATURAL%20GAS%22%2C%22name%22%3A%22Natural%20Gas%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2"
        "FUNLEADED%20GASOLINE%22%2C%22name%22%3A%22Unleaded%20Gasoline%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%"
        "2FUS%2F%2FCOPPER%22%2C%22name%22%3A%22Copper%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FCORN%22%2"
        "C%22name%22%3A%22Corn%20Futures%22%7D%2C%7B%22symbol%22%3A%22FUTURE%2FUS%2F%2FWHEAT%22%2C%22name%22%3A%22Wheat"
        "%20Futures%22%7D%2C%7B%22symbol%22%3A%22INDEX%2FXX%2F%2FBCOM%22%7D%5D%7D&type=mdc_quotes",
        headers={"User-Agent": get_user_agent()},
    ).json()
    name, last_price, net_change, percent_change = [], [], [], []

    for entry in data["data"]["instruments"]:
        name.append(entry["formattedName"])
        last_price.append(entry["lastPrice"])
        net_change.append(entry["priceChange"])
        percent_change.append(entry["percentChange"])

    commodities = pd.DataFrame(
        {" ": name, "Price": last_price, "Chg": net_change, "%Chg": percent_change}
    )

    return commodities
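Examples #5 through #9 repeat the same fetch-and-collect loop. A hedged refactor sketch of a helper they could share (the helper name and the price-column parameter are illustrative, not from the source):

def _wsj_quote_table(url: str, price_col: str = "Price") -> pd.DataFrame:
    """Fetch a WSJ mdc_quotes endpoint and collect the common quote fields."""
    data = requests.get(url, headers={"User-Agent": get_user_agent()}).json()
    rows = [
        (entry["formattedName"], entry["lastPrice"],
         entry["priceChange"], entry["percentChange"])
        for entry in data["data"]["instruments"]
    ]
    return pd.DataFrame(rows, columns=[" ", price_col, "Chg", "%Chg"])
Example #10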
def scrape_gecko_data(url: str) -> BeautifulSoup:
    """Helper method that scrapes the CoinGecko site.

    Parameters
    ----------
    url : str
        CoinGecko URL to scrape, e.g. "https://www.coingecko.com/en/discover"

    Returns
    -------
        BeautifulSoup object
    """
    headers = {"User-Agent": get_user_agent()}
    session = _retry_session("https://www.coingecko.com")
    try:
        req = session.get(url, headers=headers, timeout=5)
    except Exception as error:
        console.print(error)
        raise RetryError(
            "Connection error. Couldn't connect to CoinGecko and scrape the data. "
            "Please visit the CoinGecko site and check whether it is under maintenance"
        ) from error

    if req.status_code >= 400:
        raise Exception(
            f"Couldn't connect to {url}. Status code: {req.status_code}. Reason: {req.reason}"
        )

    return BeautifulSoup(req.text, features="lxml")
Example #11
def _make_request(url: str) -> Optional[dict]:
    """Helper method that handles DappRadar API requests. [Source: https://dappradar.com/]

    Parameters
    ----------
    url: str
        Endpoint URL

    Returns
    -------
    dict:
        Dictionary with response data
    """

    response = requests.get(
        url, headers={"Accept": "application/json", "User-Agent": get_user_agent()}
    )
    if not 200 <= response.status_code < 300:
        console.print(f"[red]dappradar api exception: {response.text}[/red]")
        return None
    try:
        return response.json()
    except Exception as e:  # noqa: F841
        logger.exception("Invalid Response: %s", str(e))
        console.print(f"[red]Invalid Response: {response.text}[/red]")
        return None
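A usage sketch; the endpoint path below is a placeholder, not a documented DappRadar route:

payload = _make_request("https://api.dappradar.com/<endpoint>")  # hypothetical URL
if payload is not None:
    print(list(payload.keys()))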
Example #12
def get_ark_orders() -> DataFrame:
    """Returns the combined ARK funds trades scraped from cathiesark.com"""
    url_orders = "https://cathiesark.com/ark-funds-combined/trades"

    raw_page = requests.get(url_orders, headers={"User-Agent": get_user_agent()}).text

    parsed_script = BeautifulSoup(raw_page, "lxml").find(
        "script", {"id": "__NEXT_DATA__"}
    )

    parsed_json = json.loads(parsed_script.string)

    df_orders = pd.json_normalize(parsed_json["props"]["pageProps"]["arkTrades"])
    df_orders.drop(
        [
            "everything",
            "everything.profile.customThumbnail",
            "hidden",
            "images.thumbnail",
        ],
        axis=1,
        inplace=True,
    )

    df_orders["date"] = pd.to_datetime(df_orders["date"], format="%Y-%m-%dZ").dt.date

    return df_orders
Example #13
def get_article_data(article_id: int) -> dict:
    """Returns an article

    Parameters
    ----------
    article_id : int
        Article ID

    Returns
    -------
    dict
        Article data
    """

    article_url = f"https://seekingalpha.com/api/v3/news/{article_id}"
    response = requests.get(article_url,
                            headers={"User-Agent": get_user_agent()})
    jdata = response.json()
    content = jdata["data"]["attributes"]["content"].replace(
        "</li>", "</li>\n")
    content = BeautifulSoup(content, features="html.parser").get_text()

    article = {
        "title": jdata["data"]["attributes"]["title"],
        "publishedAt": jdata["data"]["attributes"]["lastModified"],
        "url": "https://seekingalpha.com" + jdata["data"]["links"]["self"],
        "content": content,
    }

    return article
Example #14
def get_futures() -> dict:
    """Get futures data. [Source: Finviz]

    Returns
    -------
    dict
        Futures grouped as Indices, Energy, Metals, Meats, Grains, Softs, Bonds, Currencies
    """
    source = requests.get(
        "https://finviz.com/futures.ashx", headers={"User-Agent": get_user_agent()}
    ).text

    slice_source = source[
        source.find("var groups = ") : source.find(
            "\r\n\r\n                    groups.forEach(function(group) "
        )
    ]
    groups = literal_eval(
        slice_source[
            : slice_source.find("\r\n                    var tiles = ") - 1
        ].strip("var groups = ")
    )
    titles = literal_eval(
        slice_source[
            slice_source.find("\r\n                    var tiles = ") : -1
        ].strip("\r\n                    var tiles = ")
    )

    d_futures: dict = {}
    for future in groups:
        d_futures[future["label"]] = []
        for ticker in future["contracts"]:
            d_futures[future["label"]].append(titles[ticker["ticker"]])

    return d_futures
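A usage sketch, with the group labels taken from the docstring above:

futures = get_futures()
print(list(futures.keys()))        # Indices, Energy, Metals, ...
print(futures.get("Metals", []))   # contract titles within one group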
Example #15
def get_treasury_yield(interval: str, maturity: str) -> pd.DataFrame:
    """Get historical yield for a given maturity

    Parameters
    ----------
    interval : str
        Interval for data.  Can be "d","w","m" for daily, weekly or monthly
    maturity : str
        Maturity timeline.  Can be "3m", "5y", "10y" or "30y"

    Returns
    -------
    pd.DataFrame
        Dataframe of historical yields
    """
    d_interval = {"d": "daily", "w": "weekly", "m": "monthly"}
    d_maturity = {
        "3m": "3month",
        "5y": "5year",
        "10y": "10year",
        "30y": "30year"
    }

    url = f"https://www.alphavantage.co/query?function=TREASURY_YIELD&interval={d_interval[interval]}&ma"
    url += f"turity={d_maturity[maturity]}&apikey={cfg.API_KEY_ALPHAVANTAGE}"
    r = requests.get(url, headers={"User-Agent": get_user_agent()})
    if r.status_code != 200:
        return pd.DataFrame()

    data = pd.DataFrame(r.json()["data"])
    data["date"] = pd.to_datetime(data["date"])
    data["Yield"] = data["value"].astype(float)
    data = data.drop(columns=["value"])

    return data
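A usage sketch; valid arguments follow the two mapping dicts above ("d"/"w"/"m" and "3m"/"5y"/"10y"/"30y"):

yields = get_treasury_yield("w", "10y")
if not yields.empty:
    print(yields.set_index("date")["Yield"].tail())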
Example #16
def get_news_html(news_type: str = "Top-News") -> dict:
    """Gets news. [Source: SeekingAlpha]

    Parameters
    ----------
    news_type : str
        From: Top-News, On-The-Move, Market-Pulse, Notable-Calls, Buybacks, Commodities, Crypto, Issuance, Global,
        Guidance, IPOs, SPACs, Politics, M-A, Consumer, Energy, Financials, Healthcare, MLPs, REITs, Technology

    Returns
    -------
    dict
        HTML page of articles
    """
    sa_url = (
        f"http://seekingalpha.com/api/v3/news?filter%5Bcategory%5D=market-news%3A%3A{news_type}"
        "&filter%5Bsince%5D=0&filter%5Buntil%5D=0&include=author%2CprimaryTickers%2CsecondaryTickers"
        "&isMounting=true&page%5Bsize%5D=25&page%5Bnumber%5D=1")

    articles_html = requests.get(sa_url,
                                 headers={
                                     "User-Agent": get_user_agent()
                                 }).json()

    return articles_html
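The JSON returned here can feed get_article_data from Example #13. A hedged sketch, assuming the JSON:API payload keeps article ids under data[*]["id"]:

news = get_news_html("Market-Pulse")
ids = [item["id"] for item in news.get("data", [])]  # ids arrive as strings
if ids:
    article = get_article_data(int(ids[0]))
    print(article["title"], article["publishedAt"])
Example #17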
def get_gdp_capita() -> pd.DataFrame:
    """Real GDP per Capita for United States

    Returns
    -------
    pd.DataFrame
        DataFrame of GDP per Capita
    """
    url = f"https://www.alphavantage.co/query?function=REAL_GDP_PER_CAPITA&apikey={cfg.API_KEY_ALPHAVANTAGE}"
    r = requests.get(url, headers={"User-Agent": get_user_agent()})
    if r.status_code != 200:
        return pd.DataFrame()

    payload = r.json()
    data = pd.DataFrame()
    # Successful requests
    if "data" in payload:
        if r.json()["data"]:
            data = pd.DataFrame(r.json()["data"])
            data["date"] = pd.to_datetime(data["date"])
            data["GDP"] = data["value"].astype(float)
            data = data.drop(columns=["value"])
        else:
            console.print("No data found.\n")
    # Invalid API Keys
    elif "Error Message" in payload:
        console.print(payload["Error Message"])
    # Premium feature, API plan is not authorized
    elif "Information" in payload:
        console.print(payload["Information"])

    return data
Example #18
def get_etf_overview(etf_symbol: str) -> pd.DataFrame:
    """Get overview data for selected etf

    Parameters
    ----------
    etf_symbol : str
        Etf symbol to get overview for

    Returns
    -------
    df : pd.DataFrame
        Dataframe of stock overview data
    """
    r = requests.get(
        f"https://stockanalysis.com/etf/{etf_symbol}",
        headers={"User-Agent": get_user_agent()},
    )
    soup = bs(r.text, "html.parser")
    tables = soup.findAll("table")
    texts = []
    for tab in tables[:2]:
        entries = tab.findAll("td")
        for ent in entries:
            texts.append(ent.get_text())

    # Each label cell in texts is immediately followed by its value cell;
    # var_cols indexes the label cells to keep from the first two tables
    var_cols = [0, 2, 4, 6, 8, 10, 12, 18, 20, 22, 26, 28, 30, 32]
    vals = [idx + 1 for idx in var_cols]
    columns = [texts[idx] for idx in var_cols]
    data = [texts[idx] for idx in vals]
    df = pd.DataFrame(data, index=columns, columns=[etf_symbol.upper()])
    return df
Example #19
def _make_request(url: str) -> Union[BeautifulSoup, None]:
    """Helper method to scrape a URL

    Parameters
    ----------
    url : str
        url to scrape

    Returns
    -------
        BeautifulSoup object
    """
    headers = {"User-Agent": get_user_agent()}
    session = _retry_session("https://www.coingecko.com")
    try:
        req = session.get(url, headers=headers, timeout=5)
    except Exception as error:
        logger.exception(str(error))
        console.print(error)
        raise RetryError(
            "Connection error. Couldn't connect to CoinGecko and scrape the data. "
            "Please visit the CoinGecko site and check whether it is under maintenance"
        ) from error

    if req.status_code == 404:
        return None

    if req.status_code >= 400:
        raise Exception(
            f"Couldn't connect to {url}. Status code: {req.status_code}. Reason: {req.reason}"
        )

    return BeautifulSoup(req.text, features="lxml")
Example #20
def get_overall_exchange_withdrawal_fees() -> pd.DataFrame:
    """Scrapes exchange withdrawal fees
    [Source: https://withdrawalfees.com/]

    Returns
    -------
    pandas.DataFrame:
        Exchange, Coins, Lowest, Average, Median, Highest
    """
    exchange_withdrawal_fees = BeautifulSoup(
        requests.get(
            "https://withdrawalfees.com/exchanges",
            headers={
                "User-Agent": get_user_agent()
            },
        ).text,
        "lxml",
    )
    table = exchange_withdrawal_fees.find_all("table")
    # find_all returns a (possibly empty) list, never None
    if not table:
        return pd.DataFrame()
    df = pd.read_html(str(table))[0]
    df = df.fillna("")
    return df
Example #21
def get_options_info(ticker: str) -> pd.DataFrame:
    """Scrape barchart for options info

    Parameters
    ----------
    ticker: str
        Stock ticker

    Returns
    -------
    df: pd.DataFrame
        Dataframe of information
    """
    page = f"https://www.barchart.com/stocks/quotes/{ticker}/overview"

    r = requests.get(page, headers={"User-Agent": get_user_agent()})
    soup = BeautifulSoup(r.text, "html.parser")
    tags = soup.find(
        "div",
        attrs={
            "class":
            "barchart-content-block symbol-fundamentals bc-cot-table-wrapper"
        },
    )
    data = tags.find_all("li")
    labels = []
    values = []
    for row in data:
        labels.append(row.find_all("span")[0].getText())
        values.append(row.find_all("span")[1].getText())

    df = pd.DataFrame(data=[labels, values]).T

    return df
Example #22
def get_cpi(interval: str) -> pd.DataFrame:
    """Get Consumer Price Index from Alpha Vantage

    Parameters
    ----------
    interval : str
        Interval for data.  Either "m" or "s" for monthly or semiannual

    Returns
    -------
    pd.DataFrame
        Dataframe of CPI
    """
    s_interval = "semiannual" if interval == "s" else "monthly"
    url = f"https://www.alphavantage.co/query?function=CPI&interval={s_interval}&apikey={cfg.API_KEY_ALPHAVANTAGE}"
    r = requests.get(url, headers={"User-Agent": get_user_agent()})

    if r.status_code != 200:
        return pd.DataFrame()

    data = pd.DataFrame(r.json()["data"])
    data["date"] = pd.to_datetime(data["date"])
    data["CPI"] = data["value"].astype(float)
    data = data.drop(columns=["value"])

    return data
Example #23
def global_bonds() -> pd.DataFrame:
    """Scrape data for global bonds

    Returns
    -------
    bonds: pd.DataFrame
        Dataframe containing name, coupon rate, yield and change in yield
    """
    data = requests.get(
        "https://www.wsj.com/market-data?id=%7B%22application%22%3A%22WSJ%22%2C%22bonds%22%3A%5"
        "B%7B%22symbol%22%3A%22TMUBMUSD10Y%22%2C%22name%22%3A%22U.S.%2010%20Year%22%7D%2C%7B%22symbol"
        "%22%3A%22TMBMKDE-10Y%22%2C%22name%22%3A%22Germany%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22TMB"
        "MKGB-10Y%22%2C%22name%22%3A%22U.K.%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22TMBMKJP-10Y%22%2C%"
        "22name%22%3A%22Japan%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22TMBMKAU-10Y%22%2C%22name%22%3A%2"
        "2Australia%2010%20Year%22%7D%2C%7B%22symbol%22%3A%22AMBMKRM-10Y%22%2C%22name%22%3A%22China%2010"
        "%20Year%22%7D%5D%7D&type=mdc_governmentbonds",
        headers={
            "User-Agent": get_user_agent()
        },
    ).json()
    name, yield_pct, rate, yld_chng = [], [], [], []

    for entry in data["data"]["instruments"]:
        name.append(entry["djLegalName"])
        yield_pct.append(entry["yieldPercent"])
        rate.append(entry["couponPercent"])
        yld_chng.append(entry["yieldChange"])

    bonds = pd.DataFrame({
        " ": name,
        "Rate (%)": rate,
        "Yld (%)": yield_pct,
        "Yld Chg (%)": yld_chng
    })
    return bonds
Example #24
def get_ark_orders() -> DataFrame:
    """Returns ARK orders in a Dataframe

    Returns
    -------
    DataFrame
        ARK orders data frame with the following columns:
        ticker, date, shares, weight, fund, direction
    """
    url_orders = "https://cathiesark.com/ark-funds-combined/trades"

    raw_page = requests.get(url_orders,
                            headers={
                                "User-Agent": get_user_agent()
                            }).text

    parsed_script = BeautifulSoup(raw_page,
                                  "lxml").find("script",
                                               {"id": "__NEXT_DATA__"})

    parsed_json = json.loads(parsed_script.string)

    df_orders = pd.json_normalize(
        parsed_json["props"]["pageProps"]["arkTrades"])

    if df_orders.empty:
        return pd.DataFrame()

    df_orders.drop(
        [
            "hidden",
            "images.thumbnail",
            "cusip",
            "estimated_price",
            "updated_at",
            "created_at",
            "region",
            "country",
            "isADR",
            "companyName",
            "clinicalTrialsSearchHandle",
            "wasSPACBuy",
            "currencyMultiplier",
            "useRapidAPI",
            "description",
            "quandlTicker",
            "customThumbnail",
            "custom_thumbnail",
            "id",
        ],
        axis=1,
        inplace=True,
    )

    df_orders["date"] = pd.to_datetime(df_orders["date"],
                                       format="%Y-%m-%d").dt.date

    return df_orders
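A usage sketch using the columns named in the docstring; the "Buy" value for the direction column is an assumption, not taken from the source:

orders = get_ark_orders()
if not orders.empty:
    buys = orders[orders["direction"] == "Buy"]  # direction value assumed
    print(buys.sort_values("date", ascending=False).head())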
Example #25
def get_insider_activity(ticker: str) -> pd.DataFrame:
    """Get insider activity. [Source: Business Insider]

    Parameters
    ----------
    ticker : str
        Ticker to get insider activity data from

    Returns
    -------
    df_insider : pd.DataFrame
        Get insider activity data
    """
    url_market_business_insider = (
        f"https://markets.businessinsider.com/stocks/{ticker.lower()}-stock")
    text_soup_market_business_insider = BeautifulSoup(
        requests.get(url_market_business_insider,
                     headers={
                         "User-Agent": get_user_agent()
                     }).text,
        "lxml",
    )

    d_insider = dict()
    l_insider_vals = list()
    for idx, insider_val in enumerate(
            text_soup_market_business_insider.findAll(
                "td", {"class": "table__td text-center"})):
        l_insider_vals.append(insider_val.text.strip())

        # Add value to dictionary
        if (idx + 1) % 6 == 0:
            # Check if we are still parsing insider trading activity
            if "/" not in l_insider_vals[0]:
                break
            d_insider[(idx + 1) // 6] = l_insider_vals
            l_insider_vals = list()

    df_insider = pd.DataFrame.from_dict(
        d_insider,
        orient="index",
        columns=[
            "Date", "Shares Traded", "Shares Held", "Price", "Type", "Option"
        ],
    )

    df_insider["Date"] = pd.to_datetime(df_insider["Date"])
    df_insider = df_insider.set_index("Date")
    df_insider = df_insider.sort_index(ascending=True)

    l_names = list()
    for s_name in text_soup_market_business_insider.findAll(
            "a", {"onclick": "silentTrackPI()"}):
        l_names.append(s_name.text.strip())
    df_insider["Insider"] = l_names

    return df_insider
Example #26
def get_sec_filings(ticker: str) -> pd.DataFrame:
    """Get SEC filings for a given stock ticker. [Source: Market Watch]

    Parameters
    ----------
    ticker : str
        Stock ticker

    Returns
    -------
    df_financials : pd.DataFrame
        SEC filings data
    """

    pd.set_option("display.max_colwidth", None)

    url_financials = (
        f"https://www.marketwatch.com/investing/stock/{ticker}/financials/secfilings"
    )

    text_soup_financials = BeautifulSoup(
        requests.get(url_financials, headers={
            "User-Agent": get_user_agent()
        }).text,
        "lxml",
    )

    # a_financials_header = list()
    df_financials = None
    b_ready_to_process_info = False
    soup_financials = text_soup_financials.findAll("tr",
                                                   {"class": "table__row"})
    for financials_info in soup_financials:
        a_financials = financials_info.text.split("\n")

        # If header has been processed and dataframe created ready to populate the SEC information
        if b_ready_to_process_info:
            l_financials_info = [a_financials[2]]
            l_financials_info.extend(a_financials[5:-1])
            l_financials_info.append(financials_info.a["href"])
            # Append data values to financials
            df_financials.loc[len(
                df_financials.index)] = l_financials_info  # type: ignore

        if "Filing Date" in a_financials:
            l_financials_header = [a_financials[2]]
            l_financials_header.extend(a_financials[5:-1])
            l_financials_header.append("Link")

            df_financials = pd.DataFrame(columns=l_financials_header)
            b_ready_to_process_info = True

    # Set Filing Date as index
    df_financials = df_financials.set_index("Filing Date")  # type: ignore

    return df_financials
Example #27
def etf_movers(sort_type: str = "gainers") -> pd.DataFrame:
    """
    Scrape data for top etf movers.
    Parameters
    ----------
    sort_type: str
        Data to get.  Can be "gainers", "decliners" or "active"

    Returns
    -------
    etfmovers: pd.DataFrame
        Dataframe containing the name, price, change and the volume of the etf
    """

    if sort_type == "gainers":
        url = (
            "https://www.wsj.com/market-data/mutualfunds-etfs/etfmovers?id=%7B%22application"
            "%22%3A%22WSJ%22%2C%22etfMover%22%3A%22leaders%22%2C%22count%22%3A25%7D&type=mdc_etfmovers"
        )
    elif sort_type == "decliners":
        url = (
            "https://www.wsj.com/market-data/mutualfunds-etfs/etfmovers?id=%7B%22application"
            "%22%3A%22WSJ%22%2C%22etfMover%22%3A%22laggards%22%2C%22count%22%3A25%7D&type=mdc_etfmovers"
        )
    elif sort_type == "active":
        url = (
            "https://www.wsj.com/market-data/mutualfunds-etfs/etfmovers?id=%7B%22application"
            "%22%3A%22WSJ%22%2C%22etfMover%22%3A%22most_active%22%2C%22count%22%3A25%7D&type=mdc_etfmovers"
        )
    else:
        url = ""

    if url:
        data = requests.get(url, headers={
            "User-Agent": get_user_agent()
        }).json()
        name, last_price, net_change, percent_change, volume = [], [], [], [], []

        for entry in data["data"]["instruments"]:
            name.append(entry["name"])
            last_price.append(entry["lastPrice"])
            net_change.append(entry["priceChange"])
            percent_change.append(entry["percentChange"])
            volume.append(entry["formattedVolume"])

        etfmovers = pd.DataFrame({
            " ": name,
            "Price": last_price,
            "Chg": net_change,
            "%Chg": percent_change,
            "Vol": volume,
        })

        return etfmovers

    return pd.DataFrame()
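A usage sketch; the three supported sort types come from the branches above, and anything else yields an empty frame:

for kind in ("gainers", "decliners", "active"):
    movers = etf_movers(kind)
    print(kind, len(movers))
Example #28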
def get_today_hot_penny_stocks() -> DataFrame:
    """Returns today's hot penny stocks

    Returns
    -------
    DataFrame
        Today's hot penny stocks DataFrame with the following columns:
        Ticker, Price, Change, $ Volume, Volume, # Trades
    """
    url_penny_stock_stocks = "https://www.pennystockflow.com"

    text_soup_penny_stock_stocks = BeautifulSoup(
        requests.get(url_penny_stock_stocks,
                     headers={
                         "User-Agent": get_user_agent()
                     }).text,
        "lxml",
    )

    a_penny_stock_header = []
    for penny_stock_header in text_soup_penny_stock_stocks.findAll(
            "td", {"class": "tblhdr"}):
        a_penny_stock_header.append(penny_stock_header.text)

    df_penny = pd.DataFrame(columns=a_penny_stock_header)

    first_penny = []
    for idx, penny_stock_header in enumerate(
            text_soup_penny_stock_stocks.findAll("a")):
        if idx == 0:
            continue
        if idx > 1:
            break
        first_penny.append(penny_stock_header.text)

    for idx, first_penny_stock in enumerate(
            text_soup_penny_stock_stocks.findAll("td", {"align": "right"})):
        first_penny.append(first_penny_stock.text)
        if idx > 3:
            break

    df_penny.loc[0] = first_penny

    a_penny_stock = []
    penny_idx = 1
    for idx, penny_stock in enumerate(
            text_soup_penny_stock_stocks.findAll("td", {"class": "tdi"})):
        a_penny_stock.append(penny_stock.text)

        if (idx + 1) % 6 == 0:
            df_penny.loc[penny_idx] = a_penny_stock
            penny_idx += 1
            a_penny_stock = []

    return df_penny
Example #29
    def get_metadata(self, author: str) -> dict:
        """Loads metadata for a given author

        Parameters
        ----------
        author : str
            Author key - Marcus_Aurelius, Epictetus, Seneca, Marcus_Tullius_Cicero, Aristotle, Plato, Pythagoras

        Returns
        -------
        dict
            Metadata dictionary that includes number of quotes, number of pages and first 30 quotes
        """
        quotes_page = BeautifulSoup(
            requests.get(
                self.urls[author],
                headers={
                    "User-Agent": get_user_agent()
                },
            ).text,
            "lxml",
        )

        find_navigation = quotes_page.find("em", {
            "class": "current"
        }).find_parent("div")

        page_count = []
        for a_page_ref in find_navigation.find_all("a", href=True):
            page_count.append(a_page_ref.text.strip("\n"))

        ret = {}
        ret["pages"] = page_count[-2]

        find_count = quotes_page.find(string=re.compile("Showing 1-30 of"))

        # find() returns None when the marker text is absent
        quote_count = (re.search(r"Showing 1-30 of (?P<number>[\d,]+)",
                                 find_count) if find_count else None)

        if quote_count:
            # total number of quotes, kept separate from the "quotes" list below
            ret["quote_count"] = quote_count.group("number")

        all_quotes = quotes_page.find_all("div", {"class": "quote"})

        ret["quotes"] = []

        for a_quote in all_quotes:
            parsed_quote = a_quote.find("div", {"class": "quoteText"}).text
            ret["quotes"].append(parsed_quote)

        return ret
Example #30
def get_yield_curve_year(year: str) -> DataFrame:
    """Returns a US Treasury Yield Curve for a given year

    Parameters
    ----------
    year : str
        Yield curve year

    Returns
    -------
    DataFrame
        US Treasury Yield Curve data frame with the following columns
        Date,1 mo,2 mo,3 mo,6 mo,1 yr,2 yr,3 yr,5 yr,7 yr,10 yr,20 yr,30 yr
    """

    # pylint: disable=line-too-long
    yield_curve_url = "https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldYear&year={}"  # noqa: E501
    text_soup_yield_curve = BeautifulSoup(
        requests.get(
            yield_curve_url.format(year),
            headers={
                "User-Agent": get_user_agent()
            },
        ).text,
        "lxml",
    )

    yield_table = text_soup_yield_curve.find_all("table",
                                                 {"class": "t-chart"})[0]

    a_yield_table_header = []
    for yield_table_header_col in yield_table.find("tr", {
            "class": None
    }).find_all("th"):
        a_yield_table_header.append(yield_table_header_col.text)

    df_yield_curve = pd.DataFrame(columns=a_yield_table_header)

    for yield_table_row in yield_table.find_all(
            "tr", {"class": ["oddrow", "evenrow"]}):
        a_yield_row = []
        for idx, yield_table_col in enumerate(yield_table_row.find_all("td")):
            if idx == 0:
                a_yield_row.append(
                    datetime.strptime(yield_table_col.text, "%m/%d/%y"))
            else:
                a_yield_row.append(float(yield_table_col.text))  # type: ignore

        df_yield_curve.loc[len(df_yield_curve)] = a_yield_row

    df_yield_curve = df_yield_curve.set_index(["Date"])

    return df_yield_curve
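A usage sketch; stacking several years gives a continuous history, with column names as listed in the docstring:

curve = pd.concat([get_yield_curve_year(str(y)) for y in (2019, 2020, 2021)])
print(curve["10 yr"].describe())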