def get_morningstar_overall_rating(self, fund_symbol):
        """
        Gets the overall Morningstar rating.
        Process:
            1. Use lxml to find the corresponding performanceId for the fund, located in head > meta name = performanceId
            2. Then, hit the security identifier API of morningstar with that id, which will return in a field the number of stars

        Ex:
            FSDAX's performanceId is 0P00002PPP

        Returns:
            dict with a single "starRating" key holding the rating from the API.
        Raises:
            FundException.UIChangedError: API response no longer carries "starRating".
            FundException.SymbolDoesNotExistError: request failed or returned an empty body.
        """
        performanceId = self.extract_performance_id(fund_symbol)

        response = {}
        url = Util.build_url(Section.OVERALL_RATING, fund_symbol, 0,
                             performanceId)
        raw = requests.get(url)
        if raw.status_code == 200 and raw.text != "":
            print("200 and not empty")
            data = raw.json()
            if "starRating" in data:
                response["starRating"] = data["starRating"]
            else:
                # BUG FIX: error text previously said "trailing returns"
                # (copy-pasted from get_trailing_returns).
                raise FundException.UIChangedError(
                    f"Error while retrieving data for overall rating: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )
        else:
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for overall rating: Symbol does not exist: {fund_symbol}"
            )

        return response
# Example #2
    def get_trailing_returns(self, fund_symbol):
        """
        Scrape the trailing-return figures for a fund.

        Returns a dict mapping each time period (e.g. "1-Month", "YTD")
        to the trailing return reported for that period.
        """
        periods = [
            "1-Month", "3-Month", "6-Month", "YTD", "1-Year", "3-Year",
            "5-Year", "10-Year", "15-Year"
        ]
        result = {}
        page = requests.get(Util.build_url(Section.TRAILING, fund_symbol))
        if page.status_code != 200 or page.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for trailing returns: Symbol does not exist: {fund_symbol}"
            )
        print("200 and not empty")
        soup = BeautifulSoup(page.text, 'html.parser')

        # The returns table: the row whose header matches the symbol
        # holds the column values we want.
        table = soup.find("table")
        if table is None:
            raise FundException.UIChangedError(
                f"Error while retrieving data for trailing returns: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )
        for row in table.findAll("tr"):
            header = row.find("th")
            if header is not None and header.text == fund_symbol:
                values = [cell.text for cell in row.findAll("td")]
                result = dict(zip(periods, values))

        return result
# Example #3
 def get_fund_historical_returns(self, fund_symbol):
     """Fetch the historical-returns page for a fund and hand it off for parsing."""
     url = Util.build_url(Section.HISTORICAL, fund_symbol)
     page = requests.get(url)
     if page.status_code != 200 or page.text == "":
         raise FundException.SymbolDoesNotExistError(
             f"Error while retrieving data for historical returns: Symbol does not exist: {fund_symbol}"
         )
     return self.retrieve_historical_returns(fund_symbol, url, page)
# Example #4
    def get_section_data(self, section, fund_symbol):
        """
        Fetch the page for one data section of a fund and extract its columns.
        """
        if section == Section.OVERALL_RATING:
            # The rating endpoint additionally needs the fund's performanceId.
            perf_id = self.extract_performance_id(fund_symbol)
            url = Util.build_url(section, fund_symbol, 0, perf_id)
        else:
            url = Util.build_url(section, fund_symbol)

        page = requests.get(url)
        if page.status_code != 200 or page.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for General stats, section {section}: Symbol does not exist: {fund_symbol}"
            )
        print("200 and not empty")
        soup = BeautifulSoup(page.text, 'html.parser')
        return self.extract_column_data(section, soup, page)
# Example #5
    def get_holdings_stats(self, fund_symbol):
        """
        Gets the top 25 companies in their portfolio, as well as the following stats:
            1. Name
            2. % portfolio weight
            3. YTD return
            4. Shares owned
            5. Shares changed
            6. P/E
            7. Price
            8. G/L % (gain/loss percent for the day)

        First get the first 25 most weighted companies from portfolio (desc)
        For each:
            1. Equity view tab
                -Name
                -% portfolio weight
                -Shares owned
                -Shares changed
                -YTD return (could be positive, negative, float, or blank (-) )
                -P/E (could be positive, negative, float, or blank (-) )
            2. Equity prices tab
                -Price
                -G/L % (gain/loss percent)

        Each tab is represented as a table
            -equity view tab:       id = equity_holding_tab
                -get <tbody> with id holding_epage0
            -equity prices tab:     id = equityPrice_holding_tab

        Comparisons between 2+ mutual funds will compare Name and % portfolio weight only

        Raises:
            FundException.ImproperSymbolFormatError, SymbolDoesNotExistError,
            UIChangedError, SourceEndpointChangedError (propagated unchanged
            from validation/extraction).
        """
        fund_symbol = fund_symbol.upper()

        # BUG FIX: the previous version caught each FundException subtype only
        # to re-wrap it in a new instance of the same type, which replaced the
        # original message with the repr of the old exception. Letting the
        # exceptions propagate unchanged preserves both type and message, so
        # existing callers that catch these types still work.
        Util.validate_format(fund_symbol)
        url = Util.build_url(Section.HOLDINGS_PAGE_TOP_25, fund_symbol)
        return self.extractHoldings(url, fund_symbol)
    def extract_performance_id(self, fund_symbol):
        """
        Extract id from page, so get_morningstar_overall_rating() can build a
        url that can get the actual star rating.

        BUG FIX: previously this implicitly returned None on a failed request
        and raised a bare IndexError when the meta tag was missing; it now
        raises the same exception types the sibling scrapers use.
        """
        url = Util.build_url(Section.QUOTES_PAGE, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving performance id: Symbol does not exist: {fund_symbol}"
            )

        # Build lxml tree from webpage
        tree = html.fromstring(raw.content)

        # Find the meta tag that says "performanceId", and extract the content field
        tags = tree.xpath('.//meta[@name="performanceId"]')
        if not tags:
            raise FundException.UIChangedError(
                f"Error while retrieving performance id: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )
        return tags[0].get("content")
    def get_capture_ratios(self, fund_symbol, timespan):
        """
        Gets upside and downside capture ratios for 3 year, 5 year, 10 year, 15 year.

        NOTE(review): the `timespan` parameter is unused (kept for interface
        compatibility); all timespans are always returned. Confirm with callers.

        Returns:
            dict: timespan -> {"upside_ratio": str, "downside_ratio": str}
        """
        # Build a dictionary, where key = time period, value = capture ratios for that time period.
        timespans = ["3-Year", "5-Year", "10-Year", "15-Year"]
        response = {}

        url = Util.build_url(Section.CAPTURE_RATIOS, fund_symbol)
        raw = requests.get(url)
        if raw.status_code == 200 and raw.text != "":
            print("200 and not empty")
            soup = BeautifulSoup(raw.text, 'html.parser')

            # Find corresponding column values of trailing risk stats. These will be the values of the dict
            table = soup.find("table")
            if table is not None:
                rows = table.findAll(lambda tag: tag.name == 'tr')
                for row in rows:
                    row_header = row.find("th")
                    if row_header is not None and row_header.text == fund_symbol:
                        stats = []
                        for col in row.findAll("td"):
                            # Values are stuck together. Ex: Convert "145.9576.71" --> "145.95", "76.71"
                            # BUG FIX: cell text no longer shadows the `raw`
                            # HTTP-response variable above.
                            cell = col.text
                            first_dot = cell.find(".")
                            stats.append({
                                "upside_ratio": cell[:first_dot + 3],
                                "downside_ratio": cell[first_dot + 3:]
                            })

                        # Delete 1-Year for consistency, since other stats only
                        # have 3year, 5year, 10year, 15year
                        del stats[0]
                        response = dict(zip(timespans, stats))
            else:
                raise FundException.UIChangedError(
                    f"Error while retrieving data for risk capture ratios: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )
        else:
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for risk capture ratios: Symbol does not exist: {fund_symbol}"
            )

        return response
    def get_asset_allocation_data(self, fund_symbol):
        """
        Gets the asset allocation data necessary for the pie chart
        Mimics Morningstar's asset allocation pie chart on the quotes page
        Note: On morningstar, there are 2 possible layouts:
            1. Pie chart:
                -7 rows in response (1 blank, 6 with 2 columns each: field name and value)
                -ex: PRHSX
            2. Table:
                -8 rows in response (2 irrelvant, 6 with 4 columns each: field name, net, short, long)
                -We'll only use field name and net, to match consistency with pie chart scenario
                -Contains the phrase "Note: Contains derivatives or short positions"
                -ex: FSDAX

        Returns:
            dict: asset-class name (e.g. "Cash", "Bond") -> its allocation value.
        """
        response = {}
        url = Util.build_url(Section.ASSET_ALLOCATION, fund_symbol)
        raw = requests.get(url)
        if raw.status_code == 200 and raw.text != "":
            print("200 and not empty")
            soup = BeautifulSoup(raw.text, 'html.parser')

            # Both singular and plural spellings appear depending on layout.
            fields = [
                "Cash", "US Stock", "US Stocks", "Non US Stock",
                "Non US Stocks", "Bond", "Bonds", "Other"
            ]
            table = soup.find("table")
            if table is not None:
                rows = table.findAll(lambda tag: tag.name == 'tr')
                for row in rows:
                    rowData = [
                        col.text for col in row.findAll("td") if col.text != ""
                    ]
                    if len(rowData) > 0:
                        fieldEntry = rowData[0]
                        if fieldEntry in fields:
                            response[fieldEntry] = rowData[1]
            else:
                # BUG FIX: error text previously said "trailing returns"
                # (copy-pasted from get_trailing_returns).
                raise FundException.UIChangedError(
                    f"Error while retrieving data for asset allocation: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )
        else:
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for asset allocation: Symbol does not exist: {fund_symbol}"
            )

        return response
# Example #9
    def get_10000_growth(self, fund_symbol):
        """
        Gets the growth of a hypothetical $10,000 investment over time.

        Returns:
            dict: date string "YYYY-MM-DD" -> dollar value for that date.
        Raises:
            FundException.SymbolDoesNotExistError: bad request or non-JSON body.
            FundException.UIChangedError: chart config attribute is empty.
        """
        response = {}
        url = Util.build_url(Section.GROWTH, fund_symbol)
        raw = requests.get(url)

        if raw.status_code == 200 and raw.text != "":
            try:
                raw_json = raw.json()
            except Exception:
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for $10000 growth: Symbol does not exist: {fund_symbol}"
                )

            # Interpret HTML using BeautifulSoup, then extract out data in JSON
            # from <div data_mod_config = ...., class = mod-ui-chart--dynamic>
            # BUG FIX: local was previously named `html`, shadowing the lxml
            # `html` module this class uses elsewhere.
            html_str = raw_json["html"]
            soup = BeautifulSoup(html_str, 'html.parser')
            data_mod_config_div = soup.find(
                "div", {"class": "mod-ui-chart--dynamic"})["data-mod-config"]
            if data_mod_config_div != "":
                # Convert dictionary in string form to an actual dictionary
                growth_json = ast.literal_eval(data_mod_config_div)
                internal_data = growth_json["data"]
                if len(internal_data) >= 1:
                    # Access first element in the dict, which is the list of values
                    growths = next(iter(internal_data.values()))

                    # Parse into a dict where key = date (YYYY-MM-DD, removing the
                    # trailing "T00:00:00"), value = expected dollar value that year
                    response = {
                        year["date"][:-9]: year["value"]
                        for year in growths
                    }
            else:
                raise FundException.UIChangedError(
                    f"Error while retrieving data for $10000 growth: UI changed for symbol name: {fund_symbol}; thus, we cannot scrape"
                )
        else:
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for $10000 growth: Symbol does not exist: {fund_symbol}"
            )

        return response
    def get_mpt_stats(self, fund_symbol, timespan):
        """
        Retrieves alpha, beta, R-squared, Treynor ratio.

        BUG FIX: the loop variable previously shadowed the `timespan`
        parameter; it is now named `span`. The parameter itself remains
        unused (all timespans are always fetched) — kept for interface
        compatibility; confirm with callers.

        Returns:
            dict: timespan -> dict of MPT fields for that timespan.
        """
        timespans = ["3-Year", "5-Year", "10-Year", "15-Year"]
        fields = [
            "Category Index", "R-Squared", "Beta", "Alpha", "Treynor Ratio",
            "Currency"
        ]
        response = {}

        for span in timespans:
            year = span.split("-")[0]
            url = Util.build_url(Section.RISK_MPT, fund_symbol, year)
            raw = requests.get(url)
            if raw.status_code == 200 and raw.text != "":
                print("200 and not empty")
                soup = BeautifulSoup(raw.text, 'html.parser')

                # Find corresponding column values of trailing risk stats.
                # Only the first matching row is used.
                dataNotFoundYet = True
                table = soup.find("table")
                if table is not None:
                    rows = table.findAll(lambda tag: tag.name == 'tr')
                    for row in rows:
                        row_header = row.find("th")
                        if dataNotFoundYet and row_header is not None and row_header.text == fund_symbol:
                            dataNotFoundYet = False
                            stats = [
                                col.text.strip() for col in row.findAll("td")
                            ]
                            response[span] = dict(zip(fields, stats))
                else:
                    raise FundException.UIChangedError(
                        f"Error while retrieving data for risk mpt: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                    )
            else:
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for risk mpt: Symbol does not exist: {fund_symbol}"
                )

        return response
# Example #11
    def get_mpt_and_volatility_data(self, fund_symbol, timespan):
        """
        For a given timespan, gets ALL the MPT + Volatility data.
        Builds a dictionary where key = time period, value = dict containing
        all the stats for that year.
        """
        combined = {}
        year = timespan.split("-")[0]

        for section in (Section.RISK_MPT, Section.RISK_VOLATILITY):
            page = requests.get(Util.build_url(section, fund_symbol, year))
            if page.status_code != 200 or page.text == "":
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for risk mpt: Symbol does not exist: {fund_symbol}"
                )
            print("200 and not empty")
            soup = BeautifulSoup(page.text, 'html.parser')

            table = soup.find("table")
            if table is None:
                raise FundException.UIChangedError(
                    f"Error while retrieving data for risk mpt: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )
            # Only the first row whose header matches the symbol is used.
            found = False
            for row in table.findAll("tr"):
                header = row.find("th")
                if not found and header is not None and header.text == fund_symbol:
                    found = True
                    section_stats = self.extract_column_data(row, section)
                    # Accumulate this section's key-value pairs into the result
                    combined.update(section_stats)

        return combined
    def get_volatility_stats(self, fund_symbol, timespan):
        """
        Retrieves standard deviation, return, sharpe ratio, sortino ratio.

        BUG FIX: the loop variable previously shadowed the `timespan`
        parameter; it is now named `span`. The parameter itself remains
        unused (all timespans are always fetched) — kept for interface
        compatibility; confirm with callers.

        Returns:
            dict: timespan -> dict of volatility fields for that timespan.
        """
        timespans = ["3-Year", "5-Year", "10-Year", "15-Year"]
        fields = [
            "Standard Deviation", "Return", "Sharpe Ratio", "Sortino Ratio"
        ]
        response = {}

        for span in timespans:
            year = span.split("-")[0]
            url = Util.build_url(Section.RISK_VOLATILITY, fund_symbol, year)
            raw = requests.get(url)
            if raw.status_code == 200 and raw.text != "":
                print("200 and not empty")
                soup = BeautifulSoup(raw.text, 'html.parser')

                # Find corresponding column values of trailing risk stats.
                table = soup.find("table")
                if table is not None:
                    rows = table.findAll(lambda tag: tag.name == 'tr')
                    for row in rows:
                        row_header = row.find("th")
                        if row_header is not None and row_header.text == fund_symbol:
                            stats = [
                                col.text.strip() for col in row.findAll("td")
                            ]
                            del stats[-1]  # Remove unnecessary trailing value
                            response[span] = dict(zip(fields, stats))
                else:
                    raise FundException.UIChangedError(
                        f"Error while retrieving data for risk volatility statistics: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                    )
            else:
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for risk volatility statistics: Symbol does not exist: {fund_symbol}"
                )

        return response
    def get_risk_return_vs_category(self, fund_symbol):
        """
        Gets the:
            1. overall risk compared to its category, as judged by Morningstar
            2. overall return compared to its category, as judged by Morningstar
        Found on quotes page

        Returns:
            dict: field name ("Risk vs.Category" / "Return vs.Category") -> value.
        """
        response = {}
        url = Util.build_url(Section.RISK_RETURN_VS_CATEGORY, fund_symbol)
        raw = requests.get(url)
        if raw.status_code == 200 and raw.text != "":
            print("200 and not empty")
            soup = BeautifulSoup(raw.text, 'html.parser')

            fields = ["Risk vs.Category", "Return vs.Category"]
            table = soup.find("table")
            if table is not None:
                rows = table.findAll(lambda tag: tag.name == 'tr')
                for row in rows:
                    rowData = [
                        col.text.strip() for col in row.findAll("td")
                        if col.text.strip() != ""
                    ]
                    if len(rowData) > 0:
                        fieldEntry = rowData[0]
                        # Substring match: the cell text carries extra wording
                        # around the field name.
                        for field in fields:
                            if fieldEntry.find(field) != -1:
                                response[field] = rowData[1]
            else:
                # BUG FIX: error text previously said "trailing returns"
                # (copy-pasted from get_trailing_returns).
                raise FundException.UIChangedError(
                    f"Error while retrieving data for risk/return vs category: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )
        else:
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for risk/return vs category: Symbol does not exist: {fund_symbol}"
            )

        return response
    def get_general_details(self, fund_symbol):
        """
        Gets the following:
            1. Price/ NAV
            2. Minimum investment
            3. Expense ratio (in percentage, ex: .77%)
            4. Turnover ratio (in percentage, ex: .77%)
            5. Morningstar Category

        Returns:
            dict: vkey name (e.g. "NAV", "ExpenseRatio") -> stripped text value.
        """
        response = {}
        url = Util.build_url(Section.GENERAL_STATS, fund_symbol)
        raw = requests.get(url)
        if raw.status_code == 200 and raw.text != "":
            print("200 and not empty")
            soup = BeautifulSoup(raw.text, 'html.parser')

            keys = [
                "NAV", "MinInvestment", "ExpenseRatio", "Turnover",
                "MorningstarCategory"
            ]
            for key in keys:
                # Each stat lives in a <span vkey="..."> element.
                spans = soup.findAll("span", attrs={"vkey": key})
                if len(spans) > 0:
                    response[key] = spans[0].text.strip()
                else:
                    # BUG FIX: error text previously said "trailing returns"
                    # (copy-pasted from get_trailing_returns).
                    raise FundException.UIChangedError(
                        f"Error while retrieving data for general details: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                    )
        else:
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for general details: Symbol does not exist: {fund_symbol}"
            )

        return response
    def get_holdings_stats(self, fund_symbol):
        """
        Gets the top 25 companies in their portfolio, as well as the following stats:
            1. Name
            2. % portfolio weight
            3. YTD return

        First get the first 25 most weighted companies from portfolio (desc), then bottom 25 (asc)
        For each:
            1. Equity view tab
                -Name
                -% portfolio weight
                -Shares owned
                -Shares changed
                -YTD return (could be positive, negative, float, or blank (-) )
                -P/E (could be positive, negative, float, or blank (-) )
            2. Equity prices tab
                -Price
                -G/L % (gain/loss percent)

        Each tab is represented as a table
            -equity view tab:       id = equity_holding_tab
                -get <tbody> with id holding_epage0
            -equity prices tab:     id = equityPrice_holding_tab

        Comparisons between 2+ mutual funds will compare Name and % portfolio weight only

        NOTE(review): this redefines get_holdings_stats (an earlier definition
        with the same name exists in this class); in Python the later
        definition wins — confirm which implementation is intended.
        """
        # BUG FIX: `response` was never initialized, so the first
        # `response[stock_name] = ...` below raised NameError.
        response = {}

        section = Section.HOLDINGS_PAGE_BOTTOM_25
        url = Util.build_url(section, fund_symbol)

        raw = requests.get(url)
        raw_data = raw.json()

        # The endpoint returns the table markup as a JSON field; strip the
        # whitespace noise before parsing.
        data = raw_data["htmlStr"].strip().replace("\n", "").replace("\t", "")

        soup = BeautifulSoup(data, 'html.parser')

        # Equity view tab
        table = soup.find("table", id="equity_holding_tab")
        if table is not None:
            for row in table.findAll(lambda tag: tag.name == 'tr'):
                # Row header holds the stock name
                row_header = row.find("th")
                if row_header is not None:
                    stock_name = row_header.text

                    # Extract details for that stock
                    stats = [
                        col.text.strip() for col in row.findAll("td")
                        if col.text.strip() != ""
                    ]
                    if len(stats) > 1:
                        # Delete values in positions 2,3,4, as they don't
                        # pertain to what we want to retain
                        del stats[2:5]

                        fields = [
                            "% portfolio weight", "Shares Owned", "Country",
                            "YTD Return", "P/E ratio"
                        ]
                        response[stock_name] = dict(zip(fields, stats))

        # Equity prices tab (overwrites the equity-view entry for each stock)
        table = soup.find("table", id="equityPrice_holding_tab")
        if table is not None:
            for row in table.findAll(lambda tag: tag.name == 'tr'):
                row_header = row.find("th")
                if row_header is not None:
                    stock_name = row_header.text

                    stats = [
                        col.text.strip() for col in row.findAll("td")
                        if col.text.strip() != ""
                    ]
                    if len(stats) > 1:
                        # Only retain values in positions 2,3,4
                        # (Currency, price, Gain/loss %)
                        stats = stats[2:5]
                        fields = ["Currency", "Price", "Gain/Loss %"]
                        response[stock_name] = dict(zip(fields, stats))

        response["data"] = data
        return response