Example #1
def get_stock_summary(stockSoup):
    """
    Gets the stock summary information from the company summary page, including Name, Price, Market Cap, Price Change, and Ticker

    Args:
        stockSoup (BeautifulSoup): The parsed page source of the summary page

    Returns:
        summaryDict (Dict): A dictionary which contains all the information captured on this page
    """

    summaryDict = {}
    summaryDict["Name"] = (stockSoup.find('h1').text).split(' -')[0]
    summaryDict["Price"] = float(
        stockSoup.find(
            'td',
            text='Market Price').find_next_sibling('td').text.split('$')[1])
    summaryDict["Market Cap"] = float(
        stockSoup.find('td', text='Marketcap').find_next_sibling(
            'td').text.split('$')[1].replace(',', ''))
    try:
        summaryDict["Price Change"] = float(
            stockSoup.find(
                'td', text='Price Change').find_next('td').text.split('$')[1])
    except IndexError:
        # No '$' in the cell means no price change was reported
        summaryDict["Price Change"] = 0.0
    summaryDict["Ticker"] = stockSoup.find(
        'td', text='Ticker').find_next_sibling('td').text

    logger.debug(summaryDict)
    return summaryDict
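
A minimal usage sketch, assuming the module-level logger this function uses; the markup below is illustrative only:

from bs4 import BeautifulSoup

html = '''
<h1>Example Corp - Summary</h1>
<table>
  <tr><td>Market Price</td><td>$1.23</td></tr>
  <tr><td>Marketcap</td><td>$1,234,567</td></tr>
  <tr><td>Price Change</td><td>$0.05</td></tr>
  <tr><td>Ticker</td><td>EXC</td></tr>
</table>
'''
summary = get_stock_summary(BeautifulSoup(html, 'lxml'))
# {'Name': 'Example Corp', 'Price': 1.23, 'Market Cap': 1234567.0,
#  'Price Change': 0.05, 'Ticker': 'EXC'}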
Example #2
def get_stock_historical_prices(stockHistoricalPricesCSV):
    """
    Reads in the csv and outputs a list of row dictionaries for storage in the Stock class

    Args:
        stockHistoricalPricesCSV (String): Path to the csv file

    Returns:
        (List): list of per-row dictionaries of historical prices
    """
    pricesDF = pandas.read_csv(stockHistoricalPricesCSV)
    logger.debug(pricesDF)
    # 'records' is the long form of the deprecated 'r' orient alias
    return pricesDF.to_dict('records')
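
For reference, a sketch of the record shape to_dict('records') produces; the column names are assumptions about the downloaded CSV:

import pandas

df = pandas.DataFrame({'Date': ['01 Feb 2019'], 'Close': [1.23]})
df.to_dict('records')
# [{'Date': '01 Feb 2019', 'Close': 1.23}]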
Example #3
def get_stock_historical_dividends(stockHistoricalDividendsCSV):
    dividendDF = pandas.read_csv(stockHistoricalDividendsCSV)
    logger.debug(dividendDF)
    dividendDF = dividendDF.dropna()
    try:
        dividendDF = dividendDF[['Ex Date', 'Gross Amount']]
        dividendDF.columns = ['Date', 'Dividend Paid']
        dividendDF = dividendDF[dividendDF['Dividend Paid'] != '-']
        return dividendDF.to_dict('records')
    except KeyError:
        logger.warning("No dividend information")
        return None
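
A minimal sketch feeding the function an in-memory CSV rather than a file path (pandas.read_csv accepts any file-like object); the columns mirror the ones selected above, and the module-level logger is assumed:

import io

csv = io.StringIO('Ex Date,Gross Amount\n01 Feb 2019,0.10\n')
get_stock_historical_dividends(csv)
# [{'Date': '01 Feb 2019', 'Dividend Paid': 0.1}]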
Example #4
def get_stock_summary(stockSoup):
    """
    Gets the stock summary information including
        - Name
        - Price
        - Market Cap
        - Price Earnings Ratio
        - Price Change
        - Ticker
        - Earnings per Share
        - Net Tangible Assets
        - Net DPS
        - Gross DPS
        - Beta Value
        - Price/NTA
        - Net Yield
        - Gross Yield
        - Sharpe Ratio

    Args:
        stockSoup (BeautifulSoup): The parsed page source of the summary page.

    Returns:
        dict: A dictionary which contains all the information captured on this page
    """

    summaryDict = {}
    summaryDict["Name"] = (stockSoup.find('h1').text).split(' -')[0]
    summaryDict["Price"] = stockSoup.find('td', text= 'Market Price').find_next_sibling('td').text
    summaryDict["Market Cap"] = stockSoup.find('td', text= 'Marketcap').find_next_sibling('td').text
    summaryDict["Price Earnings Ratio"] = stockSoup.find('td', text= 'P/E ratio').find_next_sibling('td').text
    summaryDict["Price Change"] = stockSoup.find('td', text= 'Price Change').find_next('td').text
    summaryDict["Ticker"] = stockSoup.find('td', text= 'Ticker').find_next_sibling('td').text
    summaryDict["EPS"] = stockSoup.find('td', text= 'EPS').find_next('td').text
    summaryDict["NTA"] = stockSoup.find('td', text= 'NTA').find_next_sibling('td').text
    summaryDict["Net DPS"] = stockSoup.find('td', text= 'Net DPS').find_next_sibling('td').text
    summaryDict["Gross DPS"] = stockSoup.find('td', text= 'Gross DPS').find_next_sibling('td').text
    summaryDict["Beta Value"] = stockSoup.find('td', text= 'Beta Value').find_next_sibling('td').text
    summaryDict["Price/NTA"] = stockSoup.find('td', text= 'Price/NTA').find_next_sibling('td').text
    summaryDict["Net Yield"] = stockSoup.find('td', text= 'Net Yield').find_next_sibling('td').text
    summaryDict["Gross Yield"] = stockSoup.find('td', text= 'Gross Yield').find_next_sibling('td').text
    summaryDict["Sharpe Ratio"] = stockSoup.find('td', text= 'Sharpe Ratio').find_next_sibling('td').text

    logger.debug(summaryDict)
    return summaryDict
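
Note that recent BeautifulSoup releases prefer string= over the older text= keyword used above; the two are interchangeable aliases for text matching:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<table><tr><td>Ticker</td><td>EXC</td></tr></table>', 'lxml')
soup.find('td', text='Ticker')    # matches the first cell
soup.find('td', string='Ticker')  # same match, newer spelling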
Example #5
def get_stock_historical_prices(stockHistoricalPricesCSV):
    """
    Reads in the csv and outputs a list of row dictionaries for storage in the Stock class

    Args:
        stockHistoricalPricesCSV (String): Path to the csv file

    Returns:
        (List): list of per-row dictionaries of historical prices
    """
    # 'records' is the long form of the deprecated 'r' orient alias
    prices = pandas.read_csv(stockHistoricalPricesCSV).to_dict('records')
    pricesReturn = []
    logger.debug(prices)
    for price in prices:
        # Rename the '$ Value Traded' key to a safer identifier downstream
        price['Dollar Value Traded'] = price.pop('$ Value Traded')
        pricesReturn.append(price)
    return pricesReturn
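
An equivalent sketch doing the key rename in pandas itself rather than in a Python loop; the column name matches the one popped above:

import pandas

df = pandas.DataFrame({'Date': ['01 Feb 2019'], '$ Value Traded': [1000.0]})
df.rename(columns={'$ Value Traded': 'Dollar Value Traded'}).to_dict('records')
# [{'Date': '01 Feb 2019', 'Dollar Value Traded': 1000.0}]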
Example #6
def get_stock_historical_dividends(stockHistoricalDividendsCSV):
    """
    Reads in the csv and outputs a list of row dictionaries for storage in the Stock class

    Args:
        stockHistoricalDividendsCSV (String): Path to the csv file

    Returns:
        (List): list of per-row dictionaries of historical dividends, or None if the file has no dividend columns
    """
    dividendDF = pandas.read_csv(stockHistoricalDividendsCSV)
    logger.debug(dividendDF)
    dividendDF = dividendDF.dropna()
    try:
        dividendDF = dividendDF[['Ex Date', 'Gross Amount']]
        dividendDF.columns = ['Date', 'Dividend Paid']
        dividendDF = dividendDF[dividendDF['Dividend Paid'] != '-']
        return dividendDF.to_dict('records')
    except KeyError:
        logger.warning("No dividend information")
        return None
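
A sketch of the failure mode the try/except guards against: selecting columns absent from the CSV raises a KeyError:

import pandas

df = pandas.DataFrame({'Unrelated': [1]})
try:
    df[['Ex Date', 'Gross Amount']]
except KeyError:
    print('No dividend columns in this file')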
Example #7
def print_financial_profile_sheet(workbook, stock, formats):
    logger.info("       Printing Financial Profile for " +
                stock.stockSummaryDict["Ticker"])
    row = 0
    col = 0

    # Create sheet
    worksheet = workbook.add_worksheet(stock.stockSummaryDict["Ticker"] +
                                       "_FinancialProfile")
    # Print Headers & Values

    logger.debug(stock.stockFinancialProfileDict.items())
    for key, value in stock.stockFinancialProfileDict.items():
        worksheet.write_string(row, col, key)
        worksheet.write_string(row, col + 1, value)
        row += 1
    worksheet.write_url(0,
                        13,
                        "internal:" + stock.stockSummaryDict["Ticker"] +
                        "_Summary!A1",
                        string="BACK")
Example #8
def print_historical_dividends_sheet(workbook, stock, formats):
    if stock.stockHistoricalDividendsDict is not None:
        logger.info("       Printing Historical Dividends for " +
                    stock.stockSummaryDict["Ticker"])
        row = 0
        col = 0

        # Create sheet
        worksheet = workbook.add_worksheet(stock.stockSummaryDict["Ticker"] +
                                           "_HistoricalDividends")
        # Print Headers
        keys = stock.stockHistoricalDividendsDict[0].keys()
        logger.debug(stock.stockHistoricalDividendsDict[0].keys())
        for key in keys:
            worksheet.write_string(row, col, key)
            col += 1
        worksheet.write_url(row,
                            col + 13,
                            "internal:" + stock.stockSummaryDict["Ticker"] +
                            "_Summary!A1",
                            string="BACK")

        row = 1
        col = 0

        # Print Items
        for rowItems in stock.stockHistoricalDividendsDict:
            logger.debug(rowItems)
            for key, value in rowItems.items():
                logger.debug(value)
                if key == 'Date':
                    worksheet.write_datetime(
                        row, col, datetime.strptime(value, '%d %b %Y'),
                        formats['dateFormat'])
                else:
                    worksheet.write_number(row, col, float(value))
                col += 1
            row += 1
            col = 0
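
Two assumptions this function makes, shown concretely: dividend dates parse with '%d %b %Y', and formats['dateFormat'] is an xlsxwriter cell format:

import xlsxwriter
from datetime import datetime

workbook = xlsxwriter.Workbook('demo.xlsx')
formats = {'dateFormat': workbook.add_format({'num_format': 'dd mmm yyyy'})}
datetime.strptime('01 Feb 2019', '%d %b %Y')  # datetime(2019, 2, 1, 0, 0)
workbook.close()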
Example #9
def scrape_company(browser, stock):
    """
    Contains the logic behind the scraping of an entire company's data:
    navigating to pages and downloading files

    Args:
        browser (Selenium.WebDriver): The automated Chrome browser
        stock (String): The stock ticker currently being scraped

    Returns:
        stockData (Stock): Class containing dictionaries of data
    """
    logger.info("Current Stock: " + stock)

    # Arrive at Summary & Ratios page and pull information
    browser.find_element_by_link_text(stock).click()
    summarySoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling ratio information")
    stockSummaryDict = get_stock_summary(summarySoup)

    # Arrive at Company Directory and pull directors information
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Company Directory')]").click()
    directorSoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling Director's information")
    stockDirectorDict = get_director_information(directorSoup)
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Arrive at Company Profile and pull description information
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Company Profile')]").click()
    profileSoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling company description")
    stockProfileDict = get_company_profile(profileSoup)
    logger.debug(stockProfileDict)
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Arrive at Financial Profile and pull debt-equity information
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Financial Profile')]").click()
    stockSoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling financial profile information")
    stockFinancialProfileDict = get_financial_profile(stockSoup)
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Arrive at Annual Reports and pull latest annual report
    # ? May require refactor of xpath to shorten it (Looks nicer)
    # TODO change dl directory outside temp
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Annual Reports')]").click()
    logger.info("Pulling annual report")
    browser.find_element_by_xpath(
        '//*[@id="content"]/center/table/tbody/tr[3]/td/table/tbody/tr[2]'
        '/td[2]/table/tbody/tr/td/table[2]/tbody/tr[1]/td[1]/table/tbody'
        '/tr[1]/td[2]/form/input').click()
    # sleep(10)
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Arrive at Tear Sheet and pull latest tear sheet
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Tear Sheet')]").click()
    logger.info("Pulling tear sheet")
    browser.find_element_by_xpath(
        r"""//*[@id="content"]/center/table/tbody/tr[3]/td/table/tbody/tr[2]/td[2]/table/tbody/tr/td/table/tbody/tr/td/p[2]/a"""
    ).click()
    # sleep(10)
    browser.execute_script("window.history.go(-1)")  # Go back

    # Create csv link for historical prices and pull it into a temporary folder
    csvLink = create_historical_prices_csv_link(stockSummaryDict["Ticker"])
    logger.info("Pulling historical prices information")
    browser.get(csvLink)
    sleep(3)
    stockHistoricalPricesDict = get_stock_historical_prices(
        tempDirectory + stockSummaryDict["Ticker"] + " Historical Prices.csv")

    # Create csv link for dividends and pull it into a temporary folder
    csvLink = create_historical_dividends_csv_link(stockSummaryDict["Ticker"])
    logger.info("Pulling historical dividends information")
    browser.get(csvLink)
    sleep(3)
    stockHistoricalDividendsDict = get_stock_historical_dividends(
        tempDirectory + stockSummaryDict["Ticker"] +
        " Historical Dividends.csv")

    # Go back to the stock ticker page
    logger.info("Back to company listings")
    browser.execute_script("window.history.go(-1)")

    # Create the stock obj and store it in an array
    stockData = Stock(stockSummaryDict, stockHistoricalPricesDict,
                      stockHistoricalDividendsDict, stockFinancialProfileDict,
                      stockProfileDict, stockDirectorDict)

    return stockData
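
A hedged sketch of the driver setup this function appears to assume: a Chrome session whose default download directory points at the module's tempDirectory, so the downloaded CSVs land where the readers look; the path is illustrative:

from selenium import webdriver

tempDirectory = '/tmp/scrape/'  # illustrative; the module defines its own
options = webdriver.ChromeOptions()
options.add_experimental_option(
    'prefs', {'download.default_directory': tempDirectory})
browser = webdriver.Chrome(options=options)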
Example #10
def save_data(stockDataArray, success):
    """
    Constructs a dictionary of company information, converts it to JSON, and sends it externally using send_to_server()

    Args:
        stockDataArray (List): list of all company information dictionaries
        success (Boolean): Whether the scraping was successful and processing needs to occur
    """
    currentTimeStamp = datetime.now().strftime('%Y/%m/%d')
    scrapeInsert = {currentTimeStamp: {'Date': currentTimeStamp}}
    if success:
        logger.info("Saving data")
        print("Saving data")

        dividendInsert = {'Data': {}, 'Name': 'HistoricalDividends'}
        priceInsert = {'Data': {}, 'Name': 'HistoricalPrices'}

        stockIteration = 0
        # Select stock
        for stock in stockDataArray:
            currentStockTicker = stock['Summary']['Ticker']
            logger.info("Saving data for: " + currentStockTicker)
            stockInsert = {}

            # Create stock dict from scraped data
            for sectionKey, sectionData in stock.items():
                logger.info(sectionKey)
                sectionInsert = {}
                if sectionKey == 'HistoricalPrices':
                    for line in sectionData:
                        logger.debug(line)
                        dateString = line.pop('Date')
                        dateString = (datetime.strptime(
                            dateString, '%d %b %Y')).strftime("%Y-%m-%d")
                        sectionInsert[dateString] = line
                    stockInsert[sectionKey] = sectionInsert
                elif sectionKey == 'HistoricalDividends':
                    try:
                        for line in sectionData:
                            logger.debug(line)
                            dateString = line.pop('Date')
                            dateString = (datetime.strptime(
                                dateString, '%d %b %Y')).strftime("%Y-%m-%d")
                            sectionInsert[dateString] = line.pop(
                                'Dividend Paid')
                        stockInsert[sectionKey] = sectionInsert
                    except TypeError:
                        pass
                else:
                    for elementKey, elementValue in sectionData.items():
                        sectionInsert[elementKey] = elementValue
                    stockInsert[sectionKey] = sectionInsert

            scrapeInsert[currentTimeStamp][stock['Summary']
                                           ['Ticker']] = stockInsert
            stockIteration += 1
            printProgressBar(
                stockIteration,
                len(stockDataArray),
                prefix='Saving {} data'.format(stock['Summary']['Ticker']),
                suffix='of {} companies completed'.format(len(stockDataArray)))

        with open('data.txt', 'w') as outfile:
            json.dump(scrapeInsert, outfile, indent=4)

        # save_result_to_pastebin(scrapeInsert, currentTimeStamp)
        send_to_server(scrapeInsert)
        send_files_to_server()
    else:
        scrapeInsert[currentTimeStamp] = {}
        # save_result_to_pastebin(scrapeInsert, currentTimeStamp)
        send_to_server(scrapeInsert)
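
For reference, the shape of scrapeInsert after one stock (ticker, dates, and values made up; sections abridged):

scrapeInsert = {
    '2019/02/01': {
        'Date': '2019/02/01',
        'EXC': {
            'Summary': {'Ticker': 'EXC'},
            'HistoricalPrices': {'2019-02-01': {'Close': 1.23}},
            'HistoricalDividends': {'2019-02-01': 0.1},
        },
    },
}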
Example #11
def get_stock_historical_prices(stockHistoricalPricesCSV):
    pricesDF = pandas.read_csv(stockHistoricalPricesCSV)
    logger.debug(pricesDF)
    return pricesDF.to_dict('records')
Example #12
def scrape_company(browser, stock):
    """
    Contains the logic behind the scraping of an entire company's data:
    navigating to pages and downloading files

    Args:
        browser (Selenium.WebDriver): The automated Chrome browser
        stock (String): The stock ticker currently being scraped

    Returns:
        stockData (Dict): Dictionary containing the scraped data sections
    """
    logger.info("Current Stock: " + stock)
    stockInnerIteration = 0
    numFuncs = 10
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))

    # Arrive at Summary & Ratios page and pull information
    browser.find_element_by_link_text(stock).click()
    summarySoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling ratio information")
    stockSummaryDict = get_stock_summary(summarySoup)
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))
    stockRatioDict = get_ratios(summarySoup)
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))

    # Create csv link for historical prices and pull it into a temporary folder
    csvLink = create_historical_prices_csv_link(stock)
    logger.info("Pulling historical prices information")
    browser.get(csvLink)

    # Create csv link for dividends and pull it into a temporary folder
    csvLink = create_historical_dividends_csv_link(stock)
    logger.info("Pulling historical dividends information")
    browser.get(csvLink)

    # Arrive at Annual Reports and pull latest annual report
    # TODO May require refactor of xpath to shorten it (Looks nicer)
    # TODO change dl directory outside temp
    # The current year's report may not exist yet, so fall back one year
    try:
        logger.info("Pulling annual report")
        year = int(datetime.now().strftime('%Y'))
        annualReportLink = create_annual_report_link(stock, str(year))
        browser.get(annualReportLink)
        # find_element raises when no '404 Not Found' title is present,
        # i.e. the report downloaded; the raise lands in the except below
        if browser.find_element_by_xpath(
                ".//title[contains(text(), '404 Not Found')]"):
            browser.execute_script(
                "window.history.go(-1)")  # Go back to summary page
            annualReportLink = create_annual_report_link(stock, str(year - 1))
            browser.get(annualReportLink)
            if browser.find_element_by_xpath(
                    ".//title[contains(text(), '404 Not Found')]"):
                browser.execute_script(
                    "window.history.go(-1)")  # Go back to summary page
    except Exception:
        pass  # No annual report available; continue with the other data
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))
    # browser.execute_script("window.history.go(-1)") # Go back to summary page

    # Create and get the tear sheet for the company
    tearSheetLink = 'https://companyresearch-nzx-com.ezproxy.aut.ac.nz/tearsheets/' + stock + '.pdf'
    browser.get(tearSheetLink)
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))

    # Arrive at Company Directory and pull directors information
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Company Directory')]").click()
    directorSoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling Director's information")
    stockDirectorDict = get_director_information(directorSoup)
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Arrive at Company Profile and pull description information
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Company Profile')]").click()
    profileSoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling company description")
    stockProfileDict = get_company_profile(profileSoup)
    logger.debug(stockProfileDict)
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Arrive at Financial Profile and pull debt-equity information
    browser.find_element_by_xpath(
        ".//span[contains(text(), 'Financial Profile')]").click()
    stockSoup = BeautifulSoup(browser.page_source, 'lxml')
    logger.info("Pulling financial profile information")
    stockFinancialProfileDict = get_financial_profile(stockSoup)
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))
    browser.execute_script("window.history.go(-1)")  # Go back to summary page

    # Read in the prices csv
    stockHistoricalPricesDict = get_stock_historical_prices(
        tempDirectory + stock + " Historical Prices.csv")
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))

    # Read in dividends csv
    stockHistoricalDividendsDict = get_stock_historical_dividends(
        tempDirectory + stock + " Historical Dividends.csv")
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))

    # Go back to the stock ticker page
    logger.info("Back to company listings")
    browser.execute_script("window.history.go(-1)")
    stockInnerIteration += 1
    printProgressBar(stockInnerIteration,
                     numFuncs,
                     prefix='Scraping {} data'.format(stock),
                     suffix='of {} completed'.format(stock))

    # Create the stock obj and store it in an array
    stockData = {
        'Summary': stockSummaryDict,
        'Ratio': stockRatioDict,
        'HistoricalPrices': stockHistoricalPricesDict,
        'HistoricalDividends': stockHistoricalDividendsDict,
        'FinancialProfile': stockFinancialProfileDict,
        'Profile': stockProfileDict,
        'Directors': stockDirectorDict
    }

    return stockData
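
printProgressBar is called throughout but not shown; a minimal sketch of a compatible console progress bar, with the signature inferred from the call sites:

def printProgressBar(iteration, total, prefix='', suffix='', length=40):
    # Draw a single carriage-return progress bar on stdout
    filled = int(length * iteration // total)
    bar = '#' * filled + '-' * (length - filled)
    print('\r{} |{}| {}/{} {}'.format(prefix, bar, iteration, total, suffix),
          end='\r')
    if iteration == total:
        print()  # move to a fresh line once complete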