# Build Craigslist search URLs for chosen year/model, fetch one page, and
# parse it into a BeautifulSoup result set of listing blocks.
import os

import requests  # HTTP client for fetching the listing pages
from bs4 import BeautifulSoup  # For parsing and creating soup objects

import numpy as np  # good ol numpy (for matrix and array manipulation)
import pandas as pd  # for exporting to csv

from utilities import add_link, email_client  # import our link adding function

# BUG FIX: os.chdir() returns None, so the original
# `fileSaveDir = os.chdir(path)` always left fileSaveDir as None.
# Keep the path in the variable and change directory as a separate step.
fileSaveDir = '/Users/ChrisErnst/Development/data-science-lessons-di/Seventeenth_Lesson'
os.chdir(fileSaveDir)  # Set the directory

linkList = []  # Create an empty list we will use to hold links
linkList.append(add_link(2001, model='accord'))
linkList.append(add_link(2000))
# build our list of urls to specific desired models

page = requests.get(linkList[1])  # Sets page to the target URL
chaos = page.text  # Makes the html a text-based object
soup = BeautifulSoup(chaos, 'html.parser')  # Uses a html parser to break up the html blocks
newBlockLong = soup.find_all('p', 'result-info')  # Returns a bs4 ResultSet
# BUG FIX: a bare `type(newBlockLong)` expression has no effect outside a
# REPL; print it so the script actually reports the type.
print(type(newBlockLong))
# Build the linklist with the input data year and model for all counties,
# then request each generated link and parse its HTML.
# Requires in scope: inputData (DataFrame with 'year'/'model' columns),
# add_link, requests, BeautifulSoup.
linkList = []  # Build an optional linkList for link debugging later on

# Make a request for each of the links, and add it to list ourResultMatrix
ourResultMatrix = []

# BUG FIX: the original read `inputData['year'][j]`, which is *label*-based
# Series indexing and raises KeyError whenever inputData does not have the
# default 0..n-1 integer index (e.g. after a filter). Iterating the column
# values positionally with zip/enumerate is index-safe and more idiomatic
# than `for j in range(len(inputData))`.
vehicle_total = len(inputData)
for j, (tempYear, tempModel) in enumerate(zip(inputData['year'], inputData['model'])):
    print("\nWorking on", tempYear, tempModel,
          "| Vehicle ", j + 1, " of", vehicle_total, "\n")

    links = add_link(tempYear, model=tempModel)  # one URL per county
    linkList.append(links)

    link_total = len(links)  # hoist the loop-invariant length
    for x, link in enumerate(links):
        print("\nWorking on link:", x + 1, "of", link_total, "\n")
        page = requests.get(link)  # Sets page to the target URL
        chaos = page.text  # Makes the html a text-based object
        soup = BeautifulSoup(chaos, 'html.parser')
# Variant of the setup script: set the working directory, build search URLs,
# fetch one page, and parse it into a BeautifulSoup result set.
# NOTE(review): the opening of the first statement (`fileSaveDir = os.chdir(`)
# was lost when this file was flattened; it is reconstructed here to match the
# equivalent statement in the first script block — confirm against the
# original notebook.
import os
import time
from datetime import datetime

import requests  # used below but was never imported in this fragment
from bs4 import BeautifulSoup  # For parsing and creating soup objects

import numpy as np  # good ol numpy (for matrix and array manipulation)
import pandas as pd  # for exporting to csv

from utilities import add_link  # import our link adding function
from utilities import email_client

# BUG FIX: os.chdir() returns None; keep the path and chdir separately so
# fileSaveDir actually holds the directory string.
fileSaveDir = '/Users/ChrisErnst/Development/data-science-lessons-di/Seventeenth_Lesson'
os.chdir(fileSaveDir)  # Set the directory

linkList = []  # Create an empty list we will use to hold links
linkList.append(add_link(2001, model='accord'))  # 0th element
linkList.append(add_link(2000))  # 1st element
# build our list of urls to specific desired models

page = requests.get(linkList[1])  # Sets page to the target URL
chaos = page.text  # Makes the html a text-based object
soup = BeautifulSoup(chaos, 'html.parser')  # Uses a html parser to break up the html blocks
newBlockLong = soup.find_all('p', 'result-info')  # Returns a bs4 ResultSet
# BUG FIX: a bare `type(newBlockLong)` expression has no effect outside a
# REPL; print it so the script actually reports the type.
print(type(newBlockLong))