class Part(object):
    """A page fetched from ``url`` and indexed into a ToC and a list of subparts.

    The constructor downloads the page, parses it with BeautifulSoup and walks
    the ``<li>`` items of the first ``<ul>`` inside ``<div class="main">``.
    Each item is classified as a table-of-contents link, a subpart header, or
    a definition belonging to the most recently seen subpart.

    Attributes:
        url: The address the page was fetched from.
        partPage: The ``requests`` response for ``url``.
        html: The parsed document.
        standard_number: Text of the ``<strong>`` inside ``div.blackTen``.
        mainIndex: The ``<div class="main">`` element, or None if absent.
        subpart: The most recently created ``SubPart``, or None.
    """

    # Class-level default so the property is readable before find_toc() runs.
    _table_of_contents = None

    def __init__(self, url, timeout=30):
        """Fetch and index the page at ``url``.

        Args:
            url: Address of the page to scrape.
            timeout: Seconds passed to ``requests.get``; without a timeout a
                stalled server would block the caller indefinitely.
        """
        self.url = url
        # Parsing state must exist before setupPart() invokes the find_*
        # helpers, which append to / read from it.
        self._table_of_contents = None
        self._subparts = []
        self.subpart = None
        self.partPage = requests.get(url, timeout=timeout)
        self.html = BeautifulSoup(self.partPage.text, "lxml")
        self.standard_number = self.find_standard_number()
        self.mainIndex = self.html.find("div", class_="main")
        self.setupPart()

    # NOTE: these used to be getter methods that were shadowed by instance
    # attributes of the same name (and ``subparts()`` read an attribute that
    # was never set). Properties keep plain attribute access working, and the
    # setters keep ``part.subparts = ...`` style assignments valid.
    @property
    def table_of_contents(self):
        """The ``ToC`` created by find_toc(), or None if none was found."""
        return self._table_of_contents

    @table_of_contents.setter
    def table_of_contents(self, value):
        self._table_of_contents = value

    @property
    def subparts(self):
        """List of every ``SubPart`` found so far, in page order."""
        return self._subparts

    @subparts.setter
    def subparts(self, value):
        self._subparts = value

    def setupPart(self):
        """Classify every ``<li>`` of the main index list.

        Does nothing when the page has no ``div.main`` or that div contains
        no ``<ul>``.
        """
        if self.mainIndex is None:
            return
        listing = self.mainIndex.find('ul')
        if listing is None:
            return
        for li in listing.find_all('li'):
            if self.find_toc(li):
                print('found toc')
            elif self.find_subparts(li):
                print('foundsubpart')
            else:
                print('foundsubpart def')
                self.find_subpartdefinition(li)

    def find_standard_number(self):
        """Return the text of the ``<strong>`` inside ``div.blackTen``."""
        return self.html.find("div", class_="blackTen").find('strong').text

    def find_toc(self, li):
        """Record a table-of-contents link if ``li`` holds one.

        ``li`` is a ToC item when the ``title`` of its second anchor contains
        ``'Table'``.

        Returns:
            True if a ``ToC`` was created from the anchor's ``href``,
            otherwise False (including when the item has fewer than two
            anchors or the anchor has no ``title``).
        """
        anchors = li.find_all('a')
        if len(anchors) < 2 or 'Table' not in anchors[1].get('title', ''):
            return False
        self.table_of_contents = ToC(anchors[1]['href'])
        return True

    def find_subparts(self, li):
        """Record a subpart header if ``li`` holds one.

        ``li`` is a subpart header when the ``name`` of its first anchor
        contains ``'Subpart'``; the ``href`` of the second anchor is handed
        to ``SubPart``.

        Returns:
            True if a ``SubPart`` was created and appended, otherwise False.
        """
        anchors = li.find_all('a')
        if len(anchors) < 2 or 'Subpart' not in anchors[0].get('name', ''):
            return False
        # The list is created once in __init__; resetting it here would
        # discard every previously found subpart.
        self.subpart = SubPart(anchors[1]['href'])
        self._subparts.append(self.subpart)
        return True

    def find_subpartdefinition(self, li):
        """Attach the link in ``li`` to the most recently seen subpart.

        Only the given item is used; re-scanning the whole list here would
        append every link on the page once per definition item. Items seen
        before any subpart header, or without a second anchor, are ignored.
        """
        if self.subpart is None:
            return
        anchors = li.find_all('a')
        if len(anchors) < 2:
            return
        self.subpart.append_subpartdef(anchors[1]['href'])
def find_subparts(self, li):
    """Record a subpart header on ``self`` if ``li`` holds one.

    ``li`` is a subpart header when the ``name`` of its first anchor contains
    ``'Subpart'``; the ``href`` of its second anchor is handed to ``SubPart``.
    The new object is stored as ``self.subpart`` and appended to
    ``self.subparts``.

    Returns:
        True if a ``SubPart`` was created and appended, otherwise False
        (including when the item has fewer than two anchors or the first
        anchor has no ``name``).
    """
    # Create the list only when it does not exist yet; resetting it on every
    # call would throw away the subparts collected from earlier items.
    if not isinstance(getattr(self, 'subparts', None), list):
        self.subparts = []
    anchors = li.find_all('a')
    if len(anchors) < 2 or 'Subpart' not in anchors[0].get('name', ''):
        return False
    self.subpart = SubPart(anchors[1]['href'])
    self.subparts.append(self.subpart)
    return True