def writeToFile(self, data, output):
    """
    Writes the tables of an html page to a file in ascii format.
    Uses BeautifulSoup for parsing tables.

    Every table starts with a ``New Table:`` line. Each table row then
    becomes one line of output holding the text of its header (``th``)
    cells followed by the text of its data (``td``) cells, every entry
    being followed by a comma.

    :note: This method is untested.

    :param data: data that has been retrieved with the getData method
    :param output: name of the output file
    """
    # Parse before opening the file so that a parsing failure does not
    # leave behind an empty (or truncated) output file.
    soup = BeautifulSoup(''.join(data))

    # The context manager closes the file even if writing fails part way.
    with open(output, 'w') as g:
        for table in soup.findAll('table'):
            g.write('\nNew Table:\n')
            for tr in table.findAll('tr'):
                # Header cells are written before data cells.
                for tag in ('th', 'td'):
                    for cell in tr.findAll(tag):
                        text = cell.find(text=True)
                        if text is None:
                            # Cell without any text: write nothing, not
                            # even the separating comma.
                            continue
                        try:
                            g.write(text)
                            g.write(',')
                        except UnicodeError:
                            # Best effort: text that cannot be encoded for
                            # the output file is skipped. Any other error
                            # (e.g. a failing disk) is allowed to surface.
                            continue
                g.write("\n")
def parseTable(self, data, tablenumber=3):
    """
    Parses html table data using BeautifulSoup.

    The SMAKCED wiki page returns several "tables", so the table to parse
    is selected by its position on the page. The default of 3 is the value
    that used to be hard coded.

    :param data: data that has been retrieved with the getData method
    :param tablenumber: zero-based index of the table to parse, counted
                        over all ``table`` elements found in the page

    :return: list with one entry per table row; each entry is a list of
             strings holding the text of the row's header (``th``) cells
             followed by the text of its data (``td``) cells, with all
             spaces removed
    :raises IndexError: if the page has no table at index ``tablenumber``
    """
    soup = BeautifulSoup(''.join(data))
    tables = soup.findAll('table')

    # Same exception type as plain list indexing, but with a message that
    # says what went wrong.
    if tablenumber >= len(tables):
        raise IndexError('table %d requested but only %d table(s) found'
                         % (tablenumber, len(tables)))

    result = []
    for tr in tables[tablenumber].findAll('tr'):
        line = []
        # Header cells are collected before data cells.
        for tag in ('th', 'td'):
            for cell in tr.findAll(tag):
                # The lookup result is passed through str(), so a cell
                # without any text ends up as the string 'None'. This is
                # kept as is because callers may rely on it.
                text = str(cell.find(text=True))
                line.append(text.replace(' ', ''))
        result.append(line)
    return result