def populate_csv_file():
    """Extract course entries from the HTML file at ``html_path`` and write
    the unique ones to a CSV at ``csv_path`` (backing up the old CSV first).

    For each ``<a>`` tag, the displayed text is split into a title and a
    department/level code, handling both "<DEPT 123> - <Title>" and
    "<Title> - <DEPT 123>" layouts; text with no recognizable code keeps the
    whole string as the title. Duplicate (title, department_and_level) pairs
    are skipped.

    NOTE(review): assumes ``csv_path`` and ``html_path`` are module-level
    globals and ``backup``/``CsvFile`` are project helpers — confirm.
    """
    backup(csv_path)
    f_csv = CsvFile(csv_path, 'w')
    seen_courses = set()
    # Bug fix: the original opened html_path without ever closing the handle.
    # BeautifulSoup parses eagerly, so the file can be closed right away.
    with open(html_path) as html_file:
        course_tags_soup = BeautifulSoup(html_file, 'html.parser')
    for course_tag in course_tags_soup.findAll('a'):
        displayed_course_information = course_tag.contents[0]
        # Raw strings: the original non-raw patterns contained invalid escape
        # sequences (\- \.) that raise SyntaxWarning on modern CPython.
        department_and_level_regex = r'[A-Zx0-9 \.\-]+'
        if re.match(r'%s \- ' % department_and_level_regex, displayed_course_information):
            # "<DEPT 123> - <Title>" layout: code comes first.
            department_and_level, title = displayed_course_information.split(' - ', 1)
        elif re.search(r' \- %s($|\s)' % department_and_level_regex, displayed_course_information):
            # "<Title> - <DEPT 123>" layout: code comes last.
            title, department_and_level = displayed_course_information.rsplit(' - ', 1)
        else:
            # No recognizable department/level code in the text.
            title, department_and_level = displayed_course_information, ''
        url = course_tag.get('href')
        if (title, department_and_level) not in seen_courses:
            f_csv.add_row(title, department_and_level, url)
            seen_courses.add((title, department_and_level))
    f_csv.close()
def save_as_csv(self, url, file_path):
    """Scrape the ``main_table_countries_today`` table from a local HTML
    snapshot and save it as a CSV at ``file_path``.

    Header cells (``<th>``) become the CSV columns; each ``<tr>``'s ``<td>``
    cells become a data row.

    NOTE(review): ``url`` is unused — the data is read from a hard-coded
    local snapshot, not fetched; confirm whether fetching was intended.
    """
    csv = CsvFile('./data/test.csv')
    # Removed a dead assignment: the original set `html` to the
    # view-source snapshot path and immediately overwrote it.
    html = './data/table.html'
    # Bug fix: the file handle was opened and never closed.
    with open(html, "r") as f:
        soup = BeautifulSoup(f, 'html.parser')
    table = soup.find(id="main_table_countries_today")

    # Header columns from the <th> cells.
    header = [csv.clean(column.text) for column in table.findAll('th')]
    csv.add_columns(header)

    # Data rows. Bug fix: the header <tr> has no <td> cells, so the original
    # appended an empty row to the CSV; skip rows with no <td> cells.
    # (Also dropped the `output_rows` accumulator, which was never used.)
    for table_row in table.findAll('tr'):
        output_row = [column.text for column in table_row.findAll('td')]
        if output_row:
            csv.add_row(output_row)

    # Bug fix: the save path was hard-coded to './data/abc.csv',
    # silently ignoring the `file_path` parameter.
    csv.save(file_path)