def get_product_data(product):
    """Scrape brand, rating count, and average rating from one product page.

    Parameters:
        product: path/slug appended to ``product_base_url`` to form the URL.

    Returns:
        list: ``[url, brand, num_ratings, rating]``. Fields that could not be
        located keep their ``'ERR ... NOT FOUND'`` sentinel strings; a product
        with no ratings yields ``num_ratings='0'`` and ``rating='n/a'``.
    """
    product_html = utils.get_html_as_string(product_base_url + product)
    parser = BeautifulSoup(product_html, 'html.parser')

    brand = 'ERR BRAND NOT FOUND'
    num_ratings = 'ERR NUM RATINGS NOT FOUND'
    rating = 'ERR RATING NOT FOUND'

    # Brand and rating count are tagged with itemprop microdata attributes.
    for span in parser.find_all('span'):
        # Hoist the lookup: the original called span.get('itemprop') twice
        # per comparison, and the `!= None` pre-check was redundant (the
        # equality test already fails for None).
        itemprop = span.get('itemprop')
        if itemprop == 'brand':
            brand = span.string
        elif itemprop == 'ratingCount':
            num_ratings = span.string

    if num_ratings == 'ERR NUM RATINGS NOT FOUND':
        # A missing ratingCount element is how the page represents 0 ratings.
        num_ratings = '0'
        rating = 'n/a'
    else:
        # The average rating appears as the 4th word of a 'heading-e'
        # paragraph; 'Age' and 'Gender' headings share that class and are
        # skipped. NOTE(review): exact heading wording assumed from the
        # original index [3] — confirm against a live page.
        for p in parser.find_all('p'):
            classes = p.get('class')
            if classes is not None and classes[0] == 'heading-e':
                text = p.string
                # Guard: p.string is None for nested markup (original would
                # raise AttributeError), and a short heading would IndexError.
                if text is not None and text != 'Age' and text != 'Gender':
                    words = text.split(' ')
                    if len(words) > 3:
                        rating = words[3]

    return [product_base_url + product, brand, num_ratings, rating]
def parse_search(query):
    """Scrape every product returned by a search and write the data to CSV.

    Parameters:
        query: search term appended to ``search_base_url``. Also embedded
            verbatim in the output filename — NOTE(review): characters such
            as '/' in query would break the path; confirm callers sanitize.

    Side effects:
        Creates ``../parsed_data/<MM-DD-YYYY>_<HH-MM-SS>_<query>.csv`` with
        one row per product as produced by ``get_product_data``.
    """
    search_html = utils.get_html_as_string(search_base_url + query)
    product_links = get_product_pages(search_html)

    # Single strftime call so the date and time parts come from the same
    # instant (the original made two separate calls, which could straddle a
    # second or midnight boundary and emit a mismatched timestamp).
    timestamp = time.strftime("%m-%d-%Y_%H-%M-%S")
    filename = '../parsed_data/' + timestamp + '_' + query + '.csv'

    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        for product in product_links:
            writer.writerow(get_product_data(product))