Пример #1
0
def get_product_data(product):
  """Scrape brand, rating count and rating from a single product page.

  The page at ``product_base_url + product`` is fetched with
  ``utils.get_html_as_string`` and parsed with BeautifulSoup's
  ``html.parser``.

  Args:
    product: String appended to ``product_base_url`` to form the page URL.

  Returns:
    A list ``[url, brand, num_ratings, rating]``:
      * ``brand`` is the ``.string`` of the last ``<span itemprop="brand">``,
        or ``'ERR BRAND NOT FOUND'`` if no such span exists.
      * ``num_ratings`` is the ``.string`` of the last
        ``<span itemprop="ratingCount">``; if no such span exists the product
        is treated as unrated and the row carries ``'0'`` / ``'n/a'``.
      * ``rating`` is the fourth space-separated token of the last
        ``<p>`` whose first class is ``heading-e`` and whose text is not
        ``'Age'`` or ``'Gender'``; ``'ERR RATING NOT FOUND'`` if none yields
        such a token.
  """
  url = product_base_url + product
  parser = BeautifulSoup(utils.get_html_as_string(url), 'html.parser')

  brand = 'ERR BRAND NOT FOUND'
  num_ratings = 'ERR NUM RATINGS NOT FOUND'
  rating = 'ERR RATING NOT FOUND'

  # Find brand and num_ratings; a later matching span overrides an earlier one.
  for span in parser.find_all('span'):
    itemprop = span.get('itemprop')
    if itemprop == 'brand':
      brand = span.string
    elif itemprop == 'ratingCount':
      num_ratings = span.string

  # A page without any ratingCount span is treated as having zero ratings.
  if num_ratings == 'ERR NUM RATINGS NOT FOUND':
    return [url, brand, '0', 'n/a']

  # Find the rating if present.
  for p in parser.find_all('p'):
    classes = p.get('class')
    # Guard against a missing or empty class attribute before indexing.
    if not classes or classes[0] != 'heading-e':
      continue

    text = p.string
    # .string is None when the tag has more than one child; skip such
    # headings instead of crashing on None.split().
    if text is None or text == 'Age' or text == 'Gender':
      continue

    words = text.split(' ')
    # Only headings with at least four tokens carry a rating value; shorter
    # ones are ignored so the placeholder (or an earlier match) is kept.
    if len(words) > 3:
      rating = words[3]

  return [url, brand, num_ratings, rating]
Пример #2
0
def parse_search(query):
  """Run a search and dump the data of every resulting product to a CSV file.

  The search page at ``search_base_url + query`` is fetched with
  ``utils.get_html_as_string``; ``get_product_pages`` extracts the product
  links from it, and one row per product (as returned by
  ``get_product_data``) is written to
  ``../parsed_data/<MM-DD-YYYY>_<HH-MM-SS>_<query>.csv``.

  Args:
    query: Search string; appended verbatim to ``search_base_url`` and used
      verbatim in the output file name.

  Returns:
    None. The result is the CSV file written to disk.
  """
  search_html = utils.get_html_as_string(search_base_url + query)
  product_links = get_product_pages(search_html)

  # Read the clock once so the date and time parts of the file name always
  # belong to the same instant (two separate strftime() calls could straddle
  # a second/midnight boundary).
  timestamp = time.strftime("%m-%d-%Y_%H-%M-%S")
  filename = '../parsed_data/' + timestamp + '_' + query + '.csv'

  # newline='' lets the csv module control line endings itself.
  with open(filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')

    for product in product_links:
      writer.writerow(get_product_data(product))