def price_parse(request):
    """Scrape the price list page and store it as Category/SubCategory/Item rows.

    Downloads ``BASE_URL``, then walks the markup:

    * every ``<h2 class="button">`` becomes a ``Category``;
    * every ``<h3>`` inside the first ``<div>`` sibling following that heading
      becomes a ``SubCategory``;
    * every ``<tr>`` of the first ``<table>`` under the ``<h3>``'s parent
      becomes an ``Item``: name from the first cell, price from the ``value``
      of the second ``<input>`` in the second cell, unit from the third cell.

    Progress is echoed to stdout. Rows that do not have three ``<td>`` cells
    and two ``<input>`` elements in the second cell are skipped.

    NOTE(review): ``price_parse`` is defined a second time further down in
    this file; the later definition is the one that stays bound.

    Args:
        request: The incoming request object; it is not read here.

    Returns:
        ``HttpResponseRedirect('/price/')``.
    """
    # Imports are kept function-local, as in the original code.
    from contextlib import closing

    from bs4 import BeautifulSoup

    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    BASE_URL = 'http://classicag.ru/price/'

    # closing() guarantees the HTTP response is released even if read() or
    # parsing raises; the Python 2 response object is not a context manager.
    with closing(urlopen(BASE_URL)) as response:
        # No explicit parser is passed (unchanged from the original), so bs4
        # chooses among the parsers that are installed.
        soup = BeautifulSoup(response.read())

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    for category in soup.findAll('h2', attrs={'class': 'button'}):
        category_name = category.contents[0].string
        print('___________________________________________________________')
        print(category_name)
        cat = Category(name=category_name)
        cat.save()

        category_container = category.findNextSiblings('div')[0]
        for subcat in category_container.findAll('h3'):
            print(' ' + subcat.string)
            subc = SubCategory(category=cat, name=subcat.string)
            subc.save()

            items_table = subcat.parent.findAll('table')[0]
            for tr in items_table.findAll('tr'):
                # Look the cells up once per row instead of three times.
                cells = tr.findAll('td')
                if len(cells) < 3:
                    # Not an item row (e.g. a header or spacer row).
                    continue
                inputs = cells[1].findAll('input')
                if len(inputs) < 2:
                    # The price lives in the second <input>; nothing to store.
                    continue

                item_name = cells[0].string
                raw_price = inputs[1]['value']
                print(' ' + item_name + ' | ' + str(raw_price))
                i = Item(
                    subcategory=subc,
                    name=item_name,
                    unit=cells[2].string,
                    price=int(raw_price),
                )
                i.save()

    return HttpResponseRedirect('/price/')
def price_parse(request):
    """Import the remote price list into Category, SubCategory and Item rows.

    The page at ``BASE_URL`` is fetched and parsed with BeautifulSoup:

    * each ``<h2 class="button">`` heading is saved as a ``Category``;
    * each ``<h3>`` found in the first ``<div>`` sibling after that heading is
      saved as a ``SubCategory`` of it;
    * each ``<tr>`` in the first ``<table>`` under the ``<h3>``'s parent is
      saved as an ``Item`` whose name is the first cell's text, whose price is
      the ``value`` of the second ``<input>`` in the second cell, and whose
      unit is the third cell's text.

    Every saved name is also printed to stdout. Table rows lacking three
    ``<td>`` cells, or lacking two ``<input>`` elements in the second cell,
    are skipped.

    NOTE(review): an earlier definition of ``price_parse`` appears above in
    this file; being the later one, this definition is the one that stays
    bound.

    Args:
        request: The incoming request object; it is not read here.

    Returns:
        ``HttpResponseRedirect("/price/")``.
    """
    # Imports are kept function-local, as in the original code.
    from contextlib import closing

    from bs4 import BeautifulSoup

    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    BASE_URL = "http://classicag.ru/price/"

    # The Python 2 response object is not a context manager, so wrap it in
    # closing() to make sure the connection is released on every path.
    with closing(urlopen(BASE_URL)) as response:
        # No explicit parser is passed (unchanged from the original), so bs4
        # chooses among the parsers that are installed.
        soup = BeautifulSoup(response.read())

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    for category in soup.findAll("h2", attrs={"class": "button"}):
        category_name = category.contents[0].string
        print("___________________________________________________________")
        print(category_name)
        cat = Category(name=category_name)
        cat.save()

        category_container = category.findNextSiblings("div")[0]
        for subcat in category_container.findAll("h3"):
            print(" " + subcat.string)
            subc = SubCategory(category=cat, name=subcat.string)
            subc.save()

            items_table = subcat.parent.findAll("table")[0]
            for tr in items_table.findAll("tr"):
                # Query the row's cells once and reuse the result.
                cells = tr.findAll("td")
                if len(cells) < 3:
                    # Not an item row (e.g. a header or spacer row).
                    continue
                inputs = cells[1].findAll("input")
                if len(inputs) < 2:
                    # The price lives in the second <input>; nothing to store.
                    continue

                item_name = cells[0].string
                raw_price = inputs[1]["value"]
                print(" " + item_name + " | " + str(raw_price))
                i = Item(
                    subcategory=subc,
                    name=item_name,
                    unit=cells[2].string,
                    price=int(raw_price),
                )
                i.save()

    return HttpResponseRedirect("/price/")