Exemplo n.º 1
0
def price_parse(request):
    """Scrape the remote price list, store it, and redirect to ``/price/``.

    The page at ``BASE_URL`` is downloaded and walked in three levels:

    * every ``<h2 class="button">`` heading becomes a ``Category`` named
      after the string of the heading's first child;
    * every ``<h3>`` inside the first ``<div>`` sibling following that
      heading becomes a ``SubCategory``;
    * every row of the first ``<table>`` under the ``<h3>``'s parent
      becomes an ``Item`` (name from the first cell, unit from the third
      cell, price from the ``value`` of the second ``<input>`` in the
      second cell).

    Progress is echoed to stdout.  Records are always newly constructed
    and saved; nothing here looks up existing rows first.
    NOTE(review): repeated calls presumably create duplicates -- verify
    against the model constraints.

    Args:
        request: the incoming request object; it is not inspected.

    Returns:
        ``HttpResponseRedirect('/price/')``.

    Raises:
        IndexError: a category heading has no following ``<div>``, or a
            sub-category's parent contains no ``<table>``.
        KeyError: the price ``<input>`` has no ``value`` attribute.
        ValueError: the price ``value`` is not an integer literal.
    """
    BASE_URL = 'http://classicag.ru/price/'

    import urllib2
    from contextlib import closing
    from bs4 import BeautifulSoup

    # The urllib2 response is not a context manager on Python 2; closing()
    # guarantees the socket is released even if read() fails.
    with closing(urllib2.urlopen(BASE_URL)) as response:
        html = response.read()

    # NOTE(review): no parser is named, so bs4 chooses one itself.
    soup = BeautifulSoup(html)

    # print(...) with a single argument behaves exactly like the print
    # statement on Python 2, so the emitted text is unchanged.
    for category in soup.findAll('h2', attrs={'class': 'button'}):
        category_name = category.contents[0].string
        print('___________________________________________________________')
        print(category_name)
        cat = Category(name=category_name)
        cat.save()

        category_container = category.findNextSiblings('div')[0]
        for subcat in category_container.findAll('h3'):
            print('   ' + subcat.string)
            subc = SubCategory(category=cat, name=subcat.string)
            subc.save()

            items_table = subcat.parent.findAll('table')[0]
            for tr in items_table.findAll('tr'):
                # Look the cells up once per row instead of re-searching
                # the row for every field.
                cells = tr.findAll('td')
                if len(cells) < 3:
                    # Not an item row (e.g. a header row made of <th>).
                    continue
                inputs = cells[1].findAll('input')
                if len(inputs) < 2:
                    # No price field in this row, so nothing to import.
                    continue

                item_name = cells[0].string
                raw_price = inputs[1]['value']
                print('      ' + item_name + ' | ' + str(raw_price))
                i = Item(subcategory=subc,
                         name=item_name,
                         unit=cells[2].string,
                         price=int(raw_price))
                i.save()
    return HttpResponseRedirect('/price/')
Exemplo n.º 2
0
def _iter_price_rows(items_table):
    """Yield ``(name, unit, raw_price)`` for each usable row of *items_table*.

    A row is usable when it has at least three ``<td>`` cells and the second
    cell holds at least two ``<input>`` elements; other rows (for example a
    header row built from ``<th>``) are skipped.  ``name`` and ``unit`` are
    the ``.string`` of the first and third cells, ``raw_price`` is the
    ``value`` attribute of the second ``<input>`` in the second cell.
    """
    for tr in items_table.findAll("tr"):
        # One lookup per row; every field below is read from this list.
        cells = tr.findAll("td")
        if len(cells) < 3:
            continue
        inputs = cells[1].findAll("input")
        if len(inputs) < 2:
            continue
        yield cells[0].string, cells[2].string, inputs[1]["value"]


def price_parse(request):
    """Import the price list published at ``BASE_URL`` and redirect.

    For every ``<h2 class="button">`` heading a ``Category`` is saved; for
    every ``<h3>`` in the first ``<div>`` sibling after that heading a
    ``SubCategory`` is saved; for every usable row of the first ``<table>``
    under the ``<h3>``'s parent an ``Item`` is saved with its price
    converted via ``int``.  Each step is echoed to stdout.

    Args:
        request: the incoming request object; it is not inspected.

    Returns:
        ``HttpResponseRedirect("/price/")``.

    Raises:
        IndexError: a heading has no following ``<div>`` or a sub-category
            has no ``<table>``.
        KeyError: the price ``<input>`` lacks a ``value`` attribute.
        ValueError: the price ``value`` cannot be converted by ``int``.
    """
    BASE_URL = "http://classicag.ru/price/"

    import urllib2
    from contextlib import closing
    from bs4 import BeautifulSoup

    # closing() releases the connection once the body has been read; the
    # urllib2 response object cannot be used in a ``with`` block directly.
    with closing(urllib2.urlopen(BASE_URL)) as response:
        soup = BeautifulSoup(response.read())

    # Single-argument print(...) is equivalent to the print statement on
    # Python 2, so stdout output is unchanged.
    for category in soup.findAll("h2", attrs={"class": "button"}):
        print("___________________________________________________________")
        print(category.contents[0].string)
        cat = Category(name=category.contents[0].string)
        cat.save()

        category_container = category.findNextSiblings("div")[0]
        for subcat in category_container.findAll("h3"):
            print("   " + subcat.string)
            subc = SubCategory(category=cat, name=subcat.string)
            subc.save()

            items_table = subcat.parent.findAll("table")[0]
            for name, unit, raw_price in _iter_price_rows(items_table):
                print("      " + name + " | " + str(raw_price))
                i = Item(
                    subcategory=subc,
                    name=name,
                    unit=unit,
                    price=int(raw_price),
                )
                i.save()
    return HttpResponseRedirect("/price/")