Exemplo n.º 1
0
import lxml.html
import requests
from neweggpy.nefuncs import IterPages, BoolToInt, getPIDS, getData, insertData

# Mobile Newegg product-list URL for the "Computer Cases" category. The query
# string is split across adjacent literals purely for readability.
baseurl = (
    "http://m.newegg.com/ProductList?description=Computer+Cases"
    "&categoryId=7&storeId=1&nodeId=7583&parentCategoryId=9"
    "&isSubCategory=true&categoryType=1"
)

# Fetch the first listing page. requests applies no timeout by default, so an
# explicit one keeps the script from hanging forever on a stalled connection;
# raise_for_status() aborts on an HTTP error instead of letting an error page
# be parsed as if it were a product listing.
response = requests.get(baseurl, timeout=30)
response.raise_for_status()
pg1 = response.content
root1 = lxml.html.fromstring(pg1)

# NOTE(review): IterPages is assumed to return the number of listing pages as
# an int (it is used as a range() bound below) -- confirm in neweggpy.nefuncs.
page_count = IterPages(root1)

# One URL per listing page, numbered 1 through page_count inclusive.
URLs = ["%s&Page=%s" % (baseurl, pgnum) for pgnum in range(1, page_count + 1)]

# FETCH AND PARSE THE DATA
# NOTE(review): presumably getPIDS gathers product IDs from the listing pages
# and getData builds a table of product details from them -- verify against
# neweggpy.nefuncs.
pids = getPIDS(URLs, root1)
df = getData(pids)

# PUT DATA IN DATABASE
# NOTE(review): "computercases" looks like the destination table name -- confirm.
insertData("computercases", df)
import lxml.html
import requests
from neweggpy.nefuncs import IterPages, BoolToInt, getPIDS, getData, insertData

# Mobile Newegg product-list URL for the "Computer Cases" category. A
# parenthesised group of adjacent literals replaces the backslash
# continuations; the resulting string is unchanged.
baseurl = (
    'http://m.newegg.com/ProductList?description=Computer+Cases'
    '&categoryId=7&storeId=1&nodeId=7583&parentCategoryId=9'
    '&isSubCategory=true&categoryType=1'
)

# Download the first listing page. Without an explicit timeout requests will
# wait indefinitely on an unresponsive server, and without a status check an
# HTTP error page would be handed to the parser as though it were a listing.
first_page = requests.get(baseurl, timeout=30)
first_page.raise_for_status()
pg1 = first_page.content
root1 = lxml.html.fromstring(pg1)

# NOTE(review): IterPages is assumed to yield the total page count as an int,
# since it feeds range() below -- confirm in neweggpy.nefuncs.
page_count = IterPages(root1)

# Build the URL of every listing page, from page 1 up to and including
# page_count.
URLs = ['%s&Page=%s' % (baseurl, pgnum) for pgnum in range(1, page_count + 1)]

# FETCH AND PARSE THE DATA
# NOTE(review): getPIDS/getData are opaque here; presumably they collect
# product IDs and then the per-product data -- verify against nefuncs.
pids = getPIDS(URLs, root1)
df = getData(pids)

# PUT DATA IN DATABASE
# NOTE(review): 'computercases' is presumably the target table name -- confirm.
insertData('computercases', df)