Пример #1
0
def getNum(url):
    """Return the total number of results reported by a search results page.

    The count is read from the first child of the page's result-summary
    ``<h2>`` heading, as the number that directly precedes the word
    "results" (thousands separators are removed).

    Args:
        url: Address of the results page; passed to ``getSoup``.

    Returns:
        The result count as an int, or -1 when ``getSoup`` reports failure
        (returns -1), when the heading is missing or empty, or when no count
        can be found in the heading.
    """
    import re  # function-scope import keeps this block self-contained

    soup = getSoup(url)
    if soup == -1:
        return -1

    heading = soup.find(
        'h2',
        {'class': 'a-size-base a-spacing-small a-spacing-top-small a-text-normal'})
    # find() yields None when the heading is absent; treat that like any
    # other failure instead of raising AttributeError.
    if heading is None or not heading.contents:
        return -1

    try:
        # Anchor on the number right before "results" rather than slicing
        # between "of " and " results": the slice breaks when "of " is
        # missing or when extra words sit between the two markers.
        match = re.search(r'(\d[\d,]*)\s+results', heading.contents[0])
    except TypeError:
        # The heading's first child is not text.
        return -1
    if match is None:
        return -1
    return int(match.group(1).replace(',', ''))
Пример #2
0
def getData(line):
    """Build an offers/reviews/stars record for one whitespace-separated line.

    Field 0 of the line is used as the ASIN, field 1 as the review value and
    field 3 as the star value. The search page for the ASIN is then fetched
    to fill in the offer count and, when the star value is 'N', the rating.

    Args:
        line: Text record; split on whitespace.

    Returns:
        -1 when ``check`` returns 0 for field 1. Otherwise a dict with keys
        'ASIN', 'offers', 'reviews' and 'stars'; if ``getSoup`` returns -1
        the dict is returned with 'offers' left at 'N' and the input values
        unchanged.
    """
    fields = line.split()
    if check(fields[1]) == 0:
        return -1

    asin = fields[0]
    url_head = 'http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords='
    url_tail = '&rh=i%3Aaps%2Ck%3AB00R8GX5WO'
    search_url = url_head + asin + url_tail

    record = {
        'ASIN': asin,
        'offers': 'N',
        'reviews': fields[1],
        'stars': fields[3],
    }

    page = getSoup(search_url)
    if page == -1:
        return record

    # First text node mentioning "offers"; its leading token is the count,
    # with any opening parenthesis stripped.
    offer_texts = page.findAll(text=re.compile('offers'))
    if offer_texts:
        record['offers'] = offer_texts[0].split()[0].strip('(')

    # Only look the rating up on the page when the input did not supply one.
    if record['stars'] == 'N':
        star_texts = page.findAll(text=re.compile('out of 5 stars'))
        if star_texts:
            record['stars'] = star_texts[0].split()[0]

    return record
Пример #3
0
def _parse_offer_count(item):
    """Return the offer-count token for one result container, or None."""
    new_offer = item.find('span', {'class': 'a-size-base a-color-price a-text-bold'})
    if not new_offer:
        return None
    offer_num = new_offer.parent.find('span', {'class': 'a-color-secondary'})
    return offer_num.contents[0].split()[0].strip('(')


def _parse_review_count(item):
    """Return the last child of the item's last small link if it mentions 'Reviews', else None."""
    links = item.findAll('a', {'class': "a-size-small a-link-normal a-text-normal"})
    if not links:
        return None
    last_link = links[-1]
    if 'Reviews' in last_link.prettify():
        return last_link.contents[-1]
    return None


def _parse_star_rating(item):
    """Return the leading token of the item's 'out of 5 stars' text, or None."""
    stars = item.find(text=re.compile('out of 5 stars'))
    if stars:
        return stars.split()[0]
    return None


def getData(i, interval):
    """Scrape one results page into a list of per-item records.

    The page URL is built from ``global_url.format(interval[0], interval[1], i)``.
    Each ``div.s-item-container`` on the page yields a dict with keys 'ASIN',
    'offers', 'reviews' and 'stars'; a field that cannot be extracted keeps
    the placeholder 'N'.

    Args:
        i: Page number; formatted into the URL and echoed back to the caller.
        interval: Indexable whose first two elements are formatted into the URL.

    Returns:
        ``(results, i)`` on success, or ``(-1, i)`` when ``getSoup`` returns -1.
    """
    url = global_url.format(interval[0], interval[1], i)

    soup = getSoup(url)
    if soup == -1:
        print("errrrrrrrror\t%s" % url)
        return -1, i

    contents = soup.findAll('div', {'class': "s-item-container"})
    print("page: %d  num: %d " % (i, len(contents)))

    extractors = (
        ('offers', _parse_offer_count),
        ('reviews', _parse_review_count),
        ('stars', _parse_star_rating),
    )

    results = []
    for item in contents:
        result = {'ASIN': item.parent['data-asin'], 'offers': 'N', 'reviews': 'N', 'stars': 'N'}
        failed = False
        for key, extract in extractors:
            # Each field is parsed independently so that unexpected markup in
            # one of them no longer prevents the later fields from being read.
            try:
                value = extract(item)
            except Exception:
                # Best-effort scraping: keep the 'N' placeholder for this
                # field, but let KeyboardInterrupt/SystemExit propagate.
                failed = True
                continue
            if value is not None:
                result[key] = value
        if failed:
            # Report the item whose markup could not be fully parsed.
            print(result['ASIN'])
        results.append(result)

    return results, i
Пример #4
0
def getData(line):
    """Collect the weight and the first few new-offer prices for one product.

    ``check(line)`` supplies the product record (it must contain 'ASIN').
    The product page is searched for a weight, which is stored under
    ``output['weight']``; the offer-listing page is then scanned for prices.

    Args:
        line: Input record; passed to ``check`` unchanged.

    Returns:
        A 3-tuple:
        * ``(-2, [], [])`` when ``check`` returns -1;
        * ``(-1, [], [])`` when either page cannot be fetched (``getSoup``
          returns -1) or an offer row fails to parse;
        * ``(output, prime_list, uprime_list)`` otherwise, where
          ``prime_list`` holds prices (float) of offers containing a
          'supersaver' span and ``uprime_list`` holds ``(price, shipping)``
          tuples for the remaining offers. Each list has at most two entries.
    """
    output = check(line)
    if output == -1:
        return (-2, [], [])

    asin = output['ASIN']
    url1 = 'http://www.amazon.com/Revgear-129004-Youth-Boxing-Glove/dp/' + asin + '/ref=sr_1_1?ie=UTF8&qid=1450578728&sr=8-1'
    url2 = 'http://www.amazon.com/gp/offer-listing/' + asin + '/ref=sr_1_1_olp?ie=UTF8&qid=1449347231&sr=8-1&keywords=B0019CU6T8&condition=new'

    soup = getSoup(url1)
    if soup == -1:
        return (-1, [], [])

    tmp_pounds = soup.findAll(text=re.compile('pounds'))
    tmp_ounces = soup.findAll(text=re.compile('ounces'))

    # Lookup order is significant and mirrors the original fallback chain:
    # all four (texts, label) pairs with mode 1 first, then again with mode 0.
    # Per the original comments, mode 1 inspects the parent and mode 0 the
    # parent's parent; getWeight itself is defined elsewhere.
    attempts = (
        (tmp_pounds, 'Shipping Weight'),
        (tmp_ounces, 'Shipping Weight'),
        (tmp_ounces, 'Item Weight'),
        (tmp_pounds, 'Item Weight'),
    )
    weight = 'N'
    for mode in (1, 0):
        for texts, label in attempts:
            # 'N' is getWeight's "not found" marker; stop calling once a
            # weight has been found.
            if weight == 'N':
                weight = getWeight(texts, label, mode)
    output['weight'] = weight

    soup = getSoup(url2)
    if soup == -1:
        return (-1, [], [])

    products = soup.findAll('div', {'class': 'a-row a-spacing-mini olpOffer'})
    prime_list, uprime_list = [], []

    for product in products:
        try:
            raw = product.find('span', {'class': 'a-size-large a-color-price olpOfferPrice a-text-bold'})
            if not raw:
                continue
            price = float(raw.contents[0].strip().split('$')[1].replace(',', ''))

            if product.find('span', {'class': 'supersaver'}):
                # NOTE(review): hitting a third offer of one kind ends the
                # whole scan, even if the other list is not yet full --
                # confirm this early exit is intended.
                if len(prime_list) > 1:
                    break
                prime_list.append(price)
            else:
                if len(uprime_list) > 1:
                    break
                shipping_raw = product.find('span', {'class': 'olpShippingPrice'})
                if shipping_raw:
                    # Strip thousands separators, as is done for the price.
                    shipping = float(shipping_raw.contents[0].split('$')[1].replace(',', ''))
                else:
                    shipping = 0
                uprime_list.append((price, shipping))
        except Exception:
            # Any malformed offer row invalidates the whole result; unlike a
            # bare except, KeyboardInterrupt/SystemExit still propagate.
            print(product)
            return (-1, [], [])

    return (output, prime_list, uprime_list)