def revviewd():
    """Worker loop: pull "revviewd" jobs and record the reviewers of each item.

    Each iteration asks ``data.outputitems("revviewd")`` for a job. When
    nothing is returned the loop sleeps five seconds and polls again.
    Otherwise the job's ``"_id"`` is used to build the amazon.cn
    product-reviews URL, a ``ReviewDetail`` is built from that URL, and the
    user ids it reports are stored on the item via ``data.updateItemUsers``
    and inserted one by one via ``data.insertuserid``.

    The loop has no exit condition; it only stops if an exception
    propagates out of it.
    """
    data = Data()
    while True:
        job = data.outputitems("revviewd")
        if job is None:
            # Nothing queued right now: report it and back off before polling.
            print("revviewd\t%s no job!" % job)
            time.sleep(5)
            continue

        itemid = job["_id"]
        customerreviewlink = "http://www.amazon.cn/product-reviews/%s" % itemid
        print("revviewd\t%s " % itemid)

        # NOTE(review): a try/except around this call (print the exception,
        # call data.changeitemidstauts(itemid, "revviewd"), continue) had been
        # commented out, so a failure here propagates and ends the loop.
        # Confirm that this is intended before re-enabling it.
        reviewdd = ReviewDetail(customerreviewlink)

        useridss = reviewdd.getUserids()
        data.updateItemUsers({"_id": itemid, "reviewusers": useridss})
        for user in useridss:
            data.insertuserid(user)
def itemdetail():
    """Worker loop: pull "itemdetail" jobs and store each item's details.

    Each iteration asks ``data.outputitems("itemdetail")`` for a job. When
    nothing is returned the loop sleeps five seconds and polls again.
    Otherwise an ``ItemDetail`` is built from the job's ``"_id"``; every
    entry of its ``realteitem`` attribute is passed to ``data.insertitemid``
    and the document returned by ``getDoc()`` is passed to
    ``data.insertItem``.

    If constructing ``ItemDetail`` raises, the exception is printed,
    ``data.changeitemidstauts(mid, "itemdetail")`` is called and the loop
    moves on to the next job.

    The loop has no exit condition; it only stops if an exception
    propagates out of it.
    """
    data = Data()
    while True:
        job = data.outputitems("itemdetail")
        if job is None:
            # Nothing queued right now: report it and back off before polling.
            print("itemdetail\t%s \tno job!" % job)
            time.sleep(5)
            continue

        mid = job["_id"]
        print("itemdetail\t%s \t" % mid)

        try:
            itemd = ItemDetail(mid)
        except Exception as ex:
            # Best-effort worker: report the failure, update the item's
            # status through the data layer, and keep the loop alive.
            print("itemdetail %s" % (ex,))
            data.changeitemidstauts(mid, "itemdetail")
            continue

        for item in itemd.realteitem:
            data.insertitemid(item)
        doc = itemd.getDoc()
        data.insertItem(doc)