def fetch_slick_images(url):
    """Download the page at *url* and return the image URLs found in it.

    Parameters:
        url: absolute URL of a slickdeals thread page.

    Returns:
        The ``images`` list collected by ``sdImageParser`` after feeding
        it the page HTML.
    """
    imageParser = sdImageParser()
    web = urllib.urlopen(url)
    try:
        # Fix: original leaked the connection if read() raised; always close.
        html = web.read()
    finally:
        web.close()
    imageParser.feed(html)
    return imageParser.images
def fetch_new_title_slick(old_title_hash, new_deal_hash, new_title_hash, filtered_deal_hash): imageParser = sdImageParser() print "fetch_new_title_slick" selector = dealSelector() hasNewTitle = True index = 1 cur = datetime.now() timestamp = "%i" % ( ( ( cur.year * 100 + cur.month) * 100 + cur.day ) * 100 + cur.hour ) dirname = "../data/slickdeal/%s" % timestamp try: os.mkdir(dirname) except: pass while hasNewTitle and index <= 30: print "\tprocessing %i" % index url='http://slickdeals.net/forums/forumdisplay.php?f=9&page={0}&order=desc&sort=lastpost'.format(index) web = urllib.urlopen(url) html = web.read() web.close() filename = "../data/slickdeal/%s/%i.html" % (timestamp, index) output = open(filename, "w") output.write(html) output.close() parser = sdParser() parser.feed(html) hasNewTitle = False for title in parser.promo_hash: if title not in old_title_hash: old_title_hash[title] = url hasNewTitle = True url = parser.promo_hash[title][0] new_title_hash[title] = url if selector.checkDeal(title): new_deal_hash[title] = [] real_url = "http://slickdeals.net" + url new_deal_hash[title].append(real_url) new_deal_hash[title].append("") new_deal_hash[title].append("") for image in fetch_slick_images(real_url): new_deal_hash[title].append(image) else: filtered_deal_hash[title] = selector.filter_rule index += 1