def fetch_deal_in(self, city_id):
    try:
        total_page_num = -1
        total_deal_num = -1
        start_page_num = 1
        print "lashou_crawl.py fetch_deal_in", self.mapping[city_id]
        while True:
            content = tuan_http.http_fetch(self.url_deal + city_id + "/p/" + str(start_page_num) + "/r/500", self.user_agent)
            # Page 1 is stored under the bare city id; later pages get a suffix.
            if start_page_num == 1:
                filepath = os.path.join(self.data_store, city_id)
            else:
                filepath = os.path.join(self.data_store, city_id + "_" + str(start_page_num))
            # "with" guarantees the file is closed even if write() fails.
            with open(filepath, "w") as fhandler:
                fhandler.write(content)
            if total_deal_num == -1:
                root = ET.fromstring(content)
                total_deal_num = int(root.attrib["count"])
                # Integer ceiling division: 500 deals per page, and an exact
                # multiple of 500 must not add a trailing empty page.
                total_page_num = (total_deal_num + 499) // 500
            if start_page_num >= total_page_num:
                break
            start_page_num += 1
    except Exception, e:
        print "Error: lashou_crawl.py fetch_deal_in", self.mapping[city_id], e
        raise
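# A minimal, self-contained sketch (not part of the crawler) of the page
# arithmetic used above: with 500 deals per page, the page count is the
# ceiling of total_deal_num / 500. The name "page_count" is hypothetical.
def page_count(total_deal_num, per_page=500):
    # Integer ceiling division; 0 deals means 0 pages beyond the first fetch.
    return (total_deal_num + per_page - 1) // per_page

assert page_count(1) == 1
assert page_count(500) == 1   # an exact multiple must not add an empty page
assert page_count(501) == 2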
def fetch_deal_in(self, city): try: print "dida_crawl.py fetch_deal_in", city content = tuan_http.http_fetch(self.url_deal + city, self.user_agent) filepath = os.path.join(self.data_store, city) fhandler = open(filepath, "w") fhandler.write(content) fhandler.close() except Exception, e: print "Error: dida_crawl.py fetch_deal_in", city, e raise
def fetch_deal_in(self, city):
    try:
        start_page_num = 1
        count_per_page = 10000
        print "dianping_crawl.py fetch_deal_in", city
        content = tuan_http.http_fetch(self.url_deal + self.mapping[city] + "&page=" + str(start_page_num) + "&count=" + str(count_per_page), self.user_agent)
        filepath = os.path.join(self.data_store, city)
        with open(filepath, "w") as fhandler:
            fhandler.write(content)
    except Exception, e:
        print "Error: dianping_crawl.py fetch_deal_in", city, e
        raise
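# The dida and dianping fetchers above share the same open/write/close
# sequence. A hedged sketch of a helper they could both call; the name
# "_save" and its placement on a common crawler base class are assumptions,
# not part of the original module.
def _save(self, name, content):
    # Write one fetched payload into this crawler's data_store directory.
    filepath = os.path.join(self.data_store, name)
    with open(filepath, "w") as fhandler:
        fhandler.write(content)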
def fetch_city_list(self):
    try:
        content = tuan_http.http_fetch(self.url_city_list, self.user_agent)
        city_list = []
        root = ET.fromstring(content)
        # Each <city> element carries its identifier in a child <id> node.
        for city in root.iter('city'):
            city_list.append(city.find('id').text)
        return city_list
    except Exception, e:
        print "Error: dida_crawl.py fetch_city_list", e
        raise
def fetch_city_list(self):
    try:
        print "wuba_crawl.py fetch_city_list"
        content = tuan_http.http_fetch(self.url_city_list, self.user_agent)
        filepath = os.path.join(self.data_store, "city_list")
        with open(filepath, "w") as fhandler:
            fhandler.write(content)
        city_list = []
        root = ET.fromstring(content)
        # Each <city> element carries its English name in a child <enname> node.
        for city in root.iter("city"):
            city_list.append(city.find("enname").text)
        return city_list
    except Exception, e:
        print "Error: wuba_crawl.py fetch_city_list", e
        raise
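# A self-contained illustration of the city-list parsing above, run against
# a made-up two-city payload; the real feed's XML schema is assumed to match
# what root.iter("city") and city.find("enname") expect.
import xml.etree.ElementTree as ET

sample = "<cities><city><enname>beijing</enname></city>" \
         "<city><enname>shanghai</enname></city></cities>"
root = ET.fromstring(sample)
print [city.find("enname").text for city in root.iter("city")]
# -> ['beijing', 'shanghai']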
def _fetch(self, url):
    # Fetch url with this crawler's user agent via the shared HTTP helper.
    return tuan_http.http_fetch(url, self.user_agent)
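# Crawlers like the ones above often benefit from retrying transient HTTP
# failures. A minimal sketch, assuming tuan_http.http_fetch raises on error
# as the callers above imply; the function name, attempt count, and delay
# are illustrative and not taken from the original code.
import time

def fetch_with_retry(crawler, url, attempts=3, delay=2):
    for attempt in range(1, attempts + 1):
        try:
            return crawler._fetch(url)
        except Exception:
            if attempt == attempts:
                raise  # give up after the last attempt
            time.sleep(delay)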