def start_requests(self):
    """Yield one Request per lj_residence row that has a usable esf_url.

    Rows are read from lj_residence, restricted to those whose ``url``
    starts with ``https://wh.lianjia.com/``. Each request carries the row
    id in ``meta['id']`` and is handled by ``self.get_esf_url``.
    """
    # An earlier variant fetched every row with
    # Postgresql().query('lj_residence', ['id', 'esf_url']).
    sql = (
        "select id,esf_url from lj_residence "
        "where url like 'https://wh.lianjia.com/%'"
    )
    rows = Postgresql().query_by_sql(sql)
    for residence_id, esf_url in rows:
        # Rows whose esf_url is the literal text 'None' are skipped.
        if esf_url == 'None':
            continue
        yield Request(
            esf_url,
            meta={'id': residence_id},
            callback=self.get_esf_url
        )
def process_request(self, request, spider):
    """Attach the proxy stored in the ``proxy`` table (id=1) to *request*.

    The proxy lookup is best-effort: if the query fails for any reason, or
    returns no row or an empty value, the request is left untouched so it
    goes out without a proxy.

    Args:
        request: object exposing a mutable ``meta`` mapping; on success
            ``meta['proxy']`` is set to ``"http://" + <ip>``.
        spider: unused here; kept because the signature requires it.

    Returns:
        None in every case.
    """
    # print(...) with a single pre-formatted string behaves the same under
    # Python 2 (print statement) and Python 3 (print function).
    try:
        rows = Postgresql().query_by_sql('select ip from proxy where id=1')
        # An empty result set means "no proxy configured", not an error.
        proxy_url = rows[0][0] if rows else None
    except Exception as e:
        # Deliberately broad: a proxy lookup problem must not abort the
        # request. Report the cause instead of hiding it.
        print('my ip>>> %r' % (e,))
        return None

    print('proxyUrl: %s' % (proxy_url,))
    # Also skip empty strings, which would produce the bogus proxy "http://".
    if proxy_url:
        request.meta['proxy'] = "http://" + proxy_url
    return None
def start_requests(self):
    """Yield one Request per (district, community) row.

    The base URL is ``self.start_urls[1]`` for district ids 18 and 19 and
    ``self.start_urls[0]`` for every other district. The community route
    and a trailing slash are appended, and responses are handled by
    ``self.get_deal_new_url``.
    """
    sql = (
        'select d.id,co.route '
        'from lj_district as d,lj_community as co '
        'where d.id=co.district_id'
    )
    rows = Postgresql().query_by_sql(sql)
    for district_id, community_route in rows:
        base_index = 1 if district_id in [18, 19] else 0
        base_url = self.start_urls[base_index]
        target = base_url + community_route + '/'
        yield Request(target, callback=self.get_deal_new_url)
def start_requests(self):
    """Yield one 'ershoufang' listing Request per community of city id 5.

    Each row supplies the community route and the city's base url; the
    request URL is ``<url>ershoufang/<route>/``. The route is passed on in
    ``meta['community']`` and responses go to ``self.get_esf_url``.
    Duplicate filtering is disabled for these requests.
    """
    sql = (
        ' select co.route,c.url '
        'from lj_community co,lj_district d,lj_city c '
        'where d.id=co.district_id and d.city_id=c.id and c.id=5 '
    )
    rows = Postgresql().query_by_sql(sql)
    for community_route, city_url in rows:
        listing_url = city_url + 'ershoufang/' + community_route + '/'
        request_meta = {'community': community_route}
        yield Request(
            listing_url,
            meta=request_meta,
            callback=self.get_esf_url,
            dont_filter=True
        )
def start_requests(self):
    """Yield one 'xiaoqu' page Request per community of city id 2.

    The request URL is ``http://sh.lianjia.com/xiaoqu/<route>/``. The
    community id and name travel in ``meta`` under ``'id'`` and ``'name'``;
    responses go to ``self.get_count``. Duplicate filtering is disabled.
    """
    sql = (
        ' select c.id,c.cn_name,c.route '
        'from lj_community c,lj_district d '
        'where c.district_id=d.id and d.city_id=2; '
    )
    rows = Postgresql().query_by_sql(sql)
    for community_id, community_name, community_route in rows:
        page_url = 'http://sh.lianjia.com/xiaoqu/' + community_route + '/'
        request_meta = {'id': community_id, 'name': community_name}
        yield Request(
            page_url,
            meta=request_meta,
            callback=self.get_count,
            dont_filter=True
        )
def start_requests(self):
    """Yield one Request per community of city id 3.

    The request URL is ``self.start_urls[0] + <route> + '/'``. The
    community id, route and name travel in ``meta`` under ``'id'``,
    ``'route'`` and ``'name'``; responses go to ``self.get_count2``.
    Duplicate filtering is disabled.
    """
    sql = (
        'select co.id,co.cn_name,co.route '
        'from lj_district d,lj_community co '
        'where d.id=co.district_id and d.city_id=3 '
    )
    rows = Postgresql().query_by_sql(sql)
    for community_id, community_name, community_route in rows:
        page_url = self.start_urls[0] + community_route + '/'
        request_meta = {
            'id': community_id,
            'route': community_route,
            'name': community_name,
        }
        yield Request(
            page_url,
            meta=request_meta,
            callback=self.get_count2,
            dont_filter=True
        )