Exemplo n.º 1
0
 def start_requests(self):
     """Yield one request per ``lj_residence`` row that has a usable ``esf_url``.

     Selects ``(id, esf_url)`` from ``lj_residence`` for rows whose ``url``
     starts with ``https://wh.lianjia.com/``. Each yielded ``Request``
     carries the row id in ``meta['id']`` and is handled by
     ``self.get_esf_url``.
     """
     rows = Postgresql().query_by_sql(
         "select id,esf_url from lj_residence where url like 'https://wh.lianjia.com/%'"
     )
     for id_, url in rows:
         # The literal string 'None' is treated as "no URL" (as before).
         # A real NULL (Python None) or an empty string is skipped as well:
         # assuming Scrapy's Request, either would raise and abort the
         # remaining seeds instead of just dropping the one bad row.
         if not url or url == 'None':
             continue
         yield Request(url, meta={'id': id_}, callback=self.get_esf_url)
Exemplo n.º 2
0
 def process_request(self, request, spider):
     """Attach the proxy stored in table ``proxy`` (row ``id=1``) to *request*.

     The proxy address is read on every call and, when it is not ``None``,
     written to ``request.meta['proxy']`` with an ``http://`` prefix.

     Best-effort: if the lookup fails for any reason (query error, no row,
     non-string value) the request is left untouched. Always returns
     ``None``; *spider* is not used.
     """
     try:
         proxy_url = Postgresql().query_by_sql(
             'select ip from proxy where id=1')[0][0]
         # A single pre-formatted argument prints the same text whether
         # ``print`` is a statement (Python 2) or a function (Python 3).
         print('proxyUrl: %s' % (proxy_url,))
         if proxy_url is not None:
             request.meta['proxy'] = "http://" + proxy_url
     except Exception:
         # Deliberate catch-all: a broken proxy lookup must not stop the
         # request, which then goes out without a proxy.
         print('my ip>>>')
Exemplo n.º 3
0
    def start_requests(self):
        """Yield one request per (district, community route) pair.

        Rows come from joining ``lj_district`` and ``lj_community`` on the
        district id. Districts 18 and 19 use ``self.start_urls[1]`` as the
        base URL and every other district uses ``self.start_urls[0]``; the
        community route plus a trailing slash is appended. Responses are
        handled by ``self.get_deal_new_url``.
        """
        rows = Postgresql().query_by_sql('select d.id,co.route from lj_district as d,lj_community as co where d.id=co.district_id')

        for district_id, community_route in rows:
            # Pick the base lazily so start_urls[1] is only touched when needed.
            base_index = 1 if district_id in (18, 19) else 0
            target = self.start_urls[base_index] + community_route + '/'
            yield Request(target, callback=self.get_deal_new_url)
Exemplo n.º 4
0
 def start_requests(self):
     """Yield one request per community route belonging to city ``id=5``.

     Each row pairs a community ``route`` with its city's base ``url``. The
     request targets ``<url>ershoufang/<route>/``, carries the route in
     ``meta['community']`` and is handled by ``self.get_esf_url``.
     ``dont_filter=True`` is passed to ``Request`` for every seed.
     """
     sql = '''
                     select co.route,c.url
                     from lj_community co,lj_district d,lj_city c
                     where d.id=co.district_id and d.city_id=c.id and c.id=5
                 '''
     for community_route, city_url in Postgresql().query_by_sql(sql):
         target = city_url + 'ershoufang/' + community_route + '/'
         extra = {'community': community_route}
         yield Request(
             target,
             meta=extra,
             callback=self.get_esf_url,
             dont_filter=True
         )
 def start_requests(self):
     """Yield one ``xiaoqu`` page request per community in city ``id=2``.

     Rows are ``(id, cn_name, route)`` from ``lj_community`` joined to
     ``lj_district``. The request URL is
     ``http://sh.lianjia.com/xiaoqu/<route>/``; the community id and name
     travel in ``meta`` and the response is handled by ``self.get_count``.
     ``dont_filter=True`` is passed to ``Request`` for every seed.
     """
     sql = '''
                             select c.id,c.cn_name,c.route
                             from lj_community c,lj_district d
                             where c.district_id=d.id and d.city_id=2;
                         '''
     for community_id, community_name, community_route in Postgresql().query_by_sql(sql):
         target = 'http://sh.lianjia.com/xiaoqu/' + community_route + '/'
         extra = {'id': community_id, 'name': community_name}
         yield Request(target,
                       meta=extra,
                       callback=self.get_count,
                       dont_filter=True)
Exemplo n.º 6
0
    def start_requests(self):
        """Yield one request per community in city ``id=3``.

        Rows are ``(id, cn_name, route)`` from ``lj_community`` joined to
        ``lj_district``. The request URL is ``self.start_urls[0]`` followed
        by the route and a trailing slash; the id, route and name travel in
        ``meta`` and the response is handled by ``self.get_count2``.
        ``dont_filter=True`` is passed to ``Request`` for every seed.
        """
        sql = '''select co.id,co.cn_name,co.route
                                                from lj_district d,lj_community co
                                                where d.id=co.district_id and d.city_id=3
                                        '''

        for community_id, community_name, community_route in Postgresql().query_by_sql(sql):
            target = self.start_urls[0] + community_route + '/'
            extra = {
                'id': community_id,
                'route': community_route,
                'name': community_name
            }
            yield Request(target,
                          meta=extra,
                          callback=self.get_count2,
                          dont_filter=True)