Пример #1
0
 def start_requests(self):
     """Yield one pledge-proportion query request per date.

     Only the start URL that matches the chinaclear pledge-proportion
     template is expanded; every other entry of ``self.start_urls`` is
     skipped. The dates come from ``S.datelist('20100101', today,
     "%Y.%m.%d")`` and are visited in reversed list order (presumably
     newest first -- confirm against ``S.datelist``).

     No callback is passed, so the responses are handled by whatever
     callback Scrapy applies by default. Each request carries the page
     number ``1`` and its query date in ``meta``.
     """
     pledge_template = 'http://www.chinaclear.cn/cms-rank/queryPledgeProportion?queryDate={date}&secCde='
     first_page = 1
     for template in self.start_urls:
         if template != pledge_template:
             continue
         until = datetime.datetime.now().strftime("%Y%m%d")
         query_dates = S.datelist('20100101', until, "%Y.%m.%d")
         # Reverse in place, exactly as the list handed back by S.datelist.
         query_dates.reverse()
         for query_date in query_dates:
             target = template.format(date=query_date)
             yield scrapy.Request(
                 target,
                 meta={'page': first_page, 'date': query_date},
                 headers=hdr(),
                 priority=0,
             )
Пример #2
0
    def start_requests(self):
        """Yield the first-page request(s) for every recognised SZSE start URL.

        Three kinds of start URL are handled:

        * the bare ``FrontController.szse`` endpoint, which receives one POST
          per entry of ``self.Start`` (catalog id -> callback), with a body
          built by ``szse_data(page, catalog_id)``;
        * the ``1837_xxpl`` template, which is expanded once per date returned
          by ``S.datelist('20100101', today, "%Y-%m-%d")`` in reversed list
          order and handled by ``self.rzrqparse``;
        * paged listing templates, each formatted with ``page=1`` and routed
          to the callback named in the lookup table below.

        URLs that match none of these are silently skipped. ``self.Start``
        is (re)assigned as a side effect when iteration begins.
        """
        self.Start = {
            '1110': self.colistparse,
            '1105': self.fundlistparse,
            '1273': self.zqparse,
        }

        post_endpoint = 'http://www.szse.cn/szseWeb/FrontController.szse'
        dated_template = 'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1837_xxpl&TABKEY=tab1&txtDate={date}&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate'
        # This template embeds today's date before it is compared, so it
        # cannot live in the static table.
        suspension_template = 'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1798&TABKEY=tab1&txtKsrq=2000-01-01&txtZzrq=%s&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate'
        # Paged listing URL -> name of the parsing method on this spider.
        # Names are resolved with getattr only when a URL actually matches.
        paged_callbacks = {
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1900&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate':
                'zrdsparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1901&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'zrdmparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1901&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate':
                'dmzgpxdaparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1900&TABKEY=tab3&tab3PAGENO={page}&tab3PAGECOUNT=&tab3RECORDCOUNT=&REPORT_ACTION=navigate':
                'dsrckparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1759_cxda&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'chufaparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1903_detail&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'zhongjiechufaparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1902&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'jiechuxianshoufaparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1902&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate':
                'jiechuxianshou1perfaparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1902&TABKEY=tab3&tab3PAGENO={page}&tab3PAGECOUNT=&tab3RECORDCOUNT=&REPORT_ACTION=navigate':
                'jiechuxianshou5perfaparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=sgshqd&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'sghgqdparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1931_zcjhcjxx&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&report_action=navigate':
                'zcjhcjxxparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1839_zcjhcpxx&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'zcjhcpxxparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=SSGSGMXX&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'fullnamechangeparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=SSGSGMXX&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate':
                'shortnamechangeparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1793_ssgs&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'suspendListingparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1793_ssgs&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate':
                'StopListingparse',
            'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=xmjdxx&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate':
                'projparse',
        }

        for template in self.start_urls:
            first_page = 1

            # Bare endpoint: one POST per catalog registered in self.Start.
            if template == post_endpoint:
                for catalog_id, handler in self.Start.items():
                    payload = szse_data(first_page, catalog_id)
                    request_meta = {
                        'CATALOGID': catalog_id,
                        'page': first_page,
                        '_url': template,
                    }
                    yield scrapy.Request(
                        template,
                        callback=handler,
                        method='POST',
                        headers=hdr(),
                        meta=request_meta,
                        body=payload,
                    )
                continue

            # Date-parameterised listing: one GET per date.
            if template == dated_template:
                until = datetime.datetime.now().strftime("%Y%m%d")
                query_dates = S.datelist('20100101', until, "%Y-%m-%d")
                query_dates.reverse()
                for query_date in query_dates:
                    yield scrapy.Request(
                        template.format(page=first_page, date=query_date),
                        headers=hdr(),
                        meta={'page': first_page, 'date': query_date},
                        callback=self.rzrqparse,
                    )
                continue

            # Everything else is a plain paged listing.
            handler_name = paged_callbacks.get(template)
            if handler_name is None:
                stamped = suspension_template % datetime.datetime.now().strftime("%Y-%m-%d")
                if template != stamped:
                    continue  # unknown start URL: nothing to request
                handler_name = 'tfpxxparse'

            yield scrapy.Request(
                template.format(page=first_page),
                headers=hdr(),
                meta={'page': first_page},
                callback=getattr(self, handler_name),
            )