Exemplo n.º 1
0
    def preparation(self, resp):
        """Post the page's ``aspnetForm`` back through Splash, requesting CSV.

        The fields below override the report toolbar's form values so that
        the postback carries ``saveToWindow=format:csv;`` with the page-number
        fields set to 1.  The request is sent to Splash's ``execute`` endpoint
        with the ``script`` Lua source defined outside this method, and the
        result is handled by ``self.get_station_data``.

        Args:
            resp: response holding the form to submit (looked up by
                ``id="aspnetForm"``).

        Yields:
            A single ``SplashFormRequest`` built from ``resp``.
        """
        form = resp.xpath(u'//*[@id="aspnetForm"]')
        # None when the form is missing; from_response then receives
        # formid=None.
        form_id = form.xpath(u"./@id").extract_first()

        # To export HTML instead, the event argument is
        # "saveToWindow=format:html;" and the three SaveFormat fields take
        # "html" / "html" / "Html".
        #
        # The JSON state blobs are single-quoted so that their embedded
        # double quotes do not terminate the Python string literal.
        formdata = {
            u"__EVENTARGUMENT":
            u"saveToWindow=format:csv;",
            u"__EVENTTARGET":
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportViewer",
            u"ctl00_ctl00_ctl00_MainContent_MainContent_ReportOutputPlaceHolder_uxTabControl_uxReportToolbar_Menu_ITCNT5_PageNumber_VI":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDD$L":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT6$PageCount":
            u"25",
            u"ctl00_ctl00_ctl00_MainContent_MainContent_ReportOutputPlaceHolder_uxTabControl_uxReportToolbar_Menu_ITCNT11_SaveFormat_VI":
            u"csv",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat$DDD$L":
            u"csv",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat":
            u"Csv",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDDState":
            u'{"windowsState":"0:0:-1:0:0:0:-10000:-10000:1:0:0:0"}',
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDD$L$State":
            u'{"CustomCallback":""}',
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat$DDDState":
            u'{"windowsState":"0:0:-1:430:140:1:68:165:1:0:0:0"}',
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl":
            u'{"activeTabIndex":1}',
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu":
            u'{"selectedItemIndexPath":"","checkedState":""}',
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportViewer":
            u'{"drillDown":{},"parameters":{},"cacheKey":"","currentPageIndex":0}',
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxErrorGrid":
            u'{"keys":[],"callbackState":"BwQHAwIERGF0YQcnAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABwAHAAcAAgtGb3JtYXRTdGF0ZQcAAgVTdGF0ZQc/BwAHAAcABwAHAAIABQAAAIAJAgtUaW1lT2ZFcnJvcgcACQIAAgADBwQCAAcAAgEHAAcAAgACAQcABwAHAAcAAg1TaG93RmlsdGVyUm93CgIB","selection":""}',
        }

        yield SplashFormRequest.from_response(
            response=resp,
            formid=form_id,
            formdata=formdata,
            callback=self.get_station_data,
            # The postback is driven by __EVENTTARGET/__EVENTARGUMENT above,
            # so no submit control is clicked.
            dont_click=True,
            endpoint=u"execute",
            cache_args=[u"lua_source"],
            args={
                u"http_method": u"POST",
                u"headers": {
                    u"Content-Type": u"application/x-www-form-urlencoded",
                },
                u"lua_source": script,
            })
Exemplo n.º 2
0
 def parse(self, response):
     """Submit the login form on ``response`` through Splash.

     When ``self.credentials()`` is falsy an error is logged and nothing is
     yielded.  Otherwise one ``SplashFormRequest`` is yielded that fills the
     form with the spider's stored field values and runs ``init.lua`` (read
     from ``self.lua_dir``) on the ``execute`` endpoint; the result goes to
     ``self.parse_login``.
     """
     if self.credentials():
         login_fields = {
             'txtUser': self.txtUser,
             'txtPassword': self.txtPassword,
             'txtDependencia': self.txtDependencia,
             'ComboTipoDep': self.ComboTipoDep,
             'btnAceptar': self.btnAceptar,
         }
         lua_script = self.getFileContentAsString(self.lua_dir + 'init.lua')
         splash_args = {'html': 1, 'lua_source': lua_script, 'wait': 5}
         yield SplashFormRequest.from_response(
             response,
             formdata=login_fields,
             callback=self.parse_login,
             endpoint='execute',
             cache_args=['lua_source'],
             session_id='dummy',
             args=splash_args,
         )
     else:
         self.logger.error('add the credentials first!')
Exemplo n.º 3
0
 def parse(self, response):
     """Log in by submitting the first ``//form`` on the page via Splash.

     The CSRF token is read from the ``csrf_token`` input on ``response``
     and sent back together with the username and password; the result is
     handled by ``self.after_login``.
     """
     token = response.xpath('//input[@name="csrf_token"]/@value').get()
     login_fields = {
         'csrf_token': token,
         'username': '******',
         'password': '******',
     }
     yield SplashFormRequest.from_response(
         response,
         formxpath='//form',
         formdata=login_fields,
         callback=self.after_login,
     )
Exemplo n.º 4
0
    def parse(self, response):
        """Return the login request for the ``loginform`` form.

        If ``self.password`` is the empty string, an error is logged and
        ``None`` is returned.  Otherwise a ``SplashFormRequest`` is returned
        that submits ``self.user_name`` and ``self.password`` and hands the
        result to ``self.after_login``.
        """
        if self.password != '':
            login_fields = {'email': self.user_name, 'pass': self.password}
            return SplashFormRequest.from_response(
                response,
                formdata=login_fields,
                callback=self.after_login,
                formid='loginform',
                dont_process_response=True,
            )

        self.logger.error('first add a password')
        return None
    def login(self, response):
        """Submit the form on ``response`` to ``self.login_url`` via Splash.

        Calls Splash's ``SplashFormRequest`` to submit the form parameters.
        Both form fields are sent empty; the Lua source ``lg`` runs on the
        ``execute`` endpoint and also receives ``self.search_key``.  Success
        goes to ``self.after_login``, failures to ``self.error_parse``.
        """
        blank_fields = {'user_agent': '', 'cookie': ''}
        lua_args = {
            'wait': 30,
            'lua_source': lg,
            'proxy': 'http://:1080',
            'search_key': self.search_key,
        }
        yield SplashFormRequest.from_response(
            response=response,
            url=self.login_url,
            endpoint='execute',
            formdata=blank_fields,
            args=lua_args,
            callback=self.after_login,
            errback=self.error_parse,
        )
Exemplo n.º 6
0
    def login(self, response):
        """Submit the login form on ``response`` to ``self.login_url``.

        Calls Splash's ``SplashFormRequest`` to submit the form parameters
        (user, password).  The same ``user`` / ``password`` values are also
        passed to the ``log_lua_script`` Lua source as Splash arguments.
        Success goes to ``self.after_login``, failures to
        ``self.error_parse``.
        """
        login_fields = {'email': user, 'password': password}
        lua_args = {
            'wait': 30,
            'lua_source': log_lua_script,
            'user_name': user,
            'user_password': password,
            'proxy': 'http://:1080',
        }
        yield SplashFormRequest.from_response(
            response=response,
            url=self.login_url,
            formdata=login_fields,
            endpoint='execute',
            args=lua_args,
            callback=self.after_login,
            errback=self.error_parse,
        )
Exemplo n.º 7
0
def test_form_request_from_response():
    """Check that SplashFormRequest.from_response honours formdata/clickdata.

    Copied from Scrapy's ``test_from_response_submit_not_first_clickable``:
    the second submit button is clicked, the hidden ``two`` field is
    overridden, and the resulting GET URL must also be mirrored into the
    Splash ``url`` argument.
    """
    # cgi.parse_qs was removed in Python 3.8 (and the cgi module itself in
    # 3.13); urllib.parse.parse_qs is the replacement.
    try:
        from urllib.parse import parse_qs
    except ImportError:  # Python 2
        from urlparse import parse_qs

    def _buildresponse(body, **kwargs):
        kwargs.setdefault('body', body)
        kwargs.setdefault('url', 'http://example.com')
        kwargs.setdefault('encoding', 'utf-8')
        return HtmlResponse(**kwargs)

    response = _buildresponse(
        """<form action="get.php" method="GET">
        <input type="submit" name="clickable1" value="clicked1">
        <input type="hidden" name="one" value="1">
        <input type="hidden" name="two" value="3">
        <input type="submit" name="clickable2" value="clicked2">
        </form>""")
    req = SplashFormRequest.from_response(
        response, formdata={'two': '2'}, clickdata={'name': 'clickable2'})
    assert req.method == 'GET'
    assert req.meta['splash']['args']['url'] == req.url
    # Parse the query string of the generated URL, keeping blank values.
    fs = parse_qs(req.url.partition('?')[2], keep_blank_values=True)
    assert fs['clickable2'] == ['clicked2']
    assert 'clickable1' not in fs
    assert fs['one'] == ['1']
    assert fs['two'] == ['2']
Exemplo n.º 8
0
def test_form_request_from_response():
    """Check that SplashFormRequest.from_response honours formdata/clickdata.

    Copied from Scrapy's ``test_from_response_submit_not_first_clickable``:
    the second submit button is clicked, the hidden ``two`` field is
    overridden, and the resulting GET URL must also be mirrored into the
    Splash ``url`` argument.
    """
    # cgi.parse_qs was removed in Python 3.8 (and the cgi module itself in
    # 3.13); urllib.parse.parse_qs is the replacement.
    try:
        from urllib.parse import parse_qs
    except ImportError:  # Python 2
        from urlparse import parse_qs

    def _buildresponse(body, **kwargs):
        kwargs.setdefault('body', body)
        kwargs.setdefault('url', 'http://example.com')
        kwargs.setdefault('encoding', 'utf-8')
        return HtmlResponse(**kwargs)

    response = _buildresponse(
        """<form action="get.php" method="GET">
        <input type="submit" name="clickable1" value="clicked1">
        <input type="hidden" name="one" value="1">
        <input type="hidden" name="two" value="3">
        <input type="submit" name="clickable2" value="clicked2">
        </form>""")
    req = SplashFormRequest.from_response(
        response, formdata={'two': '2'}, clickdata={'name': 'clickable2'})
    assert req.method == 'GET'
    assert req.meta['splash']['args']['url'] == req.url
    # Parse the query string of the generated URL, keeping blank values.
    fs = parse_qs(req.url.partition('?')[2], keep_blank_values=True)
    assert fs['clickable2'] == ['clicked2']
    assert 'clickable1' not in fs
    assert fs['one'] == ['1']
    assert fs['two'] == ['2']
    def login(self, response):
        """Build the login form submission from a Splash-rendered page.

        Logs the ``png`` entry of ``response.data``, decodes the extra
        headers stored as JSON on ``self.rpt_mp.headers`` (falling back to no
        headers when the JSON is invalid), lets ``fill_login_form`` compute
        the form fields from the page and the credentials in
        ``response.meta``, and returns a ``SplashFormRequest`` for the form
        named ``fm`` whose result goes to ``self.after_login``.
        """
        logging.debug('Screenshot: %s', response.data['png'])

        try:
            extra_headers = json.loads(self.rpt_mp.headers)
        except json.decoder.JSONDecodeError:
            extra_headers = {}

        # fill_login_form returns three values; only the form fields are
        # needed here.
        login_fields, _, _ = fill_login_form(response.url, response.text,
                                             response.meta['username'],
                                             response.meta['password'])

        # The follow-up is a brand-new Splash request, so the Splash entries
        # left in meta by the current one are dropped first.
        for stale_key in ('splash', '_splash_processed'):
            response.meta.pop(stale_key)

        return SplashFormRequest.from_response(
            response,
            endpoint='render.json',
            args=self.splash_args,
            headers=extra_headers,
            dont_filter=True,
            formname='fm',
            formdata=login_fields,
            meta=response.meta,
            callback=self.after_login,
        )
Exemplo n.º 10
0
    def parse(self, response):
        """Emit an item for ``response`` and schedule its follow-up requests.

        Responses whose URL does not match ``self.link_extractor`` are
        ignored.  Otherwise the page is classified against ``self.pages_cfg``:
        the first entry whose ``urlRegex`` and ``contentRegex`` (when given)
        both match supplies the URL-extraction rules (``urlExtractionInfo``)
        and the forms to submit (``formsInfo``).  If the config is non-empty
        and no entry matches, a message is printed and nothing is yielded;
        with an empty config the page is processed with no extraction rules
        and no forms.

        For a processed page the generator yields, in order:

        1. the item built by ``self.text_cdr_item``;
        2. (only when the ``FOLLOW_LINKS`` setting is true) requests for
           pagination links if ``PREFER_PAGINATION`` is set, then for the
           allowed follow URLs, onclick-handler URLs and iframe links;
        3. one ``SplashFormRequest`` per parameter set of each configured
           form, with ``self.parse`` as callback.
        """
        if not self.link_extractor.matches(response.url):
            return

        # Stamped onto the meta of every request created from this page.
        request_meta = {
            'from_search': response.meta.get('is_search'),
            'extracted_at': response.url,
        }

        def request(url, meta=None, **kwargs):
            # Merge the page-level meta into the caller's meta (page-level
            # keys win) and delegate to self.make_request.
            meta = meta or {}
            meta.update(request_meta)
            return self.make_request(url, meta=meta, **kwargs)

        # The formasaurus result is only recorded in the item metadata
        # below; the forms that actually get submitted come from the page
        # config (formsInfo), which rebinds ``forms`` further down.
        forms = (formasaurus.extract_forms(response.text)
                 if response.text else [])

        metadata = dict(
            is_page=response.meta.get('is_page', False),
            is_onclick=response.meta.get('is_onclick', False),
            is_iframe=response.meta.get('is_iframe', False),
            is_search=response.meta.get('is_search', False),
            from_search=response.meta.get('from_search', False),
            extracted_at=response.meta.get('extracted_at', None),
            depth=response.meta.get('depth', None),
            priority=response.request.priority,
            # keep only the second element of each extracted pair
            forms=[meta for _, meta in forms],
            screenshot=self._take_screenshot(response),
        )

        # Defaults that apply when self.pages_cfg is empty: the loop below
        # does nothing, so the page counts as valid, with no URL-extraction
        # rules and no forms to submit.
        page_valid = True
        url_extract_info = None
        forms = []

        # page classification
        for pg in self.pages_cfg:
            # NOTE(review): page_name is never used below.
            page_name = pg.get('pageName')

            # A page entry matches when every regex it defines matches;
            # an entry with neither regex matches any page.
            url_regex = pg.get('urlRegex')
            content_regex = pg.get('contentRegex')
            page_valid = True
            if url_regex:
                if not search_re(url_regex, response.url):
                    page_valid = False
            if content_regex:
                # The body is decoded as UTF-8 regardless of the response's
                # declared encoding.
                if not search_re(content_regex, response.body.decode("utf-8")):
                    page_valid = False
            if page_valid:
                # First matching entry wins.
                url_extract_info = pg.get('urlExtractionInfo')
                forms = pg.get('formsInfo')
                break

        if not page_valid:
            print(
                'the page did not pass through any of the specified page classifiers',
                response.url)
        else:
            # do rest of the processing
            # A set, so duplicate links collapse; logout-looking links are
            # skipped.
            follow_urls = {
                link_to_url(link)
                for link in self.link_extractor.extract_links(response)
                if not self._looks_like_logout(link, response)
            }
            print('Size of follow-urls: ', len(follow_urls))

            # The item carries every extracted URL, not only the ones that
            # pass the allow/deny filtering further down.
            yield self.text_cdr_item(response,
                                     follow_urls=follow_urls,
                                     metadata=metadata)

            # Everything after this point schedules new requests, including
            # the form submissions at the end.
            if not self.settings.getbool('FOLLOW_LINKS'):
                return

            if self.settings.getbool('PREFER_PAGINATION'):
                # Follow pagination links; pagination is not a subject of
                # a max depth limit. This also prioritizes pagination links because
                # depth is not increased for them.
                with _dont_increase_depth(response):
                    for url in self._pagination_urls(response):
                        yield request(url, meta={'is_page': True})

            # URL extraction: decide which URLs may be followed.
            allowed_follow_urls = list()
            if url_extract_info:
                url_extract_method = url_extract_info['extractionMethod']
                if url_extract_method == 'inferlink':
                    # The config lists the URLs to follow explicitly.
                    extract_urls = url_extract_info.get('urls')
                    for extract_url in (extract_urls or []):
                        allowed_follow_urls.append(extract_url)
                else:
                    # Filter the extracted links: a URL is dropped if any
                    # deny regex matches; if allow regexes are configured it
                    # must additionally match at least one of them.
                    url_regexes_allow = url_extract_info.get('urlRegexesAllow')
                    url_regexes_deny = url_extract_info.get('urlRegexesDeny')
                    for follow_url in (follow_urls or []):
                        follow = True
                        for url_regex_deny in (url_regexes_deny or []):
                            if search_re(url_regex_deny, follow_url):
                                follow = False
                                break
                        if follow and url_regexes_allow:
                            follow = False
                            for url_regex_allow in (url_regexes_allow or []):
                                if search_re(url_regex_allow, follow_url):
                                    follow = True
                                    break
                        if follow:
                            allowed_follow_urls.append(follow_url)
            else:
                # No extraction rules: follow every extracted link.
                allowed_follow_urls = list(follow_urls)

            print('Number of urls to be followed - ', len(allowed_follow_urls))
            # Follow all the allowed in-domain links.
            # Pagination requests are sent twice, but we don't care because
            # they will be filtered out by a dupefilter.
            for url in allowed_follow_urls:
                yield request(url)

            # urls extracted from onclick handlers
            for url in get_js_links(response):
                # Strings that do not look like URLs get a lower priority.
                priority = 0 if _looks_like_url(url) else -15
                url = response.urljoin(url)
                yield request(url,
                              meta={'is_onclick': True},
                              priority=priority)

            # go to iframes
            for link in self.iframe_link_extractor.extract_links(response):
                yield request(link_to_url(link), meta={'is_iframe': True})

            # forms processing
            for form in (forms or []):
                # 'identity' is a JSON object whose entries are passed to
                # from_response as keyword arguments.
                # presumably form-selection keywords (formid, formname, ...)
                # -- verify against the page config
                form_identity = json.loads(form['identity'])
                form_method = form['method']
                form_params_list = form['params']

                kwargs = {}
                if self.use_splash:
                    kwargs.update(self.setup_splash_args())
                meta = {}
                meta['avoid_dup_content'] = True
                meta.update(request_meta)
                kwargs.update(form_identity)

                for form_params in form_params_list:
                    # One submission per configured parameter set; each
                    # request gets its own copy of meta.
                    print('===== Submitting FORM again ========' +
                          json.dumps(form_params))
                    yield SplashFormRequest.from_response(response,
                                                          formdata=form_params,
                                                          method=form_method,
                                                          callback=self.parse,
                                                          meta=meta.copy(),
                                                          **kwargs)
Exemplo n.º 11
0
    def preparation(self, resp):
        """Post the page's ``aspnetForm`` back through Splash, asking for HTML.

        The fields in ``formdata1`` override the form's own values so that
        the postback carries ``saveToWindow=format:html;``.  The request is
        sent to Splash's ``execute`` endpoint with the ``script`` Lua source
        defined outside this method, and the result is handled by
        ``self.parse_tags``.

        Args:
            resp: response holding the form to submit (looked up by
                ``id="aspnetForm"``).

        Yields:
            A single ``SplashFormRequest`` built from ``resp``.
        """
        print("TAGS1", resp.meta)

        form = resp.xpath(u'//*[@id="aspnetForm"]')
        # None when the form is missing; from_response then receives
        # formid=None.
        form_id = form.xpath(u"./@id").extract_first()

        # Debug output only: the form action without its leading "../"
        # segments.  str.lstrip("../") is not used because it strips any run
        # of '.' and '/' characters rather than the "../" prefix, and the
        # "or" guards against a form without an action attribute.
        form_action = form.xpath(u"./@action").extract_first() or u""
        while form_action.startswith(u"../"):
            form_action = form_action[len(u"../"):]
        print(form_action)

        # NOTE(review): three things in this payload look unintended but are
        # left untouched because changing them changes what is sent:
        #   * __EVENTTARGET starts with "tl00$" while every other control
        #     name starts with "ctl00$";
        #   * one key spells "MasinContent" instead of "MainContent";
        #   * the JSON values contain the literal text "&quot;" rather than
        #     double quotes.
        # Confirm against the live form before relying on them.
        formdata1 = {
            u"__EVENTARGUMENT":
            u"saveToWindow=format:html;",
            u"__EVENTTARGET":
            u"tl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportViewer",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl":
            u"{&quot;activeTabIndex&quot;:1}",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu":
            u"{&quot;selectedItemIndexPath&quot;:&quot;&quot;,&quot;checkedState&quot;:&quot;&quot;}",
            u"ctl00_ctl00_ctl00_MainContent_MainContent_ReportOutputPlaceHolder_uxTabControl_uxReportToolbar_Menu_ITCNT5_PageNumber_VI":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDDState":
            u"{&quot;windowsState&quot;:&quot;0:0:-1:0:0:0:-10000:-10000:1:0:0:0&quot;}",
            u"ctl00$ctl00$ctl00$MasinContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDD$L$State":
            u"{&quot;CustomCallback&quot;:&quot;&quot;}",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDD$L":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT6$PageCount":
            u"25",
            u"ctl00_ctl00_ctl00_MainContent_MainContent_ReportOutputPlaceHolder_uxTabControl_uxReportToolbar_Menu_ITCNT11_SaveFormat_VI":
            u"html",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat":
            u"Html",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat$DDDState":
            u"{&quot;windowsState&quot;:&quot;0:0:-1:430:250:1:68:165:1:0:0:0&quot;}",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat$DDD$L":
            u"html",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportViewer":
            u"{&quot;drillDown&quot;:{},&quot;parameters&quot;:{},&quot;cacheKey&quot;:&quot;&quot;,&quot;currentPageIndex&quot;:0}",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxErrorGrid":
            u"{&quot;keys&quot;:[],&quot;callbackState&quot;:&quot;BwQHAwIERGF0YQcnAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABwAHAAcAAgtGb3JtYXRTdGF0ZQcAAgVTdGF0ZQc/BwAHAAcABwAHAAIABQAAAIAJAgtUaW1lT2ZFcnJvcgcACQIAAgADBwQCAAcAAgEHAAcAAgACAQcABwAHAAcAAg1TaG93RmlsdGVyUm93CgIB&quot;,&quot;selection&quot;:&quot;&quot;}",
        }

        yield SplashFormRequest.from_response(
            response=resp,
            formid=form_id,
            formdata=formdata1,
            callback=self.parse_tags,
            # The postback is driven by __EVENTTARGET/__EVENTARGUMENT above,
            # so no submit control is clicked.
            dont_click=True,
            endpoint='execute',
            cache_args=['lua_source'],
            args={'lua_source': script},
        )
Exemplo n.º 12
0
    def parse(self, response):
        """Post the report form on ``response`` back, asking for HTML output.

        The fields in ``fm`` override the form's own values so that the
        postback carries ``saveToWindow=format:html;`` with the page-number
        fields set to 1.  The result is handled by ``self.parse_tags``.

        Two larger payloads that were previously built here but never
        submitted have been removed; only ``fm`` was ever passed to
        ``from_response``.

        Args:
            response: response holding the form to submit.

        Yields:
            A single ``SplashFormRequest`` built from ``response``.
        """
        # NOTE(review): __EVENTTARGET starts with "tl00$" while every other
        # control name starts with "ctl00$".  Left untouched because changing
        # it changes what is sent; confirm against the live form.
        fm = {
            u"__EVENTARGUMENT":
            u"saveToWindow=format:html;",
            u"__EVENTTARGET":
            u"tl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportViewer",
            u"ctl00_ctl00_ctl00_MainContent_MainContent_ReportOutputPlaceHolder_uxTabControl_uxReportToolbar_Menu_ITCNT5_PageNumber_VI":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber$DDD$L":
            u"1",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT5$PageNumber":
            u"1",
            u"ctl00_ctl00_ctl00_MainContent_MainContent_ReportOutputPlaceHolder_uxTabControl_uxReportToolbar_Menu_ITCNT11_SaveFormat_VI":
            u"html",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat$DDD$L":
            u"html",
            u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl$uxReportToolbar$Menu$ITCNT11$SaveFormat":
            u"Html",
            u"DXScript":
            u"1_232,1_134,1_225,1_169,1_226,1_223,1_155,9_45,1_131,1_217,1_206,1_167,1_175,1_138,1_180,1_166,1_164,1_215,1_170,9_38,9_48,9_46,1_153,1_194,1_196",
            u"DXCss":
            u"1_33,1_18,0_3879,0_4037,0_4039,0_3877,0_4060,0_3883,0_3885,../Content/bootstrap.min.css,../Styles/Site.css,../images/AgilaireAV.ico",
        }

        yield SplashFormRequest.from_response(
            response,
            formdata=fm,
            callback=self.parse_tags,
            # The postback is driven by __EVENTTARGET/__EVENTARGUMENT above,
            # so no submit control is clicked.
            dont_click=True,
        )
Exemplo n.º 13
0
 def login(self, response):
     """Submit the ``name`` field through Splash and continue to listings.

     The form is located via the XPath of its input element; the result is
     handled by ``self.go_to_listings``.
     """
     name_field = {'name': '*****@*****.**'}
     yield SplashFormRequest.from_response(
         response=response,
         formxpath='//div[3]//tr[1]/td/input',
         formdata=name_field,
         callback=self.go_to_listings,
     )