Exemplo n.º 1
0
    def get_rows(self, response: scrapy.FormRequest) -> dict:
        """
        Yield one formatted record for every result row on the page

        Rows are the ``tr.gridRow`` and ``tr.gridAltRow`` elements found
        under ``#ctl00_cphMainContent_gvSearchResults``; each one is passed
        to ``self.format_row_data`` and the value it returns is yielded.
        :param response: Page whose rows are selected via ``response.css``
        :type response: scrapy.FormRequest
        :return: Whatever ``format_row_data`` returns, one value per row
        :rtype: dict
        """

        # Regular and alternate rows carry different classes, so both are
        # matched with a single comma-separated CSS selector.
        row_selector = (
            '#ctl00_cphMainContent_gvSearchResults tr.gridRow, '
            '#ctl00_cphMainContent_gvSearchResults tr.gridAltRow'
        )
        matched_rows = response.css(row_selector)

        # Lazily format and emit the rows in document order.
        yield from map(self.format_row_data, matched_rows)
Exemplo n.º 2
0
    def get_pages(self, response: scrapy.FormRequest) -> scrapy.FormRequest:
        """
        Manages the multiple pages for bigger queries

        This is a generator: it reads the number shown by the last link of
        the results pager and yields one postback ``FormRequest`` per page,
        from page 1 up to that number, each handled by ``self.get_rows``.
        When the page has no pager links at all, the result is treated as a
        single page and only the request for page 1 is yielded.
        NOTE(review): this assumes the grid omits its pager when there is
        just one page of results -- confirm against the site.
        :param response: Page
        :type response: scrapy.FormRequest
        :return: concrete page FormRequest
        :rtype: scrapy.FormRequest
        :raises ValueError: if the text of the last pager link is not a number
        """

        # Pagination row
        paginator = response.css(
            "#ctl00_cphMainContent_gvSearchResults tr.gridPager:first-child td table tr td"
        )

        # Neither the pager nor the view state changes while the requests are
        # being generated, so both are read once instead of on every
        # iteration.
        page_links = paginator.css("a::text").extract()
        view_state = response.css(
            'input#__VIEWSTATE::attr(value)').extract_first()

        # No pager links means there is no last link to index; fall back to
        # a single page instead of failing with IndexError.
        last_page = int(page_links[-1]) if page_links else 1

        # range() is bounded, so a last-page value below 1 simply produces no
        # requests rather than a loop that never reaches its stop condition.
        for pg in range(1, last_page + 1):
            yield scrapy.FormRequest(
                url=response.url,
                formdata={
                    "ctl00_cphMainContent_txtLCSTartDate_dateInput_text":
                    f"1/1/{CURRENT_YEAR}",
                    "ctl00_cphMainContent_txtLCEndDate_dateInput_text":
                    f"12/31/{CURRENT_YEAR}",
                    "ctl00$cphMainContent$ddlLCDocumentType$vddlDropDown":
                    "101627",
                    '__VIEWSTATE':
                    view_state,
                    '__EVENTARGUMENT':
                    f"Page${pg}",
                    "__EVENTTARGET":
                    "ctl00$cphMainContent$gvSearchResults",
                },
                callback=self.get_rows)