def get_rows(self, response: scrapy.FormRequest) -> dict:
    """
    Walk the data rows of one result page and format each of them.

    This is a generator: every row of the results grid is handed to
    ``self.format_row_data`` and the value it returns is yielded.

    :param response: Page
    :type response: scrapy.FormRequest
    :return: Dictionary for save (one yielded per table row)
    :rtype: dict
    """
    # Data rows carry either the ``gridRow`` or the ``gridAltRow`` class;
    # both are looked up inside the same results grid element.
    grid = '#ctl00_cphMainContent_gvSearchResults'
    row_selector = f'{grid} tr.gridRow, {grid} tr.gridAltRow'

    yield from (
        self.format_row_data(row) for row in response.css(row_selector)
    )
def get_pages(self, response: scrapy.FormRequest) -> scrapy.FormRequest:
    """
    Manages the multiple pages for bigger queries

    This is a generator: it reads the pager row of the results grid,
    determines the highest page number the pager links to, and yields one
    postback ``FormRequest`` per page (``Page$1`` up to ``Page$<last>``).
    Every request is handled by ``get_rows``.

    If the pager contains no numeric links at all (no pager row, or only
    labels such as ``Last`` / ``...``), a single request for page 1 is
    yielded instead of raising ``IndexError`` / ``ValueError``.

    NOTE(review): only page numbers that are visible as links in this
    response are considered; if the site truncates long pagers, pages
    beyond the highest visible number are not requested - confirm against
    the live site.

    :param response: Page
    :type response: scrapy.FormRequest
    :return: concrete page FormRequest (one yielded per page)
    :rtype: scrapy.FormRequest
    """
    # Pagination row
    paginator = response.css(
        "#ctl00_cphMainContent_gvSearchResults tr.gridPager:first-child td table tr td"
    )

    # Keep only labels that are plain numbers so that non-numeric pager
    # links cannot break the int() conversion.
    link_labels = paginator.css("a::text").extract()
    page_numbers = [
        int(label) for label in link_labels if label.strip().isdecimal()
    ]
    # No numeric links: treat the result set as a single page.
    last_page = max(page_numbers, default=1)

    # The view state belongs to this response and is identical for every
    # request built from it, so it is looked up once.
    view_state = response.css(
        'input#__VIEWSTATE::attr(value)').extract_first()

    for pg in range(1, last_page + 1):
        yield scrapy.FormRequest(
            url=response.url,
            formdata={
                "ctl00_cphMainContent_txtLCSTartDate_dateInput_text": f"1/1/{CURRENT_YEAR}",
                "ctl00_cphMainContent_txtLCEndDate_dateInput_text": f"12/31/{CURRENT_YEAR}",
                "ctl00$cphMainContent$ddlLCDocumentType$vddlDropDown": "101627",
                '__VIEWSTATE': view_state,
                '__EVENTARGUMENT': f"Page${pg}",
                "__EVENTTARGET": "ctl00$cphMainContent$gvSearchResults",
            },
            callback=self.get_rows)