示例#1
0
 def parse(self, response):
     """Build the search-key list and request the first Ohio business search.

     The keys come from ``SearchCriteria.strRange`` when both ``self.start``
     and ``self.end`` are alphabetic, otherwise from
     ``SearchCriteria.numberRange``. The first key is popped and requested;
     the remaining keys stay in ``self.searchkeys``.

     Yields:
         One ``scrapy.Request`` handled by ``self.parse_data`` with the
         popped key stored in ``meta['search']``. Nothing is yielded when
         the generated key list is empty.
     """
     # Reset the collector before any request is handed out. It used to be
     # assigned after the ``yield``, i.e. only once the generator resumed.
     self.ids = []

     if self.start.isalpha() and self.end.isalpha():
         self.searchkeys = SearchCriteria.strRange(self.start, self.end)
         # These keys are dropped from alphabetic ranges (presumably the
         # search endpoint treats them specially -- TODO confirm).
         for reserved in ('AND', 'NOT', 'BTI'):
             if reserved in self.searchkeys:
                 self.searchkeys.remove(reserved)
     else:
         self.searchkeys = SearchCriteria.numberRange(
             self.start, self.end, 1)

     print(len(self.searchkeys))
     if not self.searchkeys:
         # Empty range: nothing to search (pop(0) would raise IndexError).
         return

     search = self.searchkeys.pop(0)
     # Current time in milliseconds, sent as the ``_`` query parameter.
     millis = str(int(round(time.time() * 1000)))
     url = 'https://businesssearchapi.sos.state.oh.us/zyjLcCmoqeZffOn1ajJdsiek3tmuj9QtZVn{}_X?_={}'.format(
         str(search), millis)
     yield scrapy.Request(url=url,
                          callback=self.parse_data,
                          dont_filter=True,
                          meta={'search': search})
示例#2
0
    def parse(self, response):
        """Submit one New Hampshire business-name search per invocation.

        On the first call (``self.check_first`` true) the search keys are
        generated from ``self.start`` / ``self.end``. Each call then pops one
        key, resolves the captcha image referenced by the page's iframe and
        posts the search form.

        Yields:
            One ``scrapy.FormRequest`` handled by ``self.parse_next``, or
            nothing when no search keys remain.
        """
        iframe_src = response.xpath('//iframe/@src').extract_first()
        image_url = 'https://quickstart.sos.nh.gov' + iframe_src

        token = response.xpath(
            '*//input[@name="__RequestVerificationToken"]/@value'
        ).extract_first()

        if self.check_first:
            self.searchkeys = []
            alphabetic = self.start.isalpha() and self.end.isalpha()
            if alphabetic:
                self.searchkeys = SearchCriteria.strRange(self.start, self.end)
            else:
                self.searchkeys = SearchCriteria.numberRange(
                    self.start, self.end, 1)
            self.check_first = False

        if not self.searchkeys:
            return

        search = self.searchkeys.pop(0)
        business_name = str(search)
        # The captcha is only solved once a key is actually going to be sent.
        captcha_text = self.getcaptchaCoder().resolveImgCaptcha(image_url)

        formdata = {
            '__RequestVerificationToken': token,
            'rbBusinessNameSearch': 'StartsWith',
            'rbBasicSearch': 'BusinessName',
            'txtBusinessName': business_name,
            'chkWithSimilarSoundingBusinessNames': 'false',
            'txtHomeStateBusiness': '',
            'chkWithSoundingBusinessHomeStateNames': 'false',
            'txtBusinessID': '',
            'txtFilingNumber': '',
            'txtBusCreationDate': '',
            'ddlBusinessType': '',
            'ddlBusinessStatus': '',
            'txtCity': '',
            'txtZipCode': '',
            'County': '',
            'txtAgentName': '',
            'chkWithSimilarSoundingAgentNames': 'false',
            'txtPrincipalName': '',
            'chkWithSimilarSoundingPrincipalNames': 'false',
            'txtCaptcha': captcha_text,
            'btnSearch': 'Search',
            'hdnMessage': '',
        }
        search_url = 'https://quickstart.sos.nh.gov/online/BusinessInquire/BusinessSearch'
        yield scrapy.FormRequest(url=search_url,
                                 callback=self.parse_next,
                                 method='POST',
                                 formdata=formdata,
                                 dont_filter=True,
                                 meta={'page': 2})
 def parse(self, response):
     """Request the permit count for the next owner search term.

     The search terms are generated once (while ``self.value`` is true) from
     ``self.start`` / ``self.end``; each call pops one term.

     Yields:
         One ``scrapy.Request`` handled by ``self.parse_user_det`` with the
         term in ``meta['parm']``, or nothing when no terms remain.
     """
     if self.value:
         self.search_element = SearchCriteria.strRange(self.start, self.end)
         self.value = False

     if not len(self.search_element) > 0:
         return

     parm = self.search_element.pop(0)
     page_count_link = ''.join((
         'https://public.claycountygov.com/permitsearch/API/Search/Count?tab=Owner&sortfield=issuedate&sortdirection=D&owner=',
         str(parm),
         '&status=all&page=1&v=496',
     ))
     page_count_header = {
         'Accept': '*/*',
         'Accept-Encoding': 'gzip, deflate, br',
         'Accept-Language': 'en-US,en;q=0.9,ta;q=0.8',
         'Cache-Control': 'max-age=0',
         'Connection': 'keep-alive',
         'Content-Type': 'application/json',
         'Host': 'public.claycountygov.com',
         'Referer': 'https://public.claycountygov.com/PermitSearch/',
         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
     }
     yield scrapy.Request(url=page_count_link,
                          callback=self.parse_user_det,
                          headers=page_count_header,
                          dont_filter=True,
                          meta={"parm": parm})
示例#4
0
 def parse(self, response):
     """Post one address search per (search key, digit 0-9) combination.

     The search page is requested first; a value read from it is prefixed
     to every ``datachanges`` payload.

     Yields:
         The initial ``scrapy.Request`` for the search page, then one
         ``scrapy.FormRequest`` (callback ``self.parse2``) per combination.
     """
     search_url = "https://webpermit.mecklenburgcountync.gov/Default.aspx?PossePresentation=SearchByAddress"
     # NOTE(review): the value of this ``yield`` expression is used as a
     # response object. That presumably relies on a decorator applied
     # outside the visible code (e.g. inline requests) -- confirm.
     main_page = yield scrapy.Request(url=search_url, dont_filter=True)
     datac = main_page.xpath(
         "/html/body/div/form[1]/input[5]/@value").extract()[0]
     self.search_businames = SearchCriteria.strRange(self.start, self.end)
     for alpha in self.search_businames:
         for iv in range(0, 10):
             changes = (
                 ('C', 'S0', 447713, str(iv)),
                 ('C', 'S0', 447712, str(alpha)),
                 ('F', '', 0, 0),
             )
             # Serialise via the tuple's str() form, then strip the outer
             # parentheses and every space.
             encoded = str(changes)
             encoded = encoded.replace('((', '(')
             encoded = encoded.replace('))', ')')
             encoded = encoded.replace(' ', '')
             dc1 = datac + ',' + encoded
             formdata1 = {
                 'currentpaneid': '467087',
                 'paneid': '467088',
                 'functiondef': '5',
                 'sortcolumns': '{}',
                 'datachanges': str(dc1),
                 'comesfrom': 'posse',
                 'changesxml': '',
             }
             yield scrapy.FormRequest(url=search_url,
                                      formdata=formdata1,
                                      dont_filter=True,
                                      callback=self.parse2)
 def parse_things(self, response):
     """Post a permit search for the next last-name search term.

     The terms are generated once (while ``self.value`` is true) from
     ``self.start`` / ``self.end``; each call pops one term and submits it
     together with the ASP.NET state fields read from ``response``.

     Yields:
         One ``scrapy.FormRequest`` handled by ``self.parse_second``, or
         nothing when no terms remain.
     """
     if self.value:
         self.search_element = SearchCriteria.strRange(self.start, self.end)
         self.value = False

     if not len(self.search_element) > 0:
         return

     val = self.search_element.pop(0)
     view_state = response.xpath(
         "//input[@id='__VIEWSTATE']/@value").extract_first()
     event_validation = response.xpath(
         "//input[@id='__EVENTVALIDATION']/@value").extract_first()
     form_data1 = {
         'ctl00$cphMainApp$ToolkitScriptManager1': 'ctl00$cphMainApp$upSearch|ctl00$cphMainApp$ButtonPermitSearch',
         '__VIEWSTATEGENERATOR': 'DCE30F85',
         '__VIEWSTATE': view_state,
         '__EVENTVALIDATION': event_validation,
         'ctl00$cphMainApp$PermitSearchCriteria1$TextBoxRefLastName': str(val),
         '__ASYNCPOST': 'true',
         'ctl00$cphMainApp$ButtonPermitSearch': 'Search For Permits',
     }
     yield scrapy.FormRequest(
         url='https://gcs.douglascountywi.org/gcswebportal/search.aspx',
         callback=self.parse_second,
         dont_filter=True,
         formdata=form_data1)
示例#6
0
    def verify_captcha(self, response):
        """Request the first page of individual licences for the next last name.

        ``response`` is expected to carry a JSON body whose ``d`` member is
        forwarded as ``vid`` in the grid request.

        Yields:
            One JSON ``POST`` ``scrapy.Request`` handled by
            ``self.getIndv_license``; ``meta`` carries the next page number,
            the ``vid`` and the search term.
        """
        # NOTE(review): an empty list is refilled from the full range here,
        # so the terms restart once exhausted -- confirm this is intended.
        if len(self.search_element) == 0:
            self.search_element = SearchCriteria.strRange(self.start, self.end)
        if len(self.search_element) > 0:
            param = self.search_element.pop(0)
            print('--------------------param---------', param)
            # ``response.text`` replaces the deprecated ``body_as_unicode()``.
            vid = json.loads(response.text)['d']
            formdata = {
                'county': '-1',
                'fname': '',
                'lictype': '-1',
                'lname': str(param),
                'lnumber': '',
                'page': '1',
                'pageSize': '20',
                'sdata': [],
                'sortby': '',
                'sortexp': '',
                'vid': vid,
            }

            yield scrapy.Request(
                response.urljoin('/online/JS_grd/Grid.svc/GetIndv_license'),
                method='POST',
                body=json.dumps(formdata),
                headers={'Content-Type': 'application/json'},
                callback=self.getIndv_license,
                meta={
                    'page': '2',
                    'vid': vid,
                    'param': param
                })
 def parse(self, response):
     """Start the search for the next last name in ``self.searchkeys``.

     Resets ``self.page_no`` to 1, pops the next key into
     ``self.last_name`` and regenerates ``self.searchkeys1`` as the
     ``'aa'``..``'zz'`` range.

     Yields:
         One ``scrapy.Request`` for ``self.start_urls[0]`` handled by
         ``self.parse_search``, or nothing when no keys remain.
     """
     if not len(self.searchkeys) > 0:
         return

     self.page_no = 1
     self.last_name = self.searchkeys.pop(0)
     self.searchkeys1 = SearchCriteria.strRange('aa', 'zz')
     first_url = self.start_urls[0]
     yield scrapy.Request(url=first_url,
                          callback=self.parse_search,
                          dont_filter=True)
 def __init__(self,
              start=None,
              end=None,
              startnum=None,
              endnum=None,
              is_server=None,
              proxyserver=None,
              *a,
              **kw):
     """Initialise the spider and precompute ``self.searchkeys``.

     Numeric keys (``startnum``..``endnum``) come first, followed by
     alphabetic keys (``start``..``end``); either part is omitted when its
     start value is falsy. When both are falsy ``self.searchkeys`` is not
     assigned here.
     """
     # NOTE(review): ``is_server`` is not forwarded and the parent always
     # receives ``proxyserver=None`` regardless of the argument -- confirm
     # this is intentional.
     parent = super(CtTeachersLicensesSpider, self)
     parent.__init__(start, end, proxyserver=None, *a, **kw)

     if startnum:
         keys = SearchCriteria.numberRange(startnum, endnum, 1)
         if start:
             keys = keys + SearchCriteria.strRange(start, end)
         self.searchkeys = keys
     elif start:
         self.searchkeys = SearchCriteria.strRange(start, end)
 def parse(self, response):
     """Post a permit search for the next date in the configured range.

     On the first call the daily date list is generated and its first entry
     is stored in ``self.end_date``. Each call then submits the previously
     stored date as the search string and stores the next one.

     Yields:
         One ``scrapy.FormRequest`` handled by ``self.parse_details`` with
         ``meta`` holding ``page`` 1 and the submitted ``start_date``, or
         nothing when the date list is exhausted.
     """
     if self.check_first:
         self.check_first = False
         self.search_element = SearchCriteria.dateRange(
             self.start, self.end, freq='1D', formatter='%m/%d/%Y')
         self.end_date = self.search_element.pop(0)

     if not len(self.search_element) > 0:
         # NOTE(review): the date currently held in ``self.end_date`` has not
         # been submitted at this point -- confirm the last date of the range
         # is meant to be skipped.
         return

     start_date = copy.copy(self.end_date)
     self.end_date = self.search_element.pop(0)
     view_state = response.xpath(
         '//*[@id="__VIEWSTATE"]/@value').extract_first()
     search_string = str(start_date)
     formdata = {
         'ctl00$RadScriptManager1': 'ctl00$RadScriptManager1|ctl00$cplMain$btnSearch',
         '__EVENTTARGET': ' ctl00$cplMain$btnSearch',
         '__EVENTARGUMENT': '',
         '__VIEWSTATE': view_state,
         '__VIEWSTATEGENERATOR': '2A136539',
         'ctl00$ucLogin$hfDashboardRedirect': 'https://etrakit.champaignil.gov/etrakit/dashboard.aspx',
         'ctl00$ucLogin$hfCartRedirect': 'https://etrakit.champaignil.gov/etrakit/ShoppingCart.aspx?iscartview=true',
         'ctl00$ucLogin$hfHome': ' https://etrakit.champaignil.gov/etrakit/default.aspx',
         'ctl00_ucLogin_RadTextBox2_ClientState': '{"enabled":true,"emptyMessage":"Password","validationText":"","valueAsString":"","lastSetTextBoxValue":"Password"}',
         'ctl00_ucLogin_txtPassword_ClientState': '{"enabled":true,"emptyMessage":"","validationText":"","valueAsString":"","lastSetTextBoxValue":""}',
         'ctl00$cplMain$ddSearchBy': 'Permit_Main.ISSUED',
         'ctl00$cplMain$ddSearchOper': 'EQUALS',
         'ctl00$cplMain$txtSearchString': search_string,
         'ctl00_cplMain_tcSearchDetails_ClientState': '{"selectedIndexes":["2"],"logEntries":[],"scrollState":{}}',
         '__ASYNCPOST': 'true',
         'RadAJAXControlID': 'ctl00_RadAjaxManager1',
     }
     request_meta = {'page': 1, 'start_date': str(start_date)}
     yield scrapy.FormRequest(url=response.url,
                              formdata=formdata,
                              method='POST',
                              dont_filter=True,
                              callback=self.parse_details,
                              meta=request_meta)
示例#10
0
 def parse_next(self, response):
     """Submit the next (entity status, search term) pair to the DC search.

     Search terms depend on ``self.start``: a numeric range, an alphabetic
     range, or (for mixed alphanumerics) every ``self.alpha_list`` entry
     followed by a digit 0-9. While ``self.final_list`` is empty it is
     filled with alternating status / term entries; each call pops one pair.

     Yields:
         One ``scrapy.FormRequest`` handled by ``self.parse_for_listpage``
         (errback ``self.handle_form_error``), or nothing when
         ``self.final_list`` is empty.
     """
     first = self.start
     if first.isdigit():
         self.search_elements = SearchCriteria.numberRange(
             self.start, self.end)
     elif first.isalpha():
         self.search_elements = SearchCriteria.strRange(
             self.start, self.end)
     elif first.isalnum():
         # Appends to the existing list rather than replacing it.
         for prefix in self.alpha_list:
             for digit in range(0, 10):
                 self.search_elements.append(prefix + str(digit))

     status_options = response.xpath(
         '//*[@id="BizEntitySearch_EntityStatus"]/option/@value').extract()
     # The first <option> value is skipped.
     drop_down = status_options[1:]

     if not self.final_list:
         for term in self.search_elements:
             for status in drop_down:
                 # Flat layout: status at even indexes, term at odd indexes.
                 self.final_list.extend((status, term))

     print('______________final_list:', self.final_list)
     if not self.final_list:
         return

     val = self.final_list.pop(0)
     search_ = self.final_list.pop(0)
     form_data3 = {
         "BizEntitySearch_String": str(search_),
         "Search": "Search",
         "BizEntitySearch_Type": "EntityName",
         "BizEntitySearch_DepthType": "StartsWith",
         "BizEntitySearch_EntityStatus": str(val),
         "BizEntitySearch_TradeNameStatus": "",
     }
     yield scrapy.FormRequest(
         url="https://corponline.dcra.dc.gov/Home.aspx/ProcessRequest",
         callback=self.parse_for_listpage,
         formdata=form_data3,
         method='POST',
         dont_filter=True,
         errback=self.handle_form_error)
示例#11
0
 def __init__(self,
              start=None,
              end=None,
              startnum=None,
              endnum=None,
              proxyserver=None,
              *a,
              **kw):
     """Initialise the spider and precompute ``self.search_element``.

     ``self.search_element`` is the ``SearchCriteria.strRange`` result for
     ``start``..``end``. ``startnum`` and ``endnum`` are accepted but not
     used here.
     """
     # NOTE(review): the parent always receives ``proxyserver=None``
     # regardless of the argument -- confirm this is intentional.
     parent = super(AlMedicalPersonLicensesSpider, self)
     parent.__init__(start, end, proxyserver=None, *a, **kw)
     self.search_element = SearchCriteria.strRange(start, end)
示例#12
0
 def __init__(self,
              start=None,
              end=None,
              startnum=None,
              endnum=None,
              proxyserver=None,
              *a,
              **kw):
     """Initialise the spider and precompute ``self.year``.

     ``self.year`` is the ``SearchCriteria.strRange`` result for
     ``self.start``..``self.end`` as they stand after the parent
     initialiser has run. ``startnum`` and ``endnum`` are accepted but not
     used here.
     """
     # NOTE(review): the parent always receives ``proxyserver=None``
     # regardless of the argument -- confirm this is intentional.
     parent = super(WyPhysicianLicensesSpider, self)
     parent.__init__(start, end, proxyserver=None, *a, **kw)
     self.year = SearchCriteria.strRange(self.start, self.end)
示例#13
0
    def parse(self, response):
        """Build every (query, entity type) search and post the first one.

        Search keys come from ``SearchCriteria.strRange`` when both
        ``self.start`` and ``self.end`` are alphabetic, otherwise from
        ``SearchCriteria.numberRange``. Each key is combined with every
        entity type; the combinations are stored in ``self.search_lis`` and
        the first one is popped and submitted.

        Yields:
            One ``scrapy.FormRequest`` handled by ``self.parse_data``
            (errback ``self.err_parse_data``) with the submitted combination
            in ``meta['search']``. Nothing is yielded when there are no
            combinations.
        """
        entity_types = [
            'COOPERATIVE', 'CORPORATION', 'LLC', 'PARTNERSHIP', 'TRADENAME'
        ]
        req_url = 'https://hbe.ehawaii.gov/documents/search.html'

        if self.start.isalpha() and self.end.isalpha():
            searchkeys = SearchCriteria.strRange(self.start, self.end)
        else:
            searchkeys = SearchCriteria.numberRange(self.start, self.end, 1)

        self.search_lis = [{
            'query': key,
            'entityType': entity_type
        } for key in searchkeys for entity_type in entity_types]

        # Reset the collector before any request is handed out. It used to
        # be assigned after the ``yield``, i.e. only once the generator
        # resumed.
        self.links = []

        if not self.search_lis:
            # Empty range: nothing to search (pop(0) would raise IndexError).
            return

        search = self.search_lis.pop(0)
        formdata = {
            'beginsWith': 'true',
            'query': search['query'],
            'recordType': 'ALL',
            'status': 'ALL',
            'entityType': search['entityType'],
            'page': '0'
        }
        yield scrapy.FormRequest(url=req_url,
                                 formdata=formdata,
                                 callback=self.parse_data,
                                 dont_filter=True,
                                 meta={'search': search},
                                 errback=self.err_parse_data)
示例#14
0
 def on_search(self, response):
     """Submit the search form once for every key from ``rangeAAAAtoZZZZ``.

     Each submission carries the search term plus the three ASP.NET state
     fields read from ``response``.

     Yields:
         One ``scrapy.FormRequest`` per key, built with
         ``FormRequest.from_response`` against ``self.form_url`` and handled
         by ``self.second_page_crawl``.
     """
     # NOTE(review): the result of this call was never used (it was
     # immediately shadowed by the loop variable). The call is kept in case
     # it has side effects -- confirm whether it can be removed.
     self.form_data(response)

     # The state fields depend only on ``response``, so they are read once
     # instead of once per search key.
     hidden_fields = [
         ("__VIEWSTATE",
          response.xpath(
              "//input[@id='__VIEWSTATE']/@value").extract_first()),
         ("__VIEWSTATEGENERATOR",
          response.xpath(
              "//input[@id='__VIEWSTATEGENERATOR']/@value").extract_first()),
         ("__EVENTVALIDATION",
          response.xpath(
              "//input[@id='__EVENTVALIDATION']/@value").extract_first()),
     ]

     # Iterate the keys directly rather than building a list of dicts first.
     for key in SearchCriteria.rangeAAAAtoZZZZ():
         form_data = {"ctl00$MainContent$txtSearchTerms": key}
         form_data.update(hidden_fields)
         yield scrapy.FormRequest.from_response(
             response,
             url=self.form_url,
             callback=self.second_page_crawl,
             formdata=form_data)
 def parse(self, response):
     """Submit one licensee search per call.

     On the first call the shop-name terms are generated from
     ``self.starta`` / ``self.enda`` and the next entry of
     ``self.search_element`` becomes the last-name term. Every term is
     prefixed with ``*``. Each call pops one shop-name term and submits it
     together with the stored last-name term.

     Yields:
         One ``scrapy.FormRequest`` built from the ``aspnetForm`` form and
         handled by ``self.parse_details``, or nothing when no shop-name
         terms remain.
     """
     if self.check_first:
         self.check_first = False
         self.search_element_a = SearchCriteria.strRange(
             self.starta, self.enda)
         self.search_element1 = '*' + str(self.search_element.pop(0))
     if len(self.search_element_a) > 0:
         val = '*' + str(self.search_element_a.pop(0))
         form_data = {
             'ctl00$ContentPlaceHolder1$txtLastName': str(self.search_element1),
             'ctl00$ContentPlaceHolder1$txtShopName': str(val),
             '__EVENTTARGET': 'ctl00$ContentPlaceHolder1$btnLicenseeSubmit',
         }
         head = {
             'Connection': 'keep-alive',
             'Host': 'alboc.glsuite.us',
             'Origin': 'https://alboc.glsuite.us',
             'Upgrade-Insecure-Requests': '1',
             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
             # The last media type used to read "application<spaces>signed-exchange";
             # the missing "/" made it an invalid media range.
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
             'Referer': 'https://alboc.glsuite.us/GLSuiteWeb/Clients/ALBOC/public/VerificationSearch.aspx',
             'Accept-Encoding': 'gzip, deflate, br',
             'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
         }
         yield scrapy.FormRequest.from_response(response,
                                                headers=head,
                                                formdata=form_data,
                                                formid='aspnetForm',
                                                method="POST",
                                                dont_filter=True,
                                                callback=self.parse_details)
 def search(self, response):
     """Post a permit-number search for the next number in the range.

     The numbers are generated once (while ``self.check_first`` is true)
     from ``int(self.start)``..``int(self.end)``. Each call pops one,
     left-pads it with zeros to five characters and submits it with a
     ``CONTAINS`` operator on ``Permit_Main.PERMIT_NO``.

     Yields:
         One ``FormRequest`` to ``self.start_urls[0]`` handled by
         ``self.parse_list`` with ``meta`` holding ``currentPage`` 1 and the
         padded ``param``, or nothing when no numbers remain.
     """
     if self.check_first:
         self.check_first = False
         self.search_element = SearchCriteria.numberRange(
             int(self.start), int(self.end), 1)
     if len(self.search_element) > 0:
         # str() keeps the padding working if the range yields non-string
         # values; it is a no-op for strings.
         param = str(self.search_element.pop(0)).zfill(5)
         search_url = self.start_urls[0]
         form_data = {
             'ctl00$RadScriptManager1':
             'ctl00$RadScriptManager1|ctl00$cplMain$btnSearch',
             'RadScriptManager1_TSM':
             ';;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en-US:1453655a-6b8d-49b1-94c2-f77a352f5241:ea597d4b:b25378d2;Telerik.Web.UI, Version=2013.2.717.40, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:0507d587-20ad-4e22-b866-76bd3eaee2df:16e4e7cd:ed16cbdc:f7645509:24ee1bba:92fe8ea0:f46195d3:fa31b949:874f8ea2:19620875:490a9d4e:bd8f85e4:b7778d6c:58366029:e330518b:1e771326:8e6f0d33:6a6d718d;',
             'ctl00$ucLogin$hfDashboardRedirect':
             'https://www.cob.org/epermits/dashboard.aspx',
             'ctl00$ucLogin$hfCartRedirect':
             'https://www.cob.org/epermits/ShoppingCart.aspx',
             'ctl00$ucLogin$hfViewEditProfile':
             'static value',
             'ctl00$ucLogin$hfHome':
             'https://www.cob.org/epermits/default.aspx',
             'ctl00$ucLogin$hfSetupAnAccountForPublic':
             'https://www.cob.org/epermits/publicUserAccount.aspx?action=npa',
             'ctl00$ucLogin$hfSetupAnAccountForContractor':
             'https://www.cob.org/epermits/RegistrationConfirmation.aspx',
             'ctl00$ucLogin$hfContractorCSLBVerification':
             'DISABLED',
             'ctl00$ucLogin$ddlSelLogin':
             '******',
             'ctl00$ucLogin$txtLoginId':
             'Username',
             'ctl00_ucLogin_txtLoginId_ClientState':
             '{"enabled":true,"emptyMessage":"Username","validationText":"","valueAsString":"","lastSetTextBoxValue":"Username"}',
             'ctl00$ucLogin$RadTextBox2':
             'Password',
             'ctl00_ucLogin_RadTextBox2_ClientState':
             '{"enabled":true,"emptyMessage":"Password","validationText":"","valueAsString":"","lastSetTextBoxValue":"Password"}',
             'ctl00_ucLogin_txtPassword_ClientState':
             '{"enabled":true,"emptyMessage":"","validationText":"","valueAsString":"","lastSetTextBoxValue":""}',
             'ctl00$hfGoogleKey':
             'UA-5831706-1',
             'ctl00$cplMain$hfActivityMode':
             '',
             'ctl00$cplMain$ddSearchBy':
             'Permit_Main.PERMIT_NO',
             'ctl00$cplMain$ddSearchOper':
             'CONTAINS',
             'ctl00$cplMain$txtSearchString':
             str(param),
             'ctl00_cplMain_rgSearchRslts_ClientState':
             '{"selectedIndexes":["0"],"selectedCellsIndexes":[],"unselectableItemsIndexes":[],"reorderedColumns":[],"expandedItems":[],"expandedGroupItems":[],"expandedFilterItems":[],"deletedItems":[],"hidedColumns":[],"showedColumns":[],"groupColsState":{},"hierarchyState":{},"popUpLocations":{},"draggedItemsIndexes":[]}',
             'ctl00_cplMain_tcSearchDetails_ClientState':
             '{"selectedIndexes":["0"],"logEntries":[],"scrollState":{}}',
             '__EVENTTARGET':
             'ctl00$cplMain$btnSearch',
             '__VIEWSTATE':
             response.xpath(
                 '//*[@id="__VIEWSTATE"]/@value').extract_first(),
             '__VIEWSTATEGENERATOR':
             response.xpath(
                 '//*[@id="__VIEWSTATEGENERATOR"]/@value').extract_first(),
             '__ASYNCPOST':
             'true',
             'RadAJAXControlID':
             'ctl00_RadAjaxManager1'
         }
         currentPage = 1
         yield FormRequest(url=search_url,
                           # Referer must be one URL; the whole start_urls
                           # list used to be passed here.
                           headers={'Referer': search_url},
                           formdata=form_data,
                           callback=self.parse_list,
                           dont_filter=True,
                           meta={
                               'currentPage': currentPage,
                               'param': param
                           })
示例#17
0
    def parse(self, response):
        """Generate the search keys and post the first business-name search.

        Keys come from ``SearchCriteria.strRange`` when both ``self.start``
        and ``self.end`` are alphabetic, otherwise from
        ``SearchCriteria.numberRange``. The first key is popped and sent as
        both the business name and the actual business name; the complete
        form is kept in ``self.form_data``.

        Yields:
            One ``scrapy.FormRequest`` to ``response.url`` handled by
            ``self.parse_two`` with ``meta['page']`` set to 2.
        """
        self.searchkeys = []
        alphabetic = self.start.isalpha() and self.end.isalpha()
        if alphabetic:
            self.searchkeys = SearchCriteria.strRange(self.start, self.end)
        else:
            self.searchkeys = SearchCriteria.numberRange(
                self.start, self.end, 1)
        search = self.searchkeys.pop(0)

        # Fields are inserted in the same order as the original literal.
        form = {
            'search.SearchName': 'express',
            'search.SearchType': 'BusinessName',
            'search.BusinessName': search,
            'search.ActualBusinessName': search,
            'search.BusinessId': '',
            'search.SearchCriteria': 'StartsWith',
            'search.CitizenShipType': '',
            'search.BusinessTypeId': '0',
        }
        blank_fields = (
            'search.BusinessStatusId',
            'search.NaicsCode',
            'search.businessStatus',
            'search.isGoodStanding',
            'search.Country',
            'search.Zip',
            'search.City',
            'search.State',
            'search.OtherState',
            'search.PostalCode',
            'search.AgentType',
            'search.RAFirstName',
            'search.RAMiddleName',
            'search.RALastName',
            'search.RASuffix',
            'search.RAName',
            'search.RAAddress1',
            'search.RAAddress2',
            'search.RACountry',
            'search.RAZip',
            'search.RACity',
            'search.RAState',
            'search.RAOtherState',
            'search.RAPostalCode',
            'search.DirectorFirstName',
            'search.DirectorMiddleName',
            'search.DirectorLastName',
            'search.DirectorSuffix',
            'search.IncorporatorType',
            'search.IncorporatorFirstName',
            'search.IncorporatorMiddleName',
            'search.IncorporatorLastName',
            'search.IncorporatorSuffix',
            'search.IncorporatorEntityName',
            'search.IncorporatorAddress1',
            'search.IncorporatorAddress2',
            'search.IncorporatorCountry',
            'search.IncorporatorZip',
            'search.IncorporatorCity',
            'search.IncorporatorState',
            'search.IncorporatorOtherState',
            'search.IncorporatorPostalCode',
            'search.OrganizerFirstName',
            'search.OrganizerMiddleName',
            'search.OrganizerLastName',
            'search.OrganizerSuffix',
            'search.ReservationNo',
        )
        for field_name in blank_fields:
            form[field_name] = ''
        # The captcha is resolved last, after the search key has been popped.
        form['search.CaptchaResponse'] = self.getcaptchaCoder(
            self.site_key).resolver(response.url)

        self.form_data = form
        yield scrapy.FormRequest(response.url,
                                 formdata=self.form_data,
                                 dont_filter=True,
                                 method='POST',
                                 callback=self.parse_two,
                                 meta={'page': 2})