def parse(self, response):
    """Submit the login form found in ``response``.

    The form is filled with the account name and password below and sent
    with ``self.headers``; the reply is handed to ``self.after_login``.
    ``dont_filter=True`` is passed through to the request.
    """
    # NOTE(review): credentials are hard-coded in this method -- consider
    # moving them to spider settings.
    credentials = {
        'username': 'kytj1',
        'password': '6ujBJ4XQyLeGmJmB',
    }
    login_request = FormRequest.from_response(
        response,
        headers=self.headers,
        formdata=credentials,
        callback=self.after_login,
        dont_filter=True,
    )
    return login_request
示例#2
0
    def _request_next_page(self, response, date_str, callback):
        current_page = int(response.meta['current_page'])

        total_string = response.css('#LblTotal').xpath('./text()').extract_first(default='')

        total = re.search(r'(\d+)', total_string)

        if total:
            # Deal with the next page.
            total = total.group(1)
            number_of_pages = self._get_number_of_pages(int(total))

            if current_page < number_of_pages:
                current_page += 1

                formdata = {
                    'TxtFecha': date_str,
                    'BtnBuscar': 'Buscar',
                    'LwVisitasCR$DpVisitasCR$ctl02$ctl00.x': '1',
                    'LwVisitasCR$DpVisitasCR$ctl02$ctl00.y': '1'
                }

                request = FormRequest.from_response(response,
                                                    formdata=formdata,
                                                    dont_click=True,
                                                    dont_filter=True,
                                                    callback=callback,
                                                    )

                request.meta['date'] = date_str
                request.meta['current_page'] = current_page

                return request
    def parse(self, response):
        """
        Default callback, run once a response has been received:
        1. if a login is required, log in first through a FormRequest;
        2. otherwise keep going with a plain Request for the same URL.

        Either way the follow-up response is handled by ``self.parse_list``.

        :param response: the downloaded page
        :return: generator yielding a single request
        """
        login_required = 'id="lsform"' in response.body

        if not login_required:
            logging.info('in parse, NOT need to login, url: {0}'.format(response.url))
            yield Request(
                url=response.url,
                headers=self.headers,
                callback=self.parse_list,
            )
            return

        # Login needed: simulate it with FormRequest.from_response.
        logging.info('in parse, need to login, url: {0}'.format(response.url))
        login_fields = {'handlekey': 'ls', 'quickforward': 'yes', 'username': '******', 'password': '******'}
        yield FormRequest.from_response(
            response=response,
            headers=self.headers,
            formxpath='//form[contains(@id, "lsform")]',
            formdata=login_fields,
            callback=self.parse_list,
        )
 def parse(self, response):
     """Submit the form matched by ``//form[@class="form-login"]``.

     The login fields below are sent with ``self.headers`` and the reply
     is handled by ``self.after_login``.
     """
     login_request = FormRequest.from_response(
         response,
         headers=self.headers,
         formxpath='//form[@class="form-login"]',
         formdata={
             'username': '******',
             'password': '******',
             'remember_me': '1',
         },
         callback=self.after_login,
     )
     return login_request
 def parse(self, response):
     """Submit the form named ``aspnetForm`` with the two funding-source fields.

     The request carries ``curr_listing_page`` and ``flag`` in its meta and
     its response is handled by ``self.after_login``.
     """
     funding_choices = {
         'Skin$body$FundingSourceChoices$0': '1',
         'Skin$body$FundingSourceChoices$1': '0',
     }
     initial_meta = {'curr_listing_page': 1, 'flag': False}
     yield FormRequest.from_response(
         response,
         formname='aspnetForm',
         formdata=funding_choices,
         meta=initial_meta,
         callback=self.after_login,
     )
示例#6
0
 def parse(self, response):
     """Submit the page's form with fixed search values.

     The fields ``tanggal``, ``origination``, ``destination``, ``adult`` and
     ``infant`` are set to the constants below; the response is handled by
     ``self.parseInfo``.
     """
     search_fields = {
         'tanggal': '20160817#Rabu, 17 Agustus 2016',
         'origination': 'KAC#KIARACONDONG',
         'destination': 'MN#MADIUN',
         'adult': '1',
         'infant': '0',
     }
     yield FormRequest.from_response(
         response,
         formdata=search_fields,
         callback=self.parseInfo,
     )
 def parse(self, response):
     """Post the login form using ``self.username`` and ``self.password``.

     The response to the submission is handled by ``self.after_login``.
     """
     return FormRequest.from_response(
         response,
         formdata={
             'login': self.username,
             'password': self.password,
         },
         callback=self.after_login,
     )
    def parse(self, response):
        """Submit the login form found in ``response``.

        Sends the account name and password below with ``self.headers``;
        the reply goes to ``self.after_login``.  ``dont_filter=True`` is
        passed through to the request.
        """
        # NOTE(review): credentials are hard-coded in this method.
        account = {
            'username': 'kytj1',
            'password': '6ujBJ4XQyLeGmJmB',
        }
        return FormRequest.from_response(
            response,
            headers=self.headers,
            formdata=account,
            callback=self.after_login,
            dont_filter=True,
        )
示例#9
0
    def login(self, response):
        """Submit the login form, asking the user for a captcha when present.

        If the page contains an element named ``captcha-id``, the captcha
        image is downloaded to ``captcha.jpg`` and the solution is read from
        standard input; the request then reuses ``response.meta``.  Otherwise
        only the ``cookiejar`` entry of the meta is forwarded.  In both cases
        the response is handled by ``self.parse``.
        """
        page = Selector(response)
        captcha_ids = page.xpath('//*[@name="captcha-id"]/@value').extract()

        if captcha_ids:
            captcha_id = captcha_ids[0]
            captcha_src = page.xpath('//*[@id="captcha_image"]/@src').extract()[0]
            urllib.urlretrieve(captcha_src, 'captcha.jpg')
            solution = raw_input('please input the captcha:')
            request_meta = response.meta
            fields = {
                'source': 'None',
                'redir': 'https://www.douban.com/people/unlucky_strike/',
                'form_email': '*****@*****.**',
                'form_password': '******',
                'captcha-solution': solution,
                'captcha-id': captcha_id,
                'remember': 'on',
                'login': u'登录',
            }
        else:
            request_meta = {'cookiejar': response.meta['cookiejar']}
            fields = {
                'source': 'None',
                'redir': 'https://www.douban.com/people/unlucky_strike/',
                'form_email': '*****@*****.**',
                'form_password': '******',
                'remember': 'on',
                'login': u'登录',
            }

        yield FormRequest.from_response(
            response,
            meta=request_meta,
            formdata=fields,
            callback=self.parse,
        )
示例#10
0
 def parse(self, response):
     '''Fill in and submit the login form on the login page.

     The form is located by an ``action`` attribute containing "login";
     the response is handled by ``self.parse_home``.
     '''
     credentials = {
         'email': self.username,
         'pass': self.password,
     }
     return FormRequest.from_response(
         response,
         formxpath='//form[contains(@action, "login")]',
         formdata=credentials,
         callback=self.parse_home,
     )
示例#11
0
    def _get_page_request(self, response, page, date):
        """Return the form request that asks the ``gvwConsulta`` grid for ``page``.

        ``date`` is sent as ``txtDesde`` and is also stored in the request
        meta under ``date``.  The response is handled by ``self.parse``.
        """
        postback_fields = {
            "txtDesde": date,
            "__EVENTTARGET": "gvwConsulta",
            "__EVENTARGUMENT": "Page${}".format(page),
        }
        page_request = FormRequest.from_response(
            response,
            formdata=postback_fields,
            dont_filter=True,
            callback=self.parse,
        )
        page_request.meta["date"] = date
        return page_request
示例#12
0
 def currency_form(self, response):
     """
     Log the step and post the currency form with the country and
     currency values below; ``self.currency_changed`` handles the reply.
     """
     self.log('currency_form', level=logging.INFO)
     return FormRequest.from_response(
         response,
         formdata={
             'ddlCountry1': 'United States',
             'ddlCurrency': '503329C6-40CB-47E6-91D1-9F11AF63F706',
         },
         callback=self.currency_changed,
     )
示例#13
0
    def parse_initial_request(self, response):
        """Yield the search request for the date stored in ``response.meta``.

        The date is sent as ``txtDesde`` together with the click coordinates
        of ``btnBuscar`` and copied into the new request's meta; the response
        is handled by ``self.parse_page``.
        """
        search_date = response.meta["date"]

        search_fields = {"txtDesde": search_date, "btnBuscar.x": "1", "btnBuscar.y": "1"}
        search_request = FormRequest.from_response(
            response,
            formdata=search_fields,
            dont_filter=True,
            callback=self.parse_page,
        )
        search_request.meta["date"] = search_date

        yield search_request
示例#14
0
 def parse(self, response):
     """
     Print the custom settings, then fill in and submit the log-in form
     found inside the ``main-container`` div.

     :return: the form request; its response goes to ``self.go_to_search_site``
     """
     print('custom settings:')
     print(self._settings)

     login_form_xpath = '//div[contains(concat(" ", normalize-space(@class), " "), " main-container ")]/descendant::form'
     credentials = {
         'EmailOrUsername': self._settings['username'],
         'Password': self._settings['password'],
     }
     return FormRequest.from_response(
         response,
         formxpath=login_form_xpath,
         formdata=credentials,
         callback=self.go_to_search_site,
     )
示例#15
0
 def parse_start_url(self, response):
     """Download the captcha, ask the user to solve it, and submit the login form.

     The name of the password input and the captcha image URL are read from
     fixed positions in the page.  The image is saved to
     ``/home/shichangtai/code.gif`` and the answer is read from standard
     input.  Returns a one-element list holding the login request, whose
     response is handled by ``self.after_log``.
     """
     page = Selector(response)
     password_field = page.xpath(r'/html/body/div[2]/form/div/input[2]/@name').extract_first()
     captcha_url = page.xpath(r'/html/body/div[2]/form/div/img[1]/@src').extract_first()

     captcha_image = requests.get(captcha_url)
     with open('/home/shichangtai/code.gif', 'wb') as image_file:
         image_file.write(captcha_image.content)
     captcha = raw_input('请输入验证码: ')

     # The meta set here obtains the cookie on this first request; every
     # later request is meant to pass this cookie jar along.
     # Do not set 'dont_merge_cookies' to True.
     login_request = FormRequest.from_response(
         response=response,
         formdata={'mobile': self.account, password_field: self.password, 'code': captcha},
         meta={'cookiejar': 1},
         callback=self.after_log,
     )
     return [login_request]
示例#16
0
 def parse_home(self, response):
     '''Handle login checkpoints, then move on to the user profile.

     Checks, in order, for the approvals-code form, the ``input#u_0_1``
     element and the ``checkpointSubmitButton`` button; each one is answered
     with a form submission that comes back to this method.  When none is
     present, the link titled "Profile" is requested and handled by
     ``self.parse_profile``.  Returns ``None`` when the approvals code is
     empty or not numeric.
     '''
     if response.css('#approvals_code'):
         # 'Approvals Code' checkpoint (ask user to enter code).
         if not self.code:
             # Relay the page's messages through the log, then prompt the
             # user for the approval code.
             headline = response.css('._50f4::text').extract()[0]
             self.log(process_string(headline))
             details = response.css('._3-8y._50f4').xpath('string()').extract()[0]
             self.log(process_string(details))
             self.code = input('Enter the code: ')
         code = str(self.code)
         self.code = code
         if not code or not code.isdigit():
             self.log('Bad approvals code detected.')
             return None
         return FormRequest.from_response(
             response,
             formdata={'approvals_code': code},
             callback=self.parse_home,
         )

     if response.css('input#u_0_1'):
         # 'Save Browser' checkpoint.
         return FormRequest.from_response(
             response,
             formdata={'name_action_selected': 'dont_save'},
             callback=self.parse_home,
             dont_filter=True,
         )

     if response.css('button#checkpointSubmitButton'):
         # `Someone tried to log into your account` warning.
         return FormRequest.from_response(
             response,
             callback=self.parse_home,
             dont_filter=True,
         )

     # No checkpoint left: go to the user profile.
     profile_href = response.css('a[title="Profile"]::attr(href)').extract()[0]
     profile_url = response.urljoin(profile_href)
     return Request(profile_url, callback=self.parse_profile)
示例#17
0
 def _get_page_request(self, response, page, date):
     """Return the form request that asks the ``gvwConsulta`` grid for ``page``.

     ``date`` is sent as ``txtDesde`` and stored in the request meta under
     ``date``; the response is handled by ``self.parse``.
     """
     postback_fields = {
         'txtDesde': date,
         '__EVENTTARGET': 'gvwConsulta',
         '__EVENTARGUMENT': 'Page${}'.format(page),
     }
     page_request = FormRequest.from_response(
         response,
         formdata=postback_fields,
         dont_filter=True,
         callback=self.parse,
     )
     page_request.meta['date'] = date
     return page_request
示例#18
0
 def parse_category(self, response):
     """Print product names and follow the pager links of a category grid.

     Every table row inside the ``datagrid`` div is inspected.  Product
     names found in the row are printed.  For each pager link in the row a
     postback form request is yielded, handled by ``self.parse_items``.
     """
     rows = response.xpath('//div[@class="datagrid"]//tr')
     for row in rows:
         product = row.xpath('td//font/b/span[contains(@id, "main_GDVMain_lblProductName")]/text()').extract()
         if product:
             print(product)

         # Captures the argument list of __doPostBack(...) up to its last quote.
         postbacks = row.xpath('td[@colspan="3"]//a/@href').re(r"doPostBack\(([^)]+')")
         for postback in postbacks:
             # NOTE(review): assumes the captured text has the usual
             # "'<target>','<argument>'" shape -- confirm against the site.
             target_part, _, argument_part = postback.partition(',')
             eventtarget = target_part.strip().strip("'")
             eventargument = argument_part.strip().strip("'")
             yield FormRequest.from_response(
                 response,
                 formdata={
                     '__EVENTTARGET': eventtarget,
                     '__EVENTARGUMENT': eventargument,
                 },
                 callback=self.parse_items,
                 dont_click=True,
             )
示例#19
0
	def login(self, response):
		"""
		Submit the form with id ``login_top`` using ``username`` and
		``password``; the response is handled by ``self.request_threads``.
		"""
		credentials = {
			# "form_sent": "1",
			# "redirect_url": "/index.php",
			"req_username": username,
			"req_password": password,
		}
		return FormRequest.from_response(
			response,
			formxpath='//form[@id="login_top"]',
			formdata=credentials,
			callback=self.request_threads,
		)
示例#20
0
    def parse(self, response):
        """Submit the search form for the "All" option of the ``propinsi`` select.

        The option value is read from the page and sent with an empty
        keyword; the response is handled by ``self.parse_list``.
        """
        # NOTE(review): current Scrapy documents this helper as
        # inspect_response(response, spider) -- confirm the targeted version.
        inspect_response(response)

        option_values = response.xpath(
            '//select[@name="propinsi"]/option[contains(., "All")]/@value'
        ).extract()
        all_provinces_value = option_values[0]

        search_fields = {
            'propinsi': all_provinces_value,
            'keyword': '',
            'submit': 'search!',
        }
        return FormRequest.from_response(
            response,
            formdata=search_fields,
            callback=self.parse_list,
        )
示例#21
0
    def parse_captcha(self, response):
        """Save the captcha image, ask the user for the answer and submit it.

        The response body is written to ``captcha.jpg`` and opened with the
        ``open`` command; the answer is read from standard input.  The form
        named ``formConsultaPublica`` on the page stored in
        ``response.meta['captcha_page']`` is then submitted, with the reply
        handled by ``self.parse_captcha_result``.
        """
        with open('captcha.jpg', 'wb') as image_file:
            image_file.write(response.body)

        subprocess.call(["open", "captcha.jpg"])
        answer = input("Digite o captcha, seu lindo!: ")

        form_page = response.meta['captcha_page']
        form_fields = {'captcha': answer, 'numeroProcesso': '1'}
        yield FormRequest.from_response(
            form_page,
            formname="formConsultaPublica",
            formdata=form_fields,
            callback=self.parse_captcha_result,
        )
示例#22
0
 def parse_initial_request(self, response):
     """Yield the listing request for the date stored in ``response.meta``.

     The date is used for both ``txtFecha`` and ``txtFechaF`` and copied
     into the new request's meta; the response is handled by
     ``self.parse_pages``.
     """
     listing_date = response.meta['date']
     listing_fields = {
         'txtFecha': listing_date,
         'txtFechaF': listing_date,
         'btnListar': 'Listar',
         'DDLFuncionario': '',
     }
     listing_request = FormRequest.from_response(
         response,
         formdata=listing_fields,
         dont_filter=True,
         dont_click=True,
         callback=self.parse_pages,
     )
     listing_request.meta['date'] = listing_date
     yield listing_request
示例#23
0
    def _request_initial_date_page(self, response, date_str, callback):
        """Return the form request for the first result page of ``date_str``.

        The request meta records the date and sets ``current_page`` to 1;
        the response is handled by ``callback``.
        """
        first_page_request = FormRequest.from_response(
            response,
            formdata={
                'TxtFecha': date_str,
                'BtnBuscar': 'Buscar',
            },
            dont_click=True,
            dont_filter=True,
            callback=callback,
        )
        first_page_request.meta['date'] = date_str
        first_page_request.meta['current_page'] = 1
        return first_page_request
示例#24
0
 def parse_home(self, response):
     """Skip the 'save-device' checkpoint, then request the friends page.

     When a link containing ``save-device`` is present, the form is
     submitted with ``dont_save`` and the reply returns to this method.
     Otherwise the "Profile" link is cleaned with ``self.clean_url``,
     ``/friends`` is appended and the page is handled by
     ``self.parse_profile``.
     """
     save_device_links = response.xpath("//div/a[contains(@href,'save-device')]")
     if save_device_links:
         self.logger.info('"save-device" checkpoint. redirecting...')
         return FormRequest.from_response(
             response,
             formdata={'name_action_selected': 'dont_save'},
             callback=self.parse_home,
         )

     # No checkpoint: continue to the profile's friends page.
     profile_hrefs = response.xpath('//a[contains(text(), "Profile")]/@href').extract()
     profile_url = self.get_element(profile_hrefs)
     friends_url = self.clean_url(profile_url) + '/friends'
     return scrapy.Request(url=friends_url, callback=self.parse_profile)
示例#25
0
    def parse_initial_request(self, response):
        """Yield the search request for the date stored in ``response.meta``.

        The date is sent as ``txtDesde`` with the ``btnBuscar`` click
        coordinates and copied into the new request's meta; the response is
        handled by ``self.parse_page``.
        """
        search_date = response.meta['date']
        search_fields = {
            'txtDesde': search_date,
            'btnBuscar.x': '62',
            'btnBuscar.y': '15',
        }
        search_request = FormRequest.from_response(
            response,
            formdata=search_fields,
            dont_filter=True,
            callback=self.parse_page,
        )
        search_request.meta['date'] = search_date

        yield search_request
示例#26
0
    def _get_page_request(self, response, page, date):
        """Return the form request that asks the ``gvwConsulta`` grid for ``page``.

        ``date`` is sent as ``txtDesde`` and stored in the request meta under
        ``date``; the response is handled by ``self.parse``.
        """
        postback_fields = {
            'txtDesde': date,
            '__EVENTTARGET': 'gvwConsulta',
            '__EVENTARGUMENT': 'Page${}'.format(page),
        }
        page_request = FormRequest.from_response(
            response,
            formdata=postback_fields,
            dont_filter=True,
            callback=self.parse,
        )
        page_request.meta['date'] = date
        return page_request
示例#27
0
    def _request_page(self, response, page_number, date_str, callback):
        """Return the postback request for ``page_number`` of the ``DTGVisitas`` grid.

        ``date_str`` is used for both date fields and stored in the request
        meta under ``date``; the response is handled by ``callback``.
        """
        postback_fields = {
            'txtFecha': date_str,
            'txtFechaF': date_str,
            'DDLFuncionario': '[ -- Seleccione Funcionario o Empleado -- ]',
            '__EVENTTARGET': 'DTGVisitas',
            '__EVENTARGUMENT': 'Page${}'.format(page_number),
        }
        page_request = FormRequest.from_response(
            response,
            formdata=postback_fields,
            dont_filter=True,
            dont_click=True,
            callback=callback,
        )
        page_request.meta['date'] = date_str
        return page_request
示例#28
0
 def parse_initial_request(self, response):
     """Yield the listing request for the date stored in ``response.meta``.

     The date fills both ``txtFecha`` and ``txtFechaF`` and is copied into
     the new request's meta; the response is handled by
     ``self.parse_pages``.
     """
     listing_date = response.meta['date']
     listing_request = FormRequest.from_response(
         response,
         formdata={
             'txtFecha': listing_date,
             'txtFechaF': listing_date,
             'btnListar': 'Listar',
             'DDLFuncionario': '',
         },
         dont_filter=True,
         dont_click=True,
         callback=self.parse_pages,
     )
     listing_request.meta['date'] = listing_date
     yield listing_request
示例#29
0
文件: zhihu.py 项目: Fly365/py-learn
 def post_login(self, response):
     """Submit the login form together with the page's ``_xsrf`` token.

     Returns a one-element list holding the form request; it forwards the
     ``cookiejar`` from ``response.meta`` and its response is handled by
     ``self.after_login``.
     """
     print("preparing login")
     # Grab the value of the _xsrf field from the returned page; it is
     # needed for the form submission to succeed.
     token_values = Selector(response).xpath("//input[@name='_xsrf']/@value").extract()
     xsrf = token_values[0]
     print(xsrf)

     # FormRequest.from_response is the Scrapy helper used to POST a form.
     # After a successful login the after_login callback is invoked.
     login_fields = {
         "_xsrf": xsrf,
         "email": "*****@*****.**",
         "password": "******",
     }
     login_request = FormRequest.from_response(
         response,
         meta={"cookiejar": response.meta["cookiejar"]},
         headers=self.headers,
         formdata=login_fields,
         callback=self.after_login,
         dont_filter=True,
     )
     return [login_request]
示例#30
0
    def parse_initial_request(self, response):
        """Yield the search request for the date stored in ``response.meta``.

        The date is sent as ``txtDesde`` with the ``btnBuscar`` click
        coordinates and copied into the new request's meta; the response is
        handled by ``self.parse_page``.
        """
        search_date = response.meta['date']

        search_request = FormRequest.from_response(
            response,
            formdata={
                'txtDesde': search_date,
                'btnBuscar.x': '1',
                'btnBuscar.y': '1',
            },
            dont_filter=True,
            callback=self.parse_page,
        )
        search_request.meta['date'] = search_date

        yield search_request
def _check_response(response, callback, **kwargs):
    # Handle 'save-device' redirection
    if response.xpath("//div/a[contains(@href,'save-device')]"):
        return FormRequest.from_response(
            response,
            formdata={'name_action_selected': 'dont_save'},
            callback=lambda res: callback(),
            **kwargs)

    # Handle GDPR redirection
    if response.xpath("//div/a[contains(@href,'consent_step')]"):
        return _handle_gdpr_consent_step(response, callback, **kwargs)

    if '/login' in response.url:
        raise CloseSpider('login_failed')

    return callback()
示例#32
0
    def parse_start_url(self, response):
        """Submit the login form; ``self.after_login`` handles the reply.

        Returns a one-element list holding the form request.
        """
        # If the login involves a captcha or similar, handling for it can be
        # added here, e.g. sending it to a captcha-solving service, typing it
        # in manually, or processing it with PIL.
        login_fields = {
            'log': 'account',
            'pwd': 'password',
            'rememberme': "forever",
            'wp-submit': "登录",
            'redirect_to': "http://www.haoduofuli.wang/wp-admin/",
            'testcookie': "1",
        }
        login_request = FormRequest.from_response(
            response,
            formdata=login_fields,
            callback=self.after_login,
        )
        return [login_request]
 def parse(self, response):
     """Look up company information through the company search page.

     Yields 26 form requests, one per ``searchIndex`` value 0-25, each
     re-targeted at ``self.search_corp``.  Responses are handled by
     ``self.parse_companies`` and failures by ``self.errback_scraping``.
     """
     for index in range(26):  # one search per letter of the alphabet
         request_meta = {
             'seq': index,
             'first': True,
             'current': 1,
         }
         # NOTE(review): self.search_corp is passed positionally, which
         # from_response takes as ``formname`` -- confirm this is intended;
         # the URL is applied by the replace() call below.
         search_request = FormRequest.from_response(
             response,
             self.search_corp,
             formdata={'searchIndex': str(index)},
             headers={'Referer': self.search_index},
             callback=self.parse_companies,
             errback=self.errback_scraping,
             meta=request_meta,
         )
         yield search_request.replace(url=self.search_corp)
示例#34
0
    def _request_page(self, response, page_number, date_str, callback):
        """Return the postback request for ``page_number`` of the ``DTGVisitas`` grid.

        ``date_str`` fills both date fields and is stored in the request
        meta under ``date``; the response is handled by ``callback``.
        """
        postback_fields = {
            'txtFecha': date_str,
            'txtFechaF': date_str,
            'DDLFuncionario': '',
            '__EVENTTARGET': 'DTGVisitas',
            '__EVENTARGUMENT': 'Page${}'.format(page_number),
        }
        page_request = FormRequest.from_response(
            response,
            formdata=postback_fields,
            dont_filter=True,
            dont_click=True,
            callback=callback,
        )
        page_request.meta['date'] = date_str
        return page_request
示例#35
0
    def login(self, response):
        """Show the captcha in a Tk window, then submit the login form.

        ``response`` carries the captcha image in its body and the login page
        in ``response.meta['original response']``.  The names of the three
        empty form inputs on that page are used, in order, as the username,
        password and captcha keys.  Credentials are read from the JSON file
        ``config``.  The captcha text is taken from the entry box when the
        user presses Return.  The resulting request is handled by
        ``self.click_all_drugs``.
        """
        logging.debug("in login()")
        image = Image.open(BytesIO(response.body))
        top = tkinter.Tk()
        top.title("Solving Captcha")
        top.geometry("400x200")
        img = ImageTk.PhotoImage(image, size="400x200")
        imagelabel = tkinter.Label(top, image=img)
        textentry = tkinter.Entry(top, font="Helvetica 20 bold")
        textentry.focus_set()

        original_response = response.meta['original response']
        empty_inputs = original_response.selector.xpath(
            "//form/div[@class='formInputs']/div/input[not(@value) or @value='']"
        ).extract()

        if len(empty_inputs) != 3:
            # Unexpected form layout: keep a copy of the page for inspection.
            with open('strange.html', 'wb') as f:
                f.write(original_response.body)

        username_key = Selector(
            text=empty_inputs[0]).xpath(".//input/@name").extract_first()
        password_key = Selector(
            text=empty_inputs[1]).xpath(".//input/@name").extract_first()
        captcha_key = Selector(
            text=empty_inputs[2]).xpath(".//input/@name").extract_first()

        # Read the credentials file once and close it deterministically
        # (it used to be opened twice and never closed).
        with open('config') as config_file:
            config = json.load(config_file)

        formdata = {
            username_key: config['username'],
            password_key: config['password']
        }

        def callback(en):
            # Store the typed captcha and close the window, which ends
            # mainloop() below.
            formdata[captcha_key] = textentry.get()
            top.destroy()

        textentry.bind("<Return>", callback)
        imagelabel.pack(side="top", fill="both", expand="yes")
        textentry.pack(side="bottom", fill="both", expand="yes")
        top.mainloop()

        logging.debug('applying formdata in original response {}'.format(
            original_response))
        yield FormRequest.from_response(original_response,
                                        formdata=formdata,
                                        callback=self.click_all_drugs)
示例#36
0
    def parse_captcha(self, response):
        """Save the captcha image, ask the user for the answer and submit it.

        The response body is written to ``captcha.jpg`` and opened with the
        ``open`` command; the answer is read from standard input.  The form
        named ``formConsultaPublica`` on the page stored in
        ``response.meta['captcha_page']`` is then submitted, with the reply
        handled by ``self.parse_captcha_result``.
        """
        with open('captcha.jpg', 'wb') as captcha_file:
            captcha_file.write(response.body)

        subprocess.call(["open", "captcha.jpg"])

        typed_answer = input("Digite o captcha, seu lindo!: ")
        page_with_form = response.meta['captcha_page']

        yield FormRequest.from_response(
            page_with_form,
            formname="formConsultaPublica",
            formdata={
                'captcha': typed_answer,
                'numeroProcesso': '1',
            },
            callback=self.parse_captcha_result,
        )
示例#37
0
 def login(self, response):
     """Wait six seconds, then submit the login form.

     Returns a one-element list holding the form request; it forwards the
     ``cookiejar`` from ``response.meta``, is sent with
     ``self.send_headers`` and is answered by ``self.after_login``.
     """
     time.sleep(6)
     login_fields = {
         "pwuser": "******",
         "pwpwd": "1qaz2wsx",
         "question": "0",
         "step": "2",
         "lgt": "0",
         "customquest": "",
         "answer": "",
         "head_login": "",
         "jumpurl": "http://bbs.lcdhome.net/index.php",
     }
     login_request = FormRequest.from_response(
         response,
         meta={'cookiejar': response.meta['cookiejar']},
         headers=self.send_headers,
         formdata=login_fields,
         callback=self.after_login,
     )
     return [login_request]
示例#38
0
 def parse_start_url(self, response):
     """
     Submit the login form; ``self.after_login`` handles the reply.

     If the login involves a captcha or similar, handling for it can be
     added here, e.g. sending it to a captcha-solving service, typing it in
     manually, or processing it with PIL.

     Returns a one-element list holding the form request.
     """
     formdate = {
         "log": account,
         "pwd": password,
         # Field names corrected from "remenberme" / "testcooke": the
         # misspelled keys were sent as extra fields instead of setting the
         # form's own inputs.
         "rememberme": "forever",
         "wp-submit": "登录",
         "redirect_to": "http://www.haoduofuli.wang/wp-admin/",
         "testcookie": "1"
     }
     return [
         FormRequest.from_response(response,
                                   formdata=formdate,
                                   callback=self.after_login)
     ]
示例#39
0
 def tongshi_1(self, response):
     """Yield one postback request per course type '02' to '05'.

     Each request selects the radio button ``gridGModule$ctl<type>$radioButton``
     on ``speltyCommonCourse.aspx`` and carries a ``Course`` item, with
     ``course_type`` set, in its meta.  Responses go to ``self.tongshi_2``.
     """
     target_url = ELECT_URL + 'speltyCommonCourse.aspx'
     for course_type in ('02', '03', '04', '05'):
         radio_name = 'gridGModule$ctl' + course_type + '$radioButton'
         postback_fields = {radio_name: 'radioButton', '__EVENTTARGET': radio_name}

         course = Course()
         course['course_type'] = course_type

         yield FormRequest.from_response(
             response,
             dont_filter=True,
             url=target_url,
             formdata=postback_fields,
             meta={'item': course},
             callback=self.tongshi_2,
         )
 def login(self, response):
     """
     Log into the forums by submitting the ``login_form`` form.

     :param response: page containing the login form
     :return: the form request; ``self.verify_login`` handles its response
     """
     credentials = {
         'username': self.uname,
         'password': self.password,
         # "checked" is the "use https" checkbox
         'checked': 'checked',
     }
     return FormRequest.from_response(
         response,
         formdata=credentials,
         formxpath='//form[@class="login_form"]',
         callback=self.verify_login,
     )
示例#41
0
 def parse(self, response):
     """Post the search form with fixed query values.

     ``__VIEWSTATE`` and ``__VIEWSTATEGENERATOR`` are copied from the page;
     the remaining fields are the constants below.  The response is handled
     by ``self.parse_item``.
     """
     view_state = response.xpath("//input[@id='__VIEWSTATE']/@value").get()
     view_state_generator = response.xpath(
         "//input[@id='__VIEWSTATEGENERATOR']/@value").get()

     search_fields = {
         '__EVENTTARGET': 'btnSearch',
         '__VIEWSTATE': view_state,
         '__VIEWSTATEGENERATOR': view_state_generator,
         'today': '20210121',
         'sortBy': 'shareholding',
         'sortDirection': 'desc',
         'txtShareholdingDate': '2021/01/20',
         'txtStockCode': '00001',
     }
     yield FormRequest.from_response(
         response,
         formdata=search_fields,
         callback=self.parse_item,
     )
示例#42
0
    def post_login(self, response):
        """Submit the login form together with the page's ``_xsrf`` token.

        Returns a one-element list holding the form request, sent with
        ``self.headers`` and ``self.cookies``; its response is handled by
        ``self.after_login``.
        """
        # Call form of print: same output on Python 2, valid on Python 3.
        print('Preparing login')
        # Grab the value of the _xsrf field from the returned page; it is
        # needed for the form submission to succeed.
        xsrf = response.xpath('//input[@name="_xsrf"]/@value').extract()[0]

        # FormRequest.from_response is the Scrapy helper used to POST a form.
        # After a successful login the after_login callback is invoked.
        return [FormRequest.from_response(response,   # "http://www.zhihu.com/#signin",
                            # meta={'cookiejar': response.meta['cookiejar']},
                            headers=self.headers,  # note the headers here
                            formdata={
                                '_xsrf': xsrf,
                                'email': EMAIL,
                                'password': PASSWORD
                            },
                            cookies=self.cookies,
                            callback=self.after_login,
                            )]
示例#43
0
    def parse(self, response):
        """Submit one resolution-form request per configured government index.

        For every ``i`` in ``self.gov_indexes`` the form on ``response`` is
        posted with the single field ``self.gov_number_header_key % i`` set
        to ``self.gov_number_header_value``.  Each response is handled by
        ``self.parse_form_result``.
        """
        for gov_index in self.gov_indexes:
            # One form submission per government, selected by its field name.
            field_name = self.gov_number_header_key % gov_index
            yield FormRequest.from_response(
                response,
                formdata={field_name: self.gov_number_header_value},
                callback=self.parse_form_result,
            )
示例#44
0
    def parse(self, resp):
        """Log in through the ``poeForm`` form.

        Credentials come from the ``credentials`` module when it can be
        imported; otherwise they are read interactively.  The response to
        the form submission is handled by ``self.after_login``.
        """
        try:
            import credentials
            username, password = credentials.USERNAME, credentials.PASSWORD
        except ImportError:
            import getpass
            print("NO CREDENTIALS")
            username = raw_input("Username (Email): ").strip()
            password = getpass.getpass("Password: ")
        # NOTE(review): the original text between the getpass prompt and the
        # form XPath was lost to credential scrubbing; the call opener and
        # the ``formxpath`` keyword below are a reconstruction -- confirm
        # against the upstream source.
        return FormRequest.from_response(
            resp,
            formxpath="//form[contains(@class,'poeForm')]",
            formdata={
                'login_email': username,
                'login_password': password,
            },
            callback=self.after_login,
        )
示例#45
0
    def parse(self, response):
        """Post the login panel to the form-listener URL for this session.

        The query string of ``response.url`` is stored in ``self.id_number``
        and substituted into the login URL.  Returns a one-element list
        holding the form request; ``self.after_login`` handles its response.
        """
        self.id_number = urlparse(response.url).query

        url_template = 'https://login.digdag.nl/partououd/ouder?%PARAM%-1.IFormSubmitListener-form'
        login_url = url_template.replace('%PARAM%', self.id_number)

        login_fields = {
            'loginpanel:gebruikersnaam': USERNAME,
            'loginpanel:wachtwoord': PASSWORD,
            'loginpanel:rememberme': 'False',
            'loginpanel:inloggen': 'x',
        }
        login_request = FormRequest.from_response(
            response,
            url=login_url,
            formdata=login_fields,
            callback=self.after_login,
        )
        return [login_request]
示例#46
0
 def start_login(self, response):
     """Post the merchant credentials to the login handler.

     The password is sent as ``md5(upper(md5(pwd)))``, both digests
     hex-encoded.

     :return: a one-element list holding the login ``FormRequest``, whose
         response is handled by ``self.check_info``
     """
     account = self.uid
     # First pass: hex digest of the raw password, upper-cased.
     inner_digest = hashlib.md5(self.pwd).hexdigest().upper()
     # Second pass: hex digest of that upper-cased string.
     hashed_password = hashlib.md5(inner_digest).hexdigest()

     login_fields = {
         'mch_acc': account,
         'mch_pwd': hashed_password,
         'auto': 'false',
     }
     login_request = FormRequest.from_response(
         response,
         url='https://weimaqi.net/admin_mchm_new/control/Handler.ashx?action=login',
         method='POST',
         formdata=login_fields,
         callback=self.check_info,
         dont_filter=True,
     )
     return [login_request]
示例#47
0
文件: zhihu.py 项目: joeeeeee/python
 def post_login(self, response):
     """Submit the login form, echoing back the page's ``_xsrf`` token.

     Raises ``IndexError`` when the page has no ``_xsrf`` input.

     :return: a one-element list holding the login ``FormRequest``, whose
         response is handled by ``self.after_login``
     """
     # Grab the hidden _xsrf value from the returned page; the form
     # submission only succeeds when it is sent back.
     token_values = Selector(response).xpath(
         '//input[@name="_xsrf"]/@value').extract()
     xsrf = token_values[0]

     login_fields = {
         '_xsrf': xsrf,
         'email': '*****@*****.**',
         'password': '******',
     }
     # FormRequest.from_response is Scrapy's helper for posting a form.
     # Note the custom headers, and that the incoming cookiejar meta value
     # is forwarded unchanged.
     login_request = FormRequest.from_response(
         response,
         meta={'cookiejar': response.meta['cookiejar']},
         headers=self.headers,
         formdata=login_fields,
         callback=self.after_login,
         dont_filter=True,
     )
     return [login_request]
示例#48
0
 def post_login(self, response):
     """Submit the login form found on ``response``.

     The hidden ``family`` input is read from the page and printed for
     debugging; the value actually submitted for ``family`` is the empty
     string. Raises ``IndexError`` when the page has no ``family`` input.

     :param response: the login page; its ``cookiejar`` meta value is
         forwarded to the login request
     :return: the login ``FormRequest``, whose response is handled by
         ``self.after_login``
     """
     print('Preparing login')
     # Read the hidden "family" field from the returned page.
     family = Selector(response).xpath('//input[@name="family"]/@value').extract()[0]
     print(family)
     return FormRequest.from_response(
         response,
         meta={'cookiejar': response.meta['cookiejar']},
         headers=self.headers,
         formdata={
             'family': '',
             'username': '******',
             'password': '******',
             # Form values must be strings: Scrapy url-encodes them and
             # raises TypeError on an int.
             'remember': '1',
         },
         callback=self.after_login,
     )
示例#49
0
 def post_login(self, response):
     """Submit the login form with a username and password only.

     No ``_xsrf`` token is extracted or sent here.

     :return: a one-element list holding the login ``FormRequest``, whose
         response is handled by ``self.after_login``
     """
     print('Preparing login')

     login_fields = {
         'username': '******',
         'password': '******',
     }
     # FormRequest.from_response is Scrapy's helper for posting a form.
     # Note the custom headers, and that the incoming cookiejar meta value
     # is forwarded unchanged.
     login_request = FormRequest.from_response(
         response,
         meta={'cookiejar': response.meta['cookiejar']},
         headers=self.headers,
         formdata=login_fields,
         callback=self.after_login,
         dont_filter=True,
     )
     return [login_request]
示例#50
0
文件: login.py 项目: eilinge/scrapy
    def parse_login(self, response):
        """Yield the login request, echoing back the form's hidden fields.

        The ``_next``, ``_formname`` and ``_formkey`` hidden inputs are read
        from the page and submitted together with the credentials. Raises
        ``IndexError`` when any of them is missing from the page.
        """
        hidden = {}
        for field in ('_next', '_formname', '_formkey'):
            matches = response.xpath(
                '//input[@name="%s"]/@value' % field).extract()
            hidden[field] = matches[0]

        formdata = {
            'email': '*****@*****.**',
            'password': '******',
            '_next': hidden['_next'],
            '_formkey': hidden['_formkey'],
            '_formname': hidden['_formname'],
        }

        # Forward the cookiejar meta value of the incoming response.
        session_meta = {'cookiejar': response.meta['cookiejar']}
        yield FormRequest.from_response(
            response,
            formdata=formdata,
            meta=session_meta,
            callback=self.parse_after,
        )
示例#51
0
    def parse_shipping(self, response):
        """Record the free-delivery threshold, then add one item to the basket.

        The threshold is parsed from the page's USP list and multiplied by
        ``self.exchange_rate``; it is set to 0 when either the text or the
        exchange rate is missing. The add-to-basket form is then submitted
        and its response handed to ``self.parse_shipping1``.
        """
        threshold_matches = response.xpath(
            '//ul[@class="uspContent"]/li/text()'
        ).re(u'Free Standard Delivery over \xa3(.*)')

        if not (threshold_matches and self.exchange_rate):
            self.free_delivery_over = 0
        else:
            self.free_delivery_over = extract_price(threshold_matches[0]) * self.exchange_rate
            self.log('Free delivery over {} USD'.format(self.free_delivery_over))

        # ASP.NET state fields are echoed back from the page; the remaining
        # fields set quantity 1 and supply click coordinates for the
        # add-to-basket image button.
        basket_form = {
            '__VIEWSTATEGENERATOR': response.xpath('//input[@name="__VIEWSTATEGENERATOR"]/@value').extract()[0],
            '__EVENTVALIDATION': response.xpath('//input[@name="__EVENTVALIDATION"]/@value').extract()[0],
            '__VIEWSTATE': response.xpath('//input[@name="__VIEWSTATE"]/@value').extract()[0],
            '__EVENTTARGET': '',
            'ctl00$ScriptManager1': '',
            'ctl00$ContentMain$product_details1$dd_quantity': '1',
            'ctl00$ContentMain$product_details1$imgbtn_addToBasket.x': '0',
            'ctl00$ContentMain$product_details1$imgbtn_addToBasket.y': '0',
        }

        yield FormRequest.from_response(
            response,
            formname='aspnetForm',
            formdata=basket_form,
            callback=self.parse_shipping1,
        )
示例#52
0
    def login(self, response):
        """
        Submit the sign-in form using the credentials held in `self.credential`
        :param response: the /login page response
        :return: a FormRequest filled with the credentials, whose response is passed to `login_callback`
        """
        credentials = {
            'username': self.credential.username,
            'password': self.credential.password,
        }

        return FormRequest.from_response(
            response,
            callback=self.login_callback,
            # Target the sign-in form and submit it through its submit control.
            formxpath='//form[@class="form-signin"]',
            formdata=credentials,
            clickdata={'type': 'submit'},
        )
    def parse(self, response):
        """Yield a ``Gazette`` per table row, then follow the "Próxima" link.

        Rows containing ``<th>`` cells are skipped. Pagination is driven by
        an ASP.NET postback: the target is pulled out of the link's
        ``href`` with ``self.JAVASCRIPT_POSTBACK_REGEX`` and posted back
        through the page form, with this method as the callback again.
        """
        for row in response.css("#corpo table tr"):
            # Header rows carry <th> cells and no gazette data.
            if row.css("th").extract():
                continue

            raw_date = row.css(self.GAZETTE_DATE_CSS).extract_first()
            gazette_date = dateparser.parse(raw_date, languages=["pt"]).date()
            file_url = row.css(self.GAZETTE_URL_CSS).extract_first()
            title = row.css(self.GAZETTE_NAME_CSS).extract_first()

            yield Gazette(
                date=gazette_date,
                file_urls=[file_url],
                is_extra_edition="Extra" in title,
                territory_id=self.TERRITORY_ID,
                power="executive_legislature",
                scraped_at=datetime.utcnow(),
            )

        for pager_item in response.css(self.NEXT_PAGE_LINK_CSS):
            # Only the link labelled "Próxima" is followed.
            if pager_item.css("a::text").extract_first() != "Próxima":
                continue

            href = pager_item.css("a::attr(href)")
            postback_target = href.re(self.JAVASCRIPT_POSTBACK_REGEX).pop()

            postback_fields = {
                "__EVENTARGUMENT": "",
                "__EVENTTARGET": postback_target,
            }
            yield FormRequest.from_response(
                response,
                callback=self.parse,
                formname="aspnetForm",
                formxpath="//form[@id='aspnetForm']",
                formdata=postback_fields,
                dont_click=True,
                dont_filter=True,
                method="POST",
            )
    def login_to_library(self, response):
        """Log in when the page shows a login form, otherwise re-request it.

        Either way the ``csv_lines`` meta value is carried along and the
        resulting response is handled by ``self.parse_csv_lines``.
        """
        csv_lines = response.meta['csv_lines']

        if not response.xpath(self.login_form_xpath):
            # No login form on the page: fetch the same URL again and go
            # straight to parsing.
            yield scrapy.Request(
                response.url,
                dont_filter=True,
                meta={'csv_lines': csv_lines},
                callback=self.parse_csv_lines,
            )
            return

        configured = self.settings['CREDENTIALS']
        login_fields = {
            'user': configured['user'],
            'pass': configured['pass'],
        }
        yield FormRequest.from_response(
            response,
            formxpath=self.login_form_xpath,
            formdata=login_fields,
            meta={'csv_lines': csv_lines},
            dont_filter=True,
            callback=self.parse_csv_lines,
        )
示例#55
0
文件: ces.py 项目: PubFork/Myrepo
    def post_login(self, response):
        """
        Called once the login page has been fetched: fills in the login
        form found on that page and submits it.
        :param response: the page returned by the login endpoint
        :return: the login FormRequest, whose response is handled by `after_login`
        """
        login_fields = {
            'form_email': '******',
            'form_password': '******',
        }

        # Send the form request, forwarding the incoming cookiejar meta
        # value and using the dedicated POST headers.
        session_meta = {'cookiejar': response.meta['cookiejar']}
        return FormRequest.from_response(
            response=response,
            meta=session_meta,
            headers=self.post_headers,
            formdata=login_fields,
            callback=self.after_login,
        )
示例#56
0
    def login(cls, response: HtmlResponse):
        """Build the login request from the page's embedded ``init-config`` JSON.

        The ``post_key`` field is taken from the ``pixivAccount.postKey``
        entry of that JSON. The ``pixiv_id`` and ``password`` fields are
        submitted empty here. The response is handled by ``cls.success``.
        """
        raw_config = response.xpath('//input[@id="init-config"]/@value').extract()[0]
        auth_info = json.loads(raw_config)

        login_fields = {
            'captcha': '',
            'g_recaptcha_response': '',
            'post_key': auth_info['pixivAccount.postKey'],
            'source': 'pc',
            'ref': 'wwwtop_accounts_index',
            'pixiv_id': '',
            'password': '',
        }
        return FormRequest.from_response(
            response,
            formdata=login_fields,
            callback=cls.success,
        )
示例#57
0
 def parse(self, response):
     '''
         parse : submit the "connection" login form
         :param response: page containing the login form
         :type response: object
         :return yield: the login FormRequest, handled by after_login
         :rtype: iterator
     '''
     print('\n')
     # The hidden token input has to be sent back with the credentials.
     token_selector = response.xpath("//input[@id='connection__token']/@value")
     csrf_token = token_selector.get()

     login_fields = {
         'connection[_token]': csrf_token,
         'connection[mail]': self.user,
         'connection[password]': self.password,
         'connection[stay_connected]': '1',
     }
     yield FormRequest.from_response(
         response,
         formxpath="//form[@name='connection']",
         formdata=login_fields,
         callback=self.after_login,
     )
示例#58
0
 def parse(self, response):
     """Submit the query form for the stock id carried in the request meta.

     The ASP.NET ``__VIEWSTATE`` and ``__EVENTVALIDATION`` values are read
     from the page and posted back with the search fields. The ``item``
     meta value is passed on to ``self.parse_after_form_submit``.
     """
     item = response.meta['item']
     page = Selector(response)

     # ASP.NET state fields that must be echoed back when posting the form.
     view_state = page.xpath('.//input[@id="__VIEWSTATE"]/@value').extract()[0]
     event_validation = page.xpath('.//input[@id="__EVENTVALIDATION"]/@value').extract()[0]

     form_fields = {
         '__EVENTTARGET': '',
         '__EVENTARGUMENT': '',
         '__VIEWSTATE': view_state,
         '__EVENTVALIDATION': event_validation,
         'HiddenField_spDate': '',
         'HiddenField_page': 'PAGE_BS',
         'txtTASKNO': item['stockid'],
         'hidTASKNO': '',
         'btnOK': u'查詢',
     }

     # Register the handler for the response that follows the submission.
     submit_request = FormRequest.from_response(
         response,
         formdata=form_fields,
         callback=self.parse_after_form_submit,
         dont_filter=True,
     )
     submit_request.meta['item'] = item
     yield submit_request
示例#59
0
 def search_form_clicked(self, response):
     """Yield a request that submits the form selected by ``formnumber=1``.

     No callback is set here; this is the place to extract links to follow
     and return further requests for them, each with its own callback.
     """
     form_request = FormRequest.from_response(response, formnumber=1)
     yield form_request