def parse(self, response):
    """Submit the login form found in ``response``.

    Returns one ``FormRequest`` that carries ``self.headers`` and the
    account credentials; the reply is handled by ``self.after_login``.
    """
    # NOTE(review): the credentials are hard-coded in the source; consider
    # supplying them through spider arguments or settings instead.
    credentials = {
        'username': 'kytj1',
        'password': '6ujBJ4XQyLeGmJmB',
    }
    login_request = FormRequest.from_response(
        response,
        headers=self.headers,
        formdata=credentials,
        callback=self.after_login,
        dont_filter=True,
    )
    return login_request
def _request_next_page(self, response, date_str, callback):
    """Build the request for the page following the current one, if any.

    The current page number is read from ``response.meta['current_page']``
    and the record total from the text of the ``#LblTotal`` element.

    Returns a ``FormRequest`` for the next page, or ``None`` when no total
    can be found or the current page is already the last one.
    """
    page = int(response.meta['current_page'])
    label_text = response.css('#LblTotal').xpath('./text()').extract_first(default='')
    match = re.search(r'(\d+)', label_text)
    if not match:
        return None

    page_count = self._get_number_of_pages(int(match.group(1)))
    if page >= page_count:
        return None

    # There is a next page: replay the search form with the pager fields.
    next_page = page + 1
    pager_fields = {
        'TxtFecha': date_str,
        'BtnBuscar': 'Buscar',
        'LwVisitasCR$DpVisitasCR$ctl02$ctl00.x': '1',
        'LwVisitasCR$DpVisitasCR$ctl02$ctl00.y': '1',
    }
    next_request = FormRequest.from_response(
        response,
        formdata=pager_fields,
        dont_click=True,
        dont_filter=True,
        callback=callback,
    )
    next_request.meta['date'] = date_str
    next_request.meta['current_page'] = next_page
    return next_request
def parse(self, response):
    """Default callback, invoked once the response arrives.

    1. If a login is required, log in to the forum with a ``FormRequest``.
    2. If no login is required, continue with a plain ``Request``.

    Either way the follow-up response is handled by ``self.parse_list``.

    :param response: the page that was fetched
    :return: generator yielding a single request
    """
    # Test against the decoded text: ``response.body`` is bytes, so a
    # ``str in bytes`` membership test raises TypeError on Python 3.
    if 'id="lsform"' in response.text:
        # Login required: simulate it with FormRequest.from_response.
        logging.info('in parse, need to login, url: {0}'.format(response.url))
        form_data = {
            'handlekey': 'ls',
            'quickforward': 'yes',
            'username': '******',
            'password': '******',
        }
        request = FormRequest.from_response(
            response=response,
            headers=self.headers,
            formxpath='//form[contains(@id, "lsform")]',
            formdata=form_data,
            callback=self.parse_list,
        )
    else:
        logging.info('in parse, NOT need to login, url: {0}'.format(response.url))
        request = Request(
            url=response.url,
            headers=self.headers,
            callback=self.parse_list,
        )
    yield request
def parse(self, response):
    """Fill in and submit the ``form-login`` form found in ``response``.

    Returns a ``FormRequest`` whose reply goes to ``self.after_login``.
    """
    return FormRequest.from_response(
        response,
        headers=self.headers,
        formxpath='//form[@class="form-login"]',
        formdata={
            'username': '******',
            'password': '******',
            'remember_me': '1',
        },
        callback=self.after_login,
    )
def parse(self, response):
    """Submit the ``aspnetForm`` form with the funding-source choices set.

    Yields one ``FormRequest``; its meta starts the listing page counter at
    1 with ``flag`` False, and the reply goes to ``self.after_login``.
    """
    funding_choices = {
        'Skin$body$FundingSourceChoices$0': '1',
        'Skin$body$FundingSourceChoices$1': '0',
    }
    initial_meta = {'curr_listing_page': 1, 'flag': False}
    yield FormRequest.from_response(
        response,
        formname='aspnetForm',
        formdata=funding_choices,
        meta=initial_meta,
        callback=self.after_login,
    )
def parse(self, response):
    """Submit the search form with a fixed date, route and passenger count.

    Yields one ``FormRequest`` whose reply is handled by ``self.parseInfo``.
    """
    # NOTE(review): date and stations are fixed values in the source.
    search_fields = {
        'tanggal': '20160817#Rabu, 17 Agustus 2016',
        'origination': 'KAC#KIARACONDONG',
        'destination': 'MN#MADIUN',
        'adult': '1',
        'infant': '0',
    }
    yield FormRequest.from_response(
        response,
        formdata=search_fields,
        callback=self.parseInfo,
    )
def parse(self, response):
    """Submit the page's form with the spider's login and password.

    Returns a ``FormRequest`` whose reply goes to ``self.after_login``.
    """
    return FormRequest.from_response(
        response,
        formdata={'login': self.username, 'password': self.password},
        callback=self.after_login,
    )
def parse(self, response):
    """Submit the login form found in ``response``.

    Returns one ``FormRequest`` sent with ``self.headers``; the reply is
    handled by ``self.after_login``.
    """
    # NOTE(review): the credentials are hard-coded in the source; consider
    # supplying them through spider arguments or settings instead.
    account = {
        'username': 'kytj1',
        'password': '6ujBJ4XQyLeGmJmB',
    }
    return FormRequest.from_response(
        response,
        headers=self.headers,
        formdata=account,
        callback=self.after_login,
        dont_filter=True,
    )
def login(self, response):
    """Submit the login form, solving a captcha by hand when one is shown.

    When the page carries a ``captcha-id`` field, the captcha image is
    saved to ``captcha.jpg``, the user is prompted for the solution, and
    the whole ``response.meta`` is forwarded.  Otherwise only the
    ``cookiejar`` meta key is forwarded.  Yields a single ``FormRequest``
    whose reply is handled by ``self.parse``.
    """
    page = Selector(response)
    captcha_ids = page.xpath('//*[@name="captcha-id"]/@value').extract()

    # Fields are inserted in the same order the form was originally filled.
    fields = {
        'source': 'None',
        'redir': 'https://www.douban.com/people/unlucky_strike/',
        'form_email': '*****@*****.**',
        'form_password': '******',
    }
    if captcha_ids:
        captcha_id = captcha_ids[0]
        captcha_url = page.xpath('//*[@id="captcha_image"]/@src').extract()[0]
        urllib.urlretrieve(captcha_url, 'captcha.jpg')
        fields['captcha-solution'] = raw_input('please input the captcha:')
        fields['captcha-id'] = captcha_id
        request_meta = response.meta
    else:
        request_meta = {'cookiejar': response.meta['cookiejar']}
    fields['remember'] = 'on'
    fields['login'] = u'登录'

    yield FormRequest.from_response(
        response,
        meta=request_meta,
        formdata=fields,
        callback=self.parse,
    )
def parse(self, response):
    """Parse the login page and submit the credentials.

    The form is located by an ``action`` attribute containing "login";
    the reply is handled by ``self.parse_home``.
    """
    credentials = {
        'email': self.username,
        'pass': self.password,
    }
    return FormRequest.from_response(
        response,
        formxpath='//form[contains(@action, "login")]',
        formdata=credentials,
        callback=self.parse_home,
    )
def _get_page_request(self, response, page, date):
    """Return the postback request that asks ``gvwConsulta`` for ``page``.

    ``date`` is sent as ``txtDesde`` and also stored in the request meta
    under ``date``; the reply is handled by ``self.parse``.
    """
    postback = {
        "txtDesde": date,
        "__EVENTTARGET": "gvwConsulta",
        "__EVENTARGUMENT": "Page${}".format(page),
    }
    page_request = FormRequest.from_response(
        response,
        formdata=postback,
        dont_filter=True,
        callback=self.parse,
    )
    page_request.meta["date"] = date
    return page_request
def currency_form(self, response):
    """Handle the currency form page by posting the switch to USD.

    Returns a ``FormRequest`` whose reply goes to ``self.currency_changed``.
    """
    self.log('currency_form', level=logging.INFO)
    return FormRequest.from_response(
        response,
        formdata={
            'ddlCountry1': 'United States',
            'ddlCurrency': '503329C6-40CB-47E6-91D1-9F11AF63F706',
        },
        callback=self.currency_changed,
    )
def parse_initial_request(self, response):
    """Submit the search form for the date carried in ``response.meta``.

    Yields one ``FormRequest`` that keeps the date in its meta and whose
    reply is handled by ``self.parse_page``.
    """
    search_date = response.meta["date"]
    search_fields = {
        "txtDesde": search_date,
        "btnBuscar.x": "1",
        "btnBuscar.y": "1",
    }
    search_request = FormRequest.from_response(
        response,
        formdata=search_fields,
        dont_filter=True,
        callback=self.parse_page,
    )
    search_request.meta["date"] = search_date
    yield search_request
def parse(self, response):
    """Override the spider's ``parse``: fill in and submit the login form.

    The form is the one nested inside the ``main-container`` div.  The
    username and password come from ``self._settings``; the reply is
    handled by ``self.go_to_search_site``.

    :return: the login ``FormRequest``
    """
    # The original printed the whole settings mapping, which includes the
    # password.  Log only the key names so credentials never reach
    # stdout or log files.
    self.logger.debug('custom settings keys: %s', list(self._settings))
    return FormRequest.from_response(
        response,
        formxpath='//div[contains(concat(" ", normalize-space(@class), " "), " main-container ")]/descendant::form',
        formdata={
            'EmailOrUsername': self._settings['username'],
            'Password': self._settings['password'],
        },
        callback=self.go_to_search_site,
    )
def parse_start_url(self, response):
    """Log in with a manually solved captcha.

    The captcha image is downloaded to ``/home/shichangtai/code.gif`` and
    the user is prompted for its text.  The name of the password field is
    read from the page because it is not fixed in the source.

    :return: list with the login ``FormRequest``; its reply goes to
        ``self.after_log``
    :raises ValueError: if the page has no captcha image
    """
    sel = Selector(response)
    passwd = sel.xpath(r'/html/body/div[2]/form/div/input[2]/@name').extract_first()
    captcha_src = sel.xpath(r'/html/body/div[2]/form/div/img[1]/@src').extract_first()
    if captcha_src is None:
        raise ValueError('captcha image not found on {0}'.format(response.url))

    # The src attribute may be relative; resolve it against the page URL
    # (absolute URLs pass through unchanged).
    captcha_url = response.urljoin(captcha_src)
    # NOTE(review): requests.get blocks the crawler while it downloads.
    code = requests.get(captcha_url)
    with open('/home/shichangtai/code.gif', 'wb') as f:
        f.write(code.content)
    captcha = raw_input('请输入验证码: ')

    # This meta starts the cookie jar on the first request; every later
    # request is expected to pass the same cookiejar along.
    return [FormRequest.from_response(
        response=response,
        formdata={'mobile': self.account, passwd: self.password, 'code': captcha},
        meta={'cookiejar': 1},  # do not set 'dont_merge_cookies' to True
        callback=self.after_log,
    )]
def parse_home(self, response):
    """Parse the user news feed page, clearing checkpoints on the way.

    Checkpoint pages are answered with a ``FormRequest`` that comes back to
    this method.  Once none is left, a ``Request`` for the profile link is
    returned, handled by ``self.parse_profile``.  Returns ``None`` when the
    approvals code is not made of digits.
    """
    # 'Approvals Code' checkpoint: the user has to type the code.
    if response.css('#approvals_code'):
        if not self.code:
            # Relay the page's messages through the log, then prompt.
            headline = response.css('._50f4::text').extract()[0]
            self.log(process_string(headline))
            details = response.css('._3-8y._50f4').xpath('string()').extract()[0]
            self.log(process_string(details))
            self.code = input('Enter the code: ')
        self.code = str(self.code)
        code_is_valid = self.code and self.code.isdigit()
        if not code_is_valid:
            self.log('Bad approvals code detected.')
            return
        return FormRequest.from_response(
            response,
            formdata={'approvals_code': self.code},
            callback=self.parse_home,
        )

    # 'Save Browser' checkpoint: decline saving.
    if response.css('input#u_0_1'):
        return FormRequest.from_response(
            response,
            formdata={'name_action_selected': 'dont_save'},
            callback=self.parse_home,
            dont_filter=True,
        )

    # 'Someone tried to log into your account' warning: just submit.
    if response.css('button#checkpointSubmitButton'):
        return FormRequest.from_response(
            response,
            callback=self.parse_home,
            dont_filter=True,
        )

    # No checkpoint left: go to the user profile.
    profile_href = response.css('a[title="Profile"]::attr(href)').extract()[0]
    return Request(
        response.urljoin(profile_href),
        callback=self.parse_profile,
    )
def _get_page_request(self, response, page, date):
    """Return the postback request that asks ``gvwConsulta`` for ``page``.

    ``date`` is sent as ``txtDesde`` and stored in the request meta under
    ``date``; the reply is handled by ``self.parse``.
    """
    event_argument = 'Page${}'.format(page)
    page_request = FormRequest.from_response(
        response,
        formdata={
            'txtDesde': date,
            '__EVENTTARGET': 'gvwConsulta',
            '__EVENTARGUMENT': event_argument,
        },
        dont_filter=True,
        callback=self.parse,
    )
    page_request.meta['date'] = date
    return page_request
def parse_category(self, response):
    """Print product names from the data grid and follow its pager links.

    For each grid row the product name spans are printed.  Pager links are
    ``__doPostBack('<target>','<argument>')`` hrefs; each one is replayed
    as a form postback handled by ``self.parse_items``.
    """
    rows = response.xpath('//div[@class="datagrid"]//tr')
    for row in rows:
        product = row.xpath(
            'td//font/b/span[contains(@id, "main_GDVMain_lblProductName")]/text()'
        ).extract()
        if product:
            print(product)

        # Each match is the raw argument list of __doPostBack, i.e.
        # "'<target>','<argument>'".
        postbacks = row.xpath('td[@colspan="3"]//a/@href').re(r"doPostBack\(([^)]+')")
        for postback in postbacks:
            # The original referenced undefined names ``eventtarget`` and
            # ``eventargument`` (NameError); derive them from the match.
            raw_target, _, raw_argument = postback.partition(',')
            event_target = raw_target.strip().strip("'")
            event_argument = raw_argument.strip().strip("'")
            yield FormRequest.from_response(
                response,
                formdata={
                    '__EVENTTARGET': event_target,
                    '__EVENTARGUMENT': event_argument,
                },
                callback=self.parse_items,
                dont_click=True,
            )
def login(self, response):
    """Attempt to log in through the ``login_top`` form.

    Returns a ``FormRequest`` whose reply goes to ``self.request_threads``.
    """
    # NOTE(review): ``username`` and ``password`` are not defined in this
    # function; presumably module-level names - confirm.
    login_fields = {
        "req_username": username,
        "req_password": password,
    }
    return FormRequest.from_response(
        response,
        formxpath='//form[@id="login_top"]',
        formdata=login_fields,
        callback=self.request_threads,
    )
def parse(self, response):
    """Search across all provinces by submitting the page's form.

    The value of the "All" option of the ``propinsi`` select is read from
    the page and submitted with an empty keyword; the reply is handled by
    ``self.parse_list``.

    :raises IndexError: if the select has no option containing "All"
    """
    # Debugging aid: opens an interactive shell on this response.  Current
    # Scrapy requires the spider as second argument; passing it is also
    # accepted by older releases where it was optional.
    # NOTE(review): presumably scrapy.shell.inspect_response - confirm, and
    # consider removing before unattended runs.
    inspect_response(response, self)
    all_provinces_value = response.xpath(
        '//select[@name="propinsi"]/option[contains(., "All")]/@value'
    ).extract()[0]
    return FormRequest.from_response(
        response,
        formdata={
            'propinsi': all_provinces_value,
            'keyword': '',
            'submit': 'search!',
        },
        callback=self.parse_list,
    )
def parse_captcha(self, response):
    """Save the captcha image, ask the user for its text and submit it.

    ``response`` holds the image bytes; the page with the form comes from
    ``response.meta['captcha_page']``.  Yields one ``FormRequest`` for the
    ``formConsultaPublica`` form, handled by ``self.parse_captcha_result``.
    """
    with open('captcha.jpg', 'wb') as image_file:
        image_file.write(response.body)
    # Show the image with the system ``open`` command.
    subprocess.call(["open", "captcha.jpg"])
    typed_answer = input("Digite o captcha, seu lindo!: ")

    form_page = response.meta['captcha_page']
    yield FormRequest.from_response(
        form_page,
        formname="formConsultaPublica",
        formdata={'captcha': typed_answer, 'numeroProcesso': '1'},
        callback=self.parse_captcha_result,
    )
def parse_initial_request(self, response):
    """Submit the listing form for the date carried in ``response.meta``.

    The same date is used for both ``txtFecha`` and ``txtFechaF``.  Yields
    one ``FormRequest`` that keeps the date in its meta and whose reply is
    handled by ``self.parse_pages``.
    """
    day = response.meta['date']
    listing_fields = {
        'txtFecha': day,
        'txtFechaF': day,
        'btnListar': 'Listar',
        'DDLFuncionario': '',
    }
    listing_request = FormRequest.from_response(
        response,
        formdata=listing_fields,
        dont_filter=True,
        dont_click=True,
        callback=self.parse_pages,
    )
    listing_request.meta['date'] = day
    yield listing_request
def _request_initial_date_page(self, response, date_str, callback):
    """Return the request for the first result page of ``date_str``.

    The request meta records the date and starts ``current_page`` at 1;
    the reply is handled by ``callback``.
    """
    first_page = FormRequest.from_response(
        response,
        formdata={'TxtFecha': date_str, 'BtnBuscar': 'Buscar'},
        dont_click=True,
        dont_filter=True,
        callback=callback,
    )
    first_page.meta['date'] = date_str
    first_page.meta['current_page'] = 1
    return first_page
def parse_home(self, response):
    """Skip the "save device" checkpoint, then request the friends page.

    While the page links to ``save-device``, the form is answered with
    ``dont_save`` and the reply comes back here.  Otherwise the "Profile"
    link is cleaned and ``/friends`` appended; that request is handled by
    ``self.parse_profile``.
    """
    save_device_links = response.xpath("//div/a[contains(@href,'save-device')]")
    if save_device_links:
        self.logger.info('"save-device" checkpoint. redirecting...')
        return FormRequest.from_response(
            response,
            formdata={'name_action_selected': 'dont_save'},
            callback=self.parse_home,
        )

    # Past the checkpoint: go on to the profile's friends page.
    profile_hrefs = response.xpath('//a[contains(text(), "Profile")]/@href').extract()
    profile_url = self.get_element(profile_hrefs)
    friends_url = self.clean_url(profile_url) + '/friends'
    return scrapy.Request(
        url=friends_url,
        callback=self.parse_profile,
    )
def parse_initial_request(self, response):
    """Submit the search form for the date carried in ``response.meta``.

    The search button click is simulated with coordinates (62, 15).  Yields
    one ``FormRequest`` that keeps the date in its meta and whose reply is
    handled by ``self.parse_page``.
    """
    search_date = response.meta['date']
    search_request = FormRequest.from_response(
        response,
        formdata={
            'txtDesde': search_date,
            'btnBuscar.x': '62',
            'btnBuscar.y': '15',
        },
        dont_filter=True,
        callback=self.parse_page,
    )
    search_request.meta['date'] = search_date
    yield search_request
def _request_page(self, response, page_number, date_str, callback):
    """Return the postback request asking ``DTGVisitas`` for a given page.

    ``date_str`` fills both date fields and is stored in the request meta
    under ``date``; the reply is handled by ``callback``.
    """
    postback = {
        'txtFecha': date_str,
        'txtFechaF': date_str,
        'DDLFuncionario': '[ -- Seleccione Funcionario o Empleado -- ]',
        '__EVENTTARGET': 'DTGVisitas',
        '__EVENTARGUMENT': 'Page${}'.format(page_number),
    }
    page_request = FormRequest.from_response(
        response,
        formdata=postback,
        dont_filter=True,
        dont_click=True,
        callback=callback,
    )
    page_request.meta['date'] = date_str
    return page_request
def post_login(self, response):
    """Submit the login form together with the page's ``_xsrf`` token.

    Returns a one-element list holding the ``FormRequest``; it keeps the
    response's ``cookiejar`` and its reply goes to ``self.after_login``.
    """
    print("preparing login")
    # Grab the _xsrf field from the returned page; the login form carries
    # it and it is sent back with the credentials.
    xsrf_token = Selector(response).xpath("//input[@name='_xsrf']/@value").extract()[0]
    print(xsrf_token)

    # FormRequest.from_response builds the form POST; after_login is the
    # callback that receives the reply.
    login_request = FormRequest.from_response(
        response,
        meta={"cookiejar": response.meta["cookiejar"]},
        headers=self.headers,
        formdata={
            "_xsrf": xsrf_token,
            "email": "*****@*****.**",
            "password": "******",
        },
        callback=self.after_login,
        dont_filter=True,
    )
    return [login_request]
def parse_initial_request(self, response):
    """Submit the search form for the date carried in ``response.meta``.

    Yields one ``FormRequest`` that keeps the date in its meta and whose
    reply is handled by ``self.parse_page``.
    """
    requested_date = response.meta['date']
    fields = {
        'txtDesde': requested_date,
        'btnBuscar.x': '1',
        'btnBuscar.y': '1',
    }
    form_request = FormRequest.from_response(
        response,
        formdata=fields,
        dont_filter=True,
        callback=self.parse_page,
    )
    form_request.meta['date'] = requested_date
    yield form_request
def _check_response(response, callback, **kwargs):
    """Clear login interstitials, then run ``callback``.

    * A ``save-device`` link: answer the form with ``dont_save``; when the
      reply arrives ``callback`` is called without arguments.
    * A ``consent_step`` link: delegate to ``_handle_gdpr_consent_step``.
    * A URL containing ``/login``: raise ``CloseSpider('login_failed')``.
    * Otherwise: return ``callback()``.

    Extra keyword arguments are forwarded to the request or to the GDPR
    handler.
    """
    def _ignore_response(_reply):
        # The reply itself is not needed; just continue with the callback.
        return callback()

    has_save_device = response.xpath("//div/a[contains(@href,'save-device')]")
    if has_save_device:
        return FormRequest.from_response(
            response,
            formdata={'name_action_selected': 'dont_save'},
            callback=_ignore_response,
            **kwargs)

    has_consent_step = response.xpath("//div/a[contains(@href,'consent_step')]")
    if has_consent_step:
        return _handle_gdpr_consent_step(response, callback, **kwargs)

    if '/login' in response.url:
        raise CloseSpider('login_failed')

    return callback()
def parse_start_url(self, response):
    """Submit the login form; the reply goes to ``self.after_login``.

    Returns a one-element list holding the ``FormRequest``.
    """
    # If the login needs a captcha or similar, this is the place to handle
    # it: a solving service, manual input, PIL processing and so on.
    login_fields = {
        'log': 'account',
        'pwd': 'password',
        'rememberme': "forever",
        'wp-submit': "登录",
        'redirect_to': "http://www.haoduofuli.wang/wp-admin/",
        'testcookie': "1",
    }
    login_request = FormRequest.from_response(
        response,
        formdata=login_fields,
        callback=self.after_login,
    )
    return [login_request]
def parse(self, response):
    """Look up companies through the company search page.

    Yields one form submission per alphabet letter (``searchIndex`` 0-25).
    Each request is re-targeted at ``self.search_corp`` and handled by
    ``self.parse_companies`` (errors by ``self.errback_scraping``).
    """
    for i in range(26):  # one search per letter of the alphabet
        # The original passed ``self.search_corp`` as the second positional
        # argument, which ``from_response`` interprets as ``formname``.  No
        # form carries a URL as its name, so the lookup silently fell back
        # to the default form; the stray argument is dropped and the URL is
        # applied through ``replace`` below, as before.
        form_req = FormRequest.from_response(
            response,
            formdata={'searchIndex': str(i)},
            headers={'Referer': self.search_index},
            callback=self.parse_companies,
            errback=self.errback_scraping,
            meta={
                'seq': i,
                'first': True,
                'current': 1,
            },
        )
        yield form_req.replace(url=self.search_corp)
def _request_page(self, response, page_number, date_str, callback):
    """Return the postback request asking ``DTGVisitas`` for a given page.

    ``date_str`` fills both date fields and is stored in the request meta
    under ``date``; the reply is handled by ``callback``.
    """
    event_argument = 'Page${}'.format(page_number)
    paged = FormRequest.from_response(
        response,
        formdata={
            'txtFecha': date_str,
            'txtFechaF': date_str,
            'DDLFuncionario': '',
            '__EVENTTARGET': 'DTGVisitas',
            '__EVENTARGUMENT': event_argument,
        },
        dont_filter=True,
        dont_click=True,
        callback=callback,
    )
    paged.meta['date'] = date_str
    return paged
def login(self, response):
    """Show the captcha in a Tk window, then submit the login form.

    ``response`` holds the captcha image bytes; the login page itself comes
    from ``response.meta['original response']``.  The names of the
    username, password and captcha fields are taken from the first three
    empty inputs of that page, and the credentials from the JSON file
    ``config``.  Yields one ``FormRequest`` handled by
    ``self.click_all_drugs``.
    """
    logging.debug("in login()")
    image = Image.open(BytesIO(response.body))
    top = tkinter.Tk()
    top.title("Solving Captcha")
    top.geometry("400x200")
    # Keep ``img`` referenced for as long as the window is shown.
    img = ImageTk.PhotoImage(image, size="400x200")
    imagelabel = tkinter.Label(top, image=img)
    textentry = tkinter.Entry(top, font="Helvetica 20 bold")
    textentry.focus_set()

    original_response = response.meta['original response']
    empty_inputs = original_response.selector.xpath(
        "//form/div[@class='formInputs']/div/input[not(@value) or @value='']"
    ).extract()
    if len(empty_inputs) != 3:
        # Unexpected layout: keep a copy of the page for inspection.
        with open('strange.html', 'wb') as f:
            f.write(original_response.body)

    username_key = Selector(
        text=empty_inputs[0]).xpath(".//input/@name").extract_first()
    password_key = Selector(
        text=empty_inputs[1]).xpath(".//input/@name").extract_first()
    captcha_key = Selector(
        text=empty_inputs[2]).xpath(".//input/@name").extract_first()

    # Read the config once and close it; the original opened the file
    # twice and never closed either handle.
    with open('config') as config_file:
        config = json.load(config_file)
    formdata = {
        username_key: config['username'],
        password_key: config['password'],
    }

    def callback(en):
        # <Return> pressed: store the typed captcha and close the window.
        formdata[captcha_key] = textentry.get()
        top.destroy()

    textentry.bind("<Return>", callback)
    imagelabel.pack(side="top", fill="both", expand="yes")
    textentry.pack(side="bottom", fill="both", expand="yes")
    top.mainloop()  # blocks until the window is destroyed

    logging.debug('applying formdata in original response {}'.format(
        original_response))
    yield FormRequest.from_response(original_response,
                                    formdata=formdata,
                                    callback=self.click_all_drugs)
def parse_captcha(self, response):
    """Save the captcha image, ask the user for its text and submit it.

    ``response`` holds the image bytes; the page with the form comes from
    ``response.meta['captcha_page']``.  Yields one ``FormRequest`` for the
    ``formConsultaPublica`` form, handled by ``self.parse_captcha_result``.
    """
    with open('captcha.jpg', 'wb') as out:
        out.write(response.body)
    # Show the image with the system ``open`` command.
    subprocess.call(["open", "captcha.jpg"])
    solution = input("Digite o captcha, seu lindo!: ")

    fields = {
        'captcha': solution,
        'numeroProcesso': '1',
    }
    yield FormRequest.from_response(
        response.meta['captcha_page'],
        formname="formConsultaPublica",
        formdata=fields,
        callback=self.parse_captcha_result,
    )
def login(self, response):
    """Wait six seconds, then submit the forum login form.

    Returns a one-element list holding the ``FormRequest``; it keeps the
    response's ``cookiejar`` and its reply goes to ``self.after_login``.
    """
    # NOTE(review): time.sleep blocks the whole process while waiting, and
    # the password is hard-coded in the source.
    time.sleep(6)
    login_request = FormRequest.from_response(
        response,
        meta={'cookiejar': response.meta['cookiejar']},
        headers=self.send_headers,
        formdata={
            "pwuser": "******",
            "pwpwd": "1qaz2wsx",
            "question": "0",
            "step": "2",
            "lgt": "0",
            "customquest": "",
            "answer": "",
            "head_login": "",
            "jumpurl": "http://bbs.lcdhome.net/index.php",
        },
        callback=self.after_login,
    )
    return [login_request]
def parse_start_url(self, response):
    """Submit the login form; the reply goes to ``self.after_login``.

    If the login needs a captcha or similar, this is the place to handle
    it: a solving service, manual input, PIL processing and so on.

    Returns a one-element list holding the ``FormRequest``.
    """
    # NOTE(review): ``account`` and ``password`` are not defined in this
    # function; presumably module-level names - confirm.
    formdate = {
        "log": account,
        "pwd": password,
        # Fixed field names: the original sent "remenberme" and
        # "testcooke", which do not match the login form's "rememberme"
        # and "testcookie" fields, so they were posted as unknown extras.
        "rememberme": "forever",
        "wp-submit": "登录",
        "redirect_to": "http://www.haoduofuli.wang/wp-admin/",
        "testcookie": "1",
    }
    return [
        FormRequest.from_response(response,
                                  formdata=formdate,
                                  callback=self.after_login)
    ]
def tongshi_1(self, response):
    """Select each course-type radio button and request its course list.

    For the grid controls ``02`` to ``05`` a ``Course`` item tagged with
    the course type is created and a postback is sent to
    ``speltyCommonCourse.aspx``; the reply, with the item in its meta, is
    handled by ``self.tongshi_2``.
    """
    target_url = ELECT_URL + 'speltyCommonCourse.aspx'
    for course_type in ('02', '03', '04', '05'):
        radio_name = 'gridGModule$ctl' + course_type + '$radioButton'
        postback = {
            radio_name: 'radioButton',
            '__EVENTTARGET': radio_name,
        }
        course = Course()
        course['course_type'] = course_type
        yield FormRequest.from_response(
            response,
            dont_filter=True,
            url=target_url,
            formdata=postback,
            meta={'item': course},
            callback=self.tongshi_2,
        )
def login(self, response):
    """Log into the forums through the ``login_form`` form.

    :param response: page containing the login form
    :return: ``FormRequest`` whose reply goes to ``self.verify_login``
    """
    # !!! TAKE THIS OUT IF YOU DISTRIBUTE THIS !!!
    credentials = {
        'username': self.uname,
        'password': self.password,
        # "checked" is the "use https" checkbox.
        'checked': 'checked',
    }
    return FormRequest.from_response(
        response,
        formdata=credentials,
        formxpath='//form[@class="login_form"]',
        callback=self.verify_login,
    )
def parse(self, response):
    """Post the shareholding search form with the page's view state.

    ``__VIEWSTATE`` and ``__VIEWSTATEGENERATOR`` are read from the page and
    sent back with fixed search criteria.  Yields one ``FormRequest``
    handled by ``self.parse_item``.
    """
    state = response.xpath("//input[@id='__VIEWSTATE']/@value").get()
    state_generator = response.xpath(
        "//input[@id='__VIEWSTATEGENERATOR']/@value").get()

    # NOTE(review): dates and stock code are fixed values in the source.
    search_fields = {
        '__EVENTTARGET': 'btnSearch',
        '__VIEWSTATE': state,
        '__VIEWSTATEGENERATOR': state_generator,
        'today': '20210121',
        'sortBy': 'shareholding',
        'sortDirection': 'desc',
        'txtShareholdingDate': '2021/01/20',
        'txtStockCode': '00001',
    }
    yield FormRequest.from_response(
        response,
        formdata=search_fields,
        callback=self.parse_item,
    )
def post_login(self, response):
    """Submit the login form together with the page's ``_xsrf`` token.

    Returns a one-element list holding the ``FormRequest``; it is sent
    with ``self.headers`` and ``self.cookies`` and its reply goes to
    ``self.after_login``.
    """
    print('Preparing login')
    # Grab the _xsrf field from the returned page; it is sent back with
    # the credentials.
    xsrf_token = response.xpath('//input[@name="_xsrf"]/@value').extract()[0]

    # FormRequest.from_response builds the form POST; after_login is the
    # callback that receives the reply.
    login_request = FormRequest.from_response(
        response,
        headers=self.headers,  # note the headers used here
        formdata={
            '_xsrf': xsrf_token,
            'email': EMAIL,
            'password': PASSWORD,
        },
        cookies=self.cookies,
        callback=self.after_login,
    )
    return [login_request]
def parse(self, response):
    """Submit one gov. resolution form per configured government index.

    Currently there are 6 available governments, and their buttons are
    confusingly numbered 0 to 5.  A separate form request is yielded for
    every index in ``self.gov_indexes``; each reply is handled by
    ``self.parse_form_result``, which is expected to paginate over the
    results of that government.
    """
    for gov_index in self.gov_indexes:
        # The field name is built per government; the value is shared.
        field_name = self.gov_number_header_key % gov_index
        yield FormRequest.from_response(
            response,
            formdata={field_name: self.gov_number_header_value},
            callback=self.parse_form_result,
        )
def parse(self, resp):
    """Log in with credentials from ``credentials.py`` or from the prompt.

    If the ``credentials`` module cannot be imported, the user is asked
    for the e-mail and password interactively.  The ``poeForm`` form is
    then submitted and the reply handled by ``self.after_login``.
    """
    try:
        import credentials
        username, password = credentials.USERNAME, credentials.PASSWORD
    except ImportError:
        import getpass
        print("NO CREDENTIALS")
        username = raw_input("Username (Email): ").strip()
        password = getpass.getpass("Password: ")

    # NOTE(review): the text between the getpass prompt and the form xpath
    # was lost to credential masking ("******"), leaving an unterminated
    # call.  The statement below is a reconstruction - confirm it against
    # the original source.
    return FormRequest.from_response(
        resp,
        formxpath="//form[contains(@class,'poeForm')]",
        formdata={
            'login_email': username,
            'login_password': password,
        },
        callback=self.after_login,
    )
def parse(self, response):
    """Post the login panel to the listener URL derived from the page URL.

    The query string of ``response.url`` is stored in ``self.id_number``
    and substituted into the login URL.  Returns a one-element list holding
    the ``FormRequest``; its reply goes to ``self.after_login``.
    """
    self.id_number = urlparse(response.url).query
    url_template = 'https://login.digdag.nl/partououd/ouder?%PARAM%-1.IFormSubmitListener-form'
    login_url = url_template.replace('%PARAM%', self.id_number)

    panel_fields = {
        'loginpanel:gebruikersnaam': USERNAME,
        'loginpanel:wachtwoord': PASSWORD,
        'loginpanel:rememberme': 'False',
        'loginpanel:inloggen': 'x',
    }
    login_request = FormRequest.from_response(
        response,
        url=login_url,
        formdata=panel_fields,
        callback=self.after_login,
    )
    return [login_request]
def start_login(self, response):
    """Post the account and the double-MD5 password to the login handler.

    The password is hashed as ``md5(upper(hex(md5(pwd))))``.  Returns a
    one-element list holding the ``FormRequest``; its reply goes to
    ``self.check_info``.
    """
    # hashlib only accepts bytes on Python 3; encoding explicitly keeps
    # the digest identical to the Python 2 behaviour for ASCII passwords.
    raw_password = self.pwd
    if not isinstance(raw_password, bytes):
        raw_password = raw_password.encode('utf-8')
    first_pass = hashlib.md5(raw_password).hexdigest().upper()
    hashed_password = hashlib.md5(first_pass.encode('ascii')).hexdigest()

    return [
        FormRequest.from_response(
            response,
            url='https://weimaqi.net/admin_mchm_new/control/Handler.ashx?action=login',
            method='POST',
            formdata={
                'mch_acc': self.uid,
                'mch_pwd': hashed_password,
                'auto': 'false',
            },
            callback=self.check_info,
            dont_filter=True)
    ]
def post_login(self, response):
    """Submit the login form together with the page's ``_xsrf`` token.

    Returns a one-element list holding the ``FormRequest``; it keeps the
    response's ``cookiejar`` and its reply goes to ``self.after_login``.
    """
    # Grab the _xsrf field from the returned page; it is sent back with
    # the credentials.
    token_values = Selector(response).xpath('//input[@name="_xsrf"]/@value').extract()
    xsrf_token = token_values[0]

    # FormRequest.from_response builds the form POST; after_login is the
    # callback that receives the reply.
    login_fields = {
        '_xsrf': xsrf_token,
        'email': '*****@*****.**',
        'password': '******',
    }
    login_request = FormRequest.from_response(
        response,
        meta={'cookiejar': response.meta['cookiejar']},
        headers=self.headers,  # note the headers used here
        formdata=login_fields,
        callback=self.after_login,
        dont_filter=True,
    )
    return [login_request]
def post_login(self, response):
    """Submit the login form; the reply goes to ``self.after_login``.

    The page's ``family`` field is read and printed.  The returned
    ``FormRequest`` keeps the response's ``cookiejar``.
    """
    print('Preparing login')
    # Read the hidden "family" field from the returned page.
    family = Selector(response).xpath('//input[@name="family"]/@value').extract()[0]
    print(family)
    return FormRequest.from_response(
        response,
        meta={'cookiejar': response.meta['cookiejar']},
        headers=self.headers,
        formdata={
            # NOTE(review): the extracted ``family`` value is printed but
            # an empty string is submitted - confirm this is intended.
            'family': '',
            'username': '******',
            'password': '******',
            # Form values must be strings: Scrapy's form encoder raises
            # TypeError for the integer 1 used originally.
            'remember': '1',
        },
        callback=self.after_login,
    )
def post_login(self, response):
    """Submit the login form with the username and password.

    Returns a one-element list holding the ``FormRequest``; it keeps the
    response's ``cookiejar`` and its reply goes to ``self.after_login``.
    """
    print('Preparing login')
    # FormRequest.from_response builds the form POST; after_login is the
    # callback that receives the reply.  No _xsrf token is sent here.
    login_fields = {
        'username': '******',
        'password': '******',
    }
    login_request = FormRequest.from_response(
        response,
        meta={'cookiejar': response.meta['cookiejar']},
        headers=self.headers,  # note the headers used here
        formdata=login_fields,
        callback=self.after_login,
        dont_filter=True,
    )
    return [login_request]
def parse_login(self, response):
    """Submit the login form, echoing back its hidden bookkeeping fields.

    ``_next``, ``_formname`` and ``_formkey`` are read from the page and
    posted with the credentials.  Yields one ``FormRequest`` that keeps the
    response's ``cookiejar``; its reply is handled by ``self.parse_after``.
    """
    next_value = response.xpath('//input[@name="_next"]/@value').extract()[0]
    form_name = response.xpath('//input[@name="_formname"]/@value').extract()[0]
    form_key = response.xpath('//input[@name="_formkey"]/@value').extract()[0]

    login_fields = {
        'email': '*****@*****.**',
        'password': '******',
        '_next': next_value,
        '_formkey': form_key,
        '_formname': form_name,
    }
    yield FormRequest.from_response(
        response,
        formdata=login_fields,
        # Carry the same cookie jar over to the login request.
        meta={'cookiejar': response.meta['cookiejar']},
        callback=self.parse_after,
    )
def parse_shipping(self, response):
    """Record the free-delivery threshold and add the product to the basket.

    The threshold is read from the page's selling points, multiplied by
    ``self.exchange_rate`` and stored in ``self.free_delivery_over`` (0
    when either is missing).  The ``aspnetForm`` form is then posted with
    quantity 1; the reply is handled by ``self.parse_shipping1``.
    """
    threshold = response.xpath('//ul[@class="uspContent"]/li/text()').re(
        u'Free Standard Delivery over \xa3(.*)')
    if not (threshold and self.exchange_rate):
        self.free_delivery_over = 0
    else:
        self.free_delivery_over = extract_price(threshold[0]) * self.exchange_rate
        self.log('Free delivery over {} USD'.format(self.free_delivery_over))

    # Hidden ASP.NET state fields are echoed back, then the add-to-basket
    # image button click is simulated at coordinates (0, 0).
    basket_fields = {
        '__VIEWSTATEGENERATOR': response.xpath('//input[@name="__VIEWSTATEGENERATOR"]/@value').extract()[0],
        '__EVENTVALIDATION': response.xpath('//input[@name="__EVENTVALIDATION"]/@value').extract()[0],
        '__VIEWSTATE': response.xpath('//input[@name="__VIEWSTATE"]/@value').extract()[0],
        '__EVENTTARGET': '',
        'ctl00$ScriptManager1': '',
        'ctl00$ContentMain$product_details1$dd_quantity': '1',
        'ctl00$ContentMain$product_details1$imgbtn_addToBasket.x': '0',
        'ctl00$ContentMain$product_details1$imgbtn_addToBasket.y': '0',
    }
    yield FormRequest.from_response(
        response,
        formname='aspnetForm',
        formdata=basket_fields,
        callback=self.parse_shipping1,
    )
def login(self, response):
    """Log in with the credentials provided at instantiation.

    :param response: the /login page response
    :return: a FormRequest with the credentials to login, calling
        `login_callback`
    """
    credentials = {
        'username': self.credential.username,
        'password': self.credential.password,
    }
    return FormRequest.from_response(
        response,
        callback=self.login_callback,
        formxpath='//form[@class="form-signin"]',
        formdata=credentials,
        clickdata={'type': 'submit'},
    )
def parse(self, response):
    """Yield a ``Gazette`` per table row, then follow the "Próxima" link.

    Header rows (those containing ``th``) are skipped.  The next page is
    requested by replaying the link's JavaScript postback through the
    ``aspnetForm`` form, and comes back to this method.
    """
    for row in response.css("#corpo table tr"):
        is_header = row.css("th").extract()
        if is_header:
            continue

        raw_date = row.css(self.GAZETTE_DATE_CSS).extract_first()
        gazette_date = dateparser.parse(raw_date, languages=["pt"]).date()
        file_url = row.css(self.GAZETTE_URL_CSS).extract_first()
        title = row.css(self.GAZETTE_NAME_CSS).extract_first()

        yield Gazette(
            date=gazette_date,
            file_urls=[file_url],
            is_extra_edition="Extra" in title,
            territory_id=self.TERRITORY_ID,
            power="executive_legislature",
            scraped_at=datetime.utcnow(),
        )

    for link in response.css(self.NEXT_PAGE_LINK_CSS):
        if link.css("a::text").extract_first() != "Próxima":
            continue

        # The href is a JavaScript postback; its target is pulled out of it.
        href = link.css("a::attr(href)")
        postback_target = href.re(self.JAVASCRIPT_POSTBACK_REGEX).pop()
        yield FormRequest.from_response(
            response,
            callback=self.parse,
            formname="aspnetForm",
            formxpath="//form[@id='aspnetForm']",
            formdata={
                "__EVENTARGUMENT": "",
                "__EVENTTARGET": postback_target,
            },
            dont_click=True,
            dont_filter=True,
            method="POST",
        )
def login_to_library(self, response):
    """Log in when the page shows the login form, then parse the CSV lines.

    If ``self.login_form_xpath`` matches, the form is submitted with the
    ``CREDENTIALS`` from the settings; otherwise the same URL is requested
    again.  Either way ``csv_lines`` travels in the meta and the reply is
    handled by ``self.parse_csv_lines``.
    """
    passthrough_meta = {'csv_lines': response.meta['csv_lines']}

    if not response.xpath(self.login_form_xpath):
        # No login required.
        yield scrapy.Request(
            response.url,
            dont_filter=True,
            meta=passthrough_meta,
            callback=self.parse_csv_lines,
        )
    else:
        stored = self.settings['CREDENTIALS']
        yield FormRequest.from_response(
            response,
            formxpath=self.login_form_xpath,
            formdata={'user': stored['user'], 'pass': stored['pass']},
            meta=passthrough_meta,
            dont_filter=True,
            callback=self.parse_csv_lines,
        )
def post_login(self, response):
    """Submit the login form once the login page has been fetched.

    :param response: the page returned by the login endpoint
    :return: ``FormRequest`` that keeps the response's ``cookiejar``; its
        reply goes to ``self.after_login``
    """
    # Send the credentials as a form request.
    login_fields = {
        'form_email': '******',
        'form_password': '******',
    }
    return FormRequest.from_response(
        response=response,
        meta={'cookiejar': response.meta['cookiejar']},
        headers=self.post_headers,
        formdata=login_fields,
        callback=self.after_login,
    )
def login(cls, response: HtmlResponse):
    """Submit the login form with the post key found in ``init-config``.

    The ``init-config`` input holds JSON; its ``pixivAccount.postKey``
    entry is sent back as ``post_key``.  Returns a ``FormRequest`` whose
    reply goes to ``cls.success``.
    """
    raw_config = response.xpath('//input[@id="init-config"]/@value').extract()[0]
    auth_info = json.loads(raw_config)

    # Account id and password are left blank in the source.
    login_fields = {
        'captcha': '',
        'g_recaptcha_response': '',
        'post_key': auth_info['pixivAccount.postKey'],
        'source': 'pc',
        'ref': 'wwwtop_accounts_index',
        'pixiv_id': '',
        'password': '',
    }
    return FormRequest.from_response(
        response,
        formdata=login_fields,
        callback=cls.success,
    )
def parse(self, response):
    """Submit the ``connection`` form with the CSRF token and credentials.

    :param response: page containing the connection form
    :return: generator yielding the login ``FormRequest``, whose reply is
        handled by ``self.after_login``
    """
    print('\n')
    token = response.xpath("//input[@id='connection__token']/@value").get()
    connection_fields = {
        'connection[_token]': token,
        'connection[mail]': self.user,
        'connection[password]': self.password,
        'connection[stay_connected]': '1',
    }
    yield FormRequest.from_response(
        response,
        formxpath="//form[@name='connection']",
        formdata=connection_fields,
        callback=self.after_login,
    )
def parse(self, response):
    """Submit the query form for the stock id of the item in the meta.

    The ASP.NET ``__VIEWSTATE`` and ``__EVENTVALIDATION`` values are read
    from the page and posted along with ``item['stockid']``.  Yields one
    ``FormRequest`` that carries the item on and is handled by
    ``self.parse_after_form_submit``.
    """
    stock_item = response.meta['item']
    page = Selector(response)

    # ASP.NET state fields that have to accompany the submission.
    view_state = page.xpath('.//input[@id="__VIEWSTATE"]/@value').extract()[0]
    event_validation = page.xpath('.//input[@id="__EVENTVALIDATION"]/@value').extract()[0]
    query_fields = {
        '__EVENTTARGET': '',
        '__EVENTARGUMENT': '',
        '__VIEWSTATE': view_state,
        '__EVENTVALIDATION': event_validation,
        'HiddenField_spDate': '',
        'HiddenField_page': 'PAGE_BS',
        'txtTASKNO': stock_item['stockid'],
        'hidTASKNO': '',
        'btnOK': u'查詢',
    }

    # Register the handler for the reply to the submitted form.
    submit_request = FormRequest.from_response(
        response,
        formdata=query_fields,
        callback=self.parse_after_form_submit,
        dont_filter=True,
    )
    submit_request.meta['item'] = stock_item
    yield submit_request
def search_form_clicked(self, response):
    """Yield a submission of the form at index 1 of ``response``.

    No callback is set on the request.
    """
    # Placeholder from the source: this is where links would be extracted
    # and followed with requests that use another callback.
    form_submission = FormRequest.from_response(response, formnumber=1)
    yield form_submission