def _label_d_v(value):
    """Flatten the text nodes of an HTML fragment into a single string.

    ``value`` is parsed with ``etree.HTML`` and every node matched by
    ``//text()`` is appended, in the order the query returns them, after a
    leading run of three newlines.
    """
    document = etree.HTML(value)
    text_nodes = document.xpath("//text()")
    return '\n\n\n' + ''.join(text_nodes)
def ResetPrefs():
    """Set every preference of the Dreambox plugin to an empty value.

    Stores ``False`` under the ``'Started'`` data key, reads the ``Setting``
    elements from the local preferences endpoint and, for each setting id,
    logs it and requests ``prefs/set?<id>=``.  Returns an ``ObjectContainer``
    whose objects are the result of ``check_empty_items`` applied to a single
    entry that links back to ``MainMenu``.
    """
    Data.SaveObject('Started', False)

    prefs_doc = XML.ElementFromURL(
        'http://127.0.0.1:32400/:/plugins/com.plexapp.plugins.dreambox/prefs',
        timeout=3)

    # Ids seen so far; this mapping is not read anywhere in this function.
    seen_prefs = {}
    for setting in prefs_doc.xpath('//Setting'):
        pref_id = setting.xpath('./@id')[0]
        seen_prefs[pref_id] = None
        Log(pref_id)

        # Nothing follows the "=" in the query, i.e. the value sent is empty.
        reset_request = HTTP.Request(
            'http://127.0.0.1:32400/:/plugins/com.plexapp.plugins.dreambox/prefs/set?{}='
            .format(pref_id),
            timeout=5)
        reset_request.load()

    notice = DirectoryObject(
        key=Callback(MainMenu),
        title='User Prefs reset. Restart plugin to load DefaultPrefs')
    entries = check_empty_items([notice])

    return ObjectContainer(objects=entries,
                           title2='Reset user preference',
                           no_history=True)
def parse_search(self, response):
    """Yield one ``WeiboItem`` per result card of a search response.

    For every ``div.card-wrap`` element the yielded item carries:

    * ``content`` - the text of the second ``p.txt`` paragraph when more than
      one is present, otherwise the text of the only one, or ``''`` when the
      card has none (presumably the second paragraph is the expanded text;
      verify against the page markup);
    * ``time`` - the text of the link inside ``p.from``;
    * ``name`` - the ``nick-name`` attribute of the ``a.name`` link.

    ``time`` and ``name`` come from ``extract_first()`` and are therefore
    ``None`` when the element is missing.
    """
    results = response.xpath('//div[@class="card-wrap"]')

    # NOTE(review): inspect_response opens an interactive Scrapy shell for
    # each response handled here, which pauses the crawl.  It looks like a
    # debugging aid - confirm whether it should stay.
    from scrapy.shell import inspect_response
    inspect_response(response, self)

    for card in results:
        # A fresh item per card: reusing a single instance would make every
        # yielded item alias the same object, so later cards would overwrite
        # the fields of items already handed to the pipeline.
        item = WeiboItem()

        contents = card.xpath('./div/div/div[@class="content"]/p[@class="txt"]')
        if contents:
            chosen = contents[1] if len(contents) > 1 else contents[0]
            item['content'] = chosen.xpath('string(.)').extract_first()
        else:
            item['content'] = ''

        item['time'] = card.xpath(
            './div/div/div[@class="content"]/p[@class="from"]/a'
        ).xpath('string(.)').extract_first()
        item['name'] = card.xpath(
            './div/div/div[@class="content"]/div[@class="info"]//a[@class="name"]/@nick-name'
        ).extract_first()

        yield item
def ResetPrefs():
    """Blank all stored preferences of the Dreambox plugin and report it.

    Steps, in order: store ``False`` under the ``'Started'`` data key; fetch
    the preferences document from the local endpoint; for each ``Setting``
    element log its id and request ``prefs/set?<id>=``; finally return an
    ``ObjectContainer`` built from ``check_empty_items`` applied to one
    directory entry that calls back to ``MainMenu``.
    """
    Data.SaveObject('Started', False)

    document = XML.ElementFromURL('http://127.0.0.1:32400/:/plugins/com.plexapp.plugins.dreambox/prefs', timeout=3)
    setting_nodes = document.xpath('//Setting')

    # Collected ids; nothing in this function reads the mapping back.
    cleared = {}
    for node in setting_nodes:
        name = node.xpath('./@id')[0]
        cleared[name] = None
        Log(name)
        # The query ends right after "=", so an empty value is submitted.
        request = HTTP.Request('http://127.0.0.1:32400/:/plugins/com.plexapp.plugins.dreambox/prefs/set?{}='.format(name), timeout=5)
        request.load()

    menu_items = []
    menu_items.append(DirectoryObject(key=Callback(MainMenu), title='User Prefs reset. Restart plugin to load DefaultPrefs'))
    menu_items = check_empty_items(menu_items)

    container = ObjectContainer(objects=menu_items, title2='Reset user preference', no_history=True)
    return container
import re

# Read the saved page as raw bytes.  The context manager closes the file even
# when the read raises.
with open(
        'E:/PycharmProjects/Spider/Distributed_twitter_spider/Distributed_twitter_spider/util/url.txt',
        'rb') as f:
    res = f.read()

# Matches anchors such as <a href="tel:+886223113731"> and captures the number.
tel_pattern = re.compile(r'<a href="tel:(.*?)">')
tel_match = tel_pattern.search(res.decode('utf-8'))
# First tel: link on the page, or None when the page has none.
tel = tel_match.group(1) if tel_match else None

# NOTE(review): this is an XPath expression, which the ``re`` module cannot
# evaluate (``re`` has no ``xpath`` attribute), and its ``@class=`` predicate
# has no value yet.  It is kept as plain text until it is completed and run
# against a parsed document with an XPath-capable parser.
basic_info_pattern = './div[@class=]'
def parse(self, response):
    """Request the merged-order SKU lookup for a shipped order page.

    Reads the order status and order id from the ``order-form`` element.
    When the status is ``'已发货'`` the order id is printed, the tracking
    number, dispatch time and Mabang order id are read, and a
    ``scrapy.FormRequest`` to the ``order.findrelevantinfo`` endpoint is
    yielded with those values in ``meta`` and ``self.detail_parse`` as the
    callback.  For any other status the order id and status are printed and
    nothing is yielded.

    Raises ``IndexError`` when a field that is read is absent from the page,
    because the first extracted value is taken unconditionally.
    """

    def first_value(query):
        # First match of the XPath query on the response.
        return response.xpath(query).extract()[0]

    # Order status.
    order_st = first_value(
        '//*[@id="order-form"]/div[1]/div[2]/div[1]/div[2]/p/text()')
    # Order id.
    order_id = first_value(
        '//*[@id="order-form"]/div[1]/div[2]/div[1]/div[1]/p/text()')

    if order_st != '已发货':
        print(order_id + "订单状态为:" + order_st)
        return

    print(order_id)

    # Tracking number.
    tracking_number = first_value(
        '//*[@id="order-form"]/div[5]/div[2]/div[2]/div[1]/input/@value')
    # Mabang dispatch time.
    expresstime = first_value(
        '//*[@id="order-form"]/div[1]/div[2]/div[8]/div[2]/input/@value')
    # Mabang order id.
    mb_orderid = first_value(
        '//*[@id="order-form"]/div[1]/div[1]/input[1]/@value')

    # Values handed to the callback through the request meta.
    mb_meta = {
        'order_id': order_id,
        'tracking_number': tracking_number,
        'order_st': order_st,
        'expresstime': expresstime,
        'mb_orderid': mb_orderid,
    }

    # Endpoint used to fetch the SKUs of merged orders.
    url = 'https://aamz.mabangerp.com/index.php?mod=order.findrelevantinfo'
    headers = {
        # Note carried over from the original: the User-Agent must not
        # contain stray whitespace.
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0",
    }
    # POST form data.
    data1 = {'orderId': mb_orderid, 'type': '1', 'tableBase': '2'}

    # NOTE(review): session cookies (including a login cookie) are hard-coded
    # here; consider loading them from configuration instead.
    raw_cookies = (
        "lang=cn; "
        "signed=222014_00f6735cc675f0abb6f483d9913f72bf; "
        "gr_user_id=63993efb-e900-472c-9e8d-4d3e5c1832b2; "
        "stock_show_product_data_cookie=ico-minus-circle; "
        "employ_rows_per_page_data_cookie=10; "
        "stock_data_js_cookie_is_change_weight=1; "
        "stock_data_js_cookie_is_change_name=1; "
        "CRAWL_KANDENG_KEY=K6uqW0ZkQEouz0n1adoI%2FWqfFs2PbJ8%2BCpQKvtnzAvWpTX174VXBmq5L9cDOSOj%2Bm2IcDf7pRauH34yzR4OEyw%3D%3D; "
        "MULTI_LANGUAGE_TYPE=%2BYjZ6oacL7xJ%2FKOcmBg9Z7cTOqi7UgOUgujRs4KQ4Ms%3D; "
        "mabang_lite_rowsPerPage=500; "
        "PHPSESSID=gbtnacjhjolnnvb3cmcijbcbp0; "
        "loginLiteCookie=a%3A2%3A%7Bs%3A8%3A%22username%22%3Bs%3A6%3A%22222014%22%3Bs%3A9%3A%22passsword%22%3Bs%3A32%3A%2282bc6a8ec7fca9d8d0d844f2882546e7%22%3B%7D; "
        "event_rember12_222014=0"
    )
    # Turn the "name=value; name=value" header text into a mapping.
    cookies = {}
    for pair in raw_cookies.split("; "):
        fields = pair.split("=")
        cookies[fields[0]] = fields[1]

    yield scrapy.FormRequest(
        url=url,
        cookies=cookies,
        formdata=data1,
        headers=headers,
        meta=mb_meta,
        callback=self.detail_parse,
    )
def parse(self, response):
    """Request the merged-order SKU lookup for a shipped order page.

    The order status is read first.  Only when it equals ``'已发货'`` are the
    order id, tracking number, dispatch time and Mabang order id read and a
    ``scrapy.FormRequest`` to the ``order.findrelevantinfo`` endpoint yielded,
    with those values in ``meta`` and ``self.detail_parse`` as the callback.
    Pages with any other status yield nothing, and their shipped-only fields
    are not touched, so a page that lacks them does not raise.

    Raises ``IndexError`` when the status is absent, or when a shipped page
    lacks one of the fields that are read.
    """

    def first_value(query):
        # First match of the XPath query on the response.
        return response.xpath(query).extract()[0]

    # Order status; decides whether anything else needs to be read.
    order_st = first_value(
        '//*[@id="order-form"]/div[1]/div[2]/div[1]/div[2]/p/text()')
    if order_st != '已发货':
        return

    # Order id.
    order_id = first_value(
        '//*[@id="order-form"]/div[1]/div[2]/div[1]/div[1]/p/text()')
    # Tracking number.
    tracking_number = first_value(
        '//*[@id="order-form"]/div[5]/div[2]/div[2]/div[1]/input/@value')
    # Mabang dispatch time.
    expresstime = first_value(
        '//*[@id="order-form"]/div[1]/div[2]/div[8]/div[2]/input/@value')
    # Mabang order id.
    mb_orderid = first_value(
        '//*[@id="order-form"]/div[1]/div[1]/input[1]/@value')

    # Values handed to the callback through the request meta.
    mb_meta = {
        'order_id': order_id,
        'tracking_number': tracking_number,
        'order_st': order_st,
        'expresstime': expresstime,
        'mb_orderid': mb_orderid,
    }

    # Endpoint used to fetch the SKUs of merged orders.
    url = 'https://aamz.mabangerp.com/index.php?mod=order.findrelevantinfo'
    headers = {
        # Note carried over from the original: the User-Agent must not
        # contain stray whitespace.
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0",
    }
    # POST form data.
    data1 = {'orderId': mb_orderid, 'type': '1', 'tableBase': '2'}

    # NOTE(review): session cookies (including a login cookie) are hard-coded
    # here; consider loading them from configuration instead.
    raw_cookies = (
        "gr_user_id=493499a8-83fc-4e47-87c3-08b1ded6df3c; "
        "MULTI_LANGUAGE_TYPE=%2BYjZ6oacL7xJ%2FKOcmBg9Z7cTOqi7UgOUgujRs4KQ4Ms%3D; "
        "lang=cn; "
        "stock_show_product_data_cookie=ico-minus-circle; "
        "stock_data_js_cookie_is_change_weight=1; "
        "mabang_lite_rowsPerPage=500; "
        "stock_data_js_cookie_is_change_name=1; "
        "order_data_js_cookie_orderErrorbysVal=paidTime; "
        "order_data_js_cookie_orderErrorbydacname=orderByspaidTime; "
        "order_data_js_cookie_orderErrorbydacnameval=down; "
        "order_data_js_cookie_isSyn=2; "
        "employ_rows_per_page_data_cookie=50; "
        "order_data_js_cookie_isImmediately=1; "
        "signed=222014_00f6735cc675f0abb6f483d9913f72bf; "
        "PHPSESSID=gjgkl12ntct9knahgq66qtlks1; "
        "event_rember12_222014=0; "
        "CRAWL_KANDENG_KEY=K6uqW0ZkQEouz0n1adoI%2FWqfFs2PbJ8%2BCpQKvtnzAvWpTX174VXBmq5L9cDOSOj%2Bm2IcDf7pRauH34yzR4OEyw%3D%3D; "
        "loginLiteCookie=a%3A2%3A%7Bs%3A8%3A%22username%22%3Bs%3A6%3A%22222014%22%3Bs%3A9%3A%22passsword%22%3Bs%3A32%3A%22f1c7edfb07a416030a0f976bac902add%22%3B%7D"
    )
    # Split each pair on the first "=" only, so a value that itself contains
    # "=" is kept whole.
    cookies = {}
    for pair in raw_cookies.split("; "):
        name, _, value = pair.partition("=")
        cookies[name] = value

    yield scrapy.FormRequest(
        url=url,
        cookies=cookies,
        formdata=data1,
        headers=headers,
        meta=mb_meta,
        callback=self.detail_parse,
    )