def main(host, user, password):
    """Check NetScaler LB vservers and their bound services and mail a report.

    Flags: vservers that are DOWN, bound services that are DOWN, and services
    whose opsdb record is missing or set to FALSE.

    :param host: NetScaler hostname exposing the Nitro HTTP API.
    :param user: unused -- kept for interface compatibility.
    :param password: unused -- kept for interface compatibility.
    """
    # NOTE: the original began with no-op self-assignments (host = host, ...)
    # and shadowed the `list` builtin; both removed.
    email = []
    vip_url = 'http://' + host + '/nitro/v1/config/lbvserver?view=summary'
    stats = session.get(vip_url, auth=('xx', 'xx'))
    summary = json.loads(stats.content)
    # jsonpath returns False (not []) when nothing matches -- guard with `or []`.
    up_vips = jsonpath.jsonpath(summary, "$..lbvserver[?(@.curstate=='UP')]") or []
    down_vips = jsonpath.jsonpath(summary, "$..lbvserver[?(@.curstate=='DOWN')]") or []
    for down in down_vips:
        email.append(down['name'] + "vip is down.")
    # For each UP vserver, inspect every service bound to it.
    for line in up_vips:
        vip_name = line['name']
        url = 'http://' + host + '/nitro/v1/config/lbvserver_service_binding/' + vip_name
        binding = session.get(url, auth=('xxx', 'xxx'))
        servers = json.loads(binding.content)
        for obj in servers["lbvserver_service_binding"]:
            svrhost = obj['ipv46']
            # NOTE(review): svrhost comes from the LB API and is interpolated
            # straight into a shell/SQL string -- injection risk if that data
            # is ever untrusted; consider a parameterized DB client instead.
            db_state = commands.getoutput(
                "psql -t --host localhost --port xxx --dbname xx --user www -c "
                "\"select xxx from xxx.xx where address = '" + svrhost + "'\"")
            if obj["curstate"] == "DOWN":
                email.append(svrhost + " in the " + vip_name + " is down")
            if db_state == "f":
                email.append(svrhost + " in the " + vip_name +
                             " vip is set to FALSE in opsdb but is in the LB")
            if db_state == "":
                email.append(svrhost + " in the " + vip_name + " vip is NOT in opsdb")
    # Send a single summary mail when anything was flagged.
    if email:
        text = "\n".join(email)
        message = 'Subject: %s\n\n%s' % (SUBJECT, text)
        smtp = smtplib.SMTP('localhost')
        smtp.sendmail(FROM, TO, message)
        smtp.quit()
def get_content(self): jsonobj = json.loads(self.get_html()) # 商品名称 namelist = jsonpath.jsonpath(jsonobj, '$..title') # 商品价格 pricelist = jsonpath.jsonpath(jsonobj, '$..promotionPrice') # 商品图片 imglist = jsonpath.jsonpath(jsonobj, '$..img') listdata = zip(imglist,namelist,pricelist) for item in listdata: # print(item[1]) try: result = self.cursor.execute( "insert into myduodian_aiduodian (image,goodName,price) VALUES (%s,%s,%s)",[item[0],item[1],item[2]]) self.db.commit() print(result) except Exception as e: self.db.rollback() print('失败') # 关闭连接,释放资源 self.db.close()
def x_jsonpath(selector):
    """Apply a JSONPath *selector* to every document in the JSON stream and
    print each selection result."""
    from jsonpath import jsonpath
    for document in vals_from_json():
        matched = jsonpath(document, selector)
        print(matched)
def getresponse(self):
    """Fetch a fixed myjson.com document and print every 'title' field.

    jsonpath returns a list of matches (or False when none) -- the raw
    result is printed as-is.
    """
    url = "https://api.myjson.com/bins/1leee"
    response = urlopen(url)
    data = response.read()
    somejson = json.loads(data)
    # Recursive descent: every 'title' key anywhere in the document.
    match = jsonpath.jsonpath(somejson, '$..title')
    print match
def getStat(name):
    """Return the value of metric *name* from the cached elasticsearch JSON.

    The JSON at the module-level `url` is re-fetched at most once every 20
    seconds; `keyToPath` maps metric names to JSONPath expressions.  Returns
    an int when the value is purely numeric, otherwise a space-joined string,
    or None when the path has no match.
    """
    global last_update, result, url
    # Refresh the cached JSON only if more than 20 seconds have passed.
    now = time.time()
    diff = now - last_update
    if diff > 20:
        print("[elasticsearch] " + str(diff) + " seconds passed - Fetching " + url)
        result = json.load(urllib.urlopen(url))
        last_update = now
    JsonPathName = keyToPath[name]
    tmp = jsonpath.jsonpath(result, JsonPathName)
    # JsonPath returns False if no match was found.
    if not tmp:
        return None
    # Join all matches into one space-separated string.
    try:
        val = " ".join(["%s" % el for el in tmp])
    except TypeError:
        val = None
    if val is None:
        return None
    # BUG FIX: the original had `return val` followed by `pass` here, making
    # the integer conversion and the debug print below dead code.
    if val.isdigit():
        val = int(val)
    print("********** " + name + ": " + str(val))
    return val
def returnpath(self, json, query):
    """
    returnpath('topping[*].type')
    Evaluate *query* (a JSONPath expression) against *json* and return the
    resulting data structure, else false.  Returns None when the lookup
    raises TypeError (missing/untraversable path).
    """
    try:
        matches = jsonpath.jsonpath(json, query)
    except TypeError:
        # Path is missing.
        return None
    return matches
def custom_parse(self, data):
    """Parse *data* (a JSON string) and wrap every text matched by the first
    configured JSONPath (self.paths[0]) in a Comment.

    :raises JSONFailedException: when the path matches nothing (jsonpath
        returns False on no match).
    """
    data = json.loads(data)
    messages = jsonpath(data, self.paths[0])
    if not messages:
        raise JSONFailedException(self.paths[0])
    # Author/id are placeholders -- only the text is extracted here.
    return [Comment('Not Implemented', 'Not Implemented', text) for text in messages]
def assertGet(entity, jpath, dctAssert=None):
    """Assert that *jpath* matches something inside *entity*.

    When *dctAssert* is given, its 'errnum' counter and 'asserts' log are
    updated with the outcome.  Returns the first match, or a falsy value
    when nothing matched.
    """
    message = '(%s)' % jpath
    ret = jsonpath.jsonpath(entity, jpath)
    err = 0 if ret else 1
    if dctAssert:
        dctAssert['errnum'] += err
        dctAssert['asserts'].append('%s:%s' % (message, err == 0))
    return ret and ret[0]
def parse(self, data):
    """Trim *data* down to its outermost JSON object (or array, per
    self.inf['type']), parse it, and apply the configured JSONPath query
    (self.inf['query'])."""
    query = self.inf.get('query')
    kind = self.inf.get('type', 'object')
    open_ch, close_ch = ('{', '}') if kind == 'object' else ('[', ']')
    # Slice from the first opening to the last closing delimiter, dropping
    # any junk (JSONP padding, HTML) around the payload.
    start = data.find(open_ch)
    end = data.rfind(close_ch)
    payload = data[start:end + 1]
    return jsonpath(json.loads(payload), query)
def parse_json_item(self, response, loop, fields):
    """Yield one Item per element matched by the *loop* JSONPath in the
    response's JSON body, filling fields from literal 'value' entries or
    per-element 'jpath' expressions (Python 2 / scrapy)."""
    meta = response.meta
    enc = getattr(self, 'json_enc', 'utf-8')
    txt = unicode(response.body, encoding=enc, errors='ignore')
    # Strip anything outside the outermost object/array (e.g. JSONP padding).
    if hasattr(self, 'json_type') and self.json_type=='list':
        l, r = txt.find('['), txt.rfind(']')
    else:
        l, r = txt.find('{'), txt.rfind('}')
    obj = json.loads(txt[l:r+1])
    self.macro.update({'URL':response.url, 'keyword':meta.get('keyword', '')})
    # jsonpath returns False on no match, hence the `or []`.
    for e in jsonpath.jsonpath(obj, loop or '$[]') or []:
        item = Item()
        for k,v in fields.iteritems():
            if 'value' in v:
                # Literal value with macro expansion.
                v_x = self.macro.expand(v.get('value'))
            elif 'jpath' in v:
                # Per-element JSONPath; normalize the no-match False to None.
                v_x = jsonpath.jsonpath(e, self.macro.expand(v.get('jpath')))
                v_x = None if v_x==False else v_x
            else:
                log.msg(u'field [{}] should contains "value" or "jpath"'.format(k), level=log.WARNING)
                continue
            val = parser.make_parser(v.get('parse', {}))(v_x)
            if not val and 'default' in v:
                val = self.macro.expand(v.get('default'))
            # A required field came up empty: abandon this whole item
            # (the for-else below only yields when no break occurred).
            if not (val or v.get('multi') or v.get('opt')):
                log.msg(u'field [{}] is empty:\n{}'.format(k, item), level=log.WARNING)
                break
            item[k] = arg_to_iter(val)
        else:
            yield item
def create_ckan_data(self):
    ''' Create NRCAN datasets in CKAN format and store in a text file.

    Reads pipe-separated English|French records from nrcan.dat, maps fields
    to CKAN package keys per the [package] section of nrcan.config, then
    resolves LOOKUP fields.  Currently exits after the first record (debug).
    '''
    self.config.read('nrcan.config')
    infile = open('/Users/peder/dev/goc/nrcan.dat', "r")
    # BUG FIX: the original opened two files into the same `outfile` name,
    # leaking the first handle (nrcan-ckan.dat was never used); keep only
    # the surviving assignment.
    outfile = open('log.jl', "w")
    for line in infile:
        en, fr = str(line).strip().split('|')
        # NOTE(review): eval() on file content is unsafe if the input file
        # is not fully trusted -- prefer json.loads/ast.literal_eval.
        data_en = eval(en)
        package_dict = {'extras': {}, 'resources': [], 'tags': []}
        # Start with English package fields.
        lookups = []
        # NOTE(review): `config` here is a module-level name, not
        # self.config -- confirm this is intended.
        for ckan, nrcan in config.items('package'):
            if nrcan == 'LOOKUP':
                lookups.append(ckan)
            elif "$." in nrcan:
                print(jsonpath(data_en, nrcan))
                if schema_description.dataset_field_by_id[ckan].get('type') == 'keywords':
                    package_dict[ckan] = ','.join(jsonpath(data_en, nrcan))
                else:
                    (package_dict[ckan],) = jsonpath(data_en, nrcan)
            elif nrcan:
                package_dict[ckan] = data_en[nrcan]
        # After both languages are done, we can deal with lookups.
        for ckan in lookups:
            if ckan == 'catalog_type':
                package_dict[ckan] = self.get_choice_for_english_value(ckan, 'Geo Data')
            elif ckan == 'maintenance_and_update_frequency':
                # BUG FIX: this elif was duplicated verbatim in the original;
                # the second copy was unreachable and has been removed.
                package_dict[ckan] = self.get_choice_for_english_value(ckan, 'Unknown')
        pprint(package_dict)
        sys.exit()
def render(self):
    """Write one CSV row per log record in self.queryset (fixed columns plus
    one extra column per configured JSONPath into the record's `extra` blob)
    and return the accumulated CSV text."""
    for logrecord in self.queryset:
        row = [logrecord.app, logrecord.loggerName, logrecord.get_level_display(), logrecord.timestamp, logrecord.message, logrecord.fileName, logrecord.lineNumber, logrecord.thread, logrecord.exception_message]
        for json_path in CSV_EXPORT_EXTRA_JSON_PATHS:
            jsonpath_result = jsonpath.jsonpath(logrecord.extra, json_path)
            # jsonpath returns False on no match -- emit an empty cell then.
            row.append(jsonpath_result[0] if jsonpath_result is not False else u'')
        self.writer.writerow(row)
    csv_data = self.buffer.getvalue()
    self.buffer.close()
    return csv_data
def main(DCity, ACity, Date, maxprice):
    """Query ctrip.com's domestic flight search and print every flight from
    *DCity* to *ACity* on *Date* whose price is at most *maxprice*.

    :param DCity: departure city code (e.g. 'CAN').
    :param ACity: arrival city code (e.g. 'SHA').
    :param Date: departure date, 'YYYY-MM-DD'.
    :param maxprice: price ceiling (string or int).
    """
    headers = {
        'Host' : 'flights.ctrip.com',
        'Cache-Control' : 'max-age=0',
        'Upgrade-Insecure-Requests' : '1',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding' : 'gzip, deflate, sdch',
        'Accept-Language' : 'zh-CN,zh;q=0.8',
        'Cookie' : '_abtest_userid=d8acf40b-bd99-4d4c-a32a-fc629f1c7551; GUID=09031168410855693377; HotelCityID=2split%E4%B8%8A%E6%B5%B7splitShanghaisplit2018-1-26split2018-01-27split0; appFloatCnt=3; StartCity_Pkg=PkgStartCity=32; traceExt=campaign=CHNbaidu81&adid=index; adscityen=Guangzhou; Session=SmartLinkCode=U153507&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=&SmartLinkLanguage=zh; Union=OUID=title&AllianceID=5376&SID=153507&SourceID=&Expires=1518230709419; manualclose=1; DomesticUserHostCity=CAN|%b9%e3%d6%dd; __zpspc=9.14.1517629443.1517629443.1%233%7Cwww.so.com%7C%7C%7C%7C%23; _jzqco=%7C%7C%7C%7C1517625909536%7C1.1773865467.1516933714186.1517625928012.1517629443498.1517625928012.1517629443498.undefined.0.0.44.44; FD_SearchHistorty={"type":"S","data":"S%24%u5E7F%u5DDE%28CAN%29%24CAN%242018-02-15%24%u4E0A%u6D77%28SHA%29%24SHA"}; _RF1=163.177.136.73; _RSG=lqaqXMEO6B5ogqNCNdMo0A; _RDG=283d86d5305517270a304ebac2d5b88495; _RGUID=52c660e5-a1d2-4d2f-a032-5a75cc5f4ab2; Mkt_UnionRecord=%5B%7B%22aid%22%3A%22761445%22%2C%22timestamp%22%3A1517188667747%7D%2C%7B%22aid%22%3A%224897%22%2C%22timestamp%22%3A1517471270297%7D%2C%7B%22aid%22%3A%225376%22%2C%22timestamp%22%3A1517641193850%7D%5D; _ga=GA1.2.2009874240.1516933714; _gid=GA1.2.318181679.1517625909; MKT_Pagesource=PC; _bfa=1.1516933711238.47ahnf.1.1517625906849.1517641190746.14.81.212093; _bfs=1.2; _bfi=p1%3D101027%26p2%3D101027%26v1%3D81%26v2%3D80'
    }
    url = 'http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=' + DCity + "&ACity1=" + ACity + '&SearchType=S&DDate1=' + Date
    response = requests.get(url, headers=headers)
    # BUG FIX: the original wrapped a bare `response.status_code == 200`
    # comparison in try/except; the result was discarded, so the "error"
    # branch could never trigger on a bad status.  Test it explicitly.
    if response.status_code == 200:
        print('链接正常', response.status_code)
    else:
        print('链接有误', response.status_code)
    print('日期:', Date)
    demo = json.loads(response.text)
    aim = jsonpath.jsonpath(demo, "$..acn")       # destination city names
    fn = jsonpath.jsonpath(demo, '$..fn')         # flight numbers
    times = jsonpath.jsonpath(demo, "$..dt")      # departure times
    # Keep only non-trivial time entries.
    time = []
    for i in times:
        if len(i) > 1:
            time.append(i)
    price = jsonpath.jsonpath(demo, "$..fis.[:100].lp")  # lowest price per flight
    rata = jsonpath.jsonpath(demo, "$..scs[0]..rate")    # discount rate
    # One record per flight, then drop everything above the price ceiling.
    code = map(list, zip(aim, fn, time, price, rata))
    code = filter(lambda x: x[3] <= int(maxprice), code)
    for i in code:
        print('航班', i)
def main(query):
    """Alfred workflow entry point: list Chrome bookmarks whose name or URL
    contains *query* (Python 2; `file()` and generator .next() are py2-only)."""
    results = []
    json_ = json.load(file(BOOKMARK_PATH))
    uid = uid_generator()
    # Every bookmark node: has a url and is of type 'url' (skips folders).
    for entry in jsonpath.jsonpath(json_, '''$..?(@.url and @.type=='url')'''):
        # Case-insensitive match on the name, raw match on the URL.
        if not query in entry['name'].lower() and not query in entry['url']:
            continue
        results.append(alfred.Item(
            attributes = {
                'uid': uid.next(),
                'arg': entry['url']
            },
            title = entry['name'],
            subtitle = entry['url'],
            icon = 'icon.png'))
    alfred.write(alfred.xml(results))
def list(self, **kwargs):
    """Yield model objects for items fetched from the home-center REST API.

    Keyword arguments are forwarded to the API; any that remain (after the
    known API_PARAMS are removed) are turned into an implicit JSONPath
    filter, unless an explicit 'jsonpath' keyword was supplied.  'p_*'
    arguments address item properties.
    """
    _logger.debug(self.RESOURCE)
    _logger.debug(type(self))
    # Pop jsonpath if exists and pass the rest of arguments to API
    # for some API calls home center handles additional parameters
    json_path = kwargs.pop('jsonpath', None)
    # Home center ignores unknown parameters so there is no need to
    # remove them from REST reqest.
    try:
        items = self.http_client.get(self.RESOURCE, params=kwargs).json()
    except exceptions.ConnectionError:
        return
    # if there is no explicit defined json_path paramers
    if json_path is None:
        for value in self.API_PARAMS:
            kwargs.pop(value, None)
        condition_expression = ""
        for k, v in six.iteritems(kwargs):
            if k.startswith('p_'):
                # search for properties
                k = "properties." + k[2:]
            condition_expression += self.JSON_CONDITION_BASE.format(
                k, quote_if_string(v))
            condition_expression += " and "
        # BUG FIX: the original used `is not ""` -- an identity check that
        # is implementation-dependent (and a SyntaxWarning on modern
        # Python); use inequality.
        if condition_expression != "":
            # filter the results with json implicit built from
            # remaining parameters (strip the trailing " and ")
            json_path = "$[?({})]".format(condition_expression[:-5])
            _logger.debug("Implicit JSON Path: {}".format(json_path))
    if json_path:
        _logger.debug("JSON Path: {}".format(json_path))
        filtered_items = jsonpath.jsonpath(items, json_path)
        if filtered_items:
            items = filtered_items
        else:
            # jsonpath returns False on no match -- nothing to yield.
            return
    for item in items:
        item_obj = self.model(**item)
        if item_obj:
            yield item_obj
        else:
            continue
def _read_json(self, content, config):
    """Extract values from a JSON response per *config* (Python 2).

    config['extract'] is either a single JSONPath string (returns the raw
    jsonpath result) or a dict of {key: expression}; in the dict case the
    per-key match lists are zipped positionally into a list of dicts.
    Without an 'extract' key the parsed JSON is returned unchanged.
    """
    data = json.loads(content)
    responses = []
    if not "extract" in config:
        return data
    extract_config = config["extract"]
    if type(extract_config) is unicode:
        return jsonpath.jsonpath(data, extract_config)
    else:
        tmp_responses = {}
        for key, expression in extract_config.items():
            tmp_responses[key] = jsonpath.jsonpath(data, extract_config[key])
        keys = tmp_responses.keys()
        # NOTE(review): only the first key's match count is checked; if
        # another key matched fewer items (or jsonpath returned False for
        # it), the indexing below would raise -- assumes aligned results.
        if len(keys) > 0 and len(tmp_responses[keys[0]]):
            # Zip the per-key match lists positionally into row dicts.
            for i in range(0, len(tmp_responses[keys[0]])):
                response = dict()
                for key in keys:
                    response[key] = tmp_responses[key][i]
                responses.append(response)
        return responses
def parse_item(self, response, loop, fields):
    """Yield one Item per element matched by the *loop* JSONPath in the
    response's JSON body (Python 2 / scrapy); fields come from literal
    'value' entries or per-element 'jpath' expressions."""
    try:
        txt = utils.to_unicode(response.body)
        # Strip anything outside the outermost object/array.
        if hasattr(self, 'json_type') and self.json_type=='list':
            l, r = txt.find('['), txt.rfind(']')
        else:
            l, r = txt.find('{'), txt.rfind('}')
        obj = json.loads(txt[l:r+1])
        self.macro.update({'URL':response.url})
        # jsonpath returns False on no match, hence the `or []`.
        for e in jsonpath.jsonpath(obj, loop or '$[]') or []:
            item = Item()
            for k,v in fields.iteritems():
                if 'value' in v:
                    v_x = v.get('value')
                elif 'jpath' in v:
                    v_x = jsonpath.jsonpath(e, self.macro.expand(v.get('jpath')))
                else:
                    log.msg(u'field [{}] should contains "value" or "jpath"'.format(k), level=log.WARNING)
                    continue
                val = utils.convert_type(v.get('parse', {}))(self.macro.expand(v_x))
                if not val and 'default' in v:
                    val = self.macro.expand(v.get('default'))
                qry = v.get('filter', {})
                # A field that fails its filter abandons the whole item
                # (the for-else below only yields when no break occurred).
                if utils.filter_data(qry, val):
                    item[k] = arg_to_iter(val)
                else:
                    break
            else:
                yield item
    except Exception as ex:
        log.msg(u'{}\n{}'.format(response.url, traceback.format_exc()))
def _convert(data):
    """Convert *data* according to the converter type `t` and its options
    `inf` (both closed over from the enclosing scope -- not visible here).
    Python 2 (`unicode`, decodestring)."""
    # For scalar converters, collapse a list result to its first element.
    if t not in ['join', 'list'] and isinstance(data, list):
        data = TakeFirst()(data)
    # Normalize: strip strings, stringify numbers/dates; anything else is
    # passed through untouched.
    if type(data) in [str, unicode]:
        data = data.strip()
    elif type(data) in [int, float, datetime]:
        data = str(data)
    else:
        return data
    if t=='join':
        # Join list items with a configurable separator.
        sep = inf.get('sep', u' ')
        return Join(sep)(data)
    elif t=='list':
        # Join, then strip HTML tags.
        sep = inf.get('sep', u' ')
        return remove_tags(Join(sep)(data)).strip()
    elif t=='text':
        return remove_tags(data).strip()
    elif t=='clean':
        # Sanitize HTML (drop style/script/links/meta).
        cleaner = Cleaner(style=True, scripts=True, javascript=True, links=True, meta=True)
        return cleaner.clean_html(data)
    elif t=='unesc':
        return HTMLParser().unescape(data)
    elif t=='base64':
        return base64.decodestring(data)
    elif t=='sub':
        # Regex substitution with configured pattern/replacement.
        frm = inf.get('from')
        to = inf.get('to')
        return re.sub(frm, to, data)
    elif t=='jpath':
        # Parse as JSON and apply the configured JSONPath query.
        qs = inf.get('query')
        return jsonpath.jsonpath(json.loads(data), qs)
    elif t=='map':
        # Table lookup with a default.
        m = inf.get('map')
        d = inf.get('default')
        return m.get(data, d)
    elif t=='int':
        # Via float so that "1.0" also parses.
        return int(float(data))
    elif t=='float':
        return float(data)
    elif t=='date':
        fmt = inf.get('fmt', 'auto')
        tz = inf.get('tz', '+00:00')
        return parse_date(data, fmt, tz)
    elif t=='cst':
        # Same as 'date' with the timezone pinned to China Standard Time.
        fmt = inf.get('fmt', 'auto')
        return parse_date(data, fmt, '+08:00')
    else:
        return data
def github(profile, token):
    """Return the ssh clone URLs of every repository in GitHub organization
    *profile*, authenticating with *token*.  Returns [] on any failure."""
    url = 'https://api.github.com/orgs/%s/repos' % profile
    logger.debug("Requesting '%s' with '%s' token..." % (url, token))
    api_request = requests.get(url, headers={'Authorization' : 'token %s' % token})
    logger.debug("Loading JSON content...")
    repositories = json.loads(api_request.content)
    # On error GitHub answers with an object carrying a 'message' key
    # instead of the expected list of repositories.
    if isinstance(repositories, dict) and ('message' in repositories):
        logger.error("Could not retrieve the expected JSON content from GitHub! '%s'" % repositories['message'])
        return [ ]
    logger.debug("Searching for 'clone_url' keys...")
    urls = jsonpath.jsonpath(repositories, '$[*].ssh_url')
    # BUG FIX: jsonpath returns False (not a list) when nothing matches, so
    # len(urls)/iteration below would raise.  Treat "no match" as empty.
    if not urls:
        return [ ]
    logger.debug("%d URLs found:" % len(urls))
    logger.debug("URLs %s:" % urls)
    # BUG FIX: `map(logger.debug, urls)` discarded its result -- a silent
    # no-op on Python 3 (lazy map); log explicitly instead.
    for u in urls:
        logger.debug(u)
    return urls
def pathexists(self, json, path, value=None):
    """
    Check whether the JSON *path* exists, optionally with an explicit
    *value*.  Returns True or False.

    If argument 'value' == None, only the existence of the path is
    verified; otherwise *value* must also appear among the matches.
    """
    try:
        results = jsonpath.jsonpath(json, path)
    except TypeError:
        # Nonexistent / untraversable path.
        return False
    if value is None:
        # Truthy results mean the path matched at least one element
        # (jsonpath returns False when nothing matches).
        return bool(results)
    try:
        # Membership test against the LIST of results.
        return value in results
    except TypeError:
        # results was not a container (path effectively missing).
        return False
def reserved_price(self, region, ins, arch, reserved):
    """Return the effective hourly USD price of a heavy-utilization reserved
    instance: hourly rate plus the upfront fee amortized over the term.

    :param region: AWS region name (as used by the pricing JSON).
    :param ins: instance type, e.g. 'm1.large'.
    :param arch: 'linux' or 'windows'.
    :param reserved: term length in years (1 or 3) -- presumably; confirm
        against YEARHOURS usage.
    """
    region = str(region)
    if region == 'us-east-1':
        region = 'us-east'  # naming inconsistency!
    i1, i2 = ins.split('.')
    # Pricing JSON labels reserved families 'Res...' where on-demand uses 'OD...'.
    r1 = TYPES_AZ[i1].replace('OD', 'Res')
    assert arch in ('linux', 'windows')
    if arch == 'linux':
        data = self.heavylinux
    else:
        data = self.heavywindows
    # Navigate region -> instance family -> size in the pricing document.
    jpath = 'config.regions[?(@.region==%r)].instanceTypes[?(@.type==%r)].sizes[?(@.size==%r)]' % (
        region,
        r1,
        SUBTYPES_AZ[i2],
    )
    # Hourly rate and one-time upfront fee for the chosen term.
    jpath1 = jpath+'.valueColumns[?(@.name==%r)].prices.USD' % 'yrTerm%dHourly' % reserved
    jpath2 = jpath+'.valueColumns[?(@.name==%r)].prices.USD' % 'yrTerm%d' % reserved
    hourly = jsonpath(data, jpath1)
    upfront = jsonpath(data, jpath2)
    if not hourly or not upfront:
        logging.error('Price not found for: %r %r %r' % (region, ins, arch))
        return 0.0
    # Amortize the upfront fee over the full term in hours.
    return float(hourly[0]) + float(upfront[0])/YEARHOURS/reserved
def ondemand_price(self, region, ins, arch):
    """Return the hourly on-demand USD price for instance type *ins*
    (e.g. 'm1.large') in *region* for *arch*, or 0.0 when not found."""
    family, size = ins.split('.')
    # The AWS pricing data labels Windows as 'mswin'.
    if arch == 'windows':
        arch = 'mswin'
    # Navigate region -> instance family -> size -> price column.
    jpath = ('config.regions[?(@.region==%r)].instanceTypes[?(@.type==%r)]'
             '.sizes[?(@.size==%r)].valueColumns[?(@.name==%r)].prices.USD') % (
        REGIONS_AZ[region],
        TYPES_AZ[family],
        SUBTYPES_AZ[size],
        arch,
    )
    hits = jsonpath(self.ondemand, jpath)
    if not hits:
        logging.error('Price not found for: %r %r %r' % (region, ins, arch))
        return 0.0
    return float(hits[0])
def assertRex(entity, jpath, pat, mode='match', dctAssert=None):
    """Assert that the first value matched by *jpath* in *entity* satisfies
    the regex *pat* via re.<mode> (e.g. 'match' or 'search').

    When *dctAssert* is given, its 'errnum' counter and 'asserts' log are
    updated with the outcome.  Returns the regex match object, or None.
    """
    err = 0
    message = 're.%s(%s, %s)' % (mode, pat, jpath)
    m = None
    ret = jsonpath.jsonpath(entity, jpath)
    # jsonpath returns False when nothing matches.
    if not ret:
        err = 1
    if not err:
        # SAFETY FIX: look the re function up with getattr instead of
        # eval('re.%s' % mode) -- same behavior, no code execution from a
        # string.
        refunc = getattr(re, mode)
        text = ret[0]
        m = refunc(pat, text)
        if not m:
            err = 1
    if dctAssert:
        dctAssert['errnum'] += err
        dctAssert['asserts'].append('%s:%s' % (message, err == 0))
    return m
def get_json(self):
    """Fetch self.url (Python 2, urllib2), extract every 'name' field via
    JSONPath, print the names, and append them as a JSON array to name.txt."""
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0)'}
    request = urllib2.Request(self.url, headers=headers)
    response = urllib2.urlopen(request)
    html = response.read()
    jsonobj = json.loads(html)
    # Collect every city name anywhere in the document.
    namelist = jsonpath.jsonpath(jsonobj, '$..name')
    for name in namelist:
        print(name)
    # Serialize the list, keeping non-ASCII characters readable.
    nametext = json.dumps(namelist, ensure_ascii=False)
    # BUG FIX: the original ended with a bare `file.close` (missing
    # parentheses, so a no-op) and shadowed the `file` builtin; the with
    # statement already closes the handle.
    with open('name.txt', 'a') as outfile:
        outfile.write(nametext.encode("utf-8"))
def _complete_href_links(self, parent_collection, current):
    """Resolves self hyperlinks (JSONPath and JSONPointers).

    Recursively walks *current*, replacing every HyperLink-style "href"
    (a '#...' JSONPointer or a '$...' JSONPath into *parent_collection*)
    with the target resource's selfRef.  Resolved targets are memoized in
    self._cache.  Returns 0 on success, negative on failure (propagated
    from set_self_ref).  Python 2 (`unicode`).
    """
    if isinstance(current, HyperLink) or \
       (isinstance(current, dict) and "href" in current):
        if isinstance(current["href"], (unicode, str)):
            resource = None
            if current["href"] in self._cache:
                # Already resolved this href before.
                resource = self._cache[current["href"]]
            elif current["href"].startswith("#"):
                # JSONPointer relative to the parent collection.
                resource = jsonpointer.resolve_pointer(parent_collection, current["href"][1:])
                if not resource:
                    resource = "Unresolved"
            elif current["href"].startswith("$"):
                # JSONPath: ask for the PATH and evaluate it against the
                # collection.  NOTE(review): eval() on the returned path
                # string -- safe only while hrefs are trusted.
                path = jsonpath(parent_collection, current["href"], result_type="PATH")
                if path:
                    resource = eval("parent_collection%s" % path[0].lstrip("$"))
                else:
                    resource = "Unresolved"
            self._cache[current["href"]] = resource
            if resource and resource != "Unresolved":
                # Make sure the target itself has a selfRef to point at.
                if "selfRef" not in resource:
                    ret = self.set_self_ref(resource)
                    if ret < 0:
                        return ret
                current["href"] = resource["selfRef"]
            return 0
    elif isinstance(current, list):
        keys = range(len(current))
    elif isinstance(current, dict):
        keys = current.keys()
    else:
        # Scalar leaf -- nothing to resolve.
        return 0
    # Recurse into children (list items or dict values).
    for key in keys:
        value = current[key]
        if isinstance(value, (NetworkResource, Topology)) and \
           "selfRef" not in value:
            ret = self.set_self_ref(value)
            if ret < 0:
                return ret
        if isinstance(value, list) or isinstance(value, dict):
            ret = self._complete_href_links(parent_collection, value)
            if ret < 0:
                return ret
    return 0
def _extract_section_data(self, section_source):
    """
    Given a section extract the relevant json for it
    :param section_source: the json source of the elasticsearch section docuement
    :return: the json representation of the section's content, or False when
        the section title cannot be encoded for the JSONPath expression
    """
    page_title = section_source['page_title']
    section_title = section_source['section_title']
    page_json = self._page_from_title(page_title)
    try:
        # Find the section node whose title matches, anywhere under the
        # page's wikitext-json tree.
        return jsonpath(
            page_json,
            "$..wikitext-json.."
            "[?(@.__type==\"section\" and @.title==\"{}\")]".format(section_title)
        )
    except UnicodeEncodeError:
        # Titles with characters the expression cannot carry are skipped.
        return False
def get_parks(self):
    """Yield (page_title, park_name) pairs from every section matched by
    self.query, tracking per-document success via self.processed()."""
    for document, dom in self.sections_with_content(self.query):
        # Whether at least one park name was yielded for this document.
        least_once = False
        if not dom:
            self.processed(False)
            continue
        # All list-item contents in the section's DOM.
        content = jsonpath(dom, "$..[?(@.__type==\"list_item\")].content")
        if not content:
            self.processed(False)
            continue
        for name in self._get_park_names(content):
            yield document['page_title'], name
            least_once = True
        self.processed(least_once)
def __call__(self, request_context, env):
    """Resolve this accessor against *request_context* / *env* (Python 2).

    Walks self.access_path through the request context, optionally overrides
    from env[self.env_param], then applies self.field_name; when self.is_json
    the result is further narrowed with the JSONPath in self.value (first
    match only).  Returns the resolved value or None.
    """
    global dsl_debug
    root = None
    p = request_context
    # Descend the configured key path; a missing key aborts the walk.
    l = len(self.access_path)
    for i in range(l):
        n = self.access_path[i]
        if n in p:
            p = p[n]
        else:
            p = None
            break
    # An env-provided value takes precedence over the context walk.
    if self.use_env and self.env_param in env and self.field_name in env[self.env_param]:
        p = env[self.env_param][self.field_name]
    elif self.field_name is not None and p is not None and self.field_name in p:
        p = p[self.field_name]
    root = p
    if self.is_json:
        if dsl_debug:
            print "jsonpath context: ", root
            print "\tpath: ", self.value
        # jsonpath returns False on no match; collapse to None, else take
        # the first match.
        root = jsonpath.jsonpath(root, self.value)
        if dsl_debug:
            print "\tjsonpath result: ", root
            print "\tpath: ", self.value
        if not root:
            root = None
        else:
            root = root[0]
    if dsl_debug:
        print "Call returns: " , root
    return root
def __parse_expression(self, expression, container=None):
    """
    Parses the jsonpath expression in {self.output_results} and return its
    result.

    An expression ending in "as list" returns the full match list;
    otherwise only the first match is returned (None when no match).
    NOTE(review): the "as list" semantics are inferred from the flag name
    -- confirm against callers.
    """
    if container is None:
        container = self.output_results
    expression = expression.strip()
    as_list = False
    if expression.endswith("as list"):
        as_list = True
        expression = expression.replace("as list", "").strip()
    try:
        results = jsonpath(container, expression)
    except Exception as e:
        print(e)
        raise RuntimeError("Error when parsing the expression {}".format(expression))
    # BUG FIX: the original computed `results` and `as_list` but fell off
    # the end of the function, always returning None.
    if as_list:
        return results
    return results[0] if results else None
def assertFunc(entity, jpath, func_name, val, dctAssert=None):
    """Assert that the first value matched by *jpath* in *entity* compares
    to *val* via the operator string *func_name* (e.g. '==', '>', 'in').

    When *dctAssert* is given, its 'errnum' counter and 'asserts' log are
    updated.  Returns the matched value (or the raw jsonpath result when
    nothing matched).  Python 2 (`basestring`).

    NOTE(review): the comparison is built with eval(), so func_name/val
    effectively execute as code -- only safe with trusted inputs.
    """
    err = 0; message = '(%s %s %s)' % (jpath, func_name, val)
    ret = jsonpath.jsonpath(entity, jpath)
    if not ret:
        err = 1
    if not err:
        ret = ret[0]
        if isinstance(val, basestring):
            # NOTE(review): '\"' is the same character as '"', so this
            # replace is a no-op -- the intended quote-escaping never
            # happens; a val containing '"' breaks the eval string.
            val = val.replace('"', '\"')
            good = eval('ret %s "%s"' % (func_name, val))
        else:
            good = eval('ret %s %s' % (func_name, val))
        err = not good and 1 or 0
    if dctAssert:
        dctAssert['errnum'] += err
        dctAssert['asserts'].append('%s:%s' % (message, err==0))
    return ret
def parse(self, response):
    """Persist Joom order-tracking info from *response* into the shipping
    table: a minimal row on HTTP 404, otherwise the full tracking record.

    NOTE(review): the INSERT statements are built with %-interpolation
    rather than parameterized execute() -- SQL-injection-prone if any field
    is untrusted; credentials are also hard-coded here.
    """
    # Open the database connection.
    db = pymysql.Connect(host='192.168.1.22', port=7306, user='******',
                         passwd='123456', db='joom', charset='utf8')
    # Obtain an operation cursor via cursor().
    cursor = db.cursor()
    if response.status == 404:
        # Order id is the value after '=' in the request URL.
        order_id = str(
            re.compile(r"(?<==)(.+?)\b").search(
                response.request.url).group(0))
        sql = "INSERT INTO shipping(order_id, tracking_true) \
            VALUES ('%s', %s)" % \
            (order_id, 0)
    else:
        shipping_re = json.loads(response.text)
        # print(shipping_re)
        # if shipping_re['code'] == 1000 :
        #     sql = "INSERT INTO shipping(order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms) \
        #             VALUES ('%s',%s,%s,'%s','%s','%s',%s,%s)" % \
        #             (str(re.compile(r"(?<==)(.+?)\b").search(response.request.url).group(0)), True, False, 10, '0', '0', False, '0')
        # else:
        #     print(shipping_re)
        # Process the data.
        order_id = str(
            re.compile(r"(?<==)(.+?)\b").search(
                response.request.url).group(0))
        # jsonpath returns a list on a match, False otherwise -- so a list
        # result means tracking checkpoints exist.
        tracking_true = 'True' if type(
            jsonpath.jsonpath(
                shipping_re,
                '$..checkpoints[0].depth')) == list else 'False'
        # tracking_ture = type(jsonpath.jsonpath(re,'$..checkpoints[0].depth'))
        # Depth of the most recent checkpoint (or 'False' when untracked).
        depth = jsonpath.jsonpath(
            shipping_re, '$..checkpoints[-1:].depth'
        )[0] if tracking_true == 'True' else 'False'
        delivered = shipping_re['data']['delivered']
        tracking_id = str(shipping_re['data']['id'])
        trackingNumber = str(shipping_re['data']['trackingNumber'])
        arrived = shipping_re['data']['arrived']
        passedCustoms = shipping_re['data']['passedCustoms']
        # print(order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms)
        # SQL insert statement.
        sql = "INSERT INTO shipping(order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms) \
            VALUES ('%s',%s,%s,'%s','%s','%s',%s,%s)" % \
            (order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms)
        # print(sql)
    try:
        # Execute the sql statement.
        cursor.execute(sql)
        # Commit the transaction.
        db.commit()
        print(order_id + "订单物流信息已插入")
    except pymysql.Error as e:
        # Roll back on error.
        print("错误代码 %d: %s" % (e.args[0], e.args[1]))
        # print('Got error {!r}, errno is {}'.format(e, e.args[0]))
        print(sql)
        db.rollback()
        print(order_id + "出现错误")
    # Close the database connection.
    db.close()
# Demo of json dump/load round-trips and a JSONPath lookup (Python 2).
# 1. Serialize a list of dicts to a string (keep non-ASCII readable).
listDict = [{"city": "北京"}, {"name": "小明"}]
strlist = json.dumps(listDict, ensure_ascii=False)
print type(strlist)  # <type 'str'>
# Write the string to a file.
with open("listDict.json", 'w') as file:
    file.write(strlist)
# 2. Second way to store dicts and lists: json.dump straight to a file.
listStr = [{"city": "北京"}, {"name": "大刘"}]
json.dump(listStr, open("listStr.json", "w"), ensure_ascii=False)
dictStr = {"city": "北京", "name": "大刘"}
json.dump(dictStr, open("dictStr.json", "w"), ensure_ascii=False)
time.sleep(1)
# ------------ Read the data back from the files ---------
dictList = json.load(open("listDict.json", 'r'))
# Prints 北京
print dictList[0]["city"]
# ------------ Parse a dict string with loads ----------
strDict = '{"city": "北京", "name": "大猫"}'
# <type 'dict'>
print type(json.loads(strDict))
jsonobj = json.loads(strDict)
# From the root, match every 'name' node.
citylist = jsonpath.jsonpath(jsonobj, '$..name')
print citylist[0].encode('utf-8')
import requests
import json
import jsonpath

# Demo: send a PUT to the reqres.in test API with a request body loaded
# from a JSON fixture file, then extract 'updatedAt' from the response.
url = "https://reqres.in/api/users/2"

# Read Json input file
file = open(
    '/Users/ivoitkiv/PycharmProjects/APIAutomation/CreatingNewUser.json',
    'r')
json_input = file.read()
request_json = json.loads(json_input)
print(request_json)

#Send Put Request
response = requests.put(url, request_json)

#Response validation
assert response.status_code == 200
#print(response.headers.get('Content-Length'))
json_response = json.loads(response.text)
#id = jsonpath.jsonpath(json_response,'id')
#print(id[0])
# jsonpath returns a list of matches; take the first.
updated_li = jsonpath.jsonpath(json_response, 'updatedAt')
print(updated_li[0])
# NOTE(review): fragment of a larger loop body -- `num`, `a`, `url`,
# `headers` and `srequest` come from enclosing code not shown here, the
# `continue` targets that enclosing loop, and the final `try` is truncated.
data = {
    'offset': '{}'.format(num),
    'limit': '20',
    'is_refresh': '1',
    'city_name': '{}'.format(a)
}
try:
    resp_json = srequest.post(url=url, data=data, headers=headers, verify=False).json()
except:
    continue
# resp_json = srequest.post(url=url, data=data, headers=headers).json()
time.sleep(1.5)
# Every car-series id anywhere in the listing response.
series_id_list = jsonpath.jsonpath(resp_json, '$..id')
for series_id in series_id_list:
    # print(series_id)
    spec_link = 'https://www.dcdapp.com/motor/car_page/m/v1/series_all_json/?series_id={}&city_name={}&show_city_price=1&m_station_dealer_price_v=1'.format(
        series_id, a)
    print('spec_link', spec_link)
    try:
        spec_data = srequest.get(
            url=spec_link, headers=headers, verify=False).json(strict=False)['data']
        time.sleep(1.5)
        # Car-series name
        series_name = jsonpath.jsonpath(spec_data, '$[online]..series_name')
from pyecharts.charts import Map #地图matplotlib :静态图 from pyecharts import options as opts #配置项 from demo1 import nameMap #自己写的模块 引入 #爬取全球疫情数据 #数据源 url='https://api.inews.qq.com/newsqa/v1/automation/foreign/country/ranklist' resp = requests.post(url).text #get post 对网址post请求 # print(resp) #字符串 data = json.loads(resp) #string——dict 方便后期提取内容 # print(type(data)) #1提取国家名字 病死率数量 name=jsonpath.jsonpath(data,"$..name") #从网页源代码提取名字 print(name) # #病死率数量 confirm=jsonpath.jsonpath(data,"$..confirm") #提取数据 print(confirm) # 整理数据 zip a = list(zip(name,confirm)) print(a) # 可视化地图分析 map_ = Map(opts.InitOpts(width='1200px',height='600px')).add(series_name="世界各国的病死率", data_pair = a,#输入数据
def parse(self, response):
    """Scrapy callback (Python 2): yield one SpidersHynItem per direct
    Scoot ('TR') flight found in the JSON search response; re-queue the
    request metadata in self.task when no flights are returned."""
    data_dict = json.loads(response.body)
    try:
        # First 'departures' array anywhere in the document.
        flight_list = jsonpath(data_dict, '$..departures')[0]
    except:
        # self.token_flag = True
        logging.info('# invalid airport')
        print '# flight_list ', response.text
        return
    self.proxy_flag = False
    datas = response.meta.get('invalid')
    if len(flight_list) == 0:
        # Nothing found: log and re-queue the original task parameters.
        datas = response.meta.get('invalid')
        logging.info('# no flight: ' + datas.get('depAirport') +
                     datas.get('arrAirport') + datas.get('date'))
        self.task.append(response.meta.get('invalid'))
    for data in flight_list:
        # Connecting flights (more than one leg) are skipped.
        if len(data.get('legs')) > 1:
            logging.info('is change')
            continue
        flight_info = data.get('legs')[0]
        flight_number = str(flight_info.get('flightNumber')).replace(
            ' ', '')
        # First two characters of the flight number are the carrier code;
        # only Scoot (TR) flights are kept.
        carrier = flight_number[0:2]
        if carrier != 'TR':
            print '# other airline'
            continue
        # Timestamps come as e.g. 2018-12-31T00:55:00.
        dep_time = time.mktime(
            time.strptime(flight_info.get('departureDateTime'),
                          '%Y-%m-%dT%H:%M:%S'))
        arr_time = time.mktime(
            time.strptime(flight_info.get('arrivalDateTime'),
                          '%Y-%m-%dT%H:%M:%S'))
        dep_airport = flight_info.get('departure')
        arr_airport = flight_info.get('arrival')
        price_info_list = data.get('fareClasses')
        # Cheapest fare class is listed first -- presumably; confirm
        # against the API ordering.
        price_info = price_info_list[0]
        try:
            adult_price = float(price_info.get('price').get('amount'))
        except:
            print '### price error: ' + datas.get(
                'depAirport') + datas.get('arrAirport') + datas.get('date')
            continue
        currency = price_info.get('price').get('currency')
        net_fare = adult_price
        cabin = price_info.get('productCode')
        max_seats = int(data.get('journeyInfo').get('seatLeft'))
        adult_tax = 0
        is_change = 1
        # Look for the 'FlyBag' fare class to price baggage segments.
        segments_data = ''
        for i in price_info_list:
            if i.get('name') == 'FlyBag':
                segments_data = i
                break
        segments = []
        try:
            if segments_data != '':
                segments.append(
                    [segments_data.get('price').get('amount'), max_seats])
            else:
                segments = [[0, 0]]
        except:
            print '### segments price error: ' + datas.get(
                'depAirport') + datas.get('arrAirport') + datas.get('date')
            continue
        item = SpidersHynItem()
        item.update(
            dict(f=flight_number,
                 d=dep_time,
                 a=arr_time,
                 fc=self.port_city.get(dep_airport, dep_airport),
                 tc=self.port_city.get(arr_airport, arr_airport),
                 c=currency,
                 ap=adult_price,
                 at=adult_tax,
                 n=net_fare,
                 m=max_seats,
                 cb=cabin,
                 cr=carrier,
                 i=is_change,
                 s=json.dumps(segments),
                 g=time.time(),
                 da=dep_airport,
                 aa=arr_airport))
        yield item
def treating_data(self, is_token, file_var, file_path, parameters, dependent, data, save_response_dict):
    """Resolve a test case's request pieces before it is sent.

    Merges dependency data (extracted from earlier cases' responses) into the
    request body and/or path parameters, picks the right header set, and
    resolves JSONPath references embedded in the path.

    :param is_token: '' means use the tokenless header set, anything else the token header set.
    :param file_var: unused here (file-upload handling is commented out below).
    :param file_path: unused here (file-upload handling is commented out below).
    :param parameters: '/'-separated path segments; segments may match dependency
        keys or be JSON snippets like {"case_002":"$.data.id"}.
    :param dependent: '' for none; either "key={json}" or a reference resolved
        via save_response_dict.read_depend_data().
    :param data: request body as a JSON string, or '' for none.
    :param save_response_dict: store of earlier cases' actual responses.
    :return: tuple (data, header, parameters_path_url).
    """
    # Choose which header set to use.
    if is_token == '':
        header = self.no_token_header
    else:
        header = self.token_header
    # if file_var != '' and file_path != '':  # used when calling a file-upload API
    #     self.token_header['Content-Type'] = "multipart/form-data"
    # else:
    #     del self.token_header['Content-Type']
    logger.info(f'处理依赖前data的数据:{data}')
    # Resolve the dependency data first.
    if dependent != '':
        if dependent.find('={') != -1:
            # Inline form "key={...json...}": the value after '=' IS the data.
            dependent_key = dependent.split('=')[0]
            dependent_value = dependent.split('=')[1]
            # dependent_data = {dependent_key:save_response_dict.read_depend_data(dependent_value)}
            dependent_data = json.loads(dependent_value)
        else:
            # Reference form: look the data up in the saved responses.
            dependent_data = save_response_dict.read_depend_data(dependent)
        logger.debug(f'依赖数据解析获取的字典{dependent_data}')
        if parameters != '' and data != '':
            data = json.loads(data)
            exists_key = False
            # Substitute dependency values into matching path segments.
            parameters_list = parameters.split('/')
            for dk, dv in dependent_data.items():
                for pl in parameters_list:
                    if pl == dk:
                        if isinstance(dv, int):
                            dv = str(dv)
                        parameters = parameters.replace(pl, dv)
            logger.info(f'parameters有数据,依赖有数据时{parameters}')
            # Merge dependency values into body keys that collide with them.
            # Supports list / dict / int / str values; lists must be declared
            # as lists in the case data, e.g. {"id":[1],"user":{"username":"******"}}
            for k, v in data.items():
                for dk, dv in dependent_data.items():
                    if k == dk:
                        if isinstance(data[k], list):
                            data[k].append(dv)
                        if isinstance(data[k], dict):
                            data[k].update(dv)
                        if isinstance(data[k], int):
                            data[k] = dv
                        if isinstance(data[k], str):
                            data[k] = dv
                        exists_key = True
            if exists_key is False:
                # No key collision: union the two dicts into a new body.
                dependent_data.update(data)
                data = dependent_data
            logger.info(f'data有数据,依赖有数据时{data}')
        elif parameters != '' and data == '':
            # Path-only case, e.g. /id/name/num
            parameters_list = parameters.split('/')
            for dk, dv in dependent_data.items():
                for pl in parameters_list:
                    if pl == dk:
                        if isinstance(dv, int):
                            dv = str(dv)
                        parameters = parameters.replace(pl, dv)
            logger.info(f'parameters有数据,依赖有数据时{parameters}')
        elif data != '' and parameters == '':
            data = json.loads(data)
            exists_key = False
            # Same key-collision merge as above, but int values get +1 and
            # str values get a random suffix (added for double-primary-key cases).
            # Example: {"id":[1],"user":{"username":"******"}}
            for k, v in data.items():
                for dk, dv in dependent_data.items():
                    if k == dk:
                        print(type(data[k]))
                        if isinstance(data[k], list):
                            data[k].append(dv)
                        if isinstance(data[k], dict):
                            data[k].update(dv)
                        if isinstance(data[k], int):
                            # increment by 1
                            data[k] = dv + 1
                        if isinstance(data[k], str):
                            # random suffix for test cases with double primary keys
                            data[k] = dv + str(random.randint(0, 10000))
                        exists_key = True
            if exists_key is False:
                # No key collision: union the two dicts into a new body.
                dependent_data.update(data)
                data = dependent_data
            logger.info(f'data有数据,依赖有数据时{data}')
        else:
            # Neither body nor path given: the dependency IS the body.
            data = dependent_data
            logger.info(f'data无数据,依赖有数据时{data}')
    else:
        if data == '':
            data = None
            logger.info(f'data无数据,依赖无数据{data}')
        else:
            try:
                data = json.loads(data)
                logger.info(f'data有数据,依赖无数据{data}')
            except JSONDecodeError as e:
                logger.error(f'data格式有误,请检查数据格式{e}')
    # Resolve JSONPath references inside path segments.
    # Incoming parameters look like {"case_002":"$.data.id"}/item/{"case_002":"$.meta.status"}
    path_list = parameters.split('/')
    # Index iteration is deliberate: segments are replaced in place.
    for i in range(len(path_list)):
        try:
            # Try to parse the segment as JSON. NOTE: json.loads('2') succeeds
            # and yields an int, which the AttributeError below handles.
            path_dict = json.loads(path_list[i])
        except JSONDecodeError as e:
            # Not JSON: this segment keeps its current value.
            logger.error(f'无法转换字典,进入下一个检查,本轮值不发生变化:{path_list},{e}')
            # skip to the next segment
            continue
        else:
            logger.info(f'获得字典信息:{path_dict}')
            # Guard against json.loads('number') producing a non-dict.
            try:
                for k, v in path_dict.items():
                    try:
                        # Extract a field from case k's actual response using JSONPath v.
                        # path_list[i] = jsonpath.jsonpath(json.dumps(save_response_dict.actual_response[k]),v)[0]
                        path_list[i] = jsonpath.jsonpath(
                            save_response_dict.actual_response[k], v)
                        if isinstance(path_list[i], list):
                            path_list[i] = path_list[i][0]
                    except TypeError as e:
                        logger.error(f'无法提取,请检查响应字典中是否支持该表达式.{e}')
            except AttributeError as e:
                logger.error(
                    f'类型错误:{type(path_list[i])},本次将不转换值{path_list[i]},{e}')
    # Segments may now be non-strings; normalize before joining.
    path_list = list(map(str, path_list))
    # Rebuild the path, e.g. 500/item/200
    parameters_path_url = "/".join(path_list)
    logger.info(f'path路径参数解析依赖后的路径为{parameters_path_url}')
    return data, header, parameters_path_url
def existing_bookings():
    """Return the ids of all currently existing bookings as a list.

    BUG FIX: the original iterated over the response but returned inside the
    first loop iteration, never using the loop variable — the loop was dead
    code. The JSONPath is applied once to the whole response instead
    ('$..bookingid' collects every bookingid), and a False result from
    jsonpath (no matches) is mapped to an empty list instead of crashing
    in list().
    """
    url = 'https://restful-booker.herokuapp.com/booking'
    response = requests.get(url).json()
    bookingids = jsonpath.jsonpath(response, '$..bookingid')
    # jsonpath returns False when nothing matched.
    return list(bookingids) if bookingids else []
def extract_json_field(resp, json_field):
    """Return the first JSONPath match for *json_field* in *resp*'s JSON body."""
    matches = jsonpath.jsonpath(resp.json(), json_field)
    first_match = matches[0]
    return first_match
# Provides the list of currently existing bookings def existing_bookings(): url = 'https://restful-booker.herokuapp.com/booking' response = (requests.get(url)).json() for booking in response: bookingids = jsonpath.jsonpath(response, '$.[bookingid]') return list(bookingids) BOOKING = random.choice(existing_bookings()) UPDATE = 'Edited' # GET Pre-request: Takes booking and its firstname get_response = requests.get(URL.format(BOOKING)) get_firstname = jsonpath.jsonpath(get_response.json(), '$.firstname')[0] def test_update_booking(token): # PUT: Updates booking headers = { 'Content-Type': 'application/json', 'Cookie': 'token=' + str(token) } put_data = json.dumps({ "firstname": "{}".format(UPDATE), "lastname": "Brown", "totalprice": 111, "depositpaid": True, "bookingdates": { "checkin": "2018-01-01",
def covid_19_163(indicator: str = "实时") -> pd.DataFrame:
    """NetEase (163.com) COVID-19 data feed.

    https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&#map_block
    https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&

    Fetches several datasets from the NetEase epidemic endpoints up front,
    then returns the one selected by *indicator* (a Chinese label such as
    "中国实时数据"; see the dispatch chain at the bottom).

    :param indicator: which dataset to return.
    :return: the DataFrame matching *indicator* (None for an unknown label).
    :rtype: pandas.DataFrame
    """
    url = "https://c.m.163.com/ug/api/wuhan/app/data/list-total"
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    }
    payload = {
        "t": int(time.time() * 1000),
    }
    r = requests.get(url, params=payload, headers=headers)
    data_json = r.json()
    # Data explanation text, scraped from the epidemic special page.
    url = "https://news.163.com/special/epidemic/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    data_info_df = pd.DataFrame([
        item.text.strip().split(".")[1] for item in soup.find(
            "div", attrs={
                "class": "data_tip_pop_text"
            }).find_all("p")
    ])
    data_info_df.columns = ["info"]
    # China: historical per-day (point-in-time) figures.
    hist_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["chinaDayList"]],
        index=[item["date"] for item in data_json["data"]["chinaDayList"]])
    # China: historical cumulative figures.
    hist_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["chinaDayList"]],
        index=[item["date"] for item in data_json["data"]["chinaDayList"]])
    # China: current (real-time) totals.
    current_df = pd.DataFrame.from_dict(data_json["data"]["chinaTotal"])
    # World (top-level areaTree entries): point-in-time figures.
    outside_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["areaTree"]],
        index=[item["name"] for item in data_json["data"]["areaTree"]])
    # World (top-level areaTree entries): cumulative figures.
    outside_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["areaTree"]],
        index=[item["name"] for item in data_json["data"]["areaTree"]])
    # All countries and regions worldwide: point-in-time figures
    # ($..today recurses into every node of the tree).
    all_world_today_df = pd.DataFrame(
        jsonpath.jsonpath(data_json["data"]["areaTree"], '$..today'),
        index=jsonpath.jsonpath(data_json["data"]["areaTree"], '$..name'))
    # All countries and regions worldwide: cumulative figures.
    all_world_total_df = pd.DataFrame(
        jsonpath.jsonpath(data_json["data"]["areaTree"], '$..total'),
        index=jsonpath.jsonpath(data_json["data"]["areaTree"], '$..name'))
    # Chinese regions: cumulative figures. (NOTE: the original comment here
    # said "时点" (point-in-time) but the code reads item["total"].)
    area_total_df = pd.DataFrame(
        [
            item["total"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ],
        index=[
            item["name"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ])
    # Chinese regions: point-in-time figures. (Original comment had the
    # labels swapped; the code reads item["today"].)
    area_today_df = pd.DataFrame(
        [
            item["today"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ],
        index=[
            item["name"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ])
    # Academic progress articles on the epidemic.
    url_article = "https://vip.open.163.com/api/cms/topic/list"
    payload_article = {
        "topicid": "00019NGQ",
        "listnum": "1000",
        "liststart": "0",
        "pointstart": "0",
        "pointend": "255",
        "useproperty": "true"
    }
    r_article = requests.get(url_article, params=payload_article)
    article_df = pd.DataFrame(r_article.json()["data"]).iloc[:, 1:]
    # News feed: JSONP payload, unwrapped with demjson below.
    url_info = "https://ent.163.com/special/00035080/virus_report_data.js"
    payload_info = {
        "_": int(time.time() * 1000),
        "callback": "callback",
    }
    r_info = requests.get(url_info, params=payload_info, headers=headers)
    data_info_text = r_info.text
    # Strip the "callback(" prefix and trailing ")" of the JSONP wrapper.
    data_info_json = demjson.decode(data_info_text.strip(" callback(")[:-1])
    if indicator == "数据说明":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return data_info_df
    if indicator == "中国实时数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return current_df
    if indicator == "中国历史时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return hist_today_df
    if indicator == "中国历史累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return hist_total_df
    if indicator == "世界历史时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return outside_today_df
    if indicator == "世界历史累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return outside_total_df
    if indicator == "全球所有国家及地区时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return all_world_today_df
    elif indicator == "全球所有国家及地区累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return all_world_total_df
    elif indicator == "中国各地区时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return area_today_df
    elif indicator == "中国各地区累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return area_total_df
    elif indicator == "疫情学术进展":
        return article_df
    elif indicator == "实时资讯新闻播报":
        return pd.DataFrame(data_info_json["list"])
    elif indicator == "实时医院新闻播报":
        return pd.DataFrame(data_info_json["hospital"])
    elif indicator == "前沿知识":
        return pd.DataFrame(data_info_json["papers"])
    elif indicator == "权威发布":
        return pd.DataFrame(data_info_json["power"])
    elif indicator == "滚动新闻":
        return pd.DataFrame(data_info_json["scrollNews"])
# Create a user via POST and pull its id from the response.

# Reading data from the file.
# BUG FIX: the original called open() without ever closing the handle;
# the with-statement guarantees it is closed even if read() fails.
# (NOTE(review): the names `file` and `id` shadow builtins but are kept,
# since later module code may reference them.)
with open('E:\\API_automation\\GET_Request\\CreateUser.json', 'r') as file:
    json_input = file.read()

# Converting into Json format — for the POST request we need JSON input.
requests_json = json.loads(json_input)
#print(requests_json)

# Make POST request with Json Input body.
response = requests.post(url, requests_json)
print(response.content)

# Validating response code.
assert response.status_code == 201, "Actual status code is different"

# Fetch header from response.
#print(response.headers)  # This prints all the header details present.
print(
    response.headers.get("Content-Length")
)  # This gives information about the particular header, here content-length

# Parse response to Json Format.
response_json = json.loads(response.text)

# Pick Id using Json Path — jsonpath returns a list of matches.
id = jsonpath.jsonpath(response_json, 'id')
print(id[0])
# Print(id)
def parse_page(self, response):
    """Parse an Instagram hashtag page: yield one item per post, then a
    follow-up Request for the next page when pagination says there is one.

    The page embeds its data as `window._sharedData = {...};` in the first
    <script> tag; that JSON is extracted and queried with JSONPath.
    """
    url = response.url
    keywords = response.meta['keywords']
    # Re-fetch the page directly (outside the Scrapy downloader).
    r = requests.get(url, headers=DEFAULT_REQUEST_HEADERS)
    sel = etree.HTML(r.text)
    content = sel.xpath('/html/body/script[1]/text()')
    # Strip the JS assignment wrapper to leave bare JSON.
    content = re.sub('window._sharedData = ', '', content[0]).rstrip(';')
    data = json.loads(content)
    # Pagination cursor/flag for the hashtag's media feed.
    end_cursor = jsonpath(
        data,
        '$.entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media.page_info.end_cursor'
    )[0]
    has_next_page = jsonpath(
        data,
        '$.entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media.page_info.has_next_page'
    )[0]
    print(has_next_page)
    # Both the regular feed and the top-posts section.
    posts = jsonpath(
        data,
        '$.entry_data.TagPage[0].graphql.hashtag.[edge_hashtag_to_media,edge_hashtag_to_top_posts].edges[*]'
    )
    for post in posts:
        self.item['keywords'] = keywords
        self.item['type'] = jsonpath(post, '$.node.__typename')[0]
        try:
            self.item['img_description'] = ''.join(
                jsonpath(post, '$.node.accessibility_caption'))
        except TypeError as e:
            # jsonpath returns False when nothing matched; ''.join then raises.
            self.item['img_description'] = ''
        self.item['cover_height'] = jsonpath(post,
                                             '$.node.dimensions.height')[0]
        self.item['cover_width'] = jsonpath(post,
                                            '$.node.dimensions.width')[0]
        self.item['cover_link'] = jsonpath(post, '$.node.display_url')[0]
        self.item['liked_count'] = jsonpath(
            post, '$.node.edge_liked_by.count')[0]
        try:
            self.item['content'] = ''.join(
                jsonpath(
                    post,
                    '$.node.edge_media_to_caption.edges[*].node.text'))
        except Exception as e:
            # Best effort: keep whatever 'content' currently holds.
            pass
        self.item['comment_count'] = jsonpath(
            post, '$.node.edge_media_to_comment.count')[0]
        self.item['post_id'] = jsonpath(post, '$.node.id')[0]
        self.item['post_url'] = jsonpath(post, '$.node.shortcode')[0]
        self.item['post_link'] = self.start_urls[0] + 'p/' + self.item[
            'post_url'] + '/'
        self.item['user_id'] = jsonpath(post, '$.node.owner.id')[0]
        self.item['is_video'] = jsonpath(post, '$.node.is_video')[0]
        timestamp = jsonpath(post, '$.node.taken_at_timestamp')[0]
        self.item['pub_time'] = time.strftime("%Y-%m-%d",
                                              time.localtime(timestamp))
        yield self.item
    if has_next_page:
        # Build the GraphQL query for page 2 (break_count starts at 1).
        variable_dict = {
            "tag_name": keywords,
            "first": 12,
            "after": end_cursor
        }
        variable_json = json.dumps(variable_dict)
        print(variable_dict)
        params = {
            'query_hash': '174a5243287c5f3a7de741089750ab3b',
            'variables': variable_json
        }
        url = 'https://www.instagram.com/graphql/query/?{}'.format(
            urlencode(params))
        yield Request(url=url,
                      meta={
                          'keywords': keywords,
                          'break_count': 1
                      },
                      callback=self.parse_nextpage)
def check(Case, Response):
    '''Validate an API response against the expectations in a test case.

    Case['Out']['type'] selects the check mode:
      * 'key'  — compare expected values for individual keys (keys containing
        '$' are treated as JSONPath expressions against the whole response);
      * 'type' — compare the Python type of each key's value against a type
        name ('int', 'str', 'dict', 'list', 'bool', 'none');
      * 'file' — dump the whole response to a file and compare its MD5 with
        a stored golden file.
    Results are reported through Log.print_info as
    INSTRUMENTATION_STATUS lines; nothing is returned.

    :param Case: test-case dict with 'Out' (expectations) and 'CaseName'.
    :param Response: decoded response to validate.
    :return: None
    '''
    if Case['Out']['type'] == 'key':
        '''
        校验返回的指定key的value值
        '''
        isPass = True
        for keyOut in Case['Out']:
            if keyOut == 'type':
                # 'type' is the mode selector, not an expectation key.
                pass
            else:
                if '$' in keyOut:
                    # JSONPath expectation: value must be among the matches.
                    Log.print_info(2, "use Jsonpath")
                    res = jsonpath(Response, keyOut)
                    if res:
                        # NOTE: numpy.where yields a 0-d array; the
                        # 'Pass' != Error comparison below relies on
                        # ndarray equality against a str.
                        Error = numpy.where(
                            Case['Out'][keyOut] in res, 'Pass',
                            'expect:{0},Actual:{1},Res:{2}'.format(
                                Case['Out'][keyOut], res, Response))
                    else:
                        Error = 'Failed,{0} 查找不到匹配 key'.format(keyOut)
                else:
                    # Direct key comparison.
                    Error = numpy.where(
                        Case['Out'][keyOut] == Response[keyOut], 'Pass',
                        'expect:{0},Actual:{1},Res:{2}'.format(
                            Case['Out'][keyOut], Response[keyOut],
                            Response))
                if 'Pass' != Error:
                    # First failure aborts the remaining key checks.
                    isPass = False
                    Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Failed')
                    Log.print_info(
                        1, 'INSTRUMENTATION_STATUS: log={0}'.format(Error))
                    break
        if isPass:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Pass')
            Log.print_info(1, 'INSTRUMENTATION_STATUS: log={0}'.format('null'))
    elif Case['Out']['type'] == 'type':
        '''
        校验返回结果指定key的type数据类型
        '''
        isPass = True
        for keyOut in Case['Out']:
            if keyOut == 'type':
                pass
            else:
                # Map the expected type name to the Python type object.
                # NOTE(review): an unrecognized name leaves expect_type
                # unset (NameError) or stale from the previous iteration.
                if Case['Out'][keyOut] == 'int':
                    expect_type = int
                elif Case['Out'][keyOut] == 'str':
                    expect_type = str
                elif Case['Out'][keyOut] == 'dict':
                    expect_type = dict
                elif Case['Out'][keyOut] == 'list':
                    expect_type = list
                elif Case['Out'][keyOut] == 'bool':
                    expect_type = bool
                elif Case['Out'][keyOut] == 'none':
                    expect_type = type(None)
                Error = numpy.where(
                    expect_type == type(Response[keyOut]), 'Pass',
                    'expect:{0},Actual:{1},Res:{2}'.format(
                        Case['Out'][keyOut], type(Response[keyOut]),
                        Response))
                if 'Pass' != Error:
                    isPass = False
                    Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Failed')
                    Log.print_info(
                        1, 'INSTRUMENTATION_STATUS: log={0}'.format(Error))
                    break
        if isPass:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Pass')
    elif Case['Out']['type'] == 'file':
        '''
        校验整体返回
        '''
        # Normalize the working directory to the project root.
        Local = os.getcwd()
        if Local.endswith('API_test_Pro'):
            pass
        else:
            Local = Local.split('API_test_Pro')[0] + 'API_test_Pro'
        # Dump the response so it can be hashed like the golden file.
        file = open('{1}/Entry/Result/{0}'.format(Case['CaseName'], Local),
                    'w')
        file.write(str(Response))
        file.close()
        Actual = getMD5('{1}/Entry/Result/{0}'.format(Case['CaseName'],
                                                      Local))
        expect = getMD5('{1}/Utils/Result/{0}'.format(Case['Out']['file'],
                                                      Local))
        Error = numpy.where(Actual == expect, 'Pass',
                            '数据不匹配,检查返回:{0}'.format(Response))
        if 'Pass' != Error:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Failed')
            Log.print_info(1, 'INSTRUMENTATION_STATUS: log={0}'.format(Error))
        else:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Pass')
        # Clean up the temporary dump regardless of outcome.
        os.remove('{1}/Entry/Result/{0}'.format(Case['CaseName'], Local))
def parse_nextpage(self, response):
    """Parse one page of the Instagram GraphQL hashtag feed, yield an item
    per post, and recurse (via a new Request) while the API reports more
    pages, capped at 200 pages by break_count.
    """
    try:
        keywords = response.meta['keywords']
        break_count = int(response.meta['break_count'])
        # Re-fetch the GraphQL endpoint outside the Scrapy downloader;
        # the sleep throttles requests against rate limiting.
        r = requests.get(response.url)
        time.sleep(1)
        data = r.json()
        # Pagination cursor/flag for this feed page.
        end_cursor = jsonpath(
            data,
            '$.data.hashtag.edge_hashtag_to_media.page_info.end_cursor')[0]
        has_next_page = jsonpath(
            data,
            '$.data.hashtag.edge_hashtag_to_media.page_info.has_next_page'
        )[0]
        posts = jsonpath(data,
                         '$.data.hashtag.edge_hashtag_to_media.edges[*]')
        for post in posts:
            self.item['keywords'] = keywords
            self.item['type'] = jsonpath(post, '$.node.__typename')[0]
            try:
                self.item['img_description'] = ''.join(
                    jsonpath(post, '$.node.accessibility_caption'))
            except TypeError as e:
                # jsonpath returns False on no match; ''.join then raises.
                self.item['img_description'] = ''
            try:
                self.item['content'] = ''.join(
                    jsonpath(
                        post,
                        '$.node.edge_media_to_caption.edges[*].node.text'))
            except TypeError as e:
                self.item['content'] = ''
            self.item['cover_height'] = jsonpath(
                post, '$.node.dimensions.height')[0]
            self.item['cover_width'] = jsonpath(
                post, '$.node.dimensions.width')[0]
            self.item['cover_link'] = jsonpath(post,
                                               '$.node.display_url')[0]
            self.item['liked_count'] = jsonpath(
                post, '$.node.edge_liked_by.count')[0]
            self.item['comment_count'] = jsonpath(
                post, '$.node.edge_media_to_comment.count')[0]
            self.item['post_id'] = jsonpath(post, '$.node.id')[0]
            self.item['post_url'] = jsonpath(post, '$.node.shortcode')[0]
            self.item['post_link'] = self.start_urls[0] + 'p/' + self.item[
                'post_url'] + '/'
            self.item['user_id'] = jsonpath(post, '$.node.owner.id')[0]
            self.item['is_video'] = jsonpath(post, '$.node.is_video')[0]
            timestamp = jsonpath(post, '$.node.taken_at_timestamp')[0]
            self.item['pub_time'] = time.strftime(
                "%Y-%m-%d", time.localtime(timestamp))
            yield self.item
        logging.error('{},{}.'.format(has_next_page, end_cursor))
        # Follow pagination up to a hard cap of 200 pages.
        if has_next_page and break_count < 200:
            break_count += 1
            print(
                '********************************************************************************************************************************'
            )
            print(break_count)
            logging.error('break_count:{}.'.format(break_count))
            variable_dict = {
                "tag_name": keywords,
                "first": 12,
                "after": end_cursor
            }
            variable_json = json.dumps(variable_dict)
            params = {
                'query_hash': '174a5243287c5f3a7de741089750ab3b',
                'variables': variable_json
            }
            url = 'https://www.instagram.com/graphql/query/?{}'.format(
                urlencode(params))
            yield Request(url=url,
                          meta={
                              'keywords': keywords,
                              'break_count': break_count
                          },
                          callback=self.parse_nextpage)
    except Exception as e:
        # Best-effort spider: log and drop this page on any failure.
        logging.error('{},{}.'.format(e, response.url))
"isbn": "0-395-19395-8", "price": 22.99 }], "bicycle": { "color": "red", "price": 19.95 } } } from jsonpath import jsonpath """jsonpath:用来解析多层嵌套的json数据""" # 1. 安装:pip install jsonpath # 2. 用法:jsonpath(要被提取的python数据类型, '提取的规则') print(jsonpath(book_dict, '$..author')) # 返回值为列表,如果取不到,返回False print(jsonpath(book_dict, '$.store.bicycle.price')) """ $.store.book[*].author store中的所有的book的作者 $..author 所有的作者 $.store.* store下的所有的元素 $.store..price store中的所有的内容的价格 $..book[2] 第三本书 $..book[(@.length-1)] | $..book[-1:] 最后一本书 $..book[0,1] | $..book[:2] 前两本书 $..book[?(@.isbn)] 获取有isbn的所有数 $..book[?(@.price<10)] 获取价格大于10的所有的书 $..* 获取所有的数据 """
def generated_datas(data, sent_data=None):
    """Merge the generated data for several dependency specs into one dict.

    *data* is a comma-separated list of specs; each spec is expanded via
    generated_data() and folded into the accumulator. When *sent_data* is
    given, it is updated in place and returned (as in the original).

    BUG FIX: the original compared with `sent_data == None` (PEP 8 says
    `is None`) and duplicated the identical merge loop in both branches;
    the branches are collapsed into one loop.
    """
    temp_data = {} if sent_data is None else sent_data
    for spec in data.split(","):
        for key, value in generated_data(spec).items():
            temp_data[key] = value
    return temp_data


if __name__ == '__main__':
    data = "case_001>data.token"
    data1 = "case_001>data.token,case_001>data"
    data2 = "case_001>data"
    #print(split_data(data))
    #print(split_key(data2))
    #data3={"111":111}
    #print(generated_datas(data1,data3))
    print(depend_data(data))
    print(type(json.loads(depend_data(data))))
    print(jsonpath.jsonpath(json.loads(depend_data(data)), "$.data.token"))
} max_id = "" while True: if max_id == "": url = "https://m.weibo.cn/comments/hotflow?id=4485613145089303&mid=4485613145089303&max_id_type=0" else: url = "https://m.weibo.cn/comments/hotflow?id=4485613145089303&mid=4485613145089303&max_id={}&max_id_type=0".format( max_id) response1 = requests.get(url=url, headers=headers) if response1.status_code != 200: break print("请求成功:{}".format(response1.status_code)) response = response1.json() names = jsonpath.jsonpath(response, "$..screen_name") ids = jsonpath.jsonpath(response, "$..user.id") max_id = jsonpath.jsonpath(response, "$..max_id")[0] print("请求的是:{}".format(max_id)) texts = jsonpath.jsonpath(response, "$..text") textss = [] for i in texts: a = i.split("<", 1)[0] textss.append(a) for id, name, text in zip(ids, names, textss): item1 = {"id": id, "name": name, "text": text} client.weibo.pinglun.insert(item1) print("写入成功") client.close() time.sleep(3)
# Interactive scrape of the TV-program schedule API: one request per day
# in the user-supplied day range, collected into a transposed DataFrame.
month = int(input('输入月份:'))
date1 = int(input('输入起始日期:'))
date2 = int(input('输入结束日期:'))
# Zero-pad single-digit months, exactly as the API's date format expects.
if month < 10:
    month = '0' + str(month)
for date in range(date1, date2 + 1):
    if date < 10:
        date = '0' + str(date)
    url = ('https://api.sharkshopping.com/ec/api?method=tv.program.data'
           '&appid=webapp&token=&version=4.4.1&source=wap&city_num=310100'
           '&brand_id=&date=') + str(year) + '-' + str(month) + '-' + str(date) + '&cat_id='
    print(url)
    res = requests.get(url)
    json_data = demjson.decode(res.text)
    # Pull each column of interest out of the decoded payload.
    start_time_list = jsonpath(json_data, '$..start_time')
    end_time_list = jsonpath(json_data, '$..end_time')
    sku_list = jsonpath(json_data, '$..sku')
    name_list = jsonpath(json_data, '$..name')
    price_list = jsonpath(json_data, '$..price')
    product_brand_list = jsonpath(json_data, '$..product_brand')
    # Drop the zero placeholders from both time columns.
    start_time_list = [i for i in start_time_list if i != 0]
    end_time_list = [i for i in end_time_list if i != 0]
    list_content = [
        start_time_list, end_time_list, sku_list, name_list, price_list,
        product_brand_list
    ]
    # One row per column list, then transpose so columns become columns.
    df = pd.DataFrame(list_content)
    df = df.T
# Smoke test: hit the configured GET endpoint and assert the 'total' field.
import sys
# The project root must be on sys.path BEFORE the helpers/utility imports
# below — do not reorder these lines.
sys.path.append('G:\\framework_practicsse\\apiautomation')
print(sys.path)
from helpers import crudAPI
from utility import Config
import json
import jsonpath

# Endpoint URL comes from the [APIDetails] section of the config file.
gurl = Config.readConfigData("APIDetails", "get_url")
a = crudAPI.hitgetApi("get", gurl)
#print(a)
json_response = json.loads(a)
print(json_response)
# 'total' here is a bare field name, not a '$..' JSONPath; jsonpath
# returns a list of matches (or False when nothing matched).
x = jsonpath.jsonpath(json_response, 'total')
print(x)
assert x[0] == 12
# Update a user via PUT and print the server-side 'updatedAt' timestamp.
import requests
import json
import jsonpath

url = 'https://reqres.in/api/users/2'
data_dict = {"name": "Sanjay Singh Panwar", "job": "leader"}

# data_dict is sent form-encoded (the `data` positional of requests.put).
response = requests.put(url, data_dict)
print(response)

# A successful update answers 200 OK.
assert response.status_code == 200

json_response = json.loads(response.text)
# BUG FIX: the original mixed Python 2 print statements
# (`print json_response`, `print updated_time[0]`) with call-form prints;
# normalized to the call form, which is valid on both Python 2 and 3.
print(json_response)

# 'updatedAt' is stamped by the server on every PUT; jsonpath returns a list.
updated_time = jsonpath.jsonpath(json_response, 'updatedAt')
print(updated_time[0])
# Query the AMap "place around" API for POIs near the configured village
# and print each POI's telephone number.
# village = "zhjy"
# village_name = "中虹家园"
# location = '&location=121.672462,31.28271'  # 中虹家园
village = "dxy"
village_name = "丁香园"
location = '&location=121.415404,31.14585'  # 丁香园
keywords = '&keywords=亲子'
type = '&types='
other = '&radius=3000&offset=20&page=1&extensions=base'
amap_url = 'http://restapi.amap.com/v3/place/around?key=eb38430327c843a503698c6eb015ec48' + location + keywords + type + other
page = urllib2.urlopen(amap_url)
data = json.load(page)
citylist = jsonpath.jsonpath(data, '$..pois')
for city in citylist:
    # Iterate the POI list directly (the original used an index-based while loop).
    for poi in city:
        distance = jsonpath.jsonpath(poi, '$..distance')[0]
        name = jsonpath.jsonpath(poi, '$..name')[0]
        type = jsonpath.jsonpath(poi, '$..type')[0]
        typecode = jsonpath.jsonpath(poi, '$..typecode')[0]
        address = jsonpath.jsonpath(poi, '$..address')[0]
        location = jsonpath.jsonpath(poi, '$..location')[0]
        tel = jsonpath.jsonpath(poi, '$..tel')[0]
        # An empty tel comes back as []; normalize to ''.
        if tel == []:
            tel = ''
        print(tel)
# rank_date=time.strptime(str(rank_date),"%Y-%m-%d")
def getcms(keyword):  # , language, resPeople
    """Google-search `inurl:telegram.me <keyword>` via the SERP provider,
    filter the hits down to clean telegram.me channel URLs, and insert each
    previously-unseen one into the database.

    Keyword metadata (language, resPeople, part, station) is loaded from
    keyWordsCollection; the keyword's fetch status is updated on every exit
    path except a JSON decode failure.
    """
    result = keyWordsCollection.find_one({"originKey": keyword})
    language = result["language"]
    resPeople = result["resPeople"]
    part = result["part"]
    station = result["station"]
    # Update the keyword's fetch status.
    # if " " in keyword:
    #     updateStatusKeyWord(keyword, part)
    #     return
    # Map the language to the provider's word code; bail if unsupported.
    word = words.get(language)
    if not word:
        updateStatusKeyWord(keyword, part)
        logging.error("没有匹配的语言:{}".format(language))
        return
    keywordnew = "inurl:telegram.me " + keyword
    # URL-encode the spaces for the path-style provider API.
    keywordnew = keywordnew.replace(" ", "%20")
    url = "http://api.serpprovider.com/5bfdf4cd7d33d1d77b9875d1/google/en-us/{}/{}".format(
        word, keywordnew)
    logging.info("请求数据,关键字:{},url:{}".format(keywordnew, url))
    html = sendRequest(url)  # perform the request
    try:
        datas = json.loads(html)
    except Exception as e:
        # NOTE(review): silently drops the keyword without updating status.
        return
    reslist = jsonpath.jsonpath(datas, "$..res")
    if reslist:
        # jsonpath wraps the result list; unwrap the first match.
        reslist = reslist[0]
    else:
        logging.error("google搜索后没有数据:{}".format(url))
        updateStatusKeyWord(keyword, part)
        return
    if not reslist:
        # The unwrapped list itself may be empty.
        updateStatusKeyWord(keyword, part)
        logging.error("google搜索后没有数据:{}".format(url))
        return
    for data in reslist:
        url = data["url"]
        # protocol
        scheme = urlparse(data['url']).scheme
        # domain
        domain = urlparse(data['url']).netloc
        if not scheme or not domain:
            continue
        link = scheme + '://' + domain  # rebuild the base link
        if domain != "telegram.me":
            logging.error("域名不为telegram.me :{}".format(domain))
            continue
        # From here on the domain is telegram.me; reject bare/slash-only paths.
        if url.split("telegram.me")[-1] == "/":
            logging.error("url为{}".format(url))
            continue
        if url.endswith("telegram.me"):
            logging.error("url为{}".format(url))
            continue
        # Normalize: canonical base + original path, no trailing slash.
        url = link + url.split("telegram.me")[-1]
        if url.endswith("/"):
            url = url[:-1]
        # Dedupe against the in-memory cache first...
        if url in urlList:
            logging.warn("该地址已经获取,存在缓存中,url:{}".format(url))
            continue
        # ...then against the database.
        result = googleUrlCollection.find_one({"url": url, "part": part})
        if result:
            logging.warn("该url已经获取,存在数据库中中,url:{}".format(url))
            continue
        title = data['title']  # hit title
        describition = data['desc']  # hit description
        urlList.append(url)
        sourceUrl = data["url"]
        insertItem(domain, url, sourceUrl, scheme, keyword, language,
                   resPeople, title, describition, word, part, station)
    updateStatusKeyWord(keyword, part)
# Fetch a technical-skills record and print its 'id' field via jsonpath.
import json
import requests
import jsonpath

# Sample payload — defined but not sent; the GET below takes no body.
load = {
    'id': 4296,
    'language': ['python', 'selenium'],
    'yearsexp': '2',
    'lastused': '2019',
    'st_id': '4296'
}
result = requests.get('http://thetestingworldapi.com/api/technicalskills/4182')
print(result.status_code)
rece = result.content
jse = json.loads(rece)
print(jse)
# Bare field name (not a '$..' path); jsonpath returns a list of matches.
print(jsonpath.jsonpath(jse, 'id'))
""" import requests import jsonpath headers = { "X-Lemonban-Media-Type": "lemonban.v2" } # 登录的请求 url = "http://api.lemonban.com/futureloan/member/login" data = { "mobile_phone": "13367899876", "pwd": "lemonban" } res = requests.post(url=url, json=data, headers=headers) data = res.json() print(data) # 提取token token = jsonpath.jsonpath(data, "$..token") # [0] print(token) # 提取token值 token_type = jsonpath.jsonpath(data, "$..token_type")[0] print(token_type) # 提取reg_name reg_name = jsonpath.jsonpath(data, "$..reg_name")[0] print(reg_name)
# Fetch Lagou's city-label catalog and print every city name and code.
from jsonpath import jsonpath
import requests
from fake_useragent import UserAgent
import json

url = "https://www.lagou.com/lbs/getAllCitySearchLabels.json"
headers = {
    # Randomized UA to avoid trivial bot blocking.
    "User-Agent": UserAgent().random
}
response = requests.get(url, headers=headers)
# CONSISTENCY FIX: the original parsed the same body twice — once with
# json.loads(response.text) and once with response.json(). Parse once and
# query both JSONPaths against the same object.
data = json.loads(response.text)
names = jsonpath(data, '$..name')
codes = jsonpath(data, '$..code')
print(names)
print(codes)
"SecurityGroup": [ { "CreationTime": "2016-10-27T01:49:17Z", "Tags": { "Tag": [] }, "SecurityGroupId": "sg-SecurityGroupId", "SecurityGroupName": "SecurityGroupName", "Description": "Description", "ResourceGroupId": "", "SecurityGroupType": "normal", "VpcId": "" } ] } } """ wanted_res_1 = jsonpath.jsonpath( json.loads(test_json_str), "$.SecurityGroups.SecurityGroup[0].SecurityGroupId") wanted_res_2 = jsonpath.jsonpath( json.loads(test_json_str), '$.SecurityGroups.SecurityGroup[?(@.Description=="Description")].SecurityGroupId' ) wanted_res_3 = jsonpath.jsonpath( json.loads(test_json_str), "$.SecurityGroups.SecurityGroup[*].SecurityGroupId")
def parse_data(self, spec_datas):
    """For each raw spec payload, extract brand/series/model/price fields
    with JSONPath, normalize the model names, and yield one flat tuple per
    payload for downstream storage.
    """
    # print(spec_datas)
    for spec_data in spec_datas:
        # Model ids; jsonpath returns False when the payload has none.
        spec_id = jsonpath.jsonpath(spec_data, '$[specList]..specId')
        # print('车型id',spec_id)
        if spec_id == False:
            continue
        # Manufacturer id.
        brand_id = jsonpath.jsonpath(spec_data, '$[specinfo]..fctid')[0]
        # print('品牌id',brand_id)
        # Manufacturer name.
        brand_name = jsonpath.jsonpath(spec_data, '$[specinfo]..fctname')[0]
        # print('品牌名称',brand_name)
        # Brand id.
        contain_brand_id = jsonpath.jsonpath(spec_data,
                                             '$[specinfo]..brandid')[0]
        # print('品牌id',brand_id)
        # Brand name.
        contain_brand_name = jsonpath.jsonpath(spec_data,
                                               '$[specinfo]..brandname')[0]
        # print('品牌名称',brand_name)
        # Series id.
        series_id = jsonpath.jsonpath(spec_data, '$[specinfo]..seriesid')[0]
        # print('车系id',series_id)
        # Series name.
        category_fullname = jsonpath.jsonpath(spec_data,
                                              '$[specinfo]..seriesname')[0]
        # print('车系名称',category_fullname)
        """将车系名称与车型名称拼接"""
        # Series names, one per model entry.
        series_names = jsonpath.jsonpath(spec_data, '$[specList]..SeriesName')
        # Model names.
        spec_names = jsonpath.jsonpath(spec_data, '$[specList]..specName')
        # Prefix each model name with its series name.
        spec_name_list = []
        for i in range(0, len(series_names)):
            spec_name_list.append(series_names[i] + ' ' + spec_names[i])
        # print('名称',spec_name_list)
        # Pull the model-year token (e.g. "2020款") out and append it to the end.
        spec_name_list2 = []
        for name in spec_name_list:
            s_name = "".join(re.findall(r'[0-9]{4}款*', name))
            sp_name = name.replace(s_name + " ", '')
            spec_names = sp_name + s_name
            spec_name_list2.append(spec_names)
        # print(spec_name_list2)
        # Insert a space after the manual-transmission marker.
        spec_name_shou = []
        for name in spec_name_list2:
            name1 = name.replace('手动', '手动 ')
            spec_name_shou.append(name1)
        # print(spec_name_shou)
        # Insert a space after the automatic-transmission marker.
        spec_name = []
        for name in spec_name_shou:
            name1 = name.replace('自动', '自动 ')
            spec_name.append(name1)
        # print(spec_name)
        gc.collect()
        # City id.
        city_id = jsonpath.jsonpath(spec_data, '$..cid')[0]
        # print('城市id', city_id)
        # City name.
        city_name = jsonpath.jsonpath(spec_data, '$..cityName')[0]
        # print('城市名称',city_name)
        # Manufacturer guide prices, one per model.
        guidance_price = jsonpath.jsonpath(spec_data,
                                           '$[specList]..MinOriginalPrice')
        # print('指导价', guidance_price)
        # Current prices.
        price = jsonpath.jsonpath(spec_data, '$[specList]..Price')
        # Discount = guide price - current price, element-wise.
        cut_price = (list(map(lambda x, y: x - y, guidance_price, price)))
        # Last-sync timestamps (month granularity and day granularity).
        last_sync_time = datetime.now().strftime("%Y-%m")
        a_last_sync_time = datetime.now().strftime("%Y-%m-%d")
        yield brand_id, brand_name, series_id, category_fullname, spec_id, spec_name, city_id, city_name, guidance_price, cut_price, price, contain_brand_id, contain_brand_name, last_sync_time, a_last_sync_time
def test_order_check_add(self):
    """Order-check add endpoint should answer with msg == '成功'."""
    response = requests.get(url, params=None, headers=heads)
    body = response.json()
    msg = jsonpath.jsonpath(body, '$.msg')[0]
    self.assertEqual(msg, '成功')
def get_all_data(self, url):
    """Fetch a media listing (JSON) from *url*, derive a subcategory from the
    title, and insert one freepngs_pdts row per media URL. (Python 2 code:
    print statements, `except Exception, e`.)
    """
    html = self.get_html(url, 'get_all_data')
    if html is not None:
        html = json.loads(html)
        titles = jsonpath.jsonpath(html, "$..title")
        print 'titles ', titles
        # NOTE: rebinds the `url` parameter to the list of media URLs.
        url = jsonpath.jsonpath(html, "$..mediaUrl")
        descs = jsonpath.jsonpath(html, "$..description")
        if url:
            print url
            i = 0
            for i in range(0, len(url)):
                download_url = 'https://static.wixstatic.com/media/' + url[
                    i]
                print download_url
                if titles:
                    # Derive the subcategory from the first title by
                    # stripping noise words and punctuation.
                    title = titles[0]
                    subcat = re.sub(r'-png-\d+', '', title)
                    if 'pngs' in subcat:
                        subcat = subcat.replace('pngs', '')
                    if 'icon' in subcat:
                        subcat = subcat.replace('icon', '')
                    if 'PNGs' in subcat:
                        subcat = subcat.replace('PNGs', '')
                    if 'PNG' in subcat:
                        subcat = subcat.replace('PNG', '')
                    if 'images' in subcat:
                        subcat = subcat.replace('images', '')
                    if 'image' in subcat:
                        subcat = subcat.replace('image', '')
                    if 'free' in subcat:
                        subcat = subcat.replace('free', '')
                    if 'Free' in subcat:
                        subcat = subcat.replace('Free', '')
                    if 'cutouts' in subcat:
                        subcat = subcat.replace('cutouts', '')
                    if 'cutout' in subcat:
                        subcat = subcat.replace('cutout', '')
                    if 'holiday' in subcat:
                        subcat = subcat.replace('holiday', '')
                    if 'collection' in subcat:
                        subcat = subcat.replace('collection', '')
                    if 'Letter' in subcat:
                        subcat = subcat.replace('Letter', '')
                    if 'Number' in subcat:
                        subcat = subcat.replace('Number', '')
                    if ',' in subcat:
                        subcat = subcat.replace(',', ' ')
                    if ':' in subcat:
                        subcat = subcat.replace(':', ' ')
                    if 'transparent' in subcat:
                        subcat = subcat.replace('transparent', ' ')
                    if 'ex' in subcat:
                        subcat = subcat.replace('ex', ' ')
                    subcat = subcat.strip()
                    # Look up the parent category for this subcategory.
                    cat = self.get_cat_from_mysql(subcat)
                    # SECURITY NOTE(review): all four INSERT statements below
                    # build SQL by %-interpolating scraped values — this is
                    # vulnerable to SQL injection; should use parameterized
                    # cursor.execute(sql, params) instead.
                    if descs:
                        desc = descs[0].strip('- ')
                        # Backslashes would break the quoted SQL literal.
                        if '\\' in desc:
                            desc = desc.replace('\\', '`')
                        if cat:
                            sql = 'insert into freepngs_pdts (cat,subcat,title,download_url,description) VALUES ("%s","%s","%s","%s","%s")' % (
                                cat, subcat, title, download_url, desc)
                        else:
                            sql = 'insert into freepngs_pdts (subcat,title,download_url,description) VALUES ("%s","%s","%s","%s")' % (
                                subcat, title, download_url, desc)
                    else:
                        if cat:
                            sql = 'insert into freepngs_pdts (cat,subcat,title,download_url) VALUES ("%s","%s","%s","%s")' % (
                                cat, subcat, title, download_url)
                        else:
                            sql = 'insert into freepngs_pdts (subcat,title,download_url) VALUES ("%s","%s","%s")' % (
                                subcat, title, download_url)
                else:
                    # No title available: store the bare download URL.
                    sql = 'insert into freepngs_pdts (download_url) VALUES ("%s")' % download_url
                i += 1
                try:
                    self.cursor.execute(sql)
                    self.db.commit()
                    print '插入成功'
                except Exception, e:
                    # Roll back the failed insert and dump it for diagnosis.
                    self.db.rollback()
                    print sql
                    print str(e)