def test_one():
    assert pyjq.one(". + $foo", "val", vars=dict(foo="bar")) == "valbar"

    # if got multiple elements
    with pytest.raises(IndexError):
        pyjq.one(".[]", [1, 2])

    # if got no elements
    with pytest.raises(IndexError):
        pyjq.one(".[]", [])
def get_dataframe_zoe(url):
    global df_links
    df = pd.DataFrame({
        'Region': [], 'Version': [], 'Year': [], 'Mileage': [],
        'Owner\'s phone': [], 'Price': [], 'Argus rating': [], 'Benefit Buyer': []
    })
    try:
        page = requests.get(url, headers=request_headers)
        soup = _handle_request_result_and_build_soup(page)
        reg_json = re.compile(r"xtMultC:(.*),")
        reg_num = re.compile(r"(\d+)")
        reg_phone = re.compile(r"[0-9 ]{2,}")
        json_tab = json.loads(str(reg_json.findall(page.text))[3:-2])
        version = str(pyjq.one(".\"16\"", json_tab))[1:-1]
        mileage = str(pyjq.one(".\"33\"", json_tab))[1:-1]
        mileage = str(reg_num.findall(mileage))[2:-2]
        year = str(pyjq.one(".\"36\"", json_tab))[1:-1]
        year = str(reg_num.findall(year))[2:-2]
        price = str(pyjq.one(".\"32\"", json_tab))[1:-1]
        price = int(str(reg_num.findall(price))[2:-2])
        phone = soup.find("div", class_="phoneNumber1").text
        phone = ''.join(reg_phone.findall(phone)).strip(' ')
        rating_argus = int(get_rating_argus(url, reg_num))
        if rating_argus > 0:
            diff_price = rating_argus - price
        else:
            diff_price = 0
        df = df.append(
            {
                'Region': df_links.set_index("Link")['region'].loc[(url)],
                'Version': version,
                'Year': year,
                'Mileage': mileage,
                'Owner\'s phone': phone,
                'Price': price,
                'Argus rating': rating_argus,
                'Benefit Buyer': diff_price
            },
            ignore_index=True)
    except UnboundLocalError:
        pass
    return df
def test_one(self):
    self.assertEqual(pyjq.one('. + $foo', 'val', vars=dict(foo='bar')), 'valbar')

    # raise IndexError if got multiple elements
    with self.assertRaises(IndexError):
        pyjq.one('.[]', [1, 2])

    # raise IndexError if got no elements
    with self.assertRaises(IndexError):
        pyjq.one('.[]', [])
def on_message(client, userdata, msg):
    received_json = json.loads(msg.payload.decode('utf-8'))

    published_json = pyjq.one(LIGHT_JQ_FILTER, received_json)
    client.publish(PUB_TOPIC, json.dumps(published_json))
    # print(json.dumps(published_json))

    client.publish(PUB_TOPIC, windDomoticz(received_json))
    # print(windDomoticz(received_json))

    published_json = pyjq.one(UV_JQ_FILTER, received_json)
    client.publish(PUB_TOPIC, json.dumps(published_json))
    # print(json.dumps(published_json))

    published_json = pyjq.one(UV_test_JQ_FILTER, received_json)
    client.publish(PUB_TOPIC, json.dumps(published_json))
def test_one(self):
    self.assertEqual(
        pyjq.one('. + $foo', 'val', vars=dict(foo='bar')),
        'valbar'
    )

    # raise IndexError if got multiple elements
    with self.assertRaises(IndexError):
        pyjq.one('.[]', [1, 2])

    # raise IndexError if got no elements
    with self.assertRaises(IndexError):
        pyjq.one('.[]', [])
def get_role_iam(rolename, account_iam):
    """Given the IAM of an account, and a role name, return the IAM data for the role"""
    try:
        role_iam = pyjq.one('.RoleDetailList[] | select(.RoleName == "{}")'.format(rolename), account_iam)
    except IndexError:
        raise Exception("Unknown role named {}".format(rolename))
    return role_iam
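# A minimal sketch of how get_role_iam behaves, assuming account_iam has the
# shape returned by `aws iam get-account-authorization-details` (a
# RoleDetailList whose entries carry a RoleName); the data below is made up
# for illustration. pyjq.one raises IndexError when the select() filter yields
# no match, which get_role_iam converts into a friendlier Exception.
sample_account_iam = {
    "RoleDetailList": [
        {"RoleName": "admin", "Arn": "arn:aws:iam::123456789012:role/admin"},
        {"RoleName": "readonly", "Arn": "arn:aws:iam::123456789012:role/readonly"},
    ]
}
print(get_role_iam("admin", sample_account_iam)["Arn"])
# -> arn:aws:iam::123456789012:role/admin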
def get_user_iam(username, account_iam):
    """Given the IAM of an account, and a username, return the IAM data for the user"""
    try:
        user_iam = pyjq.one('.UserDetailList[] | select(.UserName == "{}")'.format(username), account_iam)
    except IndexError:
        exit("ERROR: Unknown user named {}".format(username))
    return user_iam
def drain(config, file_name, keep=False):
    """
    Receive messages from the configured queue and write them to a file,
    pretty print them to stdout and then delete them from the queue
    """
    queue_url = config["queue_url"]
    printer = config["printer"] if config.get("printer", None) else None
    count = 0

    if os.path.isfile(file_name) and os.path.exists(file_name):
        print(f"{file_name} already exists", file=sys.stderr)
        exit(1)

    sqs_client = boto3.client("sqs", region_name=config["region"])
    with open(file_name, "wb", buffering=0) as o:
        for message in receive_and_delete_messages(sqs_client, queue_url, keep):
            formatted_message = json.dumps(message) + "\n"
            o.write(formatted_message.encode("utf-8", "ignore"))
            if printer:
                print(pyjq.one(printer, message))
            count += 1
    print("Drained " + str(count) + " messages.")
def create_node_extra(issue_key, fields):
    import pyjq
    try:
        return pyjq.one(extra_jq, fields, vars=dict(issue_key=issue_key))
    except Exception:
        log('Problem with extra for issue %s' % issue_key)
        print(fields)
        raise
def get_user_allowed_actions(aws_api_list, user_iam, account_iam):
    """Return the privileges granted to a user by IAM"""
    groups = user_iam['GroupList']
    managed_policies = user_iam['AttachedManagedPolicies']
    privileges = Privileges(aws_api_list)

    # Get permissions from groups
    for group in groups:
        group_iam = pyjq.one(
            '.GroupDetailList[] | select(.GroupName == "{}")'.format(group), account_iam)

        # Get privileges from managed policies attached to the group
        for managed_policy in group_iam['AttachedManagedPolicies']:
            policy_filter = '.Policies[] | select(.Arn == "{}") | ' \
                '.PolicyVersionList[] | select(.IsDefaultVersion == true) | .Document'
            policy = pyjq.one(
                policy_filter.format(managed_policy['PolicyArn']), account_iam)
            for stmt in make_list(policy['Statement']):
                privileges.add_stmt(stmt)

        # Get privileges from in-line policies attached to the group
        for inline_policy in group_iam['GroupPolicyList']:
            policy = inline_policy['PolicyDocument']
            for stmt in make_list(policy['Statement']):
                privileges.add_stmt(stmt)

    # Get privileges from managed policies attached to the user
    for managed_policy in managed_policies:
        policy_filter = '.Policies[] | select(.Arn == "{}") | ' \
            '.PolicyVersionList[] | select(.IsDefaultVersion == true) | .Document'
        policy = pyjq.one(policy_filter.format(managed_policy['PolicyArn']), account_iam)
        for stmt in make_list(policy['Statement']):
            privileges.add_stmt(stmt)

    # Get privileges from inline policies attached to the user
    for stmt in pyjq.all('.UserPolicyList[].PolicyDocument.Statement[]', user_iam):
        privileges.add_stmt(stmt)

    return privileges.determine_allowed()
def load(self, json_file):
    data = json.load(json_file)

    ## If we wanted to allow the user to use JQ to select the keys to use,
    ## we would swap the order of these two lines
    ## (... self._target_data(...) and data = jqp.one(...) ...).
    ##
    ## Another behaviour you may want to allow by swapping their order is
    ## letting the user use keys and data outside the self.collection
    ## attribute as part of the preprocessing. It offers more possibilities.
    data = self._target_data(data)

    if not self.context_constants:
        self.context_constants = {"aux": {"_file_": json_file.name}}
    else:
        assert "aux" in self.context_constants, "Missing the root key 'aux' in 'context-constants' of the outline file"
        self.context_constants["aux"]["_file_"] = json_file.name

    # performance: avoid calling jq if identity
    data = jqp.one(self.preprocessing, data, vars=self.context_constants) if jqp and self.preprocessing else data

    ## Mapping and processing
    self.process_each(data)

    # performance: avoid calling jq if identity
    if jqp and self.postprocessing:
        self.rows = jqp.one(self.postprocessing, self.rows, vars=self.context_constants)
        self._update_header_keys(self.rows)

    # special values
    vnone = self.special_values_mapping.get("null", "")
    vempty = self.special_values_mapping.get("empty", "")
    vtrue = self.special_values_mapping.get("true", "true")
    vfalse = self.special_values_mapping.get("false", "false")

    # a tad faster than the equivalent two calls;
    # however, replace it if needed for maintenance
    self.rows = self._replace_nulls(self.rows, vnone, vempty)
    # self.rows = self._replace_value(self.rows, None, vnone, by_identity=True)
    # self.rows = self._replace_value(self.rows, "", vempty, by_identity=False)
    self.rows = self._replace_value(self.rows, True, vtrue, by_identity=True)
    self.rows = self._replace_value(self.rows, False, vfalse, by_identity=True)
def service_calendars(self):
    return [
        _ServiceCalendar(
            endpoint.description,
            [
                datetime.strptime(date, self._DATE_FORMAT).date()
                for date in pyjq.one('.calendar."{}"'.format(endpoint.id), self._response)
            ]
        )
        for endpoint in self._service_endpoints
    ]
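# A minimal sketch of the calendar lookup used above, assuming self._response
# maps endpoint ids to lists of date strings under a top-level "calendar" key;
# the id and dates below are illustrative placeholders only.
import pyjq

sample_response = {"calendar": {"service-1": ["2020-01-01", "2020-01-02"]}}
print(pyjq.one('.calendar."{}"'.format("service-1"), sample_response))
# -> ['2020-01-01', '2020-01-02']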
def get_role_allowed_actions(aws_api_list, role_iam, account_iam):
    """Return the privileges granted to a role by IAM"""
    privileges = Privileges(aws_api_list)

    # Get privileges from managed policies
    for managed_policy in role_iam['AttachedManagedPolicies']:
        policy = pyjq.one(
            '.Policies[] | select(.Arn == "{}") | .PolicyVersionList[] | select(.IsDefaultVersion == true) | .Document'
            .format(managed_policy['PolicyArn']), account_iam)
        for stmt in make_list(policy['Statement']):
            privileges.add_stmt(stmt)

    # Get privileges from inline policies attached to the role
    for policy in role_iam['RolePolicyList']:
        for stmt in make_list(policy['PolicyDocument']['Statement']):
            privileges.add_stmt(stmt)

    return privileges.determine_allowed()
def jadual_negeri(negeri):
    fetch_state = api.get_negeri(args.negeri) if args.negeri else api.get_negeri()
    states = pyjq.one(".states", fetch_state)
    myzone = []
    sp.start()
    for i in range(len(states)):
        fetch_zon = api.get_negeri(str(states[i]))
        myzone.append(pyjq.all(".results[]", fetch_zon))
        sp.hide()
        sp.write(states[i] + "✅")
    sp.ok()
    zon_formatted = pyjq.all(".[][]", myzone)
    data_format = data_for_jadual(zon_formatted, fields)
    print(tabulate(data_format, fields, tablefmt="fancy_grid"))
def create_node_label(issue_key, fields):
    # truncate long labels with "...", but only if the three dots are
    # replacing more than two characters -- otherwise the truncated
    # label would be taking more space than the original.
    summary = fields['summary']
    if len(summary) > MAX_SUMMARY_LENGTH + 2:
        summary = summary[:MAX_SUMMARY_LENGTH] + '...'
    short_summary = summary.replace('"', '\\"')

    if not jq:
        return '{} ({})'.format(issue_key, short_summary)
    else:
        import pyjq
        try:
            return str(pyjq.one(jq, fields, vars=dict(issue_key=issue_key)))
            # '"{}({})\n{}\n{}"'.format(issue_key, short_summary, fields['assignee']['displayName'], fields['labels'])
        except Exception:
            log('Error with issue %s' % issue_key)
            print(fields)
            raise
def jadual_lokasi(args):
    lok = get_zon(args.lokasi.title())
    data = pyjq.all(
        ".prayer_times[]|{tarikh:.date,subuh:.subuh,zohor:.zohor,asar:.asar,maghrib:.maghrib,isyak:.isyak}",
        api.get_week(lok)
    ) if args.minggu else pyjq.one(
        ".|[{tarikh:.prayer_times.date,subuh:.prayer_times.subuh,zohor:.prayer_times.zohor,asar:.prayer_times.asar,maghrib:.prayer_times.maghrib,isyak:.prayer_times.isyak}]",
        api.get_today(lok))
    if args.fields:
        fields = ["tarikh"] + args.fields or [
            "tarikh", "subuh", "zohor", "asar", "maghrib", "isyak"
        ]
    data_format = data_for_jadual(data, fields)
    if args.notify:
        for waktu in args.notify:
            notify(data[0][waktu])
    else:
        print(tabulate(data_format, fields, tablefmt="fancy_grid"))
def main():
    parser = ArgumentParser()
    parser.add_argument('host')
    parser.add_argument('port')
    parser.add_argument('job')
    parser.add_argument('build_number', nargs='?', default='lastSuccessfulBuild')
    parser.add_argument('-o', '--output-dir', action='store', dest='output_dir', default='artifacts')
    parser.add_argument('--console-text', '-c', action='store_true', dest='console_text')
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    create_empty_dir(output_dir)

    def api_url(url):
        return urljoin(url, 'api/json')

    build_api_url = api_url('http://{0.host}:{0.port}/job/{0.job}/{0.build_number}/'.format(args))
    for run_url in jq.all('.runs[].url', url=build_api_url):
        subjob_url = urljoin(run_url, '../')
        subjob_name = jq.one('.displayName', url=api_url(subjob_url))
        subjob_dir = output_dir / urllib.request.quote(subjob_name, '')
        if not subjob_dir.is_dir():
            subjob_dir.mkdir(parents=True)

        with (subjob_dir / 'consoleText').open('wb') as local_fp, \
                urlopen(urljoin(run_url, 'consoleText')) as http_fp:
            shutil.copyfileobj(http_fp, local_fp)

        zip_fp = io.BytesIO(urlopen(urljoin(run_url, 'artifact/*zip*/archive.zip')).read())
        with ZipFile(zip_fp) as z:
            for name in z.namelist():
                prefix = 'archive/'
                if not name.startswith(prefix):
                    continue
                path = subjob_dir / name[len(prefix):]
                if not path.parent.is_dir():
                    path.parent.mkdir(parents=True)
                with path.open('wb') as fp:
                    fp.write(z.read(name))
def info_zon(args, fields=["zone", "negeri", "lokasi"]):
    def jadual_negeri(negeri):
        fetch_state = api.get_negeri(args.negeri) if args.negeri else api.get_negeri()
        states = pyjq.one(".states", fetch_state)
        myzone = []
        sp.start()
        for i in range(len(states)):
            fetch_zon = api.get_negeri(str(states[i]))
            myzone.append(pyjq.all(".results[]", fetch_zon))
            sp.hide()
            sp.write(states[i] + "✅")
        sp.ok()
        zon_formatted = pyjq.all(".[][]", myzone)
        data_format = data_for_jadual(zon_formatted, fields)
        print(tabulate(data_format, fields, tablefmt="fancy_grid"))

    if args.zonkod is None:
        jadual_negeri(args.negeri)
    else:
        sp.start()
        fetch_zon = api.get_today(args.zonkod)
        data = pyjq.one(
            """
            .|{zone,tarikh:.prayer_times.date,locations,azan:{
                subuh:   .prayer_times.subuh,
                zohor:   .prayer_times.zohor,
                asar:    .prayer_times.asar,
                maghrib: .prayer_times.maghrib,
                isyak:   .prayer_times.isyak}}
            """,
            fetch_zon)
        fields = data.keys()
        vals = list(map(lambda x: format_value(data[x]), fields))
        items = list(zip(fields, vals))
        sp.ok()
        print(tabulate(items, tablefmt="fancy_grid"))
def _get_data_zoe_page_data(soup, region):
    row_dict = {}
    pattern = re.compile(
        "(?i)(\\blife\\b)|(\\bintens\\b)|(\\bedition\\b)|(\\bzen\\b)")
    tags = soup.find_all('script')
    json_data = json.loads(tags[-3].text[20:])
    row_dict.update({"Area": region})
    row_dict.update({
        "Year": pyjq.one(".adview.attributes[] | select(.key == \"regdate\").value", json_data)
    })
    row_dict.update({
        "Mileage": pyjq.one(".adview.attributes[] | select(.key == \"mileage\").value", json_data)
    })
    row_dict.update({"Price": pyjq.one(".adview.price[]", json_data)})
    row_dict.update({"Phone": pyjq.one(".stores.byId[].phone_number", json_data)})
    row_dict.update({"Owner_type": pyjq.one(".adview.owner.type", json_data)})
    try:
        version = pattern.search(pyjq.one(".adview.subject", json_data))[0]
        row_dict.update({"Version": version.upper()})
    except TypeError:
        row_dict.update({"Version": ""})
    if row_dict["Year"] != "2018":
        row_dict.update({
            "Argus": _prix_argus_zoe(postal=regions_argus[region],
                                     year=row_dict["Year"],
                                     mileage=row_dict["Mileage"])
        })
    else:
        row_dict.update({"Argus": None})
    return row_dict
def process_solr_cursor_mark(self, json_data):
    next_cursorMark = pyjq.one('{"cursorMark": .nextCursorMark}', json_data)
    return next_cursorMark['cursorMark']
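# A minimal sketch of the cursor-mark extraction above, assuming a Solr
# cursor-paging response whose body carries a top-level nextCursorMark;
# the cursor string below is a made-up placeholder.
import pyjq

sample_solr_page = {"nextCursorMark": "AoEjMDAwMg=="}
print(pyjq.one('{"cursorMark": .nextCursorMark}', sample_solr_page)["cursorMark"])
# -> AoEjMDAwMg==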
def account_number(self):
    return pyjq.one(self._ACCOUNT_NUMBER_JQ, self._json_response)
def test_assigning_values(self):
    self.assertEqual(pyjq.one('$foo', {}, vars=dict(foo='bar')), 'bar')
    self.assertEqual(pyjq.one('$foo', {}, vars=dict(foo=['bar'])), ['bar'])
def _token_type(self):
    return pyjq.one(self._TOKEN_TYPE_JQ, self._json_response)
def company_cd(self):
    return pyjq.one(self._COMPANY_CD_JQ, self._json_response)
def person_id(self):
    return pyjq.one('.accountSummaryType.personId', self._json_response)
def _access_token(self):
    return pyjq.one(self._ACCESS_TOKEN_JQ, self._json_response)
def prem_code(self):
    return pyjq.one(self._PREM_CODE_JQ, self._json_response)
def test_assigning_values():
    assert pyjq.one("$foo", {}, vars=dict(foo="bar")) == "bar"
    assert pyjq.one("$foo", {}, vars=dict(foo=["bar"])) == ["bar"]
def feed_create(feed_name, feed_config=None, basedir=None, confirm=False, max_audit=5):
    '''
    Using the Configuration Specified, Query the RSS Feed and Create Audits
    for Missing Entries.
    '''
    logger = logging.getLogger("rss_creator.py:feed_create")
    audit_source_items = dict()

    if feed_config is None:
        logger.debug(
            "Feed Config Not Given, Choosing {} from Global Config.".format(feed_name))
        feed_config = _known_feeds[feed_name]

    this_path = os.path.join(basedir, feed_config.get("subdir", feed_name))

    if os.path.isdir(basedir) is False:
        # Base directory must already exist
        logger.error("Base Path of {} Doesn't Exist.")
        raise FileNotFoundError("Base Path Missing")

    if os.path.isdir(this_path) is False:
        logger.warning("Subdirectory doesn't exist, attempting to Create")
        try:
            os.mkdir(this_path)
        except Exception as subdir_error:
            logger.error("Error when creating subdirectory : {}".format(subdir_error))
            raise subdir_error

    # I have a valid place to put my stuff. Let's grab my URL.
    try:
        if feed_config.get("reqtype", "rss") == "rss":
            feed_obj = feedparser.parse(feed_config["url"])
        elif feed_config.get("reqtype", "json") == "json":
            feed_req = requests.get(feed_config["url"])
            feed_obj = {"entries": feed_req.json()}
    except Exception as feed_read_error:
        logger.error("Unable to Read RSS Feed, Returning Empty")
        logger.debug("Feed Read Error : {}".format(feed_read_error))
        feed_obj = {"entries": list()}

    if len(feed_obj["entries"]) == 0:
        logger.warning("No Entries in Given URL.")
    else:
        # Have entries, let's give this a whirl
        current_num = 0

        if feed_config.get("presort", False) is False:
            cycle_object = feed_obj["entries"]
        else:
            # API is unsorted, let's sort it
            reverse = bool(feed_config["presort"] == "reverse")
            ordered_keys = list(feed_obj["entries"].keys())
            ordered_keys.sort(reverse=reverse)
            logger.debug(ordered_keys)
            cycle_object = {k: feed_obj["entries"][k] for k in ordered_keys}

        for entry in cycle_object:
            logger.debug("Entry : {}".format(entry))
            current_num = current_num + 1
            best_source_key = None

            if "jq_obj_source_key" in feed_config.keys():
                # I have JQ to try
                jq_result = pyjq.one(feed_config["jq_obj_source_key"], entry)
                if jq_result is not None:
                    best_source_key = jq_result
                logger.debug("Best Source key After JQ : {}".format(best_source_key))

            if "regex_obj_source_key" in feed_config.keys():
                regex_result = re.search(feed_config["regex_obj_source_key"],
                                         str(best_source_key), re.I)
                if regex_result is not None:
                    best_source_key = regex_result.group(1)
                logger.debug("Best Source key After Regex : {}".format(best_source_key))

            if "regex_obj_replace" in feed_config.keys():
                regex_replace = re.sub(
                    *[*feed_config["regex_obj_replace"], str(best_source_key)])
                if regex_replace is not None:
                    best_source_key = regex_replace
                logger.debug("Best Source key After Replace : {}".format(best_source_key))

            if best_source_key is not None and len(best_source_key) > 0:
                as_kwargs = {
                    "source_key": best_source_key,
                    "audit_filename": "{}.{}".format(best_source_key, feed_config["format"]),
                    "audit_path": this_path,
                    **feed_config.get("audit_source_kwargs", dict())
                }
                as_args = [*feed_config.get("audit_source_args", list())]

                try:
                    as_obj = feed_config["audit_source_obj"](*as_args, **as_kwargs)
                except Exception as audit_source_error:
                    logger.error("Unable to Pull Audit {}.".format(best_source_key))
                    logger.debug("Pull Error : {}".format(audit_source_error))
                    audit_source_items[best_source_key] = [False, "Error on Creation."]
                else:
                    if as_obj.validate_audit_live() is True:
                        # See if the file already exists
                        if as_obj.audit_file_exists() is False:
                            # Add to object
                            if confirm is False:
                                logger.info(
                                    "Audit {} File Not Written to {}, Confirm not Set."
                                    .format(best_source_key, as_obj.audit_filename))
                                audit_source_items[best_source_key] = ["False", "Confirm not Set"]
                            else:
                                logger.info("Audit {} Writing to {}.".format(
                                    best_source_key, as_obj.audit_filename))
                                audit_source_items[best_source_key] = as_obj.write_audit(
                                    file_format=feed_config["format"])
                        else:
                            logger.info("Audit File {} Has existing File.".format(best_source_key))
                            audit_source_items[best_source_key] = [False, "Pre-Existing File."]
                    else:
                        logger.warning("Audit Finding for Source {} Not Valid.".format(best_source_key))
                        audit_source_items[best_source_key] = [False, "Invalid Audit on Creation"]
            else:
                logger.warning("No Source Key found for Entry : {}".format(entry["id"]))

            if max_audit is not None and max_audit != -1 and current_num > (max_audit - 1):
                logger.info("Reached Maximum of {} Audits Processed.".format(current_num))
                break

    return audit_source_items
def process_solr_item_count(self, json_data):
    # get number of hits
    num_found = pyjq.one('.response | {"numFound": .\"numFound\"}', json_data)
    return num_found['numFound']
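# A companion sketch for the hit-count extraction above, using the standard
# Solr response shape where .response.numFound holds the total number of hits;
# the numbers are illustrative only.
import pyjq

sample_solr_response = {"response": {"numFound": 42, "start": 0, "docs": []}}
print(pyjq.one('.response | {"numFound": ."numFound"}', sample_solr_response)["numFound"])
# -> 42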
def _prix_argus_zoe(postal, year, mileage):
    argus_url = f"https://www.lacentrale.fr/get_co_prox.php?km={mileage}&zipcode={postal}&month=06&year={year}"
    json_data = json.loads(requests.get(argus_url, headers=argus_headers).text)
    return pyjq.one(".cote_brute", json_data)
def process_row(self, item, index):
    """Process a row of json data against the key map"""
    row = {}
    for header, keys in self.key_map.items():
        try:
            if keys:
                row[header] = reduce(operator.getitem, keys, item)
            else:
                row[header] = None
        except (KeyError, IndexError, TypeError):
            row[header] = None

    ###### Map-processing row-wise ######
    ### Preferred way to process using JQ (much, much more efficient
    ### than field-wise selectors).

    # make custom generated fields available in JQ as $myvar
    jq_params = row.copy()
    jq_params.update(self.context_constants)
    jq_params.update({'__row__': index})

    if self.mapprocessing:
        try:
            computed = jqp.one(self.mapprocessing, item, vars=jq_params)
            row.update(computed)
            self.header_keys.update({key: None for key in computed.keys()})
        except Exception as err:
            logging.warning(" JQ Error with map-processing JQ script '{}'. Error text: {}".format(self.mapprocessing, err))

    ###### Individual field-wise JQ selectors ######
    ### Note: The user should rely mostly on row-wise map-processing
    ### instead of these field-wise calls. This is left here for
    ### historical reasons since the code still works.
    ###
    ### Field-wise JQ processing slows down the processing linearly with
    ### the number of rows (and the number of different field-wise selectors).
    ### NB calls to JQ = NB Rows X NB Field-wise
    ###
    ### Design choice: jq scripts DO NOT override default accessors
    ### because accessing using JQ *dramatically* decreases performance
    ### for every call. It also means it is far better to group every JQ
    ### call unless there is no other choice.
    for header, data in self.key_processing_map.items():
        if jqp and row[header] is None and data is not None:
            try:
                selector = data.get('jq')
                args = data.get('args', {})
                ## NOTE: this causes more variables to be available than
                ## should be. However it's fine; we let users be smart about
                ## their selector scripts. Internals should not be abused.
                ## Avoid performance hits.
                jq_params.update(args)
                selector = self._optimized_jq_selector(selector)
                if selector:
                    try:
                        tmp = jqp.one(selector, item, vars=jq_params)
                    except Exception as err:
                        logging.warning("Error on key '{}' with JQ '{}'. Error text: {}".format(header, selector, err))
                        tmp = None
                    row[header] = tmp
            except (KeyError, IndexError, TypeError, ValueError):
                pass
    return row