def compare(
    a: Dict[HashableLessThan, FileProperties],
    b: Dict[HashableLessThan, FileProperties],
    hasher: Optional[Hasher],
    left: bool = True,
    right: bool = True,
    both: bool = True,
    ignore: Optional[re.Pattern] = None,
    file: IO[str] = stdout,
) -> None:
    """Print a three-section diff of two file-property maps to *file*.

    Sections (each individually toggleable): keys only in ``a`` (``lo:``),
    keys only in ``b`` (``ro:``), and keys present in both whose properties
    differ (``bo:``). Keys matching *ignore* are skipped in every section.

    :param a: left-side map of key -> FileProperties
    :param b: right-side map of key -> FileProperties
    :param hasher: used to lazily compute missing hashes for same-size files;
        pass None to skip hash comparison entirely
    :param left: emit the "In left only" section
    :param right: emit the "In right only" section
    :param both: emit the "On both, but different" section
    :param ignore: keys whose ``fspath`` matches this pattern are skipped
    :param file: output stream for all report lines
    """
    aset = a.keys()
    bset = b.keys()

    # retleft = None
    # retright = None

    # note: the key is usually the `relpath` or the `hash`

    if left:
        print("In left only", file=file)
        for key in sorted(aset - bset):
            if ignore and ignore.match(fspath(key)):
                continue
            print("lo:", key, a[key].relpath, file=file)

    if right:
        print("In right only", file=file)
        for key in sorted(bset - aset):
            if ignore and ignore.match(fspath(key)):
                continue
            print("ro:", key, b[key].relpath, file=file)

    if both:
        print("On both, but different", file=file)
        for key in sorted(aset & bset):
            if ignore and ignore.match(fspath(key)):
                continue
            aprops = a[key]
            bprops = b[key]
            # A dir/file mismatch is reported, but size/hash comparison below
            # still runs when the left side is a file.
            if aprops.isdir != bprops.isdir:
                print("bo:", "one is dir, one is file", key, file=file)
            if not aprops.isdir:
                if aprops.size != bprops.size:
                    print("bo:", "size different", key, aprops.size, bprops.size, file=file)
                elif aprops.size == 0 and bprops.size == 0:
                    # Both empty: trivially equal, no hashing needed.
                    pass
                elif hasher is not None:  # same size
                    # Hashes are computed lazily and cached back onto the
                    # props object so repeated comparisons stay cheap.
                    if (aprops.hash or aprops.abspath) and (bprops.hash or bprops.abspath):
                        if not aprops.hash:
                            aprops.hash = hasher.get(Path(aprops.abspath))  # type: ignore [arg-type]
                        if not bprops.hash:
                            bprops.hash = hasher.get(Path(bprops.abspath))  # type: ignore [arg-type]
                        if aprops.hash != bprops.hash:
                            print("bo:", "hash different", key, aprops.hash, bprops.hash, file=file)
                        # else: pass # same files
                    else:
                        print("bo:", "no hash or abspath for same size files", key, file=file)
def _minify_dir(name: str, regex: re.Pattern = re.compile(r'^(\W*\w)')) -> str: """Shorten a string to the first group that matches regex. :param name: the single name from the path that is being shrunk :param regex: the pattern used to minify the name (using group 0) :return: the minified name if possible, else the whole name """ if match := regex.match(name): return cast(str, match[0])
def display_aggregated_results(
        task_name: str,
        *,
        use_simplified_metric_name: bool = False,
        metrics_names: Optional[List[str]] = None,
        exclude_regex: Optional[Pattern] = None,
        include_regex: Optional[Pattern] = None,
        renames: Optional[List[Tuple[str, str]]] = None,
        n_steps: int,
):
    """Plot per-metric learning curves from the aggregated CSV of *task_name*.

    Reads the aggregate CSV, optionally simplifies metric names, filters and
    renames algorithm runs, annotates each run with its final
    ``Top1_Acc_Stream`` score at step *n_steps*, and renders one line plot
    per metric with seaborn's ``relplot``.

    :param task_name: task whose aggregate CSV is plotted
    :param use_simplified_metric_name: strip boilerplate from metric names
    :param metrics_names: subset/order of metrics to plot (default: all)
    :param exclude_regex: drop runs whose algo name matches this pattern
    :param include_regex: keep only runs whose algo name matches this pattern
    :param renames: list of (old_algo_name, new_name) replacements
    :param n_steps: step at which the final accuracy score is read
    """
    df = read_csv(get_aggregate_csv_file(task_name))
    if use_simplified_metric_name:
        df["metric"] = df["metric"].map(lambda s: s.replace(
            "/eval_phase/test_stream", "").replace("/Task000", ""))
    if exclude_regex:
        df = df[df["run_algo"].map(lambda s: exclude_regex.match(s) is None)]
    if include_regex:
        df = df[df["run_algo"].map(
            lambda s: include_regex.match(s) is not None)]
    for algo_name, replacement in renames or []:
        df["run_algo"] = df["run_algo"].map(lambda s: replacement
                                            if s == algo_name else s)
    # FIX: Series.iteritems() was removed in pandas 2.0; items() is the
    # long-supported equivalent.
    algo_name2score = dict(df[(df["step"] == n_steps)
                              & (df["metric"] == "Top1_Acc_Stream")].groupby(
                                  "run_algo").mean()["value"].items())
    df["run_algo"] = df["run_algo"].map(
        lambda name: f"{name} ({algo_name2score[name]:.1%})")
    all_metrics_names = sorted(set(df["metric"]),
                               key=_get_metric_name_priority)
    print(all_metrics_names)
    metrics_names = metrics_names or all_metrics_names
    g: FacetGrid = relplot(
        data=df,
        kind="line",
        x="step",
        y="value",
        hue="run_algo",
        col="metric",
        col_order=metrics_names,
        col_wrap=min(3, len(metrics_names)),
        facet_kws={
            "sharex": False,
            "sharey": False,
            "legend_out": False
        },
    )
    fig: Figure = g.fig
    fig.suptitle(task_name, fontsize=16)
    fig.tight_layout()
    fig.show()
def check_pattern(arg_value: str, pattern: re.Pattern):
    """Argparse type-helper: return *arg_value* unchanged when it matches
    *pattern*, otherwise raise ``ArgumentTypeError``."""
    if pattern.match(arg_value):
        return arg_value
    raise argparse.ArgumentTypeError(
        f"Invalid value provided! Must match regex pattern: {pattern.pattern}"
    )
def _parse_from_keywords(
    self,
    transaction_type: str,
    description: list[str],
    keywords: re.Pattern,
    *,
    bookdate: date,
    value_date: date,
    amount: Decimal,
) -> BaseTransaction:
    """Split *description* into keyword-labelled fields and build a transaction.

    The first description line is treated as the transaction type; every
    subsequent line either starts a new field (when *keywords* matches, with
    group 1 as the field name) or is appended verbatim to the field currently
    being collected.
    """
    fields = dict[str, str]()
    key = 'transaction_type'
    value = transaction_type
    for line in description[1:]:
        found = keywords.match(line)
        if found is None:
            # Continuation line: belongs to the field in progress.
            value += line
        else:
            fields[key] = value.rstrip()
            key = found.group(1)
            value = line[found.end():]
    fields[key] = value.rstrip()
    # Prefer the free-form description field; fall back to the reference.
    omschrijving = fields.get('Omschrijving')
    if omschrijving is None:
        omschrijving = fields['Kenmerk']
    return Transaction(account=self.account,
                       description=omschrijving,
                       operation_date=bookdate,
                       value_date=value_date,
                       amount=amount,
                       currency=self.currency,
                       metadata=fields)
def _verify_public_instance_jwt(
    cache: 'cg_cache.inter_request.Backend[str]',
    signature: str,
    allowed_hosts: re.Pattern,
) -> str:
    """Verify a JWT from a public instance and return the url it names.

    :param cache: inter-request cache for the instance's public key
    :param signature: the JWT to verify
    :param allowed_hosts: pattern the token's ``url`` claim must match
    :returns: the verified ``url`` claim
    :raises PermissionException: (401) when the url is not allowed or when
        verification fails for any reason
    """
    # First get the url from the jwt without verifying, then get the
    # public key and do the verification.
    # NOTE(review): `verify=False` / `verify=True` is the PyJWT 1.x call
    # signature; PyJWT 2.x uses options={'verify_signature': ...} — confirm
    # the pinned PyJWT version.
    unsafe_decoded = jwt.decode(signature, verify=False)
    if allowed_hosts.match(unsafe_decoded.get('url', None)) is None:
        raise PermissionException(401)

    try:
        decoded = cache.cached_call(
            key=unsafe_decoded['url'],
            get_value=lambda: _download_public_key(
                unsafe_decoded['url'],
                unsafe_decoded['id'],
            ),
            callback=lambda public_key: jwt.decode(
                signature,
                key=public_key,
                algorithms='RS256',
                verify=True,
            )
        )
        # The verified payload must be identical to the unverified read,
        # otherwise something tampered with the token between the two decodes.
        assert decoded == unsafe_decoded
    except BaseException as exc:  # pylint: disable=broad-except
        # Deliberately broad: any failure (bad key, bad signature, download
        # error, assertion) must surface as an authorization failure.
        logger.error('Got unauthorized broker request', exc_info=True)
        raise PermissionException(401) from exc
    else:
        return decoded['url']
def match(self, regexp: Pattern) -> Optional[ParseResult[str]]:
    """Try *regexp* at the current position.

    On success return the matched text paired with a Source advanced past
    it; on failure return None.
    """
    found = regexp.match(self.string, self.index)
    if not found:
        return None
    text = found.group(0)
    return ParseResult(text, Source(self.string, self.index + len(text)))
def samp(corpus: TextIO, samp_corpora: List[TextIO], samp_size: int,
         fd_removed: TextIO, valid_pwd: Pattern):
    """Write shuffled password samples from *corpus* into each sample file.

    Lines failing *valid_pwd* are counted and, when *fd_removed* is given,
    dumped there with their frequencies. Each sample file receives an
    independently shuffled draw of at most *samp_size* passwords.
    """
    for out in samp_corpora:
        if not out.writable():
            print("Training and Testing SHOULD be Writable!", file=sys.stderr)
            sys.exit(-1)
    if len(samp_corpora) < 1:
        print("At least one sample file!", file=sys.stderr)
        sys.exit(-1)
    pwd_set = []
    count_invalid = defaultdict(int)
    for raw in corpus:
        pwd = raw.strip("\r\n")
        if valid_pwd.match(pwd) is None:
            count_invalid[pwd] += 1
        else:
            pwd_set.append(pwd)
    samp_size = min(len(pwd_set), samp_size)
    for idx, out in enumerate(samp_corpora):
        # Re-shuffle before every draw so the samples are independent.
        shuffle(pwd_set)
        out.writelines(f"{pwd}\n" for pwd in pwd_set[:samp_size])
        out.flush()
        print(f"{idx + 1} sample file saved here: {out.name}", file=sys.stderr)
        out.close()
    if len(count_invalid) != 0 and fd_removed is not None:
        print(f"Removed invalid passwords saved in {fd_removed.name}", file=sys.stderr)
        for p, n in sorted(count_invalid.items(), key=lambda kv: kv[1], reverse=True):
            fd_removed.write(f"{p}\t{n}\n")
        fd_removed.close()
    print("Done!", file=sys.stderr)
def _within_filters(named_matches: dict, args: argparse.Namespace) -> bool:
    """Return True when the parsed log fields satisfy the CLI filter args.

    A field whose raw value is '-' is treated as "unknown" and passes the
    corresponding filter. (Extracted from process_logs, whose inline comment
    asked for exactly this refactor.)
    """
    start = named_matches['start']
    if start != '-':
        if args.start is not None and int(start) < args.start:
            return False
        if args.end is not None and int(start) > args.end:
            return False
    if named_matches['bytes'] != '-' and args.bytes is not None and \
            int(named_matches['bytes']) < args.bytes:
        return False
    if named_matches['packets'] != '-' and args.packets is not None and \
            int(named_matches['packets']) < args.packets:
        return False
    return True


def process_logs(query: re.Pattern, args: argparse.Namespace) -> None:
    """Copy log lines matching *query* and the CLI filters to the output file.

    :param query: pattern with named groups ``start``, ``bytes``, ``packets``
    :param args: namespace with ``input_file_name``, ``output_file_name`` and
        the optional ``start``/``end``/``bytes``/``packets`` filter values
    """
    with open(args.input_file_name, 'r') as f, \
            open(args.output_file_name, 'a') as o:
        for line in f:
            match = query.match(line)
            if match is None:
                continue
            if _within_filters(match.groupdict(), args):
                # print(f"[FOUND] {line.rstrip()}")
                o.write(line)
def password_match(line: str, password_policy_and_password_regex: re.Pattern) -> bool:
    """Check one "min-max letter: password" policy line.

    :param line: raw input line to parse
    :param password_policy_and_password_regex: pattern whose groups 1-4 are
        min count, max count, the letter, and the password
    :return: True when the letter occurs between min and max times
        (inclusive) in the password; False for a non-matching line
    """
    match = password_policy_and_password_regex.match(line)
    # FIX: compare with None by identity (PEP 8 E711), not `== None`.
    if match is None:
        return False
    min_letter_count = int(match.group(1))
    max_letter_count = int(match.group(2))
    letter = match.group(3)
    password = match.group(4)
    occurences_count = password.count(letter)
    result = min_letter_count <= occurences_count <= max_letter_count
    print(
        "{password:<30} {occurences_count:>3}{letter} {belongs} [{min_letter_count}, {max_letter_count}] {result}"
        .format(
            password=password,
            occurences_count=occurences_count,
            letter=letter,
            belongs=("∈" if result else "∉"),
            min_letter_count=min_letter_count,
            max_letter_count=max_letter_count,
            result=('\033[92mOK\033[0m' if result else '\033[91mKO\033[0m')))
    return result
def valid_fc(argument: str, *, _fc: re.Pattern = _friend_code) -> str:
    """Normalise a friend-code argument to the ``XXXX-XXXX-XXXX`` form.

    Raises commands.BadArgument when the (upper-cased, unquoted) input does
    not match the friend-code pattern.
    """
    candidate = argument.upper().strip('"')
    found = _fc.match(candidate)
    if found is None:
        raise commands.BadArgument("Not a valid friend code!")
    return "{one}-{two}-{three}".format(**found.groupdict())
def consume(self, size: int = 1, regex: re.Pattern = None, text: str = None,
            regex_group: int = 0):
    """Advance the cursor past a regex match, a literal, or *size* characters.

    Exactly one mode applies, in priority order: *regex* (returns the chosen
    group and advances by its length), *text* (literal prefix match), or a
    raw *size*-character slice. Returns the consumed text, or None when the
    regex/text does not match at the current position.
    """
    start = self.pos
    remainder = self.content[start:]
    if regex:
        if not isinstance(regex, re.Pattern):
            print("uncompiled regex passed to peek!")
            regex = re.compile(regex)
        found = regex.match(remainder)
        if found is None:
            return None
        wanted = found.group(regex_group)
        # A sub-group can only be consumed when it sits at the very start
        # of the overall match, otherwise the cursor would skip text.
        if regex_group != 0 and not found.group(0).startswith(wanted):
            print("Cannot consume regex group that does not start at match start!")
            return None
        self.pos += len(wanted)
        return wanted
    if text:
        if not remainder.startswith(text):
            return None
        self.pos += len(text)
        return text
    self.pos += size
    return remainder[:size]
def get_data(sourceDir: str, pattern: re.Pattern):
    """Scan ``version.nut`` in *sourceDir* and return group 1 of the first
    line matching *pattern*, or None when no line matches."""
    version_file = os.path.join(sourceDir, 'version.nut')
    with open(version_file, 'r') as handle:
        for text_line in handle:
            found = pattern.match(text_line)
            if found:
                return found.group(1)
    return None
def _find_child_name(self, item: Item, pat: re.Pattern): res = tuple( filter(lambda p: p is not None and pat.match(p), map(lambda c: c.name, item.children))) if len(res) > 0: for c in item.children: if c.name == res[0]: return self.itemmapper.get_resource_name(c)
def _find_child_resource(self, item: Item, pat: re.Pattern): res = tuple( filter( lambda p: p is not None and pat.match(p), map(lambda c: self.itemmapper.get_resource_name(c), item.children))) if len(res) == 1: return res[0]
def glob_predicate(table: str, pattern: re.Pattern, arg: str) -> bool:
    """Log tables skipped due to table filter arguments.

    Returns True when *table* passes the filter: a pattern match for any
    *arg* except "except", where the sense is inverted.
    """
    hit = pattern.match(table) is not None
    keep = not hit if arg == "except" else hit
    if not keep:
        logging.info(f"Skipping {table} due to --{arg} argument")
    return keep
def get_columns_by_pattern(columns, pattern: re.Pattern):
    """Return the names in *columns* that match *pattern*, preserving order."""
    return [name for name in columns if pattern.match(name) is not None]
def count_patterns(coq_file_name: str, pattern: re.Pattern) -> int:
    """Count the lines of *coq_file_name* that match *pattern*.

    FIX: uses a context manager so the file handle is released even when
    reading raises (the original open/close pair leaked on error).
    """
    with open(coq_file_name, 'r') as f:
        return sum(1 for line in f if pattern.match(line))
def get_def_or_thm(coq_file_name: str, pattern: re.Pattern):
    """Collect group 2 of every line in *coq_file_name* that matches *pattern*.

    FIX: uses a context manager so the file handle is released even when
    reading raises (the original open/close pair leaked on error).
    """
    with open(coq_file_name, 'r') as f:
        return [m.group(2) for line in f if (m := pattern.match(line))]
def get_file_matching_date_pattern(files: List[File], pattern: Pattern):
    """Return ``(file, date)`` for the first file whose name matches *pattern*.

    Groups 1-3 of the match are year, month and day. Returns None implicitly
    when no file name matches.
    """
    for candidate in files:
        found = pattern.match(candidate.name)
        if not found:
            continue
        parsed = date(year=int(found[1]),
                      month=int(found[2]),
                      day=int(found[3]))
        return candidate, parsed
def validate_regex(ctx: BaseValidator, data: str, schema_node: Pattern) -> bool:
    """Validate that *data* is a string matching *schema_node*.

    On failure, pushes ``(type, value)`` onto the context path, records a
    deep copy of the full path in ``ctx.errors``, restores the path, and
    returns False.
    """
    if isinstance(data, str) and schema_node.match(data) is not None:
        return True
    ctx.current_path.append((type(data), data))
    ctx.errors.append(deepcopy(ctx.current_path))
    ctx.current_path.pop()
    return False
def __article_url_selectors( self, input_with_urls, custom_lambda, custom_regexp: re.Pattern, ): return list( filter(lambda url: type(url) == str and custom_regexp.match(url), list(set(map(custom_lambda, input_with_urls)))))
def get_input_with_pattern(r: Pattern, prompt_text: str,
                           error_text: str = "Invalid input"):
    """Prompt repeatedly until the user's input matches *r*; return the
    first capture group of the successful match."""
    while True:
        answer = input(prompt_text)
        if found := r.match(answer):
            return found[1]
        print(error_text)
def validateDate(self, re_pattern: Pattern):
    """
    returns True if date is valid. Returns false if not

    A date is rejected when its length is 7, 8 or 9 characters, or when it
    does not match *re_pattern*.
    """
    # BUG FIX: the original wrote `len(self.date) == (7, 8, 9)`, comparing
    # an int against a tuple — always False, so the length guard never
    # fired. Membership (`in`) is the evident intent.
    if len(self.date) in (7, 8, 9):
        return False
    elif not re_pattern.match(self.date):
        return False
    else:
        return True
def parse_lines_regex(lines, regex: re.Pattern, transform_match=None):
    """Match every (stripped) line against *regex* and collect the results.

    :param lines: iterable of input lines
    :param regex: pattern applied to each stripped line
    :param transform_match: callable applied to each match object; defaults
        to extracting ``m.groups()``
    :raises ValueError: when any line fails to match
    """
    def default_transform(m):
        # Return match groups as a tuple by default
        return m.groups()

    extract = transform_match if transform_match is not None else default_transform
    parsed = []
    for line in lines:
        m = regex.match(line.strip())
        if m is None:
            raise ValueError(f'Failed to match {line}')
        parsed.append(extract(m))
    return parsed
def get_date_from_snapshot_regex(snapshot_path: Path, snapshot_regex: re.Pattern):
    """Derive date from snapshot directory name using given regex"""
    found = snapshot_regex.match(str(snapshot_path))
    if not found:
        raise ValueError(
            f"Failed to parse date from {str(snapshot_path)!r} with "
            f"given regex {snapshot_regex}")
    # The pattern's named groups must be datetime() keyword arguments
    # (year/month/day/...), each holding a numeric string.
    parts = {field: int(text) for field, text in found.groupdict().items()}
    return datetime(**parts)
def test_updater_update_license_io_error(monkeypatch, target_config: TargetConfig,
                                         expected_err_pattern: re.Pattern):
    """
    All of these tests will raise IOError when they attempt to patch the
    license file.

    Parametrised: *target_config* drives the update attempt and
    *expected_err_pattern* must match the resulting error message.
    """
    # The failure must surface as an UpdaterError (not a raw IOError).
    with pytest.raises(UpdaterError) as err:
        with TemporaryDirectory() as empty_dir:
            # Try to update a Qt installation that does not exist
            Updater.update(target_config, base_dir=empty_dir)
    assert err.type == UpdaterError
    # The message must match the parametrised expectation.
    err_msg = format(err.value)
    assert expected_err_pattern.match(err_msg)
def _regex(self, regex: re.Pattern, category: Category):
    """Try *regex* at the cursor position.

    On success append a Token (offset = pre-advance cursor) to the output,
    advance the cursor past the lexeme, and return True; otherwise return
    False without side effects.
    """
    found = regex.match(self._input[self._cursor:])
    if not found:
        return False
    lexeme = found.group(0)
    self._output.append(Token(offset=self._cursor, category=category, value=lexeme))
    self._cursor += len(lexeme)
    return True
def get_sample(reg: re.Pattern, bs: str, count: int) -> tuple:
    """Run one external benchmark via get_call and parse its summary line.

    :param reg: pattern applied to the summary line; group(1) is the elapsed
        time, group(2) the rate, group(3) the rate's unit prefix
    :param bs: block-size argument forwarded to get_call
    :param count: count argument forwarded to get_call
    :return: ``(rec_time, rec_rate)`` as ``(float, int)``
    :raises ValueError: when the summary line does not match *reg*
    """
    # Merge stderr into stdout: the tool reports its statistics on stderr.
    o = subprocess.check_output(get_call(bs, count), stderr=subprocess.STDOUT)
    # The second-to-last output line carries the summary statistics.
    o = o.decode('ASCII').split('\n')[-2]
    m = reg.match(o)
    if not m:
        raise ValueError(f'missing regex for | {o}')
    # Some locales print decimal commas; normalise to dots before float().
    rec_time = float(re.sub(r',', '.', m.group(1)))
    rec_rate = float(re.sub(r',', '.', m.group(2)))
    # NOTE(review): presumably rescales an 'M'-prefixed rate into the next
    # smaller unit (e.g. MB/s -> KB/s) — confirm against get_call's tool.
    if m.group(3) == 'M':
        rec_rate *= 1024
    return rec_time, int(rec_rate)
def test_molecule_rfc5424( given_expression: re.Pattern, given_string: str, expected_result: Optional[Dict[str, str]], ) -> None: result = given_expression.match(given_string) if expected_result is None: assert result is None, f'Expected no match, but got: {repr(result.groupdict())}' else: assert result is not None, f'Expected match, but got: None' assert result.groupdict() == expected_result