def search_reporter():
    """Flask-style handler: search reporters whose name matches the posted
    'name' fragments and return them as JSON, sorted by news count (desc).

    Response envelope: {"errcode": 0|-1, "reporters": [...], "error": ...}.
    A bad/empty name is reported as errcode 0 with an empty reporter list.
    """
    try:
        newsDB = NewsDB()
        reqData = request.json
        name = str_param('name', reqData.get("name")).strip()  # strip surrounding whitespace
        for char in r'.+*?^$|-#><=(){}[]\\':  # reject regex metacharacters in user input
            if char in name:
                raise re.error('include illegal char')
        if name == '':
            raise re.error('no name')
        regex = re.compile("|".join(name.split()))  # join multiple name fragments with |
        # NOTE(review): rptDB is a free variable (module global?) while newsDB is
        # created above and only ever closed -- looks inconsistent; confirm that
        # rptDB is intentionally separate from newsDB.
        rpts = [rpt for rpt in rptDB.get_names() if regex.search(rpt) is not None]
        rptsInfo = [rptDB.get_rpt(rpt,keys=("name","avatar","news")) for rpt in rpts]
        for rpt in rptsInfo:
            rpt["newsCount"] = len(rpt.pop("news"))
        rptsInfo.sort(key=lambda rpt: rpt["newsCount"], reverse=True)
    except re.error as err:
        # Invalid user input: still errcode 0, but with the error text attached.
        jsonPack = {"errcode": 0, "reporters": [], "error": repr(err)}
    except Exception as err:
        # NOTE(review): re-raising here means the final `return` is never reached
        # on this path -- presumably the framework's error handler takes over.
        # Also, if NewsDB() itself failed, `newsDB` is unbound in `finally`.
        jsonPack = {"errcode": -1, "error": repr(err)}
        raise err
    else:
        jsonPack = {"errcode": 0, "reporters": rptsInfo}
    finally:
        newsDB.close()
    return json.dumps(jsonPack)
def example_21():
    """Exceptions in regular expressions."""
    # (docstring translated from Russian: "Исключения в регулярных выражениях")
    pfi()
    # Unbalanced parenthesis -> re.error; inspect its attributes (3.5+ only).
    try:
        re.match(r'\w+)', 'Text to test')
    except Exception as e:
        print('\nException class =', e.__class__, '\nError text =', e)
        print('Exception bases =', e.__class__.__bases__)
        if sys.version_info >= (3,5,0):
            # re.error gained msg/pattern/pos/lineno/colno in Python 3.5.
            print('msg =', e.msg)
            print('pattern =', e.pattern)
            print('pos =', e.pos)
            print('lineno =', e.lineno)
            print('colno =', e.colno)
    # Back-reference to a non-existent group -> re.error as well.
    try:
        re.match(r'(\w+)\2', 'Text to test')
    except Exception as e:
        print('\nException class =', e.__class__, '\nError text =', e)
        print('Exception bases =', e.__class__.__bases__)
        if sys.version_info >= (3,5,0):
            print('msg =', e.msg)
            print('pattern =', e.pattern)
            print('pos =', e.pos)
            print('lineno =', e.lineno)
            print('colno =', e.colno)
    # Constructing an error instance directly (msg, pattern, pos); note the
    # instance is discarded -- this line only demonstrates the constructor.
    re.error('Error message', 'abc', 17)
def format_field(self, value, format_spec):
    """Format the value using the format_spec.

    The aim of this function is to remove the delegation to __format__()
    on the object. For our needs we do not want the object to be
    responsible for its own formatting since it is not aware of the
    renderer itself.

    A rekall.obj.BaseObject instance must support the following
    formatting operations:

    __unicode__
    __str__
    __repr__
    and may also support __int__ (for formatting in hex).
    """
    m = self.standard_format_specifier_re.match(format_spec)
    if not m:
        # The spec string itself did not parse.
        raise re.error("Invalid regex")
    fields = m.groupdict()

    # Format the value according to the basic type; 's' (string) is the default.
    type = fields["type"] or "s"
    try:
        # Dispatch to format_type_<code>; a missing handler means an
        # unsupported type code in the spec.
        value = getattr(
            self, "format_type_%s" % type)(value, fields)
    except AttributeError:
        raise re.error("No formatter for type %s" % type)

    # Let the built-in format() apply padding/alignment; fall back to str()
    # if it rejects the spec for this value type.
    try:
        return format(value, format_spec)
    except ValueError:
        return str(value)
def update_creatures(creatures, which):
    """Parses a database and adds the data to a dict of creatures.

    @creatures: the dict returned from get_creatures.
    @which: the database to parse.

    The regex for `which` (from the module-level `regexes` table) must define
    a named group "dbid" plus at least one more group; raises re.error
    otherwise.  The extracted value(s) are stored under the key given by the
    module-level `name2key` table.
    """
    regex = regexes[which]
    ckey = name2key[which]
    # Validate the regex contract up front so failures are loud and early.
    if "dbid" not in regex.groupindex:
        raise re.error("regex must match dbid group", regex)
    if regex.groups < 2:
        raise re.error("regex must match something in addition to dbid", regex)
    matches = re.finditer(regex, get_db_contents(which))
    if which == "Lore":
        # dbid == cid, so "Lore" entries can be assigned directly by id.
        for match in matches:
            creatures[match["dbid"]][ckey] = match["lore"]
    else:
        # Collect per-dbid info first, then map onto creatures via their dbid.
        info = {}
        for match in matches:
            groups = match.groupdict()
            dbid = groups.pop("dbid")
            if regex.groups == 2:
                info[dbid] = groups.popitem()[1]  # get the other item's value
            elif regex.groups > 2:
                info[dbid] = groups
        for cid in creatures:
            # not every creature has a material -- missing dbids yield None
            creatures[cid][ckey] = info.get(creatures[cid]["dbid"])
def get_from_file(self, fp):
    """Parse one pion LEC result file.

    The config number comes from the file *name* (PION_LEC_REGEX) and the
    numeric fields from the file *content* (LEC_COMPILED_REGEX).  Raises
    re.error when either regex fails to match.  Returns a dict of floats
    keyed by LEC name plus 'config_number'.
    """
    raw_data = fp.read()
    fname = os.path.basename(fp.name)
    m = re.match(PION_LEC_REGEX['filename'], fname)
    if m:
        config_number = int(m.group('config_number'))
    else:
        raise re.error("Cannot match filename")
    r = LEC_COMPILED_REGEX
    m = re.match(r, raw_data)
    if m:
        dic = {
            'config_number': config_number,
            'LS': float(m.group('LS')),
            'B0': float(m.group('B0')),
            'F0': float(m.group('F0')),
            'L64': float(m.group('L64')),
            'L85': float(m.group('L85')),
            'L4': float(m.group('L4')),
            'L5': float(m.group('L5')),
            'm_res': float(m.group('MRES')),
            'miu': float(m.group('miu')),
        }
    else:
        raise re.error("Cannot match file")
    return dic
def format_field(self, value, format_spec):
    """Render *value* according to *format_spec* without delegating to the
    object's own __format__().

    The renderer, not the object, decides presentation: the spec is parsed
    here and dispatched to one of the format_type_* helpers.  A
    rekall.obj.BaseObject instance is expected to support __unicode__,
    __str__ and __repr__, and optionally __int__ (for hex output).
    """
    parsed = self.standard_format_specifier_re.match(format_spec)
    if parsed is None:
        raise re.error("Invalid regex")

    spec_fields = parsed.groupdict()
    type = spec_fields["type"] or "s"

    # Dispatch on the single-letter type code; an unknown code is an error.
    try:
        value = getattr(self, "format_type_%s" % type)(value, spec_fields)
    except AttributeError:
        raise re.error("No formatter for type %s" % type)

    # Apply padding/alignment via the built-in format(); fall back to a
    # plain string rendering when it rejects the spec for this value.
    try:
        return format(value, format_spec)
    except ValueError:
        return str(value)
def storeAttrVarPair(varName, attrName):
    # Register a (attribute path -> variable name) binding, rejecting
    # duplicate attributes and duplicate variable names with re.error.
    # NOTE(review): attrSet, varSet and varList are free variables -- this is
    # written as a closure helper (an identical nested copy appears inside
    # ExtCompileOLD later in this file); it cannot run standalone. Confirm.
    if attrName in attrSet:
        v = 'reassigment of attribute '+ attrName + ' to var ' + varName + '; was var ' + attrSet[attrName]
        raise re.error(v)
    if varName in varSet:
        v = 'redefinition of group name '+ varName + ' as group ' + str(len(varList) + 1) + '; was group ' + str(varSet[varName])
        raise re.error(v)
    varList.append([attrName, varName])
    attrSet[attrName] = varName
    # varSet maps the variable name to its 1-based group number.
    varSet[varName] = len(varList)
    pass
def __init__(self, regex_pattern, input_files):
    """Compile the search pattern and remember the files to scan.

    :param regex_pattern: regular expression string to search with.
    :param input_files: files this searcher will operate on.
    :raises re.error: if *regex_pattern* is not a valid regular expression.
    """
    # Only the compile can raise re.error -- keep the try minimal so the
    # plain attribute assignments are not wrapped for no reason.
    try:
        self.regex = re.compile(regex_pattern)
    except re.error as exc:
        # Chain the original error so its msg/pos details stay visible.
        raise re.error("regular expression is not valid") from exc
    self.input_files = input_files
    self.search_results = {}
def search_by_regex(self):
    """Search tasks by a regex pattern entered interactively.

    Loops until the user supplies a valid pattern that matches at least one
    task field (Task name / Date / Duration / Notes), then prints the hits.
    """
    clear_screen()
    tasks = self.fetch_tasks()
    results = []
    while True:
        regex_search = input("This will search regular expression. "
                             "Please enter: ")
        try:
            regex_search = re.compile(regex_search)
        # BUG FIX: was `except re.error():` -- calling the class builds an
        # *instance*, and `except <instance>` raises TypeError at catch time
        # instead of handling the bad pattern.  Catch the class itself.
        except re.error:
            print("Not a valid regular expression. Try again.")
            continue
        for task in tasks:
            if (re.search(regex_search, task['Task name']) or
                    re.search(regex_search, task['Date']) or
                    re.search(regex_search, task['Duration']) or
                    re.search(regex_search, task['Notes'])):
                results.append(task)
        if len(results) == 0:
            clear_screen()
            print("Sorry. No matches. Please try again.")
            continue
        else:
            clear_screen()
            self.print_tasks(results)
            break
def shlex_argparse(argstr):
    """Split *argstr* shell-style into positional and optional arguments.

    Tokens of the form ``--key=value`` become single-valued options, a bare
    ``--key`` collects the following tokens (via ``_until_dh``) into a list,
    a lone ``--`` is discarded, and everything else is positional.

    Returns a ``(positional_args, optional_args)`` tuple.

    :raises re.error: if a token cannot be classified at all.
    """
    tokens = shlex.split(argstr)
    classifier = re.compile(r"^(?:(?:--(\w+)=([\s\S]*))|(?:--(\w+))|(\S[\s\S]*))$")

    positionals = []
    options = {}
    idx = 0
    while idx < len(tokens):
        m = classifier.match(tokens[idx])
        if m is None:
            raise re.error("Not matched", pattern=classifier)
        key, value, bare_key, positional = m.groups()
        if positional:
            # A lone "--" separator is swallowed; anything else is positional.
            if positional != "--":
                positionals.append(positional)
        elif key:
            options[key] = value
        elif bare_key:
            # Bare --flag: consume the following tokens up to the next option.
            options[bare_key], consumed = _until_dh(tokens[idx + 1:])
            idx += consumed
        idx += 1
    return positionals, options
def parse(self):
    """Parse the transition file at self.path into conditional and
    unconditional transitions, collecting every target state.

    Raises IOError for an empty file and re.error for any line that matches
    neither transition pattern.  The lexically smallest state becomes
    self.begin_state.
    """
    if os.stat(self.path).st_size == 0:
        raise IOError("File is empty.")
    with open(self.path, "r") as f:
        for sentence in f:
            cond_trans = self.cond_trans_pattern.search(sentence)
            uncond_trans = self.uncond_trans_pattern.search(sentence)
            # NOTE(review): lines from file iteration keep their trailing
            # '\n', so `sentence == ""` can never be true here -- presumably
            # meant as an end-of-input guard; confirm intent.
            if sentence == "":
                break
            elif cond_trans:
                transition_dest, params, condition, \
                    output_range, state_name = cond_trans.groups()
                tr = Conditional(source=transition_dest, target=state_name,
                                 cond_expr=condition, labels=output_range,
                                 params=params)
                self.states.add(tr.target)
                self.conditional_transitions.append(tr)
            elif uncond_trans:
                transition_dest, state_name = uncond_trans.groups()
                tr = UnConditional(source=transition_dest, target=state_name)
                self.states.add(tr.target)
                self.unconditional_transitions.append(tr)
            else:
                # Unparseable line: surface it with the offending text.
                raise re.error("{} didn't match.".format(sentence))
    self.begin_state = sorted(self.states)[0]
def get_resampler(self, resampler_type=ResamplerEnum.SMOTE.name, sampling_strategy='minority',
                  k_neighbors=3, allow_minority=True, tomek_sampling_strategy='majority'):
    """Build and return the imbalanced-learn resampler selected by
    *resampler_type*.

    :param resampler_type: a ResamplerEnum member or its ``.name`` string.
    :param sampling_strategy: passed to SMOTE / RandomUnderSampler.
    :param k_neighbors: SMOTE neighbour count.
    :param allow_minority: forwarded to AllKNN.
    :param tomek_sampling_strategy: forwarded to TomekLinks (SMOTETomek).
    :raises error: when no resampler matches the requested type.
    """
    # BUG FIX: the default value is a *string* (ResamplerEnum.SMOTE.name),
    # but the old code unconditionally read `resampler_type.name`, so calling
    # with the default raised AttributeError.  Accept both forms.
    type_name = resampler_type if isinstance(resampler_type, str) else resampler_type.name
    smote = SMOTE(random_state=self.random_state, n_jobs=self.n_jobs,
                  sampling_strategy=sampling_strategy, k_neighbors=k_neighbors)
    if type_name == ResamplerEnum.SMOTE.name:
        return smote
    if type_name == ResamplerEnum.AllKNN.name:
        return AllKNN(allow_minority=allow_minority, n_jobs=self.n_jobs)
    if type_name == ResamplerEnum.SMOTETomek.name:
        tomekLinks = TomekLinks(n_jobs=self.n_jobs,
                                sampling_strategy=tomek_sampling_strategy)
        return SMOTETomek(random_state=self.random_state, n_jobs=self.n_jobs,
                          smote=smote, tomek=tomekLinks)
    if type_name == ResamplerEnum.RandomUnderSampler.name:
        return RandomUnderSampler(random_state=self.random_state,
                                  sampling_strategy=sampling_strategy)
    if type_name == ResamplerEnum.SMOTEENN.name:
        return SMOTEENN(random_state=self.random_state, n_jobs=self.n_jobs,
                        smote=smote)
    # Fixed message typo ("Theres is" -> "There is").
    raise error('There is no resampler configured')
def match_groups(regex, target):
    """Anchor-match *regex* against *target* and return the captured groups.

    :raises re.error: when the pattern does not match at the start of target.
    """
    result = re.match(regex, target)
    if result is not None:
        return result.groups()
    raise re.error(f"Regex does not match “{target}”. RE Pattern: {regex}", regex)
def test_sphinx_raises_with_prefix_one(self):
    # Pylint checker fixture: the tilde-prefixed ":raises ~re.error:" entry
    # must be matched against the re.error raised below. The docstring IS
    # the test payload -- do not edit its content.
    """This is a sphinx docstring.

    :raises ~re.error: Sometimes
    """
    import re
    raise re.error("hi")
def compile(cls, pattern, flags=0):
    """Compile *pattern* with ICU and wrap it in an _ICURegex.

    :param pattern: regular expression string.
    :param flags: ICU regex flags.
    :raises re.error: when ICU rejects the pattern (translated from
        PICUException, with the original kept as the cause).
    """
    # TODO: pass in UParseError argument to get better diagnostics
    args = str_to_uchar_array_with_len(pattern) + (flags, None)
    try:
        regex = icu.uregex_open(*args)
        return _ICURegex(regex)
    except PICUException as e:
        # FIX: chain the original ICU error so diagnostics aren't lost.
        raise re.error(str(e)) from e
def test_find_sphinx_attr_raises_exact_exc(self):
    # Pylint checker fixture: ":raises re.error:" names the qualified class
    # raised below via `import re`. The docstring IS the test payload.
    """This is a sphinx docstring.

    :raises re.error: Sometimes
    """
    import re
    raise re.error("hi")
def test_find_valid_missing_sphinx_attr_raises(self):  # [missing-raises-doc]
    # Pylint checker fixture: the documented "re.anothererror" does NOT match
    # the raised re.error, so the marker above expects missing-raises-doc.
    """This is a sphinx docstring.

    :raises re.anothererror: Sometimes
    """
    from re import error
    raise error("hi")
def test_find_sphinx_attr_raises_substr_exc(self):
    # Pylint checker fixture: the docstring names "re.error" while the code
    # raises it via a bare `error` import -- checker must still match them.
    """This is a sphinx docstring.

    :raises re.error: Sometimes
    """
    from re import error
    raise error("hi")
def __get_regexngrams(self, doc: str, pattern: str):
    """Split *doc* on *pattern* and return the self.n-grams of the pieces,
    each gram joined with underscores.

    :param doc: text to tokenize.
    :param pattern: regex used as the split delimiter.
    :raises re.error: when *pattern* is not a valid regular expression.
    """
    try:
        compiled = re.compile(pattern)
    except re.error:
        print(f"Error with regex '{pattern}'")
        # FIX: re-raise the original re.error instead of the old
        # `raise re.error(e)`, which stuffed the exception object into the
        # message and discarded pattern/position information.
        raise
    tokens = compiled.split(doc)
    # Sliding window of length self.n over the split tokens.
    grams = [tokens[x:x + self.n] for x in range(len(tokens) - self.n + 1)]
    return ['_'.join(g) for g in grams]
def _get_video_fps(self, file):
    """Probe *file* with ffprobe and return its frame rate as a float.

    Returns 1 when ffprobe produces no output (audio-only input).
    Raises NotImplementedError for any frame-rate format it cannot parse.
    """
    output = getoutput(fps_command % (ffprobe_path, file))
    if not output:
        return 1  # in case of audio
    output = output.strip()
    if '/' in output:
        # ffprobe reports a rational rate such as "30000/1001".
        output = output.split('/')
        if len(output) == 2:
            return float(output[0]) / float(output[1])
        elif len(output) == 3:
            # Extra '/' component; second field may carry a trailing newline.
            return float(output[0]) / float(output[1].split('\n')[0])
        else:
            exception('GET FPS FAIL %s' % str(output))
            raise NotImplementedError
    # NOTE(review): a plain rate with no '/' (e.g. "25") also lands here and
    # is treated as unknown -- confirm that is intended.
    error('UNKNOWN FRAMERATE VALUE %s' % output)
    raise NotImplementedError
def test_find_valid_missing_google_attr_raises(self):  # [missing-raises-doc]
    # Pylint checker fixture (google style): the documented "re.anothererror"
    # does not match the raised re.error -> missing-raises-doc expected.
    """This is a google docstring.

    Raises:
        re.anothererror: Sometimes
    """
    from re import error
    raise error("hi")
def test_scan_aborts_due_to_invalid_regex(self, mock_config: mock.MagicMock):
    """A re.error raised during configuration must abort the scan with a
    ConfigException carrying the regex error message."""
    self.options.regex = True
    test_scanner = TestScanner(self.options)
    # Make the mocked config step raise the regex failure.
    mock_config.side_effect = re.error(  # type: ignore
        msg="Invalid regular expression", pattern="42")
    with self.assertRaisesRegex(types.ConfigException,
                                "Invalid regular expression"):
        test_scanner.scan()
def get_vectorizer(self, vectorizer_type=VectEnum.TfidfVectorizer, min_df=1,
                   tokenizer_type=None):
    """Build and return the scikit-learn text vectorizer selected by
    *vectorizer_type*.

    :param vectorizer_type: a VectEnum member.
    :param min_df: minimum document frequency forwarded to the vectorizer.
    :param tokenizer_type: optional tokenizer callable.
    :raises error: when no vectorizer matches the requested type.
    """
    if vectorizer_type.name == VectEnum.TfidfVectorizer.name:
        return TfidfVectorizer(min_df=min_df, tokenizer=tokenizer_type)
    if vectorizer_type.name == VectEnum.CountVectorizer.name:
        return CountVectorizer(min_df=min_df, tokenizer=tokenizer_type)
    # Fixed message typo ("Theres is no vect" -> "There is no vectorizer").
    raise error('There is no vectorizer configured')
def test_google_raises_with_prefix_one(self):
    # Pylint checker fixture (google style): tilde-prefixed "~re.error" must
    # still be matched against the raised re.error below.
    """This is a google docstring.

    Raises:
        ~re.error: Sometimes
    """
    import re
    raise re.error("hi")
def test_find_google_attr_raises_exact_exc(self):
    # Pylint checker fixture (google style): documented "re.error" matches
    # the qualified re.error raised via `import re`.
    """This is a google docstring.

    Raises:
        re.error: Sometimes
    """
    import re
    raise re.error("hi")
def test_find_google_attr_raises_substr_exc(self):
    # Pylint checker fixture (google style): docstring names "re.error" while
    # the code raises it via a bare `error` import -- must still match.
    """This is a google docstring.

    Raises:
        re.error: Sometimes
    """
    from re import error
    raise error("hi")
def test_latency(sockperf_path, host, port):
    """Run `sockperf ping-pong` against host:port and return the measured
    latency in microseconds, parsed from its summary line.

    Raises re.error when no line of sockperf's output matches the summary
    pattern.
    """
    summary_re = re.compile(r'^sockperf: Summary: Latency is ([0-9.]+) usec')
    command = [sockperf_path, 'ping-pong', '-i', host, '-p', str(port)]
    proc = run(command, stdin=None, stdout=PIPE, stderr=STDOUT)
    for line in proc.stdout.decode().split('\n'):
        hit = summary_re.search(line)
        if hit:
            return float(hit.group(1))
    raise re.error("No lines in the stdout of sockperf ping-pong matched the regex")
def replacement_func(match_obj):
    """Map a quoted two-byte escape sequence back to the literal byte."""
    seq = match_obj.group(0)
    unquoted = {b"\\n": b"\n", b"\\\\": b"\\"}.get(seq)
    if unquoted is None:
        raise re.error(
            "Unknown quoted sequence {qs} found".format(qs=seq))
    return unquoted
def replacement_func(match_obj):
    """Quote a single byte that requires escaping (newline or backslash)."""
    char = match_obj.group(0)
    quoted = {b"\n": b"\\n", b"\\": b"\\\\"}.get(char)
    if quoted is None:
        raise re.error(
            "Bad char '{bc}' shouldn't need quoting".format(bc=char))
    return quoted
def _parse_line_regex(self, line):
    """Match *line* against self.regex_comp, store the match object and set
    self.sync from the named "sync" group.

    :raises re.error: when the line does not match self.regex_comp.
    """
    self.line_str = line
    # This match object is non-copyable! Be careful when copying this class.
    self.match_obj = self.regex_comp.match(line)
    # FIX: identity comparison with None (was `== None`).
    if self.match_obj is None:
        raise re.error("Failure while parsing line: " + line +
                       " as expression of the form " + self.regex_str +
                       ". Apparently it doesn't comply to the regex provided.")
    # sync is True exactly when the "sync" group captured "*".
    self.sync = self.match_obj.group("sync") == "*"
def __init__(self, ref_str: str):
    """Parse a scripture-reference string into first/last Verse objects.

    Raises re.error when the string does not match VERSE_REGEX.
    """
    self.str = ref_str
    if ref_str in ONE_CHAPTER_BOOKS:
        # Single-chapter books omit the chapter number; append "1" so the
        # shared regex still parses them.
        ref_str = f"{ref_str} 1"  # make it parse
    match = VERSE_REGEX.match(ref_str)
    if match is None:
        raise re.error(f"'{ref_str}' does not match regex!")
    book, first, last = match.groups()
    self.first = Verse(book.strip(), first)
    # A missing "last" group means a single-verse reference.
    self.last = Verse(book.strip(), last) if last else None
def test_find_invalid_missing_sphinx_attr_raises(self):
    # Pylint checker fixture: no marker expected -- see docstring below.
    """This is a sphinx docstring.

    pylint allows this to pass since the comparison between Raises and
    raise are based on the class name, not the qualified name.

    :raises bogusmodule.error: Sometimes
    """
    from re import error
    raise error("hi")
def get_from_file(self, file_name):
    """ Parse all the data from an Iwasaki Charged Meson file e.g. \
    meson_BOX_RELOADED.src0.ch1-0.3333333333.ch2-0.3333333333.m10.03.m20.03.dat.510

    Charges, masses and config number come from the file *name*
    (IWASAKI_REGEX); correlator blocks come from the *content* via
    IWASAKI_COMPILED_REGEX (or the _PSEUDO variant when self.pseudo).
    Returns a list of dicts, one per correlator block.  Raises re.error
    when the filename does not match.
    """
    data = []
    raw_data = file_name.read()
    fname = os.path.basename(file_name.name)
    m = re.match(IWASAKI_REGEX['filename'], fname)
    if m:
        # Charges are stored as thirds in the filename; scale to integers.
        charge_1 = int(round(3 * float(m.group('charge1')), 1))
        charge_2 = int(round(3 * float(m.group('charge2')), 1))
        mass_1 = float(m.group('mass1'))
        mass_2 = float(m.group('mass2'))
        config_number = int(m.group('config_number'))
    else:
        raise re.error("Cannot match filename")
    if self.pseudo:
        r = IWASAKI_COMPILED_REGEX_PSEUDO
    else:
        r = IWASAKI_COMPILED_REGEX
    matched = [m.groupdict() for m in r.finditer(raw_data)]
    for match in matched:
        if self.pseudo:
            # Pseudoscalar files have a fixed source/sink structure.
            source = 'GAM_5'
            sink = 'GAM_5'
        else:
            source = match['source']
            sink = match['sink']
        re_data = []
        im_data = []
        time_slices = []
        for line in match['data'].split('\n'):
            # Each data line is "<t> <re> <im>"; malformed lines are skipped.
            try:
                n, re_c, im_c = line.split()
                re_c = float(re_c)
                im_c = float(im_c)
                n = int(n)
                re_data.append(re_c)
                im_data.append(im_c)
                time_slices.append(n)
            except ValueError:
                pass
        dic = {'source': source, 'sink': sink, 'data': re_data,
               'im_data': im_data, 'time_slices': time_slices,
               'mass_1': mass_1, 'mass_2': mass_2,
               'charge_1': charge_1, 'charge_2': charge_2,
               'config_number': config_number}
        data.append(dic)
    return data
def get_from_file(self, fp):
    """Parse one kaon LEC result file.

    The config number comes from the file *name* (KAON_LEC_REGEX) and the
    M2/A_3/A_4 values from the file *content* (LEC_COMPILED_REGEX).
    Raises re.error when either regex fails to match.
    """
    raw_data = fp.read()
    fname = os.path.basename(fp.name)
    m = re.match(KAON_LEC_REGEX['filename'], fname)
    if m:
        config_number = int(m.group('config_number'))
    else:
        raise re.error("Cannot match filename")
    r = LEC_COMPILED_REGEX
    m = re.match(r, raw_data)
    if m:
        dic = {
            'config_number': config_number,
            'M2': float(m.group('M2')),
            'A_3': float(m.group('A3')),
            'A_4': float(m.group('A4')),
        }
        log.debug(dic)
    else:
        raise re.error("Cannot match file")
    return dic
def isMatch(self, task):
    """
    Return a boolean based on whether the supplied task satisfies self.text.

    This filter can handle basic and/or/not conditions. The syntax is
    as follows:

    :AND   : ',' or whitespace (' ')
    :OR    : '|'
    :NOT   : prefixed '~' or '!'

    These operators follow the following order of precedence: OR, AND, NOT.
    So, for example:

    :'work job1 | home'                    : Either (matches 'work'
                                             AND 'job1') OR (matches 'home')
    :'norweigan blue ~dead | !parrot'      : Either (matches 'norweigan'
                                             AND 'blue' AND does NOT
                                             match 'dead') OR (does NOT
                                             match 'parrot')

    Since the python re module is used, most of the escaped regex
    characters will also work when attached to one of the (comma- or
    space-delimited) strings. E.g.:
    - \bcleese\b will match 'cleese' but not 'johncleese'
    - 2014-\d\d-07 will match '2014-03-07' but not '2014-ja-07'

    The method can handle parentheses in the search strings. Unlike most
    regex characters, these don't need to be escaped since they are escaped
    automatically. So the search string '(B)' will match '(B) nail its feet
    to the perch'.
    """
    mymatch = False
    comp = re.compile(r'\s*([\!~])?([\(\)\w\\\-]+)[\s,]*', re.U)
    # NOTE(review): the third positional argument of .sub() is *count*, not
    # flags -- passing re.U here caps replacements at 32. Confirm intent.
    restring = comp.sub(simpleTextFilterRepl, self.text, re.U)
    try:
        if ')' in restring:
            raise re.error('')  # otherwise adding closing parenth avoids error here
        mymatch = re.search(restring, task.text, re.I | re.U)
    except re.error:
        # Escape unescaped parentheses and retry the search.
        comp2 = re.compile(r'\s*\((?=[^?])', re.U)
        restring2 = comp2.sub(r'\\(', restring, re.U)
        comp3 = re.compile(r'(?<!\))\)(?=\))', re.U)
        restring3 = comp3.sub(r'\\)', restring2, re.U)
        """temporary solution: user input illegal characters in the
        search string (+,?,\) otherwise the program crashed"""
        try:
            mymatch = re.search(restring3, task.text, re.I | re.U)
        except Exception:
            # Still unparseable input: fail open and treat it as a match.
            mymatch = True
    return mymatch
def parse_iwasaki_32c_charged_meson_file(f):
    """ This is just as slow as regex

    Line-by-line parser for Iwasaki 32c charged meson files: metadata comes
    from the file name (IWASAKI_REGEX), correlator blocks are delimited by
    STARTPROP/ENDPROP markers in the body.  Returns a list of dicts, one per
    block.  Raises re.error when the filename does not match.
    """
    data = []
    fname = os.path.basename(f.name)
    m = re.match(IWASAKI_REGEX['filename'], fname)
    if m:
        # Charges are stored as thirds in the filename; scale to integers.
        charge_1 = int(round(3 * float(m.group('charge1')), 1))
        charge_2 = int(round(3 * float(m.group('charge2')), 1))
        mass_1 = float(m.group('mass1'))
        mass_2 = float(m.group('mass2'))
        config_number = int(m.group('config_number'))
    else:
        raise re.error("Cannot match filename")
    for line in f:
        ls = line.strip()
        split_spaces = ls.split(' ')
        if ls == 'STARTPROP':
            # Begin a new correlator block.
            re_data = []
            im_data = []
            time_slices = []
        elif split_spaces[0] == 'MASSES:':
            pass
        elif split_spaces[0] == 'SOURCE:':
            source = split_spaces[1]
        elif split_spaces[0] == 'SINKS:':
            sink = split_spaces[1]
        elif ls == 'ENDPROP':
            # NOTE(review): source/sink/re_data are only bound by earlier
            # markers -- a malformed file hitting ENDPROP first would raise
            # UnboundLocalError. Presumably inputs are trusted; confirm.
            dic = {'source': source, 'sink': sink, 'data': re_data,
                   'im_data': im_data, 'time_slices': time_slices,
                   'mass_1': mass_1, 'mass_2': mass_2,
                   'charge_1': charge_1, 'charge_2': charge_2,
                   'config_number': config_number}
            data.append(dic)
        else:
            # Data line: "<t> <re> <im>".
            t, real, im = ls.split()
            time_slices.append(int(t))
            re_data.append(float(real))
            im_data.append(float(im))
    return data
def servicesInFolder(self, foldername, namefilter=None): """ """ # test if name filter is valid regex if namefilter: try: re.compile(namefilter) except re.error: raise re.error("Specified namefilter argument must be a vaild regex. Aborting.") listofservices = [] folderURL = "/arcgis/admin/services/" + foldername # This request only needs the token and the response formatting parameter params = urllib.urlencode({'token': self.token, 'f': 'json'}) headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"} # Connect to URL and post parameters httpConn = httplib.HTTPConnection(self.server, self.port) httpConn.request("POST", folderURL, params, headers) # Read response response = httpConn.getresponse() if (response.status != 200): httpConn.close() raise ServiceException("Could not read folder information.") else: data = response.read() # Check that data returned is not an error object if not assertJsonSuccess(data): raise ServiceException("Error when reading folder information. " + str(data)) # Deserialize response into Python object dataObj = json.loads(data) httpConn.close() for item in dataObj['services']: # if namefilter, check to see if name matches; if not, skip to next item if namefilter: if not re.search(namefilter, item['serviceName']): continue listofservices.append(item['serviceName'] + "." + item['type']) return listofservices
def compile(regexPatternIn, flags=0, debug = 0):
    """Front-end for the extended '(?#<...>)' tag-regex syntax.

    Classifies the tag markers in the pattern (plain tag, zone-in, children,
    next-tag, parent, sibling) and builds the appropriate compiled object:
    a plain re pattern when the syntax is absent, an ExtCompile object for a
    single tag, or a zinwrapper combining a span pattern and a search
    pattern.  Returns None for marker sequences it cannot classify.
    """
    # Marker vocabulary for the extended syntax.
    BEG = r'(?#'
    END = r')'
    ZIN = r'<<>>'
    NXTTAG = r'<><>'
    CHILDREN = r'<<>*>'
    PARENT = r'<*<>>'
    SIBLING = r'<>*<>'
    TAG = r'<>'
    # Strip PASS/SPAN/SEARCH/NXTPOSINI directives before classification.
    PATDIRECT = r'\(\?#(<(?:PASS|SPAN|SEARCH|NXTPOSINI).*?>)\)'
    directives = re.findall(PATDIRECT, regexPatternIn)  # @UnusedVariable
    regexPattern = re.sub(PATDIRECT, '', regexPatternIn)
    patterns = re.split(r'(?:<\*<|>\*<|>\*>|>|<)', regexPattern)
    if not (patterns[0].startswith(BEG) and patterns[-1].endswith(END)):
        # Not using the extended syntax at all: plain re.compile.
        return re.compile(regexPatternIn, flags)
    patterns = patterns[1:-1]
    # The angle-bracket runs determine which construct this is.
    tags = re.findall(r'[<>][<>*]*', regexPattern)
    if len(tags) <= 1 or tags[0] not in ['<', '<*<']:
        return None
    token = ''.join(tags)
    if token == TAG:
        return ExtCompile(regexPattern, flags)
    patt1 = BEG + '<' + patterns[0] + '>' + END
    zone = 'zin'
    if token == ZIN:
        patt2 = patterns[-1] if tags[-1] == '>' else ''
        patt2 = BEG + '<' + patt2 + '>' + END
        zone = 0 if tags[1] == '<' else patterns[1]
    elif token == CHILDREN:
        # Restrict the child search to the child tag names mentioned in patt1.
        chldpat = '|'.join(re.findall(r'(?<= )[.]([a-z\d_-]+)(?=[ >])', patt1))
        chldpat = chldpat or '__TAG__'
        patt2 = patterns[-1] if tags[-1] == '>*>' else ''
        patt2 = BEG + '<' + chldpat + ' ' + patt2 + '>' + END
    elif token == NXTTAG:
        patt2 = patterns[-1] if tags[-1] == '>' else ''
        patt2 = BEG + '<__TAG__ ' + patt2 + '>' + END
        zone = 'zoutr'
    elif token in [PARENT, SIBLING]:
        raise re.error(token + ' not yet implemented')
        pass
    else:
        return None
    if debug:
        # Debug mode exposes the intermediate decomposition instead of compiling.
        return regexPattern, flags, patt1, patt2, zone
    srchRegexObj = ExtCompile(patt2, flags)
    if not patt1:
        return srchRegexObj
    spanRegexObj = ExtCompile(patt1, flags)
    return zinwrapper(regexPattern, flags, spanRegexObj, srchRegexObj, zone)
def raw_match_indexes(self, query, ignore_case=False) -> list:
    """Returns a list with the indexes of each message that match the passed
    message

    Parameters:
        query: The string query to search for
        ignore_case (optional): Whether to search by case sensitive

    Return:
        A list sorted list of indexes for messages exactly matching query

    Raises:
        re.error: when *query* is not a valid regular expression.
    """
    # python re cheat sheet: https://www.debuggex.com/cheatsheet/regex/python
    # Only the compile can raise re.error -- keep the try minimal (the old
    # code wrapped the whole scan and its `return` in the try block).
    try:
        pattern = re.compile(query, re.IGNORECASE) if ignore_case else re.compile(query)
    except re.error as exc:
        # Chain the original error so its position details stay visible.
        raise re.error("\"{0}\" is not a valid regex string".format(query)) from exc
    # enumerate + comprehension instead of the old range(len(...)) loop.
    return [i for i, msg in enumerate(self._convo)
            if pattern.fullmatch(msg[1]) is not None]
def create_logmatches(self):
    """Populate the database with the default logmatch set: the logfiles and
    logmatch rows declared in database_data, with each row's pattern
    pre-compiled and its fields attached.

    Raises ValueError for malformed rows/fields or unknown event tags, and
    re.error for patterns that fail to compile.
    """
    logmatch_set = model.LogmatchSet(display_name=u'Default')
    model.DBSession.add(logmatch_set)

    # Default logfiles, all attached to the default set.
    for row in database_data.logfiles:
        lf = model.Logfile(row[0], row[1])
        lf.logmatchset = logmatch_set
        model.DBSession.add(lf)

    for row in database_data.logmatch_default_rows:
        lmr = model.LogmatchRow()
        try:
            # Row layout: (match_text, match_start, host_match,
            #              attribute_match, state_match, event_tag, fields)
            (lmr.match_text, lmr.match_start, lmr.host_match,
                lmr.attribute_match, lmr.state_match, event_tag,
                fields) = row
        except Exception as errmsg:
            raise ValueError(
                "Cannot add row \"%s\": %s.\n" % (row[0], errmsg))
        else:
            lmr.event_type = model.EventType.by_tag(event_tag)
            if lmr.event_type is None:
                raise ValueError(
                    "Bad EventType tag \"{}\" in LogMatchRow {}".
                    format(event_tag, lmr.match_text))
            self.used_event_types.append(lmr.event_type.id)
            # Compile the match text once at load time so bad patterns are
            # rejected here instead of at matching time.
            try:
                lmr.match_sre = re.compile(row[0])
            except re.error as errmsg:
                raise re.error(
                    "Cannot compile message \"{}\": {}".
                    format(row[0], errmsg))
            lmr.logmatch_set = logmatch_set
            if fields is not None:
                for field in fields:
                    lmf = model.LogmatchField()
                    try:
                        (lmf.event_field_tag, lmf.field_match) = field
                    except ValueError:
                        raise ValueError(
                            "Bad Field \"{}\" in LogMatchRow {}".format(
                                field, lmr.match_text))
                    lmr.fields.append(lmf)
            model.DBSession.add(lmr)
def base(self):
    """Parse a subexpression that can be starred: single letter or group.

    Consumes from self.expr at self.pos.  Handles three cases: the empty
    expression (epsilon), a parenthesised group, and a single (optionally
    backslash-escaped) character.  Returns the same triple shape as
    expression()/epsilon().
    """
    # End of input or closing paren: match the empty string.
    if self.pos == len(self.expr) or self.expr[self.pos] == ')':
        return self.epsilon()
    # Parenthesised group: recurse into a full expression.
    if self.expr[self.pos] == '(':
        self.pos += 1
        ret = self.expression()
        if self.pos == len(self.expr) or self.expr[self.pos] != ')':
            raise LanguageError("Close paren expected at char " + str(self.pos))
        self.pos += 1
        return ret
    # Backslash escape: the next character is taken literally.
    if self.expr[self.pos] == '\\':
        self.pos += 1
        if self.pos == len(self.expr):
            raise re.error("Character expected after backslash")
    # Single character: record it in the alphabet and build a one-state NFA
    # fragment (start set, accept set, nullable flag).
    self.alphabet.add(self.expr[self.pos])
    state = self.newstate(self.expr[self.pos])
    self.pos += 1
    state = frozenset([state])
    return state, state, False
def test_sanity_re():
    '''
    Basic sanity tests for the re module. Each module member is used at
    least once.

    NOTE: IronPython / Python 2 test suite -- relies on buffer(), xrange()
    and the Assert/AreEqual helpers from its test framework.
    '''
    #compile
    Assert(hasattr(re.compile("(abc){1}"), "pattern"))
    Assert(hasattr(re.compile("(abc){1}", re.L), "pattern"))
    Assert(hasattr(re.compile("(abc){1}", flags=re.L), "pattern"))

    #I IGNORECASE L LOCAL MMULTILINE S DOTALL U UNICODE X VERBOSE
    flags = ["I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE",
             "S", "DOTALL", "U", "UNICODE", "X", "VERBOSE"]
    for f in flags:
        Assert(hasattr(re, f))

    #search
    AreEqual(re.search("(abc){1}", ""), None)
    AreEqual(re.search("(abc){1}", "abcxyz").span(), (0,3))
    AreEqual(re.search("(abc){1}", "abcxyz", re.L).span(), (0,3))
    AreEqual(re.search("(abc){1}", "abcxyz", flags=re.L).span(), (0,3))
    AreEqual(re.search("(abc){1}", "xyzabc").span(), (3,6))

    # search also accepts buffer objects (Python 2 only).
    AreEqual(re.search("(abc){1}", buffer("")), None)
    AreEqual(re.search("(abc){1}", buffer("abcxyz")).span(), (0,3))
    AreEqual(re.search("(abc){1}", buffer("abcxyz"), re.L).span(), (0,3))
    AreEqual(re.search("(abc){1}", buffer("abcxyz"), flags=re.L).span(), (0,3))
    AreEqual(re.search("(abc){1}", buffer("xyzabc")).span(), (3,6))

    #match
    AreEqual(re.match("(abc){1}", ""), None)
    AreEqual(re.match("(abc){1}", "abcxyz").span(), (0,3))
    AreEqual(re.match("(abc){1}", "abcxyz", re.L).span(), (0,3))
    AreEqual(re.match("(abc){1}", "abcxyz", flags=re.L).span(), (0,3))

    #split
    AreEqual(re.split("(abc){1}", ""), [''])
    AreEqual(re.split("(abc){1}", "abcxyz"), ['', 'abc', 'xyz'])
    #maxsplit
    AreEqual(re.split("(abc){1}", "abc", 0), ['', 'abc', ''])
    for i in xrange(3):
        AreEqual(re.split("(abc){1}", "abc", maxsplit=i), ['', 'abc', ''])
        AreEqual(re.split("(abc){1}", "", maxsplit=i), [''])
        AreEqual(re.split("(abc){1}", "abcxyz", maxsplit=i), ['', 'abc', 'xyz'])
    AreEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=0), ['', 'abc', 'xyz', 'abc', ''])
    AreEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=1), ['', 'abc', 'xyzabc'])
    AreEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=2), ['', 'abc', 'xyz', 'abc', ''])

    #findall
    AreEqual(re.findall("(abc){1}", ""), [])
    AreEqual(re.findall("(abc){1}", "abcxyz"), ['abc'])
    AreEqual(re.findall("(abc){1}", "abcxyz", re.L), ['abc'])
    AreEqual(re.findall("(abc){1}", "abcxyz", flags=re.L), ['abc'])
    AreEqual(re.findall("(abc){1}", "xyzabcabc"), ['abc', 'abc'])

    #finditer
    AreEqual([x.group() for x in re.finditer("(abc){1}", "")], [])
    AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz")], ['abc'])
    AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", re.L)], ['abc'])
    AreEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", flags=re.L)], ['abc'])
    AreEqual([x.group() for x in re.finditer("(abc){1}", "xyzabcabc")], ['abc', 'abc'])
    rex = re.compile("foo")
    # pos/endpos of finditer matches cover the whole searched string.
    for m in rex.finditer("this is a foo and a foo bar"):
        AreEqual((m.pos, m.endpos), (0, 27))
    for m in rex.finditer(""):
        AreEqual((m.pos, m.endpos), (0, 1))
    for m in rex.finditer("abc"):
        AreEqual((m.pos, m.endpos), (0, 4))
    for m in rex.finditer("foo foo foo foo foo"):
        AreEqual((m.pos, m.endpos), (0, 19))

    #sub
    AreEqual(re.sub("(abc){1}", "9", "abcd"), "9d")
    AreEqual(re.sub("(abc){1}", "abcxyz",'abcd'), "abcxyzd")
    AreEqual(re.sub("(abc){1}", "1", "abcd", 0), "1d")
    AreEqual(re.sub("(abc){1}", "1", "abcd", count=0), "1d")
    AreEqual(re.sub("(abc){1}", "1", "abcdabcd", 1), "1dabcd")
    AreEqual(re.sub("(abc){1}", "1", "abcdabcd", 2), "1d1d")

    #subn
    AreEqual(re.subn("(abc){1}", "9", "abcd"), ("9d", 1))
    AreEqual(re.subn("(abc){1}", "abcxyz",'abcd'), ("abcxyzd",1))
    AreEqual(re.subn("(abc){1}", "1", "abcd", 0), ("1d",1))
    AreEqual(re.subn("(abc){1}", "1", "abcd", count=0), ("1d",1))
    AreEqual(re.subn("(abc){1}", "1", "abcdabcd", 1), ("1dabcd",1))
    AreEqual(re.subn("(abc){1}", "1", "abcdabcd", 2), ("1d1d",2))

    #escape
    AreEqual(re.escape("abc"), "abc")
    AreEqual(re.escape(""), "")
    AreEqual(re.escape("_"), "\\_")
    AreEqual(re.escape("a_c"), "a\\_c")

    #error -- constructor accepts zero or more args.
    exc = re.error()
    exc = re.error("some args")

    #purge
    re.purge()
def ExtCompileOLD(regexPattern, flags = 0):
    """Compile the extended '(?#<TAG ...>)' attribute syntax into an
    ExtRegexObject.  (Legacy scanner-based implementation; descriptions
    translated from Spanish, doctests kept verbatim.)

    tr : the TAG being searched for
    td.width : attribute "width" looked up on td
    td.div.class="playlist_thumb" : class attribute of td.div with value playlist_thumb
    td.div.a.img.src=icon : attribute "src" looked up and stored in variable "icon".
    td.div.a.img = td..img
    td[2].div[2].*=hist : td[2] = second td tag child of tr, td[2].div[2] = second div tag of td[2], * = comment
    td..a.href=url : td..a is compact notation for td.div.a

    >>> req = compile(r'(?#<TAG tr td.width div.class="playlist_thumb" td.div.a.img.src=icon td[2].div[2].*=hist td..a.href=url>)', 0)
    >>> req.tags.keys()
    ['tr.td[2].div[2]', 'tr.div', 'tr.td', 'tr', 'tr.td.div.a.img', 'tr.td..a']
    >>> req.varList
    [['tr.td.div.a.img.src', 'icon'], ['tr.td[2].div[2].*', 'hist'], ['tr.td..a.href', 'url']]
    >>> equis = compile('(?#<TAG ese a.*="http//esto/es/prueba"=icon href=url>)', 0)
    >>> equis.varList
    [['ese.a.*', 'icon'], ['ese.href', 'url']]
    >>> for tag in equis.tags:
    ...     print tag, [(key, equis.tags[tag][key].pattern) for key in equis.tags[tag] if equis.tags[tag][key]]
    ...
    ese []
    ese.a [('*', 'http//esto/es/prueba\\\\Z')]
    >>> equis = compile('(?#<TAG a href span.src=icon span.*=label div.id>)',0)
    >>> equis.tags
    {'a': {'href': ''}, 'a.div': {'id': ''}, 'a.span': {'src': '', '*': ''}}
    >>> equis = compile('(?#<TAG a href span{src=icon *=label} div.id>)',0)
    >>> equis.tags
    {'a': {'href': ''}, 'a.div': {'id': ''}, 'a.span': {'src': '', '*': ''}}
    >>> equis = compile('(?#<TAG a href span{src=icon *=label div.id>}',0)
    Traceback (most recent call last):
      File "C:\Python27\lib\doctest.py", line 1315, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.compile[10]>", line 1, in <module>
        equis = compile('(?#<TAG a href span(src=icon *=label div.id>)',0)
      File "C:\\Users\\Alex Montes Barrios\\git\\addonDevelopment\\xbmc addon development\\src\\xbmcUI\\CustomRegEx.py", line 534, in compile
        raise re.error(v)
    error: unbalanced parenthesis
    """
    def storeAttrVarPair(varName, attrName):
        # Register (attribute path -> variable) in the enclosing varList /
        # attrSet / varSet, rejecting duplicates with re.error.
        if attrName in attrSet:
            v = 'reassigment of attribute '+ attrName + ' to var ' + varName + '; was var ' + attrSet[attrName]
            raise re.error(v)
        if varName in varSet:
            v = 'redefinition of group name '+ varName + ' as group ' + str(len(varList) + 1) + '; was group ' + str(varSet[varName])
            raise re.error(v)
        varList.append([attrName, varName])
        attrSet[attrName] = varName
        varSet[varName] = len(varList)
        pass

    # match = re.search('\(\?#<(?P<tagpattern>[a-zA-Z][a-zA-Z\d]*)(?P<vars>[^>]*>)\)',regexPattern)
    match = re.search('\(\?#<(?P<tagpattern>[a-zA-Z]\S*|__TAG__)(?P<vars>[^>]*>)\)',regexPattern)
    if not match:
        # No extended syntax present: behave like plain re.compile.
        return re.compile(regexPattern, flags)
    # Token definitions for the scanner over the attribute section.
    ATTR = r'(?P<ATTR>(?<=[{ ])\(*[a-zA-Z\d\*\.\[\]_-]+\)*(?==))'
    REQATTR = r'(?P<REQATTR>(?<=[{ ])\(*[a-zA-Z\d\*\.\[\]_-]+\)*(?=[ >\)]))'
    VAR = r'(?P<VAR>(?<==)[a-zA-Z][a-zA-Z\d]*(?=[ >}]))'
    STRPVAR = r'(?P<STRPVAR>(?<==)&[a-zA-Z][a-zA-Z\d]*&(?=[ >}]))'
    PARAM = r'(?P<PARAM>(?<==)[\'\"][^\'\"]+[\'\"](?=[ >=]))'
    TAGSUFFIX = r'(?P<TAGSUFFIX>(?<=[ {])[a-zA-Z\d\*\.\[\]]+(?={))'
    OPENP = r'(?P<OPENP>{)'
    CLOSEP = r'(?P<CLOSEP>})'
    EQ = r'(?P<EQ>=)'
    WS = r'(?P<WS>\s+)'
    END = r'(?P<END>>)'
    tagPattern = match.group('tagpattern')
    if tagPattern == '__TAG__':
        # Wildcard tag: match any tag name.
        tagPattern = '[a-zA-Z][^\s>]*'
    rootTag = "tagpholder"
    rootTagStck = [rootTag]
    master_pat = re.compile('|'.join([TAGSUFFIX, ATTR, REQATTR, VAR, STRPVAR,
                                      PARAM, OPENP, CLOSEP, EQ, WS, END]))
    scanner = master_pat.scanner(match.group('vars'))
    totLen = 0
    varSet = {}
    attrSet = {}
    tags = {rootTag: {}}
    varList = []
    # Scan token by token; totLen tracks how much of the section was consumed.
    for m in iter(scanner.match, None):
        sGroup = m.group()
        totLen += len(sGroup)
        # print m.lastgroup, m.group()
        if m.lastgroup in ["ATTR", "REQATTR"]:
            if sGroup[0] == "(" and sGroup[-1] == ")":
                # Parenthesised attribute: auto-numbered capture variable.
                sGroup = sGroup[1:-1]
                if sGroup[0] == '.' and '.' not in sGroup[1:]:
                    v = 'Included tags not allowed as variables'
                    raise re.error(v)
                varName = "group" + str(1 + len(varList))
                attrName = rootTag + ATTRSEP + sGroup
                storeAttrVarPair(varName, attrName)
                pass
            elif (sGroup[0] == "(" and sGroup[-1] != ")") or (sGroup[0] != "(" and sGroup[-1] == ")"):
                v = 'unbalanced parenthesis'
                raise re.error(v)
            pathKey, sep, attrKey = sGroup.rpartition(ATTRSEP)
            if not pathKey and sep:
                # Leading separator: compact "included tag" notation.
                pathKey = rootTag + 2*ATTRSEP + attrKey
                attrKey = ''
                tags.setdefault(pathKey, {})
                continue
            pathKey = rootTag + sep + pathKey
            # Normalize numeric child selectors: ".2" -> "[2]", drop "[1]".
            pathKey = re.sub(r'[.](\d+)(?=[.]*)', r'[\1]', pathKey).replace('[1]', '')
            tags.setdefault(pathKey, {})
            sGroup = ""
        if m.lastgroup in ["ATTR", "WS", "EQ", 'END']:
            continue
        if m.lastgroup == "TAGSUFFIX":
            pathKey = sGroup
            continue
        if m.lastgroup == "OPENP":
            # '{' opens a nested scope rooted at the current path.
            rootTagStck.append(pathKey)
            rootTag = '.'.join(rootTagStck)
            continue
        if m.lastgroup == "CLOSEP":
            rootTagStck.pop()
            rootTag = '.'.join(rootTagStck)
            continue
        if m.lastgroup in ["VAR", "STRPVAR"]:
            # '=name' binds the attribute to a named variable; '&name&'
            # additionally strips surrounding whitespace on capture.
            varName = sGroup if m.lastgroup == "VAR" else sGroup[1:-1]
            attrName = pathKey + ATTRSEP + attrKey
            storeAttrVarPair(varName, attrName)
            sGroup = "" if m.lastgroup == "VAR" else " \s*([ \S]*?)\s* "
        tagDict = tags[pathKey]
        if not attrKey:
            v = 'Included tags not allowed as variables'
            raise re.error(v)
        if attrKey in tagDict and m.lastgroup != "VAR":
            v = 'reasociation of attribute ' + pathKey + ATTRSEP + attrKey
            raise re.error(v)
        tagDict.setdefault(attrKey, '')
        if sGroup:
            # Quoted PARAM / stripped-var pattern: compile anchored to end.
            tagDict[attrKey] = re.compile(sGroup[1:-1] + r'\Z')
    if totLen == len(match.group('vars')) and len(rootTagStck) > 1:
        v = 'unbalanced parenthesis'
        raise re.error(v)
    if totLen != len(match.group('vars')):
        # Scanner stalled before the end: report the unconsumed remainder.
        v = 'unable to process pattern from: ' + match.group('vars')[totLen:]
        raise re.error(v)
    if 'tagpholder..*' in tags:
        # Bare required ".*" tag: only legal when no variables are declared.
        if not varList:
            tags.pop('tagpholder..*')
            varList.append(['tagpholder..*', 'group'])
        else:
            v = 'With required Tag ".*", var are not allowed'
            raise re.error(v)
    # print '****'
    # print regexPattern
    # print tags
    # print varList
    # for tag in tags:
    #     if not any(tags[tag].values()): continue
    #     print tag, dict((key, value.pattern) for key, value in tags[tag].items() if value)
    return ExtRegexObject(regexPattern, flags, tagPattern, tags, varList)
def ExtCompile(regexPattern, flags = 0):
    # Compile an "extended" regex: when regexPattern embeds a directive of the
    # form (?#<tagpattern ...vars...>), parse it into a tag pattern, per-tag
    # attribute value patterns (`tags`) and a capture-variable list
    # (`varList`), and wrap everything in an ExtRegexObject.  Without the
    # directive this degrades to plain re.compile.
    # NOTE(review): uses dict.has_key, so this block targets Python 2.

    def skipCharsInSet(strvar, aSet, npos = 0, peek = False):
        # Consume the run of characters at npos that ARE in the character
        # class named aSet (looked up in cmp_patt__in).  Returns
        # (consumed, new_pos), or just the consumed text when peek=True.
        m = cmp_patt__in[aSet].match(strvar,npos)
        res = m.group() if m else ''
        return (res, npos + len(res)) if not peek else res

    def getCharsNotInSet(strvar, aSet, npos = 0, peek = False):
        # Complement of skipCharsInSet: consume characters NOT in the class
        # named aSet (looked up in cmp_patt_not).
        m = cmp_patt_not[aSet].match(strvar,npos)
        res = m.group() if m else ''
        return (res, npos + len(res)) if not peek else res

    # Detect the embedded directive: (?#<tagpattern vars...>).
    match = re.search('\(\?#<\(*(?P<tagpattern>[a-zA-Z]\S*|__TAG__)\)*(?P<vars>[^>]*>)\)',regexPattern)
    if not match:
        # No directive present: behave exactly like the stdlib.
        return re.compile(regexPattern, flags)

    # Character sets driving the hand-written scanner below.
    WHITESPACE = ' \t\n\r\f\v'
    ATTRSEP = '.'        # separates path components (tag.attr)
    PATH_MOD = '{'       # opens a nested tag scope
    PATH_RES = '}'       # closes a nested tag scope
    END_PAT = '>'        # terminates the directive
    PARAM_DEL = '\'"'    # quotes delimiting attribute values
    EQ = '='
    ATTR_RDEL = PATH_MOD + EQ + END_PAT + WHITESPACE
    ATTR_LDEL = PARAM_DEL + WHITESPACE
    TAG_PATT = END_PAT + WHITESPACE
    ATTR_PATT = PATH_RES + ATTR_RDEL
    PARAM_PATT = PARAM_DEL
    VAR_PATT= EQ + PATH_RES + END_PAT + ATTR_LDEL
    # Pre-compiled "[...]+" / "[^...]+" matchers for the two helpers above.
    cmp = re.compile
    cmp_patt_not = dict([('TAG_PATT', cmp('[^' + TAG_PATT + ']+')),
                         ('ATTR_PATT', cmp('[^' + ATTR_PATT + ']+')),
                         ('PARAM_PATT', cmp('[^' + PARAM_PATT + ']+')),
                         ('VAR_PATT', cmp('[^' + VAR_PATT + ']+'))])
    cmp_patt__in = dict([('PATH_RES', cmp('[' + PATH_RES + ']+')),
                         ('ATTR_LDEL', cmp('[' + ATTR_LDEL + ']+')),
                         ('ATTR_RDEL', cmp('[' + ATTR_RDEL + ']+')),
                         ('PARAM_DEL', cmp('[' + PARAM_DEL + ']+'))])

    rootTag = "tagpholder"      # implicit root of every attribute path
    rootTagStck = [rootTag]     # stack of open '{' scopes
    pattern = regexPattern.strip('(?#)')
    npos = 1
    pmax = len(pattern) - 1
    tags = {rootTag:{}}         # pathKey -> {attrKey: compiled value pattern or ''}
    varList = []                # [attrName, varName] pairs in group order

    # First token: the tag name/pattern; '(name)' additionally captures the
    # matched tag itself as the '__TAG__' variable.
    TAG, npos = getCharsNotInSet(pattern, 'TAG_PATT', npos)
    if TAG[0] == '(' and TAG[-1] == ')':
        tagPattern = TAG[1:-1]
        tags[rootTag]['__TAG__'] = ''
        varList.append([rootTag + ATTRSEP + '__TAG__', '__TAG__'])
    else:
        tagPattern = TAG
    if tagPattern == '__TAG__':
        # '__TAG__' is a wildcard: match any tag name.
        tagPattern = '[a-zA-Z][^\s>]*'
    ATTR = PARAM = VAR = ''
    while 1:
        # Close any '}' scopes pending at the current position.
        sep, npos = skipCharsInSet(pattern, 'PATH_RES', npos)
        while sep:
            if len(rootTagStck) == 1:
                v = 'Closing curly brace without corresponding opening curly brace'
                raise re.error(v)
            rootTagStck.pop()
            rootTag = '.'.join(rootTagStck)
            sep = sep[:-1]
        sep, npos = skipCharsInSet(pattern, 'ATTR_LDEL', npos)
        if npos >= pmax: break
        # Next attribute (or nested-tag) token.
        ATTR, npos = getCharsNotInSet(pattern, 'ATTR_PATT', npos)
        PARAM = VAR = ''
        sep, npos = skipCharsInSet(pattern, 'ATTR_RDEL', npos)
        if sep == PATH_MOD:
            # '{' : ATTR names a nested tag scope; push it and rescan.
            rootTagStck.append(ATTR)
            rootTag = ATTRSEP.join(rootTagStck)
            continue
        if sep[0] in PATH_MOD + EQ and len(sep) > 1:
            v = 'the pattern "%s" is not allowed ' % sep
            raise re.error(v)
        # '(attr)' marks the attribute value as an implicit capture group.
        attrName = ATTR.rstrip(')').lstrip('(')
        if attrName != ATTR:
            if len(attrName) - len(ATTR) == 1:
                # NOTE(review): attrName is never longer than ATTR, so this
                # difference cannot equal 1; the intended test was probably
                # len(ATTR) - len(attrName) == 1 (one unmatched paren) —
                # confirm against the sibling parser above.
                v = 'unbalanced parenthesis'
                raise re.error(v)
            if attrName[0] == ATTRSEP and ATTRSEP not in attrName[1:]:
                v = 'Required tags not allowed as variables'
                raise re.error(v)
            VAR = "group" + str(1 + len(varList))
        # Split 'path.attr' into its tag path and attribute key.
        pathKey, psep, attrKey = attrName.rpartition(ATTRSEP)
        if not pathKey and psep:
            # Leading '.' alone: a required tag, stored under 'root..tag'.
            pathKey = rootTag + 2*ATTRSEP + attrKey
            attrKey = ''
        else:
            pathKey = rootTag + psep + pathKey
        # Rewrite numeric path components '.N' as '[N]'; '[1]' is implicit.
        pathKey = re.sub(r'[.](\d+)(?=[.]*)', r'[\1]', pathKey).replace('[1]', '')
        tags.setdefault(pathKey, {})
        if attrKey:
            tagDict = tags[pathKey]
            tagDict.setdefault(attrKey, '')
        if VAR:
            varName = VAR
            attrName = pathKey + ATTRSEP + attrKey
            varList.append([attrName, varName])
        if sep == "=":
            # attr=... : a quoted literal value, a variable name, or a chain
            # of both (at most two assignments total).
            nt = n1 = n2 = 0
            while 1:
                nt += 1
                if nt == 3:
                    v = 'Triple asignation not allowed'
                    raise re.error(v)
                sep, npos = skipCharsInSet(pattern, 'PARAM_DEL', npos)
                if sep:
                    # Quoted literal value for the attribute.
                    n1 += 1
                    if n1 == 2:
                        v = 'Double attr value not allowed'
                        raise re.error(v)
                    posfin = npos
                    attr = sep
                    # Scan forward to the closing delimiter, tolerating
                    # embedded quote characters inside the value.
                    while 1:
                        attrInc, posfin = getCharsNotInSet(pattern, 'PARAM_PATT', posfin)
                        if attrInc[-1] in '=>': break
                        attr += attrInc
                        sep, posfin = skipCharsInSet(pattern, 'PARAM_DEL', posfin)
                        attr += sep
                    PARAM = attr[1:-1]
                    if attrKey != '__EZONE__':
                        # Anchor the value pattern so it must match in full.
                        tagDict[attrKey] = re.compile(PARAM + r'\Z', re.DOTALL)
                    else:
                        # '__EZONE__' keeps its raw string (processed below).
                        tagDict[attrKey] = PARAM
                    if len(PARAM):
                        npos += len(PARAM) + 1
                    if pattern[npos] != '=': break
                    npos += 1
                    continue
                if not sep:
                    # Unquoted token: a variable name bound to this attribute.
                    n2 += 1
                    if n2 == 2:
                        v = 'Double variable asignation to the same attribute value not allowed'
                        raise re.error(v)
                    VAR, npos = getCharsNotInSet(pattern, 'VAR_PATT', npos)
                    if not attrKey:
                        v = 'Store tags as variables is not allowes'
                        raise re.error(v)
                    attrName = pathKey + ATTRSEP + attrKey
                    varName = VAR.strip('&')
                    if attrName in map(operator.itemgetter(0), varList):
                        v = 'reassigment of attribute '+ attrName + ' to var ' + varName
                        raise re.error(v)
                    if varName in map(operator.itemgetter(1), varList):
                        v = 'redefinition of group name '+ varName + ' as group ' + str(len(varList) + 1)
                        raise re.error(v)
                    varList.append([attrName, varName])
                    if VAR != varName:
                        # '&name&' form: capture the stripped attribute text.
                        tagDict[attrKey] = re.compile("\s*([ \S]*?)\s*" + r'\Z')
                    if pattern[npos] != '=': break
                    npos += 1
    if len(rootTagStck) > 1:
        v = 'Unbalanced curly braces'
        raise re.error(v)
    if 'tagpholder..*' in tags:
        # Wildcard required tag: only legal when no variables were declared.
        if not varList:
            tags.pop('tagpholder..*')
            varList.append(['tagpholder..*', 'text_tags'])
        else:
            v = 'With required Tag ".*", var are not allowed'
            raise re.error(v)
    # '__EZONE__' lists tags whose content is an excluded zone (defaults to
    # comments and scripts); a leading '^[' inverts the sense via incFlag.
    stags = tags[rootTag].get('__EZONE__','[!--|script]')
    if tags[rootTag].has_key('__EZONE__'):
        tags[rootTag].pop('__EZONE__')
    incFlag = not stags.startswith('^[')
    stags = stags.strip("[^]").split('|') if stags else []
    ezones = []
    for stag in stags:
        # Build [open-delimiter, close-delimiter, 0, 0] for each zone;
        # '!--' is the HTML comment special case.
        if stag == '!--':
            stag = '<' + stag
            etag = '-->'
        else:
            etag = '</' + stag + '>'
            stag = '<' + stag
        ezones.append([stag, etag, 0, 0])
    return ExtRegexObject(regexPattern, flags, tagPattern, tags, varList, (ezones, incFlag))
def main():
    """
    Read one line from stdin — expected to be well-formed Babeltrace CLI
    output containing a callstack context for the requested domain — and
    verify that every function name given on the command line appears in
    the recorded callstack.

    Usage: ./<script> (--kernel | --user EXE) FUNC-NAMES...

    Raises:
        ValueError: not enough command-line arguments.
        re.error: no callstack context found in the event line (kept for
            backward compatibility with existing callers/tests).
        Exception: unknown domain, or an expected function is missing.
    """
    expected_callstack = set()
    recorded_callstack = set()
    cs_type = None

    if len(sys.argv) <= 2:
        print(sys.argv)
        raise ValueError('USAGE: ./{} (--kernel|--user EXE) FUNC-NAMES'.format(sys.argv[0]))

    # If the `--user` option is passed, save the next argument as the path
    # to the executable.
    argc = 1
    executable = None
    # BUGFIX: the original used `sys.argv[argc] in '--kernel'`, a SUBSTRING
    # test, so bogus options such as '-' or '--k' silently selected a
    # domain.  Compare for equality instead.
    if sys.argv[argc] == '--kernel':
        rexp = kernel_cs_rexp
        cs_type = 'kernel'
    elif sys.argv[argc] == '--user':
        rexp = user_cs_rexp
        cs_type = 'user'
        argc += 1
        executable = sys.argv[argc]
    else:
        raise Exception('Unknown domain')
    argc += 1

    # Every remaining argument is a function name expected in the callstack.
    expected_callstack.update(sys.argv[argc:])

    # Read the tested line from stdin (only the first line is used).
    event_line = None
    for line in sys.stdin:
        event_line = line
        break

    # Extract the callstack context of the event; no match is an error.
    m = re.match(rexp, event_line)
    if m is None:
        raise re.error('Callstack not found in event line')

    raw_callstack = str(m.group(1))
    if cs_type == 'user':
        recorded_callstack = extract_user_func_names(executable, raw_callstack)
    elif cs_type == 'kernel':
        recorded_callstack = extract_kernel_func_names(raw_callstack)
    else:
        raise Exception('Unknown domain')

    # Verify that all expected functions are present in the callstack.
    for e in expected_callstack:
        if e not in recorded_callstack:
            raise Exception('Expected function name not found in recorded callstack')
    sys.exit(0)
def re_compile(pattern, name):
    """Compile *pattern* with ``re.UNICODE`` or'd into the module-level
    ``flags``; on failure re-raise ``re.error`` with a message that names
    the offending pattern."""
    try:
        compiled = re.compile(pattern, re.UNICODE | flags)
    except re.error as err:
        raise re.error('bad %s pattern "%s": %s' % (name, pattern, err))
    return compiled