def get_info(Term, Subject):
    url = ("https://ssbp.mycampus.ca/prod/bwckschd.p_get_crse_unsec?TRM=U&term_in=" + Term
           + "&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy"
           + "&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy"
           + "&sel_subj=" + Subject + "&sel_crse=&sel_title=&sel_from_cred=&sel_to_cred="
           + "&sel_camp=UON&begin_hh=0&begin_mi=0&begin_ap=a&end_hh=0&end_mi=0&end_ap=a")
    htmltext = urllib.urlopen(url).read()
    regex = '<TH CLASS="ddheader" scope="col" >(.+?)<BR><BR></TH>'
    pattern = re.compile(regex)
    courses = re.split(pattern, htmltext)
    re.purge()
    for course in courses:
        regex = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) \(<ABBR title= "Primary">P</ABBR>\)</TD>'
        regex2 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>'
        regex3 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>'
        regex4 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) \(<ABBR title= "Primary">P</ABBR>\)(.+)?</TD>'
        pattern = re.compile(regex)
        pattern2 = re.compile(regex2)
        pattern3 = re.compile(regex3)
        pattern4 = re.compile(regex4)
        entries = re.findall(pattern3, course)  # this pattern is for courses that do not have a start time or class assigned
        if entries:
            print entries
        else:
            entries = re.findall(pattern2, course)  # this pattern is for instructor TBA
            if entries:
                print entries
            else:
                entries = re.findall(pattern, course)  # this pattern is for default structure of courses
                if entries:
                    print entries
                else:
                    entries = re.findall(pattern4, course)  # this pattern returns two values for instructor
                    print entries
def main():
    times = {}
    html = urllib2.urlopen('http://example.webscraping.com/places/default/view/United-Kingdom-239').read()
    NUM_ITERATIONS = 1000  # number of times to test each scraper
    for name, scraper in ('Regular expressions', regex_scraper), ('Beautiful Soup', beautiful_soup_scraper), ('Lxml', lxml_scraper):
        times[name] = []
        # record start time of scrape
        start = time.time()
        for i in range(NUM_ITERATIONS):
            if scraper == regex_scraper:
                # the regular expression module will cache results
                # so need to purge this cache for meaningful timings
                re.purge()
            result = scraper(html)
            # check scraped result is as expected
            assert(result['area'] == '244,820 square kilometres')
            times[name].append(time.time() - start)
        # record end time of scrape and output the total
        end = time.time()
        print('{}: {:.2f} seconds'.format(name, end - start))
    writer = csv.writer(open('times.csv', 'w'))
    header = sorted(times.keys())
    writer.writerow(header)
    for row in zip(*[times[scraper] for scraper in header]):
        writer.writerow(row)
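# The three scraper callables used by main() above are defined elsewhere and not
# shown here. A minimal sketch of the regex-based one, assuming the usual
# "places" page layout where every field sits in a <td class="w2p_fw"> cell;
# the shortened FIELDS tuple and the row-id naming scheme are illustrative
# assumptions, not the original author's code:
import re

FIELDS = ('area', 'population', 'iso', 'country')  # hypothetical field list

def regex_scraper(html):
    results = {}
    for field in FIELDS:
        # capture the cell contents of the row whose id names the field
        results[field] = re.search(
            '<tr id="places_{}__row">.*?<td class="w2p_fw">(.*?)</td>'.format(field),
            html).group(1)
    return results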
def finditer(content, encodings, charset, min_size):
    '''Generator function that iterates over all string matches inside the
    given content which are at least min_size characters long.

    @param content Binary content to search in
    @param encodings Dictionary of encoding functions
    @param charset An iterable object containing the characters to consider as part of a string
    @param min_size Minimal string size to consider as a string match
    @return A tuple containing the match offset in content, encoding name,
            encoding key and the deobfuscated string reconstructed from the blob found
    '''
    # iterate over available encoding functions
    for encoding_name, (encoding_function, encoding_range) in encodings.items():
        # iterate over all keys in range for that encoding function
        for key in encoding_range:
            encoded_charset = encoding_function(charset, key)
            pattern = '[%s]{%d,}' % (re.escape(encoded_charset), min_size)
            for match in re.finditer(pattern, content):
                # deobfuscation: reconstruct the original string
                deobf = ''.join(charset[encoded_charset.index(c)] for c in match.group(0))
                yield (match.start(0), encoding_name, key, deobf)
            # cleanup regex cache once in a while
            re.purge()
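# A sketch of how the finditer() generator above might be driven. The XOR
# encoder and the single-entry encodings table are illustrative assumptions,
# not part of the original module:
import string

def xor_encode(charset, key):
    # Hypothetical encoding function: XOR every character with a one-byte key.
    return ''.join(chr(ord(c) ^ key) for c in charset)

# name -> (encoding function, key range to brute-force)
encodings = {'xor': (xor_encode, range(1, 256))}

blob = xor_encode('a hidden string inside some binary junk', 0x41)
for offset, name, key, plain in finditer(blob, encodings,
                                         string.ascii_lowercase + ' ', 6):
    print(offset, name, key, plain)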
def clear_cache(self):
    try:
        re.purge()
        dircache.reset()
        tiedobj.reset()
    except Exception, err:
        sys.stderr.write('Crond.clear_cache(): %s\n' % err)
def dash_R_cleanup(fs, ps, pic):
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
def color(adjoining_words_i, data, balises):
    """Colour the groups of contiguous words in a web page"""
    n = len(adjoining_words_i) + 1
    # start with the longest groups
    for i in xrange(n, 1, -1):
        # for each group of words
        for j in adjoining_words_i[i]:
            text = u'(\A|\W)(%s)(\W|\Z)' % (string.join([j[0][k] for k in range(0, i)], '(?:(?:</span>\W?)|\W)'))
            pattern1 = re.compile(text, re.I | re.U | re.S)
            replace = u'\g<1><span class="%s" style="color:blue; background-color:grey;">\g<2></span>\g<3>' % (string.join(j[0], ""))
            data = pattern1.sub(replace, data)
            re.purge()
    # look up the saved locations of the markup tags
    data_color = u''
    flag3 = re.compile(u'#([0-9]+?)#', re.I | re.U | re.S)
    m = flag3.finditer(data)
    k = 0
    # put the tags back into the string
    for j in m:
        data_color += data[k:j.start()] + balises[j.group(1)]
        k = j.end()
    data_color += data[k:]
    return data_color
def retrieve_devpaths():
    pipe = Popen('si projectinfo --devpaths --noacl --noattributes --noshowCheckpointDescription --noassociatedIssues --project="%s"' % sys.argv[1],
                 shell=True, bufsize=1024, stdout=PIPE)
    devpaths = pipe.stdout.read()
    devpaths = devpaths[1:]
    devpaths_re = re.compile(' (.+) \(([0-9][\.0-9]+)\)\n')
    devpath_col = devpaths_re.findall(devpaths)
    re.purge()
    devpath_col.sort(key=lambda x: map(int, x[1].split('.')))  # order development paths by version
    return devpath_col
def get_skips(self, line):
    skip_points = []
    for r in self.skip_rules:
        pattern = '(' + r[0] + ')(' + r[1] + ')'
        matchobjs = re.finditer(pattern, line)
        for i in matchobjs:
            skip_points.append(i.end())
    re.purge()
    return skip_points
def get_breaks(self, line):
    break_points = []
    for r in self.break_rules:
        pattern = '(' + r[0] + ')(' + r[1] + ')'
        matchobjs = re.finditer(pattern, line)
        for i in matchobjs:
            break_points.append(i.end())
    re.purge()
    return break_points
def purge(): """re.purge: Purge internal regular expressions cache.""" def _cache_empty(): return not getattr(re, '_cache') re.match('', '') cache_created = not _cache_empty() re.purge() return cache_created and _cache_empty() and "empty cache"
def test_regex_equality_nocache(self):
    pattern = r'^(?:[a-z0-9\.\-]*)://'
    left = RegexValidator(pattern)
    re.purge()
    right = RegexValidator(pattern)
    self.assertEqual(
        left,
        right,
    )
def check(self, pattern):
    self.model.clear()
    if not pattern:
        return False
    try:
        re.compile(pattern, self.insertFlags())
        re.purge()
        return True
    except re.error as rerr:
        self.model.showError(str(rerr))
        return False
def getRegexpFeatures(dct, number_of_words_per_type, number_of_words, select=None):
    it = list()
    for (mt, sen) in dct.iteritems():
        it.append((len(sen), mt, sen))
    it.sort(reverse=False)
    itt = list()
    for (l, mt, sen) in it:
        random.shuffle(sen)
        itt.append((l, mt, sen[0:1000]))
    regexps = dict()
    ret = list()
    types = list()
    for (_, meme, _sentences) in itt:
        types.extend([meme for _ in _sentences])
    types = [types]
    #glob = regExpChooser()
    #glob.add_types(types)
    for (_, meme_type, sentences) in it:
        if select != None and meme_type != select:
            continue
        regexps[meme_type] = cluster(sentences, meme_type)
        N = len(regexps[meme_type])
        n = 0
        start = time.time()
        loc = regExpChooser()
        loc.add_types(replaceNotEqual(types, meme_type))
        for regexp in regexps[meme_type]:
            re.purge()
            n += 1
            sys.stdout.write("\r[{0}] {1}/{2} RE in {3} s. ({4})".format(
                meme_type, n, N, round(time.time() - start), regexp))
            sys.stdout.flush()
            compiled = re.compile(regexp)
            search_result = list()
            for (_, meme, _sentences) in itt:
                for sent in _sentences:
                    search_result.append(1 if compiled.search(sent.lower()) != None else 0)
            loc.add_regexp(regexp, search_result)
            #glob.add_regexp(regexp, search_result)
        selection = loc.getBest(number_of_words_per_type)
        ret.extend(selection)
        print("\r[{0}] Regular expressions selected in {1} seconds. (best: {2})".format(
            meme_type, time.time() - start, selection[0]))
    #ret.extend(glob.getBest(number_of_words))
    return ret
def markdownify_content(self):
    self.content = re.sub(r'({{% question) "(.*)"(\s*%}})', r'### \2', self.content)
    re.purge()
    self.content = re.sub(r'{{< relref "(\w*)\.md[#\w\-éèà]*"\s*>}}\s*', r'\1', self.content)
    re.purge()
    self.content = re.sub(r'{{% (\w*) "(.*)" *%}}([\s\S]*?){{% \/\1 %}}', r'*\2*\3', self.content)
    re.purge()
    self.content = re.sub(r'\* Exemple : <.*\)', r'', self.content)
    re.purge()
    self.content = re.sub(r'(#+)\s', r'\1# ', self.content)
    re.purge()
def bench_regex_compile(loops, regexes):
    range_it = xrange(loops)
    t0 = perf.perf_counter()
    for _ in range_it:
        for regex, flags in regexes:
            re.purge()
            # ignore result (compiled regex)
            re.compile(regex, flags)
    return perf.perf_counter() - t0
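# A hypothetical driver for the benchmark above, assuming Python 2 (xrange) and
# the perf package (later renamed pyperf) providing perf.perf_counter; the
# pattern list is made up for illustration:
sample_regexes = [
    (r'\d+', 0),
    (r'[a-z]+', re.IGNORECASE),
    (r'(\w+)\s+\1', re.UNICODE),
]
print('compile time: %.6f s' % bench_regex_compile(1000, sample_regexes))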
def remove_links(s, replace_by):
    # strip URLs of the form www.something.com/path
    re.purge()
    temp = re.compile(r"\s*www\.\. \w+\.(com|net|me|org)?(\s|/*[-\w+&@#/%!?=~_:.\[\]()0-9]*)")
    s = temp.sub(replace_by, s)
    # strip http:// links
    temp = re.compile(r"(((http|ftp|https)://\. |(http|ftp|https)://\.)[-/\w.]*)")
    s = temp.sub(replace_by, s)
    temp = re.compile(r"\w+/\w")
    s = temp.sub(replace_by, s)
    return s
def test_regex_compile(count, timer):
    regexes = capture_regexes()
    times = []
    for _ in xrange(count):
        t0 = timer()
        for regex, flags in regexes:
            re.purge()
            re.compile(regex, flags)
        t1 = timer()
        times.append(t1 - t0)
    return times
def process(self, context, collection):
    '''
    Process collection, send names to rename and shared sort.
    '''
    # compare
    compare = []
    # clean
    clean = []
    # clean duplicates
    for name in collection:
        # remove duplicates
        if name[3][0] not in compare:
            # append
            compare.append(name[3][0])
            clean.append(name)
    # done with collection
    collection.clear()
    # name
    for i, name in enumerate(clean):
        rename(self, context, name, i)
    # randomize names (prevents conflicts)
    for name in clean:
        # randomize name
        name[3][0].name = str(random())
    # is shared sort or shared count
    if context.window_manager.BatchShared.sort or context.window_manager.BatchShared.count:
        # sort
        shared.main(self, context, clean, context.window_manager.BatchShared)
    # isnt shared sort or shared count
    else:
        # apply names
        for name in clean:
            name[3][0].name = name[1]
            # count
            if name[1] != name[2]:
                self.count += 1
    # purge re
    re.purge()
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)
    try:
        import zipimport
    except ImportError:
        pass  # Run unmodified on platforms without zipimport support
    else:
        zipimport._zip_directory_cache.clear()
        zipimport._zip_directory_cache.update(zdc)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
def dash_R_cleanup(fs, ps, pic, abcs):
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None
    if _llvm:
        code_types = (types.CodeType, types.FunctionType, types.MethodType)
        for obj in gc.get_objects():
            if isinstance(obj, code_types):
                _llvm.clear_feedback(obj)

    # Collect cyclic trash.
    gc.collect()
def wrap_pieces_in_text(text, ordered_cont_pieces):
    text_length = len(text)
    text = text
    #if text:
    #print('wp text in ok')
    try:
        try:
            re.purge()
            opener_segment = get_segment(text, ordered_cont_pieces[0][0], ordered_cont_pieces[0][1])
        except Exception as e:
            #print('wp_openseg error')
            raise e
        try:
            re.purge()
            closer_segment = get_segment(text, ordered_cont_pieces[-1][0], ordered_cont_pieces[-1][1])
        except Exception as e:
            #print('wp_closeg error')
            raise e
        # Maybe some more checking in case there's some shit at the top/bottom? -- i.e. check
        # by length or content?
        try:
            if ordered_cont_pieces[-1][1] > text_length * 0.7 and '<salute>' in closer_segment:
                text = re.sub(closer_segment, fix_closer_wraps(closer_segment), text)
        except Exception as e:
            #print('wp closersub failed')
            raise e
        try:
            text = re.sub(opener_segment, fix_opener_wraps(opener_segment), text)
        except Exception as e:
            #print('wp openersub failed')
            print(e)
        #print('wp_ index error not triggered')
    except IndexError:
        # presumably from fail if there is only one segment identified
        #print('wp_index error')
        # fixed: the original referenced the undefined name cont_pieces here
        opener_segment = get_segment(text, ordered_cont_pieces[0][0], ordered_cont_pieces[0][1])
        text = re.sub(opener_segment, fix_opener_wraps(opener_segment), text)
    except Exception as e:
        #print('wp_general exception', e)
        raise e
    # Remove all remaining temps
    text = re.sub(r'<TEMP>', '', text)
    text = re.sub(r'</TEMP>', '', text)
    #print(text)
    return text
def findPrice(product, logger, host):
    product_url = "/dp/" + product.id
    conn = http.client.HTTPConnection(host)
    conn.request("GET", product_url)
    r1 = conn.getresponse()
    dataRep = r1.read().decode("UTF-8")
    conn.close()
    shortRep = re.findall("<span.*priceblock_.*/span>", dataRep)[-1]
    shortRep = parsePrice(shortRep)
    re.purge()
    price = float(shortRep)
    logger.info(str(product) + " :: " + shortRep)
    if (product.setPrice(price)):
        logger.info("New price on product " + str(product) + " at " + str(product.price)
                    + " Link : http://" + host + "/dp/" + str(product.id))
def _replaceBrackets(self, string):
    """
    Resolves property variable within a string into a string
    :param string:
    :return:
    """
    m = re.findall(self.regEx, str(string))
    if m:
        for key in m:
            value = self.getItem(key, self.rawParameters)
            if re.findall(self.regEx, str(value)):
                value = self._replaceBrackets(value)
            string = string.replace('[' + key + ']', value)
    re.purge()
    gc.collect()
    return string
def format(self, data, format, filter):
    """
    Function to process the text collected by the spider.

    :param format: The expected format.
    :param filter: The output format.
    """
    if type(filter) == int:
        filter = "\\" + str(filter)
    _result = re.subn(format, filter, data)
    _data = _result[0] if _result[1] >= 1 else ""
    re.purge()
    return _data
def dash_R_cleanup(fs, ps, pic, abcs):
    import gc, copyreg
    import _strptime, linecache
    import urllib.parse, urllib.request, mimetypes, doctest
    import struct, filecmp, _abcoll
    from distutils.dir_util import _path_created
    from weakref import WeakSet

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copyreg.dispatch_table.clear()
    copyreg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc in [getattr(_abcoll, a) for a in _abcoll.__all__]:
        if not isabstract(abc):
            continue
        for obj in abc.__subclasses__() + [abc]:
            obj._abc_registry = abcs.get(obj, WeakSet()).copy()
            obj._abc_cache.clear()
            obj._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urllib.parse.clear_cache()
    urllib.request.urlcleanup()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
def trace_memory_clean_caches(self):
    """ Avoid polluting results with some builtin python caches """
    urlparse.clear_cache()
    re.purge()
    linecache.clearcache()
    copy_reg.clear_extension_cache()
    if hasattr(fnmatch, "purge"):
        fnmatch.purge()  # pylint: disable=no-member
    elif hasattr(fnmatch, "_purge"):
        fnmatch._purge()
    if hasattr(encodings, "_cache") and len(encodings._cache) > 0:
        encodings._cache = {}
    context.log.handler.flush()
def get_segment(text, s, e):
    try:
        regex = r'[\s\S]*'
        re.purge()
        pattern = re.compile(regex)
        #print(pattern)
        try:
            segment = pattern.search(text, s, e).group()
            return segment
        except Exception as e:
            #print('getseg regex err -', e)
            raise e
    except Exception as e:
        #print('getseg error', e)
        raise e
def set(cls, ssquo=None, esquo=None, sdquo=None, edquo=None, dir=None):
    """
    Set the HTML entities (and indirectly, the Unicode glyphs)
    used to represent starting and ending single and double
    quotes, respectively, and language direction.
    """
    if ssquo is not None:
        cls.SSQUO = ssquo
    if esquo is not None:
        cls.ESQUO = esquo
    if sdquo is not None:
        cls.SDQUO = sdquo
    if edquo is not None:
        cls.EDQUO = edquo
    if dir is not None:
        if cls.direction != dir:
            re.purge()
        cls.direction = dir
        cls.direction_explicit = True
def run():
    responses.add(responses.GET, "http://example.com/zero")
    responses.add(responses.GET, "http://example.com/one")
    responses.add(responses.GET, "http://example.com/two")
    responses.add(responses.GET, re.compile(r"http://example\.com/three"))
    responses.add(responses.GET, re.compile(r"http://example\.com/four"))
    re.purge()
    responses.remove(responses.GET, "http://example.com/two")
    responses.remove(Response(method=responses.GET, url="http://example.com/zero"))
    responses.remove(responses.GET, re.compile(r"http://example\.com/four"))
    with pytest.raises(ConnectionError):
        requests.get("http://example.com/zero")
    requests.get("http://example.com/one")
    with pytest.raises(ConnectionError):
        requests.get("http://example.com/two")
    requests.get("http://example.com/three")
    with pytest.raises(ConnectionError):
        requests.get("http://example.com/four")
def addItem(self):
    """Add Items from Locate command."""
    start_time = datetime.now().second
    self.stringlist.clear()
    lineText = self.lineEdit.text()
    if len(lineText) and str(lineText).strip() not in self.history:
        self.history.append(lineText + "\n")
        self.historyCurrentItem = 1
        self.saveHistory()
    self.historyCurrentItem = self.historyCurrentItem - 1
    command = "ionice --ignore --class 3 chrt --idle 0 "  # Nice CPU / IO
    command += "locate --ignore-case --existing --quiet --limit 9999 {}"
    condition = str(self.applet.configurations.readEntry("Home")) == "true"
    if len(str(lineText).strip()) and condition:
        command_to_run = command.format(  # Only Search inside Home folders
            path.join(path.expanduser("~"), "*{}*".format(lineText)))
    else:
        command_to_run = command.format(lineText)
    locate_output = Popen(command_to_run, shell=True, stdout=PIPE).stdout
    results = tuple(locate_output.readlines())
    banned = self.applet.configurations.readEntry("Banned")
    banned_regex_pattern = str(banned).strip().lower().replace(" ", "|")
    for item in results:
        if not search(banned_regex_pattern, str(item)):  # banned words
            self.stringlist.append(item[:-1])
    purge()  # Purge RegEX Cache
    self.model.setStringList(self.stringlist)
    self.treeview.nativeWidget().resizeColumnToContents(0)
    number_of_results = len(results)
    if number_of_results:  # if items found Focus on item list
        self.lineEdit.nativeWidget().clear()
        self.label.setText("Found {} results on {} seconds !".format(
            number_of_results, abs(datetime.now().second - start_time)))
        self.resize(500, 12 * number_of_results)
        self.treeview.nativeWidget().show()
        self.treeview.nativeWidget().setFocus()
    else:  # if no items found Focus on LineEdit
        self.label.setText("Search")
        self.resize(self.minimumSize())
        self.treeview.nativeWidget().hide()
        self.lineEdit.nativeWidget().selectAll()
        self.lineEdit.nativeWidget().setFocus()
    for field in FIELDS:
        results[field] = soup.find('help')
    return results

def lxml_scraper(html):
    tree = lxml.html.fromstring(html)
    results = {}
    for field in FIELDS:
        results[field] = tree.cssselect('help')
    return results

times = {}
html = '''<body>help</body>'''
for name, scraper in ('Regular expressions', regex_scraper), ('Beautiful Soup', beautiful_soup_scraper), ('Lxml', lxml_scraper):
    times[name] = []
    start = time.time()
    for i in range(1000):
        if scraper == regex_scraper:
            re.purge()  # RE will use cache so we purge (clean) it.
        result = scraper(html)
        # check scraped result is as expected
        times[name].append(time.time() - start)
    # record end time of scrape and output the total
    end = time.time()
    print '{}: {:.2f} seconds'.format(name, end - start)
def purge(): """Purge caches.""" _purge_cache() _re.purge()
def regular():
    data = "She is more than pretty. 520"

    # --- Regular expressions ---
    reg = r"mo"          # literal characters => span=(7, 9), match='mo'
    reg = r"."           # (.) any single character => span=(0, 1), match='S'
    reg = r"\."          # (\) escape character => span=(23, 24), match='.'
    reg = r"[.]"         # ([]) character set (note: some special characters lose their special meaning) => span=(23, 24), match='.'
    reg = r"[love]"      # any character inside [] => span=(2, 3), match='e'
    reg = r"[i-u]"       # (-) range => span=(4, 5), match='i'
    reg = r"t{2}"        # {} repeat count => span=(20, 22), match='tt'
    reg = r"t{1,3}"      # {M,} / {,N} / {N} => span=(12, 13), match='t'
    reg = r"(i|o|u){1}"  # (()) group => span=(4, 5), match='i'
    reg = r"^S"          # (^) start of string => span=(0, 1), match='S'
    reg = r"[^S]"        # ([^]) negated set (excludes the listed characters) => span=(1, 2), match='h'
    reg = r"520$"        # ($) end of string => span=(25, 28), match='520'
    reg = r"et*"         # (*) matches {0,} of the expression => ['e', 'e', 'ett']
    reg = r"et+"         # (+) matches {1,} of the expression => ['ett']
    reg = r"et?"         # (?) matches {0,1} of the expression => ['e', 'e', 'et']
    reg = r".+?e"        # (?) non-greedy mode (span=(0, 20), match='She is more than pre' => span=(0, 3), match='She')
    reg = r"\145"        # octal ASCII code (octal 145 = decimal 101 = 'e') => span=(2, 3), match='e'
    reg = r"\d"          # (\d) single digit => span=(25, 26), match='5' (recommended: [0-9])
    reg = r"\D"          # (\D) non-digit => span=(0, 1), match='S' (recommended: [^0-9])
    reg = r"\s"          # (\s) whitespace character => span=(3, 4), match=' ' (recommended: [\t\n\r\f\v])
    reg = r"\S"          # (\S) non-whitespace => span=(0, 1), match='S' (recommended: [^\t\n\r\f\v])
    reg = r"\w"          # (\w) word character => span=(0, 1), match='S' (recommended: [a-zA-Z0-9_])
    reg = r"\W"          # (\W) non-word character => span=(3, 4), match=' ' (recommended: [^a-zA-Z0-9_])
    reg = r"\AS"         # (\A) start of string => span=(0, 1), match='S'
    reg = r"520\Z"       # (\Z) end of string => span=(25, 28), match='520'
    reg = r"y\b"         # (\b) word boundary (Hello) => span=(22, 23), match='y'
    reg = r"o\B"         # (\B) non word boundary (world) => span=(8, 9), match='o'
    reg = r"[01]\d\d|2[0-4]\d|25[0-5]"  # alternation (|), multi-digit (matches numbers from 0 to 255)

    index = re.search(reg, data)       # find a single match
    index = re.match(r"She", data)     # match at the beginning => span=(0, 3), match='She'
    index = re.fullmatch(r".+", data)  # match the whole string => span=(0, 28), match='She is more than pretty. 520'
    lists = re.findall(reg, data)      # find all matches (list)
    lists = re.split(r"o", data, maxsplit=1)   # split the string on the regex (maxsplit = number of splits) => ['She is m', 're than pretty. 520']
    strs = re.sub(r"\.", r"!", data, count=1)  # substitute (count: number of replacements; unmatched text kept as-is) => She is more than pretty! 520
    re.purge()  # clear the regular expression cache

    # --- Compiled pattern objects ---
    pat = re.compile(r"e")  # compile into a pattern object
    index = pat.search(data)           # find a single match => span=(2, 3), match='e'
    index = pat.search(data, 5)        # => span=(10, 11), match='e'
    index = pat.search(data, 1, 10)
    index = pat.match(data)            # match at the beginning => None
    index = pat.match(data, 2)         # => span=(2, 3), match='e'
    index = pat.match(data, 1, 10)
    index = pat.fullmatch(data)        # match the whole string => None
    index = pat.fullmatch(data, 2)     # => None
    index = pat.fullmatch(data, 2, 3)  # span=(2, 3), match='e'
    lists = pat.split(data, maxsplit=0)   # split => ['Sh', ' is mor', ' than pr', 'tty. 520']
    lists = pat.findall(data)          # find all => ['e', 'e', 'e']
    lists = pat.findall(data, 5)       # => ['e', 'e']
    lists = pat.findall(data, 1, 10)   # => ['e']
    strs = pat.sub(r"o", data, count=0)  # substitute => Sho is moro than protty. 520

    # --- Match ---
    match = index          # span=(2, 3), match='e'
    strs = match.string    # the data that was matched against => She is more than pretty. 520
    strs = match.group()   # get the matched text => e
    pos = match.pos        # => 2
    pos = match.endpos     # => 3
def all(): """Translation of the entire DocFX project""" global processed global greenFlag global reqs global chars RepoCheck() while not processed: if greenFlag: for item in Path().iterdir(): if item.name != sourceDir and item.name in list(map(lambda x: '_'.join(x.split('-')).lower(), targetLangs)) and item.is_dir(): shutil.rmtree(item.name) processed = True if greenFlag: for path in targetPaths: os.mkdir(path) for entry in Path(sourceDir).iterdir(): if entry.is_dir(): dirLevel2 = sourceDir + '/' + entry.name if greenFlag: for path in targetPaths: os.mkdir(path + '/' + entry.name) for entry2 in Path(dirLevel2).iterdir(): if entry2.is_dir(): tgSeg = '/' + entry.name + '/' + entry2.name dirLevel3 = dirLevel2 + '/' + entry2.name if greenFlag: for path in targetPaths: os.mkdir(path + tgSeg) for entry3 in Path(dirLevel3).iterdir(): if entry3.is_dir(): tgSeg = '/' + entry.name + '/' + entry2.name + '/' + entry3.name dirLevel4 = dirLevel3 + '/' + entry3.name if greenFlag: for path in targetPaths: os.mkdir(path + tgSeg) for entry4 in Path(dirLevel4).iterdir(): if entry4.is_dir(): tgSeg = '/' + entry.name + '/' + entry2.name + '/' + entry3.name + '/' + entry4.name dirLevel5 = dirLevel4 + '/' + entry4.name if greenFlag: for path in targetPaths: os.mkdir(path + tgSeg) for entry5 in Path(dirLevel5).iterdir(): if not entry5.is_dir(): if greenFlag: ProcessFiles(dirLevel5 + '/' + entry5.name) else: stats.append(FileStats(dirLevel5 + '/' + entry5.name)) else: if greenFlag: ProcessFiles(dirLevel4 + '/' + entry4.name) else: stats.append(FileStats(dirLevel4 + '/' + entry4.name)) else: if greenFlag: ProcessFiles(dirLevel3 + '/' + entry3.name) else: stats.append(FileStats(dirLevel3 + '/' + entry3.name)) else: if greenFlag: ProcessFiles(dirLevel2 + '/' + entry2.name) else: stats.append(FileStats(dirLevel2 + '/' + entry2.name)) else: if greenFlag: ProcessFiles(sourceDir + '/' + entry.name) else: stats.append(FileStats(sourceDir + '/' + entry.name)) if not greenFlag: fls = list(filter(lambda x: x is not None, stats)) nFls = len(fls)*len(targetLangs) print('\n Target languages:\t\t\t' + ', '.join(targetLangs)) print(' Total of source language files:\t' + str(len(fls))) for i in range(len(fls)): reqs += fls[i][0] chars += fls[i][1] estimatedT = int(reqs * 1.3) print(' Total of source language characters:\t' + str(chars)) print(' Total of files to be generated:\t' + str(nFls)) print(' Total of calls to translation service:\t' + str(reqs)) print(' Total of characters for translation:\t' + str(chars * len(targetLangs))) print(' Estimated process duration:\t\t' + str(datetime.timedelta(seconds = estimatedT))) cont = input('\n Continue [c] or abort [Enter]? ') if cont == 'c': greenFlag = True else: break if greenFlag and not len(haltedTranslation): PrGreen('\n Completed successfully!') else: if greenFlag: PrYellow("The following files could neither be processed nor copied to target language directories:") for notTranslated in haltedTranslation: print(' ' + notTranslated) PrRed('\n Exiting...') time.sleep(1) re.purge() exit()
def basic_operation():
    r"""
    # Special sequence.
    \number \A \b \B \d \D \s \S \w \W \Z
    # Standard escape.
    \a \b \f \n \N \r \t \u \U \v \x \\

    # Flag.
    re.A, re.ASCII
    re.I, re.IGNORECASE
    re.L, re.LOCALE
    re.M, re.MULTILINE
    re.S, re.DOTALL
    re.U, re.UNICODE
    re.X, re.VERBOSE
    re.DEBUG

    re.search(pattern, string, flags=0)
        Scan through string looking for the first location where the regular expression pattern produces a match.
    re.match(pattern, string, flags=0)
        If zero or more characters at the beginning of string match the regular expression pattern.
    re.fullmatch(pattern, string, flags=0)
        If the whole string matches the regular expression pattern.
    re.split(pattern, string, maxsplit=0, flags=0)
    re.findall(pattern, string, flags=0)
    re.finditer(pattern, string, flags=0)
    re.sub(pattern, repl, string, count=0, flags=0)
    re.subn(pattern, repl, string, count=0, flags=0)
    re.escape(pattern)
    re.purge()
    """

    #--------------------
    # Search.

    # *, +, ?.
    # The '*', '+', and '?' qualifiers are all greedy; they match as much text as possible.
    # If the RE <.*> is matched against '<a> b <c>', it will match the entire string, and not just '<a>'.
    # *?, +?, ??.
    # Adding ? after the qualifier makes it perform the match in non-greedy or minimal fashion; as few characters as possible will be matched.
    # Using the RE <.*?> will match only '<a>' against '<a> b <c>'.
    re.search(r'''['"].*['"]''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: '\'cd\'ef\'gh\'ij"kl"mn\'op"qr"st\'uv"wx\'yz\'AB"'.
    re.search(r'''['"].*?['"]''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: "'cd'".

    # (...): Group.
    # (?P<name>...): Named group.
    # (?P=name): Backreference to a named group.
    re.search(r'''(?P<quote>['"]).*(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: '\'cd\'ef\'gh\'ij"kl"mn\'op"qr"st\'uv"wx\'yz\''.
    re.search(r'''(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: "'cd'".
    re.search(r'''(?P<asterisk>\*).*?(?P=asterisk)|(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: "'cd'".

    # (?=...): Lookahead assertion.
    # (?!...): Negative lookahead assertion.
    # (?<=...): Positive lookbehind assertion.
    # (?<!...): Negative lookbehind assertion.

    # (?!...): Negative lookahead assertion.
    re.search(r'(?!ABC)\w*', 'Aabcde')    # Matched.
    re.search(r'(?!ABC)\w*', 'Babcde')    # Matched.
    re.search(r'(?!ABC)\w*', 'Cabcde')    # Matched.
    re.search(r'(?!ABC)\w*', 'ABabcde')   # Matched.
    re.search(r'(?!ABC)\w*', 'BCabcde')   # Matched.
    re.search(r'(?!ABC)\w*', 'ABCabcde')  # Unmatched.

    #--------------------
    # Match.

    # [^...]: Complementation of a set of characters.
    # The first character.
    re.match(r'[^A]\w*', 'abcde')     # Matched.
    re.match(r'[^A]\w*', 'Babcde')    # Matched.
    re.match(r'[^A]\w*', 'Aabcde')    # Unmatched.
    re.match(r'[^ABC]\w*', 'abcde')   # Matched.
    re.match(r'[^ABC]\w*', 'aAabcde') # Matched.
    re.match(r'[^ABC]\w*', 'Aabcde')  # Unmatched.
    re.match(r'[^ABC]\w*', 'Babcde')  # Unmatched.
    re.match(r'[^ABC]\w*', 'Cabcde')  # Unmatched.
    # The second character.
    re.match(r'\w[^A]\w*', 'abcde')   # Matched.
    re.match(r'\w[^A]\w*', 'aBabcde') # Matched.
    re.match(r'\w[^A]\w*', 'aAabcde') # Unmatched.
    # The first and second characters.
    re.match(r'[^A][^B]\w*', 'abcde')   # Matched.
    re.match(r'[^A][^B]\w*', 'Babcde')  # Matched.
    re.match(r'[^A][^B]\w*', 'Aabcde')  # Unmatched.
    re.match(r'[^A][^B]\w*', 'aBabcde') # Unmatched.

    #--------------------
    # Split.

    re.split(r'\W+', 'Words, words, words.')
    re.split(r'(\W+)', 'Words, words, words.')
    re.split(r'\W+', 'Words, words, words.', 1)
    re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
    re.split(r'(\W+)', '...words, words...')

    #--------------------
    # Find.

    re.findall(r'''['"].*?['"]''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: ["'cd'", "'gh'", '"kl"', '\'op"', '"st\'', '"wx\'', '\'AB"'].
    re.findall(r'''(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: ["'", "'", '"', "'", '"'].
    re.findall(r'''['"].*?['"]|\*.*?\*''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: ["'cd'", "'gh'", '"kl"', '*op*', '\'st"', '"wx\'', '*AB*', '"EF\'', '\'IJ"', '*MN*'].
    re.findall(r'''(?P<quote>['"]).*?(?P=quote)|(?P<asterisk>\*).*?(?P=asterisk)''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: [("'", ''), ("'", ''), ('"', ''), ('', '*'), ("'", ''), ('', '*'), ('"', ''), ('', '*')].
    re.findall(r'''(?P<asterisk>\*).*?(?P=asterisk)|(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: [('', "'"), ('', "'"), ('', '"'), ('*', ''), ('', "'"), ('*', ''), ('', '"'), ('*', '')].

    #--------------------
    # Substitute.

    def dash_repl(match):
        if match.group(0) == '-':  # The entire match.
            return ' '
        else:
            return '-'

    re.sub('-{1,2}', '-', 'pro----gram-files')  # Result: "pro--gram-files".
    re.sub('-{1,2}', dash_repl, 'pro----gram-files')  # Result: "pro--gram files".
    re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)  # Result: "Baked Beans & Spam".
    re.subn('-{1,2}', dash_repl, 'pro----gram-files')  # Result: "('pro--gram files', 3)".
    re.subn(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)  # Result: "('Baked Beans & Spam', 1)".

    #--------------------
    re.escape('http://www.python.org')  # Result: "http://www\\.python\\.org".
    re.purge()  # Clear the regular expression cache.

    #--------------------
    try:
        re.compile('[a-z+')
    except re.error as ex:
        print('re.error: {}.'.format(ex))
def check_file(filename):
    """Check input file and verify its content.

    Checks that the file begins with HEX, BIN or ASC keyword, verifies the
    claimed content, and splits it into appropriate chunks. Returns integer
    (0=invalid, 1=HEX, 2=BIN, 3=ASCII) and the chunked file content or error
    message.

    Valid example hex file content:
    HEX 35 00 FF A2 81 9B E3
    """
    file = open(filename, 'r')
    file_content = file.read()
    if len(file_content) < 3 or file_content.isspace():
        file.close()
        return (0, 'File content must begin with a keyword (HEX, BIN or ASC)!')
    # First 3 characters should represent the base of the content.
    base = file_content[0:3]
    file_content = file_content[3:]
    forbidden_chars = {'BIN': [None], 'HEX': [None]}
    # Content is claimed to be hexadecimal:
    if base == 'HEX':
        file_content = ''.join(file_content.split())
        file_content = file_content.upper()
        if len(file_content) < 2:
            file.close()
            return (0, 'File must contain at least 1 byte of data after the keyword!')
        mod = len(file_content) % 2
        if mod != 0:
            return (0, 'File must contain full bytes of data (2 hex digits = 1 byte)!')
        # Use regular expression for verifying the content.
        if re.match('[0-9A-F]+$', file_content):
            content = ''
            for start in range(0, len(file_content), 2):
                if start + 2 <= len(file_content):
                    content += file_content[start:start + 2] + ' '
                else:
                    content += file_content[start:]  # add the remainings
            content = content.rstrip()  # remove possible whitespace at the end
            # Check that the file doesn't contain any forbidden control characters
            for val in content.split():
                if val in forbidden_chars['HEX']:
                    file.close()
                    return (0, 'File must not contain other control characters than TAB, LF or CR!')
            # Return type indicator and the chopped content.
            file.close()
            return (1, content)
        else:
            file.close()
            return (0, 'File content was invalid hexadecimal data!')
    # Content is claimed to be binary:
    elif base == 'BIN':
        file_content = ''.join(file_content.split())
        if len(file_content) < 8:
            file.close()
            return (0, 'File must contain at least 1 byte of data after the keyword!')
        mod = len(file_content) % 8
        if mod != 0:
            return (0, 'File must contain full bytes of data (8 bits = 1 byte)!')
        # Use regular expression for verifying the content.
        re.purge()  # clear regex cache
        if re.match('[0-1]+$', file_content):
            content = ''
            for start in range(0, len(file_content), 8):
                if start + 8 <= len(file_content):
                    content += file_content[start:start + 8] + ' '
                else:
                    content += file_content[start:]  # add the remainings
            content = content.rstrip()  # remove possible whitespace at the end
            # Check that the file doesn't contain any forbidden control characters
            for val in content.split():
                if val in forbidden_chars['BIN']:
                    file.close()
                    return (0, 'File must not contain other control characters than TAB, LF or CR!')
            # Return type indicator and the chopped content.
            file.close()
            return (2, content)
        else:
            file.close()
            return (0, 'File content was invalid binary data!')
    # Content is claimed to be ASCII:
    elif base == 'ASC':
        escape_chars = ['\a', '\b', '\f', '\n', '\r', '\t', '\v']
        escape_letters = ['a', 'b', 'f', 'n', 'r', 't', 'v']
        # Use regular expression for verifying the content.
        re.purge()  # clear regex cache
        if re.match('[\x00-\x7F]+$', file_content):  # [\x20-\x7E]
            # Check that the file doesn't contain any forbidden control characters
            for c in file_content:
                if binascii.hexlify(c).upper() in forbidden_chars['HEX']:
                    file.close()
                    return (0, 'File contains illegal control characters!')
            for c in escape_chars:
                if file_content.count(c) != 0:
                    file_content = file_content.replace(c, '')
            # Replace all "\\n", "\\r" etc. with "\n", "\r" etc. (i.e. remove
            # the extra backslash) so that the control characters are interpreted
            # correctly into hex values.
            for c in range(0, len(file_content)):
                if file_content[c:c + 1] == '\\':
                    if file_content[c + 1:c + 2] in escape_letters:
                        for e in escape_letters:
                            if file_content[c + 1:c + 2] == e:
                                file_content = (file_content[:c]
                                                + escape_chars[escape_letters.index(e)]
                                                + file_content[c + 2:])
                                break
                    else:
                        return (0, 'File contains illegal control characters!\n\n'
                                + 'Legal characters after a backslash are: a, b, f, n, r, t, and v.')
            # Return type indicator and the file content.
            file.close()
            return (3, file_content)
        else:
            file.close()
            return (0, 'File content was invalid ASCII data!')
    # Content is invalid:
    else:
        file.close()
        return (0, 'File content must begin with a keyword (HEX, BIN or ASC)!')
import gc
print "Is Enabled? %s" % gc.isenabled()
print gc.set_debug(gc.DEBUG_STATS)
#print gc.set_debug(gc.DEBUG_UNCOLLECTABLE)
#print gc.set_debug(gc.DEBUG_COLLECTABLE)
print gc.get_debug()
print "Objects: %s" % gc.garbage
print "Collect: %s" % gc.collect()

import re
print "re: %s" % re.purge()
def replace_numbers(s, replace_by):
    re.purge()
    # restored: the compile was commented out in the original, leaving `temp` undefined
    temp = re.compile(ur'([0-9]+(st|th|rd|nd|,[0-9]+|.[0-9]+)?)', re.UNICODE)
    s = temp.sub(replace_by, s)
    return s
def findAllMatches(string, pattern, flag=re.MULTILINE | re.DOTALL):
    regex = re.compile(pattern, flag)
    matches = regex.findall(string)
    re.purge()
    return matches
def test_sanity_re(self):
    '''
    Basic sanity tests for the re module. Each module member is used at least once.
    '''
    #compile
    self.assertTrue(hasattr(re.compile("(abc){1}"), "pattern"))
    self.assertTrue(hasattr(re.compile("(abc){1}", re.L), "pattern"))
    self.assertTrue(hasattr(re.compile("(abc){1}", flags=re.L), "pattern"))

    #I IGNORECASE L LOCALE M MULTILINE S DOTALL U UNICODE X VERBOSE
    flags = ["I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE",
             "S", "DOTALL", "U", "UNICODE", "X", "VERBOSE"]
    for f in flags:
        self.assertTrue(hasattr(re, f))

    #search
    self.assertEqual(re.search("(abc){1}", ""), None)
    self.assertEqual(re.search("(abc){1}", "abcxyz").span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", "abcxyz", re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", "abcxyz", flags=re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", "xyzabc").span(), (3, 6))
    self.assertEqual(re.search("(abc){1}", buffer("")), None)
    self.assertEqual(re.search("(abc){1}", buffer("abcxyz")).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", buffer("abcxyz"), re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", buffer("abcxyz"), flags=re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", buffer("xyzabc")).span(), (3, 6))

    #match
    self.assertEqual(re.match("(abc){1}", ""), None)
    self.assertEqual(re.match("(abc){1}", "abcxyz").span(), (0, 3))
    self.assertEqual(re.match("(abc){1}", "abcxyz", re.L).span(), (0, 3))
    self.assertEqual(re.match("(abc){1}", "abcxyz", flags=re.L).span(), (0, 3))

    #split
    self.assertEqual(re.split("(abc){1}", ""), [''])
    self.assertEqual(re.split("(abc){1}", "abcxyz"), ['', 'abc', 'xyz'])
    #maxsplit
    self.assertEqual(re.split("(abc){1}", "abc", 0), ['', 'abc', ''])
    for i in xrange(3):
        self.assertEqual(re.split("(abc){1}", "abc", maxsplit=i), ['', 'abc', ''])
        self.assertEqual(re.split("(abc){1}", "", maxsplit=i), [''])
        self.assertEqual(re.split("(abc){1}", "abcxyz", maxsplit=i), ['', 'abc', 'xyz'])
    self.assertEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=0), ['', 'abc', 'xyz', 'abc', ''])
    self.assertEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=1), ['', 'abc', 'xyzabc'])
    self.assertEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=2), ['', 'abc', 'xyz', 'abc', ''])

    #findall
    self.assertEqual(re.findall("(abc){1}", ""), [])
    self.assertEqual(re.findall("(abc){1}", "abcxyz"), ['abc'])
    self.assertEqual(re.findall("(abc){1}", "abcxyz", re.L), ['abc'])
    self.assertEqual(re.findall("(abc){1}", "abcxyz", flags=re.L), ['abc'])
    self.assertEqual(re.findall("(abc){1}", "xyzabcabc"), ['abc', 'abc'])

    #finditer
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "")], [])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz")], ['abc'])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", re.L)], ['abc'])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", flags=re.L)], ['abc'])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "xyzabcabc")], ['abc', 'abc'])
    rex = re.compile("foo")
    for m in rex.finditer("this is a foo and a foo bar"):
        self.assertEqual((m.pos, m.endpos), (0, 27))
    for m in rex.finditer(""):
        self.assertEqual((m.pos, m.endpos), (0, 1))
    for m in rex.finditer("abc"):
        self.assertEqual((m.pos, m.endpos), (0, 4))
    for m in rex.finditer("foo foo foo foo foo"):
        self.assertEqual((m.pos, m.endpos), (0, 19))

    #sub
    self.assertEqual(re.sub("(abc){1}", "9", "abcd"), "9d")
    self.assertEqual(re.sub("(abc){1}", "abcxyz", 'abcd'), "abcxyzd")
    self.assertEqual(re.sub("(abc){1}", "1", "abcd", 0), "1d")
    self.assertEqual(re.sub("(abc){1}", "1", "abcd", count=0), "1d")
    self.assertEqual(re.sub("(abc){1}", "1", "abcdabcd", 1), "1dabcd")
    self.assertEqual(re.sub("(abc){1}", "1", "abcdabcd", 2), "1d1d")
    self.assertEqual(re.sub("(abc){1}", "1", "ABCdabcd", 2, flags=re.I), "1d1d")

    #subn
    self.assertEqual(re.subn("(abc){1}", "9", "abcd"), ("9d", 1))
    self.assertEqual(re.subn("(abc){1}", "abcxyz", 'abcd'), ("abcxyzd", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcd", 0), ("1d", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcd", count=0), ("1d", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcdabcd", 1), ("1dabcd", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcdabcd", 2), ("1d1d", 2))
    self.assertEqual(re.subn("(abc){1}", "1", "ABCdabcd", 2, flags=re.I), ("1d1d", 2))

    #escape
    self.assertEqual(re.escape("abc"), "abc")
    self.assertEqual(re.escape(""), "")
    self.assertEqual(re.escape("_"), "\\_")
    self.assertEqual(re.escape("a_c"), "a\\_c")

    #error
    exc = re.error()
    exc = re.error("some args")

    #purge
    re.purge()
def describe_regex(regex):
    re.purge()
    re.compile(regex, re.DEBUG)
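# re.DEBUG makes re.compile() print the parsed pattern tree to stdout, so
# describe_regex() above is called purely for that side effect; for example:
describe_regex(r'(ab|cd)+')
# prints something like:
#   MAX_REPEAT 1 MAXREPEAT
#     SUBPATTERN 1 0 0
#       BRANCH
#         LITERAL 97
#         LITERAL 98
#       OR
#         LITERAL 99
#         LITERAL 100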
def clear_caches():
    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Flush standard output, so that buffered data is sent to the OS and
    # associated Python objects are reclaimed.
    for stream in (sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__):
        if stream is not None:
            stream.flush()

    # Clear assorted module caches.
    # Don't worry about resetting the cache if the module is not loaded
    try:
        distutils_dir_util = sys.modules['distutils.dir_util']
    except KeyError:
        pass
    else:
        distutils_dir_util._path_created.clear()
    re.purge()

    try:
        _strptime = sys.modules['_strptime']
    except KeyError:
        pass
    else:
        _strptime._regex_cache.clear()

    try:
        urllib_parse = sys.modules['urllib.parse']
    except KeyError:
        pass
    else:
        urllib_parse.clear_cache()

    try:
        urllib_request = sys.modules['urllib.request']
    except KeyError:
        pass
    else:
        urllib_request.urlcleanup()

    try:
        linecache = sys.modules['linecache']
    except KeyError:
        pass
    else:
        linecache.clearcache()

    try:
        mimetypes = sys.modules['mimetypes']
    except KeyError:
        pass
    else:
        mimetypes._default_mime_types()

    try:
        filecmp = sys.modules['filecmp']
    except KeyError:
        pass
    else:
        filecmp._cache.clear()

    try:
        struct = sys.modules['struct']
    except KeyError:
        pass
    else:
        # TODO: fix
        # struct._clearcache()
        pass

    try:
        doctest = sys.modules['doctest']
    except KeyError:
        pass
    else:
        doctest.master = None

    try:
        ctypes = sys.modules['ctypes']
    except KeyError:
        pass
    else:
        ctypes._reset_cache()

    try:
        typing = sys.modules['typing']
    except KeyError:
        pass
    else:
        for f in typing._cleanups:
            f()

    support.gc_collect()
def execute(self, message):
    """
    :type message: IrcMessage
    """
    # Immediately check if there's any parameters, to prevent useless work
    if message.messagePartsLength == 0:
        message.reply("Please provide a term to search for. See '{}help {}' for an explanation how to use this command".format(message.bot.commandPrefix, message.trigger), "say")
        return
    searchType = message.messageParts[0].lower()
    addExtendedInfo = message.trigger == 'netrunner'
    # Check for update command before file existence, to prevent message that card file is missing after update, which doesn't make much sense
    if searchType == 'update' or searchType == 'forceupdate':
        if self.areCardfilesBeingUpdated:
            replytext = "I'm already updating!"
        elif not message.bot.isUserAdmin(message.user, message.userNickname, message.userAddress):
            replytext = "Sorry, only admins can use my update function"
        elif not searchType == 'forceupdate' and not self.shouldUpdate()[0]:
            replytext = "The last update check was done pretty recently, there's no need to check again so soon"
        else:
            replytext = self.updateCardFile()[1]
            # Since we're checking now, set the automatic check to start counting from now on
            self.resetScheduledFunctionGreenlet()
        message.reply(replytext, "say")
        return
    # Check if the data file even exists
    elif not os.path.exists(os.path.join(GlobalStore.scriptfolder, 'data', 'NetrunnerCards.json')):
        if self.areCardfilesBeingUpdated:
            message.reply("I don't have my card database, but I'm solving that problem as we speak! Try again in, oh, 10, 15 seconds")
        else:
            message.reply("Sorry, I don't appear to have my card database. I'll try to retrieve it though! Give me 20 seconds, tops")
            gevent.spawn(self.updateCardFile)
            self.resetScheduledFunctionGreenlet()
        return

    # If we reached here, we're gonna search through the card store
    searchDict = {}
    # If there is an actual search (with colon key-value separator) OR a random card is requested with specific search requirements
    if (searchType == 'search' and ':' in message.message) or (searchType == 'random' and message.messagePartsLength > 1):
        # Advanced search!
        if message.messagePartsLength <= 1:
            message.reply("Please provide an advanced search query too, in JSON format, so 'key1: value1, key2: value2'")
            return
        # Turn the search string (not the argument) into a usable dictionary, case-insensitive
        searchDict = SharedFunctions.stringToDict(" ".join(message.messageParts[1:]).lower(), True)
        if len(searchDict) == 0:
            message.reply("That is not a valid search query. It should be entered like JSON, so 'name: Wall of Thorns, type: ICE,...'.")
            return
    # If the searchtype is just 'random', don't set a 'name' field so we don't go through all the cards first
    # Otherwise, set the whole message as the 'name' search, since that's the default search
    elif not searchType.startswith('random'):
        searchDict['title'] = message.message.lower()

    # Correct some values, to make searching easier (so a search for 'set' or 'sets' both work)
    searchTermsToCorrect = {'setname': ['set', 'sets'], 'flavor': ['flavour'], 'title': ['name']}
    for correctTerm, listOfWrongterms in searchTermsToCorrect.iteritems():
        for wrongTerm in listOfWrongterms:
            if wrongTerm in searchDict:
                if correctTerm not in searchDict:
                    searchDict[correctTerm] = searchDict[wrongTerm]
                searchDict.pop(wrongTerm)

    # Turn the search strings into actual regexes
    regexDict = {}
    errors = []
    for attrib, query in searchDict.iteritems():
        try:
            # Since the query is a string, and the card data is unicode, convert the query to unicode before turning it into a regex
            regex = re.compile(unicode(query, encoding='utf8'), re.IGNORECASE)
        except (re.error, SyntaxError) as e:
            self.logError("[Netrunner] Regex error when trying to parse '{}': {}".format(query, e))
            errors.append(attrib)
        except UnicodeDecodeError as e:
            self.logError("[Netrunner] Unicode error in key '{}': {}".format(attrib, e))
            errors.append(attrib)
        else:
            regexDict[attrib] = regex
    # If there were errors parsing the regular expressions, don't continue, to prevent errors further down
    if len(errors) > 0:
        # If there was only one search element to begin with, there's no need to specify
        if len(searchDict) == 1:
            message.reply("An error occurred when trying to parse your search query. Please check if it is a valid regular expression, and that there are no non-UTF8 characters")
        # If there were more elements but only one error, specify
        elif len(errors) == 1:
            message.reply("An error occurred while trying to parse the query for the '{}' field. Please check if it is a valid regular expression without non-UTF8 characters".format(errors[0]))
        # Multiple errors, list them all
        else:
            message.reply("Errors occurred while parsing attributes: {}. Please check your search query for errors".format(", ".join(errors)))
        return

    # All entered data is valid, look through the stored cards
    with open(os.path.join(GlobalStore.scriptfolder, 'data', 'NetrunnerCards.json'), 'r') as jsonfile:
        cardstore = json.load(jsonfile)
    for index in xrange(0, len(cardstore)):
        carddata = cardstore.pop(0)
        # Then check if the rest of the attributes match
        for attrib in regexDict:
            if attrib not in carddata or not regexDict[attrib].search(carddata[attrib]):
                # If the wanted attribute is either not in the card, or it doesn't match, throw it out
                break
        # The else-block of a for-loop is executed when a for-loop isn't broken out of. So if everything matches, we get here
        else:
            cardstore.append(carddata)
    numberOfCardsFound = len(cardstore)

    # Pick a random card if needed and possible
    if searchType.startswith('random') and numberOfCardsFound > 0:
        cardstore = [random.choice(cardstore)]
        numberOfCardsFound = 1

    if numberOfCardsFound == 0:
        replytext = "Sorry, no card matching your query was found"
    elif numberOfCardsFound == 1:
        replytext = self.getFormattedCardInfo(cardstore[0], addExtendedInfo)
    else:
        nameMatchedCardFound = False
        replytext = ""
        # If there was a name search, check if the literal name is in the resulting cards
        if 'title' in searchDict:
            titleMatchIndex = None
            for index, card in enumerate(cardstore):
                if card['title'].lower() == searchDict['title']:
                    titleMatchIndex = index
                    break
            if titleMatchIndex:
                replytext = self.getFormattedCardInfo(cardstore[titleMatchIndex], addExtendedInfo)
                cardstore.pop(titleMatchIndex)
                numberOfCardsFound -= 1
                nameMatchedCardFound = True
        # Pick some cards to show
        maxCardsToList = 15
        if numberOfCardsFound > maxCardsToList:
            cardstore = random.sample(cardstore, maxCardsToList)
        cardnameText = ""
        for card in cardstore:
            cardnameText += card['title'].encode('utf-8') + "; "
        cardnameText = cardnameText[:-2]
        if nameMatchedCardFound:
            replytext += " ({:,} more match{} found: ".format(numberOfCardsFound, 'es' if numberOfCardsFound > 1 else '')
        else:
            replytext += "Your search returned {:,} cards: ".format(numberOfCardsFound)
        replytext += cardnameText
        if numberOfCardsFound > maxCardsToList:
            replytext += " and {:,} more".format(numberOfCardsFound - maxCardsToList)
        # Since the extra results list is bracketed when a literal match was also found, it needs a closing bracket
        if nameMatchedCardFound:
            replytext += ")"
    re.purge()  # Clear the stored regexes, since we don't need them anymore
    message.reply(replytext)
def export_to_git(revisions, done_count, devpath=False, ancestor=False, ancestorDate=None):
    if len(revisions) == 0:
        return done_count
    abs_sandbox_path = os.getcwd()
    abs_sandbox_path = abs_sandbox_path.replace("\\", "/")
    integrity_file = os.path.basename(project)
    # any path named .git, with or without child elements. But will not match .gitignore
    git_folder_re = re.compile("\.git(\\\|$)")
    if "ancestorDate" in revisions[0]:
        ancestor = revisions[0]["ancestor"]
        ancestorDate = revisions[0]["ancestorDate"]
    for revision in revisions:
        print("%d of %d (%0.2f%%)" % (done_count + 1, total_revision_count, done_count / total_revision_count * 100), file=sys.stderr)
        done_count += 1
        mark = marks[revision["number"]]
        si('si retargetsandbox %s --quiet --project="%s" --projectRevision=%s "%s/%s"' % (additional_si_args, project, revision["number"], abs_sandbox_path, integrity_file))
        si('si resync --yes --recurse %s --quiet --sandbox="%s/%s"' % (additional_si_args, abs_sandbox_path, integrity_file))
        if devpath:
            print_out('commit refs/heads/devpath/%s' % devpath)
        else:
            print_out('commit refs/heads/main')
        print_out('mark %s' % mark)
        print_out('committer %s <> %d +0000' % (revision["author"], revision["seconds"]))
        export_string(revision["description"])
        if ancestor:
            # we're starting a development path so we need to start from where it was originally branched
            print_out('from %s' % marks[ancestor])
            ancestor = False  # set to zero so it doesn't loop back in to here
        print_out('deleteall')
        tree = os.walk('.')
        for dir in tree:
            for filename in dir[2]:
                if (dir[0] == '.'):
                    fullfile = filename
                else:
                    fullfile = os.path.join(dir[0], filename)[2:]
                if (fullfile.find('.pj') != -1):
                    continue
                #if (fullfile[0:4] == ".git"):
                if git_folder_re.search(fullfile):
                    continue
                if (fullfile.find('mks_checkpoints_to_git') != -1):
                    continue
                inline_data(fullfile)
        for tag in revision["tags"]:
            print_out('tag %s' % tag.replace(" ", "_"))
            print_out('from %s' % mark)
            print_out('tagger %s <> %d +0000' % (revision["author"], revision["seconds"]))
            export_string("")  # Tag message
        re.purge()
        print_out('checkpoint')
    return done_count
tr = soup.find(attrs={'id': 'places_area__row'})
td = tr.find(attrs={'class': 'w2p_fw'})
print td.text

broken_html = "<ul class = country><li>Area<li>Population</ul>"
tree = lxml.html.fromstring(broken_html)
fixed_html = lxml.html.tostring(tree, pretty_print=True)
print "new html:\n", fixed_html

tree2 = lxml.html.fromstring(html)
td = tree2.cssselect("tr#places_area__row > td.w2p_fw")[0]
print td.text_content()

for name, scraper in [('Regular expressions', re_scraper), ('BeautifulSoup', bs_scraper), ('lxml', lxml_scraper)]:
    start_time = time.time()
    for i in range(NUM_ITERATIONS):
        if scraper == re_scraper:
            re.purge()
        result = scraper(html)
        assert (result['area'] == '1580 square kilometres')
    end = time.time()
    print '%s:%.2f seconds' % (name, end - start_time)

# 2018.08.05 test
'''
1580 square kilometres
Regular expressions:15.61 seconds
BeautifulSoup:77.98 seconds
lxml:3.76 seconds
'''
def parseExpr(self, expr: str):
    re.purge()
    return re.findall(r"[^[]*\[([^]]*)\]", expr)
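# The pattern above grabs the text between each [...] pair while skipping what
# precedes it; a quick illustration (obj stands in for an instance of the
# enclosing class):
#   obj.parseExpr('matrix[3][7]')   -> ['3', '7']
#   obj.parseExpr('cfg[env][name]') -> ['env', 'name']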
def validate(self: object, customValidator: str = None):
    """
    Validate a resultset against predefined metadata based on the LANG
    rules of data quality.
    """
    if self.metadata is None:
        raise ValidationError("LANG Exception: meta-data has not been set", None)
    elif self.dataset is None:
        raise ValidationError("LANG Exception: resultset has not been set", None)

    # Change request: find and output the primary key in the error report
    # file if specified.
    primary_key = ""
    primary_key_values = None

    for key, item in self.metadata.items():
        if MetaUtils.isTrue(item, "PrimaryKey"):
            primary_key = key
            primary_key_values = self.dataset[primary_key]
            break

    # Execute a series of validations against the supplied column of data
    # and the metadata for the column. Which validation is run is determined
    # by entries in the metadata.
    for meta_attribute_key, meta_attribute_definition in self.metadata.items():
        if meta_attribute_key in self.dataset:
            print("Validating attribute \t'" + meta_attribute_key + "'...", end='\r')
            attribute = self.dataset[meta_attribute_key]

            for row_count in range(len(attribute)):
                value = attribute[row_count]

                # If a PrimaryKey tag has been found then output the value so
                # that the user has a reference to search for the record in
                # the source system. If there is no primary key attribute set
                # then output the row count instead.
                if primary_key_values is not None:
                    primary_key_value = primary_key_values[row_count]
                else:
                    primary_key_value = "Row: " + str(row_count + 1)

                self.checkMandatory(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkSize(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkType(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkEnum(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkStartsWith(meta_attribute_definition, meta_attribute_key, value, primary_key_value)

            # Format check (the metadata must provide a regex).
            if MetaUtils.exists(meta_attribute_definition, "Format"):
                re.purge()
                regex = re.compile(meta_attribute_definition["Format"])

                for row_count in range(len(attribute)):
                    # Guard against a missing primary key, as above.
                    if primary_key_values is not None:
                        primary_key_value = primary_key_values[row_count]
                    else:
                        primary_key_value = "Row: " + str(row_count + 1)
                    value = attribute[row_count]
                    isMatch = regex.match(value) is not None

                    if not isMatch and not MetaUtils.isAllowBlank(meta_attribute_definition):
                        self.addDataQualityError(DataQualityError(
                            meta_attribute_key,
                            error_dimension=DataQualityDimension.FORMATCONSISTENCY.value,
                            description="Error: Value '" + value + "' does not match regex #'" + meta_attribute_definition["Format"] + "'"))

            # Unique field check: track values already seen in the column and
            # report any repeats.
            if MetaUtils.isTrue(meta_attribute_definition, "Unique"):
                seen = set()

                for row_count in range(len(attribute)):
                    if primary_key_values is not None:
                        primary_key_value = primary_key_values[row_count]
                    else:
                        primary_key_value = "Row: " + str(row_count + 1)
                    value = attribute[row_count]

                    if value not in seen:
                        seen.add(value)  # only process a value once
                    else:
                        self.addDataQualityError(DataQualityError(
                            meta_attribute_key,
                            error_dimension=DataQualityDimension.UNIQUENESS.value,
                            description="Error: Value '" + value + "' is not UNIQUE. A unique value was expected."))

            self.checkComposite(meta_attribute_definition, meta_attribute_key)

            # Expression evaluation is different to processing field-specific
            # validations as it can link in other columns from the resultset.
            self.evaluateExpression(meta_attribute_definition, meta_attribute_key)

            print("Validating attribute \t'" + meta_attribute_key + "'...\t\t..Complete.")
        else:
            self.addDataQualityError(DataQualityError(
                meta_attribute_key,
                error_dimension=DataQualityDimension.METADATACOMPLIANCE.value,
                description="Error: Attribute '" + meta_attribute_key + "' was not found in the dataset."))

    # Only invoke the custom validator if one has been provided.
    if customValidator is not None and len(customValidator) > 0:
        self.customValidator(customValidator)
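A minimal standalone sketch of the Format check performed inside validate(), with hypothetical metadata and dataset shapes (the real MetaUtils/DataQualityError plumbing is omitted):

import re

metadata = {"student_id": {"Format": r"^[0-9]{2}IT[0-9]{3}$"}}
dataset = {"student_id": ["18IT033", "18CE033"]}

pattern = re.compile(metadata["student_id"]["Format"])
for row, value in enumerate(dataset["student_id"], start=1):
    if pattern.match(value) is None:
        print("Row %d: value '%s' does not match the Format regex" % (row, value))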
def remove_returns(s, replace_by):
    """Collapse runs of whitespace (including end-of-line characters) into replace_by."""
    re.purge()  # clear the compiled-pattern cache first
    temp = re.compile(r"\s+")
    return temp.sub(replace_by, s)
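A quick usage check of the helper above (expected output shown in the comment):

print(remove_returns("one\ntwo\r\n  three", " "))  # -> 'one two three'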
def __setUpGrammars(self, defaultGrammars):
    self.grammars = {}
    # Arrange all the grammars by name.
    for k, v in defaultGrammars.items():
        v['name'] = k
        self.grammars[k] = v
    # Compile regexes for each grammar.
    for k, v in defaultGrammars.items():
        if 0:
            # keywords re.
            v['keywordsRe'] = re.compile(
                app.regex.joinReWordList(
                    v.get('keywords', []) + v.get('types', [])))
            v['errorsRe'] = re.compile(
                app.regex.joinReList(v.get('errors', [])))
            v['specialsRe'] = re.compile(
                app.regex.joinReList(v.get('special', [])))
        # contains and end re.
        matchGrammars = []
        markers = []
        # Index [0]
        if v.get('escaped'):
            markers.append(v['escaped'])
            matchGrammars.append(v)
        else:
            # Add a non-matchable placeholder.
            markers.append(app.regex.kNonMatchingRegex)
            matchGrammars.append(None)
        # Index [1]
        if v.get('end'):
            markers.append(v['end'])
            matchGrammars.append(v)
        else:
            # Add a non-matchable placeholder.
            markers.append(app.regex.kNonMatchingRegex)
            matchGrammars.append(None)
        # |Contains| markers start at index 2.
        for grammarName in v.get('contains', []):
            g = self.grammars.get(grammarName, None)
            if g is None:
                self._raiseGrammarNotFound()
            markers.append(g.get('begin', g.get('matches', u"")))
            matchGrammars.append(g)
        # |Next| markers start after |contains|.
        for grammarName in v.get('next', []):
            g = self.grammars.get(grammarName, None)
            if g is None:
                self._raiseGrammarNotFound()
            markers.append(g['begin'])
            matchGrammars.append(g)
        # |Errors| markers start after |next| markers.
        markers += v.get('errors', [])
        # |Keywords| markers start after |errors| markers.
        for keyword in v.get('keywords', []):
            markers.append(r'\b' + keyword + r'\b')
        # |Types| markers start after |keywords| markers.
        for types in v.get('types', []):
            markers.append(r'\b' + types + r'\b')
        # |Special| markers start after |types| markers.
        markers += v.get('special', [])
        # Variable width characters are at index [-3] in markers.
        markers.append(r'\t+')
        # Double wide characters are at index [-2] in markers.
        markers.append(u'[\u3000-\uffff]+')
        # Carriage return characters are at index [-1] in markers.
        markers.append(r'\n')
        #app.log.startup('markers', v['name'], markers)
        v['matchRe'] = re.compile(app.regex.joinReList(markers))
        v['markers'] = markers
        v['matchGrammars'] = matchGrammars
        containsGrammarIndexLimit = 2 + len(v.get('contains', []))
        nextGrammarIndexLimit = containsGrammarIndexLimit + len(v.get('next', []))
        errorIndexLimit = nextGrammarIndexLimit + len(v.get('errors', []))
        keywordIndexLimit = errorIndexLimit + len(v.get('keywords', []))
        typeIndexLimit = keywordIndexLimit + len(v.get('types', []))
        specialIndexLimit = typeIndexLimit + len(v.get('special', []))
        v['indexLimits'] = (containsGrammarIndexLimit, nextGrammarIndexLimit,
                            errorIndexLimit, keywordIndexLimit,
                            typeIndexLimit, specialIndexLimit)
    # Reset the re cache for user regexes.
    re.purge()
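The core trick in __setUpGrammars is joining many small patterns into one alternation and using the index of the matching group to classify the hit. A minimal standalone sketch of that idea (the marker list here is illustrative, not the app.regex API):

import re

# Each alternative gets its own group; lastindex reports which one fired.
markers = [r'\bdef\b', r'\bclass\b', r'#.*']
matchRe = re.compile('|'.join('(%s)' % m for m in markers))

m = matchRe.search('class Foo:  # a comment')
if m:
    print('marker %d matched %r' % (m.lastindex, m.group(m.lastindex)))
    # -> marker 2 matched 'class'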
The expression's behaviour can be modified by specifying a flags value. Flag
values can be any of the re flag variables, combined using bitwise OR (the |
operator).

Note: using re.compile() and saving the resulting regular expression object for
reuse is more efficient when the expression will be used several times in a
single program.
'''

string1 = "18IT033"
string2 = "My id is 18CE033"

pattern1 = "^[0-9]{2}(IT)[0-9]{3}"
patt1 = re.compile(pattern1)

result1 = patt1.match(string1)
print(result1)

result1 = re.match(pattern1, string1)
print(result1)

result2 = patt1.match(string2)
print(result2)

'''
re.purge()
Clear the regular expression cache.
'''
re.purge()

'''
re.escape(pattern)
Escape special characters in pattern. This is useful if you want to match an
arbitrary literal string that may have regular expression metacharacters in it.
'''
print(re.escape("h.(h)"))
print(re.escape("n&n"))
print(re.escape("n*{n}"))
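The notes above mention combining flags with the | operator; a small illustration (the pattern and strings here are my own, not from the original snippet):

import re

# IGNORECASE and MULTILINE combined with bitwise OR.
pattern = re.compile(r'^id: \w+$', re.IGNORECASE | re.MULTILINE)
print(pattern.findall('ID: 18IT033\nid: 18CE033'))
# -> ['ID: 18IT033', 'id: 18CE033']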
def purge():
    re.purge()
def clear_caches():
    import gc

    # Clear the warnings registry, so they can be displayed again.
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Clear assorted module caches.
    # Don't worry about resetting the cache if the module is not loaded.
    try:
        distutils_dir_util = sys.modules['distutils.dir_util']
    except KeyError:
        pass
    else:
        distutils_dir_util._path_created.clear()

    re.purge()

    try:
        _strptime = sys.modules['_strptime']
    except KeyError:
        pass
    else:
        _strptime._regex_cache.clear()

    try:
        urlparse = sys.modules['urlparse']
    except KeyError:
        pass
    else:
        urlparse.clear_cache()

    try:
        urllib = sys.modules['urllib']
    except KeyError:
        pass
    else:
        urllib.urlcleanup()

    try:
        urllib2 = sys.modules['urllib2']
    except KeyError:
        pass
    else:
        urllib2.install_opener(None)

    try:
        dircache = sys.modules['dircache']
    except KeyError:
        pass
    else:
        dircache.reset()

    try:
        linecache = sys.modules['linecache']
    except KeyError:
        pass
    else:
        linecache.clearcache()

    try:
        mimetypes = sys.modules['mimetypes']
    except KeyError:
        pass
    else:
        mimetypes._default_mime_types()

    try:
        filecmp = sys.modules['filecmp']
    except KeyError:
        pass
    else:
        filecmp._cache.clear()

    try:
        struct = sys.modules['struct']
    except KeyError:
        pass
    else:
        struct._clearcache()

    try:
        doctest = sys.modules['doctest']
    except KeyError:
        pass
    else:
        doctest.master = None

    try:
        ctypes = sys.modules['ctypes']
    except KeyError:
        pass
    else:
        ctypes._reset_cache()

    # Collect cyclic trash.
    support.gc_collect()
def WriteCert(ProgPath, InputName, OutputName, IsLabelInOutput=True, DoEncode=False):
    TempStr = ''
    RawLine = ''
    EncodeLine = ''
    CleanLine = ''
    I = 0
    VarLen = 0
    ListObj = None
    ReObj = None
    FileLineNo = 0
    ErrorNumber = 0
    CertBegin = False
    DataBegin = False
    DataEnd = False
    TrustBegin = False
    LabelPrinted = False

    CertReObj = re.compile('CKA_CLASS CK_OBJECT_CLASS CKO_CERTIFICATE', re.IGNORECASE)
    if not CertReObj:
        print(ErrorMainList[0])
        return 254

    LabelStr = ''
    LabelReObj = re.compile(r'CKA_LABEL UTF8 \"([^\"]+)\"', re.IGNORECASE)
    if not LabelReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    DataRawStr = ''
    DataEncSplit = None
    DataReObj = re.compile('CKA_VALUE MULTILINE_OCTAL', re.IGNORECASE)
    if not DataReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    OctetsReObj = re.compile('[0-7][0-7][0-7]', re.IGNORECASE)
    if not OctetsReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    EndReObj = re.compile('END', re.IGNORECASE)
    if not EndReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    TrustReObj = re.compile('CKA_CLASS CK_OBJECT_CLASS CKO_NSS_TRUST', re.IGNORECASE)
    if not TrustReObj:
        print(ErrorMainList[0])
        return 254

    TrustPurpose = ''
    TrustLevel = ''
    TrustPrimaryReObj = re.compile(
        r'CKA_TRUST_([a-z_]+) CK_TRUST CKT_NSS_([a-z_]+)', re.IGNORECASE)
    if not TrustPrimaryReObj:
        print(ErrorMainList[0])
        return 254

    try:
        os.remove(OutputName)
        print('Deleted file "%s".' % OutputName)
    except:
        pass

    FTxtInObj = open(InputName, 'rb')
    FTxtOutObj = open(OutputName, 'wb')

    for RawLine in FTxtInObj:
        FileLineNo += 1
        if DoEncode:
            try:
                EncodeLine = RawLine.encode('utf_8', 'strict')
            except:
                try:
                    EncodeLine = ''
                    if LabelPrinted:
                        TempStr = ErrorEncodeList[0] + ErrorEncodeList[1] + ErrorEncodeList[0] + ErrorEncodeList[2]
                    else:
                        TempStr = '\n' + ErrorEncodeList[1] + ErrorEncodeList[0] + ErrorEncodeList[2]
                    print(TempStr % FileLineNo)
                    TempStr = ''
                    EncodeLine = RawLine.encode('utf_8', 'ignore')
                    if LabelPrinted:
                        TempStr = ErrorEncodeList[0]
                    TempStr += ErrorEncodeList[0] + ErrorEncodeList[4]
                    print(TempStr)
                    TempStr = ''
                except:
                    try:
                        EncodeLine = RawLine
                        if LabelPrinted:
                            TempStr = ErrorEncodeList[0]
                        TempStr += ErrorEncodeList[0] + ErrorEncodeList[4]
                        if not LabelPrinted:
                            TempStr += '\n'
                        print(TempStr)
                        TempStr = ''
                    except:
                        ErrorNumber = 250
                        break
            ListObj = EncodeLine.splitlines(False)
            EncodeLine = ''
        else:
            ListObj = RawLine.splitlines(False)

        ListRemoveEmpty(ListObj)
        if not ListObj:
            if DataBegin:
                try:
                    print(ErrorReadList[1] % FileLineNo)
                finally:
                    ErrorNumber = 3
                break
            continue

        CleanLine = ListObj[0].strip()
        ListClean(ListObj)
        if (not CleanLine) or (CleanLine == '#'):
            if DataBegin:
                try:
                    print(ErrorReadList[1] % FileLineNo)
                finally:
                    ErrorNumber = 3
                break
            continue

        if CertBegin:
            if not DataBegin:
                if CertReObj.match(CleanLine):
                    ListClean(DataEncSplit)
                    DataRawStr = ''
                    LabelStr = ''
                    TrustBegin = False
                    DataEnd = False
                    DataBegin = False
                    if LabelPrinted:
                        LabelPrinted = False
                        print(' CANCELING. Found NON CA. Line %d.' % FileLineNo)
                else:
                    if not TrustBegin:
                        if not DataEnd:
                            if not LabelStr:
                                try:
                                    ReObj = LabelReObj.match(CleanLine)
                                    if ReObj:
                                        LabelStr = ReObj.group(1)
                                        if not LabelStr:
                                            raise ValueError
                                        else:
                                            LabelPrinted = True
                                            LabelStr = CorrectCertLabel(LabelStr)
                                            print('\nCertificate on Line %d\n "%s"' % (FileLineNo, LabelStr))
                                            VarLen = len(LabelStr)
                                            LabelStr += FStyleNL
                                            for I in range(0, VarLen, 1):
                                                LabelStr += FStyleLabelUR
                                            LabelStr += FStyleNL
                                except:
                                    try:
                                        print(ErrorReadList[0] % FileLineNo)
                                    finally:
                                        ErrorNumber = 2
                                    break
                            elif DataReObj.match(CleanLine):
                                DataBegin = True
                        elif TrustReObj.match(CleanLine):
                            TrustBegin = True
                    else:
                        try:
                            ReObj = TrustPrimaryReObj.match(CleanLine)
                            if ReObj:
                                TrustPurpose = ReObj.group(1).upper()
                                TrustLevel = ReObj.group(2).upper()
                                if (TrustPurpose in MozillaTrustReqPrimary) and (TrustLevel == MozillaTrustLevels[0]):
                                    FTxtOutObj.write(FStyleNL)
                                    if IsLabelInOutput:
                                        FTxtOutObj.write(LabelStr)
                                    for I in range(0, len(DataEncSplit), 1):
                                        FTxtOutObj.write(DataEncSplit[I])
                                    FTxtOutObj.flush()
                                    ListClean(DataEncSplit)
                                    DataRawStr = ''
                                    LabelStr = ''
                                    LabelPrinted = False
                                    TrustBegin = False
                                    DataEnd = False
                                    DataBegin = False
                                    CertBegin = False
                                    print(' SAVE.')
                                TrustLevel = ''
                                TrustPurpose = ''
                        except:
                            try:
                                print(ErrorReadList[2] % FileLineNo)
                            finally:
                                ErrorNumber = 4
                            break
            else:
                if EndReObj.match(CleanLine):
                    try:
                        DataEncSplit = CertToBase64(DataRawStr, True, True)
                        DataRawStr = ''
                        if not DataEncSplit:
                            raise ValueError
                    except:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            ErrorNumber = 250
                        break
                    DataBegin = False
                    DataEnd = True
                else:
                    try:
                        ListObj = CleanLine.split('\\')
                        if not ListObj:
                            raise ValueError
                    except:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            ErrorNumber = 250
                        break
                    ListRemoveEmpty(ListObj)
                    if not ListObj:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            ErrorNumber = 3
                        break
                    for I in range(0, len(ListObj), 1):
                        if not OctetsReObj.match(ListObj[I]):
                            ErrorNumber = 3
                            break
                        try:
                            DataRawStr += chr(int(ListObj[I], 8))
                        except:
                            ErrorNumber = 3
                            break
                    ListClean(ListObj)
                    if ErrorNumber:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            break
        elif CertReObj.match(CleanLine):
            CertBegin = True

    FTxtOutObj.close()
    FTxtInObj.close()
    re.purge()
    return ErrorNumber
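For reference, a minimal standalone sketch of the MULTILINE_OCTAL decoding step used in WriteCert (the sample line is illustrative, in the style of certdata.txt value blocks):

import re

octet_re = re.compile('[0-7][0-7][0-7]')

line = r'\101\102\103'  # octal escapes as found in a MULTILINE_OCTAL block
octets = [o for o in line.split('\\') if o]
assert all(octet_re.match(o) for o in octets)
print(''.join(chr(int(o, 8)) for o in octets))  # -> 'ABC'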