def get_info(Term, Subject):
    url = ("https://ssbp.mycampus.ca/prod/bwckschd.p_get_crse_unsec?TRM=U&term_in=" + Term
           + "&sel_subj=dummy&sel_day=dummy&sel_schd=dummy&sel_insm=dummy&sel_camp=dummy"
           + "&sel_levl=dummy&sel_sess=dummy&sel_instr=dummy&sel_ptrm=dummy&sel_attr=dummy"
           + "&sel_subj=" + Subject + "&sel_crse=&sel_title=&sel_from_cred=&sel_to_cred="
           + "&sel_camp=UON&begin_hh=0&begin_mi=0&begin_ap=a&end_hh=0&end_mi=0&end_ap=a")
    htmltext = urllib.urlopen(url).read()
    regex = '<TH CLASS="ddheader" scope="col" >(.+?)<BR><BR></TH>'
    pattern = re.compile(regex)
    courses = re.split(pattern, htmltext)
    re.purge()
    for course in courses:
        regex = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) \(<ABBR title= "Primary">P</ABBR>\)</TD>'
        regex2 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>'
        regex3 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault"><ABBR title = "To Be Announced">(.+?)</ABBR></TD>'
        regex4 = '<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) - (.+?)</TD>\n<TD CLASS="dbdefault">(.+?)</TD>\n<TD CLASS="dbdefault">(.+?) \(<ABBR title= "Primary">P</ABBR>\)(.+)?</TD>'
        pattern = re.compile(regex)
        pattern2 = re.compile(regex2)
        pattern3 = re.compile(regex3)
        pattern4 = re.compile(regex4)
        entries = re.findall(pattern3, course)  # this pattern is for courses that do not have a start time or class assigned
        if entries:
            print entries
        else:
            entries = re.findall(pattern2, course)  # this pattern is for instructor TBA
            if entries:
                print entries
            else:
                entries = re.findall(pattern, course)  # this pattern is for default structure of courses
                if entries:
                    print entries
                else:
                    entries = re.findall(pattern4, course)  # this pattern returns two values for instructor
                    print entries
def main():
    times = {}
    html = urllib2.urlopen('http://example.webscraping.com/places/default/view/United-Kingdom-239').read()
    NUM_ITERATIONS = 1000  # number of times to test each scraper
    for name, scraper in ('Regular expressions', regex_scraper), ('Beautiful Soup', beautiful_soup_scraper), ('Lxml', lxml_scraper):
        times[name] = []
        # record start time of scrape
        start = time.time()
        for i in range(NUM_ITERATIONS):
            if scraper == regex_scraper:
                # the regular expression module will cache results
                # so need to purge this cache for meaningful timings
                re.purge()
            result = scraper(html)
            # check scraped result is as expected
            assert(result['area'] == '244,820 square kilometres')
            times[name].append(time.time() - start)
        # record end time of scrape and output the total
        end = time.time()
        print('{}: {:.2f} seconds'.format(name, end - start))
    writer = csv.writer(open('times.csv', 'w'))
    header = sorted(times.keys())
    writer.writerow(header)
    for row in zip(*[times[scraper] for scraper in header]):
        writer.writerow(row)
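# The three scraper callables used by main() above are defined elsewhere and not
# shown here. A minimal sketch of the regex-based one, assuming the usual
# "places" page layout where every field sits in a <td class="w2p_fw"> cell;
# the shortened FIELDS tuple and the row-id naming scheme are illustrative
# assumptions, not the original author's code:
import re

FIELDS = ('area', 'population', 'iso', 'country')  # hypothetical field list

def regex_scraper(html):
    results = {}
    for field in FIELDS:
        # capture the cell contents of the row whose id names the field
        results[field] = re.search(
            '<tr id="places_{}__row">.*?<td class="w2p_fw">(.*?)</td>'.format(field),
            html).group(1)
    return results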
def finditer(content, encodings, charset, min_size):
    '''Generator function that iterates over all string matches inside the
    given content which are at least min_size characters long.

    @param content Binary content to search in
    @param encodings Dictionary of encoding functions
    @param charset An iterable object containing the characters to consider as part of a string
    @param min_size Minimal string size to consider as a string match
    @return A tuple containing the match offset in content, encoding name,
            encoding key and the deobfuscated string reconstructed from the blob found
    '''
    # iterate over available encoding functions
    for encoding_name, (encoding_function, encoding_range) in encodings.items():
        # iterate over all keys in range for that encoding function
        for key in encoding_range:
            encoded_charset = encoding_function(charset, key)
            pattern = '[%s]{%d,}' % (re.escape(encoded_charset), min_size)
            for match in re.finditer(pattern, content):
                # deobfuscation: reconstruct the original string
                deobf = ''.join(charset[encoded_charset.index(c)] for c in match.group(0))
                yield (match.start(0), encoding_name, key, deobf)
            # cleanup regex cache once in a while
            re.purge()
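# A sketch of how the finditer() generator above might be driven. The XOR
# encoder and the single-entry encodings table are illustrative assumptions,
# not part of the original module:
import string

def xor_encode(charset, key):
    # Hypothetical encoding function: XOR every character with a one-byte key.
    return ''.join(chr(ord(c) ^ key) for c in charset)

# name -> (encoding function, key range to brute-force)
encodings = {'xor': (xor_encode, range(1, 256))}

blob = xor_encode('a hidden string inside some binary junk', 0x41)
for offset, name, key, plain in finditer(blob, encodings,
                                         string.ascii_lowercase + ' ', 6):
    print(offset, name, key, plain)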
def clear_cache(self):
    try:
        re.purge()
        dircache.reset()
        tiedobj.reset()
    except Exception, err:
        sys.stderr.write('Crond.clear_cache(): %s\n' % err)
def dash_R_cleanup(fs, ps, pic):
    import gc, copy_reg
    import _strptime, linecache, dircache
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    struct._cache.clear()
    filecmp._cache.clear()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
def color(adjoining_words_i, data, balises):
    """Colour the groups of contiguous words in a web page"""
    n = len(adjoining_words_i) + 1
    # start with the longest groups
    for i in xrange(n, 1, -1):
        # for each group of words
        for j in adjoining_words_i[i]:
            text = u'(\A|\W)(%s)(\W|\Z)' % (string.join([j[0][k] for k in range(0, i)], '(?:(?:</span>\W?)|\W)'))
            pattern1 = re.compile(text, re.I | re.U | re.S)
            replace = u'\g<1><span class="%s" style="color:blue; background-color:grey;">\g<2></span>\g<3>' % (string.join(j[0], ""))
            data = pattern1.sub(replace, data)
            re.purge()
    # look up the saved locations of the markup tags
    data_color = u''
    flag3 = re.compile(u'#([0-9]+?)#', re.I | re.U | re.S)
    m = flag3.finditer(data)
    k = 0
    # put the tags back into the string
    for j in m:
        data_color += data[k:j.start()] + balises[j.group(1)]
        k = j.end()
    data_color += data[k:]
    return data_color
def retrieve_devpaths():
    pipe = Popen('si projectinfo --devpaths --noacl --noattributes --noshowCheckpointDescription --noassociatedIssues --project="%s"' % sys.argv[1],
                 shell=True, bufsize=1024, stdout=PIPE)
    devpaths = pipe.stdout.read()
    devpaths = devpaths[1:]
    devpaths_re = re.compile(' (.+) \(([0-9][\.0-9]+)\)\n')
    devpath_col = devpaths_re.findall(devpaths)
    re.purge()
    devpath_col.sort(key=lambda x: map(int, x[1].split('.')))  # order development paths by version
    return devpath_col
def get_skips(self, line):
    skip_points = []
    for r in self.skip_rules:
        pattern = '(' + r[0] + ')(' + r[1] + ')'
        matchobjs = re.finditer(pattern, line)
        for i in matchobjs:
            skip_points.append(i.end())
    re.purge()
    return skip_points
def get_breaks(self, line):
    break_points = []
    for r in self.break_rules:
        pattern = '(' + r[0] + ')(' + r[1] + ')'
        matchobjs = re.finditer(pattern, line)
        for i in matchobjs:
            break_points.append(i.end())
    re.purge()
    return break_points
def purge(): """re.purge: Purge internal regular expressions cache.""" def _cache_empty(): return not getattr(re, '_cache') re.match('', '') cache_created = not _cache_empty() re.purge() return cache_created and _cache_empty() and "empty cache"
def test_regex_equality_nocache(self):
    pattern = r'^(?:[a-z0-9\.\-]*)://'
    left = RegexValidator(pattern)
    re.purge()
    right = RegexValidator(pattern)
    self.assertEqual(
        left,
        right,
    )
def check(self, pattern):
    self.model.clear()
    if not pattern:
        return False
    try:
        re.compile(pattern, self.insertFlags())
        re.purge()
        return True
    except re.error as rerr:
        self.model.showError(str(rerr))
        return False
def getRegexpFeatures(dct, number_of_words_per_type, number_of_words, select=None):
    it = list()
    for (mt, sen) in dct.iteritems():
        it.append((len(sen), mt, sen))
    it.sort(reverse=False)
    itt = list()
    for (l, mt, sen) in it:
        random.shuffle(sen)
        itt.append((l, mt, sen[0:1000]))
    regexps = dict()
    ret = list()
    types = list()
    for (_, meme, _sentences) in itt:
        types.extend([meme for _ in _sentences])
    types = [types]
    #glob = regExpChooser()
    #glob.add_types(types)
    for (_, meme_type, sentences) in it:
        if select != None and meme_type != select:
            continue
        regexps[meme_type] = cluster(sentences, meme_type)
        N = len(regexps[meme_type])
        n = 0
        start = time.time()
        loc = regExpChooser()
        loc.add_types(replaceNotEqual(types, meme_type))
        for regexp in regexps[meme_type]:
            re.purge()
            n += 1
            sys.stdout.write("\r[{0}] {1}/{2} RE in {3} s. ({4})".format(
                meme_type, n, N, round(time.time() - start), regexp))
            sys.stdout.flush()
            compiled = re.compile(regexp)
            search_result = list()
            for (_, meme, _sentences) in itt:
                for sent in _sentences:
                    search_result.append(1 if compiled.search(sent.lower()) != None else 0)
            loc.add_regexp(regexp, search_result)
            #glob.add_regexp(regexp, search_result)
        selection = loc.getBest(number_of_words_per_type)
        ret.extend(selection)
        print("\r[{0}] Regular expressions selected in {1} seconds. (best: {2})".format(
            meme_type, time.time() - start, selection[0]))
    #ret.extend(glob.getBest(number_of_words))
    return ret
def markdownify_content(self):
    self.content = re.sub(r'({{% question) "(.*)"(\s*%}})', r'### \2', self.content)
    re.purge()
    self.content = re.sub(r'{{< relref "(\w*)\.md[#\w\-éèà]*"\s*>}}\s*', r'\1', self.content)
    re.purge()
    self.content = re.sub(r'{{% (\w*) "(.*)" *%}}([\s\S]*?){{% \/\1 %}}', r'*\2*\3', self.content)
    re.purge()
    self.content = re.sub(r'\* Exemple : <.*\)', r'', self.content)
    re.purge()
    self.content = re.sub(r'(#+)\s', r'\1# ', self.content)
    re.purge()
def bench_regex_compile(loops, regexes):
    range_it = xrange(loops)
    t0 = perf.perf_counter()
    for _ in range_it:
        for regex, flags in regexes:
            re.purge()
            # ignore result (compiled regex)
            re.compile(regex, flags)
    return perf.perf_counter() - t0
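# A hypothetical driver for the benchmark above, assuming Python 2 (xrange) and
# the perf package (later renamed pyperf) providing perf.perf_counter; the
# pattern list is made up for illustration:
sample_regexes = [
    (r'\d+', 0),
    (r'[a-z]+', re.IGNORECASE),
    (r'(\w+)\s+\1', re.UNICODE),
]
print('compile time: %.6f s' % bench_regex_compile(1000, sample_regexes))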
def remove_links(s, replace_by):
    # strip URLs of the form www.something.com/path
    re.purge()
    temp = re.compile(r"\s*www\.\. \w+\.(com|net|me|org)?(\s|/*[-\w+&@#/%!?=~_:.\[\]()0-9]*)")
    s = temp.sub(replace_by, s)
    # strip http:// links
    temp = re.compile(r"(((http|ftp|https)://\. |(http|ftp|https)://\.)[-/\w.]*)")
    s = temp.sub(replace_by, s)
    temp = re.compile(r"\w+/\w")
    s = temp.sub(replace_by, s)
    return s
def test_regex_compile(count, timer):
    regexes = capture_regexes()
    times = []
    for _ in xrange(count):
        t0 = timer()
        for regex, flags in regexes:
            re.purge()
            re.compile(regex, flags)
        t1 = timer()
        times.append(t1 - t0)
    return times
def process(self, context, collection):
    '''
    Process collection, send names to rename and shared sort.
    '''
    # compare
    compare = []
    # clean
    clean = []
    # clean duplicates
    for name in collection:
        # remove duplicates
        if name[3][0] not in compare:
            # append
            compare.append(name[3][0])
            clean.append(name)
    # done with collection
    collection.clear()
    # name
    for i, name in enumerate(clean):
        rename(self, context, name, i)
    # randomize names (prevents conflicts)
    for name in clean:
        # randomize name
        name[3][0].name = str(random())
    # is shared sort or shared count
    if context.window_manager.BatchShared.sort or context.window_manager.BatchShared.count:
        # sort
        shared.main(self, context, clean, context.window_manager.BatchShared)
    # isnt shared sort or shared count
    else:
        # apply names
        for name in clean:
            name[3][0].name = name[1]
            # count
            if name[1] != name[2]:
                self.count += 1
    # purge re
    re.purge()
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)
    try:
        import zipimport
    except ImportError:
        pass  # Run unmodified on platforms without zipimport support
    else:
        zipimport._zip_directory_cache.clear()
        zipimport._zip_directory_cache.update(zdc)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
def dash_R_cleanup(fs, ps, pic, abcs):
    import gc, copy_reg
    import _strptime, linecache
    dircache = test_support.import_module('dircache', deprecated=True)
    import urlparse, urllib, urllib2, mimetypes, doctest
    import struct, filecmp
    from distutils.dir_util import _path_created

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copy_reg.dispatch_table.clear()
    copy_reg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc, registry in abcs.items():
        abc._abc_registry = registry.copy()
        abc._abc_cache.clear()
        abc._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urlparse.clear_cache()
    urllib.urlcleanup()
    urllib2.install_opener(None)
    dircache.reset()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None
    if _llvm:
        code_types = (types.CodeType, types.FunctionType, types.MethodType)
        for obj in gc.get_objects():
            if isinstance(obj, code_types):
                _llvm.clear_feedback(obj)

    # Collect cyclic trash.
    gc.collect()
def wrap_pieces_in_text(text, ordered_cont_pieces):
    text_length = len(text)
    text = text
    #if text:
    #print('wp text in ok')
    try:
        try:
            re.purge()
            opener_segment = get_segment(text, ordered_cont_pieces[0][0], ordered_cont_pieces[0][1])
        except Exception as e:
            #print('wp_openseg error')
            raise e
        try:
            re.purge()
            closer_segment = get_segment(text, ordered_cont_pieces[-1][0], ordered_cont_pieces[-1][1])
        except Exception as e:
            #print('wp_closeg error')
            raise e
        # Maybe some more checking in case there's some shit at the top/bottom? -- i.e. check
        # by length or content?
        try:
            if ordered_cont_pieces[-1][1] > text_length * 0.7 and '<salute>' in closer_segment:
                text = re.sub(closer_segment, fix_closer_wraps(closer_segment), text)
        except Exception as e:
            #print('wp closersub failed')
            raise e
        try:
            text = re.sub(opener_segment, fix_opener_wraps(opener_segment), text)
        except Exception as e:
            #print('wp openersub failed')
            print(e)
        #print('wp_ index error not triggered')
    except IndexError:
        # presumably from fail if there is only one segment identified
        #print('wp_index error')
        # fixed: the original referenced the undefined name cont_pieces here
        opener_segment = get_segment(text, ordered_cont_pieces[0][0], ordered_cont_pieces[0][1])
        text = re.sub(opener_segment, fix_opener_wraps(opener_segment), text)
    except Exception as e:
        #print('wp_general exception', e)
        raise e
    # Remove all remaining temps
    text = re.sub(r'<TEMP>', '', text)
    text = re.sub(r'</TEMP>', '', text)
    #print(text)
    return text
def findPrice(product, logger, host):
    product_url = "/dp/" + product.id
    conn = http.client.HTTPConnection(host)
    conn.request("GET", product_url)
    r1 = conn.getresponse()
    dataRep = r1.read().decode("UTF-8")
    conn.close()
    shortRep = re.findall("<span.*priceblock_.*/span>", dataRep)[-1]
    shortRep = parsePrice(shortRep)
    re.purge()
    price = float(shortRep)
    logger.info(str(product) + " :: " + shortRep)
    if (product.setPrice(price)):
        logger.info("New price on product " + str(product) + " at " + str(product.price)
                    + " Link : http://" + host + "/dp/" + str(product.id))
def _replaceBrackets(self, string):
    """
    Resolves property variable within a string into a string
    :param string:
    :return:
    """
    m = re.findall(self.regEx, str(string))
    if m:
        for key in m:
            value = self.getItem(key, self.rawParameters)
            if re.findall(self.regEx, str(value)):
                value = self._replaceBrackets(value)
            string = string.replace('[' + key + ']', value)
    re.purge()
    gc.collect()
    return string
def format(self, data, format, filter):
    """
    Function to process the text collected by the spider.

    :param format: The expected format.
    :param filter: The output format.
    """
    if type(filter) == int:
        filter = "\\" + str(filter)
    _result = re.subn(format, filter, data)
    _data = _result[0] if _result[1] >= 1 else ""
    re.purge()
    return _data
def dash_R_cleanup(fs, ps, pic, abcs):
    import gc, copyreg
    import _strptime, linecache
    import urllib.parse, urllib.request, mimetypes, doctest
    import struct, filecmp, _abcoll
    from distutils.dir_util import _path_created
    from weakref import WeakSet

    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Restore some original values.
    warnings.filters[:] = fs
    copyreg.dispatch_table.clear()
    copyreg.dispatch_table.update(ps)
    sys.path_importer_cache.clear()
    sys.path_importer_cache.update(pic)

    # clear type cache
    sys._clear_type_cache()

    # Clear ABC registries, restoring previously saved ABC registries.
    for abc in [getattr(_abcoll, a) for a in _abcoll.__all__]:
        if not isabstract(abc):
            continue
        for obj in abc.__subclasses__() + [abc]:
            obj._abc_registry = abcs.get(obj, WeakSet()).copy()
            obj._abc_cache.clear()
            obj._abc_negative_cache.clear()

    # Clear assorted module caches.
    _path_created.clear()
    re.purge()
    _strptime._regex_cache.clear()
    urllib.parse.clear_cache()
    urllib.request.urlcleanup()
    linecache.clearcache()
    mimetypes._default_mime_types()
    filecmp._cache.clear()
    struct._clearcache()
    doctest.master = None

    # Collect cyclic trash.
    gc.collect()
def trace_memory_clean_caches(self):
    """ Avoid polluting results with some builtin python caches """
    urlparse.clear_cache()
    re.purge()
    linecache.clearcache()
    copy_reg.clear_extension_cache()
    if hasattr(fnmatch, "purge"):
        fnmatch.purge()  # pylint: disable=no-member
    elif hasattr(fnmatch, "_purge"):
        fnmatch._purge()
    if hasattr(encodings, "_cache") and len(encodings._cache) > 0:
        encodings._cache = {}
    context.log.handler.flush()
def get_segment(text, s, e):
    try:
        regex = r'[\s\S]*'
        re.purge()
        pattern = re.compile(regex)
        #print(pattern)
        try:
            segment = pattern.search(text, s, e).group()
            return segment
        except Exception as e:
            #print('getseg regex err -', e)
            raise e
    except Exception as e:
        #print('getseg error', e)
        raise e
def set(cls, ssquo=None, esquo=None, sdquo=None, edquo=None, dir=None):
    """
    Set the HTML entities (and indirectly, the Unicode glyphs)
    used to represent starting and ending single and double
    quotes, respectively, and language direction.
    """
    if ssquo is not None:
        cls.SSQUO = ssquo
    if esquo is not None:
        cls.ESQUO = esquo
    if sdquo is not None:
        cls.SDQUO = sdquo
    if edquo is not None:
        cls.EDQUO = edquo
    if dir is not None:
        if cls.direction != dir:
            re.purge()
        cls.direction = dir
        cls.direction_explicit = True
def run():
    responses.add(responses.GET, "http://example.com/zero")
    responses.add(responses.GET, "http://example.com/one")
    responses.add(responses.GET, "http://example.com/two")
    responses.add(responses.GET, re.compile(r"http://example\.com/three"))
    responses.add(responses.GET, re.compile(r"http://example\.com/four"))
    re.purge()
    responses.remove(responses.GET, "http://example.com/two")
    responses.remove(Response(method=responses.GET, url="http://example.com/zero"))
    responses.remove(responses.GET, re.compile(r"http://example\.com/four"))
    with pytest.raises(ConnectionError):
        requests.get("http://example.com/zero")
    requests.get("http://example.com/one")
    with pytest.raises(ConnectionError):
        requests.get("http://example.com/two")
    requests.get("http://example.com/three")
    with pytest.raises(ConnectionError):
        requests.get("http://example.com/four")
def addItem(self):
    """Add Items from Locate command."""
    start_time = datetime.now().second
    self.stringlist.clear()
    lineText = self.lineEdit.text()
    if len(lineText) and str(lineText).strip() not in self.history:
        self.history.append(lineText + "\n")
        self.historyCurrentItem = 1
        self.saveHistory()
    self.historyCurrentItem = self.historyCurrentItem - 1
    command = "ionice --ignore --class 3 chrt --idle 0 "  # Nice CPU / IO
    command += "locate --ignore-case --existing --quiet --limit 9999 {}"
    condition = str(self.applet.configurations.readEntry("Home")) == "true"
    if len(str(lineText).strip()) and condition:
        command_to_run = command.format(  # Only Search inside Home folders
            path.join(path.expanduser("~"), "*{}*".format(lineText)))
    else:
        command_to_run = command.format(lineText)
    locate_output = Popen(command_to_run, shell=True, stdout=PIPE).stdout
    results = tuple(locate_output.readlines())
    banned = self.applet.configurations.readEntry("Banned")
    banned_regex_pattern = str(banned).strip().lower().replace(" ", "|")
    for item in results:
        if not search(banned_regex_pattern, str(item)):  # banned words
            self.stringlist.append(item[:-1])
    purge()  # Purge RegEX Cache
    self.model.setStringList(self.stringlist)
    self.treeview.nativeWidget().resizeColumnToContents(0)
    number_of_results = len(results)
    if number_of_results:  # if items found Focus on item list
        self.lineEdit.nativeWidget().clear()
        self.label.setText("Found {} results on {} seconds !".format(
            number_of_results, abs(datetime.now().second - start_time)))
        self.resize(500, 12 * number_of_results)
        self.treeview.nativeWidget().show()
        self.treeview.nativeWidget().setFocus()
    else:  # if no items found Focus on LineEdit
        self.label.setText("Search")
        self.resize(self.minimumSize())
        self.treeview.nativeWidget().hide()
        self.lineEdit.nativeWidget().selectAll()
        self.lineEdit.nativeWidget().setFocus()
    for field in FIELDS:
        results[field] = soup.find('help')
    return results

def lxml_scraper(html):
    tree = lxml.html.fromstring(html)
    results = {}
    for field in FIELDS:
        results[field] = tree.cssselect('help')
    return results

times = {}
html = '''<body>help</body>'''
for name, scraper in ('Regular expressions', regex_scraper), ('Beautiful Soup', beautiful_soup_scraper), ('Lxml', lxml_scraper):
    times[name] = []
    start = time.time()
    for i in range(1000):
        if scraper == regex_scraper:
            re.purge()  # RE will use cache so we purge (clean) it.
        result = scraper(html)
        # check scraped result is as expected
        times[name].append(time.time() - start)
    # record end time of scrape and output the total
    end = time.time()
    print '{}: {:.2f} seconds'.format(name, end - start)
def purge(): """Purge caches.""" _purge_cache() _re.purge()
def regular():
    data = "She is more than pretty. 520"

    # --- Regular expressions ---
    reg = r"mo"          # literal characters => span=(7, 9), match='mo'
    reg = r"."           # (.) any single character => span=(0, 1), match='S'
    reg = r"\."          # (\) escape character => span=(23, 24), match='.'
    reg = r"[.]"         # ([]) character set (note: some special characters lose their special meaning) => span=(23, 24), match='.'
    reg = r"[love]"      # any character inside [] => span=(2, 3), match='e'
    reg = r"[i-u]"       # (-) range => span=(4, 5), match='i'
    reg = r"t{2}"        # {} repeat count => span=(20, 22), match='tt'
    reg = r"t{1,3}"      # {M,} / {,N} / {N} => span=(12, 13), match='t'
    reg = r"(i|o|u){1}"  # (()) group => span=(4, 5), match='i'
    reg = r"^S"          # (^) start of string => span=(0, 1), match='S'
    reg = r"[^S]"        # ([^]) negated set (excludes the listed characters) => span=(1, 2), match='h'
    reg = r"520$"        # ($) end of string => span=(25, 28), match='520'
    reg = r"et*"         # (*) matches {0,} of the expression => ['e', 'e', 'ett']
    reg = r"et+"         # (+) matches {1,} of the expression => ['ett']
    reg = r"et?"         # (?) matches {0,1} of the expression => ['e', 'e', 'et']
    reg = r".+?e"        # (?) non-greedy mode (span=(0, 20), match='She is more than pre' => span=(0, 3), match='She')
    reg = r"\145"        # octal ASCII code (octal 145 = decimal 101 = 'e') => span=(2, 3), match='e'
    reg = r"\d"          # (\d) single digit => span=(25, 26), match='5' (recommended: [0-9])
    reg = r"\D"          # (\D) non-digit => span=(0, 1), match='S' (recommended: [^0-9])
    reg = r"\s"          # (\s) whitespace character => span=(3, 4), match=' ' (recommended: [\t\n\r\f\v])
    reg = r"\S"          # (\S) non-whitespace => span=(0, 1), match='S' (recommended: [^\t\n\r\f\v])
    reg = r"\w"          # (\w) word character => span=(0, 1), match='S' (recommended: [a-zA-Z0-9_])
    reg = r"\W"          # (\W) non-word character => span=(3, 4), match=' ' (recommended: [^a-zA-Z0-9_])
    reg = r"\AS"         # (\A) start of string => span=(0, 1), match='S'
    reg = r"520\Z"       # (\Z) end of string => span=(25, 28), match='520'
    reg = r"y\b"         # (\b) word boundary (Hello) => span=(22, 23), match='y'
    reg = r"o\B"         # (\B) non word boundary (world) => span=(8, 9), match='o'
    reg = r"[01]\d\d|2[0-4]\d|25[0-5]"  # alternation (|), multi-digit (matches numbers from 0 to 255)

    index = re.search(reg, data)       # find a single match
    index = re.match(r"She", data)     # match at the beginning => span=(0, 3), match='She'
    index = re.fullmatch(r".+", data)  # match the whole string => span=(0, 28), match='She is more than pretty. 520'
    lists = re.findall(reg, data)      # find all matches (list)
    lists = re.split(r"o", data, maxsplit=1)   # split the string on the regex (maxsplit = number of splits) => ['She is m', 're than pretty. 520']
    strs = re.sub(r"\.", r"!", data, count=1)  # substitute (count: number of replacements; unmatched text kept as-is) => She is more than pretty! 520
    re.purge()  # clear the regular expression cache

    # --- Compiled pattern objects ---
    pat = re.compile(r"e")  # compile into a pattern object
    index = pat.search(data)           # find a single match => span=(2, 3), match='e'
    index = pat.search(data, 5)        # => span=(10, 11), match='e'
    index = pat.search(data, 1, 10)
    index = pat.match(data)            # match at the beginning => None
    index = pat.match(data, 2)         # => span=(2, 3), match='e'
    index = pat.match(data, 1, 10)
    index = pat.fullmatch(data)        # match the whole string => None
    index = pat.fullmatch(data, 2)     # => None
    index = pat.fullmatch(data, 2, 3)  # span=(2, 3), match='e'
    lists = pat.split(data, maxsplit=0)   # split => ['Sh', ' is mor', ' than pr', 'tty. 520']
    lists = pat.findall(data)          # find all => ['e', 'e', 'e']
    lists = pat.findall(data, 5)       # => ['e', 'e']
    lists = pat.findall(data, 1, 10)   # => ['e']
    strs = pat.sub(r"o", data, count=0)  # substitute => Sho is moro than protty. 520

    # --- Match ---
    match = index          # span=(2, 3), match='e'
    strs = match.string    # the data that was matched against => She is more than pretty. 520
    strs = match.group()   # get the matched text => e
    pos = match.pos        # => 2
    pos = match.endpos     # => 3
def all(): """Translation of the entire DocFX project""" global processed global greenFlag global reqs global chars RepoCheck() while not processed: if greenFlag: for item in Path().iterdir(): if item.name != sourceDir and item.name in list(map(lambda x: '_'.join(x.split('-')).lower(), targetLangs)) and item.is_dir(): shutil.rmtree(item.name) processed = True if greenFlag: for path in targetPaths: os.mkdir(path) for entry in Path(sourceDir).iterdir(): if entry.is_dir(): dirLevel2 = sourceDir + '/' + entry.name if greenFlag: for path in targetPaths: os.mkdir(path + '/' + entry.name) for entry2 in Path(dirLevel2).iterdir(): if entry2.is_dir(): tgSeg = '/' + entry.name + '/' + entry2.name dirLevel3 = dirLevel2 + '/' + entry2.name if greenFlag: for path in targetPaths: os.mkdir(path + tgSeg) for entry3 in Path(dirLevel3).iterdir(): if entry3.is_dir(): tgSeg = '/' + entry.name + '/' + entry2.name + '/' + entry3.name dirLevel4 = dirLevel3 + '/' + entry3.name if greenFlag: for path in targetPaths: os.mkdir(path + tgSeg) for entry4 in Path(dirLevel4).iterdir(): if entry4.is_dir(): tgSeg = '/' + entry.name + '/' + entry2.name + '/' + entry3.name + '/' + entry4.name dirLevel5 = dirLevel4 + '/' + entry4.name if greenFlag: for path in targetPaths: os.mkdir(path + tgSeg) for entry5 in Path(dirLevel5).iterdir(): if not entry5.is_dir(): if greenFlag: ProcessFiles(dirLevel5 + '/' + entry5.name) else: stats.append(FileStats(dirLevel5 + '/' + entry5.name)) else: if greenFlag: ProcessFiles(dirLevel4 + '/' + entry4.name) else: stats.append(FileStats(dirLevel4 + '/' + entry4.name)) else: if greenFlag: ProcessFiles(dirLevel3 + '/' + entry3.name) else: stats.append(FileStats(dirLevel3 + '/' + entry3.name)) else: if greenFlag: ProcessFiles(dirLevel2 + '/' + entry2.name) else: stats.append(FileStats(dirLevel2 + '/' + entry2.name)) else: if greenFlag: ProcessFiles(sourceDir + '/' + entry.name) else: stats.append(FileStats(sourceDir + '/' + entry.name)) if not greenFlag: fls = list(filter(lambda x: x is not None, stats)) nFls = len(fls)*len(targetLangs) print('\n Target languages:\t\t\t' + ', '.join(targetLangs)) print(' Total of source language files:\t' + str(len(fls))) for i in range(len(fls)): reqs += fls[i][0] chars += fls[i][1] estimatedT = int(reqs * 1.3) print(' Total of source language characters:\t' + str(chars)) print(' Total of files to be generated:\t' + str(nFls)) print(' Total of calls to translation service:\t' + str(reqs)) print(' Total of characters for translation:\t' + str(chars * len(targetLangs))) print(' Estimated process duration:\t\t' + str(datetime.timedelta(seconds = estimatedT))) cont = input('\n Continue [c] or abort [Enter]? ') if cont == 'c': greenFlag = True else: break if greenFlag and not len(haltedTranslation): PrGreen('\n Completed successfully!') else: if greenFlag: PrYellow("The following files could neither be processed nor copied to target language directories:") for notTranslated in haltedTranslation: print(' ' + notTranslated) PrRed('\n Exiting...') time.sleep(1) re.purge() exit()
def basic_operation():
    r"""
    # Special sequence.
    \number \A \b \B \d \D \s \S \w \W \Z
    # Standard escape.
    \a \b \f \n \N \r \t \u \U \v \x \\

    # Flag.
    re.A, re.ASCII
    re.I, re.IGNORECASE
    re.L, re.LOCALE
    re.M, re.MULTILINE
    re.S, re.DOTALL
    re.U, re.UNICODE
    re.X, re.VERBOSE
    re.DEBUG

    re.search(pattern, string, flags=0)
        Scan through string looking for the first location where the regular expression pattern produces a match.
    re.match(pattern, string, flags=0)
        If zero or more characters at the beginning of string match the regular expression pattern.
    re.fullmatch(pattern, string, flags=0)
        If the whole string matches the regular expression pattern.
    re.split(pattern, string, maxsplit=0, flags=0)
    re.findall(pattern, string, flags=0)
    re.finditer(pattern, string, flags=0)
    re.sub(pattern, repl, string, count=0, flags=0)
    re.subn(pattern, repl, string, count=0, flags=0)
    re.escape(pattern)
    re.purge()
    """

    #--------------------
    # Search.

    # *, +, ?.
    # The '*', '+', and '?' qualifiers are all greedy; they match as much text as possible.
    # If the RE <.*> is matched against '<a> b <c>', it will match the entire string, and not just '<a>'.
    # *?, +?, ??.
    # Adding ? after the qualifier makes it perform the match in non-greedy or minimal fashion; as few characters as possible will be matched.
    # Using the RE <.*?> will match only '<a>' against '<a> b <c>'.
    re.search(r'''['"].*['"]''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: '\'cd\'ef\'gh\'ij"kl"mn\'op"qr"st\'uv"wx\'yz\'AB"'.
    re.search(r'''['"].*?['"]''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: "'cd'".

    # (...): Group.
    # (?P<name>...): Named group.
    # (?P=name): Backreference to a named group.
    re.search(r'''(?P<quote>['"]).*(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: '\'cd\'ef\'gh\'ij"kl"mn\'op"qr"st\'uv"wx\'yz\''.
    re.search(r'''(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: "'cd'".
    re.search(r'''(?P<asterisk>\*).*?(?P=asterisk)|(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: "'cd'".

    # (?=...): Lookahead assertion.
    # (?!...): Negative lookahead assertion.
    # (?<=...): Positive lookbehind assertion.
    # (?<!...): Negative lookbehind assertion.

    # (?!...): Negative lookahead assertion.
    re.search(r'(?!ABC)\w*', 'Aabcde')    # Matched.
    re.search(r'(?!ABC)\w*', 'Babcde')    # Matched.
    re.search(r'(?!ABC)\w*', 'Cabcde')    # Matched.
    re.search(r'(?!ABC)\w*', 'ABabcde')   # Matched.
    re.search(r'(?!ABC)\w*', 'BCabcde')   # Matched.
    re.search(r'(?!ABC)\w*', 'ABCabcde')  # Unmatched.

    #--------------------
    # Match.

    # [^...]: Complementation of a set of characters.
    # The first character.
    re.match(r'[^A]\w*', 'abcde')     # Matched.
    re.match(r'[^A]\w*', 'Babcde')    # Matched.
    re.match(r'[^A]\w*', 'Aabcde')    # Unmatched.
    re.match(r'[^ABC]\w*', 'abcde')   # Matched.
    re.match(r'[^ABC]\w*', 'aAabcde') # Matched.
    re.match(r'[^ABC]\w*', 'Aabcde')  # Unmatched.
    re.match(r'[^ABC]\w*', 'Babcde')  # Unmatched.
    re.match(r'[^ABC]\w*', 'Cabcde')  # Unmatched.
    # The second character.
    re.match(r'\w[^A]\w*', 'abcde')   # Matched.
    re.match(r'\w[^A]\w*', 'aBabcde') # Matched.
    re.match(r'\w[^A]\w*', 'aAabcde') # Unmatched.
    # The first and second characters.
    re.match(r'[^A][^B]\w*', 'abcde')   # Matched.
    re.match(r'[^A][^B]\w*', 'Babcde')  # Matched.
    re.match(r'[^A][^B]\w*', 'Aabcde')  # Unmatched.
    re.match(r'[^A][^B]\w*', 'aBabcde') # Unmatched.

    #--------------------
    # Split.

    re.split(r'\W+', 'Words, words, words.')
    re.split(r'(\W+)', 'Words, words, words.')
    re.split(r'\W+', 'Words, words, words.', 1)
    re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
    re.split(r'(\W+)', '...words, words...')

    #--------------------
    # Find.

    re.findall(r'''['"].*?['"]''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: ["'cd'", "'gh'", '"kl"', '\'op"', '"st\'', '"wx\'', '\'AB"'].
    re.findall(r'''(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn'op"qr"st'uv"wx'yz'AB"CD''')  # Result: ["'", "'", '"', "'", '"'].
    re.findall(r'''['"].*?['"]|\*.*?\*''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: ["'cd'", "'gh'", '"kl"', '*op*', '\'st"', '"wx\'', '*AB*', '"EF\'', '\'IJ"', '*MN*'].
    re.findall(r'''(?P<quote>['"]).*?(?P=quote)|(?P<asterisk>\*).*?(?P=asterisk)''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: [("'", ''), ("'", ''), ('"', ''), ('', '*'), ("'", ''), ('', '*'), ('"', ''), ('', '*')].
    re.findall(r'''(?P<asterisk>\*).*?(?P=asterisk)|(?P<quote>['"]).*?(?P=quote)''', '''ab'cd'ef'gh'ij"kl"mn*op*qr'st"uv"wx'yz*AB*CD"EF'GH'IJ"KL*MN*OPQRSTUVWXYZ''')  # Result: [('', "'"), ('', "'"), ('', '"'), ('*', ''), ('', "'"), ('*', ''), ('', '"'), ('*', '')].

    #--------------------
    # Substitute.

    def dash_repl(match):
        if match.group(0) == '-':  # The entire match.
            return ' '
        else:
            return '-'

    re.sub('-{1,2}', '-', 'pro----gram-files')  # Result: "pro--gram-files".
    re.sub('-{1,2}', dash_repl, 'pro----gram-files')  # Result: "pro--gram files".
    re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)  # Result: "Baked Beans & Spam".
    re.subn('-{1,2}', dash_repl, 'pro----gram-files')  # Result: "('pro--gram files', 3)".
    re.subn(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)  # Result: "('Baked Beans & Spam', 1)".

    #--------------------
    re.escape('http://www.python.org')  # Result: "http://www\\.python\\.org".
    re.purge()  # Clear the regular expression cache.

    #--------------------
    try:
        re.compile('[a-z+')
    except re.error as ex:
        print('re.error: {}.'.format(ex))
def check_file(filename):
    """Check input file and verify its content.

    Checks that the file begins with HEX, BIN or ASC keyword, verifies the
    claimed content, and splits it into appropriate chunks. Returns integer
    (0=invalid, 1=HEX, 2=BIN, 3=ASCII) and the chunked file content or error
    message.

    Valid example hex file content:
    HEX 35 00 FF A2 81 9B E3
    """
    file = open(filename, 'r')
    file_content = file.read()
    if len(file_content) < 3 or file_content.isspace():
        file.close()
        return (0, 'File content must begin with a keyword (HEX, BIN or ASC)!')
    # First 3 characters should represent the base of the content.
    base = file_content[0:3]
    file_content = file_content[3:]
    forbidden_chars = {'BIN': [None], 'HEX': [None]}
    # Content is claimed to be hexadecimal:
    if base == 'HEX':
        file_content = ''.join(file_content.split())
        file_content = file_content.upper()
        if len(file_content) < 2:
            file.close()
            return (0, 'File must contain at least 1 byte of data after the keyword!')
        mod = len(file_content) % 2
        if mod != 0:
            return (0, 'File must contain full bytes of data (2 hex digits = 1 byte)!')
        # Use regular expression for verifying the content.
        if re.match('[0-9A-F]+$', file_content):
            content = ''
            for start in range(0, len(file_content), 2):
                if start + 2 <= len(file_content):
                    content += file_content[start:start + 2] + ' '
                else:
                    content += file_content[start:]  # add the remainings
            content = content.rstrip()  # remove possible whitespace at the end
            # Check that the file doesn't contain any forbidden control characters
            for val in content.split():
                if val in forbidden_chars['HEX']:
                    file.close()
                    return (0, 'File must not contain other control characters than TAB, LF or CR!')
            # Return type indicator and the chopped content.
            file.close()
            return (1, content)
        else:
            file.close()
            return (0, 'File content was invalid hexadecimal data!')
    # Content is claimed to be binary:
    elif base == 'BIN':
        file_content = ''.join(file_content.split())
        if len(file_content) < 8:
            file.close()
            return (0, 'File must contain at least 1 byte of data after the keyword!')
        mod = len(file_content) % 8
        if mod != 0:
            return (0, 'File must contain full bytes of data (8 bits = 1 byte)!')
        # Use regular expression for verifying the content.
        re.purge()  # clear regex cache
        if re.match('[0-1]+$', file_content):
            content = ''
            for start in range(0, len(file_content), 8):
                if start + 8 <= len(file_content):
                    content += file_content[start:start + 8] + ' '
                else:
                    content += file_content[start:]  # add the remainings
            content = content.rstrip()  # remove possible whitespace at the end
            # Check that the file doesn't contain any forbidden control characters
            for val in content.split():
                if val in forbidden_chars['BIN']:
                    file.close()
                    return (0, 'File must not contain other control characters than TAB, LF or CR!')
            # Return type indicator and the chopped content.
            file.close()
            return (2, content)
        else:
            file.close()
            return (0, 'File content was invalid binary data!')
    # Content is claimed to be ASCII:
    elif base == 'ASC':
        escape_chars = ['\a', '\b', '\f', '\n', '\r', '\t', '\v']
        escape_letters = ['a', 'b', 'f', 'n', 'r', 't', 'v']
        # Use regular expression for verifying the content.
        re.purge()  # clear regex cache
        if re.match('[\x00-\x7F]+$', file_content):  # [\x20-\x7E]
            # Check that the file doesn't contain any forbidden control characters
            for c in file_content:
                if binascii.hexlify(c).upper() in forbidden_chars['HEX']:
                    file.close()
                    return (0, 'File contains illegal control characters!')
            for c in escape_chars:
                if file_content.count(c) != 0:
                    file_content = file_content.replace(c, '')
            # Replace all "\\n", "\\r" etc. with "\n", "\r" etc. (i.e. remove
            # the extra backslash) so that the control characters are interpreted
            # correctly into hex values.
            for c in range(0, len(file_content)):
                if file_content[c:c + 1] == '\\':
                    if file_content[c + 1:c + 2] in escape_letters:
                        for e in escape_letters:
                            if file_content[c + 1:c + 2] == e:
                                file_content = (file_content[:c]
                                                + escape_chars[escape_letters.index(e)]
                                                + file_content[c + 2:])
                                break
                    else:
                        return (0, 'File contains illegal control characters!\n\n'
                                + 'Legal characters after a backslash are: a, b, f, n, r, t, and v.')
            # Return type indicator and the file content.
            file.close()
            return (3, file_content)
        else:
            file.close()
            return (0, 'File content was invalid ASCII data!')
    # Content is invalid:
    else:
        file.close()
        return (0, 'File content must begin with a keyword (HEX, BIN or ASC)!')
import gc
print "Is Enabled? %s" % gc.isenabled()
print gc.set_debug(gc.DEBUG_STATS)
#print gc.set_debug(gc.DEBUG_UNCOLLECTABLE)
#print gc.set_debug(gc.DEBUG_COLLECTABLE)
print gc.get_debug()
print "Objects: %s" % gc.garbage
print "Collect: %s" % gc.collect()

import re
print "re: %s" % re.purge()
def replace_numbers(s, replace_by):
    re.purge()
    # restored: the compile was commented out in the original, leaving `temp` undefined
    temp = re.compile(ur'([0-9]+(st|th|rd|nd|,[0-9]+|.[0-9]+)?)', re.UNICODE)
    s = temp.sub(replace_by, s)
    return s
def findAllMatches(string, pattern, flag=re.MULTILINE | re.DOTALL):
    regex = re.compile(pattern, flag)
    matches = regex.findall(string)
    re.purge()
    return matches
def test_sanity_re(self):
    '''
    Basic sanity tests for the re module. Each module member is used at least once.
    '''
    #compile
    self.assertTrue(hasattr(re.compile("(abc){1}"), "pattern"))
    self.assertTrue(hasattr(re.compile("(abc){1}", re.L), "pattern"))
    self.assertTrue(hasattr(re.compile("(abc){1}", flags=re.L), "pattern"))

    #I IGNORECASE L LOCALE M MULTILINE S DOTALL U UNICODE X VERBOSE
    flags = ["I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE",
             "S", "DOTALL", "U", "UNICODE", "X", "VERBOSE"]
    for f in flags:
        self.assertTrue(hasattr(re, f))

    #search
    self.assertEqual(re.search("(abc){1}", ""), None)
    self.assertEqual(re.search("(abc){1}", "abcxyz").span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", "abcxyz", re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", "abcxyz", flags=re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", "xyzabc").span(), (3, 6))
    self.assertEqual(re.search("(abc){1}", buffer("")), None)
    self.assertEqual(re.search("(abc){1}", buffer("abcxyz")).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", buffer("abcxyz"), re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", buffer("abcxyz"), flags=re.L).span(), (0, 3))
    self.assertEqual(re.search("(abc){1}", buffer("xyzabc")).span(), (3, 6))

    #match
    self.assertEqual(re.match("(abc){1}", ""), None)
    self.assertEqual(re.match("(abc){1}", "abcxyz").span(), (0, 3))
    self.assertEqual(re.match("(abc){1}", "abcxyz", re.L).span(), (0, 3))
    self.assertEqual(re.match("(abc){1}", "abcxyz", flags=re.L).span(), (0, 3))

    #split
    self.assertEqual(re.split("(abc){1}", ""), [''])
    self.assertEqual(re.split("(abc){1}", "abcxyz"), ['', 'abc', 'xyz'])
    #maxsplit
    self.assertEqual(re.split("(abc){1}", "abc", 0), ['', 'abc', ''])
    for i in xrange(3):
        self.assertEqual(re.split("(abc){1}", "abc", maxsplit=i), ['', 'abc', ''])
        self.assertEqual(re.split("(abc){1}", "", maxsplit=i), [''])
        self.assertEqual(re.split("(abc){1}", "abcxyz", maxsplit=i), ['', 'abc', 'xyz'])
    self.assertEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=0), ['', 'abc', 'xyz', 'abc', ''])
    self.assertEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=1), ['', 'abc', 'xyzabc'])
    self.assertEqual(re.split("(abc){1}", "abcxyzabc", maxsplit=2), ['', 'abc', 'xyz', 'abc', ''])

    #findall
    self.assertEqual(re.findall("(abc){1}", ""), [])
    self.assertEqual(re.findall("(abc){1}", "abcxyz"), ['abc'])
    self.assertEqual(re.findall("(abc){1}", "abcxyz", re.L), ['abc'])
    self.assertEqual(re.findall("(abc){1}", "abcxyz", flags=re.L), ['abc'])
    self.assertEqual(re.findall("(abc){1}", "xyzabcabc"), ['abc', 'abc'])

    #finditer
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "")], [])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz")], ['abc'])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", re.L)], ['abc'])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "abcxyz", flags=re.L)], ['abc'])
    self.assertEqual([x.group() for x in re.finditer("(abc){1}", "xyzabcabc")], ['abc', 'abc'])
    rex = re.compile("foo")
    for m in rex.finditer("this is a foo and a foo bar"):
        self.assertEqual((m.pos, m.endpos), (0, 27))
    for m in rex.finditer(""):
        self.assertEqual((m.pos, m.endpos), (0, 1))
    for m in rex.finditer("abc"):
        self.assertEqual((m.pos, m.endpos), (0, 4))
    for m in rex.finditer("foo foo foo foo foo"):
        self.assertEqual((m.pos, m.endpos), (0, 19))

    #sub
    self.assertEqual(re.sub("(abc){1}", "9", "abcd"), "9d")
    self.assertEqual(re.sub("(abc){1}", "abcxyz", 'abcd'), "abcxyzd")
    self.assertEqual(re.sub("(abc){1}", "1", "abcd", 0), "1d")
    self.assertEqual(re.sub("(abc){1}", "1", "abcd", count=0), "1d")
    self.assertEqual(re.sub("(abc){1}", "1", "abcdabcd", 1), "1dabcd")
    self.assertEqual(re.sub("(abc){1}", "1", "abcdabcd", 2), "1d1d")
    self.assertEqual(re.sub("(abc){1}", "1", "ABCdabcd", 2, flags=re.I), "1d1d")

    #subn
    self.assertEqual(re.subn("(abc){1}", "9", "abcd"), ("9d", 1))
    self.assertEqual(re.subn("(abc){1}", "abcxyz", 'abcd'), ("abcxyzd", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcd", 0), ("1d", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcd", count=0), ("1d", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcdabcd", 1), ("1dabcd", 1))
    self.assertEqual(re.subn("(abc){1}", "1", "abcdabcd", 2), ("1d1d", 2))
    self.assertEqual(re.subn("(abc){1}", "1", "ABCdabcd", 2, flags=re.I), ("1d1d", 2))

    #escape
    self.assertEqual(re.escape("abc"), "abc")
    self.assertEqual(re.escape(""), "")
    self.assertEqual(re.escape("_"), "\\_")
    self.assertEqual(re.escape("a_c"), "a\\_c")

    #error
    exc = re.error()
    exc = re.error("some args")

    #purge
    re.purge()
def describe_regex(regex):
    re.purge()
    re.compile(regex, re.DEBUG)
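# re.DEBUG makes re.compile() print the parsed pattern tree to stdout, so
# describe_regex() above is called purely for that side effect; for example:
describe_regex(r'(ab|cd)+')
# prints something like:
#   MAX_REPEAT 1 MAXREPEAT
#     SUBPATTERN 1 0 0
#       BRANCH
#         LITERAL 97
#         LITERAL 98
#       OR
#         LITERAL 99
#         LITERAL 100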
def clear_caches():
    # Clear the warnings registry, so they can be displayed again
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Flush standard output, so that buffered data is sent to the OS and
    # associated Python objects are reclaimed.
    for stream in (sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__):
        if stream is not None:
            stream.flush()

    # Clear assorted module caches.
    # Don't worry about resetting the cache if the module is not loaded
    try:
        distutils_dir_util = sys.modules['distutils.dir_util']
    except KeyError:
        pass
    else:
        distutils_dir_util._path_created.clear()
    re.purge()

    try:
        _strptime = sys.modules['_strptime']
    except KeyError:
        pass
    else:
        _strptime._regex_cache.clear()

    try:
        urllib_parse = sys.modules['urllib.parse']
    except KeyError:
        pass
    else:
        urllib_parse.clear_cache()

    try:
        urllib_request = sys.modules['urllib.request']
    except KeyError:
        pass
    else:
        urllib_request.urlcleanup()

    try:
        linecache = sys.modules['linecache']
    except KeyError:
        pass
    else:
        linecache.clearcache()

    try:
        mimetypes = sys.modules['mimetypes']
    except KeyError:
        pass
    else:
        mimetypes._default_mime_types()

    try:
        filecmp = sys.modules['filecmp']
    except KeyError:
        pass
    else:
        filecmp._cache.clear()

    try:
        struct = sys.modules['struct']
    except KeyError:
        pass
    else:
        # TODO: fix
        # struct._clearcache()
        pass

    try:
        doctest = sys.modules['doctest']
    except KeyError:
        pass
    else:
        doctest.master = None

    try:
        ctypes = sys.modules['ctypes']
    except KeyError:
        pass
    else:
        ctypes._reset_cache()

    try:
        typing = sys.modules['typing']
    except KeyError:
        pass
    else:
        for f in typing._cleanups:
            f()

    support.gc_collect()
def execute(self, message):
    """
    :type message: IrcMessage
    """
    # Immediately check if there's any parameters, to prevent useless work
    if message.messagePartsLength == 0:
        message.reply("Please provide a term to search for. See '{}help {}' for an explanation how to use this command".format(message.bot.commandPrefix, message.trigger), "say")
        return
    searchType = message.messageParts[0].lower()
    addExtendedInfo = message.trigger == 'netrunner'
    # Check for update command before file existence, to prevent message that card file is missing after update, which doesn't make much sense
    if searchType == 'update' or searchType == 'forceupdate':
        if self.areCardfilesBeingUpdated:
            replytext = "I'm already updating!"
        elif not message.bot.isUserAdmin(message.user, message.userNickname, message.userAddress):
            replytext = "Sorry, only admins can use my update function"
        elif not searchType == 'forceupdate' and not self.shouldUpdate()[0]:
            replytext = "The last update check was done pretty recently, there's no need to check again so soon"
        else:
            replytext = self.updateCardFile()[1]
            # Since we're checking now, set the automatic check to start counting from now on
            self.resetScheduledFunctionGreenlet()
        message.reply(replytext, "say")
        return
    # Check if the data file even exists
    elif not os.path.exists(os.path.join(GlobalStore.scriptfolder, 'data', 'NetrunnerCards.json')):
        if self.areCardfilesBeingUpdated:
            message.reply("I don't have my card database, but I'm solving that problem as we speak! Try again in, oh, 10, 15 seconds")
        else:
            message.reply("Sorry, I don't appear to have my card database. I'll try to retrieve it though! Give me 20 seconds, tops")
            gevent.spawn(self.updateCardFile)
            self.resetScheduledFunctionGreenlet()
        return

    # If we reached here, we're gonna search through the card store
    searchDict = {}
    # If there is an actual search (with colon key-value separator) OR a random card is requested with specific search requirements
    if (searchType == 'search' and ':' in message.message) or (searchType == 'random' and message.messagePartsLength > 1):
        # Advanced search!
        if message.messagePartsLength <= 1:
            message.reply("Please provide an advanced search query too, in JSON format, so 'key1: value1, key2: value2'")
            return
        # Turn the search string (not the argument) into a usable dictionary, case-insensitive
        searchDict = SharedFunctions.stringToDict(" ".join(message.messageParts[1:]).lower(), True)
        if len(searchDict) == 0:
            message.reply("That is not a valid search query. It should be entered like JSON, so 'name: Wall of Thorns, type: ICE,...'.")
            return
    # If the searchtype is just 'random', don't set a 'name' field so we don't go through all the cards first
    # Otherwise, set the whole message as the 'name' search, since that's the default search
    elif not searchType.startswith('random'):
        searchDict['title'] = message.message.lower()

    # Correct some values, to make searching easier (so a search for 'set' or 'sets' both work)
    searchTermsToCorrect = {'setname': ['set', 'sets'], 'flavor': ['flavour'], 'title': ['name']}
    for correctTerm, listOfWrongterms in searchTermsToCorrect.iteritems():
        for wrongTerm in listOfWrongterms:
            if wrongTerm in searchDict:
                if correctTerm not in searchDict:
                    searchDict[correctTerm] = searchDict[wrongTerm]
                searchDict.pop(wrongTerm)

    # Turn the search strings into actual regexes
    regexDict = {}
    errors = []
    for attrib, query in searchDict.iteritems():
        try:
            # Since the query is a string, and the card data is unicode, convert the query to unicode before turning it into a regex
            regex = re.compile(unicode(query, encoding='utf8'), re.IGNORECASE)
        except (re.error, SyntaxError) as e:
            self.logError("[Netrunner] Regex error when trying to parse '{}': {}".format(query, e))
            errors.append(attrib)
        except UnicodeDecodeError as e:
            self.logError("[Netrunner] Unicode error in key '{}': {}".format(attrib, e))
            errors.append(attrib)
        else:
            regexDict[attrib] = regex
    # If there were errors parsing the regular expressions, don't continue, to prevent errors further down
    if len(errors) > 0:
        # If there was only one search element to begin with, there's no need to specify
        if len(searchDict) == 1:
            message.reply("An error occurred when trying to parse your search query. Please check if it is a valid regular expression, and that there are no non-UTF8 characters")
        # If there were more elements but only one error, specify
        elif len(errors) == 1:
            message.reply("An error occurred while trying to parse the query for the '{}' field. Please check if it is a valid regular expression without non-UTF8 characters".format(errors[0]))
        # Multiple errors, list them all
        else:
            message.reply("Errors occurred while parsing attributes: {}. Please check your search query for errors".format(", ".join(errors)))
        return

    # All entered data is valid, look through the stored cards
    with open(os.path.join(GlobalStore.scriptfolder, 'data', 'NetrunnerCards.json'), 'r') as jsonfile:
        cardstore = json.load(jsonfile)
    for index in xrange(0, len(cardstore)):
        carddata = cardstore.pop(0)
        # Then check if the rest of the attributes match
        for attrib in regexDict:
            if attrib not in carddata or not regexDict[attrib].search(carddata[attrib]):
                # If the wanted attribute is either not in the card, or it doesn't match, throw it out
                break
        # The else-block of a for-loop is executed when a for-loop isn't broken out of. So if everything matches, we get here
        else:
            cardstore.append(carddata)
    numberOfCardsFound = len(cardstore)

    # Pick a random card if needed and possible
    if searchType.startswith('random') and numberOfCardsFound > 0:
        cardstore = [random.choice(cardstore)]
        numberOfCardsFound = 1

    if numberOfCardsFound == 0:
        replytext = "Sorry, no card matching your query was found"
    elif numberOfCardsFound == 1:
        replytext = self.getFormattedCardInfo(cardstore[0], addExtendedInfo)
    else:
        nameMatchedCardFound = False
        replytext = ""
        # If there was a name search, check if the literal name is in the resulting cards
        if 'title' in searchDict:
            titleMatchIndex = None
            for index, card in enumerate(cardstore):
                if card['title'].lower() == searchDict['title']:
                    titleMatchIndex = index
                    break
            if titleMatchIndex:
                replytext = self.getFormattedCardInfo(cardstore[titleMatchIndex], addExtendedInfo)
                cardstore.pop(titleMatchIndex)
                numberOfCardsFound -= 1
                nameMatchedCardFound = True
        # Pick some cards to show
        maxCardsToList = 15
        if numberOfCardsFound > maxCardsToList:
            cardstore = random.sample(cardstore, maxCardsToList)
        cardnameText = ""
        for card in cardstore:
            cardnameText += card['title'].encode('utf-8') + "; "
        cardnameText = cardnameText[:-2]
        if nameMatchedCardFound:
            replytext += " ({:,} more match{} found: ".format(numberOfCardsFound, 'es' if numberOfCardsFound > 1 else '')
        else:
            replytext += "Your search returned {:,} cards: ".format(numberOfCardsFound)
        replytext += cardnameText
        if numberOfCardsFound > maxCardsToList:
            replytext += " and {:,} more".format(numberOfCardsFound - maxCardsToList)
        # Since the extra results list is bracketed when a literal match was also found, it needs a closing bracket
        if nameMatchedCardFound:
            replytext += ")"
    re.purge()  # Clear the stored regexes, since we don't need them anymore
    message.reply(replytext)
def export_to_git(revisions, done_count, devpath=False, ancestor=False, ancestorDate=None):
    if len(revisions) == 0:
        return done_count
    abs_sandbox_path = os.getcwd()
    abs_sandbox_path = abs_sandbox_path.replace("\\", "/")
    integrity_file = os.path.basename(project)
    # any path named .git, with or without child elements. But will not match .gitignore
    git_folder_re = re.compile("\.git(\\\|$)")
    if "ancestorDate" in revisions[0]:
        ancestor = revisions[0]["ancestor"]
        ancestorDate = revisions[0]["ancestorDate"]
    for revision in revisions:
        print("%d of %d (%0.2f%%)" % (done_count + 1, total_revision_count, done_count / total_revision_count * 100), file=sys.stderr)
        done_count += 1
        mark = marks[revision["number"]]
        si('si retargetsandbox %s --quiet --project="%s" --projectRevision=%s "%s/%s"' % (additional_si_args, project, revision["number"], abs_sandbox_path, integrity_file))
        si('si resync --yes --recurse %s --quiet --sandbox="%s/%s"' % (additional_si_args, abs_sandbox_path, integrity_file))
        if devpath:
            print_out('commit refs/heads/devpath/%s' % devpath)
        else:
            print_out('commit refs/heads/main')
        print_out('mark %s' % mark)
        print_out('committer %s <> %d +0000' % (revision["author"], revision["seconds"]))
        export_string(revision["description"])
        if ancestor:
            # we're starting a development path so we need to start from where it was originally branched
            print_out('from %s' % marks[ancestor])
            ancestor = False  # set to zero so it doesn't loop back in to here
        print_out('deleteall')
        tree = os.walk('.')
        for dir in tree:
            for filename in dir[2]:
                if (dir[0] == '.'):
                    fullfile = filename
                else:
                    fullfile = os.path.join(dir[0], filename)[2:]
                if (fullfile.find('.pj') != -1):
                    continue
                #if (fullfile[0:4] == ".git"):
                if git_folder_re.search(fullfile):
                    continue
                if (fullfile.find('mks_checkpoints_to_git') != -1):
                    continue
                inline_data(fullfile)
        for tag in revision["tags"]:
            print_out('tag %s' % tag.replace(" ", "_"))
            print_out('from %s' % mark)
            print_out('tagger %s <> %d +0000' % (revision["author"], revision["seconds"]))
            export_string("")  # Tag message
        re.purge()
        print_out('checkpoint')
    return done_count
tr = soup.find(attrs={'id': 'places_area__row'})
td = tr.find(attrs={'class': 'w2p_fw'})
print td.text

broken_html = "<ul class = country><li>Area<li>Population</ul>"
tree = lxml.html.fromstring(broken_html)
fixed_html = lxml.html.tostring(tree, pretty_print=True)
print "new html:\n", fixed_html

tree2 = lxml.html.fromstring(html)
td = tree2.cssselect("tr#places_area__row > td.w2p_fw")[0]
print td.text_content()

for name, scraper in [('Regular expressions', re_scraper), ('BeautifulSoup', bs_scraper), ('lxml', lxml_scraper)]:
    start_time = time.time()
    for i in range(NUM_ITERATIONS):
        if scraper == re_scraper:
            re.purge()
        result = scraper(html)
        assert (result['area'] == '1580 square kilometres')
    end = time.time()
    print '%s:%.2f seconds' % (name, end - start_time)

# 2018.08.05 test
'''
1580 square kilometres
Regular expressions:15.61 seconds
BeautifulSoup:77.98 seconds
lxml:3.76 seconds
'''
def parseExpr(self, expr: str):
    re.purge()
    return re.findall(r"[^[]*\[([^]]*)\]", expr)
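# The pattern above grabs the text between each [...] pair while skipping what
# precedes it; a quick illustration (obj stands in for an instance of the
# enclosing class):
#   obj.parseExpr('matrix[3][7]')   -> ['3', '7']
#   obj.parseExpr('cfg[env][name]') -> ['env', 'name']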
def validate(self: object, customValidator: str = None):
    """
    Validate a resultset against predefined metadata based on the LANG
    rules of data quality.
    """
    if self.metadata is None:
        raise ValidationError("LANG Exception: meta-data has not been set", None)
    elif self.dataset is None:
        raise ValidationError("LANG Exception: resultset has not been set", None)

    # Change request: find and output the primary key in the error report
    # file if specified.
    primary_key = ""
    primary_key_values = None

    for key, item in self.metadata.items():
        if MetaUtils.isTrue(item, "PrimaryKey"):
            primary_key = key
            primary_key_values = self.dataset[primary_key]
            break

    # Execute a series of validations against the supplied column of data
    # and the metadata for the column. Which validation is run is determined
    # by entries in the metadata.
    for meta_attribute_key, meta_attribute_definition in self.metadata.items():
        if meta_attribute_key in self.dataset:
            print("Validating attribute \t'" + meta_attribute_key + "'...", end='\r')
            attribute = self.dataset[meta_attribute_key]

            for row_count in range(len(attribute)):
                value = attribute[row_count]

                # If a PrimaryKey tag has been found then output the value so
                # that the user has a reference to search for the record in
                # the source system. If there is no primary key attribute set
                # then output the row count instead.
                if primary_key_values is not None:
                    primary_key_value = primary_key_values[row_count]
                else:
                    primary_key_value = "Row: " + str(row_count + 1)

                self.checkMandatory(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkSize(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkType(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkEnum(meta_attribute_definition, meta_attribute_key, value, primary_key_value)
                self.checkStartsWith(meta_attribute_definition, meta_attribute_key, value, primary_key_value)

            # Format check (the metadata must provide a regex).
            if MetaUtils.exists(meta_attribute_definition, "Format"):
                re.purge()
                regex = re.compile(meta_attribute_definition["Format"])

                for row_count in range(len(attribute)):
                    # Guard against a missing primary key, as above.
                    if primary_key_values is not None:
                        primary_key_value = primary_key_values[row_count]
                    else:
                        primary_key_value = "Row: " + str(row_count + 1)
                    value = attribute[row_count]
                    isMatch = regex.match(value) is not None

                    if not isMatch and not MetaUtils.isAllowBlank(meta_attribute_definition):
                        self.addDataQualityError(DataQualityError(
                            meta_attribute_key,
                            error_dimension=DataQualityDimension.FORMATCONSISTENCY.value,
                            description="Error: Value '" + value + "' does not match regex #'" + meta_attribute_definition["Format"] + "'"))

            # Unique field check: track values already seen in the column and
            # report any repeats.
            if MetaUtils.isTrue(meta_attribute_definition, "Unique"):
                seen = set()

                for row_count in range(len(attribute)):
                    if primary_key_values is not None:
                        primary_key_value = primary_key_values[row_count]
                    else:
                        primary_key_value = "Row: " + str(row_count + 1)
                    value = attribute[row_count]

                    if value not in seen:
                        seen.add(value)  # only process a value once
                    else:
                        self.addDataQualityError(DataQualityError(
                            meta_attribute_key,
                            error_dimension=DataQualityDimension.UNIQUENESS.value,
                            description="Error: Value '" + value + "' is not UNIQUE. A unique value was expected."))

            self.checkComposite(meta_attribute_definition, meta_attribute_key)

            # Expression evaluation is different to processing field-specific
            # validations as it can link in other columns from the resultset.
            self.evaluateExpression(meta_attribute_definition, meta_attribute_key)

            print("Validating attribute \t'" + meta_attribute_key + "'...\t\t..Complete.")
        else:
            self.addDataQualityError(DataQualityError(
                meta_attribute_key,
                error_dimension=DataQualityDimension.METADATACOMPLIANCE.value,
                description="Error: Attribute '" + meta_attribute_key + "' was not found in the dataset."))

    # Only invoke the custom validator if one has been provided.
    if customValidator is not None and len(customValidator) > 0:
        self.customValidator(customValidator)
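A minimal standalone sketch of the Format check performed inside validate(), with hypothetical metadata and dataset shapes (the real MetaUtils/DataQualityError plumbing is omitted):

import re

metadata = {"student_id": {"Format": r"^[0-9]{2}IT[0-9]{3}$"}}
dataset = {"student_id": ["18IT033", "18CE033"]}

pattern = re.compile(metadata["student_id"]["Format"])
for row, value in enumerate(dataset["student_id"], start=1):
    if pattern.match(value) is None:
        print("Row %d: value '%s' does not match the Format regex" % (row, value))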
def remove_returns(s, replace_by):
    """Collapse runs of whitespace (including end-of-line characters) into replace_by."""
    re.purge()  # clear the compiled-pattern cache first
    temp = re.compile(r"\s+")
    return temp.sub(replace_by, s)
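A quick usage check of the helper above (expected output shown in the comment):

print(remove_returns("one\ntwo\r\n  three", " "))  # -> 'one two three'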
def __setUpGrammars(self, defaultGrammars):
    self.grammars = {}
    # Arrange all the grammars by name.
    for k, v in defaultGrammars.items():
        v['name'] = k
        self.grammars[k] = v
    # Compile regexes for each grammar.
    for k, v in defaultGrammars.items():
        if 0:
            # keywords re.
            v['keywordsRe'] = re.compile(
                app.regex.joinReWordList(
                    v.get('keywords', []) + v.get('types', [])))
            v['errorsRe'] = re.compile(
                app.regex.joinReList(v.get('errors', [])))
            v['specialsRe'] = re.compile(
                app.regex.joinReList(v.get('special', [])))
        # contains and end re.
        matchGrammars = []
        markers = []
        # Index [0]
        if v.get('escaped'):
            markers.append(v['escaped'])
            matchGrammars.append(v)
        else:
            # Add a non-matchable placeholder.
            markers.append(app.regex.kNonMatchingRegex)
            matchGrammars.append(None)
        # Index [1]
        if v.get('end'):
            markers.append(v['end'])
            matchGrammars.append(v)
        else:
            # Add a non-matchable placeholder.
            markers.append(app.regex.kNonMatchingRegex)
            matchGrammars.append(None)
        # |Contains| markers start at index 2.
        for grammarName in v.get('contains', []):
            g = self.grammars.get(grammarName, None)
            if g is None:
                self._raiseGrammarNotFound()
            markers.append(g.get('begin', g.get('matches', u"")))
            matchGrammars.append(g)
        # |Next| markers start after |contains|.
        for grammarName in v.get('next', []):
            g = self.grammars.get(grammarName, None)
            if g is None:
                self._raiseGrammarNotFound()
            markers.append(g['begin'])
            matchGrammars.append(g)
        # |Errors| markers start after |next| markers.
        markers += v.get('errors', [])
        # |Keywords| markers start after |errors| markers.
        for keyword in v.get('keywords', []):
            markers.append(r'\b' + keyword + r'\b')
        # |Types| markers start after |keywords| markers.
        for types in v.get('types', []):
            markers.append(r'\b' + types + r'\b')
        # |Special| markers start after |types| markers.
        markers += v.get('special', [])
        # Variable width characters are at index [-3] in markers.
        markers.append(r'\t+')
        # Double wide characters are at index [-2] in markers.
        markers.append(u'[\u3000-\uffff]+')
        # Carriage return characters are at index [-1] in markers.
        markers.append(r'\n')
        #app.log.startup('markers', v['name'], markers)
        v['matchRe'] = re.compile(app.regex.joinReList(markers))
        v['markers'] = markers
        v['matchGrammars'] = matchGrammars
        containsGrammarIndexLimit = 2 + len(v.get('contains', []))
        nextGrammarIndexLimit = containsGrammarIndexLimit + len(v.get('next', []))
        errorIndexLimit = nextGrammarIndexLimit + len(v.get('errors', []))
        keywordIndexLimit = errorIndexLimit + len(v.get('keywords', []))
        typeIndexLimit = keywordIndexLimit + len(v.get('types', []))
        specialIndexLimit = typeIndexLimit + len(v.get('special', []))
        v['indexLimits'] = (containsGrammarIndexLimit, nextGrammarIndexLimit,
                            errorIndexLimit, keywordIndexLimit,
                            typeIndexLimit, specialIndexLimit)
    # Reset the re cache for user regexes.
    re.purge()
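The core trick in __setUpGrammars is joining many small patterns into one alternation and using the index of the matching group to classify the hit. A minimal standalone sketch of that idea (the marker list here is illustrative, not the app.regex API):

import re

# Each alternative gets its own group; lastindex reports which one fired.
markers = [r'\bdef\b', r'\bclass\b', r'#.*']
matchRe = re.compile('|'.join('(%s)' % m for m in markers))

m = matchRe.search('class Foo:  # a comment')
if m:
    print('marker %d matched %r' % (m.lastindex, m.group(m.lastindex)))
    # -> marker 2 matched 'class'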
The expression's behaviour can be modified by specifying a flags value. Flag
values can be any of the re flag variables, combined using bitwise OR (the |
operator).

Note: using re.compile() and saving the resulting regular expression object for
reuse is more efficient when the expression will be used several times in a
single program.
'''

string1 = "18IT033"
string2 = "My id is 18CE033"

pattern1 = "^[0-9]{2}(IT)[0-9]{3}"
patt1 = re.compile(pattern1)

result1 = patt1.match(string1)
print(result1)

result1 = re.match(pattern1, string1)
print(result1)

result2 = patt1.match(string2)
print(result2)

'''
re.purge()
Clear the regular expression cache.
'''
re.purge()

'''
re.escape(pattern)
Escape special characters in pattern. This is useful if you want to match an
arbitrary literal string that may have regular expression metacharacters in it.
'''
print(re.escape("h.(h)"))
print(re.escape("n&n"))
print(re.escape("n*{n}"))
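The notes above mention combining flags with the | operator; a small illustration (the pattern and strings here are my own, not from the original snippet):

import re

# IGNORECASE and MULTILINE combined with bitwise OR.
pattern = re.compile(r'^id: \w+$', re.IGNORECASE | re.MULTILINE)
print(pattern.findall('ID: 18IT033\nid: 18CE033'))
# -> ['ID: 18IT033', 'id: 18CE033']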
def purge():
    re.purge()
def clear_caches():
    import gc

    # Clear the warnings registry, so they can be displayed again.
    for mod in sys.modules.values():
        if hasattr(mod, '__warningregistry__'):
            del mod.__warningregistry__

    # Clear assorted module caches.
    # Don't worry about resetting the cache if the module is not loaded.
    try:
        distutils_dir_util = sys.modules['distutils.dir_util']
    except KeyError:
        pass
    else:
        distutils_dir_util._path_created.clear()

    re.purge()

    try:
        _strptime = sys.modules['_strptime']
    except KeyError:
        pass
    else:
        _strptime._regex_cache.clear()

    try:
        urlparse = sys.modules['urlparse']
    except KeyError:
        pass
    else:
        urlparse.clear_cache()

    try:
        urllib = sys.modules['urllib']
    except KeyError:
        pass
    else:
        urllib.urlcleanup()

    try:
        urllib2 = sys.modules['urllib2']
    except KeyError:
        pass
    else:
        urllib2.install_opener(None)

    try:
        dircache = sys.modules['dircache']
    except KeyError:
        pass
    else:
        dircache.reset()

    try:
        linecache = sys.modules['linecache']
    except KeyError:
        pass
    else:
        linecache.clearcache()

    try:
        mimetypes = sys.modules['mimetypes']
    except KeyError:
        pass
    else:
        mimetypes._default_mime_types()

    try:
        filecmp = sys.modules['filecmp']
    except KeyError:
        pass
    else:
        filecmp._cache.clear()

    try:
        struct = sys.modules['struct']
    except KeyError:
        pass
    else:
        struct._clearcache()

    try:
        doctest = sys.modules['doctest']
    except KeyError:
        pass
    else:
        doctest.master = None

    try:
        ctypes = sys.modules['ctypes']
    except KeyError:
        pass
    else:
        ctypes._reset_cache()

    # Collect cyclic trash.
    support.gc_collect()
def WriteCert(ProgPath, InputName, OutputName, IsLabelInOutput=True, DoEncode=False):
    TempStr = ''
    RawLine = ''
    EncodeLine = ''
    CleanLine = ''
    I = 0
    VarLen = 0
    ListObj = None
    ReObj = None
    FileLineNo = 0
    ErrorNumber = 0
    CertBegin = False
    DataBegin = False
    DataEnd = False
    TrustBegin = False
    LabelPrinted = False

    CertReObj = re.compile('CKA_CLASS CK_OBJECT_CLASS CKO_CERTIFICATE', re.IGNORECASE)
    if not CertReObj:
        print(ErrorMainList[0])
        return 254

    LabelStr = ''
    LabelReObj = re.compile(r'CKA_LABEL UTF8 \"([^\"]+)\"', re.IGNORECASE)
    if not LabelReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    DataRawStr = ''
    DataEncSplit = None
    DataReObj = re.compile('CKA_VALUE MULTILINE_OCTAL', re.IGNORECASE)
    if not DataReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    OctetsReObj = re.compile('[0-7][0-7][0-7]', re.IGNORECASE)
    if not OctetsReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    EndReObj = re.compile('END', re.IGNORECASE)
    if not EndReObj:
        re.purge()
        print(ErrorMainList[0])
        return 254

    TrustReObj = re.compile('CKA_CLASS CK_OBJECT_CLASS CKO_NSS_TRUST', re.IGNORECASE)
    if not TrustReObj:
        print(ErrorMainList[0])
        return 254

    TrustPurpose = ''
    TrustLevel = ''
    TrustPrimaryReObj = re.compile(
        r'CKA_TRUST_([a-z_]+) CK_TRUST CKT_NSS_([a-z_]+)', re.IGNORECASE)
    if not TrustPrimaryReObj:
        print(ErrorMainList[0])
        return 254

    try:
        os.remove(OutputName)
        print('Deleted file "%s".' % OutputName)
    except:
        pass

    FTxtInObj = open(InputName, 'rb')
    FTxtOutObj = open(OutputName, 'wb')

    for RawLine in FTxtInObj:
        FileLineNo += 1
        if DoEncode:
            try:
                EncodeLine = RawLine.encode('utf_8', 'strict')
            except:
                try:
                    EncodeLine = ''
                    if LabelPrinted:
                        TempStr = ErrorEncodeList[0] + ErrorEncodeList[1] + ErrorEncodeList[0] + ErrorEncodeList[2]
                    else:
                        TempStr = '\n' + ErrorEncodeList[1] + ErrorEncodeList[0] + ErrorEncodeList[2]
                    print(TempStr % FileLineNo)
                    TempStr = ''
                    EncodeLine = RawLine.encode('utf_8', 'ignore')
                    if LabelPrinted:
                        TempStr = ErrorEncodeList[0]
                    TempStr += ErrorEncodeList[0] + ErrorEncodeList[4]
                    print(TempStr)
                    TempStr = ''
                except:
                    try:
                        EncodeLine = RawLine
                        if LabelPrinted:
                            TempStr = ErrorEncodeList[0]
                        TempStr += ErrorEncodeList[0] + ErrorEncodeList[4]
                        if not LabelPrinted:
                            TempStr += '\n'
                        print(TempStr)
                        TempStr = ''
                    except:
                        ErrorNumber = 250
                        break
            ListObj = EncodeLine.splitlines(False)
            EncodeLine = ''
        else:
            ListObj = RawLine.splitlines(False)

        ListRemoveEmpty(ListObj)
        if not ListObj:
            if DataBegin:
                try:
                    print(ErrorReadList[1] % FileLineNo)
                finally:
                    ErrorNumber = 3
                break
            continue

        CleanLine = ListObj[0].strip()
        ListClean(ListObj)
        if (not CleanLine) or (CleanLine == '#'):
            if DataBegin:
                try:
                    print(ErrorReadList[1] % FileLineNo)
                finally:
                    ErrorNumber = 3
                break
            continue

        if CertBegin:
            if not DataBegin:
                if CertReObj.match(CleanLine):
                    ListClean(DataEncSplit)
                    DataRawStr = ''
                    LabelStr = ''
                    TrustBegin = False
                    DataEnd = False
                    DataBegin = False
                    if LabelPrinted:
                        LabelPrinted = False
                        print(' CANCELING. Found NON CA. Line %d.' % FileLineNo)
                else:
                    if not TrustBegin:
                        if not DataEnd:
                            if not LabelStr:
                                try:
                                    ReObj = LabelReObj.match(CleanLine)
                                    if ReObj:
                                        LabelStr = ReObj.group(1)
                                        if not LabelStr:
                                            raise ValueError
                                        else:
                                            LabelPrinted = True
                                            LabelStr = CorrectCertLabel(LabelStr)
                                            print('\nCertificate on Line %d\n "%s"' % (FileLineNo, LabelStr))
                                            VarLen = len(LabelStr)
                                            LabelStr += FStyleNL
                                            for I in range(0, VarLen, 1):
                                                LabelStr += FStyleLabelUR
                                            LabelStr += FStyleNL
                                except:
                                    try:
                                        print(ErrorReadList[0] % FileLineNo)
                                    finally:
                                        ErrorNumber = 2
                                    break
                            elif DataReObj.match(CleanLine):
                                DataBegin = True
                        elif TrustReObj.match(CleanLine):
                            TrustBegin = True
                    else:
                        try:
                            ReObj = TrustPrimaryReObj.match(CleanLine)
                            if ReObj:
                                TrustPurpose = ReObj.group(1).upper()
                                TrustLevel = ReObj.group(2).upper()
                                if (TrustPurpose in MozillaTrustReqPrimary) and (TrustLevel == MozillaTrustLevels[0]):
                                    FTxtOutObj.write(FStyleNL)
                                    if IsLabelInOutput:
                                        FTxtOutObj.write(LabelStr)
                                    for I in range(0, len(DataEncSplit), 1):
                                        FTxtOutObj.write(DataEncSplit[I])
                                    FTxtOutObj.flush()
                                    ListClean(DataEncSplit)
                                    DataRawStr = ''
                                    LabelStr = ''
                                    LabelPrinted = False
                                    TrustBegin = False
                                    DataEnd = False
                                    DataBegin = False
                                    CertBegin = False
                                    print(' SAVE.')
                                TrustLevel = ''
                                TrustPurpose = ''
                        except:
                            try:
                                print(ErrorReadList[2] % FileLineNo)
                            finally:
                                ErrorNumber = 4
                            break
            else:
                if EndReObj.match(CleanLine):
                    try:
                        DataEncSplit = CertToBase64(DataRawStr, True, True)
                        DataRawStr = ''
                        if not DataEncSplit:
                            raise ValueError
                    except:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            ErrorNumber = 250
                        break
                    DataBegin = False
                    DataEnd = True
                else:
                    try:
                        ListObj = CleanLine.split('\\')
                        if not ListObj:
                            raise ValueError
                    except:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            ErrorNumber = 250
                        break
                    ListRemoveEmpty(ListObj)
                    if not ListObj:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            ErrorNumber = 3
                        break
                    for I in range(0, len(ListObj), 1):
                        if not OctetsReObj.match(ListObj[I]):
                            ErrorNumber = 3
                            break
                        try:
                            DataRawStr += chr(int(ListObj[I], 8))
                        except:
                            ErrorNumber = 3
                            break
                    ListClean(ListObj)
                    if ErrorNumber:
                        try:
                            print(ErrorReadList[1] % FileLineNo)
                        finally:
                            break
        elif CertReObj.match(CleanLine):
            CertBegin = True

    FTxtOutObj.close()
    FTxtInObj.close()
    re.purge()
    return ErrorNumber
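For reference, a minimal standalone sketch of the MULTILINE_OCTAL decoding step used in WriteCert (the sample line is illustrative, in the style of certdata.txt value blocks):

import re

octet_re = re.compile('[0-7][0-7][0-7]')

line = r'\101\102\103'  # octal escapes as found in a MULTILINE_OCTAL block
octets = [o for o in line.split('\\') if o]
assert all(octet_re.match(o) for o in octets)
print(''.join(chr(int(o, 8)) for o in octets))  # -> 'ABC'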