Пример #1
0
 def getTitle(self, page):
     """Return the title found in ``page``, or ``None`` when there is none.

     The title is the text of the first ``<h1>`` element whose class
     attribute starts with ``core_title_txt``, with surrounding whitespace
     stripped.
     """
     # re.S lets "." span newlines, so a title split over lines still matches.
     pattern = re.compile('<h1 class="core_title_txt.*?>(.*?)</h1>', re.S)
     match = pattern.search(page)
     return match.group(1).strip() if match else None
Пример #2
0
    async def list_pipelines(
            self,
            pipeline_prefix: str = None,
            filter_tags: Dict[str, str] = None) -> List[PipelineHeaderModel]:
        """List pipeline headers, optionally filtered by UUID prefix and tags.

        Args:
            pipeline_prefix: When given, restricts the query to documents
                whose ``pipeline_uuid`` matches the regex ``<prefix>.*``.
            filter_tags: When given, each ``tags.<key>`` field must equal the
                supplied value.

        Returns:
            One ``PipelineHeaderModel`` per document found; the ``_id`` and
            ``pipeline_body`` fields are excluded from the query result.

        Raises:
            QueryException: If ``pipeline_prefix`` does not match
                ``[a-zA-Z0-9]+``.
        """
        self.log.debug("Received request to list pipelines")
        query_dict = {}
        if pipeline_prefix is not None:
            the_regex = '[a-zA-Z0-9]+'
            # NOTE(review): match() anchors only at the start, so a prefix
            # such as "abc.*" passes and reaches the $regex below unescaped;
            # consider fullmatch() if only alphanumerics are intended.
            if re.compile(the_regex).match(pipeline_prefix) is None:
                raise QueryException(
                    f"The prefix must match against regular expression {the_regex}"
                )
            query_dict['pipeline_uuid'] = {'$regex': f"{pipeline_prefix}.*"}

        # Tag filtering and the query itself must run whether or not a prefix
        # was supplied; otherwise a call without a prefix returns None.
        if filter_tags is not None:
            for k, v in filter_tags.items():
                query_dict[f'tags.{k}'] = v

        return [
            PipelineHeaderModel(**e)
            for e in self._pipeline_def_coll.find(query_dict, {
                '_id': False,
                'pipeline_body': False
            })
        ]
Пример #3
0
	def __init__(self, func):
		"""Build a route entry from a handler function.

		``func`` must carry ``__web_route__`` (the path) and
		``__web_method__`` (the HTTP method). When ``_re_route`` finds
		nothing in the path the route is static; otherwise a compiled
		pattern built by ``_build_regex`` is stored in ``self.route``.
		"""
		self.path = func.__web_route__
		self.method = func.__web_method__
		self.is_static = _re_route.search(self.path) is None
		if not self.is_static:
			# Only non-static routes get a ``route`` attribute.
			self.route = re.compile(_build_regex(self.path))
		self.func = func
Пример #4
0
	def getTitle(self, page):
		"""Return the stripped title text from ``page``, or ``None``.

		Looks for the first ``<h1>`` whose class attribute starts with
		``core_title_txt``.
		"""
		# re.S: "." also matches newlines inside the heading.
		pattern = re.compile('<h1 class="core_title_txt.*?>(.*?)</h1>', re.S)
		result = pattern.search(page)
		if result is None:
			return None
		return result.group(1).strip()
 def workflow_spec_added(self, name=None, spec=None):
     """Attach registered event handlers to a newly added workflow spec.

     ``self.spec`` maps a spec-name pattern to ``(event, handler)`` pairs.
     A pattern applies when it is ``"*"``, equals ``name``, or matches
     ``name`` as a wildcard in which ``*`` stands for any run of characters
     and ``?`` for a single character. Every handler of an applicable
     pattern is registered through ``spec.on(event, handler)``.
     """
     import re
     for spec_name, handlers in self.spec.items():
         regex = "^" + spec_name.replace("*", ".*").replace("?", ".") + "$"
         applies = (
             spec_name == "*"
             or spec_name == name
             # Test the wildcard against the incoming name; matching the
             # pattern against itself succeeds for almost every pattern.
             or (name is not None and re.compile(regex).search(name))
         )
         if applies:
             for event, handler in handlers:
                 spec.on(event, handler)
Пример #6
0
def get_row(doc):
    """Parse one listing page into movie rows and the next-page URL.

    Args:
        doc: HTML text of the page.

    Returns:
        ``(rows, next_url)``. ``rows`` holds one dict per ``<li>`` of the
        ``grid_view`` list with the keys ``name``, ``score``, ``star`` and
        ``info``. ``next_url`` is ``DOWNLOAD_URL`` plus the next link's
        ``href``, or ``None`` when the ``next`` span has no link.
    """
    soup = BeautifulSoup(doc, 'html.parser')
    ol = soup.find('ol', class_='grid_view')
    rows = []

    for i in ol.find_all('li'):
        # A dict takes item assignment, not attribute assignment.
        movie = {}
        detail = i.find('div', attrs={'class': 'hd'})
        movie['name'] = detail.find('span', attrs={'class': 'title'}).get_text()

        # Rating block: its full text, plus the string mentioning the rating count.
        star = i.find('span', attrs={'class': 'star'})
        movie['score'] = star.get_text()
        movie['star'] = star.find(text=re.compile('评价'))

        # Short review; store its text so the value is always a string.
        info = i.find('span', attrs={'class': 'inq'})
        movie['info'] = info.get_text() if info else '无'
        rows.append(movie)

    page = soup.find('span', attrs={'class': 'next'}).find('a')
    if page:
        return rows, DOWNLOAD_URL + page['href']
    return rows, None
Пример #7
0
	def post(self):
		"""Handle a submitted form with ``title``, ``tags`` and ``contents``.

		Splits the comma-terminated tag list and redirects to ``/ask`` when
		any of the three arguments is empty.
		"""
		title=self.get_argument("title")
		tags=self.get_argument("tags")
		content=self.get_argument("contents")
		# The tag list can only be parsed after it has been read from the request.
		# NOTE(review): the class [A-Za-z1-9] excludes the digit 0 -- confirm
		# whether 0-9 was intended.
		pattern = re.compile(r'[A-Za-z1-9]+\,')
		tag=pattern.findall(tags)
		if title == "" or tags == "" or content == "":
			self.redirect("/ask")
Пример #8
0
 def workflow_spec_added(self, name=None, spec=None):
     """Register the handlers of every pattern in ``self.spec`` that fits ``name``.

     A pattern fits when it is ``"*"``, is identical to ``name``, or matches
     ``name`` once ``*`` is read as "any characters" and ``?`` as "one
     character". Handlers are registered with ``spec.on(event, handler)``.
     """
     import re

     def fits(spec_name):
         if spec_name == "*" or spec_name == name:
             return True
         if name is None:
             return False
         regex = "^" + spec_name.replace("*", ".*").replace("?", ".") + "$"
         # Match against the added spec's name, not against the pattern itself.
         return re.compile(regex).search(name) is not None

     for spec_name in self.spec.keys():
         if fits(spec_name):
             for event, handler in self.spec[spec_name]:
                 spec.on(event, handler)
Пример #9
0
 def add_entry(self,tag_name,text=None,description='Specific',**attribute_dictionary):
     """Add an entry to the instrument sheet.

     Args:
         tag_name: Tag name of the element to create.
         text: Optional text content for the new element.
         description: Chooses the parent section. A value containing
             "Specific" (any case) selects ``Specific_Information``;
             otherwise one containing "General" selects
             ``General_Information``.
         **attribute_dictionary: Attributes for the new element; values are
             converted with ``str``.

     Raises:
         ValueError: If ``description`` names neither section.
     """
     if re.search('Specific', description, re.IGNORECASE):
         section = 'Specific_Information'
     elif re.search('General', description, re.IGNORECASE):
         section = 'General_Information'
     else:
         # Previously this fell through and failed later on an unbound name.
         raise ValueError(
             "description must contain 'Specific' or 'General': %r" % (description,))
     description_node = self.document.getElementsByTagName(section)[0]
     new_entry = self.document.createElement(tag_name)
     if text is not None:
         # The text node carries the supplied text, not the tag name.
         new_entry.appendChild(self.document.createTextNode(text))
     for key, value in attribute_dictionary.items():
         new_entry.setAttribute(key, str(value))
     description_node.appendChild(new_entry)
Пример #10
0
def getAllCrashLogFolders(pPath, productType):
    """Return the product-type match of every matching entry in ``pPath``.

    ``productType`` is lower-cased and used as a regular expression that
    must match at the start of an entry name. The matched text of each
    matching entry is collected; entries that do not match are skipped.

    Returns:
        A list of matched strings; empty when ``pPath`` is not a directory.
    """
    productType = productType.lower()
    crashLogFolders = []
    if os.path.isdir(pPath):
        pattern = re.compile(productType)  # compiled once, not per entry
        for folderName in os.listdir(pPath):
            match = pattern.match(folderName)
            # Guard against None: calling .group on a non-match raised.
            if match:
                crashLogFolders.append(match.group(0))

    return crashLogFolders
def getAllCrashLogFolders(pPath, productType):
    """Collect the leading ``productType`` match of each entry in ``pPath``.

    ``productType`` is lower-cased and treated as a regular expression
    anchored at the start of the entry name. Non-matching entries are left
    out. A path that is not a directory yields an empty list.
    """
    if not os.path.isdir(pPath):
        return []
    pattern = re.compile(productType.lower())
    matches = (pattern.match(folderName) for folderName in os.listdir(pPath))
    # Filter out None first; .group on a non-match raised AttributeError.
    return [found.group(0) for found in matches if found]
Пример #12
0
 def fuzzy_uninstall(self, serialno, apk, fuzzy_package):
     """Fuzzy uninstall: remove every listed package containing ``fuzzy_package``.

     Reads the package listing from ``self.packagefrom3(serialno)``, keeps
     the lines that start with ``<word>:`` and passes the text after the
     first colon to ``self.uninstall`` when the line contains
     ``fuzzy_package``. ``apk`` is not used here.
     """
     out = self.packagefrom3(serialno)
     pattern = re.compile(r"\w+:")
     for line in out.readlines():
         if not line or not pattern.match(line):
             continue
         if fuzzy_package in line:
             # partition() yields (head, sep, tail); the package is the tail.
             _, _, package = line.partition(":")
             # Strip the line terminator so it is not passed as part of the name.
             self.uninstall(package.strip())
Пример #13
0
def parse_gff(genome):
    """Print details of every CDS feature listed in the GFF file.

    The GFF path is read from the module-level ``args.gff``. For each CDS
    row this prints the second space-separated attribute token, the
    attribute column, the feature sequence cut from ``genome`` and its GC
    content (rounded, then formatted to two decimals). The reverse
    complement is also printed for features on the minus strand.

    Args:
        genome: Sequence that the 1-based, inclusive GFF coordinates index.
    """
    gene_pattern = re.compile(r"Gene\s+(\S+)\s+")
    complement = str.maketrans("ACGTacgt", "TGCAtgca")

    # read GFF file, line by line
    with open(args.gff, 'r') as gff_file:
        reader = csv.reader(gff_file, delimiter="\t")

        for line in reader:
            # skip blank lines, "#" header/comment lines and short rows,
            # none of which have the nine feature columns read below
            if not line or line[0].startswith('#') or len(line) < 9:
                continue

            feature_type = line[2]
            start = int(line[3])
            end = int(line[4])
            strand = line[6]
            attributes = line[8]

            # only CDS features are reported
            if feature_type != 'CDS':
                continue

            string = attributes.split(' ')
            name = string[1]
            print(name)

            # extract this feature from the genome
            feature_seq = genome[start - 1:end]

            # extract the gene name; \S+ (not a literal "S+") captures it
            match = gene_pattern.search(attributes)
            gene_name = match.group(1) if match else None

            print(attributes)
            print(feature_seq)
            feature_GC = gc(feature_seq)
            GCround = round(feature_GC, 2)
            print(GCround)
            print("{0:.2f}".format(feature_GC))

            # GFF marks the reverse strand with "-"
            if strand == '-':
                reverse = str(feature_seq).translate(complement)[::-1]
                print(reverse)
Пример #14
0
def bandDrawing(vasppath):
    """Read the k-path labels from the ``KPOINTS`` file inside ``vasppath``.

    The first line is scanned for runs of upper-case letters and hyphens,
    and each run is split on ``-``. The second line must parse as an
    integer. The resulting points are only iterated for now; nothing is
    returned.
    """
    import os
    import re
    with open(os.path.join(vasppath, "KPOINTS"), "r") as rfile:
        tmp = rfile.readlines()
    head = tmp[0]
    kint = int(tmp[1])
    re_str = re.compile(r"[A-Z\-]+")
    kpath = re_str.findall(head)
    kpath = [i.split("-") for i in kpath]
    for line in kpath:
        for keypoints in line:
            pass
Пример #15
0
def parse_page_detail(html, url):
    """Extract the title and gallery images from a detail page.

    Prints the page title, looks for a ``var gallery = ...;`` assignment
    and decodes its value as JSON. Each ``url`` under ``sub_images`` is
    passed to ``download_image``.

    Returns:
        ``{'title', 'url', 'images'}`` when the gallery has ``sub_images``;
        otherwise ``None``.
    """
    soup = BeautifulSoup(html, 'lxml')
    title = soup.select('title')[0].get_text()
    print(title)
    # re.S: the gallery literal may span several lines.
    images_pattern = re.compile('var gallery = (.*?);', re.S)
    result = images_pattern.search(html)
    if result:
        data = json.loads(result.group(1))
        if data and 'sub_images' in data:
            sub_images = data.get('sub_images')
            images = [item.get('url') for item in sub_images]
            for image in images:
                download_image(image)
            return {'title': title, 'url': url, 'images': images}
Пример #16
0
 def add_entry(self,
               tag_name,
               text=None,
               description='Specific',
               **attribute_dictionary):
     """Add an entry to the instrument sheet.

     The new ``tag_name`` element is appended to ``Specific_Information``
     when ``description`` contains "Specific" (case-insensitive), or to
     ``General_Information`` when it contains "General". ``text`` becomes
     the element's text content, and every keyword argument becomes an
     attribute with its value converted by ``str``.

     Raises:
         ValueError: If ``description`` selects neither section.
     """
     specific_match = re.compile('Specific', re.IGNORECASE)
     general_match = re.compile('General', re.IGNORECASE)
     if specific_match.search(description):
         description_node = self.document.getElementsByTagName(
             'Specific_Information')[0]
     elif general_match.search(description):
         description_node = self.document.getElementsByTagName(
             'General_Information')[0]
     else:
         # Fail early instead of hitting an unbound name at the end.
         raise ValueError(
             "description must contain 'Specific' or 'General': %r" %
             (description,))
     new_entry = self.document.createElement(tag_name)
     if text is not None:
         # Use the caller's text; the tag name was written here by mistake.
         text_node = self.document.createTextNode(text)
         new_entry.appendChild(text_node)
     # items() exists on both Python 2 and 3 dicts.
     for key, value in attribute_dictionary.items():
         new_entry.setAttribute(key, str(value))
     description_node.appendChild(new_entry)
Пример #17
0
def get_images(html_url='http://ycool.com/post/ae3u4zu',
              folder_name='jiyou_blog_PingLiangRoad',
               extension=['gif', 'jpg', 'png']):
    """Download the images referenced by ``html_url`` into ``folder_name``.

    Args:
        html_url: Page whose ``<img src="...">`` values are collected.
        folder_name: Target directory; created when missing.
        extension: Accepted file extensions. An image whose URL contains
            none of them is skipped; when several occur, the last listed
            one is used for the saved file.

    Each file is saved under a UUID-based name. Errors are printed rather
    than raised, so one failed download does not stop the others.
    """
    request_html = urllib2.Request(html_url)

    try:
        response = urllib2.urlopen(request_html)
        html = response.read()
        # Group 1 is the src value of each <img> tag.
        imgs = re.findall(r'<img.+src=\"(.+?)\"', html)
        if not os.path.exists(folder_name):
            os.makedirs(folder_name)
        for img in imgs:
            suffix = ''
            # Iterate the ``extension`` parameter without shadowing it.
            for candidate in extension:
                if '.%s' % candidate in img:
                    suffix = '.%s' % candidate

            if suffix == '':
                continue
            target = os.path.join(folder_name, str(uuid.uuid1()) + suffix)
            try:
                urllib.urlretrieve(img, target)
                print('Image save at %s' % target)
            except Exception as err:
                print(err)
    except Exception as e:
        print(e)
Пример #18
0
	def handle_starttag(self, tag, attrs):
		"""Handle an opening tag according to ``self.goal``.

		For the ``'down'`` goal the second entry of ``attrs`` is tested
		against the ``regexp`` pattern and, on a match, the download
		parameters are printed. The ``'parse'`` goal and every other goal
		are placeholders.
		"""
		if self.goal == 'down':
			# NOTE(review): if attrs is a list of (name, value) pairs,
			# attrs[1] is a tuple rather than the src string -- confirm.
			src = attrs[1]
			filepath = ''
			filename = ''
			if re.compile(regexp).match(src):
				print('src: %s' % (src,))
				print('filepath: %s' % (filepath,))
				print('filename: %s' % (filename,))
				#new DownFile(src, filepath, filename).run()
			pass # download
		elif self.goal == 'parse':
			pass # continue parsing
		else:
			pass #TODO:
Пример #19
0
def get_patt(fname, patt):
    """Return the first match of ``patt`` on each line of ``fname``.

    Args:
        fname: Path of the text file to scan.
        patt: Regular expression searched for in every line.

    Returns:
        The matched text of each line containing a match, in file order.
    """
    cpatt = re.compile(patt)
    patt_list = []
    with open(fname) as fobj:
        while True:
            try:
                line = fobj.readline()
            except UnicodeDecodeError:
                # Best effort: skip undecodable text. Other errors (e.g.
                # I/O failures) propagate instead of retrying forever.
                continue
            if not line:
                break
            m = cpatt.search(line)
            if m:
                patt_list.append(m.group())
    return patt_list
Пример #20
0
def assert_response(resp, contains=None, matches=None, headers=None, status="200"):
    """Assert that a response has the expected status, body and headers.

    Args:
        resp: Object exposing ``status``, ``data`` and ``headers``.
        contains: Value that must occur in ``resp.data``.
        matches: Regular expression that must match at the start of
            ``resp.data``.
        headers: Expected headers, compared with ``assert_equal``.
        status: Text that must occur in ``resp.status``.

    Raises:
        AssertionError: If any requested check fails.
    """
    assert status in resp.status, "Expected response %r not in %r" % (status, resp.status)

    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        assert contains in resp.data, "Response does not contain %r" % contains

    if matches:
        reg = re.compile(matches)
        # Compiled patterns expose match(); there is no matches().
        assert reg.match(resp.data), "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
Пример #21
0
def testAllinCurrent():
	"""Build a test suite from the ``*test.py`` modules beside the running script.

	Entries in the directory of ``sys.argv[0]`` whose names end in
	``test.py`` (any case) are imported by module name and their tests are
	loaded into a single ``unittest.TestSuite``.
	"""
	path = os.path.abspath(os.path.dirname(sys.argv[0]))
	# "$" anchors the suffix; IGNORECASE is a flag, not part of the pattern.
	test = re.compile(r"test\.py$", re.IGNORECASE)
	moduleNames = [os.path.splitext(f)[0] for f in os.listdir(path) if test.search(f)]
	modules = [__import__(moduleName) for moduleName in moduleNames]

	load = unittest.defaultTestLoader.loadTestsFromModule
	return unittest.TestSuite([load(module) for module in modules])


# Module-level guard: inside the function it sat after the return and never ran.
if __name__ == "__main__":
	unittest.main(defaultTest="testAllinCurrent")
Пример #22
0
def assert_response(resp,
                    contains=None,
                    matches=None,
                    headers=None,
                    status="200"):
    """Assert that a response has the expected status, body and headers.

    Args:
        resp: Object exposing ``status``, ``data`` (decodable bytes) and
            ``headers``.
        contains: Text that must occur in the decoded body.
        matches: Regular expression that must match at the start of the
            body. A text pattern is applied to the decoded body, a bytes
            pattern to the raw body.
        headers: Expected headers, compared with ``assert_equal``.
        status: Text that must occur in ``resp.status``.

    Raises:
        AssertionError: If any requested check fails.
    """
    assert status in resp.status, "Expected response %r not in %r" % (
        status, resp.status)

    if status == "200":
        assert resp.data, "Response data is empty"

    if contains:
        assert contains in resp.data.decode(
        ), "Response does not contain %r" % (contains)

    if matches:
        reg = re.compile(matches)
        # A text pattern cannot be applied to bytes, so decode as above.
        subject = resp.data if isinstance(matches, bytes) else resp.data.decode()
        assert reg.match(subject), "Response does not match %r" % (matches)

    if headers:
        assert_equal(resp.headers, headers)
Пример #23
0
def readUnlabelFiles(c_path):
    """Tokenize the cleaned text of every file below ``c_path``.

    Each file is decoded with the encoding reported by
    ``detectStrEncoding`` and re-encoded as UTF-8. Every pattern in
    ``deleteGroup`` is replaced by a space, the result is parsed with
    BeautifulSoup, ``deleteSymbol`` matches in the extracted text are
    replaced by a space, and the lower-cased text goes to ``tokenize``.

    Returns:
        A list with one ``tokenize`` result per file, or ``None`` when
        ``c_path`` is not a directory.
    """
    if not os.path.isdir(c_path):
        return None

    output = []
    for root, dirs, files in os.walk(c_path):
        for name in files:
            # read file content; join with the walked directory so files in
            # sub-directories are found too
            with open(os.path.join(root, name), 'r') as f:
                page_content = f.read()
            # coding -> utf8
            code = detectStrEncoding(page_content)
            page_content_utf8 = page_content.decode(code).encode('UTF-8')
            # grep filter
            for r in deleteGroup:
                page_content_utf8 = re.compile(r).sub(' ', page_content_utf8)
            # text -> doc
            soup = BeautifulSoup(page_content_utf8, 'lxml')
            # remove symbol
            content_utf8_main = re.compile(deleteSymbol).sub(' ', soup.get_text())
            output.append(tokenize(content_utf8_main.lower()))
    return output
 def addEvent(self):
     """Prompt for a start time and flush type, then store the new event.

     Asks repeatedly until the input is a valid ``HH:MM:SS`` time, creates
     an ``Event`` for it, marks it as a full flush when the answer is
     ``y``/``Y``, appends it to ``self.eventList``, increments
     ``self.eventCount`` and displays the event.
     """
     # Hours limited to 00-23 and the end anchored, so every accepted
     # string also parses with strptime below.
     r = re.compile(r'(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d$')
     while True:
         enteredTime = raw_input("Please enter the time you want the event to "
                                 "occur. Format as HH:MM:SS (eg 15:30:12) ")
         if r.match(enteredTime) is not None:
             break

     start = datetime.datetime.strptime(enteredTime, "%H:%M:%S").time()

     new = Event(start)

     flushType = raw_input("Is this a full flush? (y/n) ")
     if flushType == "y" or flushType == "Y":
         new.setFlushFull()

     self.eventList.append(new)
     self.eventCount += 1
     print("\nEvent added.")
     new.displayEvent()
Пример #25
0
def select_from_list(lst,
                     starts_with=None,
                     ends_with=None,
                     contains=None,
                     regexp=None):
    """
    Select the strings of ``lst`` that satisfy every given condition.

    Conditions that are left as ``None`` (or empty) are ignored; when no
    condition is given, a copy of the whole list is returned. Multiple
    conditions are combined with AND.

    Args:
        lst: Iterable of strings.
        starts_with: Required prefix.
        ends_with: Required suffix.
        contains: Required substring.
        regexp: Regular expression that must be found in the string.

    Example
    In a list of many files for several years, get only the files from
    the years 20xx::

        files = select_from_list(files, starts_with="meps20")
    """
    selected = list(lst)

    if starts_with:
        selected = [
            element for element in selected if element.startswith(starts_with)
        ]

    if ends_with:
        selected = [element for element in selected if element.endswith(ends_with)]

    if contains:
        selected = [element for element in selected if contains in element]

    if regexp:
        regxpr = re.compile(regexp)
        selected = [
            element for element in selected if regxpr.search(element) is not None
        ]

    return selected
Пример #26
0
def select_from_filelist(files,
                         starts_with=None,
                         ends_with=None,
                         contains=None,
                         regexp=None,
                         ignore_path=True,
                         ignore_format=True,
                         only_filename=True):
    """
    Select the file names that satisfy every given condition.

    Args:
        files: List of file names or paths.
        starts_with: Prefix (or prefixes, via ``_totuple``) that the name
            returned by ``extract_filename`` must start with.
        ends_with: Suffix (or suffixes, via ``_totuple``) the entry must
            end with.
        contains: Substring the entry must contain.
        regexp: Regular expression that must be found in the entry.
        ignore_path, ignore_format, only_filename: Accepted for
            compatibility; not used by this function.

    Returns:
        The entries passing all given conditions, in their original order.

    Example
    In a list of many files for several years, get only the files from
    the years 20xx::

        files = select_from_filelist(files, starts_with="meps20")
    """
    # Each filter is applied once; repeating it per file changed nothing.
    if starts_with:
        beginnings = _totuple(starts_with)
        files = [
            file for file in files
            if extract_filename(file=file).startswith(beginnings)
        ]
    if ends_with:
        endings = _totuple(ends_with)
        files = [file for file in files if file.endswith(endings)]
    if contains:
        files = [file for file in files if contains in file]
    if regexp:
        regxpr = re.compile(regexp)
        files = [file for file in files if regxpr.search(file) is not None]
    return files
Пример #27
0
    def addEvent(self):
        """Interactively create an event and append it to the event list.

        Keeps prompting until a valid ``HH:MM:SS`` time is entered, builds
        an ``Event`` from it, optionally marks it as a full flush, stores
        it in ``self.eventList``, increments ``self.eventCount`` and shows
        the event.
        """
        # Anchored, with hours capped at 23: the old pattern let values
        # such as "29:00:00" or "12:00:00x" through, which strptime rejects.
        r = re.compile(r'(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d$')
        enteredTime = None
        while enteredTime is None or r.match(enteredTime) is None:
            enteredTime = raw_input(
                "Please enter the time you want the event to "
                "occur. Format as HH:MM:SS (eg 15:30:12) ")

        start = datetime.datetime.strptime(enteredTime, "%H:%M:%S").time()

        new = Event(start)

        flushType = raw_input("Is this a full flush? (y/n) ")
        if flushType in ("y", "Y"):
            new.setFlushFull()

        self.eventList.append(new)
        self.eventCount += 1
        print("\nEvent added.")
        new.displayEvent()
Пример #28
0
		\w 表示数字字母或者是下划线
		\s 表示空白字符,包括空格、制表符或者换行符(\S 表示非空白字符)
		| 表示或者 
		[x-y] 表示选取x-y的任意一个数
		{n,} 表示n次及以上次数
		{,m} 表示m次以下
		{n,m} 表示n次以上m次以下的次数
		. 表示任意字符
		* 表示任意次数
		+ 表示一次及以上次数
		? 表示一次及以下
		^ 表示必须以什么开头
		$ 表示必须以什么结尾
		() 表示一个分组
		?P<name> 表示给这个分组命名
	正则修饰符
		re.S 让 . 也能匹配换行符
		re.I 表示忽略大小写
		re.M 表示可以多行匹配

	正则替换 
		re.sub(正则规则,要替换的字符串或者是一个函数,字符串)

	贪婪模式和非贪婪模式
		贪婪模式就是尽可能多的匹配
		非贪婪模式就是尽可能少的匹配
	re.compile()方法
		r = re.compile(正则规则) 生成一个正则表达式(Pattern)对象
		x = r.search(字符串)

Пример #29
0
    def _create_conn(cls, client, addr):
        '''Create a session for an accepted connection request.

        Runs the SSH server handshake on ``client``, greets the user and
        then processes commands until the user quits: ``ssh [user@]host``
        checks permissions and asks for a password, ``/<text>`` queries a
        host by address or by name, ``h``/``help`` prints the usage text
        and ``q``/``Q`` ends the session.

        Args:
            client: Socket object obtained from socket.accept().
            addr: Client address obtained from socket.accept().

        Returns:
            None; the process exits when an error occurs.
        '''
        try:
            #t = paramiko.Transport(client, gss_kex=DoGSSAPIKeyExchange)
            t = paramiko.Transport(client)
            try:
                t.load_server_moduli()
            except:
                log.error('(Failed to load moduli -- gex will be unsupported.)')
                raise
            t.add_server_key(host_key)
            server = Server()
            try:
                t.start_server(server=server)
            except paramiko.SSHException:
                log.error('*** SSH negotiation failed.')
                sys.exit(1)

            # wait for auth
            chan = t.accept(120)
            if chan is None:
                log.warning('Gets the channel timeout')
                t.close()
                sys.exit(1)
            log.info('Authenticated!')

            server.event.wait(10)
            if not server.event.is_set():
                log.warning('*** Client never asked for a shell.')
                sys.exit(1)

            username = server.get_loginname()
            welcome='''
            \r\n*********************************************************************************
            \r\n**********                 Welcome To The Magic Gate                   **********
            \r\n*********************************************************************************
            '''

            usage='''
            \r    Instructions for use are as follows:
            \r    1. Login server.
            \r       ssh server_ip
            \r    2. Logout system.[q|Q]

            '''
            chan.send(welcome)
            chan.send('\r\nHI %s!\r\n'%username.upper())
            chan.send(usage)

            # Search patterns are loop-invariant, so compile them once.
            # NOTE(review): the class [3,4,5] also admits a literal comma --
            # confirm whether [345] was intended.
            host_re = re.compile(r'^10\.1[3,4,5]\d\.((?:(2[0-4]\d)|(25[0-5])|([01]?\d\d?))\.)')
            domain_re = re.compile(r'^\w')

            getdb = CheckHost(username)
            passwd_count = 0
            passwd_input = False
            while True:
                passwd = None
                search_content = None
                gi = GetInput(chan, username)
                cmd = gi.get_input()
                if cmd.startswith('ssh'):
                    IH = InfoHub(chan)
                    hostinfo = cmd.strip(' ').split(' ')[1]
                    if hostinfo.find('@') >= 0:
                        user = hostinfo.split('@')[0]
                        host = hostinfo.split('@')[1]
                    else:
                        user = username
                        host = hostinfo
                    infotupl = getdb.check_user_by_ip(host)
                    if infotupl:
                        admin, userlist = infotupl[0]
                        if int(admin.find(username)) >= 0:
                            passwd_input = True
                        elif int(userlist.find(username)) >= 0:
                            passwd_input = True
                        else:
                            chan.send('\r\nWARNING: You are not allowed to log in. Please contact the administrator.')
                            chan.send('\r\nADMIN: %s'%(admin,))
                            continue
                    else:
                        chan.send('\r\nHost does not exist')
                        continue
                    pw = GetInput(chan, username)
                    while passwd_input:
                        passwd = pw.get_input("%s@%s's password: "%(user, host))
                        if passwd:
                            if IH.info_hub(host=host, user=user, passwd=passwd):
                                # The message has no placeholder; applying
                                # "% host" to it raised TypeError.
                                chan.send('\r\n[ERROR] Permission denied, please try again.')
                                log.error('Permission denied, please try again.[%s]'%host)
                                passwd_count+=1

                                if passwd_count > 2:
                                    break
                            else:
                                break

                elif cmd.startswith('/'):
                    search_content = cmd.split('/')[1]
                    if host_re.match(search_content):
                        query_host(search_content, 0)
                    elif domain_re.match(search_content):
                        query_host(search_content, 1)

                elif cmd in ['h', 'help']:
                    chan.send('\r\n%s'%usage)

                elif cmd in ['q', 'Q']:
                    break

            chan.send('\r\n')
            chan.close()

        except Exception as e:
            log.error('*** Caught exception: ' + str(e.__class__) + ': ' + str(e))
            traceback.print_exc()
            try:
                t.close()
            except Exception:
                # Best effort: the transport may not exist or may already be closed.
                pass
            sys.exit(1)
Пример #30
0
import re

# Three digits, a hyphen, then three to eight digits: \d{3}-\d{3,8}

mr = re.match(r'\d{3}-\d{3,8}', '010-223456')
print(mr.string)

# Groups
m = re.match(r'(\d{3})-(\d{3,8})$', '010-223456')
print(m.groups())
print(m.group(0))  # the whole match
print(m.group(1))  # first group
print(m.group(2))  # second group

# Validate an HH:MM:SS time. Each field is one parenthesised alternation so
# "|" cannot leak across fields, and re.match receives the string to test.
t = '20:25:45'
time_match = re.match(
    r'^(0[0-9]|1[0-9]|2[0-3]|[0-9]):([0-5][0-9]|[0-9]):([0-5][0-9]|[0-9])$', t)
print(time_match.groups())

# Split a string on runs of digits
p = re.compile(r'\d+')
print(p.split('one1two2three3four4'))
Пример #31
0
#!/usr/bin/env python

import re
import keyword

from PySide import QtCore, QtGui
Qt = QtCore.Qt

ALPHANUM = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
keywords = keyword.kwlist

# Matches either "#", or one alphanumeric character followed by an optional
# b/B or u/U prefix, an optional r/R prefix and an opening quote. The four
# quote forms are separate alternatives; an escaped "|" had fused the
# triple-single-quote and double-quote forms into one literal.
token_re = re.compile(''.join([
    '#|',
    '([{0}])'.format(ALPHANUM),
    '(',
    '([bB]|[uU])?',
    '[rR]?',
    '("""|\'\'\'|"|\')',
    ')',
    ]))

# keywords = ['and', 'as', 'assert', 'break', 'class', 'continue', 'def',
#     'del', 'elif', 'else', 'except', 'exec', 'finally', 'for', 'from',
#     'global', 'if', 'import', 'in', 'is', 'lambda', 'not', 'or', 'pass',
#     'print', 'raise', 'return', 'try', 'while', 'with', 'yield']


class BlockData(QtGui.QTextBlockUserData):
    """``QTextBlockUserData`` subclass that adds no state of its own."""

    def __init__(self):
        # Explicit base-class initialisation; there is a single base class.
        QtGui.QTextBlockUserData.__init__(self)
Пример #32
0
SyntaxError: invalid syntax
>>> for tag in soup.find_all(re.compile("t"):
			 
SyntaxError: invalid syntax
>>> for tag in soup.find_all(re.compile("t")):
	print(i)

	




>>> for tag in soup.find_all(re.compile("t")):
	print(tag.name)

	
html
title
>>> soup.find_all(href=re.compile("Ethiopia"))
[<a class="sister" href="https://en.wikipedia.org/wiki/Ethiopia" id="link1">Ethiopia</a>]
>>> soup.find_all(href=re.compile("Ethiopia"),id="link1")
[<a class="sister" href="https://en.wikipedia.org/wiki/Ethiopia" id="link1">Ethiopia</a>]
>>> soup.find_all(href=re.complie("Israel"), id="link4")
Traceback (most recent call last):
  File "<pyshell#65>", line 1, in <module>
    soup.find_all(href=re.complie("Israel"), id="link4")
AttributeError: module 're' has no attribute 'complie'
>>> soup.find_all(href=re.compile("Israel"), id="link4")
[<a class="sister" href="https://en.wikipedia.org/wiki/Israel" id="link4">Israel</a>]
>>> 
"""
    使用iter内置函数可以获取迭代器的对象。如果对象实现了能返回迭代器的__iter__方法,那么对象就是可迭代的。
    序列都可以迭代:实现了 __getitem__方法,而且其参数是从零开始的索引,这种对象也可以迭代。
我们要明确可迭代对象和迭代器之间的关系:Python从可迭代的对象中获取迭代器

标准的迭代器接口有两个方法:
__next__ 返回一个可用的元素,如果没有了元素,抛出StopIteration异常
__iter__ 返回self,以便在应该使用可迭代对象的地方使用迭代器,例如在for循环中
"""
import re
import reprlib

"""
使用生成器函数实现Sentence
"""

RE_WORD = re.compile(r'\w+')


class Sentence():
    """Iterable over the words of a text, implemented with a generator."""

    def __init__(self, text):
        self.text = text
        # All runs of word characters, in order of appearance.
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        # reprlib.repr abbreviates long texts.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        # A generator function: each call returns a fresh iterator.
        for word in self.words:
            yield word
Пример #34
0
import pyperclip, re

# The previous pattern did not compile ("/d" instead of "\d", "}}?" and
# unbalanced parentheses). This version keeps its 3-4-4 digit layout with
# optional separators and an optional country code.
# NOTE(review): the intended phone format is inferred from the broken
# pattern -- confirm against real input.
phoneRegex = re.compile(
    r'''(
    (?:(\+|00)?\d{2}(?:\s|-|\.)?)?   # optional country code, e.g. +86 or 0086
    (\d{3})                          # first block: three digits
    (\s|-|\.)?                       # optional separator
    (\d{4})                          # second block: four digits
    (\s|-|\.)?                       # optional separator
    (\d{4})                          # third block: four digits
)''', re.VERBOSE)

emailRegex = re.compile(
    r'''(
    [a-zA-Z0-9._%+-]+      # user name
    @
    [a-zA-Z0-9.-]+         # domain labels; dots allowed so the match does
                           # not stop at the first label
    \.[a-zA-Z]{2,}         # top-level domain
)''', re.VERBOSE)
EXAMPLE

print('\tTab')
#output
	'Tab'

print(r'\tTab')
#output
'\tTab'

#compile will let us separate out our patterns into variables.

#makes it easier to reuse that variable to perform multiple searches

pattern = re.compile(r'abc')

matches = pattern.finditer(text_to_search)

for match in matches:
	print(match)
#output
<_sre.SRE_Match object; span=(1,4), match='abc'>

print(text_to_search[1:4])
#output
'abc'

#quick note when searching for a period we add the backslash

pattern = re.compile(r'\.')
Пример #36
0
# Fetch the HTML source
def get_html(url):
    """Return the body read from ``url``."""
    response = urllib.urlopen(url)
    return response.read()  # read the whole body

def download(mp4_url,path):
    """Download ``mp4_url`` into the ``TV`` folder under a name from ``path``.

    ``path`` is printed, stripped of all whitespace and converted from
    UTF-8 to GBK before it is used as the file name.
    """
    print(path)  # title
    path = "".join(path.split())
    # NOTE(review): the original target expression was garbled
    # ("TV\%(...)"); a "TV\<name>" path is assumed and no extension is
    # appended -- confirm the intended file name.
    urllib.urlretrieve(mp4_url, 'TV\\%s' % (path.decode('utf-8').encode('gbk')))
    print("ok!!!")


# Match the videos
def get_mp4_url(request):
    """Return every ``data-mp4`` attribute value found in ``request`` as a list."""
    mp4_pattern = re.compile(r'data-mp4="(.*?)"')
    return mp4_pattern.findall(request)

# Match the titles

def get_name(request):
    """Return the link text of every ``/detail-XXXXXXXX.html`` anchor in ``request``."""
    # re.S: "." also matches newlines, so titles may span lines.
    reg = re.compile(r'<a href="/detail-.{8}?.html">(.*?)</a>', re.S)
    return reg.findall(request)

# Fetch each page, then collect its video addresses and video names.
for i in range(1, 2):
    html = get_html(get_url(i))
    mp4_url, mp4_name = get_mp4_url(html), get_name(html)


# Download every video under its matching title.
for video_address, video_title in zip(mp4_url, mp4_name):
    download(video_address, video_title)
Пример #37
0
    def cookies(self):
        """
        Return a new dict with all cookies; names are str, values unicode.
        >>> r = Request({'HTTP_COOKIE':'A=123; url=http%3A%2F%2Fwww.example.com%2F'})
        >>> r.cookies['A']
        u'123'
        >>> r.cookies['url']
        u'http://www.example.com/'
        """
        cookie_map = self._get_cookies()
        # Copy, so callers cannot modify the mapping returned by _get_cookies().
        return dict(**cookie_map)

    def cookie(self, name, default=None):
        """Return the cookie called ``name``, or ``default`` when it is absent."""
        jar = self._get_cookies()
        return jar.get(name, default)


_RE_UTC = re.compile(r'([+-]{0,1})(\d{1,2}):(\d{2})')

class UTC(datetime.tzinfo):
    """Fixed-offset time zone built from a string such as ``+8:00`` or ``-05:30``.

    Raises:
        ValueError: If the string does not start with ``[+-]H[H]:MM``.
    """

    def __init__(self, utc):
        utc = str(utc.strip().upper())
        mt = _RE_UTC.match(utc)
        if not mt:
            raise ValueError('Not a Valid format.')
        h, m = int(mt.group(2)), int(mt.group(3))
        if mt.group(1) == '-':
            # A negative offset negates both the hours and the minutes.
            h, m = -h, -m
        self._utcoffset = datetime.timedelta(hours=h, minutes=m)
        self._tzname = 'UTC%s' % utc

    # The tzinfo protocol: without these three methods datetime cannot use
    # an instance as a time zone.
    def utcoffset(self, dt):
        return self._utcoffset

    def tzname(self, dt):
        return self._tzname

    def dst(self, dt):
        return datetime.timedelta(0)
Пример #38
0
#coding='GBK'
import re

rule = ''
# Text to search. It was referenced without ever being defined, so it is
# an empty placeholder here just like ``rule``.
code = ''
pattern = re.compile(rule, re.S)  # re.S: "." also matches newlines
items = pattern.findall(code)

for item in items:
    print(item)
Пример #39
0
digitRegEx = re.compile(r'\d')  # any single digit
lyrics = 'mambp number 5, 12 34 5 come on everybody mambo number 5'

lyricGroup = digitRegEx.findall(lyrics)
print(lyricGroup)

vowelRegEx = re.compile(r'[aeiouAEIOU]')  # same as (a|e|i|o|u), either case
vowelRegEx.findall('robocop eats babay food')

notAVowel = re.compile(r'[^aeiouAEIOU]')  # a leading ^ negates the class

notAVowel.findall(r'this is a grand day')

# starts and ends with
beingsWithHelloRegex = re.compile(r'^hello')  # string begins with hello
endsWithWorldRegex = re.compile(r'world$')  # string ends with world

isaNumberRegex = re.compile(r'^\d+$')  # the whole string consists of digits
# wildcard character
# . any character except for the newline

# .* means any pattern at all

pattern = 'First name: shaq Last name: bond lol'

# the two .* groups capture the first name and the last name
nameRegEx = re.compile(r'First name: (.*) Last name: (.*)')
group = nameRegEx.findall(pattern)
print(group)

# .* is greedy; for non-greedy mode use .*?, which stops at the first possible match
Пример #40
0
# Repetition qualifiers and what each one means.
REPETITION_CHAR_DICT = dict([
    ('?', "Match 0 or 1 time"),
    ('*', "Match 0 or ANY times"),
    ('+', "Match 1 or ANY times"),
    ('{m,n}', "Match at least m times, at most n times"),
])

# In {m,n} if m is omitted, it means the low limit is 0.
# if n is omitted, it means the up limit is 2 billion.


##### RE Module

import re
p = re.compile('ab*')  # p is short for pattern
p2 = re.compile('ab*', re.IGNORECASE)  # the same pattern, ignoring case


### Backslash Plague

r"""
The Backslash Plague
As stated earlier, regular expressions use the backslash character ('\') to indicate special forms or
to allow special characters to be used without invoking their special meaning.
This conflicts with Python’s usage of the same character for the same purpose in string literals.

Let’s say you want to write a RE that matches the string \section, which might be found in a LaTeX file.
To figure out what to write in the program code, start with the desired string to be matched.
Next, you must escape any backslashes and other metacharacters by preceding them with a backslash,
resulting in the string \\section. The resulting string that must be passed to re.compile() must be \\section.
"""
Пример #41
0
import re
p = re.compile(r'a')  # compiled pattern matching the single letter "a"