def dup2ins(self, chgvs):
    """Rewrite a cDNA ``dup`` HGVS string as two equivalent ``ins`` strings.

    The duplicated sequence can be described as an insertion placed either
    just before the first duplicated position or just after the last one.
    Both spellings are produced so a caller can compare them against
    records that are stored in ``ins`` notation.

    Supported layouts (optional ``*`` / ``-`` position prefix, optional
    signed offset such as ``+5``)::

        c.123dupA            single position
        c.123_125dupATG      position range
        c.100+5_100+7dupAA   range with offsets

    Args:
        chgvs: The cHGVS string to convert; may be ``None``.

    Returns:
        ``[ins_before, ins_after]``. Both entries are ``""`` when the string
        contains ``dup`` but matches neither layout. ``None`` (after writing
        a message to stderr) when *chgvs* is ``None`` or contains no ``dup``.
    """
    if chgvs is None or not re.search(r'dup', chgvs):
        sys.stderr.write("cHGVS is not 'dup'\n")
        return None

    # NOTE(review): the neighbouring position is computed on the unsigned
    # digits only (number - 1 / number + 1), exactly as the original did.
    # For '-' prefixed positions or '-' offsets this may not be the
    # adjacent base in HGVS terms -- confirm with the data owner.
    def ins_before(prefix, base, sign, offset, seq, blank_zero):
        """Insertion between the position preceding the start and the start."""
        if offset:
            prev_offset = int(offset) - 1
            # An offset of 0 is written as the bare base position.
            left = "" if prev_offset == 0 else "{}{}".format(sign, prev_offset)
            return "c.{0}{1}{2}_{0}{1}{3}{4}ins{5}".format(
                prefix, base, left, sign, offset, seq)
        prev_base = int(base) - 1
        if blank_zero and prev_base == 0:
            prev_base = ""
        return "c.{0}{1}_{0}{2}ins{3}".format(prefix, prev_base, base, seq)

    def ins_after(prefix, base, sign, offset, seq):
        """Insertion between the end and the position following the end."""
        if offset:
            return "c.{0}{1}{2}{3}_{0}{1}{2}{4}ins{5}".format(
                prefix, base, sign, offset, int(offset) + 1, seq)
        return "c.{0}{1}_{0}{2}ins{3}".format(
            prefix, base, int(base) + 1, seq)

    # One coordinate: prefix, base number, then an optional signed offset
    # captured as separate sign and digits. Making the whole offset
    # optional keeps the base number intact when there is no offset.
    coord = r'([*-]?)(\d+)(?:([+-])(\d+))?'
    range_match = re.search(
        r'c\.' + coord + r'_' + coord + r'dup(.*)', chgvs)
    single_match = re.search(r'c\.' + coord + r'dup(.*)', chgvs)

    dup1, dup2 = "", ""
    if range_match:
        (s_prefix, s_base, s_sign, s_offset,
         e_prefix, e_base, e_sign, e_offset, seq) = [
            grp or "" for grp in range_match.groups()]
        dup1 = ins_before(s_prefix, s_base, s_sign, s_offset, seq, False)
        dup2 = ins_after(e_prefix, e_base, e_sign, e_offset, seq)
    elif single_match:
        prefix, base, sign, offset, seq = [
            grp or "" for grp in single_match.groups()]
        # Only the single-position form blanks a preceding position of 0;
        # the range form keeps it, as in the original code.
        dup1 = ins_before(prefix, base, sign, offset, seq, True)
        dup2 = ins_after(prefix, base, sign, offset, seq)
    return [dup1, dup2]
def process_rec(rec):
    """Parse one tab-separated record and collect it if it matches the query.

    Columns 3..11 of the record are read as mutation id, gene id, gene,
    transcript (nm), cHGVS, pHGVS, disease, pmid and prediction. A display
    name of the form ``nm(gene): chgvs (phgvs)`` is built, with blank or
    ``"."`` fields omitted, and the record is appended to ``returns`` when:

    * ``inchgvs`` equals the record's cHGVS, or
    * ``inchgvs`` is a pure ``dup`` and the record is a pure ``ins`` equal
      to one of the two insertion spellings from ``self.dup2ins``, or
    * neither ``inchgvs`` nor ``inphgvs`` was supplied.

    ``self``, ``inchgvs``, ``inphgvs`` and ``returns`` are not parameters;
    they are read from a surrounding scope that is not visible here.

    Args:
        rec: One raw input line.

    Returns:
        None. Blank lines are ignored.
    """
    if re.search(r'^\s*$', rec):
        return

    def has_value(field):
        """True when the field is neither blank nor the '.' placeholder."""
        return (field is not None
                and not re.search(r'^\s*$', field)
                and field != ".")

    cols = rec.strip("\r\n").split('\t')
    mutid, geneid, gene, nm, chgvs, \
        phgvs, disease, pmid, pred = cols[3:12]

    nm = nm if has_value(nm) else ""
    gene = "(" + str(gene) + ")" if has_value(gene) else ""
    chgvs = chgvs if has_value(chgvs) else ""
    phgvs = " (" + str(phgvs) + ")" if has_value(phgvs) else ""
    mutname = "{}{}{}{}{}".format(nm, gene, ": ", chgvs, phgvs)

    keya = "id mutation_name pmid disease pred".split()
    qresult = dict(zip(keya, [mutid, mutname, pmid, disease, pred]))

    if inchgvs is not None and inchgvs == chgvs:
        returns.append(qresult)
    elif inchgvs is not None \
            and re.search(r'dup', inchgvs) \
            and not re.search(r'del', inchgvs) \
            and not re.search(r'^\s*$', chgvs) \
            and re.search(r'ins', chgvs) \
            and not re.search(r'del', chgvs):
        # The query is a duplication and the record an insertion: compare
        # the record with both insertion spellings of the duplication.
        ins1, ins2 = "", ""
        ins_forms = self.dup2ins(inchgvs)
        if ins_forms is not None and len(ins_forms) == 2:
            ins1, ins2 = ins_forms[:2]
        # chgvs is non-blank in this branch, so an empty spelling can
        # never compare equal to it.
        if chgvs in (ins1, ins2):
            returns.append(qresult)
    elif not (inchgvs is not None or inphgvs is not None):
        returns.append(qresult)
def login(self, site):
    """Record the target site and run the authentication steps.

    Stores *site* on ``self.site``, extracts the host part into
    ``self.tenantBaseURL``, then calls ``_get_token``, ``_get_cookie`` and
    ``_get_digest`` in that order.

    Args:
        site: Site address, with or without a leading ``scheme://``.

    Raises:
        AttributeError: If no host part can be found in *site*.
    """
    self.site = site

    if '://' not in site:
        # No scheme: the host is everything up to the first slash.
        self.tenantBaseURL = re.search(r"([^/]+)", site).group(0)
    else:
        # Scheme present: the host follows the double slash.
        self.tenantBaseURL = re.search(r'/{2}([^/]+)', site).group(1)

    self._get_token()
    self._get_cookie()
    self._get_digest()
def analize_html(url, root_url):
    """Download *url* and recursively process the links found in it.

    The page is fetched with ``download_file``; each saved path is recorded
    in ``proc_files`` so it is processed only once. Links that do not start
    with *root_url* are skipped, links ending in ``.html`` / ``.htm`` are
    analysed recursively, and every other link is simply downloaded.

    Args:
        url: Address of the page to analyse.
        root_url: Prefix a link must have to be followed.
    """
    savepath = download_file(url)
    if savepath is None:
        return
    if savepath in proc_files:
        return
    proc_files[savepath] = True
    print("analize_html=", url)

    with open(savepath, "r", encoding="utf-8") as page:
        html = page.read()

    # The result is kept under a different local name: assigning to
    # `links` inside this function would make it a local variable and the
    # call below would fail with UnboundLocalError.
    # NOTE(review): the original line read `links = enum = links(html, url)`;
    # confirm that the module-level link extractor is really named `links`.
    page_links = links(html, url)

    for link_url in page_links:
        if not link_url.startswith(root_url):
            continue
        # The dot is escaped so only a real file extension matches.
        if re.search(r"\.(html|htm)$", link_url):
            analize_html(link_url, root_url)
            continue
        download_file(link_url)
def Report_Period(title):
    """Derive the reporting period from an announcement title.

    The first four-digit number starting with ``2`` is taken as the year,
    and the report type named in the title selects the period end date.

    Args:
        title: Announcement title text.

    Returns:
        ``"<year><mmdd>"`` as a string -- ``0630`` for a half-year report,
        ``1231`` for an annual report, ``0331`` for a first-quarter report
        and ``0930`` for a third-quarter report -- or ``None`` when the
        title has no year or names none of these report types.
    """
    year_match = re.search(r'2\d{3}', title)
    if year_match is None:
        return None
    year = year_match.group()

    # Checked in this order, matching the original if/elif chain.
    period_endings = (
        (r'年半年度(报告|财务报表)', '0630'),
        (r'年年度(报告|财务报表)', '1231'),
        (r'年一季度(报告|财务报表)', '0331'),
        (r'年三季度(报告|财务报表)', '0930'),
    )
    for report_pattern, month_day in period_endings:
        if re.search(report_pattern, title):
            return str(year) + month_day
    return None
# Regular expressions
import re

# Print every line of the mailbox file that starts with "From:".
# The file is opened in a `with` block so it is closed when the loop ends.
with open('mbox-short') as hand:
    for line in hand:
        line = line.rstrip()
        if re.search('^From:', line):
            print(line)
# The program inputs an email address and returns the username
# and domain name
import re

# username, "@", first label of the domain, ".", remainder of the domain.
EMAIL_PATTERN = re.compile(r'^([\w.+-]+)@([\w-]+)\.[\w.-]+$')


def split_email(address):
    """Split an email address into its username and domain name.

    Args:
        address: The text to validate and split.

    Returns:
        ``(username, domain_name)`` where ``domain_name`` is the first
        label after the ``@``, or ``None`` when *address* does not look
        like an email address.
    """
    match = EMAIL_PATTERN.search(address)
    if match is None:
        return None
    return match.group(1), match.group(2)


if __name__ == "__main__":
    user_email_address = input("Please input your email address: ")
    parts = split_email(user_email_address)
    if parts is None:
        # Stop here: an invalid address has no parts to report.
        print("Invaild email address")
    else:
        print(f"The username is {parts[0]}")
        print(f"The domain name is {parts[1]}")
def search_line_regex(pattern, line):
    """Return the first match of *pattern* anywhere in *line*.

    Args:
        pattern: Regular expression (string or compiled pattern).
        line: Text to scan.

    Returns:
        The ``re.Match`` object for the first match, or ``None`` when the
        pattern does not occur in *line*.
    """
    return re.search(pattern, line)
#!/usr/bin/env python3 import sys import re import ro import os if len(sys.argv) !=2 : print("usage:",sys.argv[0],"a|p|ap|pa") sys.exit(1) res=re.serach("^(-a|-p|-ap|-pa!$",sys.arg[1]) if not res : print() sys.exit(1) b=os.path.expanduser("~/.bashrc") if os.path.isfile(b): f=open(b,"r") else: print("bashrc .....") sys.exit(2) alias l=ls -ali if re.serach('a',sys.argv[1]): ligne=f.readline() while '' != ligne : r=re.serarch("alias (.*)=(.*)
import re

pattern = "do you remember .*"
message = "do you remember when you ate strawberries in the garden"
match = re.search(pattern, message)
if match:
    print("string matches")

pattern = "if (.*)"
message = "what would you do if bots took over the world"
match = re.search(pattern, message)
match.group(0)  # the entire matched text: "if bots took over the world"
match.group(1)  # the first captured group: "bots took over the world"
...


# grammatical transformation
def swap_pronouns(phrase):
    """Turn first-person pronouns in *phrase* into second-person ones.

    Every standalone ``I`` becomes ``you`` and every standalone ``my``
    becomes ``your``; letters inside longer words are left alone.

    Args:
        phrase: Text to transform.

    Returns:
        The transformed text, or *phrase* unchanged when it contains
        neither pronoun.
    """
    replacements = {'I': 'you', 'my': 'your'}
    # One pass with word boundaries so both pronouns are handled and
    # words such as "India" or "mystery" are not touched.
    return re.sub(r'\b(I|my)\b',
                  lambda found: replacements[found.group(1)],
                  phrase)


swap_pronouns("I walk my dog")  # "you walk your dog"

pattern = "do you remember (.*)"
] print(propernouns) public_tweets = api.search(q=news_query, lang='en', show_user='******', rpp=100) ls = [] data = [None] * 2 data[0] = news_query if (len(propernouns) > 0): for tweet in public_tweets: count = 0 count1 = 0 for k in propernouns: result1 = re.serach(k, tweet.text) if result1 != None: count1 = count1 + 1 if (count1 == len(propernouns)): for j in news_keywords: result = re.search(j, tweet.text) if result != None: count = count + 1 Match_percent = (count / len(news_keywords)) ls.append(Match_percent) with open('Learned.csv', 'a', encoding='UTF-8') as w: writer = csv.writer(w) data[1] = np.mean(ls) print(data[1]) writer.writerow(data) w.close()
def people_info(string):
    """Extract profile fields from a user-info text into one TSV line.

    The text is scanned for the markers 《ID暱稱》, 《經濟狀況》, 《登入次數》,
    《有效文章》 and ``(退:``. The result is stored in the module-level
    ``info`` as thirteen tab-separated columns followed by a newline::

        ID, nickname, economic, log_num, article_num, bad_article,
        current_state, mail, last_log, last_ip, five_in_a_Row,
        chinese_chess, signature_line

    Only the first six columns are filled by the regex extraction. The
    remaining seven were produced solely by an older, disabled
    split-based parser, so they are emitted as empty strings to keep the
    column layout.

    Args:
        string: The raw profile text.

    Returns:
        The same line that is stored in ``info``. The module-level ``ID``
        is set to the extracted ID, or ``None`` when no ID is found.
    """
    global info
    global ID

    def first_group(regex):
        """Group 1 of the first match of *regex* in the text, or ''."""
        found = re.search(regex, string)
        return found.group(1) if found else ""

    ID = None
    nickname = ""
    # The ID is followed by the nickname in parentheses; another 《 marker
    # must follow for the pattern to match.
    id_match = re.search(
        r'《ID暱稱》(?P<id>\w*).*\((?P<nick>.*)\).*《', string)
    if id_match:
        ID = id_match.group(1)
        nickname = id_match.group(2)

    economic = first_group(r'《經濟狀況》(?P<f>\S*)')
    log_num = first_group(r'《登入次數》(?P<f>\d*)')
    article_num = first_group(r'《有效文章》(?P<f>\d*)')
    # NOTE(review): takes the text between "(退:" and ")", as the disabled
    # parser did by stripping "(退:" and ")" from that token -- confirm.
    bad_article = first_group(r'\(退:(?P<f>[^\s)]*)')

    # Columns with no regex-based extraction yet.
    current_state = mail = last_log = last_ip = ""
    five_in_a_Row = chinese_chess = signature_line = ""

    columns = [
        ID or "", nickname, economic, log_num, article_num, bad_article,
        current_state, mail, last_log, last_ip, five_in_a_Row,
        chinese_chess, signature_line,
    ]
    info = '\t'.join(columns) + '\n'
    return info