def MAIN(murl):
    if 'TV' in murl:
        main.addDir('Movies', 'MOVIES', 451, art + '/shush.png')
        link = main.OPENURL('http://www.shush.se/index.php?shows')
        link = link.replace('\r', '').replace('\n', '').replace('\t', '').replace(' ', '').replace('»', '')
        match = re.compile('(?sim)class="shows"><a href="([^"]+)"><img src="([^"]+)" alt="Watch (.+?) online').findall(link)
        for url, thumb, name in match:
            main.addDirT(name.title(), 'http://www.shush.se/' + url, 452, thumb, '', '', '', '', '')
    else:
        main.addDir('TV', 'TV', 451, art + '/shush.png')
        link = main.OPENURL('http://www.shush.se/index.php?movies')
        link = link.replace('\r', '').replace('\n', '').replace('\t', '').replace(' ', '').replace('»', '')
        match = re.compile('(?sim)class="shows"><a href="([^"]+)"><img src="([^"]+)" alt="([^"]+)" title=').findall(link)
        dialogWait = xbmcgui.DialogProgress()
        ret = dialogWait.create('Please wait until Movie list is cached.')
        totalLinks = len(match)
        loadedLinks = 0
        remaining_display = 'Movies loaded :: [B]' + str(loadedLinks) + ' / ' + str(totalLinks) + '[/B].'
        dialogWait.update(0, '[B]Will load instantly from now on[/B]', remaining_display)
        xbmc.executebuiltin("XBMC.Dialog.Close(busydialog,true)")
        for url, thumb, name in match:
            main.addPlayM(name.title(), 'http://www.shush.se/' + url, 453, thumb, '', '', '', '', '')
            loadedLinks = loadedLinks + 1
            percent = (loadedLinks * 100) / totalLinks
            remaining_display = 'Movies loaded :: [B]' + str(loadedLinks) + ' / ' + str(totalLinks) + '[/B].'
            dialogWait.update(percent, '[B]Will load instantly from now on[/B]', remaining_display)
            if (dialogWait.iscanceled()):
                return False
        dialogWait.close()
        del dialogWait
def __parse_positions(self, var):
    listPattern = re.compile("\[[^\[]+?\]")
    tagPattern = re.compile("<.+?>")
    betweenTagPattern = re.compile(">(.+?)<")
    numberPattern = re.compile("-?\d+\.?\d*")
    stringPattern = re.compile('".*?[^\\\\]"')
    positions = []
    columns = ("pid", "date", "stock", "percentage", "shares", "notes")
    for text in listPattern.findall(var):
        data = stringPattern.findall(text[1:-1])
        stock = betweenTagPattern.findall(data[0])[0]
        if self.user == list_user:
            percentage = shares = "NULL"
            notes = tagPattern.sub(" ", data[-1][1:-1])
        else:
            comments = tagPattern.split(data[-1][1:-1])
            try:
                percentage = float(numberPattern.findall(comments[0])[0])
            except:
                percentage = 0
            try:
                shares = float(numberPattern.findall(comments[1])[0])
            except:
                shares = 0
            try:
                notes = comments[2]
            except:
                notes = ""
        positions.append(
            dict(zip(columns, (self.id, self.now, stock, percentage, shares,
                               notes.encode("ascii", "ignore"))))
        )
    return positions
def get_user_info(self, uid):
    """
    Fetch a user's basic profile information.
    :param uid: user id
    :return: dict with the user's basic information (keys: 性别 = gender, 地区 = region, 年龄 = age)
    """
    user_info_url = 'http://weibo.cn/%s/info' % uid
    user_info_page = self.get_page(user_info_url)
    sex_pattern = re.compile('性别:(.*?)<br/>')
    area_pattern = re.compile('地区:(.*?)<br/>')
    birth_pattern = re.compile('生日:(\d*?)-.*?<br/>')
    sex = re.search(sex_pattern, user_info_page)
    area = re.search(area_pattern, user_info_page)
    birth = re.search(birth_pattern, user_info_page)
    if sex:
        sex = sex.group(1)
    if area:
        area = area.group(1)
    if birth:
        birth = birth.group(1)
        if int(birth) != 1:  # skip users whose birth year is still Weibo's default value ("0001")
            info = {'性别': sex, '地区': area, '年龄': 2016 - int(birth)}
            return info
    info = {'性别': sex, '地区': area, '年龄': None}
    return info
def makeconfig(infp, outfp, modules, with_ifdef=0):
    m1 = re.compile('-- ADDMODULE MARKER 1 --')
    m2 = re.compile('-- ADDMODULE MARKER 2 --')
    while 1:
        line = infp.readline()
        if not line:
            break
        outfp.write(line)
        if m1 and m1.search(line):
            m1 = None
            for mod in modules:
                if mod in never:
                    continue
                if with_ifdef:
                    outfp.write("#ifndef init%s\n" % mod)
                outfp.write('extern void init%s(void);\n' % mod)
                if with_ifdef:
                    outfp.write("#endif\n")
        elif m2 and m2.search(line):
            m2 = None
            for mod in modules:
                if mod in never:
                    continue
                outfp.write('\t{"%s", init%s},\n' % (mod, mod))
    if m1:
        sys.stderr.write('MARKER 1 never found\n')
    elif m2:
        sys.stderr.write('MARKER 2 never found\n')
def acquire(self, testname, buf, status, command):
    # record failures based on exit status
    if status:
        self.failures.append("Exit %s: %s" % (status, command))

    # scan test log for magical tokens
    # see also: http://hg.mozilla.org/automation/logparser/
    passre = re.compile("^TEST-(PASS|EXPECTED-FAIL).*")
    failre = re.compile("^TEST-UNEXPECTED-.*")
    tback = re.compile("^Traceback.*")
    excpt = re.compile("^Exception:.*")

    self.text[testname] = []
    for line in buf:
        print line
        if passre.match(line):
            self.passes.append(line)
        elif failre.match(line):
            self.failures.append(line)
        elif tback.match(line):
            self.failures.append(line)
        elif excpt.match(line):
            self.failures.append(line)
        else:
            self.info.append(line)
        self.text[testname].append(line)
def check_easyblocks_for_environment(home):
    """ check whether os.putenv or os.environ[]= is used inside easyblocks """
    files = glob.glob(os.path.join(home, 'easybuild/easyblocks/[a-z]/*.py'))
    eb_files = filter(lambda x: os.path.basename(x) != '__init__.py', files)

    os_env_re = re.compile(r"os\.environ\[\w+\]\s*=\s*")
    os_putenv_re = re.compile(r"os\.putenv")

    found = []
    for eb_file in eb_files:
        f = open(eb_file, "r")
        text = f.read()
        f.close()

        if os_putenv_re.search(text) or os_env_re.search(text):
            found.append(eb_file)

    for faulty in found:
        warning("found os.environ or os.putenv inside eb_file: %s" % faulty)

    if found:
        warning("Only easybuild.tools.environment.set should be used for setting environment variables.")

    return len(found) == 0
def catchweibo():
    c = 3362  # c is the page where the crawler starts
    for i in range(6906):
        pn = (i + c)
        url = 'http://weibo.cn/1767797335/profile?filter=0&page=' + str(pn)
        # the URL above is the Weibo profile you want to crawl; weibo.cn is used
        # because access restrictions are looser there
        print url
        req = urllib2.Request(url)
        req.add_header("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36")
        req.add_header("Cookie", "_T_WM=edf4469bb5245a50aa32006460daa5ae; _T_WL=1; _WEIBO_UID=5638019231; SUB=_2A254gp9aDeTxGeNI6FoR8SfOyD2IHXVbjCESrDV6PUJbrdAKLXOnkW1HSRVVWhAfa6SQUOfsMJvV5z1nWg..; gsid_CTandWM=4u3Fdd4a1W8HT0Rlp91lUnEHN3J")
        # replace the cookie above with your own; one cookie lasts for roughly 1000 pages,
        # so if you only have one account, wait an hour before crawling again
        try:
            res = urllib2.urlopen(req)
            print 'ok1'
        except:
            print 'error open'
            continue
        html = res.read()
        print html
        reg1 = re.compile(r'(<div class="c" id="M_[\d\D]*?)<div class="s"></div>')
        reg2 = re.compile(r'<span class="ct">(.*?) ')
        yuanchuang = reg1.findall(html)
        # atime = reg2.findall(html)
        if not yuanchuang:
            print 'reg none'
            c = c - 1
            continue
        for j in range(0, len(yuanchuang)):
            print len(yuanchuang)
            print yuanchuang[j]
            print '\n'
            fout.write(yuanchuang[j] + '\n' + '\n<br><br>')
def listsItems(self, url):
    query_data = {'url': url, 'use_host': True, 'host': HOST, 'use_cookie': True,
                  'save_cookie': True, 'load_cookie': False, 'cookiefile': self.COOKIEFILE,
                  'use_post': False, 'return_data': True}
    link = self.cm.getURLRequestData(query_data)
    HEADER = {'Accept-Language': 'pl,en-US;q=0.7,en;q=0.3', 'Referer': url, 'User-Agent': HOST,
              'X-Requested-With': 'XMLHttpRequest', 'Content-Type:': 'application/json'}
    #http://www.cda.pl/tick.php?ts=1443133845
    #query_data2 = { 'url': url, 'use_host': True, 'host': HOST, 'use_header': True, 'header': HEADER,
    #                'use_cookie': True, 'save_cookie': False, 'load_cookie': True,
    #                'cookiefile': self.COOKIEFILE, 'use_post': True, 'return_data': True }
    #link = self.cm.getURLRequestData(query_data2)
    #print("Link", link)
    match = re.compile('<label(.*?)>(.*?)</label>', re.DOTALL).findall(link)
    if len(match) > 0:
        for i in range(len(match)):
            match1 = re.compile('<img height="90" width="120" src="(.*?)" (.*?)>(.*?)<span class="timeElem">(.*?)</span>(.*?)</a>(.*?)<a class="titleElem" href="(.*?)">(.*?)</a>', re.DOTALL).findall(match[i][1])
            if len(match1) > 0:
                self.add('cdapl', 'playSelectedMovie', 'None', self.cm.html_special_chars(match1[0][7]) + ' - ' + match1[0][3].strip(), match1[0][0], mainUrlb + match1[0][6], 'aaaa', 'None', False, False)
    else:
        match2 = re.compile('<div class="block upload" id="dodane_video">(.*?)<div class="paginationControl">', re.DOTALL).findall(link)
        match3 = re.compile('<div class="videoElem">\n <a href="(.*?)" style="position:relative;width:120px;height:90px" title="(.*?)">\n <img width="120" height="90" src="(.*?)" title="(.*?)" alt="(.*?)" />\n ', re.DOTALL).findall(match2[0])
        if len(match3) > 0:
            for i in range(len(match3)):
                self.add('cdapl', 'playSelectedMovie', 'None', self.cm.html_special_chars(match3[i][1]), match3[i][2], mainUrlb + match3[i][0], 'aaaa', 'None', True, False)
    # <span class="next-wrapper"><a onclick="javascript:changePage(2);return false;" class="sbmBigNext btn-my btn-large fiximg" href=" "> Następna strona ></a></span>
    match10 = re.compile('<span class="next-wrapper"><a onclick="javascript:changePage\((.*?)\);return false;" class="sbmBigNext btn-my btn-large fiximg" href="(.*?)">(.*?)></a></span>', re.DOTALL).findall(link)
    print("M10000", match10)
    if len(match10) > 0:
        self.add('cdapl', 'categories-menu', 'Następna strona', 'None', 'None', mainUrlb + match10[0][1], 'None', 'None', True, False, match10[0][0])
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def __cut_internal(sentence, HMM=True):
    if not (type(sentence) is unicode):
        try:
            sentence = sentence.decode('utf-8')
        except:
            sentence = sentence.decode('gbk', 'ignore')
    re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(ur"(\r\n|\s)")
    re_eng, re_num = re.compile(ur"[a-zA-Z0-9]+"), re.compile(ur"[\.0-9]+")
    blocks = re_han.split(sentence)
    if HMM:
        __cut_blk = __cut_DAG
    else:
        __cut_blk = __cut_DAG_NO_HMM
    for blk in blocks:
        if re_han.match(blk):
            for word in __cut_blk(blk):
                yield word
        else:
            tmp = re_skip.split(blk)
            for x in tmp:
                if re_skip.match(x):
                    yield pair(x, 'x')
                else:
                    for xx in x:
                        if re_num.match(xx):
                            yield pair(xx, 'm')
                        elif re_eng.match(x):
                            yield pair(xx, 'eng')
                        else:
                            yield pair(xx, 'x')
def _create_regs(self, clist=None):
    """Creates regular expressions for all connected routes"""
    if clist is None:
        if self.directory:
            clist = self.controller_scan(self.directory)
        elif callable(self.controller_scan):
            clist = self.controller_scan()
        elif not self.controller_scan:
            clist = []
        else:
            clist = self.controller_scan

    for key, val in self.maxkeys.iteritems():
        for route in val:
            route.makeregexp(clist)

    regexps = []
    routematches = []
    for route in self.matchlist:
        if not route.static:
            routematches.append(route)
            regexps.append(route.makeregexp(clist, include_names=False))
    self._routematches = routematches

    # Create our regexp to strip the prefix
    if self.prefix:
        self._regprefix = re.compile(self.prefix + '(.*)')

    # Save the master regexp
    regexp = '|'.join(['(?:%s)' % x for x in regexps])
    self._master_reg = regexp
    self._master_regexp = re.compile(regexp)
    self._created_regs = True
def fromUrl(streamUrl):
    Log("Channel.py fromUrl ...")
    """
    Two types of valid stream URLs:
        hdhomerun://<device-id>-<tuner>/ch<physical-channel>-<program-number>
        hdhomerun://<device-id>-<tuner>/tuner<tuner>?channel=<modulation>:<frequency>&program=<program-number>
    """
    channel = Channel()

    urlRe = re.compile(r'^\s*hdhomerun\:\/\/([\w\-]+)\-(\d+)\/tuner(\d+)\?channel\=([^\:]+)\:(.+)\&program\=(.+)$')
    reMatch = urlRe.match(streamUrl)
    if reMatch:
        deviceId = reMatch.group(1)
        tunerId1 = reMatch.group(2)
        tunerId2 = reMatch.group(3)
        channel.Modulation = reMatch.group(4)
        channel.Frequency = reMatch.group(5)
        channel.ProgramNumber = reMatch.group(6)
        return channel

    urlRe = re.compile(r'^\s*hdhomerun\:\/\/([\w\-]+)\-(\d+)\/ch([^\-]+)-(\w+)$')
    reMatch = urlRe.match(streamUrl)
    if reMatch:
        deviceId = reMatch.group(1)
        tunerId1 = reMatch.group(2)
        channel.PhysicalChannel = reMatch.group(3)
        channel.ProgramNumber = reMatch.group(4)
        return channel

    return None
def parse_stat_str(s):
    print 'Parsing %s: ' % s
    d = {'mode': 'NOTPARSED', 'zxid': 'NOTPARSED'}
    d['mode'] = re.compile('.*Mode:\s(.*)').search(s).group(1)
    d['zxid'] = re.compile('.*Zxid:\s(.*)').search(s).group(1)
    print 'Parsed %s: ' % d
    return d
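# A minimal, assumed usage sketch (not from the original project): the sample string below
# imitates the "Mode:" and "Zxid:" lines of a ZooKeeper `stat` response so the two regexes
# above have something to pull out.
sample_stat = "Latency min/avg/max: 0/0/0\nMode: leader\nZxid: 0x100000001\n"
parsed = parse_stat_str(sample_stat)
print parsed['mode'], parsed['zxid']  # expected: leader 0x100000001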
def clean(sentence):
    """
    Takes a tweet as input, cleans it using the regular expressions defined and
    explained below, and returns the cleaned string.

    All the "stop words" are removed by using the below list of regexes.
    Of the following:
        r"http[s]*://\S+" selects all the links in the sentence.
        r" q.\d+" selects strings like q.1653 from the sentence.
        r"[#@]\w+" selects the @ mentions and hashtags in the sentence.
        r"[^A-Za-z0-9]" selects all the special characters in the sentence.
        r"\w+[-']\w+" selects all the words with "-" or "'" in between them.
    """
    common = [r"\bi\b", r"\bi[nfs]\b", r"\bo[nfr]\b", r"\ba\b", r"\ba[nts]\b", r"^i", r"\bother\b",
              r"\bhe\b", r"\bhave\b", r"\bus\b", r"\b[gdtsn]o\b", r"\bnot\b", r"\b[wb]e\b",
              r"\byour[s]*\b", r"\bwhich\b", r"\bthat\b", r"\bha[sd]\b", r"\band\b", r"\bby\b",
              r"\bthe[y]*\b", r"\b[t]*his\b", r"\bit[s]*\b", r"\bfor\b", r"\byou\b", r"\bwill\b",
              r"\bg[eo]t\b", r"\bbut\b", r"\bour\b", r"\bwas\b", r"\bcan\b", r"\balso\b", r"\byet\b",
              r"\bafter\b", r"\bwith\b", r"\bthem\b", r"\bdid\b", r"\bare\b", r"\bfrom\b",
              r"http[s]*://\S+", r" q.\d+", r"[#@]\w+", r"[^A-Za-z0-9]", r"\w+[-']\w+"]
    pattern = r"(" + r"|".join(common) + r")"
    p = re.compile(pattern)
    sentence = p.sub(" ", sentence)
    p = re.compile(" +")
    sentence = p.sub(" ", sentence).strip()
    return sentence
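# A short, assumed demonstration (not part of the original module): it simply prints the
# cleaned form of a made-up tweet so the effect of the stop-word, URL, mention and hashtag
# removal above is visible.
if __name__ == "__main__":
    sample_tweet = "I can't believe this! Check https://t.co/abc123 #wow @someone q.1653"
    print(clean(sample_tweet))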
def find_bug_ids(self, ctx):
    '''find valid bug ids that are referred to in changeset comments and
    that do not already have references to this changeset.'''
    if bugzilla._bug_re is None:
        bugzilla._bug_re = re.compile(
            self.ui.config('bugzilla', 'regexp', bugzilla._default_bug_re),
            re.IGNORECASE)
        bugzilla._split_re = re.compile(r'\D+')
    start = 0
    ids = set()
    while True:
        m = bugzilla._bug_re.search(ctx.description(), start)
        if not m:
            break
        start = m.end()
        for id in bugzilla._split_re.split(m.group(1)):
            if not id:
                continue
            ids.add(int(id))
    if ids:
        ids = self.filter_real_bug_ids(ids)
    if ids:
        ids = self.filter_unknown_bug_ids(ctx.node(), ids)
    return ids
def interact(line, stdin, process):
    global tot_sec
    global VIDEO_PAUSED
    global omx_stdin
    global omx_process

    omx_stdin = stdin
    omx_process = process

    # video regexps
    video_curr_rexp = re.compile(r'V :\s*([\d.]+).*')
    video_total_rexp = re.compile(r'Length : *([\d.]+)*')

    # get current video time
    curr = video_curr_rexp.search(line)
    if curr and tot_sec:
        pts = curr.group(1)
        sec = int(pts.split(".")[0]) / 1000000
        print(sec, tot_sec)
        # pause the video when it reaches its last seconds
        if tot_sec == sec and VIDEO_PAUSED == False:
            VIDEO_PAUSED = True
            stdin.put('p')
            print("---- PAUSE ----")
    else:
        # renamed from `len` to avoid shadowing the built-in
        length = video_total_rexp.search(line)
        if length:
            tot_pts = length.group(1)
            tot_sec = (int(tot_pts) / 1000) - 11
def ParseMethodAnnotation(self, annotation):
    if annotation.find('reservable = true') >= 0:
        self._is_reservable = True

    delegate_re = re.compile('delegate\s*=\s*'
                             '(?P<delegate>(true|false))')
    for match in re.finditer(delegate_re, annotation):
        delegate = match.group('delegate')
        if delegate == 'true':
            self._is_delegate = True
        elif delegate == 'false':
            self._is_delegate = False

    disable_reflect_method_re = re.compile('disableReflectMethod\s*=\s*'
                                           '(?P<disableReflectMethod>(true|false))')
    for match in re.finditer(disable_reflect_method_re, annotation):
        disable_reflect_method = match.group('disableReflectMethod')
        if disable_reflect_method == 'true':
            self._disable_reflect_method = True
        else:
            self._disable_reflect_method = False

    pre_wrapline_re = re.compile('preWrapperLines\s*=\s*\{\s*('
                                 '?P<pre_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(pre_wrapline_re, annotation):
        pre_wrapline = self.FormatWrapperLine(match.group('pre_wrapline'))
        self._method_annotations[self.ANNOTATION_PRE_WRAPLINE] = pre_wrapline

    post_wrapline_re = re.compile('postWrapperLines\s*=\s*\{\s*('
                                  '?P<post_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(post_wrapline_re, annotation):
        post_wrapline = self.FormatWrapperLine(match.group('post_wrapline'))
        self._method_annotations[self.ANNOTATION_POST_WRAPLINE] = post_wrapline
def LISTTV4(murl):
    main.addDir('Search Rlsmix', 'rlsmix', 136, "%s/art/search.png" % selfAddon.getAddonInfo("path"))
    main.addLink('[COLOR red]First turbobit Link could be HD[/COLOR]', '', "%s/art/tvb.png" % selfAddon.getAddonInfo("path"))
    urllist = ['http://www.rlsmix.net/category/tv-shows/', 'http://www.rlsmix.net/category/tv-shows/page/2/',
               'http://www.rlsmix.net/category/tv-shows/page/3/', 'http://www.rlsmix.net/category/tv-shows/page/4/',
               'http://www.rlsmix.net/category/tv-shows/page/5/', 'http://www.rlsmix.net/category/tv-shows/page/6/',
               'http://www.rlsmix.net/category/tv-shows/page/7/', 'http://www.rlsmix.net/category/tv-shows/page/8/',
               'http://www.rlsmix.net/category/tv-shows/page/9/', 'http://www.rlsmix.net/category/tv-shows/page/10/']
    dialogWait = xbmcgui.DialogProgress()
    ret = dialogWait.create('Please wait until Show list is cached.')
    totalLinks = 10
    loadedLinks = 0
    remaining_display = 'Pages loaded :: [B]' + str(loadedLinks) + ' / ' + str(totalLinks) + '[/B].'
    dialogWait.update(0, '[B]Will load instantly from now on[/B]', remaining_display)
    for murl in urllist:
        link = main.OPENURL(murl)
        link = link.replace('\r', '').replace('\n', '').replace('\t', '').replace(' ', '')
        match = re.compile('<h1 class="titles"><a href="(.+?)" title="Permanent Link to (.+?)">.+?src="http://uppix.net/(.+?)"').findall(link)
        for url, name, thumb in match:
            match2 = re.compile('TV Round Up').findall(name)
            name = name.replace('\xc2\xa0', '').replace('" ', '').replace(' "', '').replace('"', '').replace("'", "'").replace("&", "and").replace("’", "'").replace("amp;", "and").replace("#8211;", "-")
            if len(match2) == 0:
                main.addDir(name, url, 62, 'http://uppix.net/' + thumb)
        loadedLinks = loadedLinks + 1
        percent = (loadedLinks * 100) / totalLinks
        remaining_display = 'Pages loaded :: [B]' + str(loadedLinks) + ' / ' + str(totalLinks) + '[/B].'
        dialogWait.update(percent, '[B]Will load instantly from now on[/B]', remaining_display)
        if (dialogWait.iscanceled()):
            return False
    dialogWait.close()
    del dialogWait
    main.GA("TV", "Rlsmix")
def getDiseaseLink(catgory):
    '''
    Given a category from the category list, fetch the list of disease links for that category.
    '''
    global disease_list
    global host
    global lock
    re1 = re.compile(r'\d+')
    re2 = re.compile(r'.*(/dkd/disease/.*/)".*')
    page = 1
    while True:
        replace_url = host + re1.sub(str(page), catgory)
        try:
            content = getContent(replace_url)
        except Exception as e:
            break
        else:
            links = getMatchItems(re2, content)
            if links:
                lock.acquire()
                try:
                    page += 1
                    for link in links:
                        if link not in disease_list:
                            disease_list.append(link)
                finally:
                    lock.release()
            else:
                break
    return disease_list
def listA(name, type, url, thumb):
    link = GetHttpData(url)
    match1 = re.compile("<!-- 剧集列表 start -->(.+?)<!-- 剧集列表 end -->", re.DOTALL).findall(link)
    match2 = re.compile('<div class="left">(.+?)</div>', re.DOTALL).findall(match1[0])
    if match2:
        match = re.compile(r"'videoListCon', '(.+?)'", re.DOTALL).findall(match2[0])
        if match:
            FindItems(type, match1[0])
            for url in match:
                link = GetHttpData("http://www.juchang.com" + url)
                link = link.decode("gbk").encode("utf8")
                FindItems(type, link)
                match2 = re.compile('<a href="#" class="one"(.+?)<a class="two"', re.DOTALL).findall(link)
                if match2:
                    match3 = re.compile(r"'videoListCon','(.+?)'", re.DOTALL).findall(match2[0])
                    for urla in match3:
                        link = GetHttpData("http://www.juchang.com" + urla)
                        link = link.decode("gbk").encode("utf8")
                        FindItems(type, link)
        else:
            FindItems(type, match1[0])
    else:
        FindItems(type, match1[0])
    xbmcplugin.setContent(int(sys.argv[1]), "movies")
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def loginWithCheckCode(self):
    # ask the user for the CAPTCHA text ("请输入验证码" = "please enter the verification code")
    checkcode = raw_input('请输入验证码')
    self.post['TPL_checkcode'] = checkcode
    self.postData = urllib.urlencode(self.post)
    try:
        request = urllib2.Request(self.loginURL, self.postData, self.loginHeaders)
        response = self.opener.open(request)
        content = response.read().decode('gbk')
        # u'\u9a8c\u8bc1\u7801\u9519\u8bef' is "验证码错误" ("verification code is wrong")
        pattern = re.compile(u'\u9a8c\u8bc1\u7801\u9519\u8bef', re.S)
        result = re.search(pattern, content)
        if result:
            print u"验证码输入错误"  # wrong verification code
            return False
        else:
            tokenPattern = re.compile('id="J_HToken" value="(.*?)"')
            tokenMatch = re.search(tokenPattern, content)
            if tokenMatch:
                print u"验证码输入正确"  # verification code accepted
                print tokenMatch.group(1)
                return tokenMatch.group(1)
            else:
                print u"J_Token"
                return False
    except urllib2.HTTPError, e:
        print u"出错", e.reason  # request failed
        return False
def GetSetting(setting, timeout=5):
    if not IsSetting(setting):
        raise ValueError("Not a setting.")
    if not Global.serverlog:
        raise RuntimeError("Script wasn't started with run.py or Global.serverlog wasn't set.")
    startpattern = r"\[0[^\]]*\] "
    pattern1 = re.compile(startpattern + setting.upper() + r" is currently set to (?P<value>[^.]*)\.")
    pattern2 = re.compile(startpattern + setting.upper() + r" changed from (?P<value>((([^t]|t+[^o])*)(to)*)*)to \.")
    serverlog = open(Global.serverlog, encoding="latin-1")
    serverlog.seek(0, 2)
    SendCommand(setting.upper())
    match = None
    for i in range(timeout * 2):  # @UnusedVariable
        for line in serverlog.readlines():
            match = pattern1.search(line)
            if match == None:
                match = pattern2.search(line)
            if match != None:
                break
        if match != None:
            break
        time.sleep(0.5)
    if match == None:
        return ""
    value = match.group("value")
    SendCommand(setting.upper() + " " + value)
    return value.strip()
def normalizeTitle(title):
    # remove leading whitespace and underscores
    title = title.strip(' _')
    # replace sequences of whitespace and underscore chars with a single space
    title = re.compile(r'[\s_]+').sub(' ', title)

    m = re.compile(r'([^:]*):(\s*)(\S(?:.*))').match(title)
    if m:
        prefix = m.group(1)
        if m.group(2):
            optionalWhitespace = ' '
        else:
            optionalWhitespace = ''
        rest = m.group(3)

        ns = prefix.capitalize()
        if ns in acceptedNamespaces:
            # If the prefix designates a known namespace, then it might be
            # followed by optional whitespace that should be removed to get
            # the canonical page name
            # (e.g., "Category: Births" should become "Category:Births").
            title = ns + ":" + rest.capitalize()
        else:
            # No namespace, just capitalize first letter.
            # If the part before the colon is not a known namespace, then we must
            # not remove the space after the colon (if any), e.g.,
            # "3001: The_Final_Odyssey" != "3001:The_Final_Odyssey".
            # However, to get the canonical page name we must contract multiple
            # spaces into one, because
            # "3001:   The_Final_Odyssey" != "3001: The_Final_Odyssey".
            title = prefix.capitalize() + ":" + optionalWhitespace + rest
    else:
        # no namespace, just capitalize first letter
        title = title.capitalize()
    return title
def get_ticket_tumbers(cls, build):
    """Extract ticket ids from the changeset of a Jenkins build"""
    items = build.get_changeset_items()
    ticket_numbers = []
    regex = re.compile(cls.TICKET_REGEX)
    for entry in items:
        message = entry["msg"]
        print("-- found message: ", message)
        noissue = re.compile(r"#noissue")
        if not noissue.search(message):
            match = regex.search(message)
            if match is None:
                print(
                    "found malformed message in build: ", build.get_number(),
                    "\n", "with message: ", message
                )
            else:
                ticket = match.group(1)
                if ticket not in ticket_numbers:
                    ticket_numbers.append(ticket)
    return ticket_numbers
def parse_replace_hook(s):
    """
        Returns a (pattern, regex, replacement) tuple.

        The general form for a replacement hook is as follows:

            /patt/regex/replacement

        The first character specifies the separator. Example:

            :~q:foo:bar

        If only two clauses are specified, the pattern is set to match
        universally (i.e. ".*"). Example:

            /foo/bar/

        Clauses are parsed from left to right. Extra separators are taken to be
        part of the final clause. For instance, the replacement clause below is
        "foo/bar/":

            /one/two/foo/bar/

        Checks that pattern and regex are both well-formed. Raises
        ParseException on error.
    """
    patt, regex, replacement = _parse_hook(s)
    try:
        re.compile(regex)
    except re.error, e:
        raise ParseException("Malformed replacement regex: %s" % str(e.message))
    # return the parsed clauses, as promised by the docstring
    return patt, regex, replacement
def __load_book_menu(self, lines):
    # r1 matches a line that is just the "目录" ("table of contents") heading;
    # r2 matches entries like "chapter title ....... 123"
    r1 = re.compile(u'^\s*目\s*录\s*$')
    r2 = re.compile(u'^\s*([^·…]+)\s*[·.…]{2,}\s*([l\d]+)\s*$')
    menus = {}
    start = False
    not_match = 0
    for line in lines:
        words = line.decode(self.default_coding)
        words.strip('\n')
        if re.match(r1, words):
            start = True
            continue
        elif start:
            m = re.match(r2, words)
            if m:
                title = m.group(1)
                page = m.group(2)
                page = page.replace('l', '1')
                page = int(page.encode(self.default_coding))
                menus[page] = self.__get_simple_string(title)
                not_match = 0
            else:
                not_match += 1
                if not_match > 10:
                    break
    return menus
def Episodes(url, name):
    # try:
    link = GetContentMob(url)
    newlink = "".join(link.splitlines()).replace("\t", "")
    match = re.compile(
        '<td style="text-align:justify" class="movieepisode"><strong>' + name + "</strong>(.+?)</td>"
    ).findall(newlink)
    mirrors = re.compile("<a [^>]*href=[\"']?([^>^\"^']+)[\"']?[^>]*>(.+?)</a>").findall(match[0])
    if len(mirrors) >= 1:
        i = 1
        for mcontent in mirrors:
            vLinktemp, vLinkName = mcontent
            vLink = ""
            j = 1
            k = 1
            for mlink in mirrors:
                vLink1, vLinkName1 = mlink
                if j >= i:
                    if i == len(mirrors) or j == len(mirrors) or k == 12:
                        vLink += viddomain + vLink1 + "+++" + vLinkName1
                    else:
                        vLink += viddomain + vLink1 + "+++" + vLinkName1 + "***"
                    if k % 12 == 0:
                        break
                    k += 1
                j += 1
            i += 1
            # addLink("tập: " + RemoveHTML(vLinkName).strip(), mobileurl + "/" + vLink, 3, '', "")
            addLink("Tập: " + RemoveHTML(vLinkName).strip(), vLink, 3, "", "")  # "Tập" = "Episode"
            print vLink
def _translate(version, rules, standard):
    """Translate Python version into Debian one.

    >>> _translate('1.C2betac', ['s/c//gi'], None)
    '1.2beta'
    >>> _translate('5-fooa1.2beta3-fooD',
    ...     ['s/^/1:/', 's/-foo//g', 's:([A-Z]):+$1:'], 'PEP386')
    '1:5~a1.2~beta3+D'
    >>> _translate('x.y.x.z', ['tr/xy/ab/', 'y,z,Z,'], None)
    'a.b.a.Z'
    """
    for rule in rules:
        # uscan supports s, tr and y operations
        if rule.startswith(('tr', 'y')):
            # Note: no support for escaped separator in the pattern
            pos = 1 if rule.startswith('y') else 2
            tmp = rule[pos + 1:].split(rule[pos])
            version = version.translate(str.maketrans(tmp[0], tmp[1]))
        elif rule.startswith('s'):
            # uscan supports: g, u and x flags
            tmp = rule[2:].split(rule[1])
            pattern = re.compile(tmp[0])
            count = 1
            if tmp[2:]:
                flags = tmp[2]
                if 'g' in flags:
                    count = 0
                if 'i' in flags:
                    pattern = re.compile(tmp[0], re.I)
            version = pattern.sub(_pl2py(tmp[1]), version, count)
        else:
            log.warn('unknown rule ignored: %s', rule)
    if standard == 'PEP386':
        version = PRE_VER_RE.sub(r'~\g<1>', version)
    return version
def add(self, irc, msg, args, channel, regexp, action):
    """[<channel>] <regexp> <action>

    Associates <regexp> with <action>.  <channel> is only necessary if the
    message isn't sent on the channel itself.  Action is echoed upon regexp
    match, with variables $1, $2, etc. being interpolated from the regexp
    match groups."""
    if not self._checkManageCapabilities(irc, msg, channel):
        capabilities = self.registryValue('requireManageCapability')
        irc.errorNoCapability(capabilities, Raise=True)
    db = self.getDb(channel)
    cursor = db.cursor()
    cursor.execute("SELECT id, usage_count, locked FROM triggers WHERE regexp=?", (regexp,))
    results = cursor.fetchall()
    if len(results) != 0:
        (id, usage_count, locked) = map(int, results[0])
    else:
        locked = 0
        usage_count = 0
    if not locked:
        try:
            re.compile(regexp)
        except Exception, e:
            irc.error('Invalid python regexp: %s' % (e,))
            return
        if ircdb.users.hasUser(msg.prefix):
            name = ircdb.users.getUser(msg.prefix).name
        else:
            name = msg.nick
        cursor.execute("""INSERT INTO triggers VALUES
                          (NULL, ?, ?, ?, ?, ?, ?)""",
                       (regexp, name, int(time.time()), usage_count, action, locked,))
        db.commit()
        irc.replySuccess()
def init_db(db_url):
    regex = re.compile('^mongodb:\\/\\/(.*?):(.*?)@(.*?):([0-9]+)\\/(.*)$')
    match = regex.match(db_url)
    if not match:
        regex = re.compile('^mongodb:\\/\\/(.*?)\\/(.*)$')
        match = regex.match(db_url)
        username = None
        password = None
        host = match.group(1)
        port = None
        db_name = match.group(2)
    else:
        username = match.group(1)
        password = match.group(2)
        host = match.group(3)
        port = int(match.group(4))
        db_name = match.group(5)
    conn = mongoengine.connect(db_name, host=host, port=port,
                               username=username, password=password)
    return conn[db_name]
def __init__(self, **kwargs):
    for k, v in kwargs.items():
        if hasattr(self, k):
            setattr(self, k, v)
    self.keys = AttrDictSimple(
        name=("Name", SENTENCE_SYMBOLS_RE, "Name " + SENTENCE_SYMBOLS_WARNING),
        ident=("Identifier", re.compile(r"^[a-zA-Z][a-zA-Z\d_]{2,}$"),
               "Identifier must be 3 or more alphanumeric characters"
               " (underscore allowed)."),
        desc=("Description", SENTENCE_SYMBOLS_RE, "Description " + SENTENCE_SYMBOLS_WARNING),
        prefix=("Prefix", re.compile(r"^[a-zA-Z][a-zA-Z\d_]{2,4}$"),
                "Prefix must be 3 to 5 alphanumeric characters"
                " (underscores allowed)."),
        domain=("Domain", re.compile(r"^([a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*\.)*"
                                     "[a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*"
                                     "\.[a-zA-Z]{2,4}$"),
                "Domain must be a valid domain name."),
        mimetype=("MIME type", re.compile(r"^[a-zA-Z0-9]+"
                                          "\/[a-zA-Z0-9\-]+$"),
                  "Please use alphanumeric characters and dashes in the"
                  " format: application/x-firebreath"),
        disable_gui=("has no UI", re.compile(r"^true$|false$"),
                     "Please enter valid input: true or false"),
    )
import aniso8601
import pytz

# Constants for upgrading date-based intervals to full datetimes.
START_OF_DAY = time(0, 0, 0, tzinfo=pytz.UTC)
END_OF_DAY = time(23, 59, 59, 999999, tzinfo=pytz.UTC)

# https://code.djangoproject.com/browser/django/trunk/django/core/validators.py
# basic auth added by frank
url_regex = re.compile(
    r"^(?:http|ftp)s?://"  # http:// or https://
    r"(?:[^:@]+?:[^:@]*?@|)"  # basic auth
    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+"
    r"(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
    r"localhost|"  # localhost...
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|"  # ...or ipv4
    r"\[?[A-F0-9]*:[A-F0-9:]+\]?)"  # ...or ipv6
    r"(?::\d+)?"  # optional port
    r"(?:/?|[/?]\S+)$",
    re.IGNORECASE,
)


def url(value):
    """Validate a URL.

    :param string value: The URL to validate
    :returns: The URL if valid.
    :raises: ValueError
    """
    if not url_regex.search(value):
        # completion of the truncated snippet: the docstring above implies the function
        # raises ValueError for invalid input and returns the value otherwise
        raise ValueError("{0} is not a valid URL".format(value))
    return value
class EventsBackend(BaseBackend): ACCOUNT_ID = re.compile(r"^(\d{1,12}|\*)$") STATEMENT_ID = re.compile(r"^[a-zA-Z0-9-_]{1,64}$") def __init__(self, region_name): self.rules = {} # This array tracks the order in which the rules have been added, since # 2.6 doesn't have OrderedDicts. self.rules_order = [] self.next_tokens = {} self.region_name = region_name self.event_buses = {} self.event_sources = {} self._add_default_event_bus() def reset(self): region_name = self.region_name self.__dict__ = {} self.__init__(region_name) def _add_default_event_bus(self): self.event_buses["default"] = EventBus(self.region_name, "default") def _get_rule_by_index(self, i): return self.rules.get(self.rules_order[i]) def _gen_next_token(self, index): token = os.urandom(128).encode("base64") self.next_tokens[token] = index return token def _process_token_and_limits(self, array_len, next_token=None, limit=None): start_index = 0 end_index = array_len new_next_token = None if next_token: start_index = self.next_tokens.pop(next_token, 0) if limit is not None: new_end_index = start_index + int(limit) if new_end_index < end_index: end_index = new_end_index new_next_token = self._gen_next_token(end_index) return start_index, end_index, new_next_token def delete_rule(self, name): self.rules_order.pop(self.rules_order.index(name)) return self.rules.pop(name) is not None def describe_rule(self, name): return self.rules.get(name) def disable_rule(self, name): if name in self.rules: self.rules[name].disable() return True return False def enable_rule(self, name): if name in self.rules: self.rules[name].enable() return True return False def list_rule_names_by_target(self, target_arn, next_token=None, limit=None): matching_rules = [] return_obj = {} start_index, end_index, new_next_token = self._process_token_and_limits( len(self.rules), next_token, limit ) for i in range(start_index, end_index): rule = self._get_rule_by_index(i) for target in rule.targets: if target["Arn"] == target_arn: matching_rules.append(rule.name) return_obj["RuleNames"] = matching_rules if new_next_token is not None: return_obj["NextToken"] = new_next_token return return_obj def list_rules(self, prefix=None, next_token=None, limit=None): match_string = ".*" if prefix is not None: match_string = "^" + prefix + match_string match_regex = re.compile(match_string) matching_rules = [] return_obj = {} start_index, end_index, new_next_token = self._process_token_and_limits( len(self.rules), next_token, limit ) for i in range(start_index, end_index): rule = self._get_rule_by_index(i) if match_regex.match(rule.name): matching_rules.append(rule) return_obj["Rules"] = matching_rules if new_next_token is not None: return_obj["NextToken"] = new_next_token return return_obj def list_targets_by_rule(self, rule, next_token=None, limit=None): # We'll let a KeyError exception be thrown for response to handle if # rule doesn't exist. 
rule = self.rules[rule] start_index, end_index, new_next_token = self._process_token_and_limits( len(rule.targets), next_token, limit ) returned_targets = [] return_obj = {} for i in range(start_index, end_index): returned_targets.append(rule.targets[i]) return_obj["Targets"] = returned_targets if new_next_token is not None: return_obj["NextToken"] = new_next_token return return_obj def put_rule(self, name, **kwargs): rule = Rule(name, self.region_name, **kwargs) self.rules[rule.name] = rule self.rules_order.append(rule.name) return rule.arn def put_targets(self, name, targets): rule = self.rules.get(name) if rule: rule.put_targets(targets) return True return False def put_events(self, events): num_events = len(events) if num_events < 1: raise JsonRESTError("ValidationError", "Need at least 1 event") elif num_events > 10: raise JsonRESTError("ValidationError", "Can only submit 10 events at once") # We dont really need to store the events yet return [] def remove_targets(self, name, ids): rule = self.rules.get(name) if rule: rule.remove_targets(ids) return True return False def test_event_pattern(self): raise NotImplementedError() def put_permission(self, event_bus_name, action, principal, statement_id): if not event_bus_name: event_bus_name = "default" event_bus = self.describe_event_bus(event_bus_name) if action is None or action != "events:PutEvents": raise JsonRESTError( "ValidationException", "Provided value in parameter 'action' is not supported.", ) if principal is None or self.ACCOUNT_ID.match(principal) is None: raise JsonRESTError( "InvalidParameterValue", "Principal must match ^(\d{1,12}|\*)$" ) if statement_id is None or self.STATEMENT_ID.match(statement_id) is None: raise JsonRESTError( "InvalidParameterValue", "StatementId must match ^[a-zA-Z0-9-_]{1,64}$" ) event_bus._permissions[statement_id] = { "Action": action, "Principal": principal, } def remove_permission(self, event_bus_name, statement_id): if not event_bus_name: event_bus_name = "default" event_bus = self.describe_event_bus(event_bus_name) if not len(event_bus._permissions): raise JsonRESTError( "ResourceNotFoundException", "EventBus does not have a policy." ) if not event_bus._permissions.pop(statement_id, None): raise JsonRESTError( "ResourceNotFoundException", "Statement with the provided id does not exist.", ) def describe_event_bus(self, name): if not name: name = "default" event_bus = self.event_buses.get(name) if not event_bus: raise JsonRESTError( "ResourceNotFoundException", "Event bus {} does not exist.".format(name), ) return event_bus def create_event_bus(self, name, event_source_name): if name in self.event_buses: raise JsonRESTError( "ResourceAlreadyExistsException", "Event bus {} already exists.".format(name), ) if not event_source_name and "/" in name: raise JsonRESTError( "ValidationException", "Event bus name must not contain '/'." ) if event_source_name and event_source_name not in self.event_sources: raise JsonRESTError( "ResourceNotFoundException", "Event source {} does not exist.".format(event_source_name), ) self.event_buses[name] = EventBus(self.region_name, name) return self.event_buses[name] def list_event_buses(self, name_prefix): if name_prefix: return [ event_bus for event_bus in self.event_buses.values() if event_bus.name.startswith(name_prefix) ] return list(self.event_buses.values()) def delete_event_bus(self, name): if name == "default": raise JsonRESTError( "ValidationException", "Cannot delete event bus default." ) self.event_buses.pop(name, None)
def group_callback(test_id, regex=re.compile(group_regex)):
    match = regex.match(test_id)
    if match:
        return match.group(0)
def get_hostmask_regex(mask):
    """Return a compiled `re.RegexObject` for an IRC hostmask"""
    mask = re.escape(mask)
    mask = mask.replace(r'\*', '.*')
    return re.compile(mask + '$', re.I)
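# A small, assumed usage example (not from the original module): a wildcard hostmask is
# compiled and then matched against a concrete IRC prefix.
if __name__ == "__main__":
    pattern = get_hostmask_regex('*!*@example.com')
    print(bool(pattern.match('nick!user@example.com')))  # expected: True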
def test_setting_regexp(self):
    with Settings(SENTRY_ALLOW_ORIGIN=[re.compile('https?\://(.*\.)?example\.com')]):
        assert is_valid_origin('http://example.com')
class CSV(object): eval_re = re.compile('eval\((.*)\)') def __init__(self, fpath, newnames=None, delimiter=None, transpose=False): f = csv_open(fpath) if delimiter is None: dialect = csv.Sniffer().sniff(f.read(1024)) # dialect = csv.Sniffer().sniff(f.read(1024), ',:|\t') f.seek(0) data_stream = csv.reader(f, dialect) else: data_stream = csv.reader(f, delimiter=delimiter) if transpose: transposed = transpose_table(list(data_stream)) data_stream = iter(transposed) else: transposed = None self.fpath = fpath if newnames is not None: # TODO: move this junk out of the class basename = os.path.splitext(os.path.basename(fpath))[0] for k in newnames: m = self.eval_re.match(newnames[k]) if m: eval_str = m.group(1) newnames[k] = eval_with_template(eval_str, {'basename': basename}) self.newnames = newnames self.transposed = transposed self.f = f self.data_stream = data_stream self._fields = None self._field_names = None self._numlines = None def __iter__(self): return iter(self.data_stream) def next(self): return next(self.data_stream) def rewind(self): if self.transposed is not None: self.data_stream = iter(self.transposed) else: self.f.seek(0) def close(self): self.f.close() @property def field_names(self): if self._field_names is None: # TODO: use self._fields instead if it was already computed # read the first line in the file self.rewind() fnames = self.next() if self.newnames is not None: fnames = [self.newnames.get(name, name) for name in fnames] self._field_names = fnames return self._field_names @property def fields(self): if self._fields is None: self.rewind() fields = detect_column_types(self.data_stream) if self.newnames is not None: fields = [(self.newnames.get(name, name), type_) for name, type_ in fields] self._fields = fields return self._fields @property def numlines(self): if self._numlines is None: if self.transposed is not None: self._numlines = len(self.transposed) - 1 else: self._numlines = countlines(self.fpath) - 1 return self._numlines def read(self, fields=None): """imports one Xsv file with all columns * columns can be in any order (they will be reordered if needed) * row order is preserved """ print(" - reading", self.fpath) if fields is None: fields = self.fields positions = None else: available = self.field_names missing = set(name for name, _ in fields) - set(available) if missing: raise Exception("%s does not contain any field(s) named: %s" % (self.fpath, ", ".join(missing))) positions = [available.index(name) for name, _ in fields] self.rewind() self.next() return convert(self.data_stream, fields, positions) def as_array(self, fields=None): if fields is None: fields = self.fields # csv file is assumed to be in the correct order (ie by period then id) datastream = self.read(fields) return fromiter(datastream, dtype=np.dtype(fields), count=self.numlines)
def __init__(self, pattern, flags=0):
    self.pattern = pattern
    self.re = re.compile(pattern, flags)
def _setup_new_database(cur, database_engine):
    """Sets up the database by finding a base set of "full schemas" and then
    applying any necessary deltas.

    The "full_schemas" directory has subdirectories named after versions. This
    function searches for the highest version less than or equal to
    `SCHEMA_VERSION` and executes all .sql files in that directory.

    The function will then apply all deltas for all versions after the base
    version.

    Example directory structure:

        schema/
            delta/
                ...
            full_schemas/
                3/
                    test.sql
                    ...
                11/
                    foo.sql
                    bar.sql
                    ...

    In the example foo.sql and bar.sql would be run, and then any delta files
    for versions strictly greater than 11.
    """
    current_dir = os.path.join(dir_path, "schema", "full_schemas")
    directory_entries = os.listdir(current_dir)

    valid_dirs = []
    pattern = re.compile(r"^\d+(\.sql)?$")
    for filename in directory_entries:
        match = pattern.match(filename)
        abs_path = os.path.join(current_dir, filename)
        if match and os.path.isdir(abs_path):
            ver = int(match.group(0))
            if ver <= SCHEMA_VERSION:
                valid_dirs.append((ver, abs_path))
        else:
            logger.warn("Unexpected entry in 'full_schemas': %s", filename)

    if not valid_dirs:
        raise PrepareDatabaseException(
            "Could not find a suitable base set of full schemas"
        )

    max_current_ver, sql_dir = max(valid_dirs, key=lambda x: x[0])

    logger.debug("Initialising schema v%d", max_current_ver)

    directory_entries = os.listdir(sql_dir)

    for filename in fnmatch.filter(directory_entries, "*.sql"):
        sql_loc = os.path.join(sql_dir, filename)
        logger.debug("Applying schema %s", sql_loc)
        executescript(cur, sql_loc)

    cur.execute(
        database_engine.convert_param_style(
            "INSERT INTO schema_version (version, upgraded)"
            " VALUES (?,?)"
        ),
        (max_current_ver, False,)
    )

    _upgrade_existing_database(
        cur,
        current_version=max_current_ver,
        applied_delta_files=[],
        upgraded=False,
        database_engine=database_engine,
        config=None,
        is_empty=True,
    )
def beginSpider(self, url):
    user = "******"
    pwd = "94c05j02p"
    # 1. setup: open a Chrome instance and log in to Zhihu
    browser = webdriver.Chrome()
    self.loginzhihu(browser, user, pwd)
    browser.get(url)
    #x = browser.get_cookies()
    #browser.refresh()
    try:
        # First load the full content: keep clicking as long as the page still shows a "more" button.
        while True:
            # Note: selenium2 does not accept class names that contain spaces.
            try:
                self.scroll(browser)
                time.sleep(18)
                more = browser.find_element_by_css_selector("button.Button.QuestionMainAction")
                actions = ActionChains(browser)
                actions.move_to_element(more)
                actions.click(more)
                actions.perform()
                # more.click()
                # If the "more" button was found, click it and wait before the next round.
            except NoSuchElementException as e:
                break
        # With everything loaded, parse the page and collect the answer blocks as `items`.
        soup = BeautifulSoup(browser.page_source, "html.parser")
        # 2. Extract the question title and the list of answer items from the soup.
        titles = soup.find("title").text.replace('\n', '').replace('?', '').split()
        title = titles[0]
        print(title)
        # Create a folder named after the title in the current directory if it does not exist yet.
        dirpath = os.getcwd() + "\\" + title + "\\"
        if not os.path.exists(dirpath):
            os.makedirs(dirpath)
        items = soup.find_all("div", class_=re.compile("List-item"))
        nimingCount = 0
        for item in items:
            # Two cases: 1. anonymous user without a leading avatar
            #            2. anonymous user with a leading avatar
            userName = item.find('img', class_="Avatar AuthorInfo-avatar").get("alt")
            if "匿名" in userName:  # "匿名" = anonymous
                userName = "******" + str(nimingCount)
                nimingCount += 1
            count = 0
            # a single answer may contain several pictures
            images = item.find_all('img', class_="origin_image zh-lightbox-thumb lazy")
            for image in images:
                # save the picture
                imageSrc = image.get("src")
                picName = dirpath + userName + '_' + str(count) + ".jpg"
                count += 1
                try:
                    imageData = requests.get(imageSrc, stream=True).content
                    try:
                        with open(picName, 'wb') as jpg:
                            jpg.write(imageData)
                    except IOError as e:
                        print(userName + "的一张图片写入错误")  # failed to write one of this user's pictures
                except MissingSchema as e:
                    print(userName + "的一张图片获取失败")  # failed to fetch one of this user's pictures
                    print("地址为:" + imageSrc)  # the offending URL
    finally:
        # Always close the browser at the end, otherwise it stays open forever.
        browser.quit()
    '''
    response.view = 'generic.json'
    # read NexSON from 'nexson' arg or (more likely) the request body
    nexson = extract_nexson_from_http_call(request, **request.vars)  # web2py equivalent to **kwargs
    try:
        o = merge_otus_and_trees(nexson)
        return {'data': o, 'error': 0}
    except Exception, x:
        s = str(x)
        return {'error': 1, 'description': s}


UPLOADID_PAT = re.compile(r'^[a-zA-Z_][-_.a-zA-Z0-9]{4,84}$')
ID_PREFIX_PAT = re.compile(r'^[a-zA-Z_][-_.a-zA-Z0-9]*$')


def to_nexson():
    global UPLOADID_PAT
    from externalproc import get_external_proc_dir_for_upload, get_logger, invoc_status, \
        ExternalProcStatus, get_conf, write_input_files, write_ext_proc_content, do_ext_proc_launch
    import os
    import datetime
    import codecs
    import locket
    import shutil
    import uuid
    from peyotl.nexson_syntax import can_convert_nexson_forms, \
        get_ot_study_info_from_nexml, \
L_cheru = {W_cheru ∪ `\\W`}*

The cheru language (切噜语) is formed by joining cheru words with punctuation.
"""
from nonebot.message import escape
from itertools import zip_longest
import re

from nonebot import *

bot = get_bot()

CHERU_SET = '切卟叮咧哔唎啪啰啵嘭噜噼巴拉蹦铃'
CHERU_DIC = {c: i for i, c in enumerate(CHERU_SET)}
ENCODING = 'gb18030'
rex_split = re.compile(r'\b', re.U)
rex_word = re.compile(r'^\w+$', re.U)
rex_cheru_word: re.Pattern = re.compile(rf'切[{CHERU_SET}]+', re.U)


def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)


def word2cheru(w: str) -> str:
    c = ['切']
    for b in w.encode(ENCODING):
        c.append(CHERU_SET[b & 0xf])
        c.append(CHERU_SET[(b >> 4) & 0xf])
    return ''.join(c)
from __future__ import print_function

# Usage: python trafoXML_visualize.py input.trafoXML output_dir

import re, numpy, sys
from matplotlib.mlab import *
from matplotlib.pyplot import *

resdir = ''
file_in = 'small_002.trafoXML'
if len(sys.argv) > 1:
    file_in = sys.argv[1]
if len(sys.argv) > 2:
    resdir = sys.argv[2]

f = open(file_in)
text = f.read()
f.close()

# parse the input file into pairs of x/y coordinates
pair_re = re.compile('<Pair from="([^ ]*)" to="([^ ]*)"/>')
x = []
y = []
for pair in pair_re.finditer(text):
    x.append(float(pair.group(1)))
    y.append(float(pair.group(2)))

# calculate least squares regression (use the explicit numpy import rather than
# relying on `np` leaking in from the pyplot star import)
A = numpy.vstack([x, numpy.ones(len(x))]).T
m, c = numpy.linalg.lstsq(A, y)[0]
print("Use linear fit", m, c)

# calculate residues
residues = []
predicted = []
    DistutilsFileError)
from setuptools.command.egg_info import manifest_maker
from setuptools.dist import Distribution
from setuptools.extension import Extension

try:
    import ConfigParser as configparser
except ImportError:
    import configparser

from pbr import extra_files
import pbr.hooks

# A simplified RE for this; just checks that the line ends with version
# predicates in ()
_VERSION_SPEC_RE = re.compile(r'\s*(.*?)\s*\((.*)\)\s*$')


# Mappings from setup() keyword arguments to setup.cfg options;
# The values are (section, option) tuples, or simply (section,) tuples if
# the option has the same name as the setup() argument
D1_D2_SETUP_ARGS = {
    "name": ("metadata",),
    "version": ("metadata",),
    "author": ("metadata",),
    "author_email": ("metadata",),
    "maintainer": ("metadata",),
    "maintainer_email": ("metadata",),
    "url": ("metadata", "home_page"),
    "description": ("metadata", "summary"),
    "keywords": ("metadata",),
    "long_description": ("metadata", "description"),
# Write a function that uses regular expressions to make sure the password string it is passed is strong.
#
# A strong password is defined as one that is at least eight characters long, contains both uppercase
# and lowercase characters, and has at least one digit. You may need to test the string against multiple
# regex patterns to validate its strength.
# (Note: the regex below is stricter than that definition; it requires two capitals, a special
# character, two digits, three lowercase letters and a minimum length of 10.)

import re

passworddetectionRegex = re.compile(r'''(
    ^(?=.*[A-Z].*[A-Z])          # two capital letters
    (?=.*[!@#$&*])               # special characters
    (?=.*[0-9].*[0-9])           # two numeric digits
    (?=.*[a-z].*[a-z].*[a-z])    # three lower case letters
    .{10,}                       # at least 10 characters
    $
    )''', re.VERBOSE)


def passwordDetection():
    # The body of this function was partially masked in the source; the lines below are a
    # minimal reconstruction consistent with the surviving prompt and result strings.
    passwordCheck = input("Please enter your password: ")
    if passworddetectionRegex.search(passwordCheck):
        return print("Strong password.")
    else:
        return print("Not a strong password.")


passwordDetection()
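# A brief, assumed self-check of the pattern above (not part of the original exercise): the first
# candidate satisfies every lookahead (two capitals, a special character, two digits, three
# lowercase letters, length >= 10), the second one does not.
for candidate in ("ABc12!defg", "weakpass1"):
    verdict = "strong" if passworddetectionRegex.search(candidate) else "weak"
    print(candidate, "->", verdict)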
#!/usr/bin/env python
import os
import re
import urllib2

#base = "http://dl.film2movie.biz/serial/The%20Walking%20Dead/S01/"

get = raw_input("enter url: ")
response = urllib2.urlopen(get)
html = response.read()

mas = re.compile('<[a-z]\s\w{3,4}\=".*a\>')
check = mas.findall(html)

for i in check:
    mov = i
    prep = re.compile('>[a-z]?[A-Z]?.*.mkv')
    prep1 = prep.findall(mov)
    for p in prep1:
        ok = p.replace('>', '')
        print get + ok
def __init__(self, ignore):
    self.__ignore = re.compile(
        r'(%s)' % ('|'.join([pat.replace('*', '.*') for pat in ignore])))
class EST_SIZE():
    IP = len('111.222.333.444')
    PORT = 6
    USER = 42
    PASSWORD = 42


ssh_version = it.cycle(['SSH-2.0-OpenSSH_6.1', 'SSH-2.0-OpenSSH_5.1', 'SSH-2.0-OpenSSH_4.1'])

DOMAIN_RE = re.compile(
    "([A-Za-z]{3,9}:(?:\/\/)?)?" +  # match protocol, allow in format http:// or mailto:
    "(?P<domain>" +  # domain part
    #"(?:[\-;:&=\+\$,\w]+@)?" +  # allow something@ for email addresses
    #"[A-Za-z0-9\.\-]+" +  # anything looking at all like a domain, non-unicode domains
    #"|" +  # or instead of above
    "(?:www\.|[\-;:&=\+\$,\w]+@)?" +  # starting with something@ or www.
    "[A-Za-z0-9\.\-]+" +  # anything looking at all like a domain
    ")" +
    "(?P<params>" +  # path / querystr part
    "(?:\/[\+~%\/\.\w\-_]*)" +  # allow optional /path
    "?\??(?:[\-\+=&;%@\.\w_]*)" +  # allow optional query string starting with ?
    "#?(?:[\.\!\/\\\w]*)"  # allow optional anchor #anchor
    ")?")

IP_RE = re.compile("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$")

RNGE_RE = re.compile("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" +
                     "-(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$")

MASK_RE = re.compile("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" +
                     "?\/([0-9]|[1-3][0-2])+$")

MAX_THREADS = 100
Q_TIMEOUT = 1
FB_PORT = None
MAX_COL_WIDTHS = 30
MIN_PASS_LEN = 6
ADMIN_USER = False
INITIALLY_ACTIVE = True

# first users under db_init -
RIBCAGE_KEY01 = os.environ.get('RIBCAGE_ADMIN_KEYS1', 'BLANK BLANK').split()
RIBCAGE_KEY02 = os.environ.get('RIBCAGE_ADMIN_KEYS2', 'BLANK BLANK').split()

# ====================
#  CLIENT IP HANDLING
# ====================
VALID_IP = re.compile(
    r"""
    \b
    (25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
    \.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
    \.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
    \.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
    \b
    """, re.VERBOSE)

# ====================
#  NAME OF YOUR LOG FILE
# ====================
LOGOUT = 'Devel_logs.log'

# ====================
#  MAILGUN VARIABLES
# ====================
MAILGUN_URL = 'https://api.mailgun.net/v3/{}/messages'
SANDBOX = 'sandbox26a6aabbd3e946feba81293c4b4d9dcc.mailgun.org'
from settings import LOGGING

import logging, logging.config
import urllib, urllib2
import re, urlparse
import traceback

from database import CrawlerDb

# Debugging
# import pdb; pdb.set_trace()

# Logging
logging.config.dictConfig(LOGGING)
logger = logging.getLogger("crawler_logger")

google_adurl_regex = re.compile('adurl=(.*?)"')
google_url_regex = re.compile('url\?q=(.*?)&sa=')
email_regex = re.compile('([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4})', re.IGNORECASE)
url_regex = re.compile('<a\s.*?href=[\'"](.*?)[\'"].*?>')
# Below url_regex will run into 'Catastrophic Backtracking'!
# http://stackoverflow.com/questions/8010005/python-re-infinite-execution
# url_regex = re.compile('<a\s(?:.*?\s)*?href=[\'"](.*?)[\'"].*?>')

# Maximum number of search results to start the crawl
EMAILS_FILENAME = 'data/emails.csv'
DOMAINS_FILENAME = 'data/domains.csv'

# Set up the database
db = CrawlerDb()
db.connect()
from __future__ import print_function

import os
import re
import sys


def execute_cmd(cmd):
    print(cmd)
    os.system(cmd)


# The regular expression we use to match compiler-crasher lines.
regex = re.compile(
    '.*Swift(.*) :: '
    '(compiler_crashers|compiler_crashers_2|IDE/crashers|SIL/crashers)'
    '/(.*\.swift|.*\.sil).*')

# Take the output of lit as standard input.
for line in sys.stdin:
    match = regex.match(line)
    if match:
        suffix = match.group(2)
        filename = match.group(3)

        # Move the test over to the fixed suite.
        from_filename = 'validation-test/%s/%s' % (suffix, filename)
        to_filename = 'validation-test/%s_fixed/%s' % (suffix, filename)
        git_mv_cmd = 'git mv %s %s' % (from_filename, to_filename)
        execute_cmd(git_mv_cmd)
import pyperclip, re

phoneRegex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?              # area code
    (\s|-|\.)?                      # separator
    (\d{3})                         # first 3 digits
    (\s|-|\.)                       # separator
    (\d{4})                         # last 4 digits
    (\s*(ext|x|ext.)\s*(\d{2,5}))?  # extension
    )''', re.VERBOSE)

emailRegex = re.compile(r'''([a-zA-Z0-9._%+-]+@+[a-zA-Z0-9.-]+(\.[a-zA-Z]{2,6}))''', re.VERBOSE)

text = str(pyperclip.paste())
matches = []
for groups in phoneRegex.findall(text):
    phoneNum = '-'.join([groups[1], groups[3], groups[5]])
    if groups[8] != '':
        phoneNum += ' x' + groups[8]
    matches.append(phoneNum)
for groups in emailRegex.findall(text):
    matches.append(groups[0])

if len(matches) > 0:
    pyperclip.copy('\n'.join(matches))
    print('copied to buffer')
    print('\n'.join(matches))
else:
    print('no phone numbers or email addresses found in buffer')
from babel.dates import format_timedelta as babel_format_timedelta
from flask_babelplus import lazy_gettext as _
from flask_themes2 import render_theme_template, get_themes_list
from flask_login import current_user
# from flaskbb.user.models import User
from werkzeug.local import LocalProxy

from flaskbb._compat import range_method, text_type, iteritems, to_unicode, to_bytes
from flaskbb.extensions import redis_store, babel
from flaskbb.utils.settings import flaskbb_config
from flaskbb.utils.markup import markdown
from flask_allows import Permission

_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """Generates a slightly worse ASCII-only slug.
    Taken from the Flask Snippets page.

    :param text: The text which should be slugified
    :param delim: Default "-". The delimiter for whitespace
    """
    text = unidecode.unidecode(text)
    result = []
    for word in _punct_re.split(text.lower()):
        if word:
            result.append(word)
    return text_type(delim.join(result))
import re

from sopel import web
from sopel.module import commands, example

import json
import time


def formatnumber(n):
    """Format a number with beautiful commas."""
    parts = list(str(n))
    for i in range((len(parts) - 3), 0, -3):
        parts.insert(i, ',')
    return ''.join(parts)


r_bing = re.compile(r'<h3><a href="([^"]+)"')


def bing_search(query, lang='en-GB'):
    query = web.quote(query)
    base = 'http://www.bing.com/search?mkt=%s&q=' % lang
    bytes = web.get(base + query)
    m = r_bing.search(bytes)
    if m:
        return m.group(1)


r_duck = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')


def duck_search(query):
    query = query.replace('!', '')
        # printouts will tell us about problems if this API changes
        import traceback
        traceback.print_exc()
        self.debugger = Pdb(colors)

    def __call__(self):
        """Starts an interactive debugger at the point where called.

        This is similar to the pdb.set_trace() function from the std lib, but
        using IPython's enhanced debugger."""
        self.debugger.set_trace(sys._getframe().f_back)


RGX_EXTRA_INDENT = re.compile(r'(?<=\n)\s+')


def strip_indentation(multiline_string):
    return RGX_EXTRA_INDENT.sub('', multiline_string)


def decorate_fn_with_doc(new_fn, old_fn, additional_text=""):
    """Make new_fn have old_fn's doc string. This is particularly useful
    for the ``do_...`` commands that hook into the help system.
    Adapted from a comp.lang.python posting by Duncan Booth."""
    def wrapper(*args, **kw):
        return new_fn(*args, **kw)
    if old_fn.__doc__:
        wrapper.__doc__ = strip_indentation(old_fn.__doc__) + additional_text
def resp_regex_compiled():
    return re.compile(SBE16NOCalibrationParticle.resp_regex(), re.DOTALL)
#!/bin/python3
import os
import sys
import re
import fdb_embedded as fdb

# Use database, or use actual dir?
# 1: fetch all _1280 files from CGI.db, build list in file
#    path/file_1280.ext
# 2: for each file_, lookup in download.db
#    if _raw is found, should be moved
#    if no _raw is found, should be listed in separate list for further investigation
#
# path/file_1280.ext path/file_raw.ext
# make list of 1280
# exclude those to delete (list from tumbler_scrape.txt)
# -> list TODELETE (move them before delete)
# make list of 1280 with no corresponding _raw -> list REVERSE_SEARCH (move them)

repattern_tumblr_1280 = re.compile(r'tumblr_.*_1280.*', re.I)


class FDBEMBEDDED():
    """handles queries to the fdb databases"""
from itertools import chain, repeat
from functools import update_wrapper

from _internal import _decode_unicode, _empty_stream
from urls import url_decode_stream
from wsgi import LimitedStream, make_line_iter
from exceptions import RequestEntityTooLarge
from datastructures import Headers, FileStorage, MultiDict
from http import parse_options_header

#: an iterator that yields empty strings
_empty_string_iter = repeat('')

#: a regular expression for multipart boundaries
_multipart_boundary_re = re.compile('^[ -~]{0,200}[!-~]$')

#: supported http encodings that are also available in python we support
#: for multipart messages.
_supported_multipart_encodings = frozenset(['base64', 'quoted-printable'])


def default_stream_factory(total_content_length, filename, content_type,
                           content_length=None):
    """The stream factory that is used per default."""
    if total_content_length > 1024 * 500:
        return TemporaryFile('wb+')
    return StringIO()


def parse_form_data(environ, stream_factory=None, charset='utf-8',
    'BoolFacet', 'TermsFacet', 'ModelTermsFacet',
    'RangeFacet', 'TemporalCoverageFacet',
    'BoolBooster', 'FunctionBooster', 'ValueFactor',
    'GaussDecay', 'ExpDecay', 'LinearDecay',
)

ES_NUM_FAILURES = '-Infinity', 'Infinity', 'NaN', None

RE_TIME_COVERAGE = re.compile(r'\d{4}-\d{2}-\d{2}-\d{4}-\d{2}-\d{2}')

OR_SEPARATOR = '|'
OR_LABEL = _('OR')


def obj_to_string(obj):
    '''Render an object into a unicode string if possible'''
    if not obj:
        return None
    elif isinstance(obj, bytes):
        return obj.decode('utf-8')
    elif isinstance(obj, str):
        return obj
    elif is_lazy_string(obj):
        return obj.value
access_key = u'182309114-zBJTzx72PGS3p5yTikSMi6lTXPEIR3f92Ky8KsLU'
access_secret = u'2R9TwGSfvH7z8eDpMaHsoHFwRLA2r7bMixG4wfIhJU'

# create twitter API object
auth = OAuth1(consumer_key, consumer_secret, access_key, access_secret,
              signature_type='query')
stream = TwitterStream(auth=auth, secure=True)

# iterate over tweets matching this filter text
# IMPORTANT! this is not quite the same as a standard twitter search
tweet_iter = stream.statuses.filter(track=search_term)

pattern = re.compile("%s" % search_term, re.IGNORECASE)
for tweet in tweet_iter:
    # check whether this is a valid tweet
    if tweet.get('text'):
        # turn the date string into a date object that python can handle
        timestamp = parsedate(tweet["created_at"])
        # now format this nicely into HH:MM:SS format
        timetext = strftime("%H:%M:%S", timestamp)

        # colour our tweet's time, user and text
        time_colored = colored(timetext, color="white", attrs=["bold"])
        user_colored = colored(tweet["user"]["screen_name"], "green")
        text_colored = tweet["text"]

        # replace each instance of our search terms with a highlighted version
def resp_regex_compiled():
    """
    get the compiled regex pattern
    @return: compiled re
    """
    return re.compile(SBE16NOHardwareParticle.resp_regex(), re.DOTALL)