def __update_registers(self):
    """
    Sends the listdev command to the IBOB tinyshell and processes the
    output of the command to create a map from registers to their addresses.
    """
    self.flushInput()
    self.__write("listdev\n")
    sleep(self.timeout)
    response = ""
    while self.inWaiting() > 0:
        response += self.__read(self.inWaiting())
        sleep(self.timeout / 10)
    addresses = re_findall(r'(0x[A-F0-9]*|<NO ADDR>)', response)
    names = re_findall(r' -> [a-zA-Z0-9_/]*', response)

    def hex_to_dec(x):
        if x == "<NO ADDR>":
            return None
        return int(x, 16)

    registers = {}
    for i in range(len(addresses)):
        reg = names[i][4:]  # strip the leading ' -> '
        registers[reg] = hex_to_dec(addresses[i])
    self.registers = registers
    self.__logger.debug("Updated registers dictionary for port %s. "
                        "New dictionary has length %d" % (self.port, len(registers)))
def __std_coordinate(xl_coordinate):
    """Converts an Excel-style coordinate (e.g. 'b12') into 0-based (row, column) indices."""
    letters = re_findall("[a-zA-Z]", xl_coordinate.lower())
    row_index = int(''.join(re_findall("[0-9]", xl_coordinate))) - 1
    # The column letters form a bijective base-26 number ('a' == 1 ... 'z' == 26);
    # accumulating positionally handles any first letter correctly (e.g. 'ba').
    col_index = 0
    for letter in letters:
        col_index = col_index * 26 + (ord(letter) - ord('a') + 1)
    return row_index, col_index - 1
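# Usage sketch (illustrative, not part of the original source; assumes the
# converter above is defined at module level): single- and multi-letter
# columns map to 0-based indices, where 'ba' = 2*26 + 1 - 1 == 52.
assert __std_coordinate('a1') == (0, 0)
assert __std_coordinate('B12') == (11, 1)
assert __std_coordinate('ba7') == (6, 52)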
def basicInfoOfPlayer(tempStr):
    """
    Stray <strong> tags can show up while extracting some items,
    so those fields need special handling.
    """
    playerName = voidDeal(re_findall(r"player\"\s><.{20}.+?>(.+?)<", tempStr))      # player name
    mainPageUrl = r"https://www.basketball-reference.com" + \
        voidDeal(re_findall(r"player\"\s><.+?\"(.+?)\"", tempStr))                  # player home page URL
    firstYear = voidDeal(re_findall(r"\"year_min\"\s>(\d+?)<", tempStr))            # rookie year
    lastYear = voidDeal(re_findall(r"\"year_max\"\s>(\d+?)<", tempStr))             # final year
    pos = voidDeal(re_findall(r"\"pos\"\s>(.+?)<", tempStr))                        # position
    height = voidDeal(re_findall(r"\"height\".+?>(.+?)<", tempStr))                 # height
    weight = voidDeal(re_findall(r"\"weight\"\s>(\d+?)<", tempStr))                 # weight
    birthday = voidDeal(re_findall(r"\"birth_date\"\scsk=\"(.+?)\"", tempStr))      # birth date
    college = voidDeal(re_findall(r"college=.+?>(.+?)<", tempStr))                  # college
    return [playerName, mainPageUrl, firstYear, lastYear,
            pos, height, weight, birthday, college]
def videoCmd(self) -> str:
    # get video info
    vidinfo: str = run([
        'ffprobe', '-i', self.pth_in, '-v', 'error',
        '-select_streams',
        f"v{'' if _Opt.vid_strm == 'all' else f':{_Opt.vid_strm}'}",
        '-show_entries', 'format=duration:stream=codec_name,height,width',
        '-of', 'default=nw=1'
    ], capture_output=True, text=True).stdout
    codec = re_search(r'codec_name=(.+)', vidinfo).group(1).lower()
    v_ht = int(re_search(r'height=(.+)', vidinfo).group(1))
    v_wd = int(re_search(r'width=(.+)', vidinfo).group(1))
    self.vid_dur = float(re_search(r'duration=(.+)', vidinfo).group(1))
    # get crop
    cropinfo: str = run([
        'ffmpeg', '-hide_banner', '-i', self.pth_in, '-vframes', '10',
        '-vf', 'cropdetect', '-f', 'null', '-'
    ], capture_output=True, text=True, cwd=self.pth_in.parent).stderr
    try:
        crop = re_findall(r'crop=.+', cropinfo)[-1]
        # the findall results are strings; convert before doing arithmetic
        cw, ch, cx, cy = map(int, re_findall(r'\d+', crop))
        if '-' in crop or (cw - cx) < (v_wd / 3) or (ch - cy) < (v_ht / 3):
            crop = ''
        else:
            crop = f"-vf {crop}"
    except (IndexError, ValueError):
        crop = ''
    # build command
    hevcpth = self.pth_in.with_name(f"[HEVC-AAC] {self.pth_in.stem}.mkv")
    vp9pth = self.pth_in.with_name(f"[VP9-OPUS] {self.pth_in.stem}.webm")
    if 'hevc' in codec or 'vp9' in codec or 'vp8' in codec:
        self.pth_out = hevcpth if 'hevc' in codec else vp9pth
        return crop
    elif CONV_NVENC and _Opt.output != '.webm' and self.vid_dur >= _Opt.playtime:
        self.pth_out = hevcpth
        return f'{crop} -c:v hevc_nvenc -preset slow'
    else:
        # get crf from v_ht
        crf = 7
        for ht, val in CRF_VALS:
            if ht >= v_ht:
                crf = val
                break
        if _Opt.output != '.webm' and self.vid_dur >= _Opt.playtime:
            self.pth_out = hevcpth
            return f'{crop} -c:v libx265 -crf {crf} -preset slow'
        else:
            self.pth_out = vp9pth
            return f'{crop} -c:v vp9 -b:v 0 -crf {crf}'
def json_from_string(s):
    """
    :type s: str
    :rtype: dict
    """
    match = re_findall(r"{.+[:,].+}|\[.+[,:].+\]", s)
    return json_loads(match[0]) if match else None
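# Usage sketch (illustrative, not part of the original source; assumes this
# snippet's json_loads alias for json.loads): pulls the first JSON-looking
# object or array out of surrounding noise.
assert json_from_string('log: {"a": 1, "b": 2} tail') == {"a": 1, "b": 2}
assert json_from_string('no json here') is None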
def extract_paras(string):
    """
    Parses the parameters out of a user request; see the notes in
    webqq_client.py for the exact format.
    :param string: str
    """
    search = re_findall(r'_(?P<name>\w+?)_=_\{\{\{\{(?P<value>.*?)\}\}\}\}_',
                        string, flags=RE_MULTILINE | RE_DOTALL)
    return {name: value for name, value in search}
def SASLHandler(self, conn, challenge):
    """Perform the next SASL auth step. Used internally."""
    if challenge.getNamespace() != NS_SASL:
        return None
    if challenge.getName() == "failure":
        self.startsasl = "failure"
        try:
            reason = challenge.getChildren()[0]
        except Exception:
            reason = challenge
        self.DEBUG("Failed SASL authentication: %s" % reason, "error")
        raise NodeProcessed()
    elif challenge.getName() == "success":
        self.startsasl = "success"
        self.DEBUG("Successfully authenticated with remote server.", "ok")
        handlers = self._owner.Dispatcher.dumpHandlers()
        self._owner.Dispatcher.PlugOut()
        dispatcher.Dispatcher().PlugIn(self._owner)
        self._owner.Dispatcher.restoreHandlers(handlers)
        self._owner.User = self.username
        raise NodeProcessed()
    incoming_data = challenge.getData()
    chal = {}
    data = decodestring(incoming_data)
    self.DEBUG("Got challenge:" + data, "ok")
    for pair in re_findall(r'(\w+\s*=\s*(?:(?:"[^"]+")|(?:[^,]+)))', data):
        key, value = [x.strip() for x in pair.split("=", 1)]
        if value[:1] == '"' and value[-1:] == '"':
            value = value[1:-1]
        chal[key] = value
    if "qop" in chal and "auth" in [x.strip() for x in chal["qop"].split(",")]:
        resp = {}
        resp["username"] = self.username
        resp["realm"] = self._owner.Server
        resp["nonce"] = chal["nonce"]
        cnonce = ""
        for i in range(7):
            cnonce += hex(int(_random() * 65536 * 4096))[2:]
        resp["cnonce"] = cnonce
        resp["nc"] = "00000001"
        resp["qop"] = "auth"
        resp["digest-uri"] = "xmpp/" + self._owner.Server
        A1 = C([H(C([resp["username"], resp["realm"], self.password])),
                resp["nonce"], resp["cnonce"]])
        A2 = C(["AUTHENTICATE", resp["digest-uri"]])
        response = HH(C([HH(A1), resp["nonce"], resp["nc"], resp["cnonce"],
                         resp["qop"], HH(A2)]))
        resp["response"] = response
        resp["charset"] = "utf-8"
        sasl_data = ""
        for key in ("charset", "username", "realm", "nonce", "nc", "cnonce",
                    "digest-uri", "response", "qop"):
            if key in ("nc", "qop", "response", "charset"):
                sasl_data += "%s=%s," % (key, resp[key])
            else:
                sasl_data += "%s=\"%s\"," % (key, resp[key])
        node = Node("response", attrs={"xmlns": NS_SASL},
                    payload=[encodestring(sasl_data[:-1]).replace("\r", "").replace("\n", "")])
        self._owner.send(node.__str__())
    elif "rspauth" in chal:
        self._owner.send(Node("response", attrs={"xmlns": NS_SASL}).__str__())
    else:
        self.startsasl = "failure"
        self.DEBUG("Failed SASL authentication: unknown challenge", "error")
        raise NodeProcessed()
def get_lyrics(self, title, artist):
    lyrics = ''
    artist = artist.replace(' ', '_').lower()
    artist = normalize('NFD', artist).encode('ascii', 'ignore')
    title = title.replace(' ', '_').lower()
    title = normalize('NFD', title).encode('ascii', 'ignore')
    # lyricsmode URLs are /lyrics/<first letter of artist>/<artist>/<title>.html
    url = ('http://www.lyricsmode.com/lyrics/%s/%s/%s.html' %
           (urllib_quote(artist.decode('utf-8'))[0],
            urllib_quote(artist.decode('utf-8')),
            urllib_quote(title.decode('utf-8'))))
    try:
        page = self.get_html(url)
    except HTTPError:
        page = ''
    clean_reg = re_compile('<.*?>')
    for txt in re_findall('(?s)<p id="lyrics_text" '
                          'class="ui-annotatable">(.*?)</p>', str(page)):
        txt = re_sub(clean_reg, '', txt)
        txt = txt.replace('\\\'', "'")
        txt = txt.replace('\\n', '\n')
        lyrics = txt
    if lyrics != '':
        return lyrics
    else:
        return None
def get_lyrics(self, title, artist):
    lyrics = ''
    artist = artist.replace(' ', '_')
    artist = normalize('NFD', artist).encode('ascii', 'ignore')
    title = title.replace(' ', '_')
    title = normalize('NFD', title).encode('ascii', 'ignore')
    url = ('http://lyrics.wikia.com/wiki/%s:%s' %
           (urllib_quote(artist.decode('utf-8')),
            urllib_quote(title.decode('utf-8'))))
    try:
        page = self.get_html(url)
    except HTTPError:
        page = ''
    soup = BeautifulSoup(page, 'html.parser')
    rew = soup.find('div', {'class': 'lyricbox'})
    if rew is None:
        return None
    for txt in re_findall('(?s)</script>(.*?)<!--', str(rew)):
        txt = txt.replace('<br/>', '\n')
        lyrics = txt
    if lyrics != '':
        return lyrics
    else:
        return None
def get_cookies():
    try:
        # s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # http
        s = ssl.wrap_socket(socket.socket())  # https
        s.connect(('ipcrs.pbccrc.org.cn', 443))
        # HTTP requires CRLF line endings, including the blank line that ends the headers
        s.send(b'GET / HTTP/1.1\r\n'
               b'Host: ipcrs.pbccrc.org.cn\r\n'
               b'User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
               b'(KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36\r\n'
               b'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n'
               b'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3\r\n'
               b'Accept-Encoding: gzip, deflate, br\r\n'
               b'Connection: keep-alive\r\n'
               b'Upgrade-Insecure-Requests: 1\r\n'
               b'\r\n')
        raw_content = b""
        buf = s.recv(1024)
        while len(buf):
            raw_content += buf
            buf = s.recv(1024)
        s.close()
        content = raw_content.decode("gbk")
        cookies = re_findall(r"Set-Cookie:([\s\S]+?);", content)
        return {cookie.split("=")[0].strip(): cookie.split("=")[1].strip()
                for cookie in cookies}
    except Exception:
        return {}
def parse_all_games_from_series(soup: BeautifulSoup, url: str) -> List[PlayoffGame]:
    """Get the data from all games of the series."""
    series_games = []
    for game in soup.find_all("div", {"class": "game_summary expanded nohover"}):
        # Processing scores
        winner, score_winner = list(
            game.tbody.find("tr", {"class": "winner"}).stripped_strings)[:2]
        loser, score_loser = list(
            game.tbody.find("tr", {"class": "loser"}).stripped_strings)[:2]
        # Processing game number and date
        game_number, date = game.tbody.find("tr", {"class": "date"}).string.split(",")
        year = re_findall(YEAR_PATTERN, url).pop()
        date = datetime.strptime(date.strip() + year, STRPTIME_GAME_PATTERN)
        game_number = int(game_number.replace("Game ", ""))
        pgame = PlayoffGame(
            winner=winner,
            loser=loser,
            score_winner=score_winner,
            score_loser=score_loser,
            date=date,
            game_number=game_number,
        )
        series_games.append(pgame)
    return series_games
def parse_xml(pdf):
    x = soup(codecs_encode(pdf, 'utf8', 'ignore')).findAll('page')
    cols = ['page', 'font', 'top', 'left', 'width', 'height', 'text']
    g = pd_DataFrame(columns=cols)
    for pg in x:
        idx = x.index(pg) + 1
        pg = str(pg)
        line_iter = re_findall(r'(<text.*?</text>)', pg)
        for it in line_iter:
            a = ['page'] + re_findall(r'([a-zA-Z]+)=', it) + ['text']
            text_attrs = it[5:it.find('>')].strip()
            text_contents = str(soup(it).text)
            # list() is needed so the map result can be concatenated under Python 3
            b = [idx] + [int(s) for s in re_findall('[0-9]+', text_attrs)] + [text_contents]
            if text_contents.strip() != '':
                g = g.append(dict(zip(a, b)), ignore_index=True)
    return g
def assert_number_of_tokens(res):
    req = urlopen("https://coinmarketcap.com/tokens/views/all/")
    data = req.read()
    req.close()
    indexes = int(re_findall(
        r'<td class="text-center">\n*(.+)\n*</td>', data.decode())[-1])
    assert len(res) in [indexes - 1, indexes, indexes + 1]
def load_module_from_file_location(location: Union[bytes, str],
                                   encoding: str = "utf8",
                                   *args, **kwargs):
    """Returns a module loaded from the provided file path.

    :param location: Corresponds to the importlib.util.spec_from_file_location
        location parameter, but with these differences:
        - It has to be of a string or bytes type.
        - You can also use environment variables in the format ${some_env_var};
          note that $some_env_var will not be resolved as an environment variable.
    :param encoding: If the location parameter is of a bytes type, use this
        encoding to decode it into a string.
    :param args: Corresponds to the rest of the
        importlib.util.spec_from_file_location parameters.
    :param kwargs: Corresponds to the rest of the
        importlib.util.spec_from_file_location parameters.

    For example, you can:

        some_module = load_module_from_file_location(
            "/some/path/${some_env_var}"
        )
    """
    # 1) Parse location.
    if isinstance(location, bytes):
        location = location.decode(encoding)

    # A) Check if location contains any environment variables
    #    in format ${some_env_var}.
    env_vars_in_location = set(re_findall(r"\${(.+?)}", location))

    # B) Check that these variables exist in the environment.
    not_defined_env_vars = env_vars_in_location.difference(os_environ.keys())
    if not_defined_env_vars:
        raise LoadFileException(
            "The following environment variables are not set: "
            f"{', '.join(not_defined_env_vars)}")

    # C) Substitute them in location.
    for env_var in env_vars_in_location:
        location = location.replace("${" + env_var + "}", os_environ[env_var])

    # 2) Load and return module.
    name = location.split("/")[-1].split(".")[0]  # file name without path or .py extension
    _mod_spec = spec_from_file_location(name, location, *args, **kwargs)
    module = module_from_spec(_mod_spec)
    _mod_spec.loader.exec_module(module)  # type: ignore

    return module
def add_schedule(schedule, element):
    name = normalize("NFKD", str(element.string)).strip()
    if len(name):
        times = re_findall(r"\d*[,.:]\d{2}", name)
        timetable = []
        for time in times:
            hour, minute = re_sub("[,.]", ":", time).split(":")
            if len(hour):
                timetable.append("{:02d}:{:s}".format(int(hour), minute))
        schedule.append({"time": timetable, "name": name})
def get_file(s):
    data = s.recv(DATA_SIZE)
    header, _, reply = data.partition("\r\n\r\n")
    header_list = dict([(x.lower(), y)
                        for (x, y) in re_findall(r"(.*):\s+?(.*)", header)])
    content_length, received_so_far = int(header_list["content-length"]), len(reply)
    while content_length > received_so_far:
        reply += s.recv(DATA_SIZE)
        received_so_far = len(reply)
    print(reply)
    s.close()
def CompleteHTML(html):
    # 1. Close every unclosed bracket, e.g., '<a'
    bracket_cnt = 0
    is_in_pre_tag = False
    previous_6 = ''
    for c in html:
        if not is_in_pre_tag:
            if c == '<':
                bracket_cnt += 1
            elif c == '>':
                bracket_cnt -= 1
        previous_6 = (previous_6 + c)[-6:]
        if previous_6.endswith('<pre>'):
            is_in_pre_tag = True
        if previous_6.endswith('</pre>'):
            is_in_pre_tag = False
    html += '>' * bracket_cnt
    # 2. Close every unmatched tag, e.g., '<a href="...">'
    stk = []
    is_in_pre_tag = False
    tags = [t[1:-1] for t in re_findall(r'<\/?\w+[\'\"a-zA-Z\.\=\ ]*>', html)]
    for tag in tags:
        if tag == 'pre':
            is_in_pre_tag = True
        elif tag == '/pre':
            is_in_pre_tag = False
        if is_in_pre_tag:
            continue
        if tag[0] != '/':
            stk.append(tag.split()[0].split('>')[0])
        elif len(stk) and stk[-1] == tag[1:]:
            stk.pop()
    stk.reverse()
    return html + ''.join('</%s>' % tagname for tagname in stk)
def css(css) -> str:
    """ Minify the CSS """
    # remove comments - this will break a lot of hacks :-P
    css = re_sub(r'\s*/\*\s*\*/', "$$HACK1$$", css)  # preserve IE<6 comment hack
    css = re_sub(r'/\*[\s\S]*?\*/', "", css)
    css = css.replace("$$HACK1$$", '/**/')  # restore IE<6 comment hack
    # url() doesn't need quotes
    css = re_sub(r'url\((["\'])([^)]*)\1\)', r'url(\2)', css)
    # spaces may be safely collapsed as generated content will collapse them anyway
    css = re_sub(r'\s+', ' ', css)
    # shorten collapsable colors: #aabbcc to #abc
    css = re_sub(r'#([0-9a-f])\1([0-9a-f])\2([0-9a-f])\3(\s|;)', r'#\1\2\3\4', css)
    # fragment values can lose zeros
    css = re_sub(r':\s*0(\.\d+([cm]m|e[mx]|in|p[ctx]))\s*;', r':\1;', css)
    minified = []
    for rule in re_findall(r'([^{]+){([^}]*)}', css):
        # we don't need spaces around operators
        selectors = [
            re_sub(r'(?<=[\[\(>+=])\s+|\s+(?=[=~^$*|>+\]\)])', r'',
                   selector.strip()) for selector in rule[0].split(',')
        ]
        # order is important, but we still want to discard repetitions
        properties = {}
        porder = []
        for prop in re_findall('(.*?):(.*?)(;|$)', rule[1]):
            key = prop[0].strip().lower()
            if key not in porder:
                porder.append(key)
            properties[key] = prop[1].strip()
        # rebuild the rule from the cleaned selectors and de-duplicated properties
        minified.append('%s{%s}' % (
            ','.join(selectors),
            ''.join('%s:%s;' % (key, properties[key]) for key in porder)))
    return ''.join(minified)
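# Usage sketch (illustrative, not part of the original source; exercises the
# rebuilt return value above): whitespace collapses, a repeated property keeps
# only its last value, and #aabbcc-style colors shorten.
assert css('a { color : #aabbcc ; color: red ; }') == 'a{color:red;}'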
def __init__(self, beta=2):
    self.beta = beta
    self.__check_beta_non_negative()
    # Split object name at uppercase
    self.name = ' '.join(re_findall('[A-Z][^A-Z]*', self.__class__.__name__))
    self.params = {'size_N': 10}  # Number of points and ref measure params
    self.sampling_mode = ''
    self.list_of_samples = []
def parse_test_steps(contents, test_steps, test_name, dict_test_case, test_dir):
    global ERROR_LIST
    test_steps_re = (
        r'("|\'){3}\s*(?:Step:|STEP:|\s+\d+\.)\s*(?P<description>.*?)'
        r'(?:(?:Result:|RESULT:)\s*(?P<result>.*?)|)("|\'){3}')
    function_re = (r'def\s{}\(.*?\):(?:\s+#\snoqa)?\n'
                   r'(?P<test_case>.*?)(?=def\s|$)')
    # collect all test steps
    re_steps = re_finditer(test_steps_re, contents, flags=DOTALL)
    step_count = 1
    for re_step in re_steps:
        step = parse_test_step(re_step, test_name, step_count)
        # check for stub
        if not dict_test_case['contains_stub'] and \
                step['description'].lower() == "stub test step":
            dict_test_case['contains_stub'] = True
        re_result = re_match(r'<(\S+)>', step['description'])
        if re_result is not None:
            func_name = re_result.group(1)
            file = get_referenced_file_path(test_dir, step['result'])
            if os.path.exists(file) and os.path.isfile(file):
                with open(file, 'r') as file_pointer:
                    contents = file_pointer.read()
                re_result = re_findall(function_re.format(func_name),
                                       contents, flags=DOTALL)
                if re_result:
                    func_contents = re_result[0]
                    sub_step_name = "{} {} {}".format(test_name, step_count, func_name)
                    parse_test_steps(func_contents, test_steps, sub_step_name,
                                     dict_test_case, test_dir)
                else:
                    ERROR_LIST["TF23"] = ("Referenced function {} does not "
                                          "exist!".format(func_name))
            else:
                ERROR_LIST["TF22"] = ("Referenced file {} does not "
                                      "exist!".format(file))
        else:
            test_steps.append(step)
        step_count += 1
async def sixdigits(self, ctx: NewCtx):
    """Provides you a magical six-digit number."""
    async with self.bot.session.head("https://nhentai.net/random",
                                     allow_redirects=True) as resp:
        url = str(resp.url)
    digits = re_findall(r'\d+', url)[0]
    if ctx.channel.is_nsfw():
        return await ctx.send(embed=BetterEmbed(title=digits, url=url))
    await ctx.send(digits)
def move_files_to_folder(*args, **kwargs):
    # Maximum number of backups allowed by the user
    BACKUP_COUNT = bpy.context.user_preferences.filepaths.save_version
    # If the saving-backups option is 'ON'
    if BACKUP_COUNT:
        # Function level constants
        PATH = bpy.data.filepath                          # Full path
        FILE = bpy.path.display_name_from_filepath(PATH)  # File name
        CWD = os_path_dirname(PATH)                       # Current Working Directory
        CBD = os_path_join(CWD, BACKUP_FOLDER_NAME)       # Current Backup Directory
        REXT = r"{}\.blend(\d+)$".format(FILE)            # Regex to catch backups
        EXT = "{}.blend{}"                                # Extension placeholder
        OLD = EXT.format(FILE, BACKUP_COUNT)              # Oldest backup name
        # Create the backup directory if it does not exist
        try:
            os_makedirs(CBD)
        except OSError as e:
            # If an error other than "dir already exists" appears, re-raise
            # the caught error and print out the traceback
            if e.errno != EEXIST:
                raise OSError("\n".join(traceback_extract_stack())) from None
        # Get all files in the current directory, move them to the
        # backup folder if they are backup files, and maintain
        # the backup folder's instances
        for filename in reversed(sorted(os_listdir(CWD))):
            # If the file is a backup file
            try:
                index = int(re_findall(REXT, filename)[-1])
                # If the file's index is greater than the current number of
                # backups allowed, the full path of the file will be returned
                # and deleted; otherwise os.remove will raise FileNotFoundError
                os_remove(
                    increase_index_and_move(
                        src_folder=CWD,
                        dst_folder=CBD,
                        file=FILE,
                        extension=EXT,
                        src_index=index,
                        dst_index=index,
                        max_index=BACKUP_COUNT,
                    )
                )
            # If the file is not a backup file
            except (IndexError, FileNotFoundError):
                pass
        # If everything went fine, print out information
        if PRINT_INFO:
            print(INFO_TEXT.format(CWD, CBD))
def __init__(self, beta=2):
    if not beta >= 0:
        # use the argument here: self.beta is not assigned yet
        raise ValueError('`beta` must be >= 0. Given: {}'.format(beta))
    self.beta = beta
    # Split object name at uppercase
    self.name = ' '.join(re_findall('[A-Z][^A-Z]*', self.__class__.__name__))
    self.params = {'size_N': 10}  # Number of points and ref measure params
    self.sampling_mode = ''
    self.list_of_samples = []
def read_graphs(inputs):
    network_path = inputs['dir_nm'] + inputs['netf_nm']
    out_comp_nm = inputs['dir_nm'] + inputs['out_comp_nm']
    use_full = inputs['use_full']
    with open(out_comp_nm + '_metrics.out', "w") as fid:
        print("Network:", network_path, file=fid)
    logging_info("Reading network...")
    G = nx.read_weighted_edgelist(network_path, nodetype=str)
    logging_info("Finished reading network")
    # Removing self loops
    G.remove_edges_from(nx.selfloop_edges(G))
    # CHECK IF DUPLICATE NODES AND EDGES ARE REMOVED
    if use_full == 0:
        logging_info("Extracting sub network with proteins from complexes...")
        # Extracting the subnetwork of the original PPIN containing
        # proteins in the known complexes
        complex_path = inputs['dir_nm'] + inputs['comf_nm']
        test_complex_path = inputs['dir_nm'] + inputs['comf_test_nm']
        with open(complex_path) as f:
            data = f.read()
            id_list_train = re_findall(r"[\w']+", data)
        with open(test_complex_path) as f:
            data = f.read()
            id_list_test = re_findall(r"[\w']+", data)
        prot_list = set(id_list_train + id_list_test)
        G = G.subgraph(prot_list)
    if use_full == 1:
        write_graph_stats_neig_lists(G, inputs)
    return G
def info(thing):
    """Prints the callables of a supplied argument (module, class, list,
    dict, etc.) along with their documentation."""
    access_methods = [m for m in dir(thing)
                      if callable(getattr(thing, m)) and not m.startswith("__")]

    def process_doc(f):
        if not f.__doc__:
            return "None"
        return "\n".join("{:>10}{}".format("", s)
                         for s in re_findall(r".{,90}\w*", f.__doc__))

    print()
    if callable(thing):
        print("{:>4}{}(..)\n{}".format("", thing.__name__, process_doc(thing)))
    if access_methods:
        print("\n".join(
            "{justify:>4}{the_method}(..)\n{the_docs}".format(
                justify="",
                the_method=method,
                the_docs=process_doc(getattr(thing, method)))
            for method in access_methods))
def get_all_urls(message_content: str) -> list:
    """
    Description:
        Returns a list of all the urls in a message

    Args:
        message_content (str): The message

    Returns:
        list: The list of urls
    """
    URL_REGEX = r"(?P<url>https?://[^\s]+)"
    return re_findall(URL_REGEX, message_content)
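# Usage sketch (illustrative, not part of the original source): findall with a
# single named group returns the matched substrings. Note the trailing period
# stays attached, since the pattern only stops at whitespace.
assert get_all_urls("see https://example.com and http://a.b/c.") == [
    "https://example.com", "http://a.b/c."]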
def get_text(self, text):
    """
    Converts rich text to plain text.
    :param text:
    :return:
    """
    if not isinstance(text, str):
        self.logger.debug(
            "===>This text is not a string, text:{0}".format(text))
        return ""
    text_all = '>' + text + '<'
    pattern = r'>([\s\S]*?)<'
    info = re_findall(pattern, text_all)
    infos = ''.join(info)
    return infos.strip()
def getvendor(raw, vendors_str):
    vendors = vendors_str.split(',')
    words = re_findall('[A-Za-z]+', raw)
    max_simi = 0
    similar = ''  # initialized so the function is safe when no word matches
    for word in words:
        for vendor in vendors:
            set_r = fuzz.partial_ratio(vendor.lower(), word.lower())
            if max_simi < set_r:
                max_simi = set_r
                similar = vendor
    if max_simi < 60:
        similar = ''
    elif max_simi < 98:
        similar = similar + '(' + str(max_simi) + '?)'
    return similar
def version(infolder, sub_max, rev_max, build_max, main=0, sub=1, rev=0, build=0):
    # Function level constants
    DATETIME = datetime.now().strftime('%Y%m%d')
    REGEXP = re_compile(r'^\s*(\d+)\.(\d+)\.(\d+)\.(\d+)\s*\(\d+\)\s*$')
    FORMAT = '{{}}.{{:0{}d}}.{{:0{}d}}.{{:0{}d}} ({{}})'.format(
        *map(lambda v: len(str(v)), (sub_max, rev_max, build_max)))
    # Create filepath
    filepath = join(infolder, _VERSION)
    # If the file already exists, open it
    try:
        with open(filepath, 'r+') as file:
            try:
                # Parse file and get values
                main, sub, rev, build = map(int, re_findall(REGEXP, file.read())[0])
                # Increase values, rolling over when a maximum is exceeded
                build += 1
                if build > build_max:
                    build = 0
                    rev += 1
                    if rev > rev_max:
                        rev = 0
                        sub += 1
                        if sub > sub_max:
                            sub = 0
                            main += 1
                # Write changes back to file
                file.seek(0)
                file.write(FORMAT.format(main, sub, rev, build, DATETIME))
                file.truncate()
            # If the version format in the file is invalid
            except IndexError:
                raise Exception('Invalid version format '
                                'in {!r} file'.format(filepath)) from None
    # Create a new file to store version data and write initial values
    except FileNotFoundError:
        with open(filepath, 'w') as file:
            file.write(FORMAT.format(main, sub, rev, build, DATETIME))
    # Return current version
    return main, sub, rev, build, DATETIME
def filterDataForPlayOffs(strOfSeasonPage):
    """
    Gets every career game number listed on the season page.
    Playoff games appear as pgl_basic_playoffs.xxx (playoffs = Po).
    """
    PoAllGameIndex = re_findall(r"pgl_basic_playoffs.(\d+?)\"", strOfSeasonPage)
    dataOfOneSeason = DataFrame(columns=(
        "pgl_basic", "game", "date", "age", "team", "opp", "gameResult",
        "gs", "mp", "fg", "fga", "fg_pct", "fg3", "fg3a", "fg3_pct",
        "ft", "fta", "ft_pct", "orb", "drb", "trb", "ast", "stl",
        "blk", "tov", "pf", "pts", "game_score", "plus_minus"))
    # Slice out the description paragraph of each single game
    initIndex = 0
    for ii in range(0, len(PoAllGameIndex)):
        strForFind = strOfSeasonPage[initIndex:]  # narrow the search to save time
        startStrForDes = "pgl_basic_playoffs." + PoAllGameIndex[ii]
        startIndexForDes = strForFind.find(startStrForDes)
        if (ii + 1) < len(PoAllGameIndex):
            endStrForDes = "pgl_basic_playoffs." + PoAllGameIndex[ii + 1]
            endIndexForDes = strForFind.find(endStrForDes)
            gameDescriptor = strForFind[startIndexForDes:endIndexForDes]
        else:
            endIndexForDes = -1
            gameDescriptor = strForFind[startIndexForDes:endIndexForDes]  # last game; no next game
        # Regex-match each game's stats and store them in the DataFrame
        pgl_basic = int(PoAllGameIndex[ii])  # career game number
        allStatItems = gameStatOfPlayer(gameDescriptor)
        allStatItems.insert(0, pgl_basic)  # inserts in place, returns nothing
        dataOfOneSeason.loc[ii] = allStatItems
        initIndex = endIndexForDes
    return dataOfOneSeason
def filterDataForallSeasonWebLog(mainPageUrl):
    res = requests_get(mainPageUrl)
    strOfMainPage = res.text
    # Get every season year the player played, dropping duplicate years
    allSeasonYears = re_findall(r"per_game.(\d+?)\"", strOfMainPage)
    allSeasonYears = list(set(allSeasonYears))
    allSeasonYears = sorted(int(year) for year in allSeasonYears)
    allSeasonYears = [str(year) for year in allSeasonYears]
    # Slice out each season's description paragraph; the slice bounds for
    # the last season are empirical and may be fragile
    allSeasonWebLog = []
    for ii in range(0, len(allSeasonYears)):
        startStrForDes = "per_game." + str(allSeasonYears[ii])
        startIndexForDes = strOfMainPage.find(startStrForDes)
        if (ii + 1) < len(allSeasonYears):
            endStrForDes = "per_game." + str(allSeasonYears[ii + 1])
            endIndexForDes = strOfMainPage.find(endStrForDes)
            SeasonDescriptor = strOfMainPage[startIndexForDes:endIndexForDes]
        else:
            # last season; there is no following season
            SeasonDescriptor = strOfMainPage[startIndexForDes:startIndexForDes + 300]
        reTemplate = re_compile(r"href=\"(.+?)\">" + str(int(allSeasonYears[ii]) - 1))
        seasonWebLog = reTemplate.findall(SeasonDescriptor)
        if seasonWebLog:
            seasonWebLog = seasonWebLog[0]
        else:
            continue
        allSeasonWebLog.append(r"https://www.basketball-reference.com" + seasonWebLog)
    # Duplicate links were already avoided by de-duplicating the years above
    return allSeasonWebLog
def decrypt(data):
    def check_bin(binstr):
        p = set(binstr)
        s = {'0', '1'}
        return s == p or p == {'0'} or p == {'1'}

    data = data.split()
    for i in range(len(data)):
        while len(data[i]) % 8 != 0:
            data[i] = '0' + data[i]
    for i in data:
        if not check_bin(i):
            print('Error: Given cypher contained a non-binary number')
            exit()
    words_data = [[int(x, 2) for x in re_findall(r'.{8}', i)] for i in data]
    decrypts = [bytearray(i).decode('utf-8') for i in words_data]
    return decrypts
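# Usage sketch (illustrative, not part of the original source): each
# whitespace-separated token is split into 8-bit groups and decoded as UTF-8;
# missing leading zeros are restored first.
assert decrypt('0100100001101001') == ['Hi']  # 'H' = 0x48, 'i' = 0x69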
def run(self):
    while True:
        # grabs host from queue
        new_file_descriptor = self.queue.get()
        try:
            header = new_file_descriptor.recv(4096).partition("\r\n\r\n")[0]
            # header_list = dict(re_findall(r"(?P<name>.*?): \
            # (?P<value>.*?)\r\n", header))
            file_path = re_findall(r"^GET (/.*) HTTP", header, I)[0]
            new_file_descriptor.sendall(self.compose_message("." + file_path))
        except Exception as e:
            new_file_descriptor.sendall("HTTP 400 Bad Request\r\n\r\n%s" % e)
        finally:
            new_file_descriptor.close()
            self.queue.task_done()
def parse_a_series(series_games: List[PlayoffGame], url: str):
    teams_and_wins = get_teams_and_total_wins(series_games)
    series_winner = max(teams_and_wins, key=teams_and_wins.get)
    series_length = SERIES_LENGTH_BY_WINNER_WINS.get(
        str(teams_and_wins.get(series_winner)))
    # the loser is the only other team in the wins mapping
    series_loser = (teams_and_wins.keys() - [series_winner]).pop()
    series_name = re_findall(SERIES_NAME_PATTERN, url).pop()
    return PlayoffSeries(
        games=series_games,
        winner=series_winner,
        loser=series_loser,
        series_name=series_name,
        best_of_series=series_length,
    )
def before_step(context, step):
    if step.name.find("a successful 'handshake' with Aporo servers from a(n)") >= 0:
        cred, machine_id = re_findall(r'\"(.+?)\"', str(step.name))
        if cred == 'manager':
            t = pd_read_sql("""
                INSERT INTO aprinto_access
                    (date_created, machine_id, ip_addr, vend_name, known_user, type_admin)
                VALUES
                    ('now'::timestamp with time zone, '%s', '%s', 'tmp', 'true', 'true')
                RETURNING uid
                """ % (machine_id, THIS_IP), aprinto_engine)['uid'][0]
            context.row_created = t
        elif cred == 'vendor' and context.feature.name.find(
                'Client Computer Authentication via a Contract') == -1:
            t = pd_read_sql("""
                INSERT INTO aprinto_access
                    (date_created, machine_id, ip_addr, vend_name, known_user, type_vendor)
                VALUES
                    ('now'::timestamp with time zone, '%s', '%s', 'tmp', 'true', 'true')
                RETURNING uid
                """ % (machine_id, THIS_IP), aprinto_engine)['uid'][0]
            context.row_created = t
def add_term_weights_from_text(self, text, weight, term_weights):
    raw_words = re_findall(r'[\w]+', text.lower())
    strong_words = []
    for word in raw_words:
        if len(word) >= self.MINIMUM_WORD_LENGTH and word not in self.WEAK_WORDS:
            strong_words.append(word)
    # a term is either a single word or N words separated by spaces (currently N == 2)
    terms = []
    for i in range(0, len(strong_words)):
        terms.append(strong_words[i])
        if i > 0:
            terms.append(strong_words[i - 1] + ' ' + strong_words[i])
    for term in terms:
        term_weights[term] = term_weights.get(term, 0) + weight
def get_features(self, source):
    """
    Feature extraction from text; the point where you can customize features.
    source - opened file
    Returns the feature set as an iterable object.
    """
    words = []
    for line in source:
        if self.kind_of_partition == 'word':
            words.extend([
                word for word in re_split(
                    r'[\s,.:;!?<>+="()%\-0-9d]',
                    line.decode("utf-8").lower().encode("utf-8"))
                if word and (not self.est_words or word in self.est_words)
                and word not in self.stopwords])
            # not is_bad(word.decode("utf-8"))]
        elif self.kind_of_partition == 'ngram':
            for word in re_split(r'[\s,.:;!?<>+="()%\-]',
                                 line.decode("utf-8").lower().encode("utf-8")):
                if word and word not in self.stopwords:
                    words.extend(re_findall('.{1,%d}' % self.ngram, word))
    return words
def _create_slug(word):
    if len(re_findall(u'([а-я:;_,.\s\$!\?\"\'@#\+\(\)&\^№=\*%]+)', word)) == 0:
        return word
    ru = {
        u'а': 'a', u'б': 'b', u'в': 'v', u'г': 'g', u'д': 'd', u'е': 'e',
        u'ё': 'e', u'ж': 'zh', u'з': 'z', u'и': 'i', u'й': 'i', u'к': 'k',
        u'л': 'l', u'м': 'm', u'н': 'n', u'о': 'o', u'п': 'p', u'р': 'r',
        u'с': 's', u'т': 't', u'у': 'u', u'ф': 'f', u'х': 'h', u'ц': 'c',
        u'ч': 'ch', u'ш': 'sh', u'щ': 'sh', u'ъ': '', u'ы': 'i', u'ь': '',
        u'э': 'e', u'ю': 'yu', u'я': 'ia',
        '_': '-', '-': '-', ' ': '-'
    }
    ret = ''
    for letter in word.lower():
        if letter in ru:
            ret += ru[letter]
        elif re_search(r'[a-z0-9]', letter) is not None:
            ret += letter
    return ret
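# Usage sketch (illustrative, not part of the original source): Cyrillic text
# transliterates to a latin slug, spaces become hyphens, punctuation is
# dropped; words that need no slugging pass through unchanged.
assert _create_slug(u'Привет мир') == 'privet-mir'
assert _create_slug('already-ok') == 'already-ok'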
def magic_from_file(filename, mime=False):
    """ Fallback function to retrieve file type when python-magic is missing

    Parameters
    ----------
    filename : str
        File path to read
    mime : bool
        Retrieve the file mimetype, otherwise a readable name (Default: False)

    Returns
    -------
    str
        File type result as human readable name or mimetype
    """
    # Use dereference to follow symlink
    commands = ["file", "--dereference", str(filename)]
    if mime:
        commands.insert(1, "--mime-type")
    with Popen(commands, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
               universal_newlines=True) as pipe:
        output, error_output = pipe.communicate()
    result = re_findall(r"^%s\:\s+(.*)$" % re_escape(str(filename)), output[:-1])
    if len(result) > 0:
        return result[0]
    return str()
def before_scenario(context, scenario):
    celery_tails = [
        'A manager prints a Client Contract',
        'After a manager prints a Client Contract, the Client prints the same Contract',
        'Check In Requests & Document Post Attempts Made to Aporo']
    if scenario.name.find('Check In Requests') >= 0:
        t = str(scenario.steps[1])
        t = t[t.find('"') + 1:t.rfind('"')]
        cred, machine_id = re_findall(r'\"(.+?)\"', t)
        if cred == 'manager':
            t = pd_read_sql("""
                INSERT INTO aprinto_access
                    (date_created, machine_id, ip_addr, vend_name, known_user, type_admin)
                VALUES
                    ('now'::timestamp with time zone, '%s', '%s', 'tmp', 'true', 'true')
                RETURNING uid
                """ % (machine_id, THIS_IP), aprinto_engine)['uid'][0]
            context.row_created = t
        elif cred == 'vendor':
            t = pd_read_sql("""
                INSERT INTO aprinto_access
                    (date_created, machine_id, ip_addr, vend_name, known_user, type_vendor)
                VALUES
                    ('now'::timestamp with time zone, '%s', '%s', 'tmp', 'true', 'true')
                RETURNING uid
                """ % (machine_id, THIS_IP), aprinto_engine)['uid'][0]
            context.row_created = t
    if celery_tails.count(scenario.name) > 0:
        context.celery_tail = '/tmp/aprinto_celery_tail'
        cmd = PY_PATH + '/tests/files/tail_celery.bash'
        proc = sub_popen([''.join(cmd)], stdout=sub_PIPE, shell=True)
        (t, err) = proc.communicate()
        if hasattr(context, 'processes'):
            context.processes.append(str(t))
        else:
            context.processes = [str(t)]
        delay(2)
def _m_full_reactions(self, rxn_recipes_path):
    # rescue values for character matches that may appear as stoichiometries
    DEFAULT_STOICHIO_RESCUE = {
        "4n": 4, "3n": 3, "2n": 2, 'n': 1,
        '(n)': 1, '(N)': 1, '(2n)': 2, '(x)': 1,
        'N': 1, 'm': 1, 'q': 1,
        '0.01': 1, '0.1': 1, '0.5': 1, '1.5': 1, '0.02': 1, '0.2': 1,
        '(n-1)': 0, '(n-2)': -1
    }
    spe_regex = (r'(\(n-1\)|\d+|4n|3n|2n|n|\(n\)|\(N\)|\(2n\)|\(x\)|N|m|q|'
                 r'\(n\-2\)|\d+\.\d+) ([\w\d]+)@\w+')
    reaction = {}
    try:
        for row in csv_DictReader(open(rxn_recipes_path), delimiter='\t'):
            tmp = {}  # makes sure that if there's an error it's not added
            # parse the reaction equation
            if not len(row['Equation'].split('=')) == 2:
                self.logger.warning('There should never be more or less than '
                                    'a left and a right side of an equation')
                self.logger.warning(row['Equation'])
                continue
            ######### LEFT ######
            #### MNX id
            tmp['left'] = {}
            for spe in re_findall(spe_regex, row['Equation'].split('=')[0]):
                # 1) try to rescue if it's one of the known values
                try:
                    tmp['left'][self._checkMNXMdeprecated(spe[1])] = \
                        DEFAULT_STOICHIO_RESCUE[spe[0]]
                except KeyError:
                    # 2) try to convert to int if it's not
                    try:
                        tmp['left'][self._checkMNXMdeprecated(spe[1])] = int(spe[0])
                    except ValueError:
                        self.logger.warning('Cannot convert ' + str(spe[0]))
                        continue
            ####### RIGHT #####
            #### MNX id
            tmp['right'] = {}
            for spe in re_findall(spe_regex, row['Equation'].split('=')[1]):
                # 1) try to rescue if it's one of the known values
                try:
                    tmp['right'][self._checkMNXMdeprecated(spe[1])] = \
                        DEFAULT_STOICHIO_RESCUE[spe[0]]
                except KeyError:
                    # 2) try to convert to int if it's not
                    try:
                        tmp['right'][self._checkMNXMdeprecated(spe[1])] = int(spe[0])
                    except ValueError:
                        self.logger.warning('Cannot convert ' + str(spe[0]))
                        continue
            ####### DIRECTION ######
            try:
                tmp['direction'] = int(row['Direction'])
            except ValueError:
                self.logger.error('Cannot convert ' + str(row['Direction']) + ' to int')
                continue
            ### add the others
            tmp['main_left'] = row['Main_left'].split(',')
            tmp['main_right'] = row['Main_right'].split(',')
            reaction[self._checkMNXRdeprecated(row['#Reaction_ID'])] = tmp
        return reaction
    except FileNotFoundError:
        self.logger.error('Cannot find file: ' + str(rxn_recipes_path))
        return False
def modify_message(self, tab, msg_as_string):
    ph_matchnum_txt = tab.param_handl_txtfield_match_indices.getText()
    ph_target_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_target)
    ph_extract_static_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_static)
    ph_extract_single_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_single)
    ph_extract_macro_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_macro)
    ph_extract_cached_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_cached)

    if not ph_target_exp:
        self.logger.warning(
            'No match expression specified! Skipping tab "{}".'.format(
                tab.namepane_txtfield.getText()))
        return msg_as_string

    exc_invalid_regex = 'Skipping tab "{}" due to error in expression {{}}: {{}}'.format(
        tab.namepane_txtfield.getText())

    try:
        match_exp = re_compile(ph_target_exp)
    except re_error as e:
        self.logger.error(exc_invalid_regex.format(ph_target_exp, e))
        return msg_as_string

    # The following code does not remove support for groups,
    # as the original expression will be used for actual replacements.
    # We simply need an expression without capturing groups to feed into re.findall(),
    # which enables the logic for granular control over which match indices to target.

    # Removing named groups to normalize capturing groups.
    findall_exp = re_sub(r'\?P<.+?>', '', ph_target_exp)
    # Removing capturing groups to search for full matches only.
    findall_exp = re_sub(r'(?<!\\)\(([^?]*?)(?<!\\)\)', r'\g<1>', findall_exp)
    findall_exp = re_compile(findall_exp)
    self.logger.debug('findall_exp: {}'.format(findall_exp.pattern))

    all_matches = re_findall(findall_exp, msg_as_string)
    self.logger.debug('all_matches: {}'.format(all_matches))

    match_count = len(all_matches)
    if not match_count:
        self.logger.warning(
            'Skipping tab "{}" because this expression found no matches: {}'.format(
                tab.namepane_txtfield.getText(), ph_target_exp))
        return msg_as_string

    matches = list()
    dyn_values = ''
    replace_exp = ph_extract_static_exp
    flags = None  # initialized here; only set when dynamic extraction runs

    if tab.param_handl_dynamic_chkbox.isSelected():
        find_exp, target_txt = '', ''
        selected_item = tab.param_handl_combo_extract.getSelectedItem()
        if selected_item == tab.PARAM_HANDL_COMBO_EXTRACT_CACHED:
            find_exp, target_txt = ph_extract_cached_exp, tab.param_handl_cached_resp_viewer.getMessage()
            target_txt = self.helpers.bytesToString(target_txt)
        elif selected_item == tab.PARAM_HANDL_COMBO_EXTRACT_SINGLE:
            self.issue_request(tab)
            find_exp, target_txt = ph_extract_single_exp, self.helpers.bytesToString(tab.response)
        elif selected_item == tab.PARAM_HANDL_COMBO_EXTRACT_MACRO:
            find_exp, target_txt = ph_extract_macro_exp, self.final_macro_resp

        if not find_exp:
            self.logger.warning(
                'No dynamic value extraction expression specified! Skipping tab "{}".'.format(
                    tab.namepane_txtfield.getText()))
            return msg_as_string

        try:
            # Making a list to enable multiple iterations.
            matches = list(re_finditer(find_exp, target_txt))
        except re_error as e:
            self.logger.error(exc_invalid_regex.format(ph_extract_macro_exp, e))
            return msg_as_string

        if not matches:
            self.logger.warning(
                'Skipping tab "{}" because this expression found no matches: {}'.format(
                    tab.namepane_txtfield.getText(), find_exp))
            return msg_as_string

        groups = {}
        groups_keys = groups.viewkeys()
        for match in matches:
            gd = match.groupdict()
            # The given expression should have unique group matches.
            for k in gd.keys():
                if k in groups_keys:
                    self.logger.warning(
                        'Skipping tab "{}" because this expression found ambiguous matches: {}'.format(
                            tab.namepane_txtfield.getText(), find_exp))
                    return msg_as_string
            groups.update(gd)

        # Remove '$' not preceded by '\'
        exp = re_sub(r'(?<!\\)\$', '', ph_target_exp)
        flags = re_match(r'\(\?[Limuxs]{1,6}\)', ph_target_exp)
        if flags is not None and 'x' in flags.group(0):
            exp += '\n'
        groups_exp = ''.join(['(?P<{}>{})'.format(group_name, group_match)
                              for group_name, group_match in groups.items()])
        dyn_values = ''.join(groups.values())
        # No need for another try/except around this re.compile(),
        # as ph_target_exp was already checked when compiling match_exp earlier.
        match_exp = re_compile(exp + groups_exp)
        self.logger.debug('match_exp adjusted to:\n{}'.format(match_exp.pattern))

    subsets = ph_matchnum_txt.replace(' ', '').split(',')
    match_indices = []
    for subset in subsets:
        try:
            if ':' in subset:
                sliceindex = subset.index(':')
                start = int(subset[:sliceindex])
                end = int(subset[sliceindex + 1:])
                if start < 0:
                    start = match_count + start
                if end < 0:
                    end = match_count + end
                for match_index in range(start, end):
                    match_indices.append(match_index)
            else:
                match_index = int(subset)
                if match_index < 0:
                    match_index = match_count + match_index
                match_indices.append(match_index)
        except ValueError as e:
            self.logger.error(
                'Ignoring invalid match index or slice on tab "{}" due to {}'.format(
                    tab.namepane_txtfield.getText(), e))
            continue

    match_indices = set(sorted([m for m in match_indices if m < match_count]))
    self.logger.debug('match_indices: {}'.format(match_indices))

    # Using findall_exp to avoid including capture groups in the result.
    message_parts = re_split(findall_exp, msg_as_string)
    self.logger.debug('message_parts: {}'.format(message_parts))

    # The above strategy to use re.split() in order to enable the usage of match_indices
    # ends up breaking non-capturing groups. At this point, however, we can safely remove
    # all non-capturing groups and everything will be peachy.
    ncg_exp = re_compile(r'\(\?[^P].+?\)')
    if re_search(ncg_exp, match_exp.pattern) is not None:
        match_exp = re_compile(ncg_exp.sub('', match_exp.pattern))
        if flags is not None:
            match_exp = re_compile(flags.group(0) + match_exp.pattern)
        self.logger.debug('match_exp adjusted to:\n{}'.format(match_exp.pattern))

    modified_message = ''
    remaining_indices = list(match_indices)
    for part_index, message_part in enumerate(message_parts):
        if remaining_indices and part_index == remaining_indices[0]:
            try:
                final_value = match_exp.sub(replace_exp, all_matches[part_index] + dyn_values)
            except (re_error, IndexError) as e:
                self.logger.error(exc_invalid_regex.format(
                    match_exp.pattern + ' or expression ' + replace_exp, e))
                return msg_as_string
            self.logger.debug('Found:\n{}\nreplaced using:\n{}\nin string:\n{}'.format(
                match_exp.pattern, replace_exp, all_matches[part_index] + dyn_values))
            modified_message += message_part + final_value
            remaining_indices.pop(0)
        elif part_index < match_count:
            modified_message += message_part + all_matches[part_index]
        else:
            modified_message += message_part

    return modified_message
def load_module_from_file_location(
    location: Union[bytes, str, Path], encoding: str = "utf8", *args, **kwargs
):  # noqa
    """Returns a module loaded from the provided file path.

    :param location: Corresponds to the importlib.util.spec_from_file_location
        location parameter, but with these differences:
        - It has to be of a string, bytes, or Path type.
        - You can also use environment variables in the format ${some_env_var};
          note that $some_env_var will not be resolved as an environment variable.
    :param encoding: If the location parameter is of a bytes type, use this
        encoding to decode it into a string.
    :param args: Corresponds to the rest of the
        importlib.util.spec_from_file_location parameters.
    :param kwargs: Corresponds to the rest of the
        importlib.util.spec_from_file_location parameters.

    For example, you can:

        some_module = load_module_from_file_location(
            "/some/path/${some_env_var}"
        )
    """
    if isinstance(location, bytes):
        location = location.decode(encoding)

    if isinstance(location, Path) or "/" in location or "$" in location:
        if not isinstance(location, Path):
            # A) Check if location contains any environment variables
            #    in format ${some_env_var}.
            env_vars_in_location = set(re_findall(r"\${(.+?)}", location))

            # B) Check that these variables exist in the environment.
            not_defined_env_vars = env_vars_in_location.difference(
                os_environ.keys()
            )
            if not_defined_env_vars:
                raise LoadFileException(
                    "The following environment variables are not set: "
                    f"{', '.join(not_defined_env_vars)}"
                )

            # C) Substitute them in location.
            for env_var in env_vars_in_location:
                location = location.replace(
                    "${" + env_var + "}", os_environ[env_var]
                )

        location = str(location)
        if ".py" in location:
            name = location.split("/")[-1].split(".")[
                0
            ]  # get just the file name without path and .py extension
            _mod_spec = spec_from_file_location(
                name, location, *args, **kwargs
            )
            assert _mod_spec is not None  # type assertion for mypy
            module = module_from_spec(_mod_spec)
            _mod_spec.loader.exec_module(module)  # type: ignore
        else:
            module = types.ModuleType("config")
            module.__file__ = str(location)
            try:
                with open(location) as config_file:
                    exec(  # nosec
                        compile(config_file.read(), location, "exec"),
                        module.__dict__,
                    )
            except IOError as e:
                e.strerror = "Unable to load configuration file (%s)" % e.strerror
                raise
            except Exception as e:
                raise PyFileError(location) from e

        return module
    else:
        try:
            return import_string(location)
        except ValueError:
            raise IOError("Unable to load configuration %s" % str(location))
def wordsonly(raw):
    # [A-Za-z] rather than [A-z]: the latter also matches '[', '\', ']', '^', '_', '`'
    return ' '.join(re_findall(r'[A-Za-z][A-Za-z0-9_]+-?[A-Za-z]?[A-Za-z0-9_]*', raw))
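# Usage sketch (illustrative, not part of the original source): keeps
# word-like tokens of two or more characters, including hyphenated ones,
# and drops leading-digit tokens and punctuation.
assert wordsonly('foo_bar, baz-qux! 123 ok') == 'foo_bar baz-qux ok'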
#!/usr/bin/env python3
from re import findall as re_findall
from re import compile as re_compile
from collections import defaultdict

if __name__ == '__main__':
    # helpers
    modify_bit = lambda b_i, b, n: (n & ~(1 << b_i)) | ((b << b_i) & (1 << b_i))
    re_nums = re_compile(r'[0-9]+')
    # given
    mem = defaultdict(int)
    with open('input.txt', 'r') as f:
        while l := f.readline().strip():
            if l[:4] == 'mask':
                mask = l.split(' = ')[1][::-1]
                overwrites = [(i, int(mask[i])) for i in range(len(mask))
                              if mask[i] != 'X']
            else:
                mem_loc, mem_val = tuple(map(int, re_findall(re_nums, l)))
                for (bit_i, bit_val) in overwrites:
                    mem_val = modify_bit(bit_i, bit_val, mem_val)
                mem[mem_loc] = mem_val
    print(sum(mem.values()))
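# Bitmask sketch (illustrative, not part of the original source; a standalone
# re-statement of the modify_bit helper above): modify_bit(b_i, b, n) forces
# bit b_i of n to the value b, which is exactly the 'mask' semantics the
# script applies to each memory write.
def _modify_bit(b_i, b, n):
    """Force bit b_i of n to the value b (0 or 1)."""
    return (n & ~(1 << b_i)) | ((b << b_i) & (1 << b_i))

assert _modify_bit(1, 1, 0b1000) == 0b1010  # set bit 1
assert _modify_bit(3, 0, 0b1010) == 0b0010  # clear bit 3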
def run_method(illustrate=True):
    """
    Every time log files are evaluated, keep a log file if:

        (1) the log file falls on a marked day as graphed above, or
        (2) there is no log file for a marked day "A" and a log file "F"
            is the closest in time between and including the day before X
            and the day after the next oldest marked day "B",
            i.e., B+1 <= F <= A-1

    GOAL: find the log closest to the ILD (ideal log date).
    """
    check_db_for_action()
    df = make_ideal_log_spectrum()

    # Drop non-log dates for now (in practice they are re-included for illustration)
    idx = df[df.log_dates.isnull()].index
    df = df.drop(idx, axis=0).reset_index(drop=True)

    # Conform data type
    df['log_dates'] = df.log_dates.map(lambda D: pd.to_datetime(D))
    ideal_log_dates = df.log_dates.tolist()

    # Get logs
    p = sub_popen(['ls ~/.pg_dump'], stdout=sub_PIPE, shell=True)
    (_out, _err) = p.communicate()
    logs = _out.strip('\n').split('\n')
    log_dates = map(lambda X: dt.datetime.strptime(
        "%s/%s/%s" % re_findall(r'(\d{4})[_\.](\d{2})[_\.](\d{2})\.', X)[0],
        "%Y/%m/%d"), logs)
    log_dict = dict(zip(map(lambda D: pd.to_datetime(D), log_dates), logs))
    lf = pd.DataFrame(data={'logs': pd.unique(log_dates)})

    # Find intersecting values
    initial_matches = pd.Series(pd.np.intersect1d(df.log_dates.values, lf.logs.values))

    # (1) the log file falls on a marked day
    lf['keep'] = lf.logs.where(lf.logs.isin(initial_matches))
    df['paired'] = df.log_dates.where(df.log_dates.isin(initial_matches))

    # (2) What is left?
    #     A. Check by getting date bounds of unclaimed logs, then counting how
    #        many remaining ILDs are not yet paired with a log.
    #     B. Iterate these remaining ILDs to match up with a log,
    #        then discard any unmatched logs.

    # (A)
    to_check = lf[lf.keep.isnull()]
    oldest_log, latest_log = to_check.logs.min(), to_check.logs.max()
    older_dates, earlier_dates = df[df.log_dates < oldest_log], df[latest_log < df.log_dates]
    assert len(older_dates) + len(earlier_dates) >= 2
    next_older_date, prev_earlier_date = older_dates.iloc[0, :], earlier_dates.iloc[-1, :]
    idl_dates = df.ix[prev_earlier_date.name:next_older_date.name, :]

    # (B)
    pt, last_idx = 0, idl_dates.index.tolist()[-1]
    for idx, row in idl_dates.iterrows():
        if idx == last_idx:
            break
        if pd.isnull(row.paired):
            A, B = row.log_dates, idl_dates.iloc[pt + 1, :].log_dates
            possible_logs = lf[(lf.logs < A) & (B < lf.logs)]
            if len(possible_logs):
                res = possible_logs.sort('logs', ascending=False).iloc[0, :].logs
                D = row.to_dict()
                D.update({'paired': res})
                df[df.index == idx].update(D.values())
        pt += 1

    # Find intersecting values between paired ILDs and remaining logs
    final_matches = pd.Series(pd.np.intersect1d(idl_dates.paired.values, lf.logs.values))
    lf.keep.update(lf.logs.where(lf.logs.isin(final_matches)))

    if illustrate:
        # SHOW ME THE RESULTS: [what did we want, what did we get, what did we do]
        # Plot the ILDs as blue vertical bars, then overlay all logs to keep
        # in yellow, then overlay all logs to be deleted in red.
        start, end = lf.logs.max(), lf.logs.min()
        one_day = dt.timedelta(days=+1)
        res = pd.DataFrame({'dates': [start - (i * one_day)
                                      for i in range((start - end).days)]})
        res['days'] = res.dates.map(lambda D: D.dayofyear)
        ndf = make_ideal_log_spectrum()
        ndf['log_dates'] = ndf.log_dates.map(lambda D: pd.to_datetime(D))
        all_log_dates = ndf.log_dates.tolist()
        res['IDLs'] = res.dates.map(lambda D: 0 if not all_log_dates.count(D) else 3)
        logs_to_keep = lf[lf.keep.notnull()].logs.tolist()
        logs_to_delete = lf[lf.keep.isnull()].logs.tolist()
        res['Keep'] = res.dates.map(lambda D: 0 if not logs_to_keep.count(D) else 2)
        res['Delete'] = res.dates.map(lambda D: 0 if not logs_to_delete.count(D) else 1)

        # Make plot
        import pylab as plt
        fig = plt.figure()
        axes = fig.add_subplot(1, 1, 1)
        res.plot(x='days', y='IDLs', ylim=(0, 6), ax=axes, kind='bar',
                 figsize=(107, 8), color='b')
        res.plot(x='days', y='Keep', ylim=(0, 6), ax=axes, kind='bar',
                 figsize=(107, 8), color='y')
        res.plot(x='days', y='Delete', ylim=(0, 6), ax=axes, kind='bar',
                 figsize=(107, 8), color='r')
        axes.invert_xaxis()
        fig.savefig('/Volumes/mbp2/Users/admin/Desktop/plot.png')

    log_files = res[res.Delete == 1].dates.map(log_dict).tolist()
    cmd = """
        insert into system_log (operation, started, parameters)
        values ('rotate_pgdump', now(), '%s')
        """ % str(log_files).strip('[]').replace("'", '').replace(' ', '')
    SYS_R.T.conn.set_isolation_level(0)
    SYS_R.T.cur.execute(cmd)
    SYS_R._growl('check desktop for intended logrotate actions and update pgsql')