def __init__(self, workerId, logPath, nameserver, qin=None, sqout=None,
             eqout=None, mqout=None, metaQin=None, metaQout=None, geoip=None):
    """
    Create DNS Broker, which solicits nameservers for queued host
    information before forwarding the host object to aggregation brokers.

    @param int workerId - Worker Id
    @param String logPath - Path to log to
    @param NameServer nameserver - Nameserver object
    @param Queue qin - Input Queue
    @param Queue sqout - SqlBroker Input Queue
    @param Queue eqout - Json Broker Output Queue
    @param Queue mqout - MX Broker Output Queue
    @param Queue metaQin - Meta Input Queue (Used by menus)
    @param Queue metaQout - Meta Output Queue (Used by menus)
    @param Reader geoip - Initialized geoip2.database.Reader object
    """
    super(dnsBroker, self).__init__(workerId=workerId,
                                    workerPurpose="Probe",
                                    logPath=logPath,
                                    qin=qin,
                                    metaQin=metaQin,
                                    metaQout=metaQout)

    self.state.update({
        # DNS Probe
        'probe': Probe(workerId=workerId, logPath=logPath,
                       nameserver=nameserver),

        # Google MX Regex (raw string avoids invalid escape sequences)
        'rgmx': reg_compile(r"([0-9]+)\s(.*\.google(?:mail)?\.com$)"),

        # SPF Regex
        'rgspf': reg_compile(r'^"v\=(spf[0-9].*)"$'),

        # Output Queues
        'qout': [sqout, eqout, mqout],

        # GeoIp Db Wrapper
        'geoip': geoip,
    })
def getConcatenatedMessagesForEachParticipant(self) -> Dict[str, str]:
    '''
    Concatenates all messages sent to this chat, per participant
    '''
    _mediaOmitted = reg_compile(r'\<media\s+omitted\>', flags=IGNORECASE)
    _asciiWords = reg_compile(r'\w{2,}', flags=ASCII)

    return dict(map(
        lambda user: (user.name, '\n'.join(
            filter(lambda word: not word.isnumeric(),
                   _asciiWords.findall('\n'.join(
                       filter(lambda text: not _mediaOmitted.match(text),
                              map(lambda msg: msg.content,
                                  user.messages))))))),
        self.users))
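# A minimal, self-contained sketch of the two regexes used above; the sample
# strings are illustrative, not from the original project. Purely numeric
# tokens such as '42' survive findall but are dropped by the isnumeric filter.
from re import compile as reg_compile, IGNORECASE, ASCII

_mediaOmitted = reg_compile(r'\<media\s+omitted\>', flags=IGNORECASE)
_asciiWords = reg_compile(r'\w{2,}', flags=ASCII)

assert _mediaOmitted.match('<Media omitted>')  # media placeholder lines are skipped
assert _asciiWords.findall('hi there, 42!') == ['hi', 'there', '42']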
def classifyCompaniesUsingPinCodeOfRegisteredAddress(
        dataStream: chain) -> Counter:
    # pincode extraction regular expression
    reg = reg_compile(r'(\d{6})')
    return Counter(
        map(lambda e: __extractPinCodeFromAddress__(
            reg, e.registeredOfficeAddress), dataStream))
def pub_metadata(self):
    if not self.is_valid():
        raise Exception("Invalid form content, unable to render metadata")

    keyword_delimiter = reg_compile(r"\s*,\s*")
    keywords = []
    if self.cleaned_data['keywords']:
        k = keyword_delimiter.split(self.cleaned_data['keywords'])
        keywords.extend(k)

    creator = {
        'name': self.cleaned_data['name'],
    }
    if self.cleaned_data['affiliation']:
        creator['affiliation'] = self.cleaned_data['affiliation']
    if self.cleaned_data['orcid']:
        creator['orcid'] = self.cleaned_data['orcid']

    data = {
        'title': self.cleaned_data['title'],
        'description': self.cleaned_data['description'],
        'keywords': keywords,
        'creators': [creator],
    }
    return data
def load_answers():
    global answers
    answers = open_json_file("abot", [])
    # compile answers
    for answer in answers:
        answer["regex"] = [reg_compile(reg.lower()) for reg in answer["regex"]]
def _getRegex() -> Pattern:
    '''
    Regex to be used for extracting timestamp of a certain
    message from `*.txt` file
    '''
    # `[ap]m` rather than `[a|p]m`, which would also match a literal `|`
    return reg_compile(
        r'(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2}(\s?[ap]m)?)',
        flags=IGNORECASE)
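# Quick self-check of the timestamp regex above; the chat lines are
# illustrative samples, not taken from any real export.
from re import compile as reg_compile, IGNORECASE

_ts = reg_compile(
    r'(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2}(\s?[ap]m)?)',
    flags=IGNORECASE)
assert _ts.search('18/12/2019, 10:27 pm - Alice: hello')  # 12-hour format
assert _ts.search('1/2/20, 9:05 - Bob: hi')               # 24-hour format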
def _identify_base_from_name(name, base_class):
    # Check if string is empty
    if len(name) == 0:
        return

    # Use regex to clean the string from unwanted characters
    pattern = reg_compile("[^a-zA-Z0-9 ]")
    name = reg_sub(" {2,}", " ", pattern.sub('', name)).strip()

    # Add all matching bases to list
    results = base_class.get_bases_from_name(name)

    # If only one matching base
    if len(results) == 1:
        return results[0]

    # If not, we keep only the bases which are in pool
    if len(results) > 1:
        results_2 = [base for base in results if base.pool]

        # If only one matching base
        if len(results_2) == 1:
            return results_2[0]
def __init__(self, _max: str, departFromMax: str, _min: str,
             departFromMin: str, rainfall: str, relativeHumidityFirst: str,
             relativeHumidityFinal: str, sunset: str, sunrise: str,
             moonset: str, moonrise: str):
    self.timestamp = datetime.now().timestamp()
    # Accepts optionally signed integers / decimals, e.g. `-1.2`, `31`
    reg = reg_compile(r'^(-?\d*\.?\d{1,})$')

    tmp = reg.search(_max)
    self.max = float(tmp.group()) if tmp else None
    tmp = reg.search(_min)
    self.min = float(tmp.group()) if tmp else None
    tmp = reg.search(departFromMax)
    self.departFromMax = float(tmp.group()) if tmp else None
    tmp = reg.search(departFromMin)
    self.departFromMin = float(tmp.group()) if tmp else None
    tmp = reg.search(rainfall)
    self.rainfall = float(tmp.group()) if tmp else None
    tmp = reg.search(relativeHumidityFirst)
    self.relativeHumidityAt08_30 = float(tmp.group()) if tmp else None
    tmp = reg.search(relativeHumidityFinal)
    self.relativeHumidityAt17_30 = float(tmp.group()) if tmp else None

    self.sunset = time(*[int(i.strip(), base=10) for i in sunset.split(':')])
    self.sunrise = time(*[int(i.strip(), base=10) for i in sunrise.split(':')])
    self.moonset = time(*[int(i.strip(), base=10) for i in moonset.split(':')])
    self.moonrise = time(*[int(i.strip(), base=10) for i in moonrise.split(':')])
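# Self-contained check of the numeric-field regex used above; the sample
# inputs are illustrative. Non-numeric fields (e.g. 'NA') yield None.
from re import compile as reg_compile

_num = reg_compile(r'^(-?\d*\.?\d{1,})$')
assert float(_num.search('31.4').group()) == 31.4
assert float(_num.search('-2').group()) == -2.0
assert _num.search('NA') is None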
def pickByName(self, name: str) -> List[Tuple[int, str]]:
    # Note: returns all matching (id, name) pairs, not a single Station,
    # so the return annotation reflects the list actually built below
    possibleMatches = []
    reg = reg_compile(r'({})'.format(name), flags=I)
    for i in self.stations:
        _match = reg.search(i.name)
        if _match:
            possibleMatches.append((i.id, i.name))
    return possibleMatches
def __init__(self, date: str, _min: str, _max: str, img: str, stat: str):
    reg = reg_compile(r'^(-?\d*\.?\d{1,})$')
    self.date = date
    tmp = reg.search(_min)
    self.min = float(tmp.group()) if tmp else None
    tmp = reg.search(_max)
    self.max = float(tmp.group()) if tmp else None
    self.img = img
    self.stat = stat
def getAllActivities(tree: BeautifulSoup) -> List[Tag]:
    reg = reg_compile(r'^(message[0-9]{1,})$')
    tmp = tree.findAll('div', attrs={'class': 'message default clearfix'})
    tmp.extend(
        tree.findAll('div',
                     attrs={'class': 'message default clearfix joined'}))
    tmp.extend([
        i for i in tree.findAll('div', attrs={'class': 'message service'})
        if reg.match(i.get('id') or '')  # guard against divs with no id attribute
    ])
    return tmp
def _parse(content: str) -> Places:
    '''
    Parses HTML, using BeautifulSoup.
    '''
    cityIdReg = reg_compile(r'([\d]{1,5})$')
    base_url = 'http://city.imd.gov.in/citywx/'
    placesObj = Places([])
    state_name = reg_compile(r'^(##)$')
    # literal dot escaped so it cannot match an arbitrary character
    station_name = reg_compile(r'^(city_weather\.php\?id=[\d]{1,5})$')
    root = BeautifulSoup(content, features='lxml')
    currentState = None

    for i in root.findAll('a'):
        if state_name.match(i.get('href')):
            currentState = State(i.getText(), [])
            placesObj.push(currentState)
        if station_name.match(i.get('href')):
            currentState.push(
                Station(i.getText(),
                        int(cityIdReg.search(i.get('href')).group()),
                        currentState.name,
                        urljoin(base_url, i.get('href'))))

    return placesObj
class RedirectToHomeOrArticlePage(yui.RequestHandler):
    # literal dot escaped so `.html` cannot match e.g. `xhtml`
    _PATTERN = reg_compile(r'tid-(\d+)(?:-page-\d+)?\.html')

    @yui.client_cache(ARTICLE_CACHE_TIME, 'public')
    def get(self):
        match = RedirectToHomeOrArticlePage._PATTERN.match(
            self.request.query_string)
        if match:
            id = int(match.group(1))
            if id:
                article = Article.get_article_by_id(id)
                if article:
                    self.redirect(
                        BLOG_HOME_RELATIVE_PATH +
                        article.quoted_not_escaped_url(), 301)
                    return
        self.redirect('/', 301)
def extractAllCompanyEmailProvider(
        dataStream: map) -> Tuple[Dict[str, int], int]:

    # Extracts email service provider's name using regular expression
    def __getEmailProvider__(email: str) -> str:
        matchObj = reg.search(email)
        return matchObj.group().lower() if matchObj else None

    # Increments usage count of an email service provider & returns the
    # updated dictionary
    def __updateCounter__(holder: Dict[str, int],
                          email: str) -> Dict[str, int]:
        if email:
            holder.update({email: holder.get(email, 0) + 1})
        return holder

    # Keeps only the `count` most-used providers in the dictionary
    def __cleanupCounter__(holder: Dict[str, int], count: int,
                           findTotal: bool = True) -> Dict[str, int]:
        nonlocal total
        total += sum(holder.values()) if findTotal else 0
        return dict(
            map(lambda v: (v, holder[v]),
                sorted(holder, key=lambda v: holder[v], reverse=True)[:count]))

    try:
        total = 0
        reg = reg_compile(r'(?<=@)[^.]+(?=\.)')
        # Processes one state of India at a time, keeping the 10 most-used
        # email service providers per state, then merges the per-state counts
        # into a final top 10 across all states
        return __cleanupCounter__(reduce(
            lambda acc, cur: __mergeTwoDicts__(
                acc,
                __cleanupCounter__(
                    reduce(
                        lambda acc, cur: __updateCounter__(
                            acc, __getEmailProvider__(cur.email)),
                        cur, {}),
                    10)),
            dataStream, {}), 10, findTotal=False), total
    except Exception:
        return None  # signals failure instead of a (counts, total) pair
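# Self-check of the provider-extraction regex above: a lookbehind for `@` and
# a lookahead for the first dot isolate the provider name. Addresses are
# illustrative samples.
from re import compile as reg_compile

_prov = reg_compile(r'(?<=@)[^.]+(?=\.)')
assert _prov.search('info@gmail.com').group() == 'gmail'
assert _prov.search('contact@yahoo.co.in').group() == 'yahoo'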
def parse_url2(s):
    timestamp, title = s.split(' ', 1)
    dt = datetime.utcfromtimestamp(int(timestamp))
    return dt.strftime('%Y/%m/%d/') + title


def parse_format(formats):
    formats = formats.split()
    return ((int(formats[0]) and CONTENT_FORMAT_FLAG['html']) |
            (not int(formats[1]) and CONTENT_FORMAT_FLAG['bbcode']))


def join_list(list):
    return ','.join(list) if list else ''


def to_list(string):
    return string.split(',') if string else []


HTML_TAG_PATTERN = reg_compile('<.*?>')
BBCODE_TAG_PATTERN = reg_compile(r'\[.*?\]')


def break_content_to_summary(content, format, size):
    if format & CONTENT_FORMAT_FLAG['bbcode']:
        content = BBCODE_TAG_PATTERN.sub(' ', content)
    if format == 0:
        content = escape(content)
    content = HTML_TAG_PATTERN.sub(' ', content)[:size].strip()
    # Avoid cutting an HTML entity (at most ~6 chars) in half at the boundary
    amp_position = content[-6:].rfind('&')
    if amp_position > -1:
        content = content[:len(content) - 6 + amp_position]
    return content


def parse_summary(size, flag):
    def break_content(content):
from helpers import *
from re import compile as reg_compile
from traceback import format_exc as print_traceback

# contains a list of keywords for each player (uuid)
mentions = open_json_file("mentio", {})
max_amount = 3
arrow = colorify(u"&r&7\u2192&r")
colors_reg = reg_compile(u"\u00A7[\\da-fk-or]")  # finds color codes


def saveMentions():
    save_json_file("mentio", mentions)


@hook.event("player.AsyncPlayerChatEvent", "high")
def onChat(event):
    if not event.isCancelled():
        sender = event.getPlayer()
        words = event.getMessage().split(" ")
        # set of <Player>, may be a lazy or unmodifiable collection
        recipients = event.getRecipients()
        for recipient in list(recipients):
            recuid = uid(recipient)
            if recuid in mentions:
                keywords = mentions[uid(recipient)]
            else:  # player has no custom keywords; fall back to their names
                keywords = [recipient.getName().lower(),
                            stripcolors(recipient.getDisplayName()).lower()]
def extractUserAndBotNameFromMessage(self, username: str) -> Tuple[str, str]:
    # Caller is expected to check isAViaBotMessage() first; search() returns
    # None for non-matching usernames, and .groups() would then raise
    regex = reg_compile(r'(.+)(?=\svia\s)\svia\s(.+)')
    return regex.search(username).groups()
def isAViaBotMessage(self, username: str) -> bool:
    regex = reg_compile(r'(.+)(?=\svia\s)\svia\s(.+)')
    return regex.search(username) is not None
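# Self-check of the `via bot` pattern shared by the two methods above; the
# username strings are illustrative samples.
from re import compile as reg_compile

_via = reg_compile(r'(.+)(?=\svia\s)\svia\s(.+)')
assert _via.search('Alice via @SomeBot').groups() == ('Alice', '@SomeBot')
assert _via.search('Alice') is None  # plain usernames do not match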
def _getMessage(text: str) -> str:
    '''
    Extracts actual message sent by a certain user using regex
    '''
    return reg_compile(r'\s-\s.+?(?=:):\s*').sub('', text)
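# Self-check of the prefix-stripping regex above; the chat line is an
# illustrative sample in which the timestamp is assumed to have been
# removed already by an earlier step.
from re import compile as reg_compile

assert reg_compile(r'\s-\s.+?(?=:):\s*').sub(
    '', ' - Alice: hello there') == 'hello there'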
# -*-coding: utf-8-*-
"""Python module to work with SF-56 spectrum files."""

from os.path import exists, isfile, basename
from re import compile as reg_compile, split as reg_split

from plotter import Chart, show_charts

SF_COMMENT = '//'
SF_EXTENSION = '.sf'
SF_SEPARATOR_PTRN = reg_compile(r'\s+')


def read_sf(filename, name=None):
    '''Reads spectrum file into dictionary.
    Note for sf files: first column value must be UNIQUE!'''
    spectrum = Chart(name or basename(filename))
    with open(filename, 'r') as sf_file:
        for row in sf_file:
            row = validate_sf_row(row)
            if row:
                columns = reg_split(SF_SEPARATOR_PTRN, row)
                # first column appears to be a wavelength in nm,
                # converted to meters here
                spectrum.insert(float(columns[0]) * 10**-9, float(columns[1]))
    return spectrum


def validate_sf_row(row):
    '''Strips whitespace and replaces commas with dots
    if the given row is not a comment.'''
    return row.strip().replace(',', '.') if not row.startswith(SF_COMMENT) else None
def _getTimeFormatRegex() -> Pattern:
    '''
    Returns regular expression for extracting AM/PM pattern from chat
    timestamp, where AM/PM may be prefixed with a whitespace
    '''
    # `[ap]m` rather than `[a|p]m`, which would also match a literal `|`
    return reg_compile(r'^(\s?[ap]m)$', flags=IGNORECASE)
def __extract_state__(fromIt: str) -> str:
    match_obj = reg_compile(r'^mca_(\w+)_[0-9]{8,}\.csv$').match(fromIt)
    return match_obj.group(1).capitalize() if match_obj else None
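# Self-check of the filename pattern above; the filename is an illustrative
# sample following the `mca_<state>_<date>.csv` convention.
from re import compile as reg_compile

_m = reg_compile(r'^mca_(\w+)_[0-9]{8,}\.csv$').match(
    'mca_maharashtra_01012021.csv')
assert _m and _m.group(1).capitalize() == 'Maharashtra'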
def __getMessage__(text: str) -> str:
    return reg_compile(r'\s-\s.+?(?=:):\s*').sub('', text)
class PostModel:
    """ A class that represents a user post from social media """

    # Shared by all instances; matches http(s)://, bare domains, and paths
    url_regex = reg_compile(
        r"^(https?://)?([\da-z.-]+)\.([a-z.]{2,6})([/\w .-]*)*/?")

    def __init__(
        self,
        post_id: str,
        title: str,
        content: str,
        url: str,
        score: int,
        created_date: int,
        scope: str,
        has_external: bool,
    ):
        """
        Initialises the post class
        :param post_id: the post id
        :param title: the title
        :param content: the content
        :param url: the post url
        :param score: the number of likes/ upvotes/ retweets
        :param created_date: a numeric representation of the date the post was created
        :param scope: the origin scope of the post
        :param has_external: a flag indicating if the post has external links
        """
        self.post_id: str = self.get_id(scope, post_id)
        self.title: str = title
        self.content: str = content
        self.url: str = url
        self.score: int = score
        self.created_date: datetime = datetime.fromtimestamp(created_date)
        self.has_external: bool = has_external

    @staticmethod
    def get_id(source, post_id) -> str:
        """
        A function to generate a new id
        :param source: the source from which the post was obtained
        :param post_id: the post id
        :return: the new id
        """
        return f"{source}_{post_id}"

    def get_links(self) -> list:
        """
        A function to extract links from the content, e.g.
        https://www.google.com
        http://www.google.com
        www.google.com
        www.google239.com/ayy/lmao?=123
        :return: the list of link matches found in the content
        """
        return list(self.url_regex.findall(self.content))

    def as_dict(self):
        """
        A utility function to get the dict representation of the post object
        :return:
        """
        dict_object = dict((key, value) for key, value in self.__dict__.items()
                           if not callable(value) and not key.startswith("__"))
        dict_object["_id"] = self.post_id
        return dict_object
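# Minimal usage sketch of PostModel; every field value below is illustrative.
# Note the regex is anchored with `^`, so get_links only finds a link when
# the content starts with one.
_post = PostModel(
    post_id='abc123', title='Hello', content='https://www.google.com',
    url='https://example.com/post/abc123', score=42,
    created_date=1609459200, scope='reddit', has_external=True)
assert _post.post_id == 'reddit_abc123'
assert _post.get_links()  # content starting with a URL yields a match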
def __getUser__(text: str) -> str:
    matchObj = reg_compile(r'(?<=\s-\s).+?(?=:)').search(text)
    return matchObj.group() if matchObj else ''
from helpers import *
from re import compile as reg_compile
from traceback import format_exc as print_traceback

# contains a list of keywords for each player (uuid)
mentions = open_json_file("mentio", {})
max_amount = 1000
arrow = colorify(u"&r&7\u2192&r")
colors_reg = reg_compile(u"\u00A7[\\da-fk-or]")  # finds color codes


def saveMentions():
    save_json_file("mentio", mentions)


@hook.event("player.AsyncPlayerChatEvent", "monitor")
def onChat(event):
    if not event.isCancelled():
        sender = event.getPlayer()
        words = event.getMessage().split(" ")
        # set of <Player>, may be a lazy or unmodifiable collection
        recipients = event.getRecipients()
        for recipient in list(recipients):
            recuid = uid(recipient)
            if recuid in mentions:
                keywords = mentions[uid(recipient)]
            else:  # player has no custom keywords; fall back to their names
                keywords = [recipient.getName().lower(),
                            stripcolors(recipient.getDisplayName()).lower()]
def replace_exclude(input_text, replace_text="", exclude_regex="[^a-zA-Z]"):
    """Replace every character of input_text that matches exclude_regex
    (by default, anything that is not a letter) with replace_text."""
    replace_pattern = reg_compile(exclude_regex)
    return sub(replace_pattern, replace_text, input_text)
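# Self-check of replace_exclude with its default pattern; the inputs are
# illustrative and assume the function above is in scope.
assert replace_exclude('Hello, World! 42') == 'HelloWorld'
assert replace_exclude('a-b-c', replace_text=' ') == 'a b c'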
def input_reader(path: str) -> List[List[str]]:
    with open(path) as f:
        lines = f.read().split()
    # Tokenises hex-grid directions; the alternation order is safe because
    # no two-letter direction starts with 'e' or 'w'
    reg = reg_compile('e|se|sw|w|nw|ne')
    return [reg.findall(line) for line in lines]
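# Self-check of the direction tokeniser above; the input line is an
# illustrative sample of a hex-grid path.
from re import compile as reg_compile

assert reg_compile('e|se|sw|w|nw|ne').findall('esenee') == \
    ['e', 'se', 'ne', 'e']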
def _getUser(text: str) -> str:
    '''
    Helps in extracting participating user name from message ( text )
    '''
    matchObj = reg_compile(r'(?<=\s-\s).+?(?=:)').search(text)
    return matchObj.group() if matchObj else ''
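# Self-check of the user-extraction lookbehind above; the line is an
# illustrative sample in the ` - <user>: <message>` shape.
from re import compile as reg_compile

assert reg_compile(r'(?<=\s-\s).+?(?=:)').search(
    ' - Alice: hello').group() == 'Alice'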
def __getRegex__() -> Pattern:
    # `[ap]m` rather than `[a|p]m`, which would also match a literal `|`
    return reg_compile(
        r'(\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{1,2} [ap]m)')
from re import compile as reg_compile, search as reg_search, IGNORECASE

from django.conf import settings
from django.core.exceptions import ValidationError
from django.urls.base import reverse
import django.forms as forms


def validate_orcid(orcid):
    if orcid:
        r = reg_search(settings.ORICD_REGEX, orcid)
        if not r or len(r.groups()) < 1:
            raise ValidationError('Invalid ORCID identifier.')


_KEYWORD_REGEX = reg_compile(r'\bqa4sm\b', IGNORECASE)


def validate_keywords(keywordlist):
    if not _KEYWORD_REGEX.search(keywordlist):
        raise ValidationError('Missing required keyword')


class PublishingForm(forms.Form):
    title = forms.CharField(label='Title',
                            widget=forms.Textarea(attrs={'rows': '2'}),
                            help_text='Title of the Zenodo entry')
    description = forms.CharField(label='Description',
                                  widget=forms.Textarea(attrs={'rows': '2'}),
                                  help_text='Description of the Zenodo entry')