Example #1
    def __init__( self, 
                  workerId, 
                  logPath, 
                  nameserver, 
                  qin       = None, 
                  sqout     = None, 
                  eqout     = None,
                  mqout     = None,
                  metaQin   = None, 
                  metaQout  = None, 
                  geoip     = None ):
        """

               Create DNS broker. Solicits nameservers for queued host
            information before forwarding the host object to the
            aggregation brokers.
            
            @param int        workerId   - Worker Id
            @param String     logPath    - Path to log to
            @param NameServer nameserver - Nameserver object
            @param Queue      qin        - Input Queue
            @param Queue      sqout      - SqlBroker Input  Queue
            @param Queue      eqout      - Json Broker Output Queue
            @param Queue      mqout      - MX Broker Output Queue
            @param Queue      metaQin    - Meta Input Queue  (Used by menus)
            @param Queue      metaQout   - Meta Output Queue (Used by menus)
            @param Reader     geoip      - Initialized geoip2.database.Reader object

        """

        super( dnsBroker, self ).__init__( workerId      = workerId, 
                                           workerPurpose = "Probe",
                                           logPath       = logPath,
                                           qin           = qin, 
                                           metaQin       = metaQin,
                                           metaQout      = metaQout )

        self.state.update( {

            # DNS Probe
            'probe'   : Probe( workerId   = workerId, 
                               logPath    = logPath, 
                               nameserver = nameserver ),

            # Google MX Regex
            'rgmx'    : reg_compile( r"([0-9]+)\s(.*\.google(?:mail)?\.com$)" ),

            # SPF Regex
            'rgspf'   : reg_compile( r'^"v=(spf[0-9].*)"$' ),
            
            # Output Queues
            'qout'    : [ sqout, eqout, mqout ],

            # GeoIp Db Wrapper
            'geoip'   : geoip,
 
        } )
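
# Usage sketch (assumption): a minimal illustration of what the 'rgmx' and
# 'rgspf' patterns stored above capture, run against made-up record strings.
from re import compile as reg_compile

rgmx = reg_compile(r"([0-9]+)\s(.*\.google(?:mail)?\.com$)")
rgspf = reg_compile(r'^"v=(spf[0-9].*)"$')

print(rgmx.search("10 aspmx.l.google.com").groups())
# ('10', 'aspmx.l.google.com')
print(rgspf.search('"v=spf1 include:_spf.google.com ~all"').group(1))
# spf1 include:_spf.google.com ~all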
Example #2
    def getConcatenatedMessagesForEachParticipant(self) -> Dict[str, str]:
        '''
            Concatenates all messages sent to this chat by each participant
        '''
        _mediaOmitted = reg_compile(r'\<media\s+omitted\>', flags=IGNORECASE)
        _asciiWords = reg_compile(r'\w{2,}', flags=ASCII)

        return {
            user.name: '\n'.join(
                word for word in _asciiWords.findall('\n'.join(
                    msg.content for msg in user.messages
                    if not _mediaOmitted.match(msg.content)))
                if not word.isnumeric())
            for user in self.users
        }
Example #3
def classifyCompaniesUsingPinCodeOfRegisteredAddress(
        dataStream: chain) -> Counter:
    reg = reg_compile(r'(\d{6})')  # pincode extraction regular expression
    return Counter(
        map(
            lambda e: __extractPinCodeFromAddress__(
                reg, e.registeredOfficeAddress), dataStream))
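
# Usage sketch (assumption): how the 6-digit pincode pattern above behaves on a
# made-up registered-office address string.
from re import compile as reg_compile

reg = reg_compile(r'(\d{6})')
address = "12 MG Road, Bengaluru, Karnataka 560001"
match = reg.search(address)
print(match.group(1) if match else None)  # 560001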
Example #4
    def pub_metadata(self):
        if not self.is_valid():
            raise Exception("Invalid form content, unable to render metadata")

        keyword_delimiter = reg_compile(r"\s*,\s*")
        keywords = []

        if self.cleaned_data['keywords']:
            k = keyword_delimiter.split(self.cleaned_data['keywords'])
            keywords.extend(k)

        creator = {
            'name': self.cleaned_data['name'],
        }

        if self.cleaned_data['affiliation']:
            creator['affiliation'] = self.cleaned_data['affiliation']

        if self.cleaned_data['orcid']:
            creator['orcid'] = self.cleaned_data['orcid']

        data = {
            'title': self.cleaned_data['title'],
            'description': self.cleaned_data['description'],
            'keywords': keywords,
            'creators': [creator],
        }

        return data
Example #5
def load_answers():
    global answers
    answers = open_json_file("abot", [])

    # compile answers
    for answer in answers:
        answer["regex"] = [reg_compile(reg.lower()) for reg in answer["regex"]]
Example #6
 def _getRegex() -> Pattern:
     '''
         Regex to be used for extracting timestamp of
         a certain message from `*.txt` file
     '''
     return reg_compile(
         r'(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2}(\s?[ap]m)?)', flags=IGNORECASE)
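
# Usage sketch (assumption): the timestamp pattern above applied to a made-up
# exported chat line; the trailing am/pm part is optional and case-insensitive.
from re import compile as reg_compile, IGNORECASE

reg = reg_compile(
    r'(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2}(\s?[ap]m)?)', flags=IGNORECASE)
print(reg.search('21/9/2020, 9:45 pm - Alice: hello').group(1))
# 21/9/2020, 9:45 pm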
Example #7
def _identify_base_from_name(name, base_class):
    # Check if string is empty
    if len(name) == 0:
        return

    # Use regex to clean the string from unwanted characters
    pattern = reg_compile("[^a-zA-Z0-9 ]")
    name = reg_sub(" {2,}", " ", pattern.sub('', name)).strip()

    # Add all matching bases to list
    results = base_class.get_bases_from_name(name)

    # If only one matching base
    if len(results) == 1:
        return results[0]

    # If not, we take only the bases which are in pool
    if len(results) > 1:
        results_2 = list()
        for base in results:
            if base.pool:
                results_2.append(base)
        # If only one matching base
        if len(results_2) == 1:
            return results_2[0]
Example #8
 def __init__(self, _max: str, departFromMax: str, _min: str,
              departFromMin: str, rainfall: str, relativeHumidityFirst: str,
              relativeHumidityFinal: str, sunset: str, sunrise: str,
              moonset: str, moonrise: str):
     self.timestamp = datetime.now().timestamp()
     reg = reg_compile(r'^(-?\d*\.?\d{1,})$')
     tmp = reg.search(_max)
     self.max = float(tmp.group()) if tmp else None
     tmp = reg.search(_min)
     self.min = float(tmp.group()) if tmp else None
     tmp = reg.search(departFromMax)
     self.departFromMax = float(tmp.group()) if tmp else None
     tmp = reg.search(departFromMin)
     self.departFromMin = float(tmp.group()) if tmp else None
     tmp = reg.search(rainfall)
     self.rainfall = float(tmp.group()) if tmp else None
     tmp = reg.search(relativeHumidityFirst)
     self.relativeHumidityAt08_30 = float(tmp.group()) if tmp else None
     tmp = reg.search(relativeHumidityFinal)
     self.relativeHumidityAt17_30 = float(tmp.group()) if tmp else None
     self.sunset = time(
         *[int(i.strip(), base=10) for i in sunset.split(':')])
     self.sunrise = time(
         *[int(i.strip(), base=10) for i in sunrise.split(':')])
     self.moonset = time(
         *[int(i.strip(), base=10) for i in moonset.split(':')])
     self.moonrise = time(
         *[int(i.strip(), base=10) for i in moonrise.split(':')])
Example #9
def load_answers():
    global answers
    answers = open_json_file("abot", [])

    # compile answers
    for answer in answers:
        answer["regex"] = [reg_compile(reg.lower()) for reg in answer["regex"]]
Example #10
 def pickByName(self, name: str) -> List[Tuple[int, str]]:
     possibleMatches = []
     reg = reg_compile(r'({})'.format(name), flags=I)
     for i in self.stations:
         _match = reg.search(i.name)
         if _match:
             possibleMatches.append((i.id, i.name))
     return possibleMatches
Example #11
 def __init__(self, date: str, _min: str, _max: str, img: str, stat: str):
     reg = reg_compile(r'^(-?\d*\.?\d{1,})$')
     self.date = date
     tmp = reg.search(_min)
     self.min = float(tmp.group()) if tmp else None
     tmp = reg.search(_max)
     self.max = float(tmp.group()) if tmp else None
     self.img = img
     self.stat = stat
Example #12
def getAllActivities(tree: BeautifulSoup) -> List[Tag]:
    reg = reg_compile(r'^(message[0-9]{1,})$')
    tmp = tree.findAll('div', attrs={'class': 'message default clearfix'})
    tmp.extend(
        tree.findAll('div', attrs={'class':
                                   'message default clearfix joined'}))
    tmp.extend([
        i for i in tree.findAll('div', attrs={'class': 'message service'})
        if reg.match(i.get('id'))
    ])
    return tmp
Example #13
 def _parse(content: str) -> Places:
     '''
         Parses HTML, using BeautifulSoup.
     '''
     cityIdReg = reg_compile(r'([\d]{1,5})$')
     base_url = 'http://city.imd.gov.in/citywx/'
     placesObj = Places([])
     state_name = reg_compile(r'^(##)$')
     station_name = reg_compile(r'^(city_weather.php\?id=[\d]{1,5})$')
     root = BeautifulSoup(content, features='lxml')
     currentState = None
     for i in root.findAll('a'):
         if state_name.match(i.get('href')):
             currentState = State(i.getText(), [])
             placesObj.push(currentState)
         if station_name.match(i.get('href')):
             currentState.push(
                 Station(i.getText(),
                         int(cityIdReg.search(i.get('href')).group()),
                         currentState.name, urljoin(base_url,
                                                    i.get('href'))))
     return placesObj
Example #14
class RedirectToHomeOrArticlePage(yui.RequestHandler):
    _PATTERN = reg_compile(r'tid-(\d+)(?:-page-\d+)?.html')

    @yui.client_cache(ARTICLE_CACHE_TIME, 'public')
    def get(self):
        match = RedirectToHomeOrArticlePage._PATTERN.match(
            self.request.query_string)
        if match:
            id = int(match.group(1))
            if id:
                article = Article.get_article_by_id(id)
                if article:
                    self.redirect(
                        BLOG_HOME_RELATIVE_PATH +
                        article.quoted_not_escaped_url(), 301)
                    return
        self.redirect('/', 301)
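
# Usage sketch (assumption): query strings of the shape the handler above
# redirects; both forms yield the article id in group(1).
from re import compile as reg_compile

_PATTERN = reg_compile(r'tid-(\d+)(?:-page-\d+)?.html')
print(_PATTERN.match('tid-42.html').group(1))         # 42
print(_PATTERN.match('tid-42-page-3.html').group(1))  # 42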
Example #15
def extractAllCompanyEmailProvider(dataStream: map) -> Tuple[Dict[str, int], int]:
    # Extracts email service provider's name using regular expression

    def __getEmailProvider__(email: str) -> str:
        matchObj = reg.search(email)
        return matchObj.group().lower() if (matchObj) else None

    # Increments usage count of an email service provider & returns the updated dictionary
    def __updateCounter__(holder: Dict[str, int],
                          email: str) -> Dict[str, int]:
        '''
        return holder if not email else dict([(email, 1)] + [(k, v) for k, v in holder.items()]) if email not in holder else dict(
            [(k, v + 1) if k == email else (k, v) for k, v in holder.items()])
        '''
        if (email):
            holder.update({email: holder.get(email, 0) + 1})
        return holder

    # Keeps only top 5 elements ( having highest usage count ) in dictionary
    def __cleanupCounter__(holder: Dict[str, int],
                           count: int,
                           findTotal: bool = True) -> Dict[str, int]:
        nonlocal total
        total += sum(holder.values()) if findTotal else 0
        return dict(
            map(lambda v: (v, holder[v]),
                sorted(holder, key=lambda v: holder[v], reverse=True)[:count]))

    try:
        total = 0
        reg = reg_compile(r'(?<=@)[^.]+(?=\.)')
        # processes one state of India at a time, extracting the most used
        # email service providers per state, then aggregates them to find
        # the providers used most by companies across all states of India
        return __cleanupCounter__(reduce(
            lambda acc, cur: __mergeTwoDicts__(
                acc,
                __cleanupCounter__(
                    reduce(
                        lambda acc, cur: __updateCounter__(
                            acc, __getEmailProvider__(cur.email)), cur, {}), 10
                )), dataStream, {}),
                                  10,
                                  findTotal=False), total
    except Exception:
        return None
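
# Usage sketch (assumption): what the look-around pattern above extracts from a
# few made-up email addresses -- only the provider label between '@' and '.'.
from re import compile as reg_compile

reg = reg_compile(r'(?<=@)[^.]+(?=\.)')
for email in ('info@gmail.com', 'contact@yahoo.co.in', 'no-at-sign'):
    m = reg.search(email)
    print(m.group().lower() if m else None)
# gmail
# yahoo
# None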
Example #16
def parse_url2(s):
	timestamp, title = s.split(' ', 1)
	dt = datetime.utcfromtimestamp(int(timestamp))
	return dt.strftime('%Y/%m/%d/') + title

def parse_format(formats):
	formats = formats.split()
	return (int(formats[0]) and CONTENT_FORMAT_FLAG['html']) | (not int(formats[1]) and CONTENT_FORMAT_FLAG['bbcode'])

def join_list(list):
	return ','.join(list) if list else ''

def to_list(string):
	return string.split(',') if string else []

HTML_TAG_PATTERN = reg_compile('<.*?>')
BBCODE_TAG_PATTERN = reg_compile(r'\[.*?\]')

def break_content_to_summary(content, format, size):
	if format & CONTENT_FORMAT_FLAG['bbcode']:
		content = BBCODE_TAG_PATTERN.sub(' ', content)
	if format == 0:
		content = escape(content)
	content = HTML_TAG_PATTERN.sub(' ', content)[:size].strip()
	amp_position = content[-6:].rfind('&')
	if amp_position > -1:
		content = content[:len(content) - 6 + amp_position]
	return content

def parse_summary(size, flag):
	def break_content(content):
Example #17
from helpers import *
from re import compile as reg_compile
from traceback import format_exc as print_traceback


mentions   = open_json_file("mentio", {}) # contains a list of keywords for each player (uuid)
max_amount = 3
arrow      = colorify(u"&r&7\u2192&r")
colors_reg = reg_compile(u"\u00A7[\\da-fk-or]") # finds color codes


def saveMentions():
    save_json_file("mentio", mentions)


@hook.event("player.AsyncPlayerChatEvent", "high")
def onChat(event):
    if not event.isCancelled():
        sender     = event.getPlayer()
        words      = event.getMessage().split(" ")
        recipients = event.getRecipients() # set of <Player>, may be a lazy or unmodifiable collection

        for recipient in list(recipients):
            recuid = uid(recipient)

            if recuid in mentions:
                keywords = mentions[uid(recipient)]
            else:
                # player
                keywords = [recipient.getName().lower(), stripcolors(recipient.getDisplayName()).lower()]
Example #18
 def extractUserAndBotNameFromMessage(self, username: str) -> Tuple[str, str]:
     regex = reg_compile(r'(.+)(?=\svia\s)\svia\s(.+)')
     return regex.search(username).groups()
Example #19
 def isAViaBotMessage(self, username: str) -> bool:
     regex = reg_compile(r'(.+)(?=\svia\s)\svia\s(.+)')
     return regex.search(username) is not None
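
# Usage sketch (assumption): the 'via bot' pattern shared by the two methods
# above, applied to made-up usernames.
from re import compile as reg_compile

regex = reg_compile(r'(.+)(?=\svia\s)\svia\s(.+)')
print(regex.search('Alice via @somebot').groups())  # ('Alice', '@somebot')
print(regex.search('Alice') is not None)            # False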
Example #20
 def _getMessage(text: str) -> str:
     '''
         Extracts actual message sent by a certain user using regex
     '''
     return reg_compile(r'\s-\s.+?(?=:):\s*').sub('', text)
Example #21
# -*-coding: utf-8-*-
"""Python module to work with SF-56 spectrum files."""

from os.path import exists, isfile, basename
from re import compile as reg_compile, split as reg_split

from plotter import Chart, show_charts

SF_COMMENT = '//'
SF_EXTENSION = '.sf'
SF_SEPARATOR_PTRN = reg_compile(r'\s+')


def read_sf(filename, name=None):
    '''Reads a spectrum file into a Chart.
    Note for sf files: the first column value must be UNIQUE!'''
    spectrum = Chart(name or basename(filename))
    with open(filename, 'r') as sf_file:
        for row in sf_file:
            row = validate_sf_row(row)
            if row:
                columns = reg_split(SF_SEPARATOR_PTRN, row)
                spectrum.insert(float(columns[0]) * 10**-9, float(columns[1]))

    return spectrum


def validate_sf_row(row):
    '''Clears spaces chars and replaces commas to dots if given row is not a comment.'''
    return row.strip().replace(',',
                               '.') if not row.startswith(SF_COMMENT) else None
Example #22
 def _getTimeFormatRegex() -> Pattern:
     '''
         Returns regular expression for extracting AM/PM pattern
         from chat timestamp, where AM/PM could be prefixed with "\s" -> whitespace
     '''
     return reg_compile(r'^(\s?[ap]m)$', flags=IGNORECASE)
Example #23
 def __extract_state__(fromIt: str) -> str:
     match_obj = reg_compile(r'^mca_(\w+)_[0-9]{8,}\.csv$').match(fromIt)
     return match_obj.group(1).capitalize() if match_obj else None
Example #24
 def __getMessage__(text: str) -> str:
     return reg_compile(r'\s-\s.+?(?=:):\s*').sub('', text)
Example #25
class PostModel:
    """
    A class that represents a user post from social media
    """
    def __init__(
        self,
        post_id: str,
        title: str,
        content: str,
        url: str,
        score: int,
        created_date: int,
        scope: str,
        has_external: bool,
    ):
        """
        Initialises the post class
        :param post_id: the post id
        :param title: the title
        :param content: the content
        :param url: the post url
        :param score: the number of likes/ upvotes/ retweets
        :param created_date: a numeric representation of the date the post was created
        :param scope: the origin scope of the post
        :param has_external: a flag indicating if the post has external links
        """
        self.post_id: str = self.get_id(scope, post_id)
        self.title: str = title
        self.content: str = content
        self.url: str = url
        self.score: int = score
        self.created_date: datetime = datetime.fromtimestamp(created_date)
        self.has_external: bool = has_external

    url_regex = reg_compile(
        r"^(https?://)?([\da-z.-]+)\.([a-z.]{2,6})([/\w .-]*)*/?")

    @staticmethod
    def get_id(source, post_id) -> str:
        """
        A function to generate a new id
        :param source: the source from which the post was obtained
        :param post_id: the post id
        :return: the new id
        """
        return f"{source}_{post_id}"

    def get_links(self) -> list:
        """
        A function to extract link matches from the content, e.g.:
        https://www.google.com
        http://www.google.com
        www.google.com
        www.google239.com/ayy/lmao?=123
        :return: the list of URL pattern matches found in the content
        """
        return list(self.url_regex.findall(self.content))

    def as_dict(self):
        """
        A utility function to get the dict representation of the post object
        :return:
        """
        dict_object = dict((key, value)
                           for key, value in self.__dict__.items()
                           if not callable(value) and not key.startswith("__"))
        dict_object["_id"] = self.post_id
        return dict_object
Example #26
 def __getUser__(text: str) -> str:
     matchObj = reg_compile(r'(?<=\s-\s).+?(?=:)').search(text)
     return matchObj.group() if matchObj else ''
Example #27
from helpers import *
from re import compile as reg_compile
from traceback import format_exc as print_traceback


mentions   = open_json_file("mentio", {}) # contains a list of keywords for each player (uuid)
max_amount = 1000
arrow      = colorify(u"&r&7\u2192&r")
colors_reg = reg_compile(u"\u00A7[\\da-fk-or]") # finds color codes


def saveMentions():
    save_json_file("mentio", mentions)


@hook.event("player.AsyncPlayerChatEvent", "monitor")
def onChat(event):
    if not event.isCancelled():
        sender     = event.getPlayer()
        words      = event.getMessage().split(" ")
        recipients = event.getRecipients() # set of <Player>, may be a lazy or unmodifiable collection

        for recipient in list(recipients):
            recuid = uid(recipient)

            if recuid in mentions:
                keywords = mentions[uid(recipient)]
            else:
                # player
                keywords = [recipient.getName().lower(), stripcolors(recipient.getDisplayName()).lower()]
Example #28
def replace_exclude(input_text, replace_text="", exclude_regex="[^a-zA-Z]"):
    """Replace all characters from input_text with provided replace_text, excluding specified in exclude_regex."""

    replace_pattern = reg_compile(exclude_regex)
    return sub(replace_pattern, replace_text, input_text)
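
# Usage sketch (assumption): with the default "[^a-zA-Z]" pattern every
# non-letter is replaced, so punctuation, digits and spaces are dropped.
# Assumes the snippet's omitted imports are `from re import compile as
# reg_compile, sub`.
print(replace_exclude("Hello, World! 123"))        # HelloWorld
print(replace_exclude("a-b-c", replace_text=" "))  # a b c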
Example #29
def input_reader(path: str) -> List[List[str]]:
    with open(path) as f:
        lines = f.read(-1).split()
    reg = reg_compile('e|se|sw|w|nw|ne')
    return [reg.findall(line) for line in lines]
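
# Usage sketch (assumption): how the direction pattern above tokenises a
# made-up hex-grid move string; no token other than 'e' starts with 'e', so
# the alternation order is safe.
from re import compile as reg_compile

reg = reg_compile('e|se|sw|w|nw|ne')
print(reg.findall('esenee'))  # ['e', 'se', 'ne', 'e']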
Example #30
 def _getUser(text: str) -> str:
     '''
         Helps in extracting participating user name from message ( text )
     '''
     matchObj = reg_compile(r'(?<=\s-\s).+?(?=:)').search(text)
     return matchObj.group() if matchObj else ''
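
# Usage sketch (assumption): the look-around pattern above picks out the
# sender name from a made-up exported chat line.
from re import compile as reg_compile

line = '21/9/2020, 9:45 pm - Alice: hello there'
matchObj = reg_compile(r'(?<=\s-\s).+?(?=:)').search(line)
print(matchObj.group() if matchObj else '')  # Alice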
Example #31
 def __getRegex__() -> Pattern:
     return reg_compile(
         r'(\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{1,2} [ap]m)')
Example #32
from re import IGNORECASE, compile as reg_compile, search as reg_search

from django.conf import settings
from django.core.exceptions import ValidationError
from django.urls.base import reverse

import django.forms as forms


def validate_orcid(orcid):
    if orcid:
        r = reg_search(settings.ORICD_REGEX, orcid)
        if not r or len(r.groups()) < 1:
            raise ValidationError('Invalid ORCID identifier.')


_KEYWORD_REGEX = reg_compile(r'\bqa4sm\b', IGNORECASE)


def validate_keywords(keywordlist):
    if not _KEYWORD_REGEX.search(keywordlist):
        raise ValidationError('Missing required keyword')


class PublishingForm(forms.Form):

    title = forms.CharField(label='Title',
                            widget=forms.Textarea(attrs={'rows': '2'}),
                            help_text='Title of the Zenodo entry')
    description = forms.CharField(label='Description',
                                  widget=forms.Textarea(attrs={'rows': '2'}),
                                  help_text='Description of the Zenodo entry')