Пример #1
0
    def check_for_match(self, pattern: re.Pattern) -> bool:
        """Search the lower-cased title and text for ``pattern``.

        ``pattern.findall`` is run on ``self.title`` and then ``self.text``
        (each one skipped when falsy) and all hits are collected. When at
        least one hit exists, a log line is emitted and the hits are stored
        in ``self.match_words`` after passing through
        ``tools.delete_duplicates``.

        :param pattern: compiled regular expression to look for
        :return: ``True`` when anything matched, ``False`` otherwise
        """
        found = []
        for attr in ('title', 'text'):
            value = getattr(self, attr)
            if value:
                found += pattern.findall(value.lower())

        if not found:
            return False

        logger.info('{0} - {1}: Match!!!'.format(self.id,
                                                 self.source_name))
        self.match_words = tools.delete_duplicates(found)
        return True
Пример #2
0
def parse_cpp(input_string: str, regex: re.Pattern = FRAME_REGEX) ->...:
    """
    Build one Class object per match of ``regex`` in ``input_string``.

    For every match, group 1 (stripped) is passed as the first argument to
    Class(), group 0 is searched with CLI_FLAG_REGEX for a description and
    group 2 is treated as the body, one member per line.

    :input_string: a string containing c++ code.
    :regex: a regex pattern, to be used to parse c++ structs and classes
    :return: a list of Class() objects, in match order
    """
    parsed = []
    for groups in regex.findall(input_string):
        entry = Class(groups[1].strip(), [], [])

        # Description: only the first CLI flag hit is used; each comment
        # found inside it becomes one stripped doc_string line.
        flag_hits = CLI_FLAG_REGEX.findall(groups[0])
        if flag_hits:
            comments = COMMENT_REGEX.findall(flag_hits[0])
            entry.doc_string.extend([comment.strip() for comment in comments])

        # Body: drop one trailing ';' or ',' per line, then skip blank
        # lines and '//' comment lines.
        for raw_line in groups[2].split('\n'):
            member = raw_line.strip()
            if member.endswith((';', ',')):
                member = member[:-1]
            if member and not member.startswith('//'):
                entry.members.append(member)

        parsed.append(entry)
    return parsed
Пример #3
0
    def resolve_path(
        self,
        path: str,
        *,
        _rx: re.Pattern = re.compile("{([^}{]+)}"),  # {xxx} or {xxx:int}
        _default_schema: t.Dict[str, t.Any] = {"type": "string"},
    ) -> t.Tuple[str, t.Optional[t.Dict[str, t.Dict[str, t.Any]]]]:
        """Normalise a templated path and collect a schema per placeholder.

        Placeholders are written ``{name}`` or ``{name:type}``. The
        ``:type`` suffix is removed from the returned path and recorded as
        ``{"type": type}``; placeholders without a suffix get a copy of
        ``_default_schema``.

        Results are memoised in ``self._parsed_path_map`` under the
        original ``path``. This now includes templated paths, which were
        previously re-parsed on every call.

        :param path: path that may contain ``{...}`` placeholders
        :param _rx: placeholder pattern (internal, keyword-only)
        :param _default_schema: schema for untyped placeholders (internal);
            it is copied per placeholder so callers never share or mutate
            the default object
        :return: ``(path, None)`` when the path has no ``{``, otherwise
            ``(normalised_path, {name: schema})``
        """
        cached = self._parsed_path_map.get(path)
        if cached is not None:
            return cached
        if "{" not in path:
            result = self._parsed_path_map[path] = (path, None)
            return result

        resolved = path
        schemas: t.Dict[str, t.Dict[str, t.Any]] = {}
        for placeholder in _rx.findall(path):
            name = placeholder.strip()
            if ":" in placeholder:
                name, typ = (part.strip() for part in name.split(":", 1))
                schema = {"type": typ}  # todo: lookup type
                resolved = resolved.replace(placeholder, name)
            else:
                # Copy so the shared default dict is never aliased.
                schema = dict(_default_schema)
            schemas[name] = schema

        # Key the cache on the original path, not the normalised one.
        result = self._parsed_path_map[path] = (resolved, schemas)
        return result
Пример #4
0
def normalise_text_to_only_regex_matches(text: str, matcher: re.Pattern) -> str:
    """
    Keep only the parts of ``text`` that ``matcher`` finds.

    The pieces returned by ``matcher.findall`` are concatenated in order,
    with nothing between them.

    :param text: str
    :param matcher: re.Pattern
    :return: str
    """
    kept_pieces = matcher.findall(text)
    separator = ""
    return separator.join(kept_pieces)
Пример #5
0
def process_url(url:str, pattern:re.Pattern, target:str) -> str:
    """Fetch ``url`` and report the regex matches that contain ``target``.

    :param url: address handed to ``url_to_html``
    :param pattern: compiled pattern applied to the fetched html
    :param target: substring a match must contain to be kept
    :return: the kept matches joined with ``,``; ``'none'`` when nothing is
        kept; ``'url not found'`` on ``HTTPError``;
        ``'not a valid web address'`` on ``ValueError``
    """
    try:
        page = url_to_html(url)
        # Keep only the matches that mention the target string.
        kept = [found for found in pattern.findall(page) if target in found]
        if not kept:
            return 'none'
        return ','.join(kept)
    except HTTPError:
        return 'url not found'
    except ValueError:
        return 'not a valid web address'
Пример #6
0
    def _find_inner(partial_xmp: str, pattern: re.Pattern) -> str:
        match = pattern.findall(partial_xmp)

        # If called on a string but no match was found, findall() returns an empty list:
        if match:
            # Returns the whole match
            return match[0]
        else:
            raise XMPTagNotFoundError(
                "A tag pattern did not match with the XMP string. The tag may not exist."
            )
Пример #7
0
def findall_in_files(pattern: re.Pattern, filenames: List[str],
                     encoding: str) -> re.Match:
    """Yield every ``pattern.findall`` result found in the given files.

    This is a generator. Each file is read whole in binary mode, decoded
    with ``encoding`` and searched; results are yielded one by one, file by
    file, in order. The input file name and every match are logged at debug
    level.
    """
    for path in filenames:
        logging.debug('util.findall_in_files: input file %s', path)
        with open(path, 'rb') as handle:
            content = handle.read().decode(encoding)
            for hit in pattern.findall(content):
                logging.debug(
                    'util.findall_in_files(): match: file = %s, text = %s',
                    path, hit)
                yield hit
Пример #8
0
    def _fuzzy_id(self, regex: re.Pattern, text: str) -> str:
        """transform a sample id into fuzzy mode according the regex pattern

        Args:
            regex (re.Pattern): The info retains are in the capture patterns
            text (str): input sample id

        Returns:
            str: fuzzy mode sample id
        """
        matches = regex.findall(text)
        if matches:
            text = '_'.join(matches[0])

        return text
Пример #9
0
def parse_single_match(words: list, compiled_re: re.Pattern) -> float:
    """
    Find the first word that compiled_re matches and return it as a float.

    Commas in a word are replaced by dots before matching. The matching
    word is removed from the original list.
    :param words: list of words (mutated on success)
    :param compiled_re: pattern whose first findall result is converted
    :return: the float value, or None when no word matches
    """
    for position, word in enumerate(words):
        hits = compiled_re.findall(word.replace(",", "."))
        if not hits:
            continue
        number = float(hits[0])
        words.pop(position)
        return number
    return None
Пример #10
0
def parse_dup_match(words: list, compiled_re: re.Pattern, units: dict) -> float:
    """
    Find the first volume string (1dl, 0.5l etc) and return it as a float.

    Commas in a word are replaced by dots before matching. The first two
    fields of the first findall result are used as amount and unit, and the
    amount is multiplied by units[unit]. The matching word is removed from
    the original list.
    :param words: list of words (mutated on success)
    :param compiled_re: pattern yielding (amount, unit) results
    :param units: multiplier per unit
    :return: float volume, or None when no word matches
    """
    for position, word in enumerate(words):
        hits = compiled_re.findall(word.replace(",", "."))
        if not hits:
            continue
        first = hits[0]
        volume = float(first[0]) * units[first[1]]
        words.pop(position)
        return volume
    return None
def _get_latest(
    html: str,
    pattern: re.Pattern,
    sort_key: t.Optional[t.Callable] = None,
) -> t.Optional[t.Union[str, t.Tuple[str, ...]]]:
    """Pick one ``pattern.findall`` result from ``html``.

    :param html: text to search
    :param pattern: compiled pattern; its ``findall`` results are the
        candidates
    :param sort_key: optional key function; with several candidates the
        one with the largest key is returned. When omitted, the first
        candidate is returned.
    :return: the selected candidate, or ``None`` (after a warning) when
        the pattern does not match

    The signature previously read ``sort_key=t.Optional[t.Callable]``,
    which made the typing object the default value rather than the
    annotation. Omitting ``sort_key`` with several matches then failed
    inside ``max()``. The default is now ``None``.
    """
    match = pattern.findall(html)
    if not match:
        log.warning("%s did not match", pattern.pattern)
        return None
    if sort_key is None or len(match) == 1:
        result = match[0]
    else:
        log.debug("%s matched multiple times, selected latest",
                  pattern.pattern)
        result = max(match, key=sort_key)
    log.debug("%s matched %s", pattern.pattern, result)
    return result
Пример #12
0
    def replace(self, search_value: re.Pattern, replace_value: str, replace_replaced_words: bool = False):
        """Substitute every match of ``search_value`` in ``self.word``.

        Nothing is done when ``replace_replaced_words`` is false and
        ``self.search_value_contains_replaced_words`` returns a truthy
        value. Otherwise ``self.word`` is rewritten with
        ``search_value.sub``. If the word changed and the pattern matched
        more than once, one ``replace_value`` entry per match is added to
        ``self.replaced_words``.

        :return: ``self``
        """
        if not replace_replaced_words:
            if self.search_value_contains_replaced_words(search_value, replace_value):
                return self

        original = self.word
        if search_value.search(original) is None:
            updated = original
        else:
            updated = search_value.sub(replace_value, original)

        hits = search_value.findall(original)
        replaced_words: List[str] = []
        if len(hits) > 1:
            replaced_words = [hit.replace(hit, replace_value) for hit in hits]

        if updated != original:
            for entry in replaced_words:
                self.replaced_words.add(entry)
            self.word = updated
        return self
Пример #13
0
    def replace_with_func_single(self, search_value: re.Pattern, func: Callable[[], str], replace_replaced_words: bool = False):
        """Replace the first matched text in ``self.word`` with ``func()``.

        ``func`` is called once, up front. Nothing else is done when
        ``replace_replaced_words`` is false and
        ``self.search_value_contains_replaced_words`` returns a truthy
        value. Otherwise the text of the first match is swapped for the
        replacement using ``str.replace``. If the word changed and the
        pattern matched more than once, one replacement entry per match is
        added to ``self.replaced_words``.

        :return: ``self``
        """
        replace_value = func()
        if not replace_replaced_words:
            if self.search_value_contains_replaced_words(search_value, replace_value):
                return self

        original = self.word
        updated = original
        first_hit = search_value.search(original)
        if first_hit is not None:
            updated = original.replace(first_hit.group(), replace_value)

        hits = search_value.findall(original)
        replaced_words: List[str] = []
        if len(hits) > 1:
            replaced_words = [hit.replace(hit, replace_value) for hit in hits]

        if updated != original:
            for entry in replaced_words:
                self.replaced_words.add(entry)
            self.word = updated
        return self
Пример #14
0
def cut_part(
        text: str,
        split_pattern: re.Pattern,
        with_spliter: bool = True,
        with_offset: bool = False) -> list:
    """
    Split text into parts at every match of the given Regex Pattern.

    Parameters
    ----------
    text: raw text.
    split_pattern: how to split text.
    with_spliter: when true, each part keeps the spliter that follows it.
    with_offset: when true, each item is a (part, start, end) tuple.

    Returns
    --------
    out: the non-empty parts, in order. The running offset always advances
        past the spliter, whether or not the spliter is kept in the part.
    """
    separators = split_pattern.findall(text)
    pieces = split_pattern.split(text)
    separator_count = len(separators)

    result = []
    cursor = 0
    for index, piece in enumerate(pieces):
        trailing = 0
        if index < separator_count:
            separator = separators[index]
            if with_spliter:
                piece = piece + separator
            else:
                # The spliter is dropped from the part but still counted.
                trailing = len(separator)
        stop = cursor + len(piece) + trailing
        if piece:
            result.append((piece, cursor, stop) if with_offset else piece)
        cursor = stop
    return result
Пример #15
0
    def parseTimeFromRegex_(regex: re.Pattern, text: str) -> int:
        """Convert the first ``regex`` match in ``text`` to seconds.

        The first ``findall`` result is read as hours, minutes, seconds and
        milliseconds, in that order. Conversion stops at the first field
        that cannot be read; that field and the ones after it stay 0.

        :return: the total in seconds, or ``None`` when nothing matches or
            the first field is an empty string
        """
        found = regex.findall(text)
        if not found:
            return None
        fields = found[0]
        if fields[0] == '':
            return None

        hours = minutes = seconds = milliseconds = 0
        try:
            hours = int(fields[0])
            minutes = int(fields[1])
            seconds = float(fields[2])
            milliseconds = float(fields[3])
        except Exception:
            pass
        # For timeColonFormatMilliseconds_ the milliseconds field is not
        # matched here, but seconds is a decimal, so the sum below still
        # comes out right.

        return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600)
Пример #16
0
    def replace_with_func_multiple(self, search_value: re.Pattern, func: Callable[[str, str], str], replace_replaced_words: bool = False):
        """Replace the first matched text, building the replacement from its groups.

        When ``search_value`` does not match ``self.word`` nothing happens.
        Otherwise ``func`` is called with groups 1 and 2 of the first match
        to produce the replacement. Nothing else is done when
        ``replace_replaced_words`` is false and
        ``self.search_value_contains_replaced_words`` returns a truthy
        value. The text of the first match is then swapped for the
        replacement using ``str.replace``. If the word changed and the
        flattened ``findall`` result has more than one item, one
        replacement entry per item is added to ``self.replaced_words``.

        :return: ``self``
        """
        original = self.word
        captures = search_value.search(original)
        if captures is None:
            return self

        replace_value = func(captures.group(1), captures.group(2))
        if not replace_replaced_words:
            if self.search_value_contains_replaced_words(search_value, replace_value):
                return self

        updated = original.replace(captures.group(0), replace_value)

        pieces = list(flatten(search_value.findall(original)))
        replaced_words: List[str] = []
        if len(pieces) > 1:
            replaced_words = [piece.replace(piece, replace_value) for piece in pieces]

        if updated != original:
            for entry in replaced_words:
                self.replaced_words.add(entry)
            self.word = updated
        return self
Пример #17
0
def main(imgur_id: str, imgur_secret: str, imgur_refresh: str,
         reddit_secret: str, reddit_id: str, reddit_password: str,
         reddit_agent: str, reddit_username: str, latex: re.Pattern,
         context: re.Pattern, hypercontext: re.Pattern) -> None:
    """Runs the bot

    Unread inbox items are streamed, LaTeX expressions and their
    surrounding contexts are extracted from each comment body, and
    ``form_comment`` is called for every comment that contains at least one
    expression.

    Any exception that escapes the processing loop causes a 60 second pause
    followed by a full restart with fresh clients. The restart is done with
    a loop rather than by calling ``main`` recursively, so repeated
    failures no longer grow the call stack towards ``RecursionError``.

    :param imgur_id: The Imgur client ID
    :type imgur_id: str
    :param imgur_secret: The Imgur client secret
    :type imgur_secret: str
    :param imgur_refresh: The Imgur client refresh token
    :type imgur_refresh: str
    :param reddit_secret:  The script secret
    :type reddit_secret: str
    :param reddit_id: The script ID
    :type reddit_id: str
    :param reddit_password: The bot account's password
    :type reddit_password: str
    :param reddit_agent: The script's user agent
    :type reddit_agent: str
    :param reddit_username: The bot account's username
    :type reddit_username: str
    :param latex: The pattern to match for a LaTeX expression
    :type latex: re.Pattern
    :param context: The pattern to match for a context
    :type context: re.Pattern
    :param hypercontext: The pattern to match for a hyperlink's context
    :type hypercontext: re.Pattern

    NOTE(review): the earlier docstring listed ``ValueError`` for invalid
    credentials, but every ``Exception`` raised inside the loop is caught
    here and triggers a restart, so it does not propagate to the caller --
    confirm this is intended.
    """

    # Restarts the bot after a pause in case of 503 (or any other error)
    while True:
        try:
            # Creates the Reddit client and the Imgur client
            r = reddit_client(reddit_secret, reddit_id, reddit_password,
                              reddit_agent, reddit_username)
            i = authenticate(imgur_id, imgur_secret, imgur_refresh)

            while True:
                # Inbox records all mentions in Reddit
                # This will use Reddit inbox's read/unread feature to keep
                # track of processed comments
                for comment in praw.models.util.stream_generator(r.inbox.unread):
                    # Every formula found in the comment
                    formulae = latex.findall(comment.body)
                    if not formulae:
                        continue

                    ctx = []
                    hyperctx = []
                    # The text between formulae carries their contexts
                    for content in latex.split(comment.body):
                        # Add primary contexts to a list
                        ctx.extend(context.findall(content))
                        # Add hyperlink contexts to a list
                        hyperctx.extend(hypercontext.findall(content))

                    try:
                        with timeout(10):
                            form_comment(i, comment, formulae, ctx, hyperctx)

                    # This covers people making LaTeX renders that are too big
                    except Exception:
                        comment.mark_read()

        except Exception:
            # Back off before rebuilding the clients and resuming.
            time.sleep(60)
Пример #18
0
def find_matches(pattern: re.Pattern, input_str: str) -> bool:
    """Tell whether ``pattern.findall`` returns any result for ``input_str``."""
    return len(pattern.findall(input_str)) != 0
Пример #19
0
 def _test_regex_findall_dict(self, regex: re.Pattern,
                              dct: Dict[str, List[str]]):
     for test, matches in dct.items():
         with self.subTest(test=test):
             self.assertEqual(matches, regex.findall(test))
Пример #20
0
 def find_tier(x: str, compiled_re: re.Pattern):
     """Return the first ``compiled_re.findall`` result for ``x``, or ``None`` when there is none."""
     found = compiled_re.findall(x)
     return found[0] if found else None