Example #1
    def make_link(m: typing.Match) -> str:
        url = m.group(1)
        proto = m.group(2)
        if require_protocol and not proto:
            return url  # no protocol, no linkify

        if proto and proto not in permitted_protocols:
            return url  # bad protocol, no linkify

        href = m.group(1)
        if not proto:
            href = "http://" + href  # no proto specified, use http

        if callable(extra_params):
            params = " " + extra_params(href).strip()
        else:
            params = extra_params

        # clip long urls. max_len is just an approximation
        max_len = 30
        if shorten and len(url) > max_len:
            before_clip = url
            if proto:
                proto_len = len(proto) + 1 + len(m.group(3) or "")  # +1 for :
            else:
                proto_len = 0

            parts = url[proto_len:].split("/")
            if len(parts) > 1:
                # Grab the whole host part plus the first bit of the path
                # The path is usually not that interesting once shortened
                # (no more slug, etc), so it really just provides a little
                # extra indication of shortening.
                url = (
                    url[:proto_len]
                    + parts[0]
                    + "/"
                    + parts[1][:8].split("?")[0].split(".")[0]
                )

            if len(url) > max_len * 1.5:  # still too long
                url = url[:max_len]

            if url != before_clip:
                amp = url.rfind("&")
                # avoid splitting html char entities
                if amp > max_len - 5:
                    url = url[:amp]
                url += "..."

                if len(url) >= len(before_clip):
                    url = before_clip
                else:
                    # full url is visible on mouse-over (for those who don't
                    # have a status bar, such as Safari by default)
                    params += ' title="%s"' % href

        return u'<a href="%s"%s>%s</a>' % (href, params, url)
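In the original, make_link is the inner callback of a linkify-style helper handed to re.sub; require_protocol, permitted_protocols, extra_params and shorten are closure variables of that helper. A minimal, self-contained sketch of that wiring (simplified URL regex and logic, not the full behaviour above):

import re
import typing

# Simplified stand-in for the real URL regex, which has more groups
# (e.g. group 3 for the host part used when shortening).
_URL_RE = re.compile(r"\b((?:(\w+)://)?[\w.-]+\.[a-z]{2,}\S*)", re.IGNORECASE)

def linkify(text: str, require_protocol: bool = False) -> str:
    def make_link(m: typing.Match) -> str:
        url, proto = m.group(1), m.group(2)
        if require_protocol and not proto:
            return url  # no protocol, leave as plain text
        href = url if proto else "http://" + url
        return '<a href="%s">%s</a>' % (href, url)

    return _URL_RE.sub(make_link, text)

assert linkify("see example.com") == 'see <a href="http://example.com">example.com</a>'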
Example #2
    def _replacement_map_func(match_obj: Match) -> str:
        """Maps the replace_from match to the replace_to string.

        :param match_obj: The replacement character as a regex match object,
            to be used as a key.
        :return: The matching value, a string from the replacements dictionary.
        """

        # Preserve the spacing in group one, but swap the matched char(s)
        # with their replacement from the dict
        return match_obj.group(1) + replacement_dict[match_obj.group(2)]
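replacement_dict and the two-group pattern come from the enclosing code; a sketch of that contract with stand-in values, assuming the callback above is in scope (group 1 is the spacing to preserve, group 2 the character to swap):

import re

# Stand-ins for the names the callback relies on (assumptions, not the originals).
replacement_dict = {"(": "[", ")": "]"}
replace_from = re.compile(r"(\s*)([()])")

def apply_replacements(text: str) -> str:
    return replace_from.sub(_replacement_map_func, text)

assert apply_replacements("f (x)") == "f [x]"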
Example #3
 def _from_match( cls, match: Match,
     *, origin: Optional[str] = None,
 ) -> 'Target':
     path = RecordPath(match.group('path'))
     flags_group = match.group('flags')
     try:
         flags = FlagContainer.split_flags_string( flags_group,
             relative_flags=False )
     except FlagError as error:
         raise TargetError( "Error while parsing target '{}' flags."
             .format(match.group(0))) from error
     if any(flag.startswith('-') for flag in flags):
         raise RuntimeError
     return cls(path, flags, origin=origin)
Example #4
 def _derive_from_match( self, match: Match,
     *, origin: Optional[str] = None,
 ) -> 'Target':
     try:
         subpath = RecordPath(self.path, match.group('path'))
     except RecordPathError as error:
         raise TargetError(self, match.group(0)) from error
     flags_group = match.group('flags')
     try:
         flags = FlagContainer.split_flags_string(flags_group)
     except FlagError as error:
         raise TargetError(self, match.group(0)) from error
     subflags = self.flags.delta_mixed(flags=flags, origin=origin)
     return self.__class__(subpath, subflags)
Example #5
def escape_repl(m: Match, prefix: str) -> str:
    """Translate a string escape sequence, e.g. \t -> the tab character.

    Assume that the Match object is from escape_re.
    """
    
    seq = m.group(1)
    if len(seq) == 1 and seq in escape_map:
        # Single-character escape sequence, e.g. \n.
        return escape_map[seq]
    elif seq.startswith('x'):
        # Hexadecimal sequence \xNN.
        return chr(int(seq[1:], 16))
    elif seq.startswith('u'):
        # Unicode sequence \uNNNN.
        if 'b' not in prefix:
            return chr(int(seq[1:], 16))
        else:
            return '\\' + seq
    else:
        # Octal sequence.
        ord = int(seq, 8)
        if 'b' in prefix:
            # Make sure code is no larger than 255 for bytes literals.
            ord = ord % 256
        return chr(ord)
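escape_re and escape_map are defined elsewhere in the module; a hypothetical sketch of what they could look like and how the callback is applied (the real definitions may differ):

import re

# Hypothetical stand-ins for the names escape_repl expects.
escape_map = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\', "'": "'", '"': '"'}
escape_re = re.compile(r"\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|[0-7]{1,3}|.)")

def unescape_literal(s: str, prefix: str = "") -> str:
    # Apply escape_repl to every escape sequence in a string literal body.
    return escape_re.sub(lambda m: escape_repl(m, prefix), s)

assert unescape_literal(r"a\tb\x41\u0042") == "a\tbAB"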
Example #6
    def handle_thread(self, source: str,
                      match: Match) -> Generator[List[Content], None, None]:
        """Handles ``thread`` links to 4chan media.

        Args:
            source (str): The source url
            match (Match): The source match regex

        Yields:
            list[Content]: A list of various levels of quality content for \
                the same source url
        """

        matchdict = match.groupdict()
        data = self._get_data(matchdict["board"], matchdict["id"])
        for post in data.get("posts", []):
            if "md5" in post:
                content_list = []
                for (post_type, url_path, quality,
                     extension_type) in self._content_configs:
                    # build post_type depending on existing post_type
                    post_type = (f"-{post_type}" if post_type else "")
                    content_uid = (
                        f'{self.name}-{matchdict["board"]}-'
                        f'{matchdict["id"]}-{post["tim"]}{post_type}')
                    content_fragments = [
                        furl(self._img_base).add(path=url_path.format(
                            board=matchdict["board"], post=post)).url
                    ]
                    content_extension = (extension_type if extension_type else
                                         post["ext"].split(".")[-1])
                    content_description = None
                    if "com" in post and len(post["com"]) > 0:
                        content_description = HTML(html=post.get("com")).text

                    content_list.append(
                        Content(
                            uid=content_uid,
                            source=source,
                            fragments=content_fragments,
                            extractor=self,
                            extension=content_extension,
                            title=post.get("filename"),
                            description=content_description,
                            quality=quality,
                            uploaded_by=post.get("name"),
                            uploaded_date=datetime.datetime.fromtimestamp(
                                int(post.get("time"))),
                            metadata=post,
                        ))
                yield content_list
Example #7
    def get_acronym_words_start(phrase: str, match: Match) -> int:
        """
        Each acronym match should be preceded by capitalized words that start with the same letters.
        :param phrase: "rompió el silencio tras ser despedido del Canal del Fútbol (CDF). "
        :param match: "(CDF)" Match object for this example
        :return: index of the acronym's start letter (42 in this example) or -1
        """
        proc = UniversalDefinitionsParser.basic_line_processor
        name = match.group().strip('() ').upper()
        start = match.start()
        words = proc.split_text_on_words(phrase[:start])
        if len(words) < 2:
            return -1

        mistakes = 0
        uppercases = 0
        acr_index = len(name) - 1
        acr_start = words[-1].start

        for i in range(len(words) - 1, -1, -1):
            if words[i].is_separator:
                continue
            letter = words[i].text[0]
            letter_upper = letter.upper()
            is_upper = letter_upper == letter
            if is_upper:
                uppercases += 1
            is_correct = name[acr_index] == letter_upper
            if not is_correct:
                mistakes += 1
                if mistakes > 1:
                    return -1
                continue
            acr_start = words[i].start
            acr_index -= 1
            if acr_index < 0:
                break
        return acr_start if uppercases > 1 and acr_index < 0 else -1
Example #8
def handle(bot: Bot, event: events.TextMessage, match: typing.Match):
    if event.uid not in Config.whitelist_admin:
        return

    # Grab client_uid
    try:
        user = bot.exec_("clientgetnamefromdbid", cldbid=match.group(1))
        client_uid = user[0]["cluid"]
    except ts3.query.TS3QueryError:
        bot.send_message(event.id, "user_not_found")
        return

    try:
        json = fetch_api("account", api_key=match.group(2))
        account = models.Account.get_or_create(bot.session, json,
                                               match.group(2))
        identity: models.Identity = models.Identity.get_or_create(
            bot.session, client_uid)

        # Save api key in account
        account.api_key = match.group(2)
        account.is_valid = True
        bot.session.commit()

        transfer_registration(
            bot,
            account,
            event,
            is_admin=True,
            target_identity=identity,
            target_dbid=match.group(1),
        )
    except InvalidKeyException:
        logging.info("This seems to be an invalid API key.")
        bot.send_message(event.id, "invalid_token")
        return
    except (RateLimitException, RequestException, ApiErrBadData):
        bot.send_message(event.id, "error_api")
Example #9
 def replace(match: Match) -> str:
     s = match.group(0)
     try:
         return ESCAPE_DICT[s]
     except KeyError:
         n = ord(s)
         if n < 0x10000:
             return '\\u%04x' % (n, )
         else:
             # surrogate pair
             n -= 0x10000
             s1 = 0xd800 | ((n >> 10) & 0x3ff)
             s2 = 0xdc00 | (n & 0x3ff)
             return '\\u%04x\\u%04x' % (s1, s2)
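ESCAPE_DICT and the pattern this callback answers to come from the surrounding module; an illustrative sketch with stand-in values, assuming the replace callback above is in scope (characters above U+FFFF come out as a surrogate pair):

import re

# Stand-ins for the surrounding module's names (assumptions, kept minimal).
ESCAPE_DICT = {'\\': '\\\\', '"': '\\"', '\n': '\\n', '\t': '\\t'}
ESCAPE_RE = re.compile(r'[\\"\x00-\x1f\x7f-\U0010ffff]')

def escape_string(s: str) -> str:
    return ESCAPE_RE.sub(replace, s)

assert escape_string('a"b\U0001f600') == 'a\\"b\\ud83d\\ude00'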
Example #10
    def run(self, m: Match, file_reader: FileReader) -> bool:
        if file_reader.status:
            return False
        file_reader.status = 'axiom'
        axiom = Axiom(sideBar=True)
        axiom.name = m.group(1).strip()
        file_reader.objects.append(axiom)

        def normal_line(file_reader: FileReader, line: str) -> None:
            axiom.append(line)

        file_reader.normal_line_handler = normal_line
        file_reader.blank_line_handler = normal_line
        return True
Example #11
    def run(self, m: Match, file_reader: FileReader) -> bool:
        if file_reader.status:
            return False
        file_reader.status = 'tactic'
        tactic = Tactic(sideBar=True)
        tactic.name = m.group(1).strip()
        file_reader.objects.append(tactic)

        def normal_line(file_reader: FileReader, line: str) -> None:
            tactic.append(line)

        file_reader.normal_line_handler = normal_line
        file_reader.blank_line_handler = normal_line
        return True
Example #12
    def hyperlink(match: Match) -> str:
        end_text = ''
        objname = match.group(0)
        if objname.endswith('.'):
            end_text = objname[-1]
            objname = objname[:-1]
        _, short_objname = objname.split('.', 1)

        # Try importing the string, to make sure it's not pointing at
        # a symbol that doesn't actually exist.
        import_string(objname)

        return '<a href="{}reference.html#{}"><code>{}</code></a>{}'.format(
            settings.DOCS_URL, objname, short_objname, end_text)
Example #13
    def _is_valid_match(self, match: Match) -> bool:
        is_valid_tld = False
        is_ip_url = RegExpUtility.get_group(match, 'IPurl')

        if not is_ip_url:
            tld_string = RegExpUtility.get_group(match, 'Tld')
            tld_matches = self.tld_matcher().find(tld_string)
            if any(o.start == 0 and o.end == len(tld_string) for o in tld_matches):
                is_valid_tld = True

        # For cases like "7.am" or "8.pm" which are more likely time terms.
        if re.match(self.ambiguous_time_term.re, match.group(0)) is not None:
            return False
        return is_valid_tld or is_ip_url
Example #14
def _get_numbered_group(match: typing.Match,
                        name: str,
                        start: int = 0) -> typing.Optional[str]:
    groups = match.groupdict()
    idx = start

    while True:
        key = f'{name}{idx}'
        if key not in groups:
            return None
        if groups[key] is not None:
            return groups[key]
        idx += 1
    return None
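Typical use is a pattern with numbered alternative groups, only one of which can participate in a given match; a small sketch (the pattern and group names are assumptions):

import re

_PATTERN = re.compile(r'(?P<value0>\d+)|(?P<value1>[a-z]+)')

m = _PATTERN.match("hello")
assert m is not None
# value0 did not participate in the match, so the helper walks on to value1.
assert _get_numbered_group(m, "value") == "hello"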
Example #15
def extract_entity_attributes(match: Match) -> EntityAttributes:
    """Extract the entity attributes, i.e. type, value, etc., from the
    regex match.

    Args:
        match: Regex match to extract the entity attributes from.

    Returns:
        EntityAttributes object.
    """
    entity_text = match.groupdict()[GROUP_ENTITY_TEXT]

    if match.groupdict()[GROUP_ENTITY_DICT]:
        return extract_entity_attributes_from_dict(entity_text, match)

    entity_type = match.groupdict()[GROUP_ENTITY_TYPE]

    if match.groupdict()[GROUP_ENTITY_VALUE]:
        entity_value = match.groupdict()[GROUP_ENTITY_VALUE]
    else:
        entity_value = entity_text

    return EntityAttributes(entity_type, entity_value, entity_text, None, None)
Example #16
    def wombojify_repl(cls, m: Match) -> str:
        s_tag = '<img class="emoji emoji--large" alt="'

        m1_tag = '" title="'
        m2_tag = '" src="'
        e_tag = '">'
        if m.group('animated'):
            # animated emoji
            emoji_url = f"https://cdn.discordapp.com/emojis/{m.group('id')}.gif"
        else:
            emoji_url = f"https://cdn.discordapp.com/emojis/{m.group('id')}.png"

        replacement = f"{s_tag}{m.group('name')}{m1_tag}{m.group('name')}{m2_tag}{emoji_url}{e_tag}"
        return replacement
Example #17
def _starts_ends_overall(
        m: Match) -> Tuple[MatchIndexes, MatchIndexes, MatchIndexes]:
    """
    Extracts indices from a match object.

    Returns

    (groupstarts, groupends, [overall_start, overall_end])

    >>> m = re.match(r'.(.)', 'abc')
    >>> _starts_ends_overall(m)
    ([1], [2], [0, 2])
    >>> m = re.match(r'.', 'abc')
    >>> _starts_ends_overall(m)
    ([], [], [0, 1])
    """
    overall_start, overall_end = m.span()
    n_matches = len(m.groups())

    spans = [m.span(n) for n in range(1, n_matches + 1)]
    starts = [span[0] for span in spans]
    ends = [span[1] for span in spans]
    return starts, ends, [overall_start, overall_end]
Example #18
 def fixup(m: Match):
     text = m.group(0)
     if text[:2] == "&#":
         # character reference
         with cl.suppress(ValueError):
             if text[:3] == "&#x":
                 return chr(int(text[3:-1], 16))
             else:
                 return chr(int(text[2:-1]))
     else:
         # named entity
         with cl.suppress(KeyError):
             text = chr(html.entities.name2codepoint[text[1:-1]])
     return text  # leave as is
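fixup relies on contextlib imported as cl and on html.entities; it is normally used as the re.sub callback over an entity/character-reference pattern, roughly like this sketch (pattern is an assumption):

import contextlib as cl  # what fixup refers to as cl
import html.entities     # used by fixup for named entities
import re

_ENTITY_RE = re.compile(r"&#?\w+;")

def unescape_entities(text: str) -> str:
    return _ENTITY_RE.sub(fixup, text)

assert unescape_entities("&lt;b&gt; &#65; &#x42;") == "<b> A B"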
Example #19
def _upper_zero_group(match: typing.Match, /) -> str:
    """
    Uppercases all characters of the captured
    group `let`. Used to convert
    snake_case to camelCase.

    Arguments:
      match: The regex match passed in by `re.sub`

    Returns:
        The same letter from the group, but in upper case

    """
    return match.group("let").upper()
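A sketch of the snake_case to camelCase conversion this callback is meant for (the pattern here is an assumption; it names the captured letter group `let`):

import re

_SNAKE_RE = re.compile(r"_(?P<let>[a-z])")

def snake_to_camel(name: str) -> str:
    return _SNAKE_RE.sub(_upper_zero_group, name)

assert snake_to_camel("snake_case_name") == "snakeCaseName"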
Example #20
 def inline_obj(
     self,
     match: Match,
     lineno: int,
     end_pattern: Pattern,
     nodeclass: nodes.TextElement,
     restore_backslashes: bool = False,
 ):
     """Create the node for an inline class, if the end string match can be found."""
     string = match.string
     matchstart = match.start("start")
     matchend = match.end("start")
     if self.quoted_start(match):
         return (string[:matchend], [], string[matchend:], [], "")
     endmatch = end_pattern.search(string[matchend:])
     if endmatch and endmatch.start(1):  # 1 or more chars
         _text = endmatch.string[: endmatch.start(1)]
         text = unescape(_text, restore_backslashes)
         textend = matchend + endmatch.end(1)
         rawsource = unescape(string[matchstart:textend], True)
         node = nodeclass(rawsource, text)
         node[0].rawsource = unescape(_text, True)
         return (
             string[:matchstart],
             [node],
             string[textend:],
             [],
             endmatch.group(1),
         )
     msg = self.reporter.warning(
         "Inline %s start-string without end-string." % nodeclass.__name__,
         line=lineno,
     )
     text = unescape(string[matchstart:matchend], True)
     rawsource = unescape(string[matchstart:matchend], True)
     prb = self.problematic(text, rawsource, msg)
     return string[:matchstart], [prb], string[matchend:], [msg], ""
Example #21
def _convert_entity(m: typing.Match) -> str:
    if m.group(1) == "#":
        try:
            if m.group(2)[:1].lower() == 'x':
                return chr(int(m.group(2)[1:], 16))
            else:
                return chr(int(m.group(2)))
        except ValueError:
            return "&#%s;" % m.group(2)
    try:
        return _HTML_UNICODE_MAP[m.group(2)]
    except KeyError:
        return "&%s;" % m.group(2)
Example #23
 def get_object_html(cls,
                     match: Match,
                     use_preretrieved_html: bool = False) -> str:
     """Return the obj's HTML based on a placeholder in the admin."""
     if use_preretrieved_html:
         # Return the pre-retrieved HTML (already included in placeholder)
         preretrieved_html = match.group(PlaceholderGroups.HTML)
         if preretrieved_html:
             return preretrieved_html.strip()
     quote = cls.objects.get(pk=match.group(PlaceholderGroups.PK))
     if isinstance(quote, dict):
         body = quote['text']
         footer = quote.get('citation_html') or quote.get(
             'attributee_string')
     else:
         body = quote.text.html
         footer = quote.citation_html or quote.attributee_string
     return (
         f'<blockquote class="blockquote">'
         f'{body}'
         f'<footer class="blockquote-footer" style="position: relative;">'
         f'{footer}'
         f'</footer>'
         f'</blockquote>')
Example #24
    def _graft_match(
        self,
        graft: pvproject.Graft,
        fbytes: bytes,
        match: Match,
        offset: int,
        project: 'pvproject.Project',
    ) -> Tuple[bytes, int]:
        """
        :param graft:
            a graft with a non-null :attr:`pvproject.Graft.subst`
        :return:
            the substituted fbytes and the updated offset
        """
        subst = graft.subst_resolved(project)
        if subst is not None:
            mstart, mend = match.span()
            new_text = match.expand(subst)
            head = fbytes[:mstart + offset]
            tail = fbytes[mend + offset:]
            fbytes = head + new_text + tail
            offset += len(new_text) - (mend - mstart)

        return fbytes, offset
Example #25
 def from_match(cls, match: Match, mail_dtime: datetime, ttype: TransType) -> 'Transaction':
     groups_dict = match.groupdict()
     transaction = cls.__new__(cls)
     kwargs = {
         'ttype': ttype,
         'mail_dtime': mail_dtime,
     }
     if 'nat' in groups_dict:
         kwargs['amount'] = float(groups_dict['nat'].replace('.', '').replace(',', '.'))
     if 'int' in groups_dict:
         kwargs['int_amount'] = float(groups_dict['int'].replace('.', '').replace(',', '.'))
     if 'rate' in groups_dict:
         kwargs['rate'] = float(groups_dict['rate'].replace('.', '').replace(',', '.'))
     transaction.__init__(**kwargs)
     return transaction
Example #26
async def reassign_callback(event: slack_util.Event, match: Match) -> None:
    verb = slack_util.VerboseWrapper(event)

    # Find out our two targets
    from_name = match.group(1).strip()
    to_name = match.group(2).strip()

    # Get them as brothers
    from_bro = await verb(scroll_util.find_by_name(from_name, MIN_RATIO))
    to_bro = await verb(scroll_util.find_by_name(to_name, MIN_RATIO))

    # Score by name similarity to the first brother. Don't care if signed off or not,
    # as we want to be able to transfer even after signoffs (why not, amirite?)
    def scorer(assign: house_management.JobAssignment):
        if assign.assignee is not None:
            r = fuzz.ratio(from_bro.name, assign.assignee.name)
            if r > MIN_RATIO:
                return r

    # Change the assignee
    async def modifier(context: _ModJobContext):
        context.assign.assignee = to_bro

        # Say we did it
        reassign_msg = "Job {} reassigned from {} to {}".format(
            context.assign.job.pretty_fmt(), from_bro, to_bro)
        client.get_slack().reply(event, reassign_msg)

        # Tell the people
        reassign_msg = "Job {} reassigned from {} to {}".format(
            context.assign.job.pretty_fmt(), from_bro, to_bro)
        await alert_user(from_bro, reassign_msg)
        await alert_user(to_bro, reassign_msg)

    # Fire it off
    await _mod_jobs(event, scorer, modifier)
Example #27
def process_amp(data: TreeManager,
                spec: TreeManager,
                properties: PropertyManager,
                match: Match,
                lookup_offset=0) -> str:
    """
    Process ampersand matches and replace the & expression with the resolved value.

    match: the regular expression match; it should contain 3 match groups: &{0} | &({0},{0})
    """
    # Process the [] operator
    if match.group()[0] == '[':
        rematch = re.match(r'\[#([0-9]+)\]', match.group())
        # Default to "up 2 levels" which uses data[:-1] to generate the key value for the array
        t = tuple(data.path[:-int(rematch.groups()[0]) +
                            1]) if rematch else tuple(data.path[:-1]), tuple(
                                spec.path[:-1])
        # Use a default dict in the property class to return the index
        return properties[t].array_bind[data.current_key]
    elif match.group()[0] == '\\':
        # Catch the case where \ is used to escape an operator []@#$& or \ itself
        return match.group()[1:]

    ascend = int(match.groups()[0] or match.groups()[1] or 0) - lookup_offset
    descend = int(match.groups()[2] or 0) if (
        match.groups()[2] or '0').isnumeric() else match.groups()[2]

    # Return the processed &,@ pattern result by ascending and descending the data tree
    if isinstance(descend, int):
        if descend == 0:
            return get_operator_value(data.ascend(ascend), spec.ascend(ascend),
                                      properties, match)
        return properties[data.ascend(ascend).path].matches[descend]
    elif isinstance(descend, str):
        # Spec is not defined for string key descent
        return get_operator_value(
            data.ascend(ascend - 1)[descend], None, properties, match)
    elif isinstance(descend, list):
        return reduce(operator.getitem, [data.ascend(ascend)] + descend)
    raise JoltException()
Example #28
    def replace_contraction_matches(contraction_match: Match) -> str:
        """
        Replaces contraction matches (used as argument to re.sub).

        Parameters
        ----------
        contraction_match : re.Match
            Contraction regex match.

        Returns
        -------
        match_result : str
            Fixed string (mapping from contraction match).
        """
        match = contraction_match.group(0).lower()
        return contractions_dict_lower.get(match)
Example #29
def _linkify(match: Match, link: Callable[..., str], module: pdoc.Module,
             _is_pyident=re.compile(r'^[a-zA-Z_]\w*(\.\w+)+$').match, **kwargs):
    matched = match.group(0)
    refname = matched.strip('`')
    dobj = module.find_ident(refname)
    if isinstance(dobj, pdoc.External):
        if not _is_pyident(refname):
            return matched
        # If refname in documentation has a typo or is obsolete, warn.
        # XXX: Assume at least the first part of refname, i.e. the package, is correct.
        module_part = module.find_ident(refname.split('.')[0])
        if not isinstance(module_part, pdoc.External):
            warn('Code reference `{}` in module "{}" does not match any '
                 'documented object.'.format(refname, module.refname),
                 ReferenceWarning, stacklevel=3)
    return link(dobj, **kwargs)
Example #30
def _expand_dollars(match: T.Match) -> str:
    group = match.group(1)
    parts = group.split(".")
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if dollars and cents:
        dollar_unit = "dollar" if dollars == 1 else "dollars"
        cent_unit = "cent" if cents == 1 else "cents"
        return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        dollar_unit = "dollar" if dollars == 1 else "dollars"
        return "%s %s" % (dollars, dollar_unit)
    if cents:
        cent_unit = "cent" if cents == 1 else "cents"
        return "%s %s" % (cents, cent_unit)
    return "zero dollars"
Example #31
def _expand_number(m: Match) -> str:
    num = int(m.group(0))
    if num > 1000 and num < 3000:
        if num == 2000:
            return 'two thousand'
        elif num > 2000 and num < 2010:
            return 'two thousand ' + _inflect.number_to_words(num % 100)
        elif num % 100 == 0:
            return _inflect.number_to_words(num // 100) + ' hundred'
        else:
            return _inflect.number_to_words(num,
                                            andword='',
                                            zero='oh',
                                            group=2).replace(', ', ' ')
    else:
        return _inflect.number_to_words(num, andword='')
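Similarly, this is meant as a re.sub callback over plain digit runs; a sketch (the pattern is an assumption, and the exact wording of the output depends on the inflect engine behind _inflect):

import re

_NUMBER_RE = re.compile(r"[0-9]+")

def expand_numbers(text: str) -> str:
    return _NUMBER_RE.sub(_expand_number, text)

# e.g. expand_numbers("born in 1984") -> "born in nineteen eighty-four"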
Example #32
    def _map_token_to_re(cls, token_match: ReMatch,
                         indices: Set[AnyStr]) -> AnyStr:
        index, modifier = token_match.group("index", "modifier")

        if index in indices:
            cls._raise_on_duplicate_token_index(token_match)
        else:
            indices.add(index)

        regex = f"(?P<{_TOKEN_GROUP_PREFIX}{index}>%s)"
        if not modifier:
            return regex % ".*?"
        if modifier == "G":
            return regex % ".*"
        elif modifier.startswith("S"):
            total_spaces = modifier[1:]
            return regex % fr"(?:\S*\s\S*){{{total_spaces}}}\S*"
Example #33
def md_codeblock(match: typing.Match) -> str:
    """Substitution method to replace markdown code blocks with pygmented HTML.

    Should be called from the substitution (sub) regex method.

    Args:
        match: matched block

    Returns:
        A string containing the highlighted (HTML) code block.
    """
    lang, code = match.groups()
    try:
        lexer = pygments.lexers.get_lexer_by_name(lang)
    except ValueError:
        lexer = pygments.lexers.TextLexer()
    return pygments.highlight(code, lexer, FORMATTER)
Example #34
def datetime_from_string(o: Match) -> datetime.datetime:
    date = date_from_string(o)
    time = time_from_string(o)
    tz = o.group('tz')
    tzi: Optional[datetime.timezone]
    if tz == 'Z':
        tzi = datetime.timezone.utc
    elif tz:
        td = datetime.timedelta(hours=int(tz[1:3]), minutes=int(tz[4:6]))
        if tz[0] == '-':
            td = -td
        tzi = datetime.timezone(td)
    else:
        tzi = None
    rv = datetime.datetime(date.year, date.month, date.day, time.hour,
                           time.minute, time.second, time.microsecond, tzi)
    return rv
Example #35
 def peek_quoted_part(match: Match,
                      start_func: Callable[[Match, Match], int],
                      end_func: Callable[[Match, Match], int],
                      match_prob: int) -> List[DefinitionMatch]:
     defs = []
     text = match.group()
     quoted_entries = [m for m in CommonDefinitionPatterns.reg_quoted.finditer(text)]
     if len(quoted_entries) == 0:
         return defs
     for entry in quoted_entries:
         df = DefinitionMatch()
         df.name = entry.group()
         df.start = start_func(match, entry)
         df.end = end_func(match, entry)
         df.probability = match_prob
         defs.append(df)
     return defs
Example #36
def _instantiate_matched(match: Match, group_name: str, inclusion: str) -> Tuple[str, str]:
    num_perc_s = group_name.count("%s")
    matches = [g or "" for g in match.groups()]

    if len(matches) < num_perc_s:
        raise RuntimeError("Invalid entry in inventory_logwatch_groups: group name "
                           "%r contains %d times '%%s', but regular expression "
                           "%r contains only %d subexpression(s)." %
                           (group_name, num_perc_s, inclusion, len(matches)))

    if not matches:
        return group_name, inclusion

    for num, group in enumerate(matches):
        inclusion = eval_regex.instantiate_regex_pattern_once(inclusion, group)
        group_name = group_name.replace("%%%d" % (num + 1), group)
    return group_name % tuple(matches[:num_perc_s]), inclusion
Example #37
def lex_olist(m: Match) -> Optional[Tuple[str, int]]:
	"""
	Attempt to parse a numeral on the list item, be it decimal, roman or alphabetical
	returns list_type, number
	"""
	# TODO: support for non-latin alphabet numbering? HTML doesn't seem to support it
	_, numeral = m.groups()
	try:
		return '1', int(numeral)  # is it an integer?
	except ValueError:
		try:
			value = from_roman(numeral.upper())  # is it a roman numeral?
			case = 'i' if numeral.lower() == numeral else 'I'
			return case, value
		except InvalidRomanNumeralError:
			value = 0  # is it just a letter?
			for char in numeral:
				if char not in string.ascii_letters:
					return None
				value = value * 26 + (string.ascii_lowercase.index(char.lower()) + 1)
			case = 'a' if numeral.lower() == numeral else 'A'
			return case, value
Example #38
 def link_to(matchobj: Match) -> str:
     return r"%s<a href='?%s' class='nocode'>%s</a>%s" % (
         matchobj.group(1),
         self.req_qs(link, use_stored=False),
         e_html(link),
         matchobj.group(1))
Example #39
 def from_match(cls, match: Match) -> 'RouteNode':
     place = match.group('place').strip(QUOTES_AND_WHITESPACE)
     visited = bool(match.group('strikethrough'))
     skipped = match.group('skipped') is not None
     skip_reason = match.group('skip_reason') or ('' if skipped else None)
     return cls(name=place, visited=visited, skip_reason=skip_reason)
Example #40
 def substitute_one(m: Match) -> str:
     name = m.group(1)
     if name not in args:
         return m.group()
     return self._to_rdf(args[name], prefixes).n3()