Exemplo n.º 1
0
    def __call__(self, value):
        """
        Validate ``value`` as an email address.

        The part before the last "@" must match ``self.user_regex``. The part
        after it is accepted when it is listed in ``self.domain_allowlist`` or
        passes ``self.validate_domain_part()``, either as given or after
        conversion with ``punycode()``.

        Raises ValidationError built from ``self.message`` and ``self.code``
        (with the offending value in ``params``) on any failure.
        """
        def invalid():
            # Every rejection path reports the same error.
            return ValidationError(self.message,
                                   code=self.code,
                                   params={'value': value})

        if not value or '@' not in value:
            raise invalid()

        local, host = value.rsplit('@', 1)
        if not self.user_regex.match(local):
            raise invalid()

        if host in self.domain_allowlist or self.validate_domain_part(host):
            return

        # The plain check failed; retry with the domain converted by
        # punycode() in case it is an IDN.
        try:
            ascii_host = punycode(host)
        except UnicodeError:
            ascii_host = None
        if ascii_host is None or not self.validate_domain_part(ascii_host):
            raise invalid()
Exemplo n.º 2
0
def smart_urlquote(url):
    """Return ``url`` quoted, without double-quoting already-quoted parts."""
    # Tilde is part of RFC3986 Unreserved Characters
    # https://tools.ietf.org/html/rfc3986#section-2.3
    # See also https://bugs.python.org/issue16285
    safe_chars = RFC3986_SUBDELIMS + RFC3986_GENDELIMS + '~'

    def requote(text):
        # Unquote first so existing escapes are not quoted a second time.
        return quote(unquote(text), safe=safe_chars)

    try:
        parts = urlsplit(url)
    except ValueError:
        # invalid IPv6 URL (normally square brackets in hostname part).
        return requote(url)
    scheme, netloc, path, query, fragment = parts

    # Handle IDN before quoting.
    try:
        netloc = punycode(netloc)  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        return requote(url)

    if query:
        # Keys and values are unquoted separately so that separators embedded
        # in a value are not mistaken for querystring separators. See #22267.
        pairs = parse_qsl(query, keep_blank_values=True)
        # urlencode will take care of quoting
        query = urlencode([(unquote(key), unquote(val)) for key, val in pairs])

    return urlunsplit(
        (scheme, netloc, requote(path), query, requote(fragment))
    )
Exemplo n.º 3
0
def sanitize_address(addr, encoding):
    """
    Format a pair of (name, address) or an email address string.

    ``addr`` is either a ``(name, address)`` tuple or a single value that is
    converted with ``force_str()`` and parsed as one mailbox. ``encoding`` is
    the charset used when the display name or local part has to be
    header-encoded.

    Return ``str()`` of the resulting ``Address``.

    Raise ValueError if a string address cannot be parsed completely, or if
    the address of a tuple contains no "@".
    """
    if not isinstance(addr, tuple):
        addr = force_str(addr)
        try:
            token, rest = parser.get_mailbox(addr)
        except (HeaderParseError, ValueError, IndexError):
            raise ValueError('Invalid address "%s"' % addr)
        if rest:
            # The entire email address must be parsed.
            raise ValueError(
                'Invalid address; only %s could be parsed from "%s"' %
                (token, addr))
        nm = token.display_name or ""
        localpart = token.local_part
        domain = token.domain or ""
    else:
        nm, address = addr
        try:
            localpart, domain = address.rsplit("@", 1)
        except ValueError:
            # Without an "@" the unpacking fails; report the bad address
            # instead of leaking the opaque unpacking message.
            raise ValueError('Invalid address "%s"' % address)

    nm = Header(nm, encoding).encode()
    # Avoid UTF-8 encode, if it's possible.
    try:
        localpart.encode("ascii")
    except UnicodeEncodeError:
        localpart = Header(localpart, encoding).encode()
    domain = punycode(domain)

    parsed_address = Address(nm, username=localpart, domain=domain)
    return str(parsed_address)
Exemplo n.º 4
0
    def __call__(self, value):
        """
        Validate ``value`` as a URL.

        Checks, in order: the value is a ``str``; it contains none of
        ``self.unsafe_chars``; the text before the first "://" (lowercased) is
        in ``self.schemes``; ``urlsplit()`` can parse it; the parent
        validator accepts it (retrying with the netloc converted by
        ``punycode()`` if the first attempt fails); a bracketed host in the
        netloc is a valid IPv6 address; and the hostname exists and is at
        most 253 characters long.

        Raises ValidationError on any failure.
        """
        def invalid():
            # Error raised by every check performed in this method.
            return ValidationError(self.message,
                                   code=self.code,
                                   params={'value': value})

        if not isinstance(value, str):
            raise invalid()
        if self.unsafe_chars.intersection(value):
            raise invalid()

        # Check if the scheme is valid.
        scheme = value.partition('://')[0].lower()
        if scheme not in self.schemes:
            raise invalid()

        # Then check full URL
        try:
            parts = urlsplit(value)
        except ValueError:
            raise invalid()

        try:
            super().__call__(value)
        except ValidationError as e:
            if not value:
                raise
            # Trivial case failed. Try for possible IDN domain
            scheme, netloc, path, query, fragment = parts
            try:
                netloc = punycode(netloc)  # IDN -> ACE
            except UnicodeError:  # invalid domain part
                raise e
            super().__call__(
                urlunsplit((scheme, netloc, path, query, fragment))
            )
        else:
            # Now verify IPv6 in the netloc part
            bracketed = re.search(r'^\[(.+)\](?::\d{1,5})?$', parts.netloc)
            if bracketed:
                try:
                    validate_ipv6_address(bracketed[1])
                except ValidationError:
                    raise invalid()

        # The maximum length of a full host name is 253 characters per RFC 1034
        # section 3.1. It's defined to be 255 bytes or less, but this includes
        # one byte for the length of the name and one byte for the trailing dot
        # that's used to indicate absolute names in DNS.
        hostname = parts.hostname
        if hostname is None or len(hostname) > 253:
            raise invalid()
Exemplo n.º 5
0
 def handle_word(
     self,
     word,
     *,
     safe_input,
     trim_url_limit=None,
     nofollow=False,
     autoescape=False,
 ):
     """
     Return ``word`` turned into an HTML link when it looks like a URL or an
     email address; otherwise return it marked safe, escaped, or unchanged
     depending on ``safe_input`` and ``autoescape``.
     """
     def unlinked():
         # Result for a word that does not become a link.
         if safe_input:
             return mark_safe(word)
         if autoescape:
             return escape(word)
         return word

     if not ("." in word or "@" in word or ":" in word):
         return unlinked()

     # lead: Punctuation trimmed from the beginning of the word.
     # middle: State of the word.
     # trail: Punctuation trimmed from the end of the word.
     lead, middle, trail = self.trim_punctuation(word)

     # Work out the link target, if any.
     url = None
     rel_attr = ' rel="nofollow"' if nofollow else ""
     if self.simple_url_re.match(middle):
         url = smart_urlquote(html.unescape(middle))
     elif self.simple_url_2_re.match(middle):
         url = smart_urlquote("http://%s" % html.unescape(middle))
     elif ":" not in middle and self.is_email_simple(middle):
         local, domain = middle.rsplit("@", 1)
         try:
             domain = punycode(domain)
         except UnicodeError:
             # The domain cannot be converted: give the word back untouched.
             return word
         url = self.mailto_template.format(local=local, domain=domain)
         # mailto: links never get the nofollow attribute.
         rel_attr = ""

     if not url:
         return unlinked()

     # Build the anchor around the (possibly shortened) visible text.
     trimmed = self.trim_url(middle, limit=trim_url_limit)
     if autoescape and not safe_input:
         lead, trail = escape(lead), escape(trail)
         trimmed = escape(trimmed)
     anchor = self.url_template.format(
         href=escape(url),
         attrs=rel_attr,
         url=trimmed,
     )
     return mark_safe(f"{lead}{anchor}{trail}")
Exemplo n.º 6
0
    def __call__(self, value):
        """
        Validate ``value`` as a URL.

        Checks, in order: the value is a ``str``; if it contains "://", the
        text before it (lowercased) is in ``self.schemes``; ``urlsplit()``
        can parse it; the parent validator accepts it (retrying with the
        netloc converted by ``punycode()`` if the first attempt fails); a
        bracketed host in the netloc is a valid IPv6 address; and the netloc
        is at most 253 characters long.

        Raises ValidationError built from ``self.message`` and ``self.code``
        on any failure.
        """
        # A non-string value cannot be a URL; reject it as invalid instead of
        # crashing on value.split() below.
        if not isinstance(value, str):
            raise ValidationError(self.message, code=self.code)

        # Check first if the scheme is valid
        split = value.split('://')
        if len(split) > 1:
            # in words, there was a split
            scheme = split[0].lower()
            if scheme not in self.schemes:
                raise ValidationError(self.message, code=self.code)

        # Parse once, up front, so that a ValueError from urlsplit() is
        # reported as a validation failure on every path below.
        try:
            split_url = urlsplit(value)
        except ValueError:  # for example, "Invalid IPv6 URL"
            raise ValidationError(self.message, code=self.code)

        # Then check full URL
        try:
            super().__call__(value)
        except ValidationError as e:
            # Trivial case failed. Try for possible IDN domain
            if value:
                scheme, netloc, path, query, fragment = split_url
                try:
                    netloc = punycode(netloc)  # IDN -> ACE
                except UnicodeError:  # invalid domain part
                    raise e
                url = urlunsplit((scheme, netloc, path, query, fragment))
                super().__call__(url)
            else:
                raise
        else:
            # Now verify IPv6 in the netloc part. The optional port may be a
            # single digit, so match 1-5 digits.
            host_match = re.search(r'^\[(.+)\](?::\d{1,5})?$',
                                   split_url.netloc)
            if host_match:
                potential_ip = host_match.groups()[0]
                try:
                    validate_ipv6_address(potential_ip)
                except ValidationError:
                    raise ValidationError(self.message, code=self.code)

        # The maximum length of a full host name is 253 characters per RFC 1034
        # section 3.1. It's defined to be 255 bytes or less, but this includes
        # one byte for the length of the name and one byte for the trailing dot
        # that's used to indicate absolute names in DNS.
        if len(split_url.netloc) > 253:
            raise ValidationError(self.message, code=self.code)
Exemplo n.º 7
0
def sanitize_address(addr, encoding):
    """
    Format a pair of (name, address) or an email address string.

    ``addr`` is either a ``(name, address)`` tuple or a single value that is
    converted with ``force_str()`` and parsed as one mailbox. ``encoding`` is
    the charset used when the display name or local part is not pure ASCII.

    Return the result of ``formataddr()`` on the encoded name and the
    ``addr_spec`` of the resulting ``Address``.

    Raise ValueError if a string address cannot be parsed completely, if the
    address of a tuple contains no "@", or if any part contains a newline.
    """
    if not isinstance(addr, tuple):
        addr = force_str(addr)
        try:
            token, rest = parser.get_mailbox(addr)
        except (HeaderParseError, ValueError, IndexError):
            raise ValueError('Invalid address "%s"' % addr)
        if rest:
            # The entire email address must be parsed.
            raise ValueError(
                'Invalid address; only %s could be parsed from "%s"'
                % (token, addr)
            )
        nm = token.display_name or ''
        localpart = token.local_part
        domain = token.domain or ''
    else:
        nm, address = addr
        try:
            localpart, domain = address.rsplit('@', 1)
        except ValueError:
            # Without an "@" the unpacking fails; report the bad address
            # instead of leaking the opaque unpacking message.
            raise ValueError('Invalid address "%s"' % address)

    # The name bypasses Address() below, so newlines are rejected explicitly
    # here for all three parts.
    address_parts = nm + localpart + domain
    if '\n' in address_parts or '\r' in address_parts:
        raise ValueError('Invalid address; address parts cannot contain newlines.')

    # Avoid UTF-8 encode, if it's possible.
    try:
        nm.encode('ascii')
        nm = Header(nm).encode()
    except UnicodeEncodeError:
        nm = Header(nm, encoding).encode()
    try:
        localpart.encode('ascii')
    except UnicodeEncodeError:
        localpart = Header(localpart, encoding).encode()
    domain = punycode(domain)

    parsed_address = Address(username=localpart, domain=domain)
    return formataddr((nm, parsed_address.addr_spec))
Exemplo n.º 8
0
 def handle_word(self, word):
     """
     Return ``word`` turned into an HTML link when it looks like a URL or an
     email address; otherwise return it marked safe, escaped, or unchanged
     depending on ``self.safe_input`` and ``self.autoescape``.
     """
     def unlinked():
         # Result for a word that does not become a link.
         if self.safe_input:
             return mark_safe(word)
         if self.autoescape:
             return escape(word)
         return word

     if not ('.' in word or '@' in word or ':' in word):
         return unlinked()

     # lead: Punctuation trimmed from the beginning of the word.
     # middle: State of the word.
     # trail: Punctuation trimmed from the end of the word.
     lead, middle, trail = self.trim_punctuation(word)

     # Work out the link target, if any.
     url = None
     rel_attr = ' rel="nofollow"' if self.nofollow else ''
     if self.simple_url_re.match(middle):
         url = smart_urlquote(html.unescape(middle))
     elif self.simple_url_2_re.match(middle):
         url = smart_urlquote('http://%s' % html.unescape(middle))
     elif ':' not in middle and self.is_email_simple(middle):
         local, domain = middle.rsplit('@', 1)
         try:
             domain = punycode(domain)
         except UnicodeError:
             # The domain cannot be converted: give the word back untouched.
             return word
         url = self.mailto_template.format(local=local, domain=domain)
         # mailto: links never get the nofollow attribute.
         rel_attr = ''

     if not url:
         return unlinked()

     # Build the anchor around the (possibly shortened) visible text.
     trimmed = self.trim_url(middle)
     if self.autoescape and not self.safe_input:
         lead, trail = escape(lead), escape(trail)
         trimmed = escape(trimmed)
     anchor = self.url_template.format(
         href=escape(url),
         attrs=rel_attr,
         url=trimmed,
     )
     return mark_safe(f'{lead}{anchor}{trail}')
Exemplo n.º 9
0
    def __call__(self, value):
        """
        Validate ``value`` as an email address.

        The part before the last "@" must match ``self.user_regex``. The part
        after it is accepted when it is listed in ``self.domain_whitelist`` or
        passes ``self.validate_domain_part()``, either as given or after
        conversion with ``punycode()``.

        Raises ValidationError built from ``self.message`` and ``self.code``
        on any failure.
        """
        def invalid():
            # Every rejection path reports the same error.
            return ValidationError(self.message, code=self.code)

        if not value or "@" not in value:
            raise invalid()

        local, host = value.rsplit("@", 1)
        if not self.user_regex.match(local):
            raise invalid()

        if host in self.domain_whitelist or self.validate_domain_part(host):
            return

        # The plain check failed; retry with the domain converted by
        # punycode() in case it is an IDN.
        try:
            ascii_host = punycode(host)
        except UnicodeError:
            ascii_host = None
        if ascii_host is None or not self.validate_domain_part(ascii_host):
            raise invalid()
Exemplo n.º 10
0
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    If trim_url_limit is not None, truncate the URLs in the link text longer
    than this limit to trim_url_limit - 1 characters and append an ellipsis.

    If nofollow is True, give the links a rel="nofollow" attribute.

    If autoescape is True, autoescape the link text and URLs.

    Return the processed pieces of ``text`` joined back into one string.
    """
    # Input already marked safe is never escaped again below.
    safe_input = isinstance(text, SafeData)

    def trim_url(x, limit=trim_url_limit):
        # Shorten the visible link text to at most `limit` characters,
        # the last of which is an ellipsis.
        if limit is None or len(x) <= limit:
            return x
        return '%s…' % x[:max(0, limit - 1)]

    def trim_punctuation(lead, middle, trail):
        """
        Trim trailing and wrapping punctuation from `middle`. Return the items
        of the new state.
        """
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening):]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (middle.endswith(closing) and middle.count(closing)
                        == middle.count(opening) + 1):
                    middle = middle[:-len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle_unescaped != stripped:
                # NOTE(review): `len(stripped)` is measured on the unescaped
                # text but used here to index the still-escaped `middle`;
                # confirm this is right when `middle` contains entities.
                trail = middle[len(stripped):] + trail
                # Negative index: drop as many characters from the end of
                # `middle` as rstrip() removed from the unescaped text.
                middle = middle[:len(stripped) - len(middle_unescaped)]
                trimmed_something = True
        return lead, middle, trail

    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if '@' not in value or value.startswith('@') or value.endswith('@'):
            return False
        try:
            p1, p2 = value.split('@')
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if '.' not in p2 or p2.startswith('.'):
            return False
        return True

    # Entries of `words` are replaced in place as they are processed.
    words = word_split_re.split(str(text))
    for i, word in enumerate(words):
        # Only words containing one of these characters can be a URL or an
        # email address.
        if '.' in word or '@' in word or ':' in word:
            # lead: Current punctuation trimmed from the beginning of the word.
            # middle: Current state of the word.
            # trail: Current punctuation trimmed from the end of the word.
            lead, middle, trail = '', word, ''
            # Deal with punctuation.
            lead, middle, trail = trim_punctuation(lead, middle, trail)

            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="nofollow"' if nofollow else ''
            if simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif simple_url_2_re.match(middle):
                url = smart_urlquote('http://%s' % html.unescape(middle))
            elif ':' not in middle and is_email_simple(middle):
                local, domain = middle.rsplit('@', 1)
                try:
                    domain = punycode(domain)
                except UnicodeError:
                    # Skip this word: words[i] keeps its original value and
                    # is neither linked nor escaped.
                    continue
                url = 'mailto:%s@%s' % (local, domain)
                # mailto: links never get the nofollow attribute.
                nofollow_attr = ''

            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    trimmed = escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (escape(url), nofollow_attr,
                                                    trimmed)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                # Not a link: mark safe, escape, or leave untouched.
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return ''.join(words)
Exemplo n.º 11
0
 def get_fqdn(self):
     """Return ``self._fqdn``, computing and storing it on first use."""
     if hasattr(self, '_fqdn'):
         return self._fqdn
     # First call: look the name up once and keep the punycode() result.
     self._fqdn = punycode(socket.getfqdn())
     return self._fqdn
Exemplo n.º 12
0
    def handle(self, *args, **kwargs):
        """
        Scan every stored site and save framework, provider and geo data.

        For each ``Site`` (ordered by name) the URL is passed through
        ``punycode()`` and then used to:

        * call ``whatis_query`` and upsert one ``Framework`` row per app in
          the decoded JSON result,
        * build a ``GetHostProvider`` and upsert the site's ``Provider`` row,
        * call ``ping_geo`` and upsert the site's ``GeoInfo`` row.

        Progress is printed to stdout. A failure while saving is printed and
        the scan continues with the next item.
        """
        # read sites from site
        querySites = Site.objects.all().order_by('name')
        print("Found {0} sites".format(len(querySites)))
        logfile = 'siteinfo.log'
        logging.basicConfig(filename=logfile, level=logging.INFO)
        logging.info('Starting logfile')
        # loop through sites and scan
        for querysite in querySites:
            print("Url: {0}".format(querysite.url))
            url_encoded = punycode(querysite.url)
            print("Encoded url: {0}".format(url_encoded))
            # enter whatis_query result into database
            scan_result = whatis_query(url_encoded)
            print("Count {0}, type: {1}".format(len(scan_result),
                                                type(scan_result)))
            scan_json = json.loads(scan_result)
            for key, value in scan_json.items():
                print("Checking site: {0}".format(key))
                print("Type: {0}".format(type(value)))
                for app in value:
                    # check if value already exists, then update existing
                    insertdata = app.copy()
                    # A missing or empty version is stored as "N/A".
                    if not insertdata.get('ver'):
                        insertdata['ver'] = "N/A"
                    print(insertdata)
                    try:
                        obj, created = Framework.objects.update_or_create(
                            app=app['app'],
                            site=querysite,
                            defaults=insertdata)
                        if created:
                            print('created new databaseentry')
                        elif obj:
                            print('updated existing databasentry')
                        else:
                            print("Couldn't update at all")
                    # Exception, not BaseException: Ctrl-C and SystemExit
                    # must still be able to stop the command.
                    except Exception as e:
                        print("Couldn't insert: {0}. \n Cause: {1}".format(
                            insertdata, e))
            # check provider
            print("Checking provider for {0}".format(url_encoded))
            hostprovider = GetHostProvider(address=url_encoded)
            print("Provider: {0}, Ip: {1}, source: {2}".format(
                hostprovider.provider, hostprovider.ip, hostprovider.source))
            # add provider to database
            provider_insertdata = {
                'provider': hostprovider.provider,
                'ip': hostprovider.ip,
                'source': hostprovider.source
            }
            try:
                provider_obj, provider_created = Provider.objects.update_or_create(
                    site=querysite, defaults=provider_insertdata)
                if provider_created:
                    print('created new providerdatabase for {0}'.format(
                        url_encoded))
                else:
                    print('Updated existing providerentry')
            except Exception as e:
                print("Couldn't insert {0}. \n Cause: {1}".format(
                    provider_insertdata, e))

            # check geodata
            geo_insertdata = ping_geo(url_encoded)
            # add geodata to database
            print(geo_insertdata)
            try:
                geo_obj, geo_created = GeoInfo.objects.update_or_create(
                    site=querysite, defaults=geo_insertdata)
                # Report on the geo row itself, not on the provider result
                # from the previous step.
                if geo_created:
                    print('created new geodatabase for {0}'.format(
                        url_encoded))
                else:
                    print('Updated existing geoentry')
            except Exception as e:
                print("Couldn't insert {0}. \n Cause: {1}".format(
                    geo_insertdata, e))