Пример #1
0
    def node(self, data, tags):
        """Report unexpected characters in the keys and values of ``tags``.

        For each tag the checks run in this order:

        1. ``self.non_printable`` against the key, then the value
           (class 50702, subclass 0 / 1).
        2. ``self.other_symbol`` against the key, then the value
           (class 50703, subclass 0 / 1).
        3. Bidirectional control characters anywhere in the key or the value
           (class 50702, subclass 2).
        4. For keys listed in ``self.names`` (only when ``self.default`` is
           set) and for ``<name>:<lang>`` keys whose language is in
           ``self.lang``: whatever is left of the value once the matching
           patterns have been substituted away (class 50701, subclass 0 / 1).

        The first hit in steps 1-2 ends the processing of that tag.

        Args:
            data: Not used by this method.
            tags: Mapping of tag key to tag value.

        Returns:
            A list of error dicts with "class", "subclass" and "text" entries,
            plus a "fix" entry when a single leftover character has a
            suggested replacement.
        """
        # https://en.wikipedia.org/wiki/Bi-directional_text#Table_of_possible_BiDi-types
        bidi_controls = u"\u200E\u200F\u061C\u202A\u202D\u202B\u202E\u202C\u2066\u2067\u2068\u2069"

        err = []
        for key, value in tags.items():
            m = self.non_printable.search(key)
            if m:
                err.append({"class": 50702, "subclass": 0, "text": T_f(
                    u"\"{0}\" unexpected non printable char ({1}, 0x{2:04x}) in key at position {3}",
                    key, unicodedata.name(m.group(0), ''), ord(m.group(0)), m.start() + 1)})
                continue

            m = self.non_printable.search(value)
            if m:
                err.append({"class": 50702, "subclass": 1, "text": T_f(
                    u"\"{0}\"=\"{1}\" unexpected non printable char ({2}, 0x{3:04x}) in value at position {4}",
                    key, value, unicodedata.name(m.group(0), ''), ord(m.group(0)), m.start() + 1)})
                continue

            m = self.other_symbol.search(key)
            if m:
                err.append({"class": 50703, "subclass": 0, "text": T_f(
                    u"\"{0}\" unexpected symbol char ({1}, 0x{2:04x}) in key at position {3}",
                    key, unicodedata.name(m.group(0), ''), ord(m.group(0)), m.start() + 1)})
                continue

            m = self.other_symbol.search(value)
            if m:
                err.append({"class": 50703, "subclass": 1, "text": T_f(
                    u"\"{0}\"=\"{1}\" unexpected symbol char ({2}, 0x{3:04x}) in value at position {4}",
                    key, value, unicodedata.name(m.group(0), ''), ord(m.group(0)), m.start() + 1)})
                continue

            for ctrl in bidi_controls:
                # str.find() returns -1 when the character is absent, so index 0
                # (a control character leading the string) is a genuine hit.
                pos = key.find(ctrl)
                if pos >= 0:
                    err.append({"class": 50702, "subclass": 2, "text": T_f(
                        u"\"{0}\" unexpected non printable char ({1}, 0x{2:04x}) in key at position {3}",
                        key, unicodedata.name(ctrl, ''), ord(ctrl), pos + 1)})

                pos = value.find(ctrl)
                if pos >= 0:
                    err.append({"class": 50702, "subclass": 2, "text": T_f(
                        u"\"{0}\"=\"{1}\" unexpected non printable char ({2}, 0x{3:04x}) in value at position {4}",
                        key, value, unicodedata.name(ctrl, ''), ord(ctrl), pos + 1)})

            if self.default and key in self.names:
                s = self.non_letter.sub(u" ", value)
                s = self.alone_char.sub(u"", s)
                s = self.roman_number.sub(u"", s)
                s = self.default.sub(u"", s)
                # Only report a leftover that is short relative to the value, and
                # ignore a single leftover character in a two-character value.
                if len(s) > 0 and not(len(value) == 2 and len(s) == 1) and len(s) <= len(value) / 10 + 1:
                    if len(s) == 1:
                        c = s[0]
                        suggestion = self.uniq_script and confusables.unconfuse(c, self.uniq_script)
                        if suggestion:
                            err.append({"class": 50701, "subclass": 0,
                                "text": T_f(
                                    u"\"{0}\"=\"{1}\" unexpected char \"{2}\" ({3}, 0x{4:04x}). Means \"{5}\" ({6}, 0x{7:04x})?",
                                    key, value, s, unicodedata.name(c, ''), ord(c),
                                    suggestion, unicodedata.name(suggestion, ''), ord(suggestion)),
                                "fix": {key: value.replace(c, suggestion)}
                            })
                        else:
                            err.append({"class": 50701, "subclass": 0,
                                "text": T_f(
                                    u"\"{0}\"=\"{1}\" unexpected char \"{2}\" ({3}, 0x{4:04x})",
                                    key, value, s, unicodedata.name(c, ''), ord(c))
                            })
                    else:
                        err.append({"class": 50701, "subclass": 0, "text": T_f(
                            u"\"{0}\"=\"{1}\" unexpected \"{2}\"", key, value, s)})

            # Keys of the form "<name>:<lang>[:...]" are checked against the
            # pattern registered for that language in self.lang.
            parts = key.split(':')
            if len(parts) > 1 and parts[0] in self.names and parts[1] in self.lang:
                lang = parts[1]
                s = self.non_letter.sub(u" ", value)
                s = self.alone_char.sub(u"\\1", s)
                s = self.roman_number.sub(u"\\1", s)
                s = self.lang[lang].sub(u"", s)
                if len(s) > 0:
                    if len(s) == 1:
                        c = s[0]
                        script = self.uniq_scripts.get(lang)
                        suggestion = script and confusables.unconfuse(c, script)
                        if suggestion:
                            err.append({"class": 50701, "subclass": 1,
                                "text": T_f(
                                    u"\"{0}\"=\"{1}\" unexpected char \"{2}\" ({3}, 0x{4:04x}). Means \"{5}\" ({6}, 0x{7:04x})?",
                                    key, value, s, unicodedata.name(c, ''), ord(c),
                                    suggestion, unicodedata.name(suggestion, ''), ord(suggestion)),
                                "fix": {key: value.replace(c, suggestion)}
                            })
                        else:
                            err.append({"class": 50701, "subclass": 1,
                                "text": T_f(
                                    u"\"{0}\"=\"{1}\" unexpected char \"{2}\" ({3}, 0x{4:04x})",
                                    key, value, s, unicodedata.name(c, ''), ord(c))
                            })
                    else:
                        err.append({"class": 50701, "subclass": 1, "text": T_f(
                            u"\"{0}\"=\"{1}\" unexpected \"{2}\"", key, value, s)})

        return err
Пример #2
0
    def node(self, data, tags):
        """Report unexpected characters in the keys and values of ``tags``.

        For each tag the checks run in this order:

        1. ``self.non_printable`` against the key, then the value
           (class 50702, subclass 0 / 1).
        2. ``self.other_symbol`` against the key, then the value
           (class 50703, subclass 0 / 1).
        3. Bidirectional control characters anywhere in the key or the value
           (class 50702, subclass 2).
        4. For keys listed in ``self.names`` (only when ``self.default`` is
           set) and for ``<name>:<lang>`` keys whose language is in
           ``self.lang``: whatever is left of the value once the matching
           patterns have been substituted away (class 50701, subclass 0 / 1).

        The first hit in steps 1-2 ends the processing of that tag.

        Args:
            data: Not used by this method.
            tags: Mapping of tag key to tag value.

        Returns:
            A list of error dicts with "class", "subclass" and "text" entries,
            plus a "fix" entry when a single leftover character has a
            suggested replacement.
        """
        # https://en.wikipedia.org/wiki/Bi-directional_text#Table_of_possible_BiDi-types
        bidi_controls = u"\u200E\u200F\u061C\u202A\u202D\u202B\u202E\u202C\u2066\u2067\u2068\u2069"

        err = []
        for key, value in tags.items():
            m = self.non_printable.search(key)
            if m:
                err.append({
                    "class": 50702,
                    "subclass": 0,
                    "text": T_(
                        "\"%s\" unexpected non printable char (%s, 0x%04x) in key at position %s",
                        key, unicodedata.name(m.group(0), ''), ord(m.group(0)),
                        m.start() + 1),
                })
                continue

            m = self.non_printable.search(value)
            if m:
                err.append({
                    "class": 50702,
                    "subclass": 1,
                    "text": T_(
                        "\"%s\"=\"%s\" unexpected non printable char (%s, 0x%04x) in value at position %s",
                        key, value, unicodedata.name(m.group(0), ''),
                        ord(m.group(0)), m.start() + 1),
                })
                continue

            m = self.other_symbol.search(key)
            if m:
                err.append({
                    "class": 50703,
                    "subclass": 0,
                    "text": T_(
                        "\"%s\" unexpected symbol char (%s, 0x%04x) in key at position %s",
                        key, unicodedata.name(m.group(0), ''), ord(m.group(0)),
                        m.start() + 1),
                })
                continue

            m = self.other_symbol.search(value)
            if m:
                err.append({
                    "class": 50703,
                    "subclass": 1,
                    "text": T_(
                        "\"%s\"=\"%s\" unexpected symbol char (%s, 0x%04x) in value at position %s",
                        key, value, unicodedata.name(m.group(0), ''),
                        ord(m.group(0)), m.start() + 1),
                })
                continue

            for ctrl in bidi_controls:
                # str.find() returns -1 when the character is absent, so index 0
                # (a control character leading the string) is a genuine hit.
                pos = key.find(ctrl)
                if pos >= 0:
                    err.append({
                        "class": 50702,
                        "subclass": 2,
                        "text": T_(
                            "\"%s\" unexpected non printable char (%s, 0x%04x) in key at position %s",
                            key, unicodedata.name(ctrl, ''), ord(ctrl), pos + 1),
                    })

                pos = value.find(ctrl)
                if pos >= 0:
                    err.append({
                        "class": 50702,
                        "subclass": 2,
                        "text": T_(
                            "\"%s\"=\"%s\" unexpected non printable char (%s, 0x%04x) in value at position %s",
                            key, value, unicodedata.name(ctrl, ''), ord(ctrl),
                            pos + 1),
                    })

            if self.default and key in self.names:
                s = self.non_letter.sub(u" ", value)
                s = self.alone_char.sub(u"", s)
                s = self.roman_number.sub(u"", s)
                s = self.default.sub(u"", s)
                # Only report a leftover that is short relative to the value, and
                # ignore a single leftover character in a two-character value.
                if len(s) > 0 and \
                        not(len(value) == 2 and len(s) == 1) and \
                        len(s) <= len(value) / 10 + 1:
                    if len(s) == 1:
                        c = s[0]
                        suggestion = self.uniq_script and confusables.unconfuse(
                            c, self.uniq_script)
                        if suggestion:
                            err.append({
                                "class": 50701,
                                "subclass": 0,
                                "text": T_(
                                    "\"%s\"=\"%s\" unexpected char \"%s\" (%s, 0x%04x). Means \"%s\" (%s, 0x%04x)?",
                                    key, value, s, unicodedata.name(c, ''),
                                    ord(c), suggestion,
                                    unicodedata.name(suggestion, ''),
                                    ord(suggestion)),
                                "fix": {
                                    key: value.replace(c, suggestion)
                                },
                            })
                        else:
                            err.append({
                                "class": 50701,
                                "subclass": 0,
                                "text": T_(
                                    "\"%s\"=\"%s\" unexpected char \"%s\" (%s, 0x%04x)",
                                    key, value, s, unicodedata.name(c, ''),
                                    ord(c)),
                            })
                    else:
                        err.append({
                            "class": 50701,
                            "subclass": 0,
                            "text": T_("\"%s\"=\"%s\" unexpected \"%s\"", key,
                                       value, s),
                        })

            # Keys of the form "<name>:<lang>[:...]" are checked against the
            # pattern registered for that language in self.lang.
            parts = key.split(':')
            if len(parts) > 1 and parts[0] in self.names and parts[1] in self.lang:
                lang = parts[1]
                s = self.non_letter.sub(u" ", value)
                s = self.alone_char.sub(u"\\1", s)
                s = self.roman_number.sub(u"\\1", s)
                s = self.lang[lang].sub(u"", s)
                if len(s) > 0:
                    if len(s) == 1:
                        c = s[0]
                        script = self.uniq_scripts.get(lang)
                        suggestion = script and confusables.unconfuse(c, script)
                        if suggestion:
                            err.append({
                                "class": 50701,
                                "subclass": 1,
                                "text": T_(
                                    "\"%s\"=\"%s\" unexpected char \"%s\" (%s, 0x%04x). Means \"%s\" (%s, 0x%04x)?",
                                    key, value, s, unicodedata.name(c, ''),
                                    ord(c), suggestion,
                                    unicodedata.name(suggestion, ''),
                                    ord(suggestion)),
                                "fix": {
                                    key: value.replace(c, suggestion)
                                },
                            })
                        else:
                            err.append({
                                "class": 50701,
                                "subclass": 1,
                                "text": T_(
                                    "\"%s\"=\"%s\" unexpected char \"%s\" (%s, 0x%04x)",
                                    key, value, s, unicodedata.name(c, ''),
                                    ord(c)),
                            })
                    else:
                        err.append({
                            "class": 50701,
                            "subclass": 1,
                            "text": T_("\"%s\"=\"%s\" unexpected \"%s\"", key,
                                       value, s),
                        })

        return err
Пример #3
0
def score_domain(domain):
    """Score `domain`.

    The higher the score, the more probable it is that `domain` is a
    phishing site.

    Args:
        domain (str): the domain to check.

    Returns:
        int: the score of `domain`.
    """

    score = 0
    for t in suspicious['tlds']:
        if domain.endswith(t):
            score += 20

    # Remove the initial '*.' of wildcard certificates
    if domain.startswith('*.'):
        domain = domain[2:]

    # Removing TLD to catch inner TLD in subdomain (ie. paypal.com.domain.com)
    try:
        res = get_tld(domain,
                      as_object=True,
                      fail_silently=True,
                      fix_protocol=True)
        # With fail_silently=True an unparsable domain yields None rather
        # than an exception; keep the domain unchanged in that case.
        if res is not None:
            domain = '.'.join([res.subdomain, res.domain])
    except Exception:
        # Best effort: on any other failure keep scoring the unstripped domain.
        pass

    # Higher entropy is kind of suspicious
    score += int(round(entropy(domain) * 10))

    # Remove lookalike characters using list from http://www.unicode.org/reports/tr39
    domain = unconfuse(domain)

    # Raw string: "\W" is not a valid escape sequence in a plain str literal.
    words_in_domain = re.split(r"\W+", domain)

    # ie. detect fake .com (ie. *.com-account-management.info)
    if words_in_domain[0] in ['com', 'net', 'org']:
        score += 10

    # Testing keywords
    for word in suspicious['keywords']:
        if word in domain:
            score += suspicious['keywords'][word]

    # Removing too generic keywords (ie. mail.domain.com); computed once
    # because it does not depend on the keyword being compared.
    candidate_words = [
        w for w in words_in_domain
        if w not in ['email', 'mail', 'cloud']
    ]

    # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol)
    for key in [k for (k, s) in suspicious['keywords'].items() if s >= 70]:
        for word in candidate_words:
            if distance(str(word), str(key)) == 1:
                score += 70

    # Lots of '-' (ie. www.paypal-datacenter.com-acccount-alert.com)
    if 'xn--' not in domain and domain.count('-') >= 4:
        score += domain.count('-') * 3

    # Deeply nested subdomains (ie. www.paypal.com.security.accountupdate.gq)
    if domain.count('.') >= 3:
        score += domain.count('.') * 3

    return score