Example #1
    def run(self):
        i = 0
        fex = Faup()
        while True:
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            self.lock.acquire()
            self.r.switchDB(1)
            url = self.r.rpop('crawl')
            self.lock.release()
            if url:
                print "url found: " + url
                try:
                    fex.decode(url)
                    domain = fex.get_host()
                    entry = self.db.new_domaines.find_one({'domaine': domain})
                    if entry is None:
                        # First time we see this domain: create the record
                        print "record: " + domain
                        self.db.new_domaines.save({
                            'domaine': domain,
                            'urls': [url]
                        })
                    else:
                        # Known domain: append the URL if it is new
                        urls_stored = entry['urls']
                        if url not in urls_stored:
                            urls_stored.append(url)
                            entry['urls'] = urls_stored
                            self.db.new_domaines.save(entry)
                except Exception:
                    print "parsing fault " + url
Example #2
def getmisp_urls(key, url, timeframe):
    response_domains = []
    headers = {
        'Authorization': '{}'.format(key),
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }
    payload = '{ "returnFormat": "json", "type": "url", "last": "%s", "enforceWarninglist": true }' % timeframe
    response = requests.post(url, headers=headers, data=payload, verify=False)
    json_response = json.loads(response.text)
    fp = Faup()
    try:
        for attr in json_response['response']['Attribute']:
            url = attr['value']
            eventid = attr['event_id']
            if eventid not in ignore_eventid:
                category = attr['category']
                timestamp = datetime.datetime.utcfromtimestamp(
                    int(attr['timestamp'])).strftime('%Y-%m-%d')
                fp.decode(url)
                domain = fp.get_domain()
                # Keep only hosts that look like IPv4 addresses
                if re.match(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", domain):
                    response_domains.append({
                        'domain': domain,
                        'eventid': eventid,
                        'category': category,
                        'timestamp': timestamp
                    })

        return response_domains
    except KeyError:
        # Malformed or empty MISP response: return what was collected so far
        return response_domains
Example #4
def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if is_ip(host):
        if ':' in host:
            try:
                socket.inet_pton(socket.AF_INET6, host)
                ipv6 = [host]
            except socket.error:
                pass
        else:
            try:
                socket.inet_aton(host)
                ipv4 = [host]
            except socket.error:
                pass
    else:
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except dns.exception.DNSException:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except dns.exception.DNSException:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
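
Note the distinction dns_resolve() leans on: get_host() returns the full hostname (possibly an IP literal), while get_domain() returns only the registered domain. A hedged sketch of the difference (illustrative URL):

from pyfaup.faup import Faup

f = Faup()
f.decode("https://www.example.co.uk/index.html")
print(f.get_host())    # full hostname, e.g. www.example.co.uk
print(f.get_domain())  # registered domain, e.g. example.co.uk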
Example #5
def whois(server, port, domain, ignorelist, replacelist):
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except Exception:
        print("Connection problems - check WHOIS server")
        print(("WHOIS request while problem occurred: ", domain))
        print(("WHOIS server: {}:{}".format(server, port)))
        sys.exit(0)
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    s.send(d + "\r\n")
    response = ''
    while True:
        d = s.recv(4096)
        response += d
        if d == '':
            break
    s.close()
    match = re.findall(r'[\w\.-]+@[\w\.-]+', response)
    emails = process_emails(match, ignorelist, replacelist)
    if len(emails) == 0:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
Example #7
 def __init__(self,
              misp_url,
              misp_key,
              verifycert,
              config,
              offline=False,
              urlsonly=False):
     self.offline = offline
     if not self.offline:
         self.misp = ExpandedPyMISP(misp_url,
                                    misp_key,
                                    verifycert,
                                    debug=config.debug)
     self.config = config
     self.urlsonly = urlsonly
     if not hasattr(self.config, 'enable_dns'):
         setattr(self.config, 'enable_dns', True)
     if self.urlsonly is False:
         setattr(self.config, 'enable_dns', False)
     self.debug = self.config.debug
     self.config_from_email_body = {}
     if not hasattr(self.config, 'ignore_nullsize_attachments'):
         setattr(self.config, 'ignore_nullsize_attachments', False)
     self.ignore_nullsize_attachments = self.config.ignore_nullsize_attachments
     # Init Faup
     self.f = Faup()
     self.sightings_to_add = []
Example #8
    def __init__(self):
        super(Credential, self).__init__()

        self.faup = Faup()

        self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
        self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
        self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Database
        config_loader = ConfigLoader.ConfigLoader()
        self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        # Config values
        self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
        self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")

        self.max_execution_time = 30

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 10

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")
Example #10
File: crawler.py  Project: mdeous/OSINT
    def run(self):
        i = 0
        fex = Faup()
        while True:
            i = i + 1
            if i % 1000 == 0:
                time.sleep(10)
            url = self.r.rpop('crawl')
            if url:
                print "url found: " + url
                fex.decode(url)
                domain = fex.get_host()
                entry = self.db.new_domaines.find_one({'domaine': domain})
                if entry is None:
                    # New domain: create the record
                    print "record: " + domain
                    self.db.new_domaines.save({
                        'domaine': domain,
                        'urls': [url]
                    })
                else:
                    # Known domain: append the URL if it is new
                    urls_stored = entry['urls']
                    if url not in urls_stored:
                        urls_stored.append(url)
                        entry['urls'] = urls_stored
                        self.db.new_domaines.save(entry)
Example #11
class Urls(AbstractModule):
    """
    Urls module for AIL framework
    """
    def __init__(self):
        """
        Init Urls
        """
        super(Urls, self).__init__()

        self.faup = Faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(
            self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))
        # Build the scheme alternation from protocolsfile (used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme.strip() + "|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        id, score = message.split()

        item = Item(id)
        item_content = item.get_content()

        l_urls = regex_helper.regex_findall(self.module_name,
                                            self.redis_cache_key,
                                            self.url_regex, item.get_id(),
                                            item_content)
        for url in l_urls:
            self.faup.decode(url)
            unpack_url = self.faup.get()

            to_send = f"{url} {item.get_id()}"
            print(to_send)
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")

        if len(l_urls) > 0:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(
                f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
Example #12
    def __init__(self):
        super(LibInjection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")
Example #13
from subprocess import Popen, PIPE

def harvesting_google(query, numberofpage):
    listreturn = []
    result = Popen(['casperjs', 'CeleryWeb/casperjs/googlesearch.js',
                    str(query), str(numberofpage)], stdout=PIPE)
    urls = result.stdout.readlines()
    f = Faup()
    for url in urls:
        url = url.replace('\n', '')
        f.decode(url)
        listreturn.append(f.get())
    return listreturn
Example #14
def get_urls(url, depth=1):
    if depth > 5:
        print('Too many redirects.')
        return
    fex = Faup()

    def meta_redirect(content):
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    a, url = text.split('=', 1)
                    return url.strip()
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)

    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except:
        # That one can fail (DNS for example)
        # FIXME: inform that the get failed
        yield url
        return
    if response.history is not None:
        for h in response.history:
            # Yield the urls in the order we find them
            yield h.url

    yield response.url

    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
            if not base.endswith('/'):
                base += '/'
            meta_redir_url = base + meta_redir_url
        for url in get_urls(meta_redir_url, depth):
            yield url
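
get_urls() is a generator: it yields each hop of the redirect chain (HTTP history, the final URL, then any meta-refresh target, recursing up to five levels deep). A usage sketch, assuming the surrounding module (with try_resolve() and its imports) is available:

for hop in get_urls('http://example.com/short'):
    print(hop)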
Example #15

class SQLInjectionDetection(AbstractModule):
    """docstring for SQLInjectionDetection module."""

    # # TODO: IMPROVE ME
    # Reference: https://github.com/stamparm/maltrail/blob/master/core/settings.py
    SQLI_REGEX = r"information_schema|sysdatabases|sysusers|floor\(rand\(|ORDER BY \d+|\bUNION\s+(ALL\s+)?SELECT\b|\b(UPDATEXML|EXTRACTVALUE)\(|\bCASE[^\w]+WHEN.*THEN\b|\bWAITFOR[^\w]+DELAY\b|\bCONVERT\(|VARCHAR\(|\bCOUNT\(\*\)|\b(pg_)?sleep\(|\bSELECT\b.*\bFROM\b.*\b(WHERE|GROUP|ORDER)\b|\bSELECT \w+ FROM \w+|\b(AND|OR|SELECT)\b.*/\*.*\*/|/\*.*\*/.*\b(AND|OR|SELECT)\b|\b(AND|OR)[^\w]+\d+['\") ]?[=><]['\"( ]?\d+|ODBC;DRIVER|\bINTO\s+(OUT|DUMP)FILE"

    def __init__(self):
        super(SQLInjectionDetection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        if self.is_sql_injection(url):
            self.faup.decode(url)
            url_parsed = self.faup.get()

            item = Item(id)
            item_id = item.get_id()
            print(f"Detected SQL in URL: {item_id}")
            print(urllib.request.unquote(url))
            to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)

    # Try to detect whether the url passed might be an SQL injection by applying
    # the regex defined above to it.
    def is_sql_injection(self, url_parsed):
        line = urllib.request.unquote(url_parsed)

        return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
Example #16
    def __post_init__(self):
        # Example at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
        f = Faup()
        f.decode(self.url)

        self.scheme = f.get_scheme()
        self.top_level_domain = f.get_tld()
        self.domain = f.get_domain()
        self.subdomain = f.get_subdomain()
        self.path = f.get_resource_path()
Example #18
 def run(self):
     i = 0
     while True:
         i = i + 1
         if i % 1000 == 0:
             time.sleep(10)
         url = self.r.rpop("crawl")
         fex = Faup()
         if url:
             fex.decode(url)
             domain = fex.get_host()
             entry = self.db.new_domaines.find_one({"domaine": domain})
             if entry is None:
                 print "record: " + domain
                 self.db.new_domaines.save({"domaine": domain, "urls": [url]})
Example #19
def tld_extract(domain):
    # Cache a single Faup instance across calls
    if "_faup" not in __builtins__:
        __builtins__["_faup"] = Faup()
    _faup = __builtins__["_faup"]
    _faup.decode(domain.decode("utf-8").strip("."))
    return (_faup.get_subdomain() or b"", _faup.get_domain_without_tld() or b"", _faup.get_tld() or b"")
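
tld_extract() takes a bytes hostname and returns a (subdomain, domain-without-TLD, TLD) triple, each falling back to b"" when Faup reports nothing. A hedged usage sketch; the exact values depend on the Faup build and its public-suffix data:

sub, dom, tld = tld_extract(b"www.example.co.uk")
# plausibly: sub == b"www", dom == b"example", tld == b"co.uk"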
Example #20
 def __post_init__(self):
     if self.domain is None:
         # Example at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
         f = Faup()
         f.decode(self.address.split("@")[-1])
         self.top_level_domain = f.get_tld()
         self.domain = f.get_domain()
         self.subdomain = f.get_subdomain()
Example #21
 def process(self):
     list_domains = self.db['new_domaines'].distinct('domaine')
     fex = Faup()
     for domain in list_domains:
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # CSV line: tld, domain, reversed subdomain labels ('www' dropped)
         line = fex.get_tld() + ',' + fex.get_domain() + ',' + \
             ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', '')
         print line.replace(',,', ',')
Example #22
def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if not is_ip(host):
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except dns.exception.DNSException:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except dns.exception.DNSException:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
Example #23
def is_valid_url(url):
    cached = _cache_get(url, 'valid')
    key = date.today().isoformat() + '_submissions'
    r_cache.zincrby(key, url)
    if cached is not None:
        return cached
    fex = Faup()
    if url.startswith('hxxp'):
        url = 'http' + url[4:]
    elif not url.startswith('http'):
        url = 'http://' + url
    logging.debug("Checking validity of URL: " + url)
    fex.decode(url)
    scheme = fex.get_scheme()
    host = fex.get_host()
    if scheme is None or host is None:
        reason = "Not a valid http/https URL/URI"
        return False, url, reason
    _cache_set(url, (True, url, None), 'valid')
    return True, url, None
Example #26
    def __init__(self):
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
        # Sent to the logging a description of the module
        self.redis_logger.info("Makes statistics about valid URL")

        self.pending_seconds = 5 * 60

        # REDIS #
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.get("ARDB_Trending", "port"),
            db=self.process.config.get("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        self.csv_path_proto = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))

        self.csv_path_tld = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldstrending_csv"))
        self.tldsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "tldsfile"))

        self.csv_path_domain = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "domainstrending_csv"))

        self.faup = Faup()
        self.generate_new_graph = False
Example #27
    def initialize(self, stormconf, context):
        super(Urls, self).initialize(stormconf, context)

        # Faup
        self.faup = Faup()

        # Input bolts for Phishing bolt
        self.input_bolts = set(context["source->stream->grouping"].keys())

        # All mails
        self._mails = {}

        # Load keywords
        self._load_lists()
Example #28
    def __init__(self):
        """
        Init Web
        """
        super(Web, self).__init__()

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country to log as critical
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #

        self.faup = Faup()

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1]+"|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
Example #29
    def test_urls_extractor(self):

        body = """
        bla bla https://tweetdeck.twitter.com/random bla bla
        http://kafka.apache.org/documentation.html
        http://kafka.apache.org/documentation1.html
        bla bla bla https://docs.python.org/2/library/re.html bla bla
        bla bla bla https://docs.python.org/2/library/re_2.html> bla bla
        <p>https://tweetdeck.twitter.com/random</p> bla bla
        <p>https://tweetdeck.twitter.com/random_2</p>
        """

        body_unicode_error = """
        Return-Path: <>
        Delivered-To: [email protected]
        Received: (qmail 15482 invoked from network); 29 Nov 2015 12:28:40 -000
        Received: from unknown (HELO 112.149.154.61) (112.149.154.61)
        by smtp.customers.net with SMTP; 29 Nov 2015 12:28:40 -0000
        Received: from unknown (HELO localhost)
            ([email protected]@110.68.103.81)
                by 112.149.154.61 with ESMTPA; Sun, 29 Nov 2015 21:29:24 +0900
                From: [email protected]
                To: [email protected]
                Subject: Gain your male attrctiveness

                Give satisfaction to your loved one
                http://contents.xn--90afavbplfx2a6a5b2a.xn--p1ai/
        """
        parser = Faup()

        urls = utils.urls_extractor(parser, body)
        self.assertIsInstance(urls, dict)
        self.assertIn("apache.org", urls)
        self.assertIn("python.org", urls)
        self.assertIn("twitter.com", urls)

        for i in ("apache.org", "python.org", "twitter.com"):
            self.assertIsInstance(urls[i], list)
            self.assertEqual(len(urls[i]), 2)

        urls = utils.urls_extractor(parser, body_unicode_error)
        self.assertIsInstance(urls, dict)
        self.assertIn("xn--90afavbplfx2a6a5b2a.xn--p1ai", urls)
        self.assertEqual(len(urls["xn--90afavbplfx2a6a5b2a.xn--p1ai"]), 1)
Example #31
    def sort(self, elem_links, url):
        fex = Faup()
        f = Filters()
        f.load()
        self.r.switchDB(1)
        try:
            for link in elem_links:
                new_url = link
                # Reset the filter flags for each link
                extend = True
                domainfilter = True
                schemefilter = True
                self.r.switchDB(2)
                if not self.r.get(new_url) and new_url:
                    self.r.switchDB(1)
                    if not self.r.get(new_url):
                        fex.decode(new_url)
                        domain = fex.get_host()
                        if f.isfilteredscheme(fex.get_scheme()):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            schemefilter = False
                        if f.isfiltereddomains(domain):
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)
                            domainfilter = False
                        if f.isfilteredextention(fex.get_resource_path()):
                            extend = False
                            self.r.switchDB(2)
                            self.r.put(new_url, new_url)

                        if extend and domainfilter and schemefilter:
                            self.r.switchDB(1)
                            self.r.rpush('crawl', new_url)
                            self.queue.append(new_url)
        except TypeError as e:
            print "TypeError: " + str(e)
Example #33
import time
import sys
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re
from pyfaup.faup import Faup

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    critical = 8

    regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue

        filepath, count = message.split()
Example #34
 def initialize(self, stormconf, context):
     super(AbstractUrlsHandlerBolt, self).initialize(stormconf, context)
     self._load_whitelist()
     self._parser_faup = Faup()
Example #35
# dynamically list all Faup's methods
methods = []
for m in dir(Faup):
    if re.search("^get_", m):
        methods.append(m)
methods.remove("get_version")


# run
if len(sys.argv) != 2:
    print "%s <file containing 1 url per line>" % sys.argv[0]
    sys.exit(0)

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()

for url in urls:
    url = url.replace('\n', '')
    print("URL:[%s]" % (url))
    f.decode(url)
#    print("-----> Extracted TLD:%s" % f.get_tld())
#    print("-----> Extracted TLD:%s" % f.get_domain_without_tld())

    for m in methods:
        fct = getattr(f, m)
        print "\t%s : %s" % (re.sub("^get_", "", m), fct())

Example #36
    # Getting the first message from redis.
    message = p.get_from_set()
    prec_filename = None

    max_execution_time = p.config.getint("Onion", "max_execution_time")

    # send to crawler:
    activate_crawler = p.config.get("Crawler", "activate_crawler")
    if activate_crawler == 'True':
        activate_crawler = True
        print('Crawler enabled')
    else:
        activate_crawler = False
        print('Crawler disabled')

    faup = Faup()

    # Thanks to Faup project for this regex
    # https://github.com/stricaud/faup
    url_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
    i2p_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
    re.compile(url_regex)


    while True:
        message = p.get_from_set()
        if message is not None:
            print(message)
            filename, score = message.split()

            # "For each new paste"
Example #37
        position = new_position
email_data = t_email_data

# Refang email data
email_data = refang(email_data)

## Extract various IOCs

urllist = list()
urllist += re.findall(urlmarker.WEB_URL_REGEX, email_data)
urllist += re.findall(urlmarker.IP_REGEX, email_data)
if debug:
    syslog.syslog(str(urllist))

# Init Faup
f = Faup()

# Add tags according to configuration
for malware in malwaretags:
    if malware in email_subject.lower():
        for tag in malwaretags[malware]:
            misp.add_tag(new_event, tag)

# Extract and add hashes
hashlist_md5 = re.findall(hashmarker.MD5_REGEX, email_data)
hashlist_sha1 = re.findall(hashmarker.SHA1_REGEX, email_data)
hashlist_sha256 = re.findall(hashmarker.SHA256_REGEX, email_data)

for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
Example #38
File: test.py  Project: aguinet/faup
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()

for url in urls:
    url = url.replace('\n', '')
    print("URL:[%s]" % (url))
    f.decode(url)
    print("-----> Extracted TLD:%s" % f.get_tld())

Example #39
# Add additional tags depending on others
for tag in dependingtags:
    if tag in tlp_tag:
        for dependingtag in dependingtags[tag]:
            misp.add_tag(new_event, dependingtag)

# Extract IOCs
email_data = refang(email_data)
urllist = re.findall(urlmarker.WEB_URL_REGEX, email_data)
urllist += re.findall(urlmarker.IP_REGEX, email_data)
if debug:
    target.write(str(urllist))

# Init Faup
f = Faup()

# Add tags according to configuration
for malware in malwaretags:
    if malware in email_subject.lower():
        for tag in malwaretags[malware]:
            misp.add_tag(new_event, tag)

# Extract and add hashes
hashlist_md5 = re.findall(hashmarker.MD5_REGEX, email_data)
hashlist_sha1 = re.findall(hashmarker.SHA1_REGEX, email_data)
hashlist_sha256 = re.findall(hashmarker.SHA256_REGEX, email_data)

for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
Example #40
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pprint
from pyfaup.faup import Faup

f = Faup()
f.decode("www.météo.fr")
pprint.pprint(f.get())
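
f.get() returns the whole parse as a single dict. The exact key set depends on the Faup build; the key names below are assumptions to illustrate the lookup pattern, not a guaranteed schema:

from pyfaup.faup import Faup

f = Faup()
f.decode("http://user:pw@www.example.com:8080/a/b.html?k=v#frag")
url_parts = f.get()
for key in ('scheme', 'host', 'tld', 'port', 'resource_path'):
    print(key, url_parts.get(key))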

Example #41
    # Protocol file path
    protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                         p.config.get("Directories", "protocolsfile"))

    # Country to log as critical
    cc_critical = p.config.get("Url", "cc_critical")

    # FUNCTIONS #
    publisher.info("Script URL subscribed to channel web_categ")

    # FIXME For retro compatibility
    channel = 'web_categ'

    message = p.get_from_set()
    prec_filename = None
    faup = Faup()

    # Get all uri from protocolsfile (Used for Curve)
    uri_scheme = ""
    with open(protocolsfile_path, 'r') as scheme_file:
        for scheme in scheme_file:
            uri_scheme += scheme[:-1]+"|"
    uri_scheme = uri_scheme[:-1]

    url_regex = "("+uri_scheme+")\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"

    while True:
        if message is not None:
            filename, score = message.split()

            if prec_filename is None or filename != prec_filename:
Example #42
import redis
import dns.exception
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher

from pyfaup.faup import Faup

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Mail'

    faup = Faup()

    p = Process(config_section)
    addr_dns = p.config.get("Mail", "dns")

    # REDIS #
    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"),
        decode_responses=True)
    # ARDB #
    server_statistics = redis.StrictRedis(
        host=p.config.get("ARDB_Statistics", "host"),
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
Example #43
    urls_file = None
    try:
        urls_file = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
    except IOError:
        url_arg = sys.argv[1]

    if urls_file is None:
        source_info = "arg:%s" % (sys.argv[1])
    else:
        source_info = "file:%s" % (sys.argv[1])

    urlw_log = UrlwLog(source_info)
    urlw_log.open()
    urlw_log.custom_log("Starting...")
    urlw_p = UrlwPlugins(urlw_log)

    fauplib = Faup()

    if source_info.startswith("arg:"):
        fauplib.decode(sys.argv[1])
        faup_object = fauplib.get()
        for plugin in urlw_p.plugins_list:
            urlw_p.run(plugin, sys.argv[1], faup_object)

    elif source_info.startswith("file:"):
        urls = urls_file.readlines()
        for url in urls:
            fauplib.decode(url)
            faup_object = fauplib.get()
            for plugin in urlw_p.plugins_list:
                urlw_p.run(plugin, url, faup_object)
Example #44
    # FILE CURVE SECTION #
    csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                                  p.config.get("Directories", "protocolstrending_csv"))
    protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                 p.config.get("Directories", "protocolsfile"))

    csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                                p.config.get("Directories", "tldstrending_csv"))
    tldsfile_path = os.path.join(os.environ['AIL_HOME'],
                                 p.config.get("Directories", "tldsfile"))

    csv_path_domain = os.path.join(os.environ['AIL_HOME'],
                                   p.config.get("Directories", "domainstrending_csv"))

    faup = Faup()
    generate_new_graph = False
    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            if generate_new_graph:
                generate_new_graph = False
                today = datetime.date.today()
                year = today.year
                month = today.month

                print('Building protocol graph')
                lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
Example #45
File: test.py  Project: sim0nx/faup
#!/usr/bin/python

from pyfaup.faup import Faup

url = "http://www.wallinfire.net"

f = Faup()
print("We decode the url: %s" % (url))
f.decode(url)
data = f.get()
print("URL TLD: %s" % (data['tld']))

Example #46
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'core/'))
import screen

config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None

# load crawler config
config_loader = ConfigLoader.ConfigLoader(config_file='crawlers.cfg')
#splash_manager_url = config_loader.get_config_str('Splash_Manager', 'splash_url')
#splash_api_key = config_loader.get_config_str('Splash_Manager', 'api_key')
config_loader = None

faup = Faup()


def generate_uuid():
    return str(uuid.uuid4()).replace('-', '')


def is_valid_onion_domain(domain):
    if not domain.endswith('.onion'):
        return False
    domain = domain.replace('.onion', '', 1)
    if len(domain) == 16:  # v2 address
        r_onion = r'[a-z0-9]{16}'
        if re.match(r_onion, domain):
            return True
    elif len(domain) == 56:  # v3 address
Example #47
class TestPhishing(unittest.TestCase):
    faup = Faup()

    def setUp(self):
        parser = mailparser.parse_from_file(mail_thug)
        self.email = parser.mail
        self.attachments = parser.attachments

        parser = mailparser.parse_from_file(mail_form)
        self.email_form = parser.mail

        body = self.email_form.get("body")
        self.urls = utils.urls_extractor(body, self.faup)

        d = {
            "generic": "conf/keywords/targets.example.yml",
            "custom": "conf/keywords/targets_english.example.yml"
        }
        self.targets = utils.load_keywords_dict(d)

        d = {
            "generic": "conf/keywords/subjects.example.yml",
            "custom": "conf/keywords/subjects_english.example.yml"
        }
        self.subjects = utils.load_keywords_list(d)

    def test_ParserError(self):
        parser = mailparser.parse_from_file(mail_test_6)
        body = parser.mail.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_none_values(self):
        email = copy.deepcopy(self.email)
        email.pop("body", None)
        email.pop("subjects", None)
        email.pop("from", None)

        phishing.check_phishing(email=email,
                                attachments=self.attachments,
                                urls_body=self.urls,
                                urls_attachments=self.urls,
                                target_keys=self.targets,
                                subject_keys=self.subjects)

    def test_check_form(self):
        body = self.email_form.get("body")
        flag_form = phishing.check_form(body)
        self.assertTrue(flag_form)

        body = self.email.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_form_value_error(self):
        parser = mailparser.parse_from_file(mail_test_5)
        body = parser.mail.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_check_urls(self):
        flag = False
        if any(
                phishing.check_urls(self.urls, i)
                for i in self.targets.values()):
            flag = True

        self.assertTrue(flag)

    def test_check_phishing(self):
        results = phishing.check_phishing(email=self.email,
                                          attachments=self.attachments,
                                          urls_body=self.urls,
                                          urls_attachments=self.urls,
                                          target_keys=self.targets,
                                          subject_keys=self.subjects)

        self.assertIsInstance(results, dict)
        self.assertEqual(results["score"], 123)
        self.assertIn("filename_attachments", results["score_expanded"])
        self.assertIn("mail_subject", results["score_expanded"])
        self.assertIn("mail_body", results["score_expanded"])
        self.assertIn("mail_from", results["score_expanded"])
        self.assertIn("urls_body", results["score_expanded"])
        self.assertIn("urls_attachments", results["score_expanded"])
        self.assertIn("Test", results["targets"])
        self.assertTrue(results["with_phishing"])

    def test_check_phishing_form(self):
        results = phishing.check_phishing(email=self.email_form,
                                          attachments=self.attachments,
                                          urls_body=self.urls,
                                          urls_attachments=self.urls,
                                          target_keys=self.targets,
                                          subject_keys=self.subjects)

        self.assertIn("mail_form", results["score_expanded"])
Example #48
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    minimumLengthThreshold = p.config.getint("Credential", "minimumLengthThreshold")

    faup = Faup()
    server_cred = redis.StrictRedis(
        host=p.config.get("ARDB_TermCred", "host"),
        port=p.config.get("ARDB_TermCred", "port"),
        db=p.config.get("ARDB_TermCred", "db"),
        decode_responses=True)

    server_statistics = redis.StrictRedis(
        host=p.config.get("ARDB_Statistics", "host"),
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
        decode_responses=True)

    criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
    minTopPassList = p.config.getint("Credential", "minTopPassList")
Example #50
REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    module_name = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)

    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue
Example #51
File: test.py  Project: sebdraven/faup
#!/usr/bin/python

from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()
for url in urls:
    url = url.replace('\n', '')
    # print("We decode the url: %s" % (url))
    f.decode(bytes(url, 'utf-8'), False)
    f.get_tld()
    # f.get_domain()
    # f.get_subdomain()
    # print("URL TLD: %s" % (f.get_tld()))