def run(self):
    i = 0
    while True:
        i = i + 1
        if i % 1000 == 0:
            time.sleep(10)
        self.lock.acquire()
        self.r.switchDB(1)
        url = self.r.rpop('crawl')
        self.lock.release()
        # print url
        fex = Faup()
        if url:
            print("url found: " + url)
            try:
                fex.decode(url)
                domain = fex.get_host()
                entry = self.db.new_domaines.find_one({'domaine': domain})
                if entry is None:
                    print("record: " + domain)
                    self.db.new_domaines.save({'domaine': domain, 'urls': [url]})
                else:
                    # Known domain: only append the URL if it is not stored yet
                    urls_stored = entry['urls']
                    if url not in urls_stored:
                        urls_stored.append(url)
                        entry['urls'] = urls_stored
                        self.db.new_domaines.save(entry)
            except Exception:
                print("parsing fault " + url)
def getmisp_urls(key, url, timeframe):
    response_domains = []
    headers = {
        'Authorization': '{}'.format(key),
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }
    payload = '{ "returnFormat": "json", "type": "url", "last": "%s", "enforceWarninglist": true }' % timeframe
    response = requests.post(url, headers=headers, data=payload, verify=False)
    json_response = json.loads(response.text)
    fp = Faup()
    try:
        for attr in json_response['response']['Attribute']:
            url = attr['value']
            eventid = attr['event_id']
            if eventid not in ignore_eventid:
                category = attr['category']
                timestamp = datetime.datetime.utcfromtimestamp(int(attr['timestamp'])).strftime('%Y-%m-%d')
                fp.decode(url)
                domain = fp.get_domain()
                if re.match(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", domain):
                    response_domains.append({
                        'domain': domain,
                        'eventid': eventid,
                        'category': category,
                        'timestamp': timestamp
                    })
        return response_domains
    except:
        return response_domains
def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if is_ip(host):
        if ':' in host:
            try:
                socket.inet_pton(socket.AF_INET6, host)
                ipv6 = [host]
            except:
                pass
        else:
            try:
                socket.inet_aton(host)
                ipv4 = [host]
            except:
                pass
    else:
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
def whois(server, port, domain, ignorelist, replacelist):
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except Exception:
        print("Connection problems - check WHOIS server")
        print("WHOIS request while problem occurred: {}".format(domain))
        print("WHOIS server: {}:{}".format(server, port))
        sys.exit(0)
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    # Sockets handle bytes: encode the query and decode the response
    s.send((d + "\r\n").encode())
    response = ''
    while True:
        chunk = s.recv(4096)
        if not chunk:
            break
        response += chunk.decode(errors='ignore')
    s.close()
    match = re.findall(r'[\w\.-]+@[\w\.-]+', response)
    emails = process_emails(match, ignorelist, replacelist)
    if len(emails) == 0:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
def __init__(self, misp_url, misp_key, verifycert, config, offline=False, urlsonly=False):
    self.offline = offline
    if not self.offline:
        self.misp = ExpandedPyMISP(misp_url, misp_key, verifycert, debug=config.debug)
    self.config = config
    self.urlsonly = urlsonly
    if not hasattr(self.config, 'enable_dns'):
        setattr(self.config, 'enable_dns', True)
    if self.urlsonly is False:
        setattr(self.config, 'enable_dns', False)
    self.debug = self.config.debug
    self.config_from_email_body = {}
    if not hasattr(self.config, 'ignore_nullsize_attachments'):
        setattr(self.config, 'ignore_nullsize_attachments', False)
    self.ignore_nullsize_attachments = self.config.ignore_nullsize_attachments
    # Init Faup
    self.f = Faup()
    self.sightings_to_add = []
def __init__(self):
    super(Credential, self).__init__()

    self.faup = Faup()

    self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

    # Database
    config_loader = ConfigLoader.ConfigLoader()
    self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
    self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

    # Config values
    self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
    self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")

    self.max_execution_time = 30

    # Waiting time in seconds between two processed messages
    self.pending_seconds = 10

    # Send module state to logs
    self.redis_logger.info(f"Module {self.module_name} initialized")
def run(self):
    i = 0
    while True:
        i = i + 1
        if i % 1000 == 0:
            time.sleep(10)
        url = self.r.rpop('crawl')
        fex = Faup()
        if url:
            print("url found: " + url)
            fex.decode(url)
            domain = fex.get_host()
            entry = self.db.new_domaines.find_one({'domaine': domain})
            if entry is None:
                print("record: " + domain)
                self.db.new_domaines.save({'domaine': domain, 'urls': [url]})
            else:
                # Known domain: only append the URL if it is not stored yet
                urls_stored = entry['urls']
                if url not in urls_stored:
                    urls_stored.append(url)
                    entry['urls'] = urls_stored
                    self.db.new_domaines.save(entry)
class Urls(AbstractModule):
    """
    Urls module for AIL framework
    """

    def __init__(self):
        """
        Init Urls
        """
        super(Urls, self).__init__()

        self.faup = Faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))

        # Get all uri from protocolsfile (Used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1] + "|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        id, score = message.split()

        item = Item(id)
        item_content = item.get_content()

        l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key,
                                            self.url_regex, item.get_id(), item_content)

        for url in l_urls:
            self.faup.decode(url)
            unpack_url = self.faup.get()

            to_send = f"{url} {item.get_id()}"
            print(to_send)
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")

        if len(l_urls) > 0:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
def __init__(self):
    super(LibInjection, self).__init__()

    self.faup = Faup()

    config_loader = ConfigLoader()
    self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

    self.redis_logger.info(f"Module: {self.module_name} Launched")
def harvesting_google(query, numberofpage):
    listreturn = []
    result = Popen(['casperjs', 'CeleryWeb/casperjs/googlesearch.js',
                    str(query), str(numberofpage)], stdout=PIPE)
    urls = result.stdout.readlines()
    for url in urls:
        f = Faup()
        url = url.replace('\n', '')
        f.decode(url)
        listreturn.append(f.get())
    return listreturn
def get_urls(url, depth=1):
    if depth > 5:
        print('Too many redirects.')
        return

    fex = Faup()

    def meta_redirect(content):
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    a, url = text.split('=', 1)
                    return url.strip()
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except:
        # That one can fail (DNS for example)
        # FIXME: inform that the get failed
        yield url
        return

    if response.history is not None:
        for h in response.history:
            # Yield the urls in the order we find them
            yield h.url
    yield response.url

    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
            if not base.endswith('/'):
                base += '/'
            meta_redir_url = base + meta_redir_url
        for url in get_urls(meta_redir_url, depth):
            yield url
class SQLInjectionDetection(AbstractModule):
    """docstring for SQLInjectionDetection module."""

    # # TODO: IMPROVE ME
    # Reference: https://github.com/stamparm/maltrail/blob/master/core/settings.py
    SQLI_REGEX = r"information_schema|sysdatabases|sysusers|floor\(rand\(|ORDER BY \d+|\bUNION\s+(ALL\s+)?SELECT\b|\b(UPDATEXML|EXTRACTVALUE)\(|\bCASE[^\w]+WHEN.*THEN\b|\bWAITFOR[^\w]+DELAY\b|\bCONVERT\(|VARCHAR\(|\bCOUNT\(\*\)|\b(pg_)?sleep\(|\bSELECT\b.*\bFROM\b.*\b(WHERE|GROUP|ORDER)\b|\bSELECT \w+ FROM \w+|\b(AND|OR|SELECT)\b.*/\*.*\*/|/\*.*\*/.*\b(AND|OR|SELECT)\b|\b(AND|OR)[^\w]+\d+['\") ]?[=><]['\"( ]?\d+|ODBC;DRIVER|\bINTO\s+(OUT|DUMP)FILE"

    def __init__(self):
        super(SQLInjectionDetection, self).__init__()

        self.faup = Faup()

        config_loader = ConfigLoader()
        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        url, id = message.split()

        if self.is_sql_injection(url):
            self.faup.decode(url)
            url_parsed = self.faup.get()

            item = Item(id)
            item_id = item.get_id()
            print(f"Detected SQL in URL: {item_id}")
            print(urllib.request.unquote(url))
            to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)

    # Try to detect if the url passed might be a SQL injection by applying the regex
    # defined above on it.
    def is_sql_injection(self, url_parsed):
        line = urllib.request.unquote(url_parsed)

        return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
def __post_init__(self):
    # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
    f = Faup()
    f.decode(self.url)
    self.scheme = f.get_scheme()
    self.top_level_domain = f.get_tld()
    self.domain = f.get_domain()
    self.subdomain = f.get_subdomain()
    self.path = f.get_resource_path()
def run(self):
    i = 0
    while True:
        i = i + 1
        if i % 1000 == 0:
            time.sleep(10)
        url = self.r.rpop("crawl")
        fex = Faup()
        if url:
            fex.decode(url)
            domain = fex.get_host()
            entry = self.db.new_domaines.find_one({"domaine": domain})
            if entry is None:
                print("record: " + domain)
                self.db.new_domaines.save({"domaine": domain, "urls": [url]})
def tld_extract(domain):
    # Reuse a single Faup instance cached on __builtins__
    if "_faup" not in __builtins__:
        __builtins__["_faup"] = Faup()
    _faup = __builtins__["_faup"]
    # Strip surrounding dots while the value is still bytes, then decode for Faup
    _faup.decode(domain.strip(b".").decode("utf-8"))
    return (_faup.get_subdomain() or b"",
            _faup.get_domain_without_tld() or b"",
            _faup.get_tld() or b"")
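# A minimal usage sketch for the tld_extract() helper above; the input domain is
# illustrative and the exact byte values returned depend on the pyfaup build and
# the public-suffix list it ships with.
subdomain, domain_without_tld, tld = tld_extract(b"www.example.co.uk")
print(subdomain, domain_without_tld, tld)  # e.g. b'www' b'example' b'co.uk'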
def __post_init__(self):
    if self.domain is None:
        # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
        f = Faup()
        f.decode(self.address.split("@")[-1])
        self.top_level_domain = f.get_tld()
        self.domain = f.get_domain()
        self.subdomain = f.get_subdomain()
def process(self):
    list_domains = self.db['new_domaines'].distinct('domaine')
    fex = Faup()
    for domain in list_domains:
        url = 'http://' + str(domain)
        fex.decode(url, False)
        # Print as CSV: tld,domain,reversed subdomain labels (with 'www' dropped)
        print((fex.get_tld() + ',' + fex.get_domain() + ',' +
               ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', '')).replace(',,', ','))
def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if not is_ip(host):
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6
def is_valid_url(url):
    cached = _cache_get(url, 'valid')
    key = date.today().isoformat() + '_submissions'
    r_cache.zincrby(key, url)
    if cached is not None:
        return cached
    fex = Faup()
    if url.startswith('hxxp'):
        url = 'http' + url[4:]
    elif not url.startswith('http'):
        url = 'http://' + url
    logging.debug("Checking validity of URL: " + url)
    fex.decode(url)
    scheme = fex.get_scheme()
    host = fex.get_host()
    if scheme is None or host is None:
        reason = "Not a valid http/https URL/URI"
        return False, url, reason
    _cache_set(url, (True, url, None), 'valid')
    return True, url, None
def __init__(self):
    super(WebStats, self).__init__()

    # Send module state to logs
    self.redis_logger.info("Module %s initialized" % (self.module_name))
    # Send a short description of the module to the logs
    self.redis_logger.info("Makes statistics about valid URL")

    self.pending_seconds = 5 * 60

    # REDIS #
    self.r_serv_trend = redis.StrictRedis(
        host=self.process.config.get("ARDB_Trending", "host"),
        port=self.process.config.get("ARDB_Trending", "port"),
        db=self.process.config.get("ARDB_Trending", "db"),
        decode_responses=True)

    # FILE CURVE SECTION #
    self.csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                                       self.process.config.get("Directories", "protocolstrending_csv"))
    self.protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                           self.process.config.get("Directories", "protocolsfile"))

    self.csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                                     self.process.config.get("Directories", "tldstrending_csv"))
    self.tldsfile_path = os.path.join(os.environ['AIL_HOME'],
                                      self.process.config.get("Directories", "tldsfile"))

    self.csv_path_domain = os.path.join(os.environ['AIL_HOME'],
                                        self.process.config.get("Directories", "domainstrending_csv"))

    self.faup = Faup()
    self.generate_new_graph = False
def initialize(self, stormconf, context):
    super(Urls, self).initialize(stormconf, context)

    # Faup
    self.faup = Faup()

    # Input bolts for Phishing bolt
    self.input_bolts = set(context["source->stream->grouping"].keys())

    # All mails
    self._mails = {}

    # Load keywords
    self._load_lists()
def __init__(self):
    """
    Init Web
    """
    super(Web, self).__init__()

    # REDIS Cache
    self.r_serv2 = redis.StrictRedis(
        host=self.process.config.get("Redis_Cache", "host"),
        port=self.process.config.getint("Redis_Cache", "port"),
        db=self.process.config.getint("Redis_Cache", "db"),
        decode_responses=True)

    # Country to log as critical
    self.cc_critical = self.process.config.get("Url", "cc_critical")

    # FUNCTIONS #
    self.faup = Faup()

    # Protocol file path
    protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                      self.process.config.get("Directories", "protocolsfile"))

    # Get all uri from protocolsfile (Used for Curve)
    uri_scheme = ""
    with open(protocolsfile_path, 'r') as scheme_file:
        for scheme in scheme_file:
            uri_scheme += scheme[:-1]+"|"
    uri_scheme = uri_scheme[:-1]

    self.url_regex = "((?i:"+uri_scheme + \
        ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

    self.prec_filename = None

    # Send module state to logs
    self.redis_logger.info("Module %s initialized" % (self.module_name))
def test_urls_extractor(self):
    body = """
    bla bla https://tweetdeck.twitter.com/random bla bla
    http://kafka.apache.org/documentation.html
    http://kafka.apache.org/documentation1.html
    bla bla bla https://docs.python.org/2/library/re.html bla bla
    bla bla bla bla bla https://docs.python.org/2/library/re_2.html> bla bla
    <p>https://tweetdeck.twitter.com/random</p> bla bla
    <p>https://tweetdeck.twitter.com/random_2</p>
    """

    body_unicode_error = """
    Return-Path: <>
    Delivered-To: [email protected]
    Received: (qmail 15482 invoked from network); 29 Nov 2015 12:28:40 -000
    Received: from unknown (HELO 112.149.154.61) (112.149.154.61)
      by smtp.customers.net with SMTP; 29 Nov 2015 12:28:40 -0000
    Received: from unknown (HELO localhost)
      ([email protected]@110.68.103.81)
      by 112.149.154.61 with ESMTPA; Sun, 29 Nov 2015 21:29:24 +0900
    From: [email protected]
    To: [email protected]
    Subject: Gain your male attrctiveness

    Give satisfaction to your loved one
    http://contents.xn--90afavbplfx2a6a5b2a.xn--p1ai/
    """

    parser = Faup()

    urls = utils.urls_extractor(parser, body)
    self.assertIsInstance(urls, dict)
    self.assertIn("apache.org", urls)
    self.assertIn("python.org", urls)
    self.assertIn("twitter.com", urls)
    for i in ("apache.org", "python.org", "twitter.com"):
        self.assertIsInstance(urls[i], list)
        self.assertEqual(len(urls[i]), 2)

    urls = utils.urls_extractor(parser, body_unicode_error)
    self.assertIsInstance(urls, dict)
    self.assertIn("xn--90afavbplfx2a6a5b2a.xn--p1ai", urls)
    self.assertEqual(len(urls["xn--90afavbplfx2a6a5b2a.xn--p1ai"]), 1)
def sort(self, elem_links, url):
    fex = Faup()
    f = Filters()
    f.load()
    self.r.switchDB(1)
    extend = True
    domainfilter = True
    schemefilter = True
    try:
        for link in elem_links:
            new_url = link
            self.r.switchDB(2)
            if not self.r.get(new_url) and new_url:
                self.r.switchDB(1)
                if not self.r.get(new_url):
                    fex.decode(new_url)
                    domain = fex.get_host()
                    if f.isfilteredscheme(fex.get_scheme()):
                        self.r.switchDB(2)
                        self.r.put(new_url, new_url)
                        schemefilter = False
                    if f.isfiltereddomains(domain):
                        self.r.switchDB(2)
                        self.r.put(new_url, new_url)
                        domainfilter = False
                    if f.isfilteredextention(fex.get_resource_path()):
                        extend = False
                        self.r.switchDB(2)
                        self.r.put(new_url, new_url)
                    if extend and domainfilter and schemefilter:
                        self.r.switchDB(1)
                        self.r.rpush('crawl', new_url)
                        self.queue.append(new_url)
    except TypeError as e:
        print("TypeError")
import time
import sys
from packages import Paste
from pubsublogger import publisher
from Helper import Process
import re
from pyfaup.faup import Faup

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    critical = 8

    regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue

        filepath, count = message.split()
def initialize(self, stormconf, context):
    super(AbstractUrlsHandlerBolt, self).initialize(stormconf, context)

    self._load_whitelist()
    self._parser_faup = Faup()
# dynamically list all Faup's methods
methods = []
for m in dir(Faup):
    if re.search("^get_", m):
        methods.append(m)
methods.remove("get_version")

# run
if len(sys.argv) != 2:
    print("%s <file containing 1 url per line>" % sys.argv[0])
    sys.exit(0)

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()

for url in urls:
    url = url.replace('\n', '')
    print("URL:[%s]" % (url))
    f.decode(url)
    # print("-----> Extracted TLD:%s" % f.get_tld())
    # print("-----> Extracted TLD:%s" % f.get_domain_without_tld())
    for m in methods:
        fct = getattr(f, m)
        print("\t%s : %s" % (re.sub("^get_", "", m), fct()))
# Getting the first message from redis.
message = p.get_from_set()
prec_filename = None

max_execution_time = p.config.getint("Onion", "max_execution_time")

# send to crawler:
activate_crawler = p.config.get("Crawler", "activate_crawler")
if activate_crawler == 'True':
    activate_crawler = True
    print('Crawler enabled')
else:
    activate_crawler = False
    print('Crawler disabled')

faup = Faup()

# Thanks to Faup project for this regex
# https://github.com/stricaud/faup
url_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
i2p_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
re.compile(url_regex)

while True:
    message = p.get_from_set()
    if message is not None:
        print(message)
        filename, score = message.split()

        # "For each new paste"
position = new_position
email_data = t_email_data

# Refang email data
email_data = refang(email_data)

## Extract various IOCs
urllist = list()
urllist += re.findall(urlmarker.WEB_URL_REGEX, email_data)
urllist += re.findall(urlmarker.IP_REGEX, email_data)
if debug:
    syslog.syslog(str(urllist))

# Init Faup
f = Faup()

# Add tags according to configuration
for malware in malwaretags:
    if malware in email_subject.lower():
        for tag in malwaretags[malware]:
            misp.add_tag(new_event, tag)

# Extract and add hashes
hashlist_md5 = re.findall(hashmarker.MD5_REGEX, email_data)
hashlist_sha1 = re.findall(hashmarker.SHA1_REGEX, email_data)
hashlist_sha256 = re.findall(hashmarker.SHA256_REGEX, email_data)

for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
#!/usr/bin/python
from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()
for url in urls:
    url = url.replace('\n', '')
    print("URL:[%s]" % (url))
    f.decode(url)
    print("-----> Extracted TLD:%s" % f.get_tld())
# Add additional tags depending on others
for tag in dependingtags:
    if tag in tlp_tag:
        for dependingtag in dependingtags[tag]:
            misp.add_tag(new_event, dependingtag)

# Extract IOCs
email_data = refang(email_data)
urllist = re.findall(urlmarker.WEB_URL_REGEX, email_data)
urllist += re.findall(urlmarker.IP_REGEX, email_data)
if debug:
    target.write(str(urllist))

# Init Faup
f = Faup()

# Add tags according to configuration
for malware in malwaretags:
    if malware in email_subject.lower():
        for tag in malwaretags[malware]:
            misp.add_tag(new_event, tag)

# Extract and add hashes
hashlist_md5 = re.findall(hashmarker.MD5_REGEX, email_data)
hashlist_sha1 = re.findall(hashmarker.SHA1_REGEX, email_data)
hashlist_sha256 = re.findall(hashmarker.SHA256_REGEX, email_data)

for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pprint
from pyfaup.faup import Faup

f = Faup()
f.decode("www.météo.fr")
pprint.pprint(f.get())
# Protocol file path
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                  p.config.get("Directories", "protocolsfile"))

# Country to log as critical
cc_critical = p.config.get("Url", "cc_critical")

# FUNCTIONS #
publisher.info("Script URL subscribed to channel web_categ")

# FIXME For retro compatibility
channel = 'web_categ'

message = p.get_from_set()
prec_filename = None

faup = Faup()

# Get all uri from protocolsfile (Used for Curve)
uri_scheme = ""
with open(protocolsfile_path, 'r') as scheme_file:
    for scheme in scheme_file:
        uri_scheme += scheme[:-1]+"|"
uri_scheme = uri_scheme[:-1]

url_regex = "("+uri_scheme+")\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*"

while True:
    if message is not None:
        filename, score = message.split()

        if prec_filename is None or filename != prec_filename:
import dns.exception
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
from pyfaup.faup import Faup

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Mail'

    faup = Faup()

    p = Process(config_section)
    addr_dns = p.config.get("Mail", "dns")

    # REDIS #
    r_serv2 = redis.StrictRedis(
        host=p.config.get("Redis_Cache", "host"),
        port=p.config.getint("Redis_Cache", "port"),
        db=p.config.getint("Redis_Cache", "db"),
        decode_responses=True)

    # ARDB #
    server_statistics = redis.StrictRedis(
        host=p.config.get("ARDB_Statistics", "host"),
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
try:
    urls_file = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
except IOError:
    # Argument is not a readable file: treat it as a single URL
    urls_file = None
    url_arg = sys.argv[1]

if urls_file is None:
    source_info = "arg:%s" % (sys.argv[1])
else:
    source_info = "file:%s" % (sys.argv[1])

urlw_log = UrlwLog(source_info)
urlw_log.open()
urlw_log.custom_log("Starting...")
urlw_p = UrlwPlugins(urlw_log)

fauplib = Faup()

if source_info.startswith("arg:"):
    fauplib.decode(sys.argv[1])
    faup_object = fauplib.get()
    for plugin in urlw_p.plugins_list:
        urlw_p.run(plugin, sys.argv[1], faup_object)
elif source_info.startswith("file:"):
    urls = urls_file.readlines()
    for url in urls:
        fauplib.decode(url)
        faup_object = fauplib.get()
        for plugin in urlw_p.plugins_list:
            urlw_p.run(plugin, url, faup_object)
# FILE CURVE SECTION #
csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                              p.config.get("Directories", "protocolstrending_csv"))
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                  p.config.get("Directories", "protocolsfile"))

csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                            p.config.get("Directories", "tldstrending_csv"))
tldsfile_path = os.path.join(os.environ['AIL_HOME'],
                             p.config.get("Directories", "tldsfile"))

csv_path_domain = os.path.join(os.environ['AIL_HOME'],
                               p.config.get("Directories", "domainstrending_csv"))

faup = Faup()
generate_new_graph = False

# Endless loop getting messages from the input queue
while True:
    # Get one message from the input queue
    message = p.get_from_set()

    if message is None:
        if generate_new_graph:
            generate_new_graph = False

            today = datetime.date.today()
            year = today.year
            month = today.month

            print('Building protocol graph')
            lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
#!/usr/bin/python
from pyfaup.faup import Faup

url = "http://www.wallinfire.net"
f = Faup()
print("We decode the url: %s" % (url))
f.decode(url)
data = f.get()
print("URL TLD: %s" % (data['tld']))
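# The same field can also be read through the per-field accessors used in the
# other snippets here; a small sketch equivalent to reading data['tld'] above
# (the example URL is the one from the snippet, kept for illustration only).
from pyfaup.faup import Faup

f = Faup()
f.decode("http://www.wallinfire.net")
print("URL TLD: %s" % f.get_tld())      # same value as data['tld']
print("Domain : %s" % f.get_domain())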
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'core/'))
import screen

config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None

# load crawler config
config_loader = ConfigLoader.ConfigLoader(config_file='crawlers.cfg')
#splash_manager_url = config_loader.get_config_str('Splash_Manager', 'splash_url')
#splash_api_key = config_loader.get_config_str('Splash_Manager', 'api_key')
config_loader = None

faup = Faup()

def generate_uuid():
    return str(uuid.uuid4()).replace('-', '')

def is_valid_onion_domain(domain):
    if not domain.endswith('.onion'):
        return False
    domain = domain.replace('.onion', '', 1)
    if len(domain) == 16:  # v2 address
        r_onion = r'[a-z0-9]{16}'
        if re.match(r_onion, domain):
            return True
    elif len(domain) == 56:  # v3 address
class TestPhishing(unittest.TestCase):
    faup = Faup()

    def setUp(self):
        parser = mailparser.parse_from_file(mail_thug)
        self.email = parser.mail
        self.attachments = parser.attachments

        parser = mailparser.parse_from_file(mail_form)
        self.email_form = parser.mail

        body = self.email_form.get("body")
        self.urls = utils.urls_extractor(body, self.faup)

        d = {"generic": "conf/keywords/targets.example.yml",
             "custom": "conf/keywords/targets_english.example.yml"}
        self.targets = utils.load_keywords_dict(d)

        d = {"generic": "conf/keywords/subjects.example.yml",
             "custom": "conf/keywords/subjects_english.example.yml"}
        self.subjects = utils.load_keywords_list(d)

    def test_ParserError(self):
        parser = mailparser.parse_from_file(mail_test_6)
        body = parser.mail.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_none_values(self):
        email = copy.deepcopy(self.email)
        email.pop("body", None)
        email.pop("subjects", None)
        email.pop("from", None)

        phishing.check_phishing(
            email=email,
            attachments=self.attachments,
            urls_body=self.urls,
            urls_attachments=self.urls,
            target_keys=self.targets,
            subject_keys=self.subjects)

    def test_check_form(self):
        body = self.email_form.get("body")
        flag_form = phishing.check_form(body)
        self.assertTrue(flag_form)

        body = self.email.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_form_value_error(self):
        parser = mailparser.parse_from_file(mail_test_5)
        body = parser.mail.get("body")
        flag_form = phishing.check_form(body)
        self.assertFalse(flag_form)

    def test_check_urls(self):
        flag = False
        if any(phishing.check_urls(self.urls, i)
               for i in self.targets.values()):
            flag = True
        self.assertTrue(flag)

    def test_check_phishing(self):
        results = phishing.check_phishing(
            email=self.email,
            attachments=self.attachments,
            urls_body=self.urls,
            urls_attachments=self.urls,
            target_keys=self.targets,
            subject_keys=self.subjects)

        self.assertIsInstance(results, dict)
        self.assertEqual(results["score"], 123)
        self.assertIn("filename_attachments", results["score_expanded"])
        self.assertIn("mail_subject", results["score_expanded"])
        self.assertIn("mail_body", results["score_expanded"])
        self.assertIn("mail_from", results["score_expanded"])
        self.assertIn("urls_body", results["score_expanded"])
        self.assertIn("urls_attachments", results["score_expanded"])
        self.assertIn("Test", results["targets"])
        self.assertTrue(results["with_phishing"])

    def test_check_phishing_form(self):
        results = phishing.check_phishing(
            email=self.email_form,
            attachments=self.attachments,
            urls_body=self.urls,
            urls_attachments=self.urls,
            target_keys=self.targets,
            subject_keys=self.subjects)

        self.assertIn("mail_form", results["score_expanded"])
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'

REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'

REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    minimumLengthThreshold = p.config.getint("Credential", "minimumLengthThreshold")

    faup = Faup()
    server_cred = redis.StrictRedis(
        host=p.config.get("ARDB_TermCred", "host"),
        port=p.config.get("ARDB_TermCred", "port"),
        db=p.config.get("ARDB_TermCred", "db"),
        decode_responses=True)
    server_statistics = redis.StrictRedis(
        host=p.config.get("ARDB_Statistics", "host"),
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
        decode_responses=True)

    criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
    minTopPassList = p.config.getint("Credential", "minTopPassList")
REDIS_KEY_NUM_PATH = 'uniqNumForUsername'

REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'

REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'

REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    module_name = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)

    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue
#!/usr/bin/python
from pyfaup.faup import Faup
import sys
import codecs
import binascii

f = Faup()
file_urls = codecs.open(sys.argv[1], 'r', 'ascii', errors='ignore')
urls = file_urls.readlines()
for url in urls:
    url = url.replace('\n', '')
    #print("We decode the url: %s" % (url))
    #if sys.version.split('.')[0].split('.')[0]=='3':
    f.decode(bytes(url, 'utf-8'), False)
    #if sys.version.split('.')[0].split('.')[0]=='2':
    #    f.decode(bytes(url), False)
    #data = f.get()
    f.get_tld()
    #f.get_domain()
    #f.get_subdomain()
    #print(f.get_tld())
    #print(f.get_domain())
    #print("URL TLD: %s" % (f.get_tld()))