def fetcher():
    """
    Main function which fetches the datasets.
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except Exception:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        """
        Check if the file already exists: if the same file is found, the
        downloaded file is dropped. Otherwise, it is moved to its final
        directory.
        """
        to_check = glob.glob(os.path.join(old_directory, '*'))
        to_check += glob.glob(os.path.join(directory, '*'))
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
def launch(): """ Fetch all the whois entry assigned to the server of this :class:`Connector` """ i = 0 while True: try: entry = temp_db.spop(key_ris) if not entry: __disconnect() i = 0 publisher.debug("Disconnected of " + server) time.sleep(sleep_timer) continue if cache_db.get(entry) is None: if not connected: __connect() publisher.debug(server + ", query : " + str(entry)) whois = fetch_whois(entry) if whois != '': cache_db.setex(entry, server + '\n' + unicode(whois, errors="replace"), cache_ttl) if not keepalive: __disconnect() i += 1 if i%10000 == 0: publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server) except IOError as text: publisher.error("IOError on " + server + ': ' + str(text)) time.sleep(sleep_timer) __disconnect()
def get_asn_descriptions(self, asn):
    if not self.has_asnhistory:
        publisher.debug('ASN History not enabled.')
        return [datetime.date.today(), 'ASN History not enabled.']
    desc_history = self.asnhistory.get_all_descriptions(asn)
    return [(date.astimezone(tz.tzutc()).date(), descr)
            for date, descr in desc_history]
def display_listof_pid(r_serv, arg):
    """Display the pid list from redis

    This function displays information in the shell about the launched processes.
    """
    jobs = {}
    joblist = []
    try:
        for job in r_serv.smembers("pid"):
            jobs = r_serv.hgetall(job)

            if jobs is not None:
                start = datetime.strptime(r_serv.hget(job, "startime"), "%Y-%m-%d_%H:%M:%S")
                end = datetime.strptime(time.strftime("%Y-%m-%d_%H:%M:%S"), "%Y-%m-%d_%H:%M:%S")
                jobs['uptime'] = str(abs(start - end))
                joblist.append(jobs)
            else:
                publisher.debug("display_listof_pid aborted due to lack of information in Redis")

        joblist = sorted(joblist, key=lambda k: k['uptime'], reverse=True)

        for job in joblist:
            print format_display_listof_pid(job, arg)

        if arg == "remain":
            print "Remaining: {0}".format(r_serv.llen("filelist"))

        if arg == "processed":
            print "processed: {0}".format(r_serv.llen("processed"))

    except TypeError:
        publisher.error("TypeError for display_listof_pid")
def refining_regex_dataset(r_serv, r_key, regex, min_match, year, month, luhn=True, dnscheck=True):
    """Refine the "raw dataset" of pastes with regular expressions

    :param r_serv: -- Redis connection database
    :param r_key: -- (str) The name of the key read in redis (often the name of the keywords category list)
    :param min_match: -- (int) Files with fewer matches than this number are deleted
    :param regex: -- Regular expression which will be matched.

    This function refines the database created by the classify_token_paste function.
    It reopens the files which matched the keywords category list, searches for the
    regular expression inside them and counts how many times it is found.

    If there are not enough matches of the regular expression, the file is deleted
    from the list. Finally, it merges the results by day, to be able to create a bar
    graph representing how many occurrences per day the regex matched.

    """
    for filename in r_serv.zrange(r_key, 0, -1):

        with gzip.open(filename, 'rb') as F:
            var = 0
            matchs = set([])

            for num, kword in enumerate(F):
                match = re.findall(regex, kword)
                var += len(match)

                for y in match:
                    if y != '' and len(y) < 100:
                        matchs.add(y)

            # If there are fewer matches than min_match, delete it (false positive)
            if len(matchs) <= min_match:
                r_serv.zrem(r_key, filename)
                publisher.debug("{0} deleted".format(filename))
            else:
                # else, change the score.
                if r_key == "creditcard_categ" and luhn:
                    for card_number in matchs:
                        if is_luhn_valid(card_number):
                            r_serv.zincrby(r_key + '_occur', filename, 1)
                            publisher.info("{1} is valid in the file {0}".format(filename, card_number))
                        else:
                            publisher.debug("{0} card is invalid".format(card_number))

                if r_key == "mails_categ" and dnscheck:
                    r_serv.zadd(r_key + '_occur', checking_MX_record(r_serv, matchs), filename)
                else:
                    # LUHN NOT TRIGGERED (other categories)
                    r_serv.zadd(r_key + '_occur', len(matchs), filename)

    create_graph_by_day_datastruct(r_serv, r_key, year, month)
def remove_pure_doppelganger(r_serv, nb):
    """Remove identical pastes

    :param r_serv: -- Redis connection database
    :param nb: -- (int) Number of executions wanted

    Add the hash of each whole file to a temporary list and compare the new hash
    to the elements of this list. If the hash is already in the list, the file is
    deleted, otherwise the hash is added to the list.

    """
    hashlist = []
    for x in xrange(0, nb):
        filename = r_serv.lpop("filelist")

        with open(filename, 'rb') as L:
            hashline = hashlib.md5(L.read()).hexdigest()

            print len(hashlist)

            if hashline in hashlist:
                os.remove(filename)
                publisher.debug("{0} removed".format(filename))
                print "{0} removed".format(filename)
            else:
                hashlist.append(hashline)
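# The in-memory hashlist above grows without bound and is lost between runs.
# A minimal sketch of the same dedup idea backed by a Redis set (SADD returns 0
# when the member is already present); this assumes the same r_serv connection
# and "filelist" queue as above and is illustrative, not the project's code.
import hashlib
import os

from pubsublogger import publisher


def remove_pure_doppelganger_redis(r_serv, nb, seen_key="seen_hashes"):
    """Drop files whose MD5 digest was already seen, tracking digests in a Redis set."""
    for _ in range(nb):
        filename = r_serv.lpop("filelist")
        if filename is None:
            break
        with open(filename, 'rb') as f:
            digest = hashlib.md5(f.read()).hexdigest()
        # sadd returns 1 for a new member, 0 if the digest was already stored.
        if r_serv.sadd(seen_key, digest) == 0:
            os.remove(filename)
            publisher.debug("{0} removed".format(filename))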
def recovering_longlines(r_serv):
    """Get longlines with their line numbers
    """
    try:
        for n in xrange(0, nb):
            filename = r_serv.lpop("longlines")

            if filename != None:
                # For each value in redis (the longline's line numbers)
                for numline in r_serv.smembers(filename):

                    with gzip.open(filename, 'rb') as F:

                        for num, line in enumerate(F):
                            # When corresponding.
                            if int(num) == int(numline):
                                pass  # TREATMENT
            else:
                publisher.debug("Empty list")
                r_serv.save()
                break

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read('./packages/config.cfg') # REDIS # r_serv = redis.StrictRedis( host = cfg.get("Redis_Queues", "host"), port = cfg.getint("Redis_Queues", "port"), db = cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Global" # ZMQ # PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global") # FONCTIONS # publisher.info("Starting to publish.") while True: filename = r_serv.lpop("filelist") if filename != None: msg = cfg.get("PubSub_Global", "channel")+" "+filename PubGlob.send_message(msg) publisher.debug("{0} Published".format(msg)) else: time.sleep(10) publisher.debug("Nothing to publish")
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # REDIS # r_serv = redis.StrictRedis( host = cfg.get("Redis_Queues", "host"), port = cfg.getint("Redis_Queues", "port"), db = cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # channel = cfg.get("PubSub_Longlines", "channel_1") subscriber_name = "tokenize" subscriber_config_section = "PubSub_Longlines" #Publisher publisher_config_section = "PubSub_Words" publisher_name = "pubtokenize" Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) channel_0 = cfg.get("PubSub_Words", "channel_0") # FUNCTIONS # publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1"))) while True: message = Sub.get_msg_from_queue(r_serv) print message if message != None: PST = P.Paste(message.split(" ",-1)[-1]) else: if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"): r_serv.srem("SHUTDOWN_FLAGS", "Tokenize") print "Shutdown Flag Up: Terminating" publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Tokeniser is idling 10s") time.sleep(10) print "sleepin" continue for word, score in PST._get_top_words().items(): if len(word) >= 4: msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score) Pub.send_message(msg) print msg else: pass
def detect_longline_from_list(r_serv, nb):
    try:
        for n in xrange(0, nb):
            if not dectect_longlines(r_serv, "filelist", True):
                break
    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
def main(): publisher.port = 6380 publisher.channel = "Script" config_section = 'DomClassifier' p = Process(config_section) addr_dns = p.config.get("DomClassifier", "dns") publisher.info("""ZMQ DomainClassifier is Running""") c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) cc = p.config.get("DomClassifier", "cc") cc_tld = p.config.get("DomClassifier", "cc_tld") while True: try: message = p.get_from_set() if message is not None: PST = Paste.Paste(message) else: publisher.debug("Script DomClassifier is idling 1s") time.sleep(1) continue paste = PST.get_p_content() mimetype = PST._get_p_encoding() if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() c.validdomain(rtype=['A'], extended=True) localizeddomains = c.include(expression=cc_tld) if localizeddomains: print(localizeddomains) publisher.warning( 'DomainC;{};{};{};Checked {} located in {};{}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print(localizeddomains) publisher.warning( 'DomainC;{};{};{};Checked {} located in {};{}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) except IOError: print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name))
def test_publisher(self):
    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
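# For reference, a minimal sketch of the setup pattern shared by the modules in
# this collection: point the publisher at the Redis instance used by
# pubsublogger, pick a channel, then call the severity methods. The values
# mirror the snippets above; they are not requirements.
from pubsublogger import publisher

publisher.port = 6380
publisher.channel = 'Script'

publisher.info('module started')
publisher.debug('idling')
publisher.warning('something looks suspicious')
publisher.error('something failed')
publisher.critical('fatal condition')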
def asn_desc_via_history(self, asn):
    if self.has_asnhistory:
        asn_descr = self.asnhistory.get_last_description(asn)
        if asn_descr is None:
            # The ASN has no description in the database
            # publisher.error(\
            #     'Unable to find the ASN description of {}. ASN History might be down.'.\
            #     format(asn))
            asn_descr = 'No ASN description has been found.'
    else:
        publisher.debug('ASN History not enabled.')
        asn_descr = 'ASN History not enabled.'
    return asn_descr
def get_TweetRawContent(self): publisher.port = 6380 publisher.channel = 'Script' #publisher.debug("[-Tweet.py-] Requested RAW Content = " + self.p_path) tweetRaw = '' #publisher.debug("[-Tweet.py-] Reading file " + self.p_path) #print("[-Tweet.py-] Reading file " + self.p_path) try: with gzip.open(self.p_path, 'rb') as f: tweetRaw = f.read().decode('utf-8') except Exception as e: publisher.debug("error opening path: "+self.p_path + " with error "+str(e)) paste = 'error opening path: '+self.p_path + ' with error '+str(e) return str(tweetRaw)
def update_running_pids(old_procs):
    """
    Update the list of the running processes and return it.
    """
    new_procs = []
    for proc in old_procs:
        if proc.poll() is None and check_pid(proc.pid):
            publisher.debug(str(proc.pid) + ' is alive')
            new_procs.append(proc)
        else:
            try:
                publisher.debug(str(proc.pid) + ' is gone')
                os.kill(proc.pid, signal.SIGKILL)
            except OSError:
                # the process is already gone
                pass
    return new_procs
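# update_running_pids() relies on a check_pid() helper that is not shown here.
# A minimal sketch of the usual idiom (signal 0 probes for existence without
# affecting the process); this is an assumption about the helper, not
# necessarily the project's implementation.
import os


def check_pid(pid):
    """Return True if a process with this PID currently exists."""
    try:
        # Signal 0 performs the permission/existence check without sending a signal.
        os.kill(pid, 0)
    except OSError:
        return False
    return True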
def launch(): """ Fetch all the whois entry assigned to the server of this :class:`Connector` """ i = 0 while True: try: entry = temp_db.spop(key_ris) if not entry: __disconnect() i = 0 publisher.debug("Disconnected of " + server) time.sleep(sleep_timer) continue if cache_db.get(entry) is None: if not connected: __connect() publisher.debug(server + ", query : " + str(entry)) whois = fetch_whois(entry) if whois != '': cache_db.setex( entry, server + '\n' + unicode(whois, errors="replace"), cache_ttl) if not keepalive: __disconnect() i += 1 if i % 10000 == 0: publisher.info( str(temp_db.scard(key_ris)) + ' to process on ' + server) except IOError as text: publisher.error("IOError on " + server + ': ' + str(text)) publisher.info( str(temp_db.scard(key_ris)) + ' to process on ' + server) time.sleep(sleep_timer) __disconnect() except Exception as e: publisher.error("Error on " + server + ': ' + str(e)) publisher.info( str(temp_db.scard(key_ris)) + ' to process on ' + server) time.sleep(sleep_timer) __disconnect()
def redis_words_ranking(pipe, r_serv, nb, minlength, maxlength):
    """Looping function

    :param pipe: -- Redis pipe.
    :param nb: -- (int) Number of pastes processed by the function
    :param minlength: -- (int) passed to the next function
    :param maxlength: -- (int) passed to the next function

    """
    try:
        for n in xrange(0, nb):
            path = r_serv.lpop("filelist")

            if path != None:
                set_listof_pid(r_serv, path, sys.argv[0])

                redis_zincr_words(pipe, path, minlength, maxlength)

                update_listof_pid(r_serv)

                r_serv.lpush("processed", path)

                publisher.debug(path)
            else:
                publisher.debug("Empty list")
                break
    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers; may raise many false positives (shalt thou seek optimization, upgrading is required)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste, may be empty
    results = reg_phone.findall(content)

    # if there are more than 4 results, we consider the Paste may contain a list of phone numbers
    if len(results) > 4:
        print results
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))


if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Send to the logging system a description of the module
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
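# A quick, self-contained check of the phone-number regex used above; the
# sample numbers are made up. It shows that findall() returns one tuple of
# groups per match, and only the count of matches is used by the module.
import re

reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')

sample = "call 01 23 45 67 89 or 0612345678, invoice #123456"
results = reg_phone.findall(sample)
print(len(results))  # the module only alerts when more than 4 results are found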
def translateTweet(self, sentence, from_lang):
    publisher.debug("[-Tweet.py-] (translateTweet) Request from " + from_lang.upper())
    # strip characters that would break the query string
    sentence = re.sub("#|@|&", "", sentence)
    cfgTM = configparser.ConfigParser()
    cfgTM.read(TMconfigfile)
    emailforTranslation = cfgTM.get("TwitterAnalyzer", "email_for_translation")
    api_url = "http://mymemory.translated.net/api/get?q={}&langpair={}|{}&de={}".format(
        sentence, from_lang.upper(), "EN", emailforTranslation)
    hdrs = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive'}
    publisher.debug("[-Tweet.py-] (translateTweet) Request url=" + api_url)
    response = requests.get(api_url, headers=hdrs)
    response_json = json.loads(response.text)
    translation = response_json["responseData"]["translatedText"]
    return translation
def dectect_longlines(r_serv, r_key, store=False, maxlength=500):
    """Store the line numbers of long lines in redis

    :param r_serv: -- The redis connection database
    :param r_key: -- (str) The key name in redis
    :param store: -- (bool) Store the line numbers or not.
    :param maxlength: -- The limit between "short lines" and "long lines"

    This function reads a redis list of filenames (paste filenames), opens each
    paste and checks whether it contains lines whose length is >= maxlength.
    If so, the paste is "tagged" as containing long lines in another redis
    structure, and the line numbers (of the long lines) can additionally be
    stored if the store argument is True.

    """
    try:
        while True:
            # r_key_list (categ)
            filename = r_serv.lpop(r_key)

            if filename != None:
                set_listof_pid(r_serv, filename, sys.argv[0])

                # for each paste
                with gzip.open(filename, 'rb') as F:
                    var = True

                    for num, line in enumerate(F):

                        if len(line) >= maxlength:
                            # publisher.debug("Longline:{0}".format(line))
                            if var:
                                r_serv.rpush("longlines", filename)
                                var = False

                            if store:
                                r_serv.sadd(filename, num)
                            else:
                                publisher.debug("Line numbers of longlines not stored")

                update_listof_pid(r_serv)
            else:
                publisher.debug("Empty list")
                return False

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
args = parser.parse_args() interval_first = args.firstdate interval_last = args.lastdate if interval_last is None: daemon = True else: daemon = False unavailable = [] while 1: got_new_files = False if daemon or interval_last is None: interval_last = datetime.date.today().strftime("%Y-%m-%d") for fname, url in to_download(): if not already_downloaded(fname) and url not in unavailable: publisher.debug("Trying to download: " + url) if downloadURL(url, fname): got_new_files = True publisher.info("Downloaded:" + fname) elif interval_last != datetime.date.today().strftime( "%Y-%m-%d"): # if today's file is not available, try again later. unavailable.append(url) if not got_new_files: publisher.info('No new files to download.') if not daemon: publisher.info('Exiting...') break time.sleep(3600)
def checking_MX_record(r_serv, adress_set):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connection database
    :param adress_set: -- (set) This is a set of email addresses
    :return: (int) Number of addresses with a responding and valid MX domain

    This function splits the email addresses and tries to resolve their domain
    names: for an address at gmail.com it will try to resolve gmail.com

    """
    score = 0
    num = len(adress_set)
    WalidMX = set([])
    # Transforming the set into a string
    MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
    resolver = dns.resolver.Resolver()
    resolver.nameservers = ['149.13.33.69']
    resolver.timeout = 5
    resolver.lifetime = 2
    if MXdomains != []:
        for MXdomain in set(MXdomains):
            try:
                # Already in Redis living.
                if r_serv.exists(MXdomain[1:]):
                    score += 1
                    WalidMX.add(MXdomain[1:])
                # Not already in Redis
                else:
                    # If I'm Walid MX domain
                    if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
                        # Gonna be added in redis.
                        r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
                        score += 1
                        WalidMX.add(MXdomain[1:])
                    else:
                        pass
            except dns.resolver.NoNameservers:
                publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
            except dns.resolver.NoAnswer:
                publisher.debug('NoAnswer, The response did not contain an answer to the question.')
            except dns.name.EmptyLabel:
                publisher.debug('SyntaxError: EmptyLabel')
            except dns.resolver.NXDOMAIN:
                r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
                publisher.debug('The query name does not exist.')
            except dns.name.LabelTooLong:
                publisher.debug('The Label is too long')
            except dns.resolver.Timeout:
                r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
            except Exception as e:
                print e
    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidMX)
if not exists_in(indexpath): ix = create_in(indexpath, schema) else: ix = open_dir(indexpath) # LOGGING # publisher.info("ZMQ Indexer is Running") while True: try: message = p.get_from_set() if message is not None: PST = Paste.Paste(message) else: publisher.debug("Script Indexer is idling 1s") time.sleep(1) continue docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() print "Indexing :", docpath if indexertype == "whoosh": indexwriter = ix.writer() indexwriter.update_document( title=unicode(docpath, errors='ignore'), path=unicode(docpath, errors='ignore'), content=unicode(paste, errors='ignore')) indexwriter.commit() except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
if message is not None: filename, score = message.split() if prec_filename is None or filename != prec_filename: PST = Paste.Paste(filename) MX_values = lib_refine.checking_MX_record( r_serv2, PST.get_regex(email_regex)) if MX_values[0] >= 1: PST.__setattr__(channel, MX_values) PST.save_attribute_redis(channel, (MX_values[0], list(MX_values[1]))) pprint.pprint(MX_values) to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\ format(PST.p_source, PST.p_date, PST.p_name, MX_values[0]) if MX_values[0] > is_critical: publisher.warning(to_print) else: publisher.info(to_print) prec_filename = filename else: publisher.debug("Script Mails is Idling 10s") print 'Sleeping' time.sleep(10) message = p.get_from_set()
port=p.config.getint("ARDB_Statistics", "port"), db=p.config.getint("ARDB_Statistics", "db"), decode_responses=True) criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") minTopPassList = p.config.getint("Credential", "minTopPassList") regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" while True: message = p.get_from_set() if message is None: publisher.debug("Script Credential is Idling 10s") #print('sleeping 10s') time.sleep(10) continue filepath, count = message.split(' ') paste = Paste.Paste(filepath) content = paste.get_p_content() creds = set(re.findall(regex_cred, content)) if len(creds) == 0: continue sites= re.findall(regex_web, content) #Use to count occurences sites_set = set(re.findall(regex_web, content))
p = Process(config_section) # port generated automatically depending on the date curYear = datetime.now().year server = redis.StrictRedis( host=p.config.get("ARDB_DB", "host"), port=p.config.get("ARDB_DB", "port"), db=curYear, decode_responses=True) # FUNCTIONS # publisher.info("Script duplicate started") while True: message = p.get_from_set() if message is not None: module_name, p_path = message.split(';') print("new alert : {}".format(module_name)) #PST = Paste.Paste(p_path) else: publisher.debug("Script Attribute is idling 10s") time.sleep(10) continue # Add in redis for browseWarningPaste # Format in set: WARNING_moduleName -> p_path key = "WARNING_" + module_name server.sadd(key, p_path) publisher.info('Saved warning paste {}'.format(p_path))
# set number of files to submit r_serv_log_submit.set(uuid + ':nb_total', len(files.children)) n = 1 for child in files.children: if verify_extention_filename(child.filename.decode()): create_paste(uuid, child.contents, ltags, ltagsgalaxies, uuid+'_'+ str(n) ) n = n + 1 else: print('bad extention') addError(uuid, 'Bad file extension: {}'.format(child.filename.decode()) ) except FileNotFoundError: print('file not found') addError(uuid, 'File not found: {}'.format(file_full_path), uuid ) remove_submit_uuid(uuid) # textarea input paste else: r_serv_log_submit.set(uuid + ':nb_total', 1) create_paste(uuid, paste_content.encode(), ltags, ltagsgalaxies, uuid) remove_submit_uuid(uuid) time.sleep(0.5) # wait for paste else: publisher.debug("Script submit_paste is Idling 10s") time.sleep(3)
p = Process(config_section) max_execution_time = p.config.getint("Curve", "max_execution_time") publisher.info("Release scripts to find release names") movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+" xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+" regexs = [movie, tv, xxx] regex = '|'.join(regexs) while True: signal.alarm(max_execution_time) filepath = p.get_from_set() if filepath is None: publisher.debug("Script Release is Idling 10s") print('Sleeping') time.sleep(10) continue paste = Paste.Paste(filepath) content = paste.get_p_content() #signal.alarm(max_execution_time) try: releases = set(re.findall(regex, content)) if len(releases) == 0: continue to_print = 'Release;{};{};{};{} releases;{}'.format( paste.p_source, paste.p_date, paste.p_name, len(releases),
publisher.info("Script duplicate started") while True: try: hash_dico = {} dupl = set() dico_range_list = [] x = time.time() message = p.get_from_set() if message is not None: path = message PST = Paste.Paste(path) else: publisher.debug("Script Attribute is idling 10s") print('sleeping') time.sleep(10) continue # the paste is too small if (PST._get_p_size() < min_paste_size): continue PST._set_p_hash_kind("ssdeep") PST._set_p_hash_kind("tlsh") # Assignate the correct redis connexion r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] # Creating the dico name: yyyymm
publisher.port = 6380
publisher.channel = "Script"

config_section = 'ApiKey'

p = Process(config_section)

publisher.info("ApiKey started")

# TODO improve REGEX
regex_aws_access_key = re.compile(r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')
regex_aws_secret_key = re.compile(r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])')
regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')

while True:
    message = p.get_from_set()

    if message is not None:
        search_api_key(message)
    else:
        publisher.debug("Script ApiKey is Idling 10s")
        time.sleep(10)
while True: if message is not None: filename, score = message.split() if prec_filename is None or filename != prec_filename: domains_list = [] PST = Paste.Paste(filename) client = ip2asn() for x in PST.get_regex(url_regex): scheme, credential, subdomain, domain, host, tld, \ port, resource_path, query_string, f1, f2, f3, \ f4 = x domains_list.append(domain) p.populate_set_out(x, 'Url') publisher.debug('{} Published'.format(x)) if f1 == "onion": print domain hostl = unicode(subdomain+domain) try: socket.setdefaulttimeout(2) ip = socket.gethostbyname(unicode(hostl)) except: # If the resolver is not giving any IPv4 address, # ASN/CC lookup is skip. continue try: l = client.lookup(ip, qType='IP')
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Keys'

    # Setup the I/O queues
    p = Process(config_section)

    # Send to the logging system a description of the module
    publisher.info("Run Keys module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        paste = Paste.Paste(message)
        search_key(paste)

        # (Optional) Send that thing to the next queue
publisher.info("Find credentials") faup = Faup() regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" redis_cache_key = regex_helper.generate_redis_cache_key(module_name) while True: message = p.get_from_set() if message is None: publisher.debug("Script Credential is Idling 10s") time.sleep(10) continue item_id, count = message.split() item_content = Item.get_item_content(item_id) # Extract all credentials all_credentials = regex_helper.regex_findall( module_name, redis_cache_key, regex_cred, item_id, item_content, max_time=max_execution_time)
def checking_A_record(r_serv, domains_set): score = 0 num = len(domains_set) WalidA = set([]) resolver = dns.resolver.Resolver() resolver.nameservers = ['149.13.33.69'] resolver.timeout = 5 resolver.lifetime = 2 for Adomain in domains_set: try: # Already in Redis living. if r_serv.exists(Adomain): score += 1 WalidA.add(Adomain) # Not already in Redis else: # If I'm Walid domain if resolver.query(Adomain, rdtype=dns.rdatatype.A): # Gonna be added in redis. r_serv.setex(Adomain, 1, timedelta(days=1)) score += 1 WalidA.add(Adomain) else: pass except dns.resolver.NoNameservers: publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.') except dns.resolver.NoAnswer: publisher.debug('NoAnswer, The response did not contain an answer to the question.') except dns.name.EmptyLabel: publisher.debug('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: r_serv.setex(Adomain[1:], 1, timedelta(days=1)) publisher.debug('The query name does not exist.') except dns.name.LabelTooLong: publisher.debug('The Label is too long') except Exception as e: print e publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) return (num, WalidA)
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential'] tmp_dict = {} for filename in categories: bname = os.path.basename(filename) tmp_dict[bname] = [] with open(os.path.join(args.d, filename), 'r') as f: patterns = [r'%s' % re.escape(s.strip()) for s in f] tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) prec_filename = None while True: filename = p.get_from_set() if filename is None: publisher.debug("Script Categ is Idling 10s") print 'Sleeping' time.sleep(10) continue paste = Paste.Paste(filename) content = paste.get_p_content() for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) if len(found) > 0: msg = '{} {}'.format(paste.p_path, len(found)) print msg, categ p.populate_set_out(msg, categ) publisher.info(
config_section = 'SQLInjectionDetection' # Setup the I/O queues p = Process(config_section) # Sent to the logging a description of the module publisher.info("Try to detect SQL injection") server_statistics = redis.StrictRedis( host=p.config.get("ARDB_Statistics", "host"), port=p.config.getint("ARDB_Statistics", "port"), db=p.config.getint("ARDB_Statistics", "db"), decode_responses=True) faup = Faup() # Endless loop getting messages from the input queue while True: # Get one message from the input queue message = p.get_from_set() if message is None: publisher.debug("{} queue is empty, waiting".format(config_section)) time.sleep(10) continue else: # Do something with the message from the queue url, date, path = message.split() analyse(url, path)
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # Redis r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # # Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "DomainClassifier" subscriber_config_section = "PubSub_Global" cc = cfg.get("PubSub_DomainClassifier", "cc") cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld") sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ DomainClassifier is Running""") c = DomainClassifier.domainclassifier.Extract(rawtext="") while True: try: message = sub.get_msg_from_queue(r_serv1) if message is not None: PST = Paste.Paste(message.split(" ", -1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script DomainClassifier is idling 10s") time.sleep(1) continue docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() mimetype = PST._get_p_encoding() if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() c.validdomain(rtype=['A'], extended=True) localizeddomains = c.include(expression=cc_tld) if localizeddomains: print(localizeddomains) publisher.warning( 'DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print(localizeddomains) publisher.warning( 'DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc)) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) pass
set_name = 'regex_' + dico_regexname_to_redis[regex_str] new_to_the_set = server_term.sadd(set_name, filename) new_to_the_set = True if new_to_the_set == 1 else False #consider the num of occurence of this term regex_value = int( server_term.hincrby( timestamp, dico_regexname_to_redis[regex_str], int(1))) #1 term per paste if new_to_the_set: regex_value_perPaste = int( server_term.hincrby( "per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1))) server_term.zincrby( "per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1)) server_term.zincrby(curr_set, dico_regexname_to_redis[regex_str], float(1)) else: pass else: publisher.debug("Script RegexForTermsFrequency is Idling") print "sleeping" time.sleep(5) message = p.get_from_set()
while True: if message is not None: generate_new_graph = True filename, word, score = message.split() temp = filename.split('/') date = temp[-4] + temp[-3] + temp[-2] low_word = word.lower() prev_score = r_serv1.hget(low_word, date) if prev_score is not None: r_serv1.hset(low_word, date, int(prev_score) + int(score)) else: r_serv1.hset(low_word, date, score) else: if generate_new_graph: generate_new_graph = False print 'Building graph' today = datetime.date.today() year = today.year month = today.month lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, year, month) publisher.debug("Script Curve is Idling") print "sleeping" time.sleep(10) message = p.get_from_set()
def get_ip_info(self, ip, days_limit=None): """ Return informations related to an IP address. :param ip: The IP address :param days_limit: The number of days we want to check in the past (default: around 2 years) :rtype: Dictionary .. note:: Format of the output: .. code-block:: python { 'ip': ip, 'days_limit' : days_limit, 'ptrrecord' : 'ptr.record.com', 'history': [ { 'asn': asn, 'interval': [first, last], 'block': block, 'timestamp': timestamp, 'descriptions': [ [date, descr], ... ] }, ... ] } """ if days_limit is None: days_limit = 750 to_return = {'ip': ip, 'days_limit': days_limit, 'history': []} if self.has_ptr: to_return['ptrrecord'] = self.get_ptr_record(ip) if not self.has_ipasn: publisher.debug('IPASN not enabled.') to_return['error'] = 'IPASN not enabled.' return to_return if not ip: to_return['error'] = 'No IP provided.' return to_return for first, last, asn, block in self.ipasn.aggregate_history( ip, days_limit): first_date = parser.parse(first).replace(tzinfo=tz.tzutc()).date() last_date = parser.parse(last).replace(tzinfo=tz.tzutc()).date() if self.has_asnhistory: desc_history = self.asnhistory.get_all_descriptions(asn) valid_descriptions = [] for date, descr in desc_history: date = date.astimezone(tz.tzutc()).date() test_date = date - datetime.timedelta(days=1) if last_date < test_date: # Too new continue elif last_date >= test_date and first_date <= test_date: # Changes within the interval valid_descriptions.append([date.isoformat(), descr]) elif first_date > test_date: # get the most recent change befrore the interval valid_descriptions.append([date.isoformat(), descr]) break else: publisher.debug('ASN History not enabled.') valid_descriptions = [ datetime.date.today().isoformat(), 'ASN History not enabled.' ] if len(valid_descriptions) == 0: if len(desc_history) != 0: # fallback, use the oldest description. date = desc_history[-1][0].astimezone(tz.tzutc()).date() descr = desc_history[-1][1] valid_descriptions.append([date.isoformat(), descr]) else: # No history found for this ASN if last_date > datetime.date(2013, 1, 1): # ASN has been seen recently, should not happen # as the asn history module is running since early 2013 publisher.error( 'Unable to find the ASN description of {}. IP address: {}. ASN History might be down.' .format(asn, ip)) valid_descriptions.append( ['0000-00-00', 'No ASN description has been found.']) entry = {} entry['asn'] = asn entry['interval'] = [first_date.isoformat(), last_date.isoformat()] entry['block'] = block entry['timestamp'] = self.get_first_seen(asn, block) entry['descriptions'] = valid_descriptions to_return['history'].append(entry) return to_return
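# A minimal sketch of consuming the documented output of get_ip_info(); the
# values below are placeholders that follow the format in the docstring, not
# real lookup results.
info = {
    'ip': '203.0.113.1',
    'days_limit': 750,
    'ptrrecord': 'ptr.record.com',
    'history': [
        {
            'asn': '64496',
            'interval': ['2019-01-01', '2019-06-30'],
            'block': '203.0.113.0/24',
            'timestamp': 1546300800,
            'descriptions': [['2019-01-01', 'EXAMPLE-AS']],
        },
    ],
}

for entry in info['history']:
    first, last = entry['interval']
    print('AS{} announced {} from {} to {}'.format(
        entry['asn'], entry['block'], first, last))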
def checking_MX_record(r_serv, adress_set): """Check if emails MX domains are responding. :param r_serv: -- Redis connexion database :param adress_set: -- (set) This is a set of emails adress :return: (int) Number of adress with a responding and valid MX domains This function will split the email adress and try to resolve their domains names: on [email protected] it will try to resolve gmail.com """ score = 0 num = len(adress_set) WalidMX = set([]) # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) if MXdomains != []: for MXdomain in set(MXdomains): try: # Already in Redis living. if r_serv.exists(MXdomain[1:]): score += 1 WalidMX.add(MXdomain[1:]) # Not already in Redis else: # If I'm Walid MX domain if dns.resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX): # Gonna be added in redis. r_serv.setex(MXdomain[1:], timedelta(days=1), 1) score += 1 WalidMX.add(MXdomain[1:]) else: pass except dns.resolver.NoNameservers: publisher.debug( 'NoNameserver, No non-broken nameservers are available to answer the query.' ) except dns.resolver.NoAnswer: publisher.debug( 'NoAnswer, The response did not contain an answer to the question.' ) except dns.name.EmptyLabel: publisher.debug('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: publisher.debug('The query name does not exist.') except dns.name.LabelTooLong: publisher.debug('The Label is too long') finally: pass publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) return (num, WalidMX)
# Saving the list of extracted onion domains. PST.__setattr__(channel, domains_list) PST.save_attribute_redis(channel, domains_list) to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) if len(domains_list) > 0: publisher.warning('{}Detected {} .onion(s)'.format( to_print, len(domains_list))) now = datetime.datetime.now() path = os.path.join( 'onions', str(now.year).zfill(4), str(now.month).zfill(2), str(now.day).zfill(2), str(int(time.mktime(now.utctimetuple())))) to_print = 'Onion;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) for url in fetch(p, r_cache, urls, domains_list, path): publisher.warning('{}Checked {}'.format(to_print, url)) else: publisher.info('{}Onion related'.format(to_print)) prec_filename = filename else: publisher.debug("Script url is Idling 10s") print 'Sleeping' time.sleep(10) message = p.get_from_set()
def checking_A_record(r_serv, domains_set): configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. \ Did you set environment variables? \ Or activate the virtualenv.') cfg = configparser.ConfigParser() cfg.read(configfile) dns_server = cfg.get("Web", "dns") score = 0 num = len(domains_set) WalidA = set([]) resolver = dns.resolver.Resolver() resolver.nameservers = [dns_server] resolver.timeout = 5 resolver.lifetime = 2 for Adomain in domains_set: try: # Already in Redis living. if r_serv.exists(Adomain): score += 1 WalidA.add(Adomain) # Not already in Redis else: # If I'm Walid domain if resolver.query(Adomain, rdtype=dns.rdatatype.A): # Gonna be added in redis. r_serv.setex(Adomain, 1, timedelta(days=1)) score += 1 WalidA.add(Adomain) else: pass except dns.resolver.NoNameservers: publisher.debug( 'NoNameserver, No non-broken nameservers are available to answer the query.' ) except dns.resolver.NoAnswer: publisher.debug( 'NoAnswer, The response did not contain an answer to the question.' ) except dns.name.EmptyLabel: publisher.debug('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: r_serv.setex(Adomain[1:], 1, timedelta(days=1)) publisher.debug('The query name does not exist.') except dns.name.LabelTooLong: publisher.debug('The Label is too long') except Exception as e: print(e) publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) return (num, WalidA)
def checking_A_record(r_serv, domains_set): score = 0 num = len(domains_set) WalidA = set([]) for Adomain in domains_set: try: # Already in Redis living. if r_serv.exists(Adomain): score += 1 WalidA.add(Adomain) # Not already in Redis else: # If I'm Walid domain if dns.resolver.query(Adomain, rdtype=dns.rdatatype.A): # Gonna be added in redis. r_serv.setex(Adomain, timedelta(days=1), 1) score += 1 WalidA.add(Adomain) else: pass except dns.resolver.NoNameservers: publisher.debug( 'NoNameserver, No non-broken nameservers are available to answer the query.' ) except dns.resolver.NoAnswer: publisher.debug( 'NoAnswer, The response did not contain an answer to the question.' ) except dns.name.EmptyLabel: publisher.debug('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: publisher.debug('The query name does not exist.') except dns.name.LabelTooLong: publisher.debug('The Label is too long') finally: pass publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) return (num, WalidA)
def checking_MX_record(r_serv, adress_set, addr_dns): """Check if emails MX domains are responding. :param r_serv: -- Redis connexion database :param adress_set: -- (set) This is a set of emails adress :param adress_set: -- (str) This is a server dns address :return: (int) Number of adress with a responding and valid MX domains This function will split the email adress and try to resolve their domains names: on [email protected] it will try to resolve gmail.com """ #remove duplicate adress_set = list(set(adress_set)) score = 0 num = len(adress_set) WalidMX = set([]) validMX = {} # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) resolver = dns.resolver.Resolver() resolver.nameservers = [addr_dns] resolver.timeout = 5 resolver.lifetime = 2 if MXdomains != []: for MXdomain in MXdomains: try: MXdomain = MXdomain[1:] # Already in Redis living. if r_serv.exists(MXdomain): score += 1 WalidMX.add(MXdomain) validMX[MXdomain] = validMX.get(MXdomain, 0) + 1 # Not already in Redis else: # If I'm Walid MX domain if resolver.query(MXdomain, rdtype=dns.rdatatype.MX): # Gonna be added in redis. r_serv.setex(MXdomain, 1, timedelta(days=1)) score += 1 WalidMX.add(MXdomain) validMX[MXdomain] = validMX.get(MXdomain, 0) + 1 else: pass except dns.resolver.NoNameservers: publisher.debug( 'NoNameserver, No non-broken nameservers are available to answer the query.' ) print( 'NoNameserver, No non-broken nameservers are available to answer the query.' ) except dns.resolver.NoAnswer: publisher.debug( 'NoAnswer, The response did not contain an answer to the question.' ) print( 'NoAnswer, The response did not contain an answer to the question.' ) except dns.name.EmptyLabel: publisher.debug('SyntaxError: EmptyLabel') print('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) publisher.debug('The query name does not exist.') print('The query name does not exist.') except dns.name.LabelTooLong: publisher.debug('The Label is too long') print('The Label is too long') except dns.resolver.Timeout: print('timeout') r_serv.setex(MXdomain, 1, timedelta(days=1)) except Exception as e: print(e) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) #return (num, WalidMX) return (num, validMX)
'CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey' ] tmp_dict = {} for filename in categories: bname = os.path.basename(filename) tmp_dict[bname] = [] with open(os.path.join(args.d, filename), 'r') as f: patterns = [r'%s' % (re.escape(s.strip())) for s in f] tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) prec_filename = None while True: filename = p.get_from_set() if filename is None: publisher.debug("Script Categ is Idling 10s") print('Sleeping') time.sleep(10) continue paste = Paste.Paste(filename) content = paste.get_p_content() for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) if len(found) >= matchingThreshold: msg = '{} {}'.format(paste.p_rel_path, len(found)) print(msg, categ) p.populate_set_out(msg, categ)
# FUNCTIONS # publisher.info("""ZMQ Attribute is Running""") while True: try: message = h.redis_rpop() if message is not None: PST = Paste.Paste(message.split(" ", -1)[-1]) else: if h.redis_queue_shutdown(): print "Shutdown Flag Up: Terminating" publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script Attribute is idling 10s") time.sleep(10) continue # FIXME do it directly in the class PST.save_attribute_redis("p_encoding", PST._get_p_encoding()) PST.save_attribute_redis("p_language", PST._get_p_language()) # FIXME why not all saving everything there. PST.save_all_attributes_redis() # FIXME Not used. PST.store.sadd("Pastes_Objects", PST.p_path) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name))
old=timestamp, new=last_update) publisher.error(msg) continue else: msg = '===== Importing new file: {new} ====='.format(new=timestamp) publisher.info(msg) p = r.pipeline(transaction=False) p.set('last_update', timestamp) p.sadd('all_timestamps', timestamp) new_asns = 0 updated_descrs = 0 for asn, descr in data: all_descrs = r.hgetall(asn) if len(all_descrs) == 0: p.hset(asn, timestamp, descr) publisher.debug('New asn: {asn}'.format(asn=asn)) new_asns += 1 else: dates = sorted(all_descrs.keys()) last_descr = all_descrs[dates[-1]] if descr != last_descr: p.hset(asn, timestamp, descr) msg = 'New description for {asn}. Was {old}, is {new}'.format( asn=asn, old=last_descr, new=descr) publisher.info(msg) updated_descrs += 1 p.execute() msg = '===== Import finished: {new}, new ASNs:{nb}, Updated:{up} ====='.format( new=timestamp, nb=new_asns, up=updated_descrs) publisher.info(msg) if args.not_new:
urls.append(url) # Saving the list of extracted onion domains. PST.__setattr__(channel, domains_list) PST.save_attribute_redis(channel, domains_list) to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) if len(domains_list) > 0: publisher.warning('{}Detected {} .onion(s)'.format( to_print, len(domains_list))) now = datetime.datetime.now() path = os.path.join('onions', str(now.year).zfill(4), str(now.month).zfill(2), str(now.day).zfill(2), str(int(time.mktime(now.utctimetuple())))) to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) for url in fetch(p, r_cache, urls, domains_list, path): publisher.warning('{}Checked {}'.format(to_print, url)) else: publisher.info('{}Onion related'.format(to_print)) prec_filename = filename else: publisher.debug("Script url is Idling 10s") print 'Sleeping' time.sleep(10) message = p.get_from_set()
if message is not None: filename, score = message.split() paste = Paste.Paste(filename) content = paste.get_p_content() all_cards = re.findall(regex, content) if len(all_cards) > 0: print 'All matching', all_cards creditcard_set = set([]) for card in all_cards: clean_card = re.sub('[^0-9]', '', card) if lib_refine.is_luhn_valid(clean_card): print clean_card, 'is valid' creditcard_set.add(clean_card) paste.__setattr__(channel, creditcard_set) paste.save_attribute_redis(channel, creditcard_set) pprint.pprint(creditcard_set) to_print = 'CreditCard;{};{};{};'.format( paste.p_source, paste.p_date, paste.p_name) if (len(creditcard_set) > 0): publisher.warning('{}Checked {} valid number(s)'.format( to_print, len(creditcard_set))) else: publisher.info('{}CreditCard related'.format(to_print)) else: publisher.debug("Script creditcard is idling 1m") print 'Sleeping' time.sleep(10)
creditcard_set = set([]) PST = Paste.Paste(filename) for x in PST.get_regex(creditcard_regex): if lib_refine.is_luhn_valid(x): creditcard_set.add(x) PST.__setattr__(channel, creditcard_set) PST.save_attribute_redis(channel, creditcard_set) pprint.pprint(creditcard_set) to_print = 'CreditCard;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) if (len(creditcard_set) > 0): publisher.critical('{}Checked {} valid number(s)'.format( to_print, len(creditcard_set))) else: publisher.info('{}CreditCard related'.format(to_print)) prec_filename = filename else: if h.redis_queue_shutdown(): print "Shutdown Flag Up: Terminating" publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script creditcard is idling 1m") time.sleep(60) message = h.redis_rpop()
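# Several snippets above call lib_refine.is_luhn_valid() without showing it.
# A minimal standalone sketch of a Luhn checksum validator for reference; this
# is the textbook algorithm, not necessarily the project's exact implementation.
def is_luhn_valid(card_number):
    """Return True if the digit string passes the Luhn checksum."""
    digits = [int(c) for c in str(card_number) if c.isdigit()]
    checksum = 0
    for i, d in enumerate(reversed(digits)):
        if i % 2 == 1:
            # Double every second digit from the right, folding back into one digit.
            d = d * 2
            if d > 9:
                d -= 9
        checksum += d
    return checksum % 10 == 0


print(is_luhn_valid('4111111111111111'))  # the well-known test number passes
print(is_luhn_valid('4111111111111112'))  # a one-digit change fails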