def launch(): """ Fetch all the whois entry assigned to the server of this :class:`Connector` """ i = 0 while True: try: entry = temp_db.spop(key_ris) if not entry: __disconnect() i = 0 publisher.debug("Disconnected of " + server) time.sleep(sleep_timer) continue if cache_db.get(entry) is None: if not connected: __connect() publisher.debug(server + ", query : " + str(entry)) whois = fetch_whois(entry) if whois != '': cache_db.setex(entry, server + '\n' + unicode(whois, errors="replace"), cache_ttl) if not keepalive: __disconnect() i += 1 if i%10000 == 0: publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server) except IOError as text: publisher.error("IOError on " + server + ': ' + str(text)) time.sleep(sleep_timer) __disconnect()
def display_listof_pid(r_serv, arg):
    """Display the pid list from redis

    This function displays information in the shell about launched processes
    """
    jobs = {}
    joblist = []
    try:
        for job in r_serv.smembers("pid"):
            jobs = r_serv.hgetall(job)
            if jobs is not None:
                start = datetime.strptime(r_serv.hget(job, "startime"),
                                          "%Y-%m-%d_%H:%M:%S")
                end = datetime.strptime(time.strftime("%Y-%m-%d_%H:%M:%S"),
                                        "%Y-%m-%d_%H:%M:%S")
                jobs['uptime'] = str(abs(start - end))
                joblist.append(jobs)
            else:
                publisher.debug("display_listof_pid aborted due to lack of information in Redis")
        joblist = sorted(joblist, key=lambda k: k['uptime'], reverse=True)
        for job in joblist:
            print format_display_listof_pid(job, arg)
        if arg == "remain":
            print "Remaining: {0}".format(r_serv.llen("filelist"))
        if arg == "processed":
            print "Processed: {0}".format(r_serv.llen("processed"))
    except TypeError:
        publisher.error("TypeError for display_listof_pid")
def launch_fetcher(module):
    """
    Launch a process which fetches a dataset into a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    timer = '3600'
    if module is None:
        publisher.error('Unable to start fetching: module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have an URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_fetcher, '-n', module,
                          '-d', directory, '-u', url, '-t', timer])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
def fetcher():
    """
    Main function which fetches the datasets
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        # Check if the file already exists: if the same file is found,
        # the downloaded file is dropped. Otherwise, it is moved to its
        # final directory.
        to_check = glob.glob(os.path.join(old_directory, '*'))
        to_check += glob.glob(os.path.join(directory, '*'))
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
def crawl_onion(url, domain, date, date_month, message):
    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
    super_father = r_serv_metadata.hget('paste_metadata:' + paste, 'super_father')
    if super_father is None:
        super_father = paste

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url, timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 30:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('           \033[91m DOCKER SPLASH DOWN\033[0m')
                print('            {} DOWN'.format(splash_url))
                exit(1)

            print('          \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py',
                                    splash_url, type_hidden_service, url,
                                    domain, paste, super_father],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('         PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
                print('------------------------------------------------------------------------')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_hidden_service, domain, message)
        print('--------------------------------------')
        print('           \033[91m DOCKER SPLASH DOWN\033[0m')
        print('            {} DOWN'.format(splash_url))
        exit(1)
def __query_logging(ip, user_agent, method, q_ip=None, announce_date=None,
                    days_limit=None, level=None):
    if level == 'warning':
        publisher.warning(__csv2string([ip, user_agent, method, q_ip,
                                        announce_date, days_limit, level]))
    elif level == 'error':
        publisher.error(__csv2string([ip, user_agent, method, q_ip,
                                      announce_date, days_limit, level]))
    else:
        publisher.info(__csv2string([ip, user_agent, method, q_ip,
                                     announce_date, days_limit, level]))
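# A minimal sketch of the __csv2string helper assumed by __query_logging
# above (its implementation is not part of this excerpt): it serializes the
# field list into one CSV row so the log line stays machine-parseable.
import csv
import io  # Python 3; on Python 2, use StringIO.StringIO instead

def __csv2string(data):
    # csv handles quoting of fields that contain commas or quotes
    buf = io.StringIO()
    csv.writer(buf).writerow(data)
    return buf.getvalue().strip('\r\n')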
def main():
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = 'DomClassifier'
    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue

            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name,
                        localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name,
                        localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
def test_publisher(self):
    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
def service_start_once(servicename=None, param=None, processname=None):
    """
    Start a service and save its pid.
    Check that it is not already running.
    """
    config, pid_path = init_static()
    processname = os.path.basename(processname)
    pidpath = os.path.join(pid_path, processname + ".pid")
    if not os.path.exists(pidpath):
        proc = service_start(servicename, param)
        writepid(processname, proc)
    else:
        print(processname + ' already running on pid ' +
              str(pidof(processname)[0]))
        publisher.error("%s already running with pid %s" %
                        (param, pidof(processname)[0]))
def get_pgp_packet(message, save_path):
    save_path = '{}'.format(save_path)
    if len(save_path) > 131072:
        save_in_file(message, save_path)
        return ''
    else:
        process1 = subprocess.Popen(['echo', '-e', save_path],
                                    stdout=subprocess.PIPE)
        process2 = subprocess.Popen(['pgpdump'], stdin=process1.stdout,
                                    stdout=subprocess.PIPE)
        process1.stdout.close()
        output = process2.communicate()[0]
        try:
            output = output.decode()
        except UnicodeDecodeError:
            publisher.error('Error PgpDump UnicodeDecodeError: {}'.format(message))
            output = ''
        return output
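# A possible alternative (an illustrative sketch, not this project's code):
# pgpdump reads the PGP block from stdin, so the data can be fed with
# communicate() instead of spawning echo, avoiding the extra process and
# any surprises from echo's '-e' escape handling.
def get_pgp_packet_via_stdin(message, pgp_block):
    process = subprocess.Popen(['pgpdump'], stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    output, _ = process.communicate(input=pgp_block.encode())
    try:
        return output.decode()
    except UnicodeDecodeError:
        publisher.error('Error PgpDump UnicodeDecodeError: {}'.format(message))
        return ''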
def launch_parser(module):
    """
    Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    if module is None:
        publisher.error('Unable to start parsing: module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module,
                          '-d', directory])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
def launch_parser(module):
    """
    Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    timer = '60'
    if module is None:
        publisher.error('Unable to start parsing: module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module,
                          '-d', directory, '-t', timer])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
def prepare_keys_for_ranking():
    # Add all announced subnets by ASN
    pipeline = history_db_static.pipeline()
    for asn in routing_db.smembers('asns'):
        blocks = routing_db.smembers(asn)
        pipeline.sadd('{asn}{sep}{date}{sep}clean_set'.format(
            sep=separator, asn=asn, date=date), *blocks)
        temp_db.sadd('full_asn_db', *[str(IPy.IP(b)[0]) for b in blocks])
    temp_db.sadd('no_asn', 'full_asn_db')
    pipeline.execute()

    # Cleanup the old keys, setup the list of ASNs to rank
    sources = global_db.smembers('{date}{sep}{key}'.format(
        date=date, sep=separator, key=index_sources))
    pipeline = history_db.pipeline()
    pipeline_static = history_db_static.pipeline()
    to_delete = []
    for source in sources:
        asns = global_db.smembers('{date}{sep}{source}{sep}{key}'.format(
            date=date, sep=separator, source=source, key=index_asns_details))
        for asn in asns:
            global_asn = asn.split(separator)[0]
            asn_key_v4 = '{asn}{sep}{date}{sep}{source}{sep}rankv4'.format(
                sep=separator, asn=global_asn, date=date, source=source)
            asn_key_v6 = '{asn}{sep}{date}{sep}{source}{sep}rankv6'.format(
                sep=separator, asn=global_asn, date=date, source=source)
            to_delete.append(asn_key_v4)
            to_delete.append(asn_key_v6)
            pipeline.sadd(key_to_rank, '{asn}{sep}{date}{sep}{source}'.format(
                sep=separator, asn=asn, date=date, source=source))
    to_delete = set(to_delete)
    if len(to_delete) > 0:
        pipeline_static.delete(*to_delete)
    else:
        publisher.error('You *do not* have anything to rank!')
    pipeline.execute()
    pipeline_static.execute()
def launch():
    """
    Fetch all the whois entries assigned to the server of this
    :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected from " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query: " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry, server + '\n' +
                                   unicode(whois, errors="replace"),
                                   cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) +
                               ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            publisher.info(str(temp_db.scard(key_ris)) +
                           ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
        except Exception as e:
            publisher.error("Error on " + server + ': ' + str(e))
            publisher.info(str(temp_db.scard(key_ris)) +
                           ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
def fetch_whois(query):
    """
    Fetch the RIS RIPE information. Keep the connection open if possible.
    """
    server_socket.send('-k -M ' + query + '\n')
    text = ''
    fs = server_socket.makefile()
    prec = ''
    while 1:
        temp = fs.readline()
        if not temp or len(temp) == 0 or prec == temp == '\n':
            break
        text += temp
        prec = temp
    if len(text) == 0:
        publisher.error("error (no response) with query: " + query +
                        " on server " + server)
        time.sleep(sleep_timer)
    if not keepalive:
        __disconnect()
    return text
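# A minimal sketch of the __connect/__disconnect helpers that fetch_whois
# and launch rely on. The whois port (43) and the module-level globals are
# assumptions for illustration, not the project's actual implementation.
import socket

def __connect():
    global server_socket, connected
    server_socket = socket.create_connection((server, 43))
    connected = True

def __disconnect():
    global server_socket, connected
    if connected:
        server_socket.close()
        connected = False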
def launch_fetcher(module):
    """
    Launch a process which fetches a dataset into a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    if module is None:
        publisher.error('Unable to start fetching: module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have an URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_fetcher, '-n', module,
                          '-d', directory, '-u', url])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module +
                        ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
def importer(raw_dir, listname):
    publisher.channel = 'ParseRawFiles'
    has_files = False
    if temp_db is None:
        __prepare()
    try:
        parser = importlib.import_module(listname).parser
    except:
        parser = __default_parser
    date = datetime.date.today()
    for filename in __get_files(raw_dir):
        try:
            date_from_module = parser(filename, listname, date)
            has_files = True
            if date_from_module is not None:
                date = date_from_module
            os.rename(filename, os.path.join(raw_dir, old_dir,
                                             date.isoformat()))
        except:
            new_file = os.path.join(raw_dir, old_dir,
                                    'INVALID_' + str(date).replace(' ', '-'))
            os.rename(filename, new_file)
            publisher.error('Invalid file: ' + new_file)
    return has_files
def get_pgp_packet(message, save_path):
    save_path = '{}'.format(save_path)
    # remove version
    all_version = re.findall(regex_tool_version, save_path)
    for version in all_version:
        save_path = save_path.replace(version, '')
    # remove comment
    all_comment = re.findall(regex_block_comment, save_path)
    for comment in all_comment:
        save_path = save_path.replace(comment, '')
    # remove empty lines
    save_path = [s for s in save_path.splitlines() if s]
    save_path[0] = save_path[0] + '\n'
    save_path[-1] = '\n' + save_path[-1]
    save_path = '\n'.join(save_path)
    #print(save_path)

    if len(save_path) > 131072:
        save_in_file(message, save_path)
        return ''
    else:
        process1 = subprocess.Popen(['echo', '-e', save_path],
                                    stdout=subprocess.PIPE)
        process2 = subprocess.Popen(['pgpdump'], stdin=process1.stdout,
                                    stdout=subprocess.PIPE)
        process1.stdout.close()
        output = process2.communicate()[0]
        try:
            output = output.decode()
        except UnicodeDecodeError:
            publisher.error('Error PgpDump UnicodeDecodeError: {}'.format(message))
            output = ''
        return output
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # Redis r_serv1 = redis.StrictRedis( host = cfg.get("Redis_Queues", "host"), port = cfg.getint("Redis_Queues", "port"), db = cfg.getint("Redis_Queues", "db")) # Indexer configuration - index dir and schema setup indexpath = cfg.get("Indexer", "path") indexertype = cfg.get("Indexer", "type") if indexertype == "whoosh": schema = Schema(title=TEXT(stored=True), path=ID(stored=True,unique=True), content=TEXT) if not os.path.exists(indexpath): os.mkdir(indexpath) if not exists_in(indexpath): ix = create_in(indexpath, schema) else: ix = open_dir(indexpath) # LOGGING # publisher.channel = "Script" # ZMQ # #Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "indexer" subscriber_config_section = "PubSub_Global" Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ Indexer is Running""") while True: try: message = Sub.get_msg_from_queue(r_serv1) if message != None: PST = P.Paste(message.split(" ",-1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script Indexer is idling 10s") time.sleep(1) continue docpath = message.split(" ",-1)[-1] paste = PST.get_p_content() print "Indexing :", docpath if indexertype == "whoosh": indexwriter = ix.writer() indexwriter.update_document(title=unicode(docpath, errors='ignore'),path=unicode(docpath, errors='ignore'),content=unicode(paste, errors='ignore')) indexwriter.commit() except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) pass
def insert():
    """
    Re-insert in the database the data provided by the module and
    extracted by :meth:`get_all_information` in a sorted form.
    """
    while True:
        i = 0
        try:
            while temp_db.scard(uid_list) > 0:
                infos = get_all_information()
                if infos is None:
                    continue
                uid, ip, src, timestamp = infos
                if ip is None:
                    publisher.error('Entry without IP, invalid')
                    continue
                if src is None:
                    publisher.error(ip + ' without source, invalid')
                    continue
                if timestamp.date() < datetime.date.today() - \
                        datetime.timedelta(1) and not accept_old_entries:
                    publisher.warning(
                        'The timestamp ({ts}) of {ip} from {source} is too old.'.format(
                            ts=timestamp.isoformat(), ip=ip, source=src))
                    continue
                try:
                    # Check and normalize the IP
                    ip_bin = IPy.IP(ip)
                    if ip_bin.iptype() != 'PUBLIC':
                        publisher.warning(str(ip_bin) +
                                          ' is not a PUBLIC IP Address')
                        continue
                    ip = ip_bin.strCompressed()
                except:
                    publisher.error('This IP: ' + ip + ' is invalid.')
                    continue
                iso_timestamp = timestamp.isoformat()
                date = timestamp.date().isoformat()
                index_day_src = '{date}{sep}{key}'.format(
                    sep=separator, date=date, key=list_sources)
                index_day_ips = 'temp{sep}{date}{sep}{source}{sep}{key}'.format(
                    sep=separator, date=date, source=src, key=list_ips)
                ip_details = '{ip}{sep}{timestamp}'.format(
                    sep=separator, ip=ip, timestamp=iso_timestamp)
                global_db.sadd(index_day_src, src)
                pipeline_temp_db = temp_db.pipeline()
                pipeline_temp_db.sadd(index_day_ips, ip_details)
                pipeline_temp_db.sadd(temp_ris, ip)
                pipeline_temp_db.sadd(temp_no_asn, index_day_ips)
                pipeline_temp_db.delete(uid)
                pipeline_temp_db.execute()
                i += 1
                if i % 100 == 0 and config_db.exists(stop_db_input):
                    break
                if i % 10000 == 0:
                    publisher.info('{nb} new entries to insert'.format(
                        nb=temp_db.scard(uid_list)))
        except:
            publisher.critical('Unable to insert, redis does not respond')
            break
        time.sleep(sleep_timer)
        if config_db.exists(stop_db_input):
            publisher.info('DatabaseInput stopped.')
            break
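# A hedged sketch of the get_all_information contract that insert() relies
# on: pop one queued UID and return a (uid, ip, source, timestamp) tuple,
# where timestamp is a datetime, or None when nothing usable is queued.
# The hash layout and the python-dateutil dependency are assumptions; the
# real module-specific implementation differs.
def get_all_information():
    uid = temp_db.spop(uid_list)
    if uid is None:
        return None
    ip, src, ts = temp_db.hmget(uid, 'ip', 'source', 'timestamp')
    return uid, ip, src, dateutil.parser.parse(ts)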
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # Redis r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # # Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "DomainClassifier" subscriber_config_section = "PubSub_Global" cc = cfg.get("PubSub_DomainClassifier", "cc") cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld") sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ DomainClassifier is Running""") c = DomainClassifier.domainclassifier.Extract(rawtext="") while True: try: message = sub.get_msg_from_queue(r_serv1) if message is not None: PST = Paste.Paste(message.split(" ", -1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script DomainClassifier is idling 10s") time.sleep(1) continue docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() mimetype = PST._get_p_encoding() if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() c.validdomain(rtype=['A'], extended=True) localizeddomains = c.include(expression=cc_tld) if localizeddomains: print(localizeddomains) publisher.warning( 'DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print(localizeddomains) publisher.warning( 'DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc)) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) pass
def crawl_onion(url, domain, port, type_service, message, crawler_config):
    crawler_config['url'] = url
    crawler_config['port'] = port
    print('Launching Crawler: {}'.format(url))

    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
                 datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url, timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('           \033[91m DOCKER SPLASH DOWN\033[0m')
                print('            {} DOWN'.format(splash_url))
                r_cache.hset('metadata_crawler:{}'.format(splash_port),
                             'status', 'SPLASH DOWN')
                nb_retry = 0  # reset the retry counter and keep trying

            print('          \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
        # save config in cache
        UUID = str(uuid.uuid4())
        r_cache.set('crawler_request:{}'.format(UUID), json.dumps(crawler_config))

        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', UUID],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_service, domain, message)
                publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('         PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
                print('------------------------------------------------------------------------')
                r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_service, domain, message)
        print('--------------------------------------')
        print('           \033[91m DOCKER SPLASH DOWN\033[0m')
        print('            {} DOWN'.format(splash_url))
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN')
        exit(1)
                    ##################### Similarity found #######################

                    # if there is data in this dictionary
                    if len(hash_dico) != 0:
                        # paste_tuple = (hash_type, date, paste_path, percent)
                        for dico_hash, paste_tuple in hash_dico.items():
                            dupl.add(paste_tuple)

                    # Creating the object attribute and save it.
                    to_print = 'Duplicate;{};{};{};'.format(
                        PST.p_source, PST.p_date, PST.p_name)
                    if dupl:
                        dupl = list(dupl)
                        PST.__setattr__("p_duplicate", dupl)
                        PST.save_attribute_duplicate(dupl)
                        PST.save_others_pastes_attribute_duplicate(dupl)
                        publisher.info('{}Detected {};{}'.format(
                            to_print, len(dupl), PST.p_path))
                        print('{}Detected {}'.format(to_print, len(dupl)))
                        print('')

                    y = time.time()
                    publisher.debug('{}Processed in {} sec'.format(to_print, y - x))
                except IOError:
                    to_print = 'Duplicate;{};{};{};'.format(
                        PST.p_source, PST.p_date, PST.p_name)
                    print("CRC Checksum Failed on :", PST.p_path)
                    publisher.error('{}CRC Checksum Failed'.format(to_print))
def crawl_onion(url, domain, date, date_month, message):
    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
    super_father = r_serv_metadata.hget('paste_metadata:' + paste, 'super_father')
    if super_father is None:
        super_father = paste

    try:
        r = requests.get(splash_url, timeout=30.0)
    except Exception:
        # TODO: relaunch docker or send error message
        on_error_send_message_back_in_queue(type_hidden_service, domain, message)
        publisher.error('{} SPLASH DOWN'.format(splash_url))
        print('--------------------------------------')
        print('           \033[91m DOCKER SPLASH DOWN\033[0m')
        print('            {} DOWN'.format(splash_url))
        exit(1)

    if r.status_code == 200:
        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py',
                                    splash_url, type_hidden_service, url,
                                    domain, paste, super_father],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('         PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
                print('------------------------------------------------------------------------')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_hidden_service, domain, message)
        print('--------------------------------------')
        print('           \033[91m DOCKER SPLASH DOWN\033[0m')
        print('            {} DOWN'.format(splash_url))
        exit(1)
def get_ip_info(self, ip, days_limit=None):
    """
    Return information related to an IP address.

    :param ip: The IP address
    :param days_limit: The number of days we want to check in the past
        (default: around 2 years)
    :rtype: Dictionary

    .. note:: Format of the output:

        .. code-block:: python

            {
                'ip': ip,
                'days_limit': days_limit,
                'ptrrecord': 'ptr.record.com',
                'history':
                    [
                        {
                            'asn': asn,
                            'interval': [first, last],
                            'block': block,
                            'timestamp': timestamp,
                            'descriptions':
                                [
                                    [date, descr],
                                    ...
                                ]
                        },
                        ...
                    ]
            }
    """
    if days_limit is None:
        days_limit = 750
    to_return = {'ip': ip, 'days_limit': days_limit, 'history': []}
    if self.has_ptr:
        to_return['ptrrecord'] = self.get_ptr_record(ip)
    if not self.has_ipasn:
        publisher.debug('IPASN not enabled.')
        to_return['error'] = 'IPASN not enabled.'
        return to_return
    if not ip:
        to_return['error'] = 'No IP provided.'
        return to_return
    for first, last, asn, block in self.ipasn.aggregate_history(ip, days_limit):
        first_date = parser.parse(first).replace(tzinfo=tz.tzutc()).date()
        last_date = parser.parse(last).replace(tzinfo=tz.tzutc()).date()
        if self.has_asnhistory:
            desc_history = self.asnhistory.get_all_descriptions(asn)
            valid_descriptions = []
            for date, descr in desc_history:
                date = date.astimezone(tz.tzutc()).date()
                test_date = date - datetime.timedelta(days=1)
                if last_date < test_date:
                    # Too new
                    continue
                elif last_date >= test_date and first_date <= test_date:
                    # Changes within the interval
                    valid_descriptions.append([date.isoformat(), descr])
                elif first_date > test_date:
                    # get the most recent change before the interval
                    valid_descriptions.append([date.isoformat(), descr])
                    break
        else:
            publisher.debug('ASN History not enabled.')
            valid_descriptions = [datetime.date.today().isoformat(),
                                  'ASN History not enabled.']
        if len(valid_descriptions) == 0:
            if len(desc_history) != 0:
                # fallback, use the oldest description.
                date = desc_history[-1][0].astimezone(tz.tzutc()).date()
                descr = desc_history[-1][1]
                valid_descriptions.append([date.isoformat(), descr])
            else:
                # No history found for this ASN
                if last_date > datetime.date(2013, 1, 1):
                    # ASN has been seen recently, should not happen
                    # as the asn history module is running since early 2013
                    publisher.error('Unable to find the ASN description of {}. '
                                    'IP address: {}. ASN History might be '
                                    'down.'.format(asn, ip))
                valid_descriptions.append(['0000-00-00',
                                           'No ASN description has been found.'])
        entry = {}
        entry['asn'] = asn
        entry['interval'] = [first_date.isoformat(), last_date.isoformat()]
        entry['block'] = block
        entry['timestamp'] = self.get_first_seen(asn, block)
        entry['descriptions'] = valid_descriptions
        to_return['history'].append(entry)
    return to_return
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # REDIS # r_serv = redis.StrictRedis( host = cfg.get("Redis_Data_Merging", "host"), port = cfg.getint("Redis_Data_Merging", "port"), db = cfg.getint("Redis_Data_Merging", "db")) r_serv1 = redis.StrictRedis( host = cfg.get("Redis_Queues", "host"), port = cfg.getint("Redis_Queues", "port"), db = cfg.getint("Redis_Queues", "db")) p_serv = r_serv.pipeline(False) # LOGGING # publisher.channel = "Script" # ZMQ # #Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "attributes" subscriber_config_section = "PubSub_Global" Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ Attribute is Running""") while True: try: message = Sub.get_msg_from_queue(r_serv1) if message != None: PST = P.Paste(message.split(" ",-1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"): r_serv1.srem("SHUTDOWN_FLAGS", "Attributes") print "Shutdown Flag Up: Terminating" publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script Attribute is idling 10s") time.sleep(10) continue encoding = PST._get_p_encoding() language = PST._get_p_language() PST.save_attribute_redis(r_serv, "p_encoding", encoding) PST.save_attribute_redis(r_serv, "p_language", language) r_serv.sadd("Pastes_Objects",PST.p_path) PST.save_all_attributes_redis(r_serv) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) pass
def main():
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = 'DomClassifier'
    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            item_id = p.get_from_set()
            if item_id is None:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue

            item_content = item_basic.get_item_content(item_id)
            mimetype = item_basic.get_item_mimetype(item_id)
            item_basename = item_basic.get_basename(item_id)
            item_source = item_basic.get_source(item_id)
            item_date = item_basic.get_item_date(item_id)

            if mimetype.split('/')[0] == "text":
                c.text(rawtext=item_content)
                c.potentialdomain()
                c.validdomain(passive_dns=True, extended=False)
                print(c.vdomain)

                if c.vdomain and d4.is_passive_dns_enabled():
                    for dns_record in c.vdomain:
                        p.populate_set_out(dns_record)

                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc_tld};{item_id}")

                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc};{item_id}")
        except IOError:
            print("CRC Checksum Failed on :", item_id)
            publisher.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
from helpers.initscript import *

services_dir = os.path.join(root_dir, config.get('directories', 'services'))
service = os.path.join(services_dir, "fetch_ris_entries")

if args.action == "start":
    publisher.info("Starting fetching...")
    for option in servers_available:
        print(option + " to start...")
        publisher.info(option + " to start...")
        service_start_multiple(servicename=service, param=['-s', option],
                               number=int(config.get('processes', 'whois_fetch')))
elif args.action == "stop":
    print("Stopping fetching...")
    publisher.info("Stopping fetching...")
    pids = pidof(processname=service)
    if pids:
        print(service + " to be stopped...")
        publisher.info(service + " to be stopped...")
        for pid in pids:
            try:
                os.kill(int(pid), signal.SIGKILL)
            except OSError, e:
                print(service + " unsuccessfully stopped")
                publisher.error(service + " unsuccessfully stopped")
        rmpid(processname=service)
else:
    usage()
    else:
        ix = open_dir(indexpath)

    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()
            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(
                    title=unicode(docpath, errors='ignore'),
                    path=unicode(docpath, errors='ignore'),
                    content=unicode(paste, errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
                    # Creating the object attribute and save it.
                    to_print = 'Duplicate;{};{};{};'.format(
                        PST.p_source, PST.p_date, PST.p_name)
                    if dupl != []:
                        PST.__setattr__("p_duplicate", dupl)
                        PST.save_attribute_redis("p_duplicate", dupl)
                        publisher.info('{}Detected {}'.format(to_print, len(dupl)))
                        print '{}Detected {}'.format(to_print, len(dupl))

                    y = time.time()
                    publisher.debug('{}Processed in {} sec'.format(to_print, y - x))

                # Adding the hash in the dico of the month
                today_dico[index] = paste_hash

                if flag_write_to_disk:
                    time_1 = time.time()
                    flag_write_to_disk = False
                    flag_reload_from_disk = True
                    print 'writing'
                    with open(filedicopath, 'w') as fp:
                        json.dump(today_dico, fp)
            except IOError:
                to_print = 'Duplicate;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                print "CRC Checksum Failed on :", PST.p_path
                publisher.error('{}CRC Checksum Failed'.format(to_print))
def crawl_onion(url, domain, date, date_month, message):
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain)
    r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time',
                 datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))

    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
    super_father = r_serv_metadata.hget('paste_metadata:' + paste, 'super_father')
    if super_father is None:
        super_father = paste

    retry = True
    nb_retry = 0
    while retry:
        try:
            r = requests.get(splash_url, timeout=30.0)
            retry = False
        except Exception:
            # TODO: relaunch docker or send error message
            nb_retry += 1

            if nb_retry == 6:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH DOWN'.format(splash_url))
                print('--------------------------------------')
                print('           \033[91m DOCKER SPLASH DOWN\033[0m')
                print('            {} DOWN'.format(splash_url))
                r_cache.hset('metadata_crawler:{}'.format(splash_port),
                             'status', 'SPLASH DOWN')
                nb_retry = 0  # reset the retry counter and keep trying

            print('          \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
            print('          Retry({}) in 10 seconds'.format(nb_retry))
            time.sleep(10)

    if r.status_code == 200:
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py',
                                    splash_url, type_hidden_service, url,
                                    domain, paste, super_father],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)

        if process.returncode == 0:
            output = process.stdout.read().decode()
            print(output)
            # error: splash:Connection to proxy refused
            if 'Connection to proxy refused' in output:
                on_error_send_message_back_in_queue(type_hidden_service, domain, message)
                publisher.error('{} SPLASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
                print('------------------------------------------------------------------------')
                print('         \033[91m SPLASH: Connection to proxy refused')
                print('')
                print('         PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
                print('------------------------------------------------------------------------')
                r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error')
                exit(-2)
        else:
            print(process.stdout.read())
            exit(-1)
    else:
        on_error_send_message_back_in_queue(type_hidden_service, domain, message)
        print('--------------------------------------')
        print('           \033[91m DOCKER SPLASH DOWN\033[0m')
        print('            {} DOWN'.format(splash_url))
        r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN')
        exit(1)
def handle_error():
    cherrypy.response.status = 500
    cherrypy.response.body = [
        "<html><body>Sorry, an error occurred</body></html>"]
    publisher.error('Request: ' + str(cherrypy.request.params) + '\n' +
                    _cperror.format_exc())
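# How such a handler is typically registered in CherryPy (a hedged example;
# the project's actual wiring may differ): the 'request.error_response'
# hook makes CherryPy call handle_error() instead of its default error page.
import cherrypy
from cherrypy import _cperror

cherrypy.config.update({'request.error_response': handle_error})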
parser.add_argument("-H", "--hostname", default='localhost', type=str, help='Set the hostname of the server.') parser.add_argument("-p", "--port", default=6379, type=int, help='Set the server port.') parser.add_argument("-c", "--channel", type=str, required=True, help='Channel to publish into.') args = parser.parse_args() if args.use_unix_socket: publisher.use_tcp_socket = False publisher.unix_socket = args.unix_socket_path else: publisher.hostname = args.hostname publisher.port = args.port publisher.channel = args.channel for i in range(0, 21): if i % 2 == 0: publisher.info('test' + str(i)) elif i % 3 == 0: publisher.warning('test' + str(i)) elif i % 5 == 0: publisher.error('test' + str(i)) elif i % 7 == 0: publisher.critical('test' + str(i)) else: publisher.debug('test' + str(i)) time.sleep(1)
from helpers.initscript import *
from helpers.files_splitter import *

services_dir = os.path.join(root_dir, config.get('directories', 'services'))
raw_data = os.path.join(root_dir, config.get('directories', 'raw_data'))

service = os.path.join(services_dir, "push_update_routing")

if args.action == "start":
    print("Start pushing routes...")
    publisher.info("Start pushing routes...")
    print(service + " to start...")
    publisher.info(service + " to start...")
    proc = service_start_once(servicename=service, processname=service)
elif args.action == "stop":
    print("Stop pushing routes...")
    publisher.info("Stop pushing routes...")
    pids = pidof(processname=service)
    if pids:
        print(service + " to be stopped...")
        publisher.info(service + " to be stopped...")
        for pid in pids:
            try:
                os.kill(int(pid), signal.SIGKILL)
            except OSError, e:
                print(service + " unsuccessfully stopped")
                publisher.error(service + " unsuccessfully stopped")
        rmpid(processname=service)
def main(): """Main Function""" # CONFIG # cfg = ConfigParser.ConfigParser() cfg.read(configfile) # Redis r_serv1 = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # # Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "DomainClassifier" subscriber_config_section = "PubSub_Global" cc = cfg.get("PubSub_DomainClassifier", "cc") cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld") sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ DomainClassifier is Running""") c = DomainClassifier.domainclassifier.Extract(rawtext="") while True: try: message = sub.get_msg_from_queue(r_serv1) if message is not None: PST = Paste.Paste(message.split(" ", -1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") publisher.warning("Shutdown Flag Up: Terminating.") break publisher.debug("Script DomainClassifier is idling 10s") time.sleep(1) continue docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() mimetype = PST._get_p_encoding() if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() c.validdomain(rtype=['A'],extended=True) localizeddomains = c.include(expression=cc_tld) if localizeddomains: print(localizeddomains) publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print(localizeddomains) publisher.warning('DomainC;{};{};{};Checked {} located in {}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc)) except IOError: print "CRC Checksum Failed on :", PST.p_path publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name)) pass
config.read(config_file)
root_dir = config.get('directories', 'root')
sys.path.append(os.path.join(root_dir, config.get('directories', 'libraries')))
from helpers.initscript import *

services_dir = os.path.join(root_dir, config.get('directories', 'services'))
service = os.path.join(services_dir, "fetch_bview")

if args.action == "start":
    print('Start fetching of bview')
    publisher.info('Start fetching of bview')
    print(service + " to start...")
    publisher.info(service + " to start...")
    proc = service_start_once(servicename=service, processname=service)
elif args.action == "stop":
    print('Stop fetching of bview')
    publisher.info('Stop fetching of bview')
    pid = pidof(processname=service)
    if pid:
        pid = pid[0]
        print(service + " to be stopped...")
        publisher.info(service + " to be stopped...")
        try:
            os.kill(int(pid), signal.SIGKILL)
        except OSError, e:
            print(service + ' unsuccessfully stopped')
            publisher.error(service + ' unsuccessfully stopped')
        rmpid(processname=service)
config.read(config_file)
root_dir = config.get('directories', 'root')
sys.path.append(os.path.join(root_dir, config.get('directories', 'libraries')))
from helpers.initscript import *

services_dir = os.path.join(root_dir, config.get('directories', 'services'))
service = os.path.join(services_dir, "ip_zmq")

if args.action == "start":
    print("Starting ZeroMQ IP publisher...")
    publisher.info('Starting ZeroMQ IP publisher...')
    print(service + " to start...")
    publisher.info(service + ' to start...')
    proc = service_start_once(servicename=service, processname=service)
elif args.action == "stop":
    print("Stopping ZeroMQ IP publisher...")
    publisher.info('Stopping ZeroMQ IP publisher...')
    pid = pidof(processname=service)
    if pid:
        pid = pid[0]
        try:
            os.kill(int(pid), signal.SIGHUP)
        except OSError, e:
            print(service + " unsuccessfully stopped")
            publisher.error(service + ' unsuccessfully stopped')
        rmpid(processname=service)
    else:
        print('No running ZeroMQ IP publisher process')
        publisher.info('No running ZeroMQ IP publisher process')
args = parser.parse_args()

__prepare(args.directory)

publisher.port = redis_port
publisher.channel = 'ASN_History'
time.sleep(5)
publisher.info('Importer started.')
while True:
    for timestamp, data in parse(args.directory):
        r = redis.Redis(host=redis_host, port=redis_port, db=redis_db)
        last_update = r.get('last_update')
        if last_update > timestamp:
            msg = 'Trying to import an old file ({old}). Latest: {new}'.format(
                old=timestamp, new=last_update)
            publisher.error(msg)
            continue
        else:
            msg = '===== Importing new file: {new} ====='.format(new=timestamp)
            publisher.info(msg)
            p = r.pipeline(transaction=False)
            p.set('last_update', timestamp)
            p.sadd('all_timestamps', timestamp)
            new_asns = 0
            updated_descrs = 0
            for asn, descr in data:
                all_descrs = r.hgetall(asn)
                if len(all_descrs) == 0:
                    p.hset(asn, timestamp, descr)
                    publisher.debug('New asn: {asn}'.format(asn=asn))
                    new_asns += 1
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
:file:`bin/services/microblog.py` - Microblogging client
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Start the microblogging client which posts on twitter and identica
"""

import time
from pubsublogger import publisher

import microblog

dev_mode = True

if __name__ == '__main__':
    sleep_timer = 3600
    publisher.channel = 'API_Twitter'
    while 1:
        try:
            if microblog.post_new_top_ranking():
                publisher.info('New Ranking posted on twitter and identica.')
                print 'New Ranking posted on twitter and identica.'
        except Exception as e:
            publisher.error("Something bad occurred: " + str(e))
            print "Something bad occurred: " + str(e)
        time.sleep(sleep_timer)