Example #1
def fetcher():
    """
        Main function which fetches the datasets
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except Exception:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        """
            Check if the file already exists: if the same file is found,
            the downloaded file is dropped. Otherwise, it is moved to its
            final directory.
        """
        to_check = glob.glob(os.path.join(old_directory, '*'))
        to_check += glob.glob(os.path.join(directory, '*'))
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
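
A note on the duplicate check in this example: filecmp.cmp defaults to shallow=True, which trusts os.stat() signatures (size and mtime) when they match; for a freshly downloaded temp file the mtime differs anyway, so contents end up being compared. A minimal sketch making the byte-level comparison explicit (the paths are placeholders, not from the original code):

import filecmp

# shallow=False forces a byte-by-byte comparison instead of trusting
# os.stat() signatures; placeholder paths, for illustration only.
identical = filecmp.cmp('downloaded.tmp', 'archive/dataset.txt', shallow=False)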
Example #2
def launch():
    """
        Fetch all the whois entry assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry, server + '\n' + unicode(whois, errors="replace"), cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            time.sleep(sleep_timer)
            __disconnect()
Example #3
 def get_asn_descriptions(self, asn):
     if not self.has_asnhistory:
         publisher.debug('ASN History not enabled.')
         return [datetime.date.today(), 'ASN History not enabled.']
     desc_history = self.asnhistory.get_all_descriptions(asn)
     return [(date.astimezone(tz.tzutc()).date(), descr)
             for date, descr in desc_history]
Example #4
def display_listof_pid(r_serv, arg):
    """Display the pid list from redis

    This function display infos in the shell about lauched process

    """
    jobs = {}
    joblist = []
    try:
        for job in r_serv.smembers("pid"):
            jobs = r_serv.hgetall(job)

            if jobs:  # hgetall returns {} (not None) when the key is missing
                start = datetime.strptime(r_serv.hget(job, "startime"), "%Y-%m-%d_%H:%M:%S")

                end = datetime.strptime(time.strftime("%Y-%m-%d_%H:%M:%S"), "%Y-%m-%d_%H:%M:%S")
                jobs['uptime'] = str(abs(start - end))
                joblist.append(jobs)
            else:
                publisher.debug("display_list_of_pid Aborted due to lack of Information in Redis")

        joblist = sorted(joblist, key=lambda k: k['uptime'], reverse=True)

        for job in joblist:
            print format_display_listof_pid(job, arg)

        if arg == "remain":
            print "Remaining: {0}".format(r_serv.llen("filelist"))

        if arg == "processed":
            print "processed: {0}".format(r_serv.llen("processed"))

    except TypeError:
        publisher.error("TypeError for display_listof_pid")
Example #5
def refining_regex_dataset(r_serv, r_key, regex, min_match, year, month, luhn=True, dnscheck=True):
    """Refine the "raw dataset" of pastes with regular expressions

    :param r_serv: -- Redis connexion database
    :param r_key: -- (str) The name of the key read in redis (often the name of
        the keywords category list)
    :param regex: -- Regular expression which will be matched.
    :param min_match: -- (int) Files with at most this many matches are deleted

    This function refines the database created by the classify_token_paste
    function. It opens again the files which matched the keywords category
    list, looks for the regular expression inside them and counts how many
    times it is found.

    If there are not enough matches of the regular expression, the file is
    deleted from the list.

    Finally, it merges the results by day, to be able to create a bar graph
    representing how many occurrences of the regex match there are per day.

    """
    for filename in r_serv.zrange(r_key, 0, -1):

        with gzip.open(filename, 'rb') as F:
            var = 0
            matchs = set([])

            for num, kword in enumerate(F):

                match = re.findall(regex, kword)
                var += len(match)

                for y in match:
                    if y != '' and len(y) < 100:
                        matchs.add(y)
            # If there are fewer matches than min_match, delete it (false positive)
            if len(matchs) <= min_match:
                r_serv.zrem(r_key, filename)
                publisher.debug("{0} deleted".format(filename))
            else:
                # Otherwise, update the score.
                if r_key == "creditcard_categ" and luhn:
                    for card_number in matchs:
                        if is_luhn_valid(card_number):

                            r_serv.zincrby(r_key+'_occur', filename, 1)

                            publisher.info("{1} is valid in the file {0}".format(filename, card_number))
                        else:
                            publisher.debug("{0} card is invalid".format(card_number))

                if r_key == "mails_categ" and dnscheck:
                    r_serv.zadd(r_key+'_occur', checking_MX_record(r_serv, matchs), filename)

                else:
                    # LUHN NOT TRIGGERED (Other Categs)
                    r_serv.zadd(r_key+'_occur',
                        len(matchs),
                        filename)

    create_graph_by_day_datastruct(r_serv, r_key, year, month)
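
is_luhn_valid is defined elsewhere in the project; a minimal standalone sketch of a Luhn checksum validator (the name matches the call above, the body is an assumption) could look like this:

def is_luhn_valid(card_number):
    # Sketch: True if the digit string passes the Luhn checksum.
    digits = [int(c) for c in str(card_number) if c.isdigit()]
    if not digits:
        return False
    checksum = sum(digits[-1::-2])  # every second digit, from the right
    checksum += sum(sum(divmod(2 * d, 10)) for d in digits[-2::-2])  # doubled digits
    return checksum % 10 == 0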
Example #6
def remove_pure_doppelganger(r_serv, nb):
    """Remove identic paste

    :param r_serv: -- Redis connexion database
    :param nb: -- (int) Number of execution wanted

    Add to a temporary list the hash of wholes files and compare the new hash
    to the element of this list. If the hash is already inside, the file
    is deleted otherwise the hash is added in the list.

    """
    hashlist = []
    for x in xrange(0,nb):
        filename = r_serv.lpop("filelist")

        with open(filename, 'rb') as L:
            hashline = hashlib.md5(L.read()).hexdigest()

            print len(hashlist)

            if hashline in hashlist:

                os.remove(filename)
                publisher.debug("{0} removed".format(filename))
                print "{0} removed".format(filename)
            else:
                hashlist.append(hashline)
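
One design note on remove_pure_doppelganger: membership tests on a Python list are O(n), so the `hashline in hashlist` check degrades as hashes accumulate. A set keeps the behaviour identical while making lookups O(1); a sketch of the same bookkeeping:

hashes_seen = set()  # O(1) membership tests instead of an O(n) list scan

def already_seen(hashline):
    # Sketch: record the hash and report whether it was seen before.
    if hashline in hashes_seen:
        return True
    hashes_seen.add(hashline)
    return False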
Example #7
def recovering_longlines(r_serv, nb):
    """Get long lines with their line numbers

    """
    try:
        for n in xrange(0,nb):
            filename = r_serv.lpop("longlines")

            if filename is not None:
                # For each values in redis (longline's line number)
                for numline in r_serv.smembers(filename):

                    with gzip.open(filename,'rb') as F:

                        for num, line in enumerate(F):
                            # When the line number matches.
                            if int(num) == int(numline):
                                pass
                                # TREATMENT
            else:
                publisher.debug("Empty list")
                r_serv.save()
                break

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Example #8
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Global"

    # ZMQ #
    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")

    # FUNCTIONS #
    publisher.info("Starting to publish.")

    while True:
        filename = r_serv.lpop("filelist")

        if filename is not None:

            msg = cfg.get("PubSub_Global", "channel")+" "+filename
            PubGlob.send_message(msg)
            publisher.debug("{0} Published".format(msg))
        else:
            time.sleep(10)
            publisher.debug("Nothing to publish")
Example #9
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    channel = cfg.get("PubSub_Longlines", "channel_1")
    subscriber_name = "tokenize"
    subscriber_config_section = "PubSub_Longlines"

    #Publisher
    publisher_config_section = "PubSub_Words"
    publisher_name = "pubtokenize"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
    Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)

    channel_0 = cfg.get("PubSub_Words", "channel_0")

    # FUNCTIONS #
    publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))

    while True:
        message = Sub.get_msg_from_queue(r_serv)
        print message
        if message is not None:
            PST = P.Paste(message.split(" ",-1)[-1])
        else:
            if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
                r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
            publisher.debug("Tokeniser is idling 10s")
            time.sleep(10)
            print "sleepin"
            continue

        for word, score in PST._get_top_words().items():
            if len(word) >= 4:
                msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
                Pub.send_message(msg)
                print msg
            else:
                pass
Example #10
def detect_longline_from_list(r_serv, nb):
    try:
        for n in xrange(0, nb):
            if not dectect_longlines(r_serv, "filelist", True):
                break

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Example #11
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #12
 def test_publisher(self):
     for i in range(0, 21):
         if i % 2 == 0:
             publisher.info('test' + str(i))
         elif i % 3 == 0:
             publisher.warning('test' + str(i))
         elif i % 5 == 0:
             publisher.error('test' + str(i))
         elif i % 7 == 0:
             publisher.critical('test' + str(i))
         else:
             publisher.debug('test' + str(i))
         time.sleep(1)
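
For test_publisher to emit anywhere, the publisher object from pubsublogger has to be pointed at a redis instance and a channel first, as the other examples on this page do; a minimal setup sketch (the channel name is an assumption):

from pubsublogger import publisher

publisher.port = 6380       # redis instance used by pubsublogger
publisher.channel = 'Test'  # channel name chosen for illustration

publisher.info('publisher is configured')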
Example #13
 def asn_desc_via_history(self, asn):
     if self.has_asnhistory:
         asn_descr = self.asnhistory.get_last_description(asn)
         if asn_descr is None:
              # The ASN has no description in the database
             # publisher.error(\
             #        'Unable to find the ASN description of {}. ASN History might be down.'.\
             #        format(asn))
             asn_descr = 'No ASN description has been found.'
     else:
         publisher.debug('ASN History not enabled.')
         asn_descr = 'ASN History not enabled.'
     return asn_descr
Example #16
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #17
    def get_TweetRawContent(self):

        publisher.port = 6380
        publisher.channel = 'Script'
        #publisher.debug("[-Tweet.py-] Requested RAW Content = " + self.p_path)
        tweetRaw = ''

        #publisher.debug("[-Tweet.py-] Reading file " + self.p_path)
        #print("[-Tweet.py-] Reading file " + self.p_path)
        try:
            with gzip.open(self.p_path, 'rb') as f:
                tweetRaw = f.read().decode('utf-8')
        except Exception as e:
            publisher.debug("error opening path: "+self.p_path + " with error "+str(e))
            paste = 'error opening path: '+self.p_path + ' with error '+str(e)

        return str(tweetRaw)
Example #18
def update_running_pids(old_procs):
    """
        Update the list of the running process and return the list
    """
    new_procs = []
    for proc in old_procs:
        if proc.poll() is None and check_pid(proc.pid):
            publisher.debug(str(proc.pid) + ' is alive')
            new_procs.append(proc)
        else:
            try:
                publisher.debug(str(proc.pid) + ' is gone')
                os.kill(proc.pid, signal.SIGKILL)
            except OSError:
                # the process is just already gone
                pass
    return new_procs
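
check_pid is a helper defined elsewhere; a common standalone implementation (an assumption, not necessarily the project's version) sends signal 0, which performs the existence and permission checks without actually delivering a signal:

import os

def check_pid(pid):
    # Sketch: return True if a process with this PID currently exists.
    try:
        os.kill(pid, 0)  # signal 0 only checks that the PID can be signalled
    except OSError:
        return False
    return True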
Example #19
def update_running_pids(old_procs):
    """
        Update the list of the running process and return the list
    """
    new_procs = []
    for proc in old_procs:
        if proc.poll() is None and check_pid(proc.pid):
            publisher.debug(str(proc.pid) + ' is alive')
            new_procs.append(proc)
        else:
            try:
                publisher.debug(str(proc.pid) + ' is gone')
                os.kill(proc.pid, signal.SIGKILL)
            except OSError:
                # the process is just already gone
                pass
    return new_procs
Example #20

def launch():
    """
        Fetch all the whois entry assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(
                        entry,
                        server + '\n' + unicode(whois, errors="replace"),
                        cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(
                    str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
        except Exception as e:
            publisher.error("Error on " + server + ': ' + str(e))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
Example #21
def redis_words_ranking(pipe, r_serv, nb, minlength, maxlength):
    """Looping function

    :param pipe: -- Redis pipe.
    :param nb: -- (int) Number of pastes proceeded by function
    :param minlength: -- (int) passed to the next function
    :param maxlength: -- (int) passed to the next function

    """
    try:
        for n in xrange(0, nb):

            path = r_serv.lpop("filelist")

            if path is not None:
                set_listof_pid(r_serv, path, sys.argv[0])

                redis_zincr_words(pipe, path, minlength, maxlength)

                update_listof_pid(r_serv)

                r_serv.lpush("processed", path)

                publisher.debug(path)
            else:
                publisher.debug("Empty list")
                break
    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Example #22
def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste, may be null
    results = reg_phone.findall(content)

    # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
    if len(results) > 4:
        print results
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))

if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
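
To see what the phone regex considers a hit, here is an illustrative test; the sample number is made up, and as the comment in search_phone warns, false positives are common:

import re

reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')

# '0612345678' matches: a leading '0' plus one digit, then 6-8 more digits.
print(reg_phone.search('call me at 0612345678') is not None)  # True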
Example #23
    def translateTweet(self,sentence,from_lang):

        publisher.debug("[-Tweet.py-] (translateTweet) Request from "+from_lang.upper())


        sentence = re.sub("#|@|&", "", sentence)
        cfgTM = configparser.ConfigParser()
        cfgTM.read(TMconfigfile)
        emailforTranslation = cfgTM.get("TwitterAnalyzer", "email_for_translation")

        api_url = "http://mymemory.translated.net/api/get?q={}&langpair={}|{}&de={}".format(sentence,from_lang.upper(),"EN",emailforTranslation)
        hdrs = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'}

        publisher.debug("[-Tweet.py-] (translateTweet) Request url="+api_url) 
        response = requests.get(api_url, headers=hdrs)
        response_json = json.loads(response.text)
        translation = response_json["responseData"]["translatedText"]
        return translation
Example #24
def dectect_longlines(r_serv, r_key, store=False, maxlength=500):
    """Store the line numbers of long lines in redis

    :param r_serv: -- The redis connexion database
    :param r_key: -- (str) The key name in redis
    :param store: -- (bool) Store the line numbers or not.
    :param maxlength: -- The limit between "short lines" and "long lines"

    This function connects to a redis list of filenames (paste filenames),
    opens each paste and checks whether it contains lines whose length is
    >= maxlength. If so, the paste is "tagged" as containing long lines in
    another redis structure, and the line numbers (of the long lines) can
    additionally be stored if the store argument is True.

    """
    try:
        while True:
            #r_key_list (categ)
            filename = r_serv.lpop(r_key)

            if filename is not None:

                set_listof_pid(r_serv, filename, sys.argv[0])

                # for each pastes
                with gzip.open(filename, 'rb') as F:
                    var = True
                    for num, line in enumerate(F):

                        if len(line) >= maxlength:
                            #publisher.debug("Longline:{0}".format(line))
                            if var:
                                r_serv.rpush("longlines", filename)
                                var = False

                            if store:
                                r_serv.sadd(filename, num)
                            else:
                                publisher.debug("Line numbers of longlines not stored")

                update_listof_pid(r_serv)
            else:
                publisher.debug("Empty list")
                return False

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
Example #25

    args = parser.parse_args()
    interval_first = args.firstdate
    interval_last = args.lastdate
    if interval_last is None:
        daemon = True
    else:
        daemon = False

    unavailable = []
    while 1:
        got_new_files = False
        if daemon or interval_last is None:
            interval_last = datetime.date.today().strftime("%Y-%m-%d")

        for fname, url in to_download():
            if not already_downloaded(fname) and url not in unavailable:
                publisher.debug("Trying to download: " + url)
                if downloadURL(url, fname):
                    got_new_files = True
                    publisher.info("Downloaded:" + fname)
                elif interval_last != datetime.date.today().strftime(
                        "%Y-%m-%d"):
                    # if today's file is not available, try again later.
                    unavailable.append(url)
        if not got_new_files:
            publisher.info('No new files to download.')
            if not daemon:
                publisher.info('Exiting...')
                break
            time.sleep(3600)
Example #26
def checking_MX_record(r_serv, adress_set):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connexion database
    :param adress_set: -- (set) This is a set of emails adress
    :return: (int) Number of adress with a responding and valid MX domains

    This function will split the email adress and try to resolve their domains
    names: on [email protected] it will try to resolve gmail.com

    """
    score = 0
    num = len(adress_set)
    WalidMX = set([])
    # Transforming the set into a string
    MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
    resolver = dns.resolver.Resolver()
    resolver.nameservers = ['149.13.33.69']
    resolver.timeout = 5
    resolver.lifetime = 2
    if MXdomains != []:

        for MXdomain in set(MXdomains):
            try:
                # Already in Redis living.
                if r_serv.exists(MXdomain[1:]):
                    score += 1
                    WalidMX.add(MXdomain[1:])
                # Not already in Redis
                else:
                    # If I'm Walid MX domain
                    if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
                        # Gonna be added in redis.
                        r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
                        score += 1
                        WalidMX.add(MXdomain[1:])
                    else:
                        pass

            except dns.resolver.NoNameservers:
                publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')

            except dns.resolver.NoAnswer:
                publisher.debug('NoAnswer, The response did not contain an answer to the question.')

            except dns.name.EmptyLabel:
                publisher.debug('SyntaxError: EmptyLabel')

            except dns.resolver.NXDOMAIN:
                r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
                publisher.debug('The query name does not exist.')

            except dns.name.LabelTooLong:
                publisher.debug('The Label is too long')

            except dns.resolver.Timeout:
                r_serv.setex(MXdomain[1:], 1, timedelta(days=1))

            except Exception as e:
                print e

    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidMX)
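
On the resolver configuration above: in dnspython, timeout bounds a single attempt against one nameserver while lifetime caps the total time for the whole query, so lifetime = 2 effectively overrides the 5-second timeout. If a 5-second overall budget was the intent, the values would be swapped:

import dns.resolver

resolver = dns.resolver.Resolver()
resolver.timeout = 2   # per-attempt timeout, per nameserver
resolver.lifetime = 5  # total budget for the query, across retries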
Example #27
        if not exists_in(indexpath):
            ix = create_in(indexpath, schema)
        else:
            ix = open_dir(indexpath)

    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(
                    title=unicode(docpath, errors='ignore'),
                    path=unicode(docpath, errors='ignore'),
                    content=unicode(paste, errors='ignore'))
                indexwriter.commit()
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
Example #28
        if message is not None:
            filename, score = message.split()

            if prec_filename is None or filename != prec_filename:
                PST = Paste.Paste(filename)
                MX_values = lib_refine.checking_MX_record(
                    r_serv2, PST.get_regex(email_regex))

                if MX_values[0] >= 1:

                    PST.__setattr__(channel, MX_values)
                    PST.save_attribute_redis(channel, (MX_values[0],
                                             list(MX_values[1])))

                    pprint.pprint(MX_values)
                    to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\
                        format(PST.p_source, PST.p_date, PST.p_name,
                               MX_values[0])
                    if MX_values[0] > is_critical:
                        publisher.warning(to_print)
                    else:
                        publisher.info(to_print)
            prec_filename = filename

        else:
            publisher.debug("Script Mails is Idling 10s")
            print 'Sleeping'
            time.sleep(10)

        message = p.get_from_set()
Example #29
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
        decode_responses=True)

    criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
    minTopPassList = p.config.getint("Credential", "minTopPassList")

    regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            #print('sleeping 10s')
            time.sleep(10)
            continue

        filepath, count = message.split(' ')

        paste = Paste.Paste(filepath)
        content = paste.get_p_content()
        creds = set(re.findall(regex_cred, content))

        if len(creds) == 0:
            continue

        sites = re.findall(regex_web, content)  # Used to count occurrences
        sites_set = set(re.findall(regex_web, content))
Example #30
    p = Process(config_section)

    # port generated automatically depending on the date
    curYear = datetime.now().year
    server = redis.StrictRedis(
                host=p.config.get("ARDB_DB", "host"),
                port=p.config.get("ARDB_DB", "port"),
                db=curYear,
                decode_responses=True)

    # FUNCTIONS #
    publisher.info("Script duplicate started")

    while True:
        message = p.get_from_set()
        if message is not None:
            module_name, p_path = message.split(';')
            print("new alert : {}".format(module_name))
            #PST = Paste.Paste(p_path)
        else:
            publisher.debug("Script Attribute is idling 10s")
            time.sleep(10)
            continue

        # Add in redis for browseWarningPaste
        # Format in set: WARNING_moduleName -> p_path
        key = "WARNING_" + module_name
        server.sadd(key, p_path)

        publisher.info('Saved warning paste {}'.format(p_path))
Example #31
                                # set number of files to submit
                                r_serv_log_submit.set(uuid + ':nb_total', len(files.children))
                                n = 1
                                for child in files.children:
                                    if verify_extention_filename(child.filename.decode()):
                                        create_paste(uuid, child.contents, ltags, ltagsgalaxies, uuid+'_'+ str(n) )
                                        n = n + 1
                                    else:
                                        print('bad extension')
                                        addError(uuid, 'Bad file extension: {}'.format(child.filename.decode()))

                            except FileNotFoundError:
                                print('file not found')
                                addError(uuid, 'File not found: {}'.format(file_full_path), uuid )

                            remove_submit_uuid(uuid)



            # textarea input paste
            else:
                r_serv_log_submit.set(uuid + ':nb_total', 1)
                create_paste(uuid, paste_content.encode(), ltags, ltagsgalaxies, uuid)
                remove_submit_uuid(uuid)
                time.sleep(0.5)

        # wait for paste
        else:
            publisher.debug("Script submit_paste is Idling 10s")
            time.sleep(3)
Example #32
    p = Process(config_section)
    max_execution_time = p.config.getint("Curve", "max_execution_time")
    publisher.info("Release scripts to find release names")

    movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
    tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
    xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"

    regexs = [movie, tv, xxx]

    regex = '|'.join(regexs)
    while True:
        signal.alarm(max_execution_time)
        filepath = p.get_from_set()
        if filepath is None:
            publisher.debug("Script Release is Idling 10s")
            print('Sleeping')
            time.sleep(10)
            continue

        paste = Paste.Paste(filepath)
        content = paste.get_p_content()

        #signal.alarm(max_execution_time)
        try:
            releases = set(re.findall(regex, content))
            if len(releases) == 0:
                continue

            to_print = 'Release;{};{};{};{} releases;{}'.format(
                paste.p_source, paste.p_date, paste.p_name, len(releases),
Example #33
    publisher.info("Script duplicate started")

    while True:
        try:
            hash_dico = {}
            dupl = set()
            dico_range_list = []

            x = time.time()

            message = p.get_from_set()
            if message is not None:
                path = message
                PST = Paste.Paste(path)
            else:
                publisher.debug("Script Attribute is idling 10s")
                print('sleeping')
                time.sleep(10)
                continue

            # the paste is too small
            if (PST._get_p_size() < min_paste_size):
                continue

            PST._set_p_hash_kind("ssdeep")
            PST._set_p_hash_kind("tlsh")

            # Assign the correct redis connexion
            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]

            # Creating the dico name: yyyymm
Example #34
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'ApiKey'

    p = Process(config_section)

    publisher.info("ApiKey started")

    message = p.get_from_set()

    # TODO improve REGEX
    regex_aws_access_key = re.compile(
        r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')
    regex_aws_secret_key = re.compile(
        r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])')

    regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')

    while True:

        message = p.get_from_set()

        if message is not None:

            search_api_key(message)

        else:
            publisher.debug("Script ApiKey is Idling 10s")
            time.sleep(10)
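
The AWS access-key pattern only fires when the key is written as an assignment, because of the literal '=' anchor; a quick illustration (the key below is the well-known example key from the AWS documentation):

import re

regex_aws_access_key = re.compile(r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')

sample = 'aws_access_key_id=AKIAIOSFODNN7EXAMPLE'
print(regex_aws_access_key.findall(sample))  # ['=AKIAIOSFODNN7EXAMPLE']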
Example #35
    while True:
        if message is not None:
            filename, score = message.split()

            if prec_filename is None or filename != prec_filename:
                domains_list = []
                PST = Paste.Paste(filename)
                client = ip2asn()
                for x in PST.get_regex(url_regex):
                    scheme, credential, subdomain, domain, host, tld, \
                        port, resource_path, query_string, f1, f2, f3, \
                        f4 = x
                    domains_list.append(domain)
                    p.populate_set_out(x, 'Url')
                    publisher.debug('{} Published'.format(x))

                    if f1 == "onion":
                        print domain

                    hostl = unicode(subdomain+domain)
                    try:
                        socket.setdefaulttimeout(2)
                        ip = socket.gethostbyname(unicode(hostl))
                    except:
                        # If the resolver is not giving any IPv4 address,
                        # the ASN/CC lookup is skipped.
                        continue

                    try:
                        l = client.lookup(ip, qType='IP')
Example #36
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Keys'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Keys module ")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        paste = Paste.Paste(message)
        search_key(paste)

        # (Optional) Send that thing to the next queue
Example #37
    publisher.info("Find credentials")

    faup = Faup()

    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)

    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
            time.sleep(10)
            continue

        item_id, count = message.split()

        item_content = Item.get_item_content(item_id)

        # Extract all credentials
        all_credentials = regex_helper.regex_findall(
            module_name,
            redis_cache_key,
            regex_cred,
            item_id,
            item_content,
            max_time=max_execution_time)
Example #38
def checking_A_record(r_serv, domains_set):
    score = 0
    num = len(domains_set)
    WalidA = set([])
    resolver = dns.resolver.Resolver()
    resolver.nameservers = ['149.13.33.69']
    resolver.timeout = 5
    resolver.lifetime = 2

    for Adomain in domains_set:
        try:
            # Already in Redis living.
            if r_serv.exists(Adomain):
                score += 1
                WalidA.add(Adomain)
            # Not already in Redis
            else:
                # If I'm Walid domain
                if resolver.query(Adomain, rdtype=dns.rdatatype.A):
                    # Gonna be added in redis.
                    r_serv.setex(Adomain, 1, timedelta(days=1))
                    score += 1
                    WalidA.add(Adomain)
                else:
                    pass

        except dns.resolver.NoNameservers:
            publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')

        except dns.resolver.NoAnswer:
            publisher.debug('NoAnswer, The response did not contain an answer to the question.')

        except dns.name.EmptyLabel:
            publisher.debug('SyntaxError: EmptyLabel')

        except dns.resolver.NXDOMAIN:
            r_serv.setex(Adomain, 1, timedelta(days=1))
            publisher.debug('The query name does not exist.')

        except dns.name.LabelTooLong:
            publisher.debug('The Label is too long')

        except Exception as e:
            print e

    publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidA)
Example #39
    categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential']
    tmp_dict = {}
    for filename in categories:
        bname = os.path.basename(filename)
        tmp_dict[bname] = []
        with open(os.path.join(args.d, filename), 'r') as f:
            patterns = [r'%s' % re.escape(s.strip()) for s in f]
            tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)

    prec_filename = None

    while True:
        filename = p.get_from_set()
        if filename is None:
            publisher.debug("Script Categ is Idling 10s")
            print 'Sleeping'
            time.sleep(10)
            continue

        paste = Paste.Paste(filename)
        content = paste.get_p_content()

        for categ, pattern in tmp_dict.items():
            found = set(re.findall(pattern, content))
            if len(found) > 0:
                msg = '{} {}'.format(paste.p_path, len(found))
                print msg, categ
                p.populate_set_out(msg, categ)

                publisher.info(
Example #40
        if not exists_in(indexpath):
            ix = create_in(indexpath, schema)
        else:
            ix = open_dir(indexpath)

    # LOGGING #
    publisher.info("ZMQ Indexer is Running")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script Indexer is idling 1s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            print "Indexing :", docpath
            if indexertype == "whoosh":
                indexwriter = ix.writer()
                indexwriter.update_document(title=unicode(docpath,
                                                          errors='ignore'),
                                            path=unicode(docpath,
                                                         errors='ignore'),
                                            content=unicode(paste,
                                                            errors='ignore'))
                indexwriter.commit()
        except IOError:
Example #41

    config_section = 'SQLInjectionDetection'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Try to detect SQL injection")

    server_statistics = redis.StrictRedis(
        host=p.config.get("ARDB_Statistics", "host"),
        port=p.config.getint("ARDB_Statistics", "port"),
        db=p.config.getint("ARDB_Statistics", "db"),
        decode_responses=True)

    faup = Faup()

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(10)
            continue

        else:
            # Do something with the message from the queue
            url, date, path = message.split()
            analyse(url, path)
Example #42
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
                                db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    # Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "DomainClassifier"
    subscriber_config_section = "PubSub_Global"

    cc = cfg.get("PubSub_DomainClassifier", "cc")
    cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld")

    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
                            subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ DomainClassifier is Running""")
    c = DomainClassifier.domainclassifier.Extract(rawtext="")

    while True:
        try:
            message = sub.get_msg_from_queue(r_serv1)

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script DomainClassifier is idling 10s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()
            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc))
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
            pass
Example #43
                        set_name = 'regex_' + dico_regexname_to_redis[regex_str]
                        new_to_the_set = server_term.sadd(set_name, filename)
                        new_to_the_set = True if new_to_the_set == 1 else False

                        # count the number of occurrences of this term
                        regex_value = int(
                            server_term.hincrby(
                                timestamp, dico_regexname_to_redis[regex_str],
                                int(1)))
                        #1 term per paste
                        if new_to_the_set:
                            regex_value_perPaste = int(
                                server_term.hincrby(
                                    "per_paste_" + str(timestamp),
                                    dico_regexname_to_redis[regex_str],
                                    int(1)))
                            server_term.zincrby(
                                "per_paste_" + curr_set,
                                dico_regexname_to_redis[regex_str], float(1))
                    server_term.zincrby(curr_set,
                                        dico_regexname_to_redis[regex_str],
                                        float(1))
                else:
                    pass

        else:
            publisher.debug("Script RegexForTermsFrequency is Idling")
            print "sleeping"
            time.sleep(5)
        message = p.get_from_set()
Example #44

    args = parser.parse_args()
    interval_first = args.firstdate
    interval_last = args.lastdate
    if interval_last is None:
        daemon = True
    else:
        daemon = False

    unavailable = []
    while 1:
        got_new_files = False
        if daemon or interval_last is None:
            interval_last = datetime.date.today().strftime("%Y-%m-%d")

        for fname, url in to_download():
            if not already_downloaded(fname) and url not in unavailable:
                publisher.debug("Trying to download: " + url)
                if downloadURL(url, fname):
                    got_new_files = True
                    publisher.info("Downloaded:" + fname)
                elif interval_last != datetime.date.today().strftime("%Y-%m-%d"):
                    # if today's file is not available, try again later.
                    unavailable.append(url)
        if not got_new_files:
            publisher.info('No new files to download.')
            if not daemon:
                publisher.info('Exiting...')
                break
            time.sleep(3600)
Example #45
    while True:
        if message is not None:
            generate_new_graph = True

            filename, word, score = message.split()
            temp = filename.split('/')
            date = temp[-4] + temp[-3] + temp[-2]

            low_word = word.lower()
            prev_score = r_serv1.hget(low_word, date)
            if prev_score is not None:
                r_serv1.hset(low_word, date, int(prev_score) + int(score))
            else:
                r_serv1.hset(low_word, date, score)

        else:
            if generate_new_graph:
                generate_new_graph = False
                print 'Building graph'
                today = datetime.date.today()
                year = today.year
                month = today.month
                lib_words.create_curve_with_word_file(r_serv1, csv_path,
                                                      wordfile_path, year,
                                                      month)

            publisher.debug("Script Curve is Idling")
            print "sleeping"
            time.sleep(10)
        message = p.get_from_set()
Example #46
    def get_ip_info(self, ip, days_limit=None):
        """
            Return information related to an IP address.

            :param ip: The IP address
            :param days_limit: The number of days we want to check in the past
                (default: around 2 years)
            :rtype: Dictionary

                .. note:: Format of the output:

                    .. code-block:: python

                        {
                            'ip': ip,
                            'days_limit' : days_limit,
                            'ptrrecord' : 'ptr.record.com',
                            'history':
                                [
                                    {
                                        'asn': asn,
                                        'interval': [first, last],
                                        'block': block,
                                        'timestamp': timestamp,
                                        'descriptions':
                                            [
                                                [date, descr],
                                                ...
                                            ]
                                    },
                                    ...
                                ]
                        }
        """
        if days_limit is None:
            days_limit = 750
        to_return = {'ip': ip, 'days_limit': days_limit, 'history': []}
        if self.has_ptr:
            to_return['ptrrecord'] = self.get_ptr_record(ip)
        if not self.has_ipasn:
            publisher.debug('IPASN not enabled.')
            to_return['error'] = 'IPASN not enabled.'
            return to_return
        if not ip:
            to_return['error'] = 'No IP provided.'
            return to_return
        for first, last, asn, block in self.ipasn.aggregate_history(
                ip, days_limit):
            first_date = parser.parse(first).replace(tzinfo=tz.tzutc()).date()
            last_date = parser.parse(last).replace(tzinfo=tz.tzutc()).date()
            if self.has_asnhistory:
                desc_history = self.asnhistory.get_all_descriptions(asn)
                valid_descriptions = []
                for date, descr in desc_history:
                    date = date.astimezone(tz.tzutc()).date()
                    test_date = date - datetime.timedelta(days=1)
                    if last_date < test_date:
                        # Too new
                        continue
                    elif last_date >= test_date and first_date <= test_date:
                        # Changes within the interval
                        valid_descriptions.append([date.isoformat(), descr])
                    elif first_date > test_date:
                        # get the most recent change before the interval
                        valid_descriptions.append([date.isoformat(), descr])
                        break
            else:
                publisher.debug('ASN History not enabled.')
                valid_descriptions = [
                    datetime.date.today().isoformat(),
                    'ASN History not enabled.'
                ]
            if len(valid_descriptions) == 0:
                if len(desc_history) != 0:
                    # fallback, use the oldest description.
                    date = desc_history[-1][0].astimezone(tz.tzutc()).date()
                    descr = desc_history[-1][1]
                    valid_descriptions.append([date.isoformat(), descr])
                else:
                    # No history found for this ASN
                    if last_date > datetime.date(2013, 1, 1):
                        # ASN has been seen recently, should not happen
                        # as the asn history module is running since early 2013
                        publisher.error(
                            'Unable to find the ASN description of {}. IP address: {}. ASN History might be down.'
                            .format(asn, ip))
                    valid_descriptions.append(
                        ['0000-00-00', 'No ASN description has been found.'])
            entry = {}
            entry['asn'] = asn
            entry['interval'] = [first_date.isoformat(), last_date.isoformat()]
            entry['block'] = block
            entry['timestamp'] = self.get_first_seen(asn, block)
            entry['descriptions'] = valid_descriptions
            to_return['history'].append(entry)
        return to_return
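
A short, self-contained sketch that walks the structure documented in the
note above; every value in the sample dict is an illustrative placeholder
(documentation IP/ASN ranges), not output taken from the source.

# Sketch: pretty-print a history dict shaped like the documented output.
# All sample values below are placeholders.
sample = {
    'ip': '198.51.100.42',
    'days_limit': 30,
    'ptrrecord': 'ptr.record.com',
    'history': [{
        'asn': '64496',
        'interval': ['2013-01-01', '2013-06-30'],
        'block': '198.51.100.0/24',
        'timestamp': 1356998400,
        'descriptions': [['2013-01-01', 'EXAMPLE-AS']],
    }],
}

for entry in sample['history']:
    print('AS{} {} {} -> {}'.format(entry['asn'], entry['block'],
                                    entry['interval'][0],
                                    entry['interval'][1]))
    for date, descr in entry['descriptions']:
        print('  {}: {}'.format(date, descr))
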
Example #47
def checking_MX_record(r_serv, adress_set):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connexion database
    :param adress_set: -- (set) This is a set of emails adress
    :return: (int) Number of adress with a responding and valid MX domains

    This function will split the email adress and try to resolve their domains
    names: on [email protected] it will try to resolve gmail.com

    """
    score = 0
    num = len(adress_set)
    WalidMX = set([])
    # Extract the domain part (with leading '@') of every address
    # from the stringified set.
    MXdomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}",
                           str(adress_set).lower())

    if MXdomains != []:

        for MXdomain in set(MXdomains):
            try:
                # Domain already cached in Redis as valid.
                if r_serv.exists(MXdomain[1:]):
                    score += 1
                    WalidMX.add(MXdomain[1:])
                # Not cached yet: try to resolve its MX record.
                else:
                    if dns.resolver.query(MXdomain[1:],
                                          rdtype=dns.rdatatype.MX):
                        # Cache the valid domain in Redis for one day.
                        r_serv.setex(MXdomain[1:], timedelta(days=1), 1)
                        score += 1
                        WalidMX.add(MXdomain[1:])

            except dns.resolver.NoNameservers:
                publisher.debug(
                    'NoNameserver, No non-broken nameservers are available to answer the query.'
                )

            except dns.resolver.NoAnswer:
                publisher.debug(
                    'NoAnswer, The response did not contain an answer to the question.'
                )

            except dns.name.EmptyLabel:
                publisher.debug('SyntaxError: EmptyLabel')

            except dns.resolver.NXDOMAIN:
                publisher.debug('The query name does not exist.')

            except dns.name.LabelTooLong:
                publisher.debug('The Label is too long')

    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidMX)
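
For reference, a minimal standalone sketch of the per-domain MX lookup the
function above performs, using the same dns.resolver.query call as the
surrounding examples; the has_mx helper and the domain names are
illustrative, not part of the source.

import dns.rdatatype
import dns.resolver

def has_mx(domain):
    # True when the domain publishes at least one MX record.
    try:
        answers = dns.resolver.query(domain, rdtype=dns.rdatatype.MX)
        return len(answers) > 0
    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer,
            dns.resolver.NoNameservers, dns.resolver.Timeout):
        return False

print(has_mx('gmail.com'))                 # Expected: True
print(has_mx('nxdomain.example.invalid'))  # Expected: False
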
Example #48
                # Saving the list of extracted onion domains.
                PST.__setattr__(channel, domains_list)
                PST.save_attribute_redis(channel, domains_list)
                to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
                                                    PST.p_name)
                if len(domains_list) > 0:

                    publisher.warning('{}Detected {} .onion(s)'.format(
                        to_print, len(domains_list)))
                    now = datetime.datetime.now()
                    path = os.path.join(
                        'onions',
                        str(now.year).zfill(4),
                        str(now.month).zfill(2),
                        str(now.day).zfill(2),
                        str(int(time.mktime(now.utctimetuple()))))
                    to_print = 'Onion;{};{};{};'.format(
                        PST.p_source, PST.p_date, PST.p_name)
                    for url in fetch(p, r_cache, urls, domains_list, path):
                        publisher.warning('{}Checked {}'.format(to_print, url))
                else:
                    publisher.info('{}Onion related'.format(to_print))

            prec_filename = filename
        else:
            publisher.debug("Script url is Idling 10s")
            print('Sleeping')
            time.sleep(10)
        message = p.get_from_set()
Example #49
def checking_A_record(r_serv, domains_set):
    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
    if not os.path.exists(configfile):
        raise Exception('Unable to find the configuration file. '
                        'Did you set the environment variables? '
                        'Did you activate the virtualenv?')
    cfg = configparser.ConfigParser()
    cfg.read(configfile)
    dns_server = cfg.get("Web", "dns")

    score = 0
    num = len(domains_set)
    WalidA = set([])
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [dns_server]
    resolver.timeout = 5
    resolver.lifetime = 2

    for Adomain in domains_set:
        try:
            # Domain already cached in Redis as valid.
            if r_serv.exists(Adomain):
                score += 1
                WalidA.add(Adomain)
            # Not cached yet: try to resolve an A record for it.
            else:
                if resolver.query(Adomain, rdtype=dns.rdatatype.A):
                    # Cache the valid domain in Redis for one day.
                    r_serv.setex(Adomain, 1, timedelta(days=1))
                    score += 1
                    WalidA.add(Adomain)

        except dns.resolver.NoNameservers:
            publisher.debug(
                'NoNameserver, No non-broken nameservers are available to answer the query.'
            )

        except dns.resolver.NoAnswer:
            publisher.debug(
                'NoAnswer, The response did not contain an answer to the question.'
            )

        except dns.name.EmptyLabel:
            publisher.debug('SyntaxError: EmptyLabel')

        except dns.resolver.NXDOMAIN:
            # Cache the nonexistent domain for a day to avoid re-querying it.
            r_serv.setex(Adomain, 1, timedelta(days=1))
            publisher.debug('The query name does not exist.')

        except dns.name.LabelTooLong:
            publisher.debug('The Label is too long')

        except Exception as e:
            print(e)

    publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidA)
Example #50
def checking_A_record(r_serv, domains_set):
    score = 0
    num = len(domains_set)
    WalidA = set([])

    for Adomain in domains_set:
        try:
            # Domain already cached in Redis as valid.
            if r_serv.exists(Adomain):
                score += 1
                WalidA.add(Adomain)
            # Not cached yet: try to resolve an A record for it.
            else:
                if dns.resolver.query(Adomain, rdtype=dns.rdatatype.A):
                    # Cache the valid domain in Redis for one day.
                    r_serv.setex(Adomain, timedelta(days=1), 1)
                    score += 1
                    WalidA.add(Adomain)

        except dns.resolver.NoNameservers:
            publisher.debug(
                'NoNameserver, No non-broken nameservers are available to answer the query.'
            )

        except dns.resolver.NoAnswer:
            publisher.debug(
                'NoAnswer, The response did not contain an answer to the question.'
            )

        except dns.name.EmptyLabel:
            publisher.debug('SyntaxError: EmptyLabel')

        except dns.resolver.NXDOMAIN:
            publisher.debug('The query name does not exist.')

        except dns.name.LabelTooLong:
            publisher.debug('The Label is too long')

    publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
    return (num, WalidA)
Example #51
def checking_MX_record(r_serv, adress_set, addr_dns):
    """Check if emails MX domains are responding.

    :param r_serv: -- Redis connexion database
    :param adress_set: -- (set) This is a set of emails adress
    :param adress_set: -- (str) This is a server dns address
    :return: (int) Number of adress with a responding and valid MX domains

    This function will split the email adress and try to resolve their domains
    names: on [email protected] it will try to resolve gmail.com

    """

    # Remove duplicate addresses.
    adress_set = list(set(adress_set))

    score = 0
    num = len(adress_set)
    WalidMX = set([])
    validMX = {}
    # Extract the domain part (with leading '@') of every address
    # from the stringified list.
    MXdomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}",
                           str(adress_set).lower())
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [addr_dns]
    resolver.timeout = 5
    resolver.lifetime = 2
    if MXdomains != []:

        for MXdomain in MXdomains:
            try:
                # Strip the leading '@'.
                MXdomain = MXdomain[1:]
                # Domain already cached in Redis as valid.
                if r_serv.exists(MXdomain):
                    score += 1
                    WalidMX.add(MXdomain)
                    validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
                # Not cached yet: try to resolve its MX record.
                else:
                    if resolver.query(MXdomain, rdtype=dns.rdatatype.MX):
                        # Cache the valid domain in Redis for one day.
                        r_serv.setex(MXdomain, 1, timedelta(days=1))
                        score += 1
                        WalidMX.add(MXdomain)
                        validMX[MXdomain] = validMX.get(MXdomain, 0) + 1

            except dns.resolver.NoNameservers:
                publisher.debug(
                    'NoNameserver, No non-broken nameservers are available to answer the query.'
                )
                print(
                    'NoNameserver, No non-broken nameservers are available to answer the query.'
                )

            except dns.resolver.NoAnswer:
                publisher.debug(
                    'NoAnswer, The response did not contain an answer to the question.'
                )
                print(
                    'NoAnswer, The response did not contain an answer to the question.'
                )

            except dns.name.EmptyLabel:
                publisher.debug('SyntaxError: EmptyLabel')
                print('SyntaxError: EmptyLabel')

            except dns.resolver.NXDOMAIN:
                # MXdomain was already stripped of its '@' above.
                r_serv.setex(MXdomain, 1, timedelta(days=1))
                publisher.debug('The query name does not exist.')
                print('The query name does not exist.')

            except dns.name.LabelTooLong:
                publisher.debug('The Label is too long')
                print('The Label is too long')

            except dns.resolver.Timeout:
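                # Assumed intent: cache the domain for a day so a
                # timing-out server is not queried again immediately.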
                print('timeout')
                r_serv.setex(MXdomain, 1, timedelta(days=1))

            except Exception as e:
                print(e)

    publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
    return (num, validMX)
Example #52
        'CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey'
    ]
    tmp_dict = {}
    for filename in categories:
        bname = os.path.basename(filename)
        tmp_dict[bname] = []
        with open(os.path.join(args.d, filename), 'r') as f:
            # Skip empty lines: an empty pattern would match everything.
            patterns = [re.escape(s.strip()) for s in f if s.strip()]
            tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)

    prec_filename = None

    while True:
        filename = p.get_from_set()
        if filename is None:
            publisher.debug("Script Categ is Idling 10s")
            print('Sleeping')
            time.sleep(10)
            continue

        paste = Paste.Paste(filename)
        content = paste.get_p_content()

        for categ, pattern in tmp_dict.items():
            found = set(re.findall(pattern, content))
            if len(found) >= matchingThreshold:
                msg = '{} {}'.format(paste.p_rel_path, len(found))

                print(msg, categ)
                p.populate_set_out(msg, categ)
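
The loop above reduces category matching to one compiled alternation per
term file and a count of distinct matches against a threshold. A minimal
self-contained sketch of that idea follows; the category terms, the
threshold, and the sample content are invented for illustration.

import re

categories = {
    'Mail': ['smtp', 'mailto:', 'pop3'],
    'Onion': ['.onion', 'tor browser'],
}
matching_threshold = 1

# One case-insensitive alternation per category, every term escaped.
compiled = {
    categ: re.compile('|'.join(re.escape(t) for t in terms), re.IGNORECASE)
    for categ, terms in categories.items()
}

content = 'Contact us at mailto:admin@example.onion over Tor Browser.'
for categ, pattern in compiled.items():
    found = set(re.findall(pattern, content))
    if len(found) >= matching_threshold:
        print(categ, sorted(found))
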
Example #53
    # FUNCTIONS #
    publisher.info("""ZMQ Attribute is Running""")

    while True:
        try:
            message = h.redis_rpop()

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if h.redis_queue_shutdown():
                    print "Shutdown Flag Up: Terminating"
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script Attribute is idling 10s")
                time.sleep(10)
                continue

            # FIXME do it directly in the class
            PST.save_attribute_redis("p_encoding", PST._get_p_encoding())
            PST.save_attribute_redis("p_language", PST._get_p_language())
            # FIXME why not save everything there.
            PST.save_all_attributes_redis()
            # FIXME Not used.
            PST.store.sadd("Pastes_Objects", PST.p_path)
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #54
             old=timestamp, new=last_update)
         publisher.error(msg)
         continue
     else:
         msg = '===== Importing new file: {new} ====='.format(new=timestamp)
         publisher.info(msg)
         p = r.pipeline(transaction=False)
         p.set('last_update', timestamp)
         p.sadd('all_timestamps', timestamp)
         new_asns = 0
         updated_descrs = 0
         for asn, descr in data:
             all_descrs = r.hgetall(asn)
             if len(all_descrs) == 0:
                 p.hset(asn, timestamp, descr)
                 publisher.debug('New asn: {asn}'.format(asn=asn))
                 new_asns += 1
             else:
                 dates = sorted(all_descrs.keys())
                 last_descr = all_descrs[dates[-1]]
                 if descr != last_descr:
                     p.hset(asn, timestamp, descr)
                     msg = 'New description for {asn}. Was {old}, is {new}'.format(
                         asn=asn, old=last_descr, new=descr)
                     publisher.info(msg)
                     updated_descrs += 1
         p.execute()
         msg = '===== Import finished: {new}, new ASNs:{nb}, Updated:{up} ====='.format(
             new=timestamp, nb=new_asns, up=updated_descrs)
         publisher.info(msg)
 if args.not_new:
Example #55
                    urls.append(url)

                # Saving the list of extracted onion domains.
                PST.__setattr__(channel, domains_list)
                PST.save_attribute_redis(channel, domains_list)
                to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
                                                    PST.p_name)
                if len(domains_list) > 0:

                    publisher.warning('{}Detected {} .onion(s)'.format(
                        to_print, len(domains_list)))
                    now = datetime.datetime.now()
                    path = os.path.join('onions', str(now.year).zfill(4),
                                        str(now.month).zfill(2),
                                        str(now.day).zfill(2),
                                        str(int(time.mktime(now.utctimetuple()))))
                    to_print = 'Onion;{};{};{};'.format(PST.p_source,
                                                        PST.p_date,
                                                        PST.p_name)
                    for url in fetch(p, r_cache, urls, domains_list, path):
                        publisher.warning('{}Checked {}'.format(to_print, url))
                else:
                    publisher.info('{}Onion related'.format(to_print))

            prec_filename = filename
        else:
            publisher.debug("Script url is Idling 10s")
            print('Sleeping')
            time.sleep(10)
        message = p.get_from_set()
Example #56
        if message is not None:
            filename, score = message.split()
            paste = Paste.Paste(filename)
            content = paste.get_p_content()
            all_cards = re.findall(regex, content)
            if len(all_cards) > 0:
                print('All matching: {}'.format(all_cards))
                creditcard_set = set([])

                for card in all_cards:
                    clean_card = re.sub('[^0-9]', '', card)
                    if lib_refine.is_luhn_valid(clean_card):
                        print('{} is valid'.format(clean_card))
                        creditcard_set.add(clean_card)

                paste.__setattr__(channel, creditcard_set)
                paste.save_attribute_redis(channel, creditcard_set)

                pprint.pprint(creditcard_set)
                to_print = 'CreditCard;{};{};{};'.format(
                    paste.p_source, paste.p_date, paste.p_name)
                if (len(creditcard_set) > 0):
                    publisher.warning('{}Checked {} valid number(s)'.format(
                        to_print, len(creditcard_set)))
                else:
                    publisher.info('{}CreditCard related'.format(to_print))
        else:
            publisher.debug("Script creditcard is idling 1m")
            print 'Sleeping'
            time.sleep(10)
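
lib_refine.is_luhn_valid is not shown in this excerpt; the sketch below is
a typical Luhn checksum implementation matching the call above (an assumed
implementation, not the actual lib_refine code).

def is_luhn_valid(card_number):
    # Luhn checksum (assumed implementation): double every second digit
    # from the right, subtract 9 from doubled digits above 9, and require
    # the total to be a multiple of 10.
    checksum = 0
    for i, c in enumerate(reversed(str(card_number))):
        d = int(c)
        if i % 2 == 1:
            d *= 2
            if d > 9:
                d -= 9
        checksum += d
    return checksum % 10 == 0

print(is_luhn_valid('4532015112830366'))  # Expected: True (Luhn-valid test number)
print(is_luhn_valid('1234567812345678'))  # Expected: False
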
Example #57
                creditcard_set = set([])
                PST = Paste.Paste(filename)

                for x in PST.get_regex(creditcard_regex):
                    if lib_refine.is_luhn_valid(x):
                        creditcard_set.add(x)

                PST.__setattr__(channel, creditcard_set)
                PST.save_attribute_redis(channel, creditcard_set)

                pprint.pprint(creditcard_set)
                to_print = 'CreditCard;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                if (len(creditcard_set) > 0):
                    publisher.critical('{}Checked {} valid number(s)'.format(
                        to_print, len(creditcard_set)))
                else:
                    publisher.info('{}CreditCard related'.format(to_print))

            prec_filename = filename

        else:
            if h.redis_queue_shutdown():
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
            publisher.debug("Script creditcard is idling 1m")
            time.sleep(60)

        message = h.redis_rpop()