Example #1
def manager():
    """
        Manage (start/stop) the process (fetching/parsing) of the modules
    """
    modules = config_db.smembers('modules')
    modules_nr = len(modules)
    # Cleanup
    for module in modules:
        config_db.delete(module + '|parsing')
        config_db.delete(module + '|fetching')

    while True:
        for module in modules:
            parsing = config_db.get(module + "|" + "parsing")
            fetching = config_db.get(module + "|" + "fetching")
            if parsing is None:
                launch_parser(module)
            if fetching is None:
                launch_fetcher(module)

            parsing = config_db.get(module + "|" + "parsing")
            fetching = config_db.get(module + "|" + "fetching")
            if parsing == 0 and fetching == 0:
                config_db.srem('modules', module)

        modules = config_db.smembers('modules')
        if len(modules) != modules_nr:
            modules_nr = len(modules)
            publisher.info('These modules are running: ' + str(modules))
        else:
            time.sleep(sleep_timer)
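These snippets reference module-level globals that the listing never shows (config_db, publisher, sleep_timer, and the launch_parser/launch_fetcher helpers defined in other examples below). A minimal sketch of that setup, assuming a local Redis instance and the pubsublogger package; the host, port, database number, channel name and sleep interval are placeholder assumptions:

import time

import redis
from pubsublogger import publisher

# Assumed module-level globals used by manager() above (placeholder values).
config_db = redis.Redis(host='localhost', port=6379, db=0)  # configuration database
publisher.channel = 'Manager'  # logging channel, assumed name
sleep_timer = 60               # seconds to wait between two polling rounds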
Example #2
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Queuing"

    # ZMQ #
    Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "onion_categ", "tor")

    # FUNCTIONS #
    publisher.info("""Suscribed to channel {0}""".format("onion_categ"))

    while True:
        Sub.get_and_lpush(r_serv)

        if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"):
            r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q")
            print "Shutdown Flag Up: Terminating"
            publisher.warning("Shutdown Flag Up: Terminating.")
            break
Example #3
def launch_fetcher(module):
    """
        Launch a process which fetch a dataset in a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    timer = '3600'
    if module is None:
        publisher.error('Unable to start fetching : module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have an URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen([
            "python", service_fetcher, '-n', module, '-d', directory, '-u',
            url, '-t', timer
        ])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module + \
                ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
Example #4
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):

    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return []
        else:
            if r_set:
                all_items = r_serv_cache.smembers(redis_key)
            else:
                all_items = r_serv_cache.lrange(redis_key, 0 ,-1)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
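Example #4 spawns a _regex_findall worker that the listing does not include. Based on how the parent reads the results back (smembers when r_set is true, lrange otherwise), a plausible sketch is shown below; it assumes the regex has no capture groups and that r_serv_cache is the same cache Redis connection the parent uses:

import re

def _regex_findall(redis_key, regex, item_content, r_set):
    # Hypothetical worker: run the regex and store matches where the parent expects them.
    all_items = re.findall(regex, item_content)
    if r_set:
        if all_items:
            r_serv_cache.sadd(redis_key, *all_items)  # parent reads with smembers()
    else:
        for item in all_items:
            r_serv_cache.rpush(redis_key, item)       # parent reads with lrange()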
Example #5
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Queuing"

    # ZMQ #
    channel = cfg.get("PubSub_Words", "channel_0")
    subscriber_name = "curve"
    subscriber_config_section = "PubSub_Words"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
    # FUNCTIONS #
    publisher.info("""Suscribed to channel {0}""".format(channel))

    while True:
        Sub.get_and_lpush(r_serv)

        if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"):
            r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q")
            print "Shutdown Flag Up: Terminating"
            publisher.warning("Shutdown Flag Up: Terminating.")
            break
Example #6
def fetcher():
    """
        Main function which fetches the datasets
    """
    while config_db.sismember('modules', module):
        try:
            urllib.urlretrieve(url, temp_filename)
        except:
            publisher.error('Unable to fetch ' + url)
            __check_exit()
            continue
        drop_file = False
        """
            Check if the file already exists: if the same file is found,
            the downloaded file is dropped. Otherwise, it is moved to its
            final directory.
        """
        to_check = glob.glob( os.path.join(old_directory, '*') )
        to_check += glob.glob( os.path.join(directory, '*') )
        for file in to_check:
            if filecmp.cmp(temp_filename, file):
                drop_file = True
                break
        if drop_file:
            os.unlink(temp_filename)
            publisher.debug('No new file on ' + url)
        else:
            os.rename(temp_filename, filename)
            publisher.info('New file on ' + url)
        __check_exit()
    config_db.delete(module + "|" + "fetching")
Example #7
def launch():
    """
        Fetch all the whois entries assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(entry, server + '\n' + unicode(whois,  errors="replace"), cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i%10000 == 0:
                publisher.info(str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            time.sleep(sleep_timer)
            __disconnect()
Example #8
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Global"

    # ZMQ #
    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")

    # FUNCTIONS #
    publisher.info("Starting to publish.")

    while True:
        filename = r_serv.lpop("filelist")

        if filename != None:

            msg = cfg.get("PubSub_Global", "channel")+" "+filename
            PubGlob.send_message(msg)
            publisher.debug("{0} Published".format(msg))
        else:
            time.sleep(10)
            publisher.debug("Nothing to publish")
Example #9
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        result_path = is_sql_injection(resource_path)

    if query_string is not None:
        result_query = is_sql_injection(query_string)

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print "Detected SQL in URL: "
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
            publisher.warning(to_print)
            #Send to duplicate
            p.populate_set_out(path, 'Duplicate')
            #send to Browse_warning_paste
            p.populate_set_out('sqlinjection;{}'.format(path), 'BrowseWarningPaste')
        else:
            print "Potential SQL injection:"
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection")
            publisher.info(to_print)
Example #10
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_default", "host"),
        port = cfg.getint("Redis_default", "port"),
        db = args.db)

    p_serv = r_serv.pipeline(False)

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    channel = cfg.get("PubSub_Longlines", "channel_0")
    Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Longlines", channel)

    # FUNCTIONS #
    publisher.info("Longlines ubscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_0")))

    while True:
        PST = P.Paste(Sub.get_message().split(" ", -1)[-1])
        r_serv.sadd("Longlines", PST.p_mime)
        PST.save_in_redis(r_serv, PST.p_mime)
Example #11
def manager():
    """
        Manage (start/stop) the process (fetching/parsing) of the modules
    """
    modules = config_db.smembers('modules')
    modules_nr = len(modules)
    # Cleanup
    for module in modules:
        config_db.delete(module + '|parsing')
        config_db.delete(module + '|fetching')

    while True:
        for module in modules:
            parsing = config_db.get(module + "|" + "parsing")
            fetching = config_db.get(module + "|" + "fetching")
            if parsing is None:
                launch_parser(module)
            if fetching is None:
                launch_fetcher(module)

            parsing = config_db.get(module + "|" + "parsing")
            fetching = config_db.get(module + "|" + "fetching")
            if parsing == 0 and fetching == 0:
                config_db.srem('modules', module)

        modules = config_db.smembers('modules')
        if len(modules) != modules_nr:
            modules_nr = len(modules)
            publisher.info('These modules are running: ' + str(modules))
        else:
            time.sleep(sleep_timer)
Example #12
def prepare_bview_file():
    publisher.info('Start converting binary bview file in plain text...')

    # create the plain text dump from the binary dump
    output = open(os.path.join(bview_dir, 'bview'), 'w')
    nul_f = open(os.devnull, 'w')
    bgpdump = os.path.join(root_dir, path_to_bgpdump_bin)
    p_bgp = Popen([bgpdump, filename], stdout=PIPE, stderr=nul_f)
    for line in p_bgp.stdout:
        output.write(line)
    nul_f.close()
    output.close()
    publisher.info('Conversion finished, start splitting...')

    # Split the plain text file
    fs = FilesSplitter(output.name, number_of_splits)
    splitted_files = fs.fplit()
    publisher.info('Splitting finished.')

    # Flush the old routing database and launch the population of
    # the new database
    routing_db.flushdb()

    publisher.info('Start pushing all routes...')
    pushing_process_service = os.path.join(services_dir, "pushing_process")
    run_splitted_processing(split_procs, pushing_process_service,
                            splitted_files)
    publisher.info('All routes pushed.')

    # Remove the binary and the plain text files
    os.unlink(output.name)
    os.unlink(filename)
Example #13
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        result_path = is_sql_injection(resource_path)

    if query_string is not None:
        result_query = is_sql_injection(query_string)

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print "Detected SQL in URL: "
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL")
            publisher.warning(to_print)
            #Send to duplicate
            p.populate_set_out(path, 'Duplicate')
            #send to Browse_warning_paste
            p.populate_set_out('sqlinjection;{}'.format(path), 'BrowseWarningPaste')
        else:
            print "Potential SQL injection:"
            print urllib2.unquote(url)
            to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection")
            publisher.info(to_print)
Example #14
def create_tld_list(url = "https://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1"):
    """Recover a tld list from url.

    :param url: -- The url of the tld list.
    :return: -- list

    This function recover from mozilla.org the list of the effective tld names,
    Save it as a file, and return a list of all the tld.


    """
    domains = []
    htmlSource = urllib.urlopen(url).read()
    with open("ICCANdomain", 'wb') as F:
        F.write(htmlSource)

    with open("ICCANdomain", 'rb') as F:

        for num, line in enumerate(F):
            if re.match(r"^\/\/|\n", line) == None:
                domains.append(re.sub(r'\*', '', line[:-1]))
            else:
                publisher.info("Comment line ignored.")

    return domains
Example #15
def prepare_bview_file():
    publisher.info('Start converting binary bview file in plain text...')

    # create the plain text dump from the binary dump
    output = open(os.path.join(bview_dir, 'bview'), 'w')
    nul_f = open(os.devnull, 'w')
    bgpdump = os.path.join(root_dir, path_to_bgpdump_bin)
    p_bgp = Popen([bgpdump , filename], stdout=PIPE, stderr = nul_f)
    for line in p_bgp.stdout:
        output.write(line)
    nul_f.close()
    output.close()
    publisher.info('Conversion finished, start splitting...')

    # Split the plain text file
    fs = FilesSplitter(output.name, number_of_splits)
    splitted_files = fs.fplit()
    publisher.info('Splitting finished.')

    # Flush the old routing database and launch the population of
    # the new database
    routing_db.flushdb()

    publisher.info('Start pushing all routes...')
    pushing_process_service = os.path.join(services_dir, "pushing_process")
    run_splitted_processing(split_procs, pushing_process_service,
            splitted_files)
    publisher.info('All routes pushed.')

    # Remove the binary and the plain text files
    os.unlink(output.name)
    os.unlink(filename)
Example #16
def refining_regex_dataset(r_serv, r_key, regex, min_match, year, month, luhn = True, dnscheck = True):
    """Refine the "raw dataset" of paste with regulars expressions

    :param r_serv: -- Redis connexion database
    :param r_key: -- (str) The name of the key read in redis (often the name of
        the keywords category list)
    :param min_match: -- (int) Below this number file are deleted
    :param regex: -- Regular expression which will be match.

    This function Refine database created with classify_token_paste function.
    It opening again the files which matchs the keywords category list, found
    regular expression inside it and count how many time is found.

    If there is not too much match about the regular expression the file is
    deleted from the list.

    Than it finally merge the result by day to be able to create a bar graph
    which will represent how many occurence by day the regex match.

    """
    for filename in r_serv.zrange(r_key, 0, -1):

        with gzip.open(filename, 'rb') as F:
            var = 0
            matchs = set([])

            for num, kword in enumerate(F):

                match = re.findall(regex, kword)
                var += len(match)

                for y in match:
                    if y != '' and len(y) < 100:
                        matchs.add(y)
            # If there are fewer matches than min_match, delete it (false positive)
            if len(matchs) <= min_match:
                r_serv.zrem(r_key, filename)
                publisher.debug("{0} deleted".format(filename))
            else:
            # else changing the score.
                if r_key == "creditcard_categ" and luhn:
                    for card_number in matchs:
                        if is_luhn_valid(card_number):

                            r_serv.zincrby(r_key+'_occur', filename, 1)

                            publisher.info("{1} is valid in the file {0}".format(filename, card_number))
                        else:
                            publisher.debug("{0} card is invalid".format(card_number))

                if r_key == "mails_categ" and dnscheck:
                    r_serv.zadd(r_key+'_occur', checking_MX_record(r_serv, matchs), filename)

                else:
                    # LUHN NOT TRIGGERED (Other Categs)
                    r_serv.zadd(r_key+'_occur',
                        len(matchs),
                        filename)

    create_graph_by_day_datastruct(r_serv, r_key, year, month)
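Example #16 relies on an is_luhn_valid() helper that is not shown. The Luhn checksum itself is standard; a minimal sketch, assuming the candidate card number is a string of digits possibly mixed with spaces or dashes:

def is_luhn_valid(card_number):
    # Keep the digits only, then apply the standard Luhn checksum.
    digits = [int(c) for c in str(card_number) if c.isdigit()]
    if not digits:
        return False
    checksum = 0
    for i, d in enumerate(reversed(digits)):
        # Double every second digit from the right; subtract 9 if the double exceeds 9.
        if i % 2 == 1:
            d *= 2
            if d > 9:
                d -= 9
        checksum += d
    return checksum % 10 == 0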
Example #17
def db_import(filename, day):
    with open(filename, 'r') as f:
        entry = ''
        pipeline = routing_db.pipeline()
        i = 0
        for line in f:
            # End of block, extracting the information
            if line == '\n':
                i += 1
                parsed = re.findall('(?:ASPATH|PREFIX): ([^\n{]*)', entry)
                try:
                    block = parsed[0].strip()
                    # RIPE-NCC-RIS BGP IPv6 Anchor Prefix @RRC00
                    # RIPE-NCC-RIS BGP Anchor Prefix @ rrc00 - RIPE NCC
                    if block in ['2001:7fb:ff00::/48', '84.205.80.0/24',
                            '2001:7fb:fe00::/48', '84.205.64.0/24']:
                        asn = 12654
                    else:
                        asn = int(parsed[1].split()[-1].strip())
                    pipeline.hset(block, day, asn)
                except:
                    #FIXME: check the cause of the exception
                    publisher.warning(entry)
                entry = ''
                if i%10000 == 0:
                    pipeline.execute()
                    pipeline = routing_db.pipeline()
            else :
                # append the line to the current block.
                entry += line
        pipeline.execute()
        publisher.info('{f} finished, {nb} entries imported.'.\
                format(f=filename, nb = i))
Example #18
def redis_interbargraph_set(r_serv, year, month, overwrite):
    """Create a Redis sorted set.

    :param r_serv: -- connection to redis database
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process
    :param overwrite: -- (bool) trigger the overwrite mode

    This function creates inside redis the intersections of all days in
    a month, two by two.
    Example:
    For a month of 31 days it will create 30 sorted sets between day and
    day+1 until the last day.
    The overwrite mode deletes the intersections and re-creates them.

    """
    a = date(year, month, 01)
    b = date(year, month, cal.monthrange(year, month)[1])

    if overwrite:
        r_serv.delete("InterSet")

        for dt in rrule(DAILY, dtstart = a, until = b - timedelta(1)):
            dayafter = dt+timedelta(1)

            r_serv.delete(str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d")))

            r_serv.zinterstore(
                str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d")),
                {str(dt.strftime("%Y%m%d")):1,
                str(dayafter.strftime("%Y%m%d")):-1})

            r_serv.zadd(
                "InterSet",
                1,
                str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d")))
    else:
        for dt in rrule(DAILY, dtstart = a, until = b - timedelta(1)):
            dayafter = dt+timedelta(1)

            if r_serv.zcard(str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d"))) == 0:

                r_serv.zinterstore(
                    str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d")),
                    {str(dt.strftime("%Y%m%d")):1,
                    str(dayafter.strftime("%Y%m%d")):-1})

                r_serv.zadd(
                    "InterSet",
                    1,
                    str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d")))

                publisher.info(str(dt.strftime("%Y%m%d"))+str(dayafter.strftime("%Y%m%d"))+" Intersection Created")

            else:
                publisher.warning("Data already exist, operation aborted.")
Example #19
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # REDIS #
    r_serv = redis.StrictRedis(
        host = cfg.get("Redis_Queues", "host"),
        port = cfg.getint("Redis_Queues", "port"),
        db = cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    channel = cfg.get("PubSub_Longlines", "channel_1")
    subscriber_name = "tokenize"
    subscriber_config_section = "PubSub_Longlines"

    #Publisher
    publisher_config_section = "PubSub_Words"
    publisher_name = "pubtokenize"

    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
    Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)

    channel_0 = cfg.get("PubSub_Words", "channel_0")

    # FUNCTIONS #
    publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1")))

    while True:
        message = Sub.get_msg_from_queue(r_serv)
        print message
        if message != None:
            PST = P.Paste(message.split(" ",-1)[-1])
        else:
            if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"):
                r_serv.srem("SHUTDOWN_FLAGS", "Tokenize")
                print "Shutdown Flag Up: Terminating"
                publisher.warning("Shutdown Flag Up: Terminating.")
                break
            publisher.debug("Tokeniser is idling 10s")
            time.sleep(10)
            print "sleepin"
            continue

        for word, score in PST._get_top_words().items():
            if len(word) >= 4:
                msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score)
                Pub.send_message(msg)
                print msg
            else:
                pass
Example #20
def service_start_multiple(servicename, number, param=None):
    """
        Start multiple services using `service_start` and save their pids
    """
    i = 0
    publisher.info('Starting ' + str(number) + ' times ' + servicename)
    while i < number:
        proc = service_start(servicename, param)
        writepid(servicename, proc)
        i += 1
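service_start_multiple() in Examples #20 and #21 depends on service_start() and writepid() helpers from the same launcher script. A minimal sketch of writepid(), under the assumption that it appends the child PID to a per-service pid file; the pid directory is a placeholder:

import os

pid_dir = '/tmp/pids'  # assumed location of the pid files

def writepid(servicename, proc):
    # Hypothetical helper: append the PID of the freshly started process to <servicename>.pid.
    with open(os.path.join(pid_dir, servicename + '.pid'), 'a') as f:
        f.write('{0}\n'.format(proc.pid))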
Example #21
def service_start_multiple(servicename, number, param = None):
    """
        Start multiple services using `service_start` and save their pids
    """
    i = 0
    publisher.info('Starting ' + str(number) + ' times ' + servicename)
    while i < number:
        proc = service_start(servicename, param)
        writepid(servicename, proc)
        i += 1
Example #22
def graph_categ_by_day(r_serv, filename, year, month, r_key):
    """Create a bargraph representing regex matching by day

    :param r_serv: -- Redis connection database
    :param filename: -- (str) The absolute path where to save the figure.png
    :param r_key: -- (str) The name of the key read in redis (often the name of
        the keywords category list)
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process

    This function displays the number of matches for the category per day.

    """
    adate = []
    categ_num = []
    rcParams['figure.figsize'] = 15, 10

    a = date(year, month, 01)
    b = date(year, month, cal.monthrange(year, month)[1])

    for dt in rrule(DAILY, dtstart = a, until = b):
        adate.append(dt.strftime("%d"))
        categ_num.append(r_serv.zscore(r_key+'_by_day',dt.strftime("%Y%m%d")))

    n_groups = len(categ_num)
    adress_scores = tuple(categ_num)

    index = np.arange(n_groups)
    bar_width = 0.5
    opacity = 0.6

    ladress = plt.bar(index, adress_scores, bar_width,
                 alpha = opacity,
                 color = 'b',
                 label = r_key)


    plt.plot(tuple(categ_num), 'r--')
    #plt.yscale('log')
    plt.xlabel('Days')
    plt.ylabel('Amount')
    plt.title('Occurence of '+r_key+' by day')
    plt.xticks(index + bar_width/2 , tuple(adate))

    plt.legend()
    plt.grid()

    plt.tight_layout()

    plt.savefig(filename+".png", dpi=None, facecolor='w', edgecolor='b',
        orientation='portrait', papertype=None, format="png",
        transparent=False, bbox_inches=None, pad_inches=0.1,
        frameon=True)

    publisher.info(filename+".png"+" saved!")
Example #23
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        ## TODO: # FIXME: remove me
        try:
            resource_path = resource_path.decode()
        except:
            pass
        result_path = is_sql_injection(resource_path)

    if query_string is not None:
        ## TODO: # FIXME: remove me
        try:
            query_string = query_string.decode()
        except:
            pass
        result_query = is_sql_injection(query_string)

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print("Detected SQL in URL: ")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path)
            publisher.warning(to_print)
            #Send to duplicate
            p.populate_set_out(path, 'Duplicate')

            msg = 'infoleak:automatic-detection="sql-injection";{}'.format(path)
            p.populate_set_out(msg, 'Tags')

            #statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                date = datetime.datetime.now().strftime("%Y%m")
                server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1)

        else:
            print("Potential SQL injection:")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_rel_path)
            publisher.info(to_print)
Example #24
def __query_logging(ip, user_agent, method, q_ip=None, announce_date=None,
                    days_limit=None, level=None):
    if level == 'warning':
        publisher.warning(__csv2string([ip, user_agent, method, q_ip,
                                        announce_date, days_limit, level]))
    elif level == 'error':
        publisher.error(__csv2string([ip, user_agent, method, q_ip,
                                      announce_date, days_limit, level]))
    else:
        publisher.info(__csv2string([ip, user_agent, method, q_ip,
                                     announce_date, days_limit, level]))
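Examples #24 and #25 log their query parameters through a __csv2string() helper that the listing omits. A minimal sketch using the standard csv module, assuming the helper simply renders the list of values as one CSV line:

import csv
import io

def __csv2string(fields):
    # Hypothetical helper: serialize a list of values into a single CSV line.
    buf = io.StringIO()
    writer = csv.writer(buf)
    writer.writerow(['' if f is None else f for f in fields])
    return buf.getvalue().strip()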
Example #25
def __query_logging(ip, user_agent, method, q_ip=None, announce_date=None,
                    days_limit=None, level=None):
    if level == 'warning':
        publisher.warning(__csv2string([ip, user_agent, method, q_ip,
                                        announce_date, days_limit, level]))
    elif level == 'error':
        publisher.error(__csv2string([ip, user_agent, method, q_ip,
                                      announce_date, days_limit, level]))
    else:
        publisher.info(__csv2string([ip, user_agent, method, q_ip,
                                     announce_date, days_limit, level]))
Example #26
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="",
                                                  nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {};{}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #27
def prepare_bview_file(filename):
    publisher.info('Start converting binary bview file in plain text...')
    # create the plain text dump from the binary dump
    with open(path_output_bviewfile, 'w') as output:
        nul_f = open(os.devnull, 'w')
        p_bgp = Popen([bgpdump, filename], stdout=PIPE, stderr=nul_f)
        for line in p_bgp.stdout:
            output.write(line)
        nul_f.close()
    publisher.info('Conversion finished, start splitting...')

    # Split the plain text file
    return file_splitter.fsplit(path_output_bviewfile)
Example #28
def test_publisher(self):
    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
Example #29
def prepare_bview_file(filename):
    publisher.info('Start converting binary bview file in plain text...')
    # create the plain text dump from the binary dump
    with open(path_output_bviewfile, 'w') as output:
        nul_f = open(os.devnull, 'w')
        p_bgp = Popen([bgpdump, filename], stdout=PIPE, stderr=nul_f)
        for line in p_bgp.stdout:
            output.write(line)
        nul_f.close()
    publisher.info('Conversion finished, start splitting...')

    # Split the plain text file
    return file_splitter.fsplit(path_output_bviewfile)
Example #30
def test_publisher(self):
    for i in range(0, 21):
        if i % 2 == 0:
            publisher.info('test' + str(i))
        elif i % 3 == 0:
            publisher.warning('test' + str(i))
        elif i % 5 == 0:
            publisher.error('test' + str(i))
        elif i % 7 == 0:
            publisher.critical('test' + str(i))
        else:
            publisher.debug('test' + str(i))
        time.sleep(1)
Example #31
def add_asn_entry(asn, owner, ips_block):
    """
        Add a new subnet to the ASNs known by the system,
        only if the subnet is not already present. Otherwise, simply return
        the value from the database.
    """
    key = None
    asn_timestamps = sorted(global_db.smembers(asn), reverse=True)
    key_list = [ "{asn}{sep}{timestamp}{sep}{ips_block}".format(\
                    asn = asn, timestamp = asn_timestamp,
                    sep = separator, ips_block = key_ips_block)
                 for asn_timestamp in asn_timestamps ]
    known_asn_ips_blocks = []
    if len(key_list) != 0:
        known_asn_ips_blocks = global_db.mget(key_list)
    i = 0
    for block in known_asn_ips_blocks:
        if block == ips_block:
            asn, timestamp, b = key_list[i].split(separator)
            temp_key = "{asn}{sep}{timestamp}".format(asn=asn,
                    sep = separator, timestamp=timestamp)
            if global_db.get("{key}{sep}{owner}".format(key = temp_key,
                sep = separator, owner = key_owner)) == owner:
                key = temp_key
                break
        i +=1
    if key is None:
        lock = global_db.getset('locked_new_ans', 1)
        if lock == 1 :
            # ensure the same new entry is not inserted twice
            return None
        timestamp = datetime.datetime.utcnow().isoformat()
        key = "{asn}{sep}{timestamp}".format(asn=asn, sep = separator,
                timestamp=timestamp)
        to_set = {\
                    "{key}{sep}{owner}".format(\
                                key = key, sep = separator,
                                owner = key_owner) : owner,
                    "{key}{sep}{ips_block}".format(\
                                key = key, sep = separator,
                                ips_block = key_ips_block): ips_block
                 }
        pipeline = global_db.pipeline(False)
        pipeline.sadd(asn, timestamp)
        pipeline.mset(to_set)
        pipeline.set('locked_new_ans', 0)
        pipeline.execute()
        publisher.info('New asn entry inserted in the database: {asn}, {owner}, {ipblock}'\
                .format(asn = asn, owner = owner, ipblock = ips_block))
    return key
Example #32
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(
        description=
        '''This script is a part of the Assisted Information Leak framework.''',
        epilog='''''')

    parser.add_argument('-db',
                        type=int,
                        default=0,
                        help='The name of the Redis DB (default 0)',
                        choices=[0, 1, 2, 3, 4],
                        action='store')

    # REDIS #
    r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                               port=cfg.getint("Redis_Queues", "port"),
                               db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.port = 6380
    publisher.channel = "Queuing"

    # ZMQ #
    channel = cfg.get("PubSub_Global", "channel")

    # FUNCTIONS #
    publisher.info("""Suscribed to channel {0}""".format(channel))

    while True:
        table = texttable.Texttable()
        table.header(["Queue name", "#Items"])
        row = []
        for queue in r_serv.smembers("queues"):
            current = r_serv.llen(queue)
            current = current - r_serv.llen(queue)
            row.append((queue, r_serv.llen(queue)))

        time.sleep(0.5)
        row.sort()
        table.add_rows(row, header=False)
        os.system('clear')
        print table.draw()
Example #33
def launch_parser(module):
    """
        Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    if module is None:
        publisher.error('Unable to start parsing : module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module, '-d', directory])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module + ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
Example #34
def main():
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        try:
            message = p.get_from_set()

            if message is not None:
                PST = Paste.Paste(message)
            else:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()

            if mimetype == "text/plain":
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
                        PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
        except IOError:
            print("CRC Checksum Failed on :", PST.p_path)
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))
Example #35
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read('./packages/config.cfg')

    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Assisted Information Leak framework.''',
        epilog='''''')

    parser.add_argument('-db', type=int, default=0,
                        help='The name of the Redis DB (default 0)',
                        choices=[0, 1, 2, 3, 4], action='store')

    # REDIS #
    r_serv = redis.StrictRedis(
        host=cfg.get("Redis_Queues", "host"),
        port=cfg.getint("Redis_Queues", "port"),
        db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.port = 6380
    publisher.channel = "Queuing"

    # ZMQ #
    channel = cfg.get("PubSub_Global", "channel")

    # FUNCTIONS #
    publisher.info("""Suscribed to channel {0}""".format(channel))

    while True:
        table = texttable.Texttable()
        table.header(["Queue name", "#Items"])
        row = []
        for queue in r_serv.smembers("queues"):
            current = r_serv.llen(queue)
            current = current - r_serv.llen(queue)
            row.append((queue, r_serv.llen(queue)))

        time.sleep(0.5)
        row.sort()
        table.add_rows(row, header=False)
        os.system('clear')
        print table.draw()
Example #36
def stop_services(signum, frame):
    """
        Tell the modules to stop.
    """
    config = ConfigParser.RawConfigParser()
    config_file = "/etc/bgpranking/bgpranking.conf"
    config.read(config_file)
    config_db = redis.Redis(port = int(config.get('redis','port_master')),\
                              db = config.get('redis','config'))
    modules = config_db.smembers('modules')
    # Cleanup
    for module in modules:
        config_db.delete(module + '|parsing')
        config_db.delete(module + '|fetching')
    config_db.delete('modules')
    publisher.info('The services will be stopped ASAP')
    exit(0)
Example #37
def parse(directory):
    old_dir = os.path.join(directory, 'old')
    to_import = glob.glob(os.path.join(directory, '*'))
    to_import.sort()
    for f_name in to_import:
        if os.path.isdir(f_name):
            continue
        try:
            update = None
            f = open(f_name).read()
            data = re.findall('as=AS(.*)&.*</a> (.*)\n', f)
            update_raw = re.sub('[\n()]', '',
                                re.findall('File last modified at (.*)</I>', f, re.S)[0])
            update = dateutil.parser.parse(update_raw).isoformat()
            yield update, data
            os.rename(f_name, os.path.join(old_dir, update))
        except:
            publisher.info('Invalid file. Update:' + update)
Example #38
def launch_parser(module):
    """
        Launch a parser on a dataset for a module
    """
    service_parser = os.path.join(services_dir, "parse_raw_files.py")
    timer = '60'
    if module is None:
        publisher.error('Unable to start parsing : module is None')
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_parser, '-n', module,
            '-d', directory, '-t', timer])
        config_db.set(module + "|" + "parsing", 1)
        publisher.info('Parsing of ' + module + ' started.')
    else:
        publisher.error('Unable to start parsing of ' + module + \
                ': home_dir unknown.')
        config_db.set(module + "|" + "parsing", 0)
Example #39
def parse(directory):
    old_dir = os.path.join(directory, 'old')
    to_import = glob.glob(os.path.join(directory, '*'))
    to_import.sort()
    for f_name in to_import:
        if os.path.isdir(f_name):
            continue
        try:
            update = None
            f = open(f_name).read()
            data = re.findall('as=AS(.*)&.*</a> (.*)\n', f)
            update_raw = re.sub(
                '[\n()]', '',
                re.findall('File last modified at (.*)</I>', f, re.S)[0])
            update = dateutil.parser.parse(update_raw).isoformat()
            yield update, data
            os.rename(f_name, os.path.join(old_dir, update))
        except:
            publisher.info('Invalid file. Update:' + update)
Example #40
def fetch(url, directory):
    temp_dir = os.path.join(directory, 'temp')
    old_dir = os.path.join(directory, 'old')

    filename = os.path.join(temp_dir, 'autnums.html')
    urlretrieve('http://www.cidr-report.org/as2.0/autnums.html', filename)
    f = open(filename).read()
    update_raw = re.sub('[\n()]', '',
                        re.findall('File last modified at (.*)</I>', f, re.S)[0])
    update = dateutil.parser.parse(update_raw).isoformat()

    newfile = os.path.join(directory, update)
    oldfile = os.path.join(old_dir, update)
    if os.path.exists(newfile) or os.path.exists(oldfile):
        os.remove(filename)
        return False
    else:
        os.rename(filename, newfile)
        publisher.info('File updated at ' + update)
        return True
Example #41
def launch_fetcher(module):
    """
        Launch a process which fetch a dataset in a directory
    """
    service_fetcher = os.path.join(services_dir, "fetch_raw_files.py")
    if module is None:
        publisher.error('Unable to start fetching : module is None')
        return
    url = config_db.get(module + "|" + "url")
    if url is None:
        publisher.info(module + ' does not have an URL, no fetcher.')
        config_db.set(module + "|" + "fetching", 0)
        return
    directory = config_db.get(module + "|" + "home_dir")
    if directory is not None:
        subprocess.Popen(["python", service_fetcher, '-n', module, '-d', directory, '-u', url])
        config_db.set(module + "|" + "fetching", 1)
        publisher.info('Fetching of ' + module + ' started.')
    else:
        publisher.error('Unable to start fetching of ' + module + ': home_dir unknown.')
        config_db.set(module + "|" + "fetching", 0)
Example #42
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return None
        else:
            first_occ = r_serv_cache.get(redis_key)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
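As in Example #4, the _regex_search worker used by Example #42 is not part of the listing. Since the parent only reads a single value back with get(redis_key), a plausible sketch is a worker that stores the first occurrence found by re.search; r_serv_cache is again assumed to be the shared cache Redis connection:

import re

def _regex_search(redis_key, regex, item_content):
    # Hypothetical worker: store the first match (if any) under redis_key for the parent to read.
    first_occ = re.search(regex, item_content)
    if first_occ:
        r_serv_cache.set(redis_key, first_occ.group())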
Example #43
def fetch(url, directory):
    temp_dir = os.path.join(directory, 'temp')
    old_dir = os.path.join(directory, 'old')

    filename = os.path.join(temp_dir, 'autnums.html')
    urlretrieve('http://www.cidr-report.org/as2.0/autnums.html', filename)
    f = open(filename).read()
    update_raw = re.sub(
        '[\n()]', '',
        re.findall('File last modified at (.*)</I>', f, re.S)[0])
    update = dateutil.parser.parse(update_raw).isoformat()

    newfile = os.path.join(directory, update)
    oldfile = os.path.join(old_dir, update)
    if os.path.exists(newfile) or os.path.exists(oldfile):
        os.remove(filename)
        return False
    else:
        os.rename(filename, newfile)
        publisher.info('File updated at ' + update)
        return True
Example #44
def create_dirfile(r_serv, directory, overwrite):
    """Create a file of path.

    :param r_serv: -- connexion to redis database
    :param directory: -- The folder where to launch the listing of the .gz alerts

    This function create a list in redis with inside the absolute path
    of all the pastes needed to be proceeded by function using parallel
    (like redis_words_ranking)

    """
    if overwrite:
        r_serv.delete("filelist")

        for x in listdirectory(directory):
            r_serv.lpush("filelist", x)

        publisher.info("The list was overwritten")

    else:
        if r_serv.llen("filelist") == 0:

            for x in listdirectory(directory):
                r_serv.lpush("filelist", x)

            publisher.info("New list created")
        else:

            for x in listdirectory(directory):
                r_serv.lpush("filelist", x)

            publisher.info("The list was updated with new elements")
Example #45
def search_phone(message):
    paste = Paste.Paste(message)
    content = paste.get_p_content()
    # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    # list of the regex results in the Paste, may be null
    results = reg_phone.findall(content)

    # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
    if len(results) > 4 :
        print results
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))

if __name__ == '__main__':
    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        search_phone(message)
Example #46
def create_dirfile(r_serv, directory, overwrite):
    """Create a file of path.

    :param r_serv: -- connexion to redis database
    :param directory: -- The folder where to launch the listing of the .gz files

    This function create a list in redis with inside the absolute path
    of all the pastes needed to be proceeded by function using parallel
    (like redis_words_ranking)

    """
    if overwrite:
        r_serv.delete("filelist")

        for x in listdirectory(directory):
            r_serv.lpush("filelist", x)

        publisher.info("The list was overwritten")

    else:
        if r_serv.llen("filelist") == 0:

            for x in listdirectory(directory):
                r_serv.lpush("filelist", x)

            publisher.info("New list created")
        else:

            for x in listdirectory(directory):
                r_serv.lpush("filelist", x)

            publisher.info("The list was updated with new elements")
Example #47
def analyse(url, path):
    faup.decode(url)
    url_parsed = faup.get()

    resource_path = url_parsed['resource_path']
    query_string = url_parsed['query_string']

    result_path = 0
    result_query = 0

    if resource_path is not None:
        result_path = is_sql_injection(resource_path.decode('utf8'))

    if query_string is not None:
        result_query = is_sql_injection(query_string.decode('utf8'))

    if (result_path > 0) or (result_query > 0):
        paste = Paste.Paste(path)
        if (result_path > 1) or (result_query > 1):
            print("Detected SQL in URL: ")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(
                paste.p_source, paste.p_date, paste.p_name,
                "Detected SQL in URL", paste.p_path)
            publisher.warning(to_print)
            #Send to duplicate
            p.populate_set_out(path, 'Duplicate')
            #send to Browse_warning_paste
            p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')

            msg = 'infoleak:automatic-detection="sql-injection";{}'.format(
                path)
            p.populate_set_out(msg, 'Tags')
        else:
            print("Potential SQL injection:")
            print(urllib.request.unquote(url))
            to_print = 'SQLInjection;{};{};{};{};{}'.format(
                paste.p_source, paste.p_date, paste.p_name,
                "Potential SQL injection", paste.p_path)
            publisher.info(to_print)
Example #48
def add_asn_entry(asn, owner, ips_block):
    """
        Add a new subnet to the ASNs known by the system,
        only if the subnet is not already present. Otherwise, simply return
        the value from the database.
    """
    key = '{asn}|{block}'.format(asn=asn, block=ips_block)
    owners = global_db.hvals(key)
    if owner not in owners:
        lock = global_db.getset('locked_new_ans', 1)
        if lock == 1:
            # ensure the same new entry is not inserted twice
            return None
        timestamp = datetime.datetime.utcnow().isoformat()
        p = global_db.pipeline(False)
        p.hset(key, timestamp, owner)
        p.sadd(asn, ips_block)
        p.set('locked_new_ans', 0)
        p.execute()
        publisher.info('New asn entry inserted in the database: {asn}, {owner}, {ipblock}'\
                .format(asn = asn, owner = owner, ipblock = ips_block))
    return key
Example #49
def db_import(filename, day):
    routing_db = get_redis_connector()
    with open(filename, 'r') as f:
        entry = ''
        pipeline = routing_db.pipeline()
        i = 0
        for line in f:
            # End of block, extracting the information
            if line == '\n':
                i += 1
                parsed = re.findall('(?:ASPATH|PREFIX): ([^\n{]*)', entry)
                try:
                    block = parsed[0].strip()
                    # RIPE-NCC-RIS BGP IPv6 Anchor Prefix @RRC00
                    # RIPE-NCC-RIS BGP Anchor Prefix @ rrc00 - RIPE NCC
                    if block in [
                            '2001:7fb:ff00::/48', '84.205.80.0/24',
                            '2001:7fb:fe00::/48', '84.205.64.0/24'
                    ]:
                        asn = 12654
                    else:
                        asn = int(parsed[1].split()[-1].strip())
                    pipeline.hset(block, day, asn)
                except:
                    # FIXME: check the cause of the exception
                    publisher.warning(entry)
                entry = ''
                if i % 10000 == 0:
                    pipeline.execute()
                    pipeline = routing_db.pipeline()
            else:
                # append the line to the current block.
                entry += line
        pipeline.execute()
        publisher.info('{f} finished, {nb} entries imported.'.format(
            f=filename, nb=i))
Example #50
def launch():
    """
        Fetch all the whois entries assigned to the server of this :class:`Connector`
    """
    i = 0
    while True:
        try:
            entry = temp_db.spop(key_ris)
            if not entry:
                __disconnect()
                i = 0
                publisher.debug("Disconnected of " + server)
                time.sleep(sleep_timer)
                continue
            if cache_db.get(entry) is None:
                if not connected:
                    __connect()
                publisher.debug(server + ", query : " + str(entry))
                whois = fetch_whois(entry)
                if whois != '':
                    cache_db.setex(
                        entry,
                        server + '\n' + unicode(whois, errors="replace"),
                        cache_ttl)
                if not keepalive:
                    __disconnect()
            i += 1
            if i % 10000 == 0:
                publisher.info(
                    str(temp_db.scard(key_ris)) + ' to process on ' + server)
        except IOError as text:
            publisher.error("IOError on " + server + ': ' + str(text))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
        except Exception as e:
            publisher.error("Error on " + server + ': ' + str(e))
            publisher.info(
                str(temp_db.scard(key_ris)) + ' to process on ' + server)
            time.sleep(sleep_timer)
            __disconnect()
Example #51
from packages import lib_refine
from pubsublogger import publisher
import re

from Helper import Process

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'CreditCards'

    p = Process(config_section)

    # FUNCTIONS #
    publisher.info("Creditcard script subscribed to channel creditcard_categ")

    creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"

    # FIXME For retro compatibility
    channel = 'creditcard_categ'

    # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
    cards = [
        r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # 16-digit VISA, with separators
        r'\b5[1-5]\d{2}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # 16 digits MasterCard
        r'\b6(?:011|22(?:(?=[\ \-]?(?:2[6-9]|[3-9]))|[2-8]|9(?=[\ \-]?(?:[01]|2[0-5])))|4[4-9]\d|5\d\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # Discover Card
        r'\b35(?:2[89]|[3-8]\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # Japan Credit Bureau (JCB)
        r'\b3[47]\d\d(?:[\ \-]?)\d{6}(?:[\ \-]?)\d{5}\b',  # American Express
        r'\b(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}\b',  # Maestro
    ]
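    # --- Illustrative sketch, not part of the original module ---
    # The processing loop that consumes `cards` is not shown above; a minimal,
    # hypothetical way to use the patterns on a decoded paste body (`content`
    # is an assumed variable) is to collect candidates and keep only those
    # passing the Luhn checksum.
    def luhn_valid(number):
        # Double every second digit from the right, subtract 9 when the
        # result exceeds 9, and check that the total is a multiple of 10.
        digits = [int(d) for d in re.sub(r'[\s-]', '', number)]
        total = sum(d if i % 2 == 0 else (d * 2 - 9 if d * 2 > 9 else d * 2)
                    for i, d in enumerate(reversed(digits)))
        return total % 10 == 0

    def find_card_candidates(content):
        candidates = set()
        for pattern in cards:
            candidates.update(re.findall(pattern, content))
        return {number for number in candidates if luhn_valid(number)}

    # e.g. find_card_candidates('test number: 4111 1111 1111 1111')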
Example #52
0
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Keys'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a short description of the module to the logging channel
    publisher.info("Run Keys module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        paste = Paste.Paste(message)
        search_key(paste)

        # (Optional) Send that thing to the next queue
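        # For instance (assuming the Helper.Process output API): p.populate_set_out(message)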
Example #53
0
REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
REDIS_KEY_NUM_USERNAME = '******'
REDIS_KEY_NUM_PATH = 'uniqNumForUsername'
REDIS_KEY_ALL_CRED_SET = 'AllCredentials'
REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev'
REDIS_KEY_ALL_PATH_SET = 'AllPath'
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'

if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
    config_section = "Credential"
    module_name = "Credential"
    p = Process(config_section)
    publisher.info("Find credentials")

    faup = Faup()

    regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
    #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
    regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    redis_cache_key = regex_helper.generate_redis_cache_key(module_name)
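    # (Assumed behaviour) regex_helper reuses this per-module key to pass regex
    # results back through Redis from its worker process.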

    while True:
        message = p.get_from_set()

        if message is None:
            publisher.debug("Script Credential is Idling 10s")
Example #54
0
    # SCRIPT PARSER #
    parser = argparse.ArgumentParser(
        description='Start Categ module on files.')

    parser.add_argument(
        '-d',
        type=str,
        default="../files/",
        help='Path to the directory containing the category files.',
        action='store')

    args = parser.parse_args()

    # FUNCTIONS #
    publisher.info("Script Categ started")

    categories = [
        'CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey'
    ]
    tmp_dict = {}
    for filename in categories:
        bname = os.path.basename(filename)
        tmp_dict[bname] = []
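        # Compile every (escaped) keyword of the category file into one case-insensitive alternation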
        with open(os.path.join(args.d, filename), 'r') as f:
            patterns = [r'%s' % (re.escape(s.strip())) for s in f]
            tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)

    prec_filename = None

    while True:
Example #55
0
if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    torclient_host = '127.0.0.1'
    torclient_port = 9050

    config_section = 'Onion'

    p = Process(config_section)
    r_cache = redis.StrictRedis(host=p.config.get("Redis_Cache", "host"),
                                port=p.config.getint("Redis_Cache", "port"),
                                db=p.config.getint("Redis_Cache", "db"))

    # FUNCTIONS #
    publisher.info("Script subscribed to channel onion_categ")

    # FIXME For backward compatibility
    channel = 'onion_categ'

    # Getting the first message from redis.
    message = p.get_from_set()
    prec_filename = None

    # Thanks to Faup project for this regex
    # https://github.com/stricaud/faup
    url_regex = "((http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

    while True:
        if message is not None:
            print message
Example #56
0
def main():
    """Main Function"""

    # CONFIG #
    cfg = ConfigParser.ConfigParser()
    cfg.read(configfile)

    # Redis
    r_serv1 = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
                                port=cfg.getint("Redis_Queues", "port"),
                                db=cfg.getint("Redis_Queues", "db"))

    # LOGGING #
    publisher.channel = "Script"

    # ZMQ #
    # Subscriber
    channel = cfg.get("PubSub_Global", "channel")
    subscriber_name = "DomainClassifier"
    subscriber_config_section = "PubSub_Global"

    cc = cfg.get("PubSub_DomainClassifier", "cc")
    cc_tld = cfg.get("PubSub_DomainClassifier", "cc_tld")

    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
                            subscriber_name)

    # FUNCTIONS #
    publisher.info("""ZMQ DomainClassifier is Running""")
    c = DomainClassifier.domainclassifier.Extract(rawtext="")

    while True:
        try:
            message = sub.get_msg_from_queue(r_serv1)

            if message is not None:
                PST = Paste.Paste(message.split(" ", -1)[-1])
            else:
                if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"):
                    r_serv1.srem("SHUTDOWN_FLAGS", "Indexer")
                    publisher.warning("Shutdown Flag Up: Terminating.")
                    break
                publisher.debug("Script DomainClassifier is idling 10s")
                time.sleep(1)
                continue
            docpath = message.split(" ", -1)[-1]
            paste = PST.get_p_content()
            mimetype = PST._get_p_encoding()
            if mimetype == "text/plain":
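                # Extract candidate domains from the paste, resolve them (A records),
                # then report the ones matching the configured TLD expression or country code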
                c.text(rawtext=paste)
                c.potentialdomain()
                c.validdomain(rtype=['A'], extended=True)
                localizeddomains = c.include(expression=cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc_tld))
                localizeddomains = c.localizedomain(cc=cc)
                if localizeddomains:
                    print(localizeddomains)
                    publisher.warning(
                        'DomainC;{};{};{};Checked {} located in {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            localizeddomains, cc))
        except IOError:
            print "CRC Checksum Failed on :", PST.p_path
            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                PST.p_source, PST.p_date, PST.p_name))