Exemplo n.º 1
0
class ConfigReader:
    """Small wrapper that loads one configuration file via ``ConfigLoader``."""

    def __init__(self, file):
        """Create the underlying loader and have it read ``file``."""
        self.configLoader = loader = ConfigLoader()
        loader.read(file)

    def read(self, section, item, default):
        """Look up ``item`` in ``section`` through the wrapped loader.

        ``default`` is handed to the loader's ``get`` unchanged.
        """
        value = self.configLoader.get(section, item, default)
        return value
Exemplo n.º 2
0
    def __init__(self):
        """Initialise the module: Faup instance, statistics connection, launch log."""
        super(LibInjection, self).__init__()

        # Faup instance kept for the lifetime of the module
        self.faup = Faup()

        # Connection returned by the loader for the "ARDB_Statistics" entry
        self.server_statistics = ConfigLoader().get_redis_conn(
            "ARDB_Statistics")

        launch_message = f"Module: {self.module_name} Launched"
        self.redis_logger.info(launch_message)
Exemplo n.º 3
0
    def __init__(self):
        """Initialise the Onion module.

        Opens the "Redis_Cache" and "ARDB_Onion" connections, reads the
        ``Onion`` settings from the configuration and prepares the URL
        patterns used for ``.onion`` and ``.i2p`` hosts.
        """
        super(Onion, self).__init__()

        config_loader = ConfigLoader()
        self.r_cache = config_loader.get_redis_conn("Redis_Cache")
        self.r_onion = config_loader.get_redis_conn("ARDB_Onion")

        self.pending_seconds = config_loader.get_config_int(
            "Onion", "max_execution_time")
        # regex timeout (presumably seconds -- confirm against regex_helper)
        self.regex_timeout = 30

        self.faup = crawlers.get_faup()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(
            self.module_name)

        # activate_crawler = p.config.get("Crawler", "activate_crawler")

        # Raw string literals: the patterns contain many regex escapes
        # (\: \. \- \$ ...) that are invalid *string* escapes and trigger a
        # SyntaxWarning on recent Python versions when written in a plain
        # literal. The resulting pattern text is unchanged.
        self.url_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"
        self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)"
        # The compiled objects are discarded on purpose: compiling here makes
        # a malformed pattern fail at start-up rather than on first use.
        re.compile(self.url_regex)
        re.compile(self.i2p_regex)

        self.redis_logger.info(f"Module: {self.module_name} Launched")

        # TEMP var: SAVE I2P Domain (future I2P crawler)
        self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
Exemplo n.º 4
0
    def __init__(self):
        """Set up statistics state, the items directory paths and the polling delay."""
        super(Global, self).__init__()

        self.r_stats = ConfigLoader().get_redis_conn("ARDB_Statistics")

        # State behind the periodic "processed items" report
        self.time_last_stats = time.time()
        self.processed_item = 0

        # Resolve and sanitise the items directory
        # TODO: rename PASTE => ITEM
        ail_home = os.environ['AIL_HOME']
        pastes_dir = self.process.config.get("Directories", "pastes")
        self.PASTES_FOLDER = os.path.join(ail_home, pastes_dir)
        # Same directory after realpath(), always ending with a separator
        resolved_dir = os.path.realpath(self.PASTES_FOLDER + '/')
        self.PASTES_FOLDERS = os.path.join(resolved_dir, '')

        # Delay in seconds between two processed messages
        self.pending_seconds = 0.5

        # Report the module state to the logs
        init_message = f"Module {self.module_name} initialized"
        self.redis_logger.info(init_message)
Exemplo n.º 5
0
# Modules Classes
from modules.ApiKey import ApiKey
from modules.Categ import Categ
from modules.CreditCards import CreditCards
from modules.DomClassifier import DomClassifier
from modules.Global import Global
from modules.Keys import Keys
from modules.Onion import Onion

# project packages
from lib.ConfigLoader import ConfigLoader
import lib.crawlers as crawlers
import packages.Item as Item

#### COPY SAMPLES ####
# Copies the bundled samples into a 'tests' sub-directory of the items folder
# at import time, so the module tests below have files to work on.
config_loader = ConfigLoader()
# TODO: move me in new Item package
# Items directory after realpath(), always ending with a path separator.
ITEMS_FOLDER = os.path.join(
    os.path.realpath(
        os.path.join(
            os.environ['AIL_HOME'],
            config_loader.get_config_str("Directories", "pastes")) + '/'),
    '')
TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests')
sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples')
copy_tree(sample_dir, TESTS_ITEMS_FOLDER)
#### ---- ####


class Test_Module_ApiKey(unittest.TestCase):
    """Test case built around an ``ApiKey`` module instance."""

    def setUp(self):
        """Create the ``ApiKey`` module object used by the tests."""
        module = ApiKey()
        self.module_obj = module
Exemplo n.º 6
0
 def __init__(self, file):
     """Create the underlying ``ConfigLoader`` and have it read ``file``."""
     self.configLoader = loader = ConfigLoader()
     loader.read(file)
Exemplo n.º 7
0
class Configuration:
    """Central access point for the cve-search settings.

    Every getter asks the shared ``configLoader`` for a value and passes the
    matching entry of ``default`` (or ``sources`` for feed settings) as the
    fallback.
    """

    # Fallback values handed to ``configLoader.get`` by the getters below.
    default = {
        'redisHost': 'localhost',
        'redisPort': 6379,
        'redisVendorDB': 10,
        'redisNotificationsDB': 11,
        'redisRefDB': 12,
        'redisPass': None,
        'mongoHost': 'localhost',
        'mongoPort': 27017,
        'mongoDB': "cvedb",
        'mongoUsername': '',
        'mongoPassword': '',
        'mongoTLS': False,
        'mongoTLSCA': '',
        'flaskHost': "127.0.0.1",
        'flaskPort': 5000,
        'flaskDebug': True,
        'pageLength': 50,
        'loginRequired': False,
        'listLogin': True,
        'ssl': False,
        'sslCertificate': "./ssl/cve-search.crt",
        # NOTE(review): identical to 'sslCertificate'; presumably this should
        # point at a key file -- confirm before changing.
        'sslKey': "./ssl/cve-search.crt",
        'CVEStartYear': 2002,
        'logging': True,
        'logfile': "./log/cve-search.log",
        'maxLogSize': '100MB',
        'backlog': 5,
        'Indexdir': './indexdir',
        'updatelogfile': './log/update.log',
        'Tmpdir': './tmp',
        'http_proxy': '',
        'http_ignore_certs': False,
        'plugin_load': './etc/plugins.txt',
        'plugin_config': './etc/plugins.ini',
        'auth_load': './etc/auth.txt'
    }

    # jdt_NOTE: check if these default data sources are up to date
    # NOTE(review): an earlier duplicate of this dict, differing only in
    # 'cve' ("https://nvd.nist.gov/feeds/json/cve/1.1/"), was dead code
    # because this definition overrode it; it has been removed. Confirm which
    # feed version is intended.
    sources = {
        'cve': "https://nvd.nist.gov/feeds/json/cve/1.0/",
        'cpe':
        "https://nvd.nist.gov/feeds/json/cpematch/1.0/nvdcpematch-1.0.json.zip",
        'cwe': "https://cwe.mitre.org/data/xml/cwec_v3.1.xml.zip",
        'capec': "https://capec.mitre.org/data/xml/capec_v3.2.xml",
        'via4': "https://www.cve-search.org/feeds/via4.json",
        'includecve': True,
        'includecapec': True,
        'includemsbulletin': True,
        'includecpe': True,
        'includecwe': True,
        'includevia4': True
    }

    # Loader shared by all getters; the main configuration file is read once
    # when the class body is executed.
    configLoader = ConfigLoader()
    configLoader.read(configurationPath)

    # Mongo
    @classmethod
    def getMongoDB(cls):
        """Return the configured MongoDB database name."""
        return cls.configLoader.get("Mongo", "DB", cls.default['mongoDB'])

    @classmethod
    def getMongoConnection(cls):
        """Connect to MongoDB and return the configured database object.

        The process exits through ``sys.exit`` when the ``ismaster`` command
        issued right after creating the client fails.
        """
        mongoHost = cls.configLoader.get("Mongo", "Host",
                                         cls.default['mongoHost'])
        mongoPort = cls.configLoader.get("Mongo", "Port",
                                         cls.default['mongoPort'])
        mongoDB = cls.configLoader.get("Mongo", "DB", cls.default['mongoDB'])
        # Credentials are given to MongoClient as keyword arguments, so they
        # must be passed verbatim: percent-escaping only applies to
        # credentials embedded in a MongoDB URI.
        mongoUsername = cls.configLoader.get("Mongo", "Username",
                                             cls.default['mongoUsername'])
        mongoPassword = cls.configLoader.get("Mongo", "Password",
                                             cls.default['mongoPassword'])
        mongoTLS = cls.configLoader.get("Mongo", "TLS",
                                        cls.default['mongoTLS'])
        mongoTLSCAFile = cls.configLoader.get("Mongo", "TLSCA",
                                              cls.default['mongoTLSCA'])

        # Only forward the options that are actually set.
        kwargs: dict = {}
        if mongoHost:
            kwargs["host"] = mongoHost
        if mongoPort:
            kwargs["port"] = mongoPort
        if mongoUsername:
            kwargs["username"] = mongoUsername
            if mongoPassword:
                kwargs["password"] = mongoPassword
        if mongoDB:
            kwargs["authSource"] = mongoDB
        if mongoTLS:
            kwargs["tls"] = mongoTLS
            kwargs["tlsAllowInvalidHostnames"] = False
            if mongoTLSCAFile:
                kwargs["tlsCAFile"] = mongoTLSCAFile

        mongo = MongoClient(**kwargs)

        try:
            # Round-trip to the server so connection problems surface here.
            mongo.admin.command('ismaster')
        except Exception as e:
            sys.exit(e)
        return mongo.get_database(mongoDB)

    @classmethod
    def toPath(cls, path):
        """Return ``path`` unchanged if absolute, else relative to ``runPath/..``."""
        return path if os.path.isabs(path) else os.path.join(
            runPath, "..", path)

    # Redis
    @classmethod
    def getRedisHost(cls):
        """Return the configured Redis host."""
        return cls.configLoader.get("Redis", "Host", cls.default['redisHost'])

    @classmethod
    def getRedisPort(cls):
        """Return the configured Redis port."""
        return cls.configLoader.get("Redis", "Port", cls.default['redisPort'])

    @classmethod
    def _getRedisConnection(cls, db_option, default_key):
        """Build a Redis client for the database configured under ``db_option``.

        ``default_key`` names the entry of ``default`` used as the fallback
        database number.
        """
        redisHost = cls.getRedisHost()
        redisPort = cls.getRedisPort()
        redisDB = cls.configLoader.get("Redis", db_option,
                                       cls.default[default_key])
        redisPass = cls.configLoader.get("Redis", "Password",
                                         cls.default['redisPass'])
        return redis.StrictRedis(host=redisHost,
                                 port=redisPort,
                                 db=redisDB,
                                 password=redisPass,
                                 charset='utf-8',
                                 decode_responses=True)

    @classmethod
    def getRedisVendorConnection(cls):
        """Return a Redis client bound to the vendors database."""
        return cls._getRedisConnection("VendorsDB", 'redisVendorDB')

    @classmethod
    def getRedisNotificationsConnection(cls):
        """Return a Redis client bound to the notifications database."""
        return cls._getRedisConnection("NotificationsDB",
                                       'redisNotificationsDB')

    @classmethod
    def getRedisRefConnection(cls):
        """Return a Redis client bound to the reference database."""
        return cls._getRedisConnection("RefDB", 'redisRefDB')

    # Flask
    @classmethod
    def getFlaskHost(cls):
        """Return the host configured for the web server."""
        return cls.configLoader.get("Webserver", "Host",
                                    cls.default['flaskHost'])

    @classmethod
    def getFlaskPort(cls):
        """Return the port configured for the web server."""
        return cls.configLoader.get("Webserver", "Port",
                                    cls.default['flaskPort'])

    @classmethod
    def getFlaskDebug(cls):
        """Return the web server ``Debug`` setting."""
        return cls.configLoader.get("Webserver", "Debug",
                                    cls.default['flaskDebug'])

    # Webserver
    @classmethod
    def getPageLength(cls):
        """Return the configured ``PageLength`` of the web server section."""
        return cls.configLoader.get("Webserver", "PageLength",
                                    cls.default['pageLength'])

    # Authentication
    @classmethod
    def loginRequired(cls):
        """Return the ``LoginRequired`` setting."""
        return cls.configLoader.get("Webserver", "LoginRequired",
                                    cls.default['loginRequired'])

    @classmethod
    def listLoginRequired(cls):
        """Return the ``ListLoginRequired`` setting."""
        return cls.configLoader.get("Webserver", "ListLoginRequired",
                                    cls.default['listLogin'])

    @classmethod
    def getAuthLoadSettings(cls):
        """Return the path of the authentication settings file."""
        return cls.toPath(
            cls.configLoader.get("Webserver", "authSettings",
                                 cls.default['auth_load']))

    # SSL
    @classmethod
    def useSSL(cls):
        """Return the ``SSL`` setting of the web server section."""
        return cls.configLoader.get("Webserver", "SSL", cls.default['ssl'])

    @classmethod
    def getSSLCert(cls):
        """Return the path of the SSL certificate."""
        return cls.toPath(
            cls.configLoader.get("Webserver", "Certificate",
                                 cls.default['sslCertificate']))

    @classmethod
    def getSSLKey(cls):
        """Return the path of the SSL key."""
        return cls.toPath(
            cls.configLoader.get("Webserver", "Key", cls.default['sslKey']))

    # CVE
    @classmethod
    def getCVEStartYear(cls):
        """Return the configured CVE start year.

        Values before 2002 or later than next year are rejected with a
        printed message and replaced by the default start year.
        """
        YEAR_CVE_BEGAN = 2002
        next_year = datetime.datetime.now().year + 1
        start_year = cls.configLoader.get("CVE", "StartYear",
                                          cls.default['CVEStartYear'])
        if start_year < YEAR_CVE_BEGAN or start_year > next_year:
            print(
                'The year %i is not a valid year.\ndefault year %i will be used.'
                % (start_year, cls.default['CVEStartYear']))
            start_year = cls.default['CVEStartYear']
        return start_year

    # Logging
    @classmethod
    def getLogfile(cls):
        """Return the path of the main log file."""
        return cls.toPath(
            cls.configLoader.get("Logging", "Logfile", cls.default['logfile']))

    @classmethod
    def getUpdateLogFile(cls):
        """Return the path of the update log file."""
        return cls.toPath(
            cls.configLoader.get("Logging", "Updatelogfile",
                                 cls.default['updatelogfile']))

    @classmethod
    def getLogging(cls):
        """Return the ``Logging`` setting."""
        return cls.configLoader.get("Logging", "Logging",
                                    cls.default['logging'])

    @classmethod
    def getMaxLogSize(cls):
        """Return the maximum log size in bytes.

        The setting is expected as ``<number><unit>`` (e.g. ``100MB``) with
        unit KB, MB or GB; an unknown unit counts as MB. Anything that does
        not split into exactly a number followed by a unit yields 100 MB.
        """
        size = cls.configLoader.get("Logging", "MaxSize",
                                    cls.default['maxLogSize'])
        multipliers = {'KB': 1024, 'MB': 1024 * 1024, 'GB': 1024 * 1024 * 1024}
        # str() so a bare numeric setting does not make findall() raise.
        split = re.findall(r'\d+|\D+', str(size))
        # The numeric part must come first; "MB100" is not a valid size.
        if len(split) == 2 and split[0].isdecimal():
            base = int(split[0])
            unit = split[1].strip().upper()
            return base * multipliers.get(unit, 1024 * 1024)
        # if size is not correctly defined, set it to 100MB
        return 100 * 1024 * 1024

    @classmethod
    def getBacklog(cls):
        """Return the ``Backlog`` setting of the logging section."""
        return cls.configLoader.get("Logging", "Backlog",
                                    cls.default['backlog'])

    # Indexing
    @classmethod
    def getTmpdir(cls):
        """Return the path of the temporary directory."""
        return cls.toPath(
            cls.configLoader.get("dbmgt", "Tmpdir", cls.default['Tmpdir']))

    # Indexing
    @classmethod
    def getIndexdir(cls):
        """Return the path of the full-text index directory."""
        return cls.toPath(
            cls.configLoader.get("FulltextIndex", "Indexdir",
                                 cls.default['Indexdir']))

    # Http Proxy
    @classmethod
    def getProxy(cls):
        """Return the configured HTTP proxy URL (empty when unset)."""
        return cls.configLoader.get("Proxy", "http", cls.default['http_proxy'])

    @classmethod
    def ignoreCerts(cls):
        """Return the ``IgnoreCerts`` setting of the proxy section."""
        return cls.configLoader.get("Proxy", "IgnoreCerts",
                                    cls.default['http_ignore_certs'])

    @classmethod
    def getFile(cls, getfile, unpack=True):
        """Download ``getfile`` and optionally unpack it.

        Returns a ``(data, response)`` tuple. ``data`` is the response itself
        unless ``unpack`` is true and the Content-Type names a gzip, bzip2 or
        zip payload, in which case it is a file-like object over the
        decompressed content (first member for zip archives).
        """
        # Collect all handlers first and install a single opener, so that
        # enabling both the proxy and IgnoreCerts keeps both in effect.
        handlers = []
        proxy_url = cls.getProxy()
        if proxy_url:
            handlers.extend([
                req.ProxyHandler({
                    'http': proxy_url,
                    'https': proxy_url
                }),
                req.HTTPBasicAuthHandler(),
                req.HTTPHandler,
            ])
        if cls.ignoreCerts():
            # Deliberately disables certificate and hostname verification.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            handlers.append(req.HTTPSHandler(context=ctx))
        if handlers:
            req.install_opener(req.build_opener(*handlers))

        response = req.urlopen(getfile)
        data = response
        # TODO: if data == text/plain; charset=utf-8, read and decode
        if unpack:
            # A response without Content-Type gives None; treat it as "no
            # known archive type" instead of failing on the ``in`` test.
            content_type = response.info().get('Content-Type') or ''
            if 'gzip' in content_type:
                buf = BytesIO(response.read())
                data = gzip.GzipFile(fileobj=buf)
            elif 'bzip2' in content_type:
                data = BytesIO(bz2.decompress(response.read()))
            elif 'zip' in content_type:
                with zipfile.ZipFile(BytesIO(response.read()), 'r') as fzip:
                    names = fzip.namelist()
                    if names:
                        data = BytesIO(fzip.read(names[0]))
        return (data, response)

    # Feeds
    @classmethod
    def getFeedData(cls, source, unpack=True):
        """Download the feed named ``source``; return None if it has no URL."""
        source = cls.getFeedURL(source)
        return cls.getFile(source, unpack) if source else None

    @classmethod
    def getFeedURL(cls, source):
        """Return the URL configured for feed ``source`` (empty if unknown).

        ``etc/sources.ini`` is read into the shared loader on every call.
        """
        cls.configLoader.read(os.path.join(runPath, "../etc/sources.ini"))
        return cls.configLoader.get("Sources", source,
                                    cls.sources.get(source, ""))

    @classmethod
    def includesFeed(cls, feed):
        """Return whether ``feed`` is enabled (False for unknown feeds by default)."""
        return cls.configLoader.get("EnabledFeeds", feed,
                                    cls.sources.get('include' + feed, False))

    # Plugins
    @classmethod
    def getPluginLoadSettings(cls):
        """Return the path of the plugin load-settings file."""
        return cls.toPath(
            cls.configLoader.get("Plugins", "loadSettings",
                                 cls.default['plugin_load']))

    @classmethod
    def getPluginsettings(cls):
        """Return the path of the plugin configuration file."""
        return cls.toPath(
            cls.configLoader.get("Plugins", "pluginSettings",
                                 cls.default['plugin_config']))
Exemplo n.º 8
0
class Global(AbstractModule):
    """
    Global module for AIL framework

    Receives "<item path> <base64 gzip content>" messages, stores the
    gzipped content under the items directory and forwards the resulting
    item id to the next queue.
    """
    def __init__(self):
        """Set up statistics state, the items directory paths and the polling delay."""
        super(Global, self).__init__()

        self.r_stats = ConfigLoader().get_redis_conn("ARDB_Statistics")

        self.processed_item = 0
        self.time_last_stats = time.time()

        # Get and sanitize ITEM DIRECTORY
        # # TODO: rename PASTE => ITEM
        # PASTES_FOLDER: directory as configured (may contain symlinks).
        self.PASTES_FOLDER = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "pastes"))
        # PASTES_FOLDERS: same directory after realpath(), always ending
        # with a path separator.
        self.PASTES_FOLDERS = os.path.join(
            os.path.realpath(self.PASTES_FOLDER + '/'), '')

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 0.5

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def computeNone(self):
        """Report and reset the processed-item counter once 30 s have elapsed.

        Presumably invoked by the base module when no message is available --
        confirm against AbstractModule.
        """
        difftime = time.time() - self.time_last_stats
        if int(difftime) > 30:
            to_print = f'Global; ; ; ;glob Processed {self.processed_item} item(s) in {difftime} s'
            print(to_print)
            self.redis_logger.debug(to_print)

            self.time_last_stats = time.time()
            self.processed_item = 0

    def compute(self, message, r_result=False):
        """Store the item carried by ``message`` and queue its id.

        ``message`` must consist of two whitespace-separated fields: the item
        path and its gzip content encoded in base64. Returns the item id when
        ``r_result`` is true and the item was saved, otherwise None.
        """
        # Recovering the streamed message informations
        splitted = message.split()
        if len(splitted) != 2:
            self.redis_logger.debug(f"Empty Item: {message} not processed")
            print(f"Empty Item: {message} not processed")
            return None

        item, gzip64encoded = splitted

        # Remove PASTES_FOLDER from item path (crawled item + submitted)
        if self.PASTES_FOLDERS in item:
            item = item.replace(self.PASTES_FOLDERS, '', 1)

        # Keep the file name within the usual 255 character limit
        # (215 kept characters + uuid4 + '.gz').
        file_name_item = item.split('/')[-1]
        if len(file_name_item) > 255:
            new_file_name_item = '{}{}.gz'.format(file_name_item[:215],
                                                  str(uuid4()))
            item = self.rreplace(item, file_name_item, new_file_name_item, 1)

        # Creating the full, canonical filepath
        filename = os.path.realpath(os.path.join(self.PASTES_FOLDER, item))

        # Incorrect filename: the canonical path must live strictly inside
        # the canonical items directory. Comparing against PASTES_FOLDERS
        # (realpath + trailing separator) rejects sibling directories that
        # merely share a name prefix and stays correct when the configured
        # directory goes through a symlink.
        if not filename.startswith(self.PASTES_FOLDERS):
            self.redis_logger.warning(
                f'Global; Path traversal detected {filename}')
            print(f'Global; Path traversal detected {filename}')
            return None

        # Decode compressed base64
        decoded = base64.standard_b64decode(gzip64encoded)
        new_file_content = self.gunzip_bytes_obj(filename, decoded)
        if not new_file_content:
            return None

        filename = self.check_filename(filename, new_file_content)
        if not filename:
            return None

        # create subdir (exist_ok avoids a check-then-create race)
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        # The still-gzipped payload is what gets written to disk.
        with open(filename, 'wb') as f:
            f.write(decoded)

        # The item id is the path relative to the items directory
        item_id = filename
        if self.PASTES_FOLDERS in item_id:
            item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)

        self.send_message_to_queue(item_id)
        self.processed_item += 1
        if r_result:
            return item_id
        return None

    def check_filename(self, filename, new_file_content):
        """
        Check if file is not a duplicated file
        return the filename if new file, else None

        When a file with different content already exists, the MD5 of the
        new content is appended to the name; None is returned if that name
        exists too, if the content is identical, or if the existing file
        cannot be unzipped.
        """
        # Nothing on disk yet: the name can be used as is
        if not os.path.isfile(filename):
            return filename

        self.redis_logger.warning(f'File already exist {filename}')
        print(f'File already exist {filename}')

        # Check that file already exists but content differs
        curr_file_content = self.gunzip_file(filename)
        if not curr_file_content:
            # File not unzipped
            return None

        # Compare file content with message content with MD5 checksums
        curr_file_md5 = md5(curr_file_content).hexdigest()
        new_file_md5 = md5(new_file_content).hexdigest()

        if new_file_md5 == curr_file_md5:
            # Ignore duplicate checksum equals
            self.redis_logger.debug(f'ignore duplicated file {filename}')
            print(f'ignore duplicated file {filename}')
            return None

        # MD5 are not equals, derive a new filename from the checksum
        if filename.endswith('.gz'):
            filename = f'{filename[:-3]}_{new_file_md5}.gz'
        else:
            filename = f'{filename}_{new_file_md5}'
        self.redis_logger.debug(f'new file to check: {filename}')

        if os.path.isfile(filename):
            # Ignore duplicate
            self.redis_logger.debug(f'ignore duplicated file {filename}')
            print(f'ignore duplicated file {filename}')
            return None

        return filename

    def gunzip_file(self, filename):
        """
        Unzip a file
        publish stats if failure

        Returns the decompressed bytes, or None when the file is truncated
        or not gzipped.
        """
        curr_file_content = None

        try:
            with gzip.open(filename, 'rb') as f:
                curr_file_content = f.read()
        except EOFError:
            self.redis_logger.warning(f'Global; Incomplete file: {filename}')
            print(f'Global; Incomplete file: {filename}')
            # save daily stats
            # NOTE(review): argument order is (name, value, amount); newer
            # redis-py releases expect (name, amount, value) -- confirm
            # against the client version in use.
            self.r_stats.zincrby('module:Global:incomplete_file',
                                 datetime.datetime.now().strftime('%Y%m%d'), 1)
        except OSError:
            self.redis_logger.warning(
                f'Global; Not a gzipped file: {filename}')
            print(f'Global; Not a gzipped file: {filename}')
            # save daily stats (same argument-order caveat as above)
            self.r_stats.zincrby('module:Global:invalid_file',
                                 datetime.datetime.now().strftime('%Y%m%d'), 1)

        return curr_file_content

    # # TODO: add stats incomplete_file/Not a gzipped file
    def gunzip_bytes_obj(self, filename, bytes_obj):
        """Decompress gzip data held in memory.

        ``filename`` is only used in the log messages. Returns the
        decompressed bytes, or None if ``bytes_obj`` cannot be decompressed.
        """
        gunzipped_bytes_obj = None
        try:
            with gzip.GzipFile(fileobj=io.BytesIO(bytes_obj),
                               mode='rb') as fo:
                gunzipped_bytes_obj = fo.read()
        except Exception as e:
            # Best effort: any failure is logged and reported as None.
            self.redis_logger.warning(
                f'Global; Invalid Gzip file: {filename}, {e}')
            print(f'Global; Invalid Gzip file: {filename}, {e}')

        return gunzipped_bytes_obj

    def rreplace(self, s, old, new, occurrence):
        """Replace the last ``occurrence`` occurrences of ``old`` in ``s`` by ``new``."""
        li = s.rsplit(old, occurrence)
        return new.join(li)