Exemplo n.º 1
0
    def __init__(self, config, memcached_servers=None):
        """Set up the cache client and the database backend.

        :param config: configuration object; stored on the instance and
            passed on to the database constructor.
        :param memcached_servers: optional list of memcached addresses;
            a falsy value selects ``['127.0.0.1:11211']``.
        """
        self.config = config
        self.memcached = memcache.Client(memcached_servers or ['127.0.0.1:11211']) # NOSONAR
        try:
            self.database = Database(config)
        except Exception:
            # Deliberately best-effort: a broken database must not prevent
            # scanning.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            logger.exception("Error opening database")
            self.database = NoopDatabase(config)

        # Paths collected for removal once processing has finished.
        self.clean_paths = []
Exemplo n.º 2
0
    def __init__(self, config, memcached_servers=None):
        """Set up the cache client and the database backend.

        :param config: configuration object; stored on the instance and
            passed on to the database constructor.
        :param memcached_servers: optional list of memcached addresses;
            a falsy value selects ``['127.0.0.1:11211']``.
        """
        self.config = config
        self.memcached = memcache.Client(memcached_servers
                                         or ['127.0.0.1:11211'])  # NOSONAR
        try:
            self.database = Database(config)
        except Exception:
            # Deliberately best-effort: a broken database must not prevent
            # scanning.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            logger.exception("Error opening database")
            self.database = NoopDatabase(config)

        # Paths collected for removal once processing has finished.
        self.clean_paths = []
Exemplo n.º 3
0
class AmavisVT(object):
    """Look up files at Virustotal by their SHA-256 hash.

    Lookup results are cached in memcached and written to the database
    backend.  Optionally, resources without a Virustotal result are also
    matched against filename patterns stored in the database.
    """

    # Not referenced inside this class; kept because it is part of the
    # class' public attributes.
    buffer_size = 4096

    def __init__(self, config, memcached_servers=None):
        """Set up the cache client and the database backend.

        :param config: configuration object; stored on the instance and
            passed on to the database constructor.
        :param memcached_servers: optional list of memcached addresses;
            a falsy value selects ``['127.0.0.1:11211']``.
        """
        self.config = config
        self.memcached = memcache.Client(memcached_servers
                                         or ['127.0.0.1:11211'])  # NOSONAR
        try:
            self.database = Database(config)
        except Exception:
            # Deliberately best-effort: a broken database must not prevent
            # scanning.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            logger.exception("Error opening database")
            self.database = NoopDatabase(config)

        # Paths collected for removal once processing has finished.
        self.clean_paths = []

    def run(self, file_or_directory):
        """Process a single file, or every readable file below a directory.

        :param file_or_directory: path of a regular file or a directory.
            Anything else yields an empty resource set.
        :return: whatever :meth:`process` returns for the collected files.
        """
        if os.path.isfile(file_or_directory):
            paths = [file_or_directory]
        elif os.path.isdir(file_or_directory):
            # Lazily walk the tree; entries that are not regular files
            # (e.g. dangling symlinks) are dropped without logging.
            walked = (os.path.join(root, name)
                      for root, _dirs, files in os.walk(file_or_directory)
                      for name in files)
            paths = (p for p in walked if os.path.isfile(p))
        else:
            paths = []

        resources = []
        for path in paths:
            if os.access(path, os.R_OK):
                resources.append(Resource(path, cleanup=False))
            else:
                # Log the file that was actually skipped, not the directory
                # the walk started from.
                logger.info("Skipping inaccessible file %s", path)

        return self.process(ResourceSet(resources))

    def process(self, resource_set):
        """Check every resource of ``resource_set`` and collect the results.

        Cached results are used where available; the remaining hashes are
        sent to Virustotal via :meth:`check_vt`.  If
        ``config.filename_pattern_detection`` is set, the uncached resources
        are additionally run through
        :meth:`do_filename_pattern_detection`.

        :param resource_set: iterable of resources; each resource is itself
            iterated to obtain its nested resources.
        :return: list of ``(resource, response)`` tuples, restricted to
            truthy responses.
        """
        hashes_for_vt = []
        results = []

        try:
            # Flatten: every top-level resource followed by its children.
            all_resources = []
            for top_level in resource_set:
                all_resources.append(top_level)
                for nested in top_level:
                    all_resources.append(nested)

            logger.info("Processing %s resources: %s", len(all_resources),
                        ', '.join([r.path for r in all_resources]))

            for resource in all_resources:
                if resource.cleanup:
                    self.clean_paths.append(resource.path)

                if not self.is_included(resource):
                    logger.debug("Skipping resource (not included): %s",
                                 resource)
                    continue

                cached_value = self.get_from_cache(resource.sha256)
                if cached_value:
                    logger.info("Using cached result for file %s (%s): %s",
                                resource, resource.sha256, cached_value)
                    results.append((resource, cached_value))
                else:
                    hashes_for_vt.append((resource, resource.sha256))

            logger.info("Sending %s hashes to Virustotal", len(hashes_for_vt))
            vt_results = list(self.check_vt(hashes_for_vt))
            results.extend(vt_results)

            if self.config.filename_pattern_detection:
                logger.debug("Filename pattern detection enabled")
                results.extend(
                    self.do_filename_pattern_detection(hashes_for_vt,
                                                       resource_set,
                                                       vt_results))

            # update patterns for entries which have no pattern set yet
            self.database.update_patterns()

            return [(resource, response) for resource, response in results
                    if response]
        finally:
            # Runs on success and on failure alike.
            clean_silent(self.clean_paths)
            self.database.clean()

    def do_filename_pattern_detection(self, hashes_for_vt, resource_set,
                                      vt_results):
        """Store uncached resources and flag filename-pattern matches.

        :param hashes_for_vt: list of ``(resource, sha256)`` tuples that were
            not answered from the cache.
        :param resource_set: the set being processed; its ``to_localpart``
            and ``to_domain`` attributes are passed to the database.
        :param vt_results: list of ``(resource, response)`` tuples as
            produced by :meth:`check_vt`.
        :return: list of ``(resource, FilenameResponse)`` tuples, one per
            resource flagged through its filename.
        """
        results = []
        for resource, sha256 in hashes_for_vt:
            # First truthy Virustotal response for this hash, if any.
            vtresult = next(
                (r for _, r in vt_results if r and r.sha256 == sha256), None)

            # add the resource to the database
            self.database.add_resource(resource, vtresult,
                                       resource_set.to_localpart,
                                       resource_set.to_domain)

            # Filename patterns are only consulted when there is no
            # Virustotal result for this hash.
            if vtresult is not None:
                continue
            if not self.database.filename_pattern_match(
                    resource, localpart=resource_set.to_localpart):
                continue

            logger.info(
                "Flagging attachment %s as INFECTED (identified via filename pattern)",
                resource.filename)

            # NOTE(review): ``results`` only ever receives the entries
            # appended below, so this removal looks like a no-op; it is kept
            # to preserve the original behaviour exactly.
            try:
                results.remove((resource, vtresult))
            except ValueError:
                pass

            reported = False
            if self.config.auto_report:
                reported = self.report_to_vt(resource)

            results.append((resource, FilenameResponse(reported)))
        return results

    @staticmethod
    def is_included(resource):
        """Tell whether ``resource`` should be checked at all.

        A resource qualifies if its MIME type starts with ``application/``
        or is one of a few script types, if its filename carries one of the
        listed extensions (case-insensitive), or if the filename has no
        extension (no dot, or a trailing dot).

        :param resource: object exposing ``mime_type`` and ``filename``.
        :return: ``True`` or ``False``.
        """
        # ``or`` short-circuits, so ``filename`` is only read when the MIME
        # type checks did not already match.
        return bool(
            resource.mime_type.startswith('application/')
            or resource.mime_type in ('text/x-shellscript', 'text/x-perl',
                                      'text/x-ruby', 'text/x-python')
            or re.search(
                r"\.(exe|com|zip|tar\.[\w\d]+|doc\w?|xls\w?|ppt\w?|pdf|js|bat|cmd|rtf|ttf|html?|vbs|wsf)$",
                resource.filename, re.IGNORECASE)
            or '.' not in resource.filename
            or resource.filename.endswith('.'))

    def check_vt(self, checksums):
        """Query Virustotal for the given hashes (generator).

        Every response item carrying a ``sha256`` key is cached and written
        to the database; only items that belong to ``checksums`` are
        yielded.  Errors are logged and end the generator; they are not
        raised to the caller.

        :param checksums: list of ``(resource, sha256)`` tuples.
        :return: yields ``(resource, VTResponse)`` tuples.
        """
        if self.config.pretend:
            logger.info("NOT sending requests to virustotal")
            return

        if not checksums:
            return

        # NOTE(review): all checksums go out in a single request even if
        # there are more than ``max_hashes_per_request``; the limit is only
        # used to decide how many extra hashes may ride along.
        max_hashes_per_request = 4  # Virustotal's public api limit
        extra_hashes = max(0, max_hashes_per_request - len(checksums))

        try:
            # Map sha256 -> resource so responses can be matched back.
            query_d = dict((sha256, resource) for resource, sha256 in checksums)
            raw_checksums = [sha256 for _, sha256 in checksums]

            # get hashes from database that have a pattern but infected=0
            if extra_hashes > 0:
                clean_hashes = self.database.get_clean_hashes(extra_hashes)
                logger.info(
                    "Piggy backing request to VT to send %s extra hashes",
                    len(clean_hashes))
            else:
                clean_hashes = []

            send_checksums = sorted(set(raw_checksums + clean_hashes))
            logger.debug("Sending %s checksums", len(send_checksums))

            response = requests.post(
                self.config.api_url,
                data={
                    'apikey': self.config.apikey,
                    'resource': ', '.join(send_checksums)
                },
                timeout=float(self.config.timeout),
                headers={
                    'User-Agent':
                    'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' %
                    VERSION
                })
            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            responses = response.json()
            # A single result arrives as a bare object instead of a list.
            if not isinstance(responses, list):
                responses = [responses]
            logger.debug("Got %s items in response", len(responses))
            responses = dict(
                (d['sha256'], d) for d in responses if 'sha256' in d)
            logger.debug("Got %s complete items in response", len(responses))

            for sha256, data in responses.items():
                vtr = VTResponse(data)
                vtr.infected = self.is_infected(vtr)

                # The expiry depends on whether Virustotal knows the hash
                # and, if so, on the verdict.
                cache_expires = self.config.unknown_expire
                if vtr.response_code:
                    if vtr.infected:
                        cache_expires = self.config.positive_expire
                    else:
                        cache_expires = self.config.negative_expire

                logger.info("Saving in cache: %s (expires in %s seconds)",
                            vtr.sha256, cache_expires)
                self.set_in_cache(vtr.resource, data, cache_expires)

                logger.info("Updating database result for %s (infected: %s)",
                            vtr.sha256, vtr.infected)
                self.database.update_result(vtr)

                if sha256 in query_d:
                    filename = query_d[sha256]
                    logger.debug("Result for %s: %s", filename, vtr)
                    yield filename, vtr

        except Exception:
            # ``Exception`` instead of a bare ``except`` so that closing the
            # generator (GeneratorExit) or interrupting the process is not
            # logged as a Virustotal error.
            logger.exception("Error asking virustotal about files")

    def report_to_vt(self, resource):
        """Upload ``resource`` to Virustotal.

        :param resource: object exposing ``path`` and ``filename``.
        :return: ``None`` in pretend mode, the parsed ``VTResponse`` on
            success, ``False`` if anything went wrong (the error is logged).
        """
        if self.config.pretend:
            logger.info("NOT sending resource to virustotal")
            return

        try:
            logger.info("Reporting resource %s (%s) to virustotal", resource,
                        resource.filename)

            # The context manager closes the file handle once the upload
            # has finished or failed.
            with open(resource.path, 'rb') as fh:
                response = requests.post(
                    self.config.report_url,
                    data={
                        'apikey': self.config.apikey,
                    },
                    files={
                        'file': (resource.filename, fh),
                    },
                    timeout=float(self.config.timeout),
                    headers={
                        'User-Agent':
                        'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' %
                        VERSION
                    })
            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            vtr = VTResponse(response.json())
            logger.info("Report result: %s", vtr)
            return vtr
        except Exception:
            logger.exception("Error reporting %s to virustotal", resource)
            return False

    def get_from_cache(self, sha256hash):
        """Return the cached response for ``sha256hash``, if there is one.

        :param sha256hash: cache key.
        :return: a ``VTResponse`` with ``infected`` recomputed against the
            current configuration, or ``None`` when nothing (or a falsy
            value) is cached.
        """
        from_cache = self.memcached.get(sha256hash)
        if not from_cache:
            return None

        vtr = VTResponse(from_cache)
        vtr.infected = self.is_infected(vtr)
        return vtr

    def set_in_cache(self, sha256hash, d, expire=0):
        """Store ``d`` in memcached under ``sha256hash``.

        :param sha256hash: cache key.
        :param d: value to store.
        :param expire: passed to memcached as ``time``.
        """
        logger.debug("Saving key %s in cache. Expires in %s seconds",
                     sha256hash, expire)
        self.memcached.set(sha256hash, d, time=expire)

    def is_infected(self, response_or_positive_hits):
        """Compare a hit count against ``config.hits_required``.

        :param response_or_positive_hits: a ``VTResponse`` (its
            ``positives`` attribute is used) or anything ``int()`` accepts.
        :return: ``True`` if the hit count reaches the threshold.
        """
        if isinstance(response_or_positive_hits, VTResponse):
            hits = response_or_positive_hits.positives
        else:
            hits = int(response_or_positive_hits)
        return hits >= self.config.hits_required
Exemplo n.º 4
0
class AmavisVT(object):
    """Look up files at Virustotal by their SHA-256 hash.

    Lookup results are cached in memcached and written to the database
    backend.  Optionally, resources without a Virustotal result are also
    matched against filename patterns stored in the database.
    """

    # Not referenced inside this class; kept because it is part of the
    # class' public attributes.
    buffer_size = 4096

    def __init__(self, config, memcached_servers=None):
        """Set up the cache client and the database backend.

        :param config: configuration object; stored on the instance and
            passed on to the database constructor.
        :param memcached_servers: optional list of memcached addresses;
            a falsy value selects ``['127.0.0.1:11211']``.
        """
        self.config = config
        self.memcached = memcache.Client(memcached_servers or ['127.0.0.1:11211']) # NOSONAR
        try:
            self.database = Database(config)
        except Exception:
            # Deliberately best-effort: a broken database must not prevent
            # scanning.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            logger.exception("Error opening database")
            self.database = NoopDatabase(config)

        # Paths collected for removal once processing has finished.
        self.clean_paths = []

    def run(self, file_or_directory):
        """Process a single file, or every readable file below a directory.

        :param file_or_directory: path of a regular file or a directory.
            Anything else yields an empty resource set.
        :return: whatever :meth:`process` returns for the collected files.
        """
        if os.path.isfile(file_or_directory):
            paths = [file_or_directory]
        elif os.path.isdir(file_or_directory):
            # Lazily walk the tree; entries that are not regular files
            # (e.g. dangling symlinks) are dropped without logging.
            walked = (os.path.join(root, name)
                      for root, _dirs, files in os.walk(file_or_directory)
                      for name in files)
            paths = (p for p in walked if os.path.isfile(p))
        else:
            paths = []

        resources = []
        for path in paths:
            if os.access(path, os.R_OK):
                resources.append(Resource(path, cleanup=False))
            else:
                # Log the file that was actually skipped, not the directory
                # the walk started from.
                logger.info("Skipping inaccessible file %s", path)

        return self.process(ResourceSet(resources))

    def process(self, resource_set):
        """Check every resource of ``resource_set`` and collect the results.

        Cached results are used where available; the remaining hashes are
        sent to Virustotal via :meth:`check_vt`.  If
        ``config.filename_pattern_detection`` is set, the uncached resources
        are additionally run through
        :meth:`do_filename_pattern_detection`.

        :param resource_set: iterable of resources; each resource is itself
            iterated to obtain its nested resources.
        :return: list of ``(resource, response)`` tuples, restricted to
            truthy responses.
        """
        hashes_for_vt = []
        results = []

        try:
            # Flatten: every top-level resource followed by its children.
            all_resources = []
            for top_level in resource_set:
                all_resources.append(top_level)
                for nested in top_level:
                    all_resources.append(nested)

            logger.info("Processing %s resources: %s", len(all_resources), ', '.join([r.path for r in all_resources]))

            for resource in all_resources:
                if resource.cleanup:
                    self.clean_paths.append(resource.path)

                if not self.is_included(resource):
                    logger.debug("Skipping resource (not included): %s", resource)
                    continue

                cached_value = self.get_from_cache(resource.sha256)
                if cached_value:
                    logger.info("Using cached result for file %s (%s): %s", resource, resource.sha256, cached_value)
                    results.append((resource, cached_value))
                else:
                    hashes_for_vt.append((resource, resource.sha256))

            logger.info("Sending %s hashes to Virustotal", len(hashes_for_vt))
            vt_results = list(self.check_vt(hashes_for_vt))
            results.extend(vt_results)

            if self.config.filename_pattern_detection:
                logger.debug("Filename pattern detection enabled")
                results.extend(self.do_filename_pattern_detection(hashes_for_vt, resource_set, vt_results))

            # update patterns for entries which have no pattern set yet
            self.database.update_patterns()

            return [(resource, response) for resource, response in results if response]
        finally:
            # Runs on success and on failure alike.
            clean_silent(self.clean_paths)
            self.database.clean()

    def do_filename_pattern_detection(self, hashes_for_vt, resource_set, vt_results):
        """Store uncached resources and flag filename-pattern matches.

        :param hashes_for_vt: list of ``(resource, sha256)`` tuples that were
            not answered from the cache.
        :param resource_set: the set being processed; its ``to_localpart``
            and ``to_domain`` attributes are passed to the database.
        :param vt_results: list of ``(resource, response)`` tuples as
            produced by :meth:`check_vt`.
        :return: list of ``(resource, FilenameResponse)`` tuples, one per
            resource flagged through its filename.
        """
        results = []
        for resource, sha256 in hashes_for_vt:
            # First truthy Virustotal response for this hash, if any.
            vtresult = next((r for _, r in vt_results if r and r.sha256 == sha256), None)

            # add the resource to the database
            self.database.add_resource(resource, vtresult, resource_set.to_localpart, resource_set.to_domain)

            # Filename patterns are only consulted when there is no
            # Virustotal result for this hash.
            if vtresult is not None:
                continue
            if not self.database.filename_pattern_match(resource, localpart=resource_set.to_localpart):
                continue

            logger.info("Flagging attachment %s as INFECTED (identified via filename pattern)", resource.filename)

            # NOTE(review): ``results`` only ever receives the entries
            # appended below, so this removal looks like a no-op; it is kept
            # to preserve the original behaviour exactly.
            try:
                results.remove((resource, vtresult))
            except ValueError:
                pass

            reported = False
            if self.config.auto_report:
                reported = self.report_to_vt(resource)

            results.append((resource, FilenameResponse(reported)))
        return results

    @staticmethod
    def is_included(resource):
        """Tell whether ``resource`` should be checked at all.

        A resource qualifies if its MIME type starts with ``application/``
        or is one of a few script types, if its filename carries one of the
        listed extensions (case-insensitive), or if the filename has no
        extension (no dot, or a trailing dot).

        :param resource: object exposing ``mime_type`` and ``filename``.
        :return: ``True`` or ``False``.
        """
        # ``or`` short-circuits, so ``filename`` is only read when the MIME
        # type checks did not already match.
        return bool(
            resource.mime_type.startswith('application/')
            or resource.mime_type in ('text/x-shellscript', 'text/x-perl', 'text/x-ruby', 'text/x-python')
            or re.search(r"\.(exe|com|zip|tar\.[\w\d]+|doc\w?|xls\w?|ppt\w?|pdf|js|bat|cmd|rtf|ttf|html?|vbs|wsf)$", resource.filename, re.IGNORECASE)
            or '.' not in resource.filename
            or resource.filename.endswith('.'))

    def check_vt(self, checksums):
        """Query Virustotal for the given hashes (generator).

        Every response item carrying a ``sha256`` key is cached and written
        to the database; only items that belong to ``checksums`` are
        yielded.  Errors are logged and end the generator; they are not
        raised to the caller.

        :param checksums: list of ``(resource, sha256)`` tuples.
        :return: yields ``(resource, VTResponse)`` tuples.
        """
        if self.config.pretend:
            logger.info("NOT sending requests to virustotal")
            return

        if not checksums:
            return

        # NOTE(review): all checksums go out in a single request even if
        # there are more than ``max_hashes_per_request``; the limit is only
        # used to decide how many extra hashes may ride along.
        max_hashes_per_request = 4 # Virustotal's public api limit
        extra_hashes = max(0, max_hashes_per_request - len(checksums))

        try:
            # Map sha256 -> resource so responses can be matched back.
            query_d = dict((sha256, resource) for resource, sha256 in checksums)
            raw_checksums = [sha256 for _, sha256 in checksums]

            # get hashes from database that have a pattern but infected=0
            if extra_hashes > 0:
                clean_hashes = self.database.get_clean_hashes(extra_hashes)
                logger.info("Piggy backing request to VT to send %s extra hashes", len(clean_hashes))
            else:
                clean_hashes = []

            send_checksums = sorted(set(raw_checksums + clean_hashes))
            logger.debug("Sending %s checksums", len(send_checksums))

            response = requests.post(self.config.api_url, data={
                'apikey': self.config.apikey,
                'resource': ', '.join(send_checksums)
            }, timeout=float(self.config.timeout), headers={
                'User-Agent': 'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' % VERSION
            })
            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            responses = response.json()
            # A single result arrives as a bare object instead of a list.
            if not isinstance(responses, list):
                responses = [responses]
            logger.debug("Got %s items in response", len(responses))
            responses = dict((d['sha256'], d) for d in responses if 'sha256' in d)
            logger.debug("Got %s complete items in response", len(responses))

            for sha256, data in responses.items():
                vtr = VTResponse(data)
                vtr.infected = self.is_infected(vtr)

                # The expiry depends on whether Virustotal knows the hash
                # and, if so, on the verdict.
                cache_expires = self.config.unknown_expire
                if vtr.response_code:
                    if vtr.infected:
                        cache_expires = self.config.positive_expire
                    else:
                        cache_expires = self.config.negative_expire

                logger.info("Saving in cache: %s (expires in %s seconds)", vtr.sha256, cache_expires)
                self.set_in_cache(vtr.resource, data, cache_expires)

                logger.info("Updating database result for %s (infected: %s)", vtr.sha256, vtr.infected)
                self.database.update_result(vtr)

                if sha256 in query_d:
                    filename = query_d[sha256]
                    logger.debug("Result for %s: %s", filename, vtr)
                    yield filename, vtr

        except Exception:
            # ``Exception`` instead of a bare ``except`` so that closing the
            # generator (GeneratorExit) or interrupting the process is not
            # logged as a Virustotal error.
            logger.exception("Error asking virustotal about files")

    def report_to_vt(self, resource):
        """Upload ``resource`` to Virustotal.

        :param resource: object exposing ``path`` and ``filename``.
        :return: ``None`` in pretend mode, the parsed ``VTResponse`` on
            success, ``False`` if anything went wrong (the error is logged).
        """
        if self.config.pretend:
            logger.info("NOT sending resource to virustotal")
            return

        try:
            logger.info("Reporting resource %s (%s) to virustotal", resource, resource.filename)

            # The context manager closes the file handle once the upload
            # has finished or failed.
            with open(resource.path, 'rb') as fh:
                response = requests.post(self.config.report_url, data={
                                            'apikey': self.config.apikey,
                                        },
                                        files={
                                            'file': (resource.filename, fh),
                                        },
                                        timeout=float(self.config.timeout),
                                        headers={
                                            'User-Agent': 'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' % VERSION
                                        })
            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            vtr = VTResponse(response.json())
            logger.info("Report result: %s", vtr)
            return vtr
        except Exception:
            logger.exception("Error reporting %s to virustotal", resource)
            return False

    def get_from_cache(self, sha256hash):
        """Return the cached response for ``sha256hash``, if there is one.

        :param sha256hash: cache key.
        :return: a ``VTResponse`` with ``infected`` recomputed against the
            current configuration, or ``None`` when nothing (or a falsy
            value) is cached.
        """
        from_cache = self.memcached.get(sha256hash)
        if not from_cache:
            return None

        vtr = VTResponse(from_cache)
        vtr.infected = self.is_infected(vtr)
        return vtr

    def set_in_cache(self, sha256hash, d, expire=0):
        """Store ``d`` in memcached under ``sha256hash``.

        :param sha256hash: cache key.
        :param d: value to store.
        :param expire: passed to memcached as ``time``.
        """
        logger.debug("Saving key %s in cache. Expires in %s seconds", sha256hash, expire)
        self.memcached.set(sha256hash, d, time=expire)

    def is_infected(self, response_or_positive_hits):
        """Compare a hit count against ``config.hits_required``.

        :param response_or_positive_hits: a ``VTResponse`` (its
            ``positives`` attribute is used) or anything ``int()`` accepts.
        :return: ``True`` if the hit count reaches the threshold.
        """
        if isinstance(response_or_positive_hits, VTResponse):
            hits = response_or_positive_hits.positives
        else:
            hits = int(response_or_positive_hits)
        return hits >= self.config.hits_required