def __init__(self, config, memcached_servers=None):
    """Set up the cache client, the database handle and the cleanup list.

    :param config: configuration object; stored on the instance and passed
        on to ``Database`` / ``NoopDatabase``.
    :param memcached_servers: optional list of memcached server addresses.
        Falls back to ``['127.0.0.1:11211']`` when omitted or empty.
    """
    self.config = config
    self.memcached = memcache.Client(memcached_servers or ['127.0.0.1:11211'])  # NOSONAR

    try:
        self.database = Database(config)
    except Exception:
        # A broken database must not stop the scanner: log the error and
        # continue with the no-op implementation. ``Exception`` (instead of
        # a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        logger.exception("Error opening database")
        self.database = NoopDatabase(config)

    # Paths collected while processing that are handed to the cleanup step.
    self.clean_paths = []
class AmavisVT(object):
    """Check files against Virustotal by their SHA-256 hash.

    Results are cached in memcached and recorded in a database; when the
    database cannot be opened a ``NoopDatabase`` is used instead.
    """

    buffer_size = 4096

    def __init__(self, config, memcached_servers=None):
        """Set up the cache client, the database handle and the cleanup list.

        :param config: configuration object; stored on the instance and
            passed on to ``Database`` / ``NoopDatabase``.
        :param memcached_servers: optional list of memcached server
            addresses. Falls back to ``['127.0.0.1:11211']`` when omitted
            or empty.
        """
        self.config = config
        self.memcached = memcache.Client(memcached_servers or ['127.0.0.1:11211'])  # NOSONAR

        try:
            self.database = Database(config)
        except Exception:
            # A broken database must not stop the scanner: log and continue
            # with the no-op implementation. ``Exception`` (instead of a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            logger.exception("Error opening database")
            self.database = NoopDatabase(config)

        # Paths collected in process() that are cleaned up when it finishes.
        self.clean_paths = []

    def run(self, file_or_directory):
        """Process a single file, or every readable file below a directory.

        :param file_or_directory: path to a file or a directory. Anything
            that is neither results in an empty resource set.
        :return: whatever :meth:`process` returns for the collected
            resources.
        """
        resources = []

        if os.path.isfile(file_or_directory):
            if os.access(file_or_directory, os.R_OK):
                resources.append(Resource(file_or_directory, cleanup=False))
            else:
                logger.info("Skipping inaccessible file %s", file_or_directory)
        elif os.path.isdir(file_or_directory):
            for root, _dirs, files in os.walk(file_or_directory):
                for name in files:
                    path = os.path.join(root, name)
                    if not os.path.isfile(path):
                        continue

                    if os.access(path, os.R_OK):
                        resources.append(Resource(path, cleanup=False))
                    else:
                        # Log the file that was actually skipped, not the
                        # directory that was passed in.
                        logger.info("Skipping inaccessible file %s", path)

        return self.process(ResourceSet(resources))

    def process(self, resource_set):
        """Look up every included resource and return the non-empty results.

        Cached results are used where available; the remaining hashes are
        sent to Virustotal via :meth:`check_vt`. When
        ``config.filename_pattern_detection`` is set, the filename pattern
        check is run on the hashes that were sent.

        :param resource_set: iterable of resources; each resource is itself
            iterated to obtain further resources.
        :return: list of ``(resource, response)`` tuples whose response is
            truthy.
        """
        hashes_for_vt = []
        results = []

        try:
            def _iter_resources():
                # Yield each resource followed by whatever iterating the
                # resource itself produces.
                for outer in resource_set:
                    yield outer
                    for inner in outer:
                        yield inner

            all_resources = list(_iter_resources())
            logger.info("Processing %s resources: %s", len(all_resources),
                        ', '.join([r.path for r in all_resources]))

            for resource in all_resources:
                if resource.cleanup:
                    self.clean_paths.append(resource.path)

                if not self.is_included(resource):
                    logger.debug("Skipping resource (not included): %s", resource)
                    continue

                cached_value = self.get_from_cache(resource.sha256)
                if cached_value:
                    logger.info("Using cached result for file %s (%s): %s",
                                resource, resource.sha256, cached_value)
                    results.append((resource, cached_value))
                else:
                    hashes_for_vt.append((resource, resource.sha256))

            logger.info("Sending %s hashes to Virustotal", len(hashes_for_vt))
            vt_results = list(self.check_vt(hashes_for_vt))
            results.extend(vt_results)

            if self.config.filename_pattern_detection:
                logger.debug("Filename pattern detection enabled")
                results.extend(
                    self.do_filename_pattern_detection(hashes_for_vt, resource_set, vt_results))

                # update patterns for entries which have no pattern set yet
                self.database.update_patterns()

            return [(resource, response) for resource, response in results if response]
        finally:
            # Runs on success and on error alike.
            clean_silent(self.clean_paths)
            self.database.clean()

    def do_filename_pattern_detection(self, hashes_for_vt, resource_set, vt_results):
        """Record the sent resources and flag those matching a filename pattern.

        :param hashes_for_vt: list of ``(resource, sha256)`` tuples that
            were sent to Virustotal.
        :param resource_set: the set being processed; its ``to_localpart``
            and ``to_domain`` are passed to the database.
        :param vt_results: list of ``(resource, response)`` tuples returned
            by :meth:`check_vt`.
        :return: list of ``(resource, FilenameResponse)`` tuples for the
            resources flagged via filename pattern.
        """
        results = []

        for resource, sha256 in hashes_for_vt:
            matching = [r for _, r in vt_results if r and r.sha256 == sha256]
            vtresult = matching[0] if matching else None

            # add the resource to the database
            self.database.add_resource(resource, vtresult,
                                       resource_set.to_localpart, resource_set.to_domain)

            # only test for filename pattern if there is no Virustotal
            # result for this hash
            if vtresult is None and self.database.filename_pattern_match(
                    resource, localpart=resource_set.to_localpart):
                logger.info(
                    "Flagging attachment %s as INFECTED (identified via filename pattern)",
                    resource.filename)

                # NOTE(review): ``results`` is local to this method and only
                # ever receives FilenameResponse entries, so this removal
                # looks like a no-op - confirm before deleting it.
                try:
                    results.remove((resource, vtresult))
                except ValueError:
                    pass

                reported = False
                if self.config.auto_report:
                    reported = self.report_to_vt(resource)

                results.append((resource, FilenameResponse(reported)))

        return results

    @staticmethod
    def is_included(resource):
        """Return True if the resource should be checked.

        A resource is included when its mime type starts with
        ``application/``, is one of the listed script types, its filename
        has one of the listed extensions, or its filename has no extension
        (no dot, or a trailing dot).
        """
        return any((f(resource) for f in [
            lambda r: r.mime_type.startswith('application/'),
            lambda r: r.mime_type in ('text/x-shellscript', 'text/x-perl', 'text/x-ruby', 'text/x-python'),
            lambda r: re.search(
                r"\.(exe|com|zip|tar\.[\w\d]+|doc\w?|xls\w?|ppt\w?|pdf|js|bat|cmd|rtf|ttf|html?|vbs|wsf)$",
                r.filename, re.IGNORECASE),
            lambda r: '.' not in r.filename or r.filename.endswith('.')
        ]))

    def check_vt(self, checksums):
        """Query Virustotal for the given hashes and yield the results.

        This is a generator. Nothing is yielded when ``config.pretend`` is
        set, when ``checksums`` is empty, or when the request fails (the
        error is logged, not raised). Every response item is cached and
        written to the database, but only items whose hash was part of
        ``checksums`` are yielded.

        :param checksums: list of ``(resource, sha256)`` tuples.
        :return: generator of ``(resource, VTResponse)`` tuples.
        """
        if self.config.pretend:
            logger.info("NOT sending requests to virustotal")
            return

        if not checksums:
            return

        max_hashes_per_request = 4  # Virustotal's public api limit
        # Free slots in this request, used to piggy back hashes from the
        # database; never negative.
        extra_hashes = max(0, max_hashes_per_request - len(checksums))

        try:
            # create a dictionary of sha256 <> resource
            query_d = dict((sha256, resource) for resource, sha256 in checksums)
            raw_checksums = [x[1] for x in checksums]

            # get hashes from database that have a pattern but infected=0
            if extra_hashes > 0:
                clean_hashes = self.database.get_clean_hashes(extra_hashes)
                logger.info("Piggy backing request to VT to send %s extra hashes",
                            len(clean_hashes))
            else:
                clean_hashes = []

            send_checksums = sorted(set(raw_checksums + clean_hashes))
            logger.debug("Sending %s checksums", len(send_checksums))

            response = requests.post(
                self.config.api_url,
                {
                    'apikey': self.config.apikey,
                    'resource': ', '.join(send_checksums)
                },
                timeout=float(self.config.timeout),
                headers={
                    'User-Agent': 'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' % VERSION
                })
            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            responses = response.json()
            # Normalise a non-list response to a list.
            if not isinstance(responses, list):
                responses = [responses]

            logger.debug("Got %s items in response", len(responses))
            # Items without a 'sha256' key are dropped here.
            responses = dict((d['sha256'], d) for d in responses if 'sha256' in d)
            logger.debug("Got %s complete items in response", len(responses))

            for sha256, data in responses.items():
                vtr = VTResponse(data)
                vtr.infected = self.is_infected(vtr)

                cache_expires = self.config.unknown_expire
                if vtr.response_code:
                    cache_expires = (self.config.positive_expire if vtr.infected
                                     else self.config.negative_expire)

                logger.info("Saving in cache: %s (expires in %s seconds)",
                            vtr.sha256, cache_expires)
                self.set_in_cache(vtr.resource, data, cache_expires)

                logger.info("Updating database result for %s (infected: %s)",
                            vtr.sha256, vtr.infected)
                self.database.update_result(vtr)

                # Piggy backed hashes are stored above but not reported back.
                if sha256 in query_d:
                    queried = query_d[sha256]
                    logger.debug("Result for %s: %s", queried, vtr)
                    yield queried, vtr
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that closing
            # the generator (GeneratorExit) is not logged as a lookup error.
            logger.exception("Error asking virustotal about files")

    def report_to_vt(self, resource):
        """Upload the resource's file to Virustotal.

        :param resource: resource providing ``path`` and ``filename``.
        :return: ``None`` when ``config.pretend`` is set, the
            ``VTResponse`` on success, ``False`` when the upload failed
            (the error is logged, not raised).
        """
        if self.config.pretend:
            logger.info("NOT sending resource to virustotal")
            return

        try:
            logger.info("Reporting resource %s (%s) to virustotal", resource, resource.filename)

            # Close the file once the request is done instead of leaking
            # the handle.
            with open(resource.path, 'rb') as handle:
                response = requests.post(
                    self.config.report_url,
                    data={
                        'apikey': self.config.apikey,
                    },
                    files={
                        'file': (resource.filename, handle),
                    },
                    timeout=float(self.config.timeout),
                    headers={
                        'User-Agent': 'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' % VERSION
                    })

            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            vtr = VTResponse(response.json())
            logger.info("Report result: %s", vtr)
            return vtr
        except Exception:
            logger.exception("Error reporting %s to virustotal", resource)
            return False

    def get_from_cache(self, sha256hash):
        """Return the cached result for a hash as ``VTResponse``.

        :return: a ``VTResponse`` with ``infected`` set, or ``None`` when
            memcached returns nothing (or a falsy value) for the key.
        """
        from_cache = self.memcached.get(sha256hash)
        if not from_cache:
            return None

        vtr = VTResponse(from_cache)
        # Evaluated on every read against the current config.hits_required.
        vtr.infected = self.is_infected(vtr)
        return vtr

    def set_in_cache(self, sha256hash, d, expire=0):
        """Store ``d`` in memcached under ``sha256hash``.

        :param expire: passed to memcached as the ``time`` argument.
        """
        logger.debug("Saving key %s in cache. Expires in %s seconds", sha256hash, expire)
        self.memcached.set(sha256hash, d, time=expire)

    def is_infected(self, response_or_positive_hits):
        """Return True if the hit count reaches ``config.hits_required``.

        :param response_or_positive_hits: a ``VTResponse`` (its
            ``positives`` is used) or a value that ``int()`` accepts.
        """
        if isinstance(response_or_positive_hits, VTResponse):
            return response_or_positive_hits.positives >= self.config.hits_required
        return int(response_or_positive_hits) >= self.config.hits_required
class AmavisVT(object):
    """Check files against Virustotal by their SHA-256 hash.

    Results are cached in memcached and recorded in a database; when the
    database cannot be opened a ``NoopDatabase`` is used instead.
    """

    buffer_size = 4096

    def __init__(self, config, memcached_servers=None):
        """Set up the cache client, the database handle and the cleanup list.

        :param config: configuration object; stored on the instance and
            passed on to ``Database`` / ``NoopDatabase``.
        :param memcached_servers: optional list of memcached server
            addresses. Falls back to ``['127.0.0.1:11211']`` when omitted
            or empty.
        """
        self.config = config
        self.memcached = memcache.Client(memcached_servers or ['127.0.0.1:11211'])  # NOSONAR

        try:
            self.database = Database(config)
        except Exception:
            # A broken database must not stop the scanner: log and continue
            # with the no-op implementation. ``Exception`` (instead of a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            logger.exception("Error opening database")
            self.database = NoopDatabase(config)

        # Paths collected in process() that are cleaned up when it finishes.
        self.clean_paths = []

    def run(self, file_or_directory):
        """Process a single file, or every readable file below a directory.

        :param file_or_directory: path to a file or a directory. Anything
            that is neither results in an empty resource set.
        :return: whatever :meth:`process` returns for the collected
            resources.
        """
        resources = []

        if os.path.isfile(file_or_directory):
            if os.access(file_or_directory, os.R_OK):
                resources.append(Resource(file_or_directory, cleanup=False))
            else:
                logger.info("Skipping inaccessible file %s", file_or_directory)
        elif os.path.isdir(file_or_directory):
            for root, _dirs, files in os.walk(file_or_directory):
                for name in files:
                    path = os.path.join(root, name)
                    if not os.path.isfile(path):
                        continue

                    if os.access(path, os.R_OK):
                        resources.append(Resource(path, cleanup=False))
                    else:
                        # Log the file that was actually skipped, not the
                        # directory that was passed in.
                        logger.info("Skipping inaccessible file %s", path)

        return self.process(ResourceSet(resources))

    def process(self, resource_set):
        """Look up every included resource and return the non-empty results.

        Cached results are used where available; the remaining hashes are
        sent to Virustotal via :meth:`check_vt`. When
        ``config.filename_pattern_detection`` is set, the filename pattern
        check is run on the hashes that were sent.

        :param resource_set: iterable of resources; each resource is itself
            iterated to obtain further resources.
        :return: list of ``(resource, response)`` tuples whose response is
            truthy.
        """
        hashes_for_vt = []
        results = []

        try:
            def _iter_resources():
                # Yield each resource followed by whatever iterating the
                # resource itself produces.
                for outer in resource_set:
                    yield outer
                    for inner in outer:
                        yield inner

            all_resources = list(_iter_resources())
            logger.info("Processing %s resources: %s", len(all_resources),
                        ', '.join([r.path for r in all_resources]))

            for resource in all_resources:
                if resource.cleanup:
                    self.clean_paths.append(resource.path)

                if not self.is_included(resource):
                    logger.debug("Skipping resource (not included): %s", resource)
                    continue

                cached_value = self.get_from_cache(resource.sha256)
                if cached_value:
                    logger.info("Using cached result for file %s (%s): %s",
                                resource, resource.sha256, cached_value)
                    results.append((resource, cached_value))
                else:
                    hashes_for_vt.append((resource, resource.sha256))

            logger.info("Sending %s hashes to Virustotal", len(hashes_for_vt))
            vt_results = list(self.check_vt(hashes_for_vt))
            results.extend(vt_results)

            if self.config.filename_pattern_detection:
                logger.debug("Filename pattern detection enabled")
                results.extend(
                    self.do_filename_pattern_detection(hashes_for_vt, resource_set, vt_results))

                # update patterns for entries which have no pattern set yet
                self.database.update_patterns()

            return [(resource, response) for resource, response in results if response]
        finally:
            # Runs on success and on error alike.
            clean_silent(self.clean_paths)
            self.database.clean()

    def do_filename_pattern_detection(self, hashes_for_vt, resource_set, vt_results):
        """Record the sent resources and flag those matching a filename pattern.

        :param hashes_for_vt: list of ``(resource, sha256)`` tuples that
            were sent to Virustotal.
        :param resource_set: the set being processed; its ``to_localpart``
            and ``to_domain`` are passed to the database.
        :param vt_results: list of ``(resource, response)`` tuples returned
            by :meth:`check_vt`.
        :return: list of ``(resource, FilenameResponse)`` tuples for the
            resources flagged via filename pattern.
        """
        results = []

        for resource, sha256 in hashes_for_vt:
            matching = [r for _, r in vt_results if r and r.sha256 == sha256]
            vtresult = matching[0] if matching else None

            # add the resource to the database
            self.database.add_resource(resource, vtresult,
                                       resource_set.to_localpart, resource_set.to_domain)

            # only test for filename pattern if there is no Virustotal
            # result for this hash
            if vtresult is None and self.database.filename_pattern_match(
                    resource, localpart=resource_set.to_localpart):
                logger.info(
                    "Flagging attachment %s as INFECTED (identified via filename pattern)",
                    resource.filename)

                # NOTE(review): ``results`` is local to this method and only
                # ever receives FilenameResponse entries, so this removal
                # looks like a no-op - confirm before deleting it.
                try:
                    results.remove((resource, vtresult))
                except ValueError:
                    pass

                reported = False
                if self.config.auto_report:
                    reported = self.report_to_vt(resource)

                results.append((resource, FilenameResponse(reported)))

        return results

    @staticmethod
    def is_included(resource):
        """Return True if the resource should be checked.

        A resource is included when its mime type starts with
        ``application/``, is one of the listed script types, its filename
        has one of the listed extensions, or its filename has no extension
        (no dot, or a trailing dot).
        """
        return any((f(resource) for f in [
            lambda r: r.mime_type.startswith('application/'),
            lambda r: r.mime_type in ('text/x-shellscript', 'text/x-perl', 'text/x-ruby', 'text/x-python'),
            lambda r: re.search(
                r"\.(exe|com|zip|tar\.[\w\d]+|doc\w?|xls\w?|ppt\w?|pdf|js|bat|cmd|rtf|ttf|html?|vbs|wsf)$",
                r.filename, re.IGNORECASE),
            lambda r: '.' not in r.filename or r.filename.endswith('.')
        ]))

    def check_vt(self, checksums):
        """Query Virustotal for the given hashes and yield the results.

        This is a generator. Nothing is yielded when ``config.pretend`` is
        set, when ``checksums`` is empty, or when the request fails (the
        error is logged, not raised). Every response item is cached and
        written to the database, but only items whose hash was part of
        ``checksums`` are yielded.

        :param checksums: list of ``(resource, sha256)`` tuples.
        :return: generator of ``(resource, VTResponse)`` tuples.
        """
        if self.config.pretend:
            logger.info("NOT sending requests to virustotal")
            return

        if not checksums:
            return

        max_hashes_per_request = 4  # Virustotal's public api limit
        # Free slots in this request, used to piggy back hashes from the
        # database; never negative.
        extra_hashes = max(0, max_hashes_per_request - len(checksums))

        try:
            # create a dictionary of sha256 <> resource
            query_d = dict((sha256, resource) for resource, sha256 in checksums)
            raw_checksums = [x[1] for x in checksums]

            # get hashes from database that have a pattern but infected=0
            if extra_hashes > 0:
                clean_hashes = self.database.get_clean_hashes(extra_hashes)
                logger.info("Piggy backing request to VT to send %s extra hashes",
                            len(clean_hashes))
            else:
                clean_hashes = []

            send_checksums = sorted(set(raw_checksums + clean_hashes))
            logger.debug("Sending %s checksums", len(send_checksums))

            response = requests.post(
                self.config.api_url,
                {
                    'apikey': self.config.apikey,
                    'resource': ', '.join(send_checksums)
                },
                timeout=float(self.config.timeout),
                headers={
                    'User-Agent': 'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' % VERSION
                })
            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            responses = response.json()
            # Normalise a non-list response to a list.
            if not isinstance(responses, list):
                responses = [responses]

            logger.debug("Got %s items in response", len(responses))
            # Items without a 'sha256' key are dropped here.
            responses = dict((d['sha256'], d) for d in responses if 'sha256' in d)
            logger.debug("Got %s complete items in response", len(responses))

            for sha256, data in responses.items():
                vtr = VTResponse(data)
                vtr.infected = self.is_infected(vtr)

                cache_expires = self.config.unknown_expire
                if vtr.response_code:
                    cache_expires = (self.config.positive_expire if vtr.infected
                                     else self.config.negative_expire)

                logger.info("Saving in cache: %s (expires in %s seconds)",
                            vtr.sha256, cache_expires)
                self.set_in_cache(vtr.resource, data, cache_expires)

                logger.info("Updating database result for %s (infected: %s)",
                            vtr.sha256, vtr.infected)
                self.database.update_result(vtr)

                # Piggy backed hashes are stored above but not reported back.
                if sha256 in query_d:
                    queried = query_d[sha256]
                    logger.debug("Result for %s: %s", queried, vtr)
                    yield queried, vtr
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that closing
            # the generator (GeneratorExit) is not logged as a lookup error.
            logger.exception("Error asking virustotal about files")

    def report_to_vt(self, resource):
        """Upload the resource's file to Virustotal.

        :param resource: resource providing ``path`` and ``filename``.
        :return: ``None`` when ``config.pretend`` is set, the
            ``VTResponse`` on success, ``False`` when the upload failed
            (the error is logged, not raised).
        """
        if self.config.pretend:
            logger.info("NOT sending resource to virustotal")
            return

        try:
            logger.info("Reporting resource %s (%s) to virustotal", resource, resource.filename)

            # Close the file once the request is done instead of leaking
            # the handle.
            with open(resource.path, 'rb') as handle:
                response = requests.post(
                    self.config.report_url,
                    data={
                        'apikey': self.config.apikey,
                    },
                    files={
                        'file': (resource.filename, handle),
                    },
                    timeout=float(self.config.timeout),
                    headers={
                        'User-Agent': 'amavisvt/%s (+https://ercpe.de/projects/amavisvt)' % VERSION
                    })

            response.raise_for_status()
            if response.status_code == 204:
                raise Exception("API-Limit exceeded!")

            vtr = VTResponse(response.json())
            logger.info("Report result: %s", vtr)
            return vtr
        except Exception:
            logger.exception("Error reporting %s to virustotal", resource)
            return False

    def get_from_cache(self, sha256hash):
        """Return the cached result for a hash as ``VTResponse``.

        :return: a ``VTResponse`` with ``infected`` set, or ``None`` when
            memcached returns nothing (or a falsy value) for the key.
        """
        from_cache = self.memcached.get(sha256hash)
        if not from_cache:
            return None

        vtr = VTResponse(from_cache)
        # Evaluated on every read against the current config.hits_required.
        vtr.infected = self.is_infected(vtr)
        return vtr

    def set_in_cache(self, sha256hash, d, expire=0):
        """Store ``d`` in memcached under ``sha256hash``.

        :param expire: passed to memcached as the ``time`` argument.
        """
        logger.debug("Saving key %s in cache. Expires in %s seconds", sha256hash, expire)
        self.memcached.set(sha256hash, d, time=expire)

    def is_infected(self, response_or_positive_hits):
        """Return True if the hit count reaches ``config.hits_required``.

        :param response_or_positive_hits: a ``VTResponse`` (its
            ``positives`` is used) or a value that ``int()`` accepts.
        """
        if isinstance(response_or_positive_hits, VTResponse):
            return response_or_positive_hits.positives >= self.config.hits_required
        return int(response_or_positive_hits) >= self.config.hits_required