def test_database_006_caching_threaded(self):
    """Cached database connections for threads."""
    enable_cached_db_connections()
    e = threading.Event()

    with get_db_connection() as conn_1:
        self.assertEquals(len(saq.database._global_db_cache), 1)
        conn_1_id = id(conn_1)

    def f():
        enable_cached_db_connections()
        # this connection should be different than conn_1
        with get_db_connection() as conn_2:
            self.assertEquals(len(saq.database._global_db_cache), 2)
            self.assertNotEquals(conn_1, conn_2)
            conn_2_id = id(conn_2)
            # but asked a second time this should be the same as before
            with get_db_connection() as conn_3:
                self.assertEquals(len(saq.database._global_db_cache), 2)
                self.assertEquals(conn_2_id, id(conn_3))

        e.set()
        disable_cached_db_connections()
        self.assertEquals(len(saq.database._global_db_cache), 1)

    t = threading.Thread(target=f)
    t.start()
    e.wait()

    with get_db_connection() as conn_4:
        self.assertEquals(len(saq.database._global_db_cache), 1)
        self.assertEquals(conn_1_id, id(conn_4))

    disable_cached_db_connections()
    self.assertEquals(len(saq.database._global_db_cache), 0)

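# --- Illustrative sketch (not part of the saq codebase) ---
# A minimal, self-contained sketch of the per-thread connection caching pattern that the
# caching tests in this section exercise. This is NOT the saq.database implementation;
# the DummyConnection class and the cache layout below are assumptions made only to show
# why another thread or process gets its own cache entry while the first connection is reused.
import threading
from contextlib import contextmanager

_global_db_cache = {}          # (database name, thread ident) -> open connection
_local = threading.local()     # per-thread flag for whether caching is enabled


class DummyConnection:
    """Stand-in for a real pymysql connection."""
    def close(self):
        pass


def enable_cached_db_connections():
    _local.enabled = True


def disable_cached_db_connections():
    # close and discard only the connections owned by the calling thread
    for key in [k for k in _global_db_cache if k[1] == threading.get_ident()]:
        _global_db_cache.pop(key).close()
    _local.enabled = False


@contextmanager
def get_db_connection(name='ace'):
    if getattr(_local, 'enabled', False):
        key = (name, threading.get_ident())
        if key not in _global_db_cache:
            _global_db_cache[key] = DummyConnection()
        yield _global_db_cache[key]    # cached: reused on the next call, never closed here
    else:
        conn = DummyConnection()
        try:
            yield conn
        finally:
            conn.close()               # uncached: closed as soon as the with block exits
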
def _create_analysis(url, reprocess, alertable, **kwargs):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(alertable, bool)
    assert isinstance(kwargs, dict)

    sha256_url = hash_url(url)
    new_entry = False

    try:
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            execute_with_retry(c, """INSERT INTO analysis_results ( sha256_url ) VALUES ( UNHEX(%s) )""",
                               (sha256_url,))
            db.commit()
            new_entry = True
    except pymysql.err.IntegrityError as e:
        # timing issue -- created as we were getting ready to create
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        if e.args[0] != 1062:
            raise e

        logging.debug("entry for {} already created".format(url))

    with get_db_connection('cloudphish') as db:
        c = db.cursor()

        # if we didn't just create this then we update the status of the existing entry
        # we don't need to do this if we just created it because
        if reprocess or not new_entry:
            execute_with_retry(c, """UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                               (STATUS_NEW, sha256_url))

        try:
            execute_with_retry(c, """INSERT INTO workload ( sha256_url, url, alertable, details )
                                     VALUES ( UNHEX(%s), %s, %s, %s )""",
                               (sha256_url, url, alertable, pickle.dumps(kwargs)))
        except pymysql.err.IntegrityError as e:
            # timing issue -- created as we were getting ready to create
            # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
            if e.args[0] != 1062:
                raise e

            logging.debug("analysis request for {} already exists".format(url))

        db.commit()

    return get_cached_analysis(url)

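# --- Illustrative sketch (not part of the saq codebase) ---
# hash_url() is called above but not defined in this section. The queries compare its result
# against UNHEX(%s) on a sha256_url column, so something equivalent to the hex SHA-256 digest
# of the URL text is assumed; the real implementation may normalize the URL first.
import hashlib

def hash_url(url):
    return hashlib.sha256(url.encode('utf-8', errors='ignore')).hexdigest()
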
def download():
    url, sha256_url = _get_url_and_hash()

    content_location = None
    content_file_name = None

    # get the sha256_content for this url
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT HEX(sha256_content) FROM cloudphish_analysis_results WHERE sha256_url = UNHEX(%s)",
                  (sha256_url,))
        row = c.fetchone()
        if row is None:
            return "Unknown URL", 404

        sha256_content = row[0]

    content_metadata = get_content_metadata(sha256_content)
    if not content_metadata:
        return "Unknown content", 404

    content_location, content_file_name = content_metadata

    # is this a different node?
    if content_location != saq.SAQ_NODE:
        # get the correct location for the node from the database
        with get_db_connection() as db:
            c = db.cursor()
            c.execute("SELECT location FROM nodes WHERE name = %s", (content_location,))
            row = c.fetchone()
            if row is None:
                return "node {} no longer exists".format(content_location), 404

            # replace the netloc of this url with the new location
            target_url = url_for('cloudphish.download', s=sha256_url, _external=True)
            parsed_url = list(urlparse(target_url))
            parsed_url[1] = row[0]
            target_url = urlunparse(parsed_url)

            logging.debug("sending redirect to {}".format(target_url))
            return redirect(target_url)

    path = os.path.join(saq.DATA_DIR, saq.CONFIG['cloudphish']['cache_dir'],
                        sha256_content[0:2].lower(), sha256_content.lower())
    if not os.path.exists(path):
        return "Unknown content path", 404

    return send_from_directory(os.path.dirname(path),
                               os.path.basename(path),
                               as_attachment=True,
                               attachment_filename=content_file_name)

def _get_cached_analysis(url):
    sha256 = hash_url(url)
    # have we already processed this url?
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""SELECT ar.status, ar.result, ar.http_result_code, ar.http_message, HEX(ar.sha256_content),
                            cm.location, cm.name
                     FROM analysis_results AS ar
                     LEFT JOIN content_metadata AS cm ON ar.sha256_content = cm.sha256_content
                     WHERE sha256_url = UNHEX(%s)""", (sha256,))
        row = c.fetchone()
        if row:
            file_name = row[6]
            if file_name:
                file_name = file_name.decode()

            return CloudphishAnalysisResult(RESULT_OK,   # result
                                            None,        # details
                                            row[0],      # status
                                            row[1],      # analysis_results
                                            row[2],      # http_result
                                            row[3],      # http_message
                                            row[4],      # sha256_content
                                            row[5],      # location
                                            file_name)

    return None

def download():
    enable_cached_db_connections()

    sha256 = request.args.get('s', None)
    if not sha256:
        return "Invalid request.", 400

    if not re.match(r'^[a-fA-F0-9]{64}$', sha256):
        return "Invalid request.", 400

    path = os.path.join(saq.SAQ_HOME, saq.CONFIG['cloudphish']['cache_dir'],
                        sha256[0:2].lower(), sha256.lower())
    if not os.path.exists(path):
        # if we don't have the content see if it's on another node
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            c.execute("""SELECT location FROM content_metadata WHERE sha256_content = UNHEX(%s)""", (sha256,))
            row = c.fetchone()
            if row:
                content_location = row[0]
                # is this a different node?
                if content_location != saq.CONFIG['engine_cloudphish']['location']:
                    return redirect('https://{}/cloudphish/download?s={}'.format(content_location, sha256))

        # otherwise we just don't know about it
        return "Unknown content", 404

    return send_from_directory(os.path.dirname(path), os.path.basename(path), as_attachment=True)

def test_retry_function_on_deadlock(self):
    from saq.database import User, retry_function_on_deadlock

    with get_db_connection() as db:
        c = db.cursor()
        c.execute("INSERT INTO users ( username, email ) VALUES ( 'user0', 'user0@localhost' )")
        c.execute("INSERT INTO users ( username, email ) VALUES ( 'user1', 'user1@localhost' )")
        db.commit()

    lock_user0 = threading.Event()
    lock_user1 = threading.Event()

    def _t1():
        # acquire lock on user0
        saq.db.execute(User.__table__.update().where(User.username == 'user0').values(email='user0@t1'))
        lock_user0.set()
        # wait for lock on user1
        lock_user1.wait(5)
        time.sleep(2)
        # this should fire a deadlock
        saq.db.execute(User.__table__.update().where(User.username == 'user1').values(email='user1@t1'))
        saq.db.commit()

    def _t2():
        with get_db_connection() as db:
            c = db.cursor()
            lock_user0.wait(5)
            # acquire lock on user1
            c.execute("UPDATE users SET email = 'user1@t2' WHERE username = '******'")
            lock_user1.set()
            # this will block waiting for lock on user0
            c.execute("UPDATE users SET email = 'user0@t2' WHERE username = '******'")
            db.commit()

    t1 = threading.Thread(target=retry_function_on_deadlock, args=(_t1,))
    t1.start()

    t2 = threading.Thread(target=_t2)
    t2.start()

    t1.join(5)
    t2.join(5)

    self.assertEquals(log_count('DEADLOCK STATEMENT'), 1)
    self.assertIsNotNone(saq.db.query(User).filter(User.email == 'user0@t1', User.username == 'user0').first())
    self.assertIsNotNone(saq.db.query(User).filter(User.email == 'user1@t1', User.username == 'user1').first())

def decrypt_password(key):
    """Returns the decrypted value for the given key."""
    from saq.database import get_db_connection
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("""
            SELECT `encrypted_value` FROM `encrypted_passwords`
            WHERE `key` = %s
            """, (key,))
        row = c.fetchone()
        if row is None:
            logging.warning(f"request for unknown encrypted password {key}")
            return None

        if saq.ENCRYPTION_PASSWORD is not None:
            from saq.crypto import decrypt_chunk
            return decrypt_chunk(base64.b64decode(row[0])).decode('utf8')
        else:
            logging.debug(f"request to decrypt {key} without decryption key set")
            return None

def wrapper(*args, **kwargs):
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("""DELETE FROM alerts""")
        db.commit()

    return target_function(*args, **kwargs)

def get_restoration_targets(message_ids):
    """Given a list of message-ids, return a list of tuples of (message_id, recipient) suitable
       for the unremediate_emails command. The values are discovered by querying the remediation
       table in the database."""

    if not message_ids:
        return []

    result = []  # of ( message-id, recipient )

    logging.info("searching for restoration targets for {} message-ids".format(len(message_ids)))

    with get_db_connection() as db:
        c = db.cursor()

        for message_id in message_ids:
            # TODO create an email_remediation table that has the indexing for message_id, recipient, etc...
            c.execute("SELECT DISTINCT(`key`) FROM `remediation` WHERE `type` = 'email' AND `action` = 'remove' AND `key` LIKE %s",
                      (f'{message_id}%',))

            for row in c:
                message_id, recipient = row[0].split(':', 1)
                result.append((message_id, recipient))

    return result

def reset_brocess(self):
    # clear the brocess db
    with get_db_connection('brocess') as db:
        c = db.cursor()
        c.execute("""DELETE FROM httplog""")
        c.execute("""DELETE FROM smtplog""")
        db.commit()

def root_analysis_completed(self, root):
    if root.delayed:
        return

    # mark the analysis as completed
    try:
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            c.execute("UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)",
                      (STATUS_ANALYZED, root.details[KEY_DETAILS_SHA256_URL],))
            db.commit()
    except Exception as e:
        logging.error("unable to update database: {}".format(e))
        report_exception()

    # delete the work directory
    if not self.keep_work_dir:
        try:
            shutil.rmtree(root.storage_dir)
        except Exception as e:
            logging.error("unable to delete work directory {}: {}".format(root.storage_dir, e))
            report_exception()

def collect(self):
    # grab the next urls to process
    results = None
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        # go ahead and allocate a batch of URLs to process
        c.execute("""UPDATE workload SET node = %s WHERE sha256_url IN (
                         SELECT sha256_url FROM (
                             SELECT sha256_url FROM workload
                             WHERE node IS NULL OR node = %s
                             ORDER BY node, insert_date ASC
                             LIMIT {}) as t)""".format(self.analysis_pool_size),
                  (self.node, self.node))
        db.commit()

        c.execute("SELECT HEX(sha256_url), url, alertable, details FROM workload WHERE node = %s ORDER BY insert_date DESC",
                  (self.node,))
        results = c.fetchall()
        #logging.debug("got {} urls from database".format(len(results)))

    if not results:
        # XXX temp hack
        try:
            time.sleep(1)
        except Exception as e:
            pass

        #logging.debug("no work available")
        return

    # process each url
    url_hash_list = []
    for sha256_url, url, alertable, details in results:
        logging.info("adding url {} (alertable {}) to workload".format(url, alertable))
        self.add_work_item((url, alertable, pickle.loads(details) if details else {}))
        url_hash_list.append(sha256_url)

    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        logging.debug("deleting {} entries from workload table".format(len(url_hash_list)))
        for sha256_url in url_hash_list:
            c.execute("DELETE FROM workload WHERE sha256_url = UNHEX(%s)", (sha256_url,))
        db.commit()

def setUp(self, *args, **kwargs):
    super().setUp(*args, **kwargs)

    with get_db_connection() as db:
        c = db.cursor()
        c.execute("DELETE FROM cloudphish_analysis_results")
        db.commit()

    self.start_http_server()

def disposition_watch_execute(self, alert_id):
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT disposition FROM alerts WHERE id = %s", (alert_id,))
        (disposition,) = c.fetchone()
        if disposition:
            self.cancel_analysis()

def delete_password(key):
    """Deletes the given password from the database. Returns True if the password was deleted."""
    from saq.database import get_db_connection
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("DELETE FROM `encrypted_passwords` WHERE `key` = %s", (key,))
        db.commit()
        return c.rowcount == 1

def test_database_007_caching_processes(self):
    """Cached database connections for processes."""
    enable_cached_db_connections()

    with get_db_connection() as conn_1:
        self.assertEquals(len(saq.database._global_db_cache), 1)
        conn_1_id = id(conn_1)

    def f():
        enable_cached_db_connections()
        # this connection should be different than conn_1
        with get_db_connection() as conn_2:
            send_test_message(len(saq.database._global_db_cache) == 2)
            send_test_message(conn_1 != conn_2)
            conn_2_id = id(conn_2)
            # but asked a second time this should be the same as before
            with get_db_connection() as conn_3:
                send_test_message(len(saq.database._global_db_cache) == 2)
                send_test_message(conn_2_id == id(conn_3))

        disable_cached_db_connections()
        send_test_message(len(saq.database._global_db_cache) == 1)

    p = multiprocessing.Process(target=f)
    p.start()

    self.assertTrue(recv_test_message())  # len(saq.database._global_db_cache) == 2
    self.assertTrue(recv_test_message())  # conn_1 != conn_2
    self.assertTrue(recv_test_message())  # len(saq.database._global_db_cache) == 2
    self.assertTrue(recv_test_message())  # conn_2_id == id(conn_3)
    self.assertTrue(recv_test_message())  # len(saq.database._global_db_cache) == 1

    p.join()

    with get_db_connection() as conn_4:
        self.assertEquals(len(saq.database._global_db_cache), 1)
        self.assertEquals(conn_1_id, id(conn_4))

    disable_cached_db_connections()
    self.assertEquals(len(saq.database._global_db_cache), 0)

def add_httplog(fqdn):
    with get_db_connection('brocess') as db:
        c = db.cursor()
        for fqdn_part in iterate_fqdn_parts(fqdn):
            c.execute("""INSERT INTO httplog ( host, numconnections, firstconnectdate )
                         VALUES ( LOWER(%s), 1, UNIX_TIMESTAMP(NOW()) )
                         ON DUPLICATE KEY UPDATE numconnections = numconnections + 1""",
                      (fqdn_part,))
        db.commit()

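# --- Illustrative sketch (not part of the saq codebase) ---
# iterate_fqdn_parts() is used by add_httplog() above but not shown in this section. The loop
# appears to record one httplog row per level of the FQDN; a generator along these lines would
# produce that behavior, but the exact direction and stopping point are assumptions.
def iterate_fqdn_parts(fqdn):
    """Yield successively shorter suffixes of an FQDN: a.b.c -> a.b.c, b.c, c."""
    labels = fqdn.split('.')
    for index in range(len(labels)):
        yield '.'.join(labels[index:])
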
def test_execute_with_retry_commit(self):
    _uuid = str(uuid.uuid4())
    _lock_uuid = str(uuid.uuid4())

    disable_cached_db_connections()

    # simple insert statement with commit option
    with get_db_connection() as db:
        c = db.cursor()
        execute_with_retry(db, c, 'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
                           (_uuid,), commit=True)

    # check it on another connection
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT uuid FROM locks WHERE uuid = %s", (_uuid,))
        self.assertIsNotNone(c.fetchone())

    _uuid = str(uuid.uuid4())
    _lock_uuid = str(uuid.uuid4())

    # and then this one should fail since we did not commit it
    with get_db_connection() as db:
        c = db.cursor()
        execute_with_retry(db, c, 'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
                           (_uuid,), commit=False)

    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT uuid FROM locks WHERE uuid = %s", (_uuid,))
        self.assertIsNone(c.fetchone())

    enable_cached_db_connections()

def reset_cloudphish(self):
    # clear cloudphish db
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""DELETE FROM analysis_results""")
        c.execute("""DELETE FROM content_metadata""")
        c.execute("""DELETE FROM workload""")
        db.commit()

    with get_db_connection('brocess') as db:
        c = db.cursor()
        c.execute("""DELETE FROM httplog""")
        db.commit()

    # clear cloudphish engine and module cache
    for cache_dir in [saq.CONFIG['cloudphish']['cache_dir'],
                      saq.CONFIG['analysis_module_cloudphish']['local_cache_dir']]:
        if os.path.isdir(cache_dir):
            shutil.rmtree(cache_dir)
        os.makedirs(cache_dir)

def query_brocess_by_fqdn(fqdn):
    with get_db_connection('brocess') as connection:
        cursor = connection.cursor()
        cursor.execute('SELECT SUM(numconnections) FROM httplog WHERE host = %s', (fqdn,))

        for row in cursor:
            count = row[0]
            return int(count) if count is not None else 0

        raise RuntimeError("failed to return a row for sum() query operation !?")

def query_brocess_by_dest_ipv4(ipv4):
    with get_db_connection('brocess') as connection:
        cursor = connection.cursor()
        cursor.execute('SELECT SUM(numconnections) FROM connlog WHERE destip = INET_ATON(%s)', (ipv4,))

        for row in cursor:
            count = row[0]
            return int(count) if count is not None else 0

        raise RuntimeError("failed to return a row for sum() query operation !?")

def query_brocess_by_source_email(source_email_address):
    with get_db_connection('brocess') as connection:
        cursor = connection.cursor()
        cursor.execute('SELECT SUM(numconnections) FROM smtplog WHERE source = %s', (source_email_address,))

        for row in cursor:
            count = row[0]
            return int(count) if count is not None else 0

        raise RuntimeError("failed to return a row for sum() query operation !?")

def execute_analysis(self, observable):
    # create analysis object and add it to the observable
    analysis = self.create_analysis(observable)

    # create hal9000 observable list for use in post analysis
    if not hasattr(self.root, 'hal9000_observables'):
        self.root.hal9000_observables = set()  # of the md5 hashes of the observables

    # get the id of the observable
    md5_hasher = md5()
    md5_hasher.update(observable.type.encode('utf-8', errors='ignore'))
    md5_hasher.update(observable.value.encode('utf-8', errors='ignore'))
    id = md5_hasher.hexdigest()
    logging.debug("id = {}".format(id))

    # append id to hal9000 observables list so we can insert it during post processing
    self.root.hal9000_observables.add(id)

    # connect to database
    with get_db_connection('hal9000') as db:
        c = db.cursor()

        # lookup the mal frequency of this observable
        c.execute("""
            SELECT mal_count, total_count
            FROM observables
            WHERE id = UNHEX(%s)
            """, (id,))

        result = c.fetchone()
        db.commit()

        if result is not None:
            analysis.mal_count = result[0]
            analysis.total_count = result[1]

            logging.debug("Malicious Frequency Analysis {}/{} ({}%)".format(
                analysis.mal_count, analysis.total_count, analysis.mal_percent))

            # if we have seen this indicator enough times to use as a flag
            if analysis.total_count > self.min_sample_size:
                # flag observable if its malicious percent is greater than the configured threshold
                if analysis.mal_percent >= self.mal_threshold:
                    observable.add_tag('high_mal_frequency')

                # flag observable as false positive if mal_percent is less than configured threshold
                if analysis.mal_percent < self.fp_threshold:
                    observable.add_tag('high_fp_frequency')

    return True

def setUp(self, *args, **kwargs):
    super().setUp(*args, **kwargs)

    with get_db_connection() as db:
        c = db.cursor()
        c.execute("DELETE FROM work_distribution_groups")
        c.execute("DELETE FROM incoming_workload")
        c.execute("DELETE FROM workload")
        c.execute("UPDATE nodes SET last_update = SUBTIME(NOW(), '01:00:00')")
        db.commit()

    # default engines to support any analysis mode
    saq.CONFIG['engine']['local_analysis_modes'] = ''

def encrypt_password(key, value):
    """Stores sensitive data as an encrypted value."""
    encrypted_value = base64.b64encode(encrypt_chunk(value.encode('utf8')))
    from saq.database import get_db_connection
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("""
            INSERT INTO `encrypted_passwords` ( `key`, `encrypted_value` )
            VALUES ( %s, %s )
            ON DUPLICATE KEY UPDATE `encrypted_value` = %s""",
            (key, encrypted_value, encrypted_value))
        db.commit()

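# --- Illustrative usage (not part of the saq codebase) ---
# A hedged example of how the three password helpers in this section (encrypt_password,
# decrypt_password, delete_password) fit together. The key name 'smtp_relay' and the value
# are made up, and it assumes saq.ENCRYPTION_PASSWORD has been loaded so decryption works.
encrypt_password('smtp_relay', 'hunter2')
assert decrypt_password('smtp_relay') == 'hunter2'
assert delete_password('smtp_relay') is True    # True only if a row was actually deleted
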
def clear_alert():
    enable_cached_db_connections()

    url = request.values.get('url', None)
    sha256 = request.values.get('sha256', None)

    if not url and not sha256:
        return "Invalid request (missing url or sha256.)", 400

    if url:
        url = url.encode('ascii', errors='ignore').decode('ascii')

    if not sha256:
        sha256 = hash_url(url)

    if not sha256:
        return "Invalid request.", 400

    if not re.match(r'^[a-fA-F0-9]{64}$', sha256):
        return "Invalid request (not a valid hash.)", 400

    row_count = 0

    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""SELECT HEX(sha256_content) FROM analysis_results WHERE sha256_url = UNHEX(%s)""", (sha256,))
        row = c.fetchone()
        if row:
            sha256_content = row[0]
            c.execute("""UPDATE analysis_results SET result = 'CLEAR' WHERE sha256_content = UNHEX(%s)""",
                      (sha256_content,))
            row_count = c.rowcount
            db.commit()
        else:
            logging.warning("missing url {} (sha256 {})".format(url, sha256))

    logging.info("request to clear cloudphish alert for {} row_count {}".format(url if url else sha256, row_count))

    response = make_response(json.dumps({'result': 'OK', 'row_count': row_count}))
    response.mimetype = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response, 200

def test_cloudphish_engine_007_http_clear_alert(self):
    url = 'http://valvoline.com/'
    json_data = self.generate_cloudphish_alert(url)

    # there should be one result in the database
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""SELECT COUNT(*) FROM analysis_results WHERE result = 'ALERT'""")
        result = c.fetchone()
        self.assertEquals(result[0], 1)

    result = self.client.get('/cloudphish/clear_alert?{}'.format(urlencode({'url': url})))

    # now there should be zero
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""SELECT COUNT(*) FROM analysis_results WHERE result = 'ALERT'""")
        result = c.fetchone()
        self.assertEquals(result[0], 0)