def test_database_006_caching_threaded(self):
    """Cached database connections are per-thread.

    Each thread gets its own cached connection (distinct from the main
    thread's), repeated requests inside a thread reuse the same cached
    connection, and disabling the cache in a thread only removes that
    thread's entry from the global cache.
    """
    # NOTE: assertEquals/assertNotEquals are deprecated aliases that were
    # removed in Python 3.12; use assertEqual/assertNotEqual instead.
    enable_cached_db_connections()

    e = threading.Event()

    with get_db_connection() as conn_1:
        # the main thread's connection is now the only cached entry
        self.assertEqual(len(saq.database._global_db_cache), 1)
        conn_1_id = id(conn_1)

    def f():
        enable_cached_db_connections()
        # this connection should be different than conn_1
        with get_db_connection() as conn_2:
            self.assertEqual(len(saq.database._global_db_cache), 2)
            self.assertNotEqual(conn_1, conn_2)
            conn_2_id = id(conn_2)

        # but asked a second time this should be the same as before
        with get_db_connection() as conn_3:
            self.assertEqual(len(saq.database._global_db_cache), 2)
            self.assertEqual(conn_2_id, id(conn_3))

        e.set()
        disable_cached_db_connections()
        # only the main thread's entry should remain
        self.assertEqual(len(saq.database._global_db_cache), 1)

    t = threading.Thread(target=f)
    t.start()
    e.wait()
    # join so the worker's disable_cached_db_connections() and its final
    # assertion complete before we inspect the cache (avoids a race where
    # the main thread sees 2 entries because the worker has not cleaned up yet)
    t.join()

    with get_db_connection() as conn_4:
        self.assertEqual(len(saq.database._global_db_cache), 1)
        # the main thread still reuses its original cached connection
        self.assertEqual(conn_1_id, id(conn_4))

    disable_cached_db_connections()
    self.assertEqual(len(saq.database._global_db_cache), 0)
def submit():
    """Flask endpoint: accept a URL submission for cloudphish analysis.

    Expects a 'url' value; optional flags 'r' (reprocess) and 'a'
    (alertable) are passed as '1'.  Any other request values are forwarded
    to analyze_url() as extra details.  Returns the analysis result as a
    JSON response, or 400 when no url was supplied.
    """
    enable_cached_db_connections()

    url = request.values.get('url', None)
    if not url:
        return "Invalid request.", 400

    # XXX this is a hack but urls should be all ASCII anyways
    # so technically this changes the sha256 hash we get out of it but that's OK
    # because if it's not ASCII it's not a valid anyways
    url = url.encode('ascii', errors='ignore').decode('ascii')

    # flags arrive as the string '1' when set
    reprocess = request.values.get('r', None) == '1'
    alertable = request.values.get('a', None) == '1'

    details = {}

    # to support any future changes we just store all of the variables that were passed in
    for key in request.values.keys():
        if key not in ['a', 'r', 'url']:
            details[key] = request.values.get(key)

    logging.info("received submission for {} reprocess {} alertable {}".format(
        url, reprocess, alertable))

    result = analyze_url(url, reprocess, alertable, **details)

    logging.debug("returning result {} for {}".format(result, url))
    response = make_response(
        json.dumps(result.json(), sort_keys=True, indent=4))
    # BUG FIX: Response has no 'mime_type' attribute -- assigning it was a
    # silent no-op and the response went out as text/html; the real
    # (settable) property is 'mimetype'
    response.mimetype = 'application/json'
    return response, 200
def loop(self):
    """Submission loop: call execute() repeatedly, pausing between attempts
    based on what execute() reported, until the shutdown event fires."""
    enable_cached_db_connections()

    done = False
    while not done:
        try:
            outcome = self.execute()

            if outcome == WORK_SUBMITTED:
                # work was done -- look for more immediately unless shutting down
                done = self.shutdown_event.is_set()
            elif outcome == NO_WORK_AVAILABLE:
                # nothing to submit -- pause a second before checking again
                done = self.shutdown_event.wait(1)
            elif outcome == NO_NODES_AVAILABLE:
                # no nodes available -- wait about half a status update period
                done = self.shutdown_event.wait(
                    self.node_status_update_frequency / 2)
            elif outcome == NO_WORK_SUBMITTED:
                done = self.shutdown_event.wait(1)

        except Exception as e:
            logging.error(
                "unexpected exception thrown in loop for {}: {}".format(
                    self, e))
            report_exception()
            done = self.shutdown_event.wait(1)

    disable_cached_db_connections()
def download():
    """Flask endpoint: serve locally cached content identified by the 's'
    (sha256) query parameter, redirecting to the owning node when the
    content lives elsewhere."""
    enable_cached_db_connections()

    sha256 = request.args.get('s', None)
    # reject a missing or malformed hash with the same message
    if not sha256 or not re.match(r'^[a-fA-F0-9]{64}$', sha256):
        return "Invalid request.", 400

    # cache layout: <cache_dir>/<first two hex chars>/<full hash>
    path = os.path.join(saq.SAQ_HOME,
                        saq.CONFIG['cloudphish']['cache_dir'],
                        sha256[0:2].lower(),
                        sha256.lower())

    if os.path.exists(path):
        return send_from_directory(os.path.dirname(path),
                                   os.path.basename(path),
                                   as_attachment=True)

    # if we don't have the content see if it's on another node
    with get_db_connection('cloudphish') as db:
        cursor = db.cursor()
        cursor.execute(
            """SELECT location FROM content_metadata WHERE sha256_content = UNHEX(%s)""",
            (sha256, ))
        row = cursor.fetchone()

    if row:
        content_location = row[0]
        # is this a different node?
        if content_location != saq.CONFIG['engine_cloudphish']['location']:
            return redirect(
                'https://{}/cloudphish/download?s={}'.format(
                    content_location, sha256))

    # otherwise we just don't know about it
    return "Unknown content", 404
def clear_alert():
    """Flask endpoint: mark a cloudphish analysis result as CLEAR.

    Accepts a 'url' and/or a 'sha256' request value; when only the url is
    given the hash is computed from it.  Updates the matching
    analysis_results row(s) and returns a JSON summary with the number of
    rows changed.  Returns 400 on missing or malformed input.
    """
    enable_cached_db_connections()

    url = request.values.get('url', None)
    sha256 = request.values.get('sha256', None)

    if not url and not sha256:
        return "Invalid request (missing url or sha256.)", 400

    if url:
        # urls are forced to ASCII (same normalization used at submission)
        url = url.encode('ascii', errors='ignore').decode('ascii')

    if not sha256:
        sha256 = hash_url(url)

    if not sha256:
        return "Invalid request.", 400

    if not re.match(r'^[a-fA-F0-9]{64}$', sha256):
        return "Invalid request (not a valid hash.)", 400

    row_count = 0
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute(
            """SELECT HEX(sha256_content) FROM analysis_results WHERE sha256_url = UNHEX(%s)""",
            (sha256, ))
        row = c.fetchone()
        if row:
            sha256_content = row[0]
            c.execute(
                """UPDATE analysis_results SET result = 'CLEAR' WHERE sha256_content = UNHEX(%s)""",
                (sha256_content, ))
            row_count = c.rowcount
            db.commit()
        else:
            logging.warning("missing url {} (sha256 {})".format(url, sha256))

    logging.info(
        "request to clear cloudphish alert for {} row_count {}".format(
            url if url else sha256, row_count))

    response = make_response(
        json.dumps({
            'result': 'OK',
            'row_count': row_count
        }))
    # BUG FIX: Response has no 'mime_type' attribute -- assigning it was a
    # silent no-op; the real (settable) property is 'mimetype'
    response.mimetype = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response, 200
def debug(self):
    """Run a single debug pass: one collection cycle followed by workload
    cleanup, with cached db connections enabled for the duration."""
    if not self.remote_node_groups:
        # groups are normally loaded at startup; if we're starting cold
        # make sure they exist before collecting
        self.load_groups()

    enable_cached_db_connections()

    try:
        self.debug_extended_collection()
    except NotImplementedError:
        pass  # extended collection is optional for subclasses

    self.execute()
    self.execute_workload_cleanup()

    disable_cached_db_connections()
def f():
    """Worker body: verify this execution context gets its own cached
    connection (distinct from conn_1) and that asking again reuses it.
    Results are reported back to the test via send_test_message()."""
    enable_cached_db_connections()

    # this connection should be different than conn_1
    with get_db_connection() as first_conn:
        send_test_message(len(saq.database._global_db_cache) == 2)
        send_test_message(conn_1 != first_conn)
        first_conn_id = id(first_conn)

    # asking a second time should hand back the exact same cached connection
    with get_db_connection() as second_conn:
        send_test_message(len(saq.database._global_db_cache) == 2)
        send_test_message(first_conn_id == id(second_conn))

    disable_cached_db_connections()
    # only the parent's cached entry should remain
    send_test_message(len(saq.database._global_db_cache) == 1)
def f():
    """Worker-thread body: verify this thread gets its own cached
    connection (distinct from conn_1), that asking again reuses it, then
    signal the main thread via e and release this thread's cache entry.

    NOTE: assertEquals/assertNotEquals are deprecated aliases removed in
    Python 3.12; replaced with assertEqual/assertNotEqual.
    """
    enable_cached_db_connections()

    # this connection should be different than conn_1
    with get_db_connection() as conn_2:
        self.assertEqual(len(saq.database._global_db_cache), 2)
        self.assertNotEqual(conn_1, conn_2)
        conn_2_id = id(conn_2)

    # but asked a second time this should be the same as before
    with get_db_connection() as conn_3:
        self.assertEqual(len(saq.database._global_db_cache), 2)
        self.assertEqual(conn_2_id, id(conn_3))

    e.set()
    disable_cached_db_connections()
    # only the main thread's cached entry should remain
    self.assertEqual(len(saq.database._global_db_cache), 1)
def download_alert():
    """Flask endpoint: stream the ACE alert archive (.ace.tar.gz) for the
    content identified by the 's' (sha256) query parameter, redirecting to
    the owning node when the content lives elsewhere."""
    enable_cached_db_connections()

    sha256 = request.args.get('s', None)
    # reject a missing or malformed hash with the same message
    if not sha256 or not re.match(r'^[a-fA-F0-9]{64}$', sha256):
        return "Invalid request.", 400

    # cache layout: <cache_dir>/<first two hex chars>/<full hash>
    path = os.path.join(saq.SAQ_HOME,
                        saq.CONFIG['cloudphish']['cache_dir'],
                        sha256[0:2].lower(),
                        sha256.lower())

    if not os.path.exists(path):
        # if we don't have the content see if it's on another node
        with get_db_connection('cloudphish') as db:
            cursor = db.cursor()
            cursor.execute(
                """SELECT location FROM content_metadata WHERE sha256_content = UNHEX(%s)""",
                (sha256, ))
            row = cursor.fetchone()

        if row:
            content_location = row[0]
            # is this a different node?
            if content_location != saq.CONFIG['engine_cloudphish']['location']:
                return redirect(
                    'https://{}/cloudphish/download_alert?s={}'.format(
                        content_location, sha256))

        # otherwise we just don't know about it
        return "Unknown content", 404

    ace_path = '{}.ace.tar.gz'.format(path)
    if not os.path.exists(ace_path):
        return "No alert data.", 404

    def stream_archive():
        # stream the archive in buffered chunks instead of loading it whole
        with open(ace_path, 'rb') as fp:
            while True:
                chunk = fp.read(io.DEFAULT_BUFFER_SIZE)
                if not chunk:
                    break
                yield chunk

    return Response(stream_archive(), mimetype='application/gzip')
def disposition_watch_loop(self, alert_id):
    """Poll the disposition of alert_id every 5 seconds until the engine
    shuts down, the analysis is cancelled, or the analysis ends.

    Cached db connections are enabled for the duration of the loop and
    always released on exit.
    """
    enable_cached_db_connections()

    while not self.shutdown and not self.cancel_analysis_flag and not self.analysis_ended_flag:
        try:
            self.disposition_watch_execute(alert_id)
            time.sleep(5)
        except Exception as e:
            logging.error("unable to check disposition of {}: {}".format(
                alert_id, e))
            report_exception()
            # BUG FIX: this was 'return', which skipped the
            # disable_cached_db_connections() below and leaked this
            # thread's cached db connection on error; break instead so
            # cleanup always runs
            break

    disable_cached_db_connections()
    logging.debug("exiting disposition watch")
def test_database_005_caching(self):
    """Enabling/disabling cached db connections toggles the enabled flag
    and populates/empties the global connection cache.

    NOTE: assertEquals is a deprecated alias removed in Python 3.12;
    replaced with assertEqual.
    """
    from saq.database import _cached_db_connections_enabled

    self.assertFalse(_cached_db_connections_enabled())
    enable_cached_db_connections()
    self.assertTrue(_cached_db_connections_enabled())

    # requesting a connection while caching is enabled caches it
    with get_db_connection() as db:
        pass

    # we should have one database connection ready
    self.assertEqual(len(saq.database._global_db_cache), 1)

    disable_cached_db_connections()
    self.assertFalse(_cached_db_connections_enabled())

    # we should have zero database connection ready
    self.assertEqual(len(saq.database._global_db_cache), 0)
    self.assertEqual(len(saq.database._use_cache_flags), 0)
def loop(self):
    """Service loop: run execute() about once a second until service
    shutdown is requested, with cached db connections held throughout."""
    enable_cached_db_connections()

    while True:
        try:
            self.execute()
        except Exception as e:
            logging.error(
                "unexpected exception thrown during loop for {}: {}".
                format(self, e))
            report_exception()

        # stop when the shutdown event fires during the pause, or when
        # shutdown was flagged some other way
        if self.service_shutdown_event.wait(1) or self.is_service_shutdown:
            break

    disable_cached_db_connections()
def cleanup_loop(self):
    """Background loop that repeatedly cleans up completed workload items,
    skipping the pause whenever the previous pass found work to clean."""
    logging.debug("starting cleanup loop")
    enable_cached_db_connections()

    while True:
        # default to a one second pause; drop to zero if we cleaned
        # something up so we immediately look for more
        delay = 1
        try:
            if self.execute_workload_cleanup() > 0:
                delay = 0
        except Exception as e:
            logging.exception(f"unable to execute workload cleanup: {e}")

        if self.service_shutdown_event.wait(delay):
            break

    disable_cached_db_connections()
    logging.debug("exited cleanup loop")
def initialize_collection(self):
    """Initialize collection, then restore any incomplete analysis requests
    persisted by a previous run and delete the persistence file."""
    super().initialize_collection()
    enable_cached_db_connections()

    if not os.path.exists(self.incomplete_analysis_path):
        return

    logging.info("reading incomplete analysis from {}".format(
        self.incomplete_analysis_path))

    # each line is tab-separated: uuid, storage_dir, id
    with open(self.incomplete_analysis_path, 'r') as fp:
        for entry in fp:
            request_uuid, storage_dir, request_id = entry.strip().split('\t')
            self.incomplete_analysis.append(
                AnalysisRequest(request_uuid, storage_dir, request_id))

    logging.info("loaded {} incomplete analysis requests".format(
        len(self.incomplete_analysis)))

    # the requests are back in memory; drop the persistence file
    os.remove(self.incomplete_analysis_path)
def test_database_007_caching_processes(self):
    """Cached database connections are per-process.

    A child process gets its own cached connection (distinct from the
    parent's), repeated requests inside the child reuse it, and the
    parent's cached connection is unaffected.  The child reports its
    results back via send_test_message().

    NOTE: assertEquals is a deprecated alias removed in Python 3.12;
    replaced with assertEqual.
    """
    enable_cached_db_connections()

    with get_db_connection() as conn_1:
        # the parent's connection is now the only cached entry
        self.assertEqual(len(saq.database._global_db_cache), 1)
        conn_1_id = id(conn_1)

    def f():
        enable_cached_db_connections()
        # this connection should be different than conn_1
        with get_db_connection() as conn_2:
            send_test_message(len(saq.database._global_db_cache) == 2)
            send_test_message(conn_1 != conn_2)
            conn_2_id = id(conn_2)

        # but asked a second time this should be the same as before
        with get_db_connection() as conn_3:
            send_test_message(len(saq.database._global_db_cache) == 2)
            send_test_message(conn_2_id == id(conn_3))

        disable_cached_db_connections()
        send_test_message(len(saq.database._global_db_cache) == 1)

    p = multiprocessing.Process(target=f)
    p.start()

    self.assertTrue(
        recv_test_message())  # len(saq.database._global_db_cache) == 2
    self.assertTrue(recv_test_message())  # conn_1 != conn_2
    self.assertTrue(
        recv_test_message())  # len(saq.database._global_db_cache) == 2
    self.assertTrue(recv_test_message())  # conn_2_id == id(conn_3)
    self.assertTrue(
        recv_test_message())  # len(saq.database._global_db_cache) == 1
    p.join()

    with get_db_connection() as conn_4:
        # the parent still reuses its original cached connection
        self.assertEqual(len(saq.database._global_db_cache), 1)
        self.assertEqual(conn_1_id, id(conn_4))

    disable_cached_db_connections()
    self.assertEqual(len(saq.database._global_db_cache), 0)
def test_execute_with_retry_commit(self):
    """execute_with_retry honors the commit flag: a commit=True insert is
    visible from a second connection, a commit=False insert is not."""
    target_uuid = str(uuid.uuid4())
    target_lock_uuid = str(uuid.uuid4())

    disable_cached_db_connections()

    # simple insert statement with commit option
    with get_db_connection() as db:
        cursor = db.cursor()
        execute_with_retry(
            db, cursor,
            'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
            (target_uuid, ),
            commit=True)

    # check it on another connection
    with get_db_connection() as db:
        cursor = db.cursor()
        cursor.execute("SELECT uuid FROM locks WHERE uuid = %s",
                       (target_uuid, ))
        self.assertIsNotNone(cursor.fetchone())

    target_uuid = str(uuid.uuid4())
    target_lock_uuid = str(uuid.uuid4())

    # and then this one should fail since we did not commit it
    with get_db_connection() as db:
        cursor = db.cursor()
        execute_with_retry(
            db, cursor,
            'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
            (target_uuid, ),
            commit=False)

    with get_db_connection() as db:
        cursor = db.cursor()
        cursor.execute("SELECT uuid FROM locks WHERE uuid = %s",
                       (target_uuid, ))
        self.assertIsNone(cursor.fetchone())

    enable_cached_db_connections()
def extended_collection_wrapper(self):
    """Run extended_collection() with cached db connections enabled,
    guaranteeing the cached connection is released even when collection
    raises."""
    enable_cached_db_connections()
    try:
        self.extended_collection()
    finally:
        # always release the cached connection, error or not
        disable_cached_db_connections()