def test_object_score_low_duplication(self):
    obj1 = {
        'field1': 'value',
        'field2': 'value1',
        'field3': 'value2',
        'field4': 'value3',
        'field5': 'value4'
    }
    obj2 = {
        'field1': str(uuid.uuid4()),
        'field2': str(uuid.uuid4()),
        'field3': str(uuid.uuid4()),
        'field4': str(uuid.uuid4()),
        'field5': str(uuid.uuid4())
    }
    ioc = GreaseContainer()
    parent1 = ioc.getCollection('test_scoring').insert_one({
        'expiry': Deduplication.generate_expiry_time(1),
        'max_expiry': Deduplication.generate_max_expiry_time(1),
        'type': 1,
        'score': 1,
        'source': 'test_source',
        'hash': Deduplication.generate_hash_from_obj(obj1)
    }).inserted_id
    score1 = Deduplication.object_field_score(
        'test_scoring', ioc, 'test_source', 'test_configuration', obj1, parent1, 1, 1)
    parent2 = ioc.getCollection('test_scoring').insert_one({
        'expiry': Deduplication.generate_expiry_time(1),
        'max_expiry': Deduplication.generate_max_expiry_time(1),
        'type': 1,
        'score': 1,
        'source': 'test_source',
        'hash': Deduplication.generate_hash_from_obj(obj2)
    }).inserted_id
    score2 = Deduplication.object_field_score(
        'test_scoring', ioc, 'test_source', 'test_configuration', obj2, parent2, 1, 1)
    print("++++++++++++++++++++++++++++++++++")
    print("score1: {0}".format(score1))
    print("score2: {0}".format(score2))
    print("++++++++++++++++++++++++++++++++++")
    self.assertEqual(score1, 0.0)
    self.assertLessEqual(score2, 20.0)
    ioc.getCollection('test_scoring').drop()
    time.sleep(1.5)
def test_empty_source_schedule(self):
    ioc = GreaseContainer()
    sch = Scheduling(ioc)
    jServer = ioc.getCollection('JobServer')
    jID = jServer.insert_one({
        'jobs': 0,
        'os': platform.system().lower(),
        'roles': ["general"],
        'prototypes': ["detect"],
        'active': True,
        'activationTime': datetime.datetime.utcnow()
    }).inserted_id
    time.sleep(1.5)
    self.assertFalse(sch.scheduleDetection('test', 'test_conf', []))
    jServer.delete_one({'_id': ObjectId(jID)})
    ioc.getCollection('SourceData').drop()
def test_prototype_execution(self):
    ioc = GreaseContainer()
    cmd = DaemonProcess(ioc)
    # add search path
    with open(ioc.getConfig().greaseConfigFile, 'r') as fil:
        data = json.loads(fil.read())
    data['Import']['searchPath'].append('tgt_grease.router.Commands.tests')
    with open(ioc.getConfig().greaseConfigFile, 'w') as fil:
        fil.write(json.dumps(data, sort_keys=True, indent=4))
    Configuration.ReloadConfig()
    # Update Node to run it
    ioc.getCollection('JobServer').update_one(
        {'_id': ObjectId(ioc.getConfig().NodeIdentity)},
        {'$set': {'prototypes': ['TestProtoType']}}
    )
    # Sleeps are because mongo in Travis is sometimes slow to persist data
    time.sleep(1.5)
    self.assertTrue(cmd.server())
    self.assertTrue(cmd.drain_jobs(ioc.getCollection('JobQueue')))
    # ensure jobs drain out
    time.sleep(1.5)
    self.assertEqual(
        ioc.getCollection('TestProtoType').find({'runs': {'$exists': True}}).count(),
        10
    )
    # clean up
    with open(ioc.getConfig().greaseConfigFile, 'r') as fil:
        data = json.loads(fil.read())
    # remove collection
    ioc.getCollection('TestProtoType').drop()
    # pop search path
    trash = data['Import']['searchPath'].pop()
    # close out
    with open(ioc.getConfig().greaseConfigFile, 'w') as fil:
        fil.write(json.dumps(data, sort_keys=True, indent=4))
    ioc.getCollection('JobServer').update_one(
        {'_id': ObjectId(ioc.getConfig().NodeIdentity)},
        {'$set': {'prototypes': []}}
    )
def test_deduplicate_object(self):
    ioc = GreaseContainer()
    ioc.getConfig().set('verbose', True, 'Logging')
    obj = [
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': str(uuid.uuid4()), 'field1': 'var1', 'field2': str(uuid.uuid4()),
         'field3': str(uuid.uuid4()), 'field4': 'var4', 'field5': str(uuid.uuid4())}
    ]
    finalObj = []
    Deduplication.deduplicate_object(
        ioc, obj[0], 1, 1, 40.0, 'test_source', 'test_configuration', finalObj, 'test_source')
    self.assertEqual(len(finalObj), 1)
    Deduplication.deduplicate_object(
        ioc, obj[1], 1, 1, 40.0, 'test_source', 'test_configuration', finalObj, 'test_source')
    self.assertEqual(len(finalObj), 1)
    Deduplication.deduplicate_object(
        ioc, obj[2], 1, 1, 40.0, 'test_source', 'test_configuration', finalObj, 'test_source')
    self.assertGreaterEqual(len(finalObj), 1)
    ioc.getConfig().set('verbose', False, 'Logging')
    ioc.getCollection('test_source').drop()
    time.sleep(1.5)
class BridgeCommand(object):
    """Methods for Cluster Administration

    Attributes:
        imp (ImportTool): Import Tool Instance
        monitor (NodeMonitoring): Node Monitoring Model Instance

    """

    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.imp = ImportTool(self.ioc.getLogger())
        self.monitor = NodeMonitoring(self.ioc)

    def action_register(self):
        """Ensures Registration of server

        Returns:
            bool: Registration status

        """
        self.ioc.getLogger().debug("Registration Requested")
        if self.ioc.ensureRegistration():
            print("Registration Complete!")
            self.ioc.getLogger().info("Registration Completed Successfully")
            return True
        print("Registration Failed!")
        self.ioc.getLogger().info("Registration Failed")
        return False

    def action_info(self, node=None, jobs=None, prototypeJobs=None):
        """Gets Node Information

        Args:
            node (str): MongoDB Object ID to get information about
            jobs (bool): If true then will retrieve jobs executed by this node
            prototypeJobs (bool): If true then prototype jobs will be printed as well

        Note:
            provide a node argument via the CLI --node=4390qwr2fvdew458239
        Note:
            provide a jobs argument via the CLI --jobs
        Note:
            provide a prototype jobs argument via the CLI --pJobs

        Returns:
            bool: If Info was found

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        server = self.ioc.getCollection('JobServer').find_one(
            {'_id': ObjectId(str(serverId))})
        if server:
            server = dict(server)
            print("""
<<<<<<<<<<<<<< SERVER: {0} >>>>>>>>>>>>>>
Activation State: {1} Date: {2}
Jobs: {3}
Operating System: {4}
Prototypes: {5}
Execution Roles: {6}
            """.format(
                server.get('_id'),
                server.get('active'),
                server.get('activationTime'),
                server.get('jobs'),
                server.get('os'),
                server.get('prototypes'),
                server.get('roles')
            ))
            if jobs and prototypeJobs:
                print("======================= SOURCING =======================")
                for job in self.ioc.getCollection('SourceData').find(
                        {'grease_data.sourcing.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
-------------------------------
                    """.format(job['_id']))
            if jobs and prototypeJobs:
                print("======================= DETECTION =======================")
                for job in self.ioc.getCollection('SourceData').find(
                        {'grease_data.detection.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
Start Time: {1}
End Time: {2}
Context: {3}
-------------------------------
                    """.format(
                        job['_id'],
                        job['grease_data']['detection']['start'],
                        job['grease_data']['detection']['end'],
                        job['grease_data']['detection']['detection']
                    ))
            if jobs and prototypeJobs:
                print("======================= SCHEDULING =======================")
                for job in self.ioc.getCollection('SourceData').find(
                        {'grease_data.scheduling.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
Start Time: {1}
End Time: {2}
-------------------------------
                    """.format(
                        job['_id'],
                        job['grease_data']['scheduling']['start'],
                        job['grease_data']['scheduling']['end']
                    ))
            if jobs:
                print("======================= EXECUTION =======================")
                for job in self.ioc.getCollection('SourceData').find(
                        {'grease_data.execution.server': ObjectId(serverId)}):
                    print("""
-------------------------------
Job: {0}
Assignment Time: {1}
Completed Time: {2}
Execution Success: {3}
Command Success: {4}
Failures: {5}
Return Data: {6}
-------------------------------
                    """.format(
                        job['_id'],
                        job['grease_data']['execution']['assignmentTime'],
                        job['grease_data']['execution']['completeTime'],
                        job['grease_data']['execution']['executionSuccess'],
                        job['grease_data']['execution']['commandSuccess'],
                        job['grease_data']['execution']['failures'],
                        job['grease_data']['execution']['returnData']
                    ))
            return True
        print("Unable to locate server")
        self.ioc.getLogger().error(
            "Unable to load [{0}] server for information".format(serverId))
        return False

    def action_assign(self, prototype=None, role=None, node=None):
        """Assign prototypes/roles to a node either local or remote

        Args:
            prototype (str): Prototype Job to assign
            role (str): Role to assign
            node (str): MongoDB ObjectId of node to assign to, if not provided will default to the local node

        Returns:
            bool: If successful true else false

        """
        assigned = False
        if prototype:
            job = self.imp.load(str(prototype))
            if not job or not isinstance(job, Command):
                print("Cannot find prototype [{0}] to assign; check search path!".format(prototype))
                self.ioc.getLogger().error(
                    "Cannot find prototype [{0}] to assign; check search path!".format(prototype))
                return False
            # Cleanup job
            job.__del__()
            del job
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$addToSet': {'prototypes': prototype}}
            ).acknowledged
            if updated:
                print("Prototype Assigned")
                self.ioc.getLogger().info(
                    "Prototype [{0}] assigned to server [{1}]".format(prototype, serverId))
                assigned = True
            else:
                print("Prototype Assignment Failed!")
                self.ioc.getLogger().info(
                    "Prototype [{0}] assignment failed to server [{1}]".format(prototype, serverId))
                return False
        if role:
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$push': {'roles': role}}
            ).acknowledged
            if updated:
                print("Role Assigned")
                self.ioc.getLogger().info(
                    "Role [{0}] assigned to server [{1}]".format(role, serverId))
                assigned = True
            else:
                print("Role Assignment Failed!")
                self.ioc.getLogger().info(
                    "Role [{0}] assignment failed to server [{1}]".format(role, serverId))
                return False
        if not assigned:
            print("Assignment failed, please check logs for details")
        return assigned

    def action_unassign(self, prototype=None, role=None, node=None):
        """Unassign prototypes/roles from a node either local or remote

        Args:
            prototype (str): Prototype Job to unassign
            role (str): Role to unassign
            node (str): MongoDB ObjectId of node to unassign from, if not provided will default to the local node

        Returns:
            bool: If successful true else false

        """
        unassigned = False
        if prototype:
            job = self.imp.load(str(prototype))
            if not job or not isinstance(job, Command):
                print("Cannot find prototype [{0}] to unassign; check search path!".format(prototype))
                self.ioc.getLogger().error(
                    "Cannot find prototype [{0}] to unassign; check search path!".format(prototype))
                return False
            # Cleanup job
            job.__del__()
            del job
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$pull': {'prototypes': prototype}}
            ).acknowledged
            if updated:
                print("Prototype Assignment Removed")
                self.ioc.getLogger().info(
                    "Prototype [{0}] unassigned from server [{1}]".format(prototype, serverId))
                unassigned = True
            else:
                print("Prototype Unassignment Failed!")
                self.ioc.getLogger().info(
                    "Prototype [{0}] unassignment failed from server [{1}]".format(prototype, serverId))
                return False
        if role:
            valid, serverId = self.valid_server(node)
            if not valid:
                print("Invalid ObjectID")
                return False
            updated = self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$pull': {'roles': role}}
            ).acknowledged
            if updated:
                print("Role Removed")
                self.ioc.getLogger().info(
                    "Role [{0}] removed from server [{1}]".format(role, serverId))
                unassigned = True
            else:
                print("Role Removal Failed!")
                self.ioc.getLogger().info(
                    "Role [{0}] removal failed from server [{1}]".format(role, serverId))
                return False
        if not unassigned:
            print("Unassignment failed, please check logs for details")
        return unassigned

    def action_cull(self, node=None):
        """Culls a server from the active cluster

        Args:
            node (str): MongoDB ObjectId to cull; defaults to local node

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        if not self.monitor.deactivateServer(serverId):
            self.ioc.getLogger().error(
                "Failed deactivating server [{0}]".format(serverId))
            print("Failed deactivating server [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate detect jobs".format(serverId))
        if not self.monitor.rescheduleDetectJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling detect jobs [{0}]".format(serverId))
            print("Failed rescheduling detect jobs [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate schedule jobs".format(serverId))
        if not self.monitor.rescheduleScheduleJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling schedule jobs [{0}]".format(serverId))
            print("Failed rescheduling schedule jobs [{0}]".format(serverId))
            return False
        self.ioc.getLogger().warning(
            "Server [{0}] preparing to reallocate jobs".format(serverId))
        if not self.monitor.rescheduleJobs(serverId):
            self.ioc.getLogger().error(
                "Failed rescheduling jobs [{0}]".format(serverId))
            print("Failed rescheduling jobs [{0}]".format(serverId))
            return False
        print("Server Deactivated")
        return True

    def action_activate(self, node=None):
        """Activates server in cluster

        Args:
            node (str): MongoDB ObjectId to activate; defaults to local node

        Returns:
            bool: If activation is successful

        """
        if not self.ioc.ensureRegistration():
            self.ioc.getLogger().error("Server not registered with MongoDB")
            print("Unregistered servers cannot talk to the cluster")
            return False
        valid, serverId = self.valid_server(node)
        if not valid:
            print("Invalid ObjectID")
            return False
        if self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {'$set': {
                    'active': True,
                    'activationTime': datetime.datetime.utcnow()
                }}
        ).modified_count < 1:
            self.ioc.getLogger().warning(
                "Server [{0}] failed to be activated".format(serverId))
            return False
        self.ioc.getLogger().warning("Server [{0}] activated".format(serverId))
        return True

    def valid_server(self, node=None):
        """Validates node is in the MongoDB instance connected to

        Args:
            node (str): MongoDB Object ID to validate; defaults to local node

        Returns:
            tuple: first element is boolean if valid, second is objectId as string

        """
        if node:
            try:
                server = self.ioc.getCollection('JobServer').find_one(
                    {'_id': ObjectId(str(node))})
            except InvalidId:
                self.ioc.getLogger().error(
                    "Invalid ObjectID passed to bridge info [{0}]".format(node))
                return False, ""
            if server:
                return True, dict(server).get('_id')
            self.ioc.getLogger().error(
                "Failed to find server [{0}] in the database".format(node))
            return False, ""
        return True, self.ioc.getConfig().NodeIdentity
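
# --- Hedged usage sketch (not part of the original source) ---
# Shows how BridgeCommand might be driven outside the CLI. The prototype and
# role names below are placeholders; real values depend on what is importable
# on the node's search path.
def example_bridge_usage():
    bridge = BridgeCommand()
    if not bridge.action_register():
        return False
    # print local node information, including executed and prototype jobs
    bridge.action_info(node=None, jobs=True, prototypeJobs=True)
    # assign a prototype and a role to the local node (placeholder names)
    return bridge.action_assign(prototype='scan', role='general', node=None)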
def test_deduplication(self):
    ioc = GreaseContainer()
    dedup = Deduplication(ioc)
    ioc.getConfig().set('verbose', True, 'Logging')
    obj = [
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': str(uuid.uuid4()), 'field1': str(uuid.uuid4()), 'field2': str(uuid.uuid4()),
         'field3': str(uuid.uuid4()), 'field4': str(uuid.uuid4()), 'field5': str(uuid.uuid4())},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': 'var', 'field1': 'var1', 'field2': 'var2',
         'field3': 'var3', 'field4': 'var4', 'field5': 'var5'},
        {'field': str(uuid.uuid4()), 'field1': str(uuid.uuid4()), 'field2': str(uuid.uuid4()),
         'field3': str(uuid.uuid4()), 'field4': str(uuid.uuid4()), 'field5': str(uuid.uuid4())},
        {'field': str(uuid.uuid4()), 'field1': str(uuid.uuid4()), 'field2': str(uuid.uuid4()),
         'field3': str(uuid.uuid4()), 'field4': str(uuid.uuid4()), 'field5': str(uuid.uuid4())},
        {'field': str(uuid.uuid4()), 'field1': str(uuid.uuid4()), 'field2': str(uuid.uuid4()),
         'field3': str(uuid.uuid4()), 'field4': str(uuid.uuid4()), 'field5': str(uuid.uuid4())}
    ]
    finalObj = dedup.Deduplicate(obj, 'test_source', 'test_configuration', 85.0, 1, 1, 'test_source')
    self.assertGreaterEqual(len(finalObj), 4)
    ioc.getConfig().set('verbose', False, 'Logging')
    ioc.getCollection('test_source').drop()
    time.sleep(1.5)
class Scheduler(object):
    """Job Scheduler Model

    This model will attempt to schedule a job for execution

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        impTool (ImportTool): Import Utility Instance
        conf (PrototypeConfig): Prototype configuration tool
        scheduler (Scheduling): Prototype Scheduling Service Instance

    """

    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.impTool = ImportTool(self.ioc.getLogger())
        self.ioc.ensureRegistration()
        self.conf = PrototypeConfig(self.ioc)
        self.scheduler = Scheduling(self.ioc)

    def scheduleExecution(self):
        """Schedules the next successfully detected source for execution

        Returns:
            bool: True if scheduling is successful else False

        """
        source = self.getDetectedSource()
        if source:
            self.ioc.getLogger().trace("Attempting schedule of source", trace=True)
            self.ioc.getCollection('SourceData').update_one(
                {'_id': ObjectId(source.get('_id'))},
                {'$set': {'grease_data.scheduling.start': datetime.datetime.utcnow()}}
            )
            if self.schedule(source):
                self.ioc.getLogger().trace(
                    "Scheduling [{0}] was successful".format(source['_id']), trace=True)
                self.ioc.getCollection('SourceData').update_one(
                    {'_id': ObjectId(source.get('_id'))},
                    {'$set': {'grease_data.scheduling.end': datetime.datetime.utcnow()}}
                )
                return True
            else:
                self.ioc.getLogger().error(
                    "Failed to schedule [{0}] for execution".format(source['_id']),
                    trace=True,
                    notify=False
                )
                self.ioc.getCollection('SourceData').update_one(
                    {'_id': ObjectId(source.get('_id'))},
                    {'$set': {
                        'grease_data.scheduling.start': None,
                        'grease_data.scheduling.end': None
                    }}
                )
                return False
        else:
            self.ioc.getLogger().trace(
                "No sources detected for this node at this time", trace=True)
            return True

    def getDetectedSource(self):
        """Gets the next successfully detected source awaiting scheduling

        Returns:
            dict: Object from MongoDB

        """
        return self.ioc.getCollection('SourceData').find_one(
            {
                'grease_data.scheduling.server': ObjectId(self.ioc.getConfig().NodeIdentity),
                'grease_data.scheduling.start': None,
                'grease_data.scheduling.end': None
            },
            sort=[('grease_data.createTime', pymongo.DESCENDING)]
        )

    def schedule(self, source):
        """Schedules source for execution

        Returns:
            bool: If scheduling was successful or not

        """
        if isinstance(source['configuration'], bytes):
            config = self.conf.get_config(source['configuration'].decode())
        else:
            config = self.conf.get_config(source['configuration'])
        if not config:
            self.ioc.getLogger().error(
                "Failed to load configuration for source [{0}]".format(source['_id']))
            return False
        server, jobs = self.scheduler.determineExecutionServer(config.get('exe_env', 'general'))
        if not server:
            self.ioc.getLogger().error(
                "Failed to find an Execution Node for environment [{0}]".format(
                    config.get('exe_env', 'general')))
            return False
        self.ioc.getCollection('SourceData').update_one(
            {'_id': ObjectId(source['_id'])},
            {'$set': {
                'grease_data.execution.server': ObjectId(server),
                'grease_data.execution.assignmentTime': datetime.datetime.utcnow(),
            }}
        )
        self.ioc.getCollection('JobServer').update_one(
            {'_id': ObjectId(server)},
            {'$set': {'jobs': jobs + 1}}
        )
        return True
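
# --- Hedged usage sketch (not part of the original source) ---
# scheduleExecution() books the next detected source assigned to this node
# onto an execution node, so a simple polling loop is enough to drive it.
def example_scheduler_loop(ioc=None, passes=5):
    scheduler = Scheduler(ioc)
    for _ in range(passes):
        # returns True when a source was scheduled or none were waiting
        if not scheduler.scheduleExecution():
            break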
class DaemonProcess(object):
    """Actual daemon processing for GREASE Daemon

    Attributes:
        ioc (GreaseContainer): The Grease IOC
        current_real_second (int): Current second in time
        registered (bool): If the node is registered with MongoDB
        impTool (ImportTool): Instance of Import Tool
        conf (PrototypeConfig): Prototype Configuration Instance

    """

    ioc = None
    current_real_second = None
    registered = True
    contextManager = {'jobs': {}, 'prototypes': {}}
    impTool = None

    def __init__(self, ioc):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.current_real_second = datetime.utcnow().second
        if self.ioc.getConfig().NodeIdentity == "Unknown" and not self.register():
            self.registered = False
        self.impTool = ImportTool(self.ioc.getLogger())
        self.conf = PrototypeConfig(self.ioc)

    def server(self):
        """Server process for ensuring prototypes & jobs are running

        By running this method this will clear the DB of any jobs a node may have

        Returns:
            bool: Server Success

        """
        # Ensure we aren't swamping the system
        cpu = cpu_percent(interval=.1)
        mem = virtual_memory().percent
        if cpu >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')) \
                or mem >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')):
            self.ioc.getLogger().trace(
                "Thread Maximum Reached CPU: [{0}] Memory: [{1}]".format(cpu, mem),
                trace=True
            )
            # remove variables
            del cpu
            del mem
            return True
        if not self.registered:
            self.ioc.getLogger().trace("Server is not registered", trace=True)
            return False
        self.ioc.getLogger().trace("Server execution starting", trace=True)
        # establish job collection
        JobsCollection = self.ioc.getCollection("SourceData")
        self.ioc.getLogger().trace("Searching for Jobs", trace=True)
        jobs = JobsCollection.find({
            'grease_data.execution.server': ObjectId(self.ioc.getConfig().NodeIdentity),
            'grease_data.execution.commandSuccess': False,
            'grease_data.execution.executionSuccess': False,
            'grease_data.execution.failures': {'$lt': 6}
        })
        # Get Node Information
        Node = self.ioc.getCollection('JobServer').find_one(
            {'_id': ObjectId(self.ioc.getConfig().NodeIdentity)})
        if not Node:
            # If for some reason we couldn't find it
            self.ioc.getLogger().error("Failed To Load Node Information")
            return False
        # Get Prototypes
        prototypes = list(Node.get('prototypes'))
        # Del node instance
        del Node
        if prototypes:
            # We have prototypes to spin up
            for prototype in prototypes:
                self.ioc.getLogger().trace(
                    "Passing ProtoType [{0}] to Runner".format(prototype), trace=True)
                self._run_prototype(prototype)
        if jobs.count():
            self.ioc.getLogger().trace(
                "Total Jobs to Execute: [{0}]".format(jobs.count()))
            for job in jobs:
                self.ioc.getLogger().trace(
                    "Passing Job [{0}] to Runner".format(job.get("_id")), trace=True)
                self._run_job(job, JobsCollection)
        else:
            # Nothing to Run for Jobs
            self.ioc.getLogger().trace("No Jobs Scheduled to Server", trace=True)
        self.ioc.getLogger().trace("Server execution complete", trace=True)
        return True

    def _run_job(self, job, JobCollection):
        """Run an On-Demand Job

        Args:
            job (dict): Job Data to execute
            JobCollection (pymongo.collection.Collection): JobCollection to update for telemetry

        Returns:
            None: Void Method to kickoff execution

        """
        if not self.contextManager['jobs'].get(job.get('_id')):
            # New Job to run
            if isinstance(job.get('configuration'), bytes):
                conf = job.get('configuration').decode()
            else:
                conf = job.get('configuration')
            inst = self.impTool.load(self.conf.get_config(conf).get('job', ''))
            if inst and isinstance(inst, Command):
                inst.ioc.getLogger().foreground = self.ioc.getLogger().foreground
                thread = threading.Thread(
                    target=inst.safe_execute,
                    args=(job.get('grease_data', {}).get('detection', {}).get('detection', {}),),
                    name="GREASE DAEMON COMMAND EXECUTION [{0}]".format(job.get('_id'))
                )
                thread.daemon = True
                thread.start()
                self.contextManager['jobs'][job.get("_id")] = {
                    'thread': thread,
                    'command': inst
                }
            else:
                # Invalid Job
                del inst
                self.ioc.getLogger().warning("Invalid Job", additional=job)
                JobCollection.update_one(
                    {'_id': ObjectId(job['_id'])},
                    {'$set': {
                        'grease_data.execution.failures': job.get('grease_data', {}).get(
                            'execution', {}).get('failures', 0) + 1
                    }}
                )
            return
        else:
            # Job already executing
            if self.contextManager['jobs'].get(job.get('_id')).get('thread').isAlive():
                # thread still executing
                return
            else:
                # Execution has ended
                self.ioc.getLogger().trace(
                    "Job [{0}] finished running".format(job.get('_id')), trace=True)
                finishedJob = self.contextManager['jobs'].get(
                    job.get('_id')).get('command')  # type: Command
                if finishedJob.getRetVal():
                    # job completed successfully
                    JobCollection.update_one(
                        {'_id': ObjectId(job.get('_id'))},
                        {'$set': {
                            'grease_data.execution.commandSuccess': finishedJob.getRetVal(),
                            'grease_data.execution.executionSuccess': finishedJob.getExecVal(),
                            'grease_data.execution.completeTime': datetime.utcnow(),
                            'grease_data.execution.returnData': finishedJob.getData()
                        }}
                    )
                else:
                    # Job Failure
                    self.ioc.getLogger().warning(
                        "Job Failed [{0}]".format(job.get('_id')),
                        additional=finishedJob.getData()
                    )
                    # TODO: Job Execution cooldown timing
                    JobCollection.update_one(
                        {'_id': ObjectId(job['_id'])},
                        {'$set': {
                            'grease_data.execution.failures': job.get('grease_data', {}).get(
                                'execution', {}).get('failures', 0) + 1
                        }}
                    )
                # close out job
                finishedJob.__del__()
                del finishedJob
                # remove from contextManager
                del self.contextManager['jobs'][job.get('_id')]
                return

    def _run_prototype(self, prototype):
        """Startup a ProtoType

        Args:
            prototype (str): ProtoType to start

        Returns:
            None: Void method to start prototype

        """
        if not self.contextManager['prototypes'].get(prototype):
            # ProtoType has not started
            inst = self.impTool.load(prototype)
            if not isinstance(inst, Command):
                # invalid ProtoType
                self.log_once_per_second(
                    "Invalid ProtoType [{0}]".format(prototype), level=ERROR)
                return
            inst.ioc.getLogger().foreground = self.ioc.getLogger().foreground
            thread = threading.Thread(
                target=inst.safe_execute,
                args=({},),
                name="GREASE DAEMON PROTOTYPE [{0}]".format(prototype)
            )
            thread.daemon = True
            thread.start()
            self.contextManager['prototypes'][prototype] = thread
            return
        else:
            # ensure thread is alive
            if self.contextManager['prototypes'].get(prototype).isAlive():
                self.ioc.getLogger().trace(
                    "ProtoType [{0}] is alive".format(prototype))
                return
            else:
                # Thread died for some reason
                self.log_once_per_second(
                    "ProtoType [{0}] Stopped".format(prototype), level=INFO)
                inst = self.impTool.load(prototype)
                if not isinstance(inst, Command):
                    self.log_once_per_second(
                        "Invalid ProtoType [{0}]".format(prototype), level=ERROR)
                    return
                inst.ioc.getLogger().foreground = self.ioc.getLogger().foreground
                thread = threading.Thread(
                    target=inst.safe_execute,
                    args=({},),
                    name="GREASE DAEMON PROTOTYPE [{0}]".format(prototype)
                )
                thread.daemon = True
                thread.start()
                self.contextManager['prototypes'][prototype] = thread
                return

    def drain_jobs(self, JobCollection):
        """Will drain jobs from the current context

        This method is used to prevent abnormal ending of executions

        Args:
            JobCollection (pymongo.collection.Collection): Job Collection Object

        Returns:
            bool: When job queue is emptied

        """
        Threads = True
        while Threads:
            if self.contextManager['jobs']:
                jobs = {}
                for key, val in self.contextManager['jobs'].items():
                    if val['thread'].isAlive():
                        jobs[key] = val
                        continue
                    else:
                        # Execution has ended
                        self.ioc.getLogger().trace(
                            "Job [{0}] finished running".format(key), trace=True)
                        finishedJob = self.contextManager['jobs'].get(key).get(
                            'command')  # type: Command
                        if finishedJob.getRetVal():
                            # job completed successfully
                            JobCollection.update_one(
                                {'_id': ObjectId(key)},
                                {'$set': {
                                    'grease_data.execution.commandSuccess': finishedJob.getRetVal(),
                                    'grease_data.execution.executionSuccess': finishedJob.getExecVal(),
                                    'grease_data.execution.completeTime': datetime.utcnow(),
                                    'grease_data.execution.returnData': finishedJob.getData()
                                }}
                            )
                        else:
                            # Job Failure
                            self.ioc.getLogger().warning(
                                "Job Failed [{0}]".format(key),
                                additional=finishedJob.getData()
                            )
                            JobCollection.update_one(
                                {'_id': ObjectId(key)},
                                {'$inc': {'grease_data.execution.failures': 1}}
                            )
                        # close out job
                        finishedJob.__del__()
                        del finishedJob
                self.contextManager['jobs'] = jobs
            else:
                Threads = False
        return True

    def register(self):
        """Attempt to register with MongoDB

        Returns:
            bool: Registration Success

        """
        return self.ioc.ensureRegistration()

    def log_once_per_second(self, message, level=DEBUG, additional=None):
        """Log Message once per second

        Args:
            message (str): Message to log
            level (int): Log Level
            additional (object): Additional information that is able to be str'd

        Returns:
            None: Void Method to fire log message

        """
        if self._has_time_progressed():
            self.ioc.getLogger().TriageMessage(
                message=message, level=level, additional=additional)

    def _has_time_progressed(self):
        """Determines if the current second and the real second are not the same

        Returns:
            bool: if true then time has passed in a meaningful way

        """
        if self.current_real_second != datetime.utcnow().second:
            self.current_real_second = datetime.utcnow().second
            return True
        else:
            return False
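
# --- Hedged usage sketch (not part of the original source; the real daemon
# wiring lives elsewhere in GREASE) ---
# server() is designed to be called repeatedly: each pass spins up missing
# prototype threads and hands scheduled jobs to the runner, then returns.
def example_daemon_loop(ioc, cycles=10):
    import time  # local import so the sketch stays self-contained
    process = DaemonProcess(ioc)
    for _ in range(cycles):
        if not process.server():
            break
        time.sleep(1)
    # persist any in-flight job telemetry before exiting
    process.drain_jobs(ioc.getCollection('SourceData'))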
def test_detectionScheduling(self):
    ioc = GreaseContainer()
    ioc.ensureRegistration()
    sch = Scheduling(ioc)
    jServer = ioc.getCollection('JobServer')
    jID1 = jServer.insert_one({
        'jobs': 0,
        'os': platform.system().lower(),
        'roles': ["general"],
        'prototypes': ["detect"],
        'active': True,
        'activationTime': datetime.datetime.utcnow()
    }).inserted_id
    time.sleep(1)
    jID2 = jServer.insert_one({
        'jobs': 0,
        'os': platform.system().lower(),
        'roles': ["general"],
        'prototypes': ["detect"],
        'active': True,
        'activationTime': datetime.datetime.utcnow()
    }).inserted_id
    time.sleep(1)
    self.assertTrue(sch.scheduleDetection('test', 'test_conf', [
        {'test0': 'var0', 'test1': 'var1', 'test2': 'var2', 'test3': 'var3',
         'test4': 'var4', 'test5': 'var5', 'test6': 'var6', 'test7': 'var7',
         'test8': 'var8', 'test9': 'var9', 'test10': 'var10'},
        {'test0': 'var0', 'test1': 'var1', 'test2': 'var2', 'test3': 'var3',
         'test4': 'var4', 'test5': 'var5', 'test6': 'var6', 'test7': 'var7',
         'test8': 'var8', 'test9': 'var9', 'test10': 'var10'},
        {'test0': 'var0', 'test1': 'var1', 'test2': 'var2', 'test3': 'var3',
         'test4': 'var4', 'test5': 'var5', 'test6': 'var6', 'test7': 'var7',
         'test8': 'var8', 'test9': 'var9', 'test10': 'var10'},
        {'test0': 'var0', 'test1': 'var1', 'test2': 'var2', 'test3': 'var3',
         'test4': 'var4', 'test5': 'var5', 'test6': 'var6', 'test7': 'var7',
         'test8': 'var8', 'test9': 'var9', 'test10': 'var10'},
        {'test0': 'var0', 'test1': 'var1', 'test2': 'var2', 'test3': 'var3',
         'test4': 'var4', 'test5': 'var5', 'test6': 'var6', 'test7': 'var7',
         'test8': 'var8', 'test9': 'var9', 'test10': 'var10'},
        {'test0': 'var0', 'test1': 'var1', 'test2': 'var2', 'test3': 'var3',
         'test4': 'var4', 'test5': 'var5', 'test6': 'var6', 'test7': 'var7',
         'test8': 'var8', 'test9': 'var9', 'test10': 'var10'},
    ]))
    time.sleep(1)
    self.assertEqual(
        ioc.getCollection('SourceData').find(
            {'grease_data.detection.server': ObjectId(jID1)}).count(),
        3
    )
    self.assertEqual(
        ioc.getCollection('SourceData').find(
            {'grease_data.detection.server': ObjectId(jID2)}).count(),
        3
    )
    self.assertEqual(
        ioc.getCollection('JobServer').find_one({'_id': ObjectId(jID1)})['jobs'], 3)
    self.assertEqual(
        ioc.getCollection('JobServer').find_one({'_id': ObjectId(jID2)})['jobs'], 3)
    jServer.delete_one({'_id': ObjectId(jID1)})
    jServer.delete_one({'_id': ObjectId(jID2)})
    ioc.getCollection('SourceData').drop()
class AutomationTest(TestCase):
    """Automation Test Class

    Version II of GREASE was all about proving stability. Automation testing is
    critically important to ensure reliability during fault isolation. This class
    is an abstract class your tests can implement to ensure they will perform
    exactly as you expect in production.

    Make sure you set the **configuration** class attribute to ensure your
    configuration is tested, the **mock_data** class attribute with the mock data
    dictionary you expect to be sourced in production, and the **expected_data**
    attribute with what you expect detection to find from your mocked source
    data. Then implement the **test_command** method to write standard unittests
    around your automation. The Platform will test your configuration for you,
    and execute **test_command** when `python setup.py test` is executed.

    Attributes:
        configuration (str|dict): Configuration to load for this test
        mock_data (dict): String Key -> Int/Float/String Value pair to mock source data
        expected_data (dict): data you expect context for your command to look like
        enabled (bool): set to true to enable your test to run

    Here is an example::

        class TestAutomationTest(AutomationTest):

            def __init__(self, *args, **kwargs):
                AutomationTest.__init__(self, *args, **kwargs)
                self.configuration = "mongo://test_automation_test"
                self.mock_data = {'ver': 'var'}
                self.expected_data = {'ver': ['var']}
                self.enabled = True

            def test_command(self):
                myCommand = MyCommand()
                self.assertTrue(myCommand.execute({'hostname': 'localhost'}))

    This is a pretty basic example but it will help you get started automatically
    testing your automation!

    Note:
        **YOU MUST SET THE PROPERTY `ENABLED` TO BOOLEAN TRUE IN ORDER FOR YOUR TEST TO BE PICKED UP**
    Note:
        To use a static configuration set `configuration` to a dictionary
    Note:
        To use a MongoDB configuration for a test prefix your configuration's name with mongo://
    Note:
        To use a package configuration for a test prefix your configuration's name with pkg://
    Note:
        To use a filesystem configuration for a test prefix your configuration's path with fs://

    """

    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)
        self.configuration = None
        self.enabled = False
        self.mock_data = {}
        self.expected_data = {}
        self.ioc = GreaseContainer()
        self.detect = Detect(self.ioc)

    def test_configuration(self):
        """Configuration Test

        This method tests your configuration and validates that detection will
        return as you expect

        """
        if not self.enabled:
            raise SkipTest
        self.assertTrue(self.configuration, "Ensure configuration is not empty")
        self.assertIsInstance(self.configuration, str, "Ensure configuration is type string")
        self.assertTrue(self.mock_data, "Ensure mock_data is not empty")
        self.assertIsInstance(self.mock_data, dict, "Ensure mock_data is type dict")
        self.assertTrue(self.expected_data, "Ensure expected_data is not empty")
        self.assertIsInstance(self.expected_data, dict, "Ensure expected_data is type dict")
        config = None
        if str(self.configuration).startswith("mongo://"):
            config = self.ioc.getCollection('Configuration').find_one({
                'name': str(self.configuration).split("://")[1],
                'active': True,
                "type": "prototype_config"
            })
            self.assertTrue(config, "Ensuring MongoDB has configuration")
            config = dict(config)
        elif str(self.configuration).startswith("pkg://"):
            path = pkg_resources.resource_filename(
                'tgt_grease', str(self.configuration).split("://")[1])
            if os.path.isfile(path):
                with open(path, 'rb') as fil:
                    config = json.loads(fil.read())
                self.assertIsInstance(config, dict, "Ensuring Valid JSON")
            else:
                self.assertTrue(
                    False,
                    "Failed to load [{0}] from tgt_grease pkg".format(
                        str(self.configuration).split("://")[1])
                )
        elif str(self.configuration).startswith("fs://"):
            if os.path.isfile(str(self.configuration).split("://")[1]):
                with open(str(self.configuration).split("://")[1], 'rb') as fil:
                    config = json.loads(fil.read())
                self.assertIsInstance(config, dict, "Ensuring Valid JSON")
            else:
                self.assertTrue(
                    False,
                    "Failed to load [{0}] from filesystem".format(
                        str(self.configuration).split("://")[1])
                )
        else:
            self.assertTrue(
                False,
                "Failed to load configuration::Invalid Configuration Location Type"
            )
        self.assertTrue(config, "Ensuring config is not boolean equatable to False")
        result, context = self.detect.detection(self.mock_data, config)
        self.assertTrue(result, "Detection Results")
        self.assertDictEqual(context, self.expected_data, "validating context expected")

    def test_command(self):
        """This method is for **you** to fill out to test your command

        Note:
            The more tests the better! Make sure to add as many tests as you
            need to ensure your automation is always successful

        """
        if not self.enabled:
            raise SkipTest
class Deduplication(object):
    """Responsible for Deduplication Operations

    Deduplication in GREASE is a multi-step process to ensure performance and
    accuracy of deduplication. The overview of this process is this:

        - Step 1: Identify an Object Type 1 Hash Match. A Type 1 Object (T1) is
          a SHA256 hash of a dictionary in a data list. If we can hash the entire
          object and find a match then the object is 100% duplicate.
        - Step 2: Object Type 2 Matching. If a Type 1 (T1) object cannot be found,
          Type 2 Object (T2) deduplication occurs. This will introspect the
          dictionary for each field and map them against other likely objects of
          the same type. If a hash match is found (source + field + value as a
          SHA256) then the field is 100% duplicate. If the aggregate score of all
          fields (or of the specified subset) is above the provided threshold,
          the object is considered duplicate. This prevents similar objects from
          passing through when they are most likely updates to an original object
          that does not need to be computed on. If a field always updates but you
          still need the object, exclude that field from the `field_set` passed
          into the `Deduplicate` function.

    Object examples::

        # Type 1 Object
        {
            '_id': ObjectId,        # <-- MongoDB ObjectID
            'type': Int,            # <-- Always Type 1
            'hash': String,         # <-- SHA256 hash of entire object
            'expiry': DateTime,     # <-- Expiration time if no objects are found to be duplicate after which object will be deleted
            'max_expiry': DateTime, # <-- Expiration time for object to be deleted when reached
            'score': Int,           # <-- Number of times this object has been found
            'source': String        # <-- Source of the object
        }
        # Type 2 Object
        {
            '_id': ObjectId,        # <-- MongoDB ObjectID
            'type': Int,            # <-- Always Type 2
            'source': String,       # <-- Source of data
            'field': String,        # <-- Field in Object
            'value': String,        # <-- Value of Object's field
            'hash': String,         # <-- SHA256 of source + field + value
            'expiry': DateTime,     # <-- Expiration time if no objects are found to be duplicate after which object will be deleted
            'max_expiry': DateTime, # <-- Expiration time for object to be deleted when reached
            'score': Int,           # <-- Number of times this object has been found
            'parentId': ObjectId    # <-- T1 Object ID from parent
        }

    Attributes:
        ioc (GreaseContainer): IoC access for DeDuplication

    """

    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()

    def Deduplicate(self, data, source, configuration, threshold, expiry_hours,
                    expiry_max, collection, field_set=None):
        """Deduplicate data

        This method will deduplicate the `data` object to allow for only unique
        objects to be returned. The collection variable will be the collection
        deduplication data will be stored in

        Args:
            data (list[dict]): **list of single dimensional dictionaries** to deduplicate
            source (str): Source of data being deduplicated
            configuration (str): Configuration Name Provided
            threshold (float): level of duplication allowed in an object (the lower the threshold the more uniqueness is required)
            expiry_hours (int): Hours to retain deduplication data
            expiry_max (int): Maximum days to retain deduplication data
            collection (str): Deduplication collection to use
            field_set (list, optional): Fields to deduplicate on

        Note:
            expiry_hours is specific to how many hours objects will be persisted for if they are not seen again

        Returns:
            list[dict]: Deduplicated data

        """
        # ensure we got a list
        if not isinstance(data, list):
            self.ioc.getLogger().error(
                "Data was not of type list for Deduplication got type [{0}]".format(
                    str(type(data))),
                notify=False,
                verbose=True
            )
            return []
        # ensure there is data to parse
        if len(data) <= 0:
            # empty list, return an empty list
            return []
        self.ioc.getLogger().trace(
            "Starting deduplication from data source [{0}] total records to parse [{1}]".format(
                source, len(data)),
            trace=True
        )
        # now comes James' version of machine learning. I call it
        # "Blue Collar Machine Learning"
        # Pointer to access items in the list
        data_pointer = 0
        # Max Length
        data_max = len(data)
        if data_max == 0:
            # we have no data to process
            self.ioc.getLogger().trace("Length of data is zero", verbose=True)
            return []
        # Thread pool
        threads = []
        # Final result
        final = []
        # loop through the objects
        while data_pointer < data_max:
            # ensure we don't swamp the system resources
            cpu = cpu_percent(interval=.1)
            mem = virtual_memory().percent
            if cpu >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')) or \
                    mem >= int(self.ioc.getConfig().get('NodeInformation', 'ResourceMax')):
                self.ioc.getLogger().trace(
                    "Deduplication sleeping; System resource maximum reached", verbose=True)
                # remove variables
                del cpu
                del mem
                continue
            # Resources are available, let's start cooking down this list
            # Poll the active threads to ensure we are cleaning up
            self.ioc.getLogger().trace("Thread Pool polling Starting", verbose=True)
            threads_final = []
            for thread in threads:
                if thread.isAlive():
                    threads_final.append(thread)
            threads = threads_final
            self.ioc.getLogger().trace("Thread polling complete", verbose=True)
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(len(threads)),
                verbose=True)
            # ensure we do not breach the thread limit for the server
            if len(threads) >= int(self.ioc.getConfig().get(
                    'NodeInformation', 'DeduplicationThreads')):
                self.ioc.getLogger().trace(
                    "Thread max reached. Deduplication waiting for threads to complete",
                    verbose=True
                )
                continue
            # Ensure each object is a dictionary
            if not isinstance(data[data_pointer], dict):
                self.ioc.getLogger().warning(
                    'DeDuplication Received NON-DICT from source: [{0}] Type: [{1}] got: [{2}]'.format(
                        source, str(type(data[data_pointer])), str(data[data_pointer]))
                )
                data_pointer += 1
                continue
            # create thread for deduplication
            proc = threading.Thread(
                target=self.deduplicate_object,
                args=(
                    self.ioc,
                    data[data_pointer],
                    expiry_hours,
                    expiry_max,
                    threshold,
                    source,
                    configuration,
                    final,
                    collection,
                    data_pointer,
                    data_max,
                    field_set,
                ),
                name="GREASE DEDUPLICATION THREAD [{0}/{1}]".format(data_pointer, data_max)
            )
            proc.daemon = True
            proc.start()
            threads.append(proc)
            data_pointer += 1
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(len(threads)),
                verbose=True)
        self.ioc.getLogger().info(
            "All data objects have been threaded for processing", verbose=True)
        # wait for threads to finish out
        while len(threads) > 0:
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(len(threads)),
                verbose=True)
            threads_final = []
            for thread in threads:
                if thread.isAlive():
                    threads_final.append(thread)
            threads = threads_final
            self.ioc.getLogger().trace(
                "Total current deduplication threads [{0}]".format(len(threads)),
                verbose=True)
        # ensure collections expiry timers are in place
        self.ioc.getCollection(collection).create_index(
            [('expiry', 1), ('expireAfterSeconds', 1)])
        self.ioc.getCollection(collection).create_index(
            [('max_expiry', 1), ('expireAfterSeconds', 1)])
        return final

    @staticmethod
    def deduplicate_object(ioc, obj, expiry, expiry_max, threshold, source_name,
                           configuration_name, final, collection, data_pointer=None,
                           data_max=None, field_set=None):
        """DeDuplicate Object

        This is the method to actually deduplicate an object. The `final`
        argument is appended to with the obj if it was successfully deduplicated.

        Args:
            ioc (GreaseContainer): IoC for the instance
            obj (dict): Object to be deduplicated
            expiry (int): Hours to deduplicate for
            expiry_max (int): Maximum days to deduplicate for
            threshold (float): level of duplication allowed in an object (the lower the threshold the more uniqueness is required)
            source_name (str): Source of data being deduplicated
            configuration_name (str): Configuration being deduplicated for
            final (list): List to append `obj` to if unique
            collection (str): Name of deduplication collection
            data_pointer (int): If provided will provide log information relating to thread (Typically used via `Deduplicate`)
            data_max (int): If provided will provide log information relating to thread (Typically used via `Deduplicate`)
            field_set (list): If provided will only deduplicate on list of fields provided

        Returns:
            None: Nothing returned. Updates `final` object

        """
        # first determine if this object has been seen before
        DeDupCollection = ioc.getCollection(collection)
        # copy the object so the caller's dict is not mutated by the added key
        t1test = dict(obj)
        t1test['grease_internal_configuration'] = configuration_name
        T1Hash = DeDupCollection.find_one(
            {'hash': Deduplication.generate_hash_from_obj(t1test)})
        if T1Hash:
            # T1 Found Protocol: We have found a fully duplicate object
            # we have a duplicate source document
            # increase the counter and expiry and move on (DROP)
            ioc.getLogger().debug("Type1 Match found for object", verbose=True)
            # bump the expiry time and move on
            DeDupCollection.update_one(
                {'_id': T1Hash['_id']},
                {"$set": {
                    'score': int(T1Hash['score']) + 1,
                    'expiry': Deduplication.generate_expiry_time(expiry)
                }}
            )
            return
        else:
            # T1 Not Found Protocol: We have a possibly unique object
            ioc.getLogger().debug("Type1 Match not found; Beginning type 2 processing")
            # Create a T1
            T1ObjectId = DeDupCollection.insert_one({
                'expiry': Deduplication.generate_expiry_time(int(expiry)),
                'grease_internal_configuration': configuration_name,
                'max_expiry': Deduplication.generate_max_expiry_time(int(expiry_max)),
                'type': 1,
                'score': 1,
                'source': str(source_name),
                'hash': Deduplication.generate_hash_from_obj(t1test)
            }).inserted_id
            # Begin T2 Deduplication
            compositeScore = Deduplication.object_field_score(
                collection, ioc, source_name, configuration_name, obj,
                str(T1ObjectId), expiry, expiry_max, field_set)
            if compositeScore < threshold:
                # unique obj
                ioc.getLogger().trace(
                    "Unique object! Composite score was: [{0}] threshold: [{1}]".format(
                        compositeScore, threshold),
                    verbose=True
                )
                final.append(obj)
                return
            else:
                # likely duplicate value
                ioc.getLogger().trace(
                    "Object surpassed threshold, suspected to be duplicate! "
                    "Composite score was: [{0}] threshold: [{1}]".format(
                        compositeScore, threshold),
                    verbose=True
                )
                return

    @staticmethod
    def object_field_score(collection, ioc, source_name, configuration_name, obj,
                           objectId, expiry, max_expiry, field_set=None):
        """Returns T2 average uniqueness

        Takes a dictionary and returns the likelihood of that object being
        unique based on data in the collection

        Args:
            collection (str): Deduplication collection name
            ioc (GreaseContainer): IoC Access
            source_name (str): source of data to be deduplicated
            configuration_name (str): configuration name to be deduplicated
            obj (dict): Single dimensional dictionary to be compared against collection
            objectId (str): T1 Hash Mongo ObjectId to be used to associate fields to a T1
            expiry (int): Hours for deduplication to wait before removing a field if not seen again
            max_expiry (int): Days for deduplication to wait before ensuring object is deleted
            field_set (list, optional): List of fields to deduplicate with if provided. Else will use all keys

        Returns:
            float: Duplication Probability

        """
        # generate field list if not provided
        FieldColl = ioc.getCollection(collection)
        if not isinstance(field_set, list) or len(field_set) <= 0:
            field_set = obj.keys()
        # List to hold field level scores
        field_scores = []
        # iterate over the field set
        for field in field_set:
            # ensure key is in the object
            ioc.getLogger().trace("Starting field [{0}]".format(field), verbose=True)
            if field in obj:
                if isinstance(obj.get(field), bytes):
                    value = obj.get(field).decode('utf-8', 'ignore')
                else:
                    value = obj.get(field)
                T2Object = {
                    'source': source_name,
                    'field': field,
                    'value': value,
                    'configuration': configuration_name
                }
                checkDoc = FieldColl.find_one(
                    {'hash': Deduplication.generate_hash_from_obj(T2Object)})
                if checkDoc:
                    # we found a 100% matching T2 object
                    ioc.getLogger().trace("T2 object Located", trace=True)
                    update_statement = {
                        "$set": {
                            'score': int(checkDoc['score']) + 1,
                            'expiry': Deduplication.generate_expiry_time(expiry)
                        }
                    }
                    FieldColl.update_one({'_id': checkDoc['_id']}, update_statement)
                    field_scores.append(100)
                    continue
                else:
                    # We have a possibly unique value
                    ioc.getLogger().trace("T2 object not found", trace=True)
                    # generate a list to collect similarities to other field objects
                    fieldProbabilityList = []
                    for record in FieldColl.find({
                        'source': source_name,
                        'configuration': configuration_name,
                        'field': field,
                        'type': 2
                    }).sort('score', pymongo.ASCENDING).limit(100):
                        if Deduplication.string_match_percentage(
                                record['value'], str(T2Object['value'])) > .95:
                            # We've found a REALLY strong match
                            # Set this field's score to that of the match
                            field_scores.append(
                                100 * Deduplication.string_match_percentage(
                                    record['value'], str(T2Object['value'])))
                            # leave the for loop for this field since we found a
                            # highly probable match
                            break
                        else:
                            fieldProbabilityList.append(
                                100 * Deduplication.string_match_percentage(
                                    record['value'], str(T2Object['value'])))
                    if fieldProbabilityList:
                        # We have at least one result
                        score = float(sum(fieldProbabilityList) / len(fieldProbabilityList))
                        ioc.getLogger().trace(
                            "Field Score [{0}]".format(score), verbose=True)
                        field_scores.append(score)
                    else:
                        # It is a globally unique field
                        field_scores.append(0)
                    # finally persist the new object
                    T2Object['hash'] = Deduplication.generate_hash_from_obj(T2Object)
                    T2Object['score'] = 1
                    T2Object['expiry'] = Deduplication.generate_expiry_time(expiry)
                    T2Object['max_expiry'] = Deduplication.generate_max_expiry_time(max_expiry)
                    T2Object['type'] = 2
                    T2Object['parentId'] = ObjectId(objectId)
                    FieldColl.insert_one(T2Object)
            else:
                ioc.getLogger().warning(
                    "field [{0}] not found in object".format(field),
                    trace=True,
                    notify=False
                )
                continue
        if len(field_scores) == 0:
            return 0.0
        else:
            return float(sum(field_scores) / float(len(field_scores)))

    @staticmethod
    def generate_hash_from_obj(obj):
        """Takes an object and generates a SHA256 Hash of it

        Args:
            obj (object): Hashable object to generate a SHA256 of

        Returns:
            str: Object Hash

        """
        return hashlib.sha256(str(obj).encode('utf-8')).hexdigest()

    @staticmethod
    def generate_expiry_time(hours):
        """Generates UTC Timestamp for hours in the future

        Args:
            hours (int): How many hours in the future to expire on

        Returns:
            datetime.datetime: Datetime object for hours in the future

        """
        return datetime.datetime.utcnow() + datetime.timedelta(hours=int(hours))

    @staticmethod
    def generate_max_expiry_time(days):
        """Generates UTC Timestamp for days in the future

        Args:
            days (int): How many days in the future to expire on

        Returns:
            datetime.datetime: Datetime object for days in the future

        """
        return datetime.datetime.utcnow() + datetime.timedelta(days=int(days))

    @staticmethod
    def string_match_percentage(constant, new_value):
        """Returns the percentage likelihood two strings are identical

        Args:
            constant (str): Value to use as base standard
            new_value (str): Value to compare `constant` against

        Returns:
            float: Percentage likelihood of duplicate value

        """
        return difflib.SequenceMatcher(
            lambda x: x == " ", constant, new_value).quick_ratio()
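
# --- Hedged usage sketch (not part of the original source) ---
# Given one exact repeat and one distinct record, Deduplicate should return
# roughly two objects: the repeat collapses onto its T1 hash, while the
# distinct record stays under the 85.0 threshold. Collection and source
# names here are placeholders.
def example_deduplicate(ioc):
    dedup = Deduplication(ioc)
    data = [
        {'host': 'a', 'state': 'down'},
        {'host': 'a', 'state': 'down'},  # exact duplicate -> dropped via T1
        {'host': 'b', 'state': 'up'},    # unique -> kept
    ]
    return dedup.Deduplicate(
        data, 'example_source', 'example_config', 85.0, 1, 7, 'Dedup_Example')

# string_match_percentage is a thin difflib wrapper: for example, 'server-01'
# vs 'server-02' share 8 of 9 characters, so quick_ratio() returns about
# 0.89 -- below the 0.95 cutoff used in object_field_score.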
def test_scan(self):
    # setup
    configList = [
        {
            "name": "test1",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "TestSource",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        }
    ]
    ioc = GreaseContainer()
    ioc.ensureRegistration()
    ioc.getConfig().set('trace', True, 'Logging')
    ioc.getConfig().set('verbose', True, 'Logging')
    with open(ioc.getConfig().greaseConfigFile, 'r') as fil:
        data = json.loads(fil.read())
    data['Import']['searchPath'].append('tgt_grease.enterprise.Model.tests')
    with open(ioc.getConfig().greaseConfigFile, 'w') as fil:
        fil.write(json.dumps(data, sort_keys=True, indent=4))
    Configuration.ReloadConfig()
    jServer = ioc.getCollection('JobServer')
    jID1 = jServer.insert_one({
        'jobs': 0,
        'os': platform.system().lower(),
        'roles': ["general"],
        'prototypes': ["detect"],
        'active': True,
        'activationTime': datetime.utcnow()
    }).inserted_id
    time.sleep(1)
    jID2 = jServer.insert_one({
        'jobs': 0,
        'os': platform.system().lower(),
        'roles': ["general"],
        'prototypes': ["detect"],
        'active': True,
        'activationTime': datetime.utcnow()
    }).inserted_id
    # Begin Test
    conf = PrototypeConfig(ioc)
    conf.load(reloadConf=True, ConfigurationList=configList)
    scanner = Scan(ioc)
    # Scan Environment
    self.assertTrue(scanner.Parse())
    # Begin ensuring environment is how we expect
    # we assert less or equal because sometimes uuid's are close :p
    self.assertLessEqual(
        ioc.getCollection('SourceData').find(
            {'detectionServer': ObjectId(jID1)}).count(),
        3
    )
    self.assertLessEqual(
        ioc.getCollection('SourceData').find(
            {'detectionServer': ObjectId(jID2)}).count(),
        3
    )
    self.assertLessEqual(
        ioc.getCollection('JobServer').find_one({'_id': ObjectId(jID1)})['jobs'], 3)
    self.assertLessEqual(
        ioc.getCollection('JobServer').find_one({'_id': ObjectId(jID2)})['jobs'], 3)
    # clean up
    with open(ioc.getConfig().greaseConfigFile, 'r') as fil:
        data = json.loads(fil.read())
    # remove collection
    ioc.getCollection('TestProtoType').drop()
    # remove prototypes
    data['NodeInformation']['ProtoTypes'] = []
    # pop search path
    trash = data['Import']['searchPath'].pop()
    # close out
    with open(ioc.getConfig().greaseConfigFile, 'w') as fil:
        fil.write(json.dumps(data, sort_keys=True, indent=4))
    jServer.delete_one({'_id': ObjectId(jID1)})
    jServer.delete_one({'_id': ObjectId(jID2)})
    ioc.getCollection('SourceData').drop()
    ioc.getCollection('Dedup_Sourcing').drop()
    ioc.getConfig().set('trace', False, 'Logging')
    ioc.getConfig().set('verbose', False, 'Logging')
    Configuration.ReloadConfig()
class Detect(object):
    """Detection class for GREASE detect

    This is the model to actually utilize the detectors to parse the sources from scan

    Attributes:
        ioc (GreaseContainer): IOC for scanning
        impTool (ImportTool): Import Utility Instance
        conf (PrototypeConfig): Prototype configuration tool
        scheduler (Scheduling): Prototype Scheduling Service Instance

    """

    def __init__(self, ioc=None):
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.impTool = ImportTool(self.ioc.getLogger())
        self.ioc.ensureRegistration()
        self.conf = PrototypeConfig(self.ioc)
        self.scheduler = Scheduling(self.ioc)

    def detectSource(self):
        """This will perform detection on the oldest source from SourceData

        Returns:
            bool: If detection process was successful

        """
        sourceData = self.getScheduledSource()
        if sourceData:
            if isinstance(sourceData.get('configuration'), bytes):
                conf = sourceData.get('configuration').decode()
            else:
                conf = sourceData.get('configuration')
            configurationData = self.conf.get_config(conf)
            if configurationData:
                self.ioc.getCollection('SourceData').update_one(
                    {'_id': ObjectId(sourceData.get('_id'))},
                    {
                        '$set': {
                            'grease_data.detection.start': datetime.datetime.utcnow()
                        }
                    }
                )
                result, resultData = self.detection(sourceData.get('data'), configurationData)
                if result:
                    # Put constants in detection results
                    resultData['constants'] = self.conf.get_config(
                        configurationData.get('name')).get('constants', {})
                    # Update detection
                    self.ioc.getCollection('SourceData').update_one(
                        {'_id': ObjectId(sourceData.get('_id'))},
                        {
                            '$set': {
                                'grease_data.detection.end': datetime.datetime.utcnow(),
                                'grease_data.detection.detection': resultData
                            }
                        }
                    )
                    # attempt scheduling
                    return self.scheduler.scheduleScheduling(sourceData.get('_id'))
                else:
                    self.ioc.getCollection('SourceData').update_one(
                        {'_id': ObjectId(sourceData.get('_id'))},
                        {
                            '$set': {
                                'grease_data.detection.end': datetime.datetime.utcnow(),
                                'grease_data.detection.detection': {}
                            }
                        }
                    )
                    self.ioc.getLogger().trace("Detection yielded no detection data", trace=True)
                    return True
            else:
                self.ioc.getLogger().error(
                    "Failed to load Prototype Config [{0}]".format(sourceData.get('configuration')),
                    notify=False
                )
                return False
        else:
            self.ioc.getLogger().trace("No sources awaiting detection currently", trace=True)
            return True

    def getScheduledSource(self):
        """Queries for the oldest source that has been assigned for detection

        Returns:
            dict: source awaiting detection

        """
        return self.ioc.getCollection('SourceData').find_one(
            {
                'grease_data.detection.server': ObjectId(self.ioc.getConfig().NodeIdentity),
                'grease_data.detection.start': None,
                'grease_data.detection.end': None,
            },
            # documents store createTime at the top level; sort ascending so
            # the oldest source is detected first, as the docstring promises
            sort=[('createTime', pymongo.ASCENDING)]
        )

    def detection(self, source, configuration):
        """Performs detection on a source with the provided configuration

        Args:
            source (dict): Key->Value pairs from sourcing to detect upon
            configuration (dict): Prototype configuration provided from sourcing

        Returns:
            tuple: Detection Results; first boolean for success, second dict of variables for context

        """
        # Ensure types
        final = {}
        finalBool = False
        if not isinstance(source, dict):
            self.ioc.getLogger().warning("Detection got non-dict source data", notify=False)
            return finalBool, final
        if not isinstance(configuration, dict):
            self.ioc.getLogger().warning("Detection got non-dict configuration", notify=False)
            return finalBool, final
        # Now loop through logical blocks
        for detector, logicBlock in configuration.get('logic', {}).items():
            if not isinstance(logicBlock, list):
                self.ioc.getLogger().warning("Logical Block was not list", trace=True, notify=False)
                # skip malformed logical blocks rather than handing them to a detector
                continue
            detect = self.impTool.load(detector)
            if isinstance(detect, Detector):
                result, resultData = detect.processObject(source, logicBlock)
                if not result:
                    self.ioc.getLogger().trace("Detection yielded false for [{0}]".format(detector), trace=True)
                    finalBool = False
                    break
                else:
                    self.ioc.getLogger().trace("Detection yielded true for [{0}]".format(detector), trace=True)
                    for key, val in resultData.items():
                        final[key] = val
                    finalBool = True
                    continue
            else:
                self.ioc.getLogger().warning("invalid detector [{0}]".format(detector), notify=False)
                # an unknown detector invalidates this configuration
                finalBool = False
                break
        return finalBool, final
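# A minimal usage sketch for Detect.detection. Assumptions: a reachable
# MongoDB and a registered node (as the tests in this document rely on), the
# import path below, and that 'Regex' resolves to a Detector via ImportTool,
# matching the prototype configs shown elsewhere in this section.
from tgt_grease.enterprise.Model import Detect  # import path assumed

detector_model = Detect()
passed, context = detector_model.detection(
    {'character': 'anakin skywalker'},
    {
        'name': 'example',
        'logic': {
            'Regex': [
                {'field': 'character', 'pattern': '.*skywalker.*',
                 'variable': True, 'variable_name': 'character'}
            ]
        }
    }
)
# passed is True only when every logical block matched; context carries the
# variables the detectors extracted for scheduling/execution to use later.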
def test_job_execution(self):
    ioc = GreaseContainer()
    cmd = DaemonProcess(ioc)
    proto = PrototypeConfig(ioc)
    ioc.getCollection('Configuration').insert_one(
        {
            'active': True,
            'type': 'prototype_config',
            "name": "exe_test",
            "job": "help",
            "exe_env": "general",
            "source": "url_source",
            "logic": {
                "Regex": [
                    {
                        "field": "url",
                        "pattern": ".*",
                        'variable': True,
                        'variable_name': 'url'
                    }
                ],
                'Range': [
                    {
                        'field': 'status_code',
                        'min': 199,
                        'max': 201
                    }
                ]
            },
            'constants': {
                'test': 'ver'
            }
        }
    )
    proto.load(reloadConf=True)
    jobid = ioc.getCollection('SourceData').insert_one({
        'grease_data': {
            'sourcing': {
                'server': ObjectId(ioc.getConfig().NodeIdentity)
            },
            'detection': {
                'server': ObjectId(ioc.getConfig().NodeIdentity),
                'start': datetime.datetime.utcnow(),
                'end': datetime.datetime.utcnow(),
                'detection': {}
            },
            'scheduling': {
                'server': ObjectId(ioc.getConfig().NodeIdentity),
                'start': datetime.datetime.utcnow(),
                'end': datetime.datetime.utcnow(),
            },
            'execution': {
                'server': ObjectId(ioc.getConfig().NodeIdentity),
                'assignmentTime': datetime.datetime.utcnow(),
                'completeTime': None,
                'returnData': {},
                'executionSuccess': False,
                'commandSuccess': False,
                'failures': 0
            }
        },
        'source': 'dev',
        'configuration': 'exe_test',
        'data': {},
        'createTime': datetime.datetime.utcnow(),
        'expiry': Deduplication.generate_max_expiry_time(1)
    }).inserted_id
    # Run for a bit
    self.assertTrue(cmd.server())
    self.assertTrue(cmd.drain_jobs(ioc.getCollection('SourceData')))
    result = ioc.getCollection('SourceData').find_one({'_id': ObjectId(jobid)})
    self.assertTrue(result)
    self.assertTrue(result.get('grease_data').get('execution').get('executionSuccess'))
    self.assertTrue(result.get('grease_data').get('execution').get('commandSuccess'))
    ioc.getCollection('SourceData').drop()
    ioc.getCollection('Configuration').drop()
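# Hedged sketch: a hypothetical helper that builds the 'grease_data' envelope
# assembled inline in the test above, so the lifecycle fields (sourcing ->
# detection -> scheduling -> execution) live in one place. The field names
# mirror the documents used throughout this section; the helper itself is not
# part of GREASE.
import datetime

def make_grease_data(node_id):
    now = datetime.datetime.utcnow()
    return {
        'sourcing': {'server': node_id},
        'detection': {'server': node_id, 'start': now, 'end': now, 'detection': {}},
        'scheduling': {'server': node_id, 'start': now, 'end': now},
        'execution': {
            'server': node_id,
            'assignmentTime': now,
            'completeTime': None,
            'returnData': {},
            'executionSuccess': False,
            'commandSuccess': False,
            'failures': 0
        }
    }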
class PrototypeConfig(object):
    """Responsible for Scanning/Detection/Scheduling configuration

    Structure of Configuration::

        {
            'configuration': {
                'pkg': [], # <-- Loaded from pkg_resources.resource_filename('tgt_grease.enterprise.Model', 'config/')
                'fs': [], # <-- Loaded from `<GREASE_DIR>/etc/*.config.json`
                'mongo': [] # <-- Loaded from the Configuration Mongo Collection
            },
            'raw': [], # <-- All loaded configurations
            'sources': [], # <-- list of sources found in configurations
            'source': {}, # <-- keys are source names, values are lists of configs for that source
            'names': [], # <-- list of all configuration names
            'name': {} # <-- configurations keyed by name for direct lookup
        }

    Structure of a configuration file::

        {
            "name": String,
            "job": String,
            "exe_env": String, # <-- If not provided will default to 'general'
            "source": String,
            "logic": {
                # the logical blocks for Detection
            }
        }

    Attributes:
        ioc (GreaseContainer): IOC access

    """

    def __init__(self, ioc=None):
        global GREASE_PROTOTYPE_CONFIGURATION
        if ioc and isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        if not GREASE_PROTOTYPE_CONFIGURATION:
            GREASE_PROTOTYPE_CONFIGURATION = self.load()

    def getConfiguration(self):
        """Returns the Configuration Object loaded into memory

        Returns:
            dict: Configuration object

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if not GREASE_PROTOTYPE_CONFIGURATION:
            self.load(reloadConf=True)
        return GREASE_PROTOTYPE_CONFIGURATION

    def load(self, reloadConf=False, ConfigurationList=None):
        """[Re]loads configuration data about the current execution node

        Configuration data loads from three places in GREASE. The first is internal
        to the package: configuration files that follow the file pattern and ship
        inside the package itself. The next follows the same pattern but is loaded
        from `<GREASE_DIR>/etc/`. The final place GREASE looks for configuration
        data is the `Configuration` collection in MongoDB.

        Args:
            reloadConf (bool): If True this will reload the global object. False will return the object
            ConfigurationList (list of dict): If provided will load the list of dict for config after validation

        Note:
            Providing a configuration *automatically* reloads the memory structure of prototype configuration

        Returns:
            dict: Current Configuration information

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if ConfigurationList:
            conf = dict()
            conf['configuration'] = dict()
            conf['configuration']['ConfigurationList'] = self.validate_config_list(ConfigurationList)
            conf['raw'] = conf['configuration']['ConfigurationList']
            # split by configuration sets
            # the list of configured sources
            conf['sources'] = list()
            # the actual configurations for each source
            conf['source'] = dict()
            # configurations to get via name
            conf['names'] = list()
            # the actual configurations for each config name
            conf['name'] = dict()
            for config in conf.get('raw'):  # type: dict
                if config.get('source') in conf['sources']:
                    conf['source'][config.get('source')].append(config)
                else:
                    conf['sources'].append(config.get('source'))
                    conf['source'][config.get('source')] = list()
                    conf['source'][config.get('source')].append(config)
                if config.get('name') in conf['names']:
                    self.ioc.getLogger().error(
                        "Prototype Configuration [{0}] already found! Overwriting".format(config.get('name'))
                    )
                    conf['name'][config.get('name')] = config
                else:
                    conf['names'].append(config.get('name'))
                    conf['name'][config.get('name')] = config
            GREASE_PROTOTYPE_CONFIGURATION = conf
            return conf
        # fill out raw results
        conf = dict()
        conf['configuration'] = dict()
        conf['raw'] = []
        pkg = self.validate_config_list(
            self.load_from_fs(
                pkg_resources.resource_filename('tgt_grease.enterprise.Model', 'config/')
            )
        )
        for newConfig in pkg:
            conf['raw'].append(newConfig)
        conf['configuration']['pkg'] = pkg
        del pkg
        fs = self.validate_config_list(
            self.load_from_fs(self.ioc.getConfig().get('Configuration', 'dir'))
        )
        for newConfig in fs:
            conf['raw'].append(newConfig)
        conf['configuration']['fs'] = fs
        del fs
        mongo = self.validate_config_list(self.load_from_mongo())
        for newConfig in mongo:
            conf['raw'].append(newConfig)
        conf['configuration']['mongo'] = mongo
        del mongo
        # split by configuration sets
        # the list of configured sources
        conf['sources'] = list()
        # the actual configurations for each source
        conf['source'] = dict()
        # configurations to get via name
        conf['names'] = list()
        # the actual configurations for each config name
        conf['name'] = dict()
        for config in conf.get('raw'):  # type: dict
            if config.get('source') in conf['sources']:
                conf['source'][config.get('source')].append(config)
            else:
                conf['sources'].append(config.get('source'))
                conf['source'][config.get('source')] = list()
                conf['source'][config.get('source')].append(config)
            if config.get('name') in conf['names']:
                self.ioc.getLogger().error(
                    "Prototype Configuration [{0}] already found! Overwriting".format(config.get('name'))
                )
                conf['name'][config.get('name')] = config
            else:
                conf['names'].append(config.get('name'))
                conf['name'][config.get('name')] = config
        # return block
        if not reloadConf:
            return conf
        else:
            GREASE_PROTOTYPE_CONFIGURATION = conf
            return conf

    def get_sources(self):
        """Returns the list of sources to be scanned

        Returns:
            list: List of sources

        """
        global GREASE_PROTOTYPE_CONFIGURATION  # type: dict
        if GREASE_PROTOTYPE_CONFIGURATION:
            return GREASE_PROTOTYPE_CONFIGURATION.get('sources', [])
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration is not loaded", trace=True, notify=False)
            return []

    def get_source(self, name):
        """Get all configurations for a given source by name

        Args:
            name (str): Source name to get

        Returns:
            list[dict]: Configurations if found else empty list

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if GREASE_PROTOTYPE_CONFIGURATION:
            return GREASE_PROTOTYPE_CONFIGURATION.get('source').get(name, [])
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration not loaded", notify=False, trace=True)
            return []

    def get_names(self):
        """Returns the list of names of configs

        Returns:
            list: List of config names

        """
        global GREASE_PROTOTYPE_CONFIGURATION  # type: dict
        if GREASE_PROTOTYPE_CONFIGURATION:
            return GREASE_PROTOTYPE_CONFIGURATION.get('names', [])
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration is not loaded", trace=True, notify=False)
            return []

    def get_config(self, name):
        """Get Configuration by name

        Args:
            name (str): Configuration name to get

        Returns:
            dict: Configuration if found else empty dict

        """
        global GREASE_PROTOTYPE_CONFIGURATION
        if GREASE_PROTOTYPE_CONFIGURATION:
            if GREASE_PROTOTYPE_CONFIGURATION.get('name'):
                return GREASE_PROTOTYPE_CONFIGURATION.get('name').get(name, {})
            else:
                self.ioc.getLogger().error(
                    "GREASE Configuration Not Found", notify=False, trace=True)
                return {}
        else:
            self.ioc.getLogger().error(
                "GREASE Prototype configuration not loaded", notify=False, trace=True)
            return {}

    def load_from_fs(self, directory):
        """Loads configurations from provided directory

        Note:
            Pattern is `*.config.json`

        Args:
            directory (str): Directory to load from

        Returns:
            list of dict: configurations

        """
        self.ioc.getLogger().trace(
            "Loading Configurations from directory [{0}]".format(directory), trace=True)
        intermediate = list()
        matches = []
        for root, dirnames, filenames in os.walk(directory):
            for filename in fnmatch.filter(filenames, '*.config.json'):
                matches.append(os.path.join(root, filename))
        for doc in matches:
            self.ioc.getLogger().trace("Attempting to load [{0}]".format(doc), trace=True)
            with open(doc, 'rb') as current_file:
                content = current_file.read()
            if isinstance(content, bytes):
                content = content.decode()
            try:
                intermediate.append(json.loads(content))
                self.ioc.getLogger().trace("Successfully loaded [{0}]".format(doc), trace=True)
            except ValueError:
                self.ioc.getLogger().error("Failed to load [{0}]".format(doc), trace=True, notify=False)
                continue
        self.ioc.getLogger().trace(
            "total documents returned from fs [{0}]".format(len(intermediate)), trace=True)
        return intermediate

    def load_from_mongo(self):
        """Returns all active configurations from the mongo collection Configuration

        Structure of Configuration expected in Mongo::

            {
                "name": String,
                "job": String,
                "exe_env": String, # <-- If not provided will default to 'general'
                "active": Boolean, # <-- set to true to load configuration
                "type": "prototype_config", # <-- MUST BE THIS VALUE; it is the config type
                "source": String,
                "logic": {
                    # the logical blocks for Detection
                }
            }

        Returns:
            list of dict: Configurations

        """
        self.ioc.getLogger().trace("Loading Configurations from mongo", trace=True)
        mConf = []
        for conf in self.ioc.getCollection('Configuration').find({
            'active': True,
            'type': 'prototype_config'
        }):
            mConf.append(dict(conf))
        self.ioc.getLogger().trace(
            "total documents returned from mongo [{0}]".format(len(mConf)), trace=True)
        return mConf

    def validate_config_list(self, configs):
        """Validates a configuration List

        Args:
            configs (list[dict]): Configuration List

        Returns:
            list: The valid configurations

        """
        final = []
        self.ioc.getLogger().trace("Total configurations to validate [{0}]".format(len(configs)))
        for conf in configs:
            if self.validate_config(conf):
                final.append(conf)
        return final

    def validate_config(self, config):
        """Validates a configuration

        The default JSON Schema is this::

            {
                "name": String,
                "job": String,
                "exe_env": String, # <-- If not provided will default to 'general'
                "source": String,
                "logic": {
                    # the logical blocks for Detection
                }
            }

        Args:
            config (dict): Configuration to validate

        Returns:
            bool: If it is a valid configuration

        """
        self.ioc.getLogger().trace(
            "Configuration to be validated: [{0}]".format(config), trace=True)
        if not isinstance(config, dict):
            self.ioc.getLogger().error(
                "Configuration Validation Failed! Not of Type Dict::Got [{0}]".format(str(type(config))),
                trace=True,
                notify=False
            )
            # a non-dict cannot be inspected any further
            return False
        if config.get('name'):
            if not isinstance(config.get('name'), str):
                config['name'] = str(config.get('name'))
        else:
            self.ioc.getLogger().error(
                "Configuration does not have valid name field", trace=True, notify=False)
            return False
        if config.get('job'):
            if not isinstance(config.get('job'), str):
                config['job'] = str(config.get('job'))
        else:
            self.ioc.getLogger().error(
                "Configuration does not have valid job field", trace=True, notify=False)
            return False
        if config.get('source'):
            if not isinstance(config.get('source'), str):
                config['source'] = str(config.get('source'))
        else:
            self.ioc.getLogger().error(
                "Configuration does not have valid source field", trace=True, notify=False)
            return False
        if not isinstance(config.get('logic'), dict):
            self.ioc.getLogger().error(
                "Configuration does not have valid logic field", trace=True, notify=False)
            return False
        if not config.get('logic'):
            # empty dictionary check AKA no logical blocks
            return False
        for key, params in config.get('logic').items():
            if not isinstance(params, list):
                self.ioc.getLogger().error(
                    "Configuration logic field value was not a list!", trace=True, notify=False)
                return False
            for block in params:
                if not isinstance(block, dict):
                    self.ioc.getLogger().error(
                        "Configuration logical block was not dict", trace=True, notify=False)
                    return False
        return True
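# A compact restatement of the rules validate_config enforces above, as a
# standalone sketch: name/job/source must be present (values are coerced to
# str), and logic must be a non-empty dict whose values are lists of dicts.
# Illustration only; the real method also logs each failure.
def looks_valid(config):
    if not isinstance(config, dict):
        return False
    if not all(config.get(key) for key in ('name', 'job', 'source')):
        return False
    logic = config.get('logic')
    if not isinstance(logic, dict) or not logic:
        return False
    return all(
        isinstance(params, list) and all(isinstance(block, dict) for block in params)
        for params in logic.values()
    )

good = {
    'name': 'demo', 'job': 'help', 'source': 'url_source',
    'logic': {'Regex': [{'field': 'url', 'pattern': '.*'}]}
}
bad = {'name': 'demo', 'source': 'url_source', 'logic': {}}  # no job, empty logic
assert looks_valid(good) and not looks_valid(bad)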
def test_all_load_bad(self):
    ioc = GreaseContainer()
    # clean up
    for root, dirnames, filenames in os.walk(ioc.getConfig().get('Configuration', 'dir')):
        for filename in fnmatch.filter(filenames, '*.config.json'):
            self.assertIsNone(os.remove(os.path.join(root, filename)))
    # clean up
    for root, dirnames, filenames in os.walk(pkg_resources.resource_filename('tgt_grease.enterprise.Model', 'config/')):
        for filename in fnmatch.filter(filenames, '*.config.json'):
            self.assertIsNone(os.remove(os.path.join(root, filename)))
    configList = [
        {
            "name": "test1",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "swapi",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        },
        {
            "name": "badtest1",
            "exe_env": "windows",
            "source": "stackOverflow",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        },
        {
            "name": "test3",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "Google",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ],
                "exists": [
                    {
                        "field": "var"
                    }
                ]
            }
        }
    ]
    GoodConfigList = [
        {
            "name": "test1",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "swapi",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        },
        {
            "name": "test3",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "Google",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ],
                "exists": [
                    {
                        "field": "var"
                    }
                ]
            }
        }
    ]
    i = 0
    length = len(configList) - 1
    while i <= length:
        if i == 0:
            with open(ioc.getConfig().get('Configuration', 'dir') + 'conf{0}.config.json'.format(i), 'w') as fil:
                fil.write(json.dumps(configList[i], indent=4))
        if i == 1:
            with open(pkg_resources.resource_filename('tgt_grease.enterprise.Model', 'config/') + 'conf{0}.config.json'.format(i), 'w') as fil:
                fil.write(json.dumps(configList[i], indent=4))
        if i == 2:
            ioc.getCollection('Configuration').insert_one(configList[i])
        i += 1
    ioc.getCollection('Configuration').update_many({}, {'$set': {'active': True, 'type': 'prototype_config'}})
    # sleep because travis is slow
    time.sleep(1.5)
    conf = PrototypeConfig(ioc)
    conf.load(reloadConf=True)
    self.assertEqual(len(conf.getConfiguration().get('configuration').get('mongo')), 1)
    self.assertEqual(len(conf.getConfiguration().get('configuration').get('pkg')), 0)
    self.assertEqual(len(conf.getConfiguration().get('configuration').get('fs')), 1)
    self.assertEqual(len(conf.getConfiguration().get('raw')), len(GoodConfigList))
    self.assertEqual(len(conf.getConfiguration().get('source').get('swapi')), 1)
    self.assertEqual(len(conf.getConfiguration().get('source').get('Google')), 1)
    self.assertEqual(2, len(conf.get_names()))
    self.assertEqual(len(conf.get_source('Google')), 1)
    self.assertTrue(isinstance(conf.get_config('test1'), dict))
    self.assertTrue(conf.get_config('test1'))
    # clean up
    ioc.getCollection('Configuration').drop()
    for root, dirnames, filenames in os.walk(ioc.getConfig().get('Configuration', 'dir')):
        for filename in fnmatch.filter(filenames, '*.config.json'):
            self.assertIsNone(os.remove(os.path.join(root, filename)))
    # clean up
    for root, dirnames, filenames in os.walk(pkg_resources.resource_filename('tgt_grease.enterprise.Model', 'config/')):
        for filename in fnmatch.filter(filenames, '*.config.json'):
            self.assertIsNone(os.remove(os.path.join(root, filename)))
    # clear the config
    conf.load(reloadConf=True)
def test_mongo_load_bad(self):
    ioc = GreaseContainer()
    # clean up
    for root, dirnames, filenames in os.walk(ioc.getConfig().get('Configuration', 'dir')):
        for filename in fnmatch.filter(filenames, '*.config.json'):
            self.assertIsNone(os.remove(os.path.join(root, filename)))
    configList = [
        {
            "name": "test1",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "swapi",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        },
        {
            "name": "badtest1",
            "exe_env": "windows",
            "source": "stackOverflow",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        },
        {
            "name": "test3",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "Google",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ],
                "exists": [
                    {
                        "field": "var"
                    }
                ]
            }
        }
    ]
    GoodConfigList = [
        {
            "name": "test1",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "swapi",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ]
            }
        },
        {
            "name": "test3",
            "job": "fakeJob",
            "exe_env": "windows",
            "source": "Google",
            "logic": {
                "regex": [
                    {
                        "field": "character",
                        "pattern": ".*skywalker.*"
                    }
                ],
                "exists": [
                    {
                        "field": "var"
                    }
                ]
            }
        }
    ]
    for conf in configList:
        ioc.getCollection('Configuration').insert_one(conf)
    ioc.getCollection('Configuration').update_many({}, {'$set': {'active': True, 'type': 'prototype_config'}})
    # sleep because travis is slow sometimes
    time.sleep(1.5)
    conf = PrototypeConfig(ioc)
    conf.load(reloadConf=True)
    self.assertEqual(len(conf.getConfiguration().get('configuration').get('mongo')), len(GoodConfigList))
    self.assertEqual(len(conf.getConfiguration().get('raw')), len(GoodConfigList))
    self.assertEqual(len(conf.getConfiguration().get('source').get('swapi')), 1)
    self.assertEqual(len(conf.getConfiguration().get('source').get('Google')), 1)
    self.assertEqual(2, len(conf.get_sources()))
    self.assertEqual(2, len(conf.get_names()))
    self.assertEqual(len(conf.get_source('Google')), 1)
    self.assertTrue(isinstance(conf.get_config('test1'), dict))
    self.assertTrue(conf.get_config('test1'))
    # clean up
    ioc.getCollection('Configuration').drop()
    # clear the config
    conf.load(reloadConf=True)
def test_get_collection(self):
    ioc = GreaseContainer()
    self.assertTrue(isinstance(ioc.getMongo(), Mongo))
    coll = ioc.getCollection('TestCollection')
    self.assertTrue(isinstance(coll, Collection))
    self.assertEqual(coll.name, "TestCollection")
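# Tiny usage sketch for collection access through the IoC container, matching
# the test above: getCollection returns a pymongo Collection bound to the
# GREASE database. Assumes a reachable MongoDB instance, as these tests do.
ioc = GreaseContainer()
coll = ioc.getCollection('TestCollection')
coll.insert_one({'ping': True})
assert coll.find_one({'ping': True})
coll.drop()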
class NodeMonitoring(object):
    """Monitors cluster nodes for unhealthy state

    Attributes:
        ioc (GreaseContainer): IoC Access
        centralScheduler (Scheduling): Central Scheduling Instance
        scheduler (Scheduler): Scheduling Model Instance

    """

    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.centralScheduler = Scheduling(self.ioc)
        self.scheduler = Scheduler(self.ioc)

    def monitor(self):
        """Monitoring process

        Returns:
            bool: If successful monitoring run occurred

        """
        servers = self.getServers()
        retVal = False
        self.ioc.getLogger().debug("Total servers to monitor [{0}]".format(len(servers)), trace=True)
        for server in servers:
            if self.serverAlive(server.get('_id')):
                retVal = True
                continue
            else:
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to be culled from pool".format(server.get('_id')))
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to be deactivated".format(server.get('_id')))
                if not self.deactivateServer(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed deactivating server [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to reallocate detect jobs".format(server.get('_id'))
                )
                if not self.rescheduleDetectJobs(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed rescheduling detect jobs [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to reallocate schedule jobs".format(server.get('_id'))
                )
                if not self.rescheduleScheduleJobs(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed rescheduling schedule jobs [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
                self.ioc.getLogger().warning(
                    "Server [{0}] preparing to reallocate jobs".format(server.get('_id'))
                )
                if not self.rescheduleJobs(server.get('_id')):
                    self.ioc.getLogger().error(
                        "Failed rescheduling jobs [{0}]".format(server.get('_id'))
                    )
                    retVal = False
                    break
        return retVal

    def scanComplete(self):
        """Enters a completed source so that this local server stays alive for the next run

        This method is how the server's 'heart' beats after each run. It inserts a
        completed SourceData document and increments the job counter in the JobServer
        document.

        Returns:
            None: Writes a MongoDB Document

        """
        self.ioc.getCollection('SourceData').insert_one({
            'grease_data': {
                'sourcing': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity)
                },
                'detection': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity),
                    'start': datetime.datetime.utcnow(),
                    'end': datetime.datetime.utcnow(),
                    'detection': {}
                },
                'scheduling': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity),
                    'start': datetime.datetime.utcnow(),
                    'end': datetime.datetime.utcnow()
                },
                'execution': {
                    'server': ObjectId(self.ioc.getConfig().NodeIdentity),
                    'assignmentTime': datetime.datetime.utcnow(),
                    'completeTime': datetime.datetime.utcnow(),
                    'returnData': {},
                    'executionSuccess': True,
                    'commandSuccess': True,
                    'failures': 0
                }
            },
            'source': 'grease_internal_node_monitoring',
            'configuration': None,
            'data': {},
            'createTime': datetime.datetime.utcnow(),
            'expiry': Deduplication.generate_max_expiry_time(1)
        })
        server = self.ioc.getCollection('JobServer').find_one(
            {'_id': ObjectId(self.ioc.getConfig().NodeIdentity)})
        if not server:
            self.ioc.getLogger().critical(
                "Failed to find server [{0}] after monitoring occurred!".format(self.ioc.getConfig().NodeIdentity)
            )
            # without a JobServer document there is nothing to increment
            return
        self.ioc.getCollection('JobServer').update_one(
            {'_id': ObjectId(self.ioc.getConfig().NodeIdentity)},
            {'$set': {'jobs': dict(server).get('jobs', 0) + 1}}
        )

    def getServers(self):
        """Returns the servers to be monitored this cycle

        Returns:
            list[dict]: List of servers

        """
        final = []
        servers = self.ioc.getCollection('JobServer').find({'active': True})
        for server in servers:
            final.append(dict(server))
        return final

    def serverAlive(self, serverId):
        """Checks to see if server is alive

        This method checks if the serverID exists in the collection and determines if
        its execution number has changed recently. If it is a newly configured node it
        will be added to the monitoring collection.

        Args:
            serverId (str): ObjectId of server

        Returns:
            bool: If server is alive

        """
        # Server Health Collection
        coll = self.ioc.getCollection('ServerHealth')
        Server = coll.find_one({'server': ObjectId(serverId)})
        if Server:
            # We have a server already in the system
            serverStats = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
            if serverStats:
                # compare previous results to see if there has been change
                if dict(Server).get('jobs', 0) < dict(serverStats).get('jobs', 0):
                    # Job Server Numbers have changed
                    coll.update_one(
                        {'_id': Server['_id']},
                        {
                            '$set': {
                                'jobs': dict(serverStats).get('jobs', 0),
                                'checkTime': datetime.datetime.utcnow()
                            }
                        }
                    )
                    self.ioc.getLogger().trace("JobServer [{0}] is alive".format(serverId), trace=True)
                    return True
                else:
                    if dict(Server).get('checkTime', datetime.datetime.utcnow()) < \
                            datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                        # server has aged out
                        self.ioc.getLogger().trace(
                            "JobServer [{0}] is not alive; Timestamp has not changed in ten minutes".format(serverId),
                            trace=True
                        )
                        return False
                    else:
                        # server is in a degraded state
                        self.ioc.getLogger().warning("JobServer [{0}] is degraded!".format(serverId), trace=True)
                        return True
            else:
                # Failed to find server in JobServer collection
                self.ioc.getLogger().error("JobServer not found during node monitoring! [{0}]".format(serverId))
                return False
        else:
            # we have a new server
            serverStats = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
            if serverStats:
                coll.insert_one(
                    {
                        'server': ObjectId(serverId),
                        'jobs': dict(serverStats).get('jobs', 0),
                        'checkTime': datetime.datetime.utcnow()
                    }
                )
                self.ioc.getLogger().info("New JobServer persisted in monitoring [{0}]".format(serverId))
                return True
            else:
                # Failed to find server in JobServer collection
                self.ioc.getLogger().error("New JobServer not found during node monitoring! [{0}]".format(serverId))
                return False

    def deactivateServer(self, serverId):
        """Deactivates server from pool

        Args:
            serverId (str): ObjectId to deactivate

        Returns:
            bool: If deactivation is successful

        """
        if self.ioc.getCollection('JobServer').update_one(
                {'_id': ObjectId(serverId)},
                {
                    '$set': {
                        'active': False
                    }
                }
        ).modified_count < 1:
            self.ioc.getLogger().warning("Server [{0}] failed to be deactivated".format(serverId))
            return False
        else:
            self.ioc.getLogger().warning("Server [{0}] deactivated".format(serverId))
            return True

    def rescheduleDetectJobs(self, serverId):
        """Reschedules any detection jobs

        Args:
            serverId (str): Server ObjectId

        Returns:
            bool: rescheduling success

        """
        retval = True
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
        if not server:
            self.ioc.getLogger().error(
                "Failed to load server details while trying to reschedule detection [{0}]".format(serverId)
            )
            return False
        for job in self.ioc.getCollection('SourceData').find(
                {
                    'grease_data.detection.server': ObjectId(serverId),
                    'grease_data.detection.start': None,
                    'grease_data.detection.end': None
                }
        ):
            job = dict(job)
            if not self.centralScheduler.scheduleDetection(job.get('source'), job.get('configuration'), [job]):
                retval = False
                break
            else:
                self.ioc.getCollection('JobServer').update_one(
                    {'_id': ObjectId(serverId)},
                    {
                        '$set': {
                            'jobs': dict(server).get('jobs', 0) - 1
                        }
                    }
                )
        return retval

    def rescheduleScheduleJobs(self, serverId):
        """Reschedules any scheduling jobs

        Args:
            serverId (str): Server ObjectId

        Returns:
            bool: rescheduling success

        """
        retval = True
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
        if not server:
            self.ioc.getLogger().error(
                "Failed to load server details while trying to reschedule schedules [{0}]".format(serverId)
            )
            return False
        for job in self.ioc.getCollection('SourceData').find(
                {
                    'grease_data.scheduling.server': ObjectId(serverId),
                    'grease_data.scheduling.start': None,
                    'grease_data.scheduling.end': None
                }
        ):
            job = dict(job)
            if not self.centralScheduler.scheduleScheduling(job.get('_id')):
                retval = False
                break
            else:
                self.ioc.getCollection('JobServer').update_one(
                    {'_id': ObjectId(serverId)},
                    {
                        '$set': {
                            'jobs': dict(server).get('jobs', 0) - 1
                        }
                    }
                )
        return retval

    def rescheduleJobs(self, serverId):
        """Reschedules any failed or incomplete execution jobs

        Args:
            serverId (str): Server ObjectId

        Returns:
            bool: rescheduling success

        """
        retval = True
        server = self.ioc.getCollection('JobServer').find_one({'_id': ObjectId(serverId)})
        if not server:
            self.ioc.getLogger().error(
                "Failed to load server details while trying to reschedule jobs [{0}]".format(serverId)
            )
            return False
        for job in self.ioc.getCollection('SourceData').find(
                {
                    'grease_data.execution.server': ObjectId(serverId),
                    'grease_data.execution.commandSuccess': False,
                    'grease_data.execution.executionSuccess': False,
                    'grease_data.execution.failures': {'$lt': 6}
                }
        ):
            job = dict(job)
            if not self.scheduler.schedule(job):
                retval = False
                break
            else:
                self.ioc.getCollection('JobServer').update_one(
                    {'_id': ObjectId(serverId)},
                    {
                        '$set': {
                            'jobs': dict(server).get('jobs', 0) - 1
                        }
                    }
                )
        return retval
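# Standalone sketch of the liveness rule serverAlive applies above: a node
# whose job counter moved is alive; a node whose counter is unchanged is dead
# once its last check is over ten minutes old, and merely "degraded" before
# that. Illustration only, independent of Mongo.
import datetime

def liveness(previous_jobs, current_jobs, last_check, now=None):
    now = now or datetime.datetime.utcnow()
    if previous_jobs < current_jobs:
        return 'alive'
    if last_check < now - datetime.timedelta(minutes=10):
        return 'dead'
    return 'degraded'

stale = datetime.datetime.utcnow() - datetime.timedelta(minutes=11)
assert liveness(5, 6, stale) == 'alive'
assert liveness(5, 5, stale) == 'dead'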
class Scheduling(object):
    """Central scheduling class for GREASE

    This class routes data to nodes within GREASE

    Attributes:
        ioc (GreaseContainer): IoC access for DeDuplication

    """

    def __init__(self, ioc=None):
        if isinstance(ioc, GreaseContainer):
            self.ioc = ioc
        else:
            self.ioc = GreaseContainer()
        self.ioc.ensureRegistration()

    def scheduleDetection(self, source, configName, data):
        """Schedule a Source Parse to detection

        This method will take a list of single dimension dictionaries and schedule them for detection

        Args:
            source (str): Name of the source
            configName (str): Configuration Data was sourced from
            data (list[dict]): Data to be scheduled for detection

        Returns:
            bool: Scheduling success

        """
        # check the type first; calling len() on a non-sized object would raise
        if not isinstance(data, list) or len(data) == 0:
            self.ioc.getLogger().trace(
                "Data provided is empty or not a list; type: [{0}]".format(str(type(data))),
                trace=True
            )
            return False
        self.ioc.getLogger().trace("Preparing to schedule [{0}] source objects".format(len(data)), trace=True)
        sourceCollect = self.ioc.getCollection('SourceData')
        jServerCollect = self.ioc.getCollection('JobServer')
        # begin scheduling loop of each block
        for elem in data:
            if not isinstance(elem, dict):
                self.ioc.getLogger().warning(
                    "Element from data not of type dict! Got [{0}] DROPPED".format(str(type(elem))),
                    notify=False
                )
                continue
            server, jobCount = self.determineDetectionServer()
            if server:
                sourceCollect.insert_one({
                    'grease_data': {
                        'sourcing': {
                            'server': ObjectId(self.ioc.getConfig().NodeIdentity)
                        },
                        'detection': {
                            'server': ObjectId(server),
                            'start': None,
                            'end': None,
                            'detection': {}
                        },
                        'scheduling': {
                            'server': None,
                            'start': None,
                            'end': None
                        },
                        'execution': {
                            'server': None,
                            'assignmentTime': None,
                            'completeTime': None,
                            'returnData': {},
                            'executionSuccess': False,
                            'commandSuccess': False,
                            'failures': 0
                        }
                    },
                    'source': str(source),
                    'configuration': str(configName),
                    'data': elem,
                    'createTime': datetime.datetime.utcnow(),
                    'expiry': Deduplication.generate_max_expiry_time(1)
                })
                jServerCollect.update_one(
                    {'_id': ObjectId(server)},
                    {'$set': {'jobs': int(jobCount) + 1}}
                )
            else:
                self.ioc.getLogger().warning(
                    "Failed to find detection server for data object from source [{0}]; DROPPED".format(source),
                    notify=False
                )
                self.ioc.getLogger().warning(
                    "Detection scheduling failed. Could not find detection server",
                    notify=False
                )
                return False
        return True

    def scheduleScheduling(self, objectId):
        """Schedule a source for job scheduling

        This method schedules a source for job scheduling

        Args:
            objectId (str): MongoDB ObjectId to schedule

        Returns:
            bool: If scheduling was successful

        """
        server, jobCount = self.determineSchedulingServer()
        if not server:
            self.ioc.getLogger().error("Failed to find scheduling server", notify=False)
            return False
        self.ioc.getCollection('SourceData').update_one(
            {'_id': ObjectId(objectId)},
            {
                '$set': {
                    'grease_data.scheduling.server': ObjectId(server),
                    'grease_data.scheduling.start': None,
                    'grease_data.scheduling.end': None
                }
            }
        )
        # the job counter lives on the JobServer document, not on SourceData
        self.ioc.getCollection('JobServer').update_one(
            {'_id': ObjectId(server)},
            {'$set': {'jobs': int(jobCount) + 1}}
        )
        return True

    def determineDetectionServer(self):
        """Determines detection server to use

        Finds the detection server available for a new detection job

        Returns:
            tuple: MongoDB Object ID of server & current job count

        """
        result = self.ioc.getCollection('JobServer').find({
            'active': True,
            'prototypes': 'detect'
        }).sort('jobs', pymongo.ASCENDING).limit(1)
        if result.count():
            return str(result[0]['_id']), int(result[0]['jobs'])
        else:
            return "", 0

    def determineSchedulingServer(self):
        """Determines scheduling server to use

        Finds the scheduling server available for a new scheduling job

        Returns:
            tuple: MongoDB Object ID of server & current job count

        """
        # sort ascending so the least-loaded node is selected, matching
        # determineDetectionServer
        result = self.ioc.getCollection('JobServer').find({
            'active': True,
            'prototypes': 'schedule'
        }).sort('jobs', pymongo.ASCENDING).limit(1)
        if result.count():
            return str(result[0]['_id']), int(result[0]['jobs'])
        else:
            return "", 0

    def determineExecutionServer(self, role):
        """Determines execution server to use

        Finds the execution server available for a new execution job

        Returns:
            tuple: MongoDB Object ID of server & current job count; both empty
            when no server can be found

        """
        # sort ascending so the least-loaded node is selected, matching
        # determineDetectionServer
        result = self.ioc.getCollection('JobServer').find({
            'active': True,
            'roles': str(role)
        }).sort('jobs', pymongo.ASCENDING).limit(1)
        if result.count():
            return str(result[0]['_id']), int(result[0]['jobs'])
        else:
            return "", 0
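# Sketch of the server-selection rule the determine*Server methods implement:
# filter to active nodes advertising the wanted prototype, then take the node
# with the fewest jobs. Shown against a plain list instead of the JobServer
# collection so the ordering is explicit.
servers = [
    {'_id': 'a', 'jobs': 4, 'active': True, 'prototypes': ['detect']},
    {'_id': 'b', 'jobs': 1, 'active': True, 'prototypes': ['detect']},
]
candidates = sorted(
    (s for s in servers if s['active'] and 'detect' in s['prototypes']),
    key=lambda s: s['jobs']
)
assert candidates[0]['_id'] == 'b'  # least-loaded node wins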
def test_real(self):
    #############################################
    # SETUP UP TIME
    #############################################
    ioc = GreaseContainer()
    pConf = PrototypeConfig(ioc)
    ioc.ensureRegistration()
    ioc.getCollection('JobServer').update_one(
        {'_id': ObjectId(ioc.getConfig().NodeIdentity)},
        {'$set': {
            'prototypes': ['scan', 'detect', 'schedule']
        }})
    ioc.getCollection('Configuration').insert_one({
        'active': True,
        'type': 'prototype_config',
        "name": "full_stack_test",
        "job": "help",
        "exe_env": "general",
        "source": "url_source",
        "url": ['http://google.com'],
        "logic": {
            "Regex": [{
                "field": "url",
                "pattern": ".*",
                'variable': True,
                'variable_name': 'url'
            }],
            'Range': [{
                'field': 'status_code',
                'min': 199,
                'max': 201
            }]
        },
        'constants': {
            'test': 'ver'
        }
    })
    pConf.load(reloadConf=True)
    #############################################
    # EXECUTE SCANNING
    #############################################
    Scanner = scan()
    Scanner.ioc.getLogger().getConfig().set('verbose', True, 'Logging')
    Scanner.ioc.getLogger().getConfig().set('trace', True, 'Logging')
    Scanner.ioc.getLogger().getConfig().set('config', 'full_stack_test', 'Sourcing')
    self.assertTrue(Scanner.execute({'loop': 1}))
    #############################################
    # ASSERT SCANNING
    #############################################
    self.assertTrue(
        ioc.getCollection('SourceData').find_one({
            'grease_data.sourcing.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.detection.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.detection.start': None,
            'grease_data.detection.end': None
        }))
    #############################################
    # EXECUTE DETECTION
    #############################################
    Detect = detect()
    Detect.ioc.getLogger().getConfig().set('verbose', True, 'Logging')
    Detect.ioc.getLogger().getConfig().set('trace', True, 'Logging')
    Detect.ioc.getLogger().getConfig().set('config', 'full_stack_test', 'Sourcing')
    self.assertTrue(Detect.execute({'loop': 1}))
    #############################################
    # ASSERT DETECTION
    #############################################
    self.assertTrue(
        ioc.getCollection('SourceData').find_one({
            'grease_data.sourcing.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.detection.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.scheduling.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.scheduling.start': None,
            'grease_data.scheduling.end': None
        }))
    #############################################
    # EXECUTE SCHEDULING
    #############################################
    Scheduling = schedule()
    Scheduling.ioc.getLogger().getConfig().set('verbose', True, 'Logging')
    Scheduling.ioc.getLogger().getConfig().set('trace', True, 'Logging')
    Scheduling.ioc.getLogger().getConfig().set('config', 'full_stack_test', 'Sourcing')
    self.assertTrue(Scheduling.execute({'loop': 1}))
    #############################################
    # ASSERT SCHEDULING
    #############################################
    self.assertTrue(
        ioc.getCollection('SourceData').find_one({
            'grease_data.sourcing.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.detection.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.scheduling.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.execution.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.execution.start': None,
            'grease_data.execution.end': None
        }))
    #############################################
    # EXECUTE JOBS
    #############################################
    ioc.getCollection('JobServer').update_one(
        {'_id': ObjectId(ioc.getConfig().NodeIdentity)},
        {'$set': {
            'prototypes': []
        }})
    Daemon = DaemonProcess(ioc)
    Daemon.ioc.getLogger().getConfig().set('verbose', True, 'Logging')
    Daemon.ioc.getLogger().getConfig().set('trace', True, 'Logging')
    Daemon.ioc.getLogger().getConfig().set('config', 'full_stack_test', 'Sourcing')
    self.assertTrue(Daemon.server())
    self.assertTrue(Daemon.drain_jobs(ioc.getCollection('SourceData')))
    #############################################
    # ASSERT JOB EXECUTION
    #############################################
    # sleep a few seconds to let help complete
    time.sleep(5)
    self.assertTrue(
        ioc.getCollection('SourceData').find_one({
            'grease_data.sourcing.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.detection.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.scheduling.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.execution.server': ObjectId(ioc.getConfig().NodeIdentity),
            'grease_data.execution.commandSuccess': True,
            'grease_data.execution.executionSuccess': True
        }))
    #############################################
    # CLEAN UP TIME
    #############################################
    ioc.getCollection('JobServer').update_one(
        {'_id': ObjectId(ioc.getConfig().NodeIdentity)},
        {'$set': {
            'prototypes': []
        }})
    ioc.getCollection('Configuration').drop()
    ioc.getCollection('SourceData').drop()
    ioc.getCollection('DeDup_Sourcing').drop()
    pConf.load(reloadConf=True)
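# Orientation note: the full-stack test above walks one document through the
# four prototype stages in order. Restated compactly (names match the
# grease_data fields used throughout this section):
#   scan     -> inserts SourceData, assigns grease_data.detection.server
#   detect   -> fills grease_data.detection.detection, hands off to scheduling
#   schedule -> assigns grease_data.execution.server
#   execute  -> DaemonProcess drains the queue and records commandSuccess /
#               executionSuccess on the document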