示例#1
0
	def process_incident(self, incident):
		"""
		get the incident time from the db and gathers all features

		INPUT:
			log_files: the logs that we went through it.
		"""
		if(incident is None):
			return 

		ip_sieve = IPSieve()
		ip_records = {}		
		banned_ips = []

		if(incident["file_name"] is None) or (len(incident["file_name"]) == 0):
			# get the logs from ES
			# get the logs from ES
			banned_ips = self.es_handler.get_banjax(incident['start'], incident['stop'], incident['target'])
			ats_records = self.es_handler.get(incident['start'], incident['stop'], incident['target'])

			# calculate IP dictionary with ATS records
			ip_records = ip_sieve.process_ats_records(ats_records)
		else:
			# read the sessions from the log file
			ip_sieve.add_log_file(incident["file_name"])
			ip_records = ip_sieve.parse_log("nginx")
	
		# calculate features
		ip_feature_db = {}

		#At this stage it is only a peliminary list we might lose features
		#due to 0 variance
		self._active_feature_list = []
		#do a dry run on all features just to gather the indeces of all available
		#features
		for CurentFeature in Learn2BanFeature.__subclasses__():
			f = CurentFeature(ip_records, ip_feature_db)
			self._active_feature_list.append(f._FEATURE_INDEX)

		for CurentFeature in Learn2BanFeature.__subclasses__():
			f = CurentFeature(ip_records, ip_feature_db)
			#logging.info("Computing feature %i..."% f._FEATURE_INDEX)
			print "Computing feature %i..."% f._FEATURE_INDEX
			f.compute()

		# post process the features
		ip_feature_db = self.bothound_tools.post_process(ip_feature_db)

		# delete the old sessions for thie incidend
		self.bothound_tools.delete_sessions(incident['id'])

		#print ip_feature_db
		self.bothound_tools.add_sessions(incident['id'], ip_feature_db, banned_ips)
		self.bothound_tools.set_incident_process(incident['id'], False)
		print "Incident {} processed.".format(incident['id'])
		return ip_feature_db
示例#2
0
    def test_all_features(self):
        for cur_log_file in self.log_files:
            self.test_ip_sieve.add_log_file(cur_log_file)
            self.test_ip_sieve.parse_log()

            for CurrentFeatureType in Learn2BanFeature.__subclasses__():
                cur_feature_tester = CurrentFeatureType(self.test_ip_sieve, self.test_ip_feature_db)
                cur_feature_tester.compute()

        print self.test_ip_feature_db
 def _build_available_feature_list(self):
     """
     Discover every Learn2BanFeature subclass and record it both in a
     name-indexed dictionary and in a list of class names.
     """
     feature_classes = Learn2BanFeature.__subclasses__()
     self._available_features = dict(
         (cls.__name__, cls) for cls in feature_classes)
     self._feature_list = [cls.__name__ for cls in feature_classes]
示例#4
0
    def test_all_features(self):
        for cur_log_file in self.log_files:
            self.test_ip_sieve.add_log_file(cur_log_file)
            self.test_ip_sieve.parse_log()

            for CurrentFeatureType in Learn2BanFeature.__subclasses__():
                cur_feature_tester = CurrentFeatureType(
                    self.test_ip_sieve, self.test_ip_feature_db)
                cur_feature_tester.compute()

        print self.test_ip_feature_db
示例#5
0
 def _build_available_feature_list(self):
     """
     Collect all Learn2BanFeature subclasses into a dictionary keyed by
     class name, plus a parallel list of the names themselves.
     """
     self._available_features = {}
     self._feature_list = list()
     for feature_class in Learn2BanFeature.__subclasses__():
         name = feature_class.__name__
         self._available_features[name] = feature_class
         self._feature_list.append(name)
    def gather_all_features(self, log_files):
        """
        Parse each log file and compute every available feature over it.

        INPUT:
            log_files: iterable of log file paths to process.

        OUTPUT:
            the accumulated per-IP feature dictionary.
        """
        for path in log_files:
            self.ip_sieve.add_log_file(path)
            self.ip_sieve.parse_log()
            for FeatureCls in Learn2BanFeature.__subclasses__():
                FeatureCls(self.ip_sieve, self.ip_feature_db).compute()

        return self.ip_feature_db
示例#7
0
    def gather_all_features(self, log_files):
        """
        Feed every log file through the IP sieve and run each feature.

        INPUT:
            log_files: the log files to be processed.

        OUTPUT:
            the per-IP feature dictionary built up by the feature classes.
        """
        for current_log in log_files:
            self.ip_sieve.add_log_file(current_log)
            self.ip_sieve.parse_log()
            for feature_type in Learn2BanFeature.__subclasses__():
                tester = feature_type(self.ip_sieve, self.ip_feature_db)
                tester.compute()

        return self.ip_feature_db
示例#8
0
    def _process_logs(self):
        """
        Retrieve this experiment's training logs from the db, compute all
        features over them, dump the pre-normalisation feature vectors to
        a json file, and hand the sample to the trainer for normalisation.

        Returns early (None) when the experiment has no associated logs.
        """
        #this is not an oop way of retrieving the logs but I think we are
        #avoiding db access in other classes beside l2btools
        cur_experiment_logs = self.l2btools.retrieve_experiment_logs(self.id)

        #if there is no log associated to this experiment then there is nothing
        #to do
        if len(cur_experiment_logs) == 0:
            logging.info("Giving up on experiment %i with no training log" %
                         self.expr_dict['id'])
            return

        #the log id needs to be sent to the trainer so the trainer
        #knows which regex is detecting the bots for which log
        self.trainer.add_malicious_history_log_files([
            (cur_log_info['log_id'], cur_log_info['file_name'])
            for cur_log_info in cur_experiment_logs
        ])

        #extracting the filenames to compute the IP features from
        log_filenames = tuple(cur_log['file_name']
                              for cur_log in cur_experiment_logs)
        #At this stage it is only a preliminary list; we might lose features
        #later due to 0 variance
        self._active_feature_list = []
        #do a dry run on all features just to gather the indices of all
        #available features
        for CurrentFeatureType in Learn2BanFeature.__subclasses__():
            cur_feature_tester = CurrentFeatureType(self.ip_sieve,
                                                    self.ip_feature_db)
            self._active_feature_list.append(cur_feature_tester._FEATURE_INDEX)

        #feed every log into the sieve up front; in theory crunching the
        #logs one by one might be more memory efficient, but python's
        #memory management makes that ineffective in practice
        for cur_log_file in log_filenames:
            try:
                self.ip_sieve.add_log_file(cur_log_file)
                self.ip_sieve.parse_log()
            except IOError:
                #best effort: skip unreadable logs rather than aborting
                print "Unable to read ", cur_log_file, "skipping..."

        for CurrentFeatureType in Learn2BanFeature.__subclasses__():
            cur_feature_tester = CurrentFeatureType(self.ip_sieve,
                                                    self.ip_feature_db)
            logging.info("Computing feature %i..." %
                         cur_feature_tester._FEATURE_INDEX)
            cur_feature_tester.compute()

            #NOTE(review): memory usage is a known problem in this loop;
            #objgraph was previously used here to chase references

        #drop the parsed records as early as possible to reduce peak memory
        del self.ip_sieve._ordered_records
        del self.ip_sieve

        #NOTE(review): CPython does not reliably return freed memory to the
        #OS; an explicit gc.collect() was tried here and abandoned

        self.trainer.add_to_sample(self.ip_feature_db)

        #we store the non-normalised vectors in a json file
        jsonized_ip_feature_db = {}
        for k, v in self.ip_feature_db.items():
            #json requires string keys; the originals are project objects
            jsonized_ip_feature_db[str(k)] = v
        import json
        with open(self.base_analyse_log_file + ".prenormal_ip_feature_db.json",
                  "w") as ip_feature_file:
            json.dump(jsonized_ip_feature_db, ip_feature_file)

        del self.ip_feature_db
        del jsonized_ip_feature_db

        #Normalise the training set; normalisation should happen only after
        #the whole sample is gathered
        self.trainer.normalise(self.expr_dict['norm_mode'])
示例#9
0
    def process_incident(self, incident):
        """
		get the incident time from the db and gathers all features

		INPUT:
			log_files: the logs that we went through it.
		"""
        if (incident is None):
            return

        ip_sieve = IPSieve()
        ip_records = {}
        banned_ips = []

        if (incident["file_name"] is None) or (len(incident["file_name"])
                                               == 0):
            # get the logs from ES
            # get the logs from ES
            banned_ips = self.es_handler.get_banjax(incident['start'],
                                                    incident['stop'],
                                                    incident['target'])
            ats_records = self.es_handler.get(incident['start'],
                                              incident['stop'],
                                              incident['target'])

            # calculate IP dictionary with ATS records
            ip_records = ip_sieve.process_ats_records(ats_records)
        else:
            # read the sessions from the log file
            ip_sieve.add_log_file(incident["file_name"])
            ip_records = ip_sieve.parse_log("nginx")

        # calculate features
        ip_feature_db = {}

        #At this stage it is only a peliminary list we might lose features
        #due to 0 variance
        self._active_feature_list = []
        #do a dry run on all features just to gather the indeces of all available
        #features
        for CurentFeature in Learn2BanFeature.__subclasses__():
            f = CurentFeature(ip_records, ip_feature_db)
            self._active_feature_list.append(f._FEATURE_INDEX)

        for CurentFeature in Learn2BanFeature.__subclasses__():
            f = CurentFeature(ip_records, ip_feature_db)
            #logging.info("Computing feature %i..."% f._FEATURE_INDEX)
            print "Computing feature %i..." % f._FEATURE_INDEX
            f.compute()

        # post process the features
        ip_feature_db = self.bothound_tools.post_process(ip_feature_db)

        # delete the old sessions for thie incidend
        self.bothound_tools.delete_sessions(incident['id'])

        #print ip_feature_db
        self.bothound_tools.add_sessions(incident['id'], ip_feature_db,
                                         banned_ips)
        self.bothound_tools.set_incident_process(incident['id'], False)
        print "Incident {} processed.".format(incident['id'])
        return ip_feature_db