Exemplo n.º 1
0
    def __init__(self,
                 mode=0,
                 offline=0,
                 dump_testing='testing_fingerprints.csv',
                 dump_training='training_fingerprints.csv'):
        """
            Set up the collaborators and the empty state of the instance.

            Parameter
            -------------
            mode : int
                0 for Training mode, 1 for Testing mode.
            offline : int
                0 or 1; stored as-is on the instance.
            dump_testing : str
                File name kept for the offline dump of testing fingerprints.
            dump_training : str
                File name kept for the offline dump of training fingerprints.

            Raises
            -------------
            ValueError
                If mode or offline is neither 0 nor 1.
        """
        # Validate each flag on its own so the message names the argument
        # that is actually wrong.
        if mode not in (0, 1):
            raise ValueError(
                'The mode value is not valid. Choose between 1 or 0.')
        if offline not in (0, 1):
            raise ValueError(
                'The offline value is not valid. Choose between 1 or 0.')

        self.mode = mode
        self.offline = offline

        # HTTP requests aggregated so far (filled elsewhere in the class).
        self.hosts_clusters = {}

        # Collaborators
        self.label_generator = LabelGenerator()
        self.fin_generator = FingerprintGenerator()
        self.fin_manager = FingerprintManager()
        self.detector = DetectionModule()

        self.alerts = []
        self.time_start = None
        self.time_current = None

        # Files for offline dumps
        self.dump_testing = dump_testing
        self.dump_training = dump_training

        # Known browsers
        self.browser_user_agents = set()

        # Referrer graphs per user_agent
        self.referrerGraphs = dict()
Exemplo n.º 2
0
 def __init__(
     self,
     folder_path,
     dump_fingerprint_to_timestamps_training='training_fingerprint_to_timestamps.txt',
     dump_fingerprint_to_timestamps_testing='testing_fingerprint_to_timestamps.txt',
 ):
     """
     Collect the .csv files matching `folder_path + "*.csv"` and create the
     managers, the detector and the empty fingerprint-to-timestamps mappings.

     folder_path is used as a plain string prefix of the glob pattern, so it
     has to end with a path separator to address the content of a folder.
     """
     # NOTE(review): the sort key ignores the first 84 characters of every
     # path - presumably tuned to one specific folder prefix; confirm.
     csv_paths = glob.glob(folder_path + "*.csv")
     self.files = sorted(csv_paths, key=lambda path: path[84:])

     self.training_manager = FingerprintManager()
     self.testing_manager = FingerprintManager()
     self.detector = DetectionModule()

     # Names of the fingerprint-to-timestamps dump files.
     self.dump_fingerprint_to_timestamps_training = dump_fingerprint_to_timestamps_training
     self.dump_fingerprint_to_timestamps_testing = dump_fingerprint_to_timestamps_testing

     # Mappings start empty.
     self.fingerprint_to_timestamps_training = {}
     self.fingerprint_to_timestamps_testing = {}
Exemplo n.º 3
0
class OfflineDetector:
    """
    Offline detection over fingerprint .csv dumps.

    Files whose path contains "training" provide the reference fingerprints;
    files whose path contains "testing" are checked against them.
    """

    def __init__(self, folder_path, dump_fingerprint_to_timestamps_training='training_fingerprint_to_timestamps.txt',
                 dump_fingerprint_to_timestamps_testing='testing_fingerprint_to_timestamps.txt'):
        """
        Collect the .csv files matching `folder_path + "*.csv"`.

        folder_path is used as a plain string prefix of the glob pattern, so
        it has to end with a path separator to address a folder's content.
        The two dump arguments name the pickled fingerprint-to-timestamps
        mappings read by run_detection_2().
        """
        # NOTE(review): the sort key ignores the first 84 characters of every
        # path - presumably tuned to one specific folder prefix; confirm.
        self.files = sorted(glob.glob(folder_path + "*.csv"),
                            key=lambda path: path[84:])
        self.training_manager = FingerprintManager()
        self.testing_manager = FingerprintManager()
        self.detector = DetectionModule()
        self.dump_fingerprint_to_timestamps_training = dump_fingerprint_to_timestamps_training
        self.dump_fingerprint_to_timestamps_testing = dump_fingerprint_to_timestamps_testing
        self.fingerprint_to_timestamps_testing = {}
        self.fingerprint_to_timestamps_training = {}

    @staticmethod
    def _load_pickle(path):
        """Return the object pickled in the file at `path`."""
        # Pickle data is bytes, so the file is opened in binary mode.
        # NOTE: pickle.load can execute arbitrary code; only load dumps that
        # were produced by a trusted run.
        with open(path, 'rb') as dump_file:
            return pickle.load(dump_file)

    def _load_from_csv_2(self):
        """
        This method loads fingerprints from the .csv files, but only from
        those whose path contains "training".
        """
        for csv_path in self.files:
            if "training" in csv_path:
                self.training_manager.read_from_file(csv_path)
                print("" + csv_path + " has been loaded for training.")

    def run_detection_2(self):
        """
        This method runs the offline detection.

        Fingerprints were previously dumped into csv files. In the offline
        analysis they are loaded from the csv files, and then compared.

        Training data is loaded first. Then each testing file is analyzed;
        hosts for which no fingerprint raised an alert are printed.

        Returns
        ----------------
        (alerts, benign, training_map, testing_map) : tuple
            The testing fingerprints flagged by the detector, the ones not
            flagged, and the two fingerprint-to-timestamps mappings loaded
            from the dump files.
        """
        alerts = []
        benign = []
        self._load_from_csv_2()

        # Now load the mapping between fingerprints and timestamps...
        print("loading__fingerprint_to_timestamps_training")
        self.fingerprint_to_timestamps_training = self._load_pickle(
            self.dump_fingerprint_to_timestamps_training)

        print("loading__fingerprint_to_timestamps_testing")
        self.fingerprint_to_timestamps_testing = self._load_pickle(
            self.dump_fingerprint_to_timestamps_testing)

        print("all_fingerprint_to_timestamp_mappings_loaded")

        # Flatten the per-host training fingerprints into a single list.
        all_training_fingerprints = [
            fingerprint
            for fingerprints in self.training_manager.hosts_fingerprints.values()
            for fingerprint in fingerprints
        ]

        # Counters for the optional summary below; one unit per host entry
        # of every testing file.
        total_files = 0
        total_detected = 0

        for csv_path in self.files:
            if "testing" not in csv_path:
                continue

            self.testing_manager.read_from_file(csv_path)
            print("" + csv_path + " has been loaded for testing.")

            for host, test_fingerprints in self.testing_manager.hosts_fingerprints.items():
                total_files += 1
                detected = False
                for fingerprint in test_fingerprints:
                    if self.detector.detection(all_training_fingerprints, fingerprint):
                        alerts.append(fingerprint)
                        detected = True
                    else:
                        benign.append(fingerprint)

                if detected:
                    total_detected += 1
                else:
                    print(host)

            # Start the next testing file from an empty manager.
            self.testing_manager.hosts_fingerprints = dict()

        # Uncomment if you want to print on how many files at least an alert has been triggered.
        # print("{}/{} files detected.".format(total_detected, total_files))

        return alerts, benign, self.fingerprint_to_timestamps_training, self.fingerprint_to_timestamps_testing
Exemplo n.º 4
0
class Aggregator:
    """
    This class is the engine of Decanter. It is responsible for training and testing fingerprints from input data.
    """

    # Timeout used only in testing mode.
    timeout = datetime.timedelta(minutes=10)

    def __init__(
        self,
        mode=0,
        offline=0,
        dump_testing='testing_fingerprints.csv',
        dump_training='training_fingerprints.csv',
        dump_fingerprint_to_timestamps_training='training_fingerprint_to_timestamps.txt',
        dump_fingerprint_to_timestamps_testing='testing_fingerprint_to_timestamps.txt'
    ):
        """
            Set up the collaborators and the empty aggregation state.

            Parameter
            -------------
            mode : int
                0 for Training mode, 1 for Testing mode.
            offline : int
                1 to dump the generated fingerprints to .csv files, else 0.
            dump_testing, dump_training : str
                Files for the offline fingerprint dumps.
            dump_fingerprint_to_timestamps_training,
            dump_fingerprint_to_timestamps_testing : str
                Files receiving the pickled fingerprint-to-timestamps mappings.

            Raises
            -------------
            ValueError
                If mode or offline is neither 0 nor 1.
        """
        # Validate each flag on its own so the message names the argument
        # that is actually wrong.
        if mode not in (0, 1):
            raise ValueError(
                'The mode value is not valid. Choose between 1 or 0.')
        if offline not in (0, 1):
            raise ValueError(
                'The offline value is not valid. Choose between 1 or 0.')

        # host -> {user-agent (or 'None') -> list of HTTP requests}
        self.hosts_clusters = {}
        self.label_generator = LabelGenerator()
        self.fin_generator = FingerprintGenerator()
        self.fin_manager = FingerprintManager()
        self.detector = DetectionModule()
        self.mode = mode
        self.alerts = []
        self.time_start = None
        self.time_current = None
        self.offline = offline
        self.fingerprint_to_timestamps_testing = {}
        self.fingerprint_to_timestamps_training = {}

        # Files for offline dumps
        self.dump_testing = dump_testing
        self.dump_training = dump_training

        # Will be used to store the mapping between fingerprints and timestamps...
        self.dump_fingerprint_to_timestamps_training = dump_fingerprint_to_timestamps_training
        self.dump_fingerprint_to_timestamps_testing = dump_fingerprint_to_timestamps_testing

        # Known browsers
        self.browser_user_agents = set()

        # Referrer graphs per user_agent
        self.referrerGraphs = dict()

    def change_mode(self, mode):
        """
            Switch between Training (0) and Testing (1) mode.

            Raises
            -------------
            ValueError
                If mode is neither 0 nor 1.
        """
        if mode not in (0, 1):
            raise ValueError('The mode value is not valid')
        self.mode = mode
        if mode == 1:
            print("Aggregator switched to Testing mode.")
        else:
            print("Aggregator switched to Training mode.")

    def analyze_log(self, data):
        """
            Load and aggregate the HTTP requests from a Dataframe

            Parameter
            -------------
            data : pandas Dataframe

            Returns
            -------------
            The fingerprint-to-timestamps mapping of the current mode, or
            None when the mode is neither 0 nor 1.
        """
        if self.mode == 0:
            self._training(data)
            return self.fingerprint_to_timestamps_training
        if self.mode == 1:
            self._testing(data)
            return self.fingerprint_to_timestamps_testing
        return None

    def _fingerprint_all_clusters(self):
        """Create fingerprints for every aggregated (host, cluster) pair."""
        for host, clusters in list(self.hosts_clusters.items()):
            for http_cluster in clusters.values():
                self._create_fingerprints(host, http_cluster)

    @staticmethod
    def _dump_timestamps(path, mapping):
        """Pickle `mapping` into the file at `path`, replacing its content."""
        # Pickle data is bytes, so the file is opened in binary mode.
        with open(path, 'wb') as dump_file:
            pickle.dump(mapping, dump_file)

    def _testing(self, data):
        """
            Aggregate the requests of a Dataframe and fingerprint them every
            time the time window exceeds `timeout`.

            Parameter
            -------------
            data : pandas Dataframe
        """
        for _, row in data.iterrows():

            # Generate HTTP request
            h = HTTPRequest(row.to_dict())

            # Initialize Time
            if self.time_start is None:
                self.time_start = h.ts

            # Aggregate request
            self._insert_http_request(h)

            # Set current time to the current HTTP request timestamp
            self.time_current = h.ts

            # Check if the timeout is expired
            if (self.time_current - self.time_start) > self.timeout:

                # Create and store the fingerprints
                self._fingerprint_all_clusters()

                # Flush the aggregated HTTP requests and reset the starting time
                self.hosts_clusters.clear()
                self.time_start = None

        # Writing of fingerprints in case the file "ended" and the timeout did not exceed.
        if self.hosts_clusters:
            self._fingerprint_all_clusters()

        self.hosts_clusters.clear()
        self.time_start = None

    def _training(self, data):
        """
            Aggregate all the requests of a Dataframe, then fingerprint them.

            Parameter
            -------------
            data : pandas Dataframe
        """
        for _, row in data.iterrows():
            # Generate and aggregate the HTTP request
            self._insert_http_request(HTTPRequest(row.to_dict()))

        # Create and store the fingerprints
        self._fingerprint_all_clusters()

        # In OFFLINE mode, dump the generated fingerprints in a .csv file.
        if self.offline == 1:
            self.fin_manager.write_to_file(self.dump_training)

        self.hosts_clusters.clear()

    def _create_fingerprints(self, host, http_cluster):
        """
            Label a cluster of HTTP requests and generate one fingerprint per
            (method, label) group.

            In Training mode the fingerprints are stored in the fingerprint
            manager. In Testing mode they are appended to the testing dump
            (offline) or checked by the detector, which fills `self.alerts`.
            The fingerprint-to-timestamps mapping is pickled to its dump file
            in Training mode and in offline Testing mode.

            Parameter
            ----------------
            host : key of the cluster in `hosts_clusters`
            http_cluster : list of HTTPRequest

            Returns
            ----------------
            None
        """

        # Removed GET-POST split and replaced with Label_generator
        labels, referrerGraph = self.label_generator.generate_label(
            http_cluster, self.mode, self.browser_user_agents,
            self.referrerGraphs)

        # Training mode
        if self.mode == 0:
            for key, cluster in labels.items():
                method = key[0]
                label = key[1]
                self.fin_manager.store(
                    host,
                    self.fin_generator.generate_fingerprint(
                        cluster, method, label,
                        self.fingerprint_to_timestamps_training))

                # If browser, store to known browser user-agents
                if label == "Browser":
                    user_agent = http_cluster[0].header_values.get(
                        'user-agent', None)
                    self.browser_user_agents.add(user_agent)

            # The dump overwrites the file, so writing it once after the
            # loop leaves the same content as writing it for every label.
            if labels:
                self._dump_timestamps(
                    self.dump_fingerprint_to_timestamps_training,
                    self.fingerprint_to_timestamps_training)

        # Testing mode
        elif self.mode == 1:

            user_agent = http_cluster[0].header_values.get('user-agent', None)

            self.referrerGraphs[user_agent] = referrerGraph

            is_offline = self.offline == 1

            # The stored training fingerprints are not modified in this
            # branch, so the flat list is built once instead of per label.
            all_training_fingerprints = []
            if not is_offline:
                all_training_fingerprints = [
                    fingerprint
                    for fingerprints in self.fin_manager.hosts_fingerprints.values()
                    for fingerprint in fingerprints
                ]

            for key, cluster in labels.items():
                method = key[0]
                label = key[1]
                new_fingerprint = self.fin_generator.generate_fingerprint(
                    cluster, method, label,
                    self.fingerprint_to_timestamps_testing)

                # In OFFLINE mode, dump the generated fingerprints in a .csv file. IN THIS CASE WE APPEND!!!!
                if is_offline:
                    self.fin_manager.write_fingerprint_to_file(
                        self.dump_testing, new_fingerprint, host)
                elif self.detector.detection(all_training_fingerprints,
                                             new_fingerprint):
                    self.alerts.append(new_fingerprint)

            # Same reasoning as in Training mode: one overwrite is enough.
            if is_offline and labels:
                self._dump_timestamps(
                    self.dump_fingerprint_to_timestamps_testing,
                    self.fingerprint_to_timestamps_testing)

    def _insert_http_request(self, req):
        """
            Aggregate the HTTP requests per host and user-agent

            Parameter
            -------------------
            req : HTTPRequest object
        """

        # Fetch (or initialize) the clusters of the host
        host_clusters = self.hosts_clusters.setdefault(req.orig_ip, {})

        # Requests that DO NOT HAVE a User-Agent share the 'None' cluster
        cluster_key = req.header_values.get('user-agent', 'None')

        # Create and/or update the cluster
        host_clusters.setdefault(cluster_key, []).append(req)
Exemplo n.º 5
0
 def __init__(self, folder_path):
     """
     Collect the .csv files matching `folder_path + "*.csv"` and create the
     two fingerprint managers and the detector.

     folder_path is used as a plain string prefix of the glob pattern, so it
     has to end with a path separator to address the content of a folder.
     """
     # NOTE(review): the sort key ignores the first 84 characters of every
     # path - presumably tuned to one specific folder prefix; confirm.
     csv_paths = glob.glob(folder_path + "*.csv")
     self.files = sorted(csv_paths, key=lambda path: path[84:])

     # One manager per data set, plus the detector comparing them.
     self.training_manager = FingerprintManager()
     self.testing_manager = FingerprintManager()
     self.detector = DetectionModule()