class UtilTest(unittest.TestCase):
    def setUp(self):
        self.lock = torch.multiprocessing.get_context('spawn').Lock()
        self.model = LSTMLayer(num_classes=5)
        cardinality = Cardinality(3, "", "")
        cardinality.list_classes = [1,1,1,2,2,3,4,5,6]
        cardinality.counter= {1:10, 2:100, 3:100, 4:100, 6:1000, 5:1000}
        cardinality.compute_position()
        self.result = Result(cardinality)


    @patch('os.path.isfile')    
    def test_saver_no_file(self, mock_isfile):
        mock_isfile.return_value = False
        self.saver = Saver("test", "./", 3, self.lock)
        read_data = ""
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            self.saver.save(self.model, condition="Test", result= self.result)

    @patch('os.path.isfile')    
    def test_saver_file(self, mock_isfile):
        mock_isfile.return_value = True
        self.saver = Saver("test", "./", 3, self.lock)
        read_data = pickle.dumps({"LSTM": {3:self.model.state_dict()}})
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            self.saver.save(self.model, condition="Test", result= self.result)

    @patch('os.path.isfile')    
    def test_saver_file_empty(self, mock_isfile):
        mock_isfile.return_value = True
        self.saver = Saver("test", "./", 3, self.lock)
        read_data = pickle.dumps({})
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            self.saver.save(self.model, condition="Test", result=self.result)

    @patch('os.path.isfile')    
    def test_load_file(self, mock_isfile):
        mock_isfile.return_value = True
        self.saver = Saver("test", "./", 3, self.lock)
        read_data = pickle.dumps({"LSTM": {3:self.model.state_dict()}})
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            model = self.saver.load(self.model)
            self.assertIsInstance(model, LSTMLayer)

    @patch('os.path.isfile')    
    def test_load_no_file(self, mock_isfile):
        mock_isfile.return_value = False
        self.saver = Saver("test", "./", 3, self.lock)
        read_data = pickle.dumps({"LSTM": {3:self.model.state_dict()}})
        mockOpen = mock_open(read_data=read_data)
        with self.assertRaises(Exception):
            with patch('builtins.open', mockOpen):
                self.saver.load(self.model)
Example No. 3
    def setUp(self):
        self.model = LSTMLayer(num_classes=5)
        self.default_pattern = Pattern(0, [], [])
        self.list_model = {
            1: self.model.state_dict(),
            2: self.model.state_dict(),
            3: self.model.state_dict()
        }
        default_pattern1 = Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3
                         ] * 30
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps({
            'word2vec': {
                "1": np.asarray([1] * 20),
                "2": np.asarray([2] * 20),
                "3": np.asarray([3] * 20),
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 100,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            },
            "dict_patterns": {}
        })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            self.dataset = Dataset(path_model="/",
                                   path_data="/",
                                   name_model="/")
            self.dataset.load_files()

        self.dataset.LSTM = self.list_model
        self.dataset.list_logs = []
        for i in range(30):
            self.dataset.list_logs.append(
                Log("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123",
                    index_line=i))
Example No. 4
    def load_model(self):
        """Load the learned model from a previous state

        Raises:
            FileNotFoundError: the model file is not found
        """
        self.model = LSTMLayer(num_classes=self.dataset.number_of_classes).to(
            self.device)
        try:
            self.model = self.saver.load(model=self.model)
        except FileNotFoundError as e:
            logger.critical("No such file: " + self.path_model +
                            self.name_dataset + "_model.lf" + ".torch")
            print("Raising: ", e)
            raise e
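
A minimal sketch of how a caller might handle this re-raised error, assuming a Worker_single instance named worker built as in the later examples (the exact logflow import paths are not shown in these snippets and are therefore omitted):

# Hedged usage sketch: try to restore a saved model, otherwise train from scratch.
# `worker` is assumed to be a Worker_single built as in the examples further below.
try:
    worker.load_model()
except FileNotFoundError:
    # load_model() logs the missing path and re-raises, leaving the decision
    # to the caller; here we fall back to a fresh training run.
    worker.train(resuming=False)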
Example No. 5
    def setUp(self):
        self.list_cardinalities = [
            Cardinality(3, "", ""),
            Cardinality(4, "", ""),
            Cardinality(5, "", "")
        ]
        self.lock = torch.multiprocessing.get_context('spawn').Lock()
        self.model = LSTMLayer(num_classes=5)
Example No. 6
    def save(self, model: LSTMLayer, result: Result, condition="Test"):
        """Save the model

        Args:
            model (LSTMLayer): model to save
            result (Result): result to save
            condition (str, optional): "Test", "Train", or "temp"; with "temp" only the model is saved and the stored results are left unchanged. Defaults to "Test".
        """
        dict_cardinalities_model = {}
        self.lock.acquire()
        if os.path.isfile(self.path):
            with open(self.path, "rb") as output_file:
                logger.info("[" + str(self.cardinality) + "] Loading: " +
                            self.path)
                dict_cardinalities_model = pickle.load(output_file)
        dict_cardinalities_model.setdefault(
            "LSTM", {})[self.cardinality] = model.state_dict()
        # Keep only the latest version of the results
        dict_cardinalities_model.setdefault("Result", {}).setdefault(
            self.cardinality, {})
        if condition != "temp":
            dict_cardinalities_model["Result"][
                self.cardinality][condition] = result
        with open(self.path, "wb") as output_file:
            pickle.dump(dict_cardinalities_model, output_file)
        logger.info("[" + str(self.cardinality) + "] Saving: " + self.path)
        self.lock.release()
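
For reference, a minimal usage sketch of this save path, assuming the Saver, LSTMLayer, Result and Cardinality classes from the surrounding examples and a 'spawn' multiprocessing lock as in the tests above. Import lines are omitted because the exact logflow module paths do not appear in these snippets, and the path and name values are illustrative placeholders.

# Hedged sketch: persist the model and its latest results for cardinality 3.
import torch

lock = torch.multiprocessing.get_context('spawn').Lock()
saver = Saver(path_model="./", name_model="my_dataset", cardinality=3, lock=lock)

model = LSTMLayer(num_classes=5)
cardinality = Cardinality(3, "", "")  # arguments as in the tests above; real runs would pass actual data
cardinality.list_classes = [1, 1, 1, 2, 2, 3, 4, 5, 6]
cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
cardinality.compute_position()
result = Result(cardinality)

# "Train" and "Test" update the stored results; "temp" only checkpoints the model.
saver.save(model=model, result=result, condition="Train")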
Example No. 7
    def load(self, model: LSTMLayer) -> LSTMLayer:
        """Load the model. Note that the model must be created before. This function loads only the parameters inside the model.
        
        Args:
            model (LSTMLayer): object to use for loading the model.

        Raises:
            FileNotFoundError: the file is not found

        Returns:
            LSTMLayer: the loaded model
        """
        if os.path.isfile(self.path):
            self.lock.acquire()
            with open(self.path, "rb") as output_file:
                dict_cardinalities_model = pickle.load(output_file)
            model.load_state_dict(
                dict_cardinalities_model["LSTM"][self.cardinality])
            self.lock.release()
            return model
        else:
            logger.critical("Trying to load an unknown file: " +
                            str(self.path))
            raise FileNotFoundError("Trying to load an unknown file")
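
And the matching load side, again as a hedged sketch: the model object must exist before load() copies the saved parameters into it (the saver is assumed to be built as in the previous save sketch).

# Hedged sketch: restore the parameters saved above into a freshly built model.
# `saver` is constructed exactly as in the previous save sketch.
model = LSTMLayer(num_classes=5)
try:
    model = saver.load(model)
except FileNotFoundError:
    # Raised when no file has been saved yet for this path and cardinality.
    model = None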
Example No. 8
class UtilTest(unittest.TestCase):
    def setUp(self):
        self.model = LSTMLayer(num_classes=5)
        self.default_pattern = Pattern(0, [], [])
        self.list_model = {
            1: self.model.state_dict(),
            2: self.model.state_dict(),
            3: self.model.state_dict()
        }
        default_pattern1 = Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3
                         ] * 30
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps({
            'word2vec': {
                "1": np.asarray([1] * 20),
                "2": np.asarray([2] * 20),
                "3": np.asarray([3] * 20),
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 100,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            },
            "dict_patterns": {}
        })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            self.dataset = Dataset(path_model="/",
                                   path_data="/",
                                   name_model="/")
            self.dataset.load_files()

        self.dataset.LSTM = self.list_model
        self.dataset.list_logs = []
        for i in range(30):
            self.dataset.list_logs.append(
                Log("1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123",
                    index_line=i))

    def test_create(self):
        workflow = Workflow(self.dataset)

    def test_workflow_working(self):
        workflow = Workflow(self.dataset)
        workflow.detect_workflow(25)
        workflow.get_tree(25)

    def test_workflow_working_with_child(self):
        m = Mock()
        m.side_effect = [
            [{"log": 25, "weigth": 10}, {"log": 15, "weigth": 10}]
            for _ in range(5)
        ]
        logflow.treebuilding.Inference.Inference.test = m

        default_pattern1 = Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern(0, [], [])
        default_pattern3.id = 3
        m_pattern = Mock()
        m_pattern.side_effect = [
            default_pattern1, default_pattern2, default_pattern3
        ] * 3000
        logflow.logsparser.Journal.Journal.find_pattern = m_pattern

        workflow = Workflow(self.dataset)
        workflow.detect_workflow(25)

    def test_workflow_after_last_line(self):
        workflow = Workflow(self.dataset)
        with self.assertRaises(Exception):
            workflow.get_tree(40)

    def test_workflow_wrong_first_log(self):
        read_data = pickle.dumps({
            'word2vec': {
                "1": np.asarray([1] * 20),
                "2": np.asarray([2] * 20),
                "3": np.asarray([3] * 20),
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 100,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            },
            "dict_patterns": {}
        })
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset_local = Dataset(path_model="/",
                                    path_data="/",
                                    name_model="/")
            dataset_local.load_files()

        dataset_local.LSTM = self.list_model
        dataset_local.list_logs = []
        default_pattern_local = Pattern(0, [], [])
        default_pattern_local.id = -1
        m = Mock()
        m.side_effect = [default_pattern_local] * 30
        logflow.logsparser.Journal.Journal.find_pattern = m
        for i in range(30):
            log = Log("-1", index_line=i)
            log.pattern = default_pattern_local
            dataset_local.list_logs.append(log)
        workflow = Workflow(dataset_local)
        #with self.assertRaises(Exception):
        tree = workflow.detect_workflow(10)
        self.assertEqual(tree, "-1")
Example No. 10
class UtilTest(unittest.TestCase):
    def setUp(self):
        self.model = LSTMLayer(num_classes=5)
        self.default_pattern = Pattern.Pattern(0, [], [])

    def test_create(self):
        with self.assertRaises(Exception):
            dataset = Dataset(path_model="", path_data="/", name_model="/")
        with self.assertRaises(Exception):
            dataset = Dataset(path_model="/", path_data="/", name_model="")
        with self.assertRaises(Exception):
            dataset = Dataset(path_model="/", path_data="", name_model="/")
        dataset = Dataset(path_model="/", path_data="/", name_model="/")

    def test_load_files(self):
        read_data = pickle.dumps(
            {
            'word2vec': {
                "1": [1]*20, "2": [2]*20, "3": [3]*20,"4": [4]*20, "5": [5]*20, "6": [6]*20, "7": [7]*20
                }, 
            'counter_patterns': {
                1:10, 2:100, 3:100, 4:100, 6:1000, 5:1000
                },
            "LSTM": {
                3:self.model.state_dict()
                },
            "dict_patterns": {}
            })
        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_files()
        self.assertEqual(len(dataset.dict_patterns), 0)
        self.assertEqual(len(dataset.counter_patterns), 6)

    def test_load_logs(self):
        read_data_str = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
        # m = unittest.mock.MagicMock(name='open', spec=open)
        # m.return_value = iter(read_data)

        #with unittest.mock.patch('builtins.open', m):
        
        #mockOpen = mock_open(read_data=read_data)
        m = unittest.mock.mock_open(read_data=''.join(read_data_str))
        m.return_value.__iter__ = lambda self: self
        m.return_value.__next__ = lambda self: next(iter(self.readline, ''))
        with patch('builtins.open', m):
            dataset = Dataset(path_model="/", path_data="/", name_model="/", index_line_max=1)
            dataset.load_logs()
        self.assertEqual(len(dataset.list_logs), 2)

        with patch('builtins.open', m):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_logs()
        self.assertEqual(len(dataset.list_logs), 3)

    #@patch('logflow.logsparser.Journal.Journal.find_pattern')
    def test_slice(self): #, mock_get_pattern):
        default_pattern1 = Pattern.Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern.Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern.Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3]
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps(
            {
            'word2vec': {
                "1": np.asarray([1]*20), "2": np.asarray([2]*20), "3": np.asarray([3]*20),"4": [4]*20, "5": [5]*20, "6": [6]*20, "7": [7]*20
                }, 
            'counter_patterns': {
                1:10, 2:100, 3:100, 4:100, 6:1000, 5:1000
                },
            "LSTM": {
                3:self.model.state_dict()
                },
            "dict_patterns": {}
            })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_files()

        read_data_str = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
        # mockOpen = mock_open(read_data=read_data)
        mockOpen = unittest.mock.mock_open(read_data=''.join(read_data_str))
        mockOpen.return_value.__iter__ = lambda self: self
        mockOpen.return_value.__next__ = lambda self: next(iter(self.readline, ''))
        with patch('builtins.open', mockOpen):
            dataset.load_logs()
        # Normal, return a log
        output = dataset.get_slice(2)
        self.assertIsInstance(output, logflow.treebuilding.Log.Log)
        dataset.show_selected_lines(2)

    #@patch('logflow.logsparser.Journal.Journal.find_pattern')
    def test_slice_w2v_issue(self): #, mock_get_pattern):
        default_pattern1 = Pattern.Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern.Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern.Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3]
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps(
            {
            'word2vec': {
                "1": [1]*20, "2": np.asarray([2]*20), "3": np.asarray([3]*20),"4": [4]*20, "5": [5]*20, "6": [6]*20, "7": [7]*20
                }, 
            'counter_patterns': {
                1:10, 2:100, 3:100, 4:100, 6:1000, 5:1000
                },
            "LSTM": {
                3:self.model.state_dict()
                },
            "dict_patterns": {}
            })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_files()

        read_data_str = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
        # mockOpen = mock_open(read_data=read_data)
        mockOpen = unittest.mock.mock_open(read_data=''.join(read_data_str))
        mockOpen.return_value.__iter__ = lambda self: self
        mockOpen.return_value.__next__ = lambda self: next(iter(self.readline, ''))
        with patch('builtins.open', mockOpen):
            dataset.load_logs()
        # Empty
        output = dataset.get_slice(2)
        self.assertEqual(output, -1)

    def test_slice_first_log_issue(self): #, mock_get_pattern):
        default_pattern1 = Pattern.Pattern(0, [], [])
        default_pattern1.id = -1
        default_pattern2 = Pattern.Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern.Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3]
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps(
            {
            'word2vec': {
                "1": np.asarray([1]*20), "2": np.asarray([2]*20), "3": np.asarray([3]*20),"4": [4]*20, "5": [5]*20, "6": [6]*20, "7": [7]*20
                }, 
            'counter_patterns': {
                1:10, 2:100, 3:100, 4:100, 6:1000, 5:1000
                },
            "LSTM": {
                3:self.model.state_dict()
                },
            "dict_patterns": {}
            })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_files()

        read_data_str = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
        # mockOpen = mock_open(read_data=read_data)
        mockOpen = unittest.mock.mock_open(read_data=''.join(read_data_str))
        mockOpen.return_value.__iter__ = lambda self: self
        mockOpen.return_value.__next__ = lambda self: next(iter(self.readline, ''))
        with patch('builtins.open', mockOpen):
            dataset.load_logs()
        # Empty
        output = dataset.get_slice(2)
        self.assertEqual(output, -1)

    def test_slice_cardinality_issue(self): #, mock_get_pattern):
        default_pattern1 = Pattern.Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern.Pattern(0, [], [])
        default_pattern2.id = 2
        default_pattern3 = Pattern.Pattern(0, [], [])
        default_pattern3.id = 3
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3]
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps(
            {
            'word2vec': {
                "1": np.asarray([1]*20), "2": np.asarray([2]*20), "3": np.asarray([3]*20),"4": [4]*20, "5": [5]*20, "6": [6]*20, "7": [7]*20
                }, 
            'counter_patterns': {
                1:100000000000, 2:100, 3:100, 4:100, 6:1000, 5:1000
                },
            "LSTM": {
                3:self.model.state_dict()
                },
            "dict_patterns": {}
            })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_files()

        read_data_str = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
        # mockOpen = mock_open(read_data=read_data)
        mockOpen = unittest.mock.mock_open(read_data=''.join(read_data_str))
        mockOpen.return_value.__iter__ = lambda self: self
        mockOpen.return_value.__next__ = lambda self: next(iter(self.readline, ''))
        with patch('builtins.open', mockOpen):
            dataset.load_logs()
        # Empty
        output = dataset.get_slice(2)
        self.assertEqual(output, -1)

    def test_slice_empty_issue(self): #, mock_get_pattern):
        default_pattern1 = Pattern.Pattern(0, [], [])
        default_pattern1.id = 1
        default_pattern2 = Pattern.Pattern(0, [], [])
        default_pattern2.id = 1
        default_pattern3 = Pattern.Pattern(0, [], [])
        default_pattern3.id = 1
        m = Mock()
        m.side_effect = [default_pattern1, default_pattern2, default_pattern3]
        # Mock(return_value=self.default_pattern)
        logflow.logsparser.Journal.Journal.find_pattern = m
        #mock_get_pattern.return_value = 1
        read_data = pickle.dumps(
            {
            'word2vec': {
                "1": np.asarray([1]*20), "2": np.asarray([2]*20), "3": np.asarray([3]*20),"4": [4]*20, "5": [5]*20, "6": [6]*20, "7": [7]*20
                }, 
            'counter_patterns': {
                1:10, 2:100, 3:100, 4:100, 6:1000, 5:1000
                },
            "LSTM": {
                3:self.model.state_dict()
                },
            "dict_patterns": {}
            })

        mockOpen = mock_open(read_data=read_data)
        with patch('builtins.open', mockOpen):
            dataset = Dataset(path_model="/", path_data="/", name_model="/")
            dataset.load_files()

        read_data_str = "1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123\n1530388399 2018 Jun 30 21:53:19 m21205 authpriv info sshd pam_unix(sshd:session): session closed, for1 User Root/1 123"
        # mockOpen = mock_open(read_data=read_data)
        mockOpen = unittest.mock.mock_open(read_data=''.join(read_data_str))
        mockOpen.return_value.__iter__ = lambda self: self
        mockOpen.return_value.__next__ = lambda self: next(iter(self.readline, ''))
        with patch('builtins.open', mockOpen):
            dataset.load_logs()
        # Empty
        output = dataset.get_slice(2)
        self.assertEqual(output, -1)
Example No. 11
class Worker_single():
    """A single worker is responsible for the creation of the dataloader, the learning/testing step and for saving files of one cardinality.

    Args:
        cardinality (Cardinality): the cardinality object containing the data.
        lock (threading.Lock): lock shared by all cardinalities so that they can safely save into the same file.
        batch_size (int, optional): size of the batch. Defaults to 128.
        path_model (str, optional): path to the model to save. Defaults to "".
        name_dataset (str, optional): name of the dataset. Defaults to "".
        batch_result (int, optional): show results every batch_result batches. Defaults to 20000.
        exclude_test (boolean, optional): exclude the testing step during the learning step. Can be used with the timer stopping condition to obtain an exact duration. Defaults to False.
        stoppingcondition (str, optional): condition to stop the learning step (timer, earlystopping, epoch). Defaults to earlystopping.
        condition_value (float, optional): earlystopping option. Value of the required increase. Defaults to 0.005.
        condition_step (int, optional): earlystopping option. Number of steps. Defaults to 3.
        duration (int, optional): timer option. Duration of the learning step in minutes. Defaults to 5.
        condition_epoch (int, optional): epoch option. Number of epochs to run. Defaults to 3.
    """
    def __init__(self,
                 cardinality: Cardinality,
                 lock: threading.Lock,
                 batch_size=128,
                 path_model="",
                 name_dataset="",
                 batch_result=20000,
                 exclude_test=False,
                 stoppingcondition="earlystopping",
                 condition_value=0.005,
                 condition_step=3,
                 duration=5,
                 condition_epoch=3):
        self.dataset = cardinality
        self.cardinality = self.dataset.cardinality
        self.batch_size = batch_size
        self.model = -1
        # self.stopping_condition = StoppingCondition(method="earlystopping", condition_value = 0.005, condition_step=3)
        if stoppingcondition == "earlystopping":
            self.stopping_condition = StoppingCondition(
                method=stoppingcondition,
                condition_value=condition_value,
                condition_step=condition_step)
        elif stoppingcondition == "timer":
            self.stopping_condition = StoppingCondition(
                method=stoppingcondition, duration=duration)
        elif stoppingcondition == "epoch":
            self.stopping_condition = StoppingCondition(
                method=stoppingcondition, condition_epoch=condition_epoch)
        else:
            raise Exception(
                "Stopping condition method is not implemented. Please use 'earlystopping', 'timer', or 'epoch'"
            )
        self.path_model = path_model
        self.name_dataset = name_dataset
        self.lock = lock
        self.exlude_test = exclude_test
        self.saver = Saver(path_model=self.path_model,
                           name_model=self.name_dataset,
                           cardinality=self.cardinality,
                           lock=self.lock)
        self.batch_result = batch_result

        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            logger.info("Starting learning on GPU")
        else:
            self.device = torch.device('cpu')
            logger.info("Starting learning on CPU")

    def create_dataloader(self,
                          validation_split=0.6,
                          condition="Test",
                          subsample=False,
                          subsample_split=0.01) -> DataLoader:
        """Create the dataloader for the learning/testing step.

        Args:
            validation_split (float, optional): ratio between the learning and the testing set. Defaults to 0.6.
            condition (str, optional): if "Test", the dataloader contains the test data; otherwise it contains the training data. Defaults to "Test".
            subsample (bool, optional): use only a subsample of the data. Can be used for the learning and/or the testing step. Defaults to False.
            subsample_split (float, optional): ratio of the data to use. Defaults to 0.01.

        Returns:
            DataLoader: PyTorch dataloader corresponding to the previous features.
        """
        if not self.dataset.loaded:
            self.dataset.load_files()
            self.dataset.compute_position()
            self.size = len(self.dataset)
            logger.info("Cardinality: " + str(self.dataset.cardinality) +
                        " size of dataset: " + str(self.size))
            logger.info("Nb of classes: " +
                        str(self.dataset.number_of_classes))
        # Set the random seed to have always the same random value.
        random_seed = 42
        np.random.seed(random_seed)
        split = int(np.floor(validation_split * self.size))
        indices = list(range(self.size))
        np.random.shuffle(indices)
        if condition == "Test":
            indices = indices[:split]
        else:
            indices = indices[split:]
        if subsample:
            split = int(np.floor(subsample_split * len(indices)))
            np.random.shuffle(indices)
            indices = indices[:split]
        sampler = SubsetRandomSampler(indices)
        dataloader = DataLoader(self.dataset,
                                batch_size=self.batch_size,
                                pin_memory=True,
                                drop_last=True,
                                num_workers=5,
                                sampler=sampler)  # type: ignore
        return dataloader

    def load_model(self):
        """Load the learned model from a previous state

        Raises:
            FileNotFoundError: the model file is not found
        """
        self.model = LSTMLayer(num_classes=self.dataset.number_of_classes).to(
            self.device)
        try:
            self.model = self.saver.load(model=self.model)
        except FileNotFoundError as e:
            logger.critical("No such file: " + self.path_model +
                            self.name_dataset + "_model.lf" + ".torch")
            print("Raising: ", e)
            raise e

    def train(self, validation_split=0.6, resuming=False):
        """Train the model

        Args:
            validation_split (float, optional): ratio between testing and learning set. Defaults to 0.6.
            resuming (bool, optional): resume the learning from a previously saved model. Defaults to False.
        """
        # Create the dataloader
        dataloader_train = self.create_dataloader(
            validation_split=validation_split, condition="train")
        if resuming:
            self.load_model()
        else:
            self.model = LSTMLayer(num_classes=self.dataset.number_of_classes,
                                   batch_size=self.batch_size).to(self.device)
        # Create the results
        result = Result(self.dataset, condition="Train")
        optimizer = optim.Adam(self.model.parameters())
        loss_fn = nn.CrossEntropyLoss()
        self.model.train()
        logger.info("Cardinality: " + str(self.cardinality) +
                    " Starting the learning step")
        # Start the learning
        while not self.stopping_condition.stop():
            for index_batch, batch in enumerate(dataloader_train):
                optimizer.zero_grad()
                label = batch['output']
                input_data = batch['input'].to(self.device)
                prediction = self.model(input_data)
                loss = loss_fn(prediction, label.to(self.device))
                loss.backward()
                optimizer.step()
                result.update(prediction, label)
                # Compute the results every batch_result batches.
                if index_batch % self.batch_result == 0 and index_batch != 0:
                    result.computing_result(progress=index_batch /
                                            len(dataloader_train))
                    self.saver.save(model=self.model,
                                    result=result,
                                    condition="temp")
                    if not self.exlude_test:
                        # Test only on a subsample
                        self.test(subsample=True, subsample_split=0.1)
                    print(self.stopping_condition)
                # For the timer method only, check the stopping condition at each batch
                if self.stopping_condition.method == "timer" and self.stopping_condition.stop(
                ):
                    logger.debug("[Stopping] Cardinality: " +
                                 str(self.cardinality) + " " +
                                 str(self.stopping_condition) +
                                 " stopping learning step.")
                    break
            # At the end of each epoch, test on the whole testing set and update the stopping condition
            if not self.exlude_test:
                self.test()
            result.computing_result(reinit=True, progress=1)
            if self.stopping_condition.stop():
                logger.debug("[Stopping] Cardinality: " +
                             str(self.cardinality) + " " +
                             str(self.stopping_condition) +
                             " stopping learning step.")
            self.saver.save(model=self.model, result=result, condition="Train")
        # logger.info("[Test] Cardinality: " + str(self.cardinality) + " " + str(self.stopping_condition) + " stopping learning step.")
        # self.saver.save(model=self.model.state_dict())

    def test(self,
             validation_split=0.6,
             subsample=False,
             subsample_split=0.01):
        """Test the model

        Args:
            validation_split (float, optional): ratio between testing and learning set. Defaults to 0.6.
            subsample (bool, optional): if False, use all the available data, if True, use only a ratio of the data (subsample_split*data). Defaults to False.
            subsample_split (float, optional): ratio of the data to use. Defaults to 0.01.
        """
        dataloader_test = self.create_dataloader(
            validation_split=validation_split,
            condition="Test",
            subsample=subsample,
            subsample_split=subsample_split)
        result = Result(self.dataset, condition="Test", subsample=subsample)
        if self.model == -1:
            self.load_model()
        self.model.eval()
        self.conf_matrix = torch.zeros(self.dataset.number_of_classes,
                                       self.dataset.number_of_classes)
        for index_batch, batch in enumerate(dataloader_test):
            label = batch['output']
            input_data = batch['input'].to(self.device)
            prediction = self.model(input_data)
            result.update(prediction, label)
            if index_batch % self.batch_result == 0:
                result.computing_result(reinit=False,
                                        progress=index_batch /
                                        len(dataloader_test))
        self.model.train()
        self.saver.save(model=self.model, result=result, condition="Test")
        result.computing_result(reinit=True, progress=1)
        self.stopping_condition.update(result.microf1)
        self.stopping_condition.stop()
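
Putting the pieces together, a hedged end-to-end sketch based only on the constructor arguments and methods shown above (the Cardinality arguments, paths and dataset name are placeholders, and the logflow import lines are omitted because the exact module paths are not part of these snippets):

# Hedged sketch: train and evaluate one cardinality with an early-stopping worker.
import torch

lock = torch.multiprocessing.get_context('spawn').Lock()
cardinality = Cardinality(3, "", "")  # positional arguments as in the tests; real runs would pass actual data here
worker = Worker_single(cardinality=cardinality,
                       lock=lock,
                       batch_size=128,
                       path_model="./",
                       name_dataset="my_dataset",
                       stoppingcondition="earlystopping",
                       condition_value=0.005,
                       condition_step=3)
worker.train(resuming=False)  # builds the training dataloader and loops until the stopping condition triggers
worker.test()                 # evaluates on the held-out split and saves the "Test" results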
Example No. 12
    def train(self, validation_split=0.6, resuming=False):
        """Train the model

        Args:
            validation_split (float, optional): ratio between testing and learning set. Defaults to 0.6.
            resuming (bool, optional): resume the learning from a previously saved model. Defaults to False.
        """
        # Create the dataloader
        dataloader_train = self.create_dataloader(
            validation_split=validation_split, condition="train")
        if resuming:
            self.load_model()
        else:
            self.model = LSTMLayer(num_classes=self.dataset.number_of_classes,
                                   batch_size=self.batch_size).to(self.device)
        # Create the results
        result = Result(self.dataset, condition="Train")
        optimizer = optim.Adam(self.model.parameters())
        loss_fn = nn.CrossEntropyLoss()
        self.model.train()
        logger.info("Cardinality: " + str(self.cardinality) +
                    " Starting the learning step")
        # Start the learning
        while not self.stopping_condition.stop():
            for index_batch, batch in enumerate(dataloader_train):
                optimizer.zero_grad()
                label = batch['output']
                input_data = batch['input'].to(self.device)
                prediction = self.model(input_data)
                loss = loss_fn(prediction, label.to(self.device))
                loss.backward()
                optimizer.step()
                result.update(prediction, label)
                # Compute the results every batch_result batches.
                if index_batch % self.batch_result == 0 and index_batch != 0:
                    result.computing_result(progress=index_batch /
                                            len(dataloader_train))
                    self.saver.save(model=self.model,
                                    result=result,
                                    condition="temp")
                    if not self.exlude_test:
                        # Test only on a subsample
                        self.test(subsample=True, subsample_split=0.1)
                    print(self.stopping_condition)
                # For the timer method only, check the stopping condition at each batch
                if self.stopping_condition.method == "timer" and self.stopping_condition.stop(
                ):
                    logger.debug("[Stopping] Cardinality: " +
                                 str(self.cardinality) + " " +
                                 str(self.stopping_condition) +
                                 " stopping learning step.")
                    break
            # At the end of each epoch, test on the whole testing set and update the stopping condition
            if not self.exlude_test:
                self.test()
            result.computing_result(reinit=True, progress=1)
            if self.stopping_condition.stop():
                logger.debug("[Stopping] Cardinality: " +
                             str(self.cardinality) + " " +
                             str(self.stopping_condition) +
                             " stopping learning step.")
            self.saver.save(model=self.model, result=result, condition="Train")
class UtilTest(unittest.TestCase):
    @patch('os.path.isfile')
    def setUp(self, mock_isfile):
        self.lock = torch.multiprocessing.get_context('spawn').Lock()
        self.model = LSTMLayer(num_classes=5)

    def test_creation(self):
        cardinality = Cardinality(3, "", "", size=2)
        read_data = pickle.dumps({
            'word2vec': -1,
            'counter_patterns': {
                "1": 1,
                "10": 10,
                "100": 100,
                "1000": 1000,
                "10000": 10000,
                "100000": 100000
            }
        })
        mockOpen = mock_open(read_data=read_data)
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes", data=[1, 2, 3, 4, 5])
        f.close()
        cardinality.path_list_classes = tf.name
        with patch('builtins.open', mockOpen):
            cardinality.load_files()
        self.cardinality = cardinality
        worker_single = Worker_single(cardinality=self.cardinality,
                                      lock=self.lock)
        worker_single = Worker_single(cardinality=self.cardinality,
                                      lock=self.lock,
                                      stoppingcondition="timer")
        worker_single = Worker_single(cardinality=self.cardinality,
                                      lock=self.lock,
                                      stoppingcondition="earlystopping")
        worker_single = Worker_single(cardinality=self.cardinality,
                                      lock=self.lock,
                                      stoppingcondition="epoch")
        with self.assertRaises(Exception):
            worker_single = Worker_single(cardinality=self.cardinality,
                                          lock=self.lock,
                                          stoppingcondition="unimplemented")
        tf.close()

    @patch('os.path.isfile')
    def test_create(self, mock_isfile):
        mock_isfile.return_value = True
        cardinality = Cardinality(3, "", "")
        read_data = pickle.dumps({
            'word2vec': {
                "1": [1] * 20,
                "2": [2] * 20,
                "3": [3] * 20,
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 10,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            }
        })
        mockOpen = mock_open(read_data=read_data)
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes",
                         data=[
                             1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                             1, 2, 1, 2, 3, 4, 5, 6
                         ])
        f.close()
        cardinality.path_list_classes = tf.name
        cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}

        # Test
        with patch('builtins.open', mockOpen):
            #cardinality.load_files()
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader()
            self.assertEqual(len(dataloader), 7)  # 60% of 12
            self.assertTrue(worker_single.dataset.loaded)

        # Train
        # Avoid creating a new object
        worker_single.dataset.loaded = False
        with patch('builtins.open', mockOpen):
            #cardinality.load_files()
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader(condition="Train")
            self.assertEqual(len(dataloader), 5)  # 40% of 12

        worker_single.dataset.loaded = False
        with patch('builtins.open', mockOpen):
            #cardinality.load_files()
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader(condition="Train",
                                                         subsample=True,
                                                         subsample_split=0.25)
            self.assertEqual(len(dataloader), 1)  # 25% of 40% of 12

        tf.close()

    @patch('os.path.isfile')
    def test_load_model(self, mock_isfile):
        mock_isfile.return_value = True
        cardinality = Cardinality(3, "", "")
        read_data = pickle.dumps({
            'word2vec': {
                "1": [1] * 20,
                "2": [2] * 20,
                "3": [3] * 20,
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 10,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            }
        })
        mockOpen = mock_open(read_data=read_data)
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes",
                         data=[
                             1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                             1, 2, 1, 2, 3, 4, 5, 6
                         ])
        f.close()
        cardinality.path_list_classes = tf.name
        cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        with patch('builtins.open', mockOpen):
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader()
            worker_single.load_model()

    @patch('os.path.isfile')
    def test_without_file(self, mock_isfile):
        mock_isfile.return_value = True
        cardinality = Cardinality(3, "", "")
        read_data = pickle.dumps({
            'word2vec': {
                "1": [1] * 20,
                "2": [2] * 20,
                "3": [3] * 20,
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 10,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            }
        })
        mockOpen = mock_open(read_data=read_data)
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes",
                         data=[
                             1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                             1, 2, 1, 2, 3, 4, 5, 6
                         ])
        f.close()
        cardinality.path_list_classes = tf.name
        cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        # No data
        with self.assertRaises(Exception):
            worker_single = Worker_single(cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            worker_single.load_model()

    @patch('os.path.isfile')
    def test_train_model(self, mock_isfile):
        mock_isfile.return_value = True
        cardinality = Cardinality(3, "", "")
        read_data = pickle.dumps({
            'word2vec': {
                "1": [1] * 20,
                "2": [2] * 20,
                "3": [3] * 20,
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 10,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            }
        })
        mockOpen = mock_open(read_data=read_data)
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes",
                         data=[
                             1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                             1, 2, 1, 2, 3, 4, 5, 6
                         ])
        f.close()
        cardinality.path_list_classes = tf.name
        cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        with patch('builtins.open', mockOpen):
            worker_single = Worker_single(batch_result=1,
                                          cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader()
            worker_single.train(resuming=False)
            worker_single.train(resuming=True)

    @patch('os.path.isfile')
    def test_test_model(self, mock_isfile):
        mock_isfile.return_value = True
        cardinality = Cardinality(3, "", "")
        read_data = pickle.dumps({
            'word2vec': {
                "1": [1] * 20,
                "2": [2] * 20,
                "3": [3] * 20,
                "4": [4] * 20,
                "5": [5] * 20,
                "6": [6] * 20,
                "7": [7] * 20
            },
            'counter_patterns': {
                1: 10,
                2: 100,
                3: 100,
                4: 100,
                6: 1000,
                5: 1000
            },
            "LSTM": {
                3: self.model.state_dict()
            }
        })
        mockOpen = mock_open(read_data=read_data)
        tf = tempfile.NamedTemporaryFile()
        f = h5py.File(tf, 'w')
        f.create_dataset("list_classes",
                         data=[
                             1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                             1, 2, 1, 2, 3, 4, 5, 6
                         ])
        f.close()
        cardinality.path_list_classes = tf.name
        cardinality.counter = {1: 10, 2: 100, 3: 100, 4: 100, 6: 1000, 5: 1000}
        with patch('builtins.open', mockOpen):
            worker_single = Worker_single(batch_result=1,
                                          cardinality=cardinality,
                                          lock=self.lock,
                                          batch_size=1)
            dataloader = worker_single.create_dataloader()
            worker_single.test()