示例#1
0
    def test_semanticAlignment_bug1(self):
        """test_semanticAlignment_bug1:
        Regression test for a bug in the semantic alignment which prevented
        the computation of a valid regex. This test verifies that the bug
        does not come back.
        @date 18/04/2013
        """
        # One entry per message: (leading digit, firstname, filler, email).
        samples = (
            ("6", "antoine", "GAHFSHQS", "*****@*****.**"),
            ("3", "luc", "CVSDHISD", "*****@*****.**"),
        )

        messages = [
            RawMessage(uuid.uuid4(), None, TypeConvertor.stringToNetzobRaw(digit + firstname + filler + email))
            for digit, firstname, filler, email in samples
        ]

        project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
        nwEngine = NeedlemanAndWunsch(8, project, False, None)
        symbol = Symbol(uuid.uuid4(), "Test", project)

        symbol.addMessages(messages)

        # Tag offsets use two units per source character (presumably because
        # the raw form is hex-encoded -- confirm against TypeConvertor).
        for message, (_, firstname, filler, email) in zip(messages, samples):
            firstnameStart = 2  # skips the single leading digit
            firstnameEnd = firstnameStart + len(firstname) * 2
            emailStart = firstnameEnd + len(filler) * 2
            emailEnd = emailStart + len(email) * 2

            message.addSemanticTag("firstname", firstnameStart, firstnameEnd)
            message.addSemanticTag("email", emailStart, emailEnd)

        nwEngine.alignField(symbol.getField())
        symbol.getField().setFormat(Format.STRING)

        regexReport = "Computed Regex : {0}".format(symbol.getRegex())
        print(regexReport)
        print(symbol.getCells(True))

        # A single resulting field means the alignment failed to split the messages.
        nbComputedFields = len(symbol.getExtendedFields())
        self.assertTrue(nbComputedFields > 1, "Only one field has been computed which tells us something went wrong.")
示例#2
0
    def test_semanticAlignment_simple(self):
        """test_semanticAlignment_simple:
        Test that messages with embedded semantic tags are efficiently aligned.
        Format : <random 10 bytes><random username><random 5 ASCII><random email>

        The test builds 500 random messages, tags the username and email
        parts of each one, aligns them and then checks that exactly four
        fields are computed and that the resulting regex is valid for
        every message.

        Optimal Needleman & Wunsch Parameters :
        // Cost definitions for the alignment
        static const short int MATCH = 5;
        static const short int SEMANTIC_MATCH = 30;
        static const short int MISMATCH = -5;
        static const short int GAP = 0;
        static const short int BLEN = 10;
        // Consts for the definition of a mask
        static const unsigned char END = 2;
        static const unsigned char DIFFERENT = 1;
        static const unsigned char EQUAL = 0;
        """
        project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
        symbol = Symbol(uuid.uuid4(), "Test", project)

        nbMessage = 500
        for _ in range(nbMessage):
            # The order of the random draws is kept unchanged so that a
            # seeded random source keeps producing the same messages.
            username = TypeConvertor.stringToNetzobRaw(self.generateRandomString(4, 10))

            email_prefix = self.generateRandomString(4, 10)
            email_domain = self.generateRandomString(4, 10)
            email_extension = self.generateRandomString(2, 3)
            str_email = "{0}@{1}.{2}".format(email_prefix, email_domain, email_extension)
            email = TypeConvertor.stringToNetzobRaw(str_email)

            random10Bytes = self.generateRandomBytes(10, 10)
            random5ASCII = TypeConvertor.stringToNetzobRaw(self.generateRandomString(5, 5))
            data = "{0}{1}{2}{3}".format(random10Bytes, username, random5ASCII, email)

            # Tag boundaries are offsets into `data`, derived from the
            # lengths of the parts that precede each tagged section.
            usernameStart = len(random10Bytes)
            usernameEnd = usernameStart + len(username)
            emailStart = usernameEnd + len(random5ASCII)
            emailEnd = emailStart + len(email)

            message = RawMessage(uuid.uuid4(), None, data)
            message.addSemanticTag("username", usernameStart, usernameEnd)
            message.addSemanticTag("email", emailStart, emailEnd)

            symbol.addMessage(message)

        nwEngine = NeedlemanAndWunsch(8, project, False, None)
        nwEngine.alignField(symbol.getField())

        symbol.getField().setFormat(Format.STRING)

        extendedFields = symbol.getExtendedFields()
        print("Number of computed fields : {0}".format(len(extendedFields)))
        self.assertEqual(4, len(extendedFields))

        # Count every valid message before asserting, so that a failing run
        # still prints the cells and the complete valid/total summary
        # instead of aborting on the first invalid message.
        messages = symbol.getMessages()
        nbValidMessages = 0
        for message in messages:
            if symbol.getField().isRegexValidForMessage(message):
                nbValidMessages += 1

        print(symbol.getCells())

        print("Computed regex is valid for {0}/{1} messages.".format(nbValidMessages, len(messages)))

        self.assertEqual(len(messages), nbValidMessages, "The computed regex is not valid for every message.")