def test_semanticAlignment_bug1(self):
    """test_semanticAlignment_bug1:
    Regression test for a bug identified in the semantic alignment
    which prevented the computation of a valid regex. Two messages
    carrying "firstname" and "email" semantic tags are aligned and the
    test verifies that more than one field is computed, i.e. that the
    bug is not coming back.

    @date 18/04/2013
    """
    firstname1 = "antoine"
    email1 = "*****@*****.**"
    firstname2 = "luc"
    email2 = "*****@*****.**"

    # Payload layout: <1 char><firstname><8 chars of filler><email>
    payload1 = "6" + firstname1 + "GAHFSHQS" + email1
    payload2 = "3" + firstname2 + "CVSDHISD" + email2
    msg1 = RawMessage(uuid.uuid4(), None,
                      TypeConvertor.stringToNetzobRaw(payload1))
    msg2 = RawMessage(uuid.uuid4(), None,
                      TypeConvertor.stringToNetzobRaw(payload2))

    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    symbol = Symbol(uuid.uuid4(), "Test", project)
    symbol.addMessages([msg1, msg2])

    # Tag offsets count two units per source character (1-char prefix
    # -> 2, 8-char filler -> 16); presumably because the raw form is
    # hex-encoded -- confirm against TypeConvertor.stringToNetzobRaw.
    taggedMessages = (
        (msg1, firstname1, email1),
        (msg2, firstname2, email2),
    )
    for msg, firstname, email in taggedMessages:
        firstnameStart = 2
        firstnameEnd = firstnameStart + len(firstname) * 2
        emailStart = firstnameEnd + 16
        emailEnd = emailStart + len(email) * 2
        msg.addSemanticTag("firstname", firstnameStart, firstnameEnd)
        msg.addSemanticTag("email", emailStart, emailEnd)

    nwEngine.alignField(symbol.getField())
    symbol.getField().setFormat(Format.STRING)

    # Single-argument parenthesised prints give the same output under
    # the Python 2 print statement.
    print("Computed Regex : {0}".format(symbol.getRegex()))
    print("=======")
    print(symbol.getCells(True))

    computedFields = symbol.getExtendedFields()
    hasSeveralFields = len(computedFields) > 1
    self.assertTrue(
        hasSeveralFields,
        "Only one field has been computed which tells us something went wrong.")
def test_semanticAlignment_bug1(self):
    """test_semanticAlignment_bug1:
    Regression test for a bug identified in the semantic alignment
    which prevented the computation of a valid regex. Two messages
    carrying "firstname" and "email" semantic tags are aligned and the
    test verifies that more than one field is computed, i.e. that the
    bug is not coming back.

    @date 18/04/2013
    """
    firstname1 = "antoine"
    email1 = "*****@*****.**"
    firstname2 = "luc"
    email2 = "*****@*****.**"

    # Payload layout: <1 char><firstname><8 chars of filler><email>
    payload1 = "6" + firstname1 + "GAHFSHQS" + email1
    payload2 = "3" + firstname2 + "CVSDHISD" + email2
    msg1 = RawMessage(uuid.uuid4(), None,
                      TypeConvertor.stringToNetzobRaw(payload1))
    msg2 = RawMessage(uuid.uuid4(), None,
                      TypeConvertor.stringToNetzobRaw(payload2))

    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    symbol = Symbol(uuid.uuid4(), "Test", project)
    symbol.addMessages([msg1, msg2])

    # Tag offsets count two units per source character (1-char prefix
    # -> 2, 8-char filler -> 16); presumably because the raw form is
    # hex-encoded -- confirm against TypeConvertor.stringToNetzobRaw.
    taggedMessages = (
        (msg1, firstname1, email1),
        (msg2, firstname2, email2),
    )
    for msg, firstname, email in taggedMessages:
        firstnameStart = 2
        firstnameEnd = firstnameStart + len(firstname) * 2
        emailStart = firstnameEnd + 16
        emailEnd = emailStart + len(email) * 2
        msg.addSemanticTag("firstname", firstnameStart, firstnameEnd)
        msg.addSemanticTag("email", emailStart, emailEnd)

    nwEngine.alignField(symbol.getField())
    symbol.getField().setFormat(Format.STRING)

    # Single-argument parenthesised prints give the same output under
    # the Python 2 print statement.
    print("Computed Regex : {0}".format(symbol.getRegex()))
    print("=======")
    print(symbol.getCells(True))

    computedFields = symbol.getExtendedFields()
    hasSeveralFields = len(computedFields) > 1
    self.assertTrue(
        hasSeveralFields,
        "Only one field has been computed which tells us something went wrong.")
def test_semanticAlignment_simple(self):
    """test_semanticAlignment_simple:
    Test that messages with embedded semantic are efficiently aligned.

    Format : <random 10 bytes><random username><random 5 ASCII><random email>

    The test builds 500 such messages, tags the username and email
    ranges, aligns the symbol's field and then checks that exactly 4
    fields are computed and that the resulting regex is valid for every
    message.

    Optimal Needleman & Wunsch Parameters :
    // Cost definitions for the alignment
    static const short int MATCH = 5;
    static const short int SEMANTIC_MATCH = 30;
    static const short int MISMATCH = -5;
    static const short int GAP = 0;
    static const short int BLEN = 10;
    // Consts for the definition of a mask
    static const unsigned char END = 2;
    static const unsigned char DIFFERENT = 1;
    static const unsigned char EQUAL = 0;
    """
    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    symbol = Symbol(uuid.uuid4(), "Test", project)

    nbMessage = 500
    for _ in range(nbMessage):
        # The generator calls are kept in their original order so that a
        # seeded random source yields the same messages as before.
        username = TypeConvertor.stringToNetzobRaw(
            self.generateRandomString(4, 10))

        email_prefix = self.generateRandomString(4, 10)
        email_domain = self.generateRandomString(4, 10)
        email_extension = self.generateRandomString(2, 3)
        email = TypeConvertor.stringToNetzobRaw(
            "{0}@{1}.{2}".format(email_prefix, email_domain, email_extension))

        random10Bytes = self.generateRandomBytes(10, 10)
        random5ASCII = TypeConvertor.stringToNetzobRaw(
            self.generateRandomString(5, 5))

        data = "{0}{1}{2}{3}".format(
            random10Bytes, username, random5ASCII, email)
        message = RawMessage(uuid.uuid4(), None, data)

        # Semantic tag ranges are expressed as offsets into `data`.
        usernameStart = len(random10Bytes)
        usernameEnd = usernameStart + len(username)
        emailStart = usernameEnd + len(random5ASCII)
        emailEnd = emailStart + len(email)
        message.addSemanticTag("username", usernameStart, usernameEnd)
        message.addSemanticTag("email", emailStart, emailEnd)

        symbol.addMessage(message)

    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    nwEngine.alignField(symbol.getField())
    symbol.getField().setFormat(Format.STRING)

    # Single-argument parenthesised prints give the same output under
    # the Python 2 print statement.
    print("Number of computed fields : {0}".format(
        len(symbol.getExtendedFields())))
    self.assertEqual(4, len(symbol.getExtendedFields()))

    nbValidMessages = 0
    for message in symbol.getMessages():
        if symbol.getField().isRegexValidForMessage(message):
            nbValidMessages += 1

    print(symbol.getCells())

    nbMessages = len(symbol.getMessages())
    print("Computed regex is valid for {0}/{1} messages.".format(
        nbValidMessages, nbMessages))

    # Assert only after the summary has been printed: failing on the
    # first invalid message would hide how many messages are affected.
    self.assertEqual(
        nbMessages, nbValidMessages,
        "Computed regex is valid for only {0}/{1} messages.".format(
            nbValidMessages, nbMessages))
def test_semanticAlignment_simple(self):
    """test_semanticAlignment_simple:
    Test that messages with embedded semantic are efficiently aligned.

    Format : <random 10 bytes><random username><random 5 ASCII><random email>

    The test builds 500 such messages, tags the username and email
    ranges, aligns the symbol's field and then checks that exactly 4
    fields are computed and that the resulting regex is valid for every
    message.

    Optimal Needleman & Wunsch Parameters :
    // Cost definitions for the alignment
    static const short int MATCH = 5;
    static const short int SEMANTIC_MATCH = 30;
    static const short int MISMATCH = -5;
    static const short int GAP = 0;
    static const short int BLEN = 10;
    // Consts for the definition of a mask
    static const unsigned char END = 2;
    static const unsigned char DIFFERENT = 1;
    static const unsigned char EQUAL = 0;
    """
    project = Project(uuid.uuid4(), "Experiment", datetime.now(), "")
    symbol = Symbol(uuid.uuid4(), "Test", project)

    nbMessage = 500
    for _ in range(nbMessage):
        # The generator calls are kept in their original order so that a
        # seeded random source yields the same messages as before.
        username = TypeConvertor.stringToNetzobRaw(
            self.generateRandomString(4, 10))

        email_prefix = self.generateRandomString(4, 10)
        email_domain = self.generateRandomString(4, 10)
        email_extension = self.generateRandomString(2, 3)
        email = TypeConvertor.stringToNetzobRaw(
            "{0}@{1}.{2}".format(email_prefix, email_domain, email_extension))

        random10Bytes = self.generateRandomBytes(10, 10)
        random5ASCII = TypeConvertor.stringToNetzobRaw(
            self.generateRandomString(5, 5))

        data = "{0}{1}{2}{3}".format(
            random10Bytes, username, random5ASCII, email)
        message = RawMessage(uuid.uuid4(), None, data)

        # Semantic tag ranges are expressed as offsets into `data`.
        usernameStart = len(random10Bytes)
        usernameEnd = usernameStart + len(username)
        emailStart = usernameEnd + len(random5ASCII)
        emailEnd = emailStart + len(email)
        message.addSemanticTag("username", usernameStart, usernameEnd)
        message.addSemanticTag("email", emailStart, emailEnd)

        symbol.addMessage(message)

    nwEngine = NeedlemanAndWunsch(8, project, False, None)
    nwEngine.alignField(symbol.getField())
    symbol.getField().setFormat(Format.STRING)

    # Single-argument parenthesised prints give the same output under
    # the Python 2 print statement.
    print("Number of computed fields : {0}".format(
        len(symbol.getExtendedFields())))
    self.assertEqual(4, len(symbol.getExtendedFields()))

    nbValidMessages = 0
    for message in symbol.getMessages():
        if symbol.getField().isRegexValidForMessage(message):
            nbValidMessages += 1

    print(symbol.getCells())

    nbMessages = len(symbol.getMessages())
    print("Computed regex is valid for {0}/{1} messages.".format(
        nbValidMessages, nbMessages))

    # Assert only after the summary has been printed: failing on the
    # first invalid message would hide how many messages are affected.
    self.assertEqual(
        nbMessages, nbValidMessages,
        "Computed regex is valid for only {0}/{1} messages.".format(
            nbValidMessages, nbMessages))