Exemplo n.º 1
0
    def test_deserialisationGroups(self):
        print "start"

        symbols = []
        nbSymbol = random.randint(2, 50)

        for iSymbol in range(0, nbSymbol):
            # We create 6 messages of 2 group
            originalSymbol = Symbol(str(uuid.uuid4()), "TestSymbol", None)
            # group1
            message1 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
            message2 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
            message3 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
            # group2
            message4 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
            message5 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
            message6 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))

            originalSymbol.addMessage(message1)
            originalSymbol.addMessage(message2)
            originalSymbol.addMessage(message3)
            originalSymbol.addMessage(message4)
            originalSymbol.addMessage(message5)
            originalSymbol.addMessage(message6)
            symbols.append(originalSymbol)

        # Start the clustering
        clusteringSolution = UPGMA(None, [originalSymbol], True, 100, 90, True)
        result = clusteringSolution.deserializeGroups(symbols)
        self.assertEqual(result, len(symbols))
Exemplo n.º 2
0
    def test_deserialisationGroups(self):
        print("start")

        symbols = []
        nbSymbol = random.randint(2, 50)

        for iSymbol in range(0, nbSymbol):
            # We create 6 messages of 2 group
            originalSymbol = Symbol(str(uuid.uuid4()), "TestSymbol", None)
            # group1
            message1 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
            message2 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
            message3 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
            # group2
            message4 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
            message5 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
            message6 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))

            originalSymbol.addMessage(message1)
            originalSymbol.addMessage(message2)
            originalSymbol.addMessage(message3)
            originalSymbol.addMessage(message4)
            originalSymbol.addMessage(message5)
            originalSymbol.addMessage(message6)
            symbols.append(originalSymbol)

        # Start the clustering
        clusteringSolution = UPGMA(None, [originalSymbol], True, 100, 90, True)
        result = clusteringSolution.deserializeGroups(symbols)
        self.assertEqual(result, len(symbols))
Exemplo n.º 3
0
 def test_executingClusteringWithOrphanReduction(self):
     
     # We create 6 messages of 2 group
     
     # group1 
     originalSymbol1 = Symbol(uuid.uuid4(), "TestSymbol", None)        
     message1 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(200, 1000)))
     originalSymbol1.addMessage(message1)
     
     originalSymbol2 = Symbol(uuid.uuid4(), "TestSymbol2", None)  
     message2 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(200, 1000)))
     originalSymbol2.addMessage(message2)
     
     originalSymbol3 = Symbol(uuid.uuid4(), "TestSymbol3", None)  
     message3 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(200, 1000)))
     originalSymbol3.addMessage(message3)        
     
     # group2
     originalSymbol4 = Symbol(uuid.uuid4(), "TestSymbol4", None)  
     message4 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("salut " + self.generateRandomString(200, 1000)))
     originalSymbol4.addMessage(message4)
     
     originalSymbol5 = Symbol(uuid.uuid4(), "TestSymbol5", None)  
     message5 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("salut " + self.generateRandomString(200, 1000)))
     originalSymbol5.addMessage(message5)
     
     originalSymbol6 = Symbol(uuid.uuid4(), "TestSymbol6", None)  
     message6 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("salut " + self.generateRandomString(200, 1000)))
     originalSymbol6.addMessage(message6)
     
     symbols = [originalSymbol1, originalSymbol2, originalSymbol3, originalSymbol4, originalSymbol5, originalSymbol6]
     
     # Start the clustering
     clusteringSolution = UPGMA(None, symbols, True, 100, 80, True, Format.ASCII)
     resultBeforeOrphan = clusteringSolution.executeClustering()
     resultAfterOrphan = clusteringSolution.executeOrphanReduction()
     
     if (len(resultAfterOrphan) < len(resultBeforeOrphan)) :
         print "Before Orphan Reduction : "
         for symbol in resultBeforeOrphan :
             print "Symbol : " + str(symbol.getName())
             for m in symbol.getMessages() :
                 print " + " + str(m.getStringData())
         
         print "After Orphan Reduction : "
         for symbol in resultAfterOrphan :
             print "Symbol : " + str(symbol.getName())
             for m in symbol.getMessages() :
                 print " + " + str(m.getStringData())
     
     
     self.assertGreaterEqual(len(resultBeforeOrphan), len(resultAfterOrphan))
Exemplo n.º 4
0
    def test_executingClusteringWithOrphanReduction(self):

        # We create 6 messages of 2 group

        # group1
        originalSymbol1 = Symbol(str(uuid.uuid4()), "TestSymbol", None)
        message1 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(200, 1000)))
        originalSymbol1.addMessage(message1)

        originalSymbol2 = Symbol(str(uuid.uuid4()), "TestSymbol2", None)
        message2 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(200, 1000)))
        originalSymbol2.addMessage(message2)

        originalSymbol3 = Symbol(str(uuid.uuid4()), "TestSymbol3", None)
        message3 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(200, 1000)))
        originalSymbol3.addMessage(message3)

        # group2
        originalSymbol4 = Symbol(str(uuid.uuid4()), "TestSymbol4", None)
        message4 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut " + self.generateRandomString(200, 1000)))
        originalSymbol4.addMessage(message4)

        originalSymbol5 = Symbol(str(uuid.uuid4()), "TestSymbol5", None)
        message5 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut " + self.generateRandomString(200, 1000)))
        originalSymbol5.addMessage(message5)

        originalSymbol6 = Symbol(str(uuid.uuid4()), "TestSymbol6", None)
        message6 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut " + self.generateRandomString(200, 1000)))
        originalSymbol6.addMessage(message6)

        symbols = [originalSymbol1, originalSymbol2, originalSymbol3, originalSymbol4, originalSymbol5, originalSymbol6]

        # Start the clustering
        clusteringSolution = UPGMA(None, symbols, True, 100, 80, True, Format.ASCII)
        resultBeforeOrphan = clusteringSolution.executeClustering()
        resultAfterOrphan = clusteringSolution.executeOrphanReduction()

        if (len(resultAfterOrphan) < len(resultBeforeOrphan)):
            print "Before Orphan Reduction: "
            for symbol in resultBeforeOrphan:
                print "Symbol: " + str(symbol.getName())
                for m in symbol.getMessages():
                    print " + " + str(m.getStringData())

            print "After Orphan Reduction: "
            for symbol in resultAfterOrphan:
                print "Symbol: " + str(symbol.getName())
                for m in symbol.getMessages():
                    print " + " + str(m.getStringData())

        self.assertGreaterEqual(len(resultBeforeOrphan), len(resultAfterOrphan))
Exemplo n.º 5
0
 def test_executingClustering(self):
     
     # We create 6 messages of 2 group
     
     # group1 
     originalSymbol1 = Symbol(uuid.uuid4(), "TestSymbol", None)        
     message1 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
     originalSymbol1.addMessage(message1)
     
     originalSymbol2 = Symbol(uuid.uuid4(), "TestSymbol2", None)  
     message2 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
     originalSymbol2.addMessage(message2)
     
     originalSymbol3 = Symbol(uuid.uuid4(), "TestSymbol3", None)  
     message3 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
     originalSymbol3.addMessage(message3)
     
     
     # group2
     originalSymbol4 = Symbol(uuid.uuid4(), "TestSymbol4", None)  
     message4 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
     originalSymbol4.addMessage(message4)
     
     originalSymbol5 = Symbol(uuid.uuid4(), "TestSymbol5", None)  
     message5 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
     originalSymbol5.addMessage(message5)
     
     originalSymbol6 = Symbol(uuid.uuid4(), "TestSymbol6", None)  
     message6 = RawMessage(uuid.uuid4(), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
     originalSymbol6.addMessage(message6)
     
     symbols = [originalSymbol1, originalSymbol2, originalSymbol3, originalSymbol4, originalSymbol5, originalSymbol6]
     
     # Start the clustering
     clusteringSolution = UPGMA(None, symbols, True, 100, 90, True, Format.ASCII)
     result = clusteringSolution.executeClustering()
     
     for symbol in result :
         print "Symbol : " + str(symbol.getName())
         for m in symbol.getMessages() :
             print " + " + str(m.getStringData())
Exemplo n.º 6
0
    def test_executingClustering(self):

        # We create 6 messages of 2 group

        # group1
        originalSymbol1 = Symbol(str(uuid.uuid4()), "TestSymbol", None)
        message1 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
        originalSymbol1.addMessage(message1)

        originalSymbol2 = Symbol(str(uuid.uuid4()), "TestSymbol2", None)
        message2 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
        originalSymbol2.addMessage(message2)

        originalSymbol3 = Symbol(str(uuid.uuid4()), "TestSymbol3", None)
        message3 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("bonjour " + self.generateRandomString(20, 30) + " comment vas-tu ?"))
        originalSymbol3.addMessage(message3)

        # group2
        originalSymbol4 = Symbol(str(uuid.uuid4()), "TestSymbol4", None)
        message4 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
        originalSymbol4.addMessage(message4)

        originalSymbol5 = Symbol(str(uuid.uuid4()), "TestSymbol5", None)
        message5 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
        originalSymbol5.addMessage(message5)

        originalSymbol6 = Symbol(str(uuid.uuid4()), "TestSymbol6", None)
        message6 = RawMessage(str(uuid.uuid4()), str(time.time()), TypeConvertor.stringToNetzobRaw("salut à toi " + self.generateRandomString(10, 15) + " what's up ?"))
        originalSymbol6.addMessage(message6)

        symbols = [originalSymbol1, originalSymbol2, originalSymbol3, originalSymbol4, originalSymbol5, originalSymbol6]

        # Start the clustering
        clusteringSolution = UPGMA(None, symbols, True, 100, 90, True, Format.ASCII)
        result = clusteringSolution.executeClustering()

        for symbol in result:
            print "Symbol: " + str(symbol.getName())
            for m in symbol.getMessages():
                print " + " + str(m.getStringData())
Exemplo n.º 7
0
    def alignSymbols(self, symbols, project):
        from netzob.Inference.Vocabulary.Alignment.UPGMA import UPGMA
        self.result = []
        preResults = []

        # First we add in results, the symbols which wont be aligned
        for symbol in project.getVocabulary().getSymbols():
            found = False
            for s in symbols:
                if str(symbol.getID()) == str(s.getID()):
                    found = True
            if not found:
                logging.debug("Symbol " + str(symbol.getName()) + "[" + str(symbol.getID()) + "]] wont be aligned")
                preResults.append(symbol)

        # Then we retrieve all the parameters of the CLUSTERING / ALIGNMENT
        defaultFormat = project.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_GLOBAL_FORMAT)
        nbIteration = project.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_NB_ITERATION)
        minEquivalence = project.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_EQUIVALENCE_THRESHOLD)
        doInternalSlick = project.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_DO_INTERNAL_SLICK)
        doOrphanReduction = project.getConfiguration().getVocabularyInferenceParameter(ProjectConfiguration.VOCABULARY_ORPHAN_REDUCTION)

        # We try to cluster each symbol
        listEqu = []  # list of thresholds recorded
        emptySymbols = []  # list of all empty symbols
        clusteringSolution = UPGMA(project, symbols, True, nbIteration, minEquivalence, doInternalSlick, defaultFormat, self.unitSize, self.cb_status)
        t1 = time.time()
        self.result = clusteringSolution.executeClustering()

        # We optionally handle orphans
        if doOrphanReduction:
            self.result = clusteringSolution.executeOrphanReduction()

        self.result.extend(preResults)
        self.result.extend(emptySymbols)  # Add the empty symbols (To discuss: can we delete them before?)
        logging.info("Time of clustering : " + str(time.time() - t1))