Exemplo n.º 1
0
    def __executeOnMessages(self, messages, symbol_name, symbol):
        """Cluster messages by the kind of CRC32 found in their data.

        Every message is searched with ``CRCFinder`` and stored in the first
        group that matches, tested in this order: ``CRC_be``, ``CRC_le``,
        ``CRC_mid_be``, ``CRC_mid_le``. Messages without any hit are stored
        in the ``No_CRC`` group.

        Args:
            messages: iterable of messages; each one must expose ``data``.
            symbol_name: prefix of the created symbol names
                (``<symbol_name>_<group>``).
            symbol: optional :class:`netzob.Model.Vocabulary.Symbol.Symbol`;
                when not ``None`` its ``fields`` are assigned to every
                created symbol.

        Returns:
            list: one :class:`netzob.Model.Vocabulary.Symbol.Symbol` per
            non-empty group (an empty list when ``messages`` is empty).

        """
        crcfinder = CRCFinder()
        messageByCRC = dict()
        for message in messages:
            searched_string = message.data
            crc_be, crc_le = crcfinder._search_CRC(searched_string)
            crc_mid_be, crc_mid_le = crcfinder._search_mid_CRC(
                searched_string)
            self._logger.debug("Found the following results:")
            self._logger.debug("CRC_BE : " + str(crc_be))
            self._logger.debug("CRC_LE : " + str(crc_le))
            self._logger.debug("CRC_mid_be : " + str(crc_mid_be))
            self._logger.debug("CRC_mid_le : " + str(crc_mid_le))

            # Cluster messages by CRC type; the first truthy result wins
            if crc_be:
                group = "CRC_be"
            elif crc_le:
                group = "CRC_le"
            elif crc_mid_be:
                group = "CRC_mid_be"
            elif crc_mid_le:
                group = "CRC_mid_le"
            else:
                group = "No_CRC"
            messageByCRC.setdefault(group, []).append(message)

        # Create new symbols for each group of equivalent message CRC.
        # The list is created outside the loop so that an empty 'messages'
        # yields an empty result instead of an unbound local.
        newSymbols = []
        for sym_name, msgs in messageByCRC.items():
            s = Symbol(messages=msgs, name=symbol_name + "_" + sym_name)
            if symbol is not None:
                s.fields = symbol.fields
            newSymbols.append(s)
        return newSymbols
Exemplo n.º 2
0
    def cluster_by_field(self, fields, messages, fid_merged):
        """Group messages by the raw bytes of one of the first two fields.

        The byte range is derived from ``fields[n].domain.dataType.size[1] // 8``
        (presumably the maximum field size in bits -- TODO confirm):

        * ``fid_merged == 0``: bytes covered by ``fields[0]``;
        * ``fid_merged == 1``: bytes covered by ``fields[1]``, which start
          right after ``fields[0]``.

        :param fields: sequence of fields describing the message layout
        :param messages: sequence of messages exposing ``data``
        :param fid_merged: index (0 or 1) of the field used as clustering key
        :return: ordered mapping ``key bytes -> Symbol`` holding the messages
                 sharing that key, in order of first appearance
        :raises ValueError: if ``fid_merged`` is neither 0 nor 1
        """
        logging.debug("[+] Generate Clusters")
        if fid_merged == 0:
            il = 0
            ir = fields[0].domain.dataType.size[1] // 8
        elif fid_merged == 1:
            il = fields[0].domain.dataType.size[1] // 8
            ir = il + (fields[1].domain.dataType.size[1] // 8)
        else:
            logging.error("Error: fid_merged should be 0 or 1")
            # Without a valid range there is nothing meaningful to slice
            raise ValueError(
                "fid_merged should be 0 or 1, got {!r}".format(fid_merged))

        # Group the messages by the value found in the selected byte range
        messagesByValue = collections.OrderedDict()
        for message in messages:
            messagesByValue.setdefault(message.data[il:ir], []).append(message)

        symbols = collections.OrderedDict()
        for fv, msgs in messagesByValue.items():
            symbols[fv] = Symbol(name=fv, messages=msgs)

        return symbols
Exemplo n.º 3
0
    def cluster(self, messages, meta=False):
        """Create and return new symbols according to the messages size.

        >>> from netzob.all import *
        >>> import binascii
        >>> samples = ["00ffff1100abcd", "00aaaa1100abcd", "00bbbb1100abcd", "001100abcd", "001100ffff", "00ffffffff1100abcd"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> clusterer = ClusterBySize()
        >>> newSymbols = clusterer.cluster(messages)
        >>> for sym in newSymbols:
        ...     print("[" + sym.name + "]")
        ...     sym.addEncodingFunction(TypeEncodingFunction(HexaString))
        ...     print(sym)
        [symbol_7]
        Field           
        ----------------
        '00ffff1100abcd'
        '00aaaa1100abcd'
        '00bbbb1100abcd'
        ----------------
        [symbol_5]
        Field       
        ------------
        '001100abcd'
        '001100ffff'
        ------------
        [symbol_9]
        Field               
        --------------------
        '00ffffffff1100abcd'
        --------------------

        :param messages: the messages to cluster.
        :type messages: a list of :class:`netzob.Model.Vocabulary.Messages.AbstractMessage.AbstractMessage`
        :param meta: value forwarded as ``meta`` to every created symbol.
        :return: one symbol per distinct message size, named ``symbol_<size>``,
                 in order of first appearance of each size.
        :raises TypeError: if ``messages`` is None
        """

        # Safe checks
        if messages is None:
            raise TypeError("'messages' should not be None")

        # Cluster messages by size (sizes keep their first-seen order)
        messagesByLen = OrderedDict()
        for msg in messages:
            messagesByLen.setdefault(len(msg.data), []).append(msg)

        # Create new symbols for each group of equivalent message size
        newSymbols = []
        for length, msgs in messagesByLen.items():
            s = Symbol(messages=msgs,
                       name="symbol_{0}".format(str(length)),
                       meta=meta)
            newSymbols.append(s)

        return newSymbols
Exemplo n.º 4
0
    def test_split_delimiter():
        """Split the first field of a PCAP-derived symbol on a delimiter.

        Reads the capture with ``nbPackets=10`` and a BPF filter, wraps the
        resulting messages in one symbol and calls ``Format.splitDelimiter``
        on its first field with the raw byte ``b'\\05'`` as delimiter.
        """
        capture_filter = "!(arp) and !(len == 96)"

        imported = PCAPImporter.readFile(
            "/home/research/Downloads/hunter_no_vlan.pcap",
            nbPackets=10,
            bpfFilter=capture_filter)
        sym = Symbol(messages=imported.values())
        first_field = sym.fields[0]
        delimiter = Raw(b'\05')
        Format.splitDelimiter(first_field, delimiter)
    def cluster(self, messages, appDatas):
        """Cluster messages by the set of applicative data found in them.

        The values of all ``appDatas`` are searched in all ``messages``;
        messages containing the same set of applicative-data names end up in
        the same symbol. The symbol name is the sorted names joined by ``;``,
        or ``None`` for messages in which nothing was found.

        :param messages: the messages to cluster (instances of AbstractMessage)
        :param appDatas: the applicative data to search (instances of
                         ApplicativeData)
        :return: a list with one Symbol per distinct set of found labels
        :raises TypeError: if an argument is None or empty, or contains an
                           element of the wrong type
        :raises ValueError: if a search result refers to an unknown label or
                            to an unknown message
        """
        if messages is None:
            raise TypeError("Messages cannot be None")
        if appDatas is None:
            raise TypeError("AppDatas cannot be None")
        if len(messages) == 0:
            raise TypeError("There should be at least one message.")
        if len(appDatas) == 0:
            raise TypeError("There should be at least one applicative data.")

        for m in messages:
            if not isinstance(m, AbstractMessage):
                raise TypeError("At least one message ({0}) is not an AbstractMessage.".format(str(m)))
        for appData in appDatas:
            if not isinstance(appData, ApplicativeData):
                raise TypeError("At least one applicative data ({0}) is not an instance of ApplicativeData.".format(str(appData)))

        # value -> name of each applicative data, also handed to the search
        labels = {appData.value: appData.name for appData in appDatas}
        # Built once so that each result is validated in constant time
        knownLabels = set(labels.values())

        idMessages = {message.id: message for message in messages}
        messagesPerAppData = {message: set() for message in messages}

        searchEngine = SearchEngine()

        searchResults = searchEngine.searchDataInMessages([appData.value for appData in appDatas], messages, inParallel=True, dataLabels=labels)
        for result in searchResults:
            searchTask = result.searchTask
            message = searchTask.properties['message']
            label = searchTask.properties['label']
            if label not in knownLabels:
                raise ValueError("Found label ({0}) in a result cannot be identified in the original list of searched labels.".format(label))
            if message.id not in idMessages:
                raise ValueError("Found message ({0}) cannot be identified in the original list of searched messages.".format(message.id))
            messagesPerAppData[idMessages[message.id]].add(label)

        # Build clusters
        clusters = dict()
        for message, labelsInMessage in messagesPerAppData.items():
            # An empty join (no label found) is keyed by None
            strAppDatas = ';'.join(sorted(labelsInMessage)) or None
            clusters.setdefault(strAppDatas, []).append(message)

        # Build Symbols
        symbols = [Symbol(name=strAppDatas, messages=msgs) for strAppDatas, msgs in clusters.items()]

        return symbols
Exemplo n.º 6
0
    def test_split_offset():
        """Exercise offset splitting, key-field clustering and relation search.

        Steps, all on messages read from a PCAP file with ``nbPackets=100``:
        print the per-byte entropy, build one symbol, split it statically and
        at offsets ``[1, 3, 4, 5]``, cluster it on its third field, search
        relations on the last cluster (turning every ``SizeRelation`` into a
        ``Size`` domain) and finally abstract the second message against all
        clusters. Everything is reported with ``print``.
        """
        capture_filter = "!(arp) and !(len == 96)"
        messages = PCAPImporter.readFile(
            "/home/research/Downloads/hunter_no_vlan.pcap",
            nbPackets=100,
            bpfFilter=capture_filter).values()

        bytes_entropy = list(EntropyMeasurement.measure_entropy(messages))
        print(bytes_entropy)

        symbol = Symbol(messages=messages)
        Format.splitStatic(symbol)
        Format.splitOffset(symbol, [1, 3, 4, 5])

        clusters = Format.clusterByKeyField(symbol, symbol.fields[2])
        for clustered_symbol in clusters.values():
            print(clustered_symbol)
        # The relation search below only looks at the last cluster
        new_symbol = list(clusters.values())[-1]
        print(new_symbol)

        # NOTE: a disabled variant of the loop below used
        # CorrelationFinder.find(new_symbol, minMic=0.7) and additionally
        # printed rel["mic"] and rel["pearson"] for each relation.

        def show_fields(fields):
            # Names joined by '-', then a copy of the values of every field
            print("    " + str('-'.join([f.name for f in fields])))
            values = [v.getValues()[:] for v in fields]
            print("    " + str(values))

        for rel in RelationFinder.findOnSymbol(new_symbol):
            print("  " + rel["relation_type"] + ", between '" +
                  rel["x_attribute"] + "' of:")
            show_fields(rel["x_fields"])
            print("  " + "and '" + rel["y_attribute"] + "' of:")
            show_fields(rel["y_fields"])
            if rel["relation_type"] == "SizeRelation":
                rel["x_fields"][0].domain = Size(rel["y_fields"],
                                                 factor=1 / 8.0)
        print(new_symbol.fields[2].domain)

        message_to_verify = messages[1]
        candidates = list(clusters.values())
        print(AbstractField.abstract(message_to_verify.data, candidates))
Exemplo n.º 7
0
    def test_split_static():
        """Build a symbol from a PCAP capture and split it statically.

        Reads the capture with ``nbPackets=10`` and a BPF filter, wraps the
        messages in one symbol, runs ``Format.splitStatic`` on it and returns
        that symbol.
        """
        capture_filter = "!(arp) and !(len == 96)"

        imported = PCAPImporter.readFile(
            "/home/research/Downloads/hunter_no_vlan.pcap",
            nbPackets=10,
            bpfFilter=capture_filter)
        sym = Symbol(messages=imported.values())
        Format.splitStatic(sym)
        return sym
Exemplo n.º 8
0
    def create_symbol(self, name):
        """Create a symbol called ``name`` and register it in the indexes.

        The symbol, each of its fields, each field's domain and each domain's
        data type are stored in the matching private mapping, keyed by the
        string form of their ``id``.

        :param name: name given to the new symbol
        :return: the created symbol
        """
        created = Symbol(name=name)
        self.__symbols[str(created.id)] = created

        for fld in created.fields:
            self.__fields[str(fld.id)] = fld

            domain = fld.domain
            self.__domains[str(domain.id)] = domain

            data_type = domain.dataType
            self.__datatypes[str(data_type.id)] = data_type

        return created
Exemplo n.º 9
0
def symbolsFromSegments(
        segmentsPerMsg: Iterable[Sequence[MessageSegment]]) -> List[Symbol]:
    """
    Build one Netzob Symbol per message from that message's segments.

    For every entry, the segments are ordered by ``offset`` and each one
    becomes a Field created from the segment's ``bytes``. The symbol's only
    message is the ``message`` of the entry's first segment.

    :param segmentsPerMsg: List of messages, represented by lists of segments.
    :return: list of Symbols, one for each entry in the given iterable of lists.
    """
    symbols = []
    for segSeq in segmentsPerMsg:
        ordered = sorted(segSeq, key=lambda seg: seg.offset)
        fields = [Field(seg.bytes) for seg in ordered]
        symbols.append(Symbol(fields, messages=[segSeq[0].message]))
    return symbols
Exemplo n.º 10
0
    def clusterByDestination(messages):
        """Group messages into one symbol per destination.

        Each symbol is named ``Symbol-<destination>`` and holds the messages
        whose ``destination`` attribute equals that value.

        >>> import operator
        >>> from netzob.all import *
        >>> messages = [RawMessage("hello berlin", destination="user"), RawMessage("hello paris", destination="master")]
        >>> messages.extend([RawMessage("hello madrid", destination="master"), RawMessage("hello world", destination="user")])
        >>> symbols = Format.clusterByDestination(messages)
        >>> print(len(symbols))
        2
        >>> for symbol in sorted(symbols, key=operator.attrgetter("name")):
        ...     print("{}:".format(symbol.name))
        ...     print(symbol)
        Symbol-master:
        Field         
        --------------
        'hello paris' 
        'hello madrid'
        --------------
        Symbol-user:
        Field         
        --------------
        'hello berlin'
        'hello world' 
        --------------

        """

        symbolByDestination = dict()
        for message in messages:
            destination = message.destination
            known = symbolByDestination.get(destination)
            if known is None:
                # First message seen for this destination: open a new symbol
                symbolByDestination[destination] = Symbol(
                    name="Symbol-{}".format(destination), messages=[message])
            else:
                known.messages.append(message)

        return list(symbolByDestination.values())
Exemplo n.º 11
0
    def _processUPGMA(self, messages, recomputeMatrixThreshold=None):
        """Computes the matrix of equivalences (in C) and reduces it
        iteratively."""
        if messages is None:
            raise TypeError("Messages cannot be None")
        if len(messages) == 0:
            raise TypeError("There should be at least one message.")
        for m in messages:
            if not isinstance(m, AbstractMessage):
                raise TypeError(
                    "At least one message ({0}) is not an AbstractMessage.".
                    format(str(m)))

        # We create one symbol for each message
        initialSymbols = [Symbol(messages=[message]) for message in messages]

        self._logger.debug("Computing the associated matrix")

        # Compute initial similarity matrix
        self.scores = self._computeSimilarityMatrix(initialSymbols)

        # Reduce the UPGMA matrix (merge symbols by similarity)
        return self._computePhylogenicTree(initialSymbols,
                                           recomputeMatrixThreshold)
Exemplo n.º 12
0
    def _mergeEffectiveRowCol(self, symbols, i_maximum, j_maximum):
        """Replace two symbols of ``symbols`` by a single merged symbol.

        The two entries are removed from the list (in place) and a new symbol
        holding the messages of both is appended at its end.

        @param symbols: list of symbols, modified in place
        @param i_maximum: index of the first symbol to merge
        @param j_maximum: index of the second symbol to merge
        @return the id of the newly created symbol, as a string"""
        # Pop the higher index first so the lower one still designates the
        # same element afterwards
        higher = max(i_maximum, j_maximum)
        lower = min(i_maximum, j_maximum)
        symbol1 = symbols.pop(higher)
        symbol2 = symbols.pop(lower)

        # The merged symbol carries the messages of both, higher index first
        merged = Symbol(messages=[*symbol1.messages, *symbol2.messages])

        # Append the new symbol to the "symbols" structure
        symbols.append(merged)

        return str(merged.id)
Exemplo n.º 13
0
    def buildPacket(self, payload):
        """Build a raw IP packet including the IP layer and its payload.

        The header fields are declared first with placeholder domains; the
        header length, total length and checksum fields are then re-bound to
        ``Size`` / ``InternetChecksum`` domains computed over the other
        fields. The result of specializing the resulting symbol is returned.

        :parameter payload: the payload to write on the channel; used as the
                            domain of the 'Payload' field
        :type payload: binary object
        """

        def persistent_bits(nbBits, bits):
            # Fixed-size bit field with a persistent original value
            return Data(dataType=BitArray(nbBits=nbBits),
                        originalValue=bitarray(bits),
                        svas=SVAS.PERSISTENT)

        ip_ver = Field(name='Version', domain=BitArray(value=bitarray('0100')))  # IP Version 4
        ip_ihl = Field(name='Header length', domain=BitArray(bitarray('0000')))
        ip_tos = Field(name='TOS', domain=persistent_bits(8, '00000000'))
        ip_tot_len = Field(name='Total length', domain=BitArray(bitarray('0000000000000000')))
        ip_id = Field(name='Identification number', domain=BitArray(nbBits=16))
        ip_flags = Field(name='Flags', domain=persistent_bits(3, '000'))
        ip_frag_off = Field(name='Fragment offset', domain=persistent_bits(13, '0000000000000'))
        ip_ttl = Field(name='TTL', domain=persistent_bits(8, '10000000'))
        ip_proto = Field(name='Protocol',
                         domain=Integer(value=self.upperProtocol,
                                        unitSize=AbstractType.UNITSIZE_8,
                                        endianness=AbstractType.ENDIAN_BIG,
                                        sign=AbstractType.SIGN_UNSIGNED))
        ip_checksum = Field(name='Checksum', domain=BitArray(bitarray('0000000000000000')))
        ip_saddr = Field(name='Source address', domain=IPv4(self.localIP))
        ip_daddr = Field(name='Destination address', domain=IPv4(self.remoteIP))
        ip_payload = Field(name='Payload', domain=payload)

        # Header fields in wire order; every consumer below gets its own list
        header_fields = [
            ip_ver, ip_ihl, ip_tos, ip_tot_len, ip_id, ip_flags, ip_frag_off,
            ip_ttl, ip_proto, ip_checksum, ip_saddr, ip_daddr
        ]

        # Header length: size of the header fields scaled by 1/32
        ip_ihl.domain = Size(list(header_fields),
                             dataType=BitArray(nbBits=4),
                             factor=1 / float(32))
        # Total length: size of header plus payload scaled by 1/8
        ip_tot_len.domain = Size(header_fields + [ip_payload],
                                 dataType=Raw(nbBytes=2),
                                 factor=1 / float(8))
        # Checksum computed over the header fields only
        ip_checksum.domain = InternetChecksum(fields=list(header_fields),
                                              dataType=Raw(nbBytes=2))

        packet = Symbol(name='IP layer', fields=header_fields + [ip_payload])
        return packet.specialize()
Exemplo n.º 14
0
    def initHeader(self):
        """Build the Ethernet + IP header symbol and store it in ``self.header``.

        The destination MAC address is looked up from ``self.remoteIP``; if it
        is unknown, one ping is sent to the remote IP before a second lookup.
        The source MAC address comes from ``self.get_interface_addr``.

        :raise Exception: if the remote IP still cannot be resolved to a MAC
                          address after the ping
        """

        # Ethernet header

        # Retrieve remote MAC address
        remote_mac = get_mac_address(ip=self.remoteIP)
        if remote_mac is None:
            # Force ARP resolution, then try the lookup once more
            ping = subprocess.Popen(["/bin/ping", "-c1", self.remoteIP])
            ping.wait()
            time.sleep(0.1)

            remote_mac = get_mac_address(ip=self.remoteIP)
            if remote_mac is None:
                raise Exception(
                    "Cannot resolve IP address to a MAC address for IP: '{}'".
                    format(self.remoteIP))
        # 'aa:bb:...' textual form -> raw bytes
        dstMacAddr = binascii.unhexlify(remote_mac.replace(':', ''))

        # Retrieve local MAC address
        srcMacAddr = self.get_interface_addr(bytes(self.interface, 'utf-8'))[1]

        eth_dst = Field(name='eth.dst', domain=Raw(dstMacAddr))
        eth_src = Field(name='eth.src', domain=Raw(srcMacAddr))
        eth_type = Field(name='eth.type', domain=Raw(b"\x08\x00"))

        # IP header

        def persistent_bits(nbBits, bits):
            # Fixed-size bit field with a persistent original value
            return Data(dataType=BitArray(nbBits=nbBits),
                        originalValue=bitarray(bits),
                        svas=SVAS.PERSISTENT)

        ip_ver = Field(name='ip.version',
                       domain=BitArray(value=bitarray('0100')))  # IP Version 4
        ip_ihl = Field(name='ip.hdr_len', domain=BitArray(bitarray('0000')))
        ip_tos = Field(name='ip.tos', domain=persistent_bits(8, '00000000'))
        ip_tot_len = Field(name='ip.len',
                           domain=BitArray(bitarray('0000000000000000')))
        ip_id = Field(name='ip.id', domain=BitArray(nbBits=16))
        ip_flags = Field(name='ip.flags', domain=persistent_bits(3, '000'))
        ip_frag_off = Field(name='ip.fragment',
                            domain=persistent_bits(13, '0000000000000'))
        ip_ttl = Field(name='ip.ttl', domain=persistent_bits(8, '01000000'))
        ip_proto = Field(name='ip.proto',
                         domain=Integer(value=self.upperProtocol,
                                        unitSize=AbstractType.UNITSIZE_8,
                                        endianness=AbstractType.ENDIAN_BIG,
                                        sign=AbstractType.SIGN_UNSIGNED))
        ip_checksum = Field(name='ip.checksum',
                            domain=BitArray(bitarray('0000000000000000')))
        ip_saddr = Field(name='ip.src', domain=IPv4(self.localIP))
        ip_daddr = Field(name='ip.dst', domain=IPv4(self.remoteIP))
        ip_payload = Field(name='ip.payload', domain=Raw())

        # IP header fields in wire order; every consumer gets its own list
        ip_header_fields = [
            ip_ver, ip_ihl, ip_tos, ip_tot_len, ip_id, ip_flags, ip_frag_off,
            ip_ttl, ip_proto, ip_checksum, ip_saddr, ip_daddr
        ]

        # Header length: size of the IP header fields scaled by 1/32
        ip_ihl.domain = Size(list(ip_header_fields),
                             dataType=BitArray(nbBits=4),
                             factor=1 / float(32))
        # Total length: size of IP header plus payload scaled by 1/8
        ip_tot_len.domain = Size(ip_header_fields + [ip_payload],
                                 dataType=Integer(
                                     unitSize=AbstractType.UNITSIZE_16,
                                     sign=AbstractType.SIGN_UNSIGNED),
                                 factor=1 / float(8))
        # Checksum computed over the IP header fields only
        ip_checksum.domain = InternetChecksum(
            fields=list(ip_header_fields),
            dataType=Raw(nbBytes=2, unitSize=AbstractType.UNITSIZE_16))

        ethernet_fields = [eth_dst, eth_src, eth_type]
        self.header = Symbol(name='Ethernet layer',
                             fields=ethernet_fields + ip_header_fields +
                             [ip_payload])
Exemplo n.º 15
0
    def cluster(self, field, keyField):
        """Create and return new symbols according to a specific key
        field.

        >>> import binascii
        >>> from netzob.all import *
        >>> samples = [b"00ff2f000000", b"000020000000", b"00ff2f000000"]
        >>> messages = [RawMessage(data=binascii.unhexlify(sample)) for sample in samples]
        >>> f1 = Field(Raw(nbBytes=1))
        >>> f2 = Field(Raw(nbBytes=2))
        >>> f3 = Field(Raw(nbBytes=3))
        >>> symbol = Symbol([f1, f2, f3], messages=messages)
        >>> symbol.addEncodingFunction(TypeEncodingFunction(HexaString))
        >>> newSymbols = Format.clusterByKeyField(symbol, f2)
        >>> for sym in list(newSymbols.values()):
        ...     sym.addEncodingFunction(TypeEncodingFunction(HexaString))
        ...     print(sym.name + ":")
        ...     print(sym)
        Symbol_ff2f:
        Field | Field  | Field   
        ----- | ------ | --------
        '00'  | 'ff2f' | '000000'
        '00'  | 'ff2f' | '000000'
        ----- | ------ | --------
        Symbol_0020:
        Field | Field  | Field   
        ----- | ------ | --------
        '00'  | '0020' | '000000'
        ----- | ------ | --------


        :param field: the field we want to split in new symbols
        :type field: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :param keyField: the field used as a key during the splitting operation
        :type keyField: :class:`netzob.Model.Vocabulary.AbstractField.AbstractField`
        :return: an ordered mapping from each distinct key value (as ASCII or
                 HexaString) to the symbol holding the matching messages
        :raises TypeError: if ``field`` or ``keyField`` is None, or if
                           ``keyField`` is not one of ``field.fields``
        """

        # Safe checks
        if field is None:
            raise TypeError("'field' should not be None")
        if keyField is None:
            raise TypeError("'keyField' should not be None")
        if keyField not in field.fields:
            raise TypeError("'keyField' is not a child of 'field'")

        newSymbols = collections.OrderedDict()

        keyFieldMessageValues = keyField.getMessageValues(encoded=False,
                                                          styled=False)
        # key value -> aligned (split) form of each message of that cluster
        newSymbolsSplittedMessages = {}

        # we identify what would be the best type of the key field
        keyFieldType = ASCII
        for _, keyFieldValue in keyFieldMessageValues.items():
            # If the value cannot be parsed as ASCII, we convert it to HexaString
            if not ASCII().canParse(
                    TypeConverter.convert(keyFieldValue, Raw, BitArray)):
                keyFieldType = HexaString
                break

            # Even if the value is theoretically parsable as ASCII, some
            # characters cannot be encoded, so we check the round trip
            asAscii = TypeConverter.convert(keyFieldValue, Raw, ASCII)
            roundTrip = TypeConverter.convert(asAscii, ASCII, Raw)
            if keyFieldValue != roundTrip:
                # The original value is not recovered by encoding and then
                # decoding in ASCII
                keyFieldType = HexaString
                break

        # we create a symbol for each of these unique values
        for message, keyFieldValue in keyFieldMessageValues.items():
            keyFieldValue = TypeConverter.convert(keyFieldValue, Raw,
                                                  keyFieldType)
            if keyFieldValue not in newSymbols:
                if isinstance(keyFieldValue, str):
                    symbolName = "Symbol_{0}".format(keyFieldValue)
                else:
                    symbolName = "Symbol_{0}".format(
                        keyFieldValue.decode("utf-8"))
                newSymbols[keyFieldValue] = Symbol(name=symbolName,
                                                   messages=[message])
                newSymbolsSplittedMessages[keyFieldValue] = []
            else:
                newSymbols[keyFieldValue].messages.append(message)
            splittedMessages = DataAlignment.align([message.data],
                                                   field,
                                                   encoded=False)
            newSymbolsSplittedMessages[keyFieldValue].append(
                splittedMessages[0])

        def rebuildFields(symbol, keyValue, sourceFields):
            # Replace the fields of 'symbol' by copies of 'sourceFields': the
            # key field gets the cluster's key value as domain, every other
            # field gets the distinct values observed at its position.
            symbol.clearFields()
            rows = newSymbolsSplittedMessages[keyValue]
            for i, f in enumerate(sourceFields):
                if f == keyField:
                    newFieldDomain = TypeConverter.convert(
                        keyValue, keyFieldType, Raw)
                else:
                    newFieldDomain = list({row[i] for row in rows})
                newF = Field(name=f.name, domain=newFieldDomain)
                newF.parent = symbol
                symbol.fields.append(newF)

        for newSymbolKeyValue, newSymbol in newSymbols.items():
            # we recreate the same fields in this new symbol as the fields
            # that exist in the original symbol
            rebuildFields(newSymbol, newSymbolKeyValue, field.fields)

            # we remove trailing fields that accept no values
            cells = newSymbol.getCells(encoded=False,
                                       styled=False,
                                       transposed=False)
            # NOTE(review): cells are compared with the str '' although they
            # are requested with encoded=False -- confirm the cell type.
            max_i_cell_with_value = 0
            for line in cells:
                for i_cell, cell in enumerate(line):
                    if cell != '' and max_i_cell_with_value < i_cell:
                        max_i_cell_with_value = i_cell
            rebuildFields(newSymbol, newSymbolKeyValue,
                          field.fields[:max_i_cell_with_value + 1])

        return newSymbols
Exemplo n.º 16
0
def symbolsFromSegments(
        segmentsPerMsg: Iterable[Sequence[MessageSegment]]) -> List[Symbol]:
    """
    Build one Netzob Symbol per message from that message's list of segments.

    The segments of each message are ordered by their offset. Every byte range that is not covered by a
    given segment - before the first segment, between two segments, or after the last segment up to the
    end of the message data - is represented by an additionally created MessageSegment, so that the
    fields of the resulting Symbol cover the message from offset 0 to its end.
    A filler in front of a segment reuses that segment's analyzer; the trailing filler reuses the analyzer
    of the last segment. Segments that overlap are passed on unchanged.

    >>> from nemere.inference.segmentHandler import symbolsFromSegments
    >>> from nemere.inference.segments import MessageSegment
    >>> from nemere.inference.analyzers import Value
    >>> from netzob.Model.Vocabulary.Symbol import Symbol
    >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    >>> # prevent Netzob from producing debug output.
    >>> import logging
    >>> logging.getLogger().setLevel(30)
    >>>
    >>> dummymsg = RawMessage(bytes(list(range(50, 70))))
    >>> dummyana = Value(dummymsg)
    >>> testgapped = [[ MessageSegment(dummyana, 0, 2), MessageSegment(dummyana, 5, 2), MessageSegment(dummyana, 7, 6),
    ...                MessageSegment(dummyana, 17, 2) ]]
    >>> symbol = symbolsFromSegments(testgapped)[0]
    >>> print(symbol)
    Field | Field | Field | Field    | Field  | Field | Field
    ----- | ----- | ----- | -------- | ------ | ----- | -----
    '23'  | '456' | '78'  | '9:;<=>' | '?@AB' | 'CD'  | 'E'...
    ----- | ----- | ----- | -------- | ------ | ----- | -----

    Shape of the intermediate, gap-filled segment list for such an input:
    ```
    from pprint import pprint
    pprint(filledSegments)
    [[MessageSegment 2 bytes at (0, 2): 1415 | values: (20, 21),
      MessageSegment 3 bytes at (2, 5): 161718 | values: (22, 23, 24),
      MessageSegment 2 bytes at (5, 7): 191a | values: (25, 26),
      MessageSegment 6 bytes at (7, 13): 1b1c1d1e1f20 | values: (27, 28, 29...,
      MessageSegment 4 bytes at (13, 17): 21222324 | values: (33, 34, 35...,
      MessageSegment 2 bytes at (17, 19): 2526 | values: (37, 38)]]
    ```
    NOTE(review): the byte values in this dump do not seem to match the dummymsg of the doctest above
    (whose data starts at byte value 50) - presumably left over from an earlier example; confirm.

    :param segmentsPerMsg: List of messages, represented by lists of segments.
    :return: list of Symbols, one for each entry in the given iterable of lists.
    :raises AssertionError: if one of the given segment lists is empty.
    """
    # Phase 1: complete the segment list of every message before any Symbol is created.
    completed = []
    for givenSegments in segmentsPerMsg:
        ordered = sorted(givenSegments, key=lambda seg: seg.offset)
        assert len(ordered) > 0

        contiguous = []
        cursor = 0  # offset directly behind the most recently appended segment
        for seg in ordered:
            if seg.offset > cursor:
                # uncovered bytes in front of this segment
                contiguous.append(
                    MessageSegment(seg.analyzer, cursor, seg.offset - cursor))
            contiguous.append(seg)
            cursor = seg.nextOffset

        # uncovered bytes between the last segment and the end of the message
        tail = contiguous[-1]
        messageLength = len(tail.message.data)
        if cursor < messageLength:
            contiguous.append(
                MessageSegment(tail.analyzer, cursor, messageLength - cursor))
        completed.append(contiguous)

    # Phase 2: one Symbol per message, one Field per (given or filler) segment.
    symbols = []
    for number, segments in enumerate(completed):
        fields = [Field(seg.bytes) for seg in segments]
        symbols.append(
            Symbol(fields,
                   messages=[segments[0].message],
                   name=f"nemesys Symbol {number}"))
    return symbols
Exemplo n.º 17
0
    def initHeader(self):
        """Build the Symbol describing the IP header and store it in ``self.header``.

        Thirteen fields are created in header order ('ip.version' through
        'ip.payload'). The protocol field takes its value from
        ``self.upperProtocol``; the source and destination address fields are
        built from ``self.localIP`` and ``self.remoteIP``.

        The header-length, total-length and checksum fields first receive an
        all-zero placeholder domain. Their final domains (``Size`` and
        ``InternetChecksum``) refer to the Field objects themselves, so they
        are assigned only after every field has been created.

        """

        def persistent_bits(width, bits):
            # Domain of ``width`` bits, preset to ``bits`` and marked
            # SVAS.PERSISTENT.
            return Data(dataType=BitArray(nbBits=width),
                        originalValue=bitarray(bits),
                        svas=SVAS.PERSISTENT)

        # Fields, created in the order in which they appear in the header.
        version = Field(name='ip.version',
                        domain=BitArray(value=bitarray('0100')))  # IP Version 4
        header_len = Field(name='ip.hdr_len',
                           domain=BitArray(bitarray('0000')))  # placeholder
        tos = Field(name='ip.tos', domain=persistent_bits(8, '00000000'))
        total_len = Field(
            name='ip.len',
            domain=BitArray(bitarray('0000000000000000')))  # placeholder
        ident = Field(name='ip.id', domain=BitArray(nbBits=16))
        flags = Field(name='ip.flags', domain=persistent_bits(3, '000'))
        frag_offset = Field(name='ip.fragment',
                            domain=persistent_bits(13, '0000000000000'))
        ttl = Field(name='ip.ttl', domain=persistent_bits(8, '01000000'))
        protocol = Field(name='ip.proto',
                         domain=Integer(value=self.upperProtocol,
                                        unitSize=AbstractType.UNITSIZE_8,
                                        endianness=AbstractType.ENDIAN_BIG,
                                        sign=AbstractType.SIGN_UNSIGNED))
        checksum = Field(
            name='ip.checksum',
            domain=BitArray(bitarray('0000000000000000')))  # placeholder
        source = Field(name='ip.src', domain=IPv4(self.localIP))
        destination = Field(name='ip.dst', domain=IPv4(self.remoteIP))
        payload = Field(name='ip.payload', domain=Raw())

        # Every consumer below receives its own copy of these lists.
        header_only = [
            version, header_len, tos, total_len, ident, flags, frag_offset,
            ttl, protocol, checksum, source, destination
        ]
        with_payload = header_only + [payload]

        # Size over the header fields, stored in 4 bits.
        # NOTE(review): factor 1/32 presumably converts bits to 32-bit words - confirm.
        header_len.domain = Size(list(header_only),
                                 dataType=BitArray(nbBits=4),
                                 factor=1.0 / 32)

        # Size over header and payload, stored as unsigned 16-bit integer.
        # NOTE(review): factor 1/8 presumably converts bits to bytes - confirm.
        total_len.domain = Size(list(with_payload),
                                dataType=Integer(
                                    unitSize=AbstractType.UNITSIZE_16,
                                    sign=AbstractType.SIGN_UNSIGNED),
                                factor=1.0 / 8)

        # Checksum over the header fields (the checksum field itself included).
        checksum.domain = InternetChecksum(
            fields=list(header_only),
            dataType=Raw(nbBytes=2, unitSize=AbstractType.UNITSIZE_16))

        self.header = Symbol(name='IP layer', fields=list(with_payload))
Exemplo n.º 18
0
 def setUp(self) -> None:
     """
     Fill ``self.symbols`` with five entries that all carry the name "NEMESYS".

     Only one Symbol (without fields and without messages) is created; each of
     the five list entries refers to this same instance.
     """
     shared = Symbol(fields=[], messages=[], name="NEMESYS")
     self.symbols = [shared for _ in range(5)]
Exemplo n.º 19
0
def symbolsFromSegments(segmentsPerMsg):
    """
    Create one Netzob Symbol per entry of segmentsPerMsg.

    Each Symbol gets one Field per segment, with the segments ordered by their
    offset. The Symbol's single message is taken from the first segment of the
    sequence as it was passed in (before sorting). Byte ranges not covered by
    any segment do not get a field of their own.

    :param segmentsPerMsg: iterable of segment sequences, one per message;
        each sequence must be indexable and non-empty.
    :return: list of Symbols in the order of the given sequences.
    """
    from netzob.Model.Vocabulary.Symbol import Symbol, Field

    symbols = []
    for msgSegments in segmentsPerMsg:
        byOffset = sorted(msgSegments, key=lambda seg: seg.offset)
        fields = [Field(seg.bytes) for seg in byOffset]
        symbols.append(Symbol(fields, messages=[msgSegments[0].message]))
    return symbols