Example No. 1
    def split(self):
        newmsg = list()
        for sid, seg in enumerate(
                self.segments
        ):  # enumerate kept to allow switching to an in-place edit later; sid is currently unused
            didReplace = False
            for comfeat in self._moco:
                comoff = seg.bytes.find(comfeat)
                if comoff == -1:  # comfeat not found in this segment, continue with the next common feature
                    continue

                featlen = len(comfeat)
                if seg.length == featlen:  # it's already exactly the frequent feature
                    newmsg.append(seg)
                else:
                    if CropDistinct._debug:
                        print("\nReplaced {} by:".format(seg.bytes.hex()),
                              end=" ")

                    absco = seg.offset + comoff
                    if comoff > 0:
                        segl = MessageSegment(seg.analyzer, seg.offset, comoff)
                        newmsg.append(segl)
                        if CropDistinct._debug:
                            print(segl.bytes.hex(), end=" ")

                    segc = MessageSegment(seg.analyzer, absco, featlen)
                    newmsg.append(segc)
                    if CropDistinct._debug:
                        print(segc.bytes.hex(), end=" ")

                    rlen = seg.length - comoff - featlen
                    if rlen > 0:
                        segr = MessageSegment(seg.analyzer, absco + featlen,
                                              rlen)
                        newmsg.append(segr)
                        if CropDistinct._debug:
                            print(segr.bytes.hex(), end=" ")

                didReplace = True
                break  # split only at the first matching common feature; how to handle subsequent matches after the split(s) is an open question
            if not didReplace:
                newmsg.append(seg)
            elif CropDistinct._debug:
                print()

        return newmsg
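
The enclosing class is only partially visible here: split() reads self.segments and self._moco, a list of most-common byte features. A minimal driver sketch, assuming a constructor of the form CropDistinct(segments, mostCommonFeatures); the actual signature is not shown in this example and may differ:

# Hedged sketch: CropDistinct(segments, mostCommonFeatures) is an assumption
# derived from the attributes self.segments and self._moco used by split().
from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
from nemere.inference.analyzers import Value
from nemere.inference.segments import MessageSegment

analyzer = Value(RawMessage(bytes([0xde, 0xad, 0x01, 0x02, 0xbe, 0xef])))
coarseSegments = [MessageSegment(analyzer, 0, 6)]   # one segment covering the whole message
mostCommonFeatures = [bytes([0x01, 0x02])]          # frequent feature to crop out

cropper = CropDistinct(coarseSegments, mostCommonFeatures)  # assumed constructor
refined = cropper.split()
print([seg.bytes.hex() for seg in refined])  # expected: ['dead', '0102', 'beef']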
Example No. 2
def wobbleSegmentInMessage(segment: MessageSegment):
    """
    At start for now.

    For end if would be, e. g.: if segment.nextOffset < len(segment.message.data):  segment.nextOffset + 1

    :param segment:
    :return:
    """
    wobbles = [segment]

    if segment.offset > 0:
        wobbles.append(
            MessageSegment(segment.analyzer, segment.offset - 1,
                           segment.length + 1))
    if segment.length > 1:
        wobbles.append(
            MessageSegment(segment.analyzer, segment.offset + 1,
                           segment.length - 1))

    return wobbles
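
A short usage sketch, reusing only the classes that the other examples in this listing already import:

from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
from nemere.inference.analyzers import Value
from nemere.inference.segments import MessageSegment

analyzer = Value(RawMessage(bytes([1, 2, 3, 4, 5, 6])))
segment = MessageSegment(analyzer, 2, 3)  # covers offsets 2..4

for wob in wobbleSegmentInMessage(segment):
    print(wob.offset, wob.length)
# expected: (2, 3) unchanged, (1, 4) start moved one byte left, (3, 2) start moved one byte right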
Example No. 3
def calcHexDist(hexA, hexB):
    """Calculate the pairwise dissimilarity of two byte strings given as hex strings."""
    from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    from nemere.inference.analyzers import Value
    from nemere.inference.segments import MessageSegment
    from nemere.inference.templates import DistanceCalculator

    bytedata = [bytes.fromhex(hexA), bytes.fromhex(hexB)]
    messages = [RawMessage(bd) for bd in bytedata]
    analyzers = [Value(message) for message in messages]
    segments = [MessageSegment(analyzer, 0, len(analyzer.message.data)) for analyzer in analyzers]
    dc = DistanceCalculator(segments)
    return dc.pairDistance(*segments)
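
A usage sketch; the numeric scale of the returned dissimilarity depends on how DistanceCalculator is configured:

# calcHexDist wraps both hex strings as whole-message segments and returns
# their pairwise dissimilarity from DistanceCalculator.
similarPair = calcHexDist("01020304", "01020305")   # differ in the last byte only
distinctPair = calcHexDist("01020304", "fffefdfc")  # differ in every byte
print(similarPair, distinctPair)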
Example No. 4
    def split(self):
        """
        Perform the splitting of the segments.

        :return: List of segments split from the input.
        """
        segmentStack = list(reversed(self.segments[1:]))
        mangledSegments = [self.segments[0]]
        if len(self.segments) > 1:
            while segmentStack:
                segc = segmentStack.pop()
                segl = mangledSegments[-1]
                if segl.offset + segl.length == segc.offset:
                    # compare byte pairs' frequency
                    splitshift = self.lookupLeastFrequent(segc)
                    if (0 > splitshift >= -segl.length) \
                            or (0 < splitshift <= segc.length):
                        if segl.length != -splitshift:
                            mangledSegments[-1] = MessageSegment(
                                mangledSegments[-1].analyzer,
                                mangledSegments[-1].offset,
                                mangledSegments[-1].length + splitshift)
                        else:  # segment to the left completely used up in center
                            del mangledSegments[-1]
                        if self._debug:
                            print("Recombined segments: \n{} and {} into ".
                                  format(segl, segc))
                        segc = MessageSegment(segc.analyzer,
                                              segc.offset + splitshift,
                                              segc.length - splitshift)
                        if self._debug:
                            print("{} and {}".format(
                                mangledSegments[-1]
                                if mangledSegments else 'Empty', segc))
                mangledSegments.append(segc)
        return mangledSegments
Example No. 5
def generateTestSegments():
    """Create a fixed list of test segments from hard-coded byte sequences, one segment per message."""
    from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    from nemere.inference.analyzers import Value
    from nemere.inference.segments import MessageSegment

    bytedata = [
        bytes([1, 2, 3, 4]),
        bytes([2, 3, 4]),
        bytes([1, 3, 4]),
        bytes([2, 4]),
        bytes([2, 3]),
        bytes([20, 30, 37, 50, 69, 2, 30]),
        bytes([37, 5, 69]),
        bytes([0, 0, 0, 0]),
        bytes([3, 2, 3, 4])
    ]
    messages = [RawMessage(bd) for bd in bytedata]
    analyzers = [Value(message) for message in messages]
    segments = [MessageSegment(analyzer, 0, len(analyzer.message.data)) for analyzer in analyzers]
    return segments
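
These test segments can be fed directly into the distance machinery used by calcHexDist above:

from nemere.inference.templates import DistanceCalculator

segments = generateTestSegments()
dc = DistanceCalculator(segments)
# pairwise dissimilarity between the first two test segments, analogous to calcHexDist above
print(dc.pairDistance(segments[0], segments[1]))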
Example No. 6
    def split(self, segmentID: int, chunkLength: int):
        """

        :param segmentID: The index of the segment to split within the sequence of segments composing the message
        :param chunkLength: The fixed length of the target segments in bytes
        :return: The message segments with the given segment replaced by multiple segments of the given fixed length.
        """
        selSeg = self.segments[segmentID]
        if chunkLength < selSeg.length:
            newSegs = list()
            for chunkoff in range(selSeg.offset, selSeg.nextOffset,
                                  chunkLength):
                remainLen = selSeg.nextOffset - chunkoff
                newSegs.append(
                    MessageSegment(selSeg.analyzer, chunkoff,
                                   min(remainLen, chunkLength)))
            newmsg = self.segments[:segmentID] + newSegs + self.segments[
                segmentID + 1:]
            return newmsg
        else:
            return self.segments
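
Since the enclosing class is not shown in this example, the chunking arithmetic can be exercised on its own as a standalone sketch:

# Standalone sketch of the chunking arithmetic used by split() above,
# deliberately bypassing the (not shown) enclosing class.
from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
from nemere.inference.analyzers import Value
from nemere.inference.segments import MessageSegment

analyzer = Value(RawMessage(bytes(range(10))))
selSeg = MessageSegment(analyzer, 0, 10)
chunkLength = 4
chunks = [MessageSegment(selSeg.analyzer, off, min(selSeg.nextOffset - off, chunkLength))
          for off in range(selSeg.offset, selSeg.nextOffset, chunkLength)]
print([chunk.length for chunk in chunks])  # expected: [4, 4, 2] -- the last chunk keeps the remainder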
Example No. 7
    def merge(self):
        """
        Perform the merging.

        :return: a new list of segments after the input has been merged
        """
        mergedSegments = self.segments[0:1]
        if len(self.segments) > 1:
            for segl, segr in zip(self.segments[:-1], self.segments[1:]):
                # TODO check for equal analyzer, requires implementing a suitable equality-check in analyzer
                # from inference.MessageAnalyzer import MessageAnalyzer
                if segl.offset + segl.length == segr.offset and self.condition(
                        segl, segr):
                    mergedSegments[-1] = MessageSegment(
                        mergedSegments[-1].analyzer, mergedSegments[-1].offset,
                        mergedSegments[-1].length + segr.length)
                    if self._debug:
                        print(
                            "Merged segments: \n{} and \n{} into \n{}".format(
                                segl, segr, mergedSegments[-1]))
                else:
                    mergedSegments.append(segr)
        return mergedSegments
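
Stripped of the class context (self.condition is not shown here), merging two adjacent segments boils down to the following sketch:

# Standalone sketch of the merge step for two adjacent segments,
# independent of the (not shown) condition() predicate of the enclosing class.
from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
from nemere.inference.analyzers import Value
from nemere.inference.segments import MessageSegment

analyzer = Value(RawMessage(bytes([10, 20, 30, 40, 50])))
segl = MessageSegment(analyzer, 0, 2)
segr = MessageSegment(analyzer, 2, 3)

if segl.offset + segl.length == segr.offset:  # adjacency check as in merge()
    merged = MessageSegment(segl.analyzer, segl.offset, segl.length + segr.length)
    print(merged.length)  # 5: a single segment covering both inputs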
Example No. 8
def symbolsFromSegments(
        segmentsPerMsg: Iterable[Sequence[MessageSegment]]) -> List[Symbol]:
    """
    Generate a list of Netzob Symbols from the given lists of segments for each message.

    >>> from nemere.inference.segmentHandler import symbolsFromSegments
    >>> from nemere.inference.segments import MessageSegment
    >>> from nemere.inference.analyzers import Value
    >>> from netzob.Model.Vocabulary.Symbol import Symbol
    >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage
    >>> # prevent Netzob from producing debug output.
    >>> import logging
    >>> logging.getLogger().setLevel(30)
    >>>
    >>> dummymsg = RawMessage(bytes(list(range(50, 70))))
    >>> dummyana = Value(dummymsg)
    >>> testgapped = [[ MessageSegment(dummyana, 0, 2), MessageSegment(dummyana, 5, 2), MessageSegment(dummyana, 7, 6),
    ...                MessageSegment(dummyana, 17, 2) ]]
    >>> symbol = symbolsFromSegments(testgapped)[0]
    >>> print(symbol)
    Field | Field | Field | Field    | Field  | Field | Field
    ----- | ----- | ----- | -------- | ------ | ----- | -----
    '23'  | '456' | '78'  | '9:;<=>' | '?@AB' | 'CD'  | 'E'...
    ----- | ----- | ----- | -------- | ------ | ----- | -----

    As an intermediate result, this produces:
    ```
    from pprint import pprint
    pprint(filledSegments)
    [[MessageSegment 2 bytes at (0, 2): 1415 | values: (20, 21),
      MessageSegment 3 bytes at (2, 5): 161718 | values: (22, 23, 24),
      MessageSegment 2 bytes at (5, 7): 191a | values: (25, 26),
      MessageSegment 6 bytes at (7, 13): 1b1c1d1e1f20 | values: (27, 28, 29...,
      MessageSegment 4 bytes at (13, 17): 21222324 | values: (33, 34, 35...,
      MessageSegment 2 bytes at (17, 19): 2526 | values: (37, 38)]]
    ```

    :param segmentsPerMsg: List of messages, represented by lists of segments.
    :return: list of Symbols, one for each entry in the given iterable of lists.
    """
    sortedSegments = (sorted(segSeq, key=lambda f: f.offset)
                      for segSeq in segmentsPerMsg)
    filledSegments = list()
    for segSeq in sortedSegments:
        assert len(segSeq) > 0
        filledGaps = list()
        for segment in segSeq:
            lastoffset = filledGaps[-1].nextOffset if len(
                filledGaps) > 0 else 0
            if segment.offset > lastoffset:
                gaplength = segment.offset - lastoffset
                filledGaps.append(
                    MessageSegment(segment.analyzer, lastoffset, gaplength))
            filledGaps.append(segment)
        # check for required trailing segment
        lastoffset = filledGaps[-1].nextOffset
        msglen = len(filledGaps[-1].message.data)
        if lastoffset < msglen:
            gaplength = msglen - lastoffset
            filledGaps.append(
                MessageSegment(filledGaps[-1].analyzer, lastoffset, gaplength))
        filledSegments.append(filledGaps)

    return [
        Symbol([Field(segment.bytes) for segment in segSeq],
               messages=[segSeq[0].message],
               name=f"nemesys Symbol {i}")
        for i, segSeq in enumerate(filledSegments)
    ]
Example No. 9
def fixedlengthSegmenter(length: int, specimens: BaseLoader,
                         analyzerType: type, analysisArgs: Union[Tuple, None], unit=MessageAnalyzer.U_BYTE, padded=False) \
        -> List[Tuple[MessageSegment]]:
    """
    Segment messages into fixed size chunks.

    >>> from nemere.utils.loader import SpecimenLoader
    >>> from nemere.validation.dissectorMatcher import MessageComparator
    >>> from nemere.inference.analyzers import Value
    >>> from nemere.inference.segmentHandler import fixedlengthSegmenter
    >>> specimens = SpecimenLoader("../input/deduped-orig/ntp_SMIA-20111010_deduped-100.pcap", 2, True)
    >>> comparator = MessageComparator(specimens, 2, True, debug=False)
    Wait for tshark output (max 20s)...
    >>> segmentedMessages = fixedlengthSegmenter(4, specimens, Value, None)
    >>> areIdentical = True
    >>> for msgsegs in segmentedMessages:
    ...     msg = msgsegs[0].message
    ...     msgbytes = b"".join([seg.bytes for seg in msgsegs])
    ...     areIdentical = areIdentical and msgbytes == msg.data
    >>> print(areIdentical)
    True

    :param length: Fixed length for all segments. Overhanging segments at the end that are shorter than length
        will be padded with NaNs.
    :param specimens: Loader utility class that contains the payload messages.
    :param analyzerType: Type of the analysis. Subclass of inference.analyzers.MessageAnalyzer.
    :param analysisArgs: Arguments for the analysis method.
    :param unit: Base unit for the analysis. Either MessageAnalyzer.U_BYTE or MessageAnalyzer.U_NIBBLE.
    :param padded: Toggle to pad the last segment to the requested fixed length or leave the last segment to be
        shorter than length if the message length is not an exact multiple of the segment length.
    :return: Segments of the analyzer's message according to the true format.
    """
    segments = list()
    for l4msg, rmsg in specimens.messagePool.items():
        if len(l4msg.data) % length == 0:  # message length is an exact multiple: no overhanging rest
            lastOffset = len(l4msg.data)
        else:
            lastOffset = (len(l4msg.data) // length) * length

        originalAnalyzer = MessageAnalyzer.findExistingAnalysis(
            analyzerType, unit, l4msg, analysisArgs)
        sequence = [
            MessageSegment(originalAnalyzer, offset, length)
            for offset in range(0, lastOffset, length)
        ]

        if len(l4msg.data) > lastOffset:  # append the overhanging rest
            if padded:
                # here are nasty hacks!
                # TODO Better define a new subclass of MessageSegment that internally pads values
                #  (and bytes? what are the guarantees?) to a given length that exceeds the message length
                residuepadd = lastOffset + length - len(l4msg.data)
                newMessage = copy.copy(originalAnalyzer.message)
                newMessage.data = newMessage.data + b'\x00' * residuepadd
                newAnalyzer = type(originalAnalyzer)(
                    newMessage, originalAnalyzer.unit)  # type: MessageAnalyzer
                newAnalyzer.setAnalysisParams(*originalAnalyzer.analysisParams)
                padd = [numpy.nan] * residuepadd
                newAnalyzer._values = originalAnalyzer.values + padd
                newSegment = MessageSegment(newAnalyzer, lastOffset, length)
                for seg in sequence:  # replace all previous analyzers to make the sequence homogeneous for this message
                    seg.analyzer = newAnalyzer
                sequence.append(newSegment)
            else:
                newSegment = MessageSegment(originalAnalyzer, lastOffset,
                                            len(l4msg.data) - lastOffset)
                sequence.append(newSegment)

        segments.append(tuple(sequence))

    return segments
Example No. 10
    def merge(self):
        """
        Perform the merging.

        >>> from nemere.utils.loader import SpecimenLoader
        >>> from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation
        >>> from nemere.inference.formatRefinement import CumulativeCharMerger
        >>> sl = SpecimenLoader('../input/deduped-orig/dns_ictf2010_deduped-100.pcap', layer=0, relativeToIP=True)
        >>> segmentsPerMsg = bcDeltaGaussMessageSegmentation(sl)
        Segmentation by inflections of sigma-0.6-gauss-filtered bit-variance.
        >>> for messageSegments in segmentsPerMsg:
        ...     ccm = CumulativeCharMerger(messageSegments)
        ...     ccmmsg = ccm.merge()
        ...     if ccmmsg != messageSegments:
        ...         sgms = b''.join([m.bytes for m in ccmmsg])
        ...         sgss = b''.join([m.bytes for m in messageSegments])
        ...         if sgms != sgss:
        ...             print("Mismatch!")

        :return: a new list of segments after the input has been merged
        """
        minLen = 6

        segmentStack = list(reversed(self.segments))
        newmsg = list()
        isCharCand = False
        workingStack = list()
        while segmentStack:
            workingStack.append(segmentStack.pop())
            if sum([len(ws.bytes) for ws in workingStack]) < minLen:
                continue

            # now we have at least minLen bytes in the working stack;
            # check whether the merged bytes form a char candidate
            joinedbytes = b"".join([ws.bytes for ws in workingStack])
            if isExtendedCharSeq(joinedbytes) \
                    and b"\x00\x00" not in joinedbytes:
                isCharCand = True
                continue
            # the last segment ended the char candidate
            elif isCharCand:
                isCharCand = False
                if len(workingStack) > 2:
                    newlen = sum([ws.length for ws in workingStack[:-1]])
                    newseg = MessageSegment(workingStack[0].analyzer,
                                            workingStack[0].offset, newlen)
                    newmsg.append(newseg)
                else:
                    # retain the original segment (for equality test and to save creating a new object instance)
                    newmsg.append(workingStack[0])
                if len(workingStack) > 1:
                    segmentStack.append(workingStack[-1])
                workingStack = list()
            # there was not a char candidate
            else:
                newmsg.append(workingStack[0])
                for ws in reversed(workingStack[1:]):
                    segmentStack.append(ws)
                workingStack = list()
        # there are segments in the working stack left
        if len(workingStack) > 1 and isCharCand:
            newlen = sum([ws.length for ws in workingStack])
            newseg = MessageSegment(workingStack[0].analyzer,
                                    workingStack[0].offset, newlen)
            newmsg.append(newseg)
        # no char candidate, or at most one segment left: keep the remaining segments unchanged
        else:
            newmsg.extend(workingStack)
        return newmsg
Example No. 11
    def split(self):
        """
        Perform the splitting of the segments.

        :return: List of segments split from the input.
        """
        segmentStack = list(reversed(self.segments))
        mangledSegments = list()
        if len(self.segments) > 1:
            while segmentStack:
                # TODO check for equal analyzer, requires equality-check in analyzer
                # from inference.MessageAnalyzer import MessageAnalyzer

                segc = segmentStack.pop()
                # TODO: this is char specific only!
                if not isPrintable(segc.bytes):
                    # cancel split relocation
                    mangledSegments.append(segc)
                    continue

                if mangledSegments:
                    # integrate segment to the left into center
                    segl = mangledSegments[-1]
                    if segl.offset + segl.length == segc.offset:
                        splitpos = self.toTheLeft(segl)
                        # segment to the left ends with chars, add them to the center segment
                        if splitpos < segl.length:
                            if splitpos > 0:
                                mangledSegments[-1] = MessageSegment(
                                    mangledSegments[-1].analyzer,
                                    mangledSegments[-1].offset, splitpos)
                            else:  # segment to the left completely used up in center
                                del mangledSegments[-1]
                            restlen = segl.length - splitpos
                            if self._debug:
                                print("Recombined segments: \n{} and {} into ".
                                      format(segl, segc))
                            segc = MessageSegment(segc.analyzer,
                                                  segc.offset - restlen,
                                                  segc.length + restlen)
                            if self._debug:
                                print("{} and {}".format(
                                    mangledSegments[-1]
                                    if mangledSegments else 'Empty', segc))

                if segmentStack:
                    # integrate segment to the right into center
                    segr = segmentStack[-1]
                    if segc.offset + segc.length == segr.offset:
                        splitpos = self.toTheRight(segr)
                        # segment to the right starts with chars, add them to the center segment
                        if splitpos > 0:
                            if segr.length - splitpos > 0:
                                segmentStack[-1] = MessageSegment(
                                    segr.analyzer, segr.offset + splitpos,
                                    segr.length - splitpos)
                            else:  # segment to the right completely used up in center
                                del segmentStack[-1]
                            if self._debug:
                                print("Recombined segments: \n{} and {} into ".
                                      format(segc, segr))
                            segc = MessageSegment(segc.analyzer, segc.offset,
                                                  segc.length + splitpos)
                            if self._debug:
                                print("{} and {}".format(
                                    segc, segmentStack[-1]
                                    if segmentStack else 'Empty'))

                mangledSegments.append(segc)
        return mangledSegments
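
isPrintable, toTheLeft and toTheRight are helpers of the enclosing class that this listing does not show. A plausible stand-in for the predicate, labeled as an assumption:

# Assumed stand-in for the isPrintable() helper referenced above; the actual
# implementation in the enclosing module may differ.
def isPrintable(data: bytes) -> bool:
    """True if every byte is printable ASCII (0x20..0x7e), i.e. a char candidate."""
    return all(0x20 <= byte <= 0x7e for byte in data)

print(isPrintable(b"GET /index.html"))   # True
print(isPrintable(bytes([0x00, 0x41])))  # False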