예제 #1
0
    def __init__(self,
                 specimens: sl.SpecimenLoader,
                 layer: int = -1,
                 relativeToIP: bool = False,
                 failOnUndissectable=True,
                 debug=False):
        """
        Set up dissection and labeling of all messages in the given specimens.

        :param specimens: Loader that provides the message pool to work on.
        :param layer: Target protocol layer (-1 selects the default).
        :param relativeToIP: Whether the layer index counts relative to IP.
        :param failOnUndissectable: Presumably raises for undissectable
            messages when True — confirm in _dissectAndLabel.
        :param debug: Enable debug output.
        """
        self.specimens = specimens
        # OrderedDict[AbstractMessage, netzob.RawMessage]
        self.messages = specimens.messagePool
        self.baselayer = specimens.getBaseLayerOfPCAP()
        self.debug = debug

        self._targetlayer = layer
        self._relativeToIP = relativeToIP
        self._failOnUndissectable = failOnUndissectable
        # cache for messages that already have been parsed and labeled
        self._messageCache = {}  # type: Dict[netzob.RawMessage, ]

        # dissect everything up front; results are keyed by raw message
        self._dissections = self._dissectAndLabel(self.messages.values())
예제 #2
0
        '(typically the payload of a transport protocol).')
    # Boolean flag: interpret the layer index relative to the IP layer.
    parser.add_argument('-r',
                        '--relativeToIP',
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    # Bail out early if the given PCAP file does not exist.
    if not isfile(args.pcapfilename):
        print('File not found: ' + args.pcapfilename)
        exit(1)

    # Time the loading and dissection phase.
    import time
    swstart = time.time()
    print('\nLoading ...')

    # Load the trace and build the ground-truth comparator with the same
    # layer configuration; dissection failures are tolerated here
    # (failOnUndissectable=False).
    specimens = SpecimenLoader(args.pcapfilename,
                               layer=args.layer,
                               relativeToIP=args.relativeToIP)
    comparator = MessageComparator(specimens,
                                   layer=args.layer,
                                   relativeToIP=args.relativeToIP,
                                   failOnUndissectable=False,
                                   debug=debug)
    print('Loaded and dissected in {:.3f}s'.format(time.time() - swstart))

    print('\nNetzob Inference ...')
    # dict ( similaritythreshold : dict ( symbol : (quality, fieldcount, exactf, nearf, uospecific) ) )
    # NOTE(review): when args.smin is falsy, minThresh/maxThresh must already
    # be bound earlier in this script, otherwise the call below raises a
    # NameError — confirm defaults are assigned before this point.
    if args.smin:
        minThresh = args.smin
        maxThresh = args.smax if args.smax else args.smin
    threshSymbTfmtTime = iterSimilarities(minThresh, maxThresh)
    # keep only the first element of each value pair (presumably dropping a
    # runtime component — verify against iterSimilarities' return value)
    threshSymbTfmt = {t: s for t, (s, r) in threshSymbTfmtTime.items()}
예제 #3
0
if __name__ == '__main__':
    # Command line interface: dissect a single PCAP with tshark and time it.
    parser = ArgumentParser(
        description='Dissect PCAP with tshark and parse to python.')
    parser.add_argument('pcapfilename', help='pcapfilename')
    parser.add_argument('-l', '--targetlayer', type=int)
    # Boolean flag: interpret the layer index relative to the IP layer.
    parser.add_argument('-r',
                        '--relativeToIP',
                        default=False,
                        action='store_true')
    args = parser.parse_args()
    # Bail out early if the given PCAP file does not exist.
    if not isfile(args.pcapfilename):
        print('File not found: ' + args.pcapfilename)
        exit(1)
    # NOTE(review): `if args.targetlayer` is falsy for an explicit layer 0,
    # which then silently falls back to the default loader — confirm that
    # layer 0 is never a meaningful target.
    if args.targetlayer:
        specimens = SpecimenLoader(args.pcapfilename, args.targetlayer,
                                   args.relativeToIP)
    else:
        specimens = SpecimenLoader(args.pcapfilename)
    pkt = list(specimens.messagePool.values())

    # Start the dissection timer.
    st = time.time()

    ###########################
    # Performance tests, comparing the dissection of one message at a time by the static parseMultiple method,
    # the ParsedMessage constructor and finally the dissection of a batch of all messages.
    ###########################
    # # Single messages with ParsedMessage.parseMultiple test:   Dissection ran in 63.48 seconds.
    # pms = dict()
    # for p in pkt:
    #     pms.update(ParsedMessage.parseMultiple([p]))
    # pms = list(pms.values())
예제 #4
0
def cacheAndLoadDC(pcapfilename: str, analysisTitle: str, tokenizer: str, debug: bool,
                   analyzerType: type, analysisArgs: Tuple=None, sigma: float=None, filterTrivial=False,
                   refinementCallback:Union[Callable, None] = refinements,
                   disableCache=False) \
        -> Tuple[SpecimenLoader, MessageComparator, List[Tuple[MessageSegment]], DistanceCalculator,
        float, float]:
    """
    Cache or load the DistanceCalculator to or from the filesystem.

    Dissects and segments the messages of the given PCAP (or loads a prior
    result from a cache file), calculates pairwise segment distances, and
    writes the result to the cache for subsequent runs.

    :param pcapfilename: Path of the PCAP file to load.
    :param analysisTitle: Name of the analysis; becomes part of the cache file name.
    :param tokenizer: Segmenter to use: "tshark", "4bytesfixed", or "nemesys".
    :param debug: Enable debug output of the MessageComparator.
    :param analyzerType: Analyzer class applied to each segmented message.
    :param analysisArgs: Arguments for the analyzer, if any.
    :param sigma: Gauss-filter sigma for the NEMESYS tokenizer; if not given,
        a per-trace default from sigmapertrace (or 0.9) is used.
    :param filterTrivial: Filter out **one-byte** segments and such just consisting of **zeros**.
    :param refinementCallback: Callback refining the raw NEMESYS segmentation;
        None disables refinement.
    :param disableCache: When experimenting with distances manipulation, deactivate caching!
    :raises ValueError: If an unknown tokenizer is requested.
    :return: Tuple of (specimens, comparator, segmentedMessages, dc,
        segmentationTime, dist_calc_segmentsTime); both times are None when
        the result was loaded from the cache.
    """
    pcapbasename = os.path.basename(pcapfilename)
    # fall back to the per-trace sigma, then to 0.9, unless one was given explicitly
    sigma = sigmapertrace[pcapbasename] if not sigma and pcapbasename in sigmapertrace else \
        0.9 if not sigma else sigma
    pcapName = os.path.splitext(pcapbasename)[0]
    # encode the effective sigma in the cache name only for the NEMESYS tokenizer
    tokenparm = tokenizer if tokenizer != "nemesys" else \
        "{}{:.0f}".format(tokenizer, sigma * 10)
    dccachefn = os.path.join(
        cacheFolder, 'cache-dc-{}-{}-{}-{}-{}.{}'.format(
            analysisTitle, tokenparm, "filtered" if filterTrivial else "all",
            refinementCallback.__name__
            if refinementCallback is not None else "raw", pcapName, 'ddc'))
    if disableCache or not os.path.exists(dccachefn):
        # dissect and label messages
        print("Load messages from {}...".format(pcapName))
        specimens = SpecimenLoader(pcapfilename, 2, True)
        comparator = MessageComparator(specimens, 2, True, debug=debug)

        print("Segmenting messages...", end=' ')
        segmentationTime = time.time()
        # select tokenizer by command line parameter
        if tokenizer == "tshark":
            # 1. segment messages according to true fields from the labels
            segmentedMessages = annotateFieldTypes(analyzerType, analysisArgs,
                                                   comparator)
        elif tokenizer == "4bytesfixed":
            # 2. segment messages into fixed size chunks for testing
            segmentedMessages = segmentsFixed(4, comparator, analyzerType,
                                              analysisArgs)
        elif tokenizer == "nemesys":
            # 3. segment messages by NEMESYS
            segmentsPerMsg = bcDeltaGaussMessageSegmentation(specimens, sigma)

            # get analyzer requested by analyzerType/analysisArgs
            segmentedMessages = [[
                MessageSegment(
                    MessageAnalyzer.findExistingAnalysis(
                        analyzerType, MessageAnalyzer.U_BYTE, seg.message,
                        analysisArgs), seg.offset, seg.length) for seg in msg
            ] for msg in segmentsPerMsg]

            if refinementCallback is not None:
                if refinementCallback.__code__.co_argcount > 1:
                    # assume the second argument is expected to be a distance calculator
                    chainedSegments = list(
                        chain.from_iterable(segmentedMessages))
                    print("Refinement: Calculate distance for {} segments...".
                          format(len(chainedSegments)))
                    # use a memory-mapped matrix for very large segment counts
                    if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
                        refinementDC = MemmapDC(chainedSegments)
                    else:
                        refinementDC = DelegatingDC(chainedSegments)
                    segmentedMessages = refinementCallback(
                        segmentedMessages, refinementDC)
                else:
                    segmentedMessages = refinementCallback(segmentedMessages)
        else:
            # fail fast: an unknown tokenizer previously left segmentedMessages
            # unbound and crashed below with an opaque NameError
            raise ValueError("Unknown tokenizer: {}".format(tokenizer))

        segmentationTime = time.time() - segmentationTime
        print("done.")

        if filterTrivial:
            # drop one-byte segments and segments consisting only of zeros
            chainedSegments = [
                seg for seg in chain.from_iterable(segmentedMessages)
                if seg.length > 1 and set(seg.values) != {0}
            ]
        else:
            chainedSegments = list(chain.from_iterable(segmentedMessages))

        print("Calculate distance for {} segments...".format(
            len(chainedSegments)))
        dist_calc_segmentsTime = time.time()
        # use a memory-mapped matrix for very large segment counts
        if len(chainedSegments)**2 > MemmapDC.maxMemMatrix:
            dc = MemmapDC(chainedSegments)
        else:
            dc = DelegatingDC(chainedSegments)
        assert chainedSegments == dc.rawSegments
        dist_calc_segmentsTime = time.time() - dist_calc_segmentsTime
        try:
            with open(dccachefn, 'wb') as f:
                pickle.dump((segmentedMessages, comparator, dc), f,
                            pickle.HIGHEST_PROTOCOL)
        except MemoryError:
            print("DC could not be cached due to a MemoryError. Removing",
                  dccachefn, "and continuing.")
            # remove the partially written cache file, if one was created
            if os.path.exists(dccachefn):
                os.remove(dccachefn)
    else:
        print("Load distances from cache file {}".format(dccachefn))
        with open(dccachefn, 'rb') as f:
            segmentedMessages, comparator, dc = pickle.load(f)
        # sanity-check the unpickled objects before trusting them
        if not (isinstance(comparator, MessageComparator)
                and isinstance(dc, DistanceCalculator)):
            print('Loading of cached distances failed.')
            exit(10)
        specimens = comparator.specimens
        # timings are unknown when the results come from the cache
        segmentationTime, dist_calc_segmentsTime = None, None

    return specimens, comparator, segmentedMessages, dc, segmentationTime, dist_calc_segmentsTime