示例#1
0
    def compute_observation_probabilities(self):
        """Compute observation-constraint probabilities for all field pairs.

        Every candidate request field is combined with every candidate
        response field. For each combination the aligned messages of both
        directions are clustered by the tested field and five probabilities
        are recorded per direction, in the order ``[pm, pr, ps, pd, pv]``
        (message similarity, remote coupling, structure, dimension, value).

        Returns:
            tuple: ``(pairs_p, pairs_size)``.
                ``pairs_p`` is ``[request_dict, response_dict]``; each dict
                maps ``"<request fid>-<response fid>"`` to
                ``[pm, pr, ps, pd, pv]``.
                ``pairs_size`` is ``[request_dict, response_dict]``; each
                dict maps the same keys to the list of message counts of
                the clusters of that direction.
        """
        print("[++++++++] Compute probabilities of observation constraints")

        aligned_path = os.path.join(self.output_dir,
                                    Alignment.FILENAME_OUTPUT_ONELINE)
        messages_aligned = Alignment.get_messages_aligned(
            self.messages, aligned_path)

        # NOTE: the split of the unaligned messages is not read anywhere
        # below; the call is kept so the sequence of calls stays the same.
        _unaligned_request, _unaligned_response = (
            Processing.divide_msgs_by_directionlist(self.messages,
                                                    self.direction_list))
        request_aligned, response_aligned = (
            Processing.divide_msgs_by_directionlist(messages_aligned,
                                                    self.direction_list))

        # Candidate fields to test, per direction.
        request_fids = self.filter_fields(self.fields, self.fid_list,
                                          request_aligned)
        response_fids = self.filter_fields(self.fields, self.fid_list,
                                           response_aligned)
        logging.debug(
            "request candidate fid: {}\nresponse candidate fid: {}".format(
                request_fids, response_fids))

        # Similarity matrices are built once per direction and reused for
        # every tested field.
        similarity_request = MessageSimilarity(messages=request_aligned)
        similarity_response = MessageSimilarity(messages=response_aligned)
        similarity_request.compute_similarity_matrix()
        similarity_response.compute_similarity_matrix()

        def observe(similarity, symbols):
            """Return ``[pm, ps, pd, pv]`` for one clustering."""
            return [
                similarity.compute_constraint_message_similarity(symbols),
                self.compute_constraint_structure(symbols),
                self.compute_constraint_dimension(symbols),
                self.compute_constraint_value(symbols),
            ]

        def cluster_sizes(symbols):
            """Return the number of messages in each cluster."""
            return [len(symbol.messages) for symbol in symbols.values()]

        # Per-field results: {fid: [pm, ps, pd, pv]} and {fid: [sizes]}.
        request_probs, response_probs = {}, {}
        request_sizes, response_sizes = {}, {}
        # Per-pair results: {"fid-fid": [pm, pr, ps, pd, pv]} and sizes.
        pairs_p_request, pairs_p_response = {}, {}
        pairs_size_request, pairs_size_response = {}, {}

        for fid_request in request_fids:
            logging.info("[++++] Test Request Field {0}-*".format(fid_request))

            # Merge all fields except the tested one, then cluster on it.
            merged_fields_req = self.merge_nontest_fields(
                self.fields, fid_request)
            merged_fid_req = 0 if fid_request == 0 else 1
            symbols_req = self.change_symbol_name(
                self.cluster_by_field(merged_fields_req, request_aligned,
                                      merged_fid_req))

            request_probs[fid_request] = observe(similarity_request,
                                                 symbols_req)
            request_sizes[fid_request] = cluster_sizes(symbols_req)

            for fid_response in response_fids:
                logging.debug("[++] Test Response Field {0}-{1}".format(
                    fid_request, fid_response))

                # Same merge-and-cluster step for the response direction.
                merged_fields_resp = self.merge_nontest_fields(
                    self.fields, fid_response)
                merged_fid_resp = 0 if fid_response == 0 else 1
                symbols_resp = self.change_symbol_name(
                    self.cluster_by_field(merged_fields_resp,
                                          response_aligned, merged_fid_resp))

                # Response-side m/s/d/v values do not depend on the request
                # field, so they are computed only the first time a
                # response field is seen.
                if fid_response not in response_probs:
                    response_probs[fid_response] = observe(
                        similarity_response, symbols_resp)
                    response_sizes[fid_response] = cluster_sizes(
                        symbols_resp)

                # Log the number of messages in each cluster.
                logging.debug("Number of request symbols: {0}".format(
                    len(symbols_req.values())))
                for symbol in symbols_req.values():
                    logging.debug("  Symbol {0} msgs numbers: {1}".format(
                        str(symbol.name), len(symbol.messages)))
                logging.debug("Number of response symbols: {0}".format(
                    len(symbols_resp.values())))
                for symbol in symbols_resp.values():
                    logging.debug("  Symbol {0} msgs numbers: {1}".format(
                        str(symbol.name), len(symbol.messages)))

                # Remote coupling depends on both clusterings, so it is
                # evaluated for every pair.
                coupling = RemoteCoupling(messages_all=messages_aligned,
                                          symbols_request=symbols_req,
                                          symbols_response=symbols_resp,
                                          direction_list=self.direction_list)
                coupling.compute_pairs_by_directionlist()
                fid_pair = "{}-{}".format(fid_request, fid_response)
                p_r_request = coupling.compute_constraint_remote_coupling(
                    RemoteCoupling.TEST_TYPE_REQUEST)
                p_r_response = coupling.compute_constraint_remote_coupling(
                    RemoteCoupling.TEST_TYPE_RESPONSE)

                logging.debug(
                    "[+] Observation Prob Results for pairs {}".format(
                        fid_pair))

                p_m, p_s, p_d, p_v = request_probs[fid_request]
                logging.debug(
                    "Request:\nPm: {0}\nPr: {1}\nPs: {2}\nPd: {3}\nPv: {4}".
                    format(p_m, p_r_request, p_s, p_d, p_v))
                pairs_p_request[fid_pair] = [p_m, p_r_request, p_s, p_d, p_v]
                pairs_size_request[fid_pair] = request_sizes[fid_request]

                p_m, p_s, p_d, p_v = response_probs[fid_response]
                logging.debug(
                    "Response:\nPm: {0}\nPr: {1}\nPs: {2}\nPd: {3}\nPv: {4}".
                    format(p_m, p_r_response, p_s, p_d, p_v))
                pairs_p_response[fid_pair] = [p_m, p_r_response, p_s, p_d, p_v]
                pairs_size_response[fid_pair] = response_sizes[fid_response]

                # Drop the per-pair objects before the next iteration.
                del coupling
                del symbols_resp
                del merged_fields_resp
                gc.collect()

            del symbols_req
            del merged_fields_req
            gc.collect()

        return ([pairs_p_request, pairs_p_response],
                [pairs_size_request, pairs_size_response])
示例#2
0
文件: main.py 项目: yapengye/NetPlier
    # Remaining command-line options. `parser` is defined before these lines
    # (not shown here).
    parser.add_argument('-t', '--type', dest='protocol_type', help='type of the protocol (for generating the ground truth): \
        dhcp, dnp3, icmp, modbus, ntp, smb, smb2, tftp, zeroaccess')
    parser.add_argument('-o', '--output_dir', dest='output_dir', default='tmp_netplier/', help='output directory')
    parser.add_argument('-l', '--layer', dest='layer', default=5, type=int, help='the layer of the protocol')
    parser.add_argument('-m', '--mafft', dest='mafft_mode', default='ginsi', help='the mode of mafft: [ginsi, linsi, einsi]')
    parser.add_argument('-mt', '--multithread', dest='multithread', default=False, action='store_true', help='run mafft with multi threads')

    args = parser.parse_args()

    # Build the Processing object from the input file; its `messages` and
    # `direction_list` attributes feed NetPlier below.
    p = Processing(filepath=args.filepath_input, protocol_type=args.protocol_type, layer=args.layer)
    # p.print_dataset_info()
    
    # The mafft mode given on the command line is overridden with 'linsi'
    # when the protocol type is dnp3.
    mode = args.mafft_mode
    if args.protocol_type in['dnp3']: # tftp
        mode = 'linsi'
    netplier = NetPlier(messages=p.messages, direction_list=p.direction_list, output_dir=args.output_dir, mode=mode, multithread=args.multithread)
    # The value returned by execute() is used below as the inferred field id
    # for clustering.
    fid_inferred = netplier.execute()
    
    # Clustering
    # Aligned messages are obtained from the file
    # Alignment.FILENAME_OUTPUT_ONELINE inside the output directory
    # (presumably written during execute() -- TODO confirm), then both the
    # raw and the aligned messages are split by the direction list.
    messages_aligned = Alignment.get_messages_aligned(netplier.messages, os.path.join(netplier.output_dir, Alignment.FILENAME_OUTPUT_ONELINE))
    messages_request, messages_response = Processing.divide_msgs_by_directionlist(netplier.messages, netplier.direction_list)
    messages_request_aligned, messages_response_aligned = Processing.divide_msgs_by_directionlist(messages_aligned, netplier.direction_list)

    # The "true" clusterings use the unaligned messages and the inferred ones
    # use the aligned messages together with fid_inferred. Both are passed to
    # evaluation() as [request, response] lists.
    clustering = Clustering(fields=netplier.fields, protocol_type=args.protocol_type)
    clustering_result_request_true = clustering.cluster_by_kw_true(messages_request)
    clustering_result_response_true = clustering.cluster_by_kw_true(messages_response)
    clustering_result_request_netplier = clustering.cluster_by_kw_inferred(fid_inferred, messages_request_aligned)
    clustering_result_response_netplier = clustering.cluster_by_kw_inferred(fid_inferred, messages_response_aligned)
    clustering.evaluation([clustering_result_request_true, clustering_result_response_true], [clustering_result_request_netplier, clustering_result_response_netplier])