def compute_observation_probabilities(self):
    """Compute the observation-constraint probabilities for every candidate
    request/response field pair.

    For each candidate request field id and each candidate response field id,
    the messages are clustered by that field and four per-cluster observation
    probabilities are computed (message similarity, structure, dimension,
    value), plus a remote-coupling probability for the request/response pair.

    Returns:
        (pairs_p, pairs_size) where
        pairs_p   = [pairs_p_request, pairs_p_response] — each a dict mapping
                    "fid_request-fid_response" to [p_m, p_r, p_s, p_d, p_v];
        pairs_size = [pairs_size_request, pairs_size_response] — each a dict
                    mapping the same key to the per-cluster message counts.
    """
    print("[++++++++] Compute probabilities of observation constraints")
    # Aligned messages are read back from the one-line alignment output file.
    messages_aligned = Alignment.get_messages_aligned(
        self.messages,
        os.path.join(self.output_dir, Alignment.FILENAME_OUTPUT_ONELINE))
    # Split both raw and aligned messages into request/response halves.
    messages_request, messages_response = Processing.divide_msgs_by_directionlist(
        self.messages, self.direction_list)
    messages_request_aligned, messages_response_aligned = Processing.divide_msgs_by_directionlist(
        messages_aligned, self.direction_list)

    # Candidate keyword-field ids for each direction.
    fid_list_request = self.filter_fields(self.fields, self.fid_list,
                                          messages_request_aligned)
    fid_list_response = self.filter_fields(self.fields, self.fid_list,
                                           messages_response_aligned)
    logging.debug(
        "request candidate fid: {}\nresponse candidate fid: {}".format(
            fid_list_request, fid_list_response))

    # compute matrix of similarity scores
    constraint_m_request, constraint_m_response = MessageSimilarity(
        messages=messages_request_aligned), MessageSimilarity(
            messages=messages_response_aligned)
    constraint_m_request.compute_similarity_matrix()
    constraint_m_response.compute_similarity_matrix()

    # the observation prob of each cluster: {fid: the list of observation probabilities ([pm,ps,pd,pv])}
    cluster_p_request, cluster_p_response = dict(), dict()
    # the size of each cluster
    cluster_size_request, cluster_size_response = dict(), dict()
    # the observation prob of each cluster pair: {fid-fid: [,]}
    pairs_p_request, pairs_p_response = dict(), dict()
    pairs_size_request, pairs_size_response = dict(), dict()

    for fid_request in fid_list_request:
        logging.info("[++++] Test Request Field {0}-*".format(fid_request))
        # merge other fields
        fields_merged_request = self.merge_nontest_fields(
            self.fields, fid_request)
        # After merging, the test field sits at index 0 (if it was the first
        # field) or index 1 (everything before it was merged into one field).
        fid_merged_request = 0 if fid_request == 0 else 1
        # generate clusters
        symbols_request_aligned = self.cluster_by_field(
            fields_merged_request, messages_request_aligned,
            fid_merged_request)
        # change symbol names
        symbols_request_aligned = self.change_symbol_name(
            symbols_request_aligned)

        # compute prob of m,s,d,v — order matters: downstream unpacking
        # expects [p_m, p_s, p_d, p_v].
        cluster_p_request[fid_request] = list()
        cluster_p_request[fid_request].append(
            constraint_m_request.compute_constraint_message_similarity(
                symbols_request_aligned))
        cluster_p_request[fid_request].append(
            self.compute_constraint_structure(symbols_request_aligned))
        cluster_p_request[fid_request].append(
            self.compute_constraint_dimension(symbols_request_aligned))
        cluster_p_request[fid_request].append(
            self.compute_constraint_value(symbols_request_aligned))
        cluster_size_request[fid_request] = [
            len(s.messages) for s in symbols_request_aligned.values()
        ]

        for fid_response in fid_list_response:
            #if fid_request != fid_response:
            #    continue
            logging.debug("[++] Test Response Field {0}-{1}".format(
                fid_request, fid_response))
            # merge other fields
            fields_merged_response = self.merge_nontest_fields(
                self.fields, fid_response)
            fid_merged_response = 0 if fid_response == 0 else 1
            # generate clusters
            symbols_response_aligned = self.cluster_by_field(
                fields_merged_response, messages_response_aligned,
                fid_merged_response)
            # change symbol names
            symbols_response_aligned = self.change_symbol_name(
                symbols_response_aligned)

            # compute prob of m,s,d,v — response-side probabilities depend
            # only on fid_response, so cache them across outer iterations.
            # NOTE(review): the original file's collapsed formatting makes the
            # exact nesting of this cache block ambiguous; the caching reading
            # is assumed here and is idempotent either way.
            if fid_response not in cluster_p_response:
                cluster_p_response[fid_response] = list()
                cluster_p_response[fid_response].append(
                    constraint_m_response.
                    compute_constraint_message_similarity(
                        symbols_response_aligned))
                cluster_p_response[fid_response].append(
                    self.compute_constraint_structure(
                        symbols_response_aligned))
                cluster_p_response[fid_response].append(
                    self.compute_constraint_dimension(
                        symbols_response_aligned))
                cluster_p_response[fid_response].append(
                    self.compute_constraint_value(
                        symbols_response_aligned))
                cluster_size_response[fid_response] = [
                    len(s.messages)
                    for s in symbols_response_aligned.values()
                ]

            # print msg numbers of each cluster
            logging.debug("Number of request symbols: {0}".format(
                len(symbols_request_aligned.values())))
            for s in symbols_request_aligned.values():
                logging.debug(" Symbol {0} msgs numbers: {1}".format(
                    str(s.name), len(s.messages)))
            logging.debug("Number of response symbols: {0}".format(
                len(symbols_response_aligned.values())))
            for s in symbols_response_aligned.values():
                logging.debug(" Symbol {0} msgs numbers: {1}".format(
                    str(s.name), len(s.messages)))

            # compute remote coupling probabilities (how well request and
            # response clusterings agree across the conversation).
            rc = RemoteCoupling(messages_all=messages_aligned,
                                symbols_request=symbols_request_aligned,
                                symbols_response=symbols_response_aligned,
                                direction_list=self.direction_list)
            rc.compute_pairs_by_directionlist()
            fid_pair = "{}-{}".format(fid_request, fid_response)
            p_r_request = rc.compute_constraint_remote_coupling(
                RemoteCoupling.TEST_TYPE_REQUEST)
            p_r_response = rc.compute_constraint_remote_coupling(
                RemoteCoupling.TEST_TYPE_RESPONSE)

            logging.debug(
                "[+] Observation Prob Results for pairs {}".format(
                    fid_pair))
            # Assemble the request-side probability vector [m, r, s, d, v].
            p_m, p_s, p_d, p_v = cluster_p_request[fid_request][
                0], cluster_p_request[fid_request][1], cluster_p_request[
                    fid_request][2], cluster_p_request[fid_request][3]
            logging.debug(
                "Request:\nPm: {0}\nPr: {1}\nPs: {2}\nPd: {3}\nPv: {4}".
                format(p_m, p_r_request, p_s, p_d, p_v))
            pairs_p_request[fid_pair] = [p_m, p_r_request, p_s, p_d, p_v]
            pairs_size_request[fid_pair] = cluster_size_request[
                fid_request]
            # Assemble the response-side probability vector [m, r, s, d, v].
            p_m, p_s, p_d, p_v = cluster_p_response[fid_response][
                0], cluster_p_response[fid_response][
                    1], cluster_p_response[fid_response][
                        2], cluster_p_response[fid_response][3]
            logging.debug(
                "Response:\nPm: {0}\nPr: {1}\nPs: {2}\nPd: {3}\nPv: {4}".
                format(p_m, p_r_response, p_s, p_d, p_v))
            pairs_p_response[fid_pair] = [p_m, p_r_response, p_s, p_d, p_v]
            pairs_size_response[fid_pair] = cluster_size_response[
                fid_response]

            # Explicitly release the large per-pair objects to keep peak
            # memory down across the (potentially large) pair loop.
            del rc
            del symbols_response_aligned #symbols
            del fields_merged_response
            gc.collect()
        del symbols_request_aligned
        del fields_merged_request
        gc.collect()

    pairs_p = [pairs_p_request, pairs_p_response]
    pairs_size = [pairs_size_request, pairs_size_response]
    return pairs_p, pairs_size
# ---- command-line options (continues the parser created above) ----
parser.add_argument('-t', '--type', dest='protocol_type',
                    help='type of the protocol (for generating the ground truth): '
                         'dhcp, dnp3, icmp, modbus, ntp, smb, smb2, tftp, zeroaccess')
parser.add_argument('-o', '--output_dir', dest='output_dir',
                    default='tmp_netplier/', help='output directory')
parser.add_argument('-l', '--layer', dest='layer', default=5, type=int,
                    help='the layer of the protocol')
parser.add_argument('-m', '--mafft', dest='mafft_mode', default='ginsi',
                    help='the mode of mafft: [ginsi, linsi, einsi]')
parser.add_argument('-mt', '--multithread', dest='multithread',
                    default=False, action='store_true',
                    help='run mafft with multi threads')
args = parser.parse_args()

# Parse the input trace into messages with per-message direction info.
p = Processing(filepath=args.filepath_input,
               protocol_type=args.protocol_type,
               layer=args.layer)
# p.print_dataset_info()

# dnp3 traces are aligned with the linsi mode; every other protocol uses
# whatever mode was requested on the command line.
mode = 'linsi' if args.protocol_type in ('dnp3',) else args.mafft_mode

# Run keyword-field inference.
netplier = NetPlier(messages=p.messages,
                    direction_list=p.direction_list,
                    output_dir=args.output_dir,
                    mode=mode,
                    multithread=args.multithread)
fid_inferred = netplier.execute()

# ---- clustering evaluation: ground truth vs. inferred keyword field ----
messages_aligned = Alignment.get_messages_aligned(
    netplier.messages,
    os.path.join(netplier.output_dir, Alignment.FILENAME_OUTPUT_ONELINE))
messages_request, messages_response = Processing.divide_msgs_by_directionlist(
    netplier.messages, netplier.direction_list)
messages_request_aligned, messages_response_aligned = Processing.divide_msgs_by_directionlist(
    messages_aligned, netplier.direction_list)

clustering = Clustering(fields=netplier.fields,
                        protocol_type=args.protocol_type)
results_true = [
    clustering.cluster_by_kw_true(messages_request),
    clustering.cluster_by_kw_true(messages_response),
]
results_inferred = [
    clustering.cluster_by_kw_inferred(fid_inferred, messages_request_aligned),
    clustering.cluster_by_kw_inferred(fid_inferred, messages_response_aligned),
]
clustering.evaluation(results_true, results_inferred)