def record_detect(flow_address, host_stored_address):
    """Compute the feature vector of every well-formed record in a flow file.

    A line of the flow file is used only when, after stripping spaces and
    newlines from both ends, it splits on tabs into exactly 13 fields and the
    field at index ``_host`` is not empty. Every other line is skipped
    silently.

    NOTE(review): this file contains a second, equivalent definition of
    ``record_detect`` directly after this one. The later definition rebinds
    the name, so one of the two copies should be removed.

    Args:
        flow_address: Path of the tab-separated flow file.
        host_stored_address: Path of the pickled host collector; the loaded
            object must provide ``get_host_model(host)``.

    Returns:
        A list with one 8-element list per accepted record, holding the
        record's feature values in the order given by ``feature_keys``.
    """
    # Order matters: it fixes the column order of every returned vector.
    feature_keys = (
        'path_prop',
        'specialSymbol_prop',
        'enumeration',
        'variable_composition',
        'variable_order',
        'value_length_prop',
        'value_distribution1',
        'value_distribution2',
    )

    # NOTE(review): unpickling can execute arbitrary code, so
    # host_stored_address must only ever point at a trusted file.
    with open(host_stored_address, 'rb') as model_file:
        host_collector = cpickle.load(model_file)

    # The timer starts after the model is loaded, so the reported duration
    # covers record processing only.
    began = datetime.datetime.now()
    features = []
    with open(flow_address, 'rb') as flow_file:
        for raw_line in flow_file:
            fields = raw_line.strip(' \n').split('\t')
            if len(fields) != 13 or fields[_host] == '':
                continue

            flow = FlowRecord(fields)
            host_model = host_collector.get_host_model(flow['host'])
            # presumably fills in the feature entries read below -- confirm
            # against rfF.calculate.
            rfF.calculate(flow, host_model)
            features.append([flow[key] for key in feature_keys])

            # One vector is appended per accepted record, so the list length
            # is the running record count.
            if len(features) % 10000 == 0:
                print('Record completed: %s' % len(features))
    finished = datetime.datetime.now()

    print('Calculate Records: %s' % len(features))
    print('Record Calculated Consuming: %s' % (finished - began))
    return features
def record_detect(flow_address, host_stored_address):
    """Compute the feature vector of every well-formed record in a flow file.

    A line of the flow file is used only when, after stripping spaces and
    newlines from both ends, it splits on tabs into exactly 13 fields and the
    field at index ``_host`` is not empty. Every other line is skipped
    silently.

    NOTE(review): an equivalent definition of ``record_detect`` appears
    immediately before this one in the file. This later definition is the
    one that stays bound to the name; the earlier copy is redundant.

    Args:
        flow_address: Path of the tab-separated flow file.
        host_stored_address: Path of the pickled host collector; the loaded
            object must provide ``get_host_model(host)``.

    Returns:
        A list with one 8-element list per accepted record, holding the
        record's feature values in the order given by ``feature_keys``.
    """
    # Order matters: it fixes the column order of every returned vector.
    feature_keys = (
        'path_prop',
        'specialSymbol_prop',
        'enumeration',
        'variable_composition',
        'variable_order',
        'value_length_prop',
        'value_distribution1',
        'value_distribution2',
    )

    # NOTE(review): unpickling can execute arbitrary code, so
    # host_stored_address must only ever point at a trusted file.
    with open(host_stored_address, 'rb') as model_file:
        host_collector = cpickle.load(model_file)

    # The timer starts after the model is loaded, so the reported duration
    # covers record processing only.
    began = datetime.datetime.now()
    features = []
    with open(flow_address, 'rb') as flow_file:
        for raw_line in flow_file:
            fields = raw_line.strip(' \n').split('\t')
            if len(fields) != 13 or fields[_host] == '':
                continue

            flow = FlowRecord(fields)
            host_model = host_collector.get_host_model(flow['host'])
            # presumably fills in the feature entries read below -- confirm
            # against rfF.calculate.
            rfF.calculate(flow, host_model)
            features.append([flow[key] for key in feature_keys])

            # One vector is appended per accepted record, so the list length
            # is the running record count.
            if len(features) % 10000 == 0:
                print('Record completed: %s' % len(features))
    finished = datetime.datetime.now()

    print('Calculate Records: %s' % len(features))
    print('Record Calculated Consuming: %s' % (finished - began))
    return features
def cal_feature(record, host_collector):
    """Return the feature vector of a single flow record as strings.

    Args:
        record: Raw record passed unchanged to ``FlowRecord``.
        host_collector: Object providing ``get_host_model(host)``; the model
            it returns for the record's ``'host'`` entry is handed to
            ``rfF.calculate``.

    Returns:
        A list of 8 strings: ``str()`` of each feature value, in the order
        given by ``feature_keys``.
    """
    # Order matters: it fixes the column order of the returned vector.
    feature_keys = (
        'path_prop',
        'specialSymbol_prop',
        'enumeration',
        'variable_composition',
        'variable_order',
        'value_length_prop',
        'value_distribution1',
        'value_distribution2',
    )

    flow = FlowRecord(record)
    host_model = host_collector.get_host_model(flow['host'])
    # presumably fills in the feature entries read below -- confirm against
    # rfF.calculate.
    rfF.calculate(flow, host_model)
    return [str(flow[key]) for key in feature_keys]