import glob
import os

# Note: the module-level names used below (cmds_list, header_cols, comp, fields_old,
# TOTAL_COLS, dest_ips, header, gen_utils, dpiloganalyzer_log, and the helper
# functions) are defined elsewhere in the source module.
def read_dpi_logs(domain_patterns, golden_config_dict, analyzer_conf_dict, mdreq, device_map_name):
    global cmds_list
    global header_cols
    global comp
    global fields_old
    global TOTAL_COLS
    num_lines = 0
    filename = None
    try:
        fire_pbr_for_cacheable_url = analyzer_conf_dict["fire_pbr_for_cacheable_url"].strip().lower()
    except KeyError as e:
        fire_pbr_for_cacheable_url = "no"
    MAX_FILTER_RULES_NODE = "/nkn/device_map/config/" + device_map_name + "/filter_config/max_rules"
    try:
        max_filter_rules = mdreq.query(MAX_FILTER_RULES_NODE)
    except KeyError as e:
        dpiloganalyzer_log.error("The filter_config/max_rules node is not available. Defauling to max-rules of 10000")
        max_filter_rules = 10000

    # Look for files starting with the dpilog_ keyword and with a number as the extension.
    # Sort the log files by timestamp so the latest files are processed first.
    dpi_logfiles = sorted(glob.glob("dpilog_*.[0-9]*"), key=lambda x: os.path.getmtime(x), reverse=True)
    for filename in dpi_logfiles:
        # print filename
        with open(filename, "a+") as fp:
            # Check if the file has already been processed by looking for the signature at the end
            # (the -9 offset is assumed to span header.SIGNATURE plus surrounding whitespace)
            fp.seek(-9, os.SEEK_END)
            if fp.read().strip() != header.SIGNATURE:
                dpiloganalyzer_log.info("Reading the file %s" % filename)
                # New file, so go to the beginning of the file
                fp.seek(0, 0)
                # Read the fields format line
                line = fp.readline()
                # Loop until we hit the '#Fields' line in the log file
                # (readline() returns '' at EOF, never None)
                while line and "#Fields" not in line:
                    line = fp.readline()
                # If the '#Fields' line is not present in the log file then skip that file
                if not line:
                    dpiloganalyzer_log.info("The '#Fields' line is not present in the accesslog. Skipping the file")
                    continue
                fields_new = line
                # Check if the format has changed while reading the files
                if fields_new != fields_old:
                    # If the log file does not have all the mandatory HTTP headers then skip that file
                    http_headers = line[8:].strip()
                    headers_list = []
                    for item in http_headers.split():
                        # Strip the surrounding quotes from quoted header names
                        if item[0] == '"' or item[0] == "'":
                            headers_list.append(item[1:-1])
                        else:
                            headers_list.append(item)
                    if not isMandatoryHttpHeadersPresent(headers_list):
                        dpiloganalyzer_log.info("One or more of the mandatory HTTP headers is missing in the dpilog")
                        continue
                    # Record the format only after it has been validated, so a later file
                    # with the same bad format is re-checked rather than silently accepted
                    fields_old = fields_new
                    # Generate the HTTP header to column mapping
                    header_cols = gen_utils.generate_headers_column_mapping(headers_list)
                    TOTAL_COLS = len(headers_list)
                num_lines = 0
                for line in fp:
                    # 'cols' avoids shadowing the builtin 'list'
                    cols = comp.findall(line)
                    # If the total columns in the line don't match TOTAL_COLS just continue
                    if len(cols) != TOTAL_COLS:
                        continue
                    # If the Host header is not present in the log line, skip it
                    host = cols[header_cols["cs_Host_"]]
                    if host == "-":
                        continue
                    # If the host header matches the domain pattern and the URI matches
                    # the URI pattern of the golden config, then the PBR should be fired
                    result = is_golden_config_match(golden_config_dict, cols, line)
                    num_lines += 1
                    if result or (
                        fire_pbr_for_cacheable_url == "yes" and isCacheable(line, cols, domain_patterns, num_lines)
                    ):
                        # Get the dest-ip from the log
                        destip = cols[header_cols["s_ip"]]
                        # dest_ips is a dict: O(1) uniqueness check, and the value counts hits per dest-ip
                        if destip not in dest_ips:
                            dest_ips[destip] = 1
                            set_str = "set policy-options prefix-list redirect-to-proxy %s" % (destip)
                            cmds_list.append(set_str)
                        else:
                            dest_ips[destip] += 1
                fp.write(header.SIGNATURE)
                # Enforce the max-rules threshold; fall back to 10000 if the
                # configured value is not an integer
                try:
                    max_filter_rules = int(max_filter_rules)
                except ValueError as e:
                    dpiloganalyzer_log.info(
                        "An integer value has to be specified for max_filter_rules in the analyzer.conf. Defaulting to 10000"
                    )
                    max_filter_rules = 10000
                if len(cmds_list) >= max_filter_rules:
                    sendPbrs(cmds_list)
            else:
                # Signature found, so the file has already been processed; skip it
                # (the with-block closes the file, so no explicit close is needed)
                continue
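
Both examples tokenize log lines with a precompiled module-level regex named comp, defined elsewhere in the source module. Per the comment in Example #2 below ("All words with whitespace within double or single quotes are considered one word"), a minimal sketch of a pattern with that behavior could look like this; the exact pattern is an assumption:

import re

# Hypothetical tokenizer: a double- or single-quoted run counts as one field,
# everything else splits on whitespace (the real pattern lives in the module)
comp = re.compile(r'"[^"]*"|\'[^\']*\'|\S+')

# Quick check: the quoted User-Agent stays a single token
sample = '10.0.0.1 example.com "Mozilla/5.0 (X11; Linux)" 200'
print(comp.findall(sample))
# ['10.0.0.1', 'example.com', '"Mozilla/5.0 (X11; Linux)"', '200']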
Example #2
import glob
import os
import pickle
import time

# Note: comp, checksum_dict, fields_old, num_files_read, header_cols, TOTAL_COLS,
# gen_utils, mfcloganalyzer_log and the helper functions are defined elsewhere
# in the source module.
def parse_mfc_accesslog(generic_namespace_name, analyzer_conf_dict, mfc_accesslog_path):
	namespace_dict = dict()
	filename = None
	global fields_old
	global num_files_read
	global header_cols
	global TOTAL_COLS
	#Sort the accesslog files based on the timestamp. Latest files will be read first
	accesslog_files = sorted(glob.glob('*access.log*.gz'), key=lambda x: os.path.getmtime(x), reverse=True)
	#Loop through all the accesslog files
	for filename in accesslog_files:
		#print filename
		#Check if the accesslog file has already been processed
		if isFileAlreadyProcessessed(filename):
			continue
		mfcloganalyzer_log.info("Reading the file %s"%filename)
		fp = gen_utils.zlib_file()
		start_time = time.time()
		fp.open(filename)
		line = fp.readline()
		#Loop until we hit the '#Fields' line in the log file (readline() returns '' at EOF, never None)
		while line and '#Fields' not in line:
			line = fp.readline()
		#If the '#Fields' line is not present in the log file then skip that file
		if not line:
			mfcloganalyzer_log.info("The '#Fields' line is not present in the accesslog. Skipping the file")
			fp.close()
			continue
		#Generate the http header to column mapping
		fields_new = line
		if fields_new != fields_old:
			#If the accesslog file does not have all the mandatory HTTP headers then skip that file
			http_headers = line[8:].strip()
			headers_list = http_headers.split()
			if not isMandatoryHttpHeadersPresent(headers_list):
				mfcloganalyzer_log.info("One of the mandatory http headers namespace or server-ip is missing in the accesslog")
				fp.close()
				continue
			#Record the format only after validation, so a later file with the same bad format is re-checked
			fields_old = fields_new
			header_cols = gen_utils.generate_headers_column_mapping(headers_list)
			TOTAL_COLS = len(headers_list)
		num_files_read += 1
		num_lines = 0
		#Now read the rest of the mfc accesslog file line by line
		while True:
			line = fp.readline()
			#readline() returns '' at EOF, so stop there
			if not line:
				break
			line = line.strip()
			#The line is commented, just skip it.
			if line.startswith('#'):
				continue
			#Find all the words with space as the delimiter
			#Words containing whitespace within double or single quotes are considered one word
			lst = comp.findall(line)
			#Skip the tunneled data
			if len(lst) == 0 or 'Tunnel' in lst[0]:
				continue
			#If the number of columns logged doesn't match the number of HTTP headers, skip that line
			#Log only for the first such line so we don't fill the log files
			if len(lst) != TOTAL_COLS:
				num_lines += 1
				if num_lines == 1:
					mfcloganalyzer_log.info("%s", line)
					mfcloganalyzer_log.info("# of hdrs in 'Fields' doesn't match # of hdrs generated, hdrs with whitespaces needs to be quoted")
				continue
			namespace = lst[header_cols['x_namespace']]
			dest_ip = lst[header_cols['s_ip']]
			#If the namespace name is not equal to the generic namespace then store the namespace name
			#as a key and the list of server-ip's as the values in a dictionary
			if namespace != generic_namespace_name and namespace != '-':
				#If the namespace already exists in the dictionary, reuse its server-ip list
				if namespace in namespace_dict:
					server_ip = namespace_dict[namespace]
					#If the dest ip is not already there in the list then append it
					if dest_ip not in server_ip:
						server_ip.append(dest_ip)
				else:
					#New namespace so just add it and create a list of dest ip's
					namespace_dict[namespace] = [dest_ip]
		fp.close()
		#Compare numerically; the config value is read in as a string
		if num_files_read == int(analyzer_conf_dict['no_of_accesslog_files_read']):
			#print "SENDING THE FILE"
			num_files_read = 0
			send_cmd_on_threshold_limit(namespace_dict)
		#Persist with the checksum dictionary entries
		with open('log_analyzer_checksum_dict.pickle', 'wb') as f:
			pickle.dump(checksum_dict, f)
		#mfcloganalyzer_log.info("%s seconds", (time.time() - start_time))
		#print time.time() - start_time, "seconds"
	return 0
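
Both examples also lean on helpers defined elsewhere in the module: gen_utils.generate_headers_column_mapping, isMandatoryHttpHeadersPresent, and isFileAlreadyProcessessed. Below are minimal sketches consistent with how they are called above; the key normalization, the mandatory-header set, and the checksum scheme are assumptions, not the module's actual implementations:

import hashlib
import re

def generate_headers_column_mapping(headers_list):
    # Map each '#Fields' header to its column index. Keys used above, such as
    # 'cs_Host_' and 's_ip', suggest non-alphanumeric characters are rewritten
    # to '_' (e.g. 'cs(Host)' -> 'cs_Host_'); that normalization is an inference.
    return dict((re.sub(r'\W', '_', hdr), col) for col, hdr in enumerate(headers_list))

# Assumed mandatory headers for the accesslog path, taken from the log message
# in parse_mfc_accesslog; the DPI path may check a different set
MANDATORY_HEADERS = set(['x_namespace', 's_ip'])

def isMandatoryHttpHeadersPresent(headers_list):
    # Every mandatory header must appear in the (normalized) '#Fields' headers
    normalized = set(re.sub(r'\W', '_', hdr) for hdr in headers_list)
    return MANDATORY_HEADERS.issubset(normalized)

checksum_dict = {}

def isFileAlreadyProcessessed(filename):
    # Hypothetical scheme: hash the file and compare against checksum_dict,
    # the dictionary that parse_mfc_accesslog persists with pickle
    with open(filename, 'rb') as f:
        digest = hashlib.md5(f.read()).hexdigest()
    if checksum_dict.get(filename) == digest:
        return True
    checksum_dict[filename] = digest
    return False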