def bc_get_data_from_feature_file(self, feature_file):
    """Read a tab-separated feature file into a list of rows.

    Lines for which ``bc_utils.is_comment_line`` is true are skipped and
    not counted. Lines starting with "Total", "Unicode Encode Errors" or
    "Unicode Decode Errors" are counted but not returned. After each
    returned row the running count is compared with the limit stored in
    ``PdfReport.bc_config_feature_lines`` for this feature; a limit of 0
    means "report every line".

    Args:
        feature_file: Path of the feature file to read. The feature name
            used as the config key is sliced out of this path.

    Returns:
        A list with one entry per data line, each entry being the list of
        tab-separated fields of that line.
    """
    # Skip len(annotated_dir) + 1 + 10 leading characters (original note:
    # "Adding /annotated") and drop the last 4 characters (presumably the
    # file extension -- TODO confirm) to obtain the feature name. This
    # depends only on the path, so it is computed once, not per line.
    prefix_len = len(PdfReport.annotated_dir) + 1 + 10
    feat_filename = feature_file[prefix_len:-4]

    summary_prefixes = (
        "Total",
        "Unicode Encode Errors",
        "Unicode Decode Errors",
    )

    data = []
    linenum = 0
    with open(feature_file) as file_:
        for line in file_:
            if bc_utils.is_comment_line(line):
                continue
            linenum += 1
            if line.startswith(summary_prefixes):
                continue

            # Strip only the line terminator; slicing off the last
            # character unconditionally would eat real data when the
            # final line of the file has no trailing newline.
            data.append(line.rstrip('\n').split('\t'))

            # Config file sets the max lines to 0 to report all the lines.
            max_lines = PdfReport.bc_config_feature_lines[feat_filename]
            if max_lines != 0 and linenum >= max_lines:
                break
    return data
def bc_process_textfile(fn, image_fileinfo, image_info, FiwalkReport):
    """Process a fiwalk text file line by line, then generate the reports.

    Every line of ``fn.fiwalk_txtfile`` is numbered starting at 1 (comment
    lines included in the numbering). Lines for which ``is_comment_line``
    is true are skipped; every other line is passed to ``make_dict`` and
    ``get_file_info``. Once the whole file has been read,
    ``FiwalkReport.bc_generate_fiwalk_reports`` is called.

    Args:
        fn: Object exposing a ``fiwalk_txtfile`` path attribute; it is also
            handed on to the report generator.
        image_fileinfo: Passed through to ``make_dict`` unchanged.
        image_info: Passed through to ``make_dict`` unchanged.
        FiwalkReport: Passed to ``make_dict`` and ``get_file_info``, and
            provides ``bc_generate_fiwalk_reports``.
    """
    # The context manager closes the input file even if one of the
    # per-line helpers raises; a bare open() left the handle dangling.
    with open(fn.fiwalk_txtfile, "r") as ifd:
        for linenum, line in enumerate(ifd, start=1):
            if is_comment_line(line):
                continue  # eliminate comments

            # Add the info to the dictionary
            make_dict(linenum, line, image_fileinfo, image_info, FiwalkReport)

            # Form the dictionary of formats found in this fiwalk output file.
            get_file_info(line, FiwalkReport)

    # Write collected statistics to output file
    FiwalkReport.bc_generate_fiwalk_reports(FiwalkReport, fn)
def bc_parse_config_file(PdfReport, FiwalkReport, config_file):
    """Load report settings from a colon-separated config file into PdfReport.

    Each line that ``bc_utils.is_comment_line`` does not reject is split on
    ":" and dispatched on its first field:

    * ``L:<logo>`` -- store the logo in ``PdfReport.logo``.
    * ``F:<feature>:<n>`` -- if the feature is a key of
      ``PdfReport.bc_config_feature``, enable it and record ``n`` as its
      line limit (a third field consisting only of a newline means 0).
    * ``R:<report>:<n>`` -- if the report is a key of
      ``PdfReport.bc_config_report_files``, enable it and record ``n``.
    * ``M:<format>`` -- mark the file format as enabled.
    * ``S:<setting>:<value>`` -- ``REPORT_SPECIAL_FILES`` (``YES`` or
      anything else) or one of the integer limits listed in the body.
    * ``G:<param>:<value>`` -- regression-test parameters.

    Values for ``L`` and ``G`` are stored exactly as split, so they keep any
    trailing newline. Unknown ``S``/``G`` names and other line types are
    ignored.

    NOTE(review): a second function with this name but without the
    ``FiwalkReport`` parameter appears later in the visible source; if both
    live in the same module the later one replaces this one -- confirm.

    Args:
        PdfReport: Object whose configuration attributes are updated.
        FiwalkReport: Provides ``dictFileFmtStatic`` and ``regressionTest``.
        config_file: Path of the config file to read.
    """
    # Clone the static dictionary of file-formats to start with.
    # NOTE: The code is retained for future work on letting the user
    # configure the format files. As of now, all format files are
    # generated, but the user can limit the number of these files by setting
    # S:MAX_FILE_FORMAT_FILES_TO_REPORT:20
    PdfReport.bc_config_filefmt_files = FiwalkReport.dictFileFmtStatic.copy()

    # Initialize all the values to 0 (only values change, so iterating the
    # dict while assigning is safe).
    for file_format in PdfReport.bc_config_filefmt_files:
        PdfReport.bc_config_filefmt_files[file_format] = 0

    # By default, report special files
    PdfReport.bc_config_report_special_files = True

    # "S" settings whose value is parsed as an integer and stored in the
    # named PdfReport attribute.
    int_settings = {
        'MAX_LINES_TO_REPORT': 'bc_max_lines_to_report',
        'MAX_FILE_FORMAT_FILES_TO_REPORT': 'bc_max_fmtfiles_to_report',
        'MAX_FEATURE_FILES_TO_REPORT': 'bc_max_featfiles_to_report',
        'MAX_FORMATS_FOR_BAR_GRAPH': 'bc_max_formats_in_bar_graph',
        'FEATURE_OUTPUTS_IN_PDF': 'bc_feature_output_in_pdf',
    }

    # The context manager closes the config file even when a malformed line
    # raises; the bare open() used before never closed it.
    with open(config_file, "r") as ifd:
        for line in ifd:
            if bc_utils.is_comment_line(line):
                continue

            fields = line.split(":")
            kind = fields[0]

            if kind == 'L':
                # Logo
                print("Overwriting Logo with", fields[1])
                PdfReport.logo = fields[1]

            elif kind == 'F':
                # Only features already known to PdfReport can be enabled.
                feature = fields[1]
                if feature in PdfReport.bc_config_feature:
                    PdfReport.bc_config_feature[feature] = 1

                    # If the third field is empty (just the newline) or 0,
                    # all the lines of the feature file are reported.
                    # Otherwise report what the number says.
                    lines = 0 if fields[2] == '\n' else fields[2]
                    PdfReport.bc_config_feature_lines[feature] = int(lines)
                else:
                    print("Info: Feature %s does NOT exist" % feature)

            elif kind == 'R':
                # Only report files already known to PdfReport can be enabled.
                report = fields[1]
                if report in PdfReport.bc_config_report_files:
                    PdfReport.bc_config_report_files[report] = 1
                    PdfReport.bc_config_report_lines[report] = int(fields[2])
                else:
                    print("Info: Report file %s is not legitimate" % report)
                    print("Info: Fix the config file")

            elif kind == 'M':
                # Flag this file format as requested.
                file_format = fields[1].rstrip('\n')
                PdfReport.bc_config_filefmt_files[file_format] = 1

            elif kind == 'S':
                setting = fields[1].rstrip()
                if setting == 'REPORT_SPECIAL_FILES':
                    # Anything other than YES turns special-file reporting off.
                    PdfReport.bc_config_report_special_files = (
                        fields[2].rstrip() == 'YES')
                elif setting in int_settings:
                    setattr(PdfReport, int_settings[setting], int(fields[2]))

            elif kind == 'G':
                # Regression test parameters
                param = fields[1].rstrip()
                if param == 'REGRESS_ANNOTATED_DIR':
                    PdfReport.bc_regr_annotated_dir = fields[2]
                elif param == 'REGRESS_INPUT_XML_FILE':
                    PdfReport.bc_regr_xml_file = fields[2]
                elif param == 'REGRESS_OUTDIR':
                    # NOTE(review): this stores the output directory in
                    # bc_regr_xml_file, the same attribute as
                    # REGRESS_INPUT_XML_FILE, so whichever line comes last
                    # wins. Looks like a copy-paste slip; behavior is kept
                    # as-is until the intended attribute is confirmed.
                    PdfReport.bc_regr_xml_file = fields[2]

    # For regression test we keep the max formats to be reported to 20
    if FiwalkReport.regressionTest == True:
        PdfReport.bc_max_fmtfiles_to_report = 20
def bc_parse_config_file(PdfReport, config_file):
    """Load report settings from a colon-separated config file into PdfReport.

    Each line that ``bc_utils.is_comment_line`` does not reject is split on
    ":" and dispatched on its first field:

    * ``L:<logo>`` -- store the logo in ``PdfReport.logo``.
    * ``F:<feature>:<n>`` -- if the feature is a key of
      ``PdfReport.bc_config_feature``, enable it and record ``n`` as its
      line limit (a third field consisting only of a newline means 0).
    * ``R:<report>:<n>`` -- if the report is a key of
      ``PdfReport.bc_config_report_files``, enable it and record ``n``.
    * ``M:<format>`` -- mark the file format as enabled.
    * ``S:<setting>:<value>`` -- ``REPORT_SPECIAL_FILES`` (``YES`` or
      anything else) or one of the integer limits listed in the body.
    * ``G:<param>:<value>`` -- regression-test parameters.

    Values for ``L`` and ``G`` are stored exactly as split, so they keep any
    trailing newline. Unknown ``S``/``G`` names and other line types are
    ignored.

    ``FiwalkReport`` is not a parameter here; it is read from the enclosing
    (module) scope for ``dictFileFmtStatic`` and ``regressionTest``.

    NOTE(review): another function with this name, taking an extra
    ``FiwalkReport`` parameter, appears earlier in the visible source; if
    both live in the same module this definition replaces it -- confirm.

    Args:
        PdfReport: Object whose configuration attributes are updated.
        config_file: Path of the config file to read.
    """
    # Clone the static dictionary of file-formats to start with.
    # NOTE: The code is retained for future work on letting the user
    # configure the format files. As of now, all format files are
    # generated, but the user can limit the number of these files by setting
    # S:MAX_FILE_FORMAT_FILES_TO_REPORT:20
    PdfReport.bc_config_filefmt_files = FiwalkReport.dictFileFmtStatic.copy()

    # Initialize all the values to 0 (only values change, so iterating the
    # dict while assigning is safe).
    for file_format in PdfReport.bc_config_filefmt_files:
        PdfReport.bc_config_filefmt_files[file_format] = 0

    # By default, report special files
    PdfReport.bc_config_report_special_files = True

    # "S" settings whose value is parsed as an integer and stored in the
    # named PdfReport attribute.
    int_settings = {
        'MAX_LINES_TO_REPORT': 'bc_max_lines_to_report',
        'MAX_FILE_FORMAT_FILES_TO_REPORT': 'bc_max_fmtfiles_to_report',
        'MAX_FEATURE_FILES_TO_REPORT': 'bc_max_featfiles_to_report',
        'MAX_FORMATS_FOR_BAR_GRAPH': 'bc_max_formats_in_bar_graph',
        'FEATURE_OUTPUTS_IN_PDF': 'bc_feature_output_in_pdf',
    }

    # The context manager closes the config file even when a malformed line
    # raises; the bare open() used before never closed it.
    with open(config_file, "r") as ifd:
        for line in ifd:
            if bc_utils.is_comment_line(line):
                continue

            fields = line.split(":")
            kind = fields[0]

            if kind == 'L':
                # Logo
                print("Overwriting Logo with", fields[1])
                PdfReport.logo = fields[1]

            elif kind == 'F':
                # Only features already known to PdfReport can be enabled.
                feature = fields[1]
                if feature in PdfReport.bc_config_feature:
                    PdfReport.bc_config_feature[feature] = 1

                    # If the third field is empty (just the newline) or 0,
                    # all the lines of the feature file are reported.
                    # Otherwise report what the number says.
                    lines = 0 if fields[2] == '\n' else fields[2]
                    PdfReport.bc_config_feature_lines[feature] = int(lines)
                else:
                    print("Info: Feature %s does NOT exist" % feature)

            elif kind == 'R':
                # Only report files already known to PdfReport can be enabled.
                report = fields[1]
                if report in PdfReport.bc_config_report_files:
                    PdfReport.bc_config_report_files[report] = 1
                    PdfReport.bc_config_report_lines[report] = int(fields[2])
                else:
                    print("Info: Report file %s is not legitimate" % report)
                    print("Info: Fix the config file")

            elif kind == 'M':
                # Flag this file format as requested.
                file_format = fields[1].rstrip('\n')
                PdfReport.bc_config_filefmt_files[file_format] = 1

            elif kind == 'S':
                setting = fields[1].rstrip()
                if setting == 'REPORT_SPECIAL_FILES':
                    # Find out if user wants special files to be reported
                    if fields[2].rstrip() == 'YES':
                        PdfReport.bc_config_report_special_files = True
                    else:
                        print("D: Not reporting Special files")
                        PdfReport.bc_config_report_special_files = False
                elif setting in int_settings:
                    setattr(PdfReport, int_settings[setting], int(fields[2]))

            elif kind == 'G':
                # Regression test parameters
                param = fields[1].rstrip()
                if param == 'REGRESS_ANNOTATED_DIR':
                    PdfReport.bc_regr_annotated_dir = fields[2]
                elif param == 'REGRESS_INPUT_XML_FILE':
                    PdfReport.bc_regr_xml_file = fields[2]
                elif param == 'REGRESS_OUTDIR':
                    # NOTE(review): this stores the output directory in
                    # bc_regr_xml_file, the same attribute as
                    # REGRESS_INPUT_XML_FILE, so whichever line comes last
                    # wins. Looks like a copy-paste slip; behavior is kept
                    # as-is until the intended attribute is confirmed.
                    PdfReport.bc_regr_xml_file = fields[2]

    # For regression test we keep the max formats to be reported to 20
    if FiwalkReport.regressionTest == True:
        PdfReport.bc_max_fmtfiles_to_report = 20