def count_lines(files):
    """Parse each file and print line and category statistics.

    Every entry of *files* is passed to ``run_parser``. If that call raises,
    the file is reported on stdout and skipped rather than aborting the run.
    For the files that parse, the function counts all returned lines, the
    lines whose ``categories`` or ``context`` attribute is truthy, and how
    often each category occurs, then prints a summary.

    Args:
        files: Iterable of file paths to hand to ``run_parser``.

    Returns:
        None. All results are written to stdout.
    """
    files_processed = 0
    error_files = 0
    total_lines = 0
    total_lines_tagged = 0
    category_totals = {}

    for path in files:
        # Keep the guarded region to the one call that is expected to fail.
        try:
            lines = run_parser(path)
        except Exception as e:  # best-effort: report the file and move on
            error_files += 1
            # f-string so a non-str path (e.g. pathlib.Path) is still reported.
            print(f"{path} --> {e}")
            continue
        files_processed += 1

        for line in lines:
            total_lines += 1
            if line.categories or line.context:
                total_lines_tagged += 1
                # A line that fails the test above has a falsy ``categories``,
                # so it has nothing to contribute to the per-category totals.
                for category in line.categories:
                    category_totals[category] = category_totals.get(category, 0) + 1

    print(f"Processed {files_processed} files, skipping {error_files} files due to errors")
    print(" ============================")
    print("Total lines:", total_lines)
    print("Total lines with tags:", total_lines_tagged)
    print("====== Category totals ======")
    # Categories are listed in the order they were first encountered.
    for category, count in category_totals.items():
        print(f"{category} => {count}")
def read_dir(directory):
    """Parse every file under *directory* and wrap each result in a Document.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included. Each file's path is passed to
    ``run_parser`` and the path together with the parser's result is used to
    build a ``Document``. Exceptions raised by ``run_parser`` or
    ``Document`` are not caught here.

    Args:
        directory: Root directory to walk.

    Returns:
        A list of ``Document`` objects in the order ``os.walk`` yields the
        files; empty when no files are found.
    """
    documents = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            # os.path.join avoids a doubled separator when ``root`` already
            # ends in one and uses the platform's own separator.
            full_path = os.path.join(root, file)
            lines = run_parser(full_path)
            documents.append(Document(full_path, lines))
    return documents