def test_name():
    """Check the number of names counted by ``main.redact_names``.

    Loads ``tests/test_files/name.txt``, part-of-speech tags its tokens,
    runs the name redaction and asserts that 22 names were recorded for the
    file in ``main.num_names``. If the assertion holds, the redacted text is
    written to ``tests/test_files/redacted/`` for reference.
    """
    fixture = "name.txt"
    source_path = 'tests/test_files/' + fixture
    redacted_dir = 'tests/test_files/redacted/'

    # Set up the per-file statistics before any redaction runs.
    main.init_stats(source_path, 0, None)

    # Load the test file.
    original_text = main.get_file_contents(source_path)

    # Split the text into tokens and tag each one for POS analysis.
    tokens = WordPunctTokenizer().tokenize(original_text)
    tagged_tokens = nltk.pos_tag(tokens)

    # Redact the names.
    redacted_text = main.redact_names(original_text, tagged_tokens, source_path)

    # The fixture is expected to yield exactly this many names.
    assert main.num_names[source_path] == 22

    # Make sure the output directory exists before writing into it.
    if not os.path.isdir(redacted_dir):
        sys.stderr.write("Output directory did not exist...creating " + redacted_dir + "/\n")
        os.makedirs(redacted_dir)

    # Keep a copy of the redacted file for reference.
    main.write_redacted(redacted_text, source_path, redacted_dir)
def test_concept():
    """Check the number of concept words counted by ``main.redact_concept``.

    Loads ``tests/test_files/concept.txt``, redacts words related to the
    concept ``"child"`` and asserts that 12 matches were recorded for the
    file in ``main.num_concept``. If the assertion holds, the redacted text
    is written to ``tests/test_files/redacted/`` for reference.
    """
    filename = "concept.txt"
    concept_file = 'tests/test_files/' + filename
    output_dir = 'tests/test_files/redacted/'

    main.init_stats(concept_file, 0, None)

    # Get test file
    content = main.get_file_contents(concept_file)

    # NOTE: no tokenizing / POS tagging here. redact_concept() is only given
    # the raw content, the file name and the args object, so tagging the
    # text beforehand was unused work.

    # Make required dot structure: redact_concept() receives an args object
    # rather than a plain dict, so wrap the dict with the `temp` helper
    # (defined elsewhere in this file; presumably exposes keys as attributes).
    # See https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary
    arg = {"concept": ["child"]}
    args = temp(arg)

    # Redact
    content = main.redact_concept(content, concept_file, args)

    # Expected number of concept words in the file
    assert main.num_concept[concept_file] == 12

    # Create path
    if not os.path.isdir(output_dir):
        sys.stderr.write("Output directory did not exist...creating " + output_dir + "/\n")
        os.makedirs(output_dir)

    # Write out the redacted test file for reference
    main.write_redacted(content, concept_file, output_dir)
def test_address():
    """Check the number of addresses counted by ``main.redact_addresses``.

    Loads ``tests/test_files/addresses.txt``, runs the address redaction and
    asserts that 3 addresses were recorded for the file in
    ``main.num_addresses``. If the assertion holds, the redacted text is
    written to ``tests/test_files/redacted/`` for reference.
    """
    fixture = "addresses.txt"
    source_path = 'tests/test_files/' + fixture
    redacted_dir = 'tests/test_files/redacted/'

    # Set up the per-file statistics before any redaction runs.
    main.init_stats(source_path, 0, None)

    # Load the test file and redact its addresses.
    original_text = main.get_file_contents(source_path)
    redacted_text = main.redact_addresses(original_text, source_path)

    # The fixture contains three addresses.
    assert main.num_addresses[source_path] == 3

    # Make sure the output directory exists before writing into it.
    if not os.path.isdir(redacted_dir):
        sys.stderr.write("Output directory did not exist...creating " + redacted_dir + "/\n")
        os.makedirs(redacted_dir)

    # Keep a copy of the redacted file for reference.
    main.write_redacted(redacted_text, source_path, redacted_dir)