forked from akki744/autosuggest-preprocessor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
60 lines (47 loc) · 2.9 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from time import time
from parser import Parser
from config_loader import ConfigLoader
from profanity_filter import ProfanityFilter
from de_duplicator import DeDuplicator
from keyword_dictionary_builder import KeywordDictionaryBuilder
from symspell_checker import SymSpellChecker
import os
def main():
    """Run the preprocessing pipeline end to end.

    Steps, in order: load the configuration and make sure the output
    directory exists, construct the pipeline components, then run the
    parser, de-duplicator, profanity filter, keyword dictionary builder,
    SymSpell checker and a second de-duplication pass. A progress message is
    printed before each step and the total elapsed time (in minutes) is
    printed at the end.
    """
    start_time = time()

    print("Running Basic Setup Steps....")
    config_loader = ConfigLoader()
    output_directory_path = config_loader.get_base_path() + config_loader.get_output_directory_name()
    # exist_ok=True replaces the former exists()-then-makedirs() pair: it is a
    # no-op when the directory is already there and cannot fail because the
    # directory appeared between the check and the creation.
    os.makedirs(output_directory_path, exist_ok=True)

    # All components are built up front, before any step runs.
    parser = Parser(config_loader)
    profanity_filter = ProfanityFilter(config_loader, parser)
    de_duplicator = DeDuplicator(parser)
    keyword_dictionary_builder = KeywordDictionaryBuilder(parser)
    sym_spell_checker = SymSpellChecker(config_loader, parser)

    print("Running Parser....")
    parser.parse(
        config_loader.get_query_logs_file_path(),
        config_loader.get_frequency_file_path(),
        config_loader.get_max_total_queries(),
    )

    print("Running De-duplicator....")
    # The frequency file path is passed as both the first and the second
    # argument -- presumably the file is rewritten in place; confirm against
    # DeDuplicator.remove_duplicates.
    de_duplicator.remove_duplicates(
        config_loader.get_frequency_file_path(),
        config_loader.get_frequency_file_path(),
        config_loader.get_de_duplicated_keyword_ordered_1_file_path(),
        config_loader.get_de_duplicated_missing_space_1_file_path(),
        config_loader.get_de_duplicated_synonyms_1_file_path(),
    )

    print("Running Profanity Filter....")
    # Same pattern: the frequency file path is given twice.
    profanity_filter.remove_profane_queries(
        config_loader.get_frequency_file_path(),
        config_loader.get_frequency_file_path(),
        config_loader.get_filtered_profane_queries_file_path(),
    )

    print("Running Keyword Dictionary Builder....")
    keyword_dictionary_builder.build_dictionary_file_from_frequency_file(
        config_loader.get_frequency_file_path(),
        config_loader.get_dictionary_file_path(),
    )

    print("Running SymSpell Checker....")
    # The dictionary file path is passed for both of the last two arguments.
    sym_spell_checker.run_sym_spell(
        config_loader.get_sym_spell_iterations(),
        config_loader.get_frequency_file_path(),
        config_loader.get_dictionary_file_path(),
        config_loader.get_dictionary_file_path(),
    )

    print("Running De-duplicator....")
    # Second de-duplication pass, this time over the dictionary file and
    # using the "_2" report paths.
    de_duplicator.remove_duplicates(
        config_loader.get_dictionary_file_path(),
        config_loader.get_dictionary_file_path(),
        config_loader.get_de_duplicated_keyword_ordered_2_file_path(),
        config_loader.get_de_duplicated_missing_space_2_file_path(),
        config_loader.get_de_duplicated_synonyms_2_file_path(),
    )

    print("Completed!!!")
    print("Total time taken: ", (time() - start_time) / 60, " minutes")
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()