def pool_initializer():
    """Create the application object for a pool worker.

    Used as the ``initializer`` of the multiprocessing pool, so it runs in
    the worker process when that worker starts. The new application is
    stored in the module-level ``worker_app`` global.
    """
    global worker_app
    application = SuffixTreeApplicationWithCassandra()
    worker_app = application
def check_words(self, is_sorted=False, is_reversed=False):
    """Add words to a new suffix tree via a worker pool, then verify lookups.

    The first 100 characters of ``LONG_TEXT`` are split on spaces, and each
    non-empty word is given a fresh hex UUID as its string ID. A new suffix
    tree is registered, the words are added to it by a single-process pool,
    and finally every substring of every word is looked up to check that
    the word's string ID is among the results.

    Args:
        is_sorted: If true, sort the work items before adding them.
        is_reversed: If true, reverse the order of the work items (applied
            after sorting, when both flags are set).

    Raises:
        Exception: Any exception object returned by a worker is re-raised.
        AssertionError: If no words were found, the new tree is missing
            from the repository, or any substring lookup fails to return
            the expected string ID.
    """
    # Split the leading slice of the long text into separate, non-empty words.
    words = [w for w in LONG_TEXT[:100].split(' ') if w]
    print("Adding words: {}".format(words))

    # NOTE: de-duplicating the words (and dropping any word that is a prefix
    # of another) was sketched here but is left disabled. The original note
    # says repeated/prefix strings are a known problem unless string IDs are
    # appended, which makes things too slow.
    self.assertTrue(words)

    # Make a string ID for each string.
    strings = {uuid.uuid4().hex: string for string in words}

    # Create a new suffix tree.
    self.app = SuffixTreeApplicationWithCassandra()
    st = self.app.register_new_suffix_tree()
    self.assertIn(st.id, self.app.suffix_tree_repo)

    # Close the app, so the pool doesn't inherit it.
    self.app.close()

    # One work item per string: [string, string ID, suffix tree ID].
    jobs = [[s, sid, st.id] for sid, s in strings.items() if s]
    if is_sorted:
        jobs = sorted(jobs)
    if is_reversed:
        jobs = list(reversed(jobs))

    # Start the pool, and always shut it down again so the worker process
    # does not outlive this check.
    pool = Pool(initializer=pool_initializer, processes=1)
    try:
        results = pool.map(add_string_to_suffix_tree, jobs)
    finally:
        pool.close()
        pool.join()

    # Workers hand failures back as returned exception objects.
    for result in results:
        if isinstance(result, Exception):
            # Presumably args[0][1] carries worker-side detail (confirm
            # against add_string_to_suffix_tree). Never let the diagnostic
            # print hide the real failure.
            try:
                print(result.args[0][1])
            except (IndexError, KeyError, TypeError):
                print(result)
            raise result

    # Create the app again.
    self.app = SuffixTreeApplicationWithCassandra()

    errors = []

    # Check the suffix tree returns string ID for all substrings of string.
    for string_id, string in strings.items():
        substrings = sorted(get_all_substrings(string))
        print("")
        print("Checking for all substrings of string '{}': {}".format(
            repr(string), " ".join(repr(s) for s in substrings)))
        for substring in substrings:
            found_ids = self.app.find_string_ids(substring, st.id)
            if string_id not in found_ids:
                msg = "Not found: substring '{}' from string '{}'".format(
                    repr(substring), repr(string))
                print(msg)
                errors.append(msg)

    # Check for errors.
    self.assertFalse(errors, "\n".join(errors))