def pool_initializer():
    """Create the application object for a pool worker process.

    Used as the ``initializer`` callable of a ``multiprocessing`` ``Pool``:
    it builds a new ``SuffixTreeApplicationWithCassandra`` and stores it in
    the module-level global ``worker_app``.
    """
    # NOTE(review): presumably the pool task function reads ``worker_app``
    # inside the worker process - confirm against its definition.
    global worker_app
    worker_app = SuffixTreeApplicationWithCassandra()
    def check_words(self, is_sorted=False, is_reversed=False):
        """Add words to a new suffix tree via a worker pool, then verify lookups.

        The first 100 characters of ``LONG_TEXT`` are split on spaces. Each
        non-empty word (duplicates included) gets its own ``uuid4`` hex ID and
        is added to a newly registered suffix tree by mapping
        ``add_string_to_suffix_tree`` over a single-process ``Pool``. The app
        is then re-created and every substring of every word is looked up
        with ``find_string_ids``; the check fails if a word's ID is missing
        from the results of any of its substrings.

        Args:
            is_sorted: Sort the ``[string, string_id, tree_id]`` work items
                before they are added.
            is_reversed: Reverse the work items (applied after the optional
                sort). On its own this reverses the dict iteration order,
                not a sorted order; also pass ``is_sorted=True`` to get a
                descending sorted order.

        Raises:
            Exception: Any exception instance found among the pool results
                is re-raised.
            AssertionError: If no words were found, the new tree is not in
                the repo, or any substring lookup misses its string ID.
        """
        # Split the long string into separate strings, and make some IDs.
        words = [w for w in LONG_TEXT[:100].split(' ') if w]

        print("Adding words: {}".format(words))

        # Avoid adding the same string twice (or a prefix of a previous string).
        #  - because it's a current problem unless we append string IDs, which makes things too slow
        # words = set(words)
        # words = [w for w in words if 0 != sum([x.startswith(w) for x in words if x != w])]

        assert words

        # Make a string ID for each string.
        strings = {}
        for string in words:
            strings[uuid.uuid4().hex] = string

        # Create a new suffix tree.
        self.app = SuffixTreeApplicationWithCassandra()
        st = self.app.register_new_suffix_tree()
        assert st.id in self.app.suffix_tree_repo

        # Close the app, so the pool doesn't inherit it.
        self.app.close()

        # Build the work items: [string, string ID, suffix tree ID].
        work_items = [[s, sid, st.id] for sid, s in strings.items() if s]
        if is_sorted:
            work_items.sort()
        if is_reversed:
            work_items.reverse()

        # Start the pool, and always shut it down again so the worker
        # process is not leaked when mapping fails.
        pool = Pool(initializer=pool_initializer, processes=1)
        try:
            results = pool.map(add_string_to_suffix_tree, work_items)
        finally:
            pool.close()
            pool.join()

        for result in results:
            if isinstance(result, Exception):
                # Best-effort diagnostics only: the layout of ``args`` is
                # whatever the worker packed into the exception, so never let
                # a lookup failure here mask the real error raised below.
                try:
                    print(result.args[0][1])
                except (IndexError, KeyError, TypeError):
                    pass
                raise result

        # Create the app again.
        self.app = SuffixTreeApplicationWithCassandra()

        errors = []

        # Check the suffix tree returns string ID for all substrings of string.
        for string_id, string in strings.items():
            substrings = sorted(get_all_substrings(string))
            print("")
            print("Checking for all substrings of string '{}': {}".format(
                repr(string), " ".join([repr(s) for s in substrings])))
            for substring in substrings:
                found_ids = self.app.find_string_ids(substring, st.id)
                if string_id not in found_ids:
                    msg = "Not found: substring '{}' from string '{}'".format(
                        repr(substring), repr(string))
                    print(msg)
                    errors.append(msg)

        # Check for errors.
        self.assertFalse(errors, "\n".join(errors))
    def check_words(self, is_sorted=False, is_reversed=False):
        """Add words to a new suffix tree via a worker pool, then verify lookups.

        The first 100 characters of ``LONG_TEXT`` are split on spaces. Each
        non-empty word (duplicates included) gets its own ``uuid4`` hex ID and
        is added to a newly registered suffix tree by mapping
        ``add_string_to_suffix_tree`` over a single-process ``Pool``. The app
        is then re-created and every substring of every word is looked up
        with ``find_string_ids``; the check fails if a word's ID is missing
        from the results of any of its substrings.

        Args:
            is_sorted: Sort the ``[string, string_id, tree_id]`` work items
                before they are added.
            is_reversed: Reverse the work items (applied after the optional
                sort). On its own this reverses the dict iteration order,
                not a sorted order; also pass ``is_sorted=True`` to get a
                descending sorted order.

        Raises:
            Exception: Any exception instance found among the pool results
                is re-raised.
            AssertionError: If no words were found, the new tree is not in
                the repo, or any substring lookup misses its string ID.
        """
        # Split the long string into separate strings, and make some IDs.
        words = [w for w in LONG_TEXT[:100].split(' ') if w]

        print("Adding words: {}".format(words))

        # Avoid adding the same string twice (or a prefix of a previous string).
        #  - because it's a current problem unless we append string IDs, which makes things too slow
        # words = set(words)
        # words = [w for w in words if 0 != sum([x.startswith(w) for x in words if x != w])]

        assert words

        # Make a string ID for each string.
        strings = {}
        for string in words:
            strings[uuid.uuid4().hex] = string

        # Create a new suffix tree.
        self.app = SuffixTreeApplicationWithCassandra()
        st = self.app.register_new_suffix_tree()
        assert st.id in self.app.suffix_tree_repo

        # Close the app, so the pool doesn't inherit it.
        self.app.close()

        # Build the work items: [string, string ID, suffix tree ID].
        work_items = [[s, sid, st.id] for sid, s in strings.items() if s]
        if is_sorted:
            work_items.sort()
        if is_reversed:
            work_items.reverse()

        # Start the pool, and always shut it down again so the worker
        # process is not leaked when mapping fails.
        pool = Pool(initializer=pool_initializer, processes=1)
        try:
            results = pool.map(add_string_to_suffix_tree, work_items)
        finally:
            pool.close()
            pool.join()

        for result in results:
            if isinstance(result, Exception):
                # Best-effort diagnostics only: the layout of ``args`` is
                # whatever the worker packed into the exception, so never let
                # a lookup failure here mask the real error raised below.
                try:
                    print(result.args[0][1])
                except (IndexError, KeyError, TypeError):
                    pass
                raise result

        # Create the app again.
        self.app = SuffixTreeApplicationWithCassandra()

        errors = []

        # Check the suffix tree returns string ID for all substrings of string.
        for string_id, string in strings.items():
            substrings = sorted(get_all_substrings(string))
            print("")
            print("Checking for all substrings of string '{}': {}".format(
                repr(string), " ".join([repr(s) for s in substrings])))
            for substring in substrings:
                found_ids = self.app.find_string_ids(substring, st.id)
                if string_id not in found_ids:
                    msg = "Not found: substring '{}' from string '{}'".format(
                        repr(substring), repr(string))
                    print(msg)
                    errors.append(msg)

        # Check for errors.
        self.assertFalse(errors, "\n".join(errors))
class TestMultiprocessingWithGeneralizedSuffixTree(CassandraTestCase):
    """Adds strings to a suffix tree from a worker pool and checks lookups."""

    def setUp(self):
        super(TestMultiprocessingWithGeneralizedSuffixTree, self).setUp()
        # Set by check_words(); None means there is nothing to close yet.
        self.app = None

    def tearDown(self):
        super(TestMultiprocessingWithGeneralizedSuffixTree, self).tearDown()
        if self.app is not None:
            self.app.close()

    def test_words_in_sorted_order(self):
        self.check_words(is_sorted=True)

    # Todo: Fix this - adding strings in a random order sometimes breaks
    # (perhaps a dict is causing indeterminate order).
    # def test_words_in_unsorted_order(self):
    #     self.check_words()

    # Todo: Fix this - adding strings in a reversed sorted order always fails.
    # Not sure why all substrings of 'ree' fail. The suffix is obviously not
    # moving along in the same way as it does when the nodes are added.
    # Perhaps it needs to add the IDs when explicit match is made, and then
    # move the first char along by one? Not sure so trace it out?
    # def test_words_in_reverse_sorted_order(self):
    #     self.check_words(is_reversed=True)
    #
    # The error reported is:-
    #
    # >       self.assertFalse(errors, "\n".join(errors))
    # E       Not found: substring ''e'' from string ''tree''
    # E       Not found: substring ''ee'' from string ''tree''
    # E       Not found: substring ''r'' from string ''tree''
    # E       Not found: substring ''re'' from string ''tree''
    # E       Not found: substring ''ree'' from string ''tree''

    def check_words(self, is_sorted=False, is_reversed=False):
        """Add words to a new suffix tree via a worker pool, then verify lookups.

        The first 100 characters of ``LONG_TEXT`` are split on spaces. Each
        non-empty word (duplicates included) gets its own ``uuid4`` hex ID and
        is added to a newly registered suffix tree by mapping
        ``add_string_to_suffix_tree`` over a single-process ``Pool``. The app
        is then re-created and every substring of every word is looked up
        with ``find_string_ids``; the check fails if a word's ID is missing
        from the results of any of its substrings.

        Args:
            is_sorted: Sort the ``[string, string_id, tree_id]`` work items
                before they are added.
            is_reversed: Reverse the work items (applied after the optional
                sort). On its own this reverses the dict iteration order,
                not a sorted order; also pass ``is_sorted=True`` to get a
                descending sorted order.

        Raises:
            Exception: Any exception instance found among the pool results
                is re-raised.
            AssertionError: If no words were found, the new tree is not in
                the repo, or any substring lookup misses its string ID.
        """
        # Split the long string into separate strings, and make some IDs.
        words = [w for w in LONG_TEXT[:100].split(' ') if w]

        print("Adding words: {}".format(words))

        # Avoid adding the same string twice (or a prefix of a previous string).
        #  - because it's a current problem unless we append string IDs, which makes things too slow
        # words = set(words)
        # words = [w for w in words if 0 != sum([x.startswith(w) for x in words if x != w])]

        assert words

        # Make a string ID for each string.
        strings = {}
        for string in words:
            strings[uuid.uuid4().hex] = string

        # Create a new suffix tree.
        self.app = SuffixTreeApplicationWithCassandra()
        st = self.app.register_new_suffix_tree()
        assert st.id in self.app.suffix_tree_repo

        # Close the app, so the pool doesn't inherit it.
        self.app.close()

        # Build the work items: [string, string ID, suffix tree ID].
        work_items = [[s, sid, st.id] for sid, s in strings.items() if s]
        if is_sorted:
            work_items.sort()
        if is_reversed:
            work_items.reverse()

        # Start the pool, and always shut it down again so the worker
        # process is not leaked when mapping fails.
        pool = Pool(initializer=pool_initializer, processes=1)
        try:
            results = pool.map(add_string_to_suffix_tree, work_items)
        finally:
            pool.close()
            pool.join()

        for result in results:
            if isinstance(result, Exception):
                # Best-effort diagnostics only: the layout of ``args`` is
                # whatever the worker packed into the exception, so never let
                # a lookup failure here mask the real error raised below.
                try:
                    print(result.args[0][1])
                except (IndexError, KeyError, TypeError):
                    pass
                raise result

        # Create the app again.
        self.app = SuffixTreeApplicationWithCassandra()

        errors = []

        # Check the suffix tree returns string ID for all substrings of string.
        for string_id, string in strings.items():
            substrings = sorted(get_all_substrings(string))
            print("")
            print("Checking for all substrings of string '{}': {}".format(
                repr(string), " ".join([repr(s) for s in substrings])))
            for substring in substrings:
                found_ids = self.app.find_string_ids(substring, st.id)
                if string_id not in found_ids:
                    msg = "Not found: substring '{}' from string '{}'".format(
                        repr(substring), repr(string))
                    print(msg)
                    errors.append(msg)

        # Check for errors.
        self.assertFalse(errors, "\n".join(errors))
class TestMultiprocessingWithGeneralizedSuffixTree(CassandraTestCase):
    """Adds strings to a suffix tree from a worker pool and checks lookups."""

    def setUp(self):
        super(TestMultiprocessingWithGeneralizedSuffixTree, self).setUp()
        # Set by check_words(); None means there is nothing to close yet.
        self.app = None

    def tearDown(self):
        super(TestMultiprocessingWithGeneralizedSuffixTree, self).tearDown()
        if self.app is not None:
            self.app.close()

    def test_words_in_sorted_order(self):
        self.check_words(is_sorted=True)

    # Todo: Fix this - adding strings in a random order sometimes breaks
    # (perhaps a dict is causing indeterminate order).
    # def test_words_in_unsorted_order(self):
    #     self.check_words()

    # Todo: Fix this - adding strings in a reversed sorted order always fails.
    # Not sure why all substrings of 'ree' fail. The suffix is obviously not
    # moving along in the same way as it does when the nodes are added.
    # Perhaps it needs to add the IDs when explicit match is made, and then
    # move the first char along by one? Not sure so trace it out?
    # def test_words_in_reverse_sorted_order(self):
    #     self.check_words(is_reversed=True)
    #
    # The error reported is:-
    #
    # >       self.assertFalse(errors, "\n".join(errors))
    # E       Not found: substring ''e'' from string ''tree''
    # E       Not found: substring ''ee'' from string ''tree''
    # E       Not found: substring ''r'' from string ''tree''
    # E       Not found: substring ''re'' from string ''tree''
    # E       Not found: substring ''ree'' from string ''tree''

    def check_words(self, is_sorted=False, is_reversed=False):
        """Add words to a new suffix tree via a worker pool, then verify lookups.

        The first 100 characters of ``LONG_TEXT`` are split on spaces. Each
        non-empty word (duplicates included) gets its own ``uuid4`` hex ID and
        is added to a newly registered suffix tree by mapping
        ``add_string_to_suffix_tree`` over a single-process ``Pool``. The app
        is then re-created and every substring of every word is looked up
        with ``find_string_ids``; the check fails if a word's ID is missing
        from the results of any of its substrings.

        Args:
            is_sorted: Sort the ``[string, string_id, tree_id]`` work items
                before they are added.
            is_reversed: Reverse the work items (applied after the optional
                sort). On its own this reverses the dict iteration order,
                not a sorted order; also pass ``is_sorted=True`` to get a
                descending sorted order.

        Raises:
            Exception: Any exception instance found among the pool results
                is re-raised.
            AssertionError: If no words were found, the new tree is not in
                the repo, or any substring lookup misses its string ID.
        """
        # Split the long string into separate strings, and make some IDs.
        words = [w for w in LONG_TEXT[:100].split(' ') if w]

        print("Adding words: {}".format(words))

        # Avoid adding the same string twice (or a prefix of a previous string).
        #  - because it's a current problem unless we append string IDs, which makes things too slow
        # words = set(words)
        # words = [w for w in words if 0 != sum([x.startswith(w) for x in words if x != w])]

        assert words

        # Make a string ID for each string.
        strings = {}
        for string in words:
            strings[uuid.uuid4().hex] = string

        # Create a new suffix tree.
        self.app = SuffixTreeApplicationWithCassandra()
        st = self.app.register_new_suffix_tree()
        assert st.id in self.app.suffix_tree_repo

        # Close the app, so the pool doesn't inherit it.
        self.app.close()

        # Build the work items: [string, string ID, suffix tree ID].
        work_items = [[s, sid, st.id] for sid, s in strings.items() if s]
        if is_sorted:
            work_items.sort()
        if is_reversed:
            work_items.reverse()

        # Start the pool, and always shut it down again so the worker
        # process is not leaked when mapping fails.
        pool = Pool(initializer=pool_initializer, processes=1)
        try:
            results = pool.map(add_string_to_suffix_tree, work_items)
        finally:
            pool.close()
            pool.join()

        for result in results:
            if isinstance(result, Exception):
                # Best-effort diagnostics only: the layout of ``args`` is
                # whatever the worker packed into the exception, so never let
                # a lookup failure here mask the real error raised below.
                try:
                    print(result.args[0][1])
                except (IndexError, KeyError, TypeError):
                    pass
                raise result

        # Create the app again.
        self.app = SuffixTreeApplicationWithCassandra()

        errors = []

        # Check the suffix tree returns string ID for all substrings of string.
        for string_id, string in strings.items():
            substrings = sorted(get_all_substrings(string))
            print("")
            print("Checking for all substrings of string '{}': {}".format(
                repr(string), " ".join([repr(s) for s in substrings])))
            for substring in substrings:
                found_ids = self.app.find_string_ids(substring, st.id)
                if string_id not in found_ids:
                    msg = "Not found: substring '{}' from string '{}'".format(
                        repr(substring), repr(string))
                    print(msg)
                    errors.append(msg)

        # Check for errors.
        self.assertFalse(errors, "\n".join(errors))