def is_sorted__via_lt(iterable, *, key=None, __lt__=None):
    '''Return True when no item compares less-than the item just before it.

    Only a "less than" predicate is used, so equal neighbours are accepted
    (the check is ``not __lt__(b, a)`` for each consecutive pair ``a, b``).

    iterable: the items to check.
    key:      forwarded to ``map_if`` together with ``iterable``
              (presumably applied to each item when not None -- confirm
              against ``map_if``).
    __lt__:   two-argument predicate; defaults to ``operator.lt``.

    NOTE(review): relies on ``zip_me2`` yielding consecutive pairs
    ``(a, b)`` of its argument -- confirm against its definition.
    '''
    keyed = map_if(key, iterable)
    del iterable  # only the keyed view is used from here on
    less_than = operator.lt if __lt__ is None else __lt__
    for earlier, later in zip_me2(keyed):
        if less_than(later, earlier):
            # found a descent: later < earlier
            return False
    return True
def is_sorted__via_le(iterable, *, key=None, __le__=None):
    '''Return True when every pair produced by ``zip_me2`` satisfies ``__le__``.

    iterable: the items to check.
    key:      forwarded to ``map_if`` together with ``iterable``
              (presumably applied to each item when not None -- confirm
              against ``map_if``).
    __le__:   predicate called with each pair unpacked as positional
              arguments; defaults to ``operator.le``.

    NOTE(review): relies on ``zip_me2`` yielding consecutive pairs of its
    argument -- confirm against its definition.
    '''
    keyed = map_if(key, iterable)
    del iterable  # only the keyed view is used from here on
    less_equal = operator.le if __le__ is None else __le__
    return all(less_equal(*pair) for pair in zip_me2(keyed))
def verify_arbitrary_LCP(self, suffices, LCP):
    '''Check ``LCP`` against values recomputed from ``suffices``.

    suffices: sequence of suffixes.
    LCP:      candidate sequence; must hold one entry per pair produced by
              ``zip_me2(suffices)`` and compare equal to the list of
              ``len_lcp_f`` results for those pairs.

    Returns True when ``LCP`` matches, False otherwise.  With no suffixes
    the only accepted ``LCP`` is an empty one.

    NOTE(review): ``zip_me2`` presumably yields consecutive pairs and
    ``len_lcp_f`` the length of their longest common prefix -- confirm
    against their definitions.
    '''
    if not suffices:
        return not LCP
    # one LCP entry per neighbouring pair
    if len(LCP) != len(suffices) - 1:
        return False
    expected = [len_lcp_f(left, right) for left, right in zip_me2(suffices)]
    return LCP == expected
def make_isuffix2is_L(self, string):
    '''Return a list of bools, one per position: True = L, False = S.

    string: non-empty sequence of mutually comparable items (asserted
            truthy).

    The last position is always marked L.  Every earlier position is
    compared with its right-hand neighbour, scanning right to left:
    smaller -> S (False), greater -> L (True), equal -> same flag as the
    neighbour.

    NOTE(review): relies on ``zip_me2`` yielding consecutive pairs of its
    argument, so that over ``reversed(string)`` each pair is
    ``(succ, prev)`` -- confirm against its definition.
    '''
    assert string
    size = len(string)
    flags = [None] * size
    # the first succ char == the last char == the last L char
    flags[-1] = True
    succ_is_L = True
    positions = reversed(range(size - 1))
    for pos, (succ, prev) in zip(positions, zip_me2(reversed(string))):
        # same comparison order as an if/elif chain: '<' first, then '>'
        current_is_L = (
            False if prev < succ            # prev is S
            else True if prev > succ        # prev is L
            else succ_is_L                  # tie: inherit from the right
        )
        flags[pos] = current_is_L
        succ_is_L = current_is_L
    return flags
def is_strict_sorted(iterable, *, key=None):
    '''Return True when every pair produced by ``zip_me2`` satisfies ``a < b``.

    iterable: the items to check.
    key:      forwarded to ``map_if`` together with ``iterable``
              (presumably applied to each item when not None -- confirm
              against ``map_if``).

    NOTE(review): relies on ``zip_me2`` yielding consecutive pairs of its
    argument -- confirm against its definition.
    '''
    # bug if written as: return is_sorted__via_lt(iterable, key=key)
    # (that helper tests ``not b < a`` and therefore accepts equal
    # neighbours, which is not a strict ordering).
    keyed = map_if(key, iterable)
    del iterable  # only the keyed view is used from here on
    strictly_less = operator.lt
    for pair in zip_me2(keyed):
        if not strictly_less(*pair):
            return False
    return True
def make_big_string(self, alphabet_size, string, charL2beginSA, charS2endSA):
    '''Build the reduced ("big") string whose characters name LMS substrings.

    Parameters
        alphabet_size   used as the trailing marker of a pseudo substring
                        that does not reach the end of ``string`` and
                        forwarded to the pseudo-sort helper
                        (assumes characters are ints below it -- TODO confirm)
        string          the input sequence; sliced and measured with len()
        charL2beginSA   forwarded unchanged to
                        make_pseudo_sorted_LMS_substring_isuffices
        charS2endSA     forwarded unchanged to
                        make_pseudo_sorted_LMS_substring_isuffices

    return (big_alphabet_size, big_string
        , isuffix2is_L :: ISuffix -> Bool
        , big_isuffix2LMS_isuffix
        #, sorted_LMS_substring_isuffices
        , pseudo_sorted_LMS_substring_isuffices
        , isuffix2is_LMS
        , ichar2big_ichar :: [(None|BigISuffix)]
        , to_LMS_substring_suffix
        #, to_pseudo_LMS_substring_suffix
        )

    where
        big_alphabet_size   number of groups produced by groupby below
        big_string          one group index per entry of LMS_isuffices
        isuffix2is_L        bound __getitem__ of the list returned by
                            self.make_isuffix2is_L(string)
        big_isuffix2LMS_isuffix
                            the list of LMS positions (LMS_isuffices)

    NOTE:
        sorted_LMS_substring_isuffices
            * exclude_1size_suffices except string[-1:]
            * include leading L suffices

    When self.testing is true, intermediate results are printed and checked
    with the self.verify_* helpers.
    '''
    L = len(string)
    isuffix2is_L = self.make_isuffix2is_L(string)
    # from here on isuffix2is_L is a callable: index -> bool
    isuffix2is_L = isuffix2is_L.__getitem__
    # isuffix2is_L done

    def isuffix2is_LMS(isuffix):
        # LMS here: the position is not L, and it is either position 0 or
        # its left neighbour is L.
        return (not isuffix2is_L(isuffix)
                and (isuffix == 0 or isuffix2is_L(isuffix - 1)))

    # the last position (L - 1) is never tested
    LMS_isuffices = list(filter(isuffix2is_LMS, range(L - 1)))
    # may be empty
    # both names alias the very same list object
    big_ichar2LMS_isuffix = big_isuffix2LMS_isuffix = LMS_isuffices
    # big_isuffix2LMS_isuffix done

    # ichar2big_ichar[i] = index (into LMS_isuffices) of the block that
    # contains position i; positions before the first LMS position stay None
    ichar2big_ichar = [None] * L
    # may all be None in the end
    for big_ichar, (begin, end) in enumerate(zip_me2(chain(LMS_isuffices, [L]))):
        for i in range(begin, end):
            ichar2big_ichar[i] = big_ichar
    else:
        # no break above, so this always runs; it guarantees the three names
        # exist even when the loop body never executed, so the del is safe
        big_ichar = begin = end = None
    del big_ichar, begin, end
    if self.testing:
        print(string)
        print(ichar2big_ichar)
        assert self.verify_ichar2big_ichar(string, isuffix2is_L, isuffix2is_LMS, ichar2big_ichar)

    # NOTE: assume 1size LMS_isuffix followed with '$'
    # so, ichar=L-1 is not more special than LMS_ichars/1size LMS_isuffices

    # The bare string below is a no-op expression statement holding an
    # earlier approach that is marked as a bug; it is never executed.
    '''bug:
    sorted_LMS_substring_isuffices =\
        self.make_sorted_LMS_substring_isuffices(
            alphabet_size, string, table
            , isuffix2is_L, LMS_isuffices
            , charL2beginSA, charS2endSA)
    # sorted_LMS_substring_isuffices done
    if self.testing:
        assert self.verify_sorted_LMS_substring_isuffices(
            to_LMS_substring_suffix, sorted_LMS_substring_isuffices)

    sorted_LMS_whole_substring_isuffices =\
        list(filter(isuffix2is_LMS, sorted_LMS_substring_isuffices))
    gs = groupby(sorted_LMS_whole_substring_isuffices
                , key=to_LMS_substring_suffix)
    '''

    pseudo_sorted_LMS_substring_isuffices =\
        self.make_pseudo_sorted_LMS_substring_isuffices(
            alphabet_size, string
            , isuffix2is_L, LMS_isuffices
            , charL2beginSA, charS2endSA)

    def to_pseudo_LMS_substring_suffix(isuffix):
        'to_LMS_substring_suffix append m/M'
        # to_LMS_substring_suffix is defined further down; that is fine
        # because it is only looked up when this function is called
        suffix = to_LMS_substring_suffix(isuffix)
        end_isuffix_ex = isuffix + len(suffix)
        # marker: -1 when the substring runs to the end of string,
        # alphabet_size otherwise
        mM = -1 if end_isuffix_ex == L else alphabet_size
        #pseudo_suffix = suffix + [mM]
        # type(suffix) may be input array type
        pseudo_suffix = [*suffix, mM]
        return pseudo_suffix

    def to_LMS_substring_suffix(isuffix):
        '''* include leading Ls
        * exclude_1size_suffices except string[-1:]
        '''
        # Slice from isuffix up to and including the first LMS position of
        # the next block; if there is no next block, take the rest of string.
        may_big_ichar = ichar2big_ichar[isuffix]
        if may_big_ichar is None:
            # isuffix lies before the first LMS position
            next_big_ichar = 0
        else:
            big_ichar = may_big_ichar
            next_big_ichar = big_ichar + 1
            del big_ichar
        if next_big_ichar == len(LMS_isuffices):
            return string[isuffix:]
        next_LMS_isuffix = LMS_isuffices[next_big_ichar]
        return string[isuffix:next_LMS_isuffix + 1]

    if self.testing:
        try:
            assert self.verify_pseudo_sorted_LMS_substring_isuffices(
                to_pseudo_LMS_substring_suffix, pseudo_sorted_LMS_substring_isuffices)
        except:
            # catch-all on purpose: dump the failing state, then re-raise
            # the original exception unchanged
            print(string)
            print(pseudo_sorted_LMS_substring_isuffices)
            print(*map(to_pseudo_LMS_substring_suffix, pseudo_sorted_LMS_substring_isuffices))
            raise

    # keep only the LMS positions, preserving the pseudo-sorted order
    pseudo_sorted_LMS_whole_substring_isuffices =\
        list(filter(isuffix2is_LMS, pseudo_sorted_LMS_substring_isuffices))
    # O(L)
    # groupby merges only adjacent equal keys, so each run of positions with
    # the same pseudo substring becomes one group
    gs = groupby(pseudo_sorted_LMS_whole_substring_isuffices, key=to_pseudo_LMS_substring_suffix)
    big_string = [None] * len(LMS_isuffices)
    # stays -1 when there is no group, giving big_alphabet_size == 0
    big_char = -1
    for big_char, (_, g) in enumerate(gs):
        for LMS_isuffix in g:
            big_ichar = ichar2big_ichar[LMS_isuffix]
            big_string[big_ichar] = big_char
    # every LMS position must have received a group index
    assert None not in big_string
    big_alphabet_size = big_char + 1
    if self.testing:
        assert self.verify_big_string(ichar2big_ichar, to_pseudo_LMS_substring_suffix, big_ichar2LMS_isuffix, big_alphabet_size, big_string)
    return (
        big_alphabet_size, big_string, isuffix2is_L, big_isuffix2LMS_isuffix
        #, sorted_LMS_substring_isuffices
        , pseudo_sorted_LMS_substring_isuffices, isuffix2is_LMS,
        ichar2big_ichar, to_LMS_substring_suffix
        #, to_pseudo_LMS_substring_suffix
        )