def equals(alpha, beta):
    """Return True when *alpha* and *beta* are considered the same symbol.

    Two strings are equal when they compare equal. Otherwise, when one
    side is a list, the other side is passed through ``translate``
    (defined elsewhere in this module) and the result must be a member of
    that list. *beta* being a list takes precedence over *alpha* being a
    list. Any other combination of types yields False.
    """
    if isinstance(alpha, str) and isinstance(beta, str):
        return alpha == beta
    if isinstance(beta, list):
        return translate(alpha) in beta
    if isinstance(alpha, list):
        return translate(beta) in alpha
    return False
def _translate(s): r=[] for c in s: if isinstance(c,str): r.append([translate(c)]) elif isinstance(c, list): r.append(c) return r
def match_score2(alpha, beta):
    """Score the pairing of two alignment symbols.

    Returns the module-level ``gap_penalty`` when exactly one side is the
    gap marker ``'-'``, ``match_award`` when the symbols match, and
    ``mismatch_penalty`` otherwise.

    Matching rules: two strings match when equal; when one side is a
    list, the other side is run through ``translate`` (defined elsewhere)
    and must be a member of the list. A list *beta* is checked before a
    list *alpha*.
    """
    one_sided_gap = (alpha == '-' and beta != '-') or (alpha != '-' and beta == '-')
    if one_sided_gap:
        return gap_penalty

    if isinstance(alpha, str) and isinstance(beta, str):
        matched = alpha == beta
    elif isinstance(beta, list):
        matched = translate(alpha) in beta
    elif isinstance(alpha, list):
        matched = translate(beta) in alpha
    else:
        matched = False

    if matched:
        return match_award
    return mismatch_penalty
def translate_all(inputs, filter_empty=True, sort=True, trans_table=all_table_ascii_unicode):
    """Translate every entry of *inputs* and return the results as a list.

    Args:
        inputs: iterable of strings to translate.
        filter_empty: when true, entries that are empty after ``strip()``
            are skipped.
        sort: when true, the translated results are ordered with
            ``pattern_comparator`` (defined elsewhere in this module).
        trans_table: forwarded to ``translate`` as ``trans_table``.

    Returns:
        A new list of translated entries.
    """
    translated = []
    for entry in inputs:
        if filter_empty and len(entry.strip()) == 0:
            continue
        translated.append(translate(entry, trans_table=trans_table))

    if sort:
        translated.sort(key=functools.cmp_to_key(pattern_comparator))
    return translated
def symbol_to_patter(orig, symbol):
    """Convert an alignment symbol sequence into a list of pattern entries.

    Each element of *symbol* is handled as follows:

    * a string is copied through unchanged;
    * a one-element list is copied through unchanged;
    * a list whose length is neither 1 nor 2 only triggers a warning
      print and contributes nothing;
    * a two-element list containing the gap marker ``"-"`` becomes the
      tuple ``(opt, 0, 1)``, where ``opt`` is the non-gap member (passed
      through ``utils.translate`` when it is a string);
    * any other two-element list becomes the de-duplicated list of its
      members, with string members passed through ``utils.translate``.
      A pair of two lists contributes nothing.

    Elements that are neither strings nor lists are skipped. *orig* is
    only used in the debug print emitted before returning.
    """
    pattern = []
    for entry in symbol:
        if isinstance(entry, str):
            pattern.append(entry)
            continue
        if not isinstance(entry, list):
            continue

        size = len(entry)
        if size == 1:
            # normally only happens if we compare two patterns with the same symbols
            pattern.append(entry)
            continue
        if size != 2:
            print("WARNIGN {}".format(entry))
            continue

        first, second = entry[0], entry[1]
        if "-" in entry:
            optional = second if first == "-" else first
            if isinstance(optional, str):
                optional = utils.translate(optional)
            pattern.append((optional, 0, 1))
            continue

        first_is_str = isinstance(first, str)
        second_is_str = isinstance(second, str)
        if first_is_str and second_is_str:
            pair = [utils.translate(first), utils.translate(second)]
            pattern.append(list(set(pair)))
        elif isinstance(first, list) and second_is_str:
            # first is already a merged symbol set; fold the second symbol in
            pattern.append(list(set(first + [utils.translate(second)])))
        elif first_is_str and isinstance(second, list):
            # second is already a merged symbol set; fold the first symbol in
            pattern.append(list(set(second + [utils.translate(first)])))

    print("O:{} S:{}".format(orig, symbol))
    return pattern
def update_symbol(self, symbol):
    """Rebuild ``self.symbol`` from *symbol* and bump ``self._count``.

    On the first call (``self._vcounts`` empty) one frequency dict per
    position of *symbol* is created. Each element is then processed:

    * a string is copied through unchanged;
    * a one-element list is copied through (and reported on stdout);
    * a list whose length is neither 1 nor 2 only triggers a warning
      print and contributes nothing;
    * a pair of two strings increments both raw values in the frequency
      dict for that position and contributes the de-duplicated list of
      their ``translate`` results;
    * a pair (list, string) contributes the de-duplicated union of the
      list and the translated string.

    Other elements and other pair shapes contribute nothing.
    """
    # TODO length check
    if not self._vcounts:
        self._vcounts = [{} for _ in range(len(symbol))]

    merged = []
    for position, entry in enumerate(symbol):
        if isinstance(entry, str):
            merged.append(entry)
            continue
        if not isinstance(entry, list):
            continue

        size = len(entry)
        if size == 1:
            # normally only happens if we compare two patterns with the same symbols
            merged.append(entry)
            print("SINGLE VALUE {}".format(entry))
            continue
        if size != 2:
            print("WARNIGN {}".format(entry))
            continue

        left, right = entry[0], entry[1]
        right_is_str = isinstance(right, str)
        if isinstance(left, str) and right_is_str:
            counts = self._vcounts[position]
            counts.setdefault(left, 0)
            counts.setdefault(right, 0)
            counts[left] += 1
            counts[right] += 1
            merged.append(list(set([translate(left), translate(right)])))
        elif isinstance(left, list) and right_is_str:
            # left is already a merged symbol set; fold the new symbol in
            merged.append(list(set(left + [translate(right)])))

    self.symbol = merged
    self._count += 1
def match_score2(alpha, beta, score_matrix=score_matrix):
    """Look up the score for pairing two alignment symbols.

    The result is ``score_matrix[key]`` where ``key`` is one of the
    module-level names ``gap_penalty``, ``match_award``,
    ``cset_match_award`` or ``mismatch_penalty``:

    * exactly one side is the gap marker ``'-'`` -> ``gap_penalty``;
    * two equal strings -> ``match_award``;
    * two lists where ``set(beta)`` is a strict subset of ``set(alpha)``
      -> ``match_award``;
    * one list and one other value whose ``translate`` result is a member
      of the list -> ``cset_match_award``;
    * everything else -> ``mismatch_penalty``.
    """
    if (alpha == '-' and beta != '-') or (alpha != '-' and beta == '-'):
        return score_matrix[gap_penalty]

    alpha_is_list = isinstance(alpha, list)
    beta_is_list = isinstance(beta, list)

    if isinstance(alpha, str) and isinstance(beta, str):
        key = match_award if alpha == beta else mismatch_penalty
    elif alpha_is_list and beta_is_list:
        # NOTE(review): strict subset, so two identical symbol sets score
        # as a mismatch -- confirm this is intended.
        key = match_award if set(beta) < set(alpha) else mismatch_penalty
    elif beta_is_list:
        key = cset_match_award if translate(alpha) in beta else mismatch_penalty
    elif alpha_is_list:
        key = cset_match_award if translate(beta) in alpha else mismatch_penalty
    else:
        key = mismatch_penalty
    return score_matrix[key]
def merge_alignment(self, symbol):
    """Collapse every list entry of *symbol* into its set of translated characters.

    String entries are copied through unchanged. For a list entry, its
    members are concatenated, passed through ``translate`` (defined
    elsewhere in this module) and reduced to the distinct items of the
    result; the order of those items is whatever set iteration yields.
    Entries of any other type are dropped.

    List entries of any length are accepted: the merge only needs the
    joined string, so entries with fewer than two members no longer
    raise ``IndexError``.

    Args:
        symbol: iterable of strings and lists of strings.

    Returns:
        A new list with one item per string/list entry of *symbol*.
    """
    merged = []
    for entry in symbol:
        if isinstance(entry, str):
            merged.append(entry)
        elif isinstance(entry, list):
            distinct = set(translate("".join(entry)))
            merged.append(list(distinct))
    return merged
def from_string(cls,pstring):
    """Scan *pstring* character by character and feed quoted values into a new ``Pattern``.

    A single quote toggles between "inside a value" and "outside a value".
    When a quote closes a value, the accumulated text is appended to
    ``_values`` and its ``translate`` result to ``_l1patterns`` of the new
    pattern. Characters seen outside a value are ignored.

    NOTE(review): the visible body looks unfinished -- confirm against the
    full file before relying on it:
      * nothing is returned in the visible code;
      * ``parseValue`` starts as True, so the first quote encountered is
        handled as a closing quote;
      * ``valuestring`` is never reset, so each closed value also contains
        the text of all earlier ones;
      * ``ins`` is assigned but never read.
    """
    _p= Pattern()
    ins=''  # NOTE(review): unused in the visible code
    parseValue=True  # True while characters are being collected into valuestring
    valuestring=''
    for c in pstring:
        if c == "'":
            if not parseValue:
                #start: opening quote, begin collecting characters
                parseValue=True
            else:
                #end: closing quote, store raw value and its translation
                _p._values.append(valuestring)
                _p._l1patterns.append(translate(valuestring))
                parseValue=False
        else:
            if parseValue:
                valuestring+=c
            else:
                # characters between quoted values are ignored
                pass
def parse_value(self, value):
    """Split *value* into runs of identical translated symbols and record them.

    *value* is passed through ``translate`` (defined elsewhere in this
    module); consecutive equal items of the result are grouped. For every
    group the method appends

    * ``(symbol, start, length)`` to ``self._l2patterns``,
    * the matching slice of the translated value to ``self._l1patterns``,
    * the matching slice of the raw value to ``self._values``,

    and finally rebuilds ``self._l2string``, ``self._l1string`` and
    ``self._valuestring`` by joining those lists. A falsy *value* is
    ignored (after the initial debug message).
    """
    logger.debug("CREATE PATTERN")
    if not value:
        return

    translated = translate(value)
    logger.debug("{:>2} translated:{}".format('', translated))

    # Run-length encode the translated value: (symbol, run length).
    runs = []
    for symbol, members in itertools.groupby(translated):
        runs.append((symbol, len(list(members))))
    logger.debug("{:>2} L1-groups:{}".format('', runs))

    start = 0
    for symbol, length in runs:
        stop = start + length
        self._l2patterns.append((symbol, start, length))
        self._l1patterns.append(translated[start:stop])
        self._values.append(value[start:stop])
        start = stop

    self._l2string = "".join(entry[0] for entry in self._l2patterns)
    self._l1string = "".join(self._l1patterns)
    self._valuestring = "".join(self._values)
def find_best_alignment(self, alpha_list, beta_list):
    """Align *alpha_list* with *beta_list* and store the results in ``self.data``.

    The raw result of ``needle`` (defined elsewhere in this module) is
    always stored under ``'raw'``. Depending on its identity value a
    second alignment may be attempted:

    * ``0 < identity < 100``: a copy of the first alignment is built in
      which positions whose symbol has more than one member are replaced
      by that symbol's first member (translated and wrapped in a list
      when it is a string; positions whose first member is ``'-'`` are
      dropped). If at least one string was translated, the copy is
      aligned again and stored under ``'partl1'``.
    * ``identity == 0``: ``self._translate(alpha_list)`` is aligned and
      stored under ``'l1'``.

    ``self.data['best']`` is then set to the stored record with the
    highest ``(identity, score)``; on ties the record stored last wins.
    With a single stored record, ``'best'`` is the raw record.
    """
    def as_record(identity, score, align1, symbol, align2):
        # Same key order as the records have always been stored with.
        return {
            'score': score,
            'identity': identity,
            'align1': align1,
            'align2': align2,
            'symbol': symbol
        }

    identity, score, align1, symbol2, align2 = needle(alpha_list, beta_list)
    self.data['raw'] = as_record(identity, score, align1, symbol2, align2)

    if 0 < identity < 100:
        # translate the non matching symbols in alpha
        translated_any = False
        alpha_ct = []
        for position, aligned in enumerate(align1):
            members = symbol2[position]
            if len(members) == 1:
                alpha_ct.append(aligned)
                continue
            head = members[0]
            if head == '-':
                continue
            if isinstance(head, str):
                translated_any = True
                alpha_ct.append([translate(head)])
            else:
                alpha_ct.append(head)

        if translated_any:
            identity, score, align1, symbol2, align2 = needle(alpha_ct, beta_list)
            self.data['partl1'] = as_record(identity, score, align1, symbol2, align2)
    elif identity == 0:
        # no matching characters
        identity, score, align1, symbol2, align2 = needle(
            self._translate(alpha_list), beta_list)
        self.data['l1'] = as_record(identity, score, align1, symbol2, align2)

    if len(self.data) > 1:
        # Stable ascending sort: the last element has the highest
        # (identity, score) and, among equals, was inserted last.
        ranked = sorted(
            self.data.values(),
            key=lambda record: (record['identity'], record['score']))
        self.data['best'] = ranked[-1]
    else:
        self.data['best'] = self.data['raw']