def solution(dataset: list) -> str:
    """Return a longest common subsequence of the first two FASTA records.

    Args:
        dataset: Input lines, handed unchanged to ``read_fasta`` (defined
            outside this block; presumably it parses FASTA text into
            sequence records -- confirm against its definition).

    Returns:
        One longest common subsequence of record 0 and record 1, or an empty
        string when they share no characters.
    """
    fasta = read_fasta(lines=dataset)
    seq1, seq2 = fasta[0], fasta[1]
    width, height = len(seq1), len(seq2)

    # table[j][i] is the LCS length of seq2[:j] and seq1[:i].
    # Row 0 and column 0 are the empty-prefix base case and stay zero.
    table = [[0] * (width + 1) for _ in range(height + 1)]
    for i, ibase in enumerate(seq1, 1):
        for j, jbase in enumerate(seq2, 1):
            if ibase == jbase:
                table[j][i] = table[j - 1][i - 1] + 1
            else:
                table[j][i] = max(table[j - 1][i], table[j][i - 1])

    # Backtrack from the bottom-right cell; len(LCS) == table[height][width].
    # The zero border is kept on purpose: with it, `i - 1` and `j - 1` never
    # go negative, so lookups cannot wrap around to the last column/row
    # (which previously produced wrong answers or an endless loop).
    i, j = width, height
    lcs = []
    while i > 0 and j > 0:
        if seq1[i - 1] == seq2[j - 1]:
            lcs.append(seq1[i - 1])
            i -= 1
            j -= 1
        elif table[j][i - 1] == table[j][i]:
            # Dropping seq1's last character loses nothing: move left.
            i -= 1
        else:
            # On a mismatch the cell is max(up, left); left was smaller,
            # so the value must have come from above.
            j -= 1

    # Characters were collected back-to-front.
    return "".join(reversed(lcs))
def slow_solution(dataset: list) -> str:
    """Build the overlap graph by comparing every ordered pair of records.

    An edge ``s -> t`` is emitted when the last 3 characters of ``s`` equal
    the first 3 characters of ``t``; pairs that compare equal are skipped.

    Args:
        dataset: Input lines, handed unchanged to ``read_fasta`` (defined
            outside this block).

    Returns:
        One ``"<s.id> <t.id>"`` line per edge, joined with newlines.
    """
    records = read_fasta(lines=dataset)
    overlap = 3

    # Every ordered pair is examined, hence quadratic in the record count.
    adjacency_lines = [
        f"{src.id} {dst.id}"
        for src in records
        for dst in records
        # Equal records are skipped first so a record never links to itself.
        if not (src == dst) and src[-overlap:] == dst[:overlap]
    ]
    return "\n".join(adjacency_lines)
def solution(dataset: list) -> str:
    """Compute a consensus string and profile matrix for the given records.

    The records are read column by column (``zip`` stops at the shortest
    record). For each column the most common symbol goes into the consensus,
    and the counts of A, C, G and T go into the profile.

    Args:
        dataset: Input lines, handed unchanged to ``read_fasta`` (defined
            outside this block).

    Returns:
        The consensus on the first line, followed by one
        ``"<nucleotide>: <count> <count> ..."`` line per nucleotide.
    """
    records = read_fasta(lines=dataset)
    bases = ("A", "C", "G", "T")

    # One Counter per alignment column.
    column_tallies = [Counter(column) for column in zip(*records)]

    consensus = "".join(
        tally.most_common(1)[0][0] for tally in column_tallies
    )

    # Counts per column, in A/C/G/T order, already stringified for joining.
    per_column = [
        [str(tally[base]) for base in bases] for tally in column_tallies
    ]

    # Transpose so each row holds one nucleotide's counts across all columns.
    rows = []
    for base, counts in zip(bases, zip(*per_column)):
        rows.append(f"{base}: {' '.join(counts)}")

    return consensus + "\n" + "\n".join(rows)
def solution(dataset: list) -> str:
    """Build the overlap graph, visiting each unordered pair of records once.

    Each new record is checked in both directions against every record seen
    so far, using a 3-character overlap, so only n * (n - 1) / 2 pairs are
    inspected instead of every ordered pair.

    Args:
        dataset: Input lines, handed unchanged to ``read_fasta`` (defined
            outside this block).

    Returns:
        One ``"<from.id> <to.id>"`` line per edge, joined with newlines.
    """
    records = read_fasta(lines=dataset)
    overlap = 3
    seen = []
    adjacency_lines = []

    for current in records:
        # Compare the incoming record with everything already in the graph.
        for earlier in seen:
            if current.endswith(earlier[:overlap]):
                adjacency_lines.append(f"{current.id} {earlier.id}")
            if earlier.endswith(current[:overlap]):
                adjacency_lines.append(f"{earlier.id} {current.id}")
        # Only now does the record become a node later records compare to.
        seen.append(current)

    return "\n".join(adjacency_lines)
def solution(dataset: list) -> str:
    """Return 1-based positions in record 0 that spell out record 1.

    For each symbol of the second record, the remainder of the first record
    is searched with ``find_one`` and the position just found is recorded.

    Args:
        dataset: Input lines, handed unchanged to ``read_fasta`` (defined
            outside this block).

    Returns:
        The positions as a space-separated string.

    NOTE(review): ``find_one`` is provided by the record type, not by this
    block; it is assumed to return the 0-based offset of the first match
    within the slice -- confirm, including what it returns on no match.
    """
    sequences = read_fasta(lines=dataset)
    dna = sequences[0]
    subsequence = sequences[1]

    positions = []
    offset = 0
    for base in subsequence:
        # Search only past the previous hit so positions strictly increase.
        loc = dna[offset:].find_one(base)
        offset = loc + offset + 1  # slice-relative -> absolute, 1-based
        positions.append(offset)

    # Sanity check: report (do not fail on) any position whose symbol does
    # not match. An explicit comparison is used instead of `assert` inside a
    # bare `except`, so the check still runs under `python -O` and unrelated
    # errors are no longer swallowed.
    for expected, position in zip(subsequence, positions):
        found = dna[position - 1]
        if expected != found:
            print(f"{expected} != {found} at {position}")

    return " ".join(str(position) for position in positions)