def eulerian_path(edge_dict):
    '''Returns an Eulerian path from the given edges.

    edge_dict maps each node to the list of nodes its outgoing edges lead to.
    The path is found by adding one edge from the node with surplus incoming
    edges (where the path ends) to the node with surplus outgoing edges (where
    the path starts), asking eulerian_cycle for a cycle through the augmented
    graph, and then cutting that cycle open at the added edge.

    The caller's edge_dict and its lists are left unmodified; all work is done
    on a copy.

    Returns the path as a list of nodes.  An empty edge_dict gives an empty
    list.  If every node is already balanced, the result of eulerian_cycle is
    returned as is, since a cycle is itself an Eulerian path.

    Raises IndexError if the added edge cannot be located in the cycle.
    '''
    if not edge_dict:
        return []

    # Copy the adjacency lists so the extra edge never leaks into the caller's data.
    edges = dict((node, list(targets)) for node, targets in edge_dict.items())

    # Tally both degrees in a single pass over the edges.
    out_degree = {}
    in_degree = {}
    for node, targets in edges.items():
        out_degree[node] = len(targets)
        for target in targets:
            in_degree[target] = in_degree.get(target, 0) + 1

    # Determine the unbalanced nodes.
    unbalanced_from = unbalanced_to = None
    balanced = True
    for node in set(out_degree) | set(in_degree):
        surplus = out_degree.get(node, 0) - in_degree.get(node, 0)
        if surplus < 0:
            # More edges arrive than leave: the path must end here.
            unbalanced_from = node
            balanced = False
        elif surplus > 0:
            # More edges leave than arrive: the path must start here.
            unbalanced_to = node
            balanced = False

    if balanced:
        return eulerian_cycle(edges)

    # Add an edge connecting the unbalanced nodes.
    edges.setdefault(unbalanced_from, []).append(unbalanced_to)

    # Get the Eulerian cycle from the edges, including the added edge.
    # NOTE(review): assumes eulerian_cycle returns a list of nodes whose first
    # and last entries are the same node -- confirm against its definition.
    cycle = eulerian_cycle(edges)

    # Find the location of the added edge in the Eulerian cycle.
    for divide_point in range(len(cycle) - 1):
        if cycle[divide_point:divide_point + 2] == [unbalanced_from, unbalanced_to]:
            break
    else:
        raise IndexError('added edge not found in the Eulerian cycle')

    # Remove the added edge and rotate; cycle[0] is skipped because it repeats
    # the final node of the cycle.
    return cycle[divide_point + 1:] + cycle[1:divide_point + 1]
def eulerian_path(edge_dict):
    '''Returns an Eulerian path from the given edges.

    edge_dict maps each node to the list of nodes its outgoing edges lead to.
    The path is found by adding one edge from the node with surplus incoming
    edges (where the path ends) to the node with surplus outgoing edges (where
    the path starts), asking eulerian_cycle for a cycle through the augmented
    graph, and then cutting that cycle open at the added edge.

    The caller's edge_dict and its lists are left unmodified; all work is done
    on a copy.

    Returns the path as a list of nodes.  An empty edge_dict gives an empty
    list.  If every node is already balanced, the result of eulerian_cycle is
    returned as is, since a cycle is itself an Eulerian path.

    Raises IndexError if the added edge cannot be located in the cycle.
    '''
    if not edge_dict:
        return []

    # Copy the adjacency lists so the extra edge never leaks into the caller's data.
    edges = dict((node, list(targets)) for node, targets in edge_dict.items())

    # Tally both degrees in a single pass over the edges.
    out_degree = {}
    in_degree = {}
    for node, targets in edges.items():
        out_degree[node] = len(targets)
        for target in targets:
            in_degree[target] = in_degree.get(target, 0) + 1

    # Determine the unbalanced nodes.
    unbalanced_from = unbalanced_to = None
    balanced = True
    for node in set(out_degree) | set(in_degree):
        surplus = out_degree.get(node, 0) - in_degree.get(node, 0)
        if surplus < 0:
            # More edges arrive than leave: the path must end here.
            unbalanced_from = node
            balanced = False
        elif surplus > 0:
            # More edges leave than arrive: the path must start here.
            unbalanced_to = node
            balanced = False

    if balanced:
        return eulerian_cycle(edges)

    # Add an edge connecting the unbalanced nodes.
    edges.setdefault(unbalanced_from, []).append(unbalanced_to)

    # Get the Eulerian cycle from the edges, including the added edge.
    # NOTE(review): assumes eulerian_cycle returns a list of nodes whose first
    # and last entries are the same node -- confirm against its definition.
    cycle = eulerian_cycle(edges)

    # Find the location of the added edge in the Eulerian cycle.
    for divide_point in range(len(cycle) - 1):
        if cycle[divide_point:divide_point + 2] == [unbalanced_from, unbalanced_to]:
            break
    else:
        raise IndexError('added edge not found in the Eulerian cycle')

    # Remove the added edge and rotate; cycle[0] is skipped because it repeats
    # the final node of the cycle.
    return cycle[divide_point + 1:] + cycle[1:divide_point + 1]
The course is run on Coursera and the assignments and textbook are hosted on Stepic Problem Title: Universal String Problem Assignment #: 05 Problem ID: C URL: https://beta.stepic.org/Bioinformatics-Algorithms-2/From-Eulers-Theorem-to-an-Algorithm-for-Finding-Eulerian-Cycles-203/#step-8 ''' from Assignment_04E import eulerian_cycle from itertools import product # Read the input data. with open('data/stepic_5c.txt') as input_data: k = int(input_data.read().strip()) # Create the edges. universal_dict = {} for kmer in [''.join(item) for item in product('01', repeat=k)]: if kmer[:-1] in universal_dict: universal_dict[kmer[:-1]].append(kmer[1:]) else: universal_dict[kmer[:-1]] = [kmer[1:]] # Get the cycle, remove the repeated last entry for the associated path. path = eulerian_cycle(universal_dict) # Print and save the answer. print ''.join([item[0] for item in path[:-1]]) with open('output/Assignment_05C.txt', 'w') as output_data: output_data.write(''.join([item[0] for item in path[:-1]]))