def build_hash_table(self, current_node):
    """Recursively expand ``current_node`` and record every state it reaches.

    Every state obtained by applying an operation to ``current_node`` is
    looked up with ``Network.check_hash_key``.  A state that is already
    known is only linked as a child; a new state is wrapped in a ``Node``,
    stored in ``self.hash_table`` under ``hash(str(state))`` and expanded
    recursively.

    For each child, parallel entries are appended to ``node.children``,
    ``node.children_weights`` and ``node.children_operations`` (the latter
    as ``(operation, operation_type)`` tuples).

    Two situations are distinguished:

    * ``node.next_operation != 0``: the genome has a circular intermediate
      and only the stored reinsertion operation(s) are applied.  A stored
      weight of 0.5 is labelled ``'trp1'``, a weight of 1.5 ``'trp2'``.
    * otherwise all operations from ``node.get_legal_operations`` are
      applied.  A child that contains a circular chromosome is linked as
      ``'trp0'`` with weight 0.5; any other child gets the type and weight
      of the operation that produced it.

    Args:
        current_node: The node whose children are to be generated.

    Raises:
        ValueError: If an operation with a linear result cannot be
            classified.  The previous implementation only printed a
            message and then recorded an undefined or stale weight.
    """
    node = current_node
    adjacenciesB = self.adjacenciesB

    def classify_linear_operation(op_type, operation):
        """Return ``(operation_type, weight)`` for a purely linear result."""
        if op_type in ('fis', 'fus'):
            return op_type, 2
        if op_type == 'u_trl':
            return op_type, 1.5
        # Anything else has to be told apart by the node itself.
        operation_type = node.find_operation_type(operation)
        if operation_type == 'inv':
            return operation_type, 1
        if operation_type == 'b_trl':
            return operation_type, 1.5
        raise ValueError(
            "There's a problem at the .find_optype node function: "
            'unexpected operation type {!r} for operation {!r}'.format(
                operation_type, operation))

    def link_child(child, operation, operation_type, weight):
        """Append ``child`` and its edge data to the current node."""
        node.children.append(child)
        node.children_weights.append(weight)
        node.children_operations.append((operation, operation_type))

    def register_and_expand(child, operation, operation_type, weight):
        """Store a new child in the hash table, link it and expand it."""
        # The child must be in the table *before* recursing, otherwise a
        # cycle back to it would be expanded again.
        self.hash_table[hash(str(child.state))] = child
        link_child(child, operation, operation_type, weight)
        Network.build_hash_table(self, child)

    # The genome has a circular intermediate: all of its children are the
    # result of a forced reinsertion and therefore linear.
    if node.next_operation != 0:
        weight = node.next_operation_weight
        operation_type = None
        operations = []
        if weight == 0.5:
            # Point of cut equals the previous point of join.
            operations.append(node.next_operation)
            operation_type = 'trp1'
        elif weight == 1.5:
            if isinstance(node.next_operation, list):
                operations.extend(node.next_operation)
            else:
                operations.append(node.next_operation)
            operation_type = 'trp2'
        else:
            print('You have got a problem with the .next_operation weights')

        for operation in operations:
            child_state = node.take_action(operation)[0]
            lookup = Network.check_hash_key(self, child_state)
            if lookup[0]:
                link_child(lookup[1], operation, operation_type, weight)
            else:
                register_and_expand(
                    Node(child_state), operation, operation_type, weight)
        return

    # No circular intermediate: some children may contain circular
    # chromosomes.
    for operation in node.get_legal_operations(adjacenciesB):
        operation_result = node.take_action(operation)
        child_state = operation_result[0]
        op_type = operation_result[1]
        lookup = Network.check_hash_key(self, child_state)

        if lookup[0]:
            child = lookup[1]
            child.find_chromosomes(child.state)
            if child.circular_chromosomes:
                link_child(child, operation, 'trp0', 0.5)
            else:
                operation_type, weight = classify_linear_operation(
                    op_type, operation)
                link_child(child, operation, operation_type, weight)
            continue

        child = Node(child_state)
        child.find_chromosomes(child.state)
        if child.circular_chromosomes:
            # A circular chromosome was created: remember how it has to be
            # reinserted when the child itself is expanded.
            legal_operation = child.get_legal_reinsertion_operation(
                operation, self.adjacenciesB)
            if legal_operation:
                child.next_operation = legal_operation
                child.next_operation_weight = 0.5
            else:
                child.next_operation = (
                    child.get_illegal_decircularization_operation(
                        self.adjacenciesB))
                child.next_operation_weight = 1.5
            register_and_expand(child, operation, 'trp0', 0.5)
        else:
            operation_type, weight = classify_linear_operation(
                op_type, operation)
            register_and_expand(child, operation, operation_type, weight)
# Translate both genomes into their ordered and sorted adjacency form.
get_adjacencies = Extremities_and_adjacencies()
adjacencies_genomeA = get_adjacencies.adjacencies_ordered_and_sorted(genomeA)
adjacencies_genomeB = get_adjacencies.adjacencies_ordered_and_sorted(genomeB)

# Report the adjacencies, followed by a separator and two blank lines.
print('Adjacencies of the genomes: ')
print('Genome A: ', adjacencies_genomeA)
print('Genome B: ', adjacencies_genomeB)
print('____________________________________', end='\n\n\n')

# The network is spanned between the source and the target genome.
start_node = Node(adjacencies_genomeA)
target_node = Node(adjacencies_genomeB)

# Build the complete network of intermediate genomes.
construct_network = Network(
    start_node,
    target_node,
    adjacencies_genomeB,
)
network = construct_network.build_network()

# Plot the whole network in hierarchical structure
# (saved as 'hierarchical_network_plot.png').
graph = GraphTheory(network)
graph.plot_network(start_node)
weight_ratios = solution_ratios #weight_ratios = solution_ratios while hash(str(source_adjacencies)) == hash(str(target_adjacencies)): target_genome = GenomeEvolver.create_target_genome(number_of_sequence_blocks) evolving_genome = Evolve(target_genome) rearrangement_series = evolving_genome.evolve_with_random_rearrangements(number_of_rearrangements) reverse_the_series = GenomeEvolver.reverse_rearrangement_series(target_genome, rearrangement_series) source_genome = reverse_the_series[1] solution = reverse_the_series[2] target_adjacencies = get_adjacencies_and_genomes.adjacencies_ordered_and_sorted(target_genome) source_adjacencies = get_adjacencies_and_genomes.adjacencies_ordered_and_sorted(source_genome) #create start and target node for network source_node = Node(source_adjacencies) target_node = Node(target_adjacencies) #create dictionary of solutions dictionary_of_intermediates = {} source_key = hash(str(source_node.state)) target_key = hash(str(target_node.state)) dictionary_of_intermediates.update({source_key:source_node}) dictionary_of_intermediates.update({target_key:target_node}) # print('^^^^^^^^^^^^') # print('source: ', source_node) # print('target: ', target_node) # print('source key: ', source_key) # print('target_key: ', target_key) # print('dict:')
def build_hash_table(self, current_node):
    """Expand ``current_node`` recursively while printing a debug trace.

    Each state reachable from ``current_node`` by one operation is looked up
    with ``Network.check_hash_key``.  Known states are only linked as
    children; new states are wrapped in a ``Node``, stored in
    ``self.hash_table`` under ``hash(str(state))``, linked and expanded
    recursively.  ``node.children`` and ``node.children_weights`` are
    extended in parallel.

    Weights: children of a node with a pending ``next_operation`` inherit
    ``node.next_operation_weight``; otherwise a child containing a circular
    chromosome gets 0.5 and any other child gets 1.
    """
    node = current_node

    def store_and_expand(new_child, weight, weights_label):
        # Register first so that recursion can find the child again.
        self.hash_table.update({hash(str(new_child.state)): new_child})
        # print('#T: ', self.hash_table)
        node.children.append(new_child)
        node.children_weights.append(weight)
        print('node children: ', node.children)
        print(weights_label, node.children_weights)
        Network.build_hash_table(self, new_child)

    print()
    print()
    print('current node and state: ')
    print(node)
    print(node.state)
    print('____________________________________________________________________________________')

    # The genome has a circular intermediate (i.e. all of its children will
    # be linear).
    if node.next_operation != 0:
        print('this genome has a circular intermediate')
        print('the op wieight is: ', node.next_operation_weight)
        pending = []
        if node.next_operation_weight == 0.5:
            # Point of cut equals the previous point of join.
            pending.append(node.next_operation)
            print('legal, operations: ', pending)
        elif node.next_operation_weight == 1.5:
            if type(node.next_operation) is list:
                pending.extend(node.next_operation)
            else:
                pending.append(node.next_operation)
            print('illigal, operations: ', pending)
        else:
            print('You have got a problem with the .next_operation weights')

        for step in pending:
            print('the operation: ', step)
            child_state = node.take_action(step)
            print('result of op: ', child_state)
            lookup = Network.check_hash_key(self, child_state)
            if lookup[0]:
                node.children.append(lookup[1])
                node.children_weights.append(node.next_operation_weight)
                continue
            # The child consists of linear chromosomes only because it is
            # the result of a forced reinsertion.
            store_and_expand(
                Node(child_state),
                node.next_operation_weight,
                'node children weigths',
            )
        return

    # The genome has no circular intermediates (i.e. some of its children
    # may have circular chromosomes).
    operations = node.get_legal_operations(self.adjacenciesB)
    print('Operations: ', operations)
    for step in operations:
        child_state = node.take_action(step)
        print('operations - result: ', step, ' - ', child_state)
        lookup = Network.check_hash_key(self, child_state)

        if lookup[0]:
            child = lookup[1]
            node.children.append(child)
            child.find_chromosomes(child.state)
            has_circular = len(child.circular_chromosomes) != 0
            node.children_weights.append(0.5 if has_circular else 1)
            continue

        child = Node(child_state)
        # Check whether a circular chromosome has been created.
        child.find_chromosomes(child.state)
        if len(child.circular_chromosomes) == 0:
            print('no cicular chrms')
            store_and_expand(child, 1, 'node.children weigths: ')
            continue

        # A circular chromosome has been created: store how it is to be
        # reinserted before the child is expanded.
        legal_operation = child.get_legal_reinsertion_operation(
            step, self.adjacenciesB)
        print()
        print('!!!!!!!!!!!!!!!!', legal_operation)
        print()
        if legal_operation:
            child.next_operation = legal_operation
            child.next_operation_weight = 0.5
        else:
            child.next_operation = (
                child.get_illegal_decircularization_operation(
                    self.adjacenciesB))
            print('the ilegal next operation: ', child.state)
            print('illegal op: ', child.next_operation)
            child.next_operation_weight = 1.5
        store_and_expand(child, 0.5, 'node.children weigths: ')
# Position of each linear operation type in the ``weights`` sequence.
_LINEAR_OPERATION_WEIGHT_INDEX = {
    'inv': 0,
    'b_trl': 2,
    'u_trl': 3,
    'fis': 4,
    'fus': 5,
}


def _linear_operation_weight(op_type, weights):
    """Return the weight of a non-circularizing operation of ``op_type``."""
    index = _LINEAR_OPERATION_WEIGHT_INDEX.get(op_type)
    if index is None:
        raise ValueError(
            'You have got a problem, the op type is: {!r}'.format(op_type))
    return weights[index]


def _find_join_adjacency(operation, circular_chromosome):
    """Return the adjacency joined by ``operation`` on the circular chromosome.

    ``operation[-1]`` holds what the operation joined: two adjacencies, one
    adjacency together with a non-tuple element, or a single adjacency.
    Returns ``None`` when no candidate lies on ``circular_chromosome``.
    """
    joined = operation[-1]
    first_is_tuple = isinstance(joined[0], tuple)
    second_is_tuple = isinstance(joined[1], tuple)

    if first_is_tuple and second_is_tuple:
        # Two adjacencies were joined; the last one found on the circular
        # chromosome wins (same as the original loop without ``break``).
        match = None
        for adjacency in joined:
            if adjacency in circular_chromosome:
                match = adjacency
        return match

    if first_is_tuple:
        candidate = joined[0]
    elif second_is_tuple:
        candidate = joined[1]
    else:
        candidate = joined

    if candidate in circular_chromosome:
        return candidate
    print('error')
    return None


def build_hash_table(current_node, hash_table, adjacenciesB, weights):
    """Recursively expand ``current_node`` and fill ``hash_table``.

    Every state reachable by one operation is looked up with
    ``check_hash_key``.  Known states are only linked as children of the
    current node; new states are wrapped in a ``Node``, stored in
    ``hash_table`` under ``hash(str(state))``, linked and expanded
    recursively.  For every child, parallel entries are appended to
    ``node.children``, ``node.children_weights`` and
    ``node.children_operations`` (as ``(operation, operation_type)``).

    Args:
        current_node: Node to expand.  ``join_adjacency != 0`` marks a node
            created by a circularization (``'trp0'``); only reinsertion
            operations are applied to it.
        hash_table: Dict of all nodes created so far; modified in place.
        adjacenciesB: Adjacencies of the target genome, passed on to the
            node's operation generators.
        weights: Sequence of operation weights indexed as
            ``[inv, trp, b_trl, u_trl, fis, fus]``.  ``'trp0'`` and
            ``'trp1'`` cost ``0.5 * weights[1]``, ``'trp2'`` costs
            ``1.5 * weights[1]``.

    Raises:
        ValueError: If an operation with a linear result reports an unknown
            type.  The previous implementation only printed a message and
            then recorded an undefined or stale weight.
    """
    node = current_node

    # The previous operation was a circularization (a trp0): reinsert.
    if node.join_adjacency != 0:
        for operation in node.get_reinsertion_operations(adjacenciesB):
            child_state = node.take_action(operation)[0]
            lookup = check_hash_key(child_state, hash_table)
            known = lookup[0]

            # Cutting at the previous point of join is a trp1, else a trp2.
            if node.join_adjacency in operation[0]:
                operation_type = 'trp1'
                operation_weight = 0.5 * weights[1]
            else:
                operation_type = 'trp2'
                operation_weight = 1.5 * weights[1]

            if known:
                child = lookup[1]
            else:
                child = Node(child_state)
                child.join_adjacency = 0
                hash_table[hash(str(child.state))] = child

            node.children.append(child)
            node.children_weights.append(operation_weight)
            node.children_operations.append((operation, operation_type))
            if not known:
                build_hash_table(child, hash_table, adjacenciesB, weights)
        return

    # The current intermediate genome consists of linear chromosomes only.
    for operation in node.get_legal_operations(adjacenciesB):
        operation_result = node.take_action(operation)
        child_state = operation_result[0]
        op_type = operation_result[1]
        lookup = check_hash_key(child_state, hash_table)
        known = lookup[0]

        child = lookup[1] if known else Node(child_state)
        child.find_chromosomes(child.state)

        if child.circular_chromosomes:
            # A circularization occurred: remember where the circular
            # chromosome was joined so that it can be reinserted later.
            join_adjacency = _find_join_adjacency(
                operation, child.circular_chromosomes[0])
            if join_adjacency is not None:
                child.join_adjacency = join_adjacency
            operation_type = 'trp0'
            operation_weight = 0.5 * weights[1]
        else:
            child.join_adjacency = 0
            operation_type = op_type
            operation_weight = _linear_operation_weight(op_type, weights)

        if not known:
            # Register before recursing so that cycles terminate.
            hash_table[hash(str(child.state))] = child

        node.children.append(child)
        node.children_weights.append(operation_weight)
        node.children_operations.append((operation, operation_type))
        if not known:
            build_hash_table(child, hash_table, adjacenciesB, weights)
def _read_int_rows(path):
    """Read a comma-separated text file as a list of integer rows.

    Blank lines are skipped, so a trailing newline at the end of the file
    does not abort parsing.
    """
    with open(path) as handle:
        return [
            [int(field) for field in line.strip('\n').split(',')]
            for line in handle
            if line.strip()
        ]


def run(args):
    """Find all minimum-weight rearrangement scenarios and write a report.

    Reads the source genome, the target genome and the weight ratios from
    the comma-separated files named by ``args.source_genome``,
    ``args.target_genome`` and ``args.ratios``, builds the network of
    intermediate genomes, computes all shortest paths from source to target
    and writes the report to ``args.output_file``.  Everything printed while
    the function runs is redirected into that file.

    This variant additionally compares the result against a hard-coded
    reference solution ("JUST FOR TESTING").

    Args:
        args: Object with the attributes ``source_genome``,
            ``target_genome``, ``ratios`` and ``output_file``.
    """
    import contextlib  # local import: only needed for the redirection

    banner = '############################################################################################################'
    counted_types = ('inv', 'trp1', 'trp2', 'b_trl', 'u_trl', 'fus', 'fis')

    # ``with`` guarantees that the file is closed and sys.stdout restored
    # even when building the network or the path search raises.
    with open(args.output_file, 'w') as output, \
            contextlib.redirect_stdout(output):
        genomeA = _read_int_rows(args.source_genome)
        genomeB = _read_int_rows(args.target_genome)
        weight_ratios = _read_int_rows(args.ratios)

        get_adjacencies = Extremities_and_adjacencies()
        adjacencies_genomeA = get_adjacencies.adjacencies_ordered_and_sorted(genomeA)
        adjacencies_genomeB = get_adjacencies.adjacencies_ordered_and_sorted(genomeB)

        # Create start and target node and seed the hash table with them.
        start_node = Node(adjacencies_genomeA)
        target_node = Node(adjacencies_genomeB)
        hash_table = {
            hash(str(start_node.state)): start_node,
            hash(str(target_node.state)): target_node,
        }

        # Rearrangement weights: the largest ratio divided by each ratio.
        # A ratio of 0 gets the squared maximum instead.  (The original
        # wrote ``max_number^2``, which is a bitwise XOR in Python.)
        ratios = weight_ratios[0]
        max_number = max(ratios)
        weights = [
            max_number ** 2 if ratio == 0 else max_number / ratio
            for ratio in ratios
        ]

        New_Network_wrDCJ.build_hash_table(
            start_node, hash_table, adjacencies_genomeB, weights)
        network = New_Network_wrDCJ.build_network(hash_table)
        shortest_paths = list(all_shortest_paths(
            network, start_node, target_node, weight='weight'))

        # Walk every path once: collect the printable steps and count how
        # often each operation type occurs over all paths.
        totals = dict.fromkeys(counted_types, 0)
        Paths_state = []
        for path in shortest_paths:
            path_state = []
            for position, current in enumerate(path):
                if position == 0:
                    operation_type = 'none, this is the source genome'
                    operation = 'N/A'
                else:
                    parent = path[position - 1]
                    # NOTE(review): index() returns the first edge to this
                    # child; if several operations lead to the same child
                    # only the first one is reported - confirm intended.
                    child_index = parent.children.index(current)
                    operation = parent.children_operations[child_index][0]
                    operation_type = parent.children_operations[child_index][1]
                    if operation_type in totals:
                        totals[operation_type] += 1
                genome = get_adjacencies.adjacencies_to_genome(current.state)
                path_state.append((genome, (operation_type, operation)))
            Paths_state.append(path_state)

        solution_count = len(shortest_paths)

        print(banner)
        print()
        print('Source Genome: ', genomeA)
        print('Target Genome: ', genomeB)
        print()
        print('Number of most parsimonious solutions: ', solution_count)
        print()
        print('Average number of each operation per solution:')
        # int() truncates the averages; kept to preserve this report format.
        print('Inversions: ', int(totals['inv'] / solution_count),
              ' Transpositions type 1: ', int(totals['trp1'] / solution_count),
              ' Transpositions type 2: ', int(totals['trp2'] / solution_count),
              ' Balanced translocations: ', int(totals['b_trl'] / solution_count),
              ' Unbalanced translocations: ', int(totals['u_trl'] / solution_count),
              ' Fusions: ', int(totals['fus'] / solution_count),
              ' Fissions: ', int(totals['fis'] / solution_count))
        print()
        print()
        print('Solutions: ')
        print()
        for path_counter, path in enumerate(Paths_state, start=1):
            print('Solution number ', path_counter)
            for genome in path:
                print(genome)
            print()
        print()
        print(banner)

        ###############################
        # JUST FOR TESTING
        solution = [
            ([[1, 2, 3, 4, 15], [-8, -7, 6, -5, -14, -13, -12], [9, 11],
              [-20, -19, -18, -17, -16, -32, 10, -31, -30, -29, -28, -27],
              [21, 22, 23, 24, 25, 26], [-33],
              [34, 35, 36, 37, 38, 39, 40]],
             ('none, this is the source genome', 'N/A')),
            ([[1, 2, 3, 4, 15], [-8, -7, -6, -5, -14, -13, -12], [9, 11],
              [-20, -19, -18, -17, -16, -32, 10, -31, -30, -29, -28, -27],
              [21, 22, 23, 24, 25, 26], [-33],
              [34, 35, 36, 37, 38, 39, 40]],
             ('inv', (((5.5, 6.5), (6, 7)), ((5.5, 6), (6.5, 7))))),
            ([[1, 2, 3, 4, 15], [-8, -7, -6, -5, -14, -13, -12], [9, 11],
              [16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26],
              [27, 28, 29, 30, 31, -10, 32, 33],
              [34, 35, 36, 37, 38, 39, 40]],
             ('u_trl', (((16, 32.5), 33), ((32.5, 33), 16)))),
            ([[1, 2, 3, 4, 5, 6, 7, 8], [9, 11], [12, 13, 14, 15],
              [16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26],
              [27, 28, 29, 30, 31, -10, 32, 33],
              [34, 35, 36, 37, 38, 39, 40]],
             ('b_trl', (((4.5, 15), (5, 14.5)), ((4.5, 5), (14.5, 15))))),
            ([[1, 2, 3, 4, 5, 6, 7, 8], [9, 11], [12, 13, 14, 15],
              [16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26],
              [27, 28, 29, 30, 31, 32, 33],
              [34, 35, 36, 37, 38, 39, 40], ['o', 10]],
             ('trp0', (((10, 32), (10.5, 31.5)), ((10, 10.5), (31.5, 32))))),
            ([[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11], [12, 13, 14, 15],
              [16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26],
              [27, 28, 29, 30, 31, 32, 33],
              [34, 35, 36, 37, 38, 39, 40]],
             ('trp1', (((9.5, 11), (10, 10.5)), ((9.5, 10), (10.5, 11))))),
        ]

        # Reduce the found paths and the reference solution to their
        # (type, operation) steps and to their bare type sequences.
        paths_operations = [
            [step for (_, step) in path_state] for path_state in Paths_state
        ]
        solution_operations = [step for (_, step) in solution]
        sol_types = [step_type for step_type, _ in solution_operations]
        path_types = [
            [step_type for step_type, _ in steps] for steps in paths_operations
        ]
        # Number of found paths with the same type sequence as the reference.
        counter = sum(1 for types in path_types if types == sol_types)

        print('sol len: ', len(solution))
        print('shortest path len: ', len(shortest_paths[0]))
        print('counter', counter)
        print('And the answer is... ', solution_operations in paths_operations)
        print('Source genome: ', genomeA)
        print('Target genome: ', genomeB)
        print()
        print('Solution: ', solution)
        ##########################################################################################################
def _read_int_rows(path):
    """Read a comma-separated text file as a list of integer rows.

    Blank lines are skipped, so a trailing newline at the end of the file
    does not abort parsing.
    """
    with open(path) as handle:
        return [
            [int(field) for field in line.strip('\n').split(',')]
            for line in handle
            if line.strip()
        ]


def run(args):
    """Find all minimum-weight rearrangement scenarios and write a report.

    Reads the source genome, the target genome and the weight ratios from
    the comma-separated files named by ``args.source_genome``,
    ``args.target_genome`` and ``args.ratios``, builds the network of
    intermediate genomes, computes all shortest paths from source to target
    and writes the report to ``args.output_file``.  Everything printed while
    the function runs is redirected into that file.

    The report lists the number of solutions, the average number of
    operations per solution (a ``trp2`` counts twice), the average count of
    each operation type, and every solution step by step.

    Args:
        args: Object with the attributes ``source_genome``,
            ``target_genome``, ``ratios`` and ``output_file``.
    """
    import contextlib  # local import: only needed for the redirection

    banner = '############################################################################################################'
    counted_types = ('inv', 'trp1', 'trp2', 'b_trl', 'u_trl', 'fus', 'fis')

    # ``with`` guarantees that the file is closed and sys.stdout restored
    # even when building the network or the path search raises.
    with open(args.output_file, 'w') as output, \
            contextlib.redirect_stdout(output):
        genomeA = _read_int_rows(args.source_genome)
        genomeB = _read_int_rows(args.target_genome)
        weight_ratios = _read_int_rows(args.ratios)

        get_adjacencies = Extremities_and_adjacencies()
        adjacencies_genomeA = get_adjacencies.adjacencies_ordered_and_sorted(genomeA)
        adjacencies_genomeB = get_adjacencies.adjacencies_ordered_and_sorted(genomeB)

        # Create start and target node and seed the hash table with them.
        start_node = Node(adjacencies_genomeA)
        target_node = Node(adjacencies_genomeB)
        hash_table = {
            hash(str(start_node.state)): start_node,
            hash(str(target_node.state)): target_node,
        }

        # Rearrangement weights: the largest ratio divided by each ratio.
        # A ratio of 0 gets the squared maximum instead.  (The original
        # wrote ``max_number^2``, which is a bitwise XOR in Python.)
        ratios = weight_ratios[0]
        max_number = max(ratios)
        weights = [
            max_number ** 2 if ratio == 0 else max_number / ratio
            for ratio in ratios
        ]

        New_Network_wrDCJ.build_hash_table(
            start_node, hash_table, adjacencies_genomeB, weights)
        network = New_Network_wrDCJ.build_network(hash_table)
        shortest_paths = list(all_shortest_paths(
            network, start_node, target_node, weight='weight'))

        # Walk every path once: collect the printable steps and count how
        # often each operation type occurs over all paths.
        totals = dict.fromkeys(counted_types, 0)
        Paths_state = []
        for path in shortest_paths:
            path_state = []
            for position, current in enumerate(path):
                if position == 0:
                    operation_type = 'none, this is the source genome'
                    operation = 'N/A'
                else:
                    parent = path[position - 1]
                    # NOTE(review): index() returns the first edge to this
                    # child; if several operations lead to the same child
                    # only the first one is reported - confirm intended.
                    child_index = parent.children.index(current)
                    operation = parent.children_operations[child_index][0]
                    operation_type = parent.children_operations[child_index][1]
                    if operation_type in totals:
                        totals[operation_type] += 1
                genome = get_adjacencies.adjacencies_to_genome(current.state)
                path_state.append((genome, (operation_type, operation)))
            Paths_state.append(path_state)

        solution_count = len(shortest_paths)
        averages = {
            name: total / solution_count for name, total in totals.items()
        }

        print(banner)
        print()
        print('Source Genome: ', genomeA)
        print('Target Genome: ', genomeB)
        print()
        print('Number of most parsimonious solutions: ', solution_count)
        print()
        # Summation order is kept as in the original so that the printed
        # floating-point total does not change.
        print('Average number of operations per solution: ',
              averages['inv'] + averages['trp1'] + 2 * averages['trp2']
              + averages['b_trl'] + averages['u_trl'] + averages['fis']
              + averages['fus'])
        print()
        print('Average number of each operation per solution:')
        print('Inversions: ', averages['inv'],
              ' Transpositions type 1: ', averages['trp1'],
              ' Transpositions type 2: ', averages['trp2'],
              ' Balanced translocations: ', averages['b_trl'],
              ' Unbalanced translocations: ', averages['u_trl'],
              ' Fusions: ', averages['fus'],
              ' Fissions: ', averages['fis'])
        print()
        print()
        print('Solutions: ')
        print()
        for path_counter, path in enumerate(Paths_state, start=1):
            print('Solution number ', path_counter)
            for genome in path:
                print(genome)
            print()
        print()
        print(banner)