Exemplo n.º 1
0
    def build_hash_table(self, current_node):
        """Recursively expand ``current_node`` and fill ``self.hash_table``.

        Every operation applicable to the node is applied with
        ``node.take_action`` and the resulting state is looked up through
        ``Network.check_hash_key``.  A state that is already known is only
        linked to the node; an unknown state is wrapped in a new ``Node``,
        stored in ``self.hash_table`` under ``hash(str(child.state))``,
        linked, and then expanded recursively.  Linking appends one entry to
        each of ``node.children``, ``node.children_weights`` and
        ``node.children_operations`` (the latter as ``(operation, type)``).

        Edge weights and types assigned here:

        * node with a pending ``next_operation`` (non-zero): the node's own
          ``next_operation_weight`` is used; 0.5 is labelled ``'trp1'`` and
          1.5 is labelled ``'trp2'``;
        * child containing a circular chromosome: ``'trp0'`` with weight 0.5;
        * otherwise ``'fis'``/``'fus'`` -> 2, ``'u_trl'``/``'b_trl'`` -> 1.5,
          ``'inv'`` -> 1.

        Args:
            current_node: the node to expand; it must expose the attributes
                and methods used below (``state``, ``next_operation``,
                ``take_action``, ``get_legal_operations``, ...).

        Raises:
            ValueError: if an operation on a linear genome cannot be
                classified.  Previously this case printed a message and then
                appended an unbound or stale weight.
        """
        node = current_node
        adjacenciesB = self.adjacenciesB

        def classify_linear_operation(operation, op_type):
            """Return ``(operation_type, weight)`` for a child without circular chromosomes."""
            if op_type in ('fis', 'fus'):
                return op_type, 2
            if op_type == 'u_trl':
                return op_type, 1.5
            # take_action did not name the type itself; ask the node.
            operation_type = node.find_operation_type(operation)
            if operation_type == 'inv':
                return operation_type, 1
            if operation_type == 'b_trl':
                return operation_type, 1.5
            raise ValueError(
                "There's a problem at the .find_optype node function: "
                "unexpected operation type %r" % (operation_type,))

        def register(child):
            """Store a newly created child in the hash table."""
            self.hash_table[hash(str(child.state))] = child

        def link(child, weight, operation, operation_type):
            """Record ``child`` as reachable from ``node`` via ``operation``."""
            node.children.append(child)
            node.children_weights.append(weight)
            node.children_operations.append((operation, operation_type))

        # Case 1: the node carries a pending follow-up operation (set below
        # when a child with a circular chromosome is created).
        if node.next_operation != 0:
            weight = node.next_operation_weight
            if weight == 0.5:
                operations = [node.next_operation]
                operation_type = 'trp1'
            elif weight == 1.5:
                pending = node.next_operation
                # the pending operation may be a single operation or a list
                operations = list(pending) if isinstance(pending, list) else [pending]
                operation_type = 'trp2'
            else:
                print(
                    'You have got a problem with the .next_operation weights')
                operations = []
                operation_type = None

            for operation in operations:
                child_state = node.take_action(operation)[0]
                lookup = Network.check_hash_key(self, child_state)

                if lookup[0]:
                    link(lookup[1], weight, operation, operation_type)
                else:
                    child = Node(child_state)
                    register(child)
                    link(child, weight, operation, operation_type)
                    Network.build_hash_table(self, child)
            return

        # Case 2: no pending operation; try every legal operation.
        for operation in node.get_legal_operations(adjacenciesB):
            operation_result = node.take_action(operation)
            child_state = operation_result[0]
            op_type = operation_result[1]

            lookup = Network.check_hash_key(self, child_state)
            is_known = lookup[0]
            child = lookup[1] if is_known else Node(child_state)

            # refresh child.circular_chromosomes for the test below
            child.find_chromosomes(child.state)

            if child.circular_chromosomes:
                if not is_known:
                    # Decide which operation the new child must perform next.
                    legal_operation = child.get_legal_reinsertion_operation(
                        operation, adjacenciesB)
                    if legal_operation:
                        child.next_operation = legal_operation
                        child.next_operation_weight = 0.5
                    else:
                        child.next_operation = child.get_illegal_decircularization_operation(
                            adjacenciesB)
                        child.next_operation_weight = 1.5
                operation_type, weight = 'trp0', 0.5
            else:
                operation_type, weight = classify_linear_operation(
                    operation, op_type)

            if not is_known:
                register(child)
            link(child, weight, operation, operation_type)
            if not is_known:
                Network.build_hash_table(self, child)
Exemplo n.º 2
0
# Convert both genomes into their ordered-and-sorted adjacency form.
get_adjacencies = Extremities_and_adjacencies()
adjacencies_genomeA = get_adjacencies.adjacencies_ordered_and_sorted(genomeA)
adjacencies_genomeB = get_adjacencies.adjacencies_ordered_and_sorted(genomeB)

# Report the adjacencies, followed by a separator line and two blank lines.
print(
    'Adjacencies of the genomes: ',
    'Genome A:  ' + str(adjacencies_genomeA),
    'Genome B:  ' + str(adjacencies_genomeB),
    '____________________________________',
    '',
    '',
    sep='\n',
)

# Start and target nodes of the network.
start_node, target_node = Node(adjacencies_genomeA), Node(adjacencies_genomeB)

# Build the complete network between the two genomes.
construct_network = Network(start_node, target_node, adjacencies_genomeB)
network = construct_network.build_network()

# Plot the entire network in hierarchical structure
# (saved as 'hierarchical_network_plot.png').
graph = GraphTheory(network)
graph.plot_network(start_node)
Exemplo n.º 3
0
        weight_ratios = solution_ratios

    #weight_ratios = solution_ratios
    while hash(str(source_adjacencies)) == hash(str(target_adjacencies)):
        target_genome = GenomeEvolver.create_target_genome(number_of_sequence_blocks)
        evolving_genome = Evolve(target_genome)
        rearrangement_series = evolving_genome.evolve_with_random_rearrangements(number_of_rearrangements)
        reverse_the_series = GenomeEvolver.reverse_rearrangement_series(target_genome, rearrangement_series)
        source_genome = reverse_the_series[1]
        solution = reverse_the_series[2]
        target_adjacencies = get_adjacencies_and_genomes.adjacencies_ordered_and_sorted(target_genome)
        source_adjacencies = get_adjacencies_and_genomes.adjacencies_ordered_and_sorted(source_genome)


    #create start and target node for network
    source_node = Node(source_adjacencies)
    target_node = Node(target_adjacencies)

    #create dictionary of solutions
    dictionary_of_intermediates = {}
    source_key = hash(str(source_node.state))
    target_key = hash(str(target_node.state))

    dictionary_of_intermediates.update({source_key:source_node})
    dictionary_of_intermediates.update({target_key:target_node})
    # print('^^^^^^^^^^^^')
    # print('source: ', source_node)
    # print('target: ', target_node)
    # print('source key: ', source_key)
    # print('target_key: ', target_key)
    # print('dict:')
Exemplo n.º 4
0
    def build_hash_table(self, current_node):
        """Recursively expand ``current_node``, printing a trace of every step.

        Each applicable operation is applied with ``node.take_action`` and
        the result is looked up with ``Network.check_hash_key``.  Known
        children are only linked to the node (``children`` and
        ``children_weights``); unknown ones are wrapped in a ``Node``, stored
        in ``self.hash_table`` under ``hash(str(child.state))``, linked and
        expanded recursively.

        Weights: a node with a pending ``next_operation`` uses its own
        ``next_operation_weight``; a child with a circular chromosome gets
        0.5; any other child gets 1.
        """
        node = current_node
        print()
        print()
        print('current node and state: ')
        print(node)
        print(node.state)
        print('____________________________________________________________________________________')

        def store_and_expand(new_child, edge_weight, weights_label):
            # Register a brand-new child, link it to the node, trace, recurse.
            self.hash_table.update({hash(str(new_child.state)): new_child})
            node.children.append(new_child)
            node.children_weights.append(edge_weight)
            print('node children: ', node.children)
            print(weights_label, node.children_weights)
            Network.build_hash_table(self, new_child)

        # The genome has a circular intermediate: only the pending
        # operation(s) stored on the node are applied.
        if node.next_operation != 0:
            print('this genome has a circular intermediate')
            print('the op wieight is: ', node.next_operation_weight)

            if node.next_operation_weight == 0.5:
                # point of cut = previous point of join
                operations = [node.next_operation]
                print('legal, operations: ', operations)
            elif node.next_operation_weight == 1.5:
                pending = node.next_operation
                operations = list(pending) if type(pending) is list else [pending]
                print('illigal, operations: ', operations)
            else:
                operations = []
                print('You have got a problem with the .next_operation weights')

            for operation in operations:
                print('the operation: ', operation)
                child_state = node.take_action(operation)
                print('result of op: ', child_state)
                lookup = Network.check_hash_key(self, child_state)

                if not lookup[0]:
                    # new state reached by the forced follow-up operation
                    store_and_expand(Node(child_state),
                                     node.next_operation_weight,
                                     'node children weigths')
                    continue

                node.children.append(lookup[1])
                node.children_weights.append(node.next_operation_weight)
            return

        # The genome has no circular intermediate: try every legal operation.
        operations = node.get_legal_operations(self.adjacenciesB)
        print('Operations: ', operations)
        for operation in operations:
            child_state = node.take_action(operation)
            print('operations - result: ', operation, ' - ', child_state)
            lookup = Network.check_hash_key(self, child_state)

            if lookup[0]:
                child = lookup[1]
                node.children.append(child)
                child.find_chromosomes(child.state)
                node.children_weights.append(
                    0.5 if len(child.circular_chromosomes) != 0 else 1)
                continue

            child = Node(child_state)
            # check whether a circular chromosome has been created
            child.find_chromosomes(child.state)

            if len(child.circular_chromosomes) == 0:
                print('no cicular chrms')
                store_and_expand(child, 1, 'node.children weigths: ')
                continue

            # A circular chromosome exists: fix the child's next operation.
            legal_operation = child.get_legal_reinsertion_operation(
                operation, self.adjacenciesB)
            print()
            print('!!!!!!!!!!!!!!!!', legal_operation)
            print()
            if legal_operation:
                child.next_operation = legal_operation
                child.next_operation_weight = 0.5
            else:
                child.next_operation = child.get_illegal_decircularization_operation(
                    self.adjacenciesB)
                print('the ilegal next operation: ', child.state)
                print('illegal op: ', child.next_operation)
                child.next_operation_weight = 1.5
            store_and_expand(child, 0.5, 'node.children weigths: ')
Exemplo n.º 5
0
# Position in ``weights`` of each operation type that yields a linear genome.
_LINEAR_WEIGHT_INDEX = {'inv': 0, 'b_trl': 2, 'u_trl': 3, 'fis': 4, 'fus': 5}


def _linear_operation_weight(op_type, weights):
    """Return the weight of a non-circularizing operation of type ``op_type``."""
    if op_type not in _LINEAR_WEIGHT_INDEX:
        raise ValueError(
            'You have got a problem, the op type is: %r' % (op_type,))
    return weights[_LINEAR_WEIGHT_INDEX[op_type]]


def _assign_join_adjacency(child, operation):
    """Set ``child.join_adjacency`` to the adjacency of ``operation`` found in its first circular chromosome."""
    circular_chromosome = child.circular_chromosomes[0]
    joined = operation[-1]
    first_is_pair = isinstance(joined[0], tuple)
    second_is_pair = isinstance(joined[1], tuple)

    if first_is_pair and second_is_pair:
        # Both elements are adjacencies: the last one lying on the circular
        # chromosome wins; no match leaves the attribute untouched.
        for adjacency in joined:
            if adjacency in circular_chromosome:
                child.join_adjacency = adjacency
        return

    if first_is_pair:
        candidate = joined[0]
    elif second_is_pair:
        candidate = joined[1]
    else:
        # ``joined`` itself is the single adjacency
        candidate = joined

    if candidate in circular_chromosome:
        child.join_adjacency = candidate
    else:
        print('error')


def build_hash_table(current_node, hash_table, adjacenciesB, weights):
    """Recursively expand ``current_node`` and fill ``hash_table``.

    Each applicable operation is applied with ``node.take_action`` and the
    resulting state is looked up with ``check_hash_key``.  Known states are
    only linked to the node; unknown states are wrapped in a ``Node``, stored
    in ``hash_table`` under ``hash(str(child.state))``, linked and expanded
    recursively.  Linking appends one entry to each of ``node.children``,
    ``node.children_weights`` and ``node.children_operations``.

    Args:
        current_node: node to expand.
        hash_table: mapping from state hash to node; mutated in place.
        adjacenciesB: target adjacencies, passed to the node's
            operation-finding methods.
        weights: indexable weights: ``[0]`` inv, ``[1]`` transposition steps
            (scaled by 0.5 for 'trp0'/'trp1' and 1.5 for 'trp2'), ``[2]``
            b_trl, ``[3]`` u_trl, ``[4]`` fis, ``[5]`` fus.

    Raises:
        ValueError: if ``take_action`` reports an operation type that has no
            weight.  Previously this printed a message and then appended an
            unbound or stale weight.
    """
    node = current_node

    def link(child, weight, operation, operation_type):
        node.children.append(child)
        node.children_weights.append(weight)
        node.children_operations.append((operation, operation_type))

    # The previous operation was a circularization (a trp0): only
    # reinsertion operations are applied.
    if node.join_adjacency != 0:
        for operation in node.get_reinsertion_operations(adjacenciesB):
            child_state = node.take_action(operation)[0]
            lookup = check_hash_key(child_state, hash_table)

            # Cutting at the adjacency that was just joined is a trp1,
            # anything else a trp2.
            if node.join_adjacency in operation[0]:
                operation_type = 'trp1'
                operation_weight = 0.5 * weights[1]
            else:
                operation_type = 'trp2'
                operation_weight = 1.5 * weights[1]

            if lookup[0]:
                link(lookup[1], operation_weight, operation, operation_type)
            else:
                child = Node(child_state)
                child.join_adjacency = 0
                hash_table[hash(str(child.state))] = child
                link(child, operation_weight, operation, operation_type)
                build_hash_table(child, hash_table, adjacenciesB, weights)
        return

    # The current genome did not result from a circularization.
    for operation in node.get_legal_operations(adjacenciesB):
        operation_result = node.take_action(operation)
        child_state = operation_result[0]
        op_type = operation_result[1]

        lookup = check_hash_key(child_state, hash_table)
        is_known = lookup[0]
        child = lookup[1] if is_known else Node(child_state)

        # refresh child.circular_chromosomes for the test below
        child.find_chromosomes(child.state)

        if child.circular_chromosomes:
            # a circularization occurred
            operation_type = 'trp0'
            operation_weight = 0.5 * weights[1]
            _assign_join_adjacency(child, operation)
        else:
            operation_type = op_type
            operation_weight = _linear_operation_weight(op_type, weights)
            child.join_adjacency = 0

        if not is_known:
            hash_table[hash(str(child.state))] = child
        link(child, operation_weight, operation, operation_type)
        if not is_known:
            build_hash_table(child, hash_table, adjacenciesB, weights)
Exemplo n.º 6
0
def run(args):
    """Find all minimum-weight rearrangement scenarios and write a report.

    Reads the source genome, the target genome and the weight ratios from the
    comma-separated files named on ``args``, builds the rearrangement network
    starting from the source genome, collects every minimum-weight path to the
    target genome via ``all_shortest_paths`` and prints a report.  Everything
    printed while this function runs is written to ``args.output_file``.

    Args:
        args: Object providing the attributes ``source_genome``,
            ``target_genome``, ``ratios`` and ``output_file`` (file paths).
            Presumably an ``argparse`` namespace -- confirm against the caller.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from contextlib import redirect_stdout

    def read_int_rows(path):
        """Parse a comma-separated text file into a list of integer rows."""
        with open(path) as handle:
            # Blank lines (e.g. a trailing newline at the end of the file) are
            # skipped; int('') would otherwise raise ValueError.
            return [[int(field) for field in row.strip('\n').split(',')]
                    for row in handle if row.strip()]

    ruler = '############################################################################################################'
    counted_types = ('inv', 'trp1', 'trp2', 'b_trl', 'u_trl', 'fus', 'fis')

    # The context managers guarantee that the output file is closed and that
    # sys.stdout is restored even when something below raises.
    with open(args.output_file, 'w') as out_file, redirect_stdout(out_file):
        genomeA = read_int_rows(args.source_genome)
        genomeB = read_int_rows(args.target_genome)
        weight_ratios = read_int_rows(args.ratios)

        get_adjacencies = Extremities_and_adjacencies()
        adjacencies_genomeA = get_adjacencies.adjacencies_ordered_and_sorted(genomeA)
        adjacencies_genomeB = get_adjacencies.adjacencies_ordered_and_sorted(genomeB)

        # Create start and target node and register both in the hash table.
        start_node = Node(adjacencies_genomeA)
        target_node = Node(adjacencies_genomeB)
        hash_table = {}
        hash_table[hash(str(start_node.state))] = start_node
        hash_table[hash(str(target_node.state))] = target_node

        # Rearrangement weights are inversely proportional to the ratios in
        # the first row of the ratios file.  A ratio of 0 cannot be inverted,
        # so it gets the squared maximum instead.  `**` is exponentiation;
        # `^` would be a bitwise XOR (e.g. 2 ^ 2 == 0).
        max_number = max(weight_ratios[0])
        weights = [max_number ** 2 if ratio == 0 else max_number / ratio
                   for ratio in weight_ratios[0]]

        New_Network_wrDCJ.build_hash_table(start_node, hash_table, adjacencies_genomeB, weights)
        network = New_Network_wrDCJ.build_network(hash_table)
        shortest_paths = list(all_shortest_paths(network, start_node, target_node, weight='weight'))

        # Walk every path once: record (genome, (operation type, operation))
        # per step and tally how often each counted operation type occurs.
        # Types outside `counted_types` (e.g. 'trp0') are reported but not
        # tallied.
        totals = dict.fromkeys(counted_types, 0)
        Paths_state = []
        for path in shortest_paths:
            path_state = []
            for i, current in enumerate(path):
                if i == 0:
                    operation_type = 'none, this is the source genome'
                    operation = 'N/A'
                else:
                    parent = path[i - 1]
                    # Position of `current` among the parent's children gives
                    # the operation that produced it.
                    x = parent.children.index(current)
                    operation = parent.children_operations[x][0]
                    operation_type = parent.children_operations[x][1]
                    if operation_type in totals:
                        totals[operation_type] += 1
                genome = get_adjacencies.adjacencies_to_genome(current.state)
                path_state.append((genome, (operation_type, operation)))
            Paths_state.append(path_state)

        n_solutions = len(shortest_paths)

        print(ruler)
        print()
        print('Source Genome: ', genomeA)
        print('Target Genome: ', genomeB)
        print()
        print('Number of most parsimonious solutions: ', n_solutions)
        print()
        print('Average number of each operation per solution:')
        # NOTE(review): int() truncates the averages (1.5 is shown as 1);
        # kept as-is so the report format does not change.
        print('Inversions: ', int(totals['inv'] / n_solutions),
              '  Transpositions type 1: ', int(totals['trp1'] / n_solutions),
              '  Transpositions type 2: ', int(totals['trp2'] / n_solutions),
              '  Balanced translocations: ', int(totals['b_trl'] / n_solutions),
              '  Unbalanced translocations: ', int(totals['u_trl'] / n_solutions),
              '  Fusions: ', int(totals['fus'] / n_solutions),
              '  Fissions: ', int(totals['fis'] / n_solutions))
        print()
        print()
        print('Solutions: ')
        print()
        for path_counter, path_state in enumerate(Paths_state, start=1):
            print('Solution number ', path_counter)
            for genome in path_state:
                print(genome)
            print()
        print()
        print(ruler)

        ###############################
        # JUST FOR TESTING
        # Compares the computed solutions with one hard-coded scenario.  Kept
        # because its prints are part of the report this function writes.

        solution = [([[1, 2, 3, 4, 15], [-8, -7, 6, -5, -14, -13, -12], [9, 11],
                      [-20, -19, -18, -17, -16, -32, 10, -31, -30, -29, -28, -27], [21, 22, 23, 24, 25, 26], [-33],
                      [34, 35, 36, 37, 38, 39, 40]], ('none, this is the source genome', 'N/A')), (
                    [[1, 2, 3, 4, 15], [-8, -7, -6, -5, -14, -13, -12], [9, 11],
                     [-20, -19, -18, -17, -16, -32, 10, -31, -30, -29, -28, -27], [21, 22, 23, 24, 25, 26], [-33],
                     [34, 35, 36, 37, 38, 39, 40]], ('inv', (((5.5, 6.5), (6, 7)), ((5.5, 6), (6.5, 7))))), (
                    [[1, 2, 3, 4, 15], [-8, -7, -6, -5, -14, -13, -12], [9, 11], [16, 17, 18, 19, 20],
                     [21, 22, 23, 24, 25, 26], [27, 28, 29, 30, 31, -10, 32, 33], [34, 35, 36, 37, 38, 39, 40]],
                    ('u_trl', (((16, 32.5), 33), ((32.5, 33), 16)))), (
                    [[1, 2, 3, 4, 5, 6, 7, 8], [9, 11], [12, 13, 14, 15], [16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26],
                     [27, 28, 29, 30, 31, -10, 32, 33], [34, 35, 36, 37, 38, 39, 40]],
                    ('b_trl', (((4.5, 15), (5, 14.5)), ((4.5, 5), (14.5, 15))))), (
                    [[1, 2, 3, 4, 5, 6, 7, 8], [9, 11], [12, 13, 14, 15], [16, 17, 18, 19, 20], [21, 22, 23, 24, 25, 26],
                     [27, 28, 29, 30, 31, 32, 33], [34, 35, 36, 37, 38, 39, 40], ['o', 10]],
                    ('trp0', (((10, 32), (10.5, 31.5)), ((10, 10.5), (31.5, 32))))), (
                    [[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19, 20],
                     [21, 22, 23, 24, 25, 26], [27, 28, 29, 30, 31, 32, 33], [34, 35, 36, 37, 38, 39, 40]],
                    ('trp1', (((9.5, 11), (10, 10.5)), ((9.5, 10), (10.5, 11)))))]

        # Strip the genomes, keeping only the (operation type, operation)
        # pairs of every step.
        paths_operations = [[step for (_genome, step) in path_state]
                            for path_state in Paths_state]
        solution_operations = [step for (_genome, step) in solution]

        # Reduce further to the bare sequence of operation types.
        sol_types = [op_type for op_type, _operation in solution_operations]
        path_types = [[op_type for op_type, _operation in steps]
                      for steps in paths_operations]

        # Number of computed solutions whose type sequence matches the
        # hard-coded one.
        counter = sum(1 for types in path_types if types == sol_types)

        print('sol len: ', len(solution))
        print('shortest path len: ', len(shortest_paths[0]))
        print('counter', counter)

        print('And the answer is... ', solution_operations in paths_operations)

        print('Source genome: ', genomeA)
        print('Target genome: ', genomeB)
        print()
        print('Solution: ', solution)
        ##########################################################################################################
Exemplo n.º 7
0
def run(args):
    """Find all minimum-weight rearrangement scenarios and write a report.

    Reads the source genome, the target genome and the weight ratios from the
    comma-separated files named on ``args``, builds the rearrangement network
    starting from the source genome, collects every minimum-weight path to the
    target genome via ``all_shortest_paths`` and prints a report with the
    average operation counts and each solution.  Everything printed while this
    function runs is written to ``args.output_file``.

    Args:
        args: Object providing the attributes ``source_genome``,
            ``target_genome``, ``ratios`` and ``output_file`` (file paths).
            Presumably an ``argparse`` namespace -- confirm against the caller.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from contextlib import redirect_stdout

    def read_int_rows(path):
        """Parse a comma-separated text file into a list of integer rows."""
        with open(path) as handle:
            # Blank lines (e.g. a trailing newline at the end of the file) are
            # skipped; int('') would otherwise raise ValueError.
            return [[int(field) for field in row.strip('\n').split(',')]
                    for row in handle if row.strip()]

    ruler = '############################################################################################################'
    counted_types = ('inv', 'trp1', 'trp2', 'b_trl', 'u_trl', 'fus', 'fis')

    # The context managers guarantee that the output file is closed and that
    # sys.stdout is restored even when something below raises.
    with open(args.output_file, 'w') as out_file, redirect_stdout(out_file):
        genomeA = read_int_rows(args.source_genome)
        genomeB = read_int_rows(args.target_genome)
        weight_ratios = read_int_rows(args.ratios)

        get_adjacencies = Extremities_and_adjacencies()
        adjacencies_genomeA = get_adjacencies.adjacencies_ordered_and_sorted(genomeA)
        adjacencies_genomeB = get_adjacencies.adjacencies_ordered_and_sorted(genomeB)

        # Create start and target node and register both in the hash table.
        start_node = Node(adjacencies_genomeA)
        target_node = Node(adjacencies_genomeB)
        hash_table = {}
        hash_table[hash(str(start_node.state))] = start_node
        hash_table[hash(str(target_node.state))] = target_node

        # Rearrangement weights are inversely proportional to the ratios in
        # the first row of the ratios file.  A ratio of 0 cannot be inverted,
        # so it gets the squared maximum instead.  `**` is exponentiation;
        # `^` would be a bitwise XOR (e.g. 2 ^ 2 == 0).
        max_number = max(weight_ratios[0])
        weights = [max_number ** 2 if ratio == 0 else max_number / ratio
                   for ratio in weight_ratios[0]]

        New_Network_wrDCJ.build_hash_table(start_node, hash_table, adjacencies_genomeB, weights)
        network = New_Network_wrDCJ.build_network(hash_table)
        shortest_paths = list(all_shortest_paths(network, start_node, target_node, weight='weight'))

        # Walk every path once: record (genome, (operation type, operation))
        # per step and tally how often each counted operation type occurs.
        # Types outside `counted_types` (e.g. 'trp0') are reported but not
        # tallied.
        totals = dict.fromkeys(counted_types, 0)
        Paths_state = []
        for path in shortest_paths:
            path_state = []
            for i, current in enumerate(path):
                if i == 0:
                    operation_type = 'none, this is the source genome'
                    operation = 'N/A'
                else:
                    parent = path[i - 1]
                    # Position of `current` among the parent's children gives
                    # the operation that produced it.
                    x = parent.children.index(current)
                    operation = parent.children_operations[x][0]
                    operation_type = parent.children_operations[x][1]
                    if operation_type in totals:
                        totals[operation_type] += 1
                genome = get_adjacencies.adjacencies_to_genome(current.state)
                path_state.append((genome, (operation_type, operation)))
            Paths_state.append(path_state)

        n_solutions = len(shortest_paths)
        # Per-solution average of every counted operation type.
        averages = {name: float(count / n_solutions)
                    for name, count in totals.items()}

        print(ruler)
        print()
        print('Source Genome: ', genomeA)
        print('Target Genome: ', genomeB)
        print()
        print('Number of most parsimonious solutions: ', n_solutions)
        print()
        # Type-2 transpositions are weighted twice in the overall average.
        # The summation order is kept fixed so the printed float is stable.
        print('Average number of operations per solution: ',
              averages['inv'] + averages['trp1'] + float(2 * (totals['trp2'] / n_solutions))
              + averages['b_trl'] + averages['u_trl'] + averages['fis'] + averages['fus'])
        print()
        print('Average number of each operation per solution:')
        print('Inversions: ', averages['inv'],
              '  Transpositions type 1: ', averages['trp1'],
              '  Transpositions type 2: ', averages['trp2'],
              '  Balanced translocations: ', averages['b_trl'],
              '  Unbalanced translocations: ', averages['u_trl'],
              '  Fusions: ', averages['fus'],
              '  Fissions: ', averages['fis'])
        print()
        print()
        print('Solutions: ')
        print()
        for path_counter, path_state in enumerate(Paths_state, start=1):
            print('Solution number ', path_counter)
            for genome in path_state:
                print(genome)
            print()
        print()
        print(ruler)