Exemplo n.º 1
0
    def __init__(self, genomes, name, log_file, tl):
        """Prepare the index-encoded input of a three-genome median instance.

        Each vertex of the contracted breakpoint graph (cbg) receives a
        positive integer index; the edges and telomeres returned by every
        genome's ``convert_to_genome_graph()`` are then re-expressed through
        those indices.

        :param genomes: exactly three genome objects, each providing
            ``get_gene_multiset()`` and ``convert_to_genome_graph()``.
        :param name: value stored in ``self.name_model``.
        :param log_file: value stored in ``self.log_file``.
        :param tl: value stored in ``self.time_limit``.
        :raises Exception: when ``genomes`` does not contain three items.
        """
        if len(genomes) != 3:
            raise Exception("Incorrect number of genomes for median problems")

        self.gene_sets = [set(g.get_gene_multiset().keys()) for g in genomes]
        union_of_genes = reduce(operator.or_, self.gene_sets, set())
        self.s_all_genes = complete_genes_multiset(union_of_genes, 1)

        # Position 0 holds a '' placeholder, so real vertices are numbered
        # from 1 upwards.
        self.cbg_vertex_set = vertex_set_from_gene_multiset(self.s_all_genes)
        self.cbg_ind2vertex = [''] + list(self.cbg_vertex_set)
        self.cbg_vertex2ind = {
            vertex: index
            for index, vertex in enumerate(self.cbg_ind2vertex) if index >= 1
        }
        to_index = self.cbg_vertex2ind

        def indexed_edge(u, v):
            # Endpoints are sorted so that each undirected edge has a single
            # canonical tuple form.
            return tuple(sorted((to_index[u], to_index[v])))

        self.ind_cbg_obverse_edges = {
            indexed_edge(u, v)
            for u, v in observed_edges_from_gene_multiset(self.s_all_genes)
        }

        # Each genome graph is unpacked below as a (matching, telomers) pair.
        genome_graphs = [g.convert_to_genome_graph() for g in genomes]

        self.ind_cbg_p_i_vertex_sets = []
        for gene_set in self.gene_sets:
            own_vertices = vertex_set_from_gene_multiset(
                complete_genes_multiset(gene_set, 1))
            self.ind_cbg_p_i_vertex_sets.append(
                {to_index[u] for u in own_vertices})

        self.ind_cbg_p_i_edges = []
        self.ind_cbg_p_i_telomers = []
        for matching, telomers in genome_graphs:
            self.ind_cbg_p_i_edges.append(
                {indexed_edge(u, v) for u, v in matching})
            self.ind_cbg_p_i_telomers.append({to_index[u] for u in telomers})

        # The ancestral sets keep everything shared by at least two of the
        # input genomes (union of all pairwise intersections).
        genome_pairs = list(itertools.combinations(range(len(genomes)), 2))
        shared_vertices = [
            self.ind_cbg_p_i_vertex_sets[i] & self.ind_cbg_p_i_vertex_sets[j]
            for i, j in genome_pairs
        ]
        self.ind_ancestral_set = set(
            reduce(operator.or_, shared_vertices, set()))
        shared_genes = [
            self.gene_sets[i] & self.gene_sets[j] for i, j in genome_pairs
        ]
        self.ancestral_gene_set = reduce(operator.or_, shared_genes, set())

        self.number_of_genomes = len(genomes)
        # Length of the index table, i.e. number of cbg vertices plus the
        # placeholder slot.
        self.biggest_const = len(self.cbg_ind2vertex)
        self.name_model = name
        self.log_file = log_file
        self.time_limit = tl
Exemplo n.º 2
0
    def __init__(self,
                 duplicated_genome,
                 ordinary_genomes,
                 name,
                 log_file,
                 tl,
                 mult=2):
        """Prepare the index-encoded data of a guided halving instance.

        Two index tables are built: one for the contracted breakpoint graph
        (cbg, every gene with multiplicity 1) and one for the breakpoint
        graph (bg, every gene with multiplicity ``mult``).  The edges and
        telomeres of the ordinary genome and of the duplicated genome are
        re-expressed through those indices.  Finally the connected
        components of the contracted graph of the duplicated genome are
        analysed to decide which ancestral edges are allowed.

        :param duplicated_genome: genome object providing
            ``get_gene_multiset()``, ``convert_to_genome_graph()`` and
            ``convert_to_contracted_genome_graph()``.
        :param ordinary_genomes: sequence holding exactly one genome object
            providing ``get_gene_multiset()`` and
            ``convert_to_genome_graph()``.
        :param name: value stored in ``self.name_model``.
        :param log_file: value stored in ``self.log_file``.
        :param tl: value stored in ``self.time_limit``.
        :param mult: multiplicity of the duplicated genome; only 2 and 3 are
            accepted.
        :raises Exception: if ``ordinary_genomes`` does not hold exactly one
            genome or ``mult`` is outside the range 2..3.
        """
        if len(ordinary_genomes) != 1:
            raise Exception(
                "Incorrect number of genomes for guided halving problems")

        if mult < 2 or mult > 3:
            raise Exception("Unsupported multiplication of a genome")

        self.gene_sets = [
            set(genome.get_gene_multiset().keys())
            for genome in ordinary_genomes
        ]
        self.genes_of_dupl_genome = duplicated_genome.get_gene_multiset()

        self.s_all_genes = complete_genes_multiset(
            reduce(operator.or_, self.gene_sets, set())
            | self.genes_of_dupl_genome.keys(), 1)

        # Coding contracted breakpoint graph.
        # Index 0 is a '' placeholder, real vertices are numbered from 1.
        self.cbg_vertex_set = vertex_set_from_gene_multiset(self.s_all_genes)
        self.cbg_ind2vertex = [''] + list(self.cbg_vertex_set)
        self.cbg_vertex2ind = {
            self.cbg_ind2vertex[i]: i
            for i in range(1, len(self.cbg_ind2vertex))
        }

        # Every edge is stored as a sorted index pair so that an undirected
        # edge has exactly one representation.
        obverse_edges = observed_edges_from_gene_multiset(self.s_all_genes)
        self.ind_cbg_obverse_edges = {
            tuple(sorted((self.cbg_vertex2ind[u], self.cbg_vertex2ind[v])))
            for u, v in obverse_edges
        }

        # Each genome graph is unpacked below as a (matching, telomers) pair.
        genome_graphs = [
            genome.convert_to_genome_graph() for genome in ordinary_genomes
        ]
        self.ind_cbg_p_i_vertex_sets = [{
            self.cbg_vertex2ind[u]
            for u in vertex_set_from_gene_multiset(
                complete_genes_multiset(gene_set, 1))
        } for gene_set in self.gene_sets]
        self.ind_cbg_p_i_edges = [{
            tuple(sorted((self.cbg_vertex2ind[u], self.cbg_vertex2ind[v])))
            for u, v in matching
        } for matching, _ in genome_graphs]
        self.ind_cbg_p_i_telomers = [{
            self.cbg_vertex2ind[u]
            for u in telomers
        } for _, telomers in genome_graphs]

        # This contracted genome graph does not contain parallel edges
        # (the edges are kept in a set). Maybe a problem.
        cbg_A_matching, cbg_A_telomers = (
            duplicated_genome.convert_to_contracted_genome_graph())
        self.ind_cbg_A_vertices = {
            self.cbg_vertex2ind[u]
            for u in vertex_set_from_gene_multiset(
                complete_genes_multiset(self.genes_of_dupl_genome.keys(), 1))
        }
        self.ind_cbg_A_edges = {
            tuple(sorted((self.cbg_vertex2ind[u], self.cbg_vertex2ind[v])))
            for u, v in cbg_A_matching
        }
        self.ind_cbg_A_telomers = {
            self.cbg_vertex2ind[u]
            for u in cbg_A_telomers
        }

        self.ms_all_genes = complete_genes_multiset(
            reduce(operator.or_, self.gene_sets, set())
            | self.genes_of_dupl_genome.keys(), mult)

        # Coding breakpoint graph (same 1-based indexing scheme as above).
        self.bg_vertex_set = vertex_set_from_gene_multiset(self.ms_all_genes)
        self.bg_ind2vertex = [''] + list(self.bg_vertex_set)
        self.bg_vertex2ind = {
            self.bg_ind2vertex[i]: i
            for i in range(1, len(self.bg_ind2vertex))
        }

        bg_A_matching, bg_A_telomers = (
            duplicated_genome.convert_to_genome_graph())
        self.ind_bg_A_vertices = {
            self.bg_vertex2ind[u]
            for u in vertex_set_from_gene_multiset(self.genes_of_dupl_genome)
        }
        self.ind_bg_A_edges = {
            tuple(sorted((self.bg_vertex2ind[u], self.bg_vertex2ind[v])))
            for u, v in bg_A_matching
        }
        self.ind_bg_A_telomers = {self.bg_vertex2ind[u] for u in bg_A_telomers}

        # Connection between graphs
        self.equiv_map = define_equiv_function(self.ms_all_genes,
                                               self.cbg_vertex2ind,
                                               self.bg_vertex2ind)

        # TODO: HERE need to extend for indels
        self.ind_ancestral_set = self.ind_cbg_p_i_vertex_sets[0]
        self.ancestral_gene_set = self.gene_sets[0]

        self.multiplicity = mult
        self.number_of_genomes = len(ordinary_genomes)
        # Length of the bg index table (bg vertices plus the placeholder).
        self.biggest_const = len(self.bg_ind2vertex)
        self.name_model = name
        self.log_file = log_file
        self.time_limit = tl

        # A telomere is allowed at every ancestral vertex as soon as any
        # input genome (duplicated or ordinary) has at least one telomere.
        flag = len(self.ind_bg_A_telomers) != 0 or any(
            len(telomers) != 0 for telomers in self.ind_cbg_p_i_telomers)
        self.allowable_ancestral_telomers = {
            x: flag
            for x in self.ind_ancestral_set
        }

        # Graph on all cbg indices with the contracted edges of the
        # duplicated genome.  ``ind_cbg_A_edges`` is a flat set of (u, v)
        # index pairs, so it is iterated directly.
        graph = nx.MultiGraph()
        graph.add_nodes_from(self.cbg_vertex2ind.values())
        for u, v in self.ind_cbg_A_edges:
            graph.add_edge(u, v)

        self.number_of_even_cycles = 0
        self.number_of_even_paths = 0
        vertex_sets_for_ancestral_edges = []
        meta_vertex_set_for_edges = []
        # Only the node sets of the components are needed, so
        # ``nx.connected_components`` is sufficient here.
        for node_set in nx.connected_components(graph):
            nodes = list(node_set)
            is_cycle = all(graph.degree(v) == 2 for v in nodes)

            if is_cycle and len(nodes) % 2 == 0:
                # Cycles with an even number of vertices keep their
                # ancestral edges inside the component.
                self.number_of_even_cycles += 1
                vertex_sets_for_ancestral_edges.append(nodes)
            else:
                # Non-cycles with an odd number of vertices (an even number
                # of edges when the component is a path) are counted as
                # even paths.
                if not is_cycle and len(nodes) % 2 != 0:
                    self.number_of_even_paths += 1
                # All remaining components share one pooled vertex set.
                meta_vertex_set_for_edges.extend(nodes)
        vertex_sets_for_ancestral_edges.append(meta_vertex_set_for_edges)

        # Ancestral edges are allowed only between vertices of the same
        # vertex set; for each vertex remember the edges incident to it.
        self.allowable_ancestral_edges = set()
        self.connection_ancestral_constrs = dict()
        for v_set in vertex_sets_for_ancestral_edges:
            self.allowable_ancestral_edges.update(
                tuple(sorted((u, v)))
                for u, v in itertools.combinations(v_set, 2))
            self.connection_ancestral_constrs.update({
                u: {tuple(sorted((u, v)))
                    for v in v_set if u != v}
                for u in v_set
            })
Exemplo n.º 3
0
    def __init__(self, genomes, name, log_file, tl):
        """Prepare the index-encoded data of a three-genome median instance.

        Every vertex of the contracted breakpoint graph (cbg) gets a positive
        integer index, and the edges and telomeres of the three genomes are
        re-expressed through those indices.  The connected components of the
        union of the genome edges are then used to restrict the allowed
        ancestral edges and to count cycles and even paths.

        :param genomes: exactly three genome objects, each providing
            ``get_gene_multiset()`` and ``convert_to_genome_graph()``.
        :param name: value stored in ``self.name_model``.
        :param log_file: value stored in ``self.log_file``.
        :param tl: value stored in ``self.time_limit``.
        :raises Exception: when ``genomes`` does not contain three items.
        """
        if len(genomes) != 3:
            raise Exception("Incorrect number of genomes for median problem")

        self.gene_sets = [
            set(genome.get_gene_multiset().keys()) for genome in genomes
        ]
        self.s_all_genes = complete_genes_multiset(
            reduce(operator.or_, self.gene_sets, set()), 1)

        # Index 0 is a '' placeholder, real vertices are numbered from 1.
        self.cbg_vertex_set = vertex_set_from_gene_multiset(self.s_all_genes)
        self.cbg_ind2vertex = [''] + list(self.cbg_vertex_set)
        self.cbg_vertex2ind = {
            self.cbg_ind2vertex[i]: i
            for i in range(1, len(self.cbg_ind2vertex))
        }

        # Every edge is stored as a sorted index pair so that an undirected
        # edge has exactly one representation.
        obverse_edges = observed_edges_from_gene_multiset(self.s_all_genes)
        self.ind_cbg_obverse_edges = {
            tuple(sorted((self.cbg_vertex2ind[u], self.cbg_vertex2ind[v])))
            for u, v in obverse_edges
        }

        # Each genome graph is unpacked below as a (matching, telomers) pair.
        genome_graphs = [
            genome.convert_to_genome_graph() for genome in genomes
        ]
        self.ind_cbg_p_i_vertex_sets = [{
            self.cbg_vertex2ind[u]
            for u in vertex_set_from_gene_multiset(
                complete_genes_multiset(gene_set, 1))
        } for gene_set in self.gene_sets]
        self.ind_cbg_p_i_edges = [{
            tuple(sorted((self.cbg_vertex2ind[u], self.cbg_vertex2ind[v])))
            for u, v in matching
        } for matching, _ in genome_graphs]
        self.ind_cbg_p_i_telomers = [{
            self.cbg_vertex2ind[u]
            for u in telomers
        } for _, telomers in genome_graphs]

        # The ancestral sets keep everything shared by at least two of the
        # input genomes (union of all pairwise intersections).
        self.ind_ancestral_set = set(
            reduce(operator.or_, [
                self.ind_cbg_p_i_vertex_sets[i]
                & self.ind_cbg_p_i_vertex_sets[j]
                for i, j in itertools.combinations(range(len(genomes)), 2)
            ], set()))

        self.ancestral_gene_set = reduce(operator.or_, [
            self.gene_sets[i] & self.gene_sets[j]
            for i, j in itertools.combinations(range(len(self.gene_sets)), 2)
        ], set())

        self.number_of_genomes = len(genomes)
        # Length of the index table (cbg vertices plus the placeholder).
        self.biggest_const = len(self.cbg_ind2vertex)
        self.name_model = name
        self.log_file = log_file
        self.time_limit = tl

        # A telomere is allowed at every ancestral vertex as soon as any
        # input genome has at least one telomere.
        flag = any(
            len(telomers) != 0 for telomers in self.ind_cbg_p_i_telomers)
        self.allowable_telomers = {x: flag for x in self.ind_ancestral_set}

        # Multigraph on all cbg indices holding the edges of all genomes
        # (an edge shared by several genomes appears as parallel edges).
        graph = nx.MultiGraph()
        graph.add_nodes_from(self.cbg_vertex2ind.values())
        for edges in self.ind_cbg_p_i_edges:
            for u, v in edges:
                graph.add_edge(u, v)

        self.allowable_ancestral_edges = set()
        self.connection_constrs = dict()
        self.number_of_cycles = 0
        self.number_of_even_paths = 0

        # Only the node sets of the components are needed, so
        # ``nx.connected_components`` is sufficient here.
        for node_set in nx.connected_components(graph):
            nodes = list(node_set)

            # Ancestral edges may only join vertices of the same component;
            # for each vertex remember the edges incident to it.
            for v in nodes:
                incident = {tuple(sorted((u, v))) for u in nodes if u != v}
                self.allowable_ancestral_edges |= incident
                self.connection_constrs[v] = incident

            # A component whose vertices all have degree 2 is a cycle;
            # any other component with an odd number of vertices is counted
            # as an even path (even number of edges if it is a path).
            if all(graph.degree(v) == 2 for v in nodes):
                self.number_of_cycles += 1
            elif (len(nodes) - 1) % 2 == 0:
                self.number_of_even_paths += 1
Exemplo n.º 4
0
    def __init__(self,
                 duplicated_genome,
                 ordinary_genomes,
                 name,
                 log_file,
                 tl,
                 mult=2):
        """Prepare the index-encoded data of a guided halving instance.

        Two index tables are built: one for the contracted breakpoint graph
        (cbg, every gene with multiplicity 1) and one for the breakpoint
        graph (bg, every gene with multiplicity ``mult``).  The edges and
        telomeres of the ordinary genome and of the duplicated genome are
        then re-expressed through those indices.

        :param duplicated_genome: genome object providing
            ``get_gene_multiset()``, ``convert_to_genome_graph()`` and
            ``convert_to_contracted_genome_graph()``.
        :param ordinary_genomes: sequence holding exactly one genome object
            providing ``get_gene_multiset()`` and
            ``convert_to_genome_graph()``.
        :param name: value stored in ``self.name_model``.
        :param log_file: value stored in ``self.log_file``.
        :param tl: value stored in ``self.time_limit``.
        :param mult: multiplicity of the duplicated genome; only 2 and 3 are
            accepted.
        :raises Exception: if ``ordinary_genomes`` does not hold exactly one
            genome or ``mult`` is outside the range 2..3.
        """
        if len(ordinary_genomes) != 1:
            raise Exception(
                "Incorrect number of genomes for guided halving problems")

        if mult < 2 or mult > 3:
            raise Exception("Unsupported multiplication of a genome")

        def positions_of(ind2vertex):
            # Invert an index -> vertex list, skipping the placeholder at 0.
            return {
                vertex: index
                for index, vertex in enumerate(ind2vertex) if index >= 1
            }

        def canonical_edge(vertex2ind, u, v):
            # Sorted index pair: one representation per undirected edge.
            return tuple(sorted((vertex2ind[u], vertex2ind[v])))

        self.gene_sets = [
            set(g.get_gene_multiset().keys()) for g in ordinary_genomes
        ]
        self.genes_of_dupl_genome = duplicated_genome.get_gene_multiset()

        single_copy_genes = (reduce(operator.or_, self.gene_sets, set())
                             | self.genes_of_dupl_genome.keys())
        self.s_all_genes = complete_genes_multiset(single_copy_genes, 1)

        # Coding contracted breakpoint graph
        self.cbg_vertex_set = vertex_set_from_gene_multiset(self.s_all_genes)
        self.cbg_ind2vertex = [''] + list(self.cbg_vertex_set)
        self.cbg_vertex2ind = positions_of(self.cbg_ind2vertex)
        cbg_index = self.cbg_vertex2ind

        self.ind_cbg_obverse_edges = {
            canonical_edge(cbg_index, u, v)
            for u, v in observed_edges_from_gene_multiset(self.s_all_genes)
        }

        # Each genome graph is unpacked below as a (matching, telomers) pair.
        genome_graphs = [g.convert_to_genome_graph() for g in ordinary_genomes]

        self.ind_cbg_p_i_vertex_sets = []
        for gene_set in self.gene_sets:
            own_vertices = vertex_set_from_gene_multiset(
                complete_genes_multiset(gene_set, 1))
            self.ind_cbg_p_i_vertex_sets.append(
                {cbg_index[u] for u in own_vertices})

        self.ind_cbg_p_i_edges = []
        self.ind_cbg_p_i_telomers = []
        for matching, telomers in genome_graphs:
            self.ind_cbg_p_i_edges.append(
                {canonical_edge(cbg_index, u, v) for u, v in matching})
            self.ind_cbg_p_i_telomers.append({cbg_index[u] for u in telomers})

        # This contracted genome graph does not contain parallel edges. Maybe a problem.
        contracted_graph = duplicated_genome.convert_to_contracted_genome_graph()
        cbg_A_matching, cbg_A_telomers = contracted_graph
        dupl_single_copy_vertices = vertex_set_from_gene_multiset(
            complete_genes_multiset(self.genes_of_dupl_genome.keys(), 1))
        self.ind_cbg_A_vertices = {
            cbg_index[u] for u in dupl_single_copy_vertices
        }
        self.ind_cbg_A_edges = {
            canonical_edge(cbg_index, u, v) for u, v in cbg_A_matching
        }
        self.ind_cbg_A_telomers = {cbg_index[u] for u in cbg_A_telomers}

        multi_copy_genes = (reduce(operator.or_, self.gene_sets, set())
                            | self.genes_of_dupl_genome.keys())
        self.ms_all_genes = complete_genes_multiset(multi_copy_genes, mult)

        # Coding breakpoint graph
        self.bg_vertex_set = vertex_set_from_gene_multiset(self.ms_all_genes)
        self.bg_ind2vertex = [''] + list(self.bg_vertex_set)
        self.bg_vertex2ind = positions_of(self.bg_ind2vertex)
        bg_index = self.bg_vertex2ind

        bg_A_matching, bg_A_telomers = (
            duplicated_genome.convert_to_genome_graph())
        dupl_vertices = vertex_set_from_gene_multiset(
            self.genes_of_dupl_genome)
        self.ind_bg_A_vertices = {bg_index[u] for u in dupl_vertices}
        self.ind_bg_A_edges = {
            canonical_edge(bg_index, u, v) for u, v in bg_A_matching
        }
        self.ind_bg_A_telomers = {bg_index[u] for u in bg_A_telomers}

        # Connection between graphs
        self.equiv_map = define_equiv_function(self.ms_all_genes,
                                               self.cbg_vertex2ind,
                                               self.bg_vertex2ind)

        # TODO: HERE need to extend for indels
        self.ind_ancestral_set = self.ind_cbg_p_i_vertex_sets[0]
        self.ancestral_gene_set = self.gene_sets[0]

        self.multiplicity = mult
        self.number_of_genomes = len(ordinary_genomes)
        # Length of the bg index table (bg vertices plus the placeholder).
        self.biggest_const = len(self.bg_ind2vertex)
        self.name_model = name
        self.log_file = log_file
        self.time_limit = tl
Exemplo n.º 5
0
    def __init__(self,
                 ordinary_genome,
                 duplicated_genome,
                 name,
                 log_file,
                 tl,
                 mult=2):
        """Prepare the index-encoded data for one ordinary and one duplicated genome.

        Two index tables are built: one for the breakpoint graph (bg, every
        gene with multiplicity ``mult``) and one for the contracted
        breakpoint graph (cbg, every gene with multiplicity 1).  The
        duplicated genome ``A`` is encoded over bg indices, the ordinary
        genome ``R`` over cbg indices, and several derived vertex/telomere
        index sets are precomputed.

        :param ordinary_genome: genome object providing
            ``get_gene_multiset()`` and ``convert_to_genome_graph()``.
        :param duplicated_genome: genome object providing the same two
            methods.
        :param name: value stored in ``self.name_model``.
        :param log_file: value stored in ``self.log_file``.
        :param tl: value stored in ``self.time_limit``.
        :param mult: multiplicity of the duplicated genome; only 2 and 3 are
            accepted.
        :raises Exception: if ``mult`` is outside the range 2..3.
        """
        if mult < 2 or mult > 3:
            raise Exception("Unsupported multiplication of a genome")

        def positions_of(ind2vertex):
            # Invert an index -> vertex list, skipping the placeholder at 0.
            return {
                vertex: index
                for index, vertex in enumerate(ind2vertex) if index >= 1
            }

        def canonical_edge(vertex2ind, u, v):
            # Sorted index pair: one representation per undirected edge.
            return tuple(sorted((vertex2ind[u], vertex2ind[v])))

        self.multiplicity = mult
        self.genes_of_dupl_genome = duplicated_genome.get_gene_multiset()
        self.genes_of_ord_genome = ordinary_genome.get_gene_multiset()

        single_copy_genes = (self.genes_of_ord_genome.keys()
                             | self.genes_of_dupl_genome.keys())
        self.s_all_genes = complete_genes_multiset(single_copy_genes, 1)
        multi_copy_genes = (self.genes_of_ord_genome.keys()
                            | self.genes_of_dupl_genome.keys())
        self.ms_all_genes = complete_genes_multiset(multi_copy_genes, mult)

        # Coding breakpoint graph
        self.bg_vertex_set = vertex_set_from_gene_multiset(self.ms_all_genes)
        self.bg_ind2vertex = [''] + list(self.bg_vertex_set)
        self.bg_vertex2ind = positions_of(self.bg_ind2vertex)
        bg_index = self.bg_vertex2ind

        bg_A_matching, bg_A_telomers = (
            duplicated_genome.convert_to_genome_graph())
        dupl_vertices = vertex_set_from_gene_multiset(
            self.genes_of_dupl_genome)
        self.ind_bg_A_vertices = {bg_index[u] for u in dupl_vertices}
        self.ind_bg_A_edges = {
            canonical_edge(bg_index, u, v) for u, v in bg_A_matching
        }
        self.ind_bg_A_telomers = {bg_index[u] for u in bg_A_telomers}

        # Coding contracted breakpoint graph
        self.cbg_vertex_set = vertex_set_from_gene_multiset(self.s_all_genes)
        self.cbg_ind2vertex = [''] + list(self.cbg_vertex_set)
        self.cbg_vertex2ind = positions_of(self.cbg_ind2vertex)
        cbg_index = self.cbg_vertex2ind

        cbg_R_matching, cbg_R_telomers = (
            ordinary_genome.convert_to_genome_graph())
        ord_vertices = vertex_set_from_gene_multiset(self.genes_of_ord_genome)
        self.ind_cbg_R_vertices = {cbg_index[u] for u in ord_vertices}
        self.ind_cbg_R_edges = {
            canonical_edge(cbg_index, u, v) for u, v in cbg_R_matching
        }
        self.ind_cbg_R_telomers = {cbg_index[u] for u in cbg_R_telomers}

        self.equiv_map = define_equiv_function(self.ms_all_genes,
                                               self.cbg_vertex2ind,
                                               self.bg_vertex2ind)

        def bg_images(cbg_indices):
            # Union of the equiv_map entries of the given cbg indices; the
            # entries are used as collections of bg indices below.
            return reduce(operator.or_,
                          [set(self.equiv_map[v]) for v in cbg_indices],
                          set())

        # Coding completion of genomes
        self.ind_compl_A = (set(self.bg_vertex2ind.values())
                            - self.ind_bg_A_vertices)
        self.ind_compl_R = (set(self.bg_vertex2ind.values())
                            - bg_images(self.ind_cbg_R_vertices))

        # hat_V(X)\V(R)
        self.ind_cbg_compl_R = (set(self.cbg_vertex2ind.values())
                                - self.ind_cbg_R_vertices)

        # J_0(2R)
        self.ind_bg_2R_telomers = bg_images(self.ind_cbg_R_telomers)

        # J_1(A)
        self.ind_bg_A_nontelomers = (self.ind_bg_A_vertices
                                     - self.ind_bg_A_telomers)

        telomers_2R = self.ind_bg_2R_telomers
        # J^1
        self.ind_first_type_telomers = telomers_2R - self.ind_bg_A_vertices
        # J^2
        self.ind_second_type_telomers = self.ind_compl_A - telomers_2R
        # J^3
        self.ind_third_type_telomers = self.ind_bg_A_nontelomers & telomers_2R
        # J^4
        self.ind_forth_type_telomers = self.ind_bg_A_telomers - telomers_2R

        # Length of the bg index table (bg vertices plus the placeholder).
        self.biggest_const = len(self.bg_ind2vertex)
        self.name_model = name
        self.log_file = log_file
        self.time_limit = tl