def testEquals(self):
     # Two pairs wrapping the very same mite and gene objects must compare
     # equal.
     shared_mite = Mite("ABC", "4", "+", "132", "154", "NULL", "Unclassified",
                        "Unclassified", "3")
     shared_gene = Gene("gABC", "4", "+", "132", "154", "5", "3")
     self.assertEqual(MiteGenePair(shared_mite, shared_gene),
                      MiteGenePair(shared_mite, shared_gene))
 def testToRowMiteInGene(self):
     # The mite (132-154) lies inside the gene (120-160). The expected row
     # ends in 0, and the "NULL" / "Unclassified" mite arguments are expected
     # to come out as three empty fields.
     nested_mite = Mite("ABC", "4", "+", "132", "154", "NULL", "Unclassified",
                        "Unclassified", "3")
     enclosing_gene = Gene("gABC", "4", "+", "120", "160", "5", "3")
     actual_row = MiteGenePair(nested_mite, enclosing_gene).to_csv_row()
     wanted_row = "ABC,4,+,132,154,3,,,,gABC,4,+,120,160,3,5,0"
     self.assertEqual(wanted_row, actual_row)
 def testNotEquals(self):
     # The two mites differ only in their second argument ("4" vs "3"); the
     # two genes are built from identical arguments. That single difference
     # must be enough to make the pairs unequal.
     left_mite = Mite("ABC", "4", "+", "132", "154", "NULL", "Unclassified",
                      "Unclassified", "3")
     left_gene = Gene("gABC", "4", "+", "132", "154", "5", "3")
     right_mite = Mite("ABC", "3", "+", "132", "154", "NULL", "Unclassified",
                       "Unclassified", "3")
     right_gene = Gene("gABC", "4", "+", "132", "154", "5", "3")
     self.assertNotEqual(MiteGenePair(left_mite, left_gene),
                         MiteGenePair(right_mite, right_gene))
    def test_pair_mites_correctly_separates_by_genome_location(self):
        # Eight mites and eight genes spread over two values of the last
        # constructor argument (presumably the genome id) and two chromosomes.
        # Every mite is expected to be paired with a gene that shares both of
        # those values, even when a gene elsewhere has closer coordinates.
        mites = [
            Mite("ABC1", "1", "+", "1", "10", "NULL", "AA", "A", "1"),
            Mite("ABC2", "1", "-", "21", "30", "NULL", "AA", "A", "1"),
            Mite("ABC3", "2", "+", "41", "50", "NULL", "AA", "A", "1"),
            Mite("ABC4", "2", "-", "61", "70", "NULL", "AA", "A", "1"),
            Mite("ABC5", "1", "+", "81", "90", "NULL", "AA", "A", "2"),
            Mite("ABC6", "1", "-", "101", "110", "NULL", "AA", "A", "2"),
            Mite("ABC7", "2", "+", "121", "130", "NULL", "AA", "A", "2"),
            Mite("ABC8", "2", "-", "141", "150", "NULL", "AA", "A", "2"),
        ]
        genes = [
            Gene("gABC1", "1", "+", "126", "135", "5", "1"),
            Gene("gABC2", "1", "-", "146", "155", "5", "1"),
            Gene("gABC3", "2", "+", "6", "15", "5", "1"),
            Gene("gABC4", "2", "-", "26", "35", "5", "1"),
            Gene("gABC5", "1", "+", "46", "55", "5", "2"),
            Gene("gABC6", "1", "-", "66", "75", "5", "2"),
            Gene("gABC7", "2", "+", "86", "95", "5", "2"),
            Gene("gABC8", "2", "-", "106", "115", "5", "2"),
        ]

        pairs = self.pairer.pair_mites_with_genes(mites, genes)

        # (mite position, gene position) within the lists above, in the order
        # the pairs are expected to be returned.
        expected_positions = [
            (0, 0), (1, 0),
            (2, 3), (3, 3),
            (4, 5), (5, 5),
            (6, 7), (7, 7),
        ]
        expected_pairs = [
            MiteGenePair(mites[mite_pos], genes[gene_pos])
            for mite_pos, gene_pos in expected_positions
        ]
        self.assertEqual(pairs, expected_pairs)
Exemplo n.º 5
0
def main():
    """Pair every mite with a gene and write the pairs to a CSV file.

    Parses mites and genes from fixed relative CSV paths, pairs them with
    ``MitePairer.pair_mites_with_genes`` and writes a header row followed by
    one row per pair to ``../../results/mite_gene_pairs.csv``. The wall-clock
    duration of each stage is printed to stdout.
    """
    mites_path = "../../data/all_mites.csv"
    genes_path = "../../data/all_genes.csv"

    mites_parser = MiteCSVParser()
    genes_parser = GeneCSVParser()
    pairer = MitePairer()

    start_time = time.time()
    mites = mites_parser.parse_csv(mites_path)
    end_time = time.time()
    print("Mite CSV parsing took " + str(end_time - start_time) + "s")

    start_time = time.time()
    genes = genes_parser.parse_csv(genes_path)
    end_time = time.time()
    print("Gene CSV parsing took " + str(end_time - start_time) + "s")

    start_time = time.time()
    pairs = pairer.pair_mites_with_genes(mites, genes)
    end_time = time.time()
    print("Finding mite-gene pairs took " + str(end_time - start_time) + "s")

    start_time = time.time()
    output_path = "../../results/mite_gene_pairs.csv"
    # The context manager guarantees the file is flushed and closed even if
    # serializing or writing one of the rows raises part-way through.
    with open(output_path, mode="w+") as output_file:
        output_file.write(MiteGenePair.csv_headers() + "\n")
        for pair in pairs:
            output_file.write(pair.to_csv_row() + "\n")

    end_time = time.time()
    print("Writing mite-gene pairs took " + str(end_time - start_time) + "s")
Exemplo n.º 6
0
    def pair_mites_with_genes(self, mites, genes):
        """Pair each mite with a gene from the same genome and chromosome.

        A mite whose ``correlated_gene`` names a gene found in the by-name
        index for its genome/chromosome is paired with that gene directly.
        Every other mite is handed to ``_get_closest_mite_gene_pair``; it is
        left out of the result when that helper returns ``None``.

        Args:
            mites: list of mite elements.
            genes: list of gene elements (concatenated with ``mites``).

        Returns:
            A list of ``MiteGenePair`` objects, in index iteration order
            (genome, then chromosome) and by ascending ``start`` of the mite
            inside each chromosome.
        """
        indexer = GeneticElementIndex()
        combined = mites + genes
        elements_by_name = indexer.index_elements(combined, by_name=True)
        elements_by_location = indexer.index_elements(combined, by_name=False)

        found = []
        for genome_key in elements_by_location:
            chromosomes = elements_by_location[genome_key]
            for chromosome_key in chromosomes:
                # Work on a sorted copy so the index itself is left untouched.
                ordered = sorted(chromosomes[chromosome_key],
                                 key=lambda item: item.start)
                for position, candidate in enumerate(ordered):
                    if candidate.type != CONSTANTS.mite_type:
                        continue

                    wanted_id = candidate.correlated_gene
                    named = elements_by_name[genome_key][chromosome_key]
                    if wanted_id is not None and wanted_id in named:
                        target = named[wanted_id]
                        # The name may resolve to a non-gene element; only an
                        # actual gene short-circuits the distance heuristic.
                        if target.type == CONSTANTS.gene_type:
                            found.append(MiteGenePair(candidate, target))
                            continue

                    fallback = self._get_closest_mite_gene_pair(
                        ordered, candidate, position)
                    if fallback is not None:
                        found.append(fallback)

        return found
    def test_pair_mites_with_genes_results_independent_of_strand(self):
        # Mites and genes sit on a mix of "+" and "-" strands; the expected
        # pairs follow from the coordinates alone and are listed in ascending
        # order of mite start.
        mite_mid = Mite("ABC", "4", "+", "132", "154", "NULL", "AA", "A", "3")
        mite_first = Mite("ABC1", "4", "+", "0", "15", "NULL", "AA", "A", "3")
        mite_last = Mite("ABC2", "4", "-", "190", "200", "NULL", "AA", "A", "3")
        mite_second = Mite("ABC3", "4", "-", "50", "65", "NULL", "AA", "A", "3")

        gene_low = Gene("gABC", "4", "-", "100", "115", "5", "3")
        gene_high = Gene("gBBC", "4", "+", "160", "170", "5", "3")

        pairs = self.pairer.pair_mites_with_genes(
            [mite_mid, mite_first, mite_last, mite_second],
            [gene_low, gene_high])

        expected_pairs = [
            MiteGenePair(mite, gene)
            for mite, gene in (
                (mite_first, gene_low),
                (mite_second, gene_low),
                (mite_mid, gene_high),
                (mite_last, gene_high),
            )
        ]
        self.assertEqual(pairs, expected_pairs)
    def test_pair_mites_uses_heuristics_if_correlated_not_exist(self):
        # The mite names "ABC" as its correlated gene, but no gene with that
        # name is supplied, so pairing must fall back to the heuristics. The
        # expected partner is the gene spanning 100-143, which overlaps the
        # mite's 132-154 range.
        lone_mite = Mite("ABC", "4", "+", "132", "154", "ABC", "AA", "A", "3")

        far_gene = Gene("gDBC", "4", "+", "0", "10", "5", "3")
        right_gene = Gene("gBBC", "4", "+", "155", "170", "5", "3")
        overlapping_gene = Gene("gCBC", "4", "+", "100", "143", "5", "3")

        pairs = self.pairer.pair_mites_with_genes(
            [lone_mite], [far_gene, right_gene, overlapping_gene])
        self.assertEqual(pairs, [MiteGenePair(lone_mite, overlapping_gene)])
    def test_pair_mites_uses_correlated_gene_if_provided(self):
        # An explicitly named correlated gene is assumed to be there for a
        # reason, so it wins over the distance heuristic (and any other):
        # "gABC" at 0-10 is expected even though two other genes lie much
        # closer to the mite at 132-154.
        lone_mite = Mite("ABC", "4", "+", "132", "154", "gABC", "AA", "A", "3")

        named_gene = Gene("gABC", "4", "+", "0", "10", "5", "3")
        right_gene = Gene("gBBC", "4", "+", "155", "170", "5", "3")
        overlapping_gene = Gene("gCBC", "4", "+", "100", "143", "5", "3")

        pairs = self.pairer.pair_mites_with_genes(
            [lone_mite], [named_gene, right_gene, overlapping_gene])
        self.assertEqual(pairs, [MiteGenePair(lone_mite, named_gene)])
Exemplo n.º 10
0
    def test_pair_mites_prefers_nested_elements_input_order_doesnt_matter(
            self):
        # Sanity check: the genes are passed in reverse coordinate order, and
        # the gene overlapping the mite (100-143 vs 132-154) must still be
        # the one chosen.
        lone_mite = Mite("ABC", "4", "+", "132", "154", "NULL", "AA", "A", "3")

        overlapping_gene = Gene("gABC", "4", "+", "100", "143", "5", "3")
        right_gene = Gene("gBBC", "4", "+", "155", "170", "5", "3")

        pairs = self.pairer.pair_mites_with_genes(
            [lone_mite], [right_gene, overlapping_gene])
        self.assertEqual(pairs, [MiteGenePair(lone_mite, overlapping_gene)])
Exemplo n.º 11
0
    def test_pair_mites_prefers_nested_elements(self):
        # A mite nested in a gene (partially or completely) is assumed to be
        # associated with that gene, so the two are paired together rather
        # than with another gene that is not nested but may be closer.
        lone_mite = Mite("ABC", "4", "+", "132", "154", "NULL", "AA", "A", "3")

        overlapping_gene = Gene("gABC", "4", "+", "100", "143", "5", "3")
        right_gene = Gene("gBBC", "4", "+", "155", "170", "5", "3")

        pairs = self.pairer.pair_mites_with_genes(
            [lone_mite], [overlapping_gene, right_gene])
        self.assertEqual(pairs, [MiteGenePair(lone_mite, overlapping_gene)])
Exemplo n.º 12
0
    def _get_closest_mite_gene_pair(self, elements, mite, idx):
        """Choose a neighbouring gene for ``mite`` and wrap both in a pair.

        Args:
            elements: the sequence searched for neighbouring genes.
            mite: the mite to pair; presumably ``elements[idx]``, as passed
                by the visible caller.
            idx: position in ``elements`` the neighbour search starts from.

        Returns:
            ``MiteGenePair(mite, gene)`` for the chosen gene, or ``None`` when
            no gene is found on either side.
        """
        gene_before = self._nearest_gene_left_of_idx(elements, idx)
        gene_after = self._nearest_gene_right_of_idx(elements, idx)

        # With a gene on at most one side there is nothing to compare.
        if gene_before is None and gene_after is None:
            return None
        if gene_after is None:
            return MiteGenePair(mite, gene_before)
        if gene_before is None:
            return MiteGenePair(mite, gene_after)

        # Genes on both sides: a containment match (left side checked first)
        # takes precedence over the gap comparison.
        if self._is_contained_in(gene_before, mite):
            chosen = gene_before
        elif self._is_contained_in(gene_after, mite):
            chosen = gene_after
        else:
            gap_before = mite.start - gene_before.end
            gap_after = gene_after.start - mite.end
            # Equal gaps resolve to the right-hand gene.
            chosen = gene_before if gap_before < gap_after else gene_after
        return MiteGenePair(mite, chosen)
 def testHeaders(self):
     # csv_headers() must return the nine mite columns, the seven gene
     # columns and the trailing "distance" column, comma-separated, in
     # exactly this order.
     expected_columns = [
         "mite_name", "mite_chromosome", "mite_strand", "mite_start",
         "mite_end", "mite_genome_id", "mite_correlated_gene", "mite_family",
         "mite_superfamily",
         "gene_name", "gene_chromosome", "gene_strand", "gene_start",
         "gene_end", "gene_genome_id", "gene_exon_count",
         "distance",
     ]
     self.assertEqual(",".join(expected_columns), MiteGenePair.csv_headers())