Exemplo n.º 1
0
 def setUp(self):
     """Build a fresh Goldilocks instance before every test.

     The instance counts the nucleotides A, C, G, T and N over the
     module-level ``sequence_data`` with ``length=3`` and ``stride=1``.
     ``TOTAL_REGIONS`` records the region count the tests compare against.
     """
     counter = NucleotideCounterStrategy(["A", "C", "G", "T", "N"])
     self.g = Goldilocks(counter, sequence_data, length=3, stride=1)
     self.TOTAL_REGIONS = 29
Exemplo n.º 2
0
from goldilocks.goldilocks import Goldilocks
from goldilocks.strategies import VariantCounterStrategy, GCRatioStrategy, NucleotideCounterStrategy, KMerCounterStrategy

# TODO: Methods may take a list of locations or may need to actually analyze
#       a proper genomic sequence.

# Execute Goldilocks searches, once per strategy.
#
# Results are printed with print(...) so the script parses on Python 3 and
# still prints the same single value on Python 2.

#########################################
# VariantCounterStrategy: the data is a list of integers rather than a
# sequence string, hence is_seq=False.
data = {"ONE": {1: [1,2,5]}}
g = Goldilocks(VariantCounterStrategy(), data, is_seq=False, stride=1, length=3)

candidates = g._filter("max", actual_distance=1)

print(candidates)

#########################################
# GCRatioStrategy over a sequence string; the positional arguments 3 and 1
# are passed where the other calls use length/stride keywords
# (presumably length=3, stride=1 -- confirm against Goldilocks.__init__).
data = {"ONE": {1: "CCCGGGAGATTT"}}
g = Goldilocks(GCRatioStrategy(), data, 3, 1)

candidates = g._filter("max", actual_distance=1)

print(candidates)

candidates.export_fasta(["ONE"])

#########################################
# KMerCounterStrategy tracking the k-mers "AAA" and "CCC"; the filter below
# is restricted to the "AAA" track.
data = {"ONE": {1: "AAACCCGGGCCCGGGAGAAAAAAA"}}
g = Goldilocks(KMerCounterStrategy(["AAA", "CCC"]), data, 6, 1)

candidates = g._filter("max", actual_distance=1, track="AAA")

print(candidates)
Exemplo n.º 3
0
    def setUpClass(cls):
        """Prepare the class-wide fixture for the nucleotide counting tests.

        Stores on the class: the input sequences for two samples, a
        Goldilocks built over them with ``length=3`` and ``stride=1``, the
        group and track names, and the expected per-region counts keyed as
        chromosome -> group -> region index -> track.
        """
        # Column order of every row in the expectation tables below.
        track_order = ('A', 'C', 'T', 'G', 'N', "default")

        def numbered(*rows):
            # Expand rows of counts into {region index: {track: count}},
            # numbering regions from 0 in the order the rows are given.
            return {
                index: dict(zip(track_order, row))
                for index, row in enumerate(rows)
            }

        my_sample = {
            2: "NANANANANA",
            "X": "GATTACAGATTACAN",
            "one": "CATCANCAT",
            "three": "..A",
        }
        my_other_sample = {
            2: "GANGANGAN",
            "X": "GATTACAGATTACAN",
            "one": "TATANTATA",
            "three": ".N.",
        }
        cls.sequence_data = {
            "my_sample": my_sample,
            "my_other_sample": my_other_sample,
        }

        strategy = NucleotideCounterStrategy(["A", "C", "G", "T", "N"])
        cls.g = Goldilocks(strategy, cls.sequence_data, length=3, stride=1)
        cls.GROUPS = ["my_sample", "my_other_sample", "total"]
        cls.TRACKS = ["A", "C", "G", "T", "N", "default"]

        # Both samples carry the same "X" sequence, so their expected rows
        # are the same; numbered() is still called once per sample so each
        # group gets its own dictionaries.
        x_rows = (
            (1, 0, 1, 1, 0, 3),
            (1, 0, 2, 0, 0, 3),
            (1, 0, 2, 0, 0, 3),
            (1, 1, 1, 0, 0, 3),
            (2, 1, 0, 0, 0, 3),
            (1, 1, 0, 1, 0, 3),
            (2, 0, 0, 1, 0, 3),
            (1, 0, 1, 1, 0, 3),
            (1, 0, 2, 0, 0, 3),
            (1, 0, 2, 0, 0, 3),
            (1, 1, 1, 0, 0, 3),
            (2, 1, 0, 0, 0, 3),
            (1, 1, 0, 0, 1, 3),
        )

        # Rows are (A, C, T, G, N, default).
        cls.EXPECTED_REGIONS = {
            2: {
                "my_sample": numbered(
                    (1, 0, 0, 0, 2, 3),
                    (2, 0, 0, 0, 1, 3),
                    (1, 0, 0, 0, 2, 3),
                    (2, 0, 0, 0, 1, 3),
                    (1, 0, 0, 0, 2, 3),
                    (2, 0, 0, 0, 1, 3),
                    (1, 0, 0, 0, 2, 3),
                    (2, 0, 0, 0, 1, 3),
                ),
                # This sample's sequence is one base shorter than
                # my_sample's, so its final region holds two counts.
                "my_other_sample": numbered(
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 1, 1, 3),
                    (1, 0, 0, 0, 1, 2),
                ),
                "total": numbered(
                    (2, 0, 0, 1, 3, 6),
                    (3, 0, 0, 1, 2, 6),
                    (2, 0, 0, 1, 3, 6),
                    (3, 0, 0, 1, 2, 6),
                    (2, 0, 0, 1, 3, 6),
                    (3, 0, 0, 1, 2, 6),
                    (2, 0, 0, 1, 3, 6),
                    (3, 0, 0, 0, 2, 5),
                ),
            },
            "X": {
                "my_sample": numbered(*x_rows),
                "my_other_sample": numbered(*x_rows),
                "total": numbered(
                    (2, 0, 2, 2, 0, 6),
                    (2, 0, 4, 0, 0, 6),
                    (2, 0, 4, 0, 0, 6),
                    (2, 2, 2, 0, 0, 6),
                    (4, 2, 0, 0, 0, 6),
                    (2, 2, 0, 2, 0, 6),
                    (4, 0, 0, 2, 0, 6),
                    (2, 0, 2, 2, 0, 6),
                    (2, 0, 4, 0, 0, 6),
                    (2, 0, 4, 0, 0, 6),
                    (2, 2, 2, 0, 0, 6),
                    (4, 2, 0, 0, 0, 6),
                    (2, 2, 0, 0, 2, 6),
                ),
            },
            "one": {
                "my_sample": numbered(
                    (1, 1, 1, 0, 0, 3),
                    (1, 1, 1, 0, 0, 3),
                    (1, 1, 1, 0, 0, 3),
                    (1, 1, 0, 0, 1, 3),
                    (1, 1, 0, 0, 1, 3),
                    (1, 1, 0, 0, 1, 3),
                    (1, 1, 1, 0, 0, 3),
                ),
                "my_other_sample": numbered(
                    (1, 0, 2, 0, 0, 3),
                    (2, 0, 1, 0, 0, 3),
                    (1, 0, 1, 0, 1, 3),
                    (1, 0, 1, 0, 1, 3),
                    (1, 0, 1, 0, 1, 3),
                    (1, 0, 2, 0, 0, 3),
                    (2, 0, 1, 0, 0, 3),
                ),
                "total": numbered(
                    (2, 1, 3, 0, 0, 6),
                    (3, 1, 2, 0, 0, 6),
                    (2, 1, 2, 0, 1, 6),
                    (2, 1, 1, 0, 2, 6),
                    (2, 1, 1, 0, 2, 6),
                    (2, 1, 2, 0, 1, 6),
                    (3, 1, 2, 0, 0, 6),
                ),
            },
            "three": {
                "my_sample": numbered((1, 0, 0, 0, 0, 1)),
                "my_other_sample": numbered((0, 0, 0, 0, 1, 1)),
                "total": numbered((1, 0, 0, 0, 1, 2)),
            },
        }

        # 29 regions * 5 bases * (2+1) samples (two samples + total)
        cls.EXPECTED_NUM_REGION = 29
        cls.EXPECTED_REGION_COUNT = cls.EXPECTED_NUM_REGION * 5 * 3

        # On top of the per-base counters, every region has one more
        # counter for each of the three groups.
        cls.EXPECTED_COUNTERS_COUNT = (
            cls.EXPECTED_REGION_COUNT + cls.EXPECTED_NUM_REGION * 3
        )
Exemplo n.º 4
0
    def setUpClass(cls):
        """Prepare the class-wide fixture for the GC ratio tests.

        Stores on the class: one 32-character sequence per sample, a
        Goldilocks built with ``GCRatioStrategy`` using ``length=8`` and
        ``stride=8``, the group and track names, and the expected ratio for
        each of the four regions keyed as chromosome -> group -> region
        index -> track.
        """
        def by_region(*ratios):
            # Expand ratios into {region index: {"default": ratio}},
            # numbering regions from 0 in the order given.
            return {
                index: {"default": ratio}
                for index, ratio in enumerate(ratios)
            }

        cls.sequence_data = {
            "my_sample": {1: "GCGCGCGC..GCGCGC....GCGC......GC"},
            "my_other_sample": {1: "GC......GCGC....GCGCGC..GCGCGCGC"},
        }
        cls.g = Goldilocks(
            GCRatioStrategy(), cls.sequence_data, length=8, stride=8
        )
        cls.GROUPS = ["my_sample", "my_other_sample", "total"]
        cls.TRACKS = ["default"]

        cls.EXPECTED_REGIONS = {
            1: {
                "my_sample": by_region(1.0, 0.75, 0.5, 0.25),
                "my_other_sample": by_region(0.25, 0.5, 0.75, 1.0),
                "total": by_region(0.625, 0.625, 0.625, 0.625),
            },
        }

        # 4 regions * (2+1) groups (two samples + total)
        cls.EXPECTED_NUM_REGION = 4
        cls.EXPECTED_REGION_COUNT = cls.EXPECTED_NUM_REGION * 3

        # "default" is the only track here, so the expected counter count
        # is the same as the region count.
        cls.EXPECTED_COUNTERS_COUNT = cls.EXPECTED_REGION_COUNT
Exemplo n.º 5
0
    def setUpClass(cls):
        """Prepare the class-wide fixture for the A/N counting tests.

        Stores on the class: three 9-character sequences per sample, a
        Goldilocks that counts "A" and "N" with ``length=3`` and
        ``stride=3``, the group and track names, and the expected counts
        keyed as chromosome -> group -> region index -> track.
        """
        # Column order of every row in the expectation tables below.
        track_order = ('A', 'N', "default")

        def numbered(*rows):
            # Expand rows of counts into {region index: {track: count}},
            # numbering regions from 0 in the order the rows are given.
            return {
                index: dict(zip(track_order, row))
                for index, row in enumerate(rows)
            }

        cls.sequence_data = {
            "my_sample": {
                1: "..N..N..N",
                2: "A.A.AA..A",
                3: "NNN.NN...",
            },
            "my_other_sample": {
                1: "N..NN.NNN",
                2: "A..AA....",
                3: "AAA.AA...",
            },
        }
        strategy = NucleotideCounterStrategy(["A", "N"])
        cls.g = Goldilocks(strategy, cls.sequence_data, length=3, stride=3)
        cls.GROUPS = ["my_sample", "my_other_sample", "total"]
        cls.TRACKS = ["A", "N", "default"]

        # Rows are (A, N, default).
        cls.EXPECTED_REGIONS = {
            1: {
                "my_sample": numbered((0, 1, 1), (0, 1, 1), (0, 1, 1)),
                "my_other_sample": numbered((0, 1, 1), (0, 2, 2), (0, 3, 3)),
                "total": numbered((0, 2, 2), (0, 3, 3), (0, 4, 4)),
            },
            2: {
                "my_sample": numbered((2, 0, 2), (2, 0, 2), (1, 0, 1)),
                "my_other_sample": numbered((1, 0, 1), (2, 0, 2), (0, 0, 0)),
                "total": numbered((3, 0, 3), (4, 0, 4), (1, 0, 1)),
            },
            3: {
                "my_sample": numbered((0, 3, 3), (0, 2, 2), (0, 0, 0)),
                "my_other_sample": numbered((3, 0, 3), (2, 0, 2), (0, 0, 0)),
                "total": numbered((3, 3, 6), (2, 2, 4), (0, 0, 0)),
            },
        }

        # 9 regions * 2 bases * (2+1) samples (two samples + total)
        cls.EXPECTED_NUM_REGION = 9
        cls.EXPECTED_REGION_COUNT = cls.EXPECTED_NUM_REGION * 2 * 3

        # On top of the per-base counters, every region has one more
        # "default" counter for each of the three groups.
        cls.EXPECTED_COUNTERS_COUNT = (
            cls.EXPECTED_REGION_COUNT + cls.EXPECTED_NUM_REGION * 3
        )
Exemplo n.º 6
0
class TestGoldilocks(unittest.TestCase):
    """Constructor validation, exclusion filter and limit tests for Goldilocks."""

    def setUp(self):
        """Build a fresh Goldilocks over sequence_data before every test."""
        self.g = Goldilocks(
            NucleotideCounterStrategy(["A", "C", "G", "T", "N"]),
            sequence_data,
            length=3,
            stride=1)
        # Candidate count the tests expect when nothing is excluded or limited.
        self.TOTAL_REGIONS = 29

    def __test_simple_exclusions(self, EXCLUSIONS, limit=0):
        """Apply each exclusion on its own and check the surviving candidates.

        EXCLUSIONS maps a case name to a dict holding "filter" (a key of
        FILTER_TO_PROPERTY), "value" (the argument given to that filter) and,
        optionally, "expect_none" when no candidate is expected to survive.
        A positive limit is forwarded to _filter and the number of candidates
        returned is then checked against it.
        """
        # filter name -> (candidate key to inspect, comparison a survivor
        # must satisfy relative to the filter value)
        FILTER_TO_PROPERTY = {
            "start_lte": ("pos_start", "lt"),
            "start_gte": ("pos_start", "gt"),
            "end_lte": ("pos_end", "lt"),
            "end_gte": ("pos_end", "gt"),
            "chr": ("chr", "nin")
        }

        for exclusion_name, exclusion in EXCLUSIONS.items():
            filter_name = exclusion["filter"]
            value = exclusion["value"]
            expect_none = "expect_none" in exclusion
            cproperty, test_type = FILTER_TO_PROPERTY[filter_name]

            # Only pass limit through when one was requested.
            limit_kwargs = {"limit": limit} if limit > 0 else {}

            for op in OPS:
                # Identifies the failing case in assertion messages.
                context = "%s (op=%s, limit=%s)" % (exclusion_name, op, limit)

                candidates = self.g._filter(op,
                                            exclusions={filter_name: value},
                                            **limit_kwargs).candidates

                for c in candidates:
                    if test_type == "lt":
                        self.assertGreater(c[cproperty], value, context)
                    elif test_type == "gt":
                        self.assertLess(c[cproperty], value, context)
                    elif test_type == "nin":
                        self.assertNotIn(c[cproperty], value, context)
                    else:
                        self.fail("Incorrect test_type")

                if len(candidates) == 0 and not expect_none:
                    self.fail(
                        "No candidates returned but at least one expected...")

                # Don't test if limit is larger than number of regions
                if limit and limit <= self.TOTAL_REGIONS:
                    expected = 0 if expect_none else limit
                    self.assertEqual(expected, len(candidates), context)

    def __sweep_simple_exclusions(self, EXCLUSIONS):
        """Run __test_simple_exclusions unlimited, then with limits 1, 5, 100."""
        self.__test_simple_exclusions(EXCLUSIONS)
        for limit in (1, 5, 100):
            self.__test_simple_exclusions(EXCLUSIONS, limit=limit)

    def test_missing_length(self):
        """Constructing without length raises TypeError."""
        self.assertRaises(TypeError,
                          Goldilocks,
                          NucleotideCounterStrategy([]),
                          sequence_data,
                          stride=1)

    def test_missing_stride(self):
        """Constructing without stride raises TypeError."""
        self.assertRaises(TypeError,
                          Goldilocks,
                          NucleotideCounterStrategy([]),
                          sequence_data,
                          length=1)

    def test_invalid_stride(self):
        """Zero and negative strides raise ValueError."""
        for stride in (0, -1, -1000):
            self.assertRaises(ValueError,
                              Goldilocks,
                              NucleotideCounterStrategy([]),
                              sequence_data,
                              length=1,
                              stride=stride)

    def test_invalid_length(self):
        """Zero and negative lengths raise ValueError."""
        for length in (0, -1, -1000):
            self.assertRaises(ValueError,
                              Goldilocks,
                              NucleotideCounterStrategy([]),
                              sequence_data,
                              length=length,
                              stride=1)

    def test_invalid_filter_distance(self):
        """Giving both actual_distance and percentile_distance raises ValueError."""
        for op in OPS:
            self.assertRaises(ValueError,
                              self.g._filter,
                              op,
                              actual_distance=1,
                              percentile_distance=1)

    def test_invalid_sort_operation(self):
        """An unknown operation name raises TypeError."""
        # The operation under test is the bogus one, so there is nothing to
        # iterate over OPS for.
        self.assertRaises(TypeError, self.g._filter, "hoot")

    def test_unimplemented_strategy(self):
        """Constructing with a bare BaseStrategy raises NotImplementedError."""
        self.assertRaises(NotImplementedError,
                          Goldilocks,
                          BaseStrategy(),
                          sequence_data,
                          length=1,
                          stride=1)

    def test_exclude_chr(self):
        """Candidates never come from an excluded chromosome."""
        EXCLUSIONS = {
            "simple_chr_str": {
                "filter": "chr",
                "value": ["one"],
            },
            "simple_chr_int": {
                "filter": "chr",
                "value": [2],
            },
            "simple_all_chr": {
                "filter": "chr",
                "value": ["one", "X", 2, "three"],
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_start_gte(self):
        """Surviving candidates start strictly below the start_gte value."""
        EXCLUSIONS = {
            "simple_start_gte": {
                "filter": "start_gte",
                "value": 5,
            },
            "prevent_start_gte": {
                "filter": "start_gte",
                "value": 100,
            },
            "all_start_gte": {
                "filter": "start_gte",
                "value": 1,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_start_lte(self):
        """Surviving candidates start strictly above the start_lte value."""
        EXCLUSIONS = {
            "simple_start_lte": {
                "filter": "start_lte",
                "value": 5,
            },
            "prevent_start_lte": {
                "filter": "start_lte",
                "value": 0,
            },
            "all_start_lte": {
                "filter": "start_lte",
                "value": 100,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_end_gte(self):
        """Surviving candidates end strictly below the end_gte value."""
        EXCLUSIONS = {
            "simple_end_gte": {
                "filter": "end_gte",
                "value": 5,
            },
            "prevent_end_gte": {
                "filter": "end_gte",
                "value": 100,
            },
            "all_end_gte": {
                "filter": "end_gte",
                "value": 1,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_end_lte(self):
        """Surviving candidates end strictly above the end_lte value."""
        EXCLUSIONS = {
            "simple_end_lte": {
                "filter": "end_lte",
                "value": 5,
            },
            "prevent_end_lte": {
                "filter": "end_lte",
                "value": 0,
            },
            "all_end_lte": {
                "filter": "end_lte",
                "value": 100,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_and(self):
        """With use_and, no candidate satisfies both exclusions at once."""
        for op in OPS:
            candidates = self.g._filter(op,
                                        exclusions={
                                            "start_gte": 5,
                                            "end_lte": 9,
                                        },
                                        use_and=True).candidates
            for c in candidates:
                self.assertFalse(c["pos_start"] >= 5 and c["pos_end"] <= 9)

            # At least one region must actually have been excluded.
            self.assertLess(len(candidates), self.TOTAL_REGIONS)

    def test_exclude_and_with_chr(self):
        """With use_and plus a chr filter, only chr X loses the position range."""
        for op in OPS:
            candidates = self.g._filter(op,
                                        exclusions={
                                            "start_gte": 5,
                                            "end_lte": 9,
                                            "chr": ["X"],
                                        },
                                        use_and=True).candidates
            non_x_count = 0
            for c in candidates:
                in_range = c["pos_start"] >= 5 and c["pos_end"] <= 9
                if c["chr"] == "X":
                    self.assertFalse(in_range)
                elif in_range:
                    non_x_count += 1

            # Other chromosomes must keep their regions inside the range.
            self.assertGreater(non_x_count, 0)
            self.assertLess(len(candidates), self.TOTAL_REGIONS)

    def test_exclude_chr_specific_chr(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_exclude_chr_specific_start(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_exclude_chr_specific_end(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_exclude_chr_specific_and(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_limit(self):
        """query honours limit and caps at the total number of regions."""
        expectations = ((1, 1), (10, 10), (100, self.TOTAL_REGIONS))
        for op in OPS:
            for limit, expected in expectations:
                candidates = self.g.query(op, limit=limit).candidates
                self.assertEqual(expected, len(candidates))

    def test_distance_upper(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_distance_lower(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_distance_around(self):
        # TODO: placeholder, asserts nothing yet.
        pass
Exemplo n.º 7
0
from goldilocks.goldilocks import Goldilocks
from goldilocks.strategies import VariantCounterStrategy, GCRatioStrategy, NucleotideCounterStrategy, KMerCounterStrategy

# TODO: Methods may take a list of locations or may need to actually analyze
#       a proper genomic sequence
# Execute Goldilocks search.

# Search over a list of positions (is_seq=False) with the variant counter.
data = {"ONE": {1: [1, 2, 5]}}
g = Goldilocks(VariantCounterStrategy(),
               data,
               is_seq=False,
               stride=1,
               length=3)

candidates = g._filter("max", actual_distance=1)

# print(...) with a single argument is valid on Python 3 and prints the same
# text on Python 2, unlike the print statement.
print(candidates)

#########################################
# Search over a sequence string with the GC ratio strategy.
# NOTE(review): positional 3, 1 are presumably length and stride - confirm
# against the Goldilocks signature.
data = {"ONE": {1: "CCCGGGAGATTT"}}
g = Goldilocks(GCRatioStrategy(), data, 3, 1)

candidates = g._filter("max", actual_distance=1)

print(candidates)

candidates.export_fasta(["ONE"])

#########################################
# Set up a k-mer counting search for "AAA" and "CCC"; no query is run on it
# in this script.
data = {"ONE": {1: "AAACCCGGGCCCGGGAGAAAAAAA"}}
g = Goldilocks(KMerCounterStrategy(["AAA", "CCC"]), data, 6, 1)
Exemplo n.º 8
0
 def setUp(self):
     """Create the Goldilocks instance and the region total used by the tests."""
     tracked_bases = ["A", "C", "G", "T", "N"]
     counter_strategy = NucleotideCounterStrategy(tracked_bases)
     self.g = Goldilocks(
         counter_strategy,
         sequence_data,
         length=3,
         stride=1,
     )
     # Candidate count the tests expect when nothing is excluded or limited.
     self.TOTAL_REGIONS = 29
Exemplo n.º 9
0
class TestGoldilocks(unittest.TestCase):
    """Constructor validation, exclusion filter and limit tests for Goldilocks."""

    def setUp(self):
        """Build a fresh Goldilocks over sequence_data before every test."""
        self.g = Goldilocks(NucleotideCounterStrategy(["A", "C", "G", "T", "N"]), sequence_data, length=3, stride=1)
        # Candidate count the tests expect when nothing is excluded or limited.
        self.TOTAL_REGIONS = 29

    def __test_simple_exclusions(self, EXCLUSIONS, limit=0):
        """Apply each exclusion on its own and check the surviving candidates.

        EXCLUSIONS maps a case name to a dict holding "filter" (a key of
        FILTER_TO_PROPERTY), "value" (the argument given to that filter) and,
        optionally, "expect_none" when no candidate is expected to survive.
        A positive limit is forwarded to _filter and the number of candidates
        returned is then checked against it.
        """
        # filter name -> (candidate key to inspect, comparison a survivor
        # must satisfy relative to the filter value)
        FILTER_TO_PROPERTY = {
            "start_lte": ("pos_start", "lt"),
            "start_gte": ("pos_start", "gt"),
            "end_lte": ("pos_end", "lt"),
            "end_gte": ("pos_end", "gt"),
            "chr": ("chr", "nin")
        }

        for exclusion_name, exclusion in EXCLUSIONS.items():
            filter_name = exclusion["filter"]
            value = exclusion["value"]
            expect_none = "expect_none" in exclusion
            cproperty, test_type = FILTER_TO_PROPERTY[filter_name]

            # Only pass limit through when one was requested.
            limit_kwargs = {"limit": limit} if limit > 0 else {}

            for op in OPS:
                # Identifies the failing case in assertion messages.
                context = "%s (op=%s, limit=%s)" % (exclusion_name, op, limit)

                candidates = self.g._filter(op, exclusions={filter_name: value}, **limit_kwargs).candidates

                for c in candidates:
                    if test_type == "lt":
                        self.assertGreater(c[cproperty], value, context)
                    elif test_type == "gt":
                        self.assertLess(c[cproperty], value, context)
                    elif test_type == "nin":
                        self.assertNotIn(c[cproperty], value, context)
                    else:
                        self.fail("Incorrect test_type")

                if len(candidates) == 0 and not expect_none:
                    self.fail("No candidates returned but at least one expected...")

                # Don't test if limit is larger than number of regions
                if limit and limit <= self.TOTAL_REGIONS:
                    expected = 0 if expect_none else limit
                    self.assertEqual(expected, len(candidates), context)

    def __sweep_simple_exclusions(self, EXCLUSIONS):
        """Run __test_simple_exclusions unlimited, then with limits 1, 5, 100."""
        self.__test_simple_exclusions(EXCLUSIONS)
        for limit in (1, 5, 100):
            self.__test_simple_exclusions(EXCLUSIONS, limit=limit)

    def test_missing_length(self):
        """Constructing without length raises TypeError."""
        self.assertRaises(TypeError, Goldilocks, NucleotideCounterStrategy([]), sequence_data, stride=1)

    def test_missing_stride(self):
        """Constructing without stride raises TypeError."""
        self.assertRaises(TypeError, Goldilocks, NucleotideCounterStrategy([]), sequence_data, length=1)

    def test_invalid_stride(self):
        """Zero and negative strides raise ValueError."""
        for stride in (0, -1, -1000):
            self.assertRaises(ValueError, Goldilocks, NucleotideCounterStrategy([]), sequence_data, length=1, stride=stride)

    def test_invalid_length(self):
        """Zero and negative lengths raise ValueError."""
        for length in (0, -1, -1000):
            self.assertRaises(ValueError, Goldilocks, NucleotideCounterStrategy([]), sequence_data, length=length, stride=1)

    def test_invalid_filter_distance(self):
        """Giving both actual_distance and percentile_distance raises ValueError."""
        for op in OPS:
            self.assertRaises(ValueError, self.g._filter, op, actual_distance=1, percentile_distance=1)

    def test_invalid_sort_operation(self):
        """An unknown operation name raises TypeError."""
        # The operation under test is the bogus one, so there is nothing to
        # iterate over OPS for.
        self.assertRaises(TypeError, self.g._filter, "hoot")

    def test_unimplemented_strategy(self):
        """Constructing with a bare BaseStrategy raises NotImplementedError."""
        self.assertRaises(NotImplementedError, Goldilocks, BaseStrategy(), sequence_data, length=1, stride=1)

    def test_exclude_chr(self):
        """Candidates never come from an excluded chromosome."""
        EXCLUSIONS = {
            "simple_chr_str": {
                "filter": "chr",
                "value": ["one"],
            },
            "simple_chr_int": {
                "filter": "chr",
                "value": [2],
            },
            "simple_all_chr": {
                "filter": "chr",
                "value": ["one", "X", 2, "three"],
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_start_gte(self):
        """Surviving candidates start strictly below the start_gte value."""
        EXCLUSIONS = {
            "simple_start_gte": {
                "filter": "start_gte",
                "value": 5,
            },
            "prevent_start_gte": {
                "filter": "start_gte",
                "value": 100,
            },
            "all_start_gte": {
                "filter": "start_gte",
                "value": 1,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_start_lte(self):
        """Surviving candidates start strictly above the start_lte value."""
        EXCLUSIONS = {
            "simple_start_lte": {
                "filter": "start_lte",
                "value": 5,
            },
            "prevent_start_lte": {
                "filter": "start_lte",
                "value": 0,
            },
            "all_start_lte": {
                "filter": "start_lte",
                "value": 100,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_end_gte(self):
        """Surviving candidates end strictly below the end_gte value."""
        EXCLUSIONS = {
            "simple_end_gte": {
                "filter": "end_gte",
                "value": 5,
            },
            "prevent_end_gte": {
                "filter": "end_gte",
                "value": 100,
            },
            "all_end_gte": {
                "filter": "end_gte",
                "value": 1,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_end_lte(self):
        """Surviving candidates end strictly above the end_lte value."""
        EXCLUSIONS = {
            "simple_end_lte": {
                "filter": "end_lte",
                "value": 5,
            },
            "prevent_end_lte": {
                "filter": "end_lte",
                "value": 0,
            },
            "all_end_lte": {
                "filter": "end_lte",
                "value": 100,
                "expect_none": True
            },
        }

        self.__sweep_simple_exclusions(EXCLUSIONS)

    def test_exclude_and(self):
        """With use_and, no candidate satisfies both exclusions at once."""
        for op in OPS:
            candidates = self.g._filter(op, exclusions={
                "start_gte": 5,
                "end_lte": 9,
            }, use_and=True).candidates
            for c in candidates:
                self.assertFalse(c["pos_start"] >= 5 and c["pos_end"] <= 9)

            # At least one region must actually have been excluded.
            self.assertLess(len(candidates), self.TOTAL_REGIONS)

    def test_exclude_and_with_chr(self):
        """With use_and plus a chr filter, only chr X loses the position range."""
        for op in OPS:
            candidates = self.g._filter(op, exclusions={
                "start_gte": 5,
                "end_lte": 9,
                "chr": ["X"],
            }, use_and=True).candidates
            non_x_count = 0
            for c in candidates:
                in_range = c["pos_start"] >= 5 and c["pos_end"] <= 9
                if c["chr"] == "X":
                    self.assertFalse(in_range)
                elif in_range:
                    non_x_count += 1

            # Other chromosomes must keep their regions inside the range.
            self.assertGreater(non_x_count, 0)
            self.assertLess(len(candidates), self.TOTAL_REGIONS)

    def test_exclude_chr_specific_chr(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_exclude_chr_specific_start(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_exclude_chr_specific_end(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_exclude_chr_specific_and(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_limit(self):
        """query honours limit and caps at the total number of regions."""
        expectations = ((1, 1), (10, 10), (100, self.TOTAL_REGIONS))
        for op in OPS:
            for limit, expected in expectations:
                candidates = self.g.query(op, limit=limit).candidates
                self.assertEqual(expected, len(candidates))

    def test_distance_upper(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_distance_lower(self):
        # TODO: placeholder, asserts nothing yet.
        pass

    def test_distance_around(self):
        # TODO: placeholder, asserts nothing yet.
        pass