def test_add_tag_zeroInputsIncluded(self):
    """Zero-valued inputs receive z-scores like any other number.

    The expected strings correspond to the population {4, 0, 0, 8}
    (mean 3), so the zeros must have been counted in the statistics.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "0"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "0"}, "SB": {"X": "8"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    sample_a_values = rec1.sample_tag_values["SA"]
    self.assertEqual("0.3015", sample_a_values["ZScoreX"])
    sample_b_values = rec1.sample_tag_values["SB"]
    self.assertEqual("-0.9045", sample_b_values["ZScoreX"])
def test_add_tag_nullInputsProduceNullZScores(self):
    """A "." input yields a "." z-score; numeric inputs are still scored.

    The expected "-1.0" for sample SA corresponds to statistics built
    from the two numeric values only (4 and 8).
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "."}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "."}, "SB": {"X": "8"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    sample_a_values = rec1.sample_tag_values["SA"]
    self.assertEqual("-1.0", sample_a_values["ZScoreX"])
    sample_b_values = rec1.sample_tag_values["SB"]
    self.assertEqual(".", sample_b_values["ZScoreX"])
def test_add_tag_doesNothingIfNoStdev(self):
    """When every input is identical, no z-score tag is added.

    All four inputs are 4, so the tag's stdev is 0 and each sample is
    expected to keep only its original "X" key.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "4"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "4"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # Use assertEqual throughout; the assertEquals alias was removed
    # from unittest in Python 3.12.
    self.assertEqual(0, tag._stdev)
    self.assertEqual(["X"], sorted(rec1.sample_tag_values["SA"].keys()))
    self.assertEqual(["X"], sorted(rec1.sample_tag_values["SB"].keys()))
def test_add_tag(self):
    """add_tag_values writes a formatted z-score string for each sample.

    The expected strings correspond to the population {4, 7, 13, 16}
    (mean 10) gathered from both records in the reader.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    sample_a_values = rec1.sample_tag_values["SA"]
    self.assertEqual("-1.2649", sample_a_values["ZScoreX"])
    sample_b_values = rec1.sample_tag_values["SB"]
    self.assertEqual("-0.6325", sample_b_values["ZScoreX"])
def test_init_setsPopulationStatistics(self):
    """Constructing the tag computes mean and stdev over all samples.

    The statistics are compared against the module's mean/stdev helpers
    applied to the four values spread across both records.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
    reader = MockVcfReader(records=[rec1, rec2])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [4, 7, 13, 16]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), tag._mean, places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), tag._stdev, places=_ZScoreTag._MAX_PRECISION)
def test_init_setsPopulationStatisticsAssignsStddevCorrectlyWhenOneValue(self):
    """With a single numeric input, statistics are still assigned.

    Sample SB carries "." so only the value 4 is expected to contribute;
    results are compared against the mean/stdev helpers applied to [4].
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "."}})
    reader = MockVcfReader(records=[rec1])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [4]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), tag._mean, places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), tag._stdev, places=_ZScoreTag._MAX_PRECISION)
def test_init_setsPopulationStatisticsAssignsStddevCorrectlyWhenNoValues(self):
    """With no numeric inputs at all, mean and stdev are both None."""
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "."}, "SB": {"X": "."}})
    reader = MockVcfReader(records=[rec1])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    # assertIsNone replaces assertEquals(None, ...): the assertEquals
    # alias was removed in Python 3.12, and an identity check is the
    # precise way to assert "no statistic was computed".
    self.assertIsNone(tag._mean)
    self.assertIsNone(tag._stdev)
def test_init_setsPopulationStatisticsRoundsTo13digits(self):
    """Mean and stdev are stored rounded to 13 decimal places.

    The inputs 1, 1, 0 give a mean of 2/3, which has no exact short
    decimal representation, so rounding is observable.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={
            "SA": {"X": "1"},
            "SB": {"X": "1"},
            "SC": {"X": "0"},
        })
    reader = MockVcfReader(records=[rec1])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    # NOTE(review): `2 / 3` relies on true division (Python 3, or
    # `from __future__ import division` at file top) -- confirm.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(round(2 / 3, 13), tag._mean, repr(tag._mean))
    self.assertEqual(round(stdev([1, 1, 0]), 13), tag._stdev)
def test_init_setsPopulationStatisticsUsingMaxRangeForMultiValuedInput(self):
    """Comma-separated inputs contribute their largest element.

    "13,0" and "0,16" are expected to count as 13 and 16, so the
    statistics match those of [4, 7, 13, 16].
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13,0"}, "SB": {"X": "0,16"}})
    reader = MockVcfReader(records=[rec1, rec2])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [4, 7, 13, 16]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), tag._mean, places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), tag._stdev, places=_ZScoreTag._MAX_PRECISION)
def test_init_setsPopulationStatisticsParsesFloats(self):
    """Decimal inputs such as "3.5" are parsed and included."""
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "2"}, "SB": {"X": "3.5"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "6.5"}, "SB": {"X": "8"}})
    reader = MockVcfReader(records=[rec1, rec2])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [2, 3.5, 6.5, 8]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), tag._mean, places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), tag._stdev, places=_ZScoreTag._MAX_PRECISION)
def test_add_tag(self):
    """add_tag_values writes a formatted z-score string for each sample.

    The expected strings correspond to the population {4, 7, 13, 16}
    (mean 10) gathered from both records in the reader.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    sample_a_values = rec1.sample_tag_values["SA"]
    self.assertEqual("-1.2649", sample_a_values["ZScoreX"])
    sample_b_values = rec1.sample_tag_values["SB"]
    self.assertEqual("-0.6325", sample_b_values["ZScoreX"])
def test_add_tag_nullInputsProduceNullZScores(self):
    """A "." input yields a "." z-score; numeric inputs are still scored.

    The expected "-1.0" for sample SA corresponds to statistics built
    from the two numeric values only (4 and 8).
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "."}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "."}, "SB": {"X": "8"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    sample_a_values = rec1.sample_tag_values["SA"]
    self.assertEqual("-1.0", sample_a_values["ZScoreX"])
    sample_b_values = rec1.sample_tag_values["SB"]
    self.assertEqual(".", sample_b_values["ZScoreX"])
def test_add_tag_zeroInputsIncluded(self):
    """Zero-valued inputs receive z-scores like any other number.

    The expected strings correspond to the population {4, 0, 0, 8}
    (mean 3), so the zeros must have been counted in the statistics.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "0"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "0"}, "SB": {"X": "8"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    sample_a_values = rec1.sample_tag_values["SA"]
    self.assertEqual("0.3015", sample_a_values["ZScoreX"])
    sample_b_values = rec1.sample_tag_values["SB"]
    self.assertEqual("-0.9045", sample_b_values["ZScoreX"])
def test_add_tag_doesNothingIfNoStdev(self):
    """When every input is identical, no z-score tag is added.

    All four inputs are 4, so the tag's stdev is 0 and each sample is
    expected to keep only its original "X" key.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "4"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "4"}})
    reader = MockVcfReader(records=[rec1, rec2])
    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    tag.add_tag_values(rec1)

    # Use assertEqual throughout; the assertEquals alias was removed
    # from unittest in Python 3.12.
    self.assertEqual(0, tag._stdev)
    self.assertEqual(["X"], sorted(rec1.sample_tag_values["SA"].keys()))
    self.assertEqual(["X"], sorted(rec1.sample_tag_values["SB"].keys()))
def test_init_setsPopulationStatisticsConsidersZeros(self):
    """Zero-valued inputs are counted when computing mean and stdev.

    The expected statistics are those of all six values, including the
    two zeros, across the three records.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "0"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "7"}, "SB": {"X": "13"}})
    rec3 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "16"}, "SB": {"X": "0"}})
    reader = MockVcfReader(records=[rec1, rec2, rec3])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [4, 0, 7, 13, 16, 0]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), float(tag._mean), places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), float(tag._stdev), places=_ZScoreTag._MAX_PRECISION)
def test_init_setsPopulationStatisticsIgnoresUnparsableValues(self):
    """Non-numeric inputs ("A1", "2A") are left out of the statistics.

    Only the four parsable values from the first and third records are
    expected to contribute.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "A1"}, "SB": {"X": "2A"}})
    rec3 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
    reader = MockVcfReader(records=[rec1, rec2, rec3])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [4, 7, 13, 16]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), tag._mean, places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), tag._stdev, places=_ZScoreTag._MAX_PRECISION)
def test_init_metaheaders(self):
    """The tag exposes three metaheaders in a fixed order.

    Expected order: the X_mean line, the X_stdev line, then a line
    containing the FORMAT header that was passed to the constructor.
    """
    format_header = (
        '##FORMAT=<ID=ZScoreX,Number=1,Type=Float,'
        'Description="ZScore for X">')
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
    reader = MockVcfReader(records=[rec1, rec2])

    tag = zscore_caller._ZScoreTag("ZScoreX", format_header, "X", reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(3, len(tag.metaheaders))
    it = iter(tag.metaheaders)
    self.assertEqual(
        next(it),
        '##jacquard.summarize.ZScoreX.X_mean=' + repr(tag._mean))
    self.assertEqual(
        next(it),
        '##jacquard.summarize.ZScoreX.X_stdev=' + repr(tag._stdev))
    # The original used the removed assertRegexpMatches alias with this
    # literal as the pattern. It contains no regex metacharacters, so a
    # substring check is equivalent and does not depend on regex parsing.
    self.assertIn(format_header, next(it))
def test_init_metaheaders(self):
    """The tag exposes three metaheaders in a fixed order.

    Expected order: the X_mean line, the X_stdev line, then a line
    containing the FORMAT header that was passed to the constructor.
    """
    format_header = (
        '##FORMAT=<ID=ZScoreX,Number=1,Type=Float,'
        'Description="ZScore for X">')
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
    rec2 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
    reader = MockVcfReader(records=[rec1, rec2])

    tag = zscore_caller._ZScoreTag("ZScoreX", format_header, "X", reader)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(3, len(tag.metaheaders))
    it = iter(tag.metaheaders)
    self.assertEqual(
        next(it),
        '##jacquard.summarize.ZScoreX.X_mean=' + repr(tag._mean))
    self.assertEqual(
        next(it),
        '##jacquard.summarize.ZScoreX.X_stdev=' + repr(tag._stdev))
    # The original used the removed assertRegexpMatches alias with this
    # literal as the pattern. It contains no regex metacharacters, so a
    # substring check is equivalent and does not depend on regex parsing.
    self.assertIn(format_header, next(it))
def test_init_setsPopulationStatisticsAssignsStddevCorrectlyWhenNoValues(self):
    """With no numeric inputs at all, mean and stdev are both None."""
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "."}, "SB": {"X": "."}})
    reader = MockVcfReader(records=[rec1])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    # assertIsNone replaces assertEquals(None, ...): the assertEquals
    # alias was removed in Python 3.12, and an identity check is the
    # precise way to assert "no statistic was computed".
    self.assertIsNone(tag._mean)
    self.assertIsNone(tag._stdev)
def test_init_setsPopulationStatisticsRoundsTo13digits(self):
    """Mean and stdev are stored rounded to 13 decimal places.

    The inputs 1, 1, 0 give a mean of 2/3, which has no exact short
    decimal representation, so rounding is observable.
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={
            "SA": {"X": "1"},
            "SB": {"X": "1"},
            "SC": {"X": "0"},
        })
    reader = MockVcfReader(records=[rec1])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    # NOTE(review): `2 / 3` relies on true division (Python 3, or
    # `from __future__ import division` at file top) -- confirm.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(round(2 / 3, 13), tag._mean, repr(tag._mean))
    self.assertEqual(round(stdev([1, 1, 0]), 13), tag._stdev)
def test_init_setsPopulationStatisticsAssignsStddevCorrectlyWhenOneValue(self):
    """With a single numeric input, statistics are still assigned.

    Sample SB carries "." so only the value 4 is expected to contribute;
    results are compared against the mean/stdev helpers applied to [4].
    """
    rec1 = vcf.VcfRecord(
        "1", "42", "A", "C",
        sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "."}})
    reader = MockVcfReader(records=[rec1])

    tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

    values = [4]
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias
    # (removed in Python 3.12); the third positional argument was `places`.
    self.assertAlmostEqual(
        mean(values), tag._mean, places=_ZScoreTag._MAX_PRECISION)
    self.assertAlmostEqual(
        stdev(values), tag._stdev, places=_ZScoreTag._MAX_PRECISION)