def _small_file_innerdist(start, fastq_file, pair_file, ref_file, out_base,
                          out_dir, config, remove_workdir=False):
    """Estimate mean and standard deviation of the paired-read inner distance
    from a Bowtie alignment of a subset of reads.
    """
    work_dir = os.path.join(out_dir, "innerdist_estimate")
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    safe_makedir(work_dir)
    extra_args = ["-s", str(start), "-u", "250000"]
    bowtie_runner = _select_bowtie_version(config)
    out_sam = bowtie_runner.align(fastq_file, pair_file, ref_file, out_base,
                                  work_dir, config, extra_args)
    dists = []
    with closing(pysam.Samfile(out_sam)) as work_sam:
        for read in work_sam:
            if read.is_proper_pair and read.is_read1:
                dists.append(abs(read.isize) - 2 * read.rlen)
    if dists:
        dist_stats = Stats(dists)
        return (int(round(dist_stats.mean())),
                int(round(dist_stats.standard_deviation())))
    else:
        return None, None
def _estimate_paired_innerdist(fastq_file, pair_file, ref_file, out_base,
                               out_dir, config):
    """Use Bowtie to estimate the inner distance of paired reads.
    """
    # skip initial reads for large file, but not for smaller
    dists = _bowtie_for_innerdist("1000000", fastq_file, pair_file, ref_file,
                                  out_base, out_dir, config)
    if len(dists) == 0:
        dists = _bowtie_for_innerdist("1", fastq_file, pair_file, ref_file,
                                      out_base, out_dir, config, True)
    dist_stats = Stats(dists)
    return (int(round(dist_stats.mean())),
            int(round(dist_stats.standard_deviation())))
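For reference, the reduction performed by these helpers comes down to feeding the collected distances to py_descriptive_statistics. A minimal standalone sketch is below; it assumes Stats is the library's Enum class (which the scripts later in this section import directly) and uses made-up distance values.

from py_descriptive_statistics import Enum as Stats

# Made-up inner-distance samples standing in for the values collected from
# properly paired reads (abs(isize) minus both read lengths).
dists = [120, 135, 128, 142, 118, 131]

dist_stats = Stats(dists)
mean_dist = int(round(dist_stats.mean()))
std_dist = int(round(dist_stats.standard_deviation()))
print("estimated inner distance: %d +/- %d" % (mean_dist, std_dist))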
def _bowtie_for_innerdist(start, fastq_file, pair_file, ref_file, out_base,
                          out_dir, data, remove_workdir=False):
    """Variant of the inner-distance estimator that resolves the aligner and
    reference from the sample data dictionary before running Bowtie.
    """
    work_dir = os.path.join(out_dir, "innerdist_estimate")
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    safe_makedir(work_dir)
    extra_args = ["-s", str(start), "-u", "250000"]
    ref_file, bowtie_runner = _determine_aligner_and_reference(ref_file,
                                                               data["config"])
    out_sam = bowtie_runner.align(fastq_file, pair_file, ref_file,
                                  {"lane": out_base}, work_dir, data,
                                  extra_args)
    dists = []
    with closing(pysam.Samfile(out_sam)) as work_sam:
        for read in work_sam:
            if read.is_proper_pair and read.is_read1:
                dists.append(abs(read.isize) - 2 * read.rlen)
    if dists:
        dist_stats = Stats(dists)
        return (int(round(dist_stats.mean())),
                int(round(dist_stats.standard_deviation())))
    else:
        return None, None
def descriptive_stats(xs):
    """Return an indented, line-per-statistic summary (min, percentiles, max) of xs."""
    if len(xs) < 2:
        return xs
    calc = Stats(xs)
    parts = ["min: %s" % min(xs),
             "5%%: %s" % calc.percentile(5),
             "25%%: %s" % calc.percentile(25),
             "median: %s" % calc.percentile(50),
             "75%%: %s" % calc.percentile(75),
             "95%%: %s" % calc.percentile(95),
             "99%%: %s" % calc.percentile(99),
             "max: %s" % max(xs)]
    return "\n".join([" " + x for x in parts])
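A small usage sketch for descriptive_stats, assuming the function and its Stats dependency are importable as above; the input values are made up and the exact output depends on the library's percentile interpolation.

# Example call with made-up read-length values; prints one indented
# "label: value" line per statistic.
read_lengths = [36, 50, 50, 75, 75, 75, 100, 100, 150]
print(descriptive_stats(read_lengths))

# Inputs with fewer than two values are returned unchanged.
print(descriptive_stats([42]))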
import unittest

from py_descriptive_statistics import Enum


class TestPyDescriptiveStatistics(unittest.TestCase):
    def setUp(self):
        self.enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])

    def test_number(self):
        self.assertEqual(self.enum.number(), 11)

    def test_sum(self):
        self.assertEqual(self.enum.sum(), 54)

    def test_mean(self):
        self.assertEqual(self.enum.mean(), 4.909090909090909)

    def test_median(self):
        self.assertEqual(self.enum.median(), 5.0)

    def test_variance(self):
        self.assertEqual(self.enum.variance(), 7.7190082644628095)

    def test_standard_deviation(self):
        self.assertEqual(self.enum.standard_deviation(), 2.778310325442932)

    def test_percentile(self):
        self.assertEqual(self.enum.percentile(70), 6.0)
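The suite above runs with the standard unittest runner; a minimal entry point is sketched below (the module file name is an assumption).

# At the bottom of the test module (assumed to be
# test_py_descriptive_statistics.py):
if __name__ == "__main__":
    unittest.main()

Alternatively, python -m unittest test_py_descriptive_statistics discovers and runs the same tests without the explicit entry point.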
#! /usr/bin/python
import subprocess

from py_descriptive_statistics import Enum

# calculates variance using https://github.com/gleicon/py_descriptive_statistics
city = 'cincinnati'
last = subprocess.check_output(["head", "-n", "1", "{}pair.txt".format(city)])
now = subprocess.check_output(["tail", "-n", "1", "{}pair.txt".format(city)])
last = float(last)
now = float(now)
enum = Enum([last, now])
rounded = round(enum.variance())
print rounded
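Based on the expected values in the unit tests above, Enum.variance() is the population variance (dividing by n rather than n - 1), so results like the one this script prints can be cross-checked against the standard library's statistics.pvariance (Python 3.4+). The sample values below are the same ones the tests use.

import statistics

# Same sample as the unit tests; Enum.variance() reports 7.7190082644628095,
# which is the population variance 934/121.
values = [2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2]
print(statistics.pvariance(values))  # approximately 7.719008264462810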
import sys
sys.path.append("..")

from py_descriptive_statistics import Enum

enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])
print enum.number()
print enum.sum()
print enum.mean()
print enum.median()
print enum.variance()
print enum.standard_deviation()
print enum.percentile(70)
print enum.percentile(95)
print enum.percentile(99)
def setUp(self):
    self.enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])
#! /usr/bin/python
import subprocess

from py_descriptive_statistics import Enum

# calculates standard deviation (the square root of the variance) using
# https://github.com/gleicon/py_descriptive_statistics
city = 'cincinnati'
last = subprocess.check_output(["head", "-n", "1", "{}pair.txt".format(city)])
now = subprocess.check_output(["tail", "-n", "1", "{}pair.txt".format(city)])
last = float(last)
now = float(now)
enum = Enum([last, now])
rounded = round(enum.standard_deviation())  # rounds the number cleanly
print rounded
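As the comment above notes, the standard deviation is the square root of the variance; the unit-test expectations bear this out (2.778310325442932 squared is approximately 7.7190082644628), and the relationship can be spot-checked directly on the test sample.

import math

from py_descriptive_statistics import Enum

# The two prints should agree up to floating-point rounding,
# roughly 2.778310325442932.
enum = Enum([2, 6, 9, 3, 5, 1, 8, 3, 6, 9, 2])
print(enum.standard_deviation())
print(math.sqrt(enum.variance()))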