示例#1
0
def do_access(fa_fname, exclude_fnames=(), min_gap_size=5000):
    """Compute the accessible sequence regions of a FASTA file.

    The regions yielded by ``get_regions(fa_fname)`` are loaded into a GA,
    each file in `exclude_fnames` is read as 'bed3' and subtracted in turn,
    and what remains is passed through ``join_regions`` together with
    `min_gap_size`.

    Returns a new GA built from the joined rows.
    """
    accessible = GA.from_rows(get_regions(fa_fname))
    for bed_path in exclude_fnames:
        # Remove every interval listed in this exclusion file.
        accessible = accessible.subtract(tabio.read(bed_path, 'bed3'))
    joined_rows = join_regions(accessible, min_gap_size)
    return GA.from_rows(joined_rows)
示例#2
0
 def _from_intervals(coords):
     """Build a GA from (start, end, gene) rows on chromosome 'chr0'.

     `coords` is materialized into a list, loaded into a DataFrame with the
     columns 'start', 'end' and 'gene', and given a constant 'chromosome'
     column before being wrapped in a GA.
     """
     table = pd.DataFrame(list(coords), columns=['start', 'end', 'gene'])
     table = table.assign(chromosome='chr0')
     result = GA(table)
     # NOTE(review): presumably reorders the columns in place -- confirm
     # against GA.sort_columns.
     result.sort_columns()
     return result
示例#3
0
def do_access(fa_fname, exclude_fnames=(), min_gap_size=5000,
              skip_noncanonical=True):
    """Compute the accessible sequence regions of a FASTA file.

    The regions from ``get_regions(fa_fname)`` are optionally filtered with
    ``drop_noncanonical_contigs`` (when `skip_noncanonical` is true), loaded
    into a GA, reduced by subtracting each 'bed3' file in `exclude_fnames`,
    and finally passed through ``join_regions`` with `min_gap_size`.

    Returns a new GA built from the joined rows.
    """
    region_rows = get_regions(fa_fname)
    if skip_noncanonical:
        region_rows = drop_noncanonical_contigs(region_rows)

    accessible = GA.from_rows(region_rows)
    for bed_path in exclude_fnames:
        # Remove every interval listed in this exclusion file.
        accessible = accessible.subtract(tabio.read(bed_path, 'bed3'))

    joined_rows = join_regions(accessible, min_gap_size)
    return GA.from_rows(joined_rows)
示例#4
0
def guess_chromosome_regions(targets, telomere_size):
    """Estimate one whole-chromosome region per chromosome in `targets`.

    Each region starts at `telomere_size` and ends at the `end` value of the
    last row of that chromosome's sub-array, so the lengths are lower bounds
    derived from the target coordinates.

    Returns a GA with 'chromosome', 'start' and 'end' columns.
    """
    last_ends = []
    for _chrom, chrom_targets in targets.by_chromosome():
        last_ends.append(chrom_targets.end.iat[-1])
    # NOTE(review): assumes by_chromosome() yields chromosomes in the same
    # order as drop_duplicates() lists them -- confirm.
    chrom_names = targets.chromosome.drop_duplicates()
    return GA.from_columns({
        'chromosome': chrom_names,
        'start': telomere_size,
        'end': last_ends,
    })
示例#5
0
def guess_chromosome_regions(targets, telomere_size):
    """Derive (minimum) whole-chromosome regions from target coordinates.

    For every chromosome yielded by ``targets.by_chromosome()``, the `end` of
    its final row is taken as the chromosome's endpoint; every region begins
    at `telomere_size`.

    Returns a GA with 'chromosome', 'start' and 'end' columns.
    """
    chrom_ends = []
    for _name, per_chrom in targets.by_chromosome():
        chrom_ends.append(per_chrom.end.iat[-1])
    # NOTE(review): the names come from drop_duplicates() while the endpoints
    # come from by_chromosome(); presumably both share one order -- confirm.
    unique_chroms = targets.chromosome.drop_duplicates()
    columns = {
        'chromosome': unique_chroms,
        'start': telomere_size,
        'end': chrom_ends,
    }
    return GA.from_columns(columns)
示例#6
0
 def test_residuals(self):
     """Check summary statistics of residuals for each grouping argument.

     Residuals of the amplicon .cnr data are computed with no grouping, with
     the .cns segments, and with a GenomicArray built from the segments'
     data after ``drop_extra_columns()``; the mean, 80th percentile and
     standard deviation must each fall within the tolerances below.
     """
     bins = cnvlib.read("formats/amplicon.cnr")
     segs = cnvlib.read("formats/amplicon.cns")
     bare_regions = GenomicArray(segs.data).drop_extra_columns()
     groupings = (None, segs, bare_regions)
     for grouping in groupings:
         residuals = bins.residuals(grouping)
         self.assertAlmostEqual(0, residuals.mean(), delta=.3)
         self.assertAlmostEqual(1, np.percentile(residuals, 80), delta=.2)
         self.assertAlmostEqual(2, residuals.std(), delta=.5)
示例#7
0
 def __init__(self, data_table, meta_dict=None):
     """Initialize the instance by delegating to ``GenomicArray.__init__``.

     `data_table` and `meta_dict` (None by default) are forwarded unchanged.
     """
     GenomicArray.__init__(self, data_table, meta_dict)
示例#8
0
文件: cnary.py 项目: chapmanb/cnvkit
 def __init__(self, data_table, meta_dict=None):
     """Initialize the instance by delegating to ``GenomicArray.__init__``.

     `data_table` and `meta_dict` (None by default) are forwarded unchanged.
     """
     GenomicArray.__init__(self, data_table, meta_dict)
示例#9
0
 def _from_intervals(coords):
     """Wrap (start, end, gene) rows in a GA on chromosome 'chr0'.

     The rows in `coords` become a DataFrame with 'start', 'end' and 'gene'
     columns, a constant 'chromosome' column is added, and the frame is
     handed to GA.
     """
     rows = list(coords)
     frame = pd.DataFrame(rows, columns=['start', 'end', 'gene'])
     frame = frame.assign(chromosome='chr0')
     intervals = GA(frame)
     # NOTE(review): presumably reorders the columns in place -- confirm
     # against GA.sort_columns.
     intervals.sort_columns()
     return intervals