Python BedTool.BedTool примеры использования

Язык программирования: Python

Пространство имен/Пакет: pybedtools.bedtool

Класс/Тип: BedTool

Метод/Функция: BedTool

Примеров на hotexamples.com: 7

Python BedTool.BedTool — найдено 7 примеров. Это лучшие примеры кода на Python для pybedtools.bedtool.BedTool.BedTool, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

from_dataframe(17)

BedTool(7)

sort_values(3)

cat(1)

count(1)

intersect(1)

Пример #1

Показать файл

Файл: parse.py Проект: Clinical-Genomics/cgbeacon2

def genes_to_bedtool(gene_collection, hgnc_ids=None, ensembl_ids=None, build="GRCh37"):
    """Build a BedTool holding the coordinates of the requested genes.

    Genes are looked up in ``gene_collection`` by genome build and by either
    HGNC or Ensembl identifiers. When both kinds of identifiers are supplied,
    only the HGNC ones are used for the query.

    Accepts:
        gene_collection(pymongo.collection.Collection): collection to query
        hgnc_ids(list): a list of hgnc genes ids
        ensembl_ids(list): a list of ensembl gene ids
        build(str): genome build, GRCh37 or GRCh38

    Returns:
        bt(pybedtools.bedtool.BedTool): a BedTool object containing gene
            intervals, or None when no gene ids were given or no gene matched
    """
    # Without any gene identifier there is nothing to filter a VCF file with
    if not hgnc_ids and not ensembl_ids:
        return None

    # HGNC ids take precedence; the two id types are never combined in one query
    if hgnc_ids:
        id_filter = {"hgnc_id": {"$in": hgnc_ids}}
    else:
        id_filter = {"ensembl_id": {"$in": ensembl_ids}}
    query = {"build": build, **id_filter}

    # One tab-separated "chromosome<TAB>start<TAB>end" line per gene found
    bed_lines = "".join(
        "\t".join([gene["chromosome"], str(gene["start"]), str(gene["end"])]) + "\n"
        for gene in gene_collection.find(query)
    )
    if not bed_lines:
        return None  # the query matched no genes

    return BedTool(bed_lines, from_string=True)

Пример #2

Показать файл

Файл: ctcfs_on_tads.py Проект: lucananni93/CTCF_Spatial_Patterns

def aggregate_by_tad(all_TADs_by_celltype,
                     aggregations,
                     other,
                     extension=0.1,
                     n_windows=100):
    """Split each (extended) TAD into windows and pivot per-window values by TAD.

    Every region in ``all_TADs_by_celltype`` is extended on both sides by the
    fraction ``extension`` of its length and divided into a fixed number of
    windows. ``coverage_by_window`` is then called on those windows, and for
    each key of ``aggregations`` a table with one row per ``tad_uid`` and one
    column per window number is built.

    Parameters:
        all_TADs_by_celltype: DataFrame with the ``coords`` columns and a
            ``tad_uid`` column.
        aggregations: mapping whose keys name the value columns to pivot
            (passed through to ``coverage_by_window``).
        other: second input forwarded to ``coverage_by_window``.
        extension: fraction of the region length added on each side.
        n_windows: number of windows covering the un-extended region.

    Returns:
        tuple: (dict of pivot tables keyed like ``aggregations``,
        index of the first window inside the TAD,
        index one past the last window inside the TAD).
    """
    # Number of extra windows contributed by each flank
    flank_windows = int(n_windows * extension)
    tot_windows = n_windows + flank_windows * 2
    tad_start_window = flank_windows
    tad_end_window = n_windows + flank_windows

    regions = all_TADs_by_celltype[coords + ['tad_uid']].copy()
    # Underscores are removed from the ids because the window ids produced
    # below are split on "_" into exactly (tad_uid, win_num)
    regions['tad_uid'] = regions['tad_uid'].map(
        lambda uid: uid.replace("_", "-"))

    window_factory = BedTool()
    extended_regions = BedTool.from_dataframe(regions).slop(
        l=extension, r=extension, pct=True, genome="hg19")
    windows = window_factory.window_maker(
        b=extended_regions, n=tot_windows, i='srcwinnum'
    ).to_dataframe(names=coords + ['window_uid'])

    # Recover the TAD id and the window number from "<tad_uid>_<win_num>"
    uid_parts = windows['window_uid'].str.split("_", expand=True)
    uid_parts.columns = ['tad_uid', 'win_num']
    windows = pd.concat((windows, uid_parts), axis=1)
    windows['win_num'] = windows['win_num'].astype(int)
    windows = windows.sort_values(coords).reset_index(drop=True)

    windows_with_ctcfs = coverage_by_window(windows, other, aggregations)

    aggregations_by_tad = {}
    for column in aggregations:
        # Progress display: blank the current terminal line, then show the key
        print(" " * 100, end='\r')
        print(column, end="\r")
        pivoted = windows_with_ctcfs.pivot_table(index='tad_uid',
                                                 columns='win_num',
                                                 values=column)
        aggregations_by_tad[column] = pivoted.sort_index(axis=1)
    return aggregations_by_tad, tad_start_window, tad_end_window

Пример #3

Показать файл

Файл: aligner.py Проект: Vikash84/MegaPath-Nano

def align_list_to_bed(*, align_list):
    """Merge the aligned intervals of ``align_list`` into a BedTool.

    Intervals are grouped per (assembly_id, sequence_id) pair by temporarily
    using ``"<assembly_id>,<sequence_id>"`` as the chromosome name, then
    sorted and merged. The result has the columns
    ``sequence_id, start, end, assembly_id``.

    Parameters:
        align_list: DataFrame with the columns ``assembly_id``,
            ``sequence_id``, ``sequence_from`` and ``sequence_to``.

    Returns:
        BedTool: the merged intervals, or an empty BedTool when the merge
        produced no interval.
    """
    keyed_align_list = align_list.assign(
        assembly_id_sequence_id=lambda x: x['assembly_id'] + ',' + x['sequence_id'])
    temp_bed = BedTool.from_dataframe(keyed_align_list[[
        'assembly_id_sequence_id', 'sequence_from', 'sequence_to'
    ]])

    # Every intermediate BedTool is backed by a file on disk (``.fn``); track
    # them so they are deleted on every path, including the empty result and
    # any exception raised while merging.
    temp_files = [temp_bed.fn]
    try:
        temp_sorted_bed = temp_bed.sort()
        temp_files.append(temp_sorted_bed.fn)
        temp_merged_bed = temp_sorted_bed.merge()
        temp_files.append(temp_merged_bed.fn)

        if temp_merged_bed.count() > 0:
            merged_df = temp_merged_bed.to_dataframe()
            # Split the combined chromosome name back into its two ids; only
            # the first comma separates them (n=1)
            id_columns = merged_df['chrom'].str.split(
                ',', n=1, expand=True).rename(columns={
                    0: 'assembly_id',
                    1: 'sequence_id'
                })
            merged_df = pandas.concat(
                [id_columns, merged_df[['start', 'end']]], axis=1)
            bed = BedTool.from_dataframe(
                merged_df[['sequence_id', 'start', 'end', 'assembly_id']])
        else:
            bed = BedTool('', from_string=True)
    finally:
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                # Already gone; cleanup must not mask the real outcome
                pass

    return bed

Пример #4

Показать файл

def windowing_by_number(all_TADs_by_celltype, n_windows):
    """Split every region into ``n_windows`` windows.

    The last column of ``all_TADs_by_celltype`` is treated as the region id.
    The window maker labels each window ``"<id>_<number>"``; the label is
    split back so that the last column holds the region id again and a new
    ``w_num`` column holds the window number minus one.

    Parameters:
        all_TADs_by_celltype: DataFrame of regions whose last column is the id.
        n_windows: number of windows to create per region.

    Returns:
        DataFrame of windows sorted by ``coords``, with a fresh index.
    """
    column_names = all_TADs_by_celltype.columns.tolist()
    id_column = all_TADs_by_celltype.columns[-1]

    window_factory = BedTool()
    regions_bed = BedTool.from_dataframe(all_TADs_by_celltype)
    windows = window_factory.window_maker(
        b=regions_bed, n=n_windows, i='srcwinnum'
    ).to_dataframe(names=column_names)

    # The id itself may contain "_": the last piece is the window number and
    # everything before it is re-joined into the original id
    label_parts = windows[id_column].str.split("_", expand=True)
    region_ids = label_parts.iloc[:, :-1].apply(
        lambda pieces: "_".join(pieces), axis=1)
    window_numbers = label_parts.iloc[:, -1].astype(int) - 1

    windows[id_column] = region_ids
    windows['w_num'] = window_numbers
    return windows.sort_values(coords).reset_index(drop=True)

Пример #5

Показать файл

def windowing_by_size(centered_boundaries, window_size):
    """Split every region into windows of ``window_size``.

    The last column of ``centered_boundaries`` is treated as the region id.
    The window maker labels each window ``"<id>_<number>"``; the label is
    split back so that the last column holds the region id again and a new
    ``w_num`` column holds the window number minus one.

    Parameters:
        centered_boundaries: DataFrame of regions whose last column is the id.
        window_size: window size passed to the window maker as ``w``.

    Returns:
        DataFrame of windows sorted by ``coords``, with a fresh index.
    """
    column_names = centered_boundaries.columns.tolist()
    id_column = centered_boundaries.columns[-1]

    window_factory = BedTool()
    regions_bed = BedTool.from_dataframe(centered_boundaries)
    windows = window_factory.window_maker(
        b=regions_bed, w=window_size, i='srcwinnum'
    ).to_dataframe(names=column_names)

    # The id itself may contain "_": the last piece is the window number and
    # everything before it is re-joined into the original id
    label_parts = windows[id_column].str.split("_", expand=True)
    region_ids = label_parts.iloc[:, :-1].apply(
        lambda pieces: "_".join(pieces), axis=1)
    window_numbers = label_parts.iloc[:, -1].astype(int) - 1

    windows[id_column] = region_ids
    windows['w_num'] = window_numbers
    return windows.sort_values(coords).reset_index(drop=True)

Пример #6

Показать файл

Файл: parse.py Проект: Clinical-Genomics/cgbeacon2

def merge_intervals(panels):
    """Combine one or more panel BED files into a single set of intervals.

    Accepts:
        panels(list) : path to one or more panel bed files; must not be empty

    Returns:
        merged_panels(BedTool): a BedTool built from the first panel, and
            concatenated (``cat``) with the remaining panels when more than
            one is given

    """
    first_panel, other_panels = panels[0], panels[1:]
    merged_panels = BedTool(first_panel)
    # A single panel needs no concatenation
    return merged_panels.cat(*other_panels) if other_panels else merged_panels

Пример #7

Показать файл

Файл: parse.py Проект: Clinical-Genomics/cgbeacon2

def _compute_intersections(vcf_file, filter):
    """Intersect the entries of a VCF file with a set of intervals.

    Accepts:
        vcf_file(str): path to the VCF file
        filter: intervals to keep; must provide ``count()`` and be accepted by
            ``BedTool.intersect`` (presumably a BedTool - confirm with callers)

    Returns:
        intersections(BedTool): the result of intersecting the VCF entries
            with ``filter``, requested with ``header=True``
    """
    vcf_bed = BedTool(vcf_file)

    # Log the sizes of both inputs before intersecting
    n_intervals = filter.count()
    n_vcf_entries = vcf_bed.count()
    LOG.info(
        "Extracting %s intervals from the %s total entries of the VCF file.",
        n_intervals,
        n_vcf_entries,
    )

    intersections = vcf_bed.intersect(filter, header=True)
    LOG.info("Number of variants found in the intervals:%s", intersections.count())

    return intersections