Exemplo n.º 1
0
    def __init__(self, config, workflow_name="", validate_barcodes=True):
        """Initialise workflow state from a configuration mapping.

        Args:
            config: Mapping of configuration values. The keys read here are
                ``BARCODE_IDS`` (a collection of barcode ids, or a YAML
                string encoding one), ``BARCODES`` (path to a text file whose
                ``>``-prefixed lines name the available barcodes) and
                ``LOCI`` (iterable of locus definition files).
            workflow_name: Stored unchanged on the instance.
            validate_barcodes: When true, check the requested ids against
                the barcode file and fall back to every barcode in that file
                when no ids were requested.

        Raises:
            WorkflowError: If the barcode file cannot be read, if validation
                finds no barcodes at all, or if a locus definition file is
                missing or invalid.
            AssertionError: If validation finds a requested barcode id that
                is not present in the barcode file.
        """
        self._config = config
        self._workflow_name = workflow_name
        self._locus = None

        bc_ids = config.get("BARCODE_IDS", "[]")
        if isinstance(bc_ids, str):
            # safe_load: the value comes from user configuration, and calling
            # yaml.load() without an explicit Loader is rejected by PyYAML 6+.
            bc_ids = yaml.safe_load(bc_ids)
        # An empty string or explicit null yields None; treat it as "no ids"
        # so the len() checks below do not fail with a TypeError.
        self._barcode_ids = [] if bc_ids is None else bc_ids

        try:
            with open(config["BARCODES"], "r") as bc_file:
                # Keep the text after the leading ">" of each header line.
                self._all_barcodes = [
                    line.strip()[1:] for line in bc_file if line.startswith(">")
                ]
        except KeyError:
            # A missing BARCODES entry is tolerated; validation below decides
            # whether having no barcodes at all is an error.
            self._all_barcodes = []
        except IOError:
            raise WorkflowError("Could not load barcodes")

        if validate_barcodes:
            if len(self._barcode_ids) and len(self._all_barcodes):
                # Raised explicitly (not via `assert`) so the check still
                # runs when Python is started with -O.
                if not all(x in self._all_barcodes for x in self._barcode_ids):
                    raise AssertionError("barcode id not in barcode file")

            if len(self._barcode_ids) == 0:
                if len(self._all_barcodes) > 0:
                    # No explicit selection: use every barcode in the file.
                    self._barcode_ids = self._all_barcodes
                else:
                    raise WorkflowError("No valid barcodes provided")

        # Maps each locus name to the file that defines it.
        self._genes = {}
        for locus_file in config.get("LOCI", []):
            try:
                locus = locus_processing.load_locus_yaml(locus_file)
            except IOError:
                raise WorkflowError("Locus definition file {} does not exist".format(locus_file))
            except ValueError:
                raise WorkflowError("{} is not a valid locus definition".format(locus_file))

            self._genes[locus.name] = locus_file
Exemplo n.º 2
0
    phase = int(fields[2].split("haplotype")[-1])
    return counts[(cluster, phase)]


def count_passes(allele_id, phasing):
    """Return the summed ``np`` value for the cluster/phase named by an allele id.

    ``allele_id`` is split on ``.``: the second field supplies the cluster
    number (the text after ``cluster``) and the third supplies the phase
    number (the text after ``haplotype``). ``phasing`` is a table with
    ``cluster``, ``phase`` and ``np`` columns (``np`` is presumably the
    number of passes -- confirm against the producer of the table).

    Returns 0 when ``phasing`` is ``None``.
    """
    if phasing is None:
        return 0

    # Total ``np`` per (cluster, phase) pair.
    np_totals = phasing.groupby(["cluster", "phase"])["np"].sum()

    parts = allele_id.split(".")
    cluster_no = int(parts[1].rpartition("cluster")[2])
    phase_no = int(parts[2].rpartition("haplotype")[2])
    return np_totals[(cluster_no, phase_no)]


# Load the locus definition from the file given as this script's `gene` input.
# This runs at import time, so `gene` is a module-level value.
gene = locus_processing.load_locus_yaml(snakemake.input.gene)


def summarize_alleles(barcode):
    alleles = load_alleles(
        next(f for f in snakemake.input.haplotypes if barcode in f))
    vep = load_vep(next(f for f in snakemake.input.vep if barcode in f))
    last = load_last(next(f for f in snakemake.input.last if barcode in f))
    phasing = load_phasing_summary(
        next(f for f in snakemake.input.phasing if barcode in f))

    num_alleles = len(alleles)
    first = True

    allele_info = []
    for allele in alleles:
Exemplo n.º 3
0
"""
Use PyBedTools to generate a fasta file containing the sequence for a single region
"""
from Bio import SeqIO
import pybedtools
import locus_processing
import yaml


# Locus definition supplying the chromosome name and fallback coordinates.
locus = locus_processing.load_locus_yaml(snakemake.input.locus)

# Prefer the amplicon bounds from the experiment file (first primer pair of
# the first target). Fall back to the locus coordinates when the file is not
# configured, cannot be read, or does not contain that information.
try:
    with open(snakemake.config["EXPERIMENT"], "r") as infile:
        experiment = yaml.safe_load(infile)
    primers = experiment["targets"][0]["primers"][0]
    start_pos = primers["forward"]["start"]
    end_pos = primers["reverse"]["end"]
except (KeyError, IndexError, TypeError, IOError):
    # IndexError: empty `targets`/`primers` list.
    # TypeError: empty YAML document (loads as None) or a non-mapping level.
    start_pos = locus.coordinates.start
    end_pos = locus.coordinates.end


# create a bed tool for the required region
# (start is shifted by one; presumably converting a 1-based inclusive start
# to BED's 0-based start -- confirm against the coordinate convention used
# by the locus/experiment files)
bed_tool = pybedtools.BedTool([(locus.chromosome.name, start_pos - 1, end_pos)])

# associate the bedtool with the reference genome fasta
bed_tool = bed_tool.sequence(fi=snakemake.input.genome)

# get the sequence and save it; read first so a failed read does not leave
# behind a freshly truncated output file
sequence = SeqIO.read(bed_tool.seqfn, "fasta")
with open(snakemake.output[0], "w") as outfile:
    SeqIO.write(sequence, outfile, "fasta")