def permute_cnas(cnas, gene2chromo, chromo2genes):
    """Build permuted CNA mutations by re-placing each sample's CNA blocks
    at new gene indices on the same chromosome.

    Arguments:
    cnas -- iterable of CNA mutations; each must expose a ``sample``
            attribute, which is used to group the CNAs per sample.
    gene2chromo -- passed through unchanged to get_cna_blocks_for_sample.
    chromo2genes -- passed through to get_cna_blocks_for_sample and also
                    indexed by chromosome to obtain the gene sequence that
                    the permuted indices point into.

    Returns a list of Mutation tuples: one per index chosen for each block,
    carrying the original sample and the block's mut_type.
    """
    # Group the input CNAs by the sample they belong to.
    samples2cnas = defaultdict(list)
    for cna in cnas:
        samples2cnas[cna.sample].append(cna)

    permuted_cnas = []
    for sample, sample_cnas in samples2cnas.items():
        chromo2blocks = get_cna_blocks_for_sample(
            sample_cnas, gene2chromo, chromo2genes)
        # .items() instead of .iteritems(): the latter only exists on
        # Python 2 dicts and raises AttributeError on Python 3.
        for chromo, blocks in chromo2blocks.items():
            genes = chromo2genes[chromo]
            # Indices already claimed on this chromosome for this sample.
            invalid_indices = []
            for block in blocks:
                # NOTE(review): get_block_indices is defined elsewhere; it
                # presumably returns a list of len(block.genes) indices into
                # `genes` that avoid invalid_indices -- confirm.
                permuted_indices = get_block_indices(
                    len(genes), len(block.genes), invalid_indices)
                for index in permuted_indices:
                    permuted_cnas.append(
                        Mutation(sample, genes[index], block.mut_type))
                # Besides the chosen indices, the positions immediately
                # before and after them are marked invalid as well
                # (presumably so later blocks cannot abut this one).
                new_invalid_indices = permuted_indices + \
                    [min(permuted_indices) - 1, max(permuted_indices) + 1]
                invalid_indices.extend(new_invalid_indices)
    return permuted_cnas
def permute_snvs(samples, tested_genes, gene2length, bmr, gene2bmr):
    """Randomly generate SNVs for every (sample, gene) pair.

    For each pair an SNV is emitted with probability 1 - (1 - r) ** L,
    where L is gene2length[gene] and r is gene2bmr[gene] when the gene has
    an entry there, otherwise the default rate ``bmr``.

    Arguments:
    samples -- iterable of sample IDs.
    tested_genes -- iterable of gene names; it is traversed once per
                    sample, so it must be re-iterable.
    gene2length -- mapping from gene name to gene length; every tested
                   gene must be present.
    bmr -- default per-position mutation rate (presumably the background
           mutation rate).
    gene2bmr -- mapping of gene-specific rates that override ``bmr``.

    Returns a list of Mutation tuples with mut_type == SNV.
    """
    result = []
    for sample in samples:
        for gene in tested_genes:
            if gene in gene2bmr:
                rate = gene2bmr[gene]
            else:
                rate = bmr
            length = gene2length[gene]
            # Probability that at least one of `length` positions mutates.
            hit_probability = 1 - (1 - rate) ** length
            if random.random() <= hit_probability:
                result.append(Mutation(sample, gene, SNV))
    return result
def load_snvs(snv_file, gene_wlst=None, sample_wlst=None):
    """Read SNVs from a TSV file into a list of Mutation tuples whose
    mut_type is SNV.

    Arguments:
    snv_file -- path to a tab-separated file. The first column of each line
                is a sample ID; every following column names a gene with an
                SNV in that sample. Lines starting with "#" are skipped.
    gene_wlst -- whitelist of allowed genes (default None). Genes for which
                 include(gene, gene_wlst) is false are dropped; per the
                 loader's contract, None keeps all genes.
    sample_wlst -- whitelist of allowed samples (default None). Samples for
                   which include(sample, sample_wlst) is false are dropped;
                   None keeps all samples.
    """
    with open(snv_file) as handle:
        rows = [
            line.rstrip().split("\t")
            for line in handle
            if not line.startswith("#")
        ]

    mutations = []
    for row in rows:
        sample = row[0]
        if not include(sample, sample_wlst):
            continue
        for gene in row[1:]:
            if include(gene, gene_wlst):
                mutations.append(Mutation(sample, gene, SNV))
    return mutations
def load_cnas(cna_file, gene_wlst=None, sample_wlst=None):
    """Read CNAs from a TSV file into a list of Mutation tuples whose
    mut_type is AMP or DEL.

    Arguments:
    cna_file -- path to a tab-separated file. The first column of each line
                is a sample ID; every following column is a gene name
                followed by "(A)" or "(D)", indicating an amplification or
                a deletion of that gene in the sample. Lines starting with
                "#" are skipped.
    gene_wlst -- whitelist of allowed genes (default None). Genes for which
                 include(gene, gene_wlst) is false are dropped; per the
                 loader's contract, None keeps all genes.
    sample_wlst -- whitelist of allowed samples (default None). Samples for
                   which include(sample, sample_wlst) is false are dropped;
                   None keeps all samples.
    """
    with open(cna_file) as handle:
        rows = [
            line.rstrip().split("\t")
            for line in handle
            if not line.startswith("#")
        ]

    mutations = []
    for row in rows:
        sample = row[0]
        if not include(sample, sample_wlst):
            continue
        for entry in row[1:]:
            # The gene name is everything before the first "(".
            gene = entry.split("(")[0]
            if include(gene, gene_wlst):
                # get_mut_type receives the full entry, suffix included.
                mutations.append(Mutation(sample, gene, get_mut_type(entry)))
    return mutations
def load_inactivating_snvs(inactivating_snvs_file, gene_wlst=None,
                           sample_wlst=None):
    """Load inactivating SNVs from a file and return them as a list of
    Mutation tuples with mut_type == INACTIVE_SNV.

    Arguments:
    inactivating_snvs_file -- path to a whitespace-separated file (e.g. TSV)
                              listing inactivating SNVs, where the first
                              column of each line is a gene name and the
                              second column is a sample ID. Lines starting
                              with "#" and blank lines are ignored.
    gene_wlst -- whitelist of allowed genes (default None). Genes for which
                 include(gene, gene_wlst) is false are dropped; per the
                 loader's contract, None keeps all genes.
    sample_wlst -- whitelist of allowed samples (default None). Samples for
                   which include(sample, sample_wlst) is false are dropped;
                   None keeps all samples.

    Raises IndexError if a non-blank, non-comment line has only one column.
    """
    with open(inactivating_snvs_file) as f:
        rows = [line.split() for line in f if not line.startswith("#")]

    mutations = []
    for fields in rows:
        # str.split() yields [] for blank/whitespace-only lines (such as a
        # trailing empty line); skip them instead of failing on fields[1].
        if not fields:
            continue
        gene, sample = fields[0], fields[1]
        if include(sample, sample_wlst) and include(gene, gene_wlst):
            mutations.append(Mutation(sample, gene, INACTIVE_SNV))
    return mutations
def get_invalidated_mutation(mutation):
    """Return a new Mutation with the same sample, gene and mut_type as
    ``mutation`` and False as the fourth constructor argument.

    NOTE(review): the fourth Mutation field is presumably a validity flag,
    judging by this function's name -- confirm against Mutation's
    definition. The input mutation is not modified.
    """
    sample, gene, mut_type = mutation.sample, mutation.gene, mutation.mut_type
    return Mutation(sample, gene, mut_type, False)