def m80_Fasta():
    '''
        Rebuild the 'ACGT' Fasta from the raw fasta file.

        The Fasta object itself is not returned; access it
        through the m80 API instead.

        Returns
        -------
        True once the Fasta has been created.
    '''
    # Drop any previously stored 'ACGT' Fasta (forced delete) before rebuilding
    m80tools.delete('Fasta', 'ACGT', force=True)
    Fasta.from_file('ACGT', 'raw/ACGT.fasta')
    return True
def print_flank_sequence(vcf, fasta, out, window=30):
    '''
        Write the reference sequence flanking each VCF variant to a file.

        For every record in the VCF, two FASTA-style entries are written
        to `out`:

        - ">{chrom}.{pos}_1" : fasta[chrom][pos+1 : pos+window]
        - ">{chrom}.{pos}_2" : fasta[chrom][pos-window : pos-1]

        Both sequences are upper-cased. If the base the Fasta reports at
        the variant position differs from the record's REF allele, a
        warning is printed to stdout and the entries are written anyway.

        Parameters
        ----------
        vcf : str
            Path to the VCF file (opened with pysam.VariantFile).
        fasta : str
            Path to the reference fasta. It is only read when no Fasta
            named 'temp' is available yet; otherwise the existing 'temp'
            Fasta is reused.
        out : str
            Path of the output file. It is opened in write mode, so any
            existing content is replaced.
        window : int, default 30
            Offset from the variant position that bounds each slice.

        Returns
        -------
        None
    '''
    # The context manager closes the VCF handle even if an error occurs
    with pysam.VariantFile(vcf) as variants:
        # Reuse the cached 'temp' Fasta when present, otherwise build it
        if not available("Fasta", 'temp'):
            reference = Fasta.from_file('temp', fasta)
        else:
            reference = Fasta('temp')
        with open(out, 'w') as OUT:
            for var in variants:
                chrom_seq = reference[var.chrom]
                # Sanity check the reference allele against the FASTA
                # NOTE(review): the comparison is case-sensitive and uses a
                # single position, so multi-base REF alleles will always
                # be reported -- confirm this is intended.
                if chrom_seq[var.pos] != var.ref:
                    print(f"{var.id} REF does not match the FASTA. oops.")
                # Grab the probe sequences anyways
                # NOTE(review): how the slice bounds are interpreted (and
                # what happens when pos - window falls before the start of
                # the chromosome) depends on the Fasta implementation --
                # verify against it.
                kmer = ''.join(chrom_seq[var.pos+1:var.pos+window]).upper()
                print(">{}.{}_{}\n{}".format(var.chrom, var.pos, "1", kmer), file=OUT)
                kmer = ''.join(chrom_seq[var.pos-window:var.pos-1]).upper()
                print(">{}.{}_{}\n{}".format(var.chrom, var.pos, "2", kmer), file=OUT)
def _add_fasta(self, fasta):
    '''
        Add a reference genome sequence to the database.

        This reference sequence will be used to sort and conform
        the genotype (VCF) files used for imputation. A fasta can
        only be attached once to a hapdab.

        Parameters
        ----------
        fasta : str, path-like or a Fasta object
            One of: a Fasta instance, a path to an existing fasta
            file, or the name of a Fasta that is already available
            through m80.

        Returns
        -------
        None if successful. See the API for accessing the
        fasta object.

        Raises
        ------
        ValueError
            If a fasta has already been assigned to the database,
            or if `fasta` is none of the accepted forms.
    '''
    if 'Fasta' in self._dict:
        raise ValueError(
            'A Fasta has already been assigned to this database!')

    if isinstance(fasta, Fasta):
        # A Fasta object is not a path, so it has to be recognized
        # before the os.path.exists() check below is attempted.
        f = fasta
        link_into_basedir = True
    elif os.path.exists(fasta):
        # Built from the file with this database passed as its parent;
        # no symlink is created for this case.
        f = Fasta.from_file(self._m80_name, fasta, parent=self)
        link_into_basedir = False
    elif m80.Tools.available('Fasta', fasta):
        f = Fasta(fasta)
        link_into_basedir = True
    else:
        raise ValueError('Unable to determine the type of fasta')

    if link_into_basedir:
        # Make the pre-existing Fasta reachable from this database's directory
        os.symlink(
            f._basedir,
            os.path.join(self._basedir, f'Fasta.{f._m80_name}')
        )

    self._fasta = f
    # and remember for next time
    self._dict['Fasta'] = f._m80_name