def fasta(self): """Make a fasta file with all uniprot proteins that are related to this family.""" fasta = FASTA(self.p.proteins) if not fasta.exists: fasta.create() for seq in pfam.fasta: if self.fam_name in seq.description: fasta.add_seq(seq) fasta.close() assert fasta # Return # return fasta
class PairedFASTA(object): """Read and write FASTA file pairs without using too much RAM""" format = 'fasta' def __len__(self): return self.count def __iter__(self): return self.parse() def __nonzero__(self): return bool(self.fwd) and bool(self.rev) def __repr__(self): return '<%s object on "%s" and "%s">' % \ (self.__class__.__name__, self.fwd.path, self.rev.path) def __enter__(self): return self.create() def __exit__(self, exc_type, exc_value, traceback): self.close() @property def exists(self): return self.fwd.exists and self.rev.exists def __init__(self, fwd, rev, parent=None): # FASTA objects # self.fwd = FASTA(fwd) self.rev = FASTA(rev) # Extra # self.gzipped = self.fwd.gzipped self.parent = parent @property_cached def count(self): assert self.fwd.count == self.rev.count return self.fwd.count def open(self): self.fwd.open() self.rev.open() def parse(self): return izip(self.fwd.parse(), self.rev.parse()) def close(self): self.fwd.close() self.rev.close() def create(self): self.fwd.create() self.rev.create() return self def add(self, f, r): return self.add_pair((f, r)) def add_pair(self, pair): self.fwd.add_seq(pair[0]) self.rev.add_seq(pair[1]) def remove(self): self.fwd.remove() self.rev.remove() @property def progress(self): """Just like self.parse but display a progress bar""" return tqdm(self, total=len(self)) def subsample(self, down_to, dest_pair=None): # Check size # assert down_to < len(self) # Make new pair of files # if dest_pair is None: dest_fwd_path = self.fwd_path.new_name_insert("subsampled") dest_rev_path = self.rev_path.new_name_insert("subsampled") dest_pair = self.__class__(dest_fwd_path, dest_rev_path) # Do it # dest_pair.create() for pair in isubsample(self, down_to): dest_pair.add_pair(pair) self.subsampled.close() # Did it work # assert len(dest_pair) == down_to #------------------------------- Extensions ------------------------------# def parse_primers(self, *args, **kwargs): fwd_gen = self.fwd.parse_primers(*args, **kwargs) rev_gen = self.rev.parse_primers(*args, **kwargs) generator = izip(fwd_gen, rev_gen) return GenWithLength(generator, len(fwd_gen))
class PairedFASTA(object): """Read and write FASTA file pairs without using too much RAM""" format = 'fasta' def __len__(self): return self.count def __iter__(self): return self.parse() def __nonzero__(self): return bool(self.fwd) and bool(self.rev) def __repr__(self): return '<%s object on "%s" and "%s">' % \ (self.__class__.__name__, self.fwd.path, self.rev.path) def __enter__(self): return self.create() def __exit__(self, exc_type, exc_value, traceback): self.close() @property def exists(self): return self.fwd.exists and self.rev.exists def __init__(self, fwd, rev, parent=None): # FASTA objects # self.fwd = FASTA(fwd) self.rev = FASTA(rev) # Extra # self.gzipped = self.fwd.gzipped self.parent = parent @property_cached def count(self): assert self.fwd.count == self.rev.count return self.fwd.count def open(self): self.fwd.open() self.rev.open() def parse(self): return izip(self.fwd.parse(), self.rev.parse()) def close(self): self.fwd.close() self.rev.close() def create(self): self.fwd.create() self.rev.create() return self def add(self, f, r): return self.add_pair((f,r)) def add_pair(self, pair): self.fwd.add_seq(pair[0]) self.rev.add_seq(pair[1]) def remove(self): self.fwd.remove() self.rev.remove() @property def progress(self): """Just like self.parse but display a progress bar""" return tqdm(self, total=len(self)) def subsample(self, down_to, dest_pair=None): # Check size # assert down_to < len(self) # Make new pair of files # if dest_pair is None: dest_fwd_path = self.fwd_path.new_name_insert("subsampled") dest_rev_path = self.rev_path.new_name_insert("subsampled") dest_pair = self.__class__(dest_fwd_path, dest_rev_path) # Do it # dest_pair.create() for pair in isubsample(self, down_to): dest_pair.add_pair(pair) self.subsampled.close() # Did it work # assert len(dest_pair) == down_to #------------------------------- Extensions ------------------------------# def parse_primers(self, *args, **kwargs): fwd_gen = self.fwd.parse_primers(*args, **kwargs) rev_gen = self.rev.parse_primers(*args, **kwargs) generator = izip(fwd_gen, rev_gen) return GenWithLength(generator, len(fwd_gen))