def get_sequences(filename, header_sep=None):
    """Determine file type and get sequences.

    The format is picked from the extension of ``filename``, compared
    case-insensitively:

    * ``.fa``, ``.faa``, ``.fasta`` are read with
      ``sequtils.fasta_to_dataframe``.
    * ``.gb``, ``.gbk``, ``.genbank``, ``.gbff`` are read with
      ``sequtils.genbank_to_dataframe`` using ``cds=True``.

    Args:
        filename: path to a FASTA or GenBank file.
        header_sep: forwarded unchanged to ``sequtils.fasta_to_dataframe``;
            it is not used for GenBank input.

    Returns:
        Whatever the selected ``sequtils`` reader returns (presumably a
        dataframe of sequences -- confirm against ``sequtils``).

    Raises:
        ValueError: if the file extension is not one of the recognised
            FASTA or GenBank extensions.
    """
    # Lower-case the extension so e.g. 'genome.GBK' or 'prots.FASTA' match.
    ext = os.path.splitext(filename)[1].lower()
    if ext in ('.fa', '.faa', '.fasta'):
        return sequtils.fasta_to_dataframe(filename, header_sep=header_sep)
    if ext in ('.gb', '.gbk', '.genbank', '.gbff'):
        return sequtils.genbank_to_dataframe(filename, cds=True)
    # Fail with a clear message instead of falling through to an unbound
    # local variable when the extension is not recognised.
    raise ValueError(
        'Unrecognised sequence file extension {!r} for file {!r}; '
        'expected a fasta or genbank file'.format(ext, filename)
    )
def get_protein_set():
    """Load the 'SYF_set.fasta' file found in ``datadir``.

    The path is built by joining the module-level ``datadir`` with the
    fixed file name, and the file is parsed with
    ``sequtils.fasta_to_dataframe`` using that function's defaults.

    Returns:
        The result of ``sequtils.fasta_to_dataframe`` for that file
        (presumably a dataframe of sequences -- confirm against ``sequtils``).
    """
    fasta_path = os.path.join(datadir, 'SYF_set.fasta')
    protein_set = sequtils.fasta_to_dataframe(fasta_path)
    return protein_set