Exemplo n.º 1
0
def get_fastx_entries(fastx_filename,
                      fasta=False,
                      fastq=False):
    """
    Get entries of FASTQ/FASTA file.

    The file format is chosen as follows:

    - if fasta=True, read file as fasta regardless of extension.
    - if fastq=True, read file as fastq regardless of extension.
    - otherwise, use the type reported by get_fastx_type() for
      the filename.

    If both flags are set, fasta takes precedence.

    Returns whatever the selected reader (fasta_utils.read_fasta or
    fastq_utils.read_fastq) returns. When no flag is given and the
    detected type is neither "fasta" nor "fastq", returns an empty
    list.
    """
    # Resolve explicit overrides before looking at the detected type.
    # Otherwise fastq=True would be ignored for any file detected as
    # FASTA, contradicting the "regardless of extension" contract.
    if fasta:
        fastx_type = "fasta"
    elif fastq:
        fastx_type = "fastq"
    else:
        # No override given: fall back to filename-based detection
        fastx_type = get_fastx_type(fastx_filename)

    if fastx_type == "fasta":
        # It's a FASTA file
        return fasta_utils.read_fasta(fastx_filename)
    if fastx_type == "fastq":
        # It's a FASTQ file
        return fastq_utils.read_fastq(fastx_filename)
    # Unrecognized type: no entries
    return []
Exemplo n.º 2
0
def download_misc_seqs(genome, output_dir):
    """
    Download assorted sequences related to genome.

    genome: genome name. Names starting with "hg" are treated as
      human and names starting with "mm" as mouse. Any other name
      prints an error message and exits the process with status 1.
    output_dir: directory under which the "ncbi" and "misc"
      subdirectories are created.

    For each (label, accession) pair in NCBI_MISC_SEQS for the
    organism, the accession is fetched with download_ncbi_fasta()
    into the "ncbi" subdirectory, and the first FASTA record of the
    result is rewritten to "misc/<label>.fa" with ">label" as its
    header. Pairs whose accession is None, or whose output file
    already exists, are skipped.
    """
    # Mapping from sequence label (e.g. rRNA)
    # to accession numbers
    if genome.startswith("hg"):
        organism = "human"
    elif genome.startswith("mm"):
        organism = "mouse"
    else:
        # Single-argument print(...) behaves the same whether it is
        # parsed as a Python 2 statement or a Python 3 function call.
        print("Error: Unsupported genome.")
        sys.exit(1)
    # Fetch the accession numbers for the organism's
    # misc sequences and download them
    misc_seqs = NCBI_MISC_SEQS[organism]
    ncbi_outdir = os.path.join(output_dir, "ncbi")
    misc_outdir = os.path.join(output_dir, "misc")
    utils.make_dir(ncbi_outdir)
    utils.make_dir(misc_outdir)
    for seq_label, access_id in misc_seqs.items():
        if access_id is None:
            continue
        output_filename = os.path.join(misc_outdir, "%s.fa" % (seq_label))
        if os.path.isfile(output_filename):
            print("%s exists. Skipping download.." % (seq_label))
            continue
        print("Downloading: %s (NCBI: %s)" % (seq_label, access_id))
        url_filename = download_ncbi_fasta(access_id, ncbi_outdir)
        fasta_in = fasta_utils.read_fasta(url_filename)
        # Fetch first FASTA record *before* creating the output file.
        # If reading fails, no empty "<label>.fa" is left behind to make
        # the isfile() check above skip this download on later runs.
        _ncbi_label, fasta_seq = next(fasta_in)
        # Output it with the required label
        new_rec = (">%s" % (seq_label), fasta_seq)
        print("  - Writing to: %s" % (output_filename))
        # Context manager guarantees the handle is flushed and closed,
        # even if write_fasta raises.
        with open(output_filename, "w") as fasta_out:
            fasta_utils.write_fasta(fasta_out, [new_rec])
Exemplo n.º 3
0
 def __init__(self, fasta_fname):
     """
     Record the FASTA filename and load its sequences.

     fasta_fname: path handed to fasta_utils.read_fasta(); the
       value that call returns is stored as self.seqs.
     """
     # Keep the path on the instance, then pass the same path to
     # the FASTA reader.
     self.fasta_fname = fasta_fname
     self.seqs = fasta_utils.read_fasta(fasta_fname)
Exemplo n.º 4
0
 def __init__(self, fasta_fname):
     """
     Record the FASTA filename and load its sequences.

     fasta_fname: path handed to fasta_utils.read_fasta(); the
       value that call returns is stored as self.seqs.
     """
     # Keep the path on the instance, then pass the same path to
     # the FASTA reader.
     self.fasta_fname = fasta_fname
     self.seqs = fasta_utils.read_fasta(fasta_fname)