Exemplo n.º 1
0
def peak2fasta(peak_ids, ref_genome):
    '''
    Convert peak_id into fasta object.

    Args:
        peak_id (str or list of str): Peak_id.  e.g. "chr5_0930303_9499409"
            or it can be a list of peak_id.  e.g. ["chr5_0930303_9499409", "chr11_123445555_123445577"]

        ref_genome (str): Reference genome name.   e.g. "mm9", "mm10", "hg19" etc

    Returns:
        gimmemotifs fasta object: DNA sequence in fasta format

    '''
    genome_data = Genome(ref_genome)

    def peak2seq(peak_id):
        chromosome_name, start, end = decompose_chrstr(peak_id)
        locus = (int(start), int(end))

        tmp = genome_data[chromosome_name][locus[0]:locus[1]]
        name = f"{tmp.name}_{tmp.start}_{tmp.end}"
        seq = tmp.seq
        return (name, seq)

    if type(peak_ids) is str:
        peak_ids = [peak_ids]

    fasta = Fasta()
    for peak_id in peak_ids:
        name, seq = peak2seq(peak_id)
        fasta.add(name, seq)

    return fasta
Exemplo n.º 2
0
def remove_zero_seq(fasta_object):
    """
    Remove DNA sequence with zero length
    """
    fasta = Fasta()
    for i, seq in enumerate(fasta_object.seqs):
        if seq:
            name = fasta_object.ids[i]
            fasta.add(name, seq)
    return fasta