Пример #1
0
 def records():
     """Yield every record to be written, each passed through ``HMMER.valid``.

     ``refseq``, ``seqrecords`` and ``is_dna`` are free variables taken from
     the surrounding scope.  ``refseq`` is emitted first when it is truthy,
     followed by each entry of ``seqrecords`` in order.  An entry is run
     through ``translate`` first when ``is_dna`` is false and the entry's
     ``seq.alphabet`` compares equal to ``DNAAlphabet``.
     """
     if refseq:
         yield HMMER.valid(refseq, is_dna=is_dna)
     for rec in seqrecords:
         # The short-circuit keeps the alphabet lookup off the DNA path.
         needs_translation = not is_dna and rec.seq.alphabet == DNAAlphabet
         yield HMMER.valid(
             translate(rec) if needs_translation else rec,
             is_dna=is_dna,
         )
Пример #2
0
 def records():
     """Generate the validated records: the reference first, then the rest.

     Relies on ``refseq``, ``seqrecords`` and ``is_dna`` from the enclosing
     scope.  Every yielded value is the result of
     ``HMMER.valid(..., is_dna=is_dna)``.
     """

     def prepared(entry):
         # Leave the entry untouched when working in DNA space, or when its
         # alphabet does not compare equal to DNAAlphabet; translate otherwise.
         if is_dna or not (entry.seq.alphabet == DNAAlphabet):
             return entry
         return translate(entry)

     if refseq:
         yield HMMER.valid(refseq, is_dna=is_dna)
     for entry in seqrecords:
         yield HMMER.valid(prepared(entry), is_dna=is_dna)
Пример #3
0
def generate_hmm_(opts):
    """Build an HMM from the reference MSA named by ``opts.REFMSA``.

    The reference alignment is re-written in Stockholm format to a temporary
    file, which is then handed to ``HMMER.build``.  Records are first checked
    against ``DNAAlphabet`` and, when the encoder is not ``DNAEncoder``,
    translated before being written.  If that pass raises ``VerifyError`` the
    source is switched to ``AminoAlphabet`` and the records are written as-is.

    Args:
        opts: options object; ``ENCODER``, ``REFMSA``, ``HMMER_ALIGN_BIN``
            and ``HMMER_BUILD_BIN`` are read.

    Returns:
        Path of the temporary file that ``HMMER.build`` was asked to fill.
        It is not removed here on success, so the caller owns it.

    Raises:
        RuntimeError: if the encoder is ``DNAEncoder`` but the reference MSA
            fails verification against ``DNAAlphabet``.
    """
    fd, tmphmm = mkstemp()
    close(fd)
    fd, tmpaln = mkstemp()
    close(fd)

    is_dna = opts.ENCODER == DNAEncoder
    # Tracks whether tmphmm is worth keeping; on any failure it is deleted so
    # that an unusable temporary file is not left behind.
    built = False

    try:
        with open(opts.REFMSA) as msa_fh, open(tmpaln, 'w') as aln_fh:
            msa_fmt = seqfile_format(opts.REFMSA)
            source = Verifier(SeqIO.parse(msa_fh, msa_fmt), DNAAlphabet)
            try:
                SeqIO.write((record if is_dna else translate(record)
                             for record in source), aln_fh, 'stockholm')
            except VerifyError:
                if is_dna:
                    raise RuntimeError(
                        "DNA encoding incompatible with protein reference MSA"
                    )
                # NOTE(review): this retry assumes Verifier can replay the
                # records it already consumed after set_alphabet() -- confirm.
                source.set_alphabet(AminoAlphabet)
                # Rewind AND truncate, so nothing from the failed first
                # attempt can trail the rewritten alignment.
                aln_fh.seek(0)
                aln_fh.truncate()
                SeqIO.write(source, aln_fh, 'stockholm')

        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.build(tmphmm,
                    tmpaln,
                    alphabet=HMMER.DNA if is_dna else HMMER.AMINO)
        built = True
    finally:
        # The intermediate alignment is never needed after this point.
        if exists(tmpaln):
            remove(tmpaln)
        if not built and exists(tmphmm):
            remove(tmphmm)

    return tmphmm
Пример #4
0
def generate_alignment_(seqrecords, hmmfile, opts, refseq=None):
    """Align ``seqrecords`` to the HMM in ``hmmfile`` and return the result path.

    The records (preceded by ``refseq`` when it is truthy) are written to a
    temporary FASTA file, which is passed to ``HMMER.align`` together with a
    second temporary file that receives the alignment.

    Args:
        seqrecords: iterable of sequence records to align.
        hmmfile: HMM file passed as the first argument to ``HMMER.align``.
        opts: options object; ``ENCODER``, ``HMMER_ALIGN_BIN`` and
            ``HMMER_BUILD_BIN`` are read.
        refseq: optional reference record, written before ``seqrecords``.

    Returns:
        Path of the temporary alignment file.  It is not removed here on
        success, so the caller owns it.

    Raises:
        Whatever ``SeqIO.write`` or ``HMMER.align`` raise; both temporary
        files are removed before the exception propagates.
    """
    fd, tmpseq = mkstemp(); close(fd)
    fd, tmpaln = mkstemp(); close(fd)
    # Tracks whether tmpaln is worth keeping.  The original ``finished`` flag
    # only guarded an unreachable ``raise``: a failure inside the ``try``
    # already propagates past the ``finally``.
    aligned = False

    is_dna = opts.ENCODER == DNAEncoder
    log = getLogger(IDEPI_LOGGER)

    def records():
        # Reference first, then the inputs; DNA records are translated when
        # the encoder is not DNAEncoder.
        if refseq:
            yield HMMER.valid(refseq, is_dna=is_dna)
        for record in seqrecords:
            if not is_dna and record.seq.alphabet == DNAAlphabet:
                record = translate(record)
            yield HMMER.valid(record, is_dna=is_dna)

    try:
        # get the FASTA format file so we can HMMER it
        with open(tmpseq, 'w') as seq_fh:
            SeqIO.write(records(), seq_fh, 'fasta')

        log.debug('aligning sequences')

        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.align(
            hmmfile,
            tmpseq,
            output=tmpaln,
            alphabet=HMMER.DNA if is_dna else HMMER.AMINO,
            outformat=HMMER.PFAM
        )

        aligned = True
    finally:
        # The FASTA input is always disposable; the alignment output is only
        # kept when the run succeeded.
        if exists(tmpseq):
            remove(tmpseq)
        if not aligned and exists(tmpaln):
            remove(tmpaln)

    return tmpaln
Пример #5
0
def generate_alignment_(seqrecords, hmmfile, opts, refseq=None):
    """Run HMMER alignment of ``seqrecords`` against ``hmmfile``.

    A temporary FASTA file is filled with ``refseq`` (when truthy) followed
    by ``seqrecords``, each passed through ``HMMER.valid``.  ``HMMER.align``
    is then invoked with a second temporary file as its ``output``.

    Args:
        seqrecords: iterable of sequence records to align.
        hmmfile: HMM file passed as the first argument to ``HMMER.align``.
        opts: options object; ``ENCODER``, ``HMMER_ALIGN_BIN`` and
            ``HMMER_BUILD_BIN`` are read.
        refseq: optional reference record, written before ``seqrecords``.

    Returns:
        Path of the temporary alignment file; the caller is left to remove it.

    Raises:
        Any exception from ``SeqIO.write`` or ``HMMER.align``.  Neither
        temporary file survives a failure.
    """
    fd, tmpseq = mkstemp()
    close(fd)
    fd, tmpaln = mkstemp()
    close(fd)
    # Set only once the aligner has returned; decides whether tmpaln is kept.
    # (A post-``try`` "not finished" check would be dead code, because errors
    # already propagate out of the ``try`` block.)
    succeeded = False

    is_dna = opts.ENCODER == DNAEncoder
    log = getLogger(IDEPI_LOGGER)

    def records():
        # Reference record leads, inputs follow; translate DNA inputs when
        # the encoder is not DNAEncoder.
        if refseq:
            yield HMMER.valid(refseq, is_dna=is_dna)
        for record in seqrecords:
            if not is_dna and record.seq.alphabet == DNAAlphabet:
                record = translate(record)
            yield HMMER.valid(record, is_dna=is_dna)

    try:
        # get the FASTA format file so we can HMMER it
        with open(tmpseq, 'w') as seq_fh:
            SeqIO.write(records(), seq_fh, 'fasta')

        log.debug('aligning sequences')

        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.align(hmmfile,
                    tmpseq,
                    output=tmpaln,
                    alphabet=HMMER.DNA if is_dna else HMMER.AMINO,
                    outformat=HMMER.PFAM)

        succeeded = True
    finally:
        # Always drop the FASTA input; drop the output too unless it is
        # about to be returned.
        if exists(tmpseq):
            remove(tmpseq)
        if not succeeded and exists(tmpaln):
            remove(tmpaln)

    return tmpaln
Пример #6
0
def generate_hmm_(opts):
    """Create an HMM file from the reference alignment ``opts.REFMSA``.

    The reference MSA is parsed with the format reported by
    ``seqfile_format`` and rewritten as Stockholm into a temporary file.
    Records are verified against ``DNAAlphabet`` and translated unless the
    encoder is ``DNAEncoder``.  When verification raises ``VerifyError`` and
    the encoder is not ``DNAEncoder``, the source is switched to
    ``AminoAlphabet`` and written without translation.  ``HMMER.build`` is
    then run on the Stockholm file.

    Args:
        opts: options object; ``ENCODER``, ``REFMSA``, ``HMMER_ALIGN_BIN``
            and ``HMMER_BUILD_BIN`` are read.

    Returns:
        Path of the temporary HMM file; the caller is left to remove it.

    Raises:
        RuntimeError: if the encoder is ``DNAEncoder`` and the reference MSA
            fails verification against ``DNAAlphabet``.
    """
    fd, tmphmm = mkstemp(); close(fd)
    fd, tmpaln = mkstemp(); close(fd)

    is_dna = opts.ENCODER == DNAEncoder
    # Flipped only after a successful build, so a failed run does not leave
    # an orphaned tmphmm on disk.
    built = False

    try:
        with open(opts.REFMSA) as msa_fh, open(tmpaln, 'w') as aln_fh:
            msa_fmt = seqfile_format(opts.REFMSA)
            source = Verifier(SeqIO.parse(msa_fh, msa_fmt), DNAAlphabet)
            try:
                SeqIO.write(
                    (record if is_dna else translate(record) for record in source),
                    aln_fh,
                    'stockholm')
            except VerifyError:
                if is_dna:
                    raise RuntimeError("DNA encoding incompatible with protein reference MSA")
                # NOTE(review): assumes Verifier can re-serve already-consumed
                # records once the alphabet is switched -- confirm.
                source.set_alphabet(AminoAlphabet)
                # Discard anything the failed attempt may have written, not
                # just rewind over it.
                aln_fh.seek(0)
                aln_fh.truncate()
                SeqIO.write(
                    source,
                    aln_fh,
                    'stockholm')

        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.build(
            tmphmm,
            tmpaln,
            alphabet=HMMER.DNA if is_dna else HMMER.AMINO
            )
        built = True
    finally:
        # The Stockholm intermediate is always disposable.
        if exists(tmpaln):
            remove(tmpaln)
        if not built and exists(tmphmm):
            remove(tmphmm)

    return tmphmm