示例#1
0
def run_makedb(fasta, db=None, **kwargs):
    '''Build a diamond database from a fasta file.

    Comparable to running ``diamond makedb --in db.faa --db db``.

    Parameters
    ----------
    fasta : str
        Path of the input fasta file.
    db : str or None (default)
        Path of the formatted database to write. When None, the fasta
        path with its extension stripped is used, so the database lands
        next to the input file.
    kwargs : dict
        Other command line parameters for diamond makedb; forwarded to
        the ``update`` call together with ``fasta`` and ``db``.

    Returns
    -------
    `Dumpling`
        The ``diamond makedb`` wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    # fall back to "<fasta without extension>" when no db path is given
    target = splitext(fasta)[0] if db is None else db
    parameters = Parameters(*makedb_params)
    app = Dumpling(['diamond', 'makedb'],
                   params=parameters,
                   version='0.7.12',
                   url='https://github.com/bbuchfink/diamond')
    app.update(fasta=fasta, db=target, **kwargs)
    command = app.command
    log.info('Running {}'.format(command))
    app()
    return app
示例#2
0
def run_blast(query, daa, aligner='blastp', **kwargs):
    '''Run a diamond aligner on the query sequences.

    Parameters
    ----------
    query : str
        File path of the query sequences.
    daa : str
        File path of the output daa file.
    aligner : str
        The diamond subcommand to run: blastp or blastx.
    kwargs : dict
        Other command line parameters for diamond blastp or blastx;
        forwarded to the ``update`` call.

    Returns
    -------
    `Dumpling`
        The ``diamond <aligner>`` wrapper, returned after it has been
        called.
    '''
    log = getLogger(__name__)

    parameters = Parameters(*blast_params)
    app = Dumpling(['diamond', aligner], params=parameters)
    app.update(query=query, daa=daa, **kwargs)
    command = app.command
    log.info('Running {}'.format(command))
    app()
    return app
示例#3
0
def run(db, query, out_dir, **kwargs):
    '''Scan a fasta file against an HMM database with hmmscan.

    The hits table path is ``<out_dir>/<query basename without
    extension>.tblout`` and is handed to hmmscan as ``out``.

    Parameters
    ----------
    db : str
        The file path to HMM database.
    query : str
        Input fasta file.
    out_dir : str
        Directory in which the target hits table is placed.
    kwargs : dict
        Other command line parameters for hmmscan. key is the option
        (e.g. "-T") and value is the value for the option (e.g. "50").
        If the option is a flag, set the value to None.

    Returns
    -------
    `Dumpling`
        The hmmscan wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    # name the table after the query file, minus directory and extension
    stem, _ = splitext(basename(query))
    table = join(out_dir, '{}.tblout'.format(stem))
    app = Dumpling('hmmscan', params=Parameters(*_scan_params))
    app.update(query=query, db=db, out=table, **kwargs)
    command = app.command
    log.info('Running {}'.format(command))
    app()
    return app
示例#4
0
def run(db, query, out_dir, **kwargs):
    '''Run hmmscan for a fasta file against an HMM database.

    The target hits table is written under ``out_dir`` and named after
    the query file: its basename with the extension replaced by
    ``.tblout``.

    Parameters
    ----------
    db : str
        The file path to HMM database.
    query : str
        Input fasta file.
    out_dir : str
        Directory that receives the target hits table.
    kwargs : dict
        Other command line parameters for hmmscan. key is the option
        (e.g. "-T") and value is the value for the option (e.g. "50").
        If the option is a flag, set the value to None.

    Returns
    -------
    `Dumpling`
        The hmmscan wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    query_name = basename(query)
    stem = splitext(query_name)[0]
    tblout = join(out_dir, '{}.tblout'.format(stem))
    scanner = Dumpling('hmmscan', params=Parameters(*_scan_params))
    scanner.update(query=query, db=db, out=tblout, **kwargs)
    log.info('Running {}'.format(scanner.command))
    scanner()
    return scanner
示例#5
0
def run(db, query, out_dir, **kwargs):
    '''Scan a fasta file against a covariance model database with cmscan.

    The hits table path is ``<out_dir>/<query basename without
    extension>.tblout`` and is handed to cmscan as ``out``.

    Parameters
    ----------
    db : str
        The file path to CM database.
    query : str
        Input fasta file.
    out_dir : str
        Directory in which the target hits table is placed.
    kwargs : dict
        Other command line parameters for cmscan; forwarded to the
        ``update`` call.

    Returns
    -------
    `Dumpling`
        The cmscan wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    # name the table after the query file, minus directory and extension
    stem, _ = splitext(basename(query))
    table = join(out_dir, '{}.tblout'.format(stem))
    app = Dumpling('cmscan', params=Parameters(*cmscan_params))
    app.update(query=query, db=db, out=table, **kwargs)
    command = app.command
    log.info('Running {}'.format(command))
    app()
    return app
示例#6
0
def run_blast(query, daa, aligner='blastp', **kwargs):
    '''Align query sequences with diamond blastp or blastx.

    Parameters
    ----------
    query : str
        File path of the query sequences.
    daa : str
        File path of the output daa file.
    aligner : str
        Name of the diamond subcommand: blastp or blastx.
    kwargs : dict
        Other command line parameters for diamond blastp or blastx;
        forwarded to the ``update`` call.

    Returns
    -------
    `Dumpling`
        The ``diamond <aligner>`` wrapper, returned after it has been
        called.
    '''
    log = getLogger(__name__)

    cmd = ['diamond', aligner]
    runner = Dumpling(cmd, params=Parameters(*blast_params))
    runner.update(query=query, daa=daa, **kwargs)
    log.info('Running {}'.format(runner.command))
    runner()
    return runner
示例#7
0
def run_makedb(fasta, db=None, **kwargs):
    '''Format a diamond database from a fasta file.

    Comparable to running ``diamond makedb --in db.faa --db db``.

    Parameters
    ----------
    fasta : str
        Path of the input fasta file.
    db : str or None (default)
        Path of the formatted database to write. When None, it is derived
        from ``fasta`` by dropping the file extension.
    kwargs : dict
        Other command line parameters for diamond makedb; forwarded to
        the ``update`` call together with ``fasta`` and ``db``.

    Returns
    -------
    `Dumpling`
        The ``diamond makedb`` wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    if db is None:
        # default: same location and name as the fasta, extension removed
        db, _ = splitext(fasta)
    builder = Dumpling(
        ['diamond', 'makedb'],
        params=Parameters(*makedb_params),
        version='0.7.12',
        url='https://github.com/bbuchfink/diamond')
    builder.update(fasta=fasta, db=db, **kwargs)
    log.info('Running {}'.format(builder.command))
    builder()
    return builder
示例#8
0
def run(query, out_dir, cpus=1, **kwargs):
    '''Predict genes in the query sequences with prodigal.

    Notes
    -----
    Unless the caller already switched the corresponding option on, three
    output files with the prefix "prodigal" are requested in "out_dir":
      1. the annotation file ("-o"; suffix is the value of the ``fmt``
         parameter, "gff" by default)
      2. the nucleotide sequences for each predicted gene ("-d"),
         with file suffix of .fna.
      3. the protein sequence translated from each gene ("-a"),
         with file suffix of .faa.
    Standard output of the run is directed to "prodigal.log" in "out_dir".

    Parameters
    ----------
    query : str
        input file path of sequence
    out_dir : str
        output dir; created if it does not exist.
    cpus : int
        Not used by this function. Prodigal does not have a param to set
        up how many CPU cores; the argument only keeps the signature in
        line with the other apps.
    kwargs : dict
        keyword arguments for Prodigal. They are applied after the
        defaults ``fmt='gff'`` and ``mode='single'`` and so can override
        them.

    Returns
    -------
    `Dumpling`
        The prodigal wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)

    makedirs(out_dir, exist_ok=True)

    app = Dumpling('prodigal', params=Parameters(*params),
                   version='v2.6.3',
                   url='https://github.com/hyattpd/Prodigal')
    # defaults first, caller supplied values second so they win
    app.update(query=query, fmt='gff', mode='single')
    app.update(**kwargs)

    # (flag, file suffix) for each output file, in the order they are set
    outputs = (
        ('-a', 'faa'),
        ('-d', 'fna'),
        ('-o', app.params['fmt'].value),
    )
    for flag, suffix in outputs:
        option = app.params[flag]
        if not option.is_off():
            # already switched on: leave it untouched
            continue
        option.on(join(out_dir, 'prodigal.{}'.format(suffix)))

    log.info('Running CDS prediction {}'.format(app.command))
    log_path = join(out_dir, 'prodigal.log')
    app(stdout=log_path)
    return app
示例#9
0
def run(query, out_dir, cpus=1, **kwargs):
    """Run prodigal gene prediction on the query sequences.

    Notes
    -----
    Unless the caller already switched the corresponding option on, three
    output files with the prefix "prodigal" are requested in "out_dir":
      1. the annotation file ("-o"; suffix is the value of the ``fmt``
         parameter, "gff" by default)
      2. the nucleotide sequences for each predicted gene ("-d"),
         with file suffix of .fna.
      3. the protein sequence translated from each gene ("-a"),
         with file suffix of .faa.
    Standard output of the run is directed to "prodigal.log" in "out_dir".

    Parameters
    ----------
    query : str
        input file path of sequence
    out_dir : str
        output dir; created if it does not exist.
    cpus : int
        Not used by this function. Prodigal does not have a param to set
        up how many CPU cores; the argument only keeps the signature in
        line with the other apps.
    kwargs : dict
        keyword arguments for Prodigal. They are applied after the
        defaults ``fmt="gff"`` and ``mode="single"`` and so can override
        them.

    Returns
    -------
    `Dumpling`
        The prodigal wrapper, returned after it has been called.
    """
    log = getLogger(__name__)

    makedirs(out_dir, exist_ok=True)

    parameters = Parameters(*params)
    predictor = Dumpling(
        "prodigal",
        params=parameters,
        version="v2.6.3",
        url="https://github.com/hyattpd/Prodigal",
    )
    # defaults first, caller supplied values second so they win
    predictor.update(query=query, fmt="gff", mode="single")
    predictor.update(**kwargs)

    # (flag, file suffix) for each output file, in the order they are set
    flag_suffixes = [
        ("-a", "faa"),
        ("-d", "fna"),
        ("-o", predictor.params["fmt"].value),
    ]
    for flag, suffix in flag_suffixes:
        option = predictor.params[flag]
        if option.is_off():
            target = join(out_dir, "prodigal.{}".format(suffix))
            option.on(target)

    log.info("Running CDS prediction {}".format(predictor.command))
    predictor(stdout=join(out_dir, "prodigal.log"))
    return predictor
示例#10
0
def run(query, out_dir, cpus=1, gff=True, **kwargs):
    '''Run minced to predict CRISPRs in the input file.

    Notes
    -----
    It will create 1 or 2 output files, depending on the parameters:

      1. file containing CRISPR information, including locations of CRISPRs
         and their sequence composition OR

         GFF file with short information on CRISPR locations OR

         GFFFull file with detailed information on CRISPR locations

      2. (OPTIONAL; -spacers flag) Fasta file of predicted CRISPR spacers

    The main output path is always ``<out_dir>/<query basename without
    extension>.gff``.

    Parameters
    ----------
    query : str
        input file path of nucleotide sequence
    out_dir : str
        output dir
    cpus : int
        Not used by this function. Minced does not have a param to set up
        how many CPU cores; the argument only keeps the signature in line
        with the other apps.
    gff : bool
        output in gff3 format
    kwargs : dict
        keyword arguments. Other command line parameters for MinCED. key is the option
        (e.g. "-searchWL") and value is the value for the option (e.g. "6").
        If the option is a flag, set the value to None.

    Returns
    -------
    `Dumpling`
        The minced wrapper, returned after it has been called.

    Raises
    ------
    CalledProcessError
        If the output file does not exist after the run. The return code
        on the object returned by the call is set to 1 first.
    '''
    log = getLogger(__name__)
    app = Dumpling('minced',
                   params=Parameters(*params),
                   version='0.2.0',
                   url='https://github.com/ctSkennerton/minced')
    stem, _ = splitext(basename(query))
    out = join(out_dir, '{}.gff'.format(stem))
    app.update(query=query, out=out, gff=gff, **kwargs)
    log.info('Running {}'.format(app.command))
    proc = app()
    if exists(out):
        return app
    # minced raises Java error but return 0 exit code if the input file is
    # invalid, so a missing output file is reported as a failure manually.
    proc.returncode = 1
    raise CalledProcessError(proc.returncode, cmd=repr(proc.args))
示例#11
0
def run(query, out_dir, cpus=1, gff=True, **kwargs):
    """Predict CRISPRs in the input file with minced.

    Notes
    -----
    It will create 1 or 2 output files, depending on the parameters:

      1. file containing CRISPR information, including locations of CRISPRs
         and their sequence composition OR

         GFF file with short information on CRISPR locations OR

         GFFFull file with detailed information on CRISPR locations

      2. (OPTIONAL; -spacers flag) Fasta file of predicted CRISPR spacers

    The main output path is always ``<out_dir>/<query basename without
    extension>.gff``.

    Parameters
    ----------
    query : str
        input file path of nucleotide sequence
    out_dir : str
        output dir
    cpus : int
        Not used by this function. Minced does not have a param to set up
        how many CPU cores; the argument only keeps the signature in line
        with the other apps.
    gff : bool
        output in gff3 format
    kwargs : dict
        keyword arguments. Other command line parameters for MinCED. key is the option
        (e.g. "-searchWL") and value is the value for the option (e.g. "6").
        If the option is a flag, set the value to None.

    Returns
    -------
    `Dumpling`
        The minced wrapper, returned after it has been called.

    Raises
    ------
    CalledProcessError
        If the output file does not exist after the run. The return code
        on the object returned by the call is set to 1 first.
    """
    log = getLogger(__name__)
    parameters = Parameters(*params)
    finder = Dumpling(
        "minced",
        params=parameters,
        version="0.2.0",
        url="https://github.com/ctSkennerton/minced",
    )
    query_name = basename(query)
    stem = splitext(query_name)[0]
    out = join(out_dir, "{}.gff".format(stem))
    finder.update(query=query, out=out, gff=gff, **kwargs)
    log.info("Running {}".format(finder.command))
    completed = finder()
    produced = exists(out)
    if not produced:
        # minced raises Java error but return 0 exit code if the input file
        # is invalid, so a missing output file is turned into a failure here.
        completed.returncode = 1
        raise CalledProcessError(completed.returncode, cmd=repr(completed.args))
    return finder
示例#12
0
def run_hmmpress(hmm, force=False):
    '''Run hmmpress to compress the HMM database.

    Parameters
    ----------
    hmm : str
        The file path to HMM database.
    force : boolean
        Whether to overwrite; mapped to the ``-f`` option.

    Returns
    -------
    `Dumpling`
        The hmmpress wrapper, returned after it has been called.
    '''
    force_opt = OptionParam('-f', name='force', help='force overwrite')
    hmm_arg = ArgmntParam(name='hmm', help='hmm file to press')
    app = Dumpling('hmmpress', params=Parameters(force_opt, hmm_arg))
    app.update(hmm=hmm, force=force)
    app()
    return app
示例#13
0
def run_cmpress(cm, force=False):
    '''Run cmpress to compress the CM database.

    Parameters
    ----------
    cm : str
        The file path to CM database.
    force : boolean
        Whether to overwrite; mapped to the ``-F`` option.

    Returns
    -------
    `Dumpling`
        The cmpress wrapper, returned after it has been called.
    '''
    force_opt = OptionParam('-F', name='force', help='force overwrite')
    cm_arg = ArgmntParam(name='cm', help='cm file to press')
    app = Dumpling('cmpress', params=Parameters(force_opt, cm_arg))
    app.update(cm=cm, force=force)
    app()
    return app
示例#14
0
def run_view(daa, out, fmt='sam', **kwargs):
    '''Convert a Diamond daa file into a human readable output file.

    Parameters
    ----------
    daa : str
        Input file resulting from diamond blast.
    out : str
        Output file.
    fmt : str
        Value passed to ``diamond view`` as ``fmt``; 'sam' by default.
    kwargs : dict
        Other parameters for ``diamond view``; forwarded to the
        ``update`` call.

    Returns
    -------
    `Dumpling`
        The ``diamond view`` wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    parameters = Parameters(*view_params)
    app = Dumpling(['diamond', 'view'], params=parameters)
    app.update(daa=daa, out=out, fmt=fmt, **kwargs)
    command = app.command
    log.info('Running {}'.format(command))
    app()
    return app
示例#15
0
def run_hmmpress(hmm, force=False):
    '''Compress the HMM database with hmmpress.

    Parameters
    ----------
    hmm : str
        The file path to HMM database.
    force : boolean
        Whether to overwrite; mapped to the ``-f`` option.

    Returns
    -------
    `Dumpling`
        The hmmpress wrapper, returned after it has been called.
    '''
    press_params = Parameters(
        OptionParam('-f', name='force', help='force overwrite'),
        ArgmntParam(name='hmm', help='hmm file to press'),
    )
    presser = Dumpling('hmmpress', params=press_params)
    presser.update(hmm=hmm, force=force)
    presser()
    return presser
示例#16
0
def run_view(daa, out, fmt='sam', **kwargs):
    '''Turn a Diamond daa file into a human readable output file.

    Parameters
    ----------
    daa : str
        Input file resulting from diamond blast.
    out : str
        Output file.
    fmt : str
        Value passed to ``diamond view`` as ``fmt``; 'sam' by default.
    kwargs : dict
        Other parameters for ``diamond view``; forwarded to the
        ``update`` call.

    Returns
    -------
    `Dumpling`
        The ``diamond view`` wrapper, returned after it has been called.
    '''
    log = getLogger(__name__)
    cmd = ['diamond', 'view']
    viewer = Dumpling(cmd, params=Parameters(*view_params))
    viewer.update(daa=daa, out=out, fmt=fmt, **kwargs)
    log.info('Running {}'.format(viewer.command))
    viewer()
    return viewer
示例#17
0
文件: cmscan.py 项目: RNAer/dumpling
def scan_file(query, db, cpu=1, **kwargs):
    '''Run cmscan on a query file against a database.

    Parameters
    ----------
    query : str
        Query passed to cmscan as ``query`` (presumably a sequence file
        path, going by the function name).
    db : str
        Database passed to cmscan as ``db``.
    cpu : int
        Not used in the function body.
    kwargs : dict
        Other parameters for cmscan; forwarded to the ``update`` call.

    Returns
    -------
    Whatever calling the cmscan `Dumpling` returns.
    '''
    parameters = Parameters(*_params)
    app = Dumpling('cmscan', params=parameters)
    app.update(query=query, db=db, **kwargs)
    result = app()
    return result
示例#18
0
文件: cmscan.py 项目: RNAer/dumpling
def scan_seq(seq, db, cpu=1, **kwargs):
    '''Run cmscan on an in-memory sequence against a database.

    The sequence is written in fasta format to a named temporary file,
    which serves as the cmscan query and is removed when the function
    exits.

    Parameters
    ----------
    seq
        Sequence object accepted by ``write(..., format='fasta')``.
    db : str
        Database passed to cmscan as ``db``.
    cpu : int
        Not used in the function body.
    kwargs : dict
        Other parameters for cmscan; forwarded to the ``update`` call.

    Returns
    -------
    Whatever calling the cmscan `Dumpling` returns.
    '''
    app = Dumpling('cmscan', params=Parameters(*_params))
    with NamedTemporaryFile(mode='w+') as tmp:
        fasta_path = tmp.name
        write(seq, into=fasta_path, format='fasta')
        app.update(query=fasta_path, db=db, **kwargs)
        # run while the temporary file still exists
        result = app()
    return result