Exemplo n.º 1
0
def exportMotifDiscoverySequences(infile, outfile):
    '''write the sequences that serve as input for motif discovery.

    This method requires the _interval tables.

    The track name is *infile* with the suffix ``_intervals.load``
    removed. Only the sequences with the highest S/N ratio are meant to
    be exported; the selection is controlled by these configuration
    values:

    motifs_proportion
        the top proportion of intervals, sorted by peakval, is used
    motifs_halfwidth
        only a region +/- this many bases around the peak is exported
    motifs_min_sequences
        at least this many sequences are exported; if there are fewer
        to start with, all of them are used
    motifs_max_size
        upper limit on the amount of sequence that is output
    motifs_masker, motifs_num_sequences, motifs_score
        forwarded as ``masker``, ``num_sequences`` and ``order``

    If no sequence was written, a warning is issued and *outfile* is
    touched.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # NOTE: locals() is expanded into the call below, so the names bound
    # at this point (infile, outfile, track, dbhandle) are part of its
    # input. Do not bind additional locals above this line.
    p = P.substituteParameters(**locals())

    selection = dict(
        full=False,
        masker=P.asList(p['motifs_masker']),
        halfwidth=int(p["motifs_halfwidth"]),
        maxsize=int(p["motifs_max_size"]),
        proportion=p["motifs_proportion"],
        min_sequences=p["motifs_min_sequences"],
        num_sequences=p["motifs_num_sequences"],
        order=p['motifs_score'])

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if nseq != 0:
        return

    # nothing was exported: warn and touch the output file
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
Exemplo n.º 2
0
def run_mapping(infile, outfile):
    '''Map reads with the read mapper selected by ``PARAMS["mapper"]``.

    Supported mappers are ``"star"`` and ``"bowtie"``. For the selected
    mapper a :mod:`PipelineMapping` object is created, its ``build``
    method is called with ``(infile,)`` and *outfile* to obtain the
    statement, and the statement is executed through ``P.run()``.

    Parameters
    ----------
    infile:
        reads to map; passed to the mapper as a one-element tuple
    outfile:
        output of the mapping; passed to the mapper's ``build`` method

    Raises
    ------
    ValueError
        if ``PARAMS["mapper"]`` is neither ``"star"`` nor ``"bowtie"``.
        Without this check the function would fail later with an
        unrelated-looking ``UnboundLocalError`` on ``m``.
    '''

    # NOTE(review): job_threads, job_memory, star_mapping_genome, genome,
    # reffile and statement are not read again inside this function.
    # Presumably P.run() (and the statement template) pick them up from
    # this frame - confirm before renaming or removing any of them.
    if PARAMS["mapper"] == "star":
        job_threads = PARAMS["star_threads"]
        job_memory = PARAMS["star_memory"]
        # a STAR specific genome takes precedence over the generic one
        star_mapping_genome = PARAMS["star_genome"] or PARAMS["genome"]
        m = PipelineMapping.STAR(
            executable=P.substituteParameters(**locals())["star_executable"],
            strip_sequence=0)

    elif PARAMS["mapper"] == "bowtie":
        job_threads = PARAMS["bowtie_threads"]
        job_memory = PARAMS["bowtie_memory"]

        m = PipelineMapping.Bowtie(executable="bowtie",
                                   tool_options=PARAMS["bowtie_options"],
                                   strip_sequence=0)

        genome = PARAMS["bowtie_genome"]
        reffile = os.path.join(PARAMS["bowtie_index_dir"],
                               PARAMS["bowtie_genome"] + ".fa")

    else:
        # fail early and name the offending configuration value
        raise ValueError(
            "unsupported mapper '%s': expected 'star' or 'bowtie'"
            % PARAMS["mapper"])

    statement = m.build((infile, ), outfile)

    P.run()
Exemplo n.º 3
0
def exportMotifDiscoverySequences(infile, outfile):
    '''write the sequences that serve as input for motif discovery.

    This method requires the _interval tables.

    The track name is *infile* with the suffix ``_intervals.load``
    removed. Only the sequences with the highest S/N ratio are meant to
    be exported; the selection is controlled by these configuration
    values:

    motifs_proportion
        the top proportion of intervals, sorted by peakval, is used
    motifs_halfwidth
        only a region +/- this many bases around the peak is exported
    motifs_min_sequences
        at least this many sequences are exported; if there are fewer
        to start with, all of them are used
    motifs_max_size
        upper limit on the amount of sequence that is output
    motifs_masker, motifs_num_sequences, motifs_score
        forwarded as ``masker``, ``num_sequences`` and ``order``

    If no sequence was written, a warning is issued and *outfile* is
    touched.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # NOTE: locals() is expanded into the call below, so the names bound
    # at this point (infile, outfile, track, dbhandle) are part of its
    # input. Do not bind additional locals above this line.
    p = P.substituteParameters(**locals())

    selection = dict(
        full=False,
        masker=P.asList(p['motifs_masker']),
        halfwidth=int(p["motifs_halfwidth"]),
        maxsize=int(p["motifs_max_size"]),
        proportion=p["motifs_proportion"],
        min_sequences=p["motifs_min_sequences"],
        num_sequences=p["motifs_num_sequences"],
        order=p['motifs_score'])

    nseq = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if nseq != 0:
        return

    # nothing was exported: warn and touch the output file
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
Exemplo n.º 4
0
def mapReadsWithHisat(infiles, outfile):
    '''
    Map reads using Hisat (spliced reads).

    The reads file actually handed to hisat is derived from
    ``infiles[0]``: ``P.snip(infile, ".subset")`` is applied and
    ``.fastq.gz`` is appended. If that file does not exist,
    ``.fastq.1.gz`` is used in place of ``.fastq.gz``.

    Parameters
    ----------
    infiles: list
        contains two filenames -

    infiles[0]: str
        filename of reads file
        can be :term:`fastq`, :term:`sra`, csfasta

    infiles[1]: str
        filename with suffix .junctions containing a list of known
        splice junctions. It is unpacked into the local ``junctions``
        but not otherwise referenced in this function.

    hisat_threads: int
        :term:`PARAMS`
        number of threads with which to run hisat

    hisat_memory: str
        :term:`PARAMS`
        memory required for hisat job

    hisat_executable: str
        :term:`PARAMS`
        path to hisat executable

    hisat_library_type: str
        :term:`PARAMS`
        hisat rna-strandedness parameter, see
        https://ccb.jhu.edu/software/hisat/manual.shtml#command-line

    hisat_options: str
        options string for hisat, see
        https://ccb.jhu.edu/software/hisat/manual.shtml#command-line

    hisat_index_dir: str
        path to directory containing hisat indices

    strip_sequence: bool
        :term:`PARAMS`
        if set, strip read sequence and quality information

    outfile: str
        :term:`bam` filename to write the mapped reads in bam format.

    .. note::
       If hisat fails with an error such as::

          Error: segment-based junction search failed with err =-6
          what():  std::bad_alloc

       it means that it ran out of memory.

    '''

    # NOTE(review): job_threads and job_memory are not read again in this
    # function; presumably P.run() picks them up from this frame - confirm
    # before renaming or removing them.
    job_threads = PARAMS["hisat_threads"]
    job_memory = PARAMS["hisat_memory"]

    # locals() is expanded into substituteParameters, so the names bound
    # up to here (infiles, outfile, job_threads, job_memory) are part of
    # its input.
    m = PipelineMapping.Hisat(
        executable=P.substituteParameters(
            **locals())["hisat_executable"],
        strip_sequence=PARAMS["strip_sequence"])

    infile, junctions = infiles
    # derive the name of the reads file from the ".subset" input;
    # fall back to the ".fastq.1.gz" name if ".fastq.gz" does not exist
    infile = P.snip(infile, ".subset") + ".fastq.gz"
    if not os.path.exists(infile):
        infile = P.snip(infile, ".fastq.gz") + ".fastq.1.gz"

    # NOTE(review): statement is not passed explicitly; P.run() is called
    # without arguments and presumably reads it from this frame - confirm.
    statement = m.build((infile,), outfile)

    P.run()