示例#1
0
def exportMotifDiscoverySequences(infile, outfile):
    '''Write the interval sequences that feed motif discovery to *outfile*.

    This method requires the _interval tables.

    The track name is *infile* with the ``_intervals.load`` suffix snipped
    off. The selection itself is delegated to
    ``PipelineMotifs.writeSequencesForIntervals`` and is driven by the
    ``motifs_*`` configuration values. The intent is to supply only the
    sequences with the highest S/N ratio:

    1. The top *motifs_proportion* intervals (ordering is taken from
       *motifs_score*).
    2. Only a region +/- *motifs_halfwidth* around the peak.
    3. At least *motifs_min_sequences*. If there are not enough sequences
       to start with, all will be used.
    4. At most *motifs_max_size* sequences will be output.

    *motifs_masker* and *motifs_num_sequences* are forwarded as well.

    If the writer reports zero sequences, a warning is logged and
    *outfile* is touched.
    '''
    # `track` and `dbhandle` are bound before the locals() call below and
    # are therefore forwarded to P.substituteParameters as keyword
    # arguments; their names are kept unchanged on purpose.
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    params = P.substituteParameters(**locals())

    # Keyword arguments for the writer, looked up in the same order as the
    # call site expects them.
    selection = dict(
        full=False,
        masker=P.asList(params['motifs_masker']),
        halfwidth=int(params["motifs_halfwidth"]),
        maxsize=int(params["motifs_max_size"]),
        proportion=params["motifs_proportion"],
        min_sequences=params["motifs_min_sequences"],
        num_sequences=params["motifs_num_sequences"],
        order=params['motifs_score'],
    )
    n_written = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if n_written != 0:
        return

    # Nothing was written: warn and touch the output file.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
def exportMotifDiscoverySequences(infile, outfile):
    '''Export sequences for motif discovery.

    This method requires the _interval tables.

    For motif discovery, only the sequences with the highest S/N ratio
    are supplied. The filtering is performed by
    ``PipelineMotifs.writeSequencesForIntervals`` using these settings:

    * *motifs_proportion*: the top fraction of intervals to keep
      (ordered according to *motifs_score*).
    * *motifs_halfwidth*: only a region +/- this width around the peak.
    * *motifs_min_sequences*: at least this many sequences; if there are
      not enough sequences to start with, all will be used.
    * *motifs_max_size*: at most this many sequences will be output.
    * *motifs_masker* and *motifs_num_sequences* are passed through.

    When no sequence is written, a warning is emitted and *outfile* is
    touched.
    '''
    # Names bound before locals() is evaluated become keyword arguments of
    # P.substituteParameters, so `track` and `dbhandle` keep their names.
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    settings = P.substituteParameters(**locals())
    written = PipelineMotifs.writeSequencesForIntervals(
        track,
        outfile,
        dbhandle,
        full=False,
        masker=P.asList(settings['motifs_masker']),
        halfwidth=int(settings["motifs_halfwidth"]),
        maxsize=int(settings["motifs_max_size"]),
        proportion=settings["motifs_proportion"],
        min_sequences=settings["motifs_min_sequences"],
        num_sequences=settings["motifs_num_sequences"],
        order=settings['motifs_score'],
    )

    nothing_exported = written == 0
    if nothing_exported:
        E.warn("%s: no sequences - meme skipped" % outfile)
        P.touch(outfile)
示例#3
0
def mapReadsAgainstSpadesContigs(infiles, outfile):
    '''Map reads against spades contigs with bowtie or BWA-MEM.

    ``infiles[0]`` is the read file and ``infiles[1]`` is an index file.
    The mapper is selected from the suffix of ``infiles[1]``:

    * ``.bt2`` or ``.ebwt`` -> ``PipelineMapping.Bowtie``
    * any other name ending in ``bwt`` -> ``PipelineMapping.BWAMEM``

    The contig file name is ``agg-agg-agg.filtered.contigs.fa`` when
    ``"agg"`` occurs in ``infiles[1]``; otherwise it is the leading part of
    ``infiles[0]`` matched by ``.*R[0-9]*`` plus ``.filtered.contigs.fa``.
    The directory of *outfile* is used as the index directory.

    Raises ValueError if ``infiles[1]`` has none of the recognised index
    suffixes (previously this surfaced as an UnboundLocalError on ``m``).
    '''
    # NOTE(review): P.run() takes no arguments, so it presumably picks up
    # `statement` and option variables (to_cluster, job_options, bwa_*,
    # genome, reffile, ...) from this frame's locals. Local names are
    # therefore kept exactly as they were - confirm before renaming any.
    inf = infiles[0]
    to_cluster = True
    index_dir = os.path.dirname(outfile)

    if "agg" not in infiles[1]:
        # NOTE(review): re.search returns None when infiles[0] contains no
        # "R"; that case still fails with AttributeError as before.
        genome = re.search(
            r".*R[0-9]*", infiles[0]).group(0) + ".filtered.contigs.fa"
    else:
        genome = "agg-agg-agg.filtered.contigs.fa"

    # The bowtie suffixes are tested first: ".ebwt" also ends in "bwt" and
    # must not fall through to the BWA branch.
    if infiles[1].endswith((".bt2", ".ebwt")):
        infile, reffile = infiles[0], os.path.join(index_dir, genome) + ".fa"
        m = PipelineMapping.Bowtie(
            executable=P.substituteParameters(**locals())["bowtie_executable"])

    elif infiles[1].endswith("bwt"):
        job_options = " -l mem_free=%s" % (PARAMS["bwa_memory"])
        bwa_index_dir = index_dir
        bwa_mem_options = PARAMS["bwa_mem_options"]
        bwa_threads = PARAMS["bwa_threads"]
        m = PipelineMapping.BWAMEM(remove_non_unique=True)

    else:
        # Without this branch `m` would be unbound below and the task
        # would die with an uninformative UnboundLocalError.
        raise ValueError(
            "unrecognised index file '%s': expected a name ending in "
            ".bt2, .ebwt or bwt" % infiles[1])

    statement = m.build((inf,), outfile)
    P.run()
def mapReadsWithBowtieAgainstRayContigs(infile, outfile):
    '''Map the reads in *infile* against Ray contigs using bowtie.

    Side effects on the module-level ``PARAMS``:

    * ``bowtie_index_dir`` is set to ``"ray.dir"``.
    * ``genome`` is set to the first track of *infile*, truncated at its
      first ``"."``.

    The reference file is the first track of *infile* inside ``ray.dir``.
    The mapping statement is built by ``PipelineMapping.Bowtie`` and then
    executed through ``P.run()``.
    '''
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement` (and possibly `reffile`, `infile`) from this
    # frame's locals - local names are deliberately left untouched.
    PARAMS["bowtie_index_dir"] = "ray.dir"
    PARAMS["genome"] = TRACKS.getTracks(infile)[0].split(".")[0]

    reffile = os.path.join("ray.dir", TRACKS.getTracks(infile)[0])

    m = PipelineMapping.Bowtie(
        executable=P.substituteParameters(**locals())["bowtie_executable"])
    statement = m.build((infile,), outfile)
    P.run()
示例#5
0
def mapReadsWithBowtieAgainstRayContigs(infile, outfile):
    '''Run a bowtie mapping of *infile* against the contigs in ``ray.dir``.

    Before the mapper is built, two entries of the module-level ``PARAMS``
    are overwritten: ``bowtie_index_dir`` becomes ``"ray.dir"`` and
    ``genome`` becomes the part of the first track name that precedes its
    first ``"."``. ``reffile`` points at that first track inside
    ``ray.dir``.

    The bowtie executable is looked up under ``bowtie_executable`` in the
    dictionary returned by ``P.substituteParameters``.
    '''
    PARAMS["bowtie_index_dir"] = "ray.dir"
    PARAMS["genome"] = TRACKS.getTracks(infile)[0].split(".")[0]

    # `reffile` is not used again in this function body.
    # NOTE(review): it is presumably consumed via the locals that
    # P.substituteParameters / P.run() inspect - keep the name as is.
    reffile = os.path.join(
        "ray.dir",
        TRACKS.getTracks(infile)[0],
    )

    m = PipelineMapping.Bowtie(
        executable=P.substituteParameters(**locals())["bowtie_executable"],
    )
    statement = m.build((infile,), outfile)
    P.run()
def mapReadsWithBowtieAgainstExpectedContigs(infiles, outfile):
    '''Map each read file in ``infiles[1]`` against the expected contigs.

    The genome name is the basename of the leading part of ``infiles[0]``
    matched by ``.*R[0-9]*`` with ``.contigs.expected`` appended; the
    reference file is that name plus ``.fa``. ``bowtie_index_dir`` is set
    to ``"expected_contigs.dir"`` as a local variable.

    For every entry of ``infiles[1]`` a bowtie statement is built with
    ``PipelineMapping.Bowtie`` and executed through ``P.run()``.
    '''
    # NOTE(review): P.run() takes no arguments and presumably reads
    # `statement`, `to_cluster`, `bowtie_index_dir`, `genome`, ... from
    # this frame's locals - the local names must stay as they are.
    to_cluster = True
    bowtie_index_dir = "expected_contigs.dir"
    genome = os.path.basename(
        re.search(".*R[0-9]*", infiles[0]).group(0) + ".contigs.expected")

    # NOTE(review): every iteration targets the same `outfile`, so later
    # runs look like they overwrite earlier ones - confirm this is intended.
    for seq in infiles[1]:
        infile = seq
        reffile = genome + ".fa"
        m = PipelineMapping.Bowtie(
            executable=P.substituteParameters(**locals())["bowtie_executable"])
        statement = m.build((infile,), outfile)
        P.run()