Пример #1
0
],
                 defaults={'annotations_dir': ""})

# Configuration values of this pipeline, as exposed by the ``P`` module.
PARAMS = P.PARAMS

# Result of ``P.peek_parameters`` for the configured "annotations_dir" and
# the name "genesets".
# NOTE(review): presumably the parameters of the annotation pipeline that
# lives in that directory -- confirm against P.peek_parameters.
PARAMS_ANNOTATIONS = P.peek_parameters(
    PARAMS["annotations_dir"],
    "genesets",
)

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks
# Sample class handed to the track container below.
Sample = tracks.Sample

# Collect one track per ``*.bed.gz`` file in the current working directory.
# The pattern is a raw string so the ``\S`` escape is passed on verbatim
# instead of relying on Python's deprecated invalid-escape fallback.
# NOTE(review): presumably the regex group yields the track name -- confirm
# against tracks.Tracks.loadFromDirectory.
TRACKS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bed.gz"),
    r"(\S+).bed.gz")

# One ``<track>.bed.gz`` file name per entry in TRACKS.
TRACKS_BEDFILES = ["%s.bed.gz" % x for x in TRACKS]


def getAssociatedBAMFiles(track):
    '''return a list of BAM files associated with a track.

    By default, this method searches for ``track.bam`` file in the
    current directory and returns an offset of 0.

    Associations can be defined in the .yml file in the section
    [bams]. For example, the following snippet associates track
    track1 with the bamfiles :file:`track1.bam` and :file:`track2.bam`::

       [bams]
Пример #2
0
###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks
# Determine the location of the input files (reads). Falls back to the
# current directory when no 'input' parameter is configured.
DATADIR = PARAMS.get('input', '.')
if not os.path.exists(DATADIR):
    # Name the offending path in the message so the failure is actionable.
    raise OSError('data directory %s does not exist' % DATADIR)

# Sample class handed to the track container below.
Sample = PipelineTracks.Sample

# Collect one track per ``*.bed.gz`` file found in DATADIR.
# The pattern is a raw string so the ``\S`` escape is passed on verbatim
# instead of relying on Python's deprecated invalid-escape fallback.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob(os.path.join(DATADIR, "*.bed.gz")),
    r"(\S+).bed.gz")

# Path of the BED file belonging to each track. Only the file name is
# %-formatted, and the result is joined onto DATADIR afterwards, so a '%'
# character inside DATADIR cannot be misread as a format specifier.
BEDFILES = [os.path.join(DATADIR, "%s.bed.gz" % x) for x in TRACKS]


# Indicator target for the input BED files.
@transform(BEDFILES, suffix(".gz"), ".gz")
def BedFiles(infile, outfile):
    """No-op task over the input BED files.

    The suffix ``.gz`` is replaced by ``.gz``, so each output name equals
    its input name; the body itself performs no work.

    NOTE(review): ``transform``/``suffix`` look like ruffus decorators --
    confirm against the file's imports.
    """


# All ``*.bam`` files located directly inside DATADIR (non-recursive glob).
BAMFILES = glob.glob(os.path.join(DATADIR, "*.bam"))

Пример #3
0
    dbh = sqlite3.connect(PARAMS["database_name"])
    statement = '''ATTACH DATABASE '%s' as annotations''' % (
        PARAMS["annotations_database"])
    cc = dbh.cursor()
    cc.execute(statement)
    cc.close()

    return dbh


class MySample(tracks.Sample):
    """Sample subclass whose ``attributes`` come from the configuration.

    The comma-separated ``PARAMS["attributes"]`` string is split into a
    tuple once, when the class body is executed.
    """
    # The fields are used exactly as split; no whitespace is stripped.
    attributes = tuple(PARAMS["attributes"].split(","))


# One track per ``*.bam`` file in the current working directory, parsed
# with the configurable MySample class.
# Patterns are raw strings so the ``\S`` escape is passed on verbatim
# instead of relying on Python's deprecated invalid-escape fallback.
TRACKS = tracks.Tracks(MySample).loadFromDirectory(
    glob.glob("*.bam"),
    r"(\S+).bam")

# One entry per ``*.design.tsv`` file in the current working directory.
Sample = tracks.AutoSample
DESIGNS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.design.tsv"),
    r"(\S+).design.tsv")

###################################################################
###################################################################
###################################################################
# DEXSeq workflow
###################################################################


@mkdir("results.dir")
@files(PARAMS["annotations_interface_geneset_all_gtf"], "geneset_flat.gff")
def buildGff(infile, outfile):
Пример #4
0
# Resolve the input directory from the optional "input" parameter:
#   missing or 0 -> current directory
#   1            -> "data.dir"
#   anything else is used verbatim (not recommended practice).
try:
    DATADIR = PARAMS["input"]
except KeyError:
    DATADIR = "."
else:
    if DATADIR == 0:
        DATADIR = "."
    elif DATADIR == 1:
        DATADIR = "data.dir"

Sample = tracks.AutoSample

# Collect one track per ``*.bam`` file in the current working directory.
# All patterns below are raw strings so the ``\S`` escape is passed on
# verbatim instead of relying on Python's deprecated invalid-escape fallback.
BAM_TRACKS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"),
    r"(\S+).bam")

# One entry per ``design*.tsv`` file in the current working directory.
DESIGNS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("design*.tsv"),
    r"design(\S+).tsv")

# Do not use - legacy methods.
# Here only to stop ruffus erroring. Remove once the pipeline scrum is
# complete and old code has been removed.
GENESETS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz")
TRACKS = tracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"),
    r"(\S+).bam")

# Group by experiment (assume that last field is a replicate identifier).
EXPERIMENTS = tracks.Aggregate(BAM_TRACKS, labels=("condition", "tissue"))

###############################################################################