Exemplo n.º 1
0
                    "*.csfasta.F3.gz",
                    )

# Input file patterns: every entry of SEQUENCESUFFIXES joined onto DATADIR.
SEQUENCEFILES = tuple(
    os.path.join(DATADIR, pattern) for pattern in SEQUENCESUFFIXES)

# Pattern for input file names: three dash-separated fields followed by a
# named ``suffix`` group (fastq.1.gz, fastq.gz or sra).
SEQUENCEFILES_REGEX = regex(
    r"(\S+)-(\S+)-(\S+).(?P<suffix>fastq.1.gz|fastq.gz|sra)")

# Sample class used for all tracks; the attribute names set here are the
# labels the aggregates below group by.
Sample = PipelineTracks.AutoSample
Sample.attributes = ('tissue', 'condition', 'replicate')

# Build the track collection from every file matching one of the
# SEQUENCESUFFIXES glob patterns.
# NOTE(review): these globs are relative to the current working directory,
# whereas SEQUENCEFILES is built relative to DATADIR -- confirm this is
# intended.
# The pattern is a raw string so that ``\S`` is not treated as an (invalid)
# string escape sequence; the pattern text itself is unchanged.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    [y for x in SEQUENCESUFFIXES for y in glob.glob(x)],
    r"(\S+).(fastq.1.gz|fastq.gz|sra)")

# Groupings of TRACKS by subsets of the sample attributes.
EXPERIMENTS = PipelineTracks.Aggregate(TRACKS, labels=("tissue", "condition"))
CONDITIONS = PipelineTracks.Aggregate(TRACKS, labels=("condition", ))
REPLICATES = PipelineTracks.Aggregate(TRACKS, labels=("replicate", ))

#########################################################################
# summarise read 3'
#########################################################################


@follows(mkdir("sequence_characteristics.dir"))
@transform(SEQUENCEFILES,
           SEQUENCEFILES_REGEX,
           r"sequence_characteristics.dir/\1-\2-\3.\g<suffix>_start.tsv")
def summariseReadStart(infile, outfile):
    # this only works for fastq files. Fails with .sra files
    # this function and the next section should be replaced with a call to
Exemplo n.º 2
0
                                      "pipeline_annotations.py")

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

Sample = PipelineTracks.AutoSample

# collect tracks from the *.bam files in the current working directory
# (raw-string pattern: ``\S`` is not a valid string escape sequence; the
# pattern text itself is unchanged)
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"), r"(\S+).bam")

# group by experiment (assume that last field is a replicate identifier)
EXPERIMENTS = PipelineTracks.Aggregate(TRACKS, labels=("condition", "tissue"))

# collect gene set tracks from the *.gtf.gz files in the current working
# directory
GENESETS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"), r"(\S+).gtf.gz")

###################################################################
###################################################################
###################################################################


def connect():
    '''connect to database.

    This method also attaches to helper databases.
    '''