###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
import CGAT.Pipeline as P

# Candidate configuration files handed to P.getParameters: one inside a
# directory named after this script (script path minus its extension),
# one in the parent directory and one relative to the working directory.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

PARAMS = P.PARAMS

# Peek at the parameters of pipeline_annotations.py in the configured
# annotations directory.
# NOTE(review): on_error_raise is only true when this module is executed
# as a script -- presumably so that a plain import does not fail; confirm
# against P.peekParameters.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__")

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

Sample = PipelineTracks.AutoSample

# collect one track per *.bam file matched by the glob
# (raw string: "\S" is not a valid escape in a normal string literal)
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bam"), r"(\S+).bam")

# group by experiment (assume that last field is a replicate identifier)
EXPERIMENTS = PipelineTracks.Aggregate(
    TRACKS, labels=("condition", "tissue"))
###################################################
###################################################
###################################################
## Pipeline configuration
###################################################

# load options from the config file
import os

import CGAT.Pipeline as P

# The first candidate is the script path with its extension replaced by
# ".ini".  os.path.splitext is used instead of slicing off len(".py")
# characters so that other extensions (e.g. ".pyc") are trimmed correctly.
P.getParameters(
    ["%s.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

PARAMS = P.PARAMS

# Peek at the parameters of pipeline_annotations.py in the configured
# annotations directory.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py")

###################################################################
###################################################################
## Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# define some tracks if needed: one track per *.ini file matched by the glob
# (raw string: "\S" is not a valid escape in a normal string literal)
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.ini"), r"(\S+).ini")

###################################################################
###################################################################
###################################################################
import CGAT.GTF as GTF # load options from the config file import CGAT.Pipeline as P P.getParameters([ "%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini" ]) PARAMS = P.PARAMS USECLUSTER = True # link up with annotations PARAMS_ANNOTATIONS = P.peekParameters(PARAMS["annotations_dir"], "pipeline_annotations.py", on_error_raise=__name__ == "__main__") # link up with ancestral repeats PARAMS_ANCESTRAL_REPEATS = P.peekParameters(PARAMS["ancestral_repeats_dir"], "pipeline_ancestral_repeats.py") ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc ################################################################### import CGATPipelines.PipelineTracks as PipelineTracks # collect sra nd fastq.gz tracks TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory( glob.glob("*.gtf.gz"),
import logging as L from ruffus import * import CGATPipelines.PipelineMapping as PipelineMapping USECLUSTER = True ################################################### ################################################### ################################################### ## Pipeline configuration ################################################### import CGAT.Pipeline as P P.getParameters( ["%s.ini" % __file__[:-len(".py")], "../pipeline.ini", "pipeline.ini"]) PARAMS = P.PARAMS PARAMS_ANNOTATIONS = P.peekParameters(PARAMS["annotations_dir"], "pipeline_annotations.py") ################################################################### ################################################################### ################################################################### ## TRIM READS @follows(mkdir("trim")) @transform("*.gz", regex(r"(\S+).gz"), r"trim/\1.gz") def trimReads(infile, outfile): '''trim reads with FastX''' to_cluster = True tmpdir_fastq = P.getTempDir() track = P.snip(os.path.basename(infile), ".gz") statement = """gunzip < %(infile)s | python %%(scriptsdir)s/fastq2fastq.py
import PipelineGeneset as PGeneset
import PipelineAnnotator as PAnnotator

# load options from the config file
import CGAT.Pipeline as P

# Candidate configuration files handed to P.getParameters: one inside a
# directory named after this script (script path minus its extension),
# one in the parent directory and one relative to the working directory.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

PARAMS = P.PARAMS

USECLUSTER = True

# link up with annotations
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py")

# link up with ancestral repeats
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py")

###################################################################
###################################################################
## Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# collect one track per *.gtf.gz file matched by the glob; the three file
# names listed in ``exclude`` are passed to loadFromDirectory to be left out
# (raw string: "\S" is not a valid escape in a normal string literal)
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))
###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
import CGAT.Pipeline as P

# Candidate configuration files handed to P.getParameters, together with
# an empty-string default for 'annotations_dir'.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"],
    defaults={'annotations_dir': ""})

PARAMS = P.PARAMS

# Peek at the parameters of pipeline_annotations.py in the configured
# annotations directory.
# NOTE(review): on_error_raise is only true when this module is executed
# as a script -- presumably so that a plain import does not fail; confirm
# against P.peekParameters.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__")

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
Sample = PipelineTracks.Sample

# load all tracks: one per *.bed.gz file matched by the glob
# NOTE(review): the original comment said input/control tracks are
# excluded, but no ``exclude`` argument or filter is applied here -- confirm
# whether such a filter is intended.
# (raw string: "\S" is not a valid escape in a normal string literal)
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bed.gz"), r"(\S+).bed.gz")

# file name for every track, built by appending the ".bed.gz" suffix
TRACKS_BEDFILES = ["%s.bed.gz" % x for x in TRACKS]
###################################################################
###################################################################
# parameterization

EXPORTDIR = P['chipseq_exportdir']
DATADIR = P['chipseq_datadir']
DATABASE = P['chipseq_backend']

###################################################################
# cf. pipeline_chipseq.py
# This should be automatically gleaned from pipeline_chipseq.py
###################################################################
import CGAT.Pipeline as Pipeline

# peek at the parameters of pipeline_chipseq.py in the current directory
PARAMS_PIPELINE = Pipeline.peekParameters(".", "pipeline_chipseq.py")

import CGATPipelines.PipelineTracks as PipelineTracks

Sample = PipelineTracks.Sample3

# file suffixes that are searched for in DATADIR
suffixes = ["export.txt.gz", "sra", "fastq.gz", "fastq.1.gz", "csfasta.gz"]

# Build one Tracks collection per suffix and add them all together,
# starting from an empty Tracks(Sample).  The list is summed directly;
# wrapping a single list in itertools.chain yields the same items.
#
# Files whose path contains "input" are skipped.
# NOTE(review): the test runs on the full glob result, which includes
# DATADIR, so a DATADIR containing "input" would filter out every file.
# (raw string: "\S" is not a valid escape in a normal string literal)
TRACKS = sum(
    [PipelineTracks.Tracks(Sample).loadFromDirectory(
        [x for x in glob.glob("%s/*.%s" % (DATADIR, s))
         if "input" not in x],
        r"%s/(\S+).%s" % (DATADIR, s))
     for s in suffixes],
    PipelineTracks.Tracks(Sample))
from AnnotationReport import * import CGAT.Pipeline as P import CGATCore.IOTools as IOTools PARAMS = P.peekParameters(".", "pipeline_annotations.py", on_error_raise=__name__ == "__main__", prefix="annotations_", update_interface=True) class AnnotationStatus(Status): '''status information for annotations. ''' tracks = [ x for x, y in list(PARAMS.items()) if str(y).endswith((".bed.gz", ".gtf.gz", ".gff.gz", ".tsv.gz", ".tsv")) ] slices = ('AnnotationIsPresent', ) def testAnnotationIsPresent(self, track): ''' PASS: File exists and is not empty FAIL: File exists and is empty (no data except comments)
from GeneSetsReport import * import CGAT.Pipeline as P import CGAT.IOTools as IOTools PARAMS = P.peekParameters( ".", "pipeline_annotations.py", on_error_raise=__name__ == "__main__", prefix="annotations_", update_interface=True) class HypergeometricStatus(Status): '''status information for annotations. ''' pattern = 'hypergeometric_(\S+)_summary$' slices = ('SignificantResults',) def testSignificantResults(self, track): ''' PASS: Genes have been found in foreground and significant results exist. WARN: Genes have been found in foreground, but no significant results exist. FAIL: No genes in foreground sets. The value indicates the number of significant results.
import PipelineGeneset as PGeneset import PipelineAnnotator as PAnnotator # load options from the config file import CGAT.Pipeline as P P.getParameters([ "%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini" ]) PARAMS = P.PARAMS USECLUSTER = True # link up with annotations PARAMS_ANNOTATIONS = P.peekParameters(PARAMS["annotations_dir"], "pipeline_annotations.py") # link up with ancestral repeats PARAMS_ANCESTRAL_REPEATS = P.peekParameters(PARAMS["ancestral_repeats_dir"], "pipeline_ancestral_repeats.py") ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc ################################################################### import CGATPipelines.PipelineTracks as PipelineTracks # collect sra nd fastq.gz tracks TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory( glob.glob("*.gtf.gz"), "(\S+).gtf.gz",
import PipelineGeneset as PGeneset

# load options from the config file
import CGAT.Pipeline as P

# Candidate configuration files handed to P.getParameters: one inside a
# directory named after this script (script path minus its extension),
# one in the parent directory and one relative to the working directory.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"])

PARAMS = P.PARAMS

USECLUSTER = True

# link up with annotations
# NOTE(review): on_error_raise is only true when this module is executed
# as a script -- presumably so that a plain import does not fail; confirm
# against P.peekParameters.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
    on_error_raise=__name__ == "__main__")

# link up with ancestral repeats
PARAMS_ANCESTRAL_REPEATS = P.peekParameters(
    PARAMS["ancestral_repeats_dir"],
    "pipeline_ancestral_repeats.py")

###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks

# collect one track per *.gtf.gz file matched by the glob; the three file
# names listed in ``exclude`` are passed to loadFromDirectory to be left out
# (raw string: "\S" is not a valid escape in a normal string literal)
TRACKS = PipelineTracks.Tracks(PipelineTracks.Sample).loadFromDirectory(
    glob.glob("*.gtf.gz"),
    r"(\S+).gtf.gz",
    exclude=("repeats.gtf.gz", "introns.gtf.gz", "merged.gtf.gz"))