logopath = os.path.join(themedir, "cgat_logo.png")

################################################################
# Pipeline configuration is taken from two pipeline.ini files:
# the common one shipped with CGATPipelines and the one in the
# current directory.

# Directory in which the pipeline code is stored.
pipelinesdir = os.path.dirname(CGATPipelines.__file__)

# The default configuration file - 'inifile' is read by
# sphinx-report.
inifile = os.path.join(
    pipelinesdir,
    'configuration',
    'pipeline.ini')

PARAMS = P.getParameters([inifile, "pipeline.ini"])

# Definition now part of CGATReport
# def setup(app):
#     app.add_config_value('PARAMS', {}, True)

################################################################
################################################################
################################################################
# The pipeline assumes that sphinxreport is called within the
# working directory. If the report is in a separate build directory,
# change the paths below.
#
# directory with export directory from pipeline
# This should be a directory in the build directory - you can
# link from here to a directory outside the build tree, though.
import os
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGAT.Sra as SRA
import CGAT.IOTools as IOTools
import CGATPipelines.PipelinePreprocess as PipelinePreprocess
import CGATPipelines.PipelineMapping as PipelineMapping
import PipelineScRNASeq

###################################################
# Pipeline configuration
###################################################
# Three ini files are passed to P.getParameters: one in a directory
# named after this script (its path without the extension), one in the
# parent directory and one in the current directory.
# 'paired_end' is given a default of False.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini"
], defaults={'paired_end': False}, only_import=__name__ != "__main__")

# Module-level alias for the parameter dictionary held by P.
PARAMS = P.PARAMS

###############################################################################
# Section - START - GSE53638 - Soumillon et al 2014
###############################################################################


# Decorators for the task that follows: the GSE53638 directory is
# created first and the four .sra files are declared with @originate.
@follows(mkdir("GSE53638"))
@originate([
    "GSE53638/SRR1058003.sra",
    "GSE53638/SRR1058023.sra",
    "GSE53638/SRR1058032.sra",
    "GSE53638/SRR1058038.sra"
])
from CGATReport.Tracker import *
import os

import CGATPipelines.Pipeline as P

#############################################################################
# Get parameterization

# First pass: read the local configuration. The script prefix is derived
# with os.path.splitext() rather than by slicing off len(".py") characters,
# so it is also correct when __file__ ends in ".pyc".
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini"
])

LOCALPARAMS = P.PARAMS

# Second pass: the first pass supplies "iclip_dir"; the pipeline.ini found
# in that directory is added to the list of configuration files.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "%s/pipeline.ini" % LOCALPARAMS["iclip_dir"],
    "pipeline.ini"
])

PARAMS = P.PARAMS


class ProjectTracker(TrackerSQL):
    """TrackerSQL using the SQLite database named by the iclip_* parameters."""

    PARAMS = P.PARAMS

    def __init__(self, *args, **kwargs):
        """Initialise TrackerSQL with an sqlite backend.

        The database file is PARAMS["iclip_database"] inside
        PARAMS["iclip_dir"]; all other arguments are passed through
        unchanged to TrackerSQL.__init__.
        """
        database_path = os.path.join(PARAMS["iclip_dir"],
                                     PARAMS["iclip_database"])
        database = database_path
        TrackerSQL.__init__(self, *args,
                            backend="sqlite:///" + database,
                            **kwargs)
# load modules
from ruffus import *
import os
import sys
import math

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
import CGATPipelines.Pipeline as P

# The ini file named after this script is located with
# os.path.splitext() rather than by slicing off len(".py") characters,
# so the name is also correct when __file__ ends in ".pyc".
P.getParameters([
    "%s.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini"
])

PARAMS = P.PARAMS

# add Pipeline functions
import PipelineMetaAssemblyKit
import PipelinePoolReads

# get all files within the directory to process
SEQUENCEFILES = (
    "*.fasta", "*.fasta.gz", "*.fasta.1.gz", "*.fasta.1",
    "*.fna", "*.fna.gz", "*.fna.1.gz", "*.fna.1",
    "*.fa", "*.fa.gz", "*.fa.1.gz", "*.fa.1",
    "*.fastq", "*.fastq.gz", "*.fastq.1.gz", "*.fastq.1")

# using log files to track progression as ruffus cant parse inputs within merge
from ruffus import *
import sys
import os
import sqlite3
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGATPipelines.PipelineTracks as PipelineTracks
import PipelineProject035 as P35
import glob
import itertools

# Options are loaded from the configuration files listed here.
PARAMS = P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini",
])

# Configuration values from associated pipelines are merged in.
#
# 1. pipeline_annotations: any parameters will be added with the
#    prefix "annotations_". The interface will be updated with
#    "annotations_dir" to point to the absolute path names.
PARAMS.update(
    P.peekParameters(
        PARAMS["annotations_dir"],
        "pipeline_annotations.py",
        on_error_raise=__name__ == "__main__",
        prefix="annotations_",
        update_interface=True,
    )
)
from rpy2.robjects import r as R
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGAT.GTF as GTF
import CGAT.IOTools as IOTools
import CGATPipelines.PipelineLncRNA as PipelineLncRNA

###################################################
# Pipeline configuration
###################################################
# The ini files are given first; the defaults supply values for the
# annotation directory and the gene set file names.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={
        "annotations_dir": "",
        "genesets_abinitio_coding": "pruned.gtf.gz",
        "genesets_abinitio_lncrna": "pruned.gtf.gz",
        "genesets_reference": "reference.gtf.gz",
        "genesets_refcoding": "refcoding.gtf.gz",
        "genesets_previous": "",
    },
)

PARAMS = P.PARAMS

# Parameters peeked from pipeline_annotations.py are merged in under
# the "annotations_" prefix.
PARAMS.update(
    P.peekParameters(
        PARAMS["annotations_dir"],
        "pipeline_annotations.py",
        prefix="annotations_",
        update_interface=True,
    )
)

# "genesets_previous" is converted to a list with P.asList.
PREVIOUS = P.asList(PARAMS["genesets_previous"])
import MySQLdb
import CGAT.Experiment as E
import logging as L
from ruffus import *

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
import CGATPipelines.Pipeline as P

# 'query' and 'target' default to the empty string.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"],
    defaults={
        'query': "",
        'target': ""})

PARAMS = P.PARAMS

# Optional extra configuration: pipeline_conf.py in the working
# directory is executed in this module's namespace.
# NOTE(review): this runs arbitrary code from the working directory -
# only use it in directories you trust.
if os.path.exists("pipeline_conf.py"):
    L.info("reading additional configuration from pipeline_conf.py")
    # The with-block makes sure the file handle is closed again.
    with open("pipeline_conf.py") as _conf_file:
        exec(compile(_conf_file.read(), "pipeline_conf.py", 'exec'))


def getGenomes():
    '''return genome names of query and target.'''

    # Both names are built by joining PARAMS["genome_dir"] with the
    # configured 'query' and 'target' values.
    genome_query = os.path.join(PARAMS["genome_dir"], PARAMS["query"])
    genome_target = os.path.join(PARAMS["genome_dir"], PARAMS["target"])
def connect():
    '''Open the database named by PARAMS["database_name"].

    Use this method to connect to additional databases.

    Returns a database connection (created with sqlite3.connect).
    '''
    return sqlite3.connect(PARAMS["database_name"])


#########################################################################
# 'paired_end' defaults to False; only_import is set whenever this
# module is not run as the main script.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={'paired_end': False},
    only_import=__name__ != "__main__",
)

PARAMS = P.PARAMS

# Hand the same parameter dictionary to the helper modules.
PipelineMapping.PARAMS = PARAMS
PipelineMappingQC.PARAMS = PARAMS
PipelineExome.PARAMS = PARAMS

#########################################################################
#########################################################################
# Load manual annotations
#########################################################################
""" from ruffus import * import sys import glob import os import CGAT.Experiment as E import CGATPipelines.PipelineChipseq as PIntervals import CGATPipelines.PipelineTracks as PipelineTracks import CGATPipelines.PipelineMapping as PipelineMapping import CGATPipelines.Pipeline as P USECLUSTER = True P.getParameters(["%s.ini" % os.path.splitext(__file__)[0], "pipeline.ini"]) PARAMS = P.PARAMS ################################################################### ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc ################################################################### # load all tracks - exclude input/control tracks Sample = PipelineTracks.Sample3 #TRACKS = PipelineTracks.Tracks( PipelineTracks.Sample3 ).loadFromDirectory( [x for x in glob.glob( "*.fastq.gz" ) if PARAMS["tracks_control"] not in x], "(\S+).fastq.gz" ) TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory( [x.replace("../", "") for x in glob.glob("*.export.txt.gz") if PARAMS["tracks_control"] not in x], "(\S+).export.txt.gz" ) +\
if os.path.exists("conf.py"): execfile("conf.py") TARGET_ANNOTATION = 'ensembl_regions.gff' TARGET_GENESET = 'ensembl.gtf' TARGET_PROMOTORS = 'promotors.gtf' TARGET_TSS = 'tss.gtf' TARGET_REPEATS = 'repeats.gff' TARGET_TRANSCRIPTS = 'transcripts.gtf.gz' TARGET_PROBESET = 'probeset.gtf' TARGET_TRANSCRIPTS_TSS = 'transcripts_tss.gtf' TARGET_TRANSCRIPTS_PROMOTORS = 'transcripts_promotors.gtf' TARGET_ANNOTATOR_GENETERRITORIES = 'annotator_geneterritories.gff' TARGET_MAPPABILITY = 'mappability.bed' PARAMS = P.getParameters() @files(((None, "probeset2gene.table"), )) def buildProbeset2Gene(infile, outfile): '''build map relating a probeset to an ENSEMBL gene_id''' Expression.buildProbeset2Gene(infile, outfile) @follows(buildProbeset2Gene) def prepare(): pass @files([((x, "%s.map" % x), "%s_levels.import" % x[:-len("_series_matrix.txt.gz")]) for x in glob.glob("*_series_matrix.txt.gz")])
import CGAT.Experiment as E
import CGAT.IOTools as IOTools
import CGATPipelines.PipelineMotifs as PipelineMotifs
import CGATPipelines.PipelineTracks as PipelineTracks

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
import CGATPipelines.Pipeline as P

# 'annotations_dir' defaults to the empty string.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={'annotations_dir': ""},
)

PARAMS = P.PARAMS

# Parameters of pipeline_genesets.py, peeked from the annotations
# directory and kept in a separate dictionary.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_genesets.py",
)

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks
Sample = PipelineTracks.Sample
import sqlite3
import io
import fileinput
import CGAT.Fastq as fq
import logging as L
import CGAT.Experiment as E
import CGATPipelines.PipelineMapping as PipelineMapping
from ruffus import *

###################################################################
###################################################################
###################################################################
# Pipeline configuration
import CGATPipelines.Pipeline as P

# Only the pipeline.ini in the current directory is passed here.
P.getParameters(["pipeline.ini"])
PARAMS = P.PARAMS

USECLUSTER = True

###################################################################
###################################################################
###################################################################
# Count raw reads


# Each file matching *.fastq.1.gz is mapped to <prefix>.nreads.
# NOTE(review): the dots in the regex are unescaped and therefore
# match any character - confirm whether that is intended.
@transform("*.fastq.1.gz", regex(r"(\S+).fastq.1.gz"), r"\1.nreads")
def countReads(infile, outfile):
    '''count number of reads in input files.'''
    to_cluster = True
    # The command is built by PipelineMapping.Counter from the single
    # input file and the output file name.
    m = PipelineMapping.Counter()
    statement = m.build((infile,), outfile)
import sqlite3
import CGAT.Experiment as E
import CGAT.IOTools as IOTools
import CGATPipelines.PipelineMapping as PipelineMapping
import PipelineiCLIP

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# Options are loaded from the configuration/pipeline.ini next to this
# file and from the pipeline.ini in the current directory.
import CGATPipelines.Pipeline as P

P.getParameters(
    [
        os.path.join(os.path.dirname(__file__),
                     "configuration",
                     "pipeline.ini"),
        "pipeline.ini",
    ]
)

PARAMS = P.PARAMS

PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py",
)

# Both dictionaries are handed on to the PipelineiCLIP module.
PipelineiCLIP.PARAMS = PARAMS
PipelineiCLIP.PARAMS_ANNOTATIONS = PARAMS_ANNOTATIONS

# "project_src" is set to the parent of the directory holding this file.
PARAMS["project_src"] = os.path.join(os.path.dirname(__file__), "..")

###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
import CGATPipelines.PipelineTracks as PipelineTracks
""" from ruffus import * import sys import glob import os import CGAT.Experiment as E import PipelineChipseq as PIntervals import CGATPipelines.PipelineTracks as PipelineTracks import CGATPipelines.PipelineMapping as PipelineMapping import CGATPipelines.Pipeline as P USECLUSTER = True P.getParameters(["%s.ini" % os.path.splitext(__file__)[0], "pipeline.ini"]) PARAMS = P.PARAMS ################################################################### ################################################################### ################################################################### # Helper functions mapping tracks to conditions, etc ################################################################### # load all tracks - exclude input/control tracks Sample = PipelineTracks.Sample3 #TRACKS = PipelineTracks.Tracks( PipelineTracks.Sample3 ).loadFromDirectory( [x for x in glob.glob( "*.fastq.gz" ) if PARAMS["tracks_control"] not in x], "(\S+).fastq.gz" ) TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory( [x.replace("../", "") for x in glob.glob("*.export.txt.gz") if PARAMS["tracks_control"] not in x], "(\S+).export.txt.gz" ) +\
if os.path.exists("conf.py"): exec(compile(open("conf.py").read(), "conf.py", 'exec')) TARGET_ANNOTATION = 'ensembl_regions.gff' TARGET_GENESET = 'ensembl.gtf' TARGET_PROMOTORS = 'promotors.gtf' TARGET_TSS = 'tss.gtf' TARGET_REPEATS = 'repeats.gff' TARGET_TRANSCRIPTS = 'transcripts.gtf.gz' TARGET_PROBESET = 'probeset.gtf' TARGET_TRANSCRIPTS_TSS = 'transcripts_tss.gtf' TARGET_TRANSCRIPTS_PROMOTORS = 'transcripts_promotors.gtf' TARGET_ANNOTATOR_GENETERRITORIES = 'annotator_geneterritories.gff' TARGET_MAPPABILITY = 'mappability.bed' PARAMS = P.getParameters() @files(((None, "probeset2gene.table"), )) def buildProbeset2Gene(infile, outfile): '''build map relating a probeset to an ENSEMBL gene_id''' Expression.buildProbeset2Gene(infile, outfile) @follows(buildProbeset2Gene) def prepare(): pass @files([((x, "%s.map" % x), "%s_levels.import" % x[:-len("_series_matrix.txt.gz")])
import sys
import glob
import os
import itertools
import sqlite3
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import PipelineGeneset as PGeneset

###################################################################
###################################################################
###################################################################
# Global options are read from the configuration files;
# 'polyphen_modes' defaults to the empty string.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={'polyphen_modes': ""},
)

# Fixed file names are added to the parameter dictionary.
P.PARAMS.update({
    "transcripts": "transcripts.gtf.gz",
    "genes": 'genes.gtf.gz',
    "annotation": 'geneset_regions.gff.gz',
    "peptides": 'peptides.fasta',
    "cdna": 'cdna.fasta',
    "cds": 'cds.fasta',
})

PARAMS = P.PARAMS

# PipelineGeneset works on the same parameter dictionary.
PGeneset.PARAMS = PARAMS
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGAT.IOTools as IOTools
import CGAT.Bed as Bed
import CGATPipelines.PipelinePeakcalling as PipelinePeakcalling
import PipelineDeNovoMotifs as PipelineMotifs
import CGATPipelines.PipelineTracks as PipelineTracks

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
# 'paired_end' defaults to False.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={'paired_end': False},
)

PARAMS = P.PARAMS

# Parameters peeked from pipeline_annotations.py are merged in under
# the "annotations_" prefix.
PARAMS.update(
    P.peekParameters(
        PARAMS["annotations_dir"],
        "pipeline_annotations.py",
        prefix="annotations_",
        update_interface=True,
    )
)

# The helper modules share the same parameter dictionary.
PipelinePeakcalling.PARAMS = PARAMS
PipelineMotifs.PARAMS = PARAMS

###################################################################
from CGATReport.Tracker import *
import os

import CGATPipelines.Pipeline as P

#############################################################################
# Get parameterization

# First pass: read the local configuration. The script prefix is derived
# with os.path.splitext() rather than by slicing off len(".py") characters,
# so it is also correct when __file__ ends in ".pyc".
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini"
])

LOCALPARAMS = P.PARAMS

# Second pass: the first pass supplies "iclip_dir"; the pipeline.ini found
# in that directory is added to the list of configuration files.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "%s/pipeline.ini" % LOCALPARAMS["iclip_dir"],
    "pipeline.ini"
])

PARAMS = P.PARAMS


class ProjectTracker(TrackerSQL):
    """TrackerSQL subclass configured from the iclip_* parameters."""

    PARAMS = P.PARAMS

    def __init__(self, *args, **kwargs):
        # The database file is PARAMS["iclip_database"] inside
        # PARAMS["iclip_dir"].
        database_path = os.path.join(PARAMS["iclip_dir"],
                                     PARAMS["iclip_database"])
        database = database_path
import CGAT.IOTools as IOTools
import CGAT.Database as Database
import CGAT.FastaIterator as FastaIterator
import numpy as np
from PipelinePrimerDesign import PrimerSet

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
import CGATPipelines.Pipeline as P
P.getParameters(["pipeline.ini"])
PARAMS = P.PARAMS

###################################################
###################################################
###################################################


def readIdentifiers(identifiers):
    '''
    return list of identifiers from file

    identifiers is the file name handed to IOTools.openFile. One list
    entry is returned per line, with newline characters stripped.
    '''
    infile = IOTools.openFile(identifiers)
    # try/finally closes the file even if reading fails; the handle
    # was previously left open.
    try:
        return [x.strip("\n") for x in infile.readlines()]
    finally:
        infile.close()
import os

import CGAT.IndexedFasta as IndexedFasta
import CGAT.IndexedGenome as IndexedGenome
import CGAT.FastaIterator as FastaIterator
import CGAT.Genomics as Genomics
import CGAT.GTF as GTF
import CGAT.Bed as Bed
import pysam
import numpy
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P

############################################################
############################################################
############################################################
# Pipeline configuration
# The ini file named after this module is located with
# os.path.splitext() rather than by slicing off len(".py") characters,
# so the name is also correct when __file__ ends in ".pyc".
P.getParameters(["%s.ini" % os.path.splitext(__file__)[0],
                 "pipeline.ini"])
PARAMS = P.PARAMS

############################################################
############################################################
############################################################


def exportIntervalsAsBed(database, query, outfile):
    '''export intervals from SQlite database as bed files. '''

    # query is executed as given on a connection to the database file.
    dbhandle = sqlite3.connect(database)
    cc = dbhandle.cursor()
    cc.execute(query)

    outs = IOTools.openFile(outfile, "w")
import MySQLdb
import CGAT.Experiment as E
import logging as L
from ruffus import *

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
import CGATPipelines.Pipeline as P

# 'query' and 'target' default to the empty string.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"],
    defaults={
        'query': "",
        'target': ""})

PARAMS = P.PARAMS

# Optional extra configuration: pipeline_conf.py in the working
# directory is executed in this module's namespace. exec(compile(...))
# replaces execfile(), which only exists in Python 2; this form runs on
# both Python 2 and Python 3.
# NOTE(review): this runs arbitrary code from the working directory -
# only use it in directories you trust.
if os.path.exists("pipeline_conf.py"):
    L.info("reading additional configuration from pipeline_conf.py")
    with open("pipeline_conf.py") as _conf_file:
        exec(compile(_conf_file.read(), "pipeline_conf.py", 'exec'))


def getGenomes():
    '''return genome names of query and target.'''

    # Both names are built by joining PARAMS["genome_dir"] with the
    # configured 'query' and 'target' values.
    genome_query = os.path.join(PARAMS["genome_dir"], PARAMS["query"])
    genome_target = os.path.join(PARAMS["genome_dir"], PARAMS["target"])
from rpy2.robjects import r as R
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGAT.GTF as GTF
import CGAT.IOTools as IOTools
import CGATPipelines.PipelineLncRNA as PipelineLncRNA

###################################################
# Pipeline configuration
###################################################
# The ini files are given first; the defaults supply values for the
# annotation directory and the gene set file names.
P.getParameters(
    [
        "%s/pipeline.ini" % os.path.splitext(__file__)[0],
        "../pipeline.ini",
        "pipeline.ini",
    ],
    defaults={
        "annotations_dir": "",
        "genesets_abinitio_coding": "pruned.gtf.gz",
        "genesets_abinitio_lncrna": "pruned.gtf.gz",
        "genesets_reference": "reference.gtf.gz",
        "genesets_refcoding": "refcoding.gtf.gz",
        "genesets_previous": "",
    },
)

PARAMS = P.PARAMS

# Parameters peeked from pipeline_annotations.py are merged in under
# the "annotations_" prefix.
# NOTE(review): the defaults above register "annotations_dir", but the
# lookup below uses "annotations_annotations_dir" - confirm which key
# the configuration actually provides.
PARAMS.update(
    P.peekParameters(
        PARAMS["annotations_annotations_dir"],
        "pipeline_annotations.py",
        prefix="annotations_",
        update_interface=True,
    )
)

# "genesets_previous" is converted to a list with P.asList.
PREVIOUS = P.asList(PARAMS["genesets_previous"])
""" from ruffus import * import sys import os import glob import sqlite3 import CGAT.Experiment as E import CGATPipelines.Pipeline as P import CGATPipelines.PipelineTracks as PipelineTracks import pysam # load options from the config file PARAMS = P.getParameters( ["%s/pipeline.ini" % os.path.splitext(__file__)[0], "../pipeline.ini", "pipeline.ini"]) # add configuration values from associated pipelines # # 1. pipeline_annotations: any parameters will be added with the # prefix "annotations_". The interface will be updated with # "annotations_dir" to point to the absolute path names. PARAMS.update(P.peekParameters( PARAMS["annotations_dir"], "pipeline_annotations.py", on_error_raise=__name__ == "__main__", prefix="annotations_", update_interface=True)) # define some tracks if needed
import CGAT.GTF as GTF
import CGAT.IOTools as IOTools

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
import CGATPipelines.Pipeline as P
from functools import reduce

# 'annotations_dir' defaults to the empty string and 'paired_end'
# to False.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"],
    defaults={
        'annotations_dir': "",
        'paired_end': False})

PARAMS = P.PARAMS

# Parameters of pipeline_annotations.py, peeked from the annotations
# directory and kept in a separate dictionary.
PARAMS_ANNOTATIONS = P.peekParameters(
    PARAMS["annotations_dir"],
    "pipeline_annotations.py")

# get options that are to be tested
cufflinks_options = {}
if "cufflinks_test_options" in PARAMS:
    # the configured value is converted to a list with P.asList
    options = P.asList(PARAMS["cufflinks_test_options"])
    for option in options:
'pipeline_docs', 'themes') logopath = os.path.join(themedir, "cgat_logo.png") ################################################################ # Import pipeline configuration from pipeline.ini in the current # directory and the common one. # PATH were code for pipelines is stored pipelinesdir = os.path.dirname(CGATPipelines.__file__) # The default configuration file - 'inifile' is read by # sphinx-report. inifile = os.path.join(os.path.dirname(CGATPipelines.__file__), 'configuration', 'pipeline.ini') PARAMS = P.getParameters([inifile, "pipeline.ini"]) def setup(app): app.add_config_value('PARAMS', {}, True) ################################################################ ################################################################ ################################################################ # The pipeline assumes that sphinxreport is called within the # working directory. If the report is in a separate build directory, # change the paths below. # # directory with export directory from pipeline # This should be a directory in the build directory - you can
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGAT.IOTools as IOTools
import CGAT.IndexedFasta as IndexedFasta

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
# 'maps' defaults to the empty string.
P.getParameters([
    "%s/pipeline.ini" % os.path.splitext(__file__)[0],
    "../pipeline.ini",
    "pipeline.ini"
], defaults={"maps": ""})

PARAMS = P.PARAMS

###################################################################
###################################################################
###################################################################
##
###################################################################
# Optional extra configuration: pipeline_conf.py in the working
# directory is executed in this module's namespace. The file is read
# inside a with-block so that its handle is closed again.
# NOTE(review): this runs arbitrary code from the working directory -
# only use it in directories you trust.
if os.path.exists("pipeline_conf.py"):
    L.info("reading additional configuration from pipeline_conf.py")
    with open("pipeline_conf.py") as _conf_file:
        exec(compile(_conf_file.read(), "pipeline_conf.py", 'exec'))

# PARAMS is rebound to the result of a second, argument-less call.
PARAMS = P.getParameters()
import CGAT.Experiment as E
import CGATPipelines.Pipeline as P
import CGAT.IOTools as IOTools
import CGAT.IndexedFasta as IndexedFasta

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# load options from the config file
# 'maps' defaults to the empty string.
P.getParameters(
    ["%s/pipeline.ini" % os.path.splitext(__file__)[0],
     "../pipeline.ini",
     "pipeline.ini"],
    defaults={"maps": ""})

PARAMS = P.PARAMS

###################################################################
###################################################################
###################################################################
##
###################################################################
# Optional extra configuration: pipeline_conf.py in the working
# directory is executed in this module's namespace. The file is read
# inside a with-block so that its handle is closed again.
# NOTE(review): this runs arbitrary code from the working directory -
# only use it in directories you trust.
if os.path.exists("pipeline_conf.py"):
    L.info("reading additional configuration from pipeline_conf.py")
    with open("pipeline_conf.py") as _conf_file:
        exec(compile(_conf_file.read(), "pipeline_conf.py", 'exec'))

# PARAMS is rebound to the result of a second, argument-less call.
PARAMS = P.getParameters()
INI_file = os.path.abspath(path) print( 'You have no project configuration (".ini") file or more than one', 'in the directory:', '\n', path) sys.exit(''' No ini file found. Exiting. You will have to manually edit the Sphinx conf.py file. ''') return (INI_file) modulename = 'P' if modulename in sys.modules: ini_file = 'pipelin{}.ini'.format(r'(.*)') P.getParameters([ "{}/{}".format(os.path.splitext(__file__)[0], ini_file), "../{}".format(ini_file), "{}".format(ini_file), ], ) PARAMS = P.PARAMS else: # Get location to this file: here = os.path.abspath(os.path.dirname(__file__)) print('This directory is:', '\n', here, '\n') #ini_file = getINIdir(os.path.join(here, '..')) ini_file = getINIdir(os.path.abspath(here)) # Print keys (sections): print('Values found in INI file:', '\n')