# Read the pipeline options. Three candidate configuration files are
# offered: one inside a directory named after this script (extension
# stripped), one in the parent directory and one in the working directory.
PARAMS = P.get_parameters(
    [
        "%s/pipeline.yml" % os.path.splitext(__file__)[0],
        "../pipeline.yml",
        "pipeline.yml",
    ]
)

# Merge in configuration values from an associated pipeline.
#
# The pipeline located at PARAMS["annotations_dir"] is peeked under the
# name 'genesets'. Its parameters are requested with the prefix
# "annotations_", and the interface is requested to be updated and
# restricted (update_interface / restrict_interface).
PARAMS.update(
    P.peek_parameters(
        PARAMS["annotations_dir"],
        "genesets",
        update_interface=True,
        restrict_interface=True,
        prefix="annotations_",
    )
)

# Remember the directory this script lives in.
PARAMS["project_src"] = os.path.dirname(__file__)

# Modules that keep their own module-level PARAMS need to be handed the
# dictionary explicitly, e.g.:
#
#   import CGATPipelines.PipelineGeneset as PipelineGeneset
#   PipelineGeneset.PARAMS = PARAMS
#
# NOTE: this is a hack and deprecated; it is better to pass every
# parameter a function needs explicitly.
RnaSeq.PARAMS = PARAMS
# Load options from the config file. Three candidate locations are
# offered: a directory named after this script (extension stripped), the
# parent directory and the working directory.
PARAMS = P.get_parameters(
    [
        "%s/pipeline.yml" % os.path.splitext(__file__)[0],
        "../pipeline.yml",
        "pipeline.yml",
    ]
)

# Add configuration values from associated pipelines.
#
# 1. pipeline_genesets: parameters are requested with the prefix
#    "annotations_", and the interface is requested to be updated
#    (update_interface=True). Errors are only raised when this file is
#    executed as a script (on_error_raise is true only if __name__ is
#    "__main__").
PARAMS.update(
    P.peek_parameters(
        PARAMS["annotations_dir"],
        "pipeline_genesets.py",
        "genesets",
        on_error_raise=__name__ == "__main__",
        prefix="annotations_",
        update_interface=True,
    )
)


# ---------------------------------------------------
# Specific pipeline tasks
# The regex is written as a raw string: "\." in a plain string literal is
# an invalid escape sequence and triggers a warning on current Pythons.
# The pattern seen by regex() is unchanged.
@transform(("pipeline.yml",), regex(r"(.*)\.(.*)"), r"\1.counts")
def count_words(infile, outfile):
    """Count the number of words in the pipeline configuration files.

    Builds an awk command line that writes a "word<TAB>freq" header
    followed by one "word<TAB>count" line per distinct
    whitespace-separated token of ``infile``, redirected to ``outfile``.

    Args:
        infile: path of the input file, substituted for ``%(infile)s``.
        outfile: path of the output file, substituted for ``%(outfile)s``.
    """
    # The command line statement we want to execute. "%%" is a literal
    # percent sign that survives the later %-interpolation of
    # infile/outfile.
    statement = (
        '''awk 'BEGIN { printf("word\\tfreq\\n"); } '''
        '''{for (i = 1; i <= NF; i++) freq[$i]++} '''
        '''END { for (word in freq) printf "%%s\\t%%d\\n", word, freq[word] }' '''
        '''< %(infile)s > %(outfile)s'''
    )
    # NOTE(review): the visible snippet ends here; whatever executes
    # `statement` is not shown in this view -- confirm in the full file.
###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
from CGATCore import Pipeline as P

# Load the configuration. Three candidate files are offered (a directory
# named after this script, the parent directory, the working directory);
# 'annotations_dir' defaults to the empty string. The return value is
# not used here: the parameters are read back from P.PARAMS below.
P.get_parameters(
    [
        "%s/pipeline.yml" % os.path.splitext(__file__)[0],
        "../pipeline.yml",
        "pipeline.yml",
    ],
    defaults={"annotations_dir": ""},
)

PARAMS = P.PARAMS

# Parameters of the associated 'genesets' pipeline found at
# PARAMS["annotations_dir"], kept in their own dictionary.
PARAMS_ANNOTATIONS = P.peek_parameters(PARAMS["annotations_dir"], "genesets")

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# Load all tracks from the *.bed.gz files in the working directory.
# NOTE(review): the original comment says input/control tracks are
# excluded, but no such filtering is visible here -- confirm.
Sample = PipelineTracks.Sample

# The pattern is a raw string: "\S" in a plain string literal is an
# invalid escape sequence and triggers a warning on current Pythons. The
# pattern text itself is unchanged.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bed.gz"),
    r"(\S+).bed.gz",
)

# One bed file name per track.
TRACKS_BEDFILES = ["%s.bed.gz" % x for x in TRACKS]