# Read the pipeline options. The candidate configuration files are the
# pipeline.yml inside the directory named after this script (script path
# without its extension), ../pipeline.yml and ./pipeline.yml.
PARAMS = P.get_parameters(
    [
        os.path.splitext(__file__)[0] + "/pipeline.yml",
        "../pipeline.yml",
        "pipeline.yml",
    ]
)

# Merge configuration values from associated pipelines into PARAMS.
#
# 1. genesets: peeked from the location stored in
#    PARAMS["annotations_dir"]. The values are requested with the
#    prefix "annotations_", and both update_interface and
#    restrict_interface are switched on.
#    NOTE(review): the interface entries are expected to end up as
#    absolute path names under "annotations_dir" - confirm against
#    P.peek_parameters.
PARAMS.update(
    P.peek_parameters(
        PARAMS["annotations_dir"],
        "genesets",
        prefix="annotations_",
        update_interface=True,
        restrict_interface=True,
    )
)

# Remember the directory that contains this script.
PARAMS["project_src"] = os.path.dirname(__file__)

# Helper modules that keep their own module-level PARAMS have to be
# pointed at the dictionary built here, e.g.:
#
# import CGATPipelines.PipelineGeneset as PipelineGeneset
# PipelineGeneset.PARAMS = PARAMS
#
# Note that this is a hack and deprecated; it is better to pass all
# parameters that a function needs explicitly.
RnaSeq.PARAMS = PARAMS
# load options from the config file
PARAMS = P.get_parameters([
    "%s/pipeline.yml" % os.path.splitext(__file__)[0], "../pipeline.yml",
    "pipeline.yml"
])

# add configuration values from associated pipelines
#
# 1. pipeline_genesets: any parameters will be added with the
#    prefix "annotations_". The interface will be updated with
#    "annotations_dir" to point to the absolute path names.
#
# Only the working directory and the short pipeline name ("genesets")
# are passed positionally. A third positional argument would be bound
# to on_error_raise in cgatcore's peek_parameters and clash with the
# keyword of the same name below (TypeError: multiple values), so the
# legacy script name "pipeline_genesets.py" is no longer passed.
#
# on_error_raise is only true when this file is run as a script, not
# when it is imported.
PARAMS.update(
    P.peek_parameters(PARAMS["annotations_dir"],
                      "genesets",
                      on_error_raise=__name__ == "__main__",
                      prefix="annotations_",
                      update_interface=True))


# ---------------------------------------------------
# Specific pipeline tasks
# The regex is a raw string so that "\." reaches the regex engine as an
# escaped dot without relying on Python passing an invalid string escape
# through unchanged (which triggers a SyntaxWarning on recent versions).
@transform(("pipeline.yml", ), regex(r"(.*)\.(.*)"), r"\1.counts")
def count_words(infile, outfile):
    '''count the number of words in the pipeline configuration files.

    Builds an awk command line that reads ``infile``, counts how often
    each whitespace-separated field occurs and writes a tab-separated
    table with the header ``word<TAB>freq`` to ``outfile``.

    infile : input file name; with the decorator above this is
        "pipeline.yml".
    outfile : output file name; the decorator maps "pipeline.yml" to
        "pipeline.counts".
    '''

    # the command line statement we want to execute
    #
    # "%%" is a literal percent sign for awk's printf, while
    # %(infile)s / %(outfile)s are placeholders left for later
    # %-interpolation.
    # NOTE(review): only the construction of the statement is visible
    # here; confirm that it is executed (and interpolated) afterwards.
    statement = '''awk 'BEGIN { printf("word\\tfreq\\n"); } 
    {for (i = 1; i <= NF; i++) freq[$i]++}
    END { for (word in freq) printf "%%s\\t%%d\\n", word, freq[word] }'
    < %(infile)s > %(outfile)s'''
示例#3
0
###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
from CGATCore import Pipeline as P
# Load the configuration. The candidate files are the pipeline.yml
# inside the directory named after this script, ../pipeline.yml and
# ./pipeline.yml; "annotations_dir" is given an empty-string default.
P.get_parameters(
    [
        os.path.splitext(__file__)[0] + "/pipeline.yml",
        "../pipeline.yml",
        "pipeline.yml",
    ],
    defaults={'annotations_dir': ""},
)

# Module-level alias for the parameter dictionary held by P.
PARAMS = P.PARAMS

# Parameters peeked from the "genesets" pipeline at the location stored
# in PARAMS["annotations_dir"]; kept separate from PARAMS.
PARAMS_ANNOTATIONS = P.peek_parameters(
    PARAMS["annotations_dir"],
    "genesets",
)

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks
# Track type handed to the Tracks container below.
Sample = PipelineTracks.Sample

# Build the track collection from every file in the current working
# directory whose name matches the glob "*.bed.gz".
#
# The pattern is a raw string so that "\S" reaches the regex engine
# as-is instead of depending on Python passing an invalid string escape
# through unchanged (which triggers a SyntaxWarning on recent versions).
# The dots in the pattern are unescaped and therefore match any
# character; the glob already restricts the input to names ending in
# ".bed.gz".
# NOTE(review): presumably the captured group becomes the track name -
# confirm against PipelineTracks.Tracks.loadFromDirectory.
TRACKS = PipelineTracks.Tracks(Sample).loadFromDirectory(
    glob.glob("*.bed.gz"), r"(\S+).bed.gz")

# File name of the bed file for each track: "<track>.bed.gz".
TRACKS_BEDFILES = ["%s.bed.gz" % x for x in TRACKS]