Authors: Bernie Pope, Gayle Philip, Clare Sloggett

Description:

Simple pipeline to demonstrate how to use the base tools.
Counts the number of lines in a set of files and then sums
them up.

'''

import sys
from ruffus import *
from utils import (runStageCheck, getOptions, initLog)
from cmdline_args import get_cmdline_args

# Command-line arguments and the options object derived from them.
args = get_cmdline_args()
options = getOptions(args)
# Log directory, read from the pipeline section of the options.
logDir = options.pipeline['logDir']
# Logger handed to runStageCheck by the pipeline stages below.
logger = initLog(options)

# The input files fed to the first stage (countLines).
data_files = ['test_data/data1.txt', 'test_data/data2.txt']

# count the number of lines in a file
@transform(data_files, suffix('.txt'), ['.count', '.count.Success'])
def countLines(file, outputs):
    """Run the 'countLines' stage on one input file.

    file    -- path of the input file (one entry of data_files).
    outputs -- two-element sequence: the count output file followed by
               the flag file given to runStageCheck for this stage.
    """
    (count_path, success_flag) = outputs
    runStageCheck(
        'countLines',
        success_flag,
        logger,
        options,
        file,
        count_path,
    )

# sum the counts from the previous stage
@merge(countLines,  ['test_data/total.txt', 'test_data/total.Success'])
Пример #2
0
Authors: Bernie Pope, Gayle Philip, Clare Sloggett

Description:

Simple pipeline to demonstrate how to use the base tools.
Counts the number of lines in a set of files and then sums
them up.

'''

import sys
from ruffus import *
from utils import (runStageCheck, getOptions, initLog)
from cmdline_args import get_cmdline_args

# Command-line arguments and the options object derived from them.
args = get_cmdline_args()
options = getOptions(args)
# Log directory, read from the pipeline section of the options.
logDir = options.pipeline['logDir']
# Logger handed to runStageCheck by the pipeline stages below.
logger = initLog(options)

# The input files fed to the first stage (countLines).
data_files = ['test_data/data1.txt', 'test_data/data2.txt']


# count the number of lines in a file
@transform(data_files, suffix('.txt'), ['.count', '.count.Success'])
def countLines(file, outputs):
    """Count the lines of a single input file via the 'countLines' stage.

    file    -- path of the input file (one entry of data_files).
    outputs -- pair of paths: the '.count' result file and the
               '.count.Success' flag file passed to runStageCheck.
    """
    result_file, done_flag = outputs
    stage_args = ('countLines', done_flag, logger, options, file, result_file)
    runStageCheck(*stage_args)

Пример #3
0
def main():
    """Load the options, import the requested pipeline and hand it to ruffus.

    The 'style' pipeline option selects what happens:

    * 'run'        -- execute the pipeline.
    * 'touchfiles' -- bring files up to date without running anything.
    * 'flowchart'  -- draw the pipeline as a diagram in 'flowchart.svg'.
    * 'print'      -- write a textual description of what the pipeline
                      would do to stdout, without running it.

    Any other style value results in no action. Every 'rebuild' value
    except 'fromend' selects ruffus's maximal rebuild mode.
    """
    cmdline = get_cmdline_args()

    # Put the pipeline script's own directory at the front of the module
    # search path, just as if the script had been called directly. The
    # script itself and the config files imported by getOptions live there.
    sys.path.insert(0, os.path.dirname(cmdline.pipeline))

    # The options must be set before the pipeline module is imported.
    opts = getOptions(cmdline)
    setOptions(opts)

    # Importing the pipeline (named on the command line) defines its stages.
    __import__(drop_py_suffix(cmdline.pipeline))

    # The log directory is looked up here but not used further in this function.
    log_dir = opts.pipeline['logDir']
    startLogger()

    settings = opts.pipeline
    targets = settings['end']
    forced = settings['force']
    style = settings['style']

    # 'fromend' is the only value that switches maximal rebuild mode off;
    # 'fromstart' and anything unrecognised keep it on.
    maximal_rebuild = settings['rebuild'] != 'fromend'

    if style == 'run' or style == 'touchfiles':
        # Perform the pipeline steps (run the pipeline).
        pipeline_run(
            # End points of the pipeline.
            targets,
            # How many ruffus tasks to run.
            multiprocess=settings['procs'],
            logger=black_hole_logger,
            # Tasks forced to run whether or not they are up to date.
            forcedtorun_tasks=forced,
            # With the 'touchfiles' style, files are brought up to date
            # without running anything.
            touch_files_only=(style == 'touchfiles'),
            # How ruffus decides how much work needs to be done.
            gnu_make_maximal_rebuild_mode=maximal_rebuild)
        return

    if style == 'flowchart':
        # Draw the pipeline as a diagram.
        pipeline_printout_graph('flowchart.svg',
                                'svg',
                                targets,
                                no_key_legend=False)
        return

    if style == 'print':
        # Print a textual description of what the pipeline would do,
        # but don't actually run it.
        pipeline_printout(sys.stdout,
                          targets,
                          verbose=5,
                          wrap_width=100000,
                          forcedtorun_tasks=forced,
                          gnu_make_maximal_rebuild_mode=maximal_rebuild)
Пример #4
0
def main():
    """Configure the run from the command line and drive the ruffus pipeline.

    Steps, in order: read the command-line arguments, make the pipeline
    script's directory importable, set the options, import the pipeline
    script so its stages are defined, start the logger, then act on the
    'style' pipeline option ('run', 'touchfiles', 'flowchart' or 'print').
    A style outside that set does nothing.
    """
    parsed = get_cmdline_args()

    # Modules are looked up in the directory local to the pipeline, as
    # if the pipeline script had been called directly. That covers the
    # script itself and the config files imported by getOptions.
    script_dir = os.path.dirname(parsed.pipeline)
    sys.path.insert(0, script_dir)

    # Set the options first: this has to happen before the import below.
    run_options = getOptions(parsed)
    setOptions(run_options)

    # The pipeline's name comes from the command line; importing it
    # defines the stages.
    module_name = drop_py_suffix(parsed.pipeline)
    __import__(module_name)

    # Read the log directory (the value is not used again in this function).
    log_directory = run_options.pipeline['logDir']
    startLogger()

    pipeline_cfg = run_options.pipeline
    end_tasks = pipeline_cfg['end']
    forced_tasks = pipeline_cfg['force']
    style = pipeline_cfg['style']

    # Maximal rebuild mode stays on unless 'rebuild' is exactly 'fromend'.
    rebuild_from_end = pipeline_cfg['rebuild'] == 'fromend'
    rebuild_mode = not rebuild_from_end

    if style in ('run', 'touchfiles'):
        # 'touchfiles' brings files up to date without running anything.
        only_touch = style == 'touchfiles'
        # Run the pipeline up to the requested end points.
        pipeline_run(
            end_tasks,
            # How many ruffus tasks to run.
            multiprocess=pipeline_cfg['procs'],
            logger=black_hole_logger,
            # Start from these tasks regardless of whether they are
            # up to date.
            forcedtorun_tasks=forced_tasks,
            touch_files_only=only_touch,
            # Mode in which ruffus decides how much work is needed.
            gnu_make_maximal_rebuild_mode=rebuild_mode)
    elif style == 'flowchart':
        # Draw the pipeline as an SVG diagram.
        pipeline_printout_graph(
            'flowchart.svg', 'svg', end_tasks, no_key_legend=False)
    elif style == 'print':
        # Describe what the pipeline would do, without actually running it.
        pipeline_printout(
            sys.stdout,
            end_tasks,
            verbose=5,
            wrap_width=100000,
            forcedtorun_tasks=forced_tasks,
            gnu_make_maximal_rebuild_mode=rebuild_mode)