예제 #1
0
-----------------------------------------------------------------------------'''

# Experiment label and free-text description; both are handed to
# W.add_experiment_name() further down. Left blank in this template.
experiment_name, description = '', ''

# Name passed to W.load() to pick up previously processed data info.
dataset_name = ''

# Load previously processed data info
#===============================================================================
# Create the workflow object, then load whatever was saved under `dataset_name`.
# NOTE(review): with dataset_name left as '' the load target is unspecified —
# confirm how Workflow.load() treats an empty name.
W = Workflow()
W.load(name=dataset_name)

# Clustering 
#===============================================================================

# Register the experiment name and description on the workflow before any
# clustering is configured.
# NOTE(review): presumably used to label this run's clustering output — confirm
# against Workflow.add_experiment_name().
W.add_experiment_name(experiment_name, description)

# Default parameters for the clustering step.
# NOTE(review): the meaning of each key is defined by the Workflow clustering
# code, not by this script — confirm there before changing values.
default_params = {
    'c_thresh': 0.90,
    'n_filter': 8,
    'maskN': False,
}

#===============================================================================
# Choose method to split files
#===============================================================================

# Subgroup label -> regular expression selecting the members of that subgroup.
# NOTE(review): presumably matched against input file names — confirm.
subgroups = {
    'zebra': '.*zebra.*',
    'gazelle': '.*gazelle.*',
}

# Splitting method selector; the check below compares it against 'subgroups'.
# None leaves that branch disabled.
splitby = None

if splitby == 'subgroups':
예제 #2
0
                    'phred': 25,
                    'cutsite_edit_dist' : 2,
                    'overhang_edit_dist' : 0},
     'cleaning' : {'max_edit_dist' : 1 }}

# Configure preprocessing with the parameter set `p`. Only the input file
# pattern depends on the run mode: the 500-read test set when `testing`,
# otherwise every file matching "lane*.bgzf".
W.setup_preprocessing(
    infiles_pattern="testset_500.fastq.bgzf" if testing else "lane*.bgzf",
    params=p
)

# Execute the preprocessing that was just configured.
W.run_preprocessing()

# Clustering
#===============================================================================
# Register the experiment on the workflow: real runs use the configured
# name/description, test runs use a fixed label.
if not testing:
    W.add_experiment_name(experiment_name, description)
else:
    W.add_experiment_name('gz-subgroups-test', 'Test for splitting files by subgroups')

# Default clustering parameters, passed to W.setup_clustering() as
# `default_params`.
default_params = {
    'c_thresh': 0.90,
    'n_filter': 8,
    'maskN': False,
}

# Subgroup label -> regular expression, passed to W.setup_clustering() as
# `subgroups` when splitting by subgroups.
subgroups = {
    'zebra': '.*zebra.*',
    'gazelle': '.*gazelle.*',
}

# Test runs: configure clustering in 'split_by_subgroups' mode over the cleaned
# test files, using the defaults and subgroup patterns defined above.
if testing:
    W.setup_clustering(mode='split_by_subgroups', infiles_pattern='test*-clean.fastq.bgzf',
                     default_params=default_params, subgroups=subgroups) 
    # Disabled alternative kept for reference: the same call in 'split_by_tags'
    # mode, which takes no `subgroups` argument.
#     W.setup_clustering(mode='split_by_tags', infiles_pattern='test*-clean.fastq.bgzf',
#                      default_params=default_params) 
예제 #3
0
# Preprocessing parameters, passed to W.setup_preprocessing() as `params`.
# Keyed by section ("filtering", "cleaning").
# NOTE(review): individual key meanings are defined by the Workflow
# preprocessing code — confirm there before changing values.
p = {
    "filtering": {
        "propN": 0.1,
        "phred": 25,
        "cutsite_edit_dist": 2,
        "overhang_edit_dist": 0,
    },
    "cleaning": {
        "max_edit_dist": 1,
    },
}

# Configure preprocessing with the parameter set `p`. Only the input file
# pattern depends on the run mode: the 500-read test set when `testing`,
# otherwise every file matching "*.bgzf".
W.setup_preprocessing(
    infiles_pattern="testset_500.fastq.bgzf" if testing else "*.bgzf",
    params=p
)

# Execute the preprocessing that was just configured.
W.run_preprocessing()

# Clustering
# ===============================================================================
# Register the experiment on the workflow: real runs use the configured
# name/description, test runs use a fixed label.
if not testing:
    W.add_experiment_name(experiment_name, description)
else:
    W.add_experiment_name("subgroups-test", "Test for splitting files by subgroups")

# Default parameters for the clustering step.
default_params = {
    "c_thresh": 0.90,
    "n_filter": 8,
    "maskN": False,
}

# Choose method to split files
# ===============================================================================

# Splitting method selector; the check below compares it against "subgroups".
# None leaves that branch disabled.
splitby = None

# Subgroup label -> regular expression selecting the members of that subgroup.
# NOTE(review): presumably matched against input file names — confirm.
subgroups = {
    "zebra": ".*zebra.*",
    "gazelle": ".*gazelle.*",
}

if splitby == "subgroups":
    W.setup_clustering(
        mode="split_by_subgroups",