예제 #1
0
    W.add_datafiles(data_files='lane6*bgzf' , barcode_files='*[6].txt')
    W.add_datafiles(data_files='lane8*bgzf' , barcode_files='*[8].txt')

# Preprocessing parameters, handed to W.setup_preprocessing(params=p) below.
# NOTE(review): the meaning of each key is defined by the pipeline, not here;
# presumably 'filtering' holds read-filter thresholds and 'cleaning' holds
# the barcode/read correction tolerance -- confirm against the pipeline docs.
p = {
    'filtering': {
        'propN': 0.1,
        'phred': 25,
        'cutsite_edit_dist': 2,
        'overhang_edit_dist': 0,
    },
    'cleaning': {
        'max_edit_dist': 1,
    },
}

# In testing mode only the single test-set file is selected; otherwise the
# glob picks up every file matching "lane*.bgzf". The same parameter dict
# `p` is used in both cases.
W.setup_preprocessing(
    infiles_pattern="testset_500.fastq.bgzf" if testing else "lane*.bgzf",
    params=p,
)

# Execute the preprocessing step configured above.
W.run_preprocessing()

# Clustering
#===============================================================================
# Register the experiment name and description: real runs use the
# `experiment_name` / `description` values defined elsewhere in the script,
# test runs use a fixed name and description.
if not testing:
    W.add_experiment_name(experiment_name, description)
else:
    W.add_experiment_name('gz-subgroups-test', 'Test for splitting files by subgroups')

# Default parameter set for the clustering stage.
# NOTE(review): where this dict is consumed is not visible in this section;
# key semantics ('c_thresh', 'n_filter', 'maskN') should be confirmed
# against the clustering code.
default_params = {
    'c_thresh': 0.90,
    'n_filter': 8,
    'maskN': False,
}

# Subgroup name -> pattern string.
# NOTE(review): the patterns look like regular expressions used to assign
# samples/files to a subgroup -- confirm how the consumer matches them.
subgroups = {
    'zebra': '.*zebra.*',
    'gazelle': '.*gazelle.*',
}