# Register the input data: one call per lane, pairing the data files that
# match 'lane6*bgzf' / 'lane8*bgzf' with the barcode files selected by the
# corresponding '*[6].txt' / '*[8].txt' pattern.
W.add_datafiles(data_files='lane6*bgzf', barcode_files='*[6].txt')
W.add_datafiles(data_files='lane8*bgzf', barcode_files='*[8].txt')

# Set parameters
# Two groups of settings are handed to the preprocessing step as one dict.
# NOTE(review): the exact meaning of each threshold is defined by the
# workflow object `W`, not here -- confirm against its documentation.
p = {
    'filtering': {
        'propN': 0.1,
        'phred': 25,
        'cutsite_edit_dist': 2,
        'overhang_edit_dist': 0,
    },
    'cleaning': {
        'max_edit_dist': 1,
    },
}

# A test run reads only the fixed 500-record test file; otherwise every
# file matching 'lane*.bgzf' is selected.
W.setup_preprocessing(
    infiles_pattern="testset_500.fastq.bgzf" if testing else "lane*.bgzf",
    params=p,
)

W.run_preprocessing()

# Clustering
#===============================================================================

# `experiment_name` and `description` are only read for a real (non-test)
# run; a test run uses the fixed name and description below.
if not testing:
    W.add_experiment_name(experiment_name, description)
else:
    W.add_experiment_name('gz-subgroups-test',
                          'Test for splitting files by subgroups')

# Default clustering settings.
default_params = {
    'c_thresh': 0.90,
    'n_filter': 8,
    'maskN': False,
}

# Subgroup name -> regular-expression pattern.
# NOTE(review): presumably matched against sample/file names to split the
# data into subgroups -- verify where `subgroups` is consumed.
subgroups = {
    'zebra': '.*zebra.*',
    'gazelle': '.*gazelle.*',
}