-----------------------------------------------------------------------------''' experiment_name = '' description = '' dataset_name = '' # Load previously processed data info #=============================================================================== W = Workflow() W.load(name=dataset_name) # Clustering #=============================================================================== W.add_experiment_name(experiment_name, description) default_params = { 'c_thresh' : 0.90, 'n_filter' : 8, 'maskN' : False} #=============================================================================== # Choose method to split files #=============================================================================== subgroups = { 'zebra' : '.*zebra.*', 'gazelle' : '.*gazelle.*'} splitby = None if splitby == 'subgroups':
'phred': 25, 'cutsite_edit_dist' : 2, 'overhang_edit_dist' : 0}, 'cleaning' : {'max_edit_dist' : 1 }} if testing: W.setup_preprocessing(infiles_pattern="testset_500.fastq.bgzf" , params=p) else: W.setup_preprocessing(infiles_pattern="lane*.bgzf" , params=p) W.run_preprocessing() # Clustering #=============================================================================== if testing: W.add_experiment_name('gz-subgroups-test', 'Test for splitting files by subgroups') else: W.add_experiment_name(experiment_name, description) default_params = { 'c_thresh' : 0.90, 'n_filter' : 8, 'maskN' : False} subgroups = { 'zebra' : '.*zebra.*', 'gazelle' : '.*gazelle.*'} if testing: W.setup_clustering(mode='split_by_subgroups', infiles_pattern='test*-clean.fastq.bgzf', default_params=default_params, subgroups=subgroups) # W.setup_clustering(mode='split_by_tags', infiles_pattern='test*-clean.fastq.bgzf', # default_params=default_params)
p = { "filtering": {"propN": 0.1, "phred": 25, "cutsite_edit_dist": 2, "overhang_edit_dist": 0}, "cleaning": {"max_edit_dist": 1}, } if testing: W.setup_preprocessing(infiles_pattern="testset_500.fastq.bgzf", params=p) else: W.setup_preprocessing(infiles_pattern="*.bgzf", params=p) W.run_preprocessing() # Clustering # =============================================================================== if testing: W.add_experiment_name("subgroups-test", "Test for splitting files by subgroups") else: W.add_experiment_name(experiment_name, description) default_params = {"c_thresh": 0.90, "n_filter": 8, "maskN": False} # Choose method to split files # =============================================================================== splitby = None subgroups = {"zebra": ".*zebra.*", "gazelle": ".*gazelle.*"} if splitby == "subgroups": W.setup_clustering( mode="split_by_subgroups",