def placement(self):
    """Place sequence variants on a reference package."""
    args = self.__args__
    # Use the central luigi scheduler only when a manager is requested.
    local_scheduler = not args.luigi_manager
    cmdline_args = [
        '--sv-fasta={}'.format(args.sequence_variants),
        '--working-dir={}'.format(args.working_dir),
        '--destination-dir={}'.format(args.destination_dir),
        '--refpkg-tgz={}'.format(args.refpkg_tgz),
        '--seq-map-csv={}'.format(args.seq_map_csv),
        '--workers={}'.format(args.workers),
    ]
    if args.sv_weights_csv:
        cmdline_args.append(
            '--sv-weights-csv={}'.format(args.sv_weights_csv)
        )
    sl.run(
        local_scheduler=local_scheduler,
        main_task_cls=placement.Workflow_Placement,
        cmdline_args=cmdline_args,
    )
def classify(self):
    """Classify sequence variants using a placement."""
    args = self.__args__
    local_scheduler = not args.luigi_manager
    cmdline_args = [
        '--sv-fasta={}'.format(args.sequence_variants),
        '--working-dir={}'.format(args.working_dir),
        '--jplace={}'.format(args.jplace),
        '--destination-dir={}'.format(args.destination_dir),
        '--refpkg-tgz={}'.format(args.refpkg_tgz),
        '--seq-map-csv={}'.format(args.seq_map_csv),
        '--workers={}'.format(args.workers),
    ]
    if args.sv_weights_csv:
        cmdline_args.append(
            '--sv-weights-csv={}'.format(args.sv_weights_csv)
        )
    if args.labels:
        cmdline_args.append('--labels={}'.format(args.labels))
    sl.run(
        local_scheduler=local_scheduler,
        main_task_cls=classify.Workflow_Classify,
        cmdline_args=cmdline_args,
    )
def refpkg(self):
    """Make a reference package appropriate for pplacer or other pipelines."""
    args = self.__args__
    local_scheduler = not args.luigi_manager
    sl.run(
        local_scheduler=local_scheduler,
        main_task_cls=refpkg.WorkflowMakeRefpkg,
        cmdline_args=[
            '--sequence-variants-path={}'.format(args.sequence_variants),
            '--entrez-email={}'.format(args.entrez_email),
            # Repeated repo arguments are joined into comma-separated lists.
            '--repo-seq-info={}'.format(",".join(args.repo_seq_info)),
            '--repo-valid-fasta={}'.format(",".join(args.repo_valid_fasta)),
            '--repo-annotated-fasta={}'.format(",".join(args.repo_annotated_fasta)),
            '--new-refpkg-path={}'.format(args.refpkg_destdir),
            '--new-refpkg-name={}'.format(args.refpkg_name),
            '--working-dir={}'.format(args.working_dir),
            '--min-id-annotated={}'.format(args.min_id_annotated),
            '--min-id-valid={}'.format(args.min_id_valid),
            '--min-best={}'.format(args.min_best),
            '--workers={}'.format(args.workers),
        ],
    )
def ncbi_16s(self):
    """Update a repository of 16S sequences from NCBI NT."""
    print("Starting NCBI_16s")
    args = self.__args__
    local_scheduler = not args.luigi_manager
    sl.run(
        local_scheduler=local_scheduler,
        main_task_cls=ncbi_16s.Workflow_NCBI_16s,
        cmdline_args=[
            '--ncbi-email={}'.format(args.ncbi_email),
            '--repo-url={}'.format(args.repo_secret),
            '--example-seqs={}'.format(args.example_seqs),
            '--working-dir={}'.format(args.working_dir),
            '--workers={}'.format(args.workers),
        ],
    )
def sv_dada2(self):
    """Generate sequence variants using DADA2."""
    args = self.__args__
    local_scheduler = not args.luigi_manager
    sl.run(
        local_scheduler=local_scheduler,
        main_task_cls=sv_dada2.Workflow_DADA2,
        cmdline_args=[
            '--workers={}'.format(args.workers),
            '--working-dir={}'.format(args.working_dir),
            '--destination-dir={}'.format(args.destination_dir),
            '--manifest={}'.format(args.manifest),
            '--trimLeft={}'.format(args.trim_left),
            '--maxN={}'.format(args.maxN),
            '--maxEE={}'.format(args.maxEE),
            '--truncLenF={}'.format(args.truncLenF),
            '--truncLenR={}'.format(args.truncLenR),
            '--truncQ={}'.format(args.truncQ),
        ],
    )
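
# A minimal sketch (an assumption, not code from this repo) of how
# subcommand methods like placement/classify/refpkg/ncbi_16s/sv_dada2
# above are typically wired to a command line via argparse subparsers;
# `build_parser` and `commands` are hypothetical names. Each method's
# docstring doubles as its subcommand help text.
import argparse

def build_parser(commands):
    """commands: mapping of subcommand name -> bound method to dispatch."""
    parser = argparse.ArgumentParser(description="Workflow launcher")
    subparsers = parser.add_subparsers(dest="command")
    for name, method in commands.items():
        sub = subparsers.add_parser(name, help=method.__doc__)
        sub.set_defaults(func=method)  # stored for later dispatch: args.func()
    return parser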
help="Job Queue to use with AWS Batch", type=str, default="optimal") parser.add_argument("--aws-s3-scratch-loc", help="S3 bucket to use for scratch files", type=str) parser.add_argument("--engine", help="Execution engine", type=str, default="aws_batch") parser.add_argument("--workers", help="Number of workers to use for parallel execution", type=int, default=500) args = parser.parse_args() # Either specify the SRA or S3 assert args.input_location in ["SRA", "S3"] assert os.path.exists(args.metadata_fp) sl.run(main_task_cls=AlignFastsqWorkflow, cmdline_args=[ "--{}={}".format(k.replace("_", "-"), v) for k, v in args.__dict__.items() ])
parser.add_argument(
    "--aws-batch-job-queue",
    help="Job Queue to use with AWS Batch",
    type=str,
    default="optimal",
)
parser.add_argument(
    "--aws-s3-scratch-loc",
    help="S3 bucket to use for scratch files",
    type=str,
)
parser.add_argument(
    "--engine",
    help="Execution engine",
    type=str,
    default="aws_batch",
)
args = parser.parse_args()

sl.run(
    main_task_cls=FetchPatricFunctions,
    cmdline_args=[
        "--{}={}".format(k.replace("_", "-"), v)
        for k, v in args.__dict__.items()
    ],
)
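
# Both argparse blocks above re-serialize the parsed Namespace into
# luigi-style "--flag=value" strings for sl.run(). A hedged note on that
# pattern: attributes left at None serialize literally as "--flag=None".
# A small helper (hypothetical, not from this repo) that skips unset
# values while keeping the same underscore-to-dash conversion:
def namespace_to_cmdline(args):
    """Convert an argparse.Namespace into --flag=value strings."""
    return [
        "--{}={}".format(key.replace("_", "-"), value)
        for key, value in vars(args).items()  # vars(args) == args.__dict__
        if value is not None  # drop flags the user never supplied
    ]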
                 ' --sampling-method=%s' % self.sampling_method +
                 ' --train-method=%s' % self.train_method +
                 ' --train-size=%s' % self.train_size +
                 ' --test-size=%s' % self.test_size +
                 ' --lin-type=%s' % self.lin_type +
                 ' --lin-cost=%s' % lowest_cost +
                 ' --slurm-project=%s' % self.slurm_project +
                 ' --runmode=%s' % self.runmode)
        with self.out_done().open('w') as donefile:
            donefile.write('Done!\n')

# ================================================================================

import csv  # used below to parse the merged report

if __name__ == '__main__':
    sl.run(cmdline_args=['--scheduler-host=localhost', '--workers=1'],
           main_task_cls=CrossValidate)

    merged_report_filepath = 'data/test_run_001_merged_report.csv'
    rowdicts = []
    with open(merged_report_filepath) as infile:
        csvrd = csv.reader(infile, delimiter=',')
        for rid, row in enumerate(csvrd):
            if rid == 0:
                headerrow = row
            else:
                # Zip each data row with the header into a dict
                rowdict = {headerrow[i]: v for i, v in enumerate(row)}
                rowdicts.append(rowdict)

    repl_ids = ['r1', 'r2', 'r3']
    repl_markers = {'r1': 'o', 'r2': '*', 'r3': '+'}
    repl_linestyles = {'r1': '--', 'r2': ':', 'r3': '-.'}
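
# The header/row bookkeeping above hand-rolls what csv.DictReader does:
# it reads the first row as the header and yields one dict per data row.
# A minimal equivalent sketch (assuming the same merged-report layout);
# `read_report` is a hypothetical helper, not part of this codebase.
import csv

def read_report(path):
    """Return the merged report as a list of header-keyed dicts."""
    with open(path) as infile:
        return list(csv.DictReader(infile, delimiter=','))

# e.g. rowdicts = read_report('data/test_run_001_merged_report.csv')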
                 ' --lin-type=%s' % self.lin_type +
                 ' --lin-cost=%s' % lowest_cost +
                 ' --slurm-project=%s' % self.slurm_project +
                 ' --runmode=%s' % self.runmode)
        with self.out_done().open('w') as donefile:
            donefile.write('Done!\n')

# ## Execute the workflow
#
# Execute the workflow locally (using the luigi daemon, which runs in the
# background), starting with the `CrossValidateWorkflow` workflow class.

# In[ ]:

print(time.strftime('%Y-%m-%d %H:%M:%S: ') + 'Workflow started ...')
sciluigi.run(cmdline_args=['--scheduler-host=localhost', '--workers=4'],
             main_task_cls=CrossValidateWorkflow)
print(time.strftime('%Y-%m-%d %H:%M:%S: ') + 'Workflow finished!')

# ## Parse result data from workflow into python dicts
#
# This step does not produce any output; it prepares the data for the
# subsequent printing of values and plotting.

# In[ ]:

import csv
from matplotlib.pyplot import *

merged_report_filepath = 'data/test_run_001_merged_report.csv'
replicate_ids = ['r1', 'r2', 'r3']
rowdicts = []