Пример #1
0
    def placement(self):
        """Launch the workflow that places sequence variants on a reference package.

        Options are read from ``self.__args__`` and handed to ``sl.run`` as
        ``--name=value`` strings, with ``placement.Workflow_Placement`` as the
        main task. ``--sv-weights-csv`` is only forwarded when a truthy value
        was supplied.
        """
        opts = self.__args__

        # local_scheduler is simply the negation of the luigi_manager option.
        use_local_scheduler = not opts.luigi_manager

        # (template, value) pairs, rendered in exactly this order.
        option_pairs = [
            ('--sv-fasta={}', opts.sequence_variants),
            ('--working-dir={}', opts.working_dir),
            ('--destination-dir={}', opts.destination_dir),
            ('--refpkg-tgz={}', opts.refpkg_tgz),
            ('--seq-map-csv={}', opts.seq_map_csv),
            ('--workers={}', opts.workers),
        ]
        if opts.sv_weights_csv:
            option_pairs.append(('--sv-weights-csv={}', opts.sv_weights_csv))

        rendered = [template.format(value) for template, value in option_pairs]
        sl.run(
            local_scheduler=use_local_scheduler,
            main_task_cls=placement.Workflow_Placement,
            cmdline_args=rendered
        )
Пример #2
0
    def classify(self):
        """Launch the workflow that classifies sequence variants using a placement.

        Options are read from ``self.__args__`` and handed to ``sl.run`` as
        ``--name=value`` strings, with ``classify.Workflow_Classify`` as the
        main task. ``--sv-weights-csv`` and ``--labels`` are only forwarded
        when a truthy value was supplied.
        """
        opts = self.__args__

        # local_scheduler is simply the negation of the luigi_manager option.
        use_local_scheduler = not opts.luigi_manager

        # (template, value) pairs, rendered in exactly this order.
        option_pairs = [
            ('--sv-fasta={}', opts.sequence_variants),
            ('--working-dir={}', opts.working_dir),
            ('--jplace={}', opts.jplace),
            ('--destination-dir={}', opts.destination_dir),
            ('--refpkg-tgz={}', opts.refpkg_tgz),
            ('--seq-map-csv={}', opts.seq_map_csv),
            ('--workers={}', opts.workers),
        ]

        # Optional flags are appended after the mandatory ones.
        if opts.sv_weights_csv:
            option_pairs.append(('--sv-weights-csv={}', opts.sv_weights_csv))
        if opts.labels:
            option_pairs.append(('--labels={}', opts.labels))

        rendered = [template.format(value) for template, value in option_pairs]
        sl.run(
            local_scheduler=use_local_scheduler,
            main_task_cls=classify.Workflow_Classify,
            cmdline_args=rendered
        )
Пример #3
0
    def refpkg(self):
        """Launch the workflow that builds a reference package.

        The package is described by the original author as appropriate for
        pplacer or other pipelines. Options are read from ``self.__args__``
        and handed to ``sl.run`` as ``--name=value`` strings, with
        ``refpkg.WorkflowMakeRefpkg`` as the main task. The three ``repo_*``
        options are iterables of strings and are comma-joined into one value.
        """
        opts = self.__args__

        # local_scheduler is simply the negation of the luigi_manager option.
        use_local_scheduler = not opts.luigi_manager

        # (template, value) pairs, rendered in exactly this order.
        option_pairs = [
            ('--sequence-variants-path={}', opts.sequence_variants),
            ('--entrez-email={}', opts.entrez_email),
            ('--repo-seq-info={}', ",".join(opts.repo_seq_info)),
            ('--repo-valid-fasta={}', ",".join(opts.repo_valid_fasta)),
            ('--repo-annotated-fasta={}', ",".join(opts.repo_annotated_fasta)),
            ('--new-refpkg-path={}', opts.refpkg_destdir),
            ('--new-refpkg-name={}', opts.refpkg_name),
            ('--working-dir={}', opts.working_dir),
            ('--min-id-annotated={}', opts.min_id_annotated),
            ('--min-id-valid={}', opts.min_id_valid),
            ('--min-best={}', opts.min_best),
            ('--workers={}', opts.workers),
        ]

        rendered = [template.format(value) for template, value in option_pairs]
        sl.run(
            local_scheduler=use_local_scheduler,
            main_task_cls=refpkg.WorkflowMakeRefpkg,
            cmdline_args=rendered
        )
Пример #4
0
    def ncbi_16s(self):
        """Launch the workflow that updates a repository of 16S sequences from NCBI NT.

        Prints a start message, then reads the options from ``self.__args__``
        and hands them to ``sl.run`` as ``--name=value`` strings, with
        ``ncbi_16s.Workflow_NCBI_16s`` as the main task. Note that the
        ``--repo-url`` value is taken from the ``repo_secret`` option.
        """
        print("Starting NCBI_16s")
        opts = self.__args__

        # local_scheduler is simply the negation of the luigi_manager option.
        use_local_scheduler = not opts.luigi_manager

        # (template, value) pairs, rendered in exactly this order.
        option_pairs = [
            ('--ncbi-email={}', opts.ncbi_email),
            ('--repo-url={}', opts.repo_secret),
            ('--example-seqs={}', opts.example_seqs),
            ('--working-dir={}', opts.working_dir),
            ('--workers={}', opts.workers),
        ]

        rendered = [template.format(value) for template, value in option_pairs]
        sl.run(
            local_scheduler=use_local_scheduler,
            main_task_cls=ncbi_16s.Workflow_NCBI_16s,
            cmdline_args=rendered
        )
Пример #5
0
    def sv_dada2(self):
        """Launch the workflow that generates sequence variants using DADA2.

        Options are read from ``self.__args__`` and handed to ``sl.run`` as
        ``--name=value`` strings, with ``sv_dada2.Workflow_DADA2`` as the main
        task. ``--trimLeft`` is filled from the ``trim_left`` option; the
        other camel-case flags use options of the same name.
        """
        opts = self.__args__

        # local_scheduler is simply the negation of the luigi_manager option.
        use_local_scheduler = not opts.luigi_manager

        # (template, value) pairs, rendered in exactly this order.
        option_pairs = [
            ('--workers={}', opts.workers),
            ('--working-dir={}', opts.working_dir),
            ('--destination-dir={}', opts.destination_dir),
            ('--manifest={}', opts.manifest),
            ('--trimLeft={}', opts.trim_left),
            ('--maxN={}', opts.maxN),
            ('--maxEE={}', opts.maxEE),
            ('--truncLenF={}', opts.truncLenF),
            ('--truncLenR={}', opts.truncLenR),
            ('--truncQ={}', opts.truncQ),
        ]

        rendered = [template.format(value) for template, value in option_pairs]
        sl.run(
            local_scheduler=use_local_scheduler,
            main_task_cls=sv_dada2.Workflow_DADA2,
            cmdline_args=rendered
        )
Пример #6
0
                        help="Job Queue to use with AWS Batch",
                        type=str,
                        default="optimal")

    parser.add_argument("--aws-s3-scratch-loc",
                        help="S3 bucket to use for scratch files",
                        type=str)

    parser.add_argument("--engine",
                        help="Execution engine",
                        type=str,
                        default="aws_batch")

    parser.add_argument("--workers",
                        help="Number of workers to use for parallel execution",
                        type=int,
                        default=500)

    args = parser.parse_args()

    # Validate with parser.error() instead of assert: assert statements are
    # removed when Python runs with -O, which would let bad input through.
    # parser.error() prints the usage message and exits with status 2, the
    # same way argparse reports any other invalid argument.

    # Either specify the SRA or S3
    if args.input_location not in ("SRA", "S3"):
        parser.error(
            "input location must be either SRA or S3 (got {!r})".format(
                args.input_location))

    if not os.path.exists(args.metadata_fp):
        parser.error(
            "metadata file does not exist: {}".format(args.metadata_fp))

    # Forward every parsed option to the workflow as --name=value, turning the
    # underscores of the argparse attribute names back into dashes.
    # NOTE(review): options left unset are forwarded as the literal string
    # "None" (e.g. --aws-s3-scratch-loc=None) -- confirm the workflow
    # parameters tolerate that.
    sl.run(main_task_cls=AlignFastsqWorkflow,
           cmdline_args=[
               "--{}={}".format(k.replace("_", "-"), v)
               for k, v in vars(args).items()
           ])
    # AWS Batch job queue; falls back to "optimal" when not given.
    parser.add_argument("--aws-batch-job-queue",
                        help="Job Queue to use with AWS Batch",
                        type=str,
                        default="optimal")

    # Scratch location on S3; no default is set here.
    parser.add_argument("--aws-s3-scratch-loc",
                        help="S3 bucket to use for scratch files",
                        type=str)

    # Execution engine; falls back to "aws_batch" when not given.
    parser.add_argument("--engine",
                        help="Execution engine",
                        type=str,
                        default="aws_batch")
    args = parser.parse_args()

    # Forward every parsed option to the workflow as --name=value, turning the
    # underscores of the argparse attribute names back into dashes.
    forwarded_options = [
        "--{}={}".format(option_name.replace("_", "-"), option_value)
        for option_name, option_value in vars(args).items()
    ]
    sl.run(main_task_cls=FetchPatricFunctions, cmdline_args=forwarded_options)
Пример #8
0
                ' --sampling-method=%s' % self.sampling_method +
                ' --train-method=%s' % self.train_method +
                ' --train-size=%s' % self.train_size +
                ' --test-size=%s' % self.test_size +
                ' --lin-type=%s' % self.lin_type +
                ' --lin-cost=%s' % lowest_cost +
                ' --slurm-project=%s' % self.slurm_project +
                ' --runmode=%s' % self.runmode)
        with self.out_done().open('w') as donefile:
            donefile.write('Done!\n')


# ================================================================================

if __name__ == '__main__':
    # Run the CrossValidate workflow against a scheduler on localhost with a
    # single worker.
    sl.run(main_task_cls=CrossValidate,
           cmdline_args=['--scheduler-host=localhost', '--workers=1'])

    # Load the merged report: the first CSV row supplies the column names and
    # every later row becomes a dict keyed by those names.
    merged_report_filepath = 'data/test_run_001_merged_report.csv'
    rowdicts = []
    with open(merged_report_filepath) as infile:
        for rid, row in enumerate(csv.reader(infile, delimiter=',')):
            if rid == 0:
                headerrow = row
                continue
            rowdicts.append(
                {headerrow[col]: cell for col, cell in enumerate(row)})

    # Replicate ids, each mapped to a marker and a line-style string.
    repl_ids = ['r1', 'r2', 'r3']
    repl_markers = {'r1': 'o', 'r2': '*', 'r3': '+'}
    repl_linestyles = {'r1': '--', 'r2': ':', 'r3': '-.'}
                ' --lin-type=%s' % self.lin_type +
                ' --lin-cost=%s' % lowest_cost +
                ' --slurm-project=%s' % self.slurm_project +
                ' --runmode=%s' % self.runmode)
        with self.out_done().open('w') as donefile:
            donefile.write('Done!\n')


# ## Execute the workflow
#
# Execute the workflow locally (using the luigi daemon which runs in the background), starting with the `CrossValidateWorkflow` workflow class.

# In[ ]:

# Print a timestamped message before and after the run. The parenthesised
# single-argument form behaves the same as a Python 2 print statement and as a
# Python 3 print() call, so this cell no longer fails to parse on Python 3.
print(time.strftime('%Y-%m-%d %H:%M:%S: ') + 'Workflow started ...')
sciluigi.run(cmdline_args=['--scheduler-host=localhost', '--workers=4'],
             main_task_cls=CrossValidateWorkflow)
print(time.strftime('%Y-%m-%d %H:%M:%S: ') + 'Workflow finished!')

# ## Parse result data from workflow into python dicts
#
# This step does not produce any output, but is done as a preparation for the subsequent printing of values, and plotting.

# In[ ]:

import csv
from matplotlib.pyplot import *

# Path of the merged report CSV.
# NOTE(review): presumably written by the workflow run above -- confirm
# against the task that produces it.
merged_report_filepath = 'data/test_run_001_merged_report.csv'
# Identifiers of the three replicates.
replicate_ids = ['r1', 'r2', 'r3']
# Starts empty.
# NOTE(review): presumably filled with one dict per CSV row, as the section
# heading suggests; the code that fills it is outside this view.
rowdicts = []