def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         force_update=False, workflow="NGI", already_parsed=False,
         config=None, config_file_path=None):
    """Collect projects from the given directories and create their Charon entries.

    :param list demux_fcid_dirs: Directories to process; duplicates are ignored.
    :param list restrict_to_projects: Passed to
        ``setup_analysis_directory_structure`` (raw-flowcell mode only). Optional.
    :param list restrict_to_samples: Passed to
        ``setup_analysis_directory_structure`` (raw-flowcell mode only). Optional.
    :param bool force_update: When truthy, it is replaced by the return value
        of ``validate_force_update()`` and then passed on as ``force_overwrite``.
    :param str workflow: Passed to ``create_charon_entries_from_project``.
    :param bool already_parsed: If true, each directory is handed to
        ``recreate_project_from_filesystem``; otherwise each directory is
        handed to ``setup_analysis_directory_structure`` with
        ``create_files=False``.
    :param dict config: Passed to ``setup_analysis_directory_structure``.
    :param str config_file_path: Not used by this function.

    :raises SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = dict()

    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(demux_fcid_dir)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell: the helper receives the accumulated dict and
        # its return value becomes the new accumulator.
        for demux_fcid_dir in demux_fcid_dirs_set:
            projects_to_analyze = setup_analysis_directory_structure(demux_fcid_dir,
                                                                     projects_to_analyze,
                                                                     restrict_to_projects,
                                                                     restrict_to_samples,
                                                                     create_files=False,
                                                                     config=config)
    if not projects_to_analyze:
        # The trailing space after "{}" keeps the flowcell list from running
        # into the word "or" in the final message.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(demux_fcid_dirs_set)))

    # Only the project objects are needed from here on, not the dict keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(project, workflow=workflow,
                                               force_overwrite=force_update)
        except Exception as e:
            # Best effort: report the failure and carry on with the next project.
            print(e, file=sys.stderr)
Exemplo n.º 2
0
    def test_create_charon_entries_from_project_update(
            self, mock_seqrun_ud, mock_seqrun_cr, mock_libprep_ud,
            mock_libprep_cr, mock_sample_ud, mock_sample_cr, mock_project_ud,
            mock_project_cr):
        """Check the update calls made when every create call fails.

        Each ``*_cr`` mock is set to raise a ``CharonError`` with status code
        400. After calling ``create_charon_entries_from_project`` with
        ``force_overwrite=True``, each ``*_ud`` mock must have been called
        exactly once with the expected keyword arguments.
        """
        # Make every "create" mock fail (project, sample, libprep, seqrun).
        for failing_create in (mock_project_cr, mock_sample_cr,
                               mock_libprep_cr, mock_seqrun_cr):
            failing_create.side_effect = CharonError('Error', status_code=400)

        create_charon_entries_from_project(self.project_obj,
                                           force_overwrite=True)

        # (update mock, keyword arguments it must have received), checked in
        # project -> sample -> libprep -> seqrun order.
        expected_updates = [
            (mock_project_ud, dict(
                best_practice_analysis='whole_genome_reseq',
                name='S.One_20_02',
                projectid='P100001',
                sequencing_facility='NGI-S',
                status='OPEN')),
            (mock_sample_ud, dict(
                analysis_status='TO_ANALYZE',
                projectid='P100001',
                sampleid='P100001_101',
                status='STALE')),
            (mock_libprep_ud, dict(
                libprepid='A',
                projectid='P100001',
                qc='PASSED',
                sampleid='P100001_101')),
            (mock_seqrun_ud, dict(
                alignment_status='NOT_RUNNING',
                libprepid='A',
                mean_autosomal_coverage=0,
                projectid='P100001',
                sampleid='P100001_101',
                seqrunid='201030_A00187_0332_AHFCFLDSXX',
                total_reads=0)),
        ]
        for update_mock, expected_kwargs in expected_updates:
            update_mock.assert_called_once_with(**expected_kwargs)
Exemplo n.º 3
0
 def test_create_charon_entries_from_project(self, mock_seqrun,
                                             mock_libprep, mock_sample,
                                             mock_proj):
     """Check the calls made when creating entries for ``self.project_obj``.

     Each of the four mocks must have been called exactly once with the
     expected keyword arguments.
     """
     create_charon_entries_from_project(self.project_obj)

     project_kwargs = dict(
         best_practice_analysis='whole_genome_reseq',
         name='S.One_20_02',
         projectid='P100001',
         sequencing_facility='NGI-S',
         status='OPEN')
     sample_kwargs = dict(
         analysis_status='TO_ANALYZE',
         projectid='P100001',
         sampleid='P100001_101')
     libprep_kwargs = dict(
         libprepid='A',
         projectid='P100001',
         qc='PASSED',
         sampleid='P100001_101')
     seqrun_kwargs = dict(
         alignment_status='NOT_RUNNING',
         libprepid='A',
         mean_autosomal_coverage=0,
         projectid='P100001',
         sampleid='P100001_101',
         seqrunid='201030_A00187_0332_AHFCFLDSXX',
         total_reads=0)

     # Assertions run in project -> sample -> libprep -> seqrun order.
     mock_proj.assert_called_once_with(**project_kwargs)
     mock_sample.assert_called_once_with(**sample_kwargs)
     mock_libprep.assert_called_once_with(**libprep_kwargs)
     mock_seqrun.assert_called_once_with(**seqrun_kwargs)
Exemplo n.º 4
0
def process_demultiplexed_flowcells(demux_fcid_dirs,
                                    restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    fallback_libprep=None,
                                    keep_existing_data=False,
                                    no_qc=False,
                                    quiet=False,
                                    manual=False,
                                    config=None,
                                    config_file_path=None,
                                    generate_bqsr_bam=False):
    """Organize demultiplexed flowcells into projects and launch their analysis.

    :param list demux_fcid_dirs: The demux directory/directories to organize.
    :param list restrict_to_projects: Only these projects are considered. Optional.
    :param list restrict_to_samples: Only these samples are considered. Optional.
    :param bool restart_failed_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_finished_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_running_jobs: Forwarded to ``launch_analysis``.
    :param str fallback_libprep: Forwarded to ``organize_projects_from_flowcell``
                                 (default None).
    :param bool keep_existing_data: Forwarded to ``launch_analysis``.
    :param bool no_qc: Forwarded to ``launch_analysis``.
    :param bool quiet: Forwarded to ``organize_projects_from_flowcell``.
    :param bool manual: Not used in this function body.
    :param dict config: The parsed NGI configuration; optional.
    :param str config_file_path: Not used in this function body.
    :param bool generate_bqsr_bam: Forwarded to ``launch_analysis``.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    organized_projects = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config)

    uppsala_notice = ('Creating Charon records for Uppsala project "{}" if they '
                      'are missing')
    for proj in organized_projects:
        # Only projects whose id matches the Uppsala pattern get records here.
        if not UPPSALA_PROJECT_RE.match(proj.project_id):
            continue
        LOG.info(uppsala_notice.format(proj))
        create_charon_entries_from_project(proj, sequencing_facility="NGI-U")

    launch_analysis(organized_projects,
                    restart_failed_jobs,
                    restart_finished_jobs,
                    restart_running_jobs,
                    keep_existing_data=keep_existing_data,
                    no_qc=no_qc,
                    config=config,
                    generate_bqsr_bam=generate_bqsr_bam)
Exemplo n.º 5
0
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None, restart_failed_jobs=False,
                                    restart_finished_jobs=False, restart_running_jobs=False,
                                    config=None, config_file_path=None, quiet=False, manual=False):
    """Set up the analysis directory structure for flowcells and launch analysis.

    :param list demux_fcid_dirs: The demux directory/directories; duplicates
                                 are ignored.
    :param list restrict_to_projects: Only these projects are considered. Optional.
    :param list restrict_to_samples: Only these samples are considered. Optional.
    :param bool restart_failed_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_finished_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_running_jobs: Forwarded to ``launch_analysis``.
    :param dict config: The parsed NGI configuration; optional.
    :param str config_file_path: Not used in this function body.
    :param bool quiet: Forwarded to ``setup_analysis_directory_structure``.
    :param bool manual: Not used in this function body.

    :raises RuntimeError: If no projects were collected from the flowcells.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    unique_fcid_dirs = set(demux_fcid_dirs)

    # The helper receives the accumulated dict and its return value becomes
    # the new accumulator for the next flowcell.
    collected = dict()
    for fcid_dir in unique_fcid_dirs:
        collected = setup_analysis_directory_structure(
            os.path.abspath(fcid_dir),
            collected,
            restrict_to_projects,
            restrict_to_samples,
            create_files=True,
            config=config,
            quiet=quiet)

    if not collected:
        joined_dirs = ",".join(unique_fcid_dirs)
        if restrict_to_projects:
            template = ("No projects found to process: the specified flowcells "
                        "({fcid_dirs}) do not contain the specified project(s) "
                        "({restrict_to_projects}) or there was an error "
                        "gathering required information.")
            error_message = template.format(
                fcid_dirs=joined_dirs,
                restrict_to_projects=",".join(restrict_to_projects))
        else:
            template = ("No projects found to process in flowcells {} "
                        "or there was an error gathering required "
                        "information.")
            error_message = template.format(joined_dirs)
        raise RuntimeError(error_message)

    projects_to_analyze = collected.values()
    uppsala_notice = ('Creating Charon records for Uppsala project "{}" if they '
                      'are missing')
    for proj in projects_to_analyze:
        # Only projects whose id matches the Uppsala pattern get records here.
        if not UPPSALA_PROJECT_RE.match(proj.project_id):
            continue
        LOG.info(uppsala_notice.format(proj))
        create_charon_entries_from_project(proj, sequencing_facility="NGI-U")

    launch_analysis(projects_to_analyze, restart_failed_jobs,
                    restart_finished_jobs, restart_running_jobs,
                    config=config)
Exemplo n.º 6
0
def main(demux_fcid_dirs,
         restrict_to_projects=None,
         restrict_to_samples=None,
         best_practice_analysis=None,
         sequencing_facility=None,
         already_parsed=False,
         force_update=False,
         delete_existing=False,
         force_create_project=False,
         config=None,
         config_file_path=None):
    """Collect projects from the given directories and create their Charon entries.

    :param list demux_fcid_dirs: Directories to process; duplicates are ignored.
    :param list restrict_to_projects: Passed to
        ``setup_analysis_directory_structure`` (raw-flowcell mode only). Optional.
    :param list restrict_to_samples: Passed to
        ``setup_analysis_directory_structure`` (raw-flowcell mode only). Optional.
    :param best_practice_analysis: Passed to
        ``create_charon_entries_from_project``.
    :param sequencing_facility: Passed to
        ``create_charon_entries_from_project``.
    :param bool already_parsed: If true, each directory is handed to
        ``recreate_project_from_filesystem``; otherwise each directory is
        handed to ``setup_analysis_directory_structure``.
    :param bool force_update: When truthy, it is replaced by the return value
        of ``validate_force_update()`` and then passed on as ``force_overwrite``.
    :param bool delete_existing: When truthy, it is replaced by the return
        value of ``validate_delete_existing()`` before being passed on.
    :param bool force_create_project: Passed to
        ``recreate_project_from_filesystem`` (already-parsed mode only).
    :param dict config: Passed to ``setup_analysis_directory_structure``.
    :param str config_file_path: Not used by this function.

    :raises SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(
                demux_fcid_dir, force_create_project=force_create_project)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell: the helper receives the accumulated dict and
        # its return value becomes the new accumulator.
        for demux_fcid_dir in demux_fcid_dirs_set:
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir,
                projects_to_analyze,
                restrict_to_projects,
                restrict_to_samples,
                config=config)
    if not projects_to_analyze:
        # The trailing space after "{}" keeps the flowcell list from running
        # into the word "or" in the final message.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(demux_fcid_dirs_set)))

    # Only the project objects are needed from here on, not the dict keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_update,
                delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and carry on with the next project.
            print(e, file=sys.stderr)
Exemplo n.º 7
0
    def test_create_charon_entries_from_project(self):
        """Create Charon entries for a one-sample project, then delete the project.

        The project is deleted through a ``CharonSession`` whether or not the
        creation call succeeds.
        """
        # Build the object tree: project -> sample -> libprep -> seqrun
        project_obj = NGIProject(
            name=self.p_name,
            dirname=self.p_name,
            project_id=self.p_id,
            base_path=self.p_bp)
        (project_obj
         .add_sample(name=self.s_id, dirname=self.s_id)
         .add_libprep(name=self.l_id, dirname=self.l_id)
         .add_seqrun(name=self.sr_id, dirname=self.sr_id))

        try:
            # Push the whole tree to the database
            create_charon_entries_from_project(project_obj)
        finally:
            # Clean up the project record regardless of the outcome above
            CharonSession().project_delete(project_obj.project_id)
Exemplo n.º 8
0
    def test_create_charon_entries_from_project(self):
        """Create Charon entries for a one-sample project, then delete the project.

        Deletion happens in a ``finally`` clause, so it runs even when the
        creation call raises.
        """
        # Assemble the NGI objects one level at a time
        ngi_project = NGIProject(name=self.p_name, dirname=self.p_name,
                                 project_id=self.p_id, base_path=self.p_bp)
        ngi_sample = ngi_project.add_sample(name=self.s_id, dirname=self.s_id)
        ngi_libprep = ngi_sample.add_libprep(name=self.l_id, dirname=self.l_id)
        ngi_libprep.add_seqrun(name=self.sr_id, dirname=self.sr_id)

        try:
            # Create them in the db
            create_charon_entries_from_project(ngi_project)
        finally:
            session = CharonSession()
            session.project_delete(ngi_project.project_id)
Exemplo n.º 9
0
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None, restart_failed_jobs=False,
                                    restart_finished_jobs=False, restart_running_jobs=False,
                                    fallback_libprep=None, keep_existing_data=False, no_qc=False,
                                    quiet=False, manual=False, config=None, config_file_path=None,
                                    generate_bqsr_bam=False):
    """Organize demultiplexed flowcells into projects and launch their analysis.

    :param list demux_fcid_dirs: The demux directory/directories to organize.
    :param list restrict_to_projects: Only these projects are considered. Optional.
    :param list restrict_to_samples: Only these samples are considered. Optional.
    :param bool restart_failed_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_finished_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_running_jobs: Forwarded to ``launch_analysis``.
    :param str fallback_libprep: Forwarded to ``organize_projects_from_flowcell``
                                 (default None).
    :param bool keep_existing_data: Forwarded to ``launch_analysis``.
    :param bool no_qc: Forwarded to ``launch_analysis``.
    :param bool quiet: Forwarded to ``organize_projects_from_flowcell``.
    :param bool manual: Not used in this function body.
    :param dict config: The parsed NGI configuration; optional.
    :param str config_file_path: Not used in this function body.
    :param bool generate_bqsr_bam: Forwarded to ``launch_analysis``.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    organized_projects = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config)

    uppsala_notice = ('Creating Charon records for Uppsala project "{}" if they '
                      'are missing')
    for proj in organized_projects:
        # Only projects whose id matches the Uppsala pattern get records here.
        if not UPPSALA_PROJECT_RE.match(proj.project_id):
            continue
        LOG.info(uppsala_notice.format(proj))
        create_charon_entries_from_project(proj, sequencing_facility="NGI-U")

    launch_analysis(
        organized_projects,
        restart_failed_jobs,
        restart_finished_jobs,
        restart_running_jobs,
        keep_existing_data=keep_existing_data,
        no_qc=no_qc,
        config=config,
        generate_bqsr_bam=generate_bqsr_bam)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         best_practice_analysis=None, sequencing_facility=None,
         already_parsed=False,
         force_update=False, delete_existing=False,
         force_create_project=False,
         config=None, config_file_path=None):
    """Collect projects from the given directories and create their Charon entries.

    :param list demux_fcid_dirs: Directories to process; duplicates are ignored.
    :param list restrict_to_projects: Passed to
        ``setup_analysis_directory_structure`` (raw-flowcell mode only). Optional.
    :param list restrict_to_samples: Passed to
        ``setup_analysis_directory_structure`` (raw-flowcell mode only). Optional.
    :param best_practice_analysis: Passed to
        ``create_charon_entries_from_project``.
    :param sequencing_facility: Passed to
        ``create_charon_entries_from_project``.
    :param bool already_parsed: If true, each directory is handed to
        ``recreate_project_from_filesystem``; otherwise each directory is
        handed to ``setup_analysis_directory_structure``.
    :param bool force_update: When truthy, it is replaced by the return value
        of ``validate_force_update()`` and then passed on as ``force_overwrite``.
    :param bool delete_existing: When truthy, it is replaced by the return
        value of ``validate_delete_existing()`` before being passed on.
    :param bool force_create_project: Passed to
        ``recreate_project_from_filesystem`` (already-parsed mode only).
    :param dict config: Passed to ``setup_analysis_directory_structure``.
    :param str config_file_path: Not used by this function.

    :raises SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = dict()
    if already_parsed:
        # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            p = recreate_project_from_filesystem(demux_fcid_dir,
                                                 force_create_project=force_create_project)
            projects_to_analyze[p.name] = p
    else:
        # Raw illumina flowcell: the helper receives the accumulated dict and
        # its return value becomes the new accumulator.
        for demux_fcid_dir in demux_fcid_dirs_set:
            projects_to_analyze = setup_analysis_directory_structure(demux_fcid_dir,
                                                                     projects_to_analyze,
                                                                     restrict_to_projects,
                                                                     restrict_to_samples,
                                                                     config=config)
    if not projects_to_analyze:
        # The trailing space after "{}" keeps the flowcell list from running
        # into the word "or" in the final message.
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(demux_fcid_dirs_set)))

    # Only the project objects are needed from here on, not the dict keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(project,
                                               best_practice_analysis=best_practice_analysis,
                                               sequencing_facility=sequencing_facility,
                                               force_overwrite=force_update,
                                               delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and carry on with the next project.
            print(e, file=sys.stderr)
Exemplo n.º 11
0
                restrict_to_samples=args.restrict_to_samples)
            if not project.samples:
                LOG.info('No samples found for project {} (path {})'.format(
                    project.project_id, qc_project_dir))
            for sample in project:
                qc_ngi.launchers.analyze(project, sample, quiet=args.quiet)

    ## Organize Flowcell
    elif 'organize_fc_dirs' in args:
        organize_fc_dirs_list = list(set(args.organize_fc_dirs))
        LOG.info("Organizing flowcell {} {}".format(
            inflector.plural("directory", len(organize_fc_dirs_list)),
            ", ".join(organize_fc_dirs_list)))
        projects_to_analyze = \
                organize_projects_from_flowcell(demux_fcid_dirs=organize_fc_dirs_list,
                                                restrict_to_projects=args.restrict_to_projects,
                                                restrict_to_samples=args.restrict_to_samples,
                                                fallback_libprep=args.fallback_libprep,
                                                quiet=args.quiet)
        for project in projects_to_analyze:
            try:
                create_charon_entries_from_project(
                    project=project,
                    best_practice_analysis=args.best_practice_analysis,
                    sequencing_facility=args.sequencing_facility,
                    force_overwrite=args.force_update)
            except Exception as e:
                LOG.error(e.message)
                print(e, file=sys.stderr)
        LOG.info("Done with organization.")
Exemplo n.º 12
0
def process_demultiplexed_flowcells(demux_fcid_dirs,
                                    restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    config=None,
                                    config_file_path=None,
                                    quiet=False,
                                    manual=False):
    """Set up the analysis directory structure for flowcells and launch analysis.

    :param list demux_fcid_dirs: The demux directory/directories; duplicates
                                 are ignored.
    :param list restrict_to_projects: Only these projects are considered. Optional.
    :param list restrict_to_samples: Only these samples are considered. Optional.
    :param bool restart_failed_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_finished_jobs: Forwarded to ``launch_analysis``.
    :param bool restart_running_jobs: Forwarded to ``launch_analysis``.
    :param dict config: The parsed NGI configuration; optional.
    :param str config_file_path: Not used in this function body.
    :param bool quiet: Forwarded to ``setup_analysis_directory_structure``.
    :param bool manual: Not used in this function body.

    :raises RuntimeError: If no projects were collected from the flowcells.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    unique_fcid_dirs = set(demux_fcid_dirs)

    # The helper receives the accumulated dict and its return value becomes
    # the new accumulator for the next flowcell.
    collected = dict()
    for fcid_dir in unique_fcid_dirs:
        absolute_dir = os.path.abspath(fcid_dir)
        collected = setup_analysis_directory_structure(absolute_dir,
                                                       collected,
                                                       restrict_to_projects,
                                                       restrict_to_samples,
                                                       create_files=True,
                                                       config=config,
                                                       quiet=quiet)

    if not collected:
        joined_dirs = ",".join(unique_fcid_dirs)
        if restrict_to_projects:
            template = ("No projects found to process: the specified flowcells "
                        "({fcid_dirs}) do not contain the specified project(s) "
                        "({restrict_to_projects}) or there was an error "
                        "gathering required information.")
            error_message = template.format(
                fcid_dirs=joined_dirs,
                restrict_to_projects=",".join(restrict_to_projects))
        else:
            template = ("No projects found to process in flowcells {} "
                        "or there was an error gathering required "
                        "information.")
            error_message = template.format(joined_dirs)
        raise RuntimeError(error_message)

    projects_to_analyze = collected.values()
    uppsala_notice = ('Creating Charon records for Uppsala project "{}" if they '
                      'are missing')
    for proj in projects_to_analyze:
        # Only projects whose id matches the Uppsala pattern get records here.
        if not UPPSALA_PROJECT_RE.match(proj.project_id):
            continue
        LOG.info(uppsala_notice.format(proj))
        create_charon_entries_from_project(proj, sequencing_facility="NGI-U")

    launch_analysis(projects_to_analyze, restart_failed_jobs,
                    restart_finished_jobs, restart_running_jobs,
                    config=config)
                inflector.plural("directory", len(qc_flowcell_dirs_list)), ", ".join(qc_flowcell_dirs_list)
            )
        )
        projects_to_analyze = organize_projects_from_flowcell(
            demux_fcid_dirs=qc_flowcell_dirs_list,
            restrict_to_projects=args.restrict_to_projects,
            restrict_to_samples=args.restrict_to_samples,
            fallback_libprep=args.fallback_libprep,
            quiet=args.quiet,
        )
        for project in projects_to_analyze:
            try:
                create_charon_entries_from_project(
                    project=project,
                    best_practice_analysis=args.best_practice_analysis,
                    sequencing_facility=args.sequencing_facility,
                    force_overwrite=args.force_update,
                    delete_existing=args.delete_existing,
                )
            except Exception as e:
                print(e, file=sys.stderr)
        LOG.info("Done with organization.")
        for project in projects_to_analyze:
            for sample in project:
                qc_ngi.launchers.analyze(project, sample, quiet=args.quiet)

    ## QC Project
    elif "qc_project_dirs" in args:
        for qc_project_dir in args.qc_project_dirs:
            project = recreate_project_from_filesystem(
                project_dir=qc_project_dir, restrict_to_samples=args.restrict_to_samples