示例#1
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples_dict = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))
    for sample in samples_dict:
        sample_id = sample.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps_dict = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(project_id,sample_id, e))
        for libprep in libpreps_dict:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,  dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns_dict = charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))
            for seqrun in seqruns_dict:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
示例#2
0
def get_finished_seqruns_for_sample(project_id, sample_id,
                                    include_failed_libpreps=False):
    """Find all the finished seqruns for a particular sample.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                aln_status = charon_session.seqrun_get(projectid=project_id,
                                                       sampleid=sample_id,
                                                       libprepid=libprep_id,
                                                       seqrunid=seqrun_id).get('alignment_status')
                if aln_status == "DONE":
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.debug('Skipping seqrun "{}" due to alignment_status '
                              '"{}"'.format(seqrun_id, aln_status))
        else:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
    return dict(libpreps)
示例#3
0
def update_analysis(project_id, status):
    charon_session = CharonSession()
    mail_analysis(project_id,
                  engine_name='rna_ngi',
                  level='INFO' if status else 'ERROR')
    new_sample_status = 'ANALYZED' if status else 'FAILED'
    new_seqrun_status = 'DONE' if status else 'FAILED'
    for sample in charon_session.project_get_samples(project_id).get(
            "samples", {}):
        if sample.get('analysis_status') == "UNDER_ANALYSIS":
            LOG.info("Marking analysis of sample {}/{} as {}".format(
                project_id, sample.get('sampleid'), new_sample_status))
            charon_session.sample_update(project_id,
                                         sample.get('sampleid'),
                                         analysis_status=new_sample_status)
            for libprep in charon_session.sample_get_libpreps(
                    project_id, sample.get('sampleid')).get('libpreps', {}):
                if libprep.get('qc') != 'FAILED':
                    for seqrun in charon_session.libprep_get_seqruns(
                            project_id, sample.get('sampleid'),
                            libprep.get('libprepid')).get('seqruns', {}):
                        if seqrun.get('alignment_status') == "RUNNING":
                            LOG.info(
                                "Marking analysis of seqrun {}/{}/{}/{} as {}".
                                format(project_id, sample.get('sampleid'),
                                       libprep.get('libprepid'),
                                       seqrun.get('seqrunid'),
                                       new_seqrun_status))
                            charon_session.seqrun_update(
                                project_id,
                                sample.get('sampleid'),
                                libprep.get('libprepid'),
                                seqrun.get('seqrunid'),
                                alignment_status=new_seqrun_status)
示例#4
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples_dict = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))
    for sample in samples_dict:
        sample_id = sample.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps_dict = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(project_id,sample_id, e))
        for libprep in libpreps_dict:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,  dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns_dict = charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))
            for seqrun in seqruns_dict:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
示例#5
0
def check_for_preexisting_sample_runs(project_obj, sample_obj, restart_running_jobs, restart_finished_jobs):
    """If any analysis is undergoing or has completed for this sample's
    seqruns, raise a RuntimeError.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: command line parameter
    :param boolean restart_finished_jobs: command line parameter

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    for libprep in sample_libpreps['libpreps']:
        libprep_id = libprep['libprepid']
        for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                         sampleid=sample_id,
                                                         libprepid=libprep_id)['seqruns']:
            seqrun_id = seqrun['seqrunid']
            aln_status = charon_session.seqrun_get(projectid=project_id,
                                                   sampleid=sample_id,
                                                   libprepid=libprep_id,
                                                   seqrunid=seqrun_id).get('alignment_status')
            if (aln_status == "RUNNING" and not restart_running_jobs) or \
                (aln_status == "DONE" and not restart_finished_jobs):
                    raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                          'seqrun "{}" with status "{}"'.format(project_obj,
                          sample_obj, seqrun_id, aln_status))
示例#6
0
def get_valid_seqruns_for_sample(project_id,
                                 sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = (
        "alignment_status",
        "genotype_status",
    )
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(
                             ", ".join(valid_status_values), status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(
                    projectid=project_id, sampleid=sample_id,
                    libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(
                        projectid=project_id,
                        sampleid=sample_id,
                        libprepid=libprep_id,
                        seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error(
                        'Field "{}" not available for seqrun "{}" in Charon '
                        'for project "{}" / sample "{}". Including as '
                        'valid.'.format(status_field, seqrun_id, project_id,
                                        sample_id))
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.info('Skipping seqrun "{}" due to {}'
                             '"{}"'.format(seqrun_id, status_field,
                                           aln_status))
        else:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
    return dict(libpreps)
示例#7
0
def reset_charon_records_by_name(project_id, restrict_to_samples=None,
                                 restrict_to_libpreps=None, restrict_to_seqruns=None):
    if not restrict_to_samples: restrict_to_samples = []
    if not restrict_to_libpreps: restrict_to_libpreps = []
    if not restrict_to_seqruns: restrict_to_seqruns = []
    charon_session = CharonSession()
    LOG.info("Resetting Charon record for project {}".format(project_id))
    charon_session.project_reset(projectid=project_id)
    LOG.info("Charon record for project {} reset".format(project_id))
    for sample in charon_session.project_get_samples(projectid=project_id).get('samples', []):
        sample_id = sample['sampleid']
        if restrict_to_samples and sample_id not in restrict_to_samples:
            LOG.info("Skipping project/sample {}/{}: not in list of samples to use "
                     "({})".format(project_id, sample_id, ", ".join(restrict_to_samples)))
            continue
        LOG.info("Resetting Charon record for project/sample {}/{}".format(project_id,
                                                                           sample_id))
        charon_session.sample_reset(projectid=project_id, sampleid=sample_id)
        LOG.info("Charon record for project/sample {}/{} reset".format(project_id,
                                                                       sample_id))
        for libprep in charon_session.sample_get_libpreps(projectid=project_id,
                                                          sampleid=sample_id).get('libpreps', []):
            libprep_id = libprep['libprepid']
            if restrict_to_libpreps and libprep_id not in restrict_to_libpreps:
                LOG.info("Skipping project/sample/libprep {}/{}/{}: not in list "
                         "of libpreps to use ({})".format(project_id, sample_id,
                                                          libprep_id, ", ".join(restrict_to_libpreps)))
                continue
            LOG.info("Resetting Charon record for project/sample"
                     "libprep {}/{}/{}".format(project_id, sample_id, libprep_id))
            charon_session.libprep_reset(projectid=project_id, sampleid=sample_id,
                                         libprepid=libprep_id)
            LOG.info("Charon record for project/sample/libprep {}/{}/{} "
                     "reset".format(project_id, sample_id, libprep_id))
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id).get('seqruns', []):
                seqrun_id = seqrun['seqrunid']
                if restrict_to_seqruns and seqrun_id not in restrict_to_seqruns:
                    LOG.info("Skipping project/sample/libprep/seqrun {}/{}/{}/{}: "
                             "not in list of seqruns to use ({})".format(project_id,
                                                                         sample_id,
                                                                         libprep_id,
                                                                         seqrun_id,
                                                                         ", ".join(restrict_to_seqruns)))
                    continue
                LOG.info("Resetting Charon record for project/sample/libprep/"
                         "seqrun {}/{}/{}/{}".format(project_id, sample_id,
                                                     libprep_id, seqrun_id))
                charon_session.seqrun_reset(projectid=project_id, sampleid=sample_id,
                                            libprepid=libprep_id, seqrunid=seqrun_id)
                LOG.info("Charon record for project/sample/libprep/seqrun "
                         "{}/{}/{}/{} reset".format(project_id, sample_id,
                                                    libprep_id, seqrun_id))
示例#8
0
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(valid_status_values),
                                                          status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(projectid=project_id,
                                                           sampleid=sample_id,
                                                           libprepid=libprep_id,
                                                           seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                              'for project "{}" / sample "{}". Including as '
                              'valid.'.format(status_field, seqrun_id,
                                              project_id, sample_id))
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.info('Skipping seqrun "{}" due to {}'
                             '"{}"'.format(seqrun_id,status_field, aln_status))
        else:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
    return dict(libpreps)
示例#9
0
def main(project):

    charon_session = CharonSession()
    samples = charon_session.project_get_samples(project)
    for sample in samples["samples"]:
        
        charon_session.sample_update(project, sample["sampleid"],
                                     analysis_status= "TO_ANALYZE",
                                     genotype_status=None,
                                     total_autosomal_coverage="0",
                                     total_sequenced_reads="0")
        for sample_prep in charon_session.sample_get_libpreps(project, sample["sampleid"])['libpreps']:
            seqruns = charon_session.libprep_get_seqruns(project, sample["sampleid"], sample_prep["libprepid"])['seqruns']
            for seqrun in seqruns:
                charon_session.seqrun_update(project, sample["sampleid"], sample_prep["libprepid"], seqrun["seqrunid"],
                                             mean_autosomal_coverage = "0",
                                             alignment_status  = "NOT_RUNNING")
示例#10
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_name,
                                                             libprep['libprepid'])['seqruns']
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            ## BUG if we have one sample with two libpreps on the same flowcell,
                            ##     this just picks the first one it encounters; instead,
                            ##     it should raise an Exception. Requires restructuring.
                            return libprep['libprepid']
                else:
                    raise CharonError("No seqruns found!", 404)
            else:
                raise CharonError("No match", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid,
                                                                    e))
示例#11
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)["libpreps"]
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id, sample_name, libprep["libprepid"])["seqruns"]
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            ## BUG if we have one sample with two libpreps on the same flowcell,
                            ##     this just picks the first one it encounters; instead,
                            ##     it should raise an Exception. Requires restructuring.
                            return libprep["libprepid"]
                else:
                    raise CharonError("No seqruns found!", 404)
            else:
                raise CharonError("No match", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError(
                'No library prep found for project "{}" / sample "{}" '
                '/ fcid "{}"'.format(project_id, sample_name, fcid)
            )
        else:
            raise ValueError(
                'Could not determine library prep for project "{}" '
                '/ sample "{}" / fcid "{}": {}'.format(project_id, sample_name, fcid, e)
            )
示例#12
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_name,
                                                             libprep['libprepid'])['seqruns']
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            return libprep['libprepid']
                    else:
                        raise CharonError("No match", 404)
                else:
                    raise CharonError("No seqruns found!", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid,
                                                                    e))
示例#13
0
def main(project):

    charon_session = CharonSession()
    samples = charon_session.project_get_samples(project)
    for sample in samples["samples"]:

        charon_session.sample_update(project,
                                     sample["sampleid"],
                                     analysis_status="TO_ANALYZE",
                                     genotype_status=None,
                                     total_autosomal_coverage="0",
                                     total_sequenced_reads="0")
        for sample_prep in charon_session.sample_get_libpreps(
                project, sample["sampleid"])['libpreps']:
            seqruns = charon_session.libprep_get_seqruns(
                project, sample["sampleid"],
                sample_prep["libprepid"])['seqruns']
            for seqrun in seqruns:
                charon_session.seqrun_update(project,
                                             sample["sampleid"],
                                             sample_prep["libprepid"],
                                             seqrun["seqrunid"],
                                             mean_autosomal_coverage="0",
                                             alignment_status="NOT_RUNNING")
示例#14
0
def project_summarize(projects, verbosity=0):
    if type(verbosity) is not int or verbosity < 0:
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0')
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True) # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr('Gathering information for project "{}"...'.format(project))
        project_dict = {}
        try:
            project = charon_session.project_get(project)
        except CharonError as e:
            print_stderr('Project "{}" not found in Charon; skipping ({})'.format(project, e), file=sys.stderr)
            continue
        project_dict['name'] = project['name']
        project_dict['id'] = project['projectid']
        project_dict['status'] = project['status']
        samples_list = project_dict['samples'] = []
        for sample in charon_session.project_get_samples(project['projectid']).get('samples', []):
            sample_dict = {}
            sample_dict['id'] = sample['sampleid']
            sample_dict['analysis_status'] = sample['analysis_status']
            sample_dict['coverage'] = sample['total_autosomal_coverage']
            libpreps_list = sample_dict['libpreps'] = []
            samples_list.append(sample_dict)
            for libprep in charon_session.sample_get_libpreps(project['projectid'],
                                                              sample['sampleid']).get('libpreps', []):
                libprep_dict = {}
                libprep_dict['id'] = libprep['libprepid']
                libprep_dict['qc'] = libprep['qc']
                seqruns_list = libprep_dict['seqruns'] = []
                libpreps_list.append(libprep_dict)
                for seqrun in charon_session.libprep_get_seqruns(project['projectid'],
                                                                 sample['sampleid'],
                                                                 libprep['libprepid']).get('seqruns', []):
                    seqrun_dict = {}
                    seqrun_dict['id'] = seqrun['seqrunid']
                    seqrun_dict['alignment_status'] = seqrun['alignment_status']
                    seqrun_dict['coverage'] = seqrun['mean_autosomal_coverage']
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)


    if verbosity in (0, 1):
        projects_status_list = []
        #projects_by_status = collections.defaultdict(dict)
        #samples_by_status = collections.defaultdict(set)
        #libpreps_by_status = collections.defaultdict(set)
        #seqruns_by_status = collections.defaultdict(set)
        for project_dict in projects_list:
            project_status_dict = {}
            project_status_dict['name'] = "{} ({})".format(project_dict['name'], project_dict['id'])
            project_status_dict['status'] = project_dict['status']
            samples_by_status = project_status_dict['samples_by_status'] = collections.defaultdict(set)
            libpreps_by_status = project_status_dict['libpreps_by_status'] = collections.defaultdict(set)
            seqruns_by_status = project_status_dict['seqruns_by_status'] = collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                #samples_by_status[sample_dict['analysis_status']].add(sample_dict['id'])
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any([libprep["seqruns"] for libprep in libpreps]):
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[seqrun_dict['alignment_status']].add(seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append(project_status_dict)

        print_items = (("Samples", "samples_by_status"),
                       ("Libpreps", "libpreps_by_status"),
                       ("Seqruns", "seqruns_by_status"),)

        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr("    Name:   {:>40}".format(project_dict['name']))
            print_stderr("    Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-"*len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by analysis value
                for status, item_set in sorted(status_dict.iteritems(), key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr("    Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(status,
                                                                                       num_items,
                                                                                       total_items,
                                                                                       percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr("        {}".format(item))
            print_stderr("")

    else: # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"
        for project_dict in projects_list:
            offset = 0
            indent = " " * offset
            rspace = 80 - offset
            print_stderr(output_template.format(indent, "Project name:", project_dict['name'], rspace=rspace))
            print_stderr(output_template.format(indent, "Project ID:", project_dict['id'], rspace=rspace))
            print_stderr(output_template.format(indent, "Project status:", project_dict['status'], rspace=rspace))
            for sample_dict in project_dict['samples']:
                print_stderr("")
                offset = 4
                indent = " " * offset
                rspace = 80 - offset
                print_stderr(output_template.format(indent, "Sample ID:", sample_dict['id'], rspace=rspace))
                print_stderr(output_template.format(indent, "Sample analysis status:", sample_dict['analysis_status'], rspace=rspace))
                print_stderr(output_template.format(indent, "Sample coverage:", sample_dict['coverage'], rspace=rspace))
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    offset = 8
                    indent = " " * offset
                    rspace = 80 - offset
                    print_stderr(output_template.format(indent, "Libprep ID:", libprep_dict['id'], rspace=rspace))
                    print_stderr(output_template.format(indent, "Libprep qc status:", libprep_dict['qc'], rspace=rspace))
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        offset = 12
                        indent = " " * offset
                        rspace = 80 - offset
                        print_stderr(output_template.format(indent, "Seqrun ID:", seqrun_dict['id'], rspace=rspace))
                        print_stderr(output_template.format(indent, "Seqrun alignment status:", seqrun_dict['alignment_status'], rspace=rspace))
                        print_stderr(output_template.format(indent, "Seqrun mean auto. coverage:", seqrun_dict['coverage'], rspace=rspace))
                        if "total_reads" in seqrun_dict:
                            print_stderr(output_template.format(indent, "Seqrun total reads:", seqrun_dict['total_reads'], rspace=rspace))
            print_stderr("\n")
def project_summarize(projects, verbosity=0):
    if type(verbosity) is not int or verbosity < 0:
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0')
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True)  # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr(
            'Gathering information for project "{}"...'.format(project))
        project_dict = {}
        try:
            project = charon_session.project_get(project)
        except CharonError as e:
            print_stderr(
                'Project "{}" not found in Charon; skipping ({})'.format(
                    project, e),
                file=sys.stderr)
            continue
        project_dict['name'] = project['name']
        project_dict['id'] = project['projectid']
        project_dict['status'] = project['status']
        samples_list = project_dict['samples'] = []
        for sample in charon_session.project_get_samples(
                project['projectid']).get('samples', []):
            sample_dict = {}
            sample_dict['id'] = sample['sampleid']
            sample_dict['analysis_status'] = sample['analysis_status']
            sample_dict['coverage'] = sample['total_autosomal_coverage']
            libpreps_list = sample_dict['libpreps'] = []
            samples_list.append(sample_dict)
            for libprep in charon_session.sample_get_libpreps(
                    project['projectid'],
                    sample['sampleid']).get('libpreps', []):
                libprep_dict = {}
                libprep_dict['id'] = libprep['libprepid']
                libprep_dict['qc'] = libprep['qc']
                seqruns_list = libprep_dict['seqruns'] = []
                libpreps_list.append(libprep_dict)
                for seqrun in charon_session.libprep_get_seqruns(
                        project['projectid'], sample['sampleid'],
                        libprep['libprepid']).get('seqruns', []):
                    seqrun_dict = {}
                    seqrun_dict['id'] = seqrun['seqrunid']
                    seqrun_dict['alignment_status'] = seqrun[
                        'alignment_status']
                    seqrun_dict['coverage'] = seqrun['mean_autosomal_coverage']
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)

    if verbosity in (0, 1):
        projects_status_list = []
        #projects_by_status = collections.defaultdict(dict)
        #samples_by_status = collections.defaultdict(set)
        #libpreps_by_status = collections.defaultdict(set)
        #seqruns_by_status = collections.defaultdict(set)
        for project_dict in projects_list:
            project_status_dict = {}
            project_status_dict['name'] = "{} ({})".format(
                project_dict['name'], project_dict['id'])
            project_status_dict['status'] = project_dict['status']
            samples_by_status = project_status_dict[
                'samples_by_status'] = collections.defaultdict(set)
            libpreps_by_status = project_status_dict[
                'libpreps_by_status'] = collections.defaultdict(set)
            seqruns_by_status = project_status_dict[
                'seqruns_by_status'] = collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                #samples_by_status[sample_dict['analysis_status']].add(sample_dict['id'])
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any([libprep["seqruns"] for libprep in libpreps]):
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(
                                libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[
                                    seqrun_dict['alignment_status']].add(
                                        seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append(project_status_dict)

        print_items = (
            ("Samples", "samples_by_status"),
            ("Libpreps", "libpreps_by_status"),
            ("Seqruns", "seqruns_by_status"),
        )

        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr("    Name:   {:>40}".format(project_dict['name']))
            print_stderr("    Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-" * len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by analysis value
                for status, item_set in sorted(
                        status_dict.iteritems(),
                        key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr(
                        "    Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                            status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr("        {}".format(item))
            print_stderr("")

    else:  # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"
        for project_dict in projects_list:
            offset = 0
            indent = " " * offset
            rspace = 80 - offset
            print_stderr(
                output_template.format(indent,
                                       "Project name:",
                                       project_dict['name'],
                                       rspace=rspace))
            print_stderr(
                output_template.format(indent,
                                       "Project ID:",
                                       project_dict['id'],
                                       rspace=rspace))
            print_stderr(
                output_template.format(indent,
                                       "Project status:",
                                       project_dict['status'],
                                       rspace=rspace))
            for sample_dict in project_dict['samples']:
                print_stderr("")
                offset = 4
                indent = " " * offset
                rspace = 80 - offset
                print_stderr(
                    output_template.format(indent,
                                           "Sample ID:",
                                           sample_dict['id'],
                                           rspace=rspace))
                print_stderr(
                    output_template.format(indent,
                                           "Sample analysis status:",
                                           sample_dict['analysis_status'],
                                           rspace=rspace))
                print_stderr(
                    output_template.format(indent,
                                           "Sample coverage:",
                                           sample_dict['coverage'],
                                           rspace=rspace))
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    offset = 8
                    indent = " " * offset
                    rspace = 80 - offset
                    print_stderr(
                        output_template.format(indent,
                                               "Libprep ID:",
                                               libprep_dict['id'],
                                               rspace=rspace))
                    print_stderr(
                        output_template.format(indent,
                                               "Libprep qc status:",
                                               libprep_dict['qc'],
                                               rspace=rspace))
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        offset = 12
                        indent = " " * offset
                        rspace = 80 - offset
                        print_stderr(
                            output_template.format(indent,
                                                   "Seqrun ID:",
                                                   seqrun_dict['id'],
                                                   rspace=rspace))
                        print_stderr(
                            output_template.format(
                                indent,
                                "Seqrun alignment status:",
                                seqrun_dict['alignment_status'],
                                rspace=rspace))
                        print_stderr(
                            output_template.format(
                                indent,
                                "Seqrun mean auto. coverage:",
                                seqrun_dict['coverage'],
                                rspace=rspace))
                        if "total_reads" in seqrun_dict:
                            print_stderr(
                                output_template.format(
                                    indent,
                                    "Seqrun total reads:",
                                    seqrun_dict['total_reads'],
                                    rspace=rspace))
            print_stderr("\n")