示例#1
0
def get_finished_seqruns_for_sample(project_id, sample_id,
                                    include_failed_libpreps=False):
    """Find all the finished seqruns for a particular sample.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                aln_status = charon_session.seqrun_get(projectid=project_id,
                                                       sampleid=sample_id,
                                                       libprepid=libprep_id,
                                                       seqrunid=seqrun_id).get('alignment_status')
                if aln_status == "DONE":
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.debug('Skipping seqrun "{}" due to alignment_status '
                              '"{}"'.format(seqrun_id, aln_status))
        else:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
    return dict(libpreps)
示例#2
0
def update_analysis(project_id, status):
    charon_session = CharonSession()
    mail_analysis(project_id,
                  engine_name='rna_ngi',
                  level='INFO' if status else 'ERROR')
    new_sample_status = 'ANALYZED' if status else 'FAILED'
    new_seqrun_status = 'DONE' if status else 'FAILED'
    for sample in charon_session.project_get_samples(project_id).get(
            "samples", {}):
        if sample.get('analysis_status') == "UNDER_ANALYSIS":
            LOG.info("Marking analysis of sample {}/{} as {}".format(
                project_id, sample.get('sampleid'), new_sample_status))
            charon_session.sample_update(project_id,
                                         sample.get('sampleid'),
                                         analysis_status=new_sample_status)
            for libprep in charon_session.sample_get_libpreps(
                    project_id, sample.get('sampleid')).get('libpreps', {}):
                if libprep.get('qc') != 'FAILED':
                    for seqrun in charon_session.libprep_get_seqruns(
                            project_id, sample.get('sampleid'),
                            libprep.get('libprepid')).get('seqruns', {}):
                        if seqrun.get('alignment_status') == "RUNNING":
                            LOG.info(
                                "Marking analysis of seqrun {}/{}/{}/{} as {}".
                                format(project_id, sample.get('sampleid'),
                                       libprep.get('libprepid'),
                                       seqrun.get('seqrunid'),
                                       new_seqrun_status))
                            charon_session.seqrun_update(
                                project_id,
                                sample.get('sampleid'),
                                libprep.get('libprepid'),
                                seqrun.get('seqrunid'),
                                alignment_status=new_seqrun_status)
示例#3
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples_dict = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))
    for sample in samples_dict:
        sample_id = sample.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps_dict = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(project_id,sample_id, e))
        for libprep in libpreps_dict:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,  dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns_dict = charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))
            for seqrun in seqruns_dict:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
示例#4
0
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    project_dir = os.path.join(analysis_top_dir, "DATA", project_name)
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples_dict = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project {}: {}".format(project_id, e))
    for sample in samples_dict:
        sample_id = sample.get("sampleid")
        sample_dir = os.path.join(project_dir, sample_id)
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps_dict = charon_session.sample_get_libpreps(project_id, sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample {}: {}".format(project_id,sample_id, e))
        for libprep in libpreps_dict:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,  dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns_dict = charon_session.libprep_get_seqruns(project_id, sample_id, libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id, libprep_id, e))
            for seqrun in seqruns_dict:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id, dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
示例#5
0
def check_for_preexisting_sample_runs(project_obj, sample_obj, restart_running_jobs, restart_finished_jobs):
    """If any analysis is undergoing or has completed for this sample's
    seqruns, raise a RuntimeError.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: command line parameter
    :param boolean restart_finished_jobs: command line parameter

    :raise RuntimeError if the status is RUNNING or DONE and the flags do not allow to continue
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    for libprep in sample_libpreps['libpreps']:
        libprep_id = libprep['libprepid']
        for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                         sampleid=sample_id,
                                                         libprepid=libprep_id)['seqruns']:
            seqrun_id = seqrun['seqrunid']
            aln_status = charon_session.seqrun_get(projectid=project_id,
                                                   sampleid=sample_id,
                                                   libprepid=libprep_id,
                                                   seqrunid=seqrun_id).get('alignment_status')
            if (aln_status == "RUNNING" and not restart_running_jobs) or \
                (aln_status == "DONE" and not restart_finished_jobs):
                    raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                          'seqrun "{}" with status "{}"'.format(project_obj,
                          sample_obj, seqrun_id, aln_status))
示例#6
0
def get_valid_seqruns_for_sample(project_id,
                                 sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = (
        "alignment_status",
        "genotype_status",
    )
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(
                             ", ".join(valid_status_values), status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(
                    projectid=project_id, sampleid=sample_id,
                    libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(
                        projectid=project_id,
                        sampleid=sample_id,
                        libprepid=libprep_id,
                        seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error(
                        'Field "{}" not available for seqrun "{}" in Charon '
                        'for project "{}" / sample "{}". Including as '
                        'valid.'.format(status_field, seqrun_id, project_id,
                                        sample_id))
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.info('Skipping seqrun "{}" due to {}'
                             '"{}"'.format(seqrun_id, status_field,
                                           aln_status))
        else:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
    return dict(libpreps)
示例#7
0
def reset_charon_records_by_name(project_id, restrict_to_samples=None,
                                 restrict_to_libpreps=None, restrict_to_seqruns=None):
    if not restrict_to_samples: restrict_to_samples = []
    if not restrict_to_libpreps: restrict_to_libpreps = []
    if not restrict_to_seqruns: restrict_to_seqruns = []
    charon_session = CharonSession()
    LOG.info("Resetting Charon record for project {}".format(project_id))
    charon_session.project_reset(projectid=project_id)
    LOG.info("Charon record for project {} reset".format(project_id))
    for sample in charon_session.project_get_samples(projectid=project_id).get('samples', []):
        sample_id = sample['sampleid']
        if restrict_to_samples and sample_id not in restrict_to_samples:
            LOG.info("Skipping project/sample {}/{}: not in list of samples to use "
                     "({})".format(project_id, sample_id, ", ".join(restrict_to_samples)))
            continue
        LOG.info("Resetting Charon record for project/sample {}/{}".format(project_id,
                                                                           sample_id))
        charon_session.sample_reset(projectid=project_id, sampleid=sample_id)
        LOG.info("Charon record for project/sample {}/{} reset".format(project_id,
                                                                       sample_id))
        for libprep in charon_session.sample_get_libpreps(projectid=project_id,
                                                          sampleid=sample_id).get('libpreps', []):
            libprep_id = libprep['libprepid']
            if restrict_to_libpreps and libprep_id not in restrict_to_libpreps:
                LOG.info("Skipping project/sample/libprep {}/{}/{}: not in list "
                         "of libpreps to use ({})".format(project_id, sample_id,
                                                          libprep_id, ", ".join(restrict_to_libpreps)))
                continue
            LOG.info("Resetting Charon record for project/sample"
                     "libprep {}/{}/{}".format(project_id, sample_id, libprep_id))
            charon_session.libprep_reset(projectid=project_id, sampleid=sample_id,
                                         libprepid=libprep_id)
            LOG.info("Charon record for project/sample/libprep {}/{}/{} "
                     "reset".format(project_id, sample_id, libprep_id))
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id).get('seqruns', []):
                seqrun_id = seqrun['seqrunid']
                if restrict_to_seqruns and seqrun_id not in restrict_to_seqruns:
                    LOG.info("Skipping project/sample/libprep/seqrun {}/{}/{}/{}: "
                             "not in list of seqruns to use ({})".format(project_id,
                                                                         sample_id,
                                                                         libprep_id,
                                                                         seqrun_id,
                                                                         ", ".join(restrict_to_seqruns)))
                    continue
                LOG.info("Resetting Charon record for project/sample/libprep/"
                         "seqrun {}/{}/{}/{}".format(project_id, sample_id,
                                                     libprep_id, seqrun_id))
                charon_session.seqrun_reset(projectid=project_id, sampleid=sample_id,
                                            libprepid=libprep_id, seqrunid=seqrun_id)
                LOG.info("Charon record for project/sample/libprep/seqrun "
                         "{}/{}/{}/{} reset".format(project_id, sample_id,
                                                    libprep_id, seqrun_id))
示例#8
0
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(valid_status_values),
                                                          status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(projectid=project_id,
                                                           sampleid=sample_id,
                                                           libprepid=libprep_id,
                                                           seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                              'for project "{}" / sample "{}". Including as '
                              'valid.'.format(status_field, seqrun_id,
                                              project_id, sample_id))
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.info('Skipping seqrun "{}" due to {}'
                             '"{}"'.format(seqrun_id,status_field, aln_status))
        else:
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep, libprep.get("qc")))
    return dict(libpreps)
示例#9
0
def main(project):

    charon_session = CharonSession()
    samples = charon_session.project_get_samples(project)
    for sample in samples["samples"]:
        
        charon_session.sample_update(project, sample["sampleid"],
                                     analysis_status= "TO_ANALYZE",
                                     genotype_status=None,
                                     total_autosomal_coverage="0",
                                     total_sequenced_reads="0")
        for sample_prep in charon_session.sample_get_libpreps(project, sample["sampleid"])['libpreps']:
            seqruns = charon_session.libprep_get_seqruns(project, sample["sampleid"], sample_prep["libprepid"])['seqruns']
            for seqrun in seqruns:
                charon_session.seqrun_update(project, sample["sampleid"], sample_prep["libprepid"], seqrun["seqrunid"],
                                             mean_autosomal_coverage = "0",
                                             alignment_status  = "NOT_RUNNING")
示例#10
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_name,
                                                             libprep['libprepid'])['seqruns']
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            ## BUG if we have one sample with two libpreps on the same flowcell,
                            ##     this just picks the first one it encounters; instead,
                            ##     it should raise an Exception. Requires restructuring.
                            return libprep['libprepid']
                else:
                    raise CharonError("No seqruns found!", 404)
            else:
                raise CharonError("No match", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid,
                                                                    e))
示例#11
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)["libpreps"]
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id, sample_name, libprep["libprepid"])["seqruns"]
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            ## BUG if we have one sample with two libpreps on the same flowcell,
                            ##     this just picks the first one it encounters; instead,
                            ##     it should raise an Exception. Requires restructuring.
                            return libprep["libprepid"]
                else:
                    raise CharonError("No seqruns found!", 404)
            else:
                raise CharonError("No match", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError(
                'No library prep found for project "{}" / sample "{}" '
                '/ fcid "{}"'.format(project_id, sample_name, fcid)
            )
        else:
            raise ValueError(
                'Could not determine library prep for project "{}" '
                '/ sample "{}" / fcid "{}": {}'.format(project_id, sample_name, fcid, e)
            )
示例#12
0
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_name,
                                                             libprep['libprepid'])['seqruns']
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            return libprep['libprepid']
                    else:
                        raise CharonError("No match", 404)
                else:
                    raise CharonError("No seqruns found!", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid,
                                                                    e))
示例#13
0
def main(project):

    charon_session = CharonSession()
    samples = charon_session.project_get_samples(project)
    for sample in samples["samples"]:

        charon_session.sample_update(project,
                                     sample["sampleid"],
                                     analysis_status="TO_ANALYZE",
                                     genotype_status=None,
                                     total_autosomal_coverage="0",
                                     total_sequenced_reads="0")
        for sample_prep in charon_session.sample_get_libpreps(
                project, sample["sampleid"])['libpreps']:
            seqruns = charon_session.libprep_get_seqruns(
                project, sample["sampleid"],
                sample_prep["libprepid"])['seqruns']
            for seqrun in seqruns:
                charon_session.seqrun_update(project,
                                             sample["sampleid"],
                                             sample_prep["libprepid"],
                                             seqrun["seqrunid"],
                                             mean_autosomal_coverage="0",
                                             alignment_status="NOT_RUNNING")
示例#14
0
def setup_analysis_directory_structure(fc_dir, projects_to_analyze,
                                       restrict_to_projects=None, restrict_to_samples=None,
                                       create_files=True,
                                       fallback_libprep=None,
                                       quiet=False,
                                       config=None, config_file_path=None):
    """
    Copy and sort files from their CASAVA-demultiplexed flowcell structure
    into their respective project/sample/libPrep/FCIDs. This collects samples
    split across multiple flowcells.

    :param str fc_dir: The directory created by CASAVA for this flowcell.
    :param dict config: The parsed configuration file.
    :param set projects_to_analyze: A dict (of Project objects, or empty)
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param list restrict_to_projects: Specific projects within the flowcell to process exclusively
    :param list restrict_to_samples: Specific samples within the flowcell to process exclusively

    :returns: A list of NGIProject objects that need to be run through the analysis pipeline
    :rtype: list

    :raises KeyError: If a required configuration key is not available.
    """
    LOG.info("Setting up analysis for demultiplexed data in source folder \"{}\"".format(fc_dir))
    if not restrict_to_projects: restrict_to_projects = []
    if not restrict_to_samples: restrict_to_samples = []
    config["quiet"] = quiet # Hack because I enter here from a script sometimes
    pattern="(.+(?:{}|{}))\/.+".format(config["analysis"]["sthlm_root"], config["analysis"]["upps_root"])
    matches=re.match(pattern, fc_dir)
    if matches:
        flowcell_root=matches.group(1)
    else:
        LOG.error("cannot guess which project the flowcell {} belongs to".format(fc_dir))
        raise RuntimeError

    analysis_top_dir = os.path.abspath(os.path.join(flowcell_root,config["analysis"]["top_dir"]))
    try:
        safe_makedir(analysis_top_dir)
    except OSError as e:
        LOG.error('Error: Analysis top directory {} does not exist and could not '
                  'be created.'.format(analysis_top_dir))
    fc_dir = fc_dir if os.path.isabs(fc_dir) else os.path.join(analysis_top_dir, fc_dir)
    if not os.path.exists(fc_dir):
        LOG.error("Error: Flowcell directory {} does not exist".format(fc_dir))
        return []
    # Map the directory structure for this flowcell
    try:
        fc_dir_structure = parse_flowcell(fc_dir)
    except (OSError, ValueError) as e:
        LOG.error("Error when processing flowcell dir \"{}\": {}".format(fc_dir, e))
        return []
    fc_full_id = fc_dir_structure['fc_full_id']
    if not fc_dir_structure.get('projects'):
        LOG.warn("No projects found in specified flowcell directory \"{}\"".format(fc_dir))
    # Iterate over the projects in the flowcell directory
    for project in fc_dir_structure.get('projects', []):
        project_name = project['project_name']
        project_original_name = project['project_original_name']
        samplesheet_path = fc_dir_structure.get("samplesheet_path")
        try:
            # Maps e.g. "Y.Mom_14_01" to "P123"
            project_id = get_project_id_from_name(project_name)
        except (CharonError, RuntimeError, ValueError) as e:
            LOG.warn('Could not retrieve project id from Charon (record missing?). '
                     'Using project name ("{}") as project id '
                     '(error: {})'.format(project_name, e))
            project_id = project_name
        # If specific projects are specified, skip those that do not match
        if restrict_to_projects and project_name not in restrict_to_projects and \
                                    project_id not in restrict_to_projects:
            LOG.debug("Skipping project {} (not in restrict_to_projects)".format(project_name))
            continue
        LOG.info("Setting up project {}".format(project.get("project_name")))
        # Create a project directory if it doesn't already exist, including
        # intervening "DATA" directory
        project_dir = os.path.join(analysis_top_dir, "DATA", project_id)
        project_sl_dir = os.path.join(analysis_top_dir, "DATA", project_name)
        project_analysis_dir = os.path.join(analysis_top_dir, "ANALYSIS", project_id)
        project_analysis_sl_dir = os.path.join(analysis_top_dir, "ANALYSIS", project_name)
        if create_files:
            safe_makedir(project_dir, 0o2770)
            safe_makedir(project_analysis_dir, 0o2770)
            if not project_dir == project_sl_dir and \
               not os.path.exists(project_sl_dir):
                os.symlink(project_dir, project_sl_dir)
            if not project_analysis_dir == project_analysis_sl_dir and \
               not os.path.exists(project_analysis_sl_dir):
                os.symlink(project_analysis_dir, project_analysis_sl_dir)
        try:
            project_obj = projects_to_analyze[project_dir]
        except KeyError:
            project_obj = NGIProject(name=project_name, dirname=project_id,
                                     project_id=project_id,
                                     base_path=analysis_top_dir)
            projects_to_analyze[project_dir] = project_obj
        # Iterate over the samples in the project
        for sample in project.get('samples', []):
            sample_name = sample['sample_name']
            # If specific samples are specified, skip those that do not match
            if restrict_to_samples and sample_name not in restrict_to_samples:
                LOG.debug("Skipping sample {}: not in specified samples "
                          "{}".format(sample_name, ", ".join(restrict_to_samples)))
                continue
            LOG.info("Setting up sample {}".format(sample_name))
            # Create a directory for the sample if it doesn't already exist
            sample_dir = os.path.join(project_dir, sample_name)
            if create_files: safe_makedir(sample_dir, 0o2770)
            # This will only create a new sample object if it doesn't already exist in the project
            sample_obj = project_obj.add_sample(name=sample_name, dirname=sample_name)
            # Get the Library Prep ID for each file
            pattern = re.compile(".*\.(fastq|fq)(\.gz|\.gzip|\.bz2)?$")
            fastq_files = filter(pattern.match, sample.get('files', []))
            # For each fastq file, create the libprep and seqrun objects
            # and add the fastq file to the seqprep object
            # Note again that these objects only get created if they don't yet exist;
            # if they do exist, the existing object is returned
            for fq_file in fastq_files:
                # Try to parse from SampleSheet
                try:
                    if not samplesheet_path: raise ValueError()
                    lane_num = re.match(r'[\w-]+_L\d{2}(\d)_\w+', fq_file).groups()[0]
                    libprep_name = determine_library_prep_from_samplesheet(samplesheet_path,
                                                                           project_original_name,
                                                                           sample_name,
                                                                           lane_num)
                except (IndexError, ValueError) as e:
                    LOG.debug('Unable to determine library prep from sample sheet file '
                              '("{}"); try to determine from Charon'.format(e))
                    try:
                        # Requires Charon access
                        libprep_name = determine_library_prep_from_fcid(project_id, sample_name, fc_full_id)
                        LOG.debug('Found libprep name "{}" in Charon'.format(libprep_name))
                    except ValueError:
                        charon_session = CharonSession()
                        libpreps = charon_session.sample_get_libpreps(project_id, sample_name).get('libpreps')
                        if len(libpreps) == 1:
                            libprep_name = libpreps[0].get('libprepid')
                            LOG.warn('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                     'has no libprep information in Charon, but only one '
                                     'library prep is present in Charon ("{}"). Using '
                                     'this as the library prep.'.format(project_name,
                                                                        sample_name,
                                                                        fc_full_id,
                                                                        fq_file,
                                                                        libprep_name))
                        elif fallback_libprep:
                            libprep_name = fallback_libprep
                            LOG.warn('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                     'has no libprep information in Charon, but a fallback '
                                     'libprep value of "{}" was supplied -- using this '
                                     'value.'.format(project_name,
                                                     sample_name,
                                                     fc_full_id,
                                                     fq_file,
                                                     libprep_name,
                                                     fallback_libprep))
                        else:
                            error_text = ('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                          'has no libprep information in Charon. Skipping '
                                          'analysis.'.format(project_name, sample_name,
                                                             fc_full_id, fq_file))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
                libprep_object = sample_obj.add_libprep(name=libprep_name,
                                                        dirname=libprep_name)
                libprep_dir = os.path.join(sample_dir, libprep_name)
                if create_files: safe_makedir(libprep_dir, 0o2770)
                seqrun_object = libprep_object.add_seqrun(name=fc_full_id,
                                                          dirname=fc_full_id)
                seqrun_dir = os.path.join(libprep_dir, fc_full_id)
                if create_files: safe_makedir(seqrun_dir, 0o2770)
                seqrun_object.add_fastq_files(fq_file)
            if fastq_files and create_files:
                src_sample_dir = os.path.join(fc_dir_structure['fc_dir'],
                                              project['data_dir'],
                                              project['project_dir'],
                                              sample['sample_dir'])
                for libprep_obj in sample_obj:
                    for seqrun_obj in libprep_obj:
                        src_fastq_files = [os.path.join(src_sample_dir, fastq_file) for
                                           fastq_file in seqrun_obj.fastq_files]
                        seqrun_dst_dir = os.path.join(project_obj.base_path, project_obj.dirname,
                                                      sample_obj.dirname, libprep_obj.dirname,
                                                      seqrun_obj.dirname)
                        LOG.info("Symlinking fastq files from {} to {}...".format(src_sample_dir, seqrun_dir))
                        try:
                            do_symlink(src_fastq_files, seqrun_dir)
                        except OSError:
                            error_text = ('Could not symlink files for project/sample'
                                          'libprep/seqrun {}/{}/{}/{}'.format(project_obj,
                                                                              sample_obj,
                                                                              libprep_obj,
                                                                              seqrun_obj))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
    return projects_to_analyze
示例#15
0
def setup_analysis_directory_structure(fc_dir,
                                       projects_to_analyze,
                                       restrict_to_projects=None,
                                       restrict_to_samples=None,
                                       create_files=True,
                                       fallback_libprep=None,
                                       quiet=False,
                                       config=None,
                                       config_file_path=None):
    """
    Copy and sort files from their CASAVA-demultiplexed flowcell structure
    into their respective project/sample/libPrep/FCIDs. This collects samples
    split across multiple flowcells.

    :param str fc_dir: The directory created by CASAVA for this flowcell.
    :param dict config: The parsed configuration file.
    :param set projects_to_analyze: A dict (of Project objects, or empty)
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param list restrict_to_projects: Specific projects within the flowcell to process exclusively
    :param list restrict_to_samples: Specific samples within the flowcell to process exclusively

    :returns: A list of NGIProject objects that need to be run through the analysis pipeline
    :rtype: list

    :raises KeyError: If a required configuration key is not available.
    """
    LOG.info(
        "Setting up analysis for demultiplexed data in source folder \"{}\"".
        format(fc_dir))
    if not restrict_to_projects: restrict_to_projects = []
    if not restrict_to_samples: restrict_to_samples = []
    config[
        "quiet"] = quiet  # Hack because I enter here from a script sometimes
    #Checks flowcell path to establish which group owns it
    pattern = ".+({}|{})\/.+".format(config["analysis"]["sthlm_root"],
                                     config["analysis"]["upps_root"])
    matches = re.match(pattern, fc_dir)
    if matches:
        flowcell_uppnexid = matches.group(1)
    else:
        LOG.error(
            "cannot guess which project (sthlm/uppsala) the flowcell {} belongs to"
            .format(fc_dir))
        raise RuntimeError

    analysis_top_dir = os.path.abspath(
        os.path.join(config["analysis"]["base_root"], flowcell_uppnexid,
                     config["analysis"]["top_dir"]))
    try:
        safe_makedir(analysis_top_dir)
    except OSError as e:
        LOG.error(
            'Error: Analysis top directory {} does not exist and could not '
            'be created.'.format(analysis_top_dir))
    fc_dir = fc_dir if os.path.isabs(fc_dir) else os.path.join(
        analysis_top_dir, fc_dir)
    if not os.path.exists(fc_dir):
        LOG.error("Error: Flowcell directory {} does not exist".format(fc_dir))
        return []
    # Map the directory structure for this flowcell
    try:
        fc_dir_structure = parse_flowcell(fc_dir)
    except (OSError, ValueError) as e:
        LOG.error("Error when processing flowcell dir \"{}\": {}".format(
            fc_dir, e))
        return []
    fc_full_id = fc_dir_structure['fc_full_id']
    if not fc_dir_structure.get('projects'):
        LOG.warning(
            "No projects found in specified flowcell directory \"{}\"".format(
                fc_dir))

    # Iterate over the projects in the flowcell directory
    for project in fc_dir_structure.get('projects', []):
        project_name = project['project_name']
        project_original_name = project['project_original_name']
        samplesheet_path = fc_dir_structure.get("samplesheet_path")

        # parse the samplesheet and get the expected sample numbers assigned by bcl2fastq
        samplesheet_sample_numbers = get_sample_numbers_from_samplesheet(
            samplesheet_path) if samplesheet_path else None

        try:
            # Maps e.g. "Y.Mom_14_01" to "P123"
            project_id = get_project_id_from_name(project_name)
        except (CharonError, RuntimeError, ValueError) as e:
            LOG.warning(
                'Could not retrieve project id from Charon (record missing?). '
                'Using project name ("{}") as project id '
                '(error: {})'.format(project_name, e))
            project_id = project_name
        # If specific projects are specified, skip those that do not match
        if restrict_to_projects and project_name not in restrict_to_projects and \
                                    project_id not in restrict_to_projects:
            LOG.debug(
                "Skipping project {} (not in restrict_to_projects)".format(
                    project_name))
            continue
        LOG.info("Setting up project {}".format(project.get("project_name")))
        # Create a project directory if it doesn't already exist, including
        # intervening "DATA" directory
        project_dir = os.path.join(analysis_top_dir, "DATA", project_id)
        project_sl_dir = os.path.join(analysis_top_dir, "DATA", project_name)
        project_analysis_dir = os.path.join(analysis_top_dir, "ANALYSIS",
                                            project_id)
        project_analysis_sl_dir = os.path.join(analysis_top_dir, "ANALYSIS",
                                               project_name)
        if create_files:
            safe_makedir(project_dir, 0o2770)
            safe_makedir(project_analysis_dir, 0o2770)
            if not project_dir == project_sl_dir and \
               not os.path.exists(project_sl_dir):
                os.symlink(project_dir, project_sl_dir)
            if not project_analysis_dir == project_analysis_sl_dir and \
               not os.path.exists(project_analysis_sl_dir):
                os.symlink(project_analysis_dir, project_analysis_sl_dir)
        try:
            project_obj = projects_to_analyze[project_dir]
        except KeyError:
            project_obj = NGIProject(name=project_name,
                                     dirname=project_id,
                                     project_id=project_id,
                                     base_path=analysis_top_dir)
            projects_to_analyze[project_dir] = project_obj
        # Iterate over the samples in the project
        for sample in project.get('samples', []):
            sample_name = sample['sample_name']
            # If specific samples are specified, skip those that do not match
            if restrict_to_samples and sample_name not in restrict_to_samples:
                LOG.debug("Skipping sample {}: not in specified samples "
                          "{}".format(sample_name,
                                      ", ".join(restrict_to_samples)))
                continue
            LOG.info("Setting up sample {}".format(sample_name))
            # Create a directory for the sample if it doesn't already exist
            sample_dir = os.path.join(project_dir, sample_name)
            if create_files: safe_makedir(sample_dir, 0o2770)
            # This will only create a new sample object if it doesn't already exist in the project
            sample_obj = project_obj.add_sample(name=sample_name,
                                                dirname=sample_name)
            # Get the Library Prep ID for each file
            pattern = re.compile(".*\.(fastq|fq)(\.gz|\.gzip|\.bz2)?$")
            fastq_files = list(filter(pattern.match, sample.get('files', [])))
            # For each fastq file, create the libprep and seqrun objects
            # and add the fastq file to the seqprep object
            # Note again that these objects only get created if they don't yet exist;
            # if they do exist, the existing object is returned
            for fq_file in fastq_files:
                # Try to use assignment from SampleSheet
                samplesheet_sample = match_fastq_sample_number_to_samplesheet(
                    fq_file, samplesheet_sample_numbers, project_id)
                if samplesheet_sample is not None and \
                        samplesheet_sample[6] is not None:
                    libprep_name = samplesheet_sample[6]
                else:
                    LOG.debug(
                        'Unable to determine library prep from sample sheet file; try to determine from Charon'
                    )
                    try:
                        # Requires Charon access
                        libprep_name = determine_library_prep_from_fcid(
                            project_id, sample_name, fc_full_id)
                        LOG.debug('Found libprep name "{}" in Charon'.format(
                            libprep_name))
                    except ValueError:
                        charon_session = CharonSession()
                        libpreps = charon_session.sample_get_libpreps(
                            project_id, sample_name).get('libpreps')
                        if len(libpreps) == 1:
                            libprep_name = libpreps[0].get('libprepid')
                            LOG.warning(
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon, but only one '
                                'library prep is present in Charon ("{}"). Using '
                                'this as the library prep.'.format(
                                    project_name, sample_name, fc_full_id,
                                    fq_file, libprep_name))
                        elif fallback_libprep:
                            libprep_name = fallback_libprep
                            LOG.warning(
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon, but a fallback '
                                'libprep value of "{}" was supplied -- using this '
                                'value.'.format(project_name, sample_name,
                                                fc_full_id, fq_file,
                                                libprep_name))
                        else:
                            error_text = (
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon. Skipping '
                                'analysis.'.format(project_name, sample_name,
                                                   fc_full_id, fq_file))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
                libprep_object = sample_obj.add_libprep(name=libprep_name,
                                                        dirname=libprep_name)
                libprep_dir = os.path.join(sample_dir, libprep_name)
                if create_files: safe_makedir(libprep_dir, 0o2770)
                seqrun_object = libprep_object.add_seqrun(name=fc_full_id,
                                                          dirname=fc_full_id)
                seqrun_dir = os.path.join(libprep_dir, fc_full_id)
                if create_files: safe_makedir(seqrun_dir, 0o2770)
                seqrun_object.add_fastq_files(fq_file)
            if fastq_files and create_files:
                src_sample_dir = os.path.join(fc_dir_structure['fc_dir'],
                                              project['data_dir'],
                                              project['project_dir'],
                                              sample['sample_dir'])
                for libprep_obj in sample_obj:
                    for seqrun_obj in libprep_obj:
                        src_fastq_files = [
                            os.path.join(src_sample_dir, fastq_file)
                            for fastq_file in seqrun_obj.fastq_files
                        ]
                        seqrun_dst_dir = os.path.join(project_obj.base_path,
                                                      "DATA",
                                                      project_obj.dirname,
                                                      sample_obj.dirname,
                                                      libprep_obj.dirname,
                                                      seqrun_obj.dirname)
                        LOG.info(
                            "Symlinking fastq files from {} to {}...".format(
                                src_sample_dir, seqrun_dst_dir))
                        try:
                            do_symlink(src_fastq_files, seqrun_dst_dir)
                        except OSError:
                            error_text = (
                                'Could not symlink files for project/sample'
                                'libprep/seqrun {}/{}/{}/{}'.format(
                                    project_obj, sample_obj, libprep_obj,
                                    seqrun_obj))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
    return projects_to_analyze
def project_summarize(projects, verbosity=0):
    if type(verbosity) is not int or verbosity < 0:
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0')
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True)  # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr(
            'Gathering information for project "{}"...'.format(project))
        project_dict = {}
        try:
            project = charon_session.project_get(project)
        except CharonError as e:
            print_stderr(
                'Project "{}" not found in Charon; skipping ({})'.format(
                    project, e),
                file=sys.stderr)
            continue
        project_dict['name'] = project['name']
        project_dict['id'] = project['projectid']
        project_dict['status'] = project['status']
        samples_list = project_dict['samples'] = []
        for sample in charon_session.project_get_samples(
                project['projectid']).get('samples', []):
            sample_dict = {}
            sample_dict['id'] = sample['sampleid']
            sample_dict['analysis_status'] = sample['analysis_status']
            sample_dict['coverage'] = sample['total_autosomal_coverage']
            libpreps_list = sample_dict['libpreps'] = []
            samples_list.append(sample_dict)
            for libprep in charon_session.sample_get_libpreps(
                    project['projectid'],
                    sample['sampleid']).get('libpreps', []):
                libprep_dict = {}
                libprep_dict['id'] = libprep['libprepid']
                libprep_dict['qc'] = libprep['qc']
                seqruns_list = libprep_dict['seqruns'] = []
                libpreps_list.append(libprep_dict)
                for seqrun in charon_session.libprep_get_seqruns(
                        project['projectid'], sample['sampleid'],
                        libprep['libprepid']).get('seqruns', []):
                    seqrun_dict = {}
                    seqrun_dict['id'] = seqrun['seqrunid']
                    seqrun_dict['alignment_status'] = seqrun[
                        'alignment_status']
                    seqrun_dict['coverage'] = seqrun['mean_autosomal_coverage']
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)

    if verbosity in (0, 1):
        projects_status_list = []
        #projects_by_status = collections.defaultdict(dict)
        #samples_by_status = collections.defaultdict(set)
        #libpreps_by_status = collections.defaultdict(set)
        #seqruns_by_status = collections.defaultdict(set)
        for project_dict in projects_list:
            project_status_dict = {}
            project_status_dict['name'] = "{} ({})".format(
                project_dict['name'], project_dict['id'])
            project_status_dict['status'] = project_dict['status']
            samples_by_status = project_status_dict[
                'samples_by_status'] = collections.defaultdict(set)
            libpreps_by_status = project_status_dict[
                'libpreps_by_status'] = collections.defaultdict(set)
            seqruns_by_status = project_status_dict[
                'seqruns_by_status'] = collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                #samples_by_status[sample_dict['analysis_status']].add(sample_dict['id'])
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any([libprep["seqruns"] for libprep in libpreps]):
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(
                                libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[
                                    seqrun_dict['alignment_status']].add(
                                        seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append(project_status_dict)

        print_items = (
            ("Samples", "samples_by_status"),
            ("Libpreps", "libpreps_by_status"),
            ("Seqruns", "seqruns_by_status"),
        )

        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr("    Name:   {:>40}".format(project_dict['name']))
            print_stderr("    Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-" * len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by analysis value
                for status, item_set in sorted(
                        status_dict.iteritems(),
                        key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr(
                        "    Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                            status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr("        {}".format(item))
            print_stderr("")

    else:  # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"
        for project_dict in projects_list:
            offset = 0
            indent = " " * offset
            rspace = 80 - offset
            print_stderr(
                output_template.format(indent,
                                       "Project name:",
                                       project_dict['name'],
                                       rspace=rspace))
            print_stderr(
                output_template.format(indent,
                                       "Project ID:",
                                       project_dict['id'],
                                       rspace=rspace))
            print_stderr(
                output_template.format(indent,
                                       "Project status:",
                                       project_dict['status'],
                                       rspace=rspace))
            for sample_dict in project_dict['samples']:
                print_stderr("")
                offset = 4
                indent = " " * offset
                rspace = 80 - offset
                print_stderr(
                    output_template.format(indent,
                                           "Sample ID:",
                                           sample_dict['id'],
                                           rspace=rspace))
                print_stderr(
                    output_template.format(indent,
                                           "Sample analysis status:",
                                           sample_dict['analysis_status'],
                                           rspace=rspace))
                print_stderr(
                    output_template.format(indent,
                                           "Sample coverage:",
                                           sample_dict['coverage'],
                                           rspace=rspace))
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    offset = 8
                    indent = " " * offset
                    rspace = 80 - offset
                    print_stderr(
                        output_template.format(indent,
                                               "Libprep ID:",
                                               libprep_dict['id'],
                                               rspace=rspace))
                    print_stderr(
                        output_template.format(indent,
                                               "Libprep qc status:",
                                               libprep_dict['qc'],
                                               rspace=rspace))
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        offset = 12
                        indent = " " * offset
                        rspace = 80 - offset
                        print_stderr(
                            output_template.format(indent,
                                                   "Seqrun ID:",
                                                   seqrun_dict['id'],
                                                   rspace=rspace))
                        print_stderr(
                            output_template.format(
                                indent,
                                "Seqrun alignment status:",
                                seqrun_dict['alignment_status'],
                                rspace=rspace))
                        print_stderr(
                            output_template.format(
                                indent,
                                "Seqrun mean auto. coverage:",
                                seqrun_dict['coverage'],
                                rspace=rspace))
                        if "total_reads" in seqrun_dict:
                            print_stderr(
                                output_template.format(
                                    indent,
                                    "Seqrun total reads:",
                                    seqrun_dict['total_reads'],
                                    rspace=rspace))
            print_stderr("\n")
示例#17
0
def project_summarize(projects, verbosity=0):
    if type(verbosity) is not int or verbosity < 0:
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0')
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True) # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr('Gathering information for project "{}"...'.format(project))
        project_dict = {}
        try:
            project = charon_session.project_get(project)
        except CharonError as e:
            print_stderr('Project "{}" not found in Charon; skipping ({})'.format(project, e), file=sys.stderr)
            continue
        project_dict['name'] = project['name']
        project_dict['id'] = project['projectid']
        project_dict['status'] = project['status']
        samples_list = project_dict['samples'] = []
        for sample in charon_session.project_get_samples(project['projectid']).get('samples', []):
            sample_dict = {}
            sample_dict['id'] = sample['sampleid']
            sample_dict['analysis_status'] = sample['analysis_status']
            sample_dict['coverage'] = sample['total_autosomal_coverage']
            libpreps_list = sample_dict['libpreps'] = []
            samples_list.append(sample_dict)
            for libprep in charon_session.sample_get_libpreps(project['projectid'],
                                                              sample['sampleid']).get('libpreps', []):
                libprep_dict = {}
                libprep_dict['id'] = libprep['libprepid']
                libprep_dict['qc'] = libprep['qc']
                seqruns_list = libprep_dict['seqruns'] = []
                libpreps_list.append(libprep_dict)
                for seqrun in charon_session.libprep_get_seqruns(project['projectid'],
                                                                 sample['sampleid'],
                                                                 libprep['libprepid']).get('seqruns', []):
                    seqrun_dict = {}
                    seqrun_dict['id'] = seqrun['seqrunid']
                    seqrun_dict['alignment_status'] = seqrun['alignment_status']
                    seqrun_dict['coverage'] = seqrun['mean_autosomal_coverage']
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)


    if verbosity in (0, 1):
        projects_status_list = []
        #projects_by_status = collections.defaultdict(dict)
        #samples_by_status = collections.defaultdict(set)
        #libpreps_by_status = collections.defaultdict(set)
        #seqruns_by_status = collections.defaultdict(set)
        for project_dict in projects_list:
            project_status_dict = {}
            project_status_dict['name'] = "{} ({})".format(project_dict['name'], project_dict['id'])
            project_status_dict['status'] = project_dict['status']
            samples_by_status = project_status_dict['samples_by_status'] = collections.defaultdict(set)
            libpreps_by_status = project_status_dict['libpreps_by_status'] = collections.defaultdict(set)
            seqruns_by_status = project_status_dict['seqruns_by_status'] = collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                #samples_by_status[sample_dict['analysis_status']].add(sample_dict['id'])
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any([libprep["seqruns"] for libprep in libpreps]):
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[seqrun_dict['alignment_status']].add(seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append(project_status_dict)

        print_items = (("Samples", "samples_by_status"),
                       ("Libpreps", "libpreps_by_status"),
                       ("Seqruns", "seqruns_by_status"),)

        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr("    Name:   {:>40}".format(project_dict['name']))
            print_stderr("    Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-"*len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by analysis value
                for status, item_set in sorted(status_dict.iteritems(), key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr("    Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(status,
                                                                                       num_items,
                                                                                       total_items,
                                                                                       percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr("        {}".format(item))
            print_stderr("")

    else: # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"
        for project_dict in projects_list:
            offset = 0
            indent = " " * offset
            rspace = 80 - offset
            print_stderr(output_template.format(indent, "Project name:", project_dict['name'], rspace=rspace))
            print_stderr(output_template.format(indent, "Project ID:", project_dict['id'], rspace=rspace))
            print_stderr(output_template.format(indent, "Project status:", project_dict['status'], rspace=rspace))
            for sample_dict in project_dict['samples']:
                print_stderr("")
                offset = 4
                indent = " " * offset
                rspace = 80 - offset
                print_stderr(output_template.format(indent, "Sample ID:", sample_dict['id'], rspace=rspace))
                print_stderr(output_template.format(indent, "Sample analysis status:", sample_dict['analysis_status'], rspace=rspace))
                print_stderr(output_template.format(indent, "Sample coverage:", sample_dict['coverage'], rspace=rspace))
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    offset = 8
                    indent = " " * offset
                    rspace = 80 - offset
                    print_stderr(output_template.format(indent, "Libprep ID:", libprep_dict['id'], rspace=rspace))
                    print_stderr(output_template.format(indent, "Libprep qc status:", libprep_dict['qc'], rspace=rspace))
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        offset = 12
                        indent = " " * offset
                        rspace = 80 - offset
                        print_stderr(output_template.format(indent, "Seqrun ID:", seqrun_dict['id'], rspace=rspace))
                        print_stderr(output_template.format(indent, "Seqrun alignment status:", seqrun_dict['alignment_status'], rspace=rspace))
                        print_stderr(output_template.format(indent, "Seqrun mean auto. coverage:", seqrun_dict['coverage'], rspace=rspace))
                        if "total_reads" in seqrun_dict:
                            print_stderr(output_template.format(indent, "Seqrun total reads:", seqrun_dict['total_reads'], rspace=rspace))
            print_stderr("\n")