def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Flag samples found in genotype (VCF) files as genotype-"AVAILABLE" in Charon.

    Genotype files are either given explicitly via ``genotype_files`` or
    discovered by globbing the top level of one or more inbox directories and
    keeping the entries that match ``GENOTYPE_FILE_RE`` and were modified
    within the last ``num_days`` days.

    For every sample parsed from each file, the sample's ``genotype_status``
    is fetched from Charon; if it is missing or ``"NOT_AVAILABLE"`` it is set
    to ``"AVAILABLE"``, otherwise it is left untouched. Charon errors are
    logged per sample and do not abort processing.

    :param inbox: Path (or list/tuple of paths) to search for genotype files.
                  When falsy, the paths are read from
                  ``config["environment"]["flowcell_inbox"]``.
    :param num_days: Only files modified less than this many days ago are
                     considered when searching inboxes (converted with int()).
    :param genotype_files: Explicit genotype file paths; when truthy, no inbox
                           search is performed.
    :param config: Parsed configuration mapping; only consulted when neither
                   ``genotype_files`` nor ``inbox`` is supplied.
    :param config_file_path: Path of the configuration file, used only in the
                             error message.

    :raises ValueError: If no inbox is given and none can be read from config.
    """
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        if inbox:
            # The original left `inboxes` unbound on this path (NameError).
            inboxes = inbox if isinstance(inbox, (list, tuple)) else [inbox]
        else:
            try:
                # The configured value is iterated as a collection of paths.
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError("No path to delivery inbox specified by argument "
                                 "or in configuration file ({}). Exiting.".format(config_file_path))
        # Absolute timestamp (seconds since the epoch) before which files are
        # considered too old. Computed once so every inbox shares the same
        # cutoff and the value exists even when there is nothing to search.
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across all inboxes; resetting this per inbox would drop
        # everything except the matches from the last one.
        gt_files_valid = []
        searched_inboxes = []
        for inbox_path in inboxes:
            inbox_path = os.path.abspath(inbox_path)
            searched_inboxes.append(inbox_path)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_path, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match, glob.glob(os.path.join(inbox_path, "*"))):
                # cutoff_age is already a timestamp, so compare mtime to it
                # directly (subtracting it from now again made every file pass).
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))
        if not gt_files_valid:
            LOG.info("No genotype files found under {} newer than "
                     "{}".format(", ".join(searched_inboxes), time.ctime(cutoff_age)))
            return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
                find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample,
                                                     genotype_status="AVAILABLE")
                    else:
                        # Any other existing status is left untouched.
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(sample, genotype_status))
                except CharonError as e:
                    # Log and move on so one failing sample does not block the rest.
                    LOG.error('Could not update genotype status to "AVAILABLE" '
                              'for project/sample "{}/{}": {}'.format(project_id,
                                                                      sample,
                                                                      e))
                )
            except Exception as e:
                LOG.error(e.message)
                print(e, file=sys.stderr)
        LOG.info("Done with organization.")

    elif "genotype_project_dirs" in args:
        from ngi_pipeline.engines import piper_ngi

        genotype_file_path = args.genotype_file
        project_obj_list = []
        if not args.genotype_project_dirs:
            LOG.info("No projects specified; running genotype analysis for all " "samples present in VCF file.")
            # User passed only the genotype file; try to determine samples/projects
            # from vcf file
            projects_samples_dict = find_projects_from_samples(parse_samples_from_vcf(genotype_file_path))
            for project_id, samples in projects_samples_dict.iteritems():
                try:
                    path_to_project = locate_project(project_id)
                except ValueError:
                    # Project has not yet been organized from flowcell level
                    LOG.warn(
                        'Project "{}" has not yet been organized from '
                        "flowcell to project level; skipping.".format(project_id)
                    )
                    continue
                project = recreate_project_from_filesystem(project_dir=path_to_project, restrict_to_samples=samples)
                project_obj_list.append(project)
        else:
            for genotype_project_dir in args.genotype_project_dirs:
                LOG.info(
def main(inbox=None,
         num_days=14,
         genotype_files=None,
         config=None,
         config_file_path=None):
    """Flag samples found in genotype (VCF) files as genotype-"AVAILABLE" in Charon.

    Genotype files are either given explicitly via ``genotype_files`` or
    discovered by globbing the top level of one or more inbox directories and
    keeping the entries that match ``GENOTYPE_FILE_RE`` and were modified
    within the last ``num_days`` days.

    For every sample parsed from each file, the sample's ``genotype_status``
    is fetched from Charon; if it is missing or ``"NOT_AVAILABLE"`` it is set
    to ``"AVAILABLE"``, otherwise it is left untouched. Charon errors are
    logged per sample and do not abort processing.

    :param inbox: Path (or list/tuple of paths) to search for genotype files.
                  When falsy, the paths are read from
                  ``config["environment"]["flowcell_inbox"]``.
    :param num_days: Only files modified less than this many days ago are
                     considered when searching inboxes (converted with int()).
    :param genotype_files: Explicit genotype file paths; when truthy, no inbox
                           search is performed.
    :param config: Parsed configuration mapping; only consulted when neither
                   ``genotype_files`` nor ``inbox`` is supplied.
    :param config_file_path: Path of the configuration file, used only in the
                             error message.

    :raises ValueError: If no inbox is given and none can be read from config.
    """
    if genotype_files:
        gt_files_valid = [
            os.path.abspath(gt_file) for gt_file in genotype_files
        ]
    else:
        if inbox:
            # The original left `inboxes` unbound on this path (NameError).
            if isinstance(inbox, (list, tuple)):
                inboxes = inbox
            else:
                inboxes = [inbox]
        else:
            try:
                # The configured value is iterated as a collection of paths.
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                raise ValueError(
                    "No path to delivery inbox specified by argument "
                    "or in configuration file ({}). Exiting.".format(
                        config_file_path))
        # Absolute timestamp (seconds since the epoch) before which files are
        # considered too old. Computed once so every inbox shares the same
        # cutoff and the value exists even when there is nothing to search.
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across all inboxes; resetting this per inbox would drop
        # everything except the matches from the last one.
        gt_files_valid = []
        searched_inboxes = []
        for inbox_path in inboxes:
            inbox_path = os.path.abspath(inbox_path)
            searched_inboxes.append(inbox_path)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_path, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match,
                                  glob.glob(os.path.join(inbox_path, "*"))):
                # cutoff_age is already a timestamp, so compare mtime to it
                # directly (subtracting it from now again made every file pass).
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))
        if not gt_files_valid:
            LOG.info("No genotype files found under {} newer than "
                     "{}".format(", ".join(searched_inboxes),
                                 time.ctime(cutoff_age)))
            return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
                find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(
                            projectid=project_id,
                            sampleid=sample,
                            genotype_status="AVAILABLE")
                    else:
                        # Any other existing status is left untouched.
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(
                                     sample, genotype_status))
                except CharonError as e:
                    # Log and move on so one failing sample does not block the rest.
                    LOG.error(
                        'Could not update genotype status to "AVAILABLE" '
                        'for project/sample "{}/{}": {}'.format(
                            project_id, sample, e))
Пример #4
0
            except Exception as e:
                LOG.error(e.message)
                print(e, file=sys.stderr)
        LOG.info("Done with organization.")

    elif 'genotype_project_dirs' in args:
        from ngi_pipeline.engines import piper_ngi
        genotype_file_path = args.genotype_file
        project_obj_list = []
        if not args.genotype_project_dirs:
            LOG.info('No projects specified; running genotype analysis for all '
                     'samples present in VCF file.')
            # User passed only the genotype file; try to determine samples/projects
            # from vcf file
            projects_samples_dict = \
                    find_projects_from_samples(parse_samples_from_vcf(genotype_file_path))
            for project_id, samples in projects_samples_dict.iteritems():
                try:
                    path_to_project = locate_project(project_id)
                except ValueError:
                    # Project has not yet been organized from flowcell level
                    LOG.warn('Project "{}" has not yet been organized from '
                             'flowcell to project level; skipping.'.format(project_id))
                    continue
                project = recreate_project_from_filesystem(project_dir=path_to_project,
                                                           restrict_to_samples=samples)
                project_obj_list.append(project)
        else:
            for genotype_project_dir in args.genotype_project_dirs:
                LOG.info("Starting genotype analysis of project {} with genotype "
                         "file {}".format(genotype_project_dir, genotype_file_path))