Exemplo n.º 1
0
class LimsService(object):
    """RPC service exposing sample and case information from the LIMS.

    A ``Lims`` connection is created, and its version checked, when the
    service object is instantiated.
    """

    name = "lims_service"

    def __init__(self):
        super(LimsService, self).__init__()
        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.lims.check_version()

    @rpc
    def sample(self, lims_id):
        """Get a sample from LIMS.

        Returns the result of ``transform_entry`` for the sample with the
        given LIMS id.
        """
        sample_obj = LimsSample(self.lims, id=lims_id)
        return transform_entry(sample_obj)

    @rpc
    def validate_sample(self, lims_id):
        """Validate information in the LIMS on sample level.

        Checks that the "familyID" and "customer" UDFs can be read.

        Raises:
            MissingLimsDataException: one of the UDFs is missing.
            LimsSampleNotFoundError: reading the sample raised ``HTTPError``.
        """
        logger.debug("fetch sample from LIMS")
        sample_obj = LimsSample(self.lims, id=lims_id)

        try:
            # The values are not needed; the lookups only prove the UDFs
            # exist (and that the sample can be read at all).
            for required_udf in ("familyID", "customer"):
                sample_obj.udf[required_udf]
        except KeyError as error:
            # ``error.message`` only exists on Python 2; the first argument
            # of a KeyError is the missing key on every version.
            missing = error.args[0] if error.args else error
            raise MissingLimsDataException(missing)
        except HTTPError as error:
            raise LimsSampleNotFoundError(str(error))

    @rpc
    def ls_case(self, cust_id, case_id):
        """Fetch all samples for a case from the LIMS.

        Returns a dict with the transformed samples under "samples" and the
        distinct values of their "analysis_type" under "analysis_types".
        """
        sample_objs = self.lims.get_samples(
            udf={"customer": cust_id, "familyID": case_id})
        sample_dicts = [transform_entry(sample) for sample in sample_objs]
        analysis_types = set(sample["analysis_type"]
                             for sample in sample_dicts)
        return {"analysis_types": list(analysis_types),
                "samples": sample_dicts}

    @rpc
    def ls_project(self, project_id):
        """List the LIMS ids of all samples in a project."""
        samples = self.lims.get_samples(projectname=project_id)
        return [sample.id for sample in samples]

    @rpc
    def pedigree(self, cust_id, case_id):
        """Generate pedigree content for a case."""
        return serialize_pedigree(self.lims, cust_id, case_id)

    @rpc
    def target_reads(self, lims_id):
        """Determine the amount of reads to be sequenced."""
        sample_obj = LimsSample(self.lims, id=lims_id)
        app_tag = analysis_info(sample_obj)
        # "reads" is expressed in millions of reads
        return app_tag["reads"] * 1000000
Exemplo n.º 2
0
def main(args):
    """Update the 'Reads Min' UDF of recently modified projects.

    For every project id returned by ``get_last_modified_projectids`` the
    non-aborted samples are counted, the project's sequencing platform is
    turned into a key, and each row of the "yields/min_yield" view whose
    lower-cased key matches is used to store
    ``row.value * lanes ordered / sample count`` on the project.

    Args:
        args: object with a ``conf`` attribute (path of the YAML file passed
            to ``setupServer``) and an ``hours`` attribute (used to build the
            "<hours> hours" interval string).
    """
    lims_db = get_session()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    with open(args.conf) as cf:
        # safe_load: yaml.load() without an explicit Loader can construct
        # arbitrary objects and is rejected by current PyYAML releases.
        db_conf = yaml.safe_load(cf)
        couch = setupServer(db_conf)
    db = couch["expected_yields"]
    postgres_string = "{} hours".format(args.hours)
    project_ids = get_last_modified_projectids(lims_db, postgres_string)

    for project in [Project(lims, id=x) for x in project_ids]:
        samples = lims.get_samples(projectname=project.name)
        samples_count = sum(
            1 for sample in samples
            if not ("Status (manual)" in sample.udf
                    and sample.udf["Status (manual)"] == "Aborted"))
        try:
            lanes_ordered = project.udf['Sequence units ordered (lanes)']
            key = parse_sequencing_platform(project.udf['Sequencing platform'])
        except Exception:
            # Best effort: projects without usable ordering information are
            # skipped. ``Exception`` (not a bare except) keeps Ctrl-C and
            # SystemExit working.
            continue
        if not samples_count:
            # Every update below would divide by zero and be skipped, so
            # there is no point in querying the view.
            continue
        for row in db.view("yields/min_yield"):
            db_key = [x.lower() if x else None for x in row.key]
            if db_key == key:
                project.udf['Reads Min'] = float(
                    row.value) * lanes_ordered / samples_count
                project.put()
def main(args):
    """Recompute and store 'Reads Min' for recently modified projects.

    Project ids come from ``get_last_modified_projectids`` for the last
    ``args.hours`` hours. For each project the samples that are not marked
    "Aborted" in "Status (manual)" are counted; every row of the
    "yields/min_yield" view whose lower-cased key equals the parsed
    sequencing platform sets 'Reads Min' to
    ``row.value * lanes ordered / sample count`` and saves the project.

    Args:
        args: object with ``conf`` (path to the YAML configuration handed to
            ``setupServer``) and ``hours`` attributes.
    """
    lims_db = get_session()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    with open(args.conf) as cf:
        # yaml.load() without a Loader is unsafe and fails on current
        # PyYAML; safe_load only builds plain data types.
        db_conf = yaml.safe_load(cf)
        couch = setupServer(db_conf)
    db = couch["expected_yields"]
    postgres_string = "{} hours".format(args.hours)
    project_ids = get_last_modified_projectids(lims_db, postgres_string)

    for project in [Project(lims, id=x) for x in project_ids]:
        samples_count = 0
        for sample in lims.get_samples(projectname=project.name):
            aborted = ("Status (manual)" in sample.udf
                       and sample.udf["Status (manual)"] == "Aborted")
            if not aborted:
                samples_count += 1

        try:
            lanes_ordered = project.udf['Sequence units ordered (lanes)']
            key = parse_sequencing_platform(project.udf['Sequencing platform'])
        except Exception:
            # Deliberately best effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt / SystemExit are not swallowed.
            continue

        if samples_count == 0:
            # The original caught ZeroDivisionError per row and did nothing;
            # skipping here gives the same result without querying the view.
            continue

        for row in db.view("yields/min_yield"):
            db_key = [x.lower() if x else None for x in row.key]
            if db_key == key:
                project.udf['Reads Min'] = float(row.value) * lanes_ordered / samples_count
                project.put()
Exemplo n.º 4
0
def test_D(server_test1):
    """A sample is returned when filtering on UDF values and name together."""
    # GIVEN: a LIMS holding a sample with
    #   name: 'maya'
    #   UDFs: "Source": "blood", "Reads missing (M)": 0
    udf_filter = {"Source": "blood", "Reads missing (M)": 0}

    # WHEN creating a genologics Lims object and filtering on those fields
    client = Lims("http://127.0.0.1:8000", 'dummy', 'dummy')
    matches = client.get_samples(udf=udf_filter, name='maya')

    # THEN exactly that sample should be found
    expected = [Sample(client, id='ACC2351A2')]
    assert matches == expected
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """MultiQC module that adds Clarity LIMS metadata to the report.

    Samples are looked up in the LIMS, the UDFs selected by the ``clarity``
    config entry are collected, and the values are added to the report
    header, the general statistics and a table appended to this module's
    intro.
    """

    def __init__(self):

        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return None
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return None

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS',
                                                       anchor='clarity')

        self.intro = '''<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" target="_blank">MultiQC_Clarity</a>
            plugin fetches data from a specified
            <a href="https://www.genologics.com/clarity-lims/" target="_blank">Basespace Clarity LIMS</a> instance.</p>'''

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return None

        self.get_samples()
        self.get_metadata('report_header_info')
        self.get_metadata('general_stats')
        self.get_metadata('clarity_module')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    @staticmethod
    def _multiply_by(factor):
        """Return a function that multiplies its argument by ``factor``.

        Building the function here binds ``factor`` per call. A lambda
        written inline in a loop would share one variable, so every header
        would end up using the last factor seen.
        """
        factor = str(factor)
        return lambda x: float(x) * float(factor)

    def get_samples(self):
        """Fill ``self.samples`` with the LIMS samples to report on.

        With the ``clarity_project_name`` option all samples of that project
        are used. Otherwise sample names are gathered from the general
        statistics and the saved raw data of the report (optionally
        normalized with ``edit_names``) and each name is looked up; names
        with no match or more than one match are logged and skipped.
        """
        project_name = config.kwargs.get('clarity_project_name')
        if project_name:
            # get_projects() returns a list, which has no ``samples``
            # attribute; ask the LIMS for the project's samples directly.
            try:
                self.samples = self.lims.get_samples(projectname=project_name)
            except Exception as e:
                self.log.warning(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None
            found = total = len(self.samples)
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    names.update(d.keys())
                except AttributeError:
                    # entry does not provide keys(); nothing to collect
                    pass
            if not config.kwargs.get('clarity_skip_edit_names'):
                # Several raw names can map to one sample name; de-duplicate
                # so each sample is queried and stored once.
                names = set(self.edit_names(names))

            self.log.debug("Looking into Clarity for samples {}".format(
                ", ".join(names)))
            found = 0
            total = len(names)
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error(
                            "Could not find a sample matching {0}, skipping.".
                            format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error(
                            "Found multiple samples matching {0}, skipping".
                            format(name))
                        continue
                    found += 1
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warning(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None
        self.log.info("Found {} out of {} samples in LIMS.".format(
            found, total))

    def edit_names(self, names):
        """Return ``names`` as a list with a trailing "_1", "_2", "_R1" or
        "_R2" removed from each name."""
        edited = []
        for name in names:
            if name.endswith(("_1", "_2")):
                edited.append(name[:-2])
            elif name.endswith(("_R1", "_R2")):
                edited.append(name[:-3])
            else:
                edited.append(name)

        return edited

    def flatten_metadata(self, metadata):
        """Join set/list values of a two-level dict into ", "-separated
        strings, in place, and return the same dict."""
        for first_level in metadata:
            for second_level in metadata[first_level]:
                value = metadata[first_level][second_level]
                if isinstance(value, (set, list)):
                    metadata[first_level][second_level] = ", ".join(value)

        return metadata

    def get_project_metadata(self, udfs):
        """Collect the given project UDFs, keyed by project name.

        The entry of a project is rebuilt for every sample, so the values of
        the last sample of each project are the ones returned.
        """
        project_metadata = {}
        for sample in self.samples:
            project = sample.project
            project_metadata[project.name] = {
                udf: {str(project.udf[udf])}
                for udf in udfs if udf in project.udf
            }

        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect the given sample UDFs, keyed by sample name.

        A later sample with the same name replaces the earlier entry.
        """
        sample_metadata = {}
        for sample in self.samples:
            sample_metadata[sample.name] = {
                udf: {str(sample.udf[udf])}
                for udf in udfs if udf in sample.udf
            }

        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Collect the metadata configured under ``self.schema[part]``.

        "Project" and "Sample" keys are handled by their own collectors; any
        other key is treated as a process type and handled by
        ``get_artifact_metadata``. Results go to the header metadata for
        "report_header_info", to the general metadata for "general_stats"
        and to the tab metadata otherwise.
        """
        section = self.schema[part]
        if part == "report_header_info":
            target = self.header_metadata
        elif part == "general_stats":
            target = self.general_metadata
        else:
            target = self.tab_metadata

        artifact_metadata = None
        for key in section:
            if key == 'Project':
                metadata = self.get_project_metadata(section['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(section['Sample'])
            else:
                # get_artifact_metadata() already covers every process type
                # of the section, so query the LIMS once and reuse the
                # result instead of repeating it for each process-type key.
                if artifact_metadata is None:
                    artifact_metadata = self.get_artifact_metadata(section)
                metadata = artifact_metadata

            # NOTE(review): update() replaces the whole entry of a name, so
            # for a given sample only the metadata of the last key handled
            # survives - confirm whether the entries should be merged.
            target.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs per sample for every process type.

        ``pt_to_udfs`` maps a process type to a mapping with optional
        "outputs" and "inputs" UDF listings; the "Sample" and "Project" keys
        are ignored. "outputs" UDFs are read from the artifacts returned by
        the LIMS for the sample and process type, "inputs" UDFs from the
        inputs of those artifacts' parent processes that contain the sample.
        """
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type in ('Sample', 'Project'):
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                udf_spec = pt_to_udfs[process_type]
                for udf_name in udf_spec.get("outputs", []):
                    artifact_metadata[sample.name][udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in artifacts
                        if udf_name in artifact.udf
                    ]

                processes = {art.parent_process for art in artifacts}
                inputs = []
                for p in processes:
                    inputs.extend(
                        art for art in p.all_inputs()
                        if any(s.name == sample.name for s in art.samples))
                for udf_name in udf_spec.get("inputs", []):
                    artifact_metadata[sample.name][udf_name] = [
                        str(artifact.udf[udf_name]) for artifact in inputs
                        if udf_name in artifact.udf
                    ]

        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Add the header and general-statistics metadata to the report."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            config.report_header_info.append(
                dict(self.header_metadata[first_level]))

        headers = {}
        general_schema = self.schema["general_stats"]
        for first_level in general_schema:
            for header in general_schema[first_level]:
                headers[header] = {}
                header_cfg = general_schema[first_level][header]
                if isinstance(header_cfg, dict):
                    for subsubkey, cfg in header_cfg.items():
                        if subsubkey == 'multiply_by':
                            headers[header]['modify'] = self._multiply_by(cfg)
                        else:
                            headers[header][subsubkey] = cfg
                headers[header]['description'] = headers[header].get(
                    'description', '{} - {}'.format(first_level, header))
                headers[header]['namespace'] = headers[header].get(
                    'namespace', 'Clarity LIMS')
                headers[header]['scale'] = headers[header].get('scale', 'YlGn')

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Append a table of the tab metadata to the module intro."""
        headers = OrderedDict()
        module_schema = self.schema['clarity_module']
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                if header in headers:
                    continue
                desc = header
                headers[header] = {}
                for key in module_schema:
                    if header in module_schema[key]:
                        desc = key
                    elif isinstance(module_schema[key], dict):
                        for subkey, val in module_schema[key].items():
                            if val is None:
                                break
                            elif header in val:
                                desc = key
                                # Only a mapping can carry per-UDF settings;
                                # a plain list of UDF names has none.
                                if isinstance(val, dict) and isinstance(
                                        val[header], dict):
                                    for subsubkey, cfg in val[header].items():
                                        if subsubkey == 'multiply_by':
                                            headers[header]['modify'] = \
                                                self._multiply_by(cfg)
                                        else:
                                            headers[header][subsubkey] = cfg

                headers[header]['namespace'] = headers[header].get(
                    'namespace', desc)
                headers[header]['title'] = headers[header].get(
                    'title', header)
                headers[header]['description'] = headers[header].get(
                    'description', header)

        self.intro += table.plot(self.tab_metadata, headers)
class ProjectReport:
    """Collect project information and metrics and render a PDF report.

    The constructor reads the sample names and project details from the
    LIMS, loads ``summary_metrics.csv`` from the project's delivery folder
    and derives the report template and summary figures.
    """

    def __init__(self, project_name):
        self.project_name = project_name
        self.project_source = os.path.join(cfg.query('sample','delivery_source'), project_name)
        self.project_delivery = os.path.join(cfg.query('sample','delivery_dest'), project_name)
        self.lims=Lims(**cfg.get('clarity'))
        self.params = {'project_name':project_name}
        self.results = {}
        self.fill_sample_names_from_lims()
        self.samples_delivered = self.read_metrics_csv(os.path.join(self.project_delivery, 'summary_metrics.csv'))
        self.get_sample_param()
        self.fill_project_information_from_lims()

    def fill_project_information_from_lims(self):
        """Store label/value pairs describing the project, read from the LIMS."""
        project = self.lims.get_projects(name=self.project_name)[0]
        researcher = project.researcher
        self.project_info = {
            'project_name': ['Project name:', self.project_name],
            'project_title': ['Project title:', project.udf.get('Project Title', '')],
            'enquiry': ['Enquiry no:', project.udf.get('Enquiry Number', '')],
            'quote': ['Quote no:', project.udf.get('Quote No.', '')],
            'researcher': ['Researcher:', '%s %s (%s)' % (researcher.first_name,
                                                          researcher.last_name,
                                                          researcher.email)],
        }
        self.project_order = ['project_name', 'project_title', 'enquiry', 'quote', 'researcher']

    def fill_sample_names_from_lims(self):
        """Store the project's sample names, raw and with ':' / ' ' replaced by '_'."""
        samples = self.lims.get_samples(projectname=self.project_name)
        self.samples = [s.name for s in samples]
        self.modified_samples = [re.sub(r'[: ]','_', s.name) for s in samples]

    def _get_unique_sample(self, sample_name):
        """Return the project's single sample named ``sample_name``.

        Logs an error and returns None when the LIMS returns no sample or
        more than one.
        """
        samples = self.lims.get_samples(projectname=self.project_name, name=sample_name)
        if len(samples) == 1:
            return samples[0]
        # The message has two placeholders; the original supplied only the
        # sample name, which raised TypeError instead of logging.
        app_logger.error('%s samples found for sample name %s' % (len(samples), sample_name))
        return None

    def get_library_workflow_from_sample(self, sample_name):
        """Return the sample's 'Prep Workflow' UDF, or None if the sample is not unique."""
        sample = self._get_unique_sample(sample_name)
        if sample is None:
            return None
        return sample.udf.get('Prep Workflow')

    def get_species_from_sample(self, sample_name):
        """Return the sample's 'Species' UDF mapped through ``species_alias``.

        Returns None if the sample is not unique.
        """
        sample = self._get_unique_sample(sample_name)
        if sample is None:
            return None
        s = sample.udf.get('Species')
        return species_alias.get(s, s)

    def parse_program_csv(self, program_csv):
        """Record program versions found in ``program_csv`` in ``self.params``.

        The file, when it exists, is read as CSV with the program name in
        the first column and its version in the second.
        """
        all_programs = {}
        if os.path.exists(program_csv):
            with open(program_csv) as open_prog:
                for row in csv.reader(open_prog):
                    # blank or single-column lines carry no version
                    if len(row) >= 2:
                        all_programs[row[0]] = row[1]
        #TODO: change the hardcoded version of bcl2fastq
        all_programs['bcl2fastq'] = '2.17.1.14'
        for p in ['bcl2fastq','bcbio', 'bwa', 'gatk', 'samblaster']:
            if p in all_programs:
                self.params[p + '_version'] = all_programs[p]

    def parse_project_summary_yaml(self, summary_yaml):
        """Read genome and bcbio information from a project summary YAML file."""
        with open(summary_yaml, 'r') as open_file:
            full_yaml = yaml.safe_load(open_file)
        sample_yaml=full_yaml['samples'][0]
        install_dir = os.path.basename(os.path.dirname(sample_yaml['dirs']['galaxy']))
        # NOTE(review): the original took install_dir.split('/')[-2], which
        # always raised IndexError because a basename contains no '/'. Which
        # path component holds the version cannot be told from here, so the
        # directory name is only used when no version is known yet - confirm.
        if install_dir:
            self.params.setdefault('bcbio_version', install_dir)
        if sample_yaml['genome_build'] == 'hg38':
            self.params['genome_version'] = 'GRCh38 (with alt, decoy and HLA sequences)'

    def read_metrics_csv(self, metrics_csv):
        """Return the rows of a tab-separated metrics file keyed by 'Sample Id'."""
        samples_to_info={}
        with open(metrics_csv) as open_metrics:
            reader = csv.DictReader(open_metrics, delimiter='\t', quoting=csv.QUOTE_NONE)
            for row in reader:
                samples_to_info[row['Sample Id']] = row
        return samples_to_info

    def get_sample_param(self):
        """Derive the template, program versions and summary results.

        Returns None early (after logging) when the library workflow is not
        a known one.

        Raises:
            ValueError: the project's samples do not share exactly one
                library workflow, or exactly one species.
        """
        self.fill_sample_names_from_lims()
        library_workflows=set()
        species = set()
        for sample in self.samples:
            library_workflows.add(self.get_library_workflow_from_sample(sample))
            species.add(self.get_species_from_sample(sample))

        # Values may be None, so they are converted before joining. The
        # original carried on after logging and then failed on the unset
        # attribute; fail here with the logged message instead.
        if len(library_workflows) != 1:
            message = 'More than one workfkow used in project %s: %s'%(
                self.project_name, ', '.join(sorted(str(w) for w in library_workflows)))
            app_logger.error(message)
            raise ValueError(message)
        self.library_workflow = library_workflows.pop()

        if len(species) != 1:
            message = 'More than one species used in project %s: %s'%(
                self.project_name, ', '.join(sorted(str(s) for s in species)))
            app_logger.error(message)
            raise ValueError(message)
        self.species = species.pop()

        if self.library_workflow in ['TruSeq Nano DNA Sample Prep', None] :
            self.template = 'truseq_nano_template'
        elif self.library_workflow in ['TruSeq PCR-Free DNA Sample Prep', 'TruSeq PCR-Free Sample Prep'] :
            self.template = 'truseq_pcrfree_template'
        else:
            app_logger.error('Unknown library workflow %s for project %s'%(self.library_workflow, self.project_name))
            return None

        if self.species == 'Human':
            self.template += '.html'
        else:
            self.template += '_non_human.html'

        self.params['adapter1'] = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
        self.params['adapter2'] = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"

        project_size = getFolderSize(self.project_delivery)
        for sample in set(self.modified_samples):
            sample_source=os.path.join(self.project_source, sample)
            if not os.path.exists(sample_source):
                continue
            # both files may live directly in the sample folder or in '.qc'
            program_csv = os.path.join(sample_source, 'programs.txt')
            if not os.path.exists(program_csv):
                program_csv = os.path.join(sample_source, '.qc', 'programs.txt')
            self.parse_program_csv(program_csv)
            summary_yaml = os.path.join(sample_source, 'project-summary.yaml')
            if not os.path.exists(summary_yaml):
                summary_yaml = os.path.join(sample_source, '.qc', 'project-summary.yaml')
            if os.path.exists(summary_yaml):
                self.parse_project_summary_yaml(summary_yaml)

        self.results['project_size']=['Total folder size:','%.2fTb'%(project_size/1000000000000.0)]
        self.results['nb_sample']=['Number of sample:', len(self.samples)]
        self.results['nb_sample_delivered']=['Number of sample delivered:',len(self.samples_delivered)]
        yields = [float(self.samples_delivered[s]['Yield']) for s in self.samples_delivered]
        self.results['yield']=['Total yield Gb:','%.2f'%sum(yields)]
        self.results['mean_yield']=['Average yield Gb:','%.1f'%(sum(yields)/max(len(yields), 1))]

        try:
            coverage = [float(self.samples_delivered[s]['Mean coverage']) for s in self.samples_delivered]
            self.results['coverage']=['Average coverage per samples:','%.2f'%(sum(coverage)/max(len(coverage), 1))]
            self.results_order=['nb_sample','nb_sample_delivered', 'yield', 'mean_yield', 'coverage', 'project_size']
        except KeyError:
            # the metrics file has no 'Mean coverage' column
            self.results_order=['nb_sample','nb_sample_delivered', 'yield', 'mean_yield', 'project_size']

    def generate_report(self):
        """Render the selected template and write it as a PDF to the delivery folder."""
        template_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates'))
        env = Environment(loader=FileSystemLoader(template_dir))
        template = env.get_template(self.template)
        output = template.render(results_order=self.results_order, results=self.results,
                                 project_info=self.project_info, project_order=self.project_order,
                                 **self.params)
        pdf = get_pdf(output)
        project_file = os.path.join(self.project_delivery, 'project_%s_report.pdf'%self.project_name)
        # A PDF is binary data, so the file is opened in binary mode.
        # NOTE(review): assumes get_pdf() returns a bytes buffer - confirm.
        with open(project_file, 'wb') as open_pdf:
            open_pdf.write(pdf.getvalue())
Exemplo n.º 7
0
Arquivo: api.py Projeto: espre05/lims
class LimsAPI(object):

    """Wrapper around a ``Lims`` connection that returns transformed samples.

    The connection can be passed in directly, or created later with
    ``connect`` / ``init_app``.
    """

    def __init__(self, lims=None):
        super(LimsAPI, self).__init__()
        self.lims = lims

    def init_app(self, app):
        """Connect with credentials from Flask app config."""
        self.connect(**app.config['LIMS_CONFIG'])

    def connect(self, baseuri, username, password):
        """Connect to the LIMS instance."""
        self.lims = Lims(baseuri, username, password)

    def sample(self, lims_id):
        """Get a sample from the LIMS.

        Raises:
            MissingLimsDataException: transforming the sample hit a missing
                key (reported as a missing UDF).
            HTTPError: re-raised unchanged after logging.
        """
        logger.debug('fetch sample from LIMS')
        sample_obj = Sample(self.lims, id=lims_id)

        try:
            sample_json = transform_entry(sample_obj)
        except KeyError as error:
            # ``error.message`` only exists on Python 2; the first argument
            # of a KeyError is the missing key on every version.
            missing = error.args[0] if error.args else error
            message = "missing UDF: {}".format(missing)
            logger.warning(message)
            raise MissingLimsDataException(message)
        except HTTPError:
            logger.warning('unknown lims id')
            # bare raise keeps the original traceback
            raise

        return sample_json

    def samples(self, project_id=None, case=None, sample_ids=None, limit=20,
                **kwargs):
        """Return up to ``limit`` transformed samples and their analysis types.

        Samples are selected by ``project_id`` if given, else by ``case`` (a
        ``(customer, familyID)`` pair), else by passing ``kwargs`` on to the
        LIMS query. ``sample_ids`` is accepted but not used.
        """
        if project_id:
            sample_objs = self.lims.get_samples(projectname=project_id)
        elif case:
            sample_objs = self.lims.get_samples(udf={'customer': case[0],
                                                     'familyID': case[1]})
        else:
            sample_objs = self.lims.get_samples(**kwargs)

        # zip() stops as soon as the range is exhausted, which caps the
        # number of samples that get transformed at ``limit``.
        sample_dicts = [transform_entry(sample)
                        for _, sample in zip(range(limit), sample_objs)]

        analysis_types = set(sample['analysis_type'] for sample in
                             sample_dicts)
        return {
            'analysis_types': list(analysis_types),
            'samples': sample_dicts
        }

    def cases(self):
        """Return a lazy iterable of ``(customer, familyID)`` pairs.

        One pair is produced for every sample that has both UDFs; the pairs
        are not de-duplicated.
        """
        return ((sample.udf['customer'], sample.udf['familyID'])
                for sample in self.lims.get_samples()
                if 'familyID' in sample.udf and 'customer' in sample.udf)
Exemplo n.º 8
0
class MultiQC_clarity_metadata(BaseMultiqcModule):
    def __init__(self):
        """Collect Clarity LIMS metadata and register the module.

        Returns early, before any LIMS access, when the plugin is disabled
        on the command line or in the config file, and right after the
        attributes are initialised when no ``clarity`` config entry exists.
        """

        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return None
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return None

        super(MultiQC_clarity_metadata, self).__init__(
            name='Clarity',
            anchor='clarity',
            href='https://github.com/Galithil/MultiQC_Clarity',
            info="fetches data from your Basespace Clarity LIMS instance.")

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []
        self.sections = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            # Logger.warn() is a deprecated alias of warning()
            self.log.warning("No config found for MultiQC_Clarity")
            return None

        self.get_samples()
        self.get_metadata('Header')
        self.get_metadata('General Statistics')
        self.get_metadata('Clarity Tab')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    def get_samples(self):
        """Fill ``self.samples`` with the LIMS samples to report on.

        With the ``clarity_project_name`` option all samples of that project
        are used. Otherwise sample names are gathered from the general
        statistics and the saved raw data of the report (optionally
        normalized with ``edit_names``) and each name is looked up; names
        with no match or more than one match are logged and skipped.
        """
        project_name = config.kwargs.get('clarity_project_name')
        if project_name:
            # get_projects() returns a list, which has no ``samples``
            # attribute; ask the LIMS for the project's samples directly.
            try:
                self.samples = self.lims.get_samples(projectname=project_name)
            except Exception as e:
                self.log.warning(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None
            found = total = len(self.samples)
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    # was ``self.names``: the resulting AttributeError was
                    # swallowed, so these names were never collected
                    names.update(d.keys())
                except AttributeError:
                    # entry does not provide keys(); nothing to collect
                    pass
            if not config.kwargs.get('clarity_skip_edit_names'):
                # Several raw names can map to one sample name; de-duplicate
                # so each sample is queried and stored once.
                names = set(self.edit_names(names))

            self.log.debug("Looking into Clarity for samples {}".format(
                ", ".join(names)))
            found = 0
            total = len(names)
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error(
                            "Could not find a sample matching {0}, skipping.".
                            format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error(
                            "Found multiple samples matching {0}, skipping".
                            format(name))
                        continue
                    found += 1
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warning(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None
        self.log.info("Found {} out of {} samples in LIMS.".format(
            found, total))

    def edit_names(self, names):
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)

        return edited

    def flatten_metadata(self, metadata):
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level],
                              set) or isinstance(
                                  metadata[first_level][second_level], list):
                    metadata[first_level][second_level] = ", ".join(
                        metadata[first_level][second_level])

        return metadata

    def get_project_metadata(self, udfs):
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    try:
                        project_metadata[sample.project.name][udf].add(
                            str(sample.project.udf[udf]))
                    except:
                        project_metadata[sample.project.name][udf] = set()
                        project_metadata[sample.project.name][udf].add(
                            str(sample.project.udf[udf]))

        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect sample-level UDF values for all samples.

        For every sample in ``self.samples`` the UDFs listed in ``udfs`` are
        read from ``sample.udf`` and gathered, as strings, in a set per
        sample name and UDF. Entries of samples sharing a name are
        accumulated rather than overwritten.

        :param udfs: iterable of UDF names to look up.
        :returns: ``{sample name: {udf: value string}}`` as produced by
            ``self.flatten_metadata``.
        """
        sample_metadata = {}
        for sample in self.samples:
            # setdefault instead of a fresh dict: a second sample with the
            # same name adds to the entry instead of wiping it.
            fields = sample_metadata.setdefault(sample.name, {})
            for udf in udfs:
                if udf in sample.udf:
                    fields.setdefault(udf, set()).add(str(sample.udf[udf]))

        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch the metadata configured for one schema part and store it.

        ``self.schema[part]`` is walked key by key: ``'Project'`` and
        ``'Sample'`` are resolved through their dedicated getters, every
        other key is treated as a process type and resolved through
        ``get_artifact_metadata``. The result goes into
        ``self.header_metadata`` (part ``"Header"``),
        ``self.general_metadata`` (part ``"General Statistics"``) or
        ``self.tab_metadata`` (anything else).

        :param part: name of the schema section to process.
        """
        schema_part = self.schema[part]
        if part == "Header":
            target = self.header_metadata
        elif part == "General Statistics":
            target = self.general_metadata
        else:
            target = self.tab_metadata

        artifacts_fetched = False
        for key in schema_part:
            if key == 'Project':
                metadata = self.get_project_metadata(schema_part['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(schema_part['Sample'])
            elif not artifacts_fetched:
                # get_artifact_metadata() receives the whole part and loops
                # over every process type itself, so one call is enough.
                metadata = self.get_artifact_metadata(schema_part)
                artifacts_fetched = True
            else:
                continue

            # Merge per entry: a plain target.update(metadata) would replace
            # the entry's whole field dict and drop fields stored earlier
            # (e.g. Sample UDFs once artifact UDFs arrive).
            for name, fields in metadata.items():
                target.setdefault(name, {}).update(fields)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDF values per sample and process type.

        For each sample and each process type in ``pt_to_udfs`` (the keys
        ``'Sample'`` and ``'Project'`` are skipped) the artifacts are fetched
        from ``self.lims``. UDFs listed under ``"outputs"`` are read from
        those artifacts; UDFs listed under ``"inputs"`` are read from the
        inputs of the artifacts' parent processes that contain the sample.

        :param pt_to_udfs: mapping of process type to a mapping with optional
            ``"outputs"`` / ``"inputs"`` UDF name collections.
        :returns: ``{sample name: {udf: value string}}`` as produced by
            ``self.flatten_metadata``.
        """
        def udf_values(artifact_list, udf_name):
            # String values of udf_name for the artifacts that define it.
            return [
                str(item.udf[udf_name]) for item in artifact_list
                if udf_name in item.udf
            ]

        collected = {}
        for sample in self.samples:
            per_sample = collected[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type in ('Sample', 'Project'):
                    continue
                wanted = pt_to_udfs[process_type]
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                for udf_name in wanted.get("outputs", []):
                    per_sample[udf_name] = udf_values(artifacts, udf_name)

                # Inputs of the producing processes, restricted to the ones
                # that belong to this sample.
                parent_processes = set([art.parent_process for art in artifacts])
                inputs = []
                for process in parent_processes:
                    for art in process.all_inputs():
                        if sample.name in [s.name for s in art.samples]:
                            inputs.append(art)
                for udf_name in wanted.get("inputs", []):
                    per_sample[udf_name] = udf_values(inputs, udf_name)

        return self.flatten_metadata(collected)

    def update_multiqc_report(self):
        """Push the collected header and general-statistics data to MultiQC.

        One dict per entry of ``self.header_metadata`` is appended to
        ``config.report_header_info``. A header definition is built for every
        name listed under ``self.schema["General Statistics"]`` and appended,
        together with ``self.general_metadata``, to the report's general
        statistics.
        """
        if config.report_header_info is None:
            config.report_header_info = []
        for entry in self.header_metadata.values():
            # A shallow copy of the entry is appended, not the entry itself.
            config.report_header_info.append(dict(entry))

        headers = {}
        for group, names in self.schema["General Statistics"].items():
            for header in names:
                headers[header] = {
                    'description': group,
                    'namespace': 'Clarity',
                    'scale': 'YlGn'
                }

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Build the "Clarity Data" report section from ``self.tab_metadata``.

        A header definition is created for each distinct field name found in
        ``self.tab_metadata``. Its namespace is the ``'Clarity Tab'`` schema
        key that lists the field (directly or one level down), or the field
        name itself when no key matches. The resulting table is appended to
        ``self.sections``.
        """
        def namespace_for(header):
            # The last schema key that mentions the header wins.
            tab_schema = self.schema['Clarity Tab']
            namespace = header
            for key in tab_schema:
                entry = tab_schema[key]
                if header in entry:
                    namespace = key
                elif isinstance(entry, dict):
                    for subkey in entry:
                        if header in entry[subkey]:
                            namespace = key
            return namespace

        headers = OrderedDict()
        for fields in self.tab_metadata.values():
            for header in fields:
                if header in headers:
                    continue
                headers[header] = {
                    'namespace': namespace_for(header),
                    'title': header,
                    'description': header
                }

        self.sections.append({
            'name': 'Clarity Data',
            'anchor': 'clarity_data',
            'content': (
                '<p> Data obtained from Illumina Basespace Clarity LIMS.</p>' +
                table.plot(self.tab_metadata, headers)),
        })
Exemplo n.º 9
0
class MultiQC_clarity_metadata(BaseMultiqcModule):
    def __init__(self):
        """Set up the Clarity LIMS module and feed its data into the report.

        Returns early, leaving the module only partly set up, when the
        plugin is disabled, when ``genologics`` cannot be imported, when no
        genologics configuration is found, or when ``config.clarity`` is not
        set. Otherwise the samples and the configured metadata parts are
        fetched from the LIMS and added to the MultiQC report; an error
        during that step is logged and not re-raised.
        """
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS',
                                                       anchor='clarity')

        self.intro = '''<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" target="_blank">MultiQC_Clarity</a>
            plugin fetches data from a specified
            <a href="https://www.genologics.com/clarity-lims/" target="_blank">Basespace Clarity LIMS</a> instance.</p>'''

        try:
            from genologics.lims import Lims
            from genologics import config as genologics_config
        except (Exception, SystemExit):
            # SystemExit stays in the list on purpose: load_config() below
            # reports a missing rc file that way, and the import itself may
            # do the same (NOTE(review): confirm for the genologics version
            # in use). KeyboardInterrupt is no longer swallowed.
            self.log.warning("Importing genologics failed: " +
                             traceback.format_exc())
            return

        try:
            BASEURI, USERNAME, PASSWORD, VERSION, MAIN_LOG = genologics_config.load_config(
                specified_config=config.kwargs.get('clarity_config'))
        except SystemExit:
            self.log.warning(
                "Genologics config file is not specified as --clarity_config or in ~/.genologicsrc. "
                "Skip running Clarity module")
            return

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return
        try:
            self.get_samples()
            if 'report_header_info' in self.schema:
                self.get_metadata('report_header_info')
            if 'general_stats' in self.schema:
                self.get_metadata('general_stats')
            if 'clarity_module' in self.schema:
                self.get_metadata('clarity_module')
            self.update_multiqc_report()
            self.make_sections()
            report.modules_output.append(self)
        except Exception:
            # Best effort: a LIMS problem must not abort the whole report,
            # but Ctrl-C / interpreter exit are allowed to propagate.
            self.log.error("MultiQC_Clarity failed: " + traceback.format_exc())

    def csv_file_from_samplesheet(self, sample_sheet):
        """Return the lines that follow the ``[Data]`` marker of a sample sheet.

        :param sample_sheet: path of the sample sheet file.
        :returns: list of whitespace-stripped lines after the first line that
            starts with ``[Data]``; empty when no such line exists.
        """
        with open(sample_sheet) as handle:
            # Consume everything up to and including the marker line ...
            for row in handle:
                if row.strip().startswith('[Data]'):
                    break
            # ... and keep whatever is left in the file.
            return [row.strip() for row in handle]

    def get_raw_sample_names(self, csv_fpath, names):
        """Map report sample names to the raw names found in a CSV file.

        Each row is identified by its ``description`` column when the CSV has
        one, otherwise by its first column. Rows whose identifier is not in
        ``names`` are ignored; for the others the identifier is mapped to the
        value of the first column. Later rows override earlier ones.

        :param csv_fpath: path of the CSV file.
        :param names: container of sample names to keep.
        :returns: dict ``{identifier: first-column value}``.
        """
        pairs = []
        with open(csv_fpath) as handle:
            reader = csv.DictReader(handle)
            first_column = reader.fieldnames[0]
            for row in reader:
                if 'description' in row:
                    wanted = row['description']
                else:
                    wanted = row[first_column]
                if wanted in names:
                    pairs.append((wanted, row[first_column]))
        return dict(pairs)

    def correct_sample_name(self, name):
        """Normalise a raw sample name.

        A trailing ``_S<digits>`` is removed and every ``.`` is replaced by
        ``_``.

        :param name: raw sample name.
        :returns: the normalised name.
        """
        import re
        without_index = re.compile(r'_S\d+$').sub('', name)
        return '_'.join(without_index.split('.'))

    def search_by_samplesheet(self, names):
        """Find the LIMS samples for ``names`` through the sample sheet.

        The ``[Data]`` section of ``config.kwargs['samplesheet']`` is read
        row by row. Rows whose sample name does not belong to the report are
        skipped. For the others the sample is looked up by its LIMS id; when
        that finds nothing, the plate/well of the row is remembered and
        resolved afterwards through the container placements. Every sample
        found is renamed to its report name and appended to ``self.samples``.

        :param names: iterable of sample names used in the report.
        """
        sample_sheet_fpath = config.kwargs['samplesheet']
        samples_by_container = defaultdict(dict)
        raw_names = dict((name, name) for name in names)
        bcbio_csv = config.kwargs.get('bcbio_csv')
        if bcbio_csv and isfile(bcbio_csv):
            # update() rather than replace: names that the bcbio CSV does not
            # list keep mapping to themselves instead of raising KeyError
            # in the lookup below.
            raw_names.update(self.get_raw_sample_names(bcbio_csv, names))

        # corrected sample-sheet name -> name used in the report
        correct_sample_names = dict(
            (self.correct_sample_name(raw_names[name]), name)
            for name in names)
        for row in csv.DictReader(
                self.csv_file_from_samplesheet(sample_sheet_fpath),
                delimiter=','):
            sample_name = row['SampleName'] if 'SampleName' in row else (
                row['Sample_Name']
                if 'Sample_Name' in row else row['SampleRef'])
            sample_id = row['SampleID'] if 'SampleID' in row else row[
                'Sample_ID']
            if not sample_name or sample_name not in correct_sample_names:
                # The sheet may list samples that are not in this report;
                # looking them up would end in a KeyError on renaming.
                continue
            sample_artifacts = self.lims.get_artifacts(samplelimsid=sample_id)
            if sample_artifacts:
                sample = sample_artifacts[0].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)
                continue
            try:
                container = row['SamplePlate']
                sample_well = row['SampleWell'].replace('_', ':')
            except (KeyError, AttributeError):
                # No plate/well columns (or an empty well): nothing to fall
                # back on for this row.
                continue
            samples_by_container[container][sample_well] = sample_name

        for container_id, samples in samples_by_container.items():
            artifacts = self.lims.get_artifacts(containerlimsid=container_id)
            if not artifacts:
                continue
            placements = artifacts[0].container.get_placements()
            for well, sample_name in samples.items():
                sample = placements[well].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)

    def get_samples(self):
        """Populate ``self.samples`` with the LIMS samples of this report.

        With ``clarity_project_name`` set, the samples of that project are
        used. Otherwise the sample names are taken from the report's general
        statistics and saved raw data, and looked up through the sample sheet
        (when ``samplesheet`` is set) or, failing that, by name. Names with
        no match or several matches are logged and skipped.
        """
        if config.kwargs.get('clarity_project_name'):
            # Was get_projects(name=...).samples, i.e. an attribute read on
            # the returned collection of projects. Ask the LIMS for the
            # project's samples directly instead.
            # NOTE(review): assumes get_samples() accepts projectname=, as
            # other genologics callers do - confirm.
            self.samples = self.lims.get_samples(
                projectname=config.kwargs['clarity_project_name'])
            self.log.info("Found {} in LIMS.".format(
                config.kwargs['clarity_project_name']))
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    # Was self.names.update(...): that attribute does not
                    # exist, so the AttributeError guard silently dropped
                    # every name from the raw data.
                    names.update(d.keys())
                except AttributeError:
                    # Entry without keys(); nothing to take from it.
                    pass
            # if not config.kwargs.get('clarity_skip_edit_names'):
            #    names = self.edit_names(names)

            self.log.debug("Looking into Clarity for samples {}".format(
                ", ".join(names)))
            if config.kwargs.get('samplesheet'):
                self.search_by_samplesheet(names)
            if not self.samples:
                try:
                    for name in names:
                        matching_samples = self.lims.get_samples(name=name)
                        if not matching_samples:
                            self.log.error(
                                "Could not find a sample matching {0}, skipping."
                                .format(name))
                            continue
                        if len(matching_samples) > 1:
                            self.log.error(
                                "Found multiple samples matching {0}, skipping"
                                .format(name))
                            continue
                        self.samples.append(matching_samples[0])
                except Exception as e:
                    # Logger.warn() is a deprecated alias of warning().
                    self.log.warning(
                        "Could not connect to Clarity LIMS: {}".format(e))
                    return None
            self.log.info("Found {} out of {} samples in LIMS.".format(
                len(self.samples), len(names)))

    def edit_names(self, names):
        """Return ``names`` with read-pair suffixes removed.

        Names ending in ``_1``, ``_2``, ``_R1`` or ``_R2`` lose that suffix;
        all other names are kept as they are.

        :param names: iterable of sample name strings.
        :returns: list of names in iteration order.
        """
        suffixes = ("_1", "_2", "_R1", "_R2")
        cleaned = []
        for original in names:
            trimmed = original
            for suffix in suffixes:
                if original.endswith(suffix):
                    trimmed = original[:-len(suffix)]
                    break
            cleaned.append(trimmed)

        return cleaned

    def flatten_metadata(self, metadata):
        """Turn set/list values of a two-level dict into ``", "``-joined text.

        Set values are sorted before joining so the text is reproducible
        between runs; list values keep their order. Values of any other type
        are left as they are.

        :param metadata: dict of dicts; updated in place.
        :returns: the same ``metadata`` object.
        """
        for first_level in metadata:
            inner = metadata[first_level]
            for second_level in inner:
                value = inner[second_level]
                if isinstance(value, (set, list)):
                    # A set has no defined order; sort it so the report does
                    # not change with hash ordering.
                    items = sorted(value) if isinstance(value, set) else value
                    inner[second_level] = ", ".join(items)

        return metadata

    def get_project_metadata(self, udfs):
        """Gather project UDF values for the projects of ``self.samples``.

        Each UDF in ``udfs`` that the sample's project defines is stored, as
        a string, in a set keyed by project name and UDF name. Samples that
        share a project name contribute to the same entry.

        :param udfs: iterable of UDF names to read.
        :returns: ``{project name: {udf: value string}}`` as produced by
            ``self.flatten_metadata``.
        """
        project_metadata = {}
        for sample in self.samples:
            project = sample.project
            # Do not start from an empty dict for every sample: that would
            # throw away what earlier samples of the project contributed.
            per_project = project_metadata.setdefault(project.name, {})
            for udf in udfs:
                if udf not in project.udf:
                    continue
                per_project.setdefault(udf, set()).add(str(project.udf[udf]))

        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Gather sample UDF values and link one of them to the LIMS.

        For every sample the UDFs in ``udfs`` are collected as strings. The
        ``'Sample Tissue'`` value (or, failing that, ``'Sample Type'``) is
        stored under ``'Sample Type'`` wrapped in a link to the sample's LIMS
        search page; when neither exists, ``'Sample Conc.'`` is wrapped
        instead. ``report.lims_added`` and ``report.lims_col`` record whether
        and in which column the link was placed.

        :param udfs: iterable of UDF names to read.
        :returns: ``{sample name: {udf: value string}}`` as produced by
            ``self.flatten_metadata``.
        """
        sample_metadata = {}
        report.lims_col = 'sample type'
        for sample in self.samples:
            # Fresh dict per sample (kept from the original): the values are
            # turned into link strings below, so they cannot be accumulated
            # across samples.
            fields = sample_metadata[sample.name] = dict()
            for udf in udfs:
                if udf in sample.udf:
                    # setdefault replaces a bare "except:" that created the
                    # set on any error whatsoever.
                    fields.setdefault(udf, set()).add(str(sample.udf[udf]))
            sample_type = None
            if 'Sample Tissue' in fields:
                sample_type = fields.pop('Sample Tissue')
            elif 'Sample Type' in fields:
                sample_type = fields.pop('Sample Type')
            sample_link = join(self.lims.baseuri, 'clarity',
                               'search?scope=Sample&query=' + sample.id)
            if sample_type:
                fields['Sample Type'] = (
                    '<a href="' + sample_link + '" target="_blank">' +
                    sample_type.pop() + '</a>')
                report.lims_added = True
            elif 'Sample Conc.' in fields:
                fields['Sample Conc.'] = (
                    '<a href="' + sample_link + '" target="_blank">' +
                    fields['Sample Conc.'].pop() + '</a>')
                report.lims_added = True

        has_type = [
            'Sample Type' in sample_metadata[sample.name]
            for sample in self.samples
        ]
        if not any(has_type):
            report.lims_col = 'sample conc'
        elif not all(has_type):
            report.lims_col = 'sample type or sample conc'
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch the metadata configured for one schema part and store it.

        The keys of ``self.schema[part]`` select the source: ``'Project'``
        and ``'Sample'`` use their dedicated getters, any other key is a
        process type handled by ``get_artifact_metadata``. Results are merged
        into ``self.header_metadata`` (``"report_header_info"``),
        ``self.general_metadata`` (``"general_stats"``) or
        ``self.tab_metadata`` (any other part).

        :param part: name of the schema section to process.
        """
        schema_part = self.schema[part]
        destinations = {
            "report_header_info": self.header_metadata,
            "general_stats": self.general_metadata,
        }
        target = destinations.get(part, self.tab_metadata)

        artifacts_done = False
        for key in schema_part:
            if key == 'Project':
                metadata = self.get_project_metadata(schema_part['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(schema_part['Sample'])
            elif artifacts_done:
                # get_artifact_metadata() already covered every process type
                # of this part; do not query the LIMS again.
                continue
            else:
                metadata = self.get_artifact_metadata(schema_part)
                artifacts_done = True

            # Merge field by field. target.update(metadata) would swap out
            # the whole per-sample dict and lose fields stored by an earlier
            # key (e.g. Sample UDFs once artifact UDFs arrive).
            for name, fields in metadata.items():
                target.setdefault(name, {}).update(fields)

    def get_artifact_metadata(self, pt_to_udfs):
        """Gather artifact UDF values for every sample and process type.

        The keys ``'Sample'`` and ``'Project'`` of ``pt_to_udfs`` are
        ignored. For each remaining process type the sample's artifacts are
        requested from ``self.lims``; ``"outputs"`` UDFs are read from them,
        ``"inputs"`` UDFs from those inputs of their parent processes that
        contain the sample.

        :param pt_to_udfs: mapping of process type to a mapping with optional
            ``"outputs"`` / ``"inputs"`` UDF name collections.
        :returns: ``{sample name: {udf: value string}}`` as produced by
            ``self.flatten_metadata``.
        """
        skipped = ('Sample', 'Project')
        result = {}
        for sample in self.samples:
            result[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type in skipped:
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    result[sample.name][udf_name] = [
                        str(produced.udf[udf_name]) for produced in artifacts
                        if udf_name in produced.udf
                    ]

                # Inputs of the producing processes that carry this sample.
                inputs = []
                for process in set([art.parent_process for art in artifacts]):
                    inputs += [
                        candidate for candidate in process.all_inputs()
                        if sample.name in [s.name for s in candidate.samples]
                    ]
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    result[sample.name][udf_name] = [
                        str(consumed.udf[udf_name]) for consumed in inputs
                        if udf_name in consumed.udf
                    ]

        return self.flatten_metadata(result)

    def update_multiqc_report(self):
        """Push the collected header and general-statistics data to MultiQC.

        One dict per entry of ``self.header_metadata`` is appended to
        ``config.report_header_info``. For every name listed under the
        ``general_stats`` schema a header definition is built: per-header
        settings from the schema are copied, ``multiply_by`` becomes a
        ``modify`` callable, and ``description``, ``namespace`` and ``scale``
        get defaults. Headers and ``self.general_metadata`` are appended to
        the report's general statistics.
        """
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            config.report_header_info.append(
                dict(self.header_metadata[first_level]))

        # 'general_stats' is optional in the schema (the constructor only
        # fetches it when present), so do not index it unconditionally.
        general_schema = (self.schema["general_stats"]
                          if "general_stats" in self.schema else {})

        headers = {}
        for first_level in general_schema:
            section = general_schema[first_level]
            for header in section:
                column = headers[header] = {}
                # A section given as a plain list of names has no settings.
                settings = section[header] if isinstance(section,
                                                         dict) else None
                if isinstance(settings, dict):
                    for subsubkey, cfg in settings.items():
                        if subsubkey == 'multiply_by':
                            # Bind the factor as a default argument: a plain
                            # closure would make every header use the factor
                            # of the last one processed.
                            column['modify'] = (
                                lambda x, mby=str(cfg): float(x) * float(mby))
                        else:
                            column[subsubkey] = cfg
                column.setdefault('description',
                                  '{} - {}'.format(first_level, header))
                column.setdefault('namespace', 'Clarity LIMS')
                column.setdefault('scale', 'YlGn')

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Append the Clarity data table to the module introduction.

        A header definition is created for each distinct field name in
        ``self.tab_metadata``. Its namespace defaults to the
        ``clarity_module`` schema key that lists the field (directly or one
        level down) or, without a match, to the field name. Per-field
        settings found one level down are copied, with ``multiply_by``
        turned into a ``modify`` callable.
        """
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                if header in headers:
                    continue
                desc = header
                column = headers[header] = {}
                for key in self.schema['clarity_module']:
                    section = self.schema['clarity_module'][key]
                    if header in section:
                        desc = key
                    elif isinstance(section, dict):
                        for subkey, val in section.items():
                            # An empty sub-section must not hide the ones
                            # after it (this used to "break").
                            if val is None or header not in val:
                                continue
                            desc = key
                            # A sub-section given as a plain list of names
                            # carries no per-field settings.
                            settings = val[header] if isinstance(
                                val, dict) else None
                            if not isinstance(settings, dict):
                                continue
                            for subsubkey, cfg in settings.items():
                                if subsubkey == 'multiply_by':
                                    # Default argument binds this field's
                                    # factor; a plain closure would share
                                    # the last factor between all fields.
                                    column['modify'] = (
                                        lambda x, mby=str(cfg):
                                        float(x) * float(mby))
                                else:
                                    column[subsubkey] = cfg

                column.setdefault('namespace', desc)
                column.setdefault('title', header)
                column.setdefault('description', header)

        self.intro += table.plot(self.tab_metadata, headers)
Exemplo n.º 10
0
Per Kraulis, Science for Life Laboratory, Stockholm, Sweden.
"""

from genologics.lims import Lims

# Login parameters for connecting to a LIMS instance.
# NOTE: Modify according to your setup.
from genologics.site_cloud import BASEURI, USERNAME, PASSWORD

# Create the LIMS interface instance, and check the connection and version.
lims = Lims(BASEURI, USERNAME, PASSWORD)
lims.check_version()

# Get the list of all samples.
samples = lims.get_samples()
print len(samples), 'samples in total'

# Get the list of samples in the project with the LIMS id KLL60.
project = lims.get_project('KLL60')
samples = lims.get_samples(projectlimsid=project.id)
print len(samples), 'samples in', project

print
# Get the sample with the LIMS id JGR58A21, and print info and its UDFs.
sample = lims.get_sample('JGR58A21')
print sample.id, sample.name, sample.date_received, sample.uri,
for key, value in sample.udf.items():
    print ' ', key, '=', value

# Get the sample with the name 'Joels proper sample-20'.