Example #1
def get_project(api: lims.Lims,
                project_name: str):
    """Return the single LIMS project matching project_name."""
    projects = api.get_projects(name=project_name)
    assert isinstance(projects, list)
    assert len(projects) == 1, 'expected exactly one project named %s, got %d' % (project_name, len(projects))
    return projects[0]
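A minimal usage sketch (assuming credentials come from the standard genologics config module; the project name is a placeholder):

from genologics.lims import Lims
from genologics.config import BASEURI, USERNAME, PASSWORD

api = Lims(BASEURI, USERNAME, PASSWORD)
project = get_project(api, 'P1234')  # placeholder project name
print(project.id, project.name)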
Example #2
def generate_output(project_id, dest_plate_list, best_sample_struct,total_lanes, req_lanes, lane_maps, rounded_ratios, 
                    target_clusters, clusters_per_lane, extra_lanes, lane_volume, pool_excess, final_pool_sizes, volume_ratios, desired_ratios):
    """"Gathers the container id and well name for all samples in project"""
    timestamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d_%H:%M')
    
    # Credit to Denis for providing a base EPP
    location = dict()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == project_id:
            projName = proj.name 
            break
    
    #Sets up source id    
    #All normalization processes for project
    norms=['Library Normalization (MiSeq) 4.0', 'Library Normalization (Illumina SBS) 4.0','Library Normalization (HiSeq X) 1.0']
    pros=lims.get_processes(type=norms, projectname=projName)
    #For all processes
    for p in pros:
        #For all artifacts in process
        for o in p.all_outputs():
            #If artifact is analyte type and has project name in sample
            if o.type=="Analyte" and project_id in o.name:
                location[o.name.split()[0]] = list()
                location[o.name.split()[0]].append(o.location[0].id)
                location[o.name.split()[0]].append(o.location[1])
    
    #Continue coding from here
    generate_summary(projName, best_sample_struct, timestamp, project_id, dest_plate_list, total_lanes, req_lanes, 
                     lane_maps, rounded_ratios, target_clusters, clusters_per_lane, extra_lanes, volume_ratios, desired_ratios, lane_volume, pool_excess)
    generate_csv(projName, timestamp, location, dest_plate_list, total_lanes, best_sample_struct, rounded_ratios, lane_volume, pool_excess, final_pool_sizes)
    generate_dumpfile(projName, timestamp, location, dest_plate_list, total_lanes, best_sample_struct, rounded_ratios, lane_volume, pool_excess, final_pool_sizes)
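For reference, the `location` dict built above maps each sample name to its container id and well; a hypothetical entry looks like this:

location = {
    'P1234_101': ['27-123456', 'A:1'],  # sample name -> [container LIMS id, well position]
}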
Example #3
class MultiQC_clarity_metadata(BaseMultiqcModule):
    def __init__(self):

        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return None
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return None

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS',
                                                       anchor='clarity')

        self.intro = '''<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" target="_blank">MultiQC_Clarity</a>
            plugin fetches data from a specified
            <a href="https://www.genologics.com/clarity-lims/" target="_blank">Basespace Clarity LIMS</a> instance.</p>'''

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return None

        self.get_samples()
        self.get_metadata('report_header_info')
        self.get_metadata('general_stats')
        self.get_metadata('clarity_module')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    def get_samples(self):
        if config.kwargs.get('clarity_project_name'):
            pj = self.lims.get_projects(
                name=config.kwargs['clarity_project_name'])
            self.samples = pj[0].samples  # get_projects returns a list of matches
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    names.update(d.keys())
                except AttributeError:
                    pass
            if not config.kwargs.get('clarity_skip_edit_names'):
                names = self.edit_names(names)

            self.log.debug("Looking into Clarity for samples {}".format(
                ", ".join(names)))
            found = 0
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error(
                            "Could not find a sample matching {0}, skipping.".
                            format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error(
                            "Found multiple samples matching {0}, skipping".
                            format(name))
                        continue
                    found += 1
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warn(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None
        self.log.info("Found {} out of {} samples in LIMS.".format(
            found, len(names)))

    def edit_names(self, names):
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)

        return edited

    def flatten_metadata(self, metadata):
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level],
                              set) or isinstance(
                                  metadata[first_level][second_level], list):
                    metadata[first_level][second_level] = ", ".join(
                        metadata[first_level][second_level])

        return metadata

    def get_project_metadata(self, udfs):
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    project_metadata[sample.project.name].setdefault(
                        udf, set()).add(str(sample.project.udf[udf]))

        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        sample_metadata = {}
        for sample in self.samples:
            sample_metadata[sample.name] = {}
            for udf in udfs:
                if udf in sample.udf:
                    sample_metadata[sample.name].setdefault(
                        udf, set()).add(str(sample.udf[udf]))

        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        for key in self.schema[part]:
            if key == 'Project':
                metadata = self.get_project_metadata(
                    self.schema[part]['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(
                    self.schema[part]['Sample'])
            else:
                metadata = self.get_artifact_metadata(self.schema[part])

            if part == "report_header_info":
                self.header_metadata.update(metadata)
            elif part == "general_stats":
                self.general_metadata.update(metadata)
            else:
                self.tab_metadata.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type == 'Sample':
                    continue
                if process_type == 'Project':
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    values = []
                    for artifact in artifacts:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))

                    artifact_metadata[sample.name][udf_name] = values

                processes = set([art.parent_process for art in artifacts])
                inputs = []
                for p in processes:
                    inputs.extend([
                        art for art in p.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    values = []
                    for artifact in inputs:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))

                    artifact_metadata[sample.name][udf_name] = values

        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            d = {}
            for key in self.header_metadata[first_level]:
                d[key] = self.header_metadata[first_level][key]
            config.report_header_info.append(d)

        headers = {}
        for first_level in self.schema["general_stats"]:
            for header in self.schema["general_stats"][first_level]:
                headers[header] = {}
                if isinstance(
                        self.schema["general_stats"][first_level][header],
                        dict):
                    for subsubkey, cfg in self.schema["general_stats"][
                            first_level][header].items():
                        if subsubkey == 'multiply_by':
                            # Bind the factor at definition time; a plain
                            # closure would capture the loop variable.
                            headers[header]['modify'] = (
                                lambda x, m=float(cfg): float(x) * m)
                        else:
                            headers[header][subsubkey] = cfg
                headers[header]['description'] = headers[header].get(
                    'description', '{} - {}'.format(first_level, header))
                headers[header]['namespace'] = headers[header].get(
                    'namespace', 'Clarity LIMS')
                headers[header]['scale'] = headers[header].get('scale', 'YlGn')

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                desc = header
                if header not in headers:
                    headers[header] = {}
                    for key in self.schema['clarity_module']:
                        if header in self.schema['clarity_module'][key]:
                            desc = key
                        elif isinstance(self.schema['clarity_module'][key],
                                        dict):
                            for subkey, val in self.schema['clarity_module'][
                                    key].items():
                                if val is None:
                                    break
                                elif header in val:
                                    desc = key
                                    if isinstance(val[header], dict):
                                        for subsubkey, cfg in val[
                                                header].items():
                                            if subsubkey == 'multiply_by':
                                                # Bind the factor now; a closure
                                                # over mby would see only the
                                                # last value of the loop.
                                                headers[header]['modify'] = (
                                                    lambda x, m=float(cfg): float(x) * m)
                                            else:
                                                headers[header][subsubkey] = cfg

                    headers[header]['namespace'] = headers[header].get(
                        'namespace', desc)
                    headers[header]['title'] = headers[header].get(
                        'title', header)
                    headers[header]['description'] = headers[header].get(
                        'description', header)

        self.intro += table.plot(self.tab_metadata, headers)
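The class above reads its instructions from a `clarity` section of the MultiQC config. A sketch of the expected shape, inferred from the accessors above ('Project' and 'Sample' keys map to UDFs, any other key is a process type); every UDF and process-type name here is illustrative:

clarity = {
    'report_header_info': {
        'Project': ['Project Title', 'Enquiry Number'],       # project-level UDFs
    },
    'general_stats': {
        'Sample': {
            'Sample Conc.': {'multiply_by': 0.001},           # per-header options
        },
    },
    'clarity_module': {
        'Sample': ['Total DNA (ng)'],                         # sample-level UDFs
        'CaliperGX QC (DNA)': {'outputs': ['Size (bp)']},     # process type -> artifact UDFs
    },
}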
Example #4
class ProjectReport:

    def __init__(self, project_name):
        self.project_name = project_name
        self.project_source = os.path.join(cfg.query('sample','delivery_source'), project_name)
        self.project_delivery = os.path.join(cfg.query('sample','delivery_dest'), project_name)
        self.lims=Lims(**cfg.get('clarity'))
        self.params = {'project_name':project_name}
        self.results = {}
        self.fill_sample_names_from_lims()
        self.samples_delivered = self.read_metrics_csv(os.path.join(self.project_delivery, 'summary_metrics.csv'))
        self.get_sample_param()
        self.fill_project_information_from_lims()

    def fill_project_information_from_lims(self):
        project = self.lims.get_projects(name=self.project_name)[0]
        self.project_info = {}
        self.project_info['project_name']=['Project name:',self.project_name]
        self.project_info['project_title']=['Project title:', project.udf.get('Project Title', '')]
        self.project_info['enquiry'] = ['Enquiry no:', project.udf.get('Enquiry Number', '')]
        self.project_info['quote'] = ['Quote no:', project.udf.get('Quote No.', '')]
        self.project_info['researcher'] = ['Researcher:','%s %s (%s)'%(project.researcher.first_name,
                                                                       project.researcher.last_name,
                                                                       project.researcher.email)]
        self.project_order = ['project_name', 'project_title', 'enquiry', 'quote', 'researcher']


    def fill_sample_names_from_lims(self):
        samples = self.lims.get_samples(projectname=self.project_name)
        self.samples = [s.name for s in samples]
        self.modified_samples = [re.sub(r'[: ]','_', s.name) for s in samples]


    def get_library_workflow_from_sample(self, sample_name):
        samples = self.lims.get_samples(projectname=self.project_name, name=sample_name)
        if len(samples) == 1:
            return samples[0].udf.get('Prep Workflow')
        else:
            app_logger.error('%s samples found for sample name %s' % (len(samples), sample_name))

    def get_species_from_sample(self, sample_name):
        samples = self.lims.get_samples(projectname=self.project_name, name=sample_name)
        if len(samples) == 1:
            s = samples[0].udf.get('Species')
            return species_alias.get(s, s)
        else:
            app_logger.error('%s samples found for sample name %s' % (len(samples), sample_name))

    def parse_program_csv(self, program_csv):
        all_programs = {}
        if os.path.exists(program_csv):
            with open(program_csv) as open_prog:
                for row in csv.reader(open_prog):
                    all_programs[row[0]]=row[1]
        #TODO: change the hardcoded version of bcl2fastq
        all_programs['bcl2fastq'] = '2.17.1.14'
        for p in ['bcl2fastq','bcbio', 'bwa', 'gatk', 'samblaster']:
            if p in all_programs:
                self.params[p + '_version']=all_programs.get(p)
        

    def parse_project_summary_yaml(self, summary_yaml):
        with open(summary_yaml, 'r') as open_file:
            full_yaml = yaml.safe_load(open_file)
        sample_yaml=full_yaml['samples'][0]
        path_to_bcbio = os.path.basename(os.path.dirname(sample_yaml['dirs']['galaxy']))
        self.params['bcbio_version'] = path_to_bcbio.split('/')[-2]
        if sample_yaml['genome_build'] == 'hg38':
            self.params['genome_version'] = 'GRCh38 (with alt, decoy and HLA sequences)'

    def read_metrics_csv(self, metrics_csv):
        samples_to_info={}
        with open(metrics_csv) as open_metrics:
            reader = csv.DictReader(open_metrics, delimiter='\t', quoting=csv.QUOTE_NONE)
            for row in reader:
                samples_to_info[row['Sample Id']] = row
        return samples_to_info

    def get_sample_param(self):
        self.fill_sample_names_from_lims()
        project_size = 0
        library_workflows=set()
        species = set()
        for sample in self.samples:
            library_workflow = self.get_library_workflow_from_sample(sample)
            library_workflows.add(library_workflow)
            species.add(self.get_species_from_sample(sample))
        if len(library_workflows) == 1:
            self.library_workflow = library_workflows.pop()
        else:
            app_logger.error('More than one workflow used in project %s: %s' % (self.project_name, ', '.join(library_workflows)))

        if len(species) == 1:
            self.species = species.pop()
        else:
            app_logger.error('More than one species used in project %s: %s' % (self.project_name, ', '.join(species)))


        if self.library_workflow in ['TruSeq Nano DNA Sample Prep', None] :
            self.template = 'truseq_nano_template'
        elif self.library_workflow in ['TruSeq PCR-Free DNA Sample Prep', 'TruSeq PCR-Free Sample Prep'] :
            self.template = 'truseq_pcrfree_template'
        else:
            app_logger.error('Unknown library workflow %s for project %s'%(self.library_workflow, self.project_name))
            return None

        if self.species == 'Human':
            self.template += '.html'
        else:
            self.template += '_non_human.html'

        self.params['adapter1'] = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
        self.params['adapter2'] = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"

        project_size = getFolderSize(self.project_delivery)
        for sample in set(self.modified_samples):
            sample_source=os.path.join(self.project_source, sample)
            if os.path.exists(sample_source):
                program_csv = os.path.join(sample_source, 'programs.txt')
                if not os.path.exists(program_csv):
                    program_csv = os.path.join(sample_source, '.qc', 'programs.txt')
                self.parse_program_csv(program_csv)
                summary_yaml = os.path.join(sample_source, 'project-summary.yaml')
                if not os.path.exists(summary_yaml):
                    summary_yaml = os.path.join(sample_source, '.qc', 'project-summary.yaml')
                if os.path.exists(summary_yaml):
                    self.parse_project_summary_yaml(summary_yaml)

        self.results['project_size'] = ['Total folder size:', '%.2f TB' % (project_size / 1000000000000.0)]
        self.results['nb_sample'] = ['Number of samples:', len(self.samples)]
        self.results['nb_sample_delivered'] = ['Number of samples delivered:', len(self.samples_delivered)]
        yields = [float(self.samples_delivered[s]['Yield']) for s in self.samples_delivered]
        self.results['yield'] = ['Total yield (Gb):', '%.2f' % sum(yields)]
        self.results['mean_yield'] = ['Average yield (Gb):', '%.1f' % (sum(yields) / max(len(yields), 1))]

        try:
            coverage = [float(self.samples_delivered[s]['Mean coverage']) for s in self.samples_delivered]
            self.results['coverage'] = ['Average coverage per sample:', '%.2f' % (sum(coverage) / max(len(coverage), 1))]
            self.results_order=['nb_sample','nb_sample_delivered', 'yield', 'mean_yield', 'coverage', 'project_size']
        except KeyError:
            self.results_order=['nb_sample','nb_sample_delivered', 'yield', 'mean_yield', 'project_size']



    def generate_report(self):
        template_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates'))
        env = Environment(loader=FileSystemLoader(template_dir))
        template = env.get_template(self.template)
        output = template.render(results_order=self.results_order, results=self.results,
                                 project_info=self.project_info, project_order=self.project_order,
                                 **self.params)
        pdf = get_pdf(output)
        project_file = os.path.join(self.project_delivery, 'project_%s_report.pdf'%self.project_name)
        with open(project_file, 'wb') as open_pdf:
            open_pdf.write(pdf.getvalue())
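A minimal driver sketch, assuming `cfg` resolves to valid delivery folders and Clarity credentials (the project name is a placeholder):

report = ProjectReport('X0001')  # placeholder project name
report.generate_report()         # renders the matching template and writes the PDF into the delivery folder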
Example #5
class MultiQC_clarity_metadata(BaseMultiqcModule):
    def __init__(self):

        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return None
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return None

        super(MultiQC_clarity_metadata, self).__init__(
            name='Clarity',
            anchor='clarity',
            href='https://github.com/Galithil/MultiQC_Clarity',
            info="fetches data from your Basespace Clarity LIMS instance.")

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []
        self.sections = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.warn("No config found for MultiQC_Clarity")
            return None

        self.get_samples()
        self.get_metadata('Header')
        self.get_metadata('General Statistics')
        self.get_metadata('Clarity Tab')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    def get_samples(self):
        if config.kwargs.get('clarity_project_name'):
            pj = self.lims.get_projects(
                name=config.kwargs['clarity_project_name'])
            self.samples = pj[0].samples  # get_projects returns a list of matches
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    names.update(d.keys())
                except AttributeError:
                    pass
            if not config.kwargs.get('clarity_skip_edit_names'):
                names = self.edit_names(names)

            self.log.debug("Looking into Clarity for samples {}".format(
                ", ".join(names)))
            found = 0
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error(
                            "Could not find a sample matching {0}, skipping.".
                            format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error(
                            "Found multiple samples matching {0}, skipping".
                            format(name))
                        continue
                    found += 1
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warn(
                    "Could not connect to Clarity LIMS: {}".format(e))
                return None
        self.log.info("Found {} out of {} samples in LIMS.".format(
            found, len(names)))

    def edit_names(self, names):
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)

        return edited

    def flatten_metadata(self, metadata):
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level],
                              set) or isinstance(
                                  metadata[first_level][second_level], list):
                    metadata[first_level][second_level] = ", ".join(
                        metadata[first_level][second_level])

        return metadata

    def get_project_metadata(self, udfs):
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    project_metadata[sample.project.name].setdefault(
                        udf, set()).add(str(sample.project.udf[udf]))

        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        sample_metadata = {}
        for sample in self.samples:
            sample_metadata[sample.name] = {}
            for udf in udfs:
                if udf in sample.udf:
                    sample_metadata[sample.name].setdefault(
                        udf, set()).add(str(sample.udf[udf]))

        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        for key in self.schema[part]:
            if key == 'Project':
                metadata = self.get_project_metadata(
                    self.schema[part]['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(
                    self.schema[part]['Sample'])
            else:
                metadata = self.get_artifact_metadata(self.schema[part])

            if part == "Header":
                self.header_metadata.update(metadata)
            elif part == "General Statistics":
                self.general_metadata.update(metadata)
            else:
                self.tab_metadata.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type == 'Sample':
                    continue
                if process_type == 'Project':
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    values = []
                    for artifact in artifacts:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))

                    artifact_metadata[sample.name][udf_name] = values

                processes = set([art.parent_process for art in artifacts])
                inputs = []
                for p in processes:
                    inputs.extend([
                        art for art in p.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    values = []
                    for artifact in inputs:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))

                    artifact_metadata[sample.name][udf_name] = values

        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            d = {}
            for key in self.header_metadata[first_level]:
                d[key] = self.header_metadata[first_level][key]
            config.report_header_info.append(d)

        headers = {}
        for first_level in self.schema["General Statistics"]:
            for header in self.schema["General Statistics"][first_level]:
                headers[header] = {
                    'description': first_level,
                    'namespace': 'Clarity',
                    'scale': 'YlGn'
                }

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                desc = header
                if header not in headers:
                    for key in self.schema['Clarity Tab']:
                        if header in self.schema['Clarity Tab'][key]:
                            desc = key
                        elif isinstance(self.schema['Clarity Tab'][key], dict):
                            for subkey in self.schema['Clarity Tab'][key]:
                                if header in self.schema['Clarity Tab'][key][
                                        subkey]:
                                    desc = key

                    headers[header] = {
                        'namespace': desc,
                        'title': header,
                        'description': header
                    }
        self.sections.append({
            'name': 'Clarity Data',
            'anchor': 'clarity_data',
            'content': ('<p>Data obtained from Illumina Basespace Clarity LIMS.</p>' +
                        table.plot(self.tab_metadata, headers))
        })
Example #6
def namesetter(PID):

    lims = Lims(BASEURI, USERNAME, PASSWORD)
    lims.check_version()
    #Find LIMS entry with same PID
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == PID:
            limsproject = proj.name
            break
    #Error handling
    if 'limsproject' not in locals():
        print("{} not available in LIMS.".format(PID))
        return None

    #Enter project summary process
    stepname=['Project Summary 1.3']
    process=lims.get_processes(type=stepname, projectname=limsproject)
    #Error handling
    if not process:
        print("{} for {} is not available in LIMS.".format(stepname, limsproject))
        return None

    loop = True
    while loop:
        if "Bioinfo responsible" in process[0].udf:
            response = process[0].udf["Bioinfo responsible"]
        else:
            response = "Unassigned"
        print("Existing Bioinfo responsible for project {} aka {} is: {}".format(limsproject, PID, response.encode('utf-8')))

        #Checks for valid name
        in_responsibles = False
        config_responsibles = Udfconfig(lims, id="1128")
        while not in_responsibles:
            if sys.version_info[0] == 3:
                newname = input("Enter name of new Bioinfo responsible: ")
            elif sys.version_info[0] == 2:
                newname = raw_input("Enter name of new Bioinfo responsible: ")
            for names in config_responsibles.presets:
                if newname in names:
                    in_responsibles = True
                    newname = names
            if not in_responsibles:
                print("Subset {} not found in accepted Bioinfo responsible list.".format(newname))
            else:
                print("Suggested name is {}".format(newname))

        if sys.version_info[0] == 3:
            confirmation = input("Project {} aka {} will have {} as new Bioinfo responsible, is this correct (Y/N)? ".format(limsproject, PID, newname))
        elif sys.version_info[0] == 2:
            confirmation = raw_input("Project {} aka {} will have {} as new Bioinfo responsible, is this correct (Y/N)? ".format(limsproject, PID, newname))
        if confirmation == 'Y' or confirmation == 'y':
            try:
                newname.encode('ascii')
                process[0].udf["Bioinfo responsible"] = str(newname)
                process[0].put()
                print("Project {} aka {} assigned to {}".format(limsproject, PID, newname))
                return None
            except (UnicodeDecodeError, UnicodeEncodeError):
                #Weird solution due to put function
                process[0].udf["Bioinfo responsible"] = response
                print("ERROR: You tried to use a special character, didn't you? Don't do that. New standards and stuff...")
        elif confirmation == 'N' or confirmation == 'n':
            loop = False
        else:
            print("Invalid answer.")
Example #7
def generate_output(project, destid, total_lanes, req_lanes, lane_maps, acc_ratios):
    #Gathers the container id and well name for all samples in project
    # Credit to Denis for providing a base EPP
    location = dict()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == project:
            projName = proj.name 
            break

    #All normalization processes for project
    norms=['Library Normalization (MiSeq) 4.0', 'Library Normalization (Illumina SBS) 4.0','Library Normalization (HiSeq X) 1.0']
    pros=lims.get_processes(type=norms, projectname=projName)
    #For all processes
    for p in pros:
        #For all artifacts in process
        for o in p.all_outputs():
            #If artifact is analyte type and has project name in sample
            if o.type=="Analyte" and project in o.name:
                location[o.name.split()[0]] = list()
                location[o.name.split()[0]].append(o.location[0].id)
                location[o.name.split()[0]].append(o.location[1])
                
    #PRINT section
    #Print stats including duplicates
    timestamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d_%H:%M')
    sumName = projName,  "_summary_", timestamp,".txt"
    sumName = ''.join(sumName)
    with open(sumName, "w") as summary:
        if sum(req_lanes.values()) != 0:
            OPT = sum(total_lanes)/sum(req_lanes.values())
        else: 
            OPT = 0
        output = "Ideal lanes (same schema): ", str(sum(req_lanes.values())) , ", Total lanes: ", str(sum(total_lanes)), ", OPT: ", str(round(OPT,3)),'\n'
        output = ''.join(output)
        summary.write( output )
        output = "Unique pools: ", str(len(total_lanes)), ", Average pool duplication: ", str(sum(total_lanes)/float(len(total_lanes))) ,'\n'
        output = ''.join(output)
        summary.write( output )
        
        bin = 0
        for index in xrange(1, len(lane_maps)+1):
            bin  += 1
            summary.write('\n')
            output = "Wells ", str(bin) , '-' , str(bin+int(total_lanes[index-1])-1),':','\n'
            output = ''.join(output)
            summary.write( output )
            bin += int(total_lanes[index-1]-1)
            for counter in xrange(1, len(lane_maps[index])):
                output = str(lane_maps[index][counter]),' ', str(acc_ratios[index][counter]), "%",'\n'
                output = ''.join(output)
                summary.write( output )

    
    #Creates csv   
    name = projName,"_repool_",timestamp,".csv"
    name = ''.join(name)
    wells = ['Empty','A','B','C','D','E','F','G','H']
    #Index 0 is number, index 1 is Letter
    wellIndex = [1, 1]
    destNo = 0
    
    with open(name, 'w') as csvfile:
        writer = csv.writer(csvfile)
        for index in xrange(1, len(lane_maps)+1):
            for dupes in xrange(1, int(total_lanes[index-1])+1):
                if lane_maps[index] == 0:
                    raise Exception('Error: Project not logged in x_flowcells database!')
                
                for counter in xrange(1, len(lane_maps[index])):
                    #<source plate ID>,<source well>,<volume>,<destination plate ID>,<destination well>
                    #Destination well 200 microL, minimum pipette 2 microL; acc_ratios multiplied by 2.
                    sample = lane_maps[index][counter]
                    position = wells[wellIndex[1]],':',str(wellIndex[0])
                    position = ''.join(position)
                    try:
                        output = location[sample][0], location[sample][1], str(int(acc_ratios[index][counter]*2)), str(destid[destNo]), position
                    except KeyError:
                        print "Error: Samples incorrectly parsed into database, thus causing sample name conflicts!"
                        # Skip this sample; otherwise a stale 'output' row would be written.
                        continue
                    if acc_ratios[index][counter] != 0:
                        writer.writerow(output)
                #Increment wellsindex
                if not acc_ratios[index][counter] == 0:
                    if not wellIndex[1] >= 8:
                        wellIndex[1] += 1
                    else:
                        wellIndex[1] = 1
                        if not wellIndex[0] >= 8:
                            wellIndex[0] += 1
                        else:
                            wellIndex[0] = 1
                            destNo += 1
                            try:
                                destid[destNo]
                            except IndexError:
                                print "Critical error; not enough destination plates provided"
def namesetter(PID):

    lims = Lims(BASEURI, USERNAME, PASSWORD)
    lims.check_version()
    #Find LIMS entry with same PID
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == PID:
            limsproject = proj.name
            break
    #Error handling
    if 'limsproject' not in locals():
        print "{} not available in LIMS.".format(PID)
        return None
    
    #Enter project summary process
    stepname=['Project Summary 1.3']
    process=lims.get_processes(type=stepname, projectname=limsproject)
    #Error handling
    if not process:
        print "{} for {} is not available in LIMS.".format(stepname, limsproject)
        return None

    loop = True
    while loop:
        if "Bioinfo responsible" in process[0].udf:
            response = process[0].udf["Bioinfo responsible"]
        else:
            response = "Unassigned"
        print "Existing Bioinfo responsible for project {} aka {} is: {}".format(limsproject, PID, response.encode('utf-8'))
        
        #Checks for valid name
        in_responsibles = False
        config_responsibles = Udfconfig(lims, id="1128")
        while not in_responsibles:
            newname = raw_input("Enter name of new Bioinfo responsible: ")
            for names in config_responsibles.presets:
                if newname in names:
                    in_responsibles = True
                    newname = names
            if not in_responsibles:
                print "Subset {} not found in accepted Bioinfo responsible list.".format(newname)
            else:
                print "Suggested name is {}".format(newname)
        
        confirmation = raw_input("Project {} aka {} will have {} as new Bioinfo responsible, is this correct (Y/N)? ".format(limsproject, PID, newname))
        if confirmation == 'Y' or confirmation == 'y':
            try:
                newname.decode('ascii')
                process[0].udf["Bioinfo responsible"] = unicode(newname)
                process[0].put()
                print "Project {} aka {} assigned to {}".format(limsproject, PID, newname)
                return None
            except UnicodeDecodeError:
                #Weird solution due to put function
                process[0].udf["Bioinfo responsible"] = response
                print "ERROR: You tried to use a special character, didn't you? Don't do that. New standards and stuff..."
        elif confirmation == 'N' or confirmation == 'n':
            loop = False
        else:
            print "Invalid answer."
Example #9
class MultiQC_clarity_metadata(BaseMultiqcModule):
    def __init__(self):

        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info(
                "Skipping MultiQC_Clarity as disabled on command line")
            return
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug(
                "Skipping MultiQC_Clarity as specified in config file")
            return

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS',
                                                       anchor='clarity')

        self.intro = '''<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" target="_blank">MultiQC_Clarity</a>
            plugin fetches data from a specified
            <a href="https://www.genologics.com/clarity-lims/" target="_blank">Basespace Clarity LIMS</a> instance.</p>'''

        try:
            from genologics.lims import Lims
            from genologics import config as genologics_config
        except:
            self.log.warning("Importing genologics failed: " +
                             traceback.format_exc())
            return

        try:
            BASEURI, USERNAME, PASSWORD, VERSION, MAIN_LOG = genologics_config.load_config(
                specified_config=config.kwargs.get('clarity_config'))
        except SystemExit:
            self.log.warning(
                "Genologics config file is not specified as --clarity_config or in ~/.genologicsrc. "
                "Skip running Clarity module")
            return

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []

        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return
        try:
            self.get_samples()
            if 'report_header_info' in self.schema:
                self.get_metadata('report_header_info')
            if 'general_stats' in self.schema:
                self.get_metadata('general_stats')
            if 'clarity_module' in self.schema:
                self.get_metadata('clarity_module')
            self.update_multiqc_report()
            self.make_sections()
            report.modules_output.append(self)
        except:
            self.log.error("MultiQC_Clarity failed: " + traceback.format_exc())
            return

    def csv_file_from_samplesheet(self, sample_sheet):
        csv_lines = []
        with open(sample_sheet) as f:
            found_data = False
            for line in f:
                if found_data:
                    csv_lines.append(line.strip())
                else:
                    if line.strip().startswith('[Data]'):
                        found_data = True
        return csv_lines

    def get_raw_sample_names(self, csv_fpath, names):
        raw_sample_names = dict()
        with open(csv_fpath) as f:
            csv_reader = csv.DictReader(f)
            name_col = csv_reader.fieldnames[0]
            for r in csv_reader:
                correct_name = r['description'] if 'description' in r else r[
                    name_col]
                if correct_name not in names:
                    continue
                raw_sample_names[correct_name] = r[name_col]
        return raw_sample_names

    def correct_sample_name(self, name):
        import re
        name = re.sub(r'_S\d+$', '', name)
        return name.replace('.', '_')

    def search_by_samplesheet(self, names):
        sample_sheet_fpath = config.kwargs['samplesheet']
        samples_by_container = defaultdict(dict)
        raw_names = dict((name, name) for name in names)
        if config.kwargs.get('bcbio_csv') and isfile(
                config.kwargs.get('bcbio_csv')):
            raw_names = self.get_raw_sample_names(config.kwargs['bcbio_csv'],
                                                  names)

        correct_sample_names = dict(
            (self.correct_sample_name(raw_names[name]), name)
            for name in names)
        for row in csv.DictReader(
                self.csv_file_from_samplesheet(sample_sheet_fpath),
                delimiter=','):
            sample_name = row['SampleName'] if 'SampleName' in row else (
                row['Sample_Name']
                if 'Sample_Name' in row else row['SampleRef'])
            sample_id = row['SampleID'] if 'SampleID' in row else row[
                'Sample_ID']
            sample_artifacts = self.lims.get_artifacts(samplelimsid=sample_id)
            if sample_artifacts:
                sample = sample_artifacts[0].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)
            elif sample_name and sample_name in correct_sample_names.keys():
                try:
                    container, sample_well = row['SamplePlate'], row[
                        'SampleWell'].replace('_', ':')
                    samples_by_container[container][sample_well] = sample_name
                except:
                    pass

        for container_id, samples in samples_by_container.items():
            artifacts = self.lims.get_artifacts(containerlimsid=container_id)
            if not artifacts:
                continue
            placements = artifacts[0].container.get_placements()
            for well, sample_name in samples.items():
                sample = placements[well].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)

    def get_samples(self):
        if config.kwargs.get('clarity_project_name'):
            pj = self.lims.get_projects(
                name=config.kwargs['clarity_project_name'])
            self.samples = pj[0].samples  # get_projects returns a list of matches
            self.log.info("Found {} in LIMS.".format(
                config.kwargs['clarity_project_name']))
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    names.update(d.keys())
                except AttributeError:
                    pass
            # if not config.kwargs.get('clarity_skip_edit_names'):
            #    names = self.edit_names(names)

            self.log.debug("Looking into Clarity for samples {}".format(
                ", ".join(names)))
            if config.kwargs.get('samplesheet'):
                self.search_by_samplesheet(names)
            if not self.samples:
                try:
                    for name in names:
                        matching_samples = self.lims.get_samples(name=name)
                        if not matching_samples:
                            self.log.error(
                                "Could not find a sample matching {0}, skipping."
                                .format(name))
                            continue
                        if len(matching_samples) > 1:
                            self.log.error(
                                "Found multiple samples matching {0}, skipping"
                                .format(name))
                            continue
                        self.samples.append(matching_samples[0])
                except Exception as e:
                    self.log.warn(
                        "Could not connect to Clarity LIMS: {}".format(e))
                    return None
            self.log.info("Found {} out of {} samples in LIMS.".format(
                len(self.samples), len(names)))

    def edit_names(self, names):
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)

        return edited

    def flatten_metadata(self, metadata):
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level],
                              set) or isinstance(
                                  metadata[first_level][second_level], list):
                    metadata[first_level][second_level] = ", ".join(
                        metadata[first_level][second_level])

        return metadata

    def get_project_metadata(self, udfs):
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    project_metadata[sample.project.name].setdefault(
                        udf, set()).add(str(sample.project.udf[udf]))

        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        sample_metadata = {}
        report.lims_col = 'sample type'
        for sample in self.samples:
            sample_metadata[sample.name] = dict()
            for udf in udfs:
                if udf in sample.udf:
                    sample_metadata[sample.name].setdefault(
                        udf, set()).add(str(sample.udf[udf]))
            sample_type = None
            if 'Sample Tissue' in sample_metadata[sample.name]:
                sample_type = sample_metadata[sample.name].pop('Sample Tissue')
            elif 'Sample Type' in sample_metadata[sample.name]:
                sample_type = sample_metadata[sample.name].pop('Sample Type')
            sample_link = join(self.lims.baseuri, 'clarity',
                               'search?scope=Sample&query=' + sample.id)
            if sample_type:
                sample_metadata[sample.name][
                    'Sample Type'] = '<a href="' + sample_link + '" target="_blank">' + sample_type.pop(
                    ) + '</a>'
                report.lims_added = True
            elif 'Sample Conc.' in sample_metadata[sample.name]:
                sample_metadata[sample.name]['Sample Conc.'] = '<a href="' + sample_link + '" target="_blank">' + \
                                                               sample_metadata[sample.name]['Sample Conc.'].pop() + '</a>'
                report.lims_added = True
        if not any([
                'Sample Type' in sample_metadata[sample.name]
                for sample in self.samples
        ]):
            report.lims_col = 'sample conc'
        elif not all([
                'Sample Type' in sample_metadata[sample.name]
                for sample in self.samples
        ]):
            report.lims_col = 'sample type or sample conc'
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        for key in self.schema[part]:
            if key == 'Project':
                metadata = self.get_project_metadata(
                    self.schema[part]['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(
                    self.schema[part]['Sample'])
            else:
                metadata = self.get_artifact_metadata(self.schema[part])

            if part == "report_header_info":
                self.header_metadata.update(metadata)
            elif part == "general_stats":
                self.general_metadata.update(metadata)
            else:
                self.tab_metadata.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type == 'Sample':
                    continue
                if process_type == 'Project':
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    values = []
                    for artifact in artifacts:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))

                    artifact_metadata[sample.name][udf_name] = values

                processes = set([art.parent_process for art in artifacts])
                inputs = []
                for p in processes:
                    inputs.extend([
                        art for art in p.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    values = []
                    for artifact in inputs:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))

                    artifact_metadata[sample.name][udf_name] = values

        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            d = {}
            for key in self.header_metadata[first_level]:
                d[key] = self.header_metadata[first_level][key]
            config.report_header_info.append(d)

        headers = {}
        for first_level in self.schema["general_stats"]:
            for header in self.schema["general_stats"][first_level]:
                headers[header] = {}
                if isinstance(
                        self.schema["general_stats"][first_level][header],
                        dict):
                    for subsubkey, cfg in self.schema["general_stats"][
                            first_level][header].items():
                        if subsubkey == 'multiply_by':
                            # Bind the factor at definition time; a plain
                            # closure would capture the loop variable.
                            headers[header]['modify'] = (
                                lambda x, m=float(cfg): float(x) * m)
                        else:
                            headers[header][subsubkey] = cfg
                headers[header]['description'] = headers[header].get(
                    'description', '{} - {}'.format(first_level, header))
                headers[header]['namespace'] = headers[header].get(
                    'namespace', 'Clarity LIMS')
                headers[header]['scale'] = headers[header].get('scale', 'YlGn')

        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                desc = header
                if header not in headers:
                    headers[header] = {}
                    for key in self.schema['clarity_module']:
                        if header in self.schema['clarity_module'][key]:
                            desc = key
                        elif isinstance(self.schema['clarity_module'][key],
                                        dict):
                            for subkey, val in self.schema['clarity_module'][
                                    key].items():
                                if val is None:
                                    break
                                elif header in val:
                                    desc = key
                                    if isinstance(val[header], dict):
                                        for subsubkey, cfg in val[
                                                header].items():
                                            if subsubkey == 'multiply_by':
                                                # Bind the factor now; a closure
                                                # over mby would see only the
                                                # last value of the loop.
                                                headers[header]['modify'] = (
                                                    lambda x, m=float(cfg): float(x) * m)
                                            else:
                                                headers[header][subsubkey] = cfg

                    headers[header]['namespace'] = headers[header].get(
                        'namespace', desc)
                    headers[header]['title'] = headers[header].get(
                        'title', header)
                    headers[header]['description'] = headers[header].get(
                        'description', header)

        self.intro += table.plot(self.tab_metadata, headers)
Example #10
"""

import codecs

from genologics.lims import Lims

# Login parameters for connecting to a LIMS instance.
# NOTE: Modify according to your setup.
from genologics.site_cloud import BASEURI, USERNAME, PASSWORD

# Create the LIMS interface instance, and check the connection and version.
lims = Lims(BASEURI, USERNAME, PASSWORD)
lims.check_version()

# Get the list of all projects.
projects = lims.get_projects()
print len(projects), 'projects in total'

# Get the list of all projects opened since May 30th 2012.
day = '2012-05-30'
projects = lims.get_projects(open_date=day)
print len(projects), 'projects opened since', day

# Get the project with the specified LIMS id, and print some info.
project = lims.get_project('KRA61')
print project, project.name, project.open_date

print 'UDFs:'
for key, value in project.udf.items():
    if isinstance(value, unicode):
        value = codecs.encode(value, 'UTF-8')
    print ' ', key, '=', value