def get_project(api: "lims.Lims", project_name: str):
    """Look up a project by exact name and return the first match.

    Args:
        api: connected LIMS client used to run the query.
        project_name: exact project name to search for.

    Returns:
        The first project of the query result.

    Raises:
        AssertionError: if the query result is not a list of exactly two
            projects (note: asserts are stripped under ``python -O``).
    """
    project = api.get_projects(name=project_name)
    assert isinstance(project, list)
    # NOTE(review): asserting exactly TWO hits while returning only the first
    # looks suspicious -- confirm the intended cardinality (likely 1).
    # (A leftover debug print of the list length was removed here.)
    assert len(project) == 2
    return project[0]
def generate_output(project_id, dest_plate_list, best_sample_struct,total_lanes, req_lanes, lane_maps, rounded_ratios, target_clusters, clusters_per_lane, extra_lanes, lane_volume, pool_excess, final_pool_sizes, volume_ratios, desired_ratios):
    """Gather the container id and well name for every sample in the project,
    then emit the summary, CSV and dump files."""
    # Cred to Denis for providing a base epp
    run_stamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d_%H:%M')
    api = Lims(BASEURI, USERNAME, PASSWORD)

    # Resolve the human-readable project name from its LIMS id.
    for candidate in api.get_projects():
        if candidate.id == project_id:
            projName = candidate.name
            break

    # Normalization process types whose output analytes carry the source wells.
    norms = ['Library Normalization (MiSeq) 4.0', 'Library Normalization (Illumina SBS) 4.0','Library Normalization (HiSeq X) 1.0']
    sample_wells = dict()
    for step in api.get_processes(type=norms, projectname=projName):
        for art in step.all_outputs():
            # Only analyte artifacts tagged with the project id are relevant.
            if art.type == "Analyte" and project_id in art.name:
                key = art.name.split()[0]
                sample_wells[key] = [art.location[0].id, art.location[1]]

    generate_summary(projName, best_sample_struct, run_stamp, project_id, dest_plate_list, total_lanes, req_lanes, lane_maps, rounded_ratios, target_clusters, clusters_per_lane, extra_lanes, volume_ratios, desired_ratios, lane_volume, pool_excess)
    generate_csv(projName, run_stamp, sample_wells, dest_plate_list, total_lanes, best_sample_struct, rounded_ratios, lane_volume, pool_excess, final_pool_sizes)
    generate_dumpfile(projName, run_stamp, sample_wells, dest_plate_list, total_lanes, best_sample_struct, rounded_ratios, lane_volume, pool_excess, final_pool_sizes)
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """MultiQC plugin module that decorates a report with project-, sample-
    and artifact-level metadata fetched from a Basespace Clarity LIMS.

    Which UDFs to fetch is described by the ``clarity`` section of the
    MultiQC config (``self.schema``), split into three parts:
    ``report_header_info``, ``general_stats`` and ``clarity_module``.
    """

    def __init__(self):
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info("Skipping MultiQC_Clarity as disabled on command line")
            return None
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug("Skipping MultiQC_Clarity as specified in config file")
            return None

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS', anchor='clarity')

        self.intro = '''<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" target="_blank">MultiQC_Clarity</a> plugin fetches data from a specified <a href="https://www.genologics.com/clarity-lims/" target="_blank">Basespace Clarity LIMS</a> instance.</p>'''

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []
        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return None

        self.get_samples()
        self.get_metadata('report_header_info')
        self.get_metadata('general_stats')
        self.get_metadata('clarity_module')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    def get_samples(self):
        """Populate ``self.samples``, either from an explicitly named project
        or by matching the report's sample names against the LIMS."""
        if config.kwargs.get('clarity_project_name'):
            # BUGFIX: get_projects() returns a list; the original accessed
            # ``.samples`` on the list itself, which always raised
            # AttributeError. Use the first matching project.
            projects = self.lims.get_projects(name=config.kwargs['clarity_project_name'])
            self.samples = projects[0].samples
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    # BUGFIX: was ``self.names.update(...)``, which raised
                    # AttributeError on every call and silently skipped
                    # collecting names from the raw data.
                    names.update(d.keys())
                except AttributeError:
                    # Some raw-data entries are not dict-like; ignore them.
                    pass
            if not config.kwargs.get('clarity_skip_edit_names'):
                names = self.edit_names(names)

            self.log.debug("Looking into Clarity for samples {}".format(", ".join(names)))
            found = 0
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error("Could not find a sample matching {0}, skipping.".format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error("Found multiple samples matching {0}, skipping".format(name))
                        continue
                    found += 1
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warn("Could not connect to Clarity LIMS: {}".format(e))
                return None
            self.log.info("Found {} out of {} samples in LIMS.".format(found, len(names)))

    def edit_names(self, names):
        """Strip common read-pair suffixes (_1/_2/_R1/_R2) from sample names."""
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)
        return edited

    def flatten_metadata(self, metadata):
        """Join set/list values into comma-separated strings, in place."""
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level], (set, list)):
                    metadata[first_level][second_level] = ", ".join(metadata[first_level][second_level])
        return metadata

    def get_project_metadata(self, udfs):
        """Collect the requested project-level UDF values, keyed by project name."""
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    try:
                        project_metadata[sample.project.name][udf].add(str(sample.project.udf[udf]))
                    except KeyError:
                        # First value for this UDF: create the set (was a bare
                        # except; narrowed to the KeyError actually expected).
                        project_metadata[sample.project.name][udf] = set()
                        project_metadata[sample.project.name][udf].add(str(sample.project.udf[udf]))
        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect the requested sample-level UDF values, keyed by sample name."""
        sample_metadata = {}
        for sample in self.samples:
            sample_metadata[sample.name] = {}
            for udf in udfs:
                if udf in sample.udf:
                    try:
                        sample_metadata[sample.name][udf].add(str(sample.udf[udf]))
                    except KeyError:
                        # First value for this UDF: create the set (was a bare
                        # except; narrowed to the KeyError actually expected).
                        sample_metadata[sample.name][udf] = set()
                        sample_metadata[sample.name][udf].add(str(sample.udf[udf]))
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch the metadata described by ``self.schema[part]`` and merge it
        into the bucket matching that report section."""
        for key in self.schema[part]:
            if key == 'Project':
                metadata = self.get_project_metadata(self.schema[part]['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(self.schema[part]['Sample'])
            else:
                metadata = self.get_artifact_metadata(self.schema[part])
            if part == "report_header_info":
                self.header_metadata.update(metadata)
            elif part == "general_stats":
                self.general_metadata.update(metadata)
            else:
                self.tab_metadata.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs (inputs and outputs) per sample for each
        process type listed in *pt_to_udfs*."""
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                # 'Sample' and 'Project' entries are handled elsewhere.
                if process_type == 'Sample':
                    continue
                if process_type == 'Project':
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name, process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    values = []
                    for artifact in artifacts:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))
                    artifact_metadata[sample.name][udf_name] = values

                # Walk back through the parent processes to reach the input
                # artifacts that belong to this sample.
                processes = set([art.parent_process for art in artifacts])
                inputs = []
                for p in processes:
                    inputs.extend([art for art in p.all_inputs() if sample.name in [s.name for s in art.samples]])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    values = []
                    for artifact in inputs:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))
                    artifact_metadata[sample.name][udf_name] = values
        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Push header entries and general-statistics columns into the report."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            d = {}
            for key in self.header_metadata[first_level]:
                d[key] = self.header_metadata[first_level][key]
            config.report_header_info.append(d)

        headers = {}
        for first_level in self.schema["general_stats"]:
            for header in self.schema["general_stats"][first_level]:
                headers[header] = {}
                if isinstance(self.schema["general_stats"][first_level][header], dict):
                    for subsubkey, cfg in self.schema["general_stats"][first_level][header].items():
                        if subsubkey == 'multiply_by':
                            mby = str(cfg)[:]
                            # BUGFIX: bind the multiplier as a default argument.
                            # The original closure late-bound ``mby``, so every
                            # column used the LAST multiplier seen in the loop.
                            headers[header]['modify'] = lambda x, mby=mby: float(x) * float(mby)
                        else:
                            headers[header][subsubkey] = cfg
                headers[header]['description'] = headers[header].get('description', '{} - {}'.format(first_level, header))
                headers[header]['namespace'] = headers[header].get('namespace', 'Clarity LIMS')
                headers[header]['scale'] = headers[header].get('scale', 'YlGn')
        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Build the Clarity tab table and append it to the intro HTML."""
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                desc = header
                if header not in headers:
                    headers[header] = {}
                for key in self.schema['clarity_module']:
                    if header in self.schema['clarity_module'][key]:
                        desc = key
                    elif isinstance(self.schema['clarity_module'][key], dict):
                        for subkey, val in self.schema['clarity_module'][key].items():
                            if val is None:
                                break
                            elif header in val:
                                desc = key
                                if isinstance(val[header], dict):
                                    for subsubkey, cfg in val[header].items():
                                        if subsubkey == 'multiply_by':
                                            mby = str(cfg)[:]
                                            # BUGFIX: default-arg binding, same
                                            # late-binding bug as in
                                            # update_multiqc_report.
                                            headers[header]['modify'] = lambda x, mby=mby: float(x) * float(mby)
                                        else:
                                            headers[header][subsubkey] = cfg
                headers[header]['namespace'] = headers[header].get('namespace', desc)
                headers[header]['title'] = headers[header].get('title', header)
                headers[header]['description'] = headers[header].get('description', header)
        self.intro += table.plot(self.tab_metadata, headers)
class ProjectReport:
    """Build and render a per-project delivery report (PDF) from LIMS
    metadata, delivered-sample metrics and per-sample pipeline artifacts."""

    def __init__(self, project_name):
        self.project_name = project_name
        self.project_source = os.path.join(cfg.query('sample','delivery_source'), project_name)
        self.project_delivery = os.path.join(cfg.query('sample','delivery_dest'), project_name)
        self.lims = Lims(**cfg.get('clarity'))
        self.params = {'project_name':project_name}
        self.results = {}
        self.fill_sample_names_from_lims()
        self.samples_delivered = self.read_metrics_csv(os.path.join(self.project_delivery, 'summary_metrics.csv'))
        self.get_sample_param()
        self.fill_project_information_from_lims()

    def fill_project_information_from_lims(self):
        """Populate ``self.project_info`` (label/value pairs) from the LIMS
        project record's UDFs."""
        project = self.lims.get_projects(name=self.project_name)[0]
        self.project_info = {}
        self.project_info['project_name'] = ['Project name:', self.project_name]
        self.project_info['project_title'] = ['Project title:', project.udf.get('Project Title', '')]
        self.project_info['enquiry'] = ['Enquiry no:', project.udf.get('Enquiry Number', '')]
        self.project_info['quote'] = ['Quote no:', project.udf.get('Quote No.', '')]
        self.project_info['researcher'] = ['Researcher:','%s %s (%s)'%(project.researcher.first_name, project.researcher.last_name, project.researcher.email)]
        self.project_order = ['project_name', 'project_title', 'enquiry', 'quote', 'researcher']

    def fill_sample_names_from_lims(self):
        """Fetch all project sample names; also keep a filesystem-safe
        variant with ':' and ' ' replaced by '_'."""
        samples = self.lims.get_samples(projectname=self.project_name)
        self.samples = [s.name for s in samples]
        self.modified_samples = [re.sub(r'[: ]','_', s.name) for s in samples]

    def get_library_workflow_from_sample(self, sample_name):
        """Return the 'Prep Workflow' UDF for a uniquely-named sample, or
        log an error and return None."""
        samples = self.lims.get_samples(projectname=self.project_name, name=sample_name)
        if len(samples) == 1:
            return samples[0].udf.get('Prep Workflow')
        else:
            # BUGFIX: the original formatted two placeholders with a single
            # string operand ('%s...%s' % sample_name), raising TypeError.
            app_logger.error('%s samples found for sample name %s' % (len(samples), sample_name))

    def get_species_from_sample(self, sample_name):
        """Return the (alias-resolved) 'Species' UDF for a uniquely-named
        sample, or log an error and return None."""
        samples = self.lims.get_samples(projectname=self.project_name, name=sample_name)
        if len(samples) == 1:
            s = samples[0].udf.get('Species')
            return species_alias.get(s, s)
        else:
            # BUGFIX: same broken %-formatting as above.
            app_logger.error('%s samples found for sample name %s' % (len(samples), sample_name))

    def parse_program_csv(self, program_csv):
        """Read tool->version pairs from *program_csv* into ``self.params``
        as '<tool>_version' entries. Missing file is silently skipped."""
        all_programs = {}
        if os.path.exists(program_csv):
            with open(program_csv) as open_prog:
                for row in csv.reader(open_prog):
                    all_programs[row[0]] = row[1]
        # TODO: change the hardcoded version of bcl2fastq
        all_programs['bcl2fastq'] = '2.17.1.14'
        for p in ['bcl2fastq','bcbio', 'bwa', 'gatk', 'samblaster']:
            if p in all_programs:
                self.params[p + '_version'] = all_programs.get(p)

    def parse_project_summary_yaml(self, summary_yaml):
        """Extract the bcbio version and genome build from a bcbio
        project-summary YAML."""
        with open(summary_yaml, 'r') as open_file:
            full_yaml = yaml.safe_load(open_file)
        sample_yaml = full_yaml['samples'][0]
        path_to_bcbio = os.path.basename(os.path.dirname(sample_yaml['dirs']['galaxy']))
        # BUGFIX: the original did path_to_bcbio.split('/')[-2], but a
        # basename never contains '/', so that always raised IndexError.
        # NOTE(review): confirm the bcbio version really is the directory
        # name one level above 'galaxy'.
        self.params['bcbio_version'] = path_to_bcbio
        if sample_yaml['genome_build'] == 'hg38':
            self.params['genome_version'] = 'GRCh38 (with alt, decoy and HLA sequences)'

    def read_metrics_csv(self, metrics_csv):
        """Read a tab-separated metrics file into a dict keyed by the
        'Sample Id' column; each value is the full row dict."""
        samples_to_info = {}
        with open(metrics_csv) as open_metrics:
            reader = csv.DictReader(open_metrics, delimiter='\t', quoting=csv.QUOTE_NONE)
            for row in reader:
                samples_to_info[row['Sample Id']] = row
        return samples_to_info

    def get_sample_param(self):
        """Derive template choice, program versions and summary results
        (yield, coverage, folder size) for the whole project."""
        self.fill_sample_names_from_lims()
        project_size = 0
        library_workflows = set()
        species = set()
        for sample in self.samples:
            library_workflow = self.get_library_workflow_from_sample(sample)
            library_workflows.add(library_workflow)
            species.add(self.get_species_from_sample(sample))

        if len(library_workflows) == 1:
            self.library_workflow = library_workflows.pop()
        else:
            # BUGFIX: 'workfkow' typo fixed, and None members no longer break
            # the join; also set a fallback so later code does not raise
            # AttributeError on an unset attribute.
            app_logger.error('More than one workflow used in project %s: %s' % (self.project_name, ', '.join(str(w) for w in library_workflows)))
            self.library_workflow = None
        if len(species) == 1:
            self.species = species.pop()
        else:
            app_logger.error('More than one species used in project %s: %s' % (self.project_name, ', '.join(str(s) for s in species)))
            self.species = None

        # Map the prep workflow to a report template.
        if self.library_workflow in ['TruSeq Nano DNA Sample Prep', None]:
            self.template = 'truseq_nano_template'
        elif self.library_workflow in ['TruSeq PCR-Free DNA Sample Prep', 'TruSeq PCR-Free Sample Prep']:
            self.template = 'truseq_pcrfree_template'
        else:
            app_logger.error('Unknown library workflow %s for project %s' % (self.library_workflow, self.project_name))
            return None
        if self.species == 'Human':
            self.template += '.html'
        else:
            self.template += '_non_human.html'

        self.params['adapter1'] = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
        self.params['adapter2'] = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"

        project_size = getFolderSize(self.project_delivery)
        # Per-sample artifacts may live next to the sample or in a .qc subdir.
        for sample in set(self.modified_samples):
            sample_source = os.path.join(self.project_source, sample)
            if os.path.exists(sample_source):
                program_csv = os.path.join(sample_source, 'programs.txt')
                if not os.path.exists(program_csv):
                    program_csv = os.path.join(sample_source, '.qc', 'programs.txt')
                self.parse_program_csv(program_csv)
                summary_yaml = os.path.join(sample_source, 'project-summary.yaml')
                if not os.path.exists(summary_yaml):
                    summary_yaml = os.path.join(sample_source, '.qc', 'project-summary.yaml')
                if os.path.exists(summary_yaml):
                    self.parse_project_summary_yaml(summary_yaml)

        self.results['project_size'] = ['Total folder size:','%.2fTb'%(project_size/1000000000000.0)]
        self.results['nb_sample'] = ['Number of sample:', len(self.samples)]
        self.results['nb_sample_delivered'] = ['Number of sample delivered:', len(self.samples_delivered)]
        yields = [float(self.samples_delivered[s]['Yield']) for s in self.samples_delivered]
        self.results['yield'] = ['Total yield Gb:','%.2f'%sum(yields)]
        self.results['mean_yield'] = ['Average yield Gb:','%.1f'%(sum(yields)/max(len(yields), 1))]
        try:
            coverage = [float(self.samples_delivered[s]['Mean coverage']) for s in self.samples_delivered]
            self.results['coverage'] = ['Average coverage per samples:','%.2f'%(sum(coverage)/max(len(coverage), 1))]
            self.results_order = ['nb_sample','nb_sample_delivered', 'yield', 'mean_yield', 'coverage', 'project_size']
        except KeyError:
            # Coverage column absent from the metrics file: omit it.
            self.results_order = ['nb_sample','nb_sample_delivered', 'yield', 'mean_yield', 'project_size']

    def generate_report(self):
        """Render the chosen template and write the project report PDF."""
        template_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates'))
        env = Environment(loader=FileSystemLoader(template_dir))
        template = env.get_template(self.template)
        output = template.render(results_order=self.results_order, results=self.results, project_info=self.project_info, project_order=self.project_order, **self.params)
        pdf = get_pdf(output)
        project_file = os.path.join(self.project_delivery, 'project_%s_report.pdf'%self.project_name)
        # BUGFIX: PDF payload is binary; was opened in text mode 'w'.
        with open(project_file, 'wb') as open_pdf:
            open_pdf.write(pdf.getvalue())
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """Second variant of the Clarity metadata MultiQC module.

    Same data flow as the other variant in this file, but the schema
    sections are named 'Header' / 'General Statistics' / 'Clarity Tab'
    and the table output goes into ``self.sections`` instead of the
    module intro.
    """

    def __init__(self):
        """Connect to Clarity and, if a schema is configured, fetch all
        metadata and register this module with the report."""
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info("Skipping MultiQC_Clarity as disabled on command line")
            return None
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug("Skipping MultiQC_Clarity as specified in config file")
            return None

        super(MultiQC_clarity_metadata, self).__init__(
            name='Clarity', anchor='clarity',
            href='https://github.com/Galithil/MultiQC_Clarity',
            info="fetches data from your Basespace Clarity LIMS instance.")

        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}
        self.header_metadata = {}
        self.general_metadata = {}
        self.tab_metadata = {}
        self.samples = []
        self.sections = []
        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.warn("No config found for MultiQC_Clarity")
            return None
        self.get_samples()
        self.get_metadata('Header')
        self.get_metadata('General Statistics')
        self.get_metadata('Clarity Tab')
        self.update_multiqc_report()
        self.make_sections()
        report.modules_output.append(self)

    def get_samples(self):
        """Populate ``self.samples`` from a named project or by matching
        the report's sample names against the LIMS."""
        if config.kwargs.get('clarity_project_name'):
            pj = self.lims.get_projects(name=config.kwargs['clarity_project_name'])
            # NOTE(review): get_projects() returns a list, so ``pj.samples``
            # raises AttributeError -- probably meant ``pj[0].samples``.
            self.samples = pj.samples
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                try:
                    # NOTE(review): ``self.names`` does not exist, so this
                    # always raises AttributeError and is swallowed below --
                    # probably meant ``names.update(d.keys())``.
                    self.names.update(d.keys())
                except AttributeError:
                    pass
            if not config.kwargs.get('clarity_skip_edit_names'):
                names = self.edit_names(names)

            self.log.debug("Looking into Clarity for samples {}".format(", ".join(names)))
            found = 0
            try:
                for name in names:
                    matching_samples = self.lims.get_samples(name=name)
                    if not matching_samples:
                        self.log.error("Could not find a sample matching {0}, skipping.".format(name))
                        continue
                    if len(matching_samples) > 1:
                        self.log.error("Found multiple samples matching {0}, skipping".format(name))
                        continue
                    found += 1
                    self.samples.append(matching_samples[0])
            except Exception as e:
                self.log.warn("Could not connect to Clarity LIMS: {}".format(e))
                return None
            self.log.info("Found {} out of {} samples in LIMS.".format(found, len(names)))

    def edit_names(self, names):
        """Strip common read-pair suffixes (_1/_2/_R1/_R2) from names."""
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)
        return edited

    def flatten_metadata(self, metadata):
        """Join set/list values into comma-separated strings, in place."""
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level], set) or isinstance(metadata[first_level][second_level], list):
                    metadata[first_level][second_level] = ", ".join(metadata[first_level][second_level])
        return metadata

    def get_project_metadata(self, udfs):
        """Collect requested project-level UDF values, keyed by project name."""
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    try:
                        project_metadata[sample.project.name][udf].add(str(sample.project.udf[udf]))
                    # NOTE(review): bare except -- intended to catch the
                    # KeyError on the first value for this UDF.
                    except:
                        project_metadata[sample.project.name][udf] = set()
                        project_metadata[sample.project.name][udf].add(str(sample.project.udf[udf]))
        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect requested sample-level UDF values, keyed by sample name."""
        sample_metadata = {}
        for sample in self.samples:
            sample_metadata[sample.name] = {}
            for udf in udfs:
                if udf in sample.udf:
                    try:
                        sample_metadata[sample.name][udf].add(str(sample.udf[udf]))
                    # NOTE(review): bare except, same pattern as above.
                    except:
                        sample_metadata[sample.name][udf] = set()
                        sample_metadata[sample.name][udf].add(str(sample.udf[udf]))
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch metadata described by ``self.schema[part]`` and merge it
        into the bucket matching that report section."""
        for key in self.schema[part]:
            if key == 'Project':
                metadata = self.get_project_metadata(self.schema[part]['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(self.schema[part]['Sample'])
            else:
                metadata = self.get_artifact_metadata(self.schema[part])
            if part == "Header":
                self.header_metadata.update(metadata)
            elif part == "General Statistics":
                self.general_metadata.update(metadata)
            else:
                self.tab_metadata.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs (inputs and outputs) per sample for each
        process type listed in *pt_to_udfs*."""
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                # 'Sample' and 'Project' entries are handled elsewhere.
                if process_type == 'Sample':
                    continue
                if process_type == 'Project':
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name, process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    values = []
                    for artifact in artifacts:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))
                    artifact_metadata[sample.name][udf_name] = values

                # Walk back through the parent processes to reach the input
                # artifacts that belong to this sample.
                processes = set([art.parent_process for art in artifacts])
                inputs = []
                for p in processes:
                    inputs.extend([art for art in p.all_inputs() if sample.name in [s.name for s in art.samples]])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    values = []
                    for artifact in inputs:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))
                    artifact_metadata[sample.name][udf_name] = values
        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Append header info and general-statistics columns to the report."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            d = {}
            for key in self.header_metadata[first_level]:
                d[key] = self.header_metadata[first_level][key]
            config.report_header_info.append(d)
        headers = {}
        for first_level in self.schema["General Statistics"]:
            for header in self.schema["General Statistics"][first_level]:
                headers[header] = {
                    'description': first_level,
                    'namespace': 'Clarity',
                    'scale': 'YlGn'
                }
        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Build the Clarity data table and append it as a report section."""
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                desc = header
                if header not in headers:
                    # Find a describing schema key to use as the namespace.
                    for key in self.schema['Clarity Tab']:
                        if header in self.schema['Clarity Tab'][key]:
                            desc = key
                        elif isinstance(self.schema['Clarity Tab'][key], dict):
                            for subkey in self.schema['Clarity Tab'][key]:
                                if header in self.schema['Clarity Tab'][key][subkey]:
                                    desc = key
                    headers[header] = {
                        'namespace': desc,
                        'title': header,
                        'description': header
                    }
        self.sections.append({
            'name': 'Clarity Data',
            'anchor': 'clarity_data',
            'content': '<p> Data obtained from Illumina Basespace Clarity LIMS.</p>' + table.plot(self.tab_metadata, headers)
        })
def namesetter(PID):
    """Interactively (re)assign the 'Bioinfo responsible' UDF of a project's
    'Project Summary 1.3' process, validating the name against the preset
    list of UDF config 1128. Works on both Python 2 and 3 (raw_input/input).
    """
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    lims.check_version()
    #Find LIMS entry with same PID
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == PID:
            limsproject = proj.name
            break
    #Error handling: 'limsproject' is only bound if the loop matched.
    if not 'limsproject' in locals():
        print("{} not available in LIMS.".format(PID))
        return None

    #Enter project summary process
    stepname = ['Project Summary 1.3']
    process = lims.get_processes(type=stepname, projectname=limsproject)
    #Error handling
    if process == []:
        print("{} for {} is not available in LIMS.".format(stepname, limsproject))
        return None

    loop = True
    while loop:
        # Show the current assignee (or 'Unassigned').
        if "Bioinfo responsible" in process[0].udf:
            response = process[0].udf["Bioinfo responsible"]
        else:
            response = "Unassigned"
        # NOTE(review): on Python 3 .encode() prints a bytes repr (b'...').
        print("Existing Bioinfo responsible for project {} aka {} is: {}".format(limsproject, PID, response.encode('utf-8')))

        #Checks for valid name against the preset list (substring match).
        in_responsibles = False
        config_responsibles = Udfconfig(lims, id="1128")
        while not in_responsibles:
            # raw_input() on Python 2, input() on Python 3.
            if sys.version_info[0] == 3:
                newname = input("Enter name of new Bioinfo responsible: ")
            elif sys.version_info[0] == 2:
                newname = raw_input("Enter name of new Bioinfo responsible: ")
            for names in config_responsibles.presets:
                if newname in names:
                    in_responsibles = True
                    # Replace the typed substring with the full preset name.
                    newname = names
            if not in_responsibles:
                print("Subset {} not found in accepted Bioinfo responsible list.".format(newname))
            else:
                print("Suggested name is {}".format(newname))

        if sys.version_info[0] == 3:
            confirmation = input("Project {} aka {} will have {} as new Bioinfo responsible, is this correct (Y/N)? ".format(limsproject, PID, newname))
        elif sys.version_info[0] == 2:
            confirmation = raw_input("Project {} aka {} will have {} as new Bioinfo responsible, is this correct (Y/N)? ".format(limsproject, PID, newname))
        if confirmation == 'Y' or confirmation == 'y':
            try:
                # Reject non-ASCII names before pushing to LIMS.
                newname.encode('ascii')
                process[0].udf["Bioinfo responsible"] = str(newname)
                process[0].put()
                print("Project {} aka {} assigned to {}".format(limsproject, PID, newname))
                return None
            except (UnicodeDecodeError, UnicodeEncodeError):
                #Weird solution due to put function: restore the previous
                #value so a later put() does not push the bad name.
                process[0].udf["Bioinfo responsible"] = response
                print("ERROR: You tried to use a special character, didn't you? Don't do that. New standards and stuff...")
        elif confirmation == 'N' or confirmation == 'n':
            loop = False
        else:
            print("Invalid answer.")
def generate_output(project, destid, total_lanes, req_lanes, lane_maps, acc_ratios):
    #Gathers the container id and well name for all samples in project,
    #writes a per-project summary text file and a repooling CSV.
    #Python 2 only (print statement, xrange).
    #Cred to Denis for providing a base epp
    location = dict()
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == project:
            projName = proj.name
            break
    #All normalization processes for project
    norms=['Library Normalization (MiSeq) 4.0', 'Library Normalization (Illumina SBS) 4.0','Library Normalization (HiSeq X) 1.0']
    pros=lims.get_processes(type=norms, projectname=projName)
    #For all processes
    for p in pros:
        #For all artifacts in process
        for o in p.all_outputs():
            #If artifact is analyte type and has project name in sample
            if o.type=="Analyte" and project in o.name:
                # location maps sample name -> [container id, well name]
                location[o.name.split()[0]] = list()
                location[o.name.split()[0]].append(o.location[0].id)
                location[o.name.split()[0]].append(o.location[1])

    #PRINT section
    #Print stats including duplicates
    timestamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d_%H:%M')
    sumName = projName, "_summary_", timestamp,".txt"
    sumName = ''.join(sumName)
    with open(sumName, "w") as summary:
        # OPT = achieved lanes / ideal lanes; guard against empty projects.
        if sum(req_lanes.values()) != 0:
            OPT = sum(total_lanes)/sum(req_lanes.values())
        else:
            OPT = 0
        output = "Ideal lanes (same schema): ", str(sum(req_lanes.values())) , ", Total lanes: ", str(sum(total_lanes)), ", OPT: ", str(round(OPT,3)),'\n'
        output = ''.join(output)
        summary.write( output )
        output = "Unique pools: ", str(len(total_lanes)), ", Average pool duplication: ", str(sum(total_lanes)/float(len(total_lanes))) ,'\n'
        output = ''.join(output)
        summary.write( output )
        #NOTE(review): 'bin' shadows the builtin; it tracks the running well no.
        bin = 0
        for index in xrange(1, len(lane_maps)+1):
            bin += 1
            summary.write('\n')
            output = "Wells ", str(bin) , '-' , str(bin+int(total_lanes[index-1])-1),':','\n'
            output = ''.join(output)
            summary.write( output )
            bin += int(total_lanes[index-1]-1)
            for counter in xrange(1, len(lane_maps[index])):
                output = str(lane_maps[index][counter]),' ', str(acc_ratios[index][counter]), "%",'\n'
                output = ''.join(output)
                summary.write( output )

    #Creates csv with rows:
    #<source plate ID>,<source well>,<volume>,<destination plate ID>,<destination well>
    name = projName,"_repool_",timestamp,".csv"
    name = ''.join(name)
    wells = ['Empty','A','B','C','D','E','F','G','H']
    #Index 0 is number, index 1 is Letter
    wellIndex = [1, 1]
    destNo = 0
    with open(name, 'w') as csvfile:
        writer = csv.writer(csvfile)
        for index in xrange(1, len(lane_maps)+1):
            for dupes in xrange(1, int(total_lanes[index-1])+1):
                if lane_maps[index] == 0:
                    raise Exception('Error: Project not logged in x_flowcells database!')
                for counter in xrange(1, len(lane_maps[index])):
                    #Destination well 200 microL, minimum pipette 2 microL; acc_ratios multiplied by 2.
                    sample = lane_maps[index][counter]
                    position = wells[wellIndex[1]],':',str(wellIndex[0])
                    position = ''.join(position)
                    try:
                        output = location[sample][0],location[sample][1],str(int(acc_ratios[index][counter]*2)),str(destid[destNo]),position
                    except KeyError:
                        #NOTE(review): after this message, 'output' may be
                        #unbound (first iteration) or stale -- the writerow
                        #below can raise NameError or write the wrong row.
                        print "Error: Samples incorrectly parsed into database, thus causing sample name conflicts!"
                    if not acc_ratios[index][counter] == 0:
                        writer.writerow(output)
                    #Increment wellsindex
                    #NOTE(review): nesting below reconstructed from mangled
                    #source -- verify the well/plate advance logic.
                    if not acc_ratios[index][counter] == 0:
                        if not wellIndex[1] >= 8:
                            wellIndex[1] += 1
                        else:
                            #Row letter wrapped: reset and advance column.
                            wellIndex[1] = 1
                            if not wellIndex[0] >= 8:
                                wellIndex[0] += 1
                            else:
                                #Plate full: move to the next destination plate.
                                wellIndex[0] = 1
                                destNo += 1
                                try:
                                    destid[destNo]
                                except IndexError:
                                    print "Critical error; not enough destination plates provided"
def namesetter(PID):
    #Python 2 variant of the interactive 'Bioinfo responsible' setter:
    #validates a typed name against the presets of UDF config 1128 and
    #writes it to the project's 'Project Summary 1.3' process.
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    lims.check_version()
    #Find LIMS entry with same PID
    allProjects = lims.get_projects()
    for proj in allProjects:
        if proj.id == PID:
            limsproject = proj.name
            break
    #Error handling: 'limsproject' is only bound if the loop matched.
    if not 'limsproject' in locals():
        print "{} not available in LIMS.".format(PID)
        return None

    #Enter project summary process
    stepname=['Project Summary 1.3']
    process=lims.get_processes(type=stepname, projectname=limsproject)
    #Error handling
    if process == []:
        print "{} for {} is not available in LIMS.".format(stepname, limsproject)
        return None

    loop = True
    while loop:
        #Show the current assignee (or 'Unassigned').
        if "Bioinfo responsible" in process[0].udf:
            response = process[0].udf["Bioinfo responsible"]
        else:
            response = "Unassigned"
        print "Existing Bioinfo responsible for project {} aka {} is: {}".format(limsproject, PID, response.encode('utf-8'))

        #Checks for valid name against the preset list (substring match).
        in_responsibles = False
        config_responsibles =Udfconfig(lims, id="1128")
        while not in_responsibles:
            newname = raw_input("Enter name of new Bioinfo responsible: ")
            for names in config_responsibles.presets:
                if newname in names:
                    in_responsibles = True
                    #Replace the typed substring with the full preset name.
                    newname = names
            if not in_responsibles:
                print "Subset {} not found in accepted Bioinfo responsible list.".format(newname)
            else:
                print "Suggested name is {}".format(newname)

        confirmation = raw_input("Project {} aka {} will have {} as new Bioinfo responsible, is this correct (Y/N)? ".format(limsproject, PID, newname))
        if confirmation == 'Y' or confirmation == 'y':
            try:
                #Reject non-ASCII names before pushing to LIMS.
                newname.decode('ascii')
                process[0].udf["Bioinfo responsible"] = unicode(newname)
                process[0].put()
                print "Project {} aka {} assigned to {}".format(limsproject, PID, newname)
                return None
            except UnicodeDecodeError:
                #Weird solution due to put function: restore the previous
                #value so a later put() does not push the bad name.
                process[0].udf["Bioinfo responsible"] = response
                print "ERROR: You tried to use a special character, didn't you? Don't do that. New standards and stuff..."
        elif confirmation == 'N' or confirmation == 'n':
            loop = False
        else:
            print "Invalid answer."
class MultiQC_clarity_metadata(BaseMultiqcModule):
    """MultiQC plugin module that decorates a MultiQC report with metadata
    fetched from a Basespace Clarity LIMS instance.

    Sample/project/artifact UDF values are collected according to the
    ``clarity`` schema in the MultiQC config and injected into the report
    header, the general-stats table, and a dedicated "Clarity LIMS" section.
    """

    def __init__(self):
        self.log = logging.getLogger('multiqc')

        # Check that this plugin hasn't been disabled
        if config.kwargs.get('disable_clarity', False) is True:
            self.log.info("Skipping MultiQC_Clarity as disabled on command line")
            return
        if getattr(config, 'disable_clarity', False) is True:
            self.log.debug("Skipping MultiQC_Clarity as specified in config file")
            return

        super(MultiQC_clarity_metadata, self).__init__(name='Clarity LIMS', anchor='clarity')

        self.intro = '''<p>The <a href="https://github.com/MultiQC/MultiQC_Clarity" target="_blank">MultiQC_Clarity</a> plugin fetches data from a specified <a href="https://www.genologics.com/clarity-lims/" target="_blank">Basespace Clarity LIMS</a> instance.</p>'''

        # genologics is an optional dependency: degrade to a no-op module
        # (with a warning) if it cannot be imported.
        try:
            from genologics.lims import Lims
            from genologics import config as genologics_config
        except:
            self.log.warning("Importing genologics failed: " + traceback.format_exc())
            return
        # load_config calls sys.exit when no config is found, hence the
        # SystemExit handler rather than a normal exception type.
        try:
            BASEURI, USERNAME, PASSWORD, VERSION, MAIN_LOG = genologics_config.load_config(
                specified_config=config.kwargs.get('clarity_config'))
        except SystemExit:
            self.log.warning(
                "Genologics config file is not specified as --clarity_config or in ~/.genologicsrc. "
                "Skip running Clarity module")
            return
        self.lims = Lims(BASEURI, USERNAME, PASSWORD)
        self.metadata = {}          # unused here; kept for compatibility
        self.header_metadata = {}   # feeds config.report_header_info
        self.general_metadata = {}  # feeds the general-stats table
        self.tab_metadata = {}      # feeds the Clarity LIMS section table
        self.samples = []           # genologics Sample objects found in LIMS
        self.schema = getattr(config, 'clarity', None)
        if self.schema is None:
            self.log.debug("No config found for MultiQC_Clarity")
            return
        # Any failure while talking to LIMS or building the report must not
        # break the surrounding MultiQC run: log and bail out.
        try:
            self.get_samples()
            if 'report_header_info' in self.schema:
                self.get_metadata('report_header_info')
            if 'general_stats' in self.schema:
                self.get_metadata('general_stats')
            if 'clarity_module' in self.schema:
                self.get_metadata('clarity_module')
            self.update_multiqc_report()
            self.make_sections()
            report.modules_output.append(self)
        except:
            self.log.error("MultiQC_Clarity failed: " + traceback.format_exc())
            return

    def csv_file_from_samplesheet(self, sample_sheet):
        """Return the lines that follow the '[Data]' marker of an Illumina
        sample sheet, stripped, as a list of CSV strings."""
        csv_lines = []
        with open(sample_sheet) as f:
            found_data = False
            for line in f:
                if found_data:
                    csv_lines.append(line.strip())
                else:
                    if line.strip().startswith('[Data]'):
                        found_data = True
        return csv_lines

    def get_raw_sample_names(self, csv_fpath, names):
        """Map corrected sample names (the 'description' column if present,
        else the first column) back to the raw first-column names, keeping
        only rows whose corrected name is in *names*."""
        raw_sample_names = dict()
        with open(csv_fpath) as f:
            csv_reader = csv.DictReader(f)
            name_col = csv_reader.fieldnames[0]
            for r in csv_reader:
                correct_name = r['description'] if 'description' in r else r[name_col]
                if correct_name not in names:
                    continue
                raw_sample_names[correct_name] = r[name_col]
        return raw_sample_names

    def correct_sample_name(self, name):
        """Normalize a sample name: drop a trailing bcl2fastq-style '_S<n>'
        suffix and replace dots with underscores."""
        import re
        name = re.sub(r'_S\d+$', '', name)
        return name.replace('.', '_')

    def search_by_samplesheet(self, names):
        """Resolve *names* to LIMS samples using the --samplesheet CSV.

        First tries a direct lookup by sample LIMS id; rows that fail that
        are retried by container id + well placement. Matched genologics
        Sample objects are renamed to the report's sample name and appended
        to self.samples.
        """
        sample_sheet_fpath = config.kwargs['samplesheet']
        samples_by_container = defaultdict(dict)
        raw_names = dict((name, name) for name in names)
        if config.kwargs.get('bcbio_csv') and isfile(config.kwargs.get('bcbio_csv')):
            raw_names = self.get_raw_sample_names(config.kwargs['bcbio_csv'], names)
        # corrected samplesheet name -> report name
        correct_sample_names = dict(
            (self.correct_sample_name(raw_names[name]), name) for name in names)
        for row in csv.DictReader(
                self.csv_file_from_samplesheet(sample_sheet_fpath), delimiter=','):
            # Column names vary between samplesheet flavours.
            sample_name = row['SampleName'] if 'SampleName' in row else (
                row['Sample_Name'] if 'Sample_Name' in row else row['SampleRef'])
            sample_id = row['SampleID'] if 'SampleID' in row else row['Sample_ID']
            sample_artifacts = self.lims.get_artifacts(samplelimsid=sample_id)
            if sample_artifacts:
                sample = sample_artifacts[0].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)
            elif sample_name and sample_name in correct_sample_names.keys():
                # Fall back to container/well lookup; rows lacking the
                # plate/well columns are silently ignored.
                try:
                    container, sample_well = row['SamplePlate'], row['SampleWell'].replace('_', ':')
                    samples_by_container[container][sample_well] = sample_name
                except:
                    pass
        for container_id, samples in samples_by_container.items():
            artifacts = self.lims.get_artifacts(containerlimsid=container_id)
            if not artifacts:
                continue
            placements = artifacts[0].container.get_placements()
            for well, sample_name in samples.items():
                sample = placements[well].samples[0]
                sample.name = correct_sample_names[sample_name]
                self.samples.append(sample)

    def get_samples(self):
        """Populate self.samples, either from --clarity_project_name or by
        matching the sample names present in the MultiQC report data."""
        if config.kwargs.get('clarity_project_name'):
            pj = self.lims.get_projects(name=config.kwargs['clarity_project_name'])
            self.samples = pj.samples
            self.log.info("Found {} in LIMS.".format(config.kwargs['clarity_project_name']))
        else:
            names = set()
            for x in report.general_stats_data:
                names.update(x.keys())
            for d in report.saved_raw_data.values():
                # NOTE(review): self.names looks wrong here (no such attribute
                # is ever assigned; presumably names.update was meant) — the
                # AttributeError handler below silently swallows it, so raw
                # data names are never actually collected. TODO confirm/fix.
                try:
                    self.names.update(d.keys())
                except AttributeError:
                    pass
            # if not config.kwargs.get('clarity_skip_edit_names'):
            #     names = self.edit_names(names)
            self.log.debug("Looking into Clarity for samples {}".format(", ".join(names)))
            if config.kwargs.get('samplesheet'):
                self.search_by_samplesheet(names)
            if not self.samples:
                # Samplesheet lookup found nothing (or was not requested):
                # query LIMS per sample name, skipping ambiguous matches.
                try:
                    for name in names:
                        matching_samples = self.lims.get_samples(name=name)
                        if not matching_samples:
                            self.log.error("Could not find a sample matching {0}, skipping.".format(name))
                            continue
                        if len(matching_samples) > 1:
                            self.log.error("Found multiple samples matching {0}, skipping".format(name))
                            continue
                        self.samples.append(matching_samples[0])
                except Exception as e:
                    self.log.warn("Could not connect to Clarity LIMS: {}".format(e))
                    return None
            self.log.info("Found {} out of {} samples in LIMS.".format(len(self.samples), len(names)))

    def edit_names(self, names):
        """Strip read-pair suffixes (_1/_2/_R1/_R2) from sample names."""
        edited = []
        for name in names:
            if name.endswith("_1") or name.endswith("_2"):
                edited.append(name[:-2])
            elif name.endswith("_R1") or name.endswith("_R2"):
                edited.append(name[:-3])
            else:
                edited.append(name)
        return edited

    def flatten_metadata(self, metadata):
        """Join any set/list values of the two-level metadata dict into
        comma-separated strings (in place) and return the dict."""
        for first_level in metadata:
            for second_level in metadata[first_level]:
                if isinstance(metadata[first_level][second_level], set) or isinstance(
                        metadata[first_level][second_level], list):
                    metadata[first_level][second_level] = ", ".join(
                        metadata[first_level][second_level])
        return metadata

    def get_project_metadata(self, udfs):
        """Collect the given project-level UDFs for each sample's project,
        keyed by project name; values are flattened to strings."""
        project_metadata = {}
        for sample in self.samples:
            project_metadata[sample.project.name] = {}
            for udf in udfs:
                if udf in sample.project.udf:
                    # EAFP: first occurrence raises KeyError and creates the set.
                    try:
                        project_metadata[sample.project.name][udf].add(
                            str(sample.project.udf[udf]))
                    except:
                        project_metadata[sample.project.name][udf] = set()
                        project_metadata[sample.project.name][udf].add(
                            str(sample.project.udf[udf]))
        return self.flatten_metadata(project_metadata)

    def get_sample_metadata(self, udfs):
        """Collect the given sample-level UDFs per sample name.

        Also turns 'Sample Tissue'/'Sample Type' (or, failing that,
        'Sample Conc.') into an HTML link back to the sample in Clarity, and
        records in report.lims_col which of those columns ended up present.
        """
        sample_metadata = {}
        report.lims_col = 'sample type'
        for sample in self.samples:
            sample_metadata[sample.name] = dict()
            for udf in udfs:
                if udf in sample.udf:
                    # EAFP: first occurrence raises KeyError and creates the set.
                    try:
                        sample_metadata[sample.name][udf].add(str(sample.udf[udf]))
                    except:
                        sample_metadata[sample.name][udf] = set()
                        sample_metadata[sample.name][udf].add(str(sample.udf[udf]))
            # Prefer 'Sample Tissue' over 'Sample Type' as the linked column.
            sample_type = None
            if 'Sample Tissue' in sample_metadata[sample.name]:
                sample_type = sample_metadata[sample.name].pop('Sample Tissue')
            elif 'Sample Type' in sample_metadata[sample.name]:
                sample_type = sample_metadata[sample.name].pop('Sample Type')
            # NOTE(review): assumes join here builds a URL path (os.path.join
            # or similar imported at module level) — confirm against imports.
            sample_link = join(self.lims.baseuri, 'clarity',
                               'search?scope=Sample&query=' + sample.id)
            if sample_type:
                sample_metadata[sample.name]['Sample Type'] = (
                    '<a href="' + sample_link + '" target="_blank">'
                    + sample_type.pop() + '</a>')
                report.lims_added = True
            elif 'Sample Conc.' in sample_metadata[sample.name]:
                sample_metadata[sample.name]['Sample Conc.'] = '<a href="' + sample_link + '" target="_blank">' + \
                    sample_metadata[sample.name]['Sample Conc.'].pop() + '</a>'
                report.lims_added = True
        if not any(['Sample Type' in sample_metadata[sample.name]
                    for sample in self.samples]):
            report.lims_col = 'sample conc'
        elif not all(['Sample Type' in sample_metadata[sample.name]
                      for sample in self.samples]):
            report.lims_col = 'sample type or sample conc'
        return self.flatten_metadata(sample_metadata)

    def get_metadata(self, part):
        """Fetch metadata for one schema section ('report_header_info',
        'general_stats' or 'clarity_module') and merge it into the matching
        accumulator dict."""
        for key in self.schema[part]:
            if key == 'Project':
                metadata = self.get_project_metadata(self.schema[part]['Project'])
            elif key == 'Sample':
                metadata = self.get_sample_metadata(self.schema[part]['Sample'])
            else:
                metadata = self.get_artifact_metadata(self.schema[part])
            if part == "report_header_info":
                self.header_metadata.update(metadata)
            elif part == "general_stats":
                self.general_metadata.update(metadata)
            else:
                self.tab_metadata.update(metadata)

    def get_artifact_metadata(self, pt_to_udfs):
        """Collect artifact UDFs per sample, per process type.

        For each process type in the schema (skipping the special 'Sample'
        and 'Project' keys), gathers the listed "outputs" UDFs from the
        process output artifacts and the listed "inputs" UDFs from the
        parent processes' input artifacts belonging to the sample.
        """
        artifact_metadata = {}
        for sample in self.samples:
            artifact_metadata[sample.name] = {}
            for process_type in pt_to_udfs:
                if process_type == 'Sample':
                    continue
                if process_type == 'Project':
                    continue
                artifacts = self.lims.get_artifacts(sample_name=sample.name,
                                                    process_type=process_type)
                for udf_name in pt_to_udfs[process_type].get("outputs", []):
                    values = []
                    for artifact in artifacts:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))
                    artifact_metadata[sample.name][udf_name] = values
                # Walk back to the producing processes to reach their inputs.
                processes = set([art.parent_process for art in artifacts])
                inputs = []
                for p in processes:
                    inputs.extend([
                        art for art in p.all_inputs()
                        if sample.name in [s.name for s in art.samples]
                    ])
                for udf_name in pt_to_udfs[process_type].get("inputs", []):
                    values = []
                    for artifact in inputs:
                        if udf_name in artifact.udf:
                            values.append(str(artifact.udf[udf_name]))
                    artifact_metadata[sample.name][udf_name] = values
        return self.flatten_metadata(artifact_metadata)

    def update_multiqc_report(self):
        """Push header metadata into config.report_header_info and the
        general-stats metadata (with per-header display config) into the
        report's general-stats tables."""
        if config.report_header_info is None:
            config.report_header_info = []
        for first_level in self.header_metadata:
            d = {}
            for key in self.header_metadata[first_level]:
                d[key] = self.header_metadata[first_level][key]
            config.report_header_info.append(d)
        headers = {}
        for first_level in self.schema["general_stats"]:
            for header in self.schema["general_stats"][first_level]:
                headers[header] = {}
                if isinstance(self.schema["general_stats"][first_level][header], dict):
                    for subsubkey, cfg in self.schema["general_stats"][
                            first_level][header].items():
                        if subsubkey == 'multiply_by':
                            # Copy to a fresh name bound as a default-free
                            # closure variable for the modify lambda.
                            # NOTE(review): the lambda late-binds mby, so with
                            # several multiply_by headers all lambdas would see
                            # the last value — TODO confirm intended.
                            mby = str(cfg)[:]
                            headers[header]['modify'] = lambda x: float(x) * float(mby)
                        else:
                            headers[header][subsubkey] = cfg
                headers[header]['description'] = headers[header].get(
                    'description', '{} - {}'.format(first_level, header))
                headers[header]['namespace'] = headers[header].get(
                    'namespace', 'Clarity LIMS')
                headers[header]['scale'] = headers[header].get('scale', 'YlGn')
        report.general_stats_headers.append(headers)
        report.general_stats_data.append(self.general_metadata)

    def make_sections(self):
        """Build the Clarity LIMS table section from tab_metadata, deriving
        per-header display config from the 'clarity_module' schema, and append
        the rendered table to the module intro HTML."""
        headers = OrderedDict()
        for first_level in self.tab_metadata:
            for header in self.tab_metadata[first_level]:
                desc = header
                if header not in headers:
                    headers[header] = {}
                    # Find which schema key declares this header to use as its
                    # namespace, and pick up any per-header config under it.
                    for key in self.schema['clarity_module']:
                        if header in self.schema['clarity_module'][key]:
                            desc = key
                        elif isinstance(self.schema['clarity_module'][key], dict):
                            for subkey, val in self.schema['clarity_module'][
                                    key].items():
                                # print(val)
                                if val is None:
                                    break
                                elif header in val:
                                    desc = key
                                    if isinstance(val[header], dict):
                                        for subsubkey, cfg in val[header].items():
                                            if subsubkey == 'multiply_by':
                                                # Same late-binding caveat as in
                                                # update_multiqc_report.
                                                mby = str(cfg)[:]
                                                headers[header]['modify'] = lambda x: float(x) * float(mby)
                                            else:
                                                headers[header][subsubkey] = cfg
                    headers[header]['namespace'] = headers[header].get('namespace', desc)
                    headers[header]['title'] = headers[header].get('title', header)
                    headers[header]['description'] = headers[header].get('description', header)
        self.intro += table.plot(self.tab_metadata, headers)
""" import codecs from genologics.lims import Lims # Login parameters for connecting to a LIMS instance. # NOTE: Modify according to your setup. from genologics.site_cloud import BASEURI, USERNAME, PASSWORD # Create the LIMS interface instance, and check the connection and version. lims = Lims(BASEURI, USERNAME, PASSWORD) lims.check_version() # Get the list of all projects. projects = lims.get_projects() print len(projects), 'projects in total' # Get the list of all projects opened since May 30th 2012. day = '2012-05-30' projects = lims.get_projects(open_date=day) print len(projects), 'projects opened since', day # Get the project with the specified LIMS id, and print some info. project = lims.get_project('KRA61') print project, project.name, project.open_date print 'UDFs:' for key, value in project.udf.items(): if isinstance(value, unicode): value = codecs.encode(value, 'UTF-8')