def main(lims, args, epp_logger):
    """EPP entry point: run a concentration/size-based calculation on QC artifacts.

    Skips artifacts missing the 'Concentration' or 'Size (bp)' UDF, or whose
    'Conc. Units' UDF is not 'ng/ul', then reports a summary on stderr.
    """
    p = Process(lims, id=args.pid)
    udf_check = 'Conc. Units'
    value_check = 'ng/ul'
    concentration_udf = 'Concentration'
    size_udf = 'Size (bp)'
    if args.aggregate:
        # Aggregate QC steps carry the measurements on the input artifacts
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = filter(lambda a: a.output_type == "ResultFile", all_artifacts)
    # Narrow down, step by step, to artifacts with all required UDFs present
    correct_artifacts, no_concentration = check_udf_is_defined(
        artifacts, concentration_udf)
    correct_artifacts, no_size = check_udf_is_defined(correct_artifacts, size_udf)
    correct_artifacts, wrong_value = check_udf_has_value(
        correct_artifacts, udf_check, value_check)
    apply_calculations(lims, correct_artifacts, concentration_udf, size_udf,
                       udf_check, epp_logger)
    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_value) + len(no_size) + len(no_concentration)
    }
    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
def main(lims, args, epp_logger):
    """EPP entry point: compute the 'Concentration nM' UDF for Analyte outputs.

    Results are also appended to the file named by ``args.res``. Artifacts
    missing 'Concentration (ng/ul)' or 'Size (bp)' are skipped, and a summary
    is written to stderr.
    """
    p = Process(lims, id=args.pid)
    udf_factor1 = 'Concentration (ng/ul)'
    result_udf = 'Concentration nM'
    udf_factor2 = 'Size (bp)'
    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = filter(lambda a: a.output_type == "Analyte", all_artifacts)
    correct_artifacts, wrong_factor1 = check_udf_is_defined(artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(correct_artifacts,
                                                            udf_factor2)
    # 'with' guarantees the result file is closed even if apply_calculations
    # raises (the original open()/close() pair leaked the handle on error).
    with open(args.res, "a") as f:
        if correct_artifacts:
            apply_calculations(lims, correct_artifacts, udf_factor1, udf_factor2,
                               result_udf, epp_logger, f)
    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_factor1) + len(wrong_factor2)
    }
    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
def main(lims, args, epp_logger):
    """EPP entry point: run a concentration/size-based calculation on QC artifacts.

    Same logic as the other aggregate-QC variant in this file: artifacts missing
    'Concentration' or 'Size (bp)', or whose 'Conc. Units' is not 'ng/ul', are
    skipped, and a summary is written to stderr.
    """
    p = Process(lims, id=args.pid)
    udf_check = "Conc. Units"
    value_check = "ng/ul"
    concentration_udf = "Concentration"
    size_udf = "Size (bp)"
    if args.aggregate:
        # Aggregate QC steps carry the measurements on the input artifacts
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = filter(lambda a: a.output_type == "ResultFile", all_artifacts)
    # Narrow down to artifacts that define all required UDFs with valid units
    correct_artifacts, no_concentration = check_udf_is_defined(artifacts, concentration_udf)
    correct_artifacts, no_size = check_udf_is_defined(correct_artifacts, size_udf)
    correct_artifacts, wrong_value = check_udf_has_value(correct_artifacts, udf_check, value_check)
    apply_calculations(lims, correct_artifacts, concentration_udf, size_udf, udf_check, epp_logger)
    d = {"ca": len(correct_artifacts),
         "ia": len(wrong_value) + len(no_size) + len(no_concentration)}
    abstract = (
        "Updated {ca} artifact(s), skipped {ia} artifact(s) with "
        "wrong and/or blank values for some udfs."
    ).format(**d)
    print >>sys.stderr, abstract  # stderr will be logged and printed in GUI
def main(lims, args, epp_logger):
    """EPP entry point: copy the project-level 'Reference genome' UDF to samples.

    Projects that do not define the source UDF are skipped and counted in the
    warning printed to stderr.
    """
    pro = Process(lims, id=args.pid)
    source_udf = 'Reference genome'
    destination_udf = 'Reference Genome'
    artifacts = pro.all_inputs(unique=True)
    projects = all_projects_for_artifacts(artifacts)
    # Only projects that actually define the source UDF are processed
    correct_projects, incorrect_udf = check_udf_is_defined(
        projects, source_udf)
    correct_samples = filter_samples(artifacts, correct_projects)
    session = Session(pro, source_udf, destination_udf)
    session.copy_main(correct_samples)
    if len(incorrect_udf) == 0:
        warning = "no projects"
    else:
        warning = "WARNING: skipped {0} project(s)".format(len(incorrect_udf))
    d = {'cs': len(correct_samples), 'warning': warning}
    abstract = (
        "Updated {cs} sample(s), {warning} with incorrect udf info.").format(
            **d)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
def samplesheet_pool_samples(lims, process_id, output_file):
    """Create manual pipetting samplesheet for pooling samples.

    Writes one tab-separated line per input artifact (sample, container, well,
    destination pool), sorted by container name and then by 96-well position.
    """
    process = Process(lims, id=process_id)
    # print header
    output_file.write('Sample\tContainer\tWell\tPool\n')
    # Get all input artifact and store per container
    input_containers = {}
    for input_artifact in process.all_inputs(resolve=True):
        container = input_artifact.location[0].name
        # Normalize 'A:1' style well labels to 'A1'
        well = ''.join(input_artifact.location[1].split(':'))
        if container not in input_containers:
            input_containers[container] = {}
        input_containers[container][well] = input_artifact
    # print pool scheme per input artifact
    # sort on container and well
    for input_container in sorted(input_containers.keys()):
        input_artifacts = input_containers[input_container]
        for well in clarity_epp.export.utils.sort_96_well_plate(
                input_artifacts.keys()):
            output_file.write('{sample}\t{container}\t{well}\t{pool}\n'.format(
                sample=input_artifacts[well].name,
                container=input_artifacts[well].location[0].name,
                well=well,
                # The output Analyte of this input is the pool it goes into
                pool=process.outputs_per_input(input_artifacts[well].id,
                                               Analyte=True)[0].name))
def main(lims, args, epp_logger):
    """EPP entry point: copy the project-level 'Reference genome' UDF to samples.

    Duplicate of the formatted variant elsewhere in this file; projects missing
    the source UDF are skipped and reported in the stderr summary.
    """
    pro = Process(lims, id=args.pid)
    source_udf = 'Reference genome'
    destination_udf = 'Reference Genome'
    artifacts = pro.all_inputs(unique=True)
    projects = all_projects_for_artifacts(artifacts)
    # Only projects that define the source UDF are copied
    correct_projects, incorrect_udf = check_udf_is_defined(projects, source_udf)
    correct_samples = filter_samples(artifacts, correct_projects)
    session = Session(pro, source_udf, destination_udf)
    session.copy_main(correct_samples)
    if len(incorrect_udf) == 0:
        warning = "no projects"
    else:
        warning = "WARNING: skipped {0} project(s)".format(len(incorrect_udf))
    d = {'cs': len(correct_samples), 'warning': warning}
    abstract = ("Updated {cs} sample(s), {warning} with incorrect udf info.").format(**d)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
def samplesheet_dilute_library_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for sequencing pools.

    For every input pool, the molarity is derived from the fragment length and
    fluorescence concentration measured in the preceding QC step, and the
    sample/EB volumes needed to reach the target molarity are written out,
    sorted by pool number.
    """
    output_file.write('Sample\tContainer\tWell\tul Sample\tul EB\n')
    process = Process(lims, id=process_id)
    output = []  # save pool data to list, to be able to sort on pool number.
    nM_pool = process.udf['Dx Pool verdunning (nM)']
    output_ul = process.udf['Eindvolume (ul)']
    for input in process.all_inputs():
        # Pools are named 'Pool #<n>_...'; pools without that pattern sort first
        search_number = re.search(r'Pool #(\d+)_', input.name)
        if search_number:
            input_number = int(search_number.group(1))
        else:
            input_number = 0
        qc_artifact = input.input_artifact_list()[0]
        size = float(qc_artifact.udf['Dx Fragmentlengte (bp)'])
        concentration = float(
            qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
        # ng/ul -> nM using average dsDNA base-pair mass (660 g/mol/bp)
        nM_dna = (concentration * 1000 * (1 / 660.0) * (1 / size)) * 1000
        ul_sample = (nM_pool / nM_dna) * output_ul
        ul_EB = output_ul - ul_sample
        line = '{pool_name}\t{container}\t{well}\t{ul_sample:.2f}\t{ul_EB:.2f}\n'.format(
            pool_name=input.name,
            container=input.location[0].name,
            well=input.location[1],
            ul_sample=ul_sample,
            ul_EB=ul_EB)
        output.append((input_number, line))
    for number, line in sorted(output):
        output_file.write(line)
def main(lims, args, epp_logger):
    """EPP entry point: compute 'Amount (ng)' = Concentration * Volume (ul).

    Artifacts missing either factor UDF, or whose 'Conc. Units' is not 'ng/ul',
    are skipped; a summary is printed to stderr for the GUI.
    """
    p = Process(lims, id=args.pid)
    udf_check = "Conc. Units"
    value_check = "ng/ul"
    udf_factor1 = "Concentration"
    udf_factor2 = "Volume (ul)"
    result_udf = "Amount (ng)"
    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = filter(lambda a: a.output_type == "ResultFile", all_artifacts)
    # Keep only artifacts that define both factors with the expected units
    correct_artifacts, wrong_factor1 = check_udf_is_defined(artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(correct_artifacts, udf_factor2)
    correct_artifacts, wrong_value = check_udf_has_value(correct_artifacts, udf_check, value_check)
    if correct_artifacts:
        # "*" selects multiplication as the combining operation
        apply_calculations(lims, correct_artifacts, udf_factor1, "*", udf_factor2,
                           result_udf, epp_logger, p)
    d = {"ca": len(correct_artifacts),
         "ia": len(wrong_factor1) + len(wrong_factor2) + len(wrong_value)}
    abstract = (
        "Updated {ca} artifact(s), skipped {ia} artifact(s) with "
        "wrong and/or blank values for some udfs."
    ).format(**d)
    print >>sys.stderr, abstract  # stderr will be logged and printed in GUI
def sequencing_run(lims, email_settings, process_id):
    """Send a QC-control summary email for a finished sequencing run.

    The message includes the run name, technician, next LIMS action, the
    conversion-report UDF, optional error-registration UDFs, and any manager
    review (escalation) recorded on the step.
    """
    process = Process(lims, id=process_id)
    artifact = process.all_inputs()[0]
    subject = "LIMS QC Controle - {0}".format(artifact.name)
    message = "Sequencing Run: {0}\n".format(artifact.name)
    message += "Technician: {0}\n".format(process.technician.name)
    message += "LIMS Next Action: {0}\n\n".format(
        process.step.actions.next_actions[0]['action'])
    message += "UDF - Conversie rapport OK?: {0}\n".format(
        process.udf['Conversie rapport OK?'])
    if 'Fouten registratie (uitleg)' in process.udf:
        message += "UDF - Fouten registratie (uitleg): {0}\n".format(
            process.udf['Fouten registratie (uitleg)'])
    if 'Fouten registratie (oorzaak)' in process.udf:
        # Bug fix: this branch previously read the '(uitleg)' UDF again
        # (copy-paste error), so the cause was never reported.
        message += "UDF - Fouten registratie (oorzaak): {0}\n".format(
            process.udf['Fouten registratie (oorzaak)'])
    if process.step.actions.escalation:
        message += "\nManager Review LIMS:\n"
        message += "{0}: {1}\n".format(
            process.step.actions.escalation['author'].name,
            process.step.actions.escalation['request'])
        message += "{0}: {1}\n".format(
            process.step.actions.escalation['reviewer'].name,
            process.step.actions.escalation['answer'])
    send_email(email_settings['server'], email_settings['from'],
               email_settings['to_sequencing_run_complete'], subject, message)
def main(lims, args):
    """EPP entry point: post a 'Workset' running note per project to CouchDB.

    Groups input samples by project, records how many samples of each project
    are planned for the workset (named after the first output's container),
    writes a log file and attaches it to the 'RNotes Log' output.
    """
    p = Process(lims, id=args.pid)
    log = []
    datamap = {}
    wsname = None
    # NOTE(review): the format string is a redacted placeholder ("******") and
    # ignores its arguments — presumably this should include the technician's
    # name; verify against the original script.
    username = "******".format(p.technician.first_name, p.technician.last_name)
    user_email = p.technician.email
    for art in p.all_inputs():
        if len(art.samples) != 1:
            log.append(
                "Warning : artifact {0} has more than one sample".format(
                    art.id))
        for sample in art.samples:
            # take care of lambda DNA (control samples have no project)
            if sample.project:
                if sample.project.id not in datamap:
                    datamap[sample.project.id] = [sample.name]
                else:
                    datamap[sample.project.id].append(sample.name)
    for art in p.all_outputs():
        # First output with a container gives the workset (plate) name
        try:
            wsname = art.location[0].name
            break
        except:
            pass
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    for pid in datamap:
        pj = Project(lims, id=pid)
        if len(datamap[pid]) > 1:
            rnt = "{0} samples planned for {1}".format(len(datamap[pid]), wsname)
        else:
            rnt = "{0} sample planned for {1}".format(len(datamap[pid]), wsname)
        running_note = {
            "note": rnt,
            "user": username,
            "email": user_email,
            "category": "Workset"
        }
        write_note_to_couch(pid, now, running_note, lims.get_uri())
        log.append(
            "Updated project {0} : {1}, {2} samples in this workset".format(
                pid, pj.name, len(datamap[pid])))
    with open("EPP_Notes.log", "w") as flog:
        flog.write("\n".join(log))
    for out in p.all_outputs():
        # attach the log file
        if out.name == "RNotes Log":
            attach_file(os.path.join(os.getcwd(), "EPP_Notes.log"), out)
    sys.stderr.write("Updated {0} projects successfully".format(
        len(list(datamap.keys()))))
def main(lims, args):
    """Fail the step when any sample occurs more than once among its inputs."""
    step = Process(lims, id=args.p)
    dupes = get_duplicate_samples(step.all_inputs())
    if not dupes:
        print >> sys.stderr, 'No duplicated samples!'
    else:
        # Non-zero exit with message aborts the EPP and surfaces the error
        sys.exit('Samples: ' + ', '.join(dupes) +
                 ' appeared more than once in this step.')
def unpooling(lims, process_id):
    """Unpool samples after sequencing.

    Reads the demux endpoint of the input pool, copies the run id and the
    project (looked up in the samplesheet of the pooling step) onto each
    per-sample artifact, and routes DX production samples to the
    post-sequencing workflow.
    """
    process = Process(lims, id=process_id)
    pool_artifact = process.all_inputs()[0]
    pool_artifact_parent_process = pool_artifact.parent_process
    pool_artifact_demux = lims.get(pool_artifact.uri + '/demux')
    # Assume run id is set as pool name using placement/artifact/set_runid_name
    run_id = pool_artifact.name
    sample_artifacts = []  # sample artifacts before pooling
    sample_projects = {}

    # Parse the samplesheet attached to the pooling step to map sample -> project
    for artifact in pool_artifact_parent_process.result_files():
        if (artifact.name == 'SampleSheet csv' or artifact.name == 'Sample Sheet') and artifact.files:
            file_id = artifact.files[0].id
            sample_sheet = lims.get_file_contents(id=file_id)
            project_index = None
            sample_index = None
            for line in sample_sheet.split('\n'):
                data = line.rstrip().split(',')
                if 'Sample_Project' in data and 'Sample_ID' in data:
                    project_index = data.index('Sample_Project')
                    sample_index = data.index('Sample_ID')
                # Bug fixes: 'project_index' could legitimately be 0 (falsy),
                # and 'len(data) >= project_index' allowed an IndexError when
                # the row was exactly project_index columns long and never
                # checked sample_index at all.
                elif (project_index is not None
                        and len(data) > max(project_index, sample_index)):
                    sample_projects[data[sample_index]] = data[project_index]

    for node in pool_artifact_demux.getiterator('artifact'):
        # Explicit 'is not None': truth-testing an Element is deprecated
        if node.find('samples') is not None:
            if len(node.find('samples').findall('sample')) == 1:
                sample_artifact = Artifact(lims, uri=node.attrib['uri'])
                sample = sample_artifact.samples[0]  # 1 sample per artifact.
                # Get sample sequencing run and project from samplesheet
                sample_artifact.udf['Dx Sequencing Run ID'] = run_id
                if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']:
                    # Use sample.name for external (clarity_portal) samples
                    sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name]
                else:
                    # Use sample_artifact.name for Dx samples (upload via Helix)
                    sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name]
                sample_artifact.put()
                if (sample_artifact.samples[0].project
                        and sample_artifact.samples[0].project.udf['Application'] == 'DX'):
                    # Only move DX production samples to post sequencing workflow
                    sample_artifacts.append(sample_artifact)

    lims.route_artifacts(
        sample_artifacts,
        workflow_uri=Workflow(lims, id=config.post_sequencing_workflow).uri)
def samplesheet_normalization(lims, process_id, output_file):
    """Create manual pipetting samplesheet for normalizing (MIP) samples.

    For each input, looks up the most recent Qubit/Tecan QC concentration,
    computes sample/water volumes to reach the requested input amount, clamps
    the pipettable minimum to 0.5 ul, and flags evaporation ('J') when the
    required sample volume exceeds the final volume.
    """
    output_file.write(
        'Sample\tConcentration (ng/ul)\tVolume sample (ul)\tVolume water (ul)\tOutput (ng)\tIndampen\n'
    )
    process = Process(lims, id=process_id)
    # Find all QC process types
    qc_process_types = clarity_epp.export.utils.get_process_types(
        lims, ['Dx Qubit QC', 'Dx Tecan Spark 10M QC'])
    for input_artifact in process.all_inputs(resolve=True):
        artifact = process.outputs_per_input(
            input_artifact.id, Analyte=True)[0]  # assume one artifact per input
        sample = input_artifact.samples[
            0]  # asume one sample per input artifact
        # Find last qc process for artifact (highest numeric process id)
        qc_process = sorted(
            lims.get_processes(type=qc_process_types,
                               inputartifactlimsid=input_artifact.id),
            key=lambda process: int(process.id.split('-')[-1]))[-1]
        # Find concentration measurement
        for qc_artifact in qc_process.outputs_per_input(input_artifact.id):
            if qc_artifact.name.split(' ')[0] == artifact.name:
                concentration = float(
                    qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
        final_volume = float(artifact.udf['Dx Eindvolume (ul)'])
        input_ng = float(artifact.udf['Dx Input (ng)'])
        if 'Dx pipetteervolume (ul)' in artifact.udf:
            # Explicit pipetting volume overrides the requested input amount
            input_ng = concentration * float(
                artifact.udf['Dx pipetteervolume (ul)'])
        sample_volume = input_ng / concentration
        water_volume = final_volume - sample_volume
        evaporate = 'N'
        if sample_volume < 0.5:
            # Clamp to minimum pipettable volume
            sample_volume = 0.5
            water_volume = final_volume - sample_volume
        elif sample_volume > final_volume:
            evaporate = 'J'
            water_volume = 0
        output_file.write(
            '{sample}\t{concentration:.1f}\t{sample_volume:.1f}\t{water_volume:.1f}\t{output:.1f}\t{evaporate}\n'
            .format(sample=sample.name,
                    concentration=concentration,
                    sample_volume=sample_volume,
                    water_volume=water_volume,
                    output=input_ng,
                    evaporate=evaporate))
def main(lims, args):
    """EPP entry point: append a 'Workset' running note to each project UDF.

    Legacy variant that stores the notes as JSON in the project's
    'Running Notes' UDF instead of writing to CouchDB; also writes and
    attaches an EPP_Notes.log file.
    """
    p = Process(lims, id=args.pid)
    log = []
    datamap = {}
    wsname = None
    # NOTE(review): the format string is a redacted placeholder ("******") and
    # ignores its arguments — verify against the original script.
    username = "******".format(p.technician.first_name, p.technician.last_name)
    user_email = p.technician.email
    for art in p.all_inputs():
        if len(art.samples) != 1:
            log.append("Warning : artifact {0} has more than one sample".format(art.id))
        for sample in art.samples:
            # take care of lambda DNA (control samples have no project)
            if sample.project:
                if sample.project.id not in datamap:
                    datamap[sample.project.id] = [sample.name]
                else:
                    datamap[sample.project.id].append(sample.name)
    for art in p.all_outputs():
        # First output with a container gives the workset (plate) name
        try:
            wsname = art.location[0].name
            break
        except:
            pass
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    for pid in datamap:
        pj = Project(lims, id=pid)
        # NOTE(review): raises KeyError if the project lacks the
        # 'Running Notes' UDF — confirm every project defines it.
        running_notes = json.loads(pj.udf['Running Notes'])
        if len(datamap[pid]) > 1:
            rnt = "{0} samples planned for {1}".format(len(datamap[pid]), wsname)
        else:
            rnt = "{0} sample planned for {1}".format(len(datamap[pid]), wsname)
        running_notes[now] = {"note": rnt, "user": username, "email": user_email, "category": "Workset"}
        pj.udf['Running Notes'] = json.dumps(running_notes)
        pj.put()
        log.append("Updated project {0} : {1}, {2} samples in this workset".format(pid, pj.name, len(datamap[pid])))
    with open("EPP_Notes.log", "w") as flog:
        flog.write("\n".join(log))
    for out in p.all_outputs():
        # attach the log file
        if out.name == "RNotes Log":
            attach_file(os.path.join(os.getcwd(), "EPP_Notes.log"), out)
    sys.stderr.write("Updated {0} projects successfully".format(len(datamap.keys())))
def get(self):
    """HTTP GET: return JSON with sample amounts, project comments and any
    previous 'Setup Workset/Plate' preps for the process given in the query.

    Responds 400 when the referenced process is not a 'Setup Workset/Plate'.
    """
    data = {}
    lims_url = self.request.query
    lims_id = "24-{}".format(lims_url.split("/")[-1])
    mylims = lims.Lims(BASEURI, USERNAME, PASSWORD)
    try:
        p = Process(mylims, id=lims_id)
        if p.type.name != 'Setup Workset/Plate':
            raise Exception("Wrong process type")
    except Exception:
        self.set_status(
            400, reason="Wrong process type : use a Setup Workset/Plate")
        self.finish()
        # Bug fix: without this return the handler kept executing after
        # finish(), crashing on the possibly-unbound 'p'.
        return
    data['comments'] = {}
    data['samples'] = {}
    for i in p.all_inputs():
        sample_name = i.samples[0].name
        if not i.samples[0].project:
            # Controls have no project; skip them
            continue
        else:
            project = i.samples[0].project
        if 'Project Comment' in project.udf and project.id not in data['comments']:
            data['comments'][project.id] = project.udf['Project Comment']
        data['samples'][sample_name] = {}
        data['samples'][sample_name]['amount'] = i.udf['Amount (ng)']
        data['samples'][sample_name]['previous_preps'] = {}
        if 'Library construction method' in project.udf:
            data['samples'][sample_name]['lib_method'] = project.udf[
                'Library construction method']
        if 'Sequencing platform' in project.udf:
            data['samples'][sample_name]['seq_pl'] = project.udf[
                'Sequencing platform']
        # Collect earlier preps of the same sample in other workset processes
        other_preps = mylims.get_processes(inputartifactlimsid=i.id,
                                           type="Setup Workset/Plate")
        for op in other_preps:
            if op.id != p.id:
                for o in op.all_outputs():
                    if o.type == "Analyte" and o.samples[0].name == sample_name:
                        prev = data['samples'][sample_name]['previous_preps']
                        prev[o.location[0].name] = {}
                        prev[o.location[0].name]['position'] = o.location[1]
                        prev[o.location[0].name]['amount'] = o.udf['Amount taken (ng)']
    self.set_header("Content-type", "application/json")
    self.write(json.dumps(data))
def samplesheet_dilute(lims, process_id, output_file):
    """Create Caliper samplesheet for diluting samples.

    Writes one line per 96-plate well (column-major A1..H12); wells without an
    input sample are emitted as 'Leeg' rows so the plate layout is complete.
    """
    output_file.write(
        'Monsternummer\tPlate_Id_input\tWell\tPlate_Id_output\tPipetteervolume DNA (ul)\tPipetteervolume H2O (ul)\n'
    )
    process = Process(lims, id=process_id)
    output = {}  # save output data to dict, to be able to sort on well.
    nM_pool = process.udf['Dx Pool verdunning (nM)']
    output_ul = process.udf['Eindvolume (ul)']
    # Bug fix: the empty-well loop below used the 'output_artifact' loop
    # variable after the loop ended, which raised NameError when the step had
    # no inputs. Capture the output plate id in its own variable instead.
    plate_id_output = None
    for input_artifact in process.all_inputs():
        output_artifact = process.outputs_per_input(input_artifact.id,
                                                    Analyte=True)[0]
        plate_id_output = output_artifact.location[0].name
        # Get QC stats
        size = float(input_artifact.udf['Dx Fragmentlengte (bp)'])
        concentration = float(
            input_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
        # Calculate dilution (ng/ul -> nM via 660 g/mol/bp)
        nM_dna = (concentration * 1000 * (1 / 660.0) * (1 / size)) * 1000
        ul_sample = (nM_pool / nM_dna) * output_ul
        ul_water = output_ul - ul_sample
        # Store output lines by well ('A:1' -> 'A1')
        well = ''.join(input_artifact.location[1].split(':'))
        output[well] = '{name}\t{plate_id_input}\t{well}\t{plate_id_output}\t{volume_dna:.1f}\t{volume_water:.1f}\n'.format(
            name=input_artifact.name,
            plate_id_input=input_artifact.location[0].name,
            well=well,
            plate_id_output=plate_id_output,
            volume_dna=ul_sample,
            volume_water=ul_water)
    # Enumerate all wells column-wise: A1..H1, A2..H2, ...
    wells = []
    for col in range(1, 13):
        wells.extend([
            '{}{}'.format(row, str(col)) for row in string.ascii_uppercase[:8]
        ])
    for well in wells:
        if well in output:
            output_file.write(output[well])
        else:
            output_file.write(
                'Leeg\tNone\t{well}\t{plate_id_output}\t0\t0\n'.format(
                    well=well,
                    plate_id_output=plate_id_output,
                ))
def main(lims, args, epp_logger):
    """EPP entry point: write a normalization CSV (well, water ul, sample ul).

    Samples are sorted column-wise by plate position; concentrations are read
    from ``args.concentrationUDF`` on either the input or (for WGS) the output
    artifact, and the required sample/water volumes are computed per row.

    Raises:
        RuntimeError: when a sample lacks the concentration UDF.
    """
    p = Process(lims, id=args.pid)
    target_concentration = float(args.targetConcentration)
    target_volume = float(args.targetVolume)
    threshold_conc_no_normalize = float(args.thresholdConcNoNormalize)
    samples_in = p.all_inputs(unique=True)
    well_re = re.compile("([A-Z]):*([0-9]{1,2})")
    # wrap the call in a lambda to be able to pass in the regex
    samples_in.sort(key=lambda sample: sort_samples_columnwise(sample, well_re))
    if args.concOnOutput:
        # required for the WGS step
        samples = [find_output_artifact(s.name, p) for s in samples_in]
    else:
        samples = samples_in
    # Fixes: removed a leftover debug print(samples); open the CSV once in 'w'
    # mode with newline='' instead of truncating and then re-opening in append
    # mode (without newline='') for every single row.
    with open(args.newCsvFilename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter='\t')
        for i, sample in enumerate(samples):
            if not args.concOnOutput and sample.type != "Analyte":
                # if 16S, only work on analytes (not result files)
                # but WGS should work on result files
                continue
            concentration = get_udf_if_exists(sample, args.concentrationUDF,
                                              default=None)
            if concentration is None:
                raise RuntimeError("Could not find UDF '%s' of sample '%s'" %
                                   (args.concentrationUDF, sample.name))
            concentration = float(concentration)
            sample_required, water_required = calculate_volumes_required(
                concentration, target_concentration, target_volume,
                threshold_conc_no_normalize, is_control(sample.name))
            sample_required = format_volume(sample_required)
            water_required = format_volume(water_required)
            # Well position comes from the input artifact ('A:1' -> 'A1')
            well = ''.join(samples_in[i].location[1].split(':'))
            csv_writer.writerow([well, water_required, sample_required])
def samplesheet_pool_magnis_pools(lims, process_id, output_file):
    """Create manual pipetting samplesheet for pooling magnis pools. Correct for pools with < 8 samples"""
    process = Process(lims, id=process_id)
    # print header
    output_file.write('Pool\tContainer\tSample count\tVolume (ul)\n')
    # Get input pools, sort by name and print volume (1.25 ul per sample)
    pools = sorted(process.all_inputs(resolve=True),
                   key=lambda artifact: artifact.name)
    for pool in pools:
        sample_count = len(pool.samples)
        line = '{pool}\t{container}\t{sample_count}\t{volume}\n'.format(
            pool=pool.name,
            container=pool.container.name,
            sample_count=sample_count,
            volume=sample_count * 1.25,
        )
        output_file.write(line)
def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for multiplex sequence pools.

    Each input pool's molarity is derived from its fluorescence concentration
    and fragment length; load volumes are distributed over a 150 ul total in
    proportion to each pool's (exome-equivalent-weighted) sample count, and
    the remainder is filled with Tris-HCL.
    """
    process = Process(lims, id=process_id)
    input_pools = []
    total_sample_count = 0
    total_load_uL = 0
    for input_pool in process.all_inputs():
        input_pool_conc = float(
            input_pool.udf['Dx Concentratie fluorescentie (ng/ul)'])
        input_pool_size = float(input_pool.udf['Dx Fragmentlengte (bp)'])
        # ng/ul -> nM using average dsDNA base-pair mass (660 g/mol/bp)
        input_pool_nM = (input_pool_conc * 1000 * (1.0 / 660.0) *
                         (1 / input_pool_size)) * 1000
        input_pool_pM = (input_pool_nM * 1000) / 5
        input_pool_sample_count = 0
        for sample in input_pool.samples:
            # Samples may count for more than one exome equivalent
            if 'Dx Exoomequivalent' in sample.udf:
                input_pool_sample_count += sample.udf['Dx Exoomequivalent']
            else:
                input_pool_sample_count += 1
        total_sample_count += input_pool_sample_count
        input_pools.append({
            'name': input_pool.name,
            'nM': input_pool_nM,
            'pM': input_pool_pM,
            'sample_count': input_pool_sample_count
        })
    # print header
    output_file.write('Naam\tuL\n')
    # Last calcuations and print sample
    for input_pool in input_pools:
        input_pool_load_pM = (float(process.udf['Dx Laadconcentratie (pM)']) /
                              total_sample_count) * input_pool['sample_count']
        input_pool_load_uL = 150.0 / (input_pool['pM'] / input_pool_load_pM)
        total_load_uL += input_pool_load_uL
        output_file.write('{0}\t{1:.2f}\n'.format(input_pool['name'],
                                                  input_pool_load_uL))
    # Fill up to 150 ul with buffer
    tris_HCL_uL = 150 - total_load_uL
    output_file.write('{0}\t{1:.2f}\n'.format('Tris-HCL', tris_HCL_uL))
def set_runid_name(lims, process_id):
    """Change artifact name to run id.

    Looks up the sequencing process that ran on the same container as this
    step's first analyte and renames the analyte to that process's 'Run ID'.
    """
    process = Process(lims, id=process_id)
    analyte = process.analytes()[0][0]
    input_artifact = process.all_inputs()[0]
    container_name = analyte.container.name
    # Find sequencing process
    # Assume one sequence process per input artifact
    for sequence_process_type in config.sequence_process_types:
        sequence_processes = lims.get_processes(
            type=sequence_process_type, inputartifactlimsid=input_artifact.id)
        for sequence_process in sequence_processes:
            # Match on container name to pick the right sequencing run
            if sequence_process.analytes()[0][0].container.name == container_name:
                analyte.name = sequence_process.udf['Run ID']
                analyte.put()
def get(self):
    """HTTP GET: return JSON with sample amounts, project comments and any
    previous 'Setup Workset/Plate' preps for the process given in the query.

    Unformatted duplicate of the handler elsewhere in this file; responds 400
    when the referenced process is not a 'Setup Workset/Plate'.
    """
    data = {}
    lims_url = self.request.query
    lims_id = "24-{}".format(lims_url.split("/")[-1])
    mylims = lims.Lims(BASEURI, USERNAME, PASSWORD)
    try:
        p = Process(mylims, id=lims_id)
        if p.type.name != 'Setup Workset/Plate':
            raise Exception("Wrong process type")
    except Exception:
        self.set_status(400, reason="Wrong process type : use a Setup Workset/Plate")
        self.finish()
        # Bug fix: without this return the handler kept executing after
        # finish(), crashing on the possibly-unbound 'p'.
        return
    data['comments'] = {}
    data['samples'] = {}
    for i in p.all_inputs():
        sample_name = i.samples[0].name
        if not i.samples[0].project:
            # Controls have no project; skip them
            continue
        else:
            project = i.samples[0].project
        if 'Project Comment' in project.udf and project.id not in data['comments']:
            data['comments'][project.id] = project.udf['Project Comment']
        data['samples'][sample_name] = {}
        data['samples'][sample_name]['amount'] = i.udf['Amount (ng)']
        data['samples'][sample_name]['previous_preps'] = {}
        if 'Library construction method' in project.udf:
            data['samples'][sample_name]['lib_method'] = project.udf['Library construction method']
        if 'Sequencing platform' in project.udf:
            data['samples'][sample_name]['seq_pl'] = project.udf['Sequencing platform']
        # Collect earlier preps of the same sample in other workset processes
        other_preps = mylims.get_processes(inputartifactlimsid=i.id, type="Setup Workset/Plate")
        for op in other_preps:
            if op.id != p.id:
                for o in op.all_outputs():
                    if o.type == "Analyte" and o.samples[0].name == sample_name:
                        prev = data['samples'][sample_name]['previous_preps']
                        prev[o.location[0].name] = {}
                        prev[o.location[0].name]['position'] = o.location[1]
                        prev[o.location[0].name]['amount'] = o.udf['Amount taken (ng)']
    self.set_header("Content-type", "application/json")
    self.write(json.dumps(data))
def main(lims, args, epp_logger):
    """EPP entry point: compute an 'Amount' UDF from concentration and volume.

    For 'Aggregate QC (Library Validation) 4.0' the result is 'Amount (fmol)'
    with molar units expected; otherwise 'Amount (ng)' with mass units. A
    summary is printed to stderr for the GUI.
    """
    p = Process(lims, id=args.pid)
    # Library validation works in molar units; everything else in ng
    if p.type.name == 'Aggregate QC (Library Validation) 4.0':
        udf_check = 'Conc. Units'
        value_check = ['nM', 'pM']
        udf_factor1 = 'Concentration'
        udf_factor2 = 'Volume (ul)'
        result_udf = 'Amount (fmol)'
    else:
        udf_check = 'Conc. Units'
        value_check = ['ng/ul', 'ng/uL']
        udf_factor1 = 'Concentration'
        udf_factor2 = 'Volume (ul)'
        result_udf = 'Amount (ng)'
    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "ResultFile"]
    # Keep only artifacts with both factors defined and an accepted unit
    correct_artifacts, wrong_factor1 = check_udf_is_defined(
        artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(
        correct_artifacts, udf_factor2)
    correct_artifacts, wrong_value = check_udf_has_value(
        correct_artifacts, udf_check, value_check)
    if correct_artifacts:
        # '*' selects multiplication as the combining operation
        apply_calculations(lims, correct_artifacts, udf_factor1, '*',
                           udf_factor2, result_udf, epp_logger, p)
    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_factor1) + len(wrong_factor2) + len(wrong_value)
    }
    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)
    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
def samplesheet_mip_pool_dilution(lims, process_id, output_file):
    """Create manual pipetting samplesheet for smMIP pool dilution.

    For each input pool, computes the sample/EB-buffer volumes needed to reach
    2 nM in a 10 ul dilution, plus scaled columns for 20 ul and 40 ul totals.
    """
    process = Process(lims, id=process_id)
    # Write header
    output_file.write(
        '{sample}\t{ul_sample_10}\t{ul_EB_10}\t{ul_sample_20}\t{ul_EB_20}\t{ul_sample_40}\t{ul_EB_40}\t\n'
        .format(
            sample='Sample',
            ul_sample_10='ul Sample (10 ul)',
            ul_EB_10='ul EB buffer (10 ul)',
            ul_sample_20='ul Sample (20 ul)',
            ul_EB_20='ul EB buffer (20 ul)',
            ul_sample_40='ul Sample (40 ul)',
            ul_EB_40='ul EB buffer (40 ul)',
        ))
    for input_artifact in process.all_inputs(resolve=True):
        concentration = float(
            input_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
        fragment_length = float(input_artifact.udf['Dx Fragmentlengte (bp)'])
        # ng/ul -> nM; 649 g/mol/bp is used here (cf. 660 elsewhere in this file)
        dna = (concentration * (10.0**3.0 / 1.0) * (1.0 / 649.0) *
               (1.0 / fragment_length)) * 1000.0
        # Volume to reach 2 nM in 10 ul; 20/40 ul columns scale linearly
        ul_sample = 2 / dna * 10
        ul_EB = 10 - ul_sample
        output_file.write(
            '{sample}\t{ul_sample_10:.2f}\t{ul_EB_10:.2f}\t{ul_sample_20:.2f}\t{ul_EB_20:.2f}\t{ul_sample_40:.2f}\t{ul_EB_40:.2f}\t\n'
            .format(
                sample=input_artifact.name,
                ul_sample_10=ul_sample,
                ul_EB_10=ul_EB,
                ul_sample_20=ul_sample * 2,
                ul_EB_20=ul_EB * 2,
                ul_sample_40=ul_sample * 4,
                ul_EB_40=ul_EB * 4,
            ))
def main(lims, args, epp_logger):
    """EPP entry point: write a semicolon-delimited CSV of (well, concentration).

    Samples are sorted column-wise by plate position; a zero concentration is
    replaced by 0.01 so downstream division does not fail.

    Raises:
        RuntimeError: when a sample lacks the concentration UDF.
    """
    p = Process(lims, id=args.pid)
    # the well location information is on the input samples
    well_re = re.compile("([A-Z]):*([0-9]{1,2})")
    samples_in = p.all_inputs(unique=True)
    # wrap the call in a lambda to be able to pass in the regex
    samples_in.sort(key=lambda sample: sort_samples_columnwise(sample, well_re))
    if args.concOnOutput:
        # required in the WGS step
        samples = [find_output_artifact(s.name, p) for s in samples_in]
    else:
        samples = samples_in
    # Fix: open the CSV once in 'w' mode with newline='' rather than
    # truncating it and then re-opening in append mode (without newline='')
    # for every single row.
    with open(args.newCsvFilename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')
        for i, sample in enumerate(samples):
            concentration = get_udf_if_exists(sample, args.concUdf, default=None)
            if concentration is None:
                raise RuntimeError("Could not find UDF '%s' of sample '%s'" %
                                   (args.concUdf, sample.name))
            concentration = float(concentration)
            if concentration == 0.0:
                # Avoid zero so downstream normalization can divide safely
                concentration = 0.01
            well = ''.join(samples_in[i].location[1].split(':'))
            csv_writer.writerow([well, concentration])
def main(lims, args, epp_logger):
    """EPP entry point: compute 'Amount (ng)' = Concentration * Volume (ul).

    Python 2 variant of the amount calculation: artifacts missing either
    factor UDF, or whose 'Conc. Units' is not 'ng/ul', are skipped; a summary
    is printed to stderr for the GUI.
    """
    p = Process(lims, id=args.pid)
    udf_check = 'Conc. Units'
    value_check = 'ng/ul'
    udf_factor1 = 'Concentration'
    udf_factor2 = 'Volume (ul)'
    result_udf = 'Amount (ng)'
    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = filter(lambda a: a.output_type == "ResultFile", all_artifacts)
    # Keep only artifacts with both factors defined and the expected units
    correct_artifacts, wrong_factor1 = check_udf_is_defined(
        artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(
        correct_artifacts, udf_factor2)
    correct_artifacts, wrong_value = check_udf_has_value(
        correct_artifacts, udf_check, value_check)
    if correct_artifacts:
        # '*' selects multiplication as the combining operation
        apply_calculations(lims, correct_artifacts, udf_factor1, '*',
                           udf_factor2, result_udf, epp_logger, p)
    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_factor1) + len(wrong_factor2) + len(wrong_value)
    }
    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
def process_samples(lims_process: Process) -> Generator[str, None, None]:
    """Retrieve LIMS input samples from a process."""
    # One artifact may carry several samples; yield every sample id once
    # per occurrence, in input order.
    yield from (
        lims_sample.id
        for lims_artifact in lims_process.all_inputs()
        for lims_sample in lims_artifact.samples
    )
def helix_magnis(lims, process_id, output_file):
    """Export workflow information in helix table format (Magnis Workflow).

    Writes one tab-separated header line plus one row per sample (on the
    process' input artifacts) that carries a 'Dx Werklijstnummer' UDF. Each
    row reports pass/repeat flags ("meetwaarden") for the purification,
    sample-prep and sequencing stages, derived from the sample's artifact
    history, plus the GATK and ExomeDepth vcf file names from the artifact.
    """
    output_file.write((
        "meet_id\twerklijst_nummer\tonderzoeknr\tmonsternummer\tZuivering OK?\tZuivering herh?\tSampleprep OK?\t"
        "Sampleprep herh?\tSequencen OK?\tSequencen herh?\tBfx analyse OK?\tSamplenaam\tvcf-file\tCNV vcf-file\n"
    ))
    process = Process(lims, id=process_id)
    for artifact in process.all_inputs():
        for sample in artifact.samples:
            if 'Dx Werklijstnummer' in sample.udf:  # Only check samples with a 'Werklijstnummer'
                # Full artifact history of this sample (Analyte artifacts only).
                sample_artifacts = lims.get_artifacts(samplelimsid=sample.id, type='Analyte')
                # Filter artifacts without parent_process
                sample_artifacts = [
                    sample_artifact for sample_artifact in sample_artifacts
                    if sample_artifact.parent_process
                ]
                # Sort artifact by parent process id (numeric suffix), i.e. chronologically.
                # NOTE(review): the lambda parameter shadows the outer 'artifact'.
                sample_artifacts = sorted(
                    sample_artifacts,
                    key=lambda artifact: int(artifact.parent_process.id.split('-')[-1]))
                # process-name -> set of process ids, over the whole history ...
                sample_all_processes = {}
                # ... and over the history since the last helix import:
                # reset after 'Dx Sample registratie zuivering' process -> this is a new
                # import from helix, should not be counted as a repeat
                sample_filter_processes = {}
                for sample_artifact in sample_artifacts:
                    if 'Dx Sample registratie zuivering' in sample_artifact.parent_process.type.name:
                        sample_filter_processes = {}  # reset after new helix import
                    # NOTE(review): this rebinds the 'process_id' function parameter;
                    # harmless here (the parameter is no longer read) but confusing.
                    process_id = sample_artifact.parent_process.id
                    process_name = sample_artifact.parent_process.type.name
                    if process_name in sample_all_processes:
                        sample_all_processes[process_name].add(process_id)
                    else:
                        sample_all_processes[process_name] = set([process_id])
                    if process_name in sample_filter_processes:
                        sample_filter_processes[process_name].add(process_id)
                    else:
                        sample_filter_processes[process_name] = set([process_id])
                # Determine meetw
                # Two runs per worklist are allowed before purification counts as a repeat.
                repeat_cutoff = len(sample.udf['Dx Werklijstnummer'].split(';')) * 2
                meetw_zui, meetw_zui_herh = determin_meetw(
                    config.meetw_zui_processes, sample_all_processes, repeat_cutoff)
                meetw_sampleprep, meetw_sampleprep_herh = determin_meetw(
                    config.meetw_sampleprep_processes, sample_filter_processes, 2)
                meetw_seq, meetw_seq_herh = determin_meetw(
                    config.meetw_seq_processes, sample_filter_processes, 2)
                # Determine vcf files (empty string when the UDF is absent).
                gatk_vcf = ''
                exomedepth_vcf = ''
                if 'Dx GATK vcf' in artifact.udf:
                    gatk_vcf = artifact.udf['Dx GATK vcf']
                if 'Dx ExomeDepth vcf' in artifact.udf:
                    exomedepth_vcf = artifact.udf['Dx ExomeDepth vcf']
                # Helix-style UDFs are ';'-separated lists; only the first entry is reported.
                output_file.write((
                    "{meet_id}\t{werklijst}\t{onderzoeksnummer}\t{monsternummer}\t{meetw_zui}\t{meetw_zui_herh}\t"
                    "{meetw_sampleprep}\t{meetw_sampleprep_herh}\t{meetw_seq}\t{meetw_seq_herh}\t{meetw_bfx}\t"
                    "{sample_name}\t{vcf_file}\t{cnv_vcf_file}\n"
                ).format(
                    meet_id=sample.udf['Dx Meet ID'].split(';')[0],
                    werklijst=sample.udf['Dx Werklijstnummer'].split(';')[0],
                    onderzoeksnummer=sample.udf['Dx Onderzoeknummer'].split(';')[0],
                    monsternummer=sample.udf['Dx Monsternummer'],
                    meetw_zui=meetw_zui,
                    meetw_zui_herh=meetw_zui_herh,
                    meetw_sampleprep=meetw_sampleprep,
                    meetw_sampleprep_herh=meetw_sampleprep_herh,
                    meetw_seq=meetw_seq,
                    meetw_seq_herh=meetw_seq_herh,
                    meetw_bfx='J',  # bioinformatics analysis is always reported OK here
                    sample_name=get_sequence_name(sample),
                    vcf_file=gatk_vcf,
                    cnv_vcf_file=exomedepth_vcf,
                ))
def main(lims, args, epp_logger):
    """Write an xls report (one row per input artifact) of sample metadata and
    concentration UDFs, with cell styling driven by concentration thresholds.

    Rows are sorted column-wise by well. When ``args.udfsOnOutput`` is set,
    the concentration UDFs are read from each input's matching output
    artifact instead of the input itself (required for the WGS step).

    NOTE(review): 'fields' is a module-level dict (not visible in this chunk)
    that is mutated in place each iteration; column order relies on its
    iteration order matching the header row — presumably an OrderedDict or
    Python 3.7+ insertion-ordered dict. TODO confirm.
    """
    p = Process(lims, id=args.pid)
    new_workbook = xlwt.Workbook()
    new_sheet = new_workbook.add_sheet('Sheet 1')
    # Header row from the (ordered) field names.
    for col, heading in enumerate(fields.keys()):
        new_sheet.write(0, col, heading)
    well_re = re.compile("([A-Z]):*([0-9]{1,2})")
    artifacts = p.all_inputs(unique=True)
    # wrap the call in a lambda to be able to pass in the regex
    artifacts.sort(key=lambda sample: sort_samples_columnwise(sample, well_re))
    if args.udfsOnOutput:
        outputs = [find_output_artifact(s.name, p) for s in artifacts]  # required for the WGS step
    for i, artifact in enumerate(artifacts):
        sample = artifact.samples[0]  # the original, submitted sample
        fields["Sample Name"] = artifact.name
        fields["Original DNA Plate LIMS ID"] = ""
        if artifact.location:
            fields["Container Name"] = artifact.location[0].name
            fields["Well"] = artifact.location[1]
        else:
            fields["Container Name"] = "Unknown Container"
            fields["Well"] = "Unknown Well"
        if sample.project:
            fields["Project"] = sample.project.name
        else:
            fields["Project"] = ""
        fields["Sample Origin"] = get_udf_if_exists(sample, "Sample Origin")
        fields["Sample Buffer"] = get_udf_if_exists(sample, "Sample Buffer")
        fields["Indexes"] = artifact.reagent_labels
        fields["PCR Method"] = ""
        if args.udfsOnOutput:
            udf_sample = outputs[i]  # use the equivalent output of the sample to find the UDF measurement
        else:
            udf_sample = artifact
        fields["QuantIt HS Concentration"] = get_udf_if_exists(
            udf_sample, "QuantIt HS Concentration")
        fields["QuantIt BR Concentration"] = get_udf_if_exists(
            udf_sample, "QuantIt BR Concentration")
        fields["Qubit Concentration"] = get_udf_if_exists(
            udf_sample, "Qubit Concentration")
        fields["Chosen Concentration"] = get_udf_if_exists(
            udf_sample, "Concentration")
        fields["QuantIt HS Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "QuantIt HS Concentration (nM)")
        fields["QuantIt BR Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "QuantIt BR Concentration (nM)")
        fields["Qubit Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "Qubit Concentration (nM)")
        fields["Chosen Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "Concentration (nM)")
        # Write the data row (row 0 is the header) with threshold-based styling.
        for col, field in enumerate(fields.values()):
            style = get_field_style(field, float(args.redTextConcThreshold),
                                    float(args.orangeTextConcThreshold))
            new_sheet.write(i + 1, col, field, style)
    new_workbook.save(args.outputFile)
def update_samplesheet(lims, process_id, artifact_id, output_file):
    """Update illumina samplesheet.

    Rewrites the clarity-generated samplesheet (attached as ``artifact_id``):
    groups Dx samples into families, assigns each family to a (possibly
    split) project, renames samples to their sequence names, sets
    Sample_Project / Sample_ID, optionally trims the last sequencing cycle
    via Read1/Read2EndWithCycle settings, and reverse-complements indexes on
    NextSeq runs.
    """
    process = Process(lims, id=process_id)
    trim_last_base = True  # Used to set Read1EndWithCycle

    def get_project(projects, urgent=False):
        """Inner function to get a project name for samples."""
        if urgent:  # Sort projects for urgent samples on name
            projects = sorted(projects.items(), key=operator.itemgetter(0))
            for project in projects:
                if project[1] < 9:
                    return project[0]  # return first project with < 9 samples
            # NOTE(review): falls through (returns None) when every project
            # already holds >= 9 samples — TODO confirm this cannot happen.
        else:  # Sort projects for other samples on number of samples
            projects = sorted(projects.items(), key=operator.itemgetter(1))
            return projects[0][0]  # return project with least amount of samples.

    # Parse families
    families = {}
    for artifact in process.all_inputs():
        for sample in artifact.samples:
            if (
                'Dx Familienummer' in list(sample.udf) and
                'Dx NICU Spoed' in list(sample.udf) and
                'Dx Protocolomschrijving' in list(sample.udf)
            ):  # Dx production sample
                family = sample.udf['Dx Familienummer']
                # Create family if not exist
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': 'unknown_project',
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False  # merge, deep sequencing (5x), etc samples
                    }
                # Update family information
                if sample.udf['Dx Onderzoeksreden'] == 'Research':  # Dx research sample
                    for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
                        if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
                            project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
                            families[family]['project_type'] = project_type
                            families[family]['split_project_type'] = False
                            break
                else:  # Dx clinic sample
                    # Protocol UDF is a ';'-separated history; first entry is the newest.
                    newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
                    if 'SNP fingerprint MIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'Fingerprint'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        # Fingerprint/ERARE runs keep the full read length.
                        # NOTE(review): the flag is process-wide, so one such
                        # sample disables trimming for the whole samplesheet.
                        trim_last_base = False
                    elif 'PID09.V7_smMIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'ERARE'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif sample.udf['Dx NICU Spoed']:
                        families[family]['NICU'] = True
                        project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                    elif 'elidS30409818' in newest_protocol and not families[family]['NICU']:
                        project_type = 'CREv2'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True
                    elif 'elidS31285117' in newest_protocol and not families[family]['NICU']:
                        project_type = 'SSv7'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True
                # Set urgent status
                if 'Dx Spoed' in list(sample.udf) and sample.udf['Dx Spoed']:
                    families[family]['urgent'] = True
                # Set deviating status, remove urgent status if deviating
                if (
                    ('Dx Mergen' in list(sample.udf) and sample.udf['Dx Mergen']) or
                    ('Dx Exoomequivalent' in list(sample.udf) and sample.udf['Dx Exoomequivalent'] > 1)
                ):
                    families[family]['deviating'] = True
                    families[family]['urgent'] = False
            else:  # Other samples
                if 'GIAB' in sample.name.upper() and not sample.project:  # GIAB control samples
                    family = 'GIAB'
                else:
                    family = sample.project.name
                    # Remove 'dx' (ignore case) and strip leading space or _
                    family = re.sub('^dx[ _]*', '', family, flags=re.IGNORECASE)
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': family,
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False
                    }
            # Add sample to family
            families[family]['samples'].append(sample)

    # Get all project types and count samples
    project_types = {}
    for family in families.values():
        if family['project_type'] in project_types:
            project_types[family['project_type']]['sample_count'] += len(family['samples'])
        else:
            project_types[family['project_type']] = {
                'sample_count': len(family['samples']),
                'projects': {},
                'split_project_type': family['split_project_type']
            }

    # Define projects per project_type
    for project_type in project_types:
        project_types[project_type]['index'] = 0
        if project_types[project_type]['split_project_type']:
            # Split into sub-projects of up to 9 samples each.
            for i in range(0, int(project_types[project_type]['sample_count']/9+1)):
                project_types[project_type]['projects']['{0}_{1}'.format(project_type, i+1)] = 0
        else:
            project_types[project_type]['projects'][project_type] = 0

    # Set sample projects
    sample_projects = {}
    sample_sequence_names = {}

    # Urgent families / samples, skip deviating
    for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Deviating families / samples
    for family in [family for family in families.values() if family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Non urgent and non deviating families / samples; biggest families first
    # so they land in the emptiest projects.
    normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
    for family in sorted(normal_families, key=lambda fam: (len(fam['samples'])), reverse=True):
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Check sequencer type
    # NextSeq runs need to reverse complement 'index2' for dual barcodes and 'index' for single barcodes.
    if 'nextseq' in process.type.name.lower():
        nextseq_run = True
    else:
        nextseq_run = False

    # Edit clarity samplesheet
    sample_header = ''  # empty until [data] section
    settings_section = False
    samplesheet_artifact = Artifact(lims, id=artifact_id)
    file_id = samplesheet_artifact.files[0].id

    for line in lims.get_file_contents(id=file_id).rstrip().split('\n'):
        if line.startswith('[Settings]') and trim_last_base:
            # Existing [Settings] section: append the trim settings to it.
            output_file.write('{line}\n'.format(line=line))
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            settings_section = True
        elif line.startswith('[Data]') and trim_last_base and not settings_section:
            # No [Settings] section seen: inject one just before [Data].
            output_file.write('[Settings]\n')
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            output_file.write('{line}\n'.format(line=line))
        elif line.startswith('Sample_ID'):  # Samples header line
            sample_header = line.rstrip().split(',')
            sample_id_index = sample_header.index('Sample_ID')
            sample_name_index = sample_header.index('Sample_Name')
            sample_project_index = sample_header.index('Sample_Project')
            if 'index2' in sample_header:
                index_index = sample_header.index('index2')
            else:
                index_index = sample_header.index('index')
            output_file.write('{line}\n'.format(line=line))
        elif sample_header:  # Samples header seen, so continue with samples.
            data = line.rstrip().split(',')
            # Fix sample name -> use sequence name
            if data[sample_name_index] in sample_sequence_names:
                data[sample_name_index] = sample_sequence_names[data[sample_name_index]]
            # Set Sample_Project
            if data[sample_name_index] in sample_projects:
                data[sample_project_index] = sample_projects[data[sample_name_index]]
            # Overwrite Sample_ID with Sample_name to get correct conversion output folder structure
            data[sample_id_index] = data[sample_name_index]
            # Reverse complement index for NextSeq runs
            if nextseq_run:
                data[index_index] = clarity_epp.export.utils.reverse_complement(data[index_index])
            output_file.write('{line}\n'.format(line=','.join(data)))
        else:  # Leave other lines untouched.
            output_file.write('{line}\n'.format(line=line))
def samplesheet_multiplex_library_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for multiplexing(pooling) samples.

    For every output pool (Analyte) the per-sample pipetting volume (ul) is
    calculated from the measured concentration and fragment size, based on
    the trio composition (CCC/CPP/CCP) of the 3-sample pool — or from a
    user-supplied fixed volume for one named sample ('adapted'). Pools that
    do not contain exactly 3 samples are reported in a warning line. Rows are
    written sorted by pool name, then by well (column-wise).

    Side effects: sets the 'Dx input pool (ng)' UDF on each 3-sample output
    pool (via output.put()).
    """
    process = Process(lims, id=process_id)
    inputs = list(set(process.all_inputs()))
    outputs = list(set(process.all_outputs()))
    sample_concentration = {}
    sample_size = {}
    trio_statuses = {}
    ul_sample = {}
    ng_sample = {}
    udf_output = []
    udf_ul_sample = {}
    udf_name_ul_sample = {}
    plate_id = {}
    well_id = {}
    pools_not_3 = []
    # Column-wise 96-well order: A1..H1, A2..H2, ..., A12..H12 -> rank.
    order = ['{0}{1}'.format(row, column) for column in range(1, 13) for row in 'ABCDEFGH']
    order = dict(zip(order, range(len(order))))
    well_order = {}
    sample_well_pool = []

    # get input udfs 'Dx sample volume ul' and 'Dx Samplenaam' per output analyte
    for output in outputs:
        if output.type == 'Analyte':
            if 'Dx sample volume (ul)' in output.udf and 'Dx Samplenaam' in output.udf:
                udf_ul_sample[output.name] = output.udf['Dx sample volume (ul)']
                # if samplename is complete sequencename take only monsternummer
                if re.search(r'U\d{6}\D{2}', output.udf['Dx Samplenaam']):
                    udf_name_ul_sample[output.name] = output.udf['Dx Samplenaam'][9:]
                else:
                    udf_name_ul_sample[output.name] = output.udf['Dx Samplenaam']
                udf_output.append(output.name)

    # get concentration, size, containername and well per input artifact
    for input in inputs:
        sample = input.samples[0]
        samplename = sample.name
        if 'Dx Concentratie fluorescentie (ng/ul)' in input.udf:
            measurement = input.udf['Dx Concentratie fluorescentie (ng/ul)']
            qcflag = input.qc_flag
            # BUGFIX: was `qcflag == 'UNKNOWN' or 'PASSED'`, which is always
            # truthy ('PASSED' is a non-empty string), so FAILED artifacts
            # were accepted as well. Only accept UNKNOWN/PASSED QC flags.
            if qcflag in ('UNKNOWN', 'PASSED'):
                sample_concentration[samplename] = measurement
        if 'Dx Fragmentlengte (bp)' in input.udf:
            measurement = input.udf['Dx Fragmentlengte (bp)']
            qcflag = input.qc_flag
            # BUGFIX: same always-true condition as above.
            if qcflag in ('UNKNOWN', 'PASSED'):
                sample_size[samplename] = measurement
        plate_id[samplename] = input.container.name
        placement = input.location[1]
        placement = ''.join(placement.split(':'))  # 'A:1' -> 'A1'
        well_id[samplename] = placement
        well_order[sample.name] = order[placement]

    # get familystatus per sample in output analyte and determine trio composition if number of samples in pool = 3
    for output in outputs:
        if output.type == 'Analyte':
            sample_given_ul = ''  # becomes the sample with a user-given volume, if any
            if len(output.samples) == 3:
                samplestatus = []
                for sample in output.samples:
                    # First check GIAB controls
                    if 'CFGIAB' in sample.name.upper():
                        sample.udf['Dx Familie status'] = 'Kind'
                    elif 'PFGIAB' in sample.name.upper() or 'PMGIAB' in sample.name.upper():
                        sample.udf['Dx Familie status'] = 'Ouder'
                    if 'Dx Onderzoeksreden' in sample.udf and sample.udf['Dx Onderzoeksreden'] == 'Research':
                        samplestatus.append('Kind')
                    else:
                        samplestatus.append(sample.udf['Dx Familie status'])
                if samplestatus == ['Kind'] * 3 or samplestatus == ['Ouder'] * 3:
                    trio_statuses[output.name] = 'CCC'
                elif sorted(samplestatus) == ['Kind', 'Ouder', 'Ouder']:
                    trio_statuses[output.name] = 'CPP'
                elif sorted(samplestatus) == ['Kind', 'Kind', 'Ouder']:
                    trio_statuses[output.name] = 'CCP'
                # if udfs 'Dx sample volume ul' and 'Dx Samplenaam' are not empty change trio status and do pre-calculation
                if output.name in udf_output:
                    trio_statuses[output.name] = 'adapted'
                    for sample in output.samples:
                        if sample.name == udf_name_ul_sample[output.name]:
                            sample_given_ul = sample
                            ng_sample[sample.name] = library_dilution_calculator_fixed_volume(
                                sample_concentration[sample.name],
                                sample_size[sample.name],
                                udf_ul_sample[output.name])
                    for sample in output.samples:
                        if sample.name != udf_name_ul_sample[output.name]:
                            ng_sample[sample.name] = library_dilution_calculator_fixed_ng(
                                sample_concentration[sample.name],
                                sample_size[sample.name],
                                sample.udf['Dx Familie status'],
                                ng_sample[udf_name_ul_sample[output.name]],
                                sample_given_ul.udf['Dx Familie status'])
                    output.udf['Dx input pool (ng)'] = round(
                        ng_sample[output.samples[0].name] +
                        ng_sample[output.samples[1].name] +
                        ng_sample[output.samples[2].name], 2)
                    output.put()
                else:
                    output.udf['Dx input pool (ng)'] = 750
                    output.put()
            # if number of samples in pool is not 3 set trio status and prepare error warning output file
            else:
                trio_statuses[output.name] = 'not_3'
                pools_not_3.append(output.name)
            # calculation if udfs 'Dx sample volume ul' and 'Dx Samplenaam' are empty and not empty
            if not sample_given_ul:
                for sample in output.samples:
                    if 'Dx Onderzoeksreden' in sample.udf and sample.udf['Dx Onderzoeksreden'] == 'Research':
                        sample_pedigree = 'Kind'
                    else:
                        sample_pedigree = sample.udf['Dx Familie status']
                    ul_sample[sample.name] = library_dilution_calculator(
                        concentration=sample_concentration[sample.name],
                        size=sample_size[sample.name],
                        trio=trio_statuses[output.name],
                        pedigree=sample_pedigree,
                        ng=0)
            else:
                for sample in output.samples:
                    if sample.udf['Dx Onderzoeksreden'] == 'Research':
                        sample_pedigree = 'Kind'
                    else:
                        sample_pedigree = sample.udf['Dx Familie status']
                    ul_sample[sample.name] = library_dilution_calculator(
                        concentration=sample_concentration[sample.name],
                        size=sample_size[sample.name],
                        trio=trio_statuses[output.name],
                        pedigree=sample_pedigree,
                        ng=ng_sample[sample.name])
            # sorting pools then wells for output file
            # Pad single-digit pool numbers ('#1_' -> '#01_') so string sort
            # orders pools numerically.
            sort_pool_name = output.name
            if re.search(r'#\d_', sort_pool_name):
                sort_pool_name = re.sub('#', '#0', sort_pool_name)
            for sample in output.samples:
                sample_well_pool.append([
                    sample, well_order[sample.name], sort_pool_name, output.name
                ])

    sorted_samples = sorted(sample_well_pool, key=lambda sample: (sample[2], sample[1]))

    # write output file per output analyte sorted on pool number
    output_file.write('Sample\tul Sample\tPlaat_id\twell_id\tpool\n')
    if pools_not_3:
        output_file.write(
            'De volgende pool(s) hebben een ander aantal samples dan 3: {pools}\n'
            .format(pools=pools_not_3))
    for sorted_sample in sorted_samples:
        sample = sorted_sample[0]
        output_file.write(
            '{sample}\t{ul_sample:.2f}\t{plate_id}\t{well_id}\t{pool}\n'.format(
                sample=sample.name,
                ul_sample=ul_sample[sample.name],
                plate_id=plate_id[sample.name],
                well_id=well_id[sample.name],
                pool=sorted_sample[3]))
def samplesheet_mip_multiplex_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for smMIP multiplexing

    Volumes are chosen per input artifact relative to the average
    concentration of all non-manual samples (<50% of avg -> 20 ul,
    >150% -> 2 ul, otherwise 5 ul), and rows are written sorted by plate
    then by well.
    """
    process = Process(lims, id=process_id)
    input_artifacts = []

    # Find all Dx Tapestation 2200/4200 QC process types
    qc_process_types = clarity_epp.export.utils.get_process_types(
        lims, ['Dx Tapestation 2200 QC', 'Dx Tapestation 4200 QC'])

    # Write header
    output_file.write(
        '{sample}\t{volume}\t{plate_id}\t{well_id}\t{concentration}\t{manual}\n'
        .format(
            sample='Sample',
            volume='Volume',
            plate_id='Plaat_id',
            well_id='Well_id',
            concentration='Concentratie',
            manual='Handmatig',
        ))

    for input_artifact in process.all_inputs(resolve=True):
        # Find last qc process for artifact (highest numeric id suffix).
        # NOTE(review): the lambda parameter shadows the outer 'process';
        # also raises IndexError if no QC process exists for the artifact.
        qc_process = sorted(
            lims.get_processes(type=qc_process_types, inputartifactlimsid=input_artifact.id),
            key=lambda process: int(process.id.split('-')[-1]))[-1]

        # Find concentration measurement
        # NOTE(review): if no qc output matches by name, 'concentration' is
        # unbound (first iteration) or stale from the previous artifact —
        # TODO confirm a matching output always exists.
        for qc_artifact in qc_process.outputs_per_input(input_artifact.id):
            if qc_artifact.name == input_artifact.name:
                concentration = float(qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])

        input_artifacts.append({
            'name': input_artifact.name,
            'concentration': concentration,
            'plate_id': input_artifact.location[0].id,
            'well_id': ''.join(input_artifact.location[1].split(':')),  # 'A:1' -> 'A1'
            'manual': input_artifact.samples[0].udf['Dx Handmatig']
        })

    # Calculate avg concentration for all non manual samples
    # NOTE(review): ZeroDivisionError when every sample is manual — TODO confirm.
    concentrations = [
        input_artifact['concentration'] for input_artifact in input_artifacts
        if not input_artifact['manual']
    ]
    avg_concentration = sum(concentrations) / len(concentrations)

    # Set volume and store input_artifact per plate to be able print samplesheet sorted on plate and well
    input_containers = {}
    for input_artifact in input_artifacts:
        if input_artifact['concentration'] < avg_concentration * 0.5:
            input_artifact['volume'] = 20
        elif input_artifact['concentration'] > avg_concentration * 1.5:
            input_artifact['volume'] = 2
        else:
            input_artifact['volume'] = 5

        if input_artifact['plate_id'] not in input_containers:
            input_containers[input_artifact['plate_id']] = {}
        input_containers[input_artifact['plate_id']][input_artifact['well_id']] = input_artifact

    # Emit rows sorted by plate id, then by 96-well plate order.
    for input_container in sorted(input_containers.keys()):
        input_artifacts = input_containers[input_container]
        for well in clarity_epp.export.utils.sort_96_well_plate(input_artifacts.keys()):
            input_artifact = input_artifacts[well]
            output_file.write(
                '{sample}\t{volume}\t{plate_id}\t{well_id}\t{concentration}\t{manual}\n'
                .format(
                    sample=input_artifact['name'],
                    volume=input_artifact['volume'],
                    plate_id=input_artifact['plate_id'],
                    well_id=input_artifact['well_id'],
                    concentration=input_artifact['concentration'],
                    manual=input_artifact['manual'],
                ))
def process_samples(self, lims_process: Process):
    """Yield the LIMS id of each sample on the process' input artifacts."""
    for input_artifact in lims_process.all_inputs():
        yield from (sample.id for sample in input_artifact.samples)