def get_and_set_fluor_int(self, target_file):
    """Copy "End RFU" values from "Quant-iT Result File 1" and
    "Quant-iT Result File 2" (if provided) to the udfs
    "Fluorescence intensity 1" and "Fluorescence intensity 2".

    Returns the relative fluorescence intensity of the sample:
        rel_fluor_int = End RFU of the sample - background intensity
    where the background is self.standards[1].
    """
    sample = target_file.input_artifact_list()[0].name
    fluor_int = []
    # Removed: dead commented-out udf-deletion code and the no-op
    # `target_file.udf = target_file.udf` round-trip.
    for udf_name, formated_file in self.result_files.items():
        # Membership test on the dict directly -- no need to build a key list.
        if sample in formated_file:
            end_rfu = float(formated_file[sample]['End RFU'])
            fluor_int.append(end_rfu)
            target_file.udf[udf_name] = end_rfu
        else:
            self.missing_samps.append(sample)
    set_field(target_file)
    # NOTE(review): if the sample is missing from every result file,
    # np.mean([]) yields nan, which propagates to the caller -- confirm
    # downstream handling.
    rel_fluor_int = np.mean(fluor_int) - self.standards[1]
    return rel_fluor_int
def parse_anglerfish_results(lims, process):
    """Parse the Anglerfish output file attached to *process* and copy
    each sample's read count into the '# Reads' udf of the matching
    output artifact. Samples absent from the file are reported on
    stderr for the EPP user log."""
    # samples missing from the Anglerfish file
    missing_samples = []
    # strings returned to the EPP user
    log = []
    # Get file contents by parsing lims artifacts
    file_content = get_anglerfish_output_file(lims, process)
    # parse the Anglerfish output
    data = get_data(file_content, log)
    # Fill values in LIMS
    for out in process.all_outputs():
        if NGISAMPLE_PAT.findall(out.name):
            if data.get(out.name):
                out.udf['# Reads'] = data[out.name]
                out.put()
                set_field(out)
            else:
                missing_samples.append(out.name)
    if missing_samples:
        # BUG FIX: join the sample names instead of formatting the raw
        # list repr (which printed "['a', 'b']").
        log.append('Sample {} missing in the Anglerfish Result File.'.format(
            ', '.join(missing_samples)))
    print(''.join(log), file=sys.stderr)
def get_and_set_fluor_int(self, target_file):
    """Transfer the "End RFU" readings for this sample from the parsed
    Quant-iT result files ("Quant-iT Result File 1"/"2") into the
    target file's fluorescence-intensity udfs, then return the relative
    fluorescence intensity:
        rel_fluor_int = End RFU of the sample - background intensity
    """
    sample = target_file.input_artifact_list()[0].name
    end_rfu_values = []
    target_udfs = target_file.udf
    # For the moment we don't know of a way to delete udfs. Should be solved.
    target_file.udf = target_udfs
    for udf_name, parsed_file in self.result_files.items():
        if sample not in parsed_file.keys():
            self.missing_samps.append(sample)
            continue
        rfu = float(parsed_file[sample]['End RFU'])
        end_rfu_values.append(rfu)
        target_file.udf[udf_name] = rfu
    set_field(target_file)
    return np.mean(end_rfu_values) - self.standards[1]
def parse_caliper_results(process):
    """Parse the Caliper WellTable output for *process* and copy the
    mapped columns (RNA or DNA set, chosen from the process type name)
    into the matching output artifacts' udfs. Missing samples and
    missing values are reported on stderr for the EPP user log."""
    # Sample UDF -> result-file column mapping.
    # Renamed from `map`, which shadowed the builtin.
    udf_map = []
    map_RNA = [('RIN', 'RNA Quality Score'),
               ('Concentration', 'Total Conc. (ng/ul)'),
               ('DV200', 'Region[A-B] % of Total Area')]
    map_DNA = [('Concentration', 'Smear Conc. (ng/ul)'),
               ('Size (bp)', 'Smear Size [BP]'),
               ('Conc. nM', 'Smear Molarity (nmol/l)')]
    if 'RNA' in process.type.name:
        udf_map = map_RNA
    elif 'DNA' in process.type.name:
        udf_map = map_DNA
    # strings returned to the EPP user
    log = []
    # Get file contents by parsing lims artifacts
    content = get_caliper_output_file(process, log)
    # parse the Caliper output
    data = get_data(content, log)
    # Fill values in LIMS
    for out in process.all_outputs():
        if CALIPER_PAT.findall(out.name):
            found_flag = False
            for k, v in data.items():
                if ('Sample' in v.keys()
                        and v['Sample'] == CALIPER_PAT.findall(out.name)[0]
                        and v['Well'] == out.location[1]):
                    found_flag = True
                    for item in udf_map:
                        target_column = ''
                        if item[0] == 'DV200':
                            # DV200 column name varies; match it by pattern.
                            for field in v.keys():
                                if DV200_PAT.findall(field):
                                    target_column = DV200_PAT.findall(field)[0]
                        else:
                            if item[1] in v.keys():
                                target_column = item[1]
                        if (target_column != '' and v[target_column] != 'NA'
                                and v[target_column] != ''):
                            # Raw string for the regex -- avoids the
                            # invalid-escape-sequence warning of '\['.
                            out.udf[item[0]] = float(
                                re.sub(r'\[|\]', '', v[target_column]))
                        else:
                            log.append(
                                "Sample {} in well {} missing {}.".format(
                                    v['Sample'], v['Well'], item[0]))
                    out.udf['Conc. Units'] = 'ng/ul'
            if found_flag:
                out.put()
                set_field(out)
            else:
                log.append(
                    'No record of sample {} in well {} in the Caliper WellTable file.'
                    .format(
                        NGISAMPLE_PAT.findall(out.name)[0], out.location[1]))
    print(''.join(log), file=sys.stderr)
def get_frag_an_csv_data(process):
    """Parse the Fragment Analyzer csv attached to *process* and copy
    concentration, RIN, 28s/18s ratio, range and DV200 values onto the
    matching result files. Bad value formats and missing samples are
    reported on stderr for the EPP user log."""
    # samples missing from the csv file
    missing_samples = 0
    # strings returned to the EPP user
    log = []
    # Get file contents by parsing lims artifacts
    file_content = get_result_file(process, log)
    # parse the file and get the interesting data out
    data = get_data(file_content, log)
    for target_file in process.result_files():
        file_sample = target_file.samples[0].name
        if file_sample not in data:
            missing_samples += 1
            continue
        sample_data = data[file_sample]
        if sample_data.get('concentration'):
            try:
                target_file.udf['Concentration'] = float(sample_data['concentration'])
                target_file.udf['Conc. Units'] = 'ng/ul'
            except ValueError:
                log.append('Bad concentration value format for Sample {}.'.format(file_sample))
        if sample_data.get('rin'):
            try:
                target_file.udf['RIN'] = float(sample_data['rin'])
            except ValueError:
                log.append('Bad RIN value format for Sample {}.'.format(file_sample))
        if sample_data.get('ratio'):
            try:
                target_file.udf['28s/18s ratio'] = float(sample_data['ratio'])
            except ValueError:
                log.append('Bad ratio value format for Sample {}.'.format(file_sample))
        if sample_data.get('range'):
            # Local renamed away from `range`, which shadowed the
            # builtin. str() cannot raise ValueError, so the dead
            # try/except was dropped.
            target_file.udf['Range'] = str(sample_data['range'])
        if sample_data.get('dv200'):
            try:
                target_file.udf['DV200'] = float(sample_data['dv200'])
            except ValueError:
                log.append('Bad dv200 value format for Sample {}.'.format(file_sample))
        # actually set the data
        target_file.put()
        set_field(target_file)
    if missing_samples:
        log.append('{0}/{1} samples are missing in the Result File.'.format(
            missing_samples, len(process.result_files())))
    print(''.join(log), file=sys.stderr)
def get_qbit_csv_data(process):
    """Parse the Qubit csv attached to *process*, convert each sample's
    concentration to ng/ul, store it on the result file and set the
    qc_flag against the 'Minimum required concentration (ng/ul)'
    process udf (if configured). Problems are reported on stderr."""
    # samples missing from the qubit csv file
    missing_samples = 0
    low_conc = 0
    bad_format = 0
    # strings returned to the EPP user
    log = []
    # Get file contents by parsing lims artifacts
    file_content = get_qbit_file(process)
    # parse the qubit file and get the interesting data out
    data = get_data(file_content, log)
    if "Minimum required concentration (ng/ul)" in process.udf:
        min_conc = process.udf['Minimum required concentration (ng/ul)']
    else:
        min_conc = None
        log.append("Set 'Minimum required concentration (ng/ul)' to get qc-flags based on this threshold!")
    for target_file in process.result_files():
        conc = None
        new_conc = None
        file_sample = target_file.samples[0].name
        if file_sample in data:
            try:
                conc = float(data[file_sample]['concentration'])
            except ValueError:
                # concentration is not a float
                target_file.qc_flag = "FAILED"
                if data[file_sample]['concentration'] != 'Out of range':
                    # Out of range is a valid value, the others are not.
                    bad_format += 1
            else:
                new_conc = convert_to_ng_ul(conc, data[file_sample]['unit'])
                if new_conc is not None:
                    target_file.udf['Concentration'] = new_conc
                    target_file.udf['Conc. Units'] = 'ng/ul'
                    # BUG FIX: only compare against the threshold when it
                    # is configured -- `float < None` raises TypeError on
                    # Python 3 (the legacy old_main guarded this too).
                    if min_conc is not None:
                        if new_conc < min_conc:
                            target_file.qc_flag = "FAILED"
                            low_conc += 1
                        else:
                            target_file.qc_flag = "PASSED"
            # actually set the data
            target_file.put()
            set_field(target_file)
        else:
            missing_samples += 1
    if low_conc:
        log.append('{0}/{1} samples have low concentration.'.format(low_conc, len(process.result_files())))
    if missing_samples:
        log.append('{0}/{1} samples are missing in the Qubit Result File.'.format(missing_samples, len(process.result_files())))
    if bad_format:
        log.append('There are {0} badly formatted samples in Qubit Result File. Please fix these to get proper results.'.format(bad_format))
    print(''.join(log), file=sys.stderr)
def _set_udfs(self, samp_name, target_file, lane):
    """Copy '# Reads' and '% Bases >=Q30' for *samp_name* on *lane*
    from the parsed quality file into *target_file*'s udfs.

    A sample present in the lane's data but... absent from it is
    recorded in self.missing_samps; an entirely unknown lane is
    silently skipped (only set_field runs)."""
    # Membership tests on the dicts directly (redundant .keys() removed).
    if lane in self.QF_from_file:
        if samp_name in self.QF_from_file[lane]:
            s_inf = self.QF_from_file[lane][samp_name]
            target_file.udf['# Reads'] = int(s_inf['# Reads'])
            target_file.udf['% Bases >=Q30'] = float(s_inf['% Bases >=Q30'])
            self.nr_samps_updat.append(samp_name)
        else:
            self.missing_samps.append(samp_name)
    set_field(target_file)
def old_main(lims, pid, epp_logger):
    """Legacy Qubit-parsing entry point: read the shared 'Qubit Result
    File', copy concentrations (converted to ng/ul) onto the result
    files and set qc flags against the optional minimum-concentration
    process udf.

    Ported from Python 2: dict.has_key() -> `in`, `print >> sys.stderr`
    -> print(..., file=...), and the lazy map() materialized to a list
    so it can be reused as find_keys."""
    process = Process(lims, id=pid)
    sample_names = [a.name for a in process.analytes()[0]]
    target_files = process.result_files()
    file_handler = ReadResultFiles(process)
    files = file_handler.shared_files['Qubit Result File']
    qubit_result_file = file_handler.format_file(files,
                                                 name='Qubit Result File',
                                                 first_header=['Test', 'Sample'],
                                                 find_keys=sample_names)
    missing_samples = 0
    low_conc = 0
    bad_formated = 0
    abstract = []
    udfs = dict(process.udf.items())
    if "Minimum required concentration (ng/ul)" in udfs:
        min_conc = udfs["Minimum required concentration (ng/ul)"]
    else:
        min_conc = None
        abstract.append("Set 'Minimum required concentration (ng/ul)' to get qc-flaggs based on this treshold!")
    for target_file in target_files:
        sample = target_file.samples[0].name
        if sample in qubit_result_file:
            sample_mesurements = qubit_result_file[sample]
            if "Sample Concentration" in sample_mesurements.keys():
                conc, unit = sample_mesurements["Sample Concentration"]
                if conc == 'Out Of Range':
                    target_file.qc_flag = "FAILED"
                elif conc.replace('.', '').isdigit():
                    conc = float(conc)
                    if unit == 'ng/mL':
                        conc = np.true_divide(conc, 1000)
                    if min_conc:
                        if conc < min_conc:
                            target_file.qc_flag = "FAILED"
                            low_conc += 1
                        else:
                            target_file.qc_flag = "PASSED"
                    target_file.udf['Concentration'] = conc
                    target_file.udf['Conc. Units'] = 'ng/ul'
                else:
                    bad_formated += 1
                set_field(target_file)
        else:
            missing_samples += 1
    if low_conc:
        abstract.append('{0}/{1} samples have low concentration.'.format(low_conc, len(target_files)))
    if missing_samples:
        abstract.append('{0}/{1} samples are missing in Qubit Result File.'.format(missing_samples, len(target_files)))
    if bad_formated:
        abstract.append('There are {0} badly formated samples in Qubit Result File. Please fix these to get proper results.'.format(bad_formated))
    print(' '.join(abstract), file=sys.stderr)
def _set_udfs(self, samp_name, target_file, lane):
    """Write the '# Reads' and '% Bases >=Q30' values parsed for
    *samp_name* on *lane* into *target_file*'s udfs; record the sample
    in self.missing_samps when the lane's data lacks it."""
    if lane in self.QF_from_file:
        lane_data = self.QF_from_file[lane]
        if samp_name not in lane_data:
            self.missing_samps.append(samp_name)
        else:
            info = lane_data[samp_name]
            target_file.udf['# Reads'] = int(info['# Reads'])
            target_file.udf['% Bases >=Q30'] = float(info['% Bases >=Q30'])
            self.nr_samps_updat.append(samp_name)
    set_field(target_file)
def set_result_file_udfs(self):
    """Populate the 'App QC' udf from the parsed application-QC data.

    Note: the flag is written onto the *sample* behind each target
    file, not onto the result file itself. Samples absent from the QC
    data are collected in self.missing_samps."""
    for samp_name, target_file in self.target_files.items():
        # Membership test directly on the dict (redundant .keys() removed).
        if samp_name in self.app_QC:
            qc_passed = str(self.app_QC[samp_name]['automated_qc']['qc_passed'])
            sample = target_file.samples[0]
            sample.udf['App QC'] = qc_passed
            set_field(sample)
            self.nr_samps_updat += 1
        else:
            self.missing_samps.append(samp_name)
def assign_QC_flag(self):
    """Set qc_flag on every result file from the concentration and
    saturation checks; any disagreement between the two checks fails
    the sample. When the required process udfs are absent, record them
    in self.missing_udfs instead of flagging anything."""
    if not self.required_udfs.issubset(self.udfs.keys()):
        self.missing_udfs = ', '.join(list(self.required_udfs))
        return
    for result_file in self.result_files:
        udf_values = dict(result_file.udf.items())
        conc_flag = self.concentration_QC(result_file, udf_values)
        sat_flag = self.saturation_QC(result_file, udf_values)
        if conc_flag and sat_flag:
            if conc_flag == sat_flag:
                verdict = conc_flag
            else:
                verdict = "FAILED"
            if verdict == "FAILED":
                self.no_failed += 1
            result_file.qc_flag = verdict
            set_field(result_file)
def assign_QC_flag(self):
    """Assign PASSED/FAILED qc flags to the result files by combining
    the concentration and saturation checks (conflicting verdicts fail
    the sample). If any required udf is missing, only populate
    self.missing_udfs."""
    if self.required_udfs.issubset(self.udfs.keys()):
        for result_file in self.result_files:
            udf_snapshot = dict(result_file.udf.items())
            flags = (self.concentration_QC(result_file, udf_snapshot),
                     self.saturation_QC(result_file, udf_snapshot))
            if all(flags):
                verdict = flags[0] if flags[0] == flags[1] else "FAILED"
                self.no_failed += int(verdict == "FAILED")
                result_file.qc_flag = verdict
                set_field(result_file)
    else:
        self.missing_udfs = ', '.join(list(self.required_udfs))
def calc_and_set_conc(self, target_file, rel_fluor_int):
    """Calculate the sample concentration from the linear-regression
    model and the relative fluorescence intensity, and copy it to the
    target file's "Concentration" udf ("Conc. Units" is set to
    "ng/ul"). Missing prerequisite udfs are accumulated in
    self.missing_udfs instead.
    """
    requiered_udfs = set(['Sample volume','Standard dilution','WS volume'])
    if requiered_udfs.issubset(self.udfs.keys()) and self.model:
        # NOTE(review): the guard checks 'Standard dilution' but the
        # formula below reads 'Standard volume', which is NOT in the
        # required set -- a missing 'Standard volume' udf would raise
        # KeyError here. Confirm which udf name is intended.
        conc = np.true_divide((self.model[1] * rel_fluor_int * (
            self.udfs['WS volume'] + self.udfs['Sample volume'])),
            self.udfs['Sample volume']*(self.udfs['WS volume'] +
            self.udfs['Standard volume']))
        target_file.udf['Concentration'] = conc
        target_file.udf['Conc. Units'] = 'ng/ul'
        set_field(target_file)
        self.no_samps +=1
    elif not requiered_udfs.issubset(self.missing_udfs):
        # Record the whole required set only once per run.
        self.missing_udfs += requiered_udfs
def calc_and_set_conc(self, target_file, rel_fluor_int):
    """Derive the concentration from the regression model and the
    relative fluorescence intensity, writing it to the target file's
    "Concentration" udf with "Conc. Units" set to "ng/ul". Missing
    prerequisite udfs are accumulated in self.missing_udfs."""
    requiered_udfs = set(['Sample volume', 'Standard dilution', 'WS volume'])
    have_prerequisites = requiered_udfs.issubset(list(self.udfs.keys()))
    if have_prerequisites and self.model:
        sample_vol = self.udfs['Sample volume']
        ws_vol = self.udfs['WS volume']
        # NOTE(review): 'Standard volume' is read here although the
        # required set checks 'Standard dilution' -- confirm intent.
        numerator = self.model[1] * rel_fluor_int * (ws_vol + sample_vol)
        denominator = sample_vol * (ws_vol + self.udfs['Standard volume'])
        target_file.udf['Concentration'] = np.true_divide(numerator, denominator)
        target_file.udf['Conc. Units'] = 'ng/ul'
        set_field(target_file)
        self.no_samps += 1
    elif not requiered_udfs.issubset(self.missing_udfs):
        self.missing_udfs += requiered_udfs
def lane_QC(self):
    """Run index-level QC for every output artifact belonging to this
    lane, then check lane-level and per-index yields for unexpected
    values. Samples whose QC raises are collected in self.QC_fail."""
    for target_file in self.out_arts:
        samp_name = target_file.samples[0].name
        for lane_samp in self.BLS:
            if self.lane == lane_samp['Lane']:
                samp = lane_samp['Sample ID']
                if samp == samp_name:
                    IQC = IndexQC(target_file, lane_samp)
                    IQC.set_target_file_udfs()
                    IQC.set_read_pairs(self.single)
                    try:
                        IQC.lane_index_QC(self.reads_threshold,
                                          self.Q30_treshold)
                        if IQC.html_file_error:
                            self.html_file_error = IQC.html_file_error
                        set_field(IQC.t_file)
                        self.nr_samps_updat += 1
                    # BUG FIX: narrowed the bare `except:` so that
                    # SystemExit/KeyboardInterrupt are not swallowed.
                    except Exception:
                        self.QC_fail.append(samp)
    self._check_un_exp_lane_yield()
    for index_count in self.counts:
        self._check_un_exp_ind_yield(index_count)
def get_frag_an_csv_data(process):
    """Parse the Fragment Analyzer csv attached to *process* and copy
    concentration, RIN and 28s/18s ratio onto the matching result
    files. Missing samples and badly formatted values are reported on
    stderr for the EPP user log."""
    # samples missing from the csv file
    missing_samples = 0
    # BUG FIX: initialise once, outside the loop -- it was reset for
    # every sample, so the final report only reflected the last one.
    bad_format = 0
    # strings returned to the EPP user
    log = []
    # Get file contents by parsing lims artifacts
    file_content = get_result_file(process)
    # parse the file and get the interesting data out
    data = get_data(file_content, log)
    for target_file in process.result_files():
        conc = None
        rin = None
        ratio = None
        file_sample = target_file.samples[0].name
        if file_sample in data:
            try:
                conc = float(data[file_sample]['concentration'])
                rin = float(data[file_sample]['rin'])
                ratio = float(data[file_sample]['ratio'])
            except ValueError:
                bad_format += 1
            else:
                if conc is not None:
                    target_file.udf['Concentration'] = conc
                    target_file.udf['Conc. Units'] = 'ng/ul'
                    target_file.udf['RIN'] = rin
                    target_file.udf['28s/18s ratio'] = ratio
                    # actually set the data
                    target_file.put()
                    set_field(target_file)
        else:
            missing_samples += 1
    if missing_samples:
        log.append('{0}/{1} samples are missing in the Result File.'.format(
            missing_samples, len(process.result_files())))
    if bad_format:
        # BUG FIX: the {0} placeholder was never formatted.
        log.append('There are {0} badly formatted samples in the Result File'.format(bad_format))
    print(''.join(log), file=sys.stderr)
def lane_QC(self):
    """QC every output artifact on this lane against its demultiplexing
    stats, then verify lane and per-index yields. Failing samples are
    appended to self.QC_fail."""
    for target_file in self.out_arts:
        samp_name = target_file.samples[0].name
        for lane_samp in self.BLS:
            if self.lane == lane_samp['Lane']:
                samp = lane_samp['Sample ID']
                if samp == samp_name:
                    IQC = IndexQC(target_file, lane_samp)
                    IQC.set_target_file_udfs()
                    IQC.set_read_pairs(self.single)
                    try:
                        IQC.lane_index_QC(self.reads_threshold,
                                          self.Q30_treshold)
                        if IQC.html_file_error:
                            self.html_file_error = IQC.html_file_error
                        set_field(IQC.t_file)
                        self.nr_samps_updat += 1
                    # BUG FIX: replaced the bare `except:` (which also
                    # caught SystemExit/KeyboardInterrupt) with
                    # `except Exception`.
                    except Exception:
                        self.QC_fail.append(samp)
    self._check_un_exp_lane_yield()
    for index_count in self.counts:
        self._check_un_exp_ind_yield(index_count)
def get_frag_an_csv_data(process):
    """Copy concentration, RIN and 28s/18s ratio from the parsed
    Fragment Analyzer csv onto the matching result files, reporting
    missing samples and bad value formats on stderr."""
    # samples missing from the csv file
    missing_samples = 0
    # BUG FIX: moved out of the per-sample loop; it was zeroed on every
    # iteration so only the last sample could ever be counted.
    bad_format = 0
    # strings returned to the EPP user
    log = []
    # Get file contents by parsing lims artifacts
    file_content = get_result_file(process)
    # parse the file and get the interesting data out
    data = get_data(file_content, log)
    for target_file in process.result_files():
        conc = None
        rin = None
        ratio = None
        file_sample = target_file.samples[0].name
        if file_sample in data:
            try:
                conc = float(data[file_sample]['concentration'])
                rin = float(data[file_sample]['rin'])
                ratio = float(data[file_sample]['ratio'])
            except ValueError:
                bad_format += 1
            else:
                if conc is not None:
                    target_file.udf['Concentration'] = conc
                    target_file.udf['Conc. Units'] = 'ng/ul'
                    target_file.udf['RIN'] = rin
                    target_file.udf['28s/18s ratio'] = ratio
                    # actually set the data
                    target_file.put()
                    set_field(target_file)
        else:
            missing_samples += 1
    if missing_samples:
        log.append('{0}/{1} samples are missing in the Result File.'.format(
            missing_samples, len(process.result_files())))
    if bad_format:
        # BUG FIX: apply .format() -- the literal '{0}' was printed before.
        log.append('There are {0} badly formatted samples in the Result File'.format(bad_format))
    print(''.join(log), file=sys.stderr)
def _get_run_id(self):
    """Copy the sequencer's 'Run ID' udf onto the LIMS process, when
    present in the run udfs."""
    # BUG FIX: dict.has_key() was removed in Python 3; use `in`.
    if 'Run ID' in self.run_udfs:
        self.process.udf['Run ID'] = self.run_udfs['Run ID']
        set_field(self.process)
def _get_run_id(self):
    """Mirror the 'Run ID' run udf onto the LIMS process, if set."""
    if 'Run ID' not in self.run_udfs:
        return
    self.process.udf['Run ID'] = self.run_udfs['Run ID']
    set_field(self.process)