# Assumed module-level imports for this excerpt (not shown in the source):
#   from math import exp, sqrt
#   import numpy; import numpy as np
#   from datetime import datetime
#   from time import strptime, mktime
# plus the project's own helpers (calculate_interface, listDict, ddt_container,
# base_exportData, and the SQLAlchemy table classes used below).

def execute_interpolateBiomassFromAverages(self, experiment_id_I, sample_ids_I=[]):
    '''Interpolate the OD600 based on the calculated average growth rate (hr-1)
    and the time the sample was taken, for samples in the experiment that do not
    have a measured OD600 value
    Use cases:
    1. a replicate is bad'''
    calc = calculate_interface()
    data = []
    # query sample_ids for the experiment that do not have an OD600 but have a time
    print('execute interpolate biomass from averages...')
    if sample_ids_I:
        sample_ids = sample_ids_I
    else:
        sample_ids = self.get_sampleIDs_experimentIDNoOD600_samplePhysiologicalParameters(experiment_id_I)
    for si in sample_ids:
        print('interpolating biomass from averages for sample_id ' + si)
        # query rate parameters for the sample_id
        slope_average, intercept_average, rate_average, rate_units, rate_var = \
            self.get_rateData_experimentIDAndSampleIDAndMetID_dataStage01PhysiologyRatesAverages(experiment_id_I, si, 'biomass')
        # query physiological parameters for the sample_id
        pp = self.get_physiologicalParameters_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I, si)
        # query sample_date
        sample_date = self.get_sampleDate_experimentIDAndSampleID_sampleDescription(experiment_id_I, si)
        # interpolate based off of the regression parameters
        # convert the sample date to hours (24 hours per day)
        time = sample_date.year*8765.81277 + sample_date.month*730.484 \
            + sample_date.day*24. + sample_date.hour \
            + sample_date.minute/60. + sample_date.second/3600.
        biomass = exp(time*slope_average + intercept_average)
        # update sample_physiologicalParameters
        pp['od600'] = biomass
        data.append(pp)
    self.update_data_samplePhysiologicalParameters(data)
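
# Hedged sketch (illustrative, not part of the source class): the interpolation
# above inverts a log-linear growth fit. Given `slope` and `intercept` from
# regressing ln(OD600) against time in hours, the predicted OD600 at t_hrs is:
from math import exp

def interpolate_od600_sketch(slope, intercept, t_hrs):
    """Predict OD600 at t_hrs from a ln(OD600)-vs-time regression (illustrative)."""
    return exp(slope*t_hrs + intercept)

# e.g., interpolate_od600_sketch(0.65, -2.0, 4.0) predicts the OD600 after
# 4 hrs of growth at mu = 0.65 hr-1 from an initial ln(OD600) of -2.0.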
def execute_calculateBiomassFromBrothAverage(self, experiment_id_I, sample_ids_I=[]):
    '''Calculate the OD600 based on the average OD600 of the broth samples,
    for samples in the experiment that do not have a measured OD600 value
    Use cases:
    1. the sample is a Filtrate'''
    calc = calculate_interface()
    data = []
    # query sample_ids for the experiment that do not have an OD600
    print('execute calculate biomass from broth averages...')
    if sample_ids_I:
        sample_ids = sample_ids_I
    else:
        sample_ids = self.get_sampleIDs_experimentIDAndSampleDescriptionNoOD600_samplePhysiologicalParameters(experiment_id_I, 'Filtrate')
    for si in sample_ids:
        print('calculating biomass from broth averages for sample_id ' + si)
        # query physiological parameters for the sample_id
        pp = self.get_physiologicalParameters_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I, si)
        # query sample_date
        sample_date = self.get_sampleDate_experimentIDAndSampleID_sampleDescription(experiment_id_I, si)
        # query od600 values from the biological broth replicates
        broth_od600 = self.get_OD600s_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I, si)
        # update sample_physiologicalParameters
        pp['od600'] = numpy.mean(broth_od600)
        data.append(pp)
    self.update_data_samplePhysiologicalParameters(data)
def calculate_genesFpkmTrackingStats(self, experiment_id_I=None, sample_name_I=None):
    """calculate statistics of replicate samples from genesFpkmTracking
    INPUT:
    OPTIONAL INPUT:
    experiment_id_I = limiter for the experiment_id
    sample_name_I = limiter for the sample_name
    """
    data_O = []
    stats_O = []
    experiment_id = experiment_id_I
    sn = sample_name_I
    genesFpkmTracking = self.genesFpkmTracking
    calculate = calculate_interface()
    # get the uniqueSampleNameAbbreviations
    sna_unique = self._get_uniqueSampleNameAbbreviations()
    for sna in sna_unique:
        data_tmp = self._get_rowsBySampleNameAbbreviation(sna)
        # calculate using scipy
        data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(data_tmp, confidence_I=0.95)
        # calculate the interquartile range
        min_O, max_O, median_O, iq_1_O, iq_3_O = calculate.calculate_interquartiles(data_tmp)
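
# Hedged sketch (illustrative, not part of the source class):
# `calculate.calculate_interquartiles` is not shown; a minimal version using
# numpy percentiles would return the same five summary statistics.
import numpy as np

def calculate_interquartiles_sketch(data):
    """Return min, max, median, first quartile, third quartile of `data`."""
    iq_1, median, iq_3 = np.percentile(data, [25, 50, 75])
    return np.min(data), np.max(data), median, iq_1, iq_3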
def make_heatmap(self, mutations_I=[], sample_names_I=[],
        mutation_id_exclusion_list=[], max_position=4000000,
        row_pdist_metric_I='euclidean', row_linkage_method_I='complete',
        col_pdist_metric_I='euclidean', col_linkage_method_I='complete'):
    '''Execute hierarchical clustering on the row and column data'''
    print('executing heatmap...')
    calculate = calculate_interface()
    # partition into variables:
    if mutations_I:
        mutation_data = mutations_I
    else:
        mutation_data = self.mutations
    if sample_names_I:
        sample_names = sample_names_I
    else:
        sample_names = self.sample_names
    mutation_data_O = []
    mutation_ids_all = []
    for end_cnt, mutation in enumerate(mutation_data):
        if int(mutation['mutation_position']) > max_position:
            # ignore positions greater than max_position
            continue
        # mutation id
        mutation_id = self._make_mutationID(mutation['mutation_genes'], mutation['mutation_type'], int(mutation['mutation_position']))
        tmp = {}
        tmp.update(mutation)
        tmp.update({'mutation_id': mutation_id})
        mutation_data_O.append(tmp)
        mutation_ids_all.append(mutation_id)
    mutation_ids_all_unique = list(set(mutation_ids_all))
    mutation_ids = [x for x in mutation_ids_all_unique if not x in mutation_id_exclusion_list]
    # generate the frequency matrix data structure (sample x mutation)
    data_O = numpy.zeros((len(sample_names), len(mutation_ids)))
    samples = []
    for sample_name_cnt, sample_name in enumerate(sample_names):
        samples.append(sample_name)  # corresponding label from hierarchical clustering
        for mutation_cnt, mutation in enumerate(mutation_ids):  # all mutations i for sample j
            for row in mutation_data_O:
                if row['mutation_id'] == mutation and row['sample_name'] == sample_name:
                    data_O[sample_name_cnt, mutation_cnt] = row['mutation_frequency']
    # generate the clustering for the heatmap
    heatmap_O, dendrogram_col_O, dendrogram_row_O = calculate.heatmap(
        data_O, samples, mutation_ids,
        row_pdist_metric_I=row_pdist_metric_I, row_linkage_method_I=row_linkage_method_I,
        col_pdist_metric_I=col_pdist_metric_I, col_linkage_method_I=col_linkage_method_I)
    # record the data
    self.heatmap = heatmap_O
    self.dendrogram_col = dendrogram_col_O
    self.dendrogram_row = dendrogram_row_O
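
# Hedged sketch (illustrative, not part of the source class):
# `calculate.heatmap` is not shown; hierarchical clustering of the rows of the
# frequency matrix with the metric/linkage parameters above would typically
# use scipy, e.g.:
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram

def cluster_rows_sketch(data, pdist_metric='euclidean', linkage_method='complete'):
    """Row linkage and dendrogram (leaf order, labels) for a 2D array (illustrative)."""
    d = pdist(data, metric=pdist_metric)        # condensed pairwise distances
    z = linkage(d, method=linkage_method)       # hierarchical clustering
    return dendrogram(z, no_plot=True)          # dict with 'leaves', 'ivl', ...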
def execute_calculateGrowthRates(self, experiment_id_I, sample_name_short_I=[]):
    '''Calculate growth rates (hr-1) based on the sample time and measured OD600'''
    calc = calculate_interface()
    data_O = []
    # query sample names
    print('executing calculating growth rates...')
    if sample_name_short_I:
        sample_name_short = sample_name_short_I
    else:
        sample_name_short = self.get_sampleNameShort_experimentID(experiment_id_I, 6)
    for sns in sample_name_short:
        print('calculating growth rates for sample_name_short ' + sns)
        # query met_ids
        met_ids = self.get_metIDs_experimentIDAndSampleNameShort(experiment_id_I, 6, sns)
        for met in met_ids:
            print('calculating growth rates for met_id ' + met)
            if met != 'biomass':
                continue
            # query time and OD600 values
            time, OD600 = self.get_sampleDateAndDataCorrected_experimentIDAndSampleNameShortAndMetIDAndDataUnits(experiment_id_I, 6, sns, met, 'OD600')
            if not OD600 or not time:
                continue
            # convert the sample dates to hrs (24 hours per day)
            time_hrs = []
            for t in time:
                time_hrs.append(t.year*8765.81277 + t.month*730.484 + t.day*24.
                                + t.hour + t.minute/60. + t.second/3600.)
            # calculate the growth rate and r2
            slope, intercept, r2, p_value, std_err = calc.calculate_growthRate(time_hrs, OD600)
            # add rows to the database
            row = {'experiment_id': experiment_id_I,
                   'sample_name_short': sns,
                   'met_id': met,
                   'slope': slope,
                   'intercept': intercept,
                   'r2': r2,
                   'rate': slope,
                   'rate_units': 'hr-1',
                   'p_value': p_value,
                   'std_err': std_err,
                   'used_': True,
                   'comment_': None}
            data_O.append(row)
    self.add_rows_table('data_stage01_physiology_rates', data_O)
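
# Hedged sketch (illustrative, not part of the source class):
# `calc.calculate_growthRate` is not shown here. A minimal equivalent fits
# ln(OD600) against time so that the slope is the specific growth rate mu
# (hr-1); the 5-tuple matches the call site above.
import numpy as np
from scipy import stats

def calculate_growthRate_sketch(time_hrs, od600):
    """Return slope (mu, hr-1), intercept, r2, p_value, std_err of ln(OD600) vs time."""
    slope, intercept, r, p_value, std_err = stats.linregress(time_hrs, np.log(od600))
    return slope, intercept, r**2, p_value, std_err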
def calculate_fluxDifference(self, flux_1, flux_stdev_1, flux_lb_1, flux_ub_1, flux_units_1,
        flux_2, flux_stdev_2, flux_lb_2, flux_ub_2, flux_units_2,
        criteria_I='flux_lb/flux_ub'):
    """Calculate flux differences and determine if the differences are significant
    Input:
    flux_1 = data for flux 1 to be compared
    ...
    flux_2 = data for flux 2 to be compared
    ...
    criteria_I = string,
        flux_lb/flux_ub: use flux_lb and flux_ub to determine significance (default)
        flux_mean/flux_stdev: use the flux_mean and flux_stdev to determine significance
    Output:
    flux_diff = relative flux difference, float
    flux_distance = geometric difference (i.e., distance)
    fold_change = geometric fold change
    significant = boolean
    """
    calc = calculate_interface()
    flux_diff = 0.0
    flux_distance = 0.0
    fold_change = 0.0
    significant = False
    if criteria_I == 'flux_lb/flux_ub':
        flux_mean_1 = np.mean([flux_lb_1, flux_ub_1])
        flux_mean_2 = np.mean([flux_lb_2, flux_ub_2])
        flux_diff = calc.calculate_difference(flux_mean_1, flux_mean_2, type_I='relative')
        flux_distance = calc.calculate_difference(flux_mean_1, flux_mean_2, type_I='geometric')
        fold_change = calc.calculate_foldChange(flux_mean_1, flux_mean_2, type_I='geometric')
        significant = self.determine_fluxDifferenceSignificance(flux_lb_1, flux_ub_1, flux_lb_2, flux_ub_2)
    elif criteria_I == 'flux_mean/flux_stdev':
        flux_diff = calc.calculate_difference(flux_1, flux_2, type_I='relative')
        flux_distance = calc.calculate_difference(flux_1, flux_2, type_I='geometric')
        fold_change = calc.calculate_foldChange(flux_1, flux_2, type_I='geometric')
        flux_lb_1 = flux_1 - flux_stdev_1
        flux_lb_2 = flux_2 - flux_stdev_2
        flux_ub_1 = flux_1 + flux_stdev_1
        flux_ub_2 = flux_2 + flux_stdev_2
        significant = self.determine_fluxDifferenceSignificance(flux_lb_1, flux_ub_1, flux_lb_2, flux_ub_2)
    else:
        print('criteria not recognized!')
    return flux_diff, flux_distance, fold_change, significant
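
# Hedged sketch (illustrative, not part of the source class):
# `determine_fluxDifferenceSignificance` is not shown. Both criteria above
# reduce the fluxes to [lb, ub] intervals, which suggests two fluxes are
# called significantly different when their intervals do not overlap. This is
# an assumption about the unshown helper.
def determine_fluxDifferenceSignificance_sketch(lb_1, ub_1, lb_2, ub_2):
    """True if the intervals [lb_1, ub_1] and [lb_2, ub_2] do not overlap."""
    return ub_1 < lb_2 or ub_2 < lb_1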
def execute_updatePhysiologicalParametersFromOD600(self, experiment_id_I, sample_ids_I=[]):
    '''Calculate physiological parameters from the OD600 and volume sampled'''
    calc = calculate_interface()
    data = []
    # query sample_ids for the experiment that have an OD600, but do not have a culture_density
    print('execute update physiological parameters from OD600...')
    if sample_ids_I:
        sample_ids = sample_ids_I
    else:
        sample_ids = self.get_sampleIDs_experimentIDWithOD600NoCultureDensity_samplePhysiologicalParameters(experiment_id_I)
    for si in sample_ids:
        print('updating physiological parameters from OD600 for sample_id ' + si)
        # query the sample_description
        desc = self.get_description_experimentIDAndSampleID_sampleDescription(experiment_id_I, si)
        if not desc['biological_material']:
            continue
        # query physiological parameters for the sample_id
        pp = self.get_physiologicalParameters_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I, si)
        # query conversions (conversion_name: gDW2OD_lab and ODspecificCellConcentration_lab)
        conversion_gDW2OD, conversion_gDW2OD_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(desc['biological_material'], 'gDW2OD_lab')
        conversion_ODspecificCellConcentration, conversion_ODspecificCellConcentration_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(desc['biological_material'], 'ODspecificCellConcentration_lab')
        # calculate the vcd and culture_density from the OD600 and conversions
        culture_density, culture_density_units = calc.calculate_cultureDensity_ODAndConversionAndConversionUnits(pp['od600'], conversion_gDW2OD, conversion_gDW2OD_units)
        vcd, vcd_units = calc.calculate_cultureDensity_ODAndConversionAndConversionUnits(pp['od600'], conversion_ODspecificCellConcentration, conversion_ODspecificCellConcentration_units)
        # TODO: calculate the cells, dcw, and wcw from the OD600, culture_volume_sampled, and conversions
        # update sample_physiologicalParameters
        pp['culture_density'], pp['culture_density_units'] = culture_density, culture_density_units
        pp['vcd'], pp['vcd_units'] = vcd, vcd_units
        data.append(pp)
    self.update_data_samplePhysiologicalParameters(data)
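
# Hedged sketch (illustrative, not part of the source class):
# `calc.calculate_cultureDensity_ODAndConversionAndConversionUnits` is not
# shown; the conversion names above (gDW per OD, cells per OD) imply a simple
# linear scaling of OD600 by the conversion factor. Assumption only.
def calculate_cultureDensity_sketch(od600, conversion, conversion_units):
    """Culture density from OD600 and a linear conversion factor (illustrative)."""
    return od600 * conversion, conversion_units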
def execute_findShortestPath_nodes(self, model_id_I, nodes_startAndStop_I,
        algorithm_I='all_simple_paths', params_I={'cutoff': 25},
        exclusion_list_I=[], weights_I=None):
    '''
    INPUT:
    model_id_I: model id [string]
    nodes_startAndStop_I: list of node start/stops
        e.g., [[nad_c,nadh_c],[g6p_c,f6p_c],...]
    OUTPUT:
    shortest_path_O = [{[nad_c,nadh_c]:algorithm_I output},...]
    distance = (len(sp['shortest_path'])-1)/2
    '''
    calc = calculate_interface()
    shortest_path_O = []
    # get the model reactions from the table
    reactions = self.get_rows_modelID_dataStage02PhysiologyModelReactions(model_id_I)
    # convert the reactions list to a directed graph
    aCyclicGraph = self.convert_modelReactionsTable2DirectedAcyclicGraph(
        reactions, weights_I=weights_I, attributes_I={},
        exclusion_list_I=exclusion_list_I)
    # find the shortest paths
    for startAndStop in nodes_startAndStop_I:
        tmp = {'start': startAndStop[0], 'stop': startAndStop[1]}
        try:
            output2 = self.find_shortestPath_nodes(
                aCyclicGraph, startAndStop[0], startAndStop[1],
                algorithm_I=algorithm_I, params_I=params_I)
            if str(type(output2)) == "<class 'generator'>":
                paths = [o for o in output2]
                distances = [(len(p)-1)/2 for p in paths]
            else:
                paths = [output2]
                distances = [(len(output2)-1)/2]
        except Exception as e:
            print(e)
            print('algorithm = ' + algorithm_I + '; start = ' + startAndStop[0] + '; stop = ' + startAndStop[1])
            continue
        # calculate descriptive statistics on the paths
        try:
            data_ave_O, data_var_O, data_lb_O, data_ub_O = calc.calculate_ave_var(distances, confidence_I=0.95)
            if data_ave_O:
                data_cv_O = sqrt(data_var_O)/data_ave_O*100
            else:
                data_cv_O = None
            min_O, max_O, median_O, iq_1_O, iq_3_O = calc.calculate_interquartiles(distances)
        except Exception as e:
            print(e)
            print('algorithm = ' + algorithm_I + '; start = ' + startAndStop[0] + '; stop = ' + startAndStop[1])
            continue
        tmp['all_paths'] = paths
        tmp['algorithm'] = algorithm_I
        tmp['params'] = params_I
        tmp['path_max'] = max_O
        tmp['path_min'] = min_O
        tmp['path_iq_1'] = iq_1_O
        tmp['path_iq_3'] = iq_3_O
        tmp['path_median'] = median_O
        tmp['path_average'] = data_ave_O
        tmp['path_var'] = data_var_O
        tmp['path_n'] = len(paths)
        tmp['path_cv'] = data_cv_O
        tmp['path_ci_lb'] = data_lb_O
        tmp['path_ci_ub'] = data_ub_O
        tmp['path_ci_level'] = 0.95
        shortest_path_O.append(tmp)
    return shortest_path_O
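
# Hedged sketch (illustrative, not part of the source class): with
# algorithm_I='all_simple_paths' and a 'cutoff' parameter, the unshown
# `find_shortestPath_nodes` helper presumably wraps networkx. Path lengths are
# divided by 2 above because metabolite and reaction nodes alternate along a
# path in the reaction graph.
import networkx as nx

def find_simple_paths_sketch(graph, start, stop, cutoff=25):
    """Yield all simple paths from start to stop with at most `cutoff` edges."""
    return nx.all_simple_paths(graph, source=start, target=stop, cutoff=cutoff)

# usage: paths = list(find_simple_paths_sketch(G, 'g6p_c', 'f6p_c'))
#        distances = [(len(p)-1)/2 for p in paths]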
def findAndCalculate_amplificationStats_fromGff(self, gff_file,
        strand_start, strand_stop,
        experiment_id_I=None, sample_name_I=None,
        scale_factor=True, downsample_factor=0,
        reads_min=1.5, reads_max=5.0,
        indices_min=200, consecutive_tol=10):
    """find amplifications from the gff file and calculate their statistics
    INPUT:
    strand_start = index of the start position
    strand_stop = index of the stop position
    scale_factor = boolean, if True, reads will be normalized to a max of 100
    downsample_factor = integer, factor by which to downsample the points
    reads_min = minimum number of reads to identify an amplification
    reads_max = maximum number of reads to identify an amplification
    indices_min = minimum number of points of a high coverage region
    consecutive_tol = maximum number of consecutive points that do not meet
        the coverage_min/max criteria that can be included in a high coverage region
    OPTIONAL INPUT:
    experiment_id_I = tag for the experiment from which the sample came
    sample_name_I = tag for the sample name
    """
    data_O = []
    stats_O = []
    experiment_id = experiment_id_I
    sn = sample_name_I
    calculate = calculate_interface()
    # get the data_dir
    self.set_gffFile(gff_file)
    # extract the strands
    self.extract_strandsFromGff(strand_start, strand_stop, scale=scale_factor, downsample=0)
    # find high coverage regions
    plus_high_region_indices, minus_high_region_indices = self.find_highCoverageRegions(
        coverage_min=reads_min, coverage_max=reads_max,
        points_min=indices_min, consecutive_tol=consecutive_tol)
    # record the means for later use
    plus_mean, minus_mean = self.plus.mean(), self.minus.mean()
    plus_min, minus_min = self.plus.min(), self.minus.min()
    plus_max, minus_max = self.plus.max(), self.minus.max()
    # calculate stats on the high coverage regions
    # + strand
    for row_cnt, row in enumerate(plus_high_region_indices):
        plus_region = self.plus_high_regions[(self.plus_high_regions.index >= row['start']) & (self.plus_high_regions.index <= row['stop'])]
        # calculate using scipy
        data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(plus_region.values, confidence_I=0.95)
        # calculate the interquartile range
        min_O, max_O, median_O, iq_1_O, iq_3_O = calculate.calculate_interquartiles(plus_region.values)
        # record data
        stats_O.append({
            'experiment_id': experiment_id,
            'sample_name': sn,
            'genome_chromosome': 1,
            'genome_strand': 'plus',
            'strand_start': strand_start,
            'strand_stop': strand_stop,
            'reads_min': min_O,
            'reads_max': max_O,
            'reads_lb': data_lb_O,
            'reads_ub': data_ub_O,
            'reads_iq1': iq_1_O,
            'reads_iq3': iq_3_O,
            'reads_median': median_O,
            'reads_mean': data_ave_O,
            'reads_var': data_var_O,
            'reads_n': len(plus_region.values),
            'amplification_start': int(row['start']),
            'amplification_stop': int(row['stop']),
            'used_': True,
            'comment_': None})
        # downsample
        collapse_factor = None
        if downsample_factor > 1:
            collapse_factor = int((row['stop'] - row['start']) / downsample_factor)
        if collapse_factor and collapse_factor > 1:
            plus_region = plus_region.groupby(lambda x: x // collapse_factor).mean()
            plus_region.index *= collapse_factor
        # add the mean to the indices before and after the amplification start
        # and stop, respectively (for visualization)
        if downsample_factor > 1 and row_cnt == 0:
            data_O.append({
                'experiment_id': experiment_id,
                'sample_name': sn,
                'genome_chromosome': 1,  # default
                'genome_strand': 'plus_mean',
                'genome_index': int(row['start']-1),
                'strand_start': strand_start,
                'strand_stop': strand_stop,
                'reads': plus_mean,
                'reads_min': reads_min,
                'reads_max': reads_max,
                'indices_min': indices_min,
                'consecutive_tol': consecutive_tol,
                'scale_factor': scale_factor,
                'downsample_factor': downsample_factor,
                'amplification_start': strand_start,
                'amplification_stop': strand_stop,
                'used_': True,
                'comment_': 'mean reads of the plus strand'})
        if downsample_factor > 1 and row_cnt == len(plus_high_region_indices)-1:
            data_O.append({
                'experiment_id': experiment_id,
                'sample_name': sn,
                'genome_chromosome': 1,  # default
                'genome_strand': 'plus_mean',
                'genome_index': int(row['stop']+1),
                'strand_start': strand_start,
                'strand_stop': strand_stop,
                'reads': plus_mean,
                'reads_min': reads_min,
                'reads_max': reads_max,
                'indices_min': indices_min,
                'consecutive_tol': consecutive_tol,
                'scale_factor': scale_factor,
                'downsample_factor': downsample_factor,
                'amplification_start': strand_start,
                'amplification_stop': strand_stop,
                'used_': True,
                'comment_': 'mean reads of the plus strand'})
        # record high coverage regions
        for index, reads in plus_region.items():
            data_O.append({
                'experiment_id': experiment_id,
                'sample_name': sn,
                'genome_chromosome': 1,  # default
                'genome_strand': 'plus',
                'genome_index': int(index),
                'strand_start': strand_start,
                'strand_stop': strand_stop,
                'reads': float(reads),
                'reads_min': reads_min,
                'reads_max': reads_max,
                'indices_min': indices_min,
                'consecutive_tol': consecutive_tol,
                'scale_factor': scale_factor,
                'downsample_factor': downsample_factor,
                'amplification_start': int(row['start']),
                'amplification_stop': int(row['stop']),
                'used_': True,
                'comment_': None})
    # - strand
    for row_cnt, row in enumerate(minus_high_region_indices):
        minus_region = self.minus_high_regions[(self.minus_high_regions.index >= row['start']) & (self.minus_high_regions.index <= row['stop'])]
        # calculate using scipy
        data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(minus_region.values, confidence_I=0.95)
        # calculate the interquartile range
        min_O, max_O, median_O, iq_1_O, iq_3_O = calculate.calculate_interquartiles(minus_region.values)
        # record data
        stats_O.append({
            'experiment_id': experiment_id,
            'sample_name': sn,
            'genome_chromosome': 1,
            'genome_strand': 'minus',
            'strand_start': strand_start,
            'strand_stop': strand_stop,
            'reads_min': min_O,
            'reads_max': max_O,
            'reads_lb': data_lb_O,
            'reads_ub': data_ub_O,
            'reads_iq1': iq_1_O,
            'reads_iq3': iq_3_O,
            'reads_median': median_O,
            'reads_mean': data_ave_O,
            'reads_var': data_var_O,
            'reads_n': len(minus_region.values),
            'amplification_start': int(row['start']),
            'amplification_stop': int(row['stop']),
            'used_': True,
            'comment_': None})
        # downsample
        collapse_factor = None
        if downsample_factor > 1:
            collapse_factor = int((row['stop'] - row['start']) / downsample_factor)
        if collapse_factor and collapse_factor > 1:
            minus_region = minus_region.groupby(lambda x: x // collapse_factor).mean()
            minus_region.index *= collapse_factor
        # add the mean to the indices before and after the amplification start
        # and stop, respectively (for visualization)
        if downsample_factor > 1 and row_cnt == 0:
            data_O.append({
                'experiment_id': experiment_id,
                'sample_name': sn,
                'genome_chromosome': 1,  # default
                'genome_strand': 'minus_mean',
                'genome_index': int(row['start']-1),
                'strand_start': strand_start,
                'strand_stop': strand_stop,
                'reads': minus_mean,
                'reads_min': reads_min,
                'reads_max': reads_max,
                'indices_min': indices_min,
                'consecutive_tol': consecutive_tol,
                'scale_factor': scale_factor,
                'downsample_factor': downsample_factor,
                'amplification_start': strand_start,
                'amplification_stop': strand_stop,
                'used_': True,
                'comment_': 'mean reads of the minus strand'})
        if downsample_factor > 1 and row_cnt == len(minus_high_region_indices)-1:
            data_O.append({
                'experiment_id': experiment_id,
                'sample_name': sn,
                'genome_chromosome': 1,  # default
                'genome_strand': 'minus_mean',
                'genome_index': int(row['stop']+1),
                'strand_start': strand_start,
                'strand_stop': strand_stop,
                'reads': minus_mean,
                'reads_min': reads_min,
                'reads_max': reads_max,
                'indices_min': indices_min,
                'consecutive_tol': consecutive_tol,
                'scale_factor': scale_factor,
                'downsample_factor': downsample_factor,
                'amplification_start': strand_start,
                'amplification_stop': strand_stop,
                'used_': True,
                'comment_': 'mean reads of the minus strand'})
        # record high coverage regions
        for index, reads in minus_region.items():
            data_O.append({
                'experiment_id': experiment_id,
                'sample_name': sn,
                'genome_chromosome': 1,  # default
                'genome_strand': 'minus',
                'genome_index': int(index),
                'strand_start': strand_start,
                'strand_stop': strand_stop,
                'reads': float(reads),
                'reads_min': reads_min,
                'reads_max': reads_max,
                'indices_min': indices_min,
                'consecutive_tol': consecutive_tol,
                'scale_factor': scale_factor,
                'downsample_factor': downsample_factor,
                'amplification_start': int(row['start']),
                'amplification_stop': int(row['stop']),
                'used_': True,
                'comment_': None})
    # record the data
    self.amplifications = data_O
    self.amplificationStats = stats_O
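
# Hedged sketch (illustrative, not part of the source class): the downsampling
# above collapses a position-indexed pandas Series by averaging every
# `collapse_factor` consecutive positions, then rescales the index back to
# approximate genome coordinates.
import pandas as pd

def downsample_region_sketch(region, collapse_factor):
    """Average a position-indexed Series over bins of `collapse_factor` positions."""
    collapsed = region.groupby(lambda x: x // collapse_factor).mean()
    collapsed.index *= collapse_factor  # restore approximate genome coordinates
    return collapsed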
def execute_calculateMissingComponents_replicates(self, experiment_id_I, biological_material_I=None, conversion_name_I=None, sample_names_short_I=[]):
    '''calculate estimates for samples in which a component was not found for any of the replicates'''
    calc = calculate_interface()
    print('execute_calculateMissingComponents_replicates...')
    data_O = []
    # get all sample names short
    if sample_names_short_I:
        sample_names_short = sample_names_short_I
    else:
        sample_names_short = self.get_sampleNameShort_experimentIDAndSampleDescription_dataStage01Normalized(experiment_id_I, 'Broth')
    # get component names
    component_names = self.get_componentNames_experimentID_dataStage01ReplicatesMI(experiment_id_I)
    # get time points
    time_points = self.get_timePoint_experimentID_dataStage01ReplicatesMI(experiment_id_I)
    for tp in time_points:
        print('calculating missing components for time_point ' + tp)
        for cn in component_names:
            print('calculating missing components for component_name ' + cn)
            component_group_name, calculated_concentration_units = self.get_componentGroupNameAndConcUnits_experimentIDAndComponentName_dataStage01Replicates(experiment_id_I, cn)
            for sns in sample_names_short:
                print('calculating missing components for sample_name_short ' + sns)
                # get the calculated concentration
                calculated_concentration = self.get_calculatedConcentration_experimentIDAndSampleNameShortAndTimePointAndComponentName_dataStage01ReplicatesMI(experiment_id_I, sns, tp, cn)
                if calculated_concentration:
                    continue
                # get the lloq
                lloq, conc_units = self.get_lloq_ExperimentIDAndComponentName_dataStage01LLOQAndULOQ(experiment_id_I, cn)
                if not lloq:
                    print('lloq not found')
                    continue
                # normalize the lloq
                if (biological_material_I and conversion_name_I):
                    # get physiological parameters
                    cvs, cvs_units, od600, dil, dil_units = self.get_CVSAndCVSUnitsAndODAndDilAndDilUnits_sampleNameShort(experiment_id_I, sns)
                    conversion, conversion_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(biological_material_I, conversion_name_I)
                    if not(cvs and cvs_units and od600 and dil and dil_units):
                        print('cvs, cvs_units, or od600 are missing from physiological parameters')
                        print('or dil and dil_units are missing from the sample description')
                        exit(-1)
                    elif not(conversion and conversion_units):
                        print('biological_material or conversion name is incorrect')
                        exit(-1)
                    else:
                        # calculate the cell volume
                        cell_volume, cell_volume_units = calc.calculate_biomass_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs, cvs_units, od600, conversion, conversion_units)
                    # calculate the normalized concentration
                    norm_conc, norm_conc_units = calc.calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits(lloq, conc_units, dil, dil_units, cell_volume, cell_volume_units)
                    if norm_conc:
                        # estimate the missing value as 1/2 the normalized lloq
                        norm_conc = norm_conc/2
                        # populate data_stage01_quantification_replicatesMI
                        row = data_stage01_quantification_replicatesMI(experiment_id_I, sns, tp, component_group_name, cn, norm_conc, "lloq", None, norm_conc_units, True, None)
                        self.session.add(row)
                else:
                    # estimate the missing value as 1/2 the lloq
                    calc_conc = lloq/2
                    # populate data_stage01_quantification_replicatesMI
                    row = data_stage01_quantification_replicatesMI(experiment_id_I, sns, tp, component_group_name, cn, calc_conc, "lloq", None, conc_units, True, None)
                    self.session.add(row)
    self.session.commit()
def execute_analyzePeakResolution(self, experiment_id_I, sample_names_I=[], sample_types_I=['Standard'],
        component_name_pairs_I=[], acquisition_date_and_time_I=[None, None]):
    '''Analyze the resolution of critical peak pairs
    Input:
    experiment_id_I
    sample_names_I
    sample_types_I
    component_name_pairs_I = [[component_name_1, component_name_2], ...]
    acquisition_date_and_time_I = ['%m/%d/%Y %H:%M', '%m/%d/%Y %H:%M']
    '''
    print('execute_analyzePeakResolution...')
    # convert the string date times to datetime
    # e.g. time.strptime('4/15/2014 15:51','%m/%d/%Y %H:%M')
    acquisition_date_and_time = []
    if acquisition_date_and_time_I and acquisition_date_and_time_I[0] and acquisition_date_and_time_I[1]:
        for dateandtime in acquisition_date_and_time_I:
            time_struct = strptime(dateandtime, '%m/%d/%Y %H:%M')
            dt = datetime.fromtimestamp(mktime(time_struct))
            acquisition_date_and_time.append(dt)
    else:
        acquisition_date_and_time = [None, None]
    data_O = []
    # get sample names
    if sample_names_I and sample_types_I and len(sample_types_I) == 1:
        sample_names = sample_names_I
        sample_types = [sample_types_I[0] for sn in sample_names]
    else:
        sample_names = []
        sample_types = []
        for st in sample_types_I:
            sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I, st)
            sample_names.extend(sample_names_tmp)
            sample_types.extend([st for sn in sample_names_tmp])
    for sn in sample_names:
        print('analyzing peakInformation for sample_name ' + sn)
        for component_name_pair in component_name_pairs_I:
            # get the critical pair data
            cpd1 = self.get_peakInfo_sampleNameAndComponentName(sn, component_name_pair[0], acquisition_date_and_time)
            cpd2 = self.get_peakInfo_sampleNameAndComponentName(sn, component_name_pair[1], acquisition_date_and_time)
            if cpd1 and cpd2 and cpd1['retention_time'] and cpd2['retention_time']:
                # calculate the RT difference and resolution
                rt_dif = abs(cpd1['retention_time'] - cpd2['retention_time'])
                resolution = rt_dif/(0.5*(cpd1['width_at_50'] + cpd2['width_at_50']))
                # record data
                data_O.append({'component_name_pair': component_name_pair,
                    'rt_dif': rt_dif,
                    'resolution': resolution,
                    'component_group_name_pair': [cpd1['component_group_name'], cpd2['component_group_name']],
                    'sample_name': sn,
                    'acquisition_date_and_time': cpd1['acquisition_date_and_time']})
    # TODO: 1. make a calculation method
    # calculate statistics for specific parameters
    data_add = []
    calc = calculate_interface()
    for cnp in component_name_pairs_I:
        data_parameters = {}
        data_parameters_stats = {}
        for parameter in ['rt_dif', 'resolution']:
            data_parameters[parameter] = []
            data_parameters_stats[parameter] = {'ave': None, 'var': None, 'cv': None, 'lb': None, 'ub': None}
            acquisition_date_and_times = []
            sample_names_parameter = []
            sample_types_parameter = []
            component_group_name_pair = None
            for sn_cnt, sn in enumerate(sample_names):
                for d in data_O:
                    if d['sample_name'] == sn and d['component_name_pair'] == cnp and d[parameter]:
                        data_parameters[parameter].append(d[parameter])
                        acquisition_date_and_times.append(d['acquisition_date_and_time'])
                        sample_names_parameter.append(sn)
                        sample_types_parameter.append(sample_types[sn_cnt])
                        component_group_name_pair = d['component_group_name_pair']
            ave, var, lb, ub = None, None, None, None
            if len(data_parameters[parameter]) > 1:
                ave, var, lb, ub = calc.calculate_ave_var(data_parameters[parameter])
            if ave:
                cv = sqrt(var)/ave*100
                data_parameters_stats[parameter] = {'ave': ave, 'var': var, 'cv': cv, 'lb': lb, 'ub': ub}
            # add data to the database:
            row = {'experiment_id': experiment_id_I,
                'component_group_name_pair': component_group_name_pair,
                'component_name_pair': cnp,
                'peakInfo_parameter': parameter,
                'peakInfo_ave': data_parameters_stats[parameter]['ave'],
                'peakInfo_cv': data_parameters_stats[parameter]['cv'],
                'peakInfo_lb': data_parameters_stats[parameter]['lb'],
                'peakInfo_ub': data_parameters_stats[parameter]['ub'],
                'peakInfo_units': None,
                'sample_names': sample_names_parameter,
                'sample_types': sample_types_parameter,
                'acqusition_date_and_times': acquisition_date_and_times,
                'peakInfo_data': data_parameters[parameter],
                'used_': True,
                'comment_': None}
            data_add.append(row)
    self.add_rows_table('data_stage01_quantification_peakResolution', data_add)
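
# Hedged sketch (illustrative, not part of the source class): the resolution
# computed above is the standard chromatographic form using peak widths at 50%
# height, Rs = dRT / (0.5*(w1 + w2)).
def calculate_resolution_sketch(rt_1, rt_2, width50_1, width50_2):
    """Resolution of two peaks from retention times and widths at half height."""
    return abs(rt_1 - rt_2) / (0.5*(width50_1 + width50_2))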
def execute_physiologicalRatios_replicatesMI(self, experiment_id_I):
    '''Calculate physiologicalRatios from replicates MI'''
    calc = calculate_interface()
    print('execute_physiologicalRatios_replicatesMI...')
    # get sample names short
    sample_names_short = self.get_SampleNameShort_experimentID_dataStage01ReplicatesMI(experiment_id_I)
    data_O = []
    for sns in sample_names_short:
        print('calculating physiologicalRatios from replicates for sample_name_short ' + sns)
        # get time points
        time_points = self.get_timePoint_experimentIDAndSampleNameShort_dataStage01ReplicatesMI(experiment_id_I, sns)
        for tp in time_points:
            print('calculating physiologicalRatios from replicates for time_point ' + tp)
            for k, v in self.ratios.items():
                print('calculating physiologicalRatios from replicates for ratio ' + k)
                ratios_data = {}
                calcratios = True
                for cgn in v['component_group_name']:
                    ratios_data[cgn] = None
                    # concentrations and units
                    conc, conc_unit = self.get_concAndConcUnits_experimentIDAndSampleNameShortAndTimePointAndComponentGroupName_dataStage01ReplicatesMI(experiment_id_I, sns, tp, cgn)
                    if not(conc):
                        calcratios = False
                        break
                    ratios_data[cgn] = conc
                # calculate the physiological ratios
                if not calcratios:
                    continue
                ratio_calc, num_calc, den_calc = self.calculate_physiologicalRatios(k, ratios_data)
                # add data to the session
                row = {"experiment_id": experiment_id_I,
                    "sample_name_short": sns,
                    "time_point": tp,
                    "physiologicalratio_id": k,
                    "physiologicalratio_name": v['name'],
                    "physiologicalratio_value": ratio_calc,
                    "physiologicalratio_description": v['description'],
                    "used_": True,
                    "comment_": None}
                data_O.append(row)
                row = {"experiment_id": experiment_id_I,
                    "sample_name_short": sns,
                    "time_point": tp,
                    "physiologicalratio_id": k + '_numerator',
                    "physiologicalratio_name": v['name'] + '_numerator',
                    "physiologicalratio_value": num_calc,
                    "physiologicalratio_description": v['description'].split('/')[0],
                    "used_": True,
                    "comment_": None}
                data_O.append(row)
                row = {"experiment_id": experiment_id_I,
                    "sample_name_short": sns,
                    "time_point": tp,
                    "physiologicalratio_id": k + '_denominator',
                    "physiologicalratio_name": v['name'] + '_denominator',
                    "physiologicalratio_value": den_calc,
                    "physiologicalratio_description": v['description'].split('/')[1],
                    "used_": True,
                    "comment_": None}
                data_O.append(row)
    self.add_rows_table('data_stage01_quantification_physiologicalRatios_replicates', data_O)
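
# Hedged sketch (illustrative, not part of the source class):
# `calculate_physiologicalRatios` is not shown. Judging by the rows recorded
# above, it returns the ratio together with its numerator and denominator so
# that all three can be stored. A minimal two-component version:
def calculate_physiologicalRatios_sketch(numerator_conc, denominator_conc):
    """Return (ratio, numerator, denominator) for a simple two-component ratio."""
    return numerator_conc/denominator_conc, numerator_conc, denominator_conc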
def execute_calculateGeoAverages_replicates(self, experiment_id_I,
        sample_name_abbreviations_I=[],
        time_points_I=[],
        calculated_concentration_units_I=[]):
    '''Calculate the averages from replicates MI in ln space'''
    calc = calculate_interface()
    print('execute_calculateGeoAverages_replicates...')
    data_O = []
    # get the unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points
    unique_rows = self.get_sampleNameAbbreviationsAndCalculatedConcentrationUnitsAndTimePointsAndComponentNames_experimentID_dataStage01QuantificationReplicatesMI(
        experiment_id_I,
        sample_name_abbreviations_I,
        time_points_I,
        calculated_concentration_units_I,
        exp_type_I=4)
    for unique_row in unique_rows:
        # get sample names short
        sample_names_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndComponentNameAndTimePointAndCalculatedConcentrationUnits_dataStage01ReplicatesMI(
            experiment_id_I,
            unique_row['sample_name_abbreviation'],
            unique_row['component_name'],
            unique_row['time_point'],
            unique_row['calculated_concentration_units'])
        concs = []
        conc_units = None
        for sns in sample_names_short:
            # concentrations and units
            conc = self.get_calculatedConcentration_experimentIDAndSampleNameShortAndTimePointAndComponentNameAndCalculatedConcentrationUnits_dataStage01ReplicatesMI(
                experiment_id_I,
                sns,
                unique_row['time_point'],
                unique_row['component_name'],
                unique_row['calculated_concentration_units'])
            if (not(conc) or conc == 0):
                continue
            # convert to M from mM or uM
            # (the ln of the concentration is taken in calculate_ave_var_geometric)
            if (unique_row['calculated_concentration_units'] == 'mM'):
                conc_units = 'M'
                conc = conc*1e-3
            elif (unique_row['calculated_concentration_units'] == 'uM'):
                conc_units = 'M'
                conc = conc*1e-6
            elif (unique_row['calculated_concentration_units'] == 'umol*gDW-1'):
                conc_units = 'mol*gDW-1'
                conc = conc*1e-6
            elif (unique_row['calculated_concentration_units'] == 'height_ratio'
                  or unique_row['calculated_concentration_units'] == 'area_ratio'):
                continue
            else:
                print('units of ' + str(unique_row['calculated_concentration_units']) + ' are not supported')
                exit(-1)
            concs.append(conc)
        n_replicates = len(concs)
        conc_average = 0.0
        conc_var = 0.0
        conc_lb = 0.0
        conc_ub = 0.0
        # calculate the average and CV of the concentrations
        if (not(concs)):
            continue
        elif n_replicates < 2:
            continue
        else:
            conc_average, conc_var, conc_lb, conc_ub = calc.calculate_ave_var_geometric(concs)
        # add data to the session
        row = {"experiment_id": experiment_id_I,
            "sample_name_abbreviation": unique_row['sample_name_abbreviation'],
            "time_point": unique_row['time_point'],
            "component_group_name": unique_row['component_group_name'],
            "component_name": unique_row['component_name'],
            "n_replicates": n_replicates,
            "calculated_concentration_average": conc_average,
            "calculated_concentration_var": conc_var,
            "calculated_concentration_lb": conc_lb,
            "calculated_concentration_ub": conc_ub,
            "calculated_concentration_units": conc_units,
            "used_": True}
        data_O.append(row)
    self.add_rows_table('data_stage01_quantification_averagesmigeo', data_O)
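
# Hedged sketch (illustrative, not part of the source class):
# `calc.calculate_ave_var_geometric` is not shown. A minimal version averages
# in ln space and maps the mean and confidence bounds back with exp(); the
# variance convention here is an illustrative assumption only.
import numpy as np
from scipy import stats

def calculate_ave_var_geometric_sketch(data, confidence=0.95):
    """Geometric mean, spread, and confidence bounds of positive data via ln space."""
    ln_data = np.log(data)
    n = len(ln_data)
    ave, var = np.mean(ln_data), np.var(ln_data)
    h = np.sqrt(var/n) * stats.t.ppf((1 + confidence)/2., n - 1)
    return np.exp(ave), np.exp(var), np.exp(ave - h), np.exp(ave + h)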
def execute_calculateUptakeAndSecretionRates(self, experiment_id_I, sample_name_short_I=[], QC_filename_O=None):
    '''Calculate uptake and secretion rates (mmol*gDCW-1*hr-1) based on the sample time,
    the measured gDCW (calculated from the OD600), and the calculated growth rate (hr-1)'''
    calc = calculate_interface()
    data_O = []
    # query sample names
    print('execute calculate uptake and secretion rates...')
    if sample_name_short_I:
        sample_name_short = sample_name_short_I
    else:
        sample_name_short = self.get_sampleNameShort_experimentID(experiment_id_I, 7)
    for sns in sample_name_short:
        print('calculating uptake and secretion rates for sample_name_short ' + sns)
        # query met_ids
        met_ids = self.get_metIDs_experimentIDAndSampleNameShort(experiment_id_I, 7, sns)
        for met in met_ids:
            print('calculating uptake and secretion rates for met_id ' + met)
            if met == 'biomass':
                continue  # ignore biomass (calculated previously)
            # query time, conc (mM), and sample_ids (sorted by sample_date)
            time, conc, sample_ids = self.get_sampleDateAndDataCorrectedAndSampleIDs_experimentIDAndSampleNameShortAndMetIDAndDataUnits(experiment_id_I, 7, sns, met, 'mM')
            if not conc or not time:
                continue
            # query the slope, intercept, and rate for the growth rate
            slope, intercept, r2, gr_rate, rate_units, p_value, std_err = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I, sns, 'biomass')
            # query the OD600 and DCW from sample_physiologicalparameters (sorted by sample_date)
            OD600, culture_density = [], []
            for si in sample_ids:
                OD600_tmp, culture_density_tmp = self.get_OD600AndCultureDensity_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I, 7, si)
                OD600.append(OD600_tmp)
                culture_density.append(culture_density_tmp)
            # check that the lengths of the DCW and conc match
            if len(conc) != len(culture_density):
                print('The length of measured concentrations and measured dcw do not match!')
            # convert the sample dates to hrs (24 hours per day)
            time_hrs = []
            for t in time:
                time_hrs.append(t.year*8765.81277 + t.month*730.484 + t.day*24.
                                + t.hour + t.minute/60. + t.second/3600.)
            # calculate the uptake/secretion rate and r2
            slope, intercept, r2, p_value, std_err, rate = calc.calculate_uptakeAndSecretionRate(culture_density, conc, gr_rate)
            # record time, conc, and culture density for QC
            for si_cnt, si in enumerate(sample_ids):
                tmp = {}
                tmp['sample_name_short'] = sns
                tmp['met_id'] = met
                tmp['sample_id'] = si
                tmp['time [hr]'] = time_hrs[si_cnt]
                tmp['OD600'] = OD600[si_cnt]
                tmp['culture_density [gDW*L-1]'] = culture_density[si_cnt]
                tmp['concentration [mM]'] = conc[si_cnt]
                tmp['growth_rate [hr-1]'] = gr_rate
                data_O.append(tmp)
            # add rows to the database
            row = data_stage01_physiology_rates(experiment_id_I, sns, met, slope, intercept, r2, rate, 'mmol*gDCW-1*hr-1', p_value, std_err, True, None)
            self.session.add(row)
    self.session.commit()
    if QC_filename_O:
        io = base_exportData(data_O)
        io.write_dict2csv(QC_filename_O, ['sample_name_short', 'met_id', 'sample_id',
            'time [hr]', 'OD600', 'culture_density [gDW*L-1]',
            'concentration [mM]', 'growth_rate [hr-1]'])
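
# Hedged sketch (illustrative, not part of the source class):
# `calc.calculate_uptakeAndSecretionRate` is not shown. During exponential
# growth, dC/dt = q*X and X = X0*exp(mu*t), so the concentration is linear in
# biomass with slope q/mu; the specific rate is then slope*mu. The 6-tuple
# matches the call site above. Assumption only.
import numpy as np
from scipy import stats

def calculate_uptakeAndSecretionRate_sketch(culture_density, conc, growth_rate):
    """Specific rate (mmol*gDCW-1*hr-1) from conc (mM) vs biomass (gDCW*L-1)."""
    slope, intercept, r, p_value, std_err = stats.linregress(culture_density, conc)
    rate = slope * growth_rate  # q = (dC/dX) * mu
    return slope, intercept, r**2, p_value, std_err, rate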
def export_dataStage01NormalizedAndAverages_js(self, analysis_id_I,
        sample_name_abbreviations_I=[], sample_names_I=[], component_names_I=[],
        cv_threshold_I=40, extracellular_threshold_I=80, data_dir_I='tmp'):
    '''export data_stage01_quantification_normalized and averages for visualization with ddt'''
    calc = calculate_interface()
    print('export_dataStage01NormalizedAndAverages_js...')
    data_norm_broth = []
    data_norm_filtrate = []
    data_norm_combined = []
    data_ave = []
    # SPLIT 1:
    # 1. query the unique calculated_concentration_units/sample_name_abbreviations/component_names/
    #    component_group_names/time_points/sample_names/sample_ids/sample_description
    uniqueRows_all = self.getQueryResult_groupNormalizedAveragesSamples_analysisID_dataStage01QuantificationNormalizedAndAverages(
        analysis_id_I)
    # 2. filter in broth samples
    uniqueRows = self.filter_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
        uniqueRows_all,
        calculated_concentration_units_I=[],
        component_names_I=component_names_I,
        component_group_names_I=[],
        sample_names_I=sample_names_I,
        sample_name_abbreviations_I=sample_name_abbreviations_I,
        time_points_I=[])
    if type(uniqueRows) == type(listDict()):
        uniqueRows.convert_dataFrame2ListDict()
        uniqueRows = uniqueRows.get_listDict()
    # reorganize the data into a dictionary for quick traversal of the replicates
    replicates_tmp = {}
    for uniqueRow_cnt, uniqueRow in enumerate(uniqueRows):
        unique = (uniqueRow['sample_name_abbreviation'],
                  uniqueRow['experiment_id'],
                  uniqueRow['time_point'],
                  uniqueRow['component_name'],
                  uniqueRow['calculated_concentration_units'])
        if not unique in replicates_tmp.keys():
            replicates_tmp[unique] = []
        replicates_tmp[unique].append(uniqueRow)
    for unique, replicates in replicates_tmp.items():
        # get data from averages once per sample_name_abbreviation/component_name
        # get the averages and %CV samples
        row_ave = self.get_row_experimentIDAndSampleNameAbbreviationAndTimePointAndComponentNameAndCalculatedConcentrationCVAndExtracellularPercent_dataStage01Averages(
            replicates[0]['experiment_id'],
            replicates[0]['sample_name_abbreviation'],
            replicates[0]['time_point'],
            replicates[0]['component_name'],
            cv_threshold_I=cv_threshold_I,
            extracellular_threshold_I=extracellular_threshold_I)
        if row_ave:
            # convert the %CVs to lb/ub bounds
            stdev = calc.convert_cv2StDev(row_ave['calculated_concentration_filtrate_average'], row_ave['calculated_concentration_filtrate_cv'])
            row_ave['calculated_concentration_filtrate_lb'] = row_ave['calculated_concentration_filtrate_average'] - stdev
            row_ave['calculated_concentration_filtrate_ub'] = row_ave['calculated_concentration_filtrate_average'] + stdev
            stdev = calc.convert_cv2StDev(row_ave['calculated_concentration_broth_average'], row_ave['calculated_concentration_broth_cv'])
            row_ave['calculated_concentration_broth_lb'] = row_ave['calculated_concentration_broth_average'] - stdev
            row_ave['calculated_concentration_broth_ub'] = row_ave['calculated_concentration_broth_average'] + stdev
            stdev = calc.convert_cv2StDev(row_ave['calculated_concentration_average'], row_ave['calculated_concentration_cv'])
            row_ave['calculated_concentration_lb'] = row_ave['calculated_concentration_average'] - stdev
            row_ave['calculated_concentration_ub'] = row_ave['calculated_concentration_average'] + stdev
            row_ave['analysis_id'] = analysis_id_I
            # get data from normalized
            filtrate_conc = []
            broth_conc = []
            for rep in replicates:
                row = {}
                row['analysis_id'] = analysis_id_I
                row['extracellular_percent'] = row_ave['extracellular_percent']
                row['calculated_concentration_cv'] = row_ave['calculated_concentration_cv']
                row.update(rep)
                if rep['sample_desc'] == 'Filtrate':
                    data_norm_filtrate.append(row)
                    filtrate_conc.append(rep['calculated_concentration'])
                if rep['sample_desc'] == 'Broth':
                    data_norm_broth.append(row)
                    broth_conc.append(rep['calculated_concentration'])
                data_norm_combined.append(row)
            # add the min/max to the aggregate
            if not broth_conc:
                broth_conc = [0]
            if not filtrate_conc:
                filtrate_conc = [0]
            row_ave['calculated_concentration_min'] = min(broth_conc + filtrate_conc)
            row_ave['calculated_concentration_max'] = max(broth_conc + filtrate_conc)
            row_ave['calculated_concentration_broth_min'] = min(broth_conc)
            row_ave['calculated_concentration_broth_max'] = max(broth_conc)
            row_ave['calculated_concentration_filtrate_min'] = min(filtrate_conc)
            row_ave['calculated_concentration_filtrate_max'] = max(filtrate_conc)
            data_ave.append(row_ave)
    # dump the chart parameters to a js file
    data1_keys = ['analysis_id', 'experiment_id', 'sample_name', 'sample_id',
        'sample_name_abbreviation', 'component_group_name', 'component_name',
        'calculated_concentration_units', 'extracellular_percent', 'calculated_concentration_cv']
    data1_nestkeys = ['component_name']
    data1_keymap = {'xdata': 'component_name',
        'ydata': 'calculated_concentration',
        'serieslabel': 'sample_name_abbreviation',
        'featureslabel': 'sample_name'}
    data2_keys = ['analysis_id', 'experiment_id', 'sample_name_abbreviation', 'time_point',
        'component_group_name', 'component_name', 'calculated_concentration_units',
        'extracellular_percent', 'calculated_concentration_broth_cv']
    data2_nestkeys = ['component_name']
    data2_keymap = {'xdata': 'component_name',
        'ydatamean': 'calculated_concentration_broth_average',
        'ydatalb': 'calculated_concentration_broth_lb',
        'ydataub': 'calculated_concentration_broth_ub',
        'ydatamin': 'calculated_concentration_broth_min',
        'ydatamax': 'calculated_concentration_broth_max',
        'serieslabel': 'sample_name_abbreviation',
        'featureslabel': 'component_name'}
    data3_keys = ['analysis_id', 'experiment_id', 'sample_name_abbreviation', 'time_point',
        'component_group_name', 'component_name', 'calculated_concentration_units',
        'extracellular_percent', 'calculated_concentration_filtrate_cv']
    data3_nestkeys = ['component_name']
    data3_keymap = {'xdata': 'component_name',
        'ydatamean': 'calculated_concentration_filtrate_average',
        'ydatalb': 'calculated_concentration_filtrate_lb',
        'ydataub': 'calculated_concentration_filtrate_ub',
        'ydatamin': 'calculated_concentration_filtrate_min',
        'ydatamax': 'calculated_concentration_filtrate_max',
        'serieslabel': 'sample_name_abbreviation',
        'featureslabel': 'component_name'}
    data4_keys = ['analysis_id', 'experiment_id', 'sample_name_abbreviation', 'time_point',
        'component_group_name', 'component_name', 'calculated_concentration_units',
        'extracellular_percent', 'calculated_concentration_cv']
    data4_nestkeys = ['component_name']
    data4_keymap = {'xdata': 'component_name',
        'ydata': 'calculated_concentration_average',
        'ydatamean': 'calculated_concentration_average',
        'ydatalb': 'calculated_concentration_lb',
        'ydataub': 'calculated_concentration_ub',
        'serieslabel': 'sample_name_abbreviation',
        'featureslabel': 'component_name'}
    # make the data object
    dataobject_O = [
        {"data": data_norm_broth, "datakeys": data1_keys, "datanestkeys": data1_nestkeys},
        {"data": data_norm_filtrate, "datakeys": data1_keys, "datanestkeys": data1_nestkeys},
        {"data": data_norm_combined, "datakeys": data1_keys, "datanestkeys": data1_nestkeys},
        {"data": data_ave, "datakeys": data2_keys, "datanestkeys": data2_nestkeys},
        {"data": data_ave, "datakeys": data3_keys, "datanestkeys": data3_nestkeys},
        {"data": data_ave, "datakeys": data4_keys, "datanestkeys": data4_nestkeys}]
    # make the tile parameter objects for the normalized and averages data
    formtileparameters_averages_O = {'tileheader': 'Filter menu averages', 'tiletype': 'html', 'tileid': "filtermenu2", 'rowid': "row1", 'colid': "col1",
        'tileclass': "panel panel-default", 'rowclass': "row", 'colclass': "col-sm-6"}
    formparameters_averages_O = {'htmlid': 'filtermenuform2', "htmltype": 'form_01',
        "formsubmitbuttonidtext": {'id': 'submit2', 'text': 'submit'},
        "formresetbuttonidtext": {'id': 'reset2', 'text': 'reset'},
        "formupdatebuttonidtext": {'id': 'update2', 'text': 'update'}}
    formtileparameters_averages_O.update(formparameters_averages_O)
    # make the svg objects for the averages data
    svgparameters_averages_broth_O = {"svgtype": 'boxandwhiskersplot2d_02', "svgkeymap": [data2_keymap, data1_keymap],
        'svgid': 'svg4',
        "svgmargin": {'top': 50, 'right': 150, 'bottom': 50, 'left': 50},
        "svgwidth": 250, "svgheight": 250,
        "svgx1axislabel": "component_name", "svgy1axislabel": "concentration",
        'svgformtileid': 'filtermenu2', 'svgresetbuttonid': 'reset2', 'svgsubmitbuttonid': 'submit2'}
    svgtileparameters_averages_broth_O = {'tileheader': 'Broth data', 'tiletype': 'svg', 'tileid': "tile4", 'rowid': "row2", 'colid': "col1",
        'tileclass': "panel panel-default", 'rowclass': "row", 'colclass': "col-sm-4"}
    svgtileparameters_averages_broth_O.update(svgparameters_averages_broth_O)
    if data_norm_filtrate:
        svgparameters_averages_filtrate_O = {"svgtype": 'boxandwhiskersplot2d_02', "svgkeymap": [data3_keymap, data1_keymap],
            'svgid': 'svg5',
            "svgmargin": {'top': 50, 'right': 150, 'bottom': 50, 'left': 50},
            "svgwidth": 250, "svgheight": 250,
            "svgx1axislabel": "component_name", "svgy1axislabel": "concentration",
            'svgformtileid': 'filtermenu2', 'svgresetbuttonid': 'reset2', 'svgsubmitbuttonid': 'submit2'}
    else:
        svgparameters_averages_filtrate_O = {"svgtype": 'boxandwhiskersplot2d_01', "svgkeymap": [data3_keymap],
            'svgid': 'svg5',
            "svgmargin": {'top': 50, 'right': 150, 'bottom': 50, 'left': 50},
            "svgwidth": 250, "svgheight": 250,
            "svgx1axislabel": "component_name", "svgy1axislabel": "concentration",
            'svgformtileid': 'filtermenu2', 'svgresetbuttonid': 'reset2', 'svgsubmitbuttonid': 'submit2'}
    svgtileparameters_averages_filtrate_O = {'tileheader': 'Filtrate data', 'tiletype': 'svg', 'tileid': "tile5", 'rowid': "row2", 'colid': "col2",
        'tileclass': "panel panel-default", 'rowclass': "row", 'colclass': "col-sm-4"}
    svgtileparameters_averages_filtrate_O.update(svgparameters_averages_filtrate_O)
    svgparameters_averages_combined_O = {"svgtype": 'boxandwhiskersplot2d_01', "svgkeymap": [data4_keymap],
        'svgid': 'svg6',
        "svgmargin": {'top': 50, 'right': 150, 'bottom': 50, 'left': 50},
        "svgwidth": 250, "svgheight": 250,
        "svgx1axislabel": "component_name", "svgy1axislabel": "concentration",
        'svgformtileid': 'filtermenu2', 'svgresetbuttonid': 'reset2', 'svgsubmitbuttonid': 'submit2'}
    svgtileparameters_averages_combined_O = {'tileheader': 'Broth-Filtrate data', 'tiletype': 'svg', 'tileid': "tile6", 'rowid': "row2", 'colid': "col3",
        'tileclass': "panel panel-default", 'rowclass': "row", 'colclass': "col-sm-4"}
    svgtileparameters_averages_combined_O.update(svgparameters_averages_combined_O)
    # make the tables for the normalized and averages data
    tableparameters_normalized_O = {"tabletype": 'responsivetable_01',
        'tableid': 'table1',
        "tablefilters": None,
        "tableclass": "table table-condensed table-hover",
        'tableformtileid': 'filtermenu1', 'tableresetbuttonid': 'reset1', 'tablesubmitbuttonid': 'submit1'}
    tabletileparameters_normalized_O = {'tileheader': 'normalized data', 'tiletype': 'table', 'tileid': "tile7", 'rowid': "row4", 'colid': "col1",
        'tileclass': "panel panel-default", 'rowclass': "row", 'colclass': "col-sm-12"}
    tabletileparameters_normalized_O.update(tableparameters_normalized_O)
    tableparameters_averages_O = {"tabletype": 'responsivetable_01',
        'tableid': 'table2',
        "tablefilters": None,
        "tableclass": "table table-condensed table-hover",
        'tableformtileid': 'filtermenu2', 'tableresetbuttonid': 'reset2', 'tablesubmitbuttonid': 'submit2'}
    tabletileparameters_averages_O = {'tileheader': 'averages data', 'tiletype': 'table', 'tileid': "tile8", 'rowid': "row5", 'colid': "col1",
        'tileclass': "panel panel-default", 'rowclass': "row", 'colclass': "col-sm-12"}
    tabletileparameters_averages_O.update(tableparameters_averages_O)
    parametersobject_O = [
        formtileparameters_averages_O,
        svgtileparameters_averages_broth_O,
        svgtileparameters_averages_filtrate_O,
        svgtileparameters_averages_combined_O,
        tabletileparameters_normalized_O,
        tabletileparameters_averages_O]
    tile2datamap_O = {
        "filtermenu2": [5],
        "tile4": [3, 0],
        "tile5": [4, 1],
        "tile6": [5],
        "tile7": [2],
        "tile8": [5]}
    filtermenuobject_O = [
        {"filtermenuid": "filtermenu2", "filtermenuhtmlid": "filtermenuform2",
         "filtermenusubmitbuttonid": "submit2", "filtermenuresetbuttonid": "reset2",
         "filtermenuupdatebuttonid": "update2"}]
    # make the ddt container and write the data
    ddtutilities = ddt_container(parameters_I=parametersobject_O, data_I=dataobject_O, tile2datamap_I=tile2datamap_O, filtermenu_I=filtermenuobject_O)
    if data_dir_I == 'tmp':
        filename_str = self.settings['visualization_data'] + '/tmp/ddt_data.js'
    elif data_dir_I == 'data_json':
        data_json_O = ddtutilities.get_allObjects_js()
        return data_json_O
    with open(filename_str, 'w') as file:
        file.write(ddtutilities.get_allObjects())
def execute_normalizeSamples2Biomass(self, experiment_id_I, biological_material_I=None, conversion_name_I=None,
        sample_names_I=[], component_names_I=[], use_height_I=False, sample_types_I=['Unknown']):
    '''Normalize calculated concentrations to the measured biomass
    Input:
    experiment_id_I
    biological_material_I = biological material (if None, no normalization is done)
    conversion_name_I = biomass conversion name (if None, no normalization is done)
    use_height_I = if True, use the ion count for peak height instead of the
        calculated_concentration or height/area ratio
    Output:
    sample_name
    sample_id
    component_group_name
    component_name
    calculated_concentration
    calculated_concentration_units
    used_
    '''
    data_O = []
    calc = calculate_interface()
    print('execute_normalizeSamples2Biomass...')
    # SPLIT 1:
    # get the unique sample_names/sample_ids/sample_types/component_names/component_group_names/calculated_concentration_units
    groupJoin = self.getGroupJoin_experimentAndQuantitationMethodAndMQResultsTable_experimentID_dataStage01QuantificationMQResultsTable(
        experiment_id_I,
        sample_types_I=sample_types_I,
        sample_names_I=sample_names_I,
        component_names_I=component_names_I,
        sample_ids_I=[])
    if type(groupJoin) == type(listDict()):
        groupJoin.convert_dataFrame2ListDict()
        groupJoin = groupJoin.get_listDict()
    if (biological_material_I and conversion_name_I):
        # get the conversion and units once
        conversion, conversion_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(biological_material_I, conversion_name_I)
        for row_cnt, row in enumerate(groupJoin):
            print('normalizing samples2Biomass for component_name ' + row['component_name'])
            # get physiological parameters
            cvs, cvs_units, od600, dil, dil_units = self.get_CVSAndCVSUnitsAndODAndDilAndDilUnits_sampleName(row['sample_name'])
            if not(cvs and cvs_units and od600 and dil and dil_units):
                print('cvs, cvs_units, or od600 are missing from physiological parameters')
                print('or dil and dil_units are missing from the sample description')
                exit(-1)
            elif not(conversion and conversion_units):
                print('biological_material or conversion name is incorrect')
                exit(-1)
            else:
                # calculate the cell volume or biomass depending on the conversion units
                cell_volume, cell_volume_units = calc.calculate_biomass_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs, cvs_units, od600, conversion, conversion_units)
            # get the calculated concentration
            calc_conc, calc_conc_units = None, None
            if use_height_I:
                calc_conc, calc_conc_units = row['height'], 'height'
            elif row['use_calculated_concentration']:
                calc_conc, calc_conc_units = row['calculated_concentration'], row['conc_units']
            elif not row['use_calculated_concentration'] and row['use_area']:
                calc_conc, calc_conc_units = row['area_ratio'], 'area_ratio'
            elif not row['use_calculated_concentration'] and not row['use_area']:
                calc_conc, calc_conc_units = row['height_ratio'], 'height_ratio'
            # calculate the normalized concentration
            norm_conc, norm_conc_units = None, None
            if calc_conc:
                norm_conc, norm_conc_units = calc.calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits(calc_conc, calc_conc_units, dil, dil_units, cell_volume, cell_volume_units)
            # update data_stage01_quantification_normalized
            if norm_conc:
                row_O = {'experiment_id': experiment_id_I,
                    'sample_name': row['sample_name'],
                    'sample_id': row['sample_id'],
                    'component_group_name': row['component_group_name'],
                    'component_name': row['component_name'],
                    'calculated_concentration': norm_conc,
                    'calculated_concentration_units': norm_conc_units,
                    'used_': True}
                data_O.append(row_O)
    else:
        for row_cnt, row in enumerate(groupJoin):
            print('normalizing samples2Biomass for sample_name ' + row['sample_name'] + ' and component_name ' + row['component_name'])
            # get the calculated concentration
            calc_conc, calc_conc_units = None, None
            if use_height_I:
                calc_conc, calc_conc_units = row['height'], 'height'
            elif row['use_calculated_concentration']:
                calc_conc, calc_conc_units = row['calculated_concentration'], row['conc_units']
            elif not row['use_calculated_concentration'] and row['use_area']:
                calc_conc, calc_conc_units = row['area_ratio'], 'area_ratio'
            elif not row['use_calculated_concentration'] and not row['use_area']:
                calc_conc, calc_conc_units = row['height_ratio'], 'height_ratio'
            # add data to the DB
            if calc_conc:
                row_O = {'experiment_id': experiment_id_I,
                    'sample_name': row['sample_name'],
                    'sample_id': row['sample_id'],
                    'component_group_name': row['component_group_name'],
                    'component_name': row['component_name'],
                    'calculated_concentration': calc_conc,
                    'calculated_concentration_units': calc_conc_units,
                    'used_': True}
                data_O.append(row_O)
component_names = [x for i,x in enumerate(component_names) if i in component_names_ind] # component_group_names = [x for i,x in enumerate(component_group_names) if i in component_names_ind] # ## get sample id # #sample_id = self.get_sampleID_experimentIDAndSampleName(experiment_id_I,sn); # if (biological_material_I and conversion_name_I): # # get physiological parameters # cvs = None; # cvs_units = None; # od600 = None; # dil = None; # dil_units = None; # conversion = None; # conversion_units = None; # cvs, cvs_units, od600, dil,dil_units = self.get_CVSAndCVSUnitsAndODAndDilAndDilUnits_sampleName(sn); # conversion, conversion_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(biological_material_I,conversion_name_I); # if not(cvs and cvs_units and od600 and dil and dil_units): # print('cvs, cvs_units, or od600 are missing from physiological parameters'); # print('or dil and dil_units are missing from sample descripton'); # exit(-1); # elif not(conversion and conversion_units): # print('biological_material or conversion name is incorrect'); # exit(-1); # else: # #calculate the cell volume or biomass depending on the conversion units # #cell_volume, cell_volume_units = calc.calculate_cellVolume_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units); # cell_volume, cell_volume_units = calc.calculate_biomass_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units); # for cn_cnt,cn in enumerate(component_names): # print('normalizing samples2Biomass for component_name ' + cn); # # get component group name # #component_group_name = self.get_componentGroupName_experimentIDAndComponentName(experiment_id_I,cn); # #component_group_name = self.get_msGroup_componentName_MSComponents(cn); # # get the calculated concentration # calc_conc = None; # calc_conc_units = None; # if use_height_I: # calc_conc, calc_conc_units = self.get_peakHeight_sampleNameAndComponentName(sn,cn); # else: # calc_conc, calc_conc_units = self.get_concAndConcUnits_sampleNameAndComponentName(sn,cn); # # calculate the normalized concentration # norm_conc = None; # norm_conc_units = None; # if calc_conc: # norm_conc, norm_conc_units = calc.calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits(calc_conc,calc_conc_units,dil,dil_units,cell_volume, cell_volume_units); # # update data_stage01_quantification_normalized # if norm_conc: # row = {'experiment_id':experiment_id_I, # 'sample_name':sn, # 'sample_id':sample_ids[sn_cnt], # 'component_group_name':component_group_names[cn_cnt], # 'component_name':cn, # 'calculated_concentration':norm_conc, # 'calculated_concentration_units':norm_conc_units, # 'used_':True,}; # data_O.append(row); # else: # for cn_cnt,cn in enumerate(component_names): # print('normalizing samples2Biomass for component_name ' + cn); # # get component group name # #component_group_name = self.get_componentGroupName_experimentIDAndComponentName(experiment_id_I,cn); # #component_group_name = self.get_msGroup_componentName_MSComponents(cn); # # get the calculated concentration # calc_conc = None; # calc_conc_units = None; # if use_height_I: # calc_conc, calc_conc_units = self.get_peakHeight_sampleNameAndComponentName(sn,cn); # else: # calc_conc, calc_conc_units = self.get_concAndConcUnits_sampleNameAndComponentName(sn,cn); # # add data to the DB # row = {'experiment_id':experiment_id_I, # 'sample_name':sn, # 'sample_id':sample_ids[sn_cnt], # 
'component_group_name':component_group_names[cn_cnt], # 'component_name':cn, # 'calculated_concentration':calc_conc, # 'calculated_concentration_units':calc_conc_units, # 'used_':True,}; # data_O.append(row); self.add_rows_table('data_stage01_quantification_normalized',data_O);
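# A minimal sketch of the normalization applied above, assuming the units work
# out and that calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits
# divides a dilution-corrected concentration by the biomass (or cell volume)
# derived from OD600; normalize_to_biomass is a hypothetical helper for
# illustration, not part of this class.
def normalize_to_biomass(conc, dilution, biomass):
    """Return the dilution-corrected concentration per unit biomass."""
    if biomass is None or biomass <= 0:
        raise ValueError('biomass must be positive')
    return conc * dilution / biomass
# e.g. normalize_to_biomass(conc=0.5, dilution=2.0, biomass=0.25) -> 4.0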
def execute_analyzePeakInformation(self,experiment_id_I,sample_names_I=[],sample_types_I=['Standard'],component_names_I=[],peakInfo_I=['height','retention_time','width_at_50','signal_2_noise'],acquisition_date_and_time_I=[None,None]):
    '''Analyze retention-time, height, s/n, and asymmetry
    INPUT:
        experiment_id_I
        sample_names_I
        sample_types_I
        component_names_I
        peakInfo_I
        acquisition_date_and_time_I = ['%m/%d/%Y %H:%M','%m/%d/%Y %H:%M']
    '''
    print('execute_analyzePeakInformation...')
    # convert string date time to datetime
    # e.g. time.strptime('4/15/2014 15:51','%m/%d/%Y %H:%M')
    acquisition_date_and_time = [];
    if acquisition_date_and_time_I and acquisition_date_and_time_I[0] and acquisition_date_and_time_I[1]:
        for dateandtime in acquisition_date_and_time_I:
            time_struct = strptime(dateandtime,'%m/%d/%Y %H:%M')
            dt = datetime.fromtimestamp(mktime(time_struct))
            acquisition_date_and_time.append(dt);
    else:
        acquisition_date_and_time = [None,None]
    data_O = [];
    component_names_all = [];
    # get sample names
    if sample_names_I and sample_types_I and len(sample_types_I)==1:
        sample_names = sample_names_I;
        sample_types = [sample_types_I[0] for sn in sample_names];
    else:
        sample_names = [];
        sample_types = [];
        for st in sample_types_I:
            sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I,st);
            sample_names.extend(sample_names_tmp);
            sample_types.extend([st for sn in sample_names_tmp]);
    print(str(len(sample_names)) + ' total samples');
    for sn in sample_names:
        print('analyzing peakInformation for sample_name ' + sn);
        # get sample description
        desc = self.get_description_experimentIDAndSampleID_sampleDescription(experiment_id_I,sn);
        # get component names
        if component_names_I:
            component_names = component_names_I;
        else:
            component_names = self.get_componentsNames_experimentIDAndSampleName(experiment_id_I,sn);
        component_names_all.extend(component_names);
        for cn in component_names:
            # get rt, height, s/n
            sst_data = self.get_peakInfo_sampleNameAndComponentName(sn,cn,acquisition_date_and_time);
            if sst_data:
                tmp = {};
                tmp.update(sst_data);
                tmp.update(desc);
                tmp.update({'sample_name':sn});
                data_O.append(tmp);
    #TODO: 1. make a calculation method
    # calculate statistics for specific parameters
    data_add = [];
    component_names_unique = list(set(component_names_all));
    component_names_unique.sort();
    # math utilities
    from math import sqrt
    calc = calculate_interface();
    for cn in component_names_unique:
        data_parameters = {};
        data_parameters_stats = {};
        for parameter in peakInfo_I:
            data_parameters[parameter] = [];
            data_parameters_stats[parameter] = {'ave':None,'var':None,'cv':None,'lb':None,'ub':None};
            acquisition_date_and_times = [];
            sample_names_parameter = [];
            sample_types_parameter = [];
            component_group_name = None;
            for sn_cnt,sn in enumerate(sample_names):
                for d in data_O:
                    if d['sample_name'] == sn and d['component_name'] == cn and d[parameter]:
                        data_parameters[parameter].append(d[parameter]);
                        acquisition_date_and_times.append(d['acquisition_date_and_time'])
                        sample_names_parameter.append(sn);
                        sample_types_parameter.append(sample_types[sn_cnt])
                        component_group_name = d['component_group_name'];
            ave,var,lb,ub = None,None,None,None;
            if len(data_parameters[parameter])>1:
                ave,var,lb,ub = calc.calculate_ave_var(data_parameters[parameter]);
            if ave:
                cv = sqrt(var)/ave*100;
                data_parameters_stats[parameter] = {'ave':ave,'var':var,'cv':cv,'lb':lb,'ub':ub};
            # add data to the DB
            row = {'experiment_id':experiment_id_I,
                'component_group_name':component_group_name,
                'component_name':cn,
                'peakInfo_parameter':parameter,
                'peakInfo_ave':data_parameters_stats[parameter]['ave'],
                'peakInfo_cv':data_parameters_stats[parameter]['cv'],
                'peakInfo_lb':data_parameters_stats[parameter]['lb'],
                'peakInfo_ub':data_parameters_stats[parameter]['ub'],
                'peakInfo_units':None,
                'sample_names':sample_names_parameter,
                'sample_types':sample_types_parameter,
                'acqusition_date_and_times':acquisition_date_and_times, # spelling preserved to match the destination table column
                'peakInfo_data':data_parameters[parameter],
                'used_':True,
                'comment_':None};
            data_add.append(row);
    self.add_rows_table('data_stage01_quantification_peakInformation',data_add);
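# The %CV reported above is sqrt(variance)/mean * 100; a self-contained sketch
# of that statistic (percent_cv is an illustrative helper, not part of this class):
import numpy

def percent_cv(values):
    """Coefficient of variation (%) of replicate measurements."""
    values = numpy.asarray(values, dtype=float)
    ave = values.mean()
    if ave == 0.0:
        return 0.0
    return numpy.sqrt(values.var()) / ave * 100.0
# e.g. percent_cv([95.0, 100.0, 105.0]) -> ~4.08 (population variance)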
def execute_analyzeAverages(self,experiment_id_I,sample_name_abbreviations_I=[],sample_names_I=[],component_names_I=[]):
    '''calculate the averages using the formula ave(broth),i - ave(filtrate),i
    NOTE: data_stage01_quantification_normalized must be populated
    Input:
        experiment_id_I
        sample_name_abbreviations_I
        sample_names_I
        component_names_I
    Output:
        sample_name_abbreviation, component_group_name, component_name,
        concentration average, concentration CV, concentration units, % extracellular
    '''
    data_O = [];
    calc = calculate_interface();
    print('execute_analyzeAverages...')
    # get sample_name_abbreviations
    if sample_name_abbreviations_I:
        sample_name_abbreviations = sample_name_abbreviations_I
    else:
        sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01Normalized(experiment_id_I);
    for sna in sample_name_abbreviations:
        print('analyzing averages for sample_name_abbreviation ' + sna);
        # get component names
        if component_names_I:
            component_names = component_names_I
        else:
            component_names = self.get_componentsNames_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
        for cn in component_names:
            print('analyzing averages for component_name ' + cn);
            component_group_name = self.get_componentGroupName_experimentIDAndComponentName_dataStage01Normalized(experiment_id_I,cn);
            # get time points
            time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
            if not time_points: continue;
            for tp in time_points:
                print('analyzing averages for time_point ' + tp);
                # get filtrate sample names
                sample_description = 'Filtrate';
                sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                if sample_names_I: # screen out sample names that are not in the input
                    sample_names = [x for x in sample_names if x in sample_names_I];
                concs = [];
                conc_units = None;
                for sn in sample_names:
                    # concentrations and units
                    conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                    if (not(conc) or conc==0): continue
                    if (conc_unit): conc_units = conc_unit;
                    concs.append(conc);
                n_replicates_filtrate = len(concs);
                conc_average_filtrate = 0.0;
                conc_var_filtrate = 0.0;
                conc_cv_filtrate = 0.0;
                # calculate average and CV of concentrations
                if (not(concs)):
                    conc_average_filtrate = 0;
                    conc_var_filtrate = 0;
                elif n_replicates_filtrate<2:
                    conc_average_filtrate = concs[0];
                    conc_var_filtrate = 0;
                else:
                    conc_average_filtrate = numpy.mean(numpy.array(concs));
                    conc_var_filtrate = numpy.var(numpy.array(concs));
                if (conc_average_filtrate <= 0): conc_cv_filtrate = 0;
                else: conc_cv_filtrate = sqrt(conc_var_filtrate)/conc_average_filtrate*100;
                # get broth sample names
                sample_description = 'Broth';
                sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                if sample_names_I: # screen out sample names that are not in the input
                    sample_names = [x for x in sample_names if x in sample_names_I];
                concs = [];
                conc_units = None;
                for sn in sample_names:
                    print('analyzing averages for sample_name ' + sn);
                    # query concentrations and units
                    conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                    if (not(conc) or conc==0): continue
                    if (conc_unit): conc_units = conc_unit;
                    concs.append(conc);
                n_replicates = len(concs);
                conc_average_broth = 0.0;
                conc_var_broth = 0.0;
                conc_cv_broth = 0.0;
                # calculate average and CV of concentrations
                if (not(concs)): continue
                elif n_replicates<2: continue
                else:
                    conc_average_broth = numpy.mean(numpy.array(concs));
                    conc_var_broth = numpy.var(numpy.array(concs));
                if (conc_average_broth <= 0): conc_cv_broth = 0;
                else: conc_cv_broth = sqrt(conc_var_broth)/conc_average_broth*100;
                # calculate average and CV
                conc_average = conc_average_broth - conc_average_filtrate;
                if (conc_average < 0): conc_average = 0;
                conc_var = conc_var_broth + conc_var_filtrate;
                if (conc_average <= 0): conc_cv = 0;
                else: conc_cv = sqrt(conc_var)/conc_average*100;
                # calculate the % extracellular (guard against a non-positive broth average)
                if (conc_average_broth <= 0): extracellular_percent = None;
                else: extracellular_percent = conc_average_filtrate/conc_average_broth*100;
                # add data to the session
                row = {'experiment_id':experiment_id_I,
                    'sample_name_abbreviation':sna,
                    'time_point':tp,
                    'component_group_name':component_group_name,
                    'component_name':cn,
                    'n_replicates_broth':n_replicates,
                    'calculated_concentration_broth_average':conc_average_broth,
                    'calculated_concentration_broth_cv':conc_cv_broth,
                    'n_replicates_filtrate':n_replicates_filtrate,
                    'calculated_concentration_filtrate_average':conc_average_filtrate,
                    'calculated_concentration_filtrate_cv':conc_cv_filtrate,
                    'n_replicates':n_replicates,
                    'calculated_concentration_average':conc_average,
                    'calculated_concentration_cv':conc_cv,
                    'calculated_concentration_units':conc_units,
                    'extracellular_percent':extracellular_percent,
                    'used_':True};
                data_O.append(row)
    self.add_rows_table('data_stage01_quantification_averages',data_O);
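# The broth-minus-filtrate statistics above assume independent errors, so the
# variances add; a sketch with illustrative names (not the class API):
from math import sqrt

def broth_minus_filtrate(ave_broth, var_broth, ave_filtrate, var_filtrate):
    """Difference average, propagated variance, %CV, and % extracellular."""
    ave = max(ave_broth - ave_filtrate, 0.0)
    var = var_broth + var_filtrate  # independence assumption
    cv = sqrt(var) / ave * 100.0 if ave > 0 else 0.0
    extracellular_percent = ave_filtrate / ave_broth * 100.0 if ave_broth > 0 else None
    return ave, var, cv, extracellular_percent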
def execute_analyzeAverages_blanks(self,experiment_id_I,sample_name_abbreviations_I=[],sample_names_I=[],component_names_I=[],blank_sample_names_I=[],blank_sample_name_abbreviations_I=[]):
    '''calculate the averages using the formula ave(broth),i - ave(blank,broth)
    NOTE: data_stage01_quantification_normalized must be populated
    Input:
        experiment_id_I
        sample_name_abbreviations_I
        sample_names_I
        component_names_I
        blank_sample_names_I = []; if specified, specific blank samples will be used as the filtrate instead of filtrate samples
        blank_sample_name_abbreviations_I = []; if specified, blanks are selected by sample_name_abbreviation
    Output:
        sample_name_abbreviation, component_group_name, component_name,
        concentration average, concentration CV, concentration units, % extracellular
    '''
    data_O = [];
    calc = calculate_interface();
    print('execute_analyzeAverages_blanks...')
    #1 query unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points/sample_names/sample_ids/sample_description
    uniqueRows_all = self.getQueryResult_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(experiment_id_I);
    #2 filter in broth samples
    uniqueRows = self.filter_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
        uniqueRows_all,
        calculated_concentration_units_I=[],
        component_names_I=component_names_I,
        component_group_names_I=[],
        sample_names_I=sample_names_I,
        sample_name_abbreviations_I=sample_name_abbreviations_I,
        time_points_I=[],
        );
    if type(uniqueRows)==type(listDict()):
        uniqueRows.convert_dataFrame2ListDict()
        uniqueRows = uniqueRows.get_listDict();
    # reorganize the data into a dictionary for quick traversal of the replicates
    data_tmp = {};
    for uniqueRow_cnt,uniqueRow in enumerate(uniqueRows):
        unique = (uniqueRow['sample_name_abbreviation'],uniqueRow['experiment_id'],uniqueRow['time_point'],uniqueRow['component_name'],uniqueRow['calculated_concentration_units'])
        if not unique in data_tmp.keys():
            data_tmp[unique] = [];
        data_tmp[unique].append(uniqueRow);
    #3 filter in blank samples
    uniqueBlanks = [];
    if blank_sample_names_I or blank_sample_name_abbreviations_I:
        uniqueBlanks = self.filter_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
            uniqueRows_all,
            calculated_concentration_units_I=[],
            component_names_I=component_names_I,
            component_group_names_I=[],
            sample_names_I=blank_sample_names_I,
            sample_name_abbreviations_I=blank_sample_name_abbreviations_I,
            time_points_I=[],
            );
        if type(uniqueBlanks)==type(listDict()):
            uniqueBlanks.convert_dataFrame2ListDict()
            uniqueBlanks = uniqueBlanks.get_listDict();
    # reorganize the data for a quick traversal of the components
    data_blanks_tmp = {};
    for uniqueBlanks_cnt,uniqueBlank in enumerate(uniqueBlanks):
        unique = uniqueBlank['component_name']
        if not unique in data_blanks_tmp.keys():
            data_blanks_tmp[unique] = [];
        data_blanks_tmp[unique].append(uniqueBlank);
    #4 iterate through each unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points
    #  and determine the ave, cv, etc., after subtracting out the blanks
    for unique,replicates in data_tmp.items():
        print('analyzing averages for sample_name_abbreviation ' + replicates[0]['sample_name_abbreviation'] + ' and component_name ' + replicates[0]['component_name']);
        # get blank concentrations
        if data_blanks_tmp and replicates[0]['component_name'] in data_blanks_tmp.keys():
            concs = [d['calculated_concentration'] for d in data_blanks_tmp[replicates[0]['component_name']] if not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
            conc_units = [d['calculated_concentration_units'] for d in data_blanks_tmp[replicates[0]['component_name']] if not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
            if conc_units: conc_units = conc_units[0];
            else: conc_units = None;
        else:
            concs = [];
            conc_units = None;
        n_replicates_filtrate = len(concs);
        conc_average_filtrate = 0.0;
        conc_var_filtrate = 0.0;
        conc_cv_filtrate = 0.0;
        # calculate average and CV of concentrations
        if (not(concs)):
            conc_average_filtrate = 0;
            conc_var_filtrate = 0;
        elif n_replicates_filtrate<2:
            conc_average_filtrate = concs[0];
            conc_var_filtrate = 0;
        else:
            conc_average_filtrate = numpy.mean(numpy.array(concs));
            conc_var_filtrate = numpy.var(numpy.array(concs));
        if (conc_average_filtrate <= 0): conc_cv_filtrate = 0;
        else: conc_cv_filtrate = sqrt(conc_var_filtrate)/conc_average_filtrate*100;
        # get broth concentrations
        concs = [d['calculated_concentration'] for d in replicates if d['sample_desc']=='Broth' and not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
        conc_units = [d['calculated_concentration_units'] for d in replicates if d['sample_desc']=='Broth' and not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
        if conc_units: conc_units = conc_units[0];
        else: conc_units = None;
        n_replicates = len(concs);
        conc_average_broth = 0.0;
        conc_var_broth = 0.0;
        conc_cv_broth = 0.0;
        # calculate average and CV of concentrations
        if (not(concs)): continue
        elif n_replicates<2: continue
        else:
            conc_average_broth = numpy.mean(numpy.array(concs));
            conc_var_broth = numpy.var(numpy.array(concs));
        if (conc_average_broth <= 0): conc_cv_broth = 0;
        else: conc_cv_broth = sqrt(conc_var_broth)/conc_average_broth*100;
        # calculate average and CV
        conc_average = conc_average_broth - conc_average_filtrate;
        if (conc_average < 0): conc_average = 0;
        conc_var = conc_var_broth + conc_var_filtrate;
        if (conc_average <= 0): conc_cv = 0;
        else: conc_cv = sqrt(conc_var)/conc_average*100;
        # calculate the % extracellular (guard against a non-positive broth average)
        if (conc_average_broth <= 0): extracellular_percent = None;
        else: extracellular_percent = conc_average_filtrate/conc_average_broth*100;
        # add data to the session
        row = {'experiment_id':experiment_id_I,
            'sample_name_abbreviation':replicates[0]['sample_name_abbreviation'],
            'time_point':replicates[0]['time_point'],
            'component_group_name':replicates[0]['component_group_name'],
            'component_name':replicates[0]['component_name'],
            'n_replicates_broth':n_replicates,
            'calculated_concentration_broth_average':conc_average_broth,
            'calculated_concentration_broth_cv':conc_cv_broth,
            'n_replicates_filtrate':n_replicates_filtrate,
            'calculated_concentration_filtrate_average':conc_average_filtrate,
            'calculated_concentration_filtrate_cv':conc_cv_filtrate,
            'n_replicates':n_replicates,
            'calculated_concentration_average':conc_average,
            'calculated_concentration_cv':conc_cv,
            'calculated_concentration_units':conc_units,
            'extracellular_percent':extracellular_percent,
            'used_':True};
        data_O.append(row);
    self.add_rows_table('data_stage01_quantification_averages',data_O);
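# The data_tmp/data_blanks_tmp reorganization above groups rows by a composite
# key so the replicates can be traversed without repeated queries; the same
# pattern with a defaultdict (group_by_key is illustrative, not part of this class):
from collections import defaultdict

def group_by_key(rows, keys=('sample_name_abbreviation', 'time_point', 'component_name')):
    """Group listDict rows by a tuple of key fields."""
    grouped = defaultdict(list)
    for row in rows:
        grouped[tuple(row[k] for k in keys)].append(row)
    return grouped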
def execute_calculateRatesAverages(self,experiment_id_I,sample_name_abbreviations_I=[],met_ids_I=[]):
    '''Calculate the average rates based on the rates of the replicates'''
    calc = calculate_interface();
    data_O = [];
    # query sample_name_abbreviations for the experiment
    print('execute calculate rates averages...')
    if sample_name_abbreviations_I:
        sample_name_abbreviations = sample_name_abbreviations_I;
    else:
        sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01PhysiologyRates(experiment_id_I,6);
    for sna in sample_name_abbreviations:
        print('calculating rates averages for sample_name_abbreviation ' + sna);
        # query met_ids
        if met_ids_I:
            met_ids = met_ids_I;
        else:
            met_ids = self.get_metIDs_experimentIDAndSampleNameAbbreviation_dataStage01PhysiologyRates(experiment_id_I,6,sna)
        for met in met_ids:
            print('calculating rates averages for met_id ' + met);
            # query sample names
            sample_name_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndMetID_dataStage01PhysiologyRates(experiment_id_I,6,sna,met)
            slopes, intercepts, rates, rates_units, std_errs = [],[],[],[],[];
            for sns in sample_name_short:
                # query slope, intercept, and rate
                slope, intercept, r2, rate, rate_units, p_value, std_err = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,met);
                if rate:
                    slopes.append(slope);
                    intercepts.append(intercept);
                    rates.append(rate);
                    rates_units.append(rate_units);
                    std_errs.append(std_err);
            # calculate the average, variance, and 95% confidence intervals
            n = len(rates);
            slopes_ave, slopes_var, slopes_lb, slopes_ub = None,None,None,None;
            intercepts_ave, intercepts_var, intercepts_lb, intercepts_ub = None,None,None,None;
            rates_ave, rates_var, rates_lb, rates_ub = None,None,None,None;
            if slopes and not None in slopes:
                slopes_ave, slopes_var, slopes_lb, slopes_ub = calc.calculate_ave_var(slopes);
            if intercepts and not None in intercepts:
                intercepts_ave, intercepts_var, intercepts_lb, intercepts_ub = calc.calculate_ave_var(intercepts);
            if rates and not None in rates:
                rates_ave, rates_var, rates_lb, rates_ub = calc.calculate_ave_var(rates);
            # add rows to the database
            row = {'experiment_id':experiment_id_I,
                'sample_name_abbreviation':sna,
                'met_id':met,
                'n':n,
                'slope_average':slopes_ave,
                'intercept_average':intercepts_ave,
                'rate_average':rates_ave,
                'rate_var':rates_var,
                'rate_lb':rates_lb,
                'rate_ub':rates_ub,
                'rate_units':rates_units[0] if rates_units else None, # guard against samples with no replicate rates
                'used_':True,
                'comment_':None};
            data_O.append(row);
    # add data to the DB
    self.add_rows_table('data_stage01_physiology_ratesAverages',data_O);
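# calculate_ave_var is assumed here to return the mean, sample variance, and a
# two-sided 95% t-interval on the mean; a sketch of that behavior (not the
# verified implementation):
import numpy
from scipy import stats

def ave_var_ci(values, confidence=0.95):
    """Mean, sample variance, and confidence bounds; assumes len(values) > 1."""
    values = numpy.asarray(values, dtype=float)
    n = len(values)
    ave = values.mean()
    var = values.var(ddof=1)
    h = numpy.sqrt(var / n) * stats.t.ppf((1.0 + confidence) / 2.0, n - 1)
    return ave, var, ave - h, ave + h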
def calculate_coverageStats_fromGff(self,gff_file,strand_start,strand_stop,scale_factor=True,downsample_factor=2000,experiment_id_I=None,sample_name_I=None):
    """extract coverage (genome position and reads) from .gff
    INPUT:
        strand_start = index of the start position
        strand_stop = index of the stop position
        scale_factor = boolean, if True, reads will be normalized to a max of 100
        downsample_factor = integer, factor to downsample the points to
    OPTION INPUT:
        experiment_id_I = tag for the experiment from which the sample came
        sample_name_I = tag for the sample name
    """
    calculate = calculate_interface();
    self.set_gffFile(gff_file);
    filename = self.gff_file;
    experiment_id = experiment_id_I;
    sn = sample_name_I;
    # parse the gff file into pandas dataframes
    self.extract_strandsFromGff(strand_start,strand_stop,scale=scale_factor,downsample=downsample_factor)
    # split into separate data structures based on the destination table
    coverageStats_data = [];
    # plus strand
    # calculate using scipy
    data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(self.plus.values,confidence_I=0.95);
    # calculate the interquartile range
    min_O, max_O, median_O, iq_1_O, iq_3_O = calculate.calculate_interquartiles(self.plus.values);
    # record data
    coverageStats_data.append({
        'experiment_id':experiment_id,
        'sample_name':sn,
        'genome_chromosome':1,
        'genome_strand':'plus',
        'strand_start':strand_start,
        'strand_stop':strand_stop,
        'reads_min':int(min_O),
        'reads_max':int(max_O),
        'reads_lb':data_lb_O,
        'reads_ub':data_ub_O,
        'reads_iq1':iq_1_O,
        'reads_iq3':iq_3_O,
        'reads_median':median_O,
        'reads_mean':data_ave_O,
        'reads_var':data_var_O,
        'reads_n':len(self.plus.values),
        'used_':True,
        'comment_':None});
    # minus strand
    # calculate using scipy
    data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(self.minus.values,confidence_I=0.95);
    # calculate the interquartile range
    min_O, max_O, median_O, iq_1_O, iq_3_O = calculate.calculate_interquartiles(self.minus.values);
    # record data
    coverageStats_data.append({
        'experiment_id':experiment_id,
        'sample_name':sn,
        'genome_chromosome':1,
        'genome_strand':'minus',
        'strand_start':strand_start,
        'strand_stop':strand_stop,
        'reads_min':int(min_O),
        'reads_max':int(max_O),
        'reads_lb':data_lb_O,
        'reads_ub':data_ub_O,
        'reads_iq1':iq_1_O,
        'reads_iq3':iq_3_O,
        'reads_median':median_O,
        'reads_mean':data_ave_O,
        'reads_var':data_var_O,
        'reads_n':len(self.minus.values),
        'used_':True,
        'comment_':None});
    # record the data
    self.coverageStats = coverageStats_data;
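# calculate_interquartiles is assumed to return min/max/median and the 25th/75th
# percentiles; a numpy sketch of that summary (not the verified implementation):
import numpy

def interquartiles(values):
    """Five-number summary: min, max, median, first and third quartiles."""
    values = numpy.asarray(values, dtype=float)
    iq_1, median, iq_3 = numpy.percentile(values, [25, 50, 75])
    return values.min(), values.max(), median, iq_1, iq_3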
def execute_calculateYield(self,experiment_id_I,sample_name_short_I=[],uptake_mets_I=[]):
    '''Calculate the yield from the growth rate and the uptake rates'''
    calc = calculate_interface();
    # query sample names
    print('execute calculate yield...')
    if sample_name_short_I:
        sample_name_short = sample_name_short_I;
    else:
        sample_name_short = self.get_sampleNameShort_experimentID_dataStage01PhysiologyRates(experiment_id_I)
    for sns in sample_name_short:
        print('calculating yield for sample_name_short ' + sns);
        # query met_ids
        met_ids = self.get_metIDs_experimentIDAndSampleNameShort_dataStage01PhysiologyRates(experiment_id_I,sns);
        # check for biomass
        if 'biomass' not in met_ids:
            print('no growth rate found!');
            continue;
        # get the biomass physiological rates
        slope_biomass, intercept_biomass, r2_biomass, rate_biomass, units_biomass, p_value_biomass, std_err_biomass = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,'biomass');
        # check for uptake metabolites
        if uptake_mets_I:
            met_ids_nobiomass = [];
            for umet in uptake_mets_I:
                if umet in met_ids:
                    met_ids_nobiomass.append(umet);
                else:
                    print('met_id ' + umet + ' was not found!');
        else:
            met_ids_nobiomass = [x for x in met_ids if x != 'biomass'];
        # get the uptake metabolite rates (negative rates indicate uptake)
        uptake_rates = [];
        uptake_units = [];
        for umet in met_ids_nobiomass:
            slope_umet, intercept_umet, r2_umet, rate_umet, units_umet, p_value_umet, std_err_umet = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,umet);
            if rate_umet < 0.0:
                uptake_rates.append(abs(rate_umet));
                uptake_units.append(units_umet);
        if not uptake_rates:
            print('no uptake metabolites found!');
            continue;
        # calculate the yield
        yield_ss, yield_ss_units = calc.calculate_yield_growthRateAndUptakeRates(rate_biomass,uptake_rates);
        yield_ss_units = 'gDCW*mmol-1 of glc-D'; # hard-coded value that needs to be updated
        # add rows to the database
        row = data_stage01_physiology_rates(experiment_id_I,sns,'yield_ss',None,None,None,yield_ss,yield_ss_units,None,None,True,None);
        self.session.add(row);
        self.session.commit();
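# calculate_yield_growthRateAndUptakeRates is assumed to compute the biomass
# yield as the growth rate divided by the summed absolute uptake rates
# (e.g. gDCW*hr-1 per mmol*gDCW-1*hr-1 -> gDCW*mmol-1); a sketch of that form,
# not the verified implementation:
def yield_from_rates(growth_rate, uptake_rates):
    """Steady-state biomass yield; returns None if there is no uptake."""
    total_uptake = sum(abs(r) for r in uptake_rates)
    if total_uptake == 0.0:
        return None
    return growth_rate / total_uptake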
def export_dataStage01NormalizedAndAverages_checkCVAndExtracelluar_js(self,experiment_id_I,sample_name_abbreviations_I=[],sample_names_I=[],component_names_I=[],cv_threshold_I=40,extracellular_threshold_I=80,data_dir_I='tmp'):
    '''export data_stage01_quantification_normalized and averages for visualization with ddt'''
    calc = calculate_interface();
    print('export_dataStage01NormalizedAndAverages_checkCVAndExtracelluar_js...')
    data_norm_broth = [];
    data_norm_filtrate = [];
    data_norm_combined = [];
    data_ave = [];
    # get sample_name_abbreviations
    if sample_name_abbreviations_I:
        sample_name_abbreviations = sample_name_abbreviations_I
    else:
        sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01Normalized(experiment_id_I);
    # collect the normalized and averages data
    for sna in sample_name_abbreviations:
        print('exporting sample_name_abbreviation ' + sna);
        # get component names
        if component_names_I:
            component_names = component_names_I
        else:
            component_names = self.get_componentsNames_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
        for cn in component_names:
            print('exporting component_name ' + cn);
            component_group_name = self.get_componentGroupName_experimentIDAndComponentName_dataStage01Normalized(experiment_id_I,cn);
            # get time points
            time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
            for tp in time_points:
                print('exporting time_point ' + tp);
                # get the averages and %CV samples
                row = self.get_row_experimentIDAndSampleNameAbbreviationAndTimePointAndComponentNameAndCalculatedConcentrationCVAndExtracellularPercent_dataStage01Averages(experiment_id_I,sna,tp,cn,cv_threshold_I=cv_threshold_I,extracellular_threshold_I=extracellular_threshold_I);
                if not row: continue;
                stdev = calc.convert_cv2StDev(row['calculated_concentration_filtrate_average'],row['calculated_concentration_filtrate_cv']);
                row['calculated_concentration_filtrate_lb'] = row['calculated_concentration_filtrate_average']-stdev;
                row['calculated_concentration_filtrate_ub'] = row['calculated_concentration_filtrate_average']+stdev;
                stdev = calc.convert_cv2StDev(row['calculated_concentration_broth_average'],row['calculated_concentration_broth_cv']);
                row['calculated_concentration_broth_lb'] = row['calculated_concentration_broth_average']-stdev;
                row['calculated_concentration_broth_ub'] = row['calculated_concentration_broth_average']+stdev;
                stdev = calc.convert_cv2StDev(row['calculated_concentration_average'],row['calculated_concentration_cv']);
                row['calculated_concentration_lb'] = row['calculated_concentration_average']-stdev;
                row['calculated_concentration_ub'] = row['calculated_concentration_average']+stdev;
                data_ave.append(row);
                # get filtrate sample names
                sample_description = 'Filtrate';
                sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                if sample_names_I: # screen out sample names that are not in the input
                    sample_names = [x for x in sample_names if x in sample_names_I];
                for sn in sample_names:
                    # get the row
                    row = self.get_row_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                    if not(row): continue;
                    row['sample_name_abbreviation'] = sna;
                    data_norm_filtrate.append(row);
                    data_norm_combined.append(row);
                # get broth sample names
                sample_description = 'Broth';
                sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                if sample_names_I: # screen out sample names that are not in the input
                    sample_names = [x for x in sample_names if x in sample_names_I];
                for sn in sample_names:
                    # get the row
                    row = self.get_row_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                    if not(row): continue;
                    row['sample_name_abbreviation'] = sna;
                    data_norm_broth.append(row);
                    data_norm_combined.append(row);
    # dump chart parameters to a js file
    data1_keys = ['experiment_id','sample_name','sample_id','sample_name_abbreviation','component_group_name','component_name','calculated_concentration_units'];
    data1_nestkeys = ['component_name'];
    data1_keymap = {'xdata':'component_name','ydatamean':'calculated_concentration',
        'serieslabel':'sample_name','featureslabel':'component_name'};
    data2_keys = ['experiment_id','sample_name_abbreviation','time_point','component_group_name','component_name','calculated_concentration_units','extracellular_percent','calculated_concentration_broth_cv'];
    data2_nestkeys = ['component_name'];
    data2_keymap = {'xdata':'component_name','ydatamean':'calculated_concentration_broth_average',
        'ydatalb':'calculated_concentration_broth_lb','ydataub':'calculated_concentration_broth_ub',
        'serieslabel':'sample_name_abbreviation','featureslabel':'component_name'};
    data3_keys = ['experiment_id','sample_name_abbreviation','time_point','component_group_name','component_name','calculated_concentration_units','extracellular_percent','calculated_concentration_filtrate_cv'];
    data3_nestkeys = ['component_name'];
    data3_keymap = {'xdata':'component_name','ydatamean':'calculated_concentration_filtrate_average',
        'ydatalb':'calculated_concentration_filtrate_lb','ydataub':'calculated_concentration_filtrate_ub',
        'serieslabel':'sample_name_abbreviation','featureslabel':'component_name'};
    data4_keys = ['experiment_id','sample_name_abbreviation','time_point','component_group_name','component_name','calculated_concentration_units','extracellular_percent','calculated_concentration_cv'];
    data4_nestkeys = ['component_name'];
    data4_keymap = {'xdata':'component_name','ydatamean':'calculated_concentration_average',
        'ydatalb':'calculated_concentration_lb','ydataub':'calculated_concentration_ub',
        'serieslabel':'sample_name_abbreviation','featureslabel':'component_name'};
    # make the data object
    dataobject_O = [{"data":data_norm_broth,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
        {"data":data_norm_filtrate,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
        {"data":data_norm_combined,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
        {"data":data_ave,"datakeys":data2_keys,"datanestkeys":data2_nestkeys},
        {"data":data_ave,"datakeys":data3_keys,"datanestkeys":data3_nestkeys},
        {"data":data_ave,"datakeys":data4_keys,"datanestkeys":data4_nestkeys}];
    # make the tile parameter objects for the normalized and averages data
    formtileparameters_normalized_O = {'tileheader':'Filter menu normalized','tiletype':'html','tileid':"filtermenu1",'rowid':"row1",'colid':"col1",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-6"};
    formparameters_normalized_O = {'htmlid':'filtermenuform1',"htmltype":'form_01',"formsubmitbuttonidtext":{'id':'submit1','text':'submit'},"formresetbuttonidtext":{'id':'reset1','text':'reset'},"formupdatebuttonidtext":{'id':'update1','text':'update'}};
    formtileparameters_normalized_O.update(formparameters_normalized_O);
    formtileparameters_averages_O = {'tileheader':'Filter menu averages','tiletype':'html','tileid':"filtermenu2",'rowid':"row1",'colid':"col2",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-6"};
    formparameters_averages_O = {'htmlid':'filtermenuform2',"htmltype":'form_01',"formsubmitbuttonidtext":{'id':'submit2','text':'submit'},"formresetbuttonidtext":{'id':'reset2','text':'reset'},"formupdatebuttonidtext":{'id':'update2','text':'update'}};
    formtileparameters_averages_O.update(formparameters_averages_O);
    # make the svg objects for the normalized data
    svgparameters_broth_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data1_keymap],'svgid':'svg1',
        "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },"svgwidth":250,"svgheight":250,
        "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        'svgformtileid':'filtermenu1','svgresetbuttonid':'reset1','svgsubmitbuttonid':'submit1'};
    svgtileparameters_broth_O = {'tileheader':'Broth data','tiletype':'svg','tileid':"tile1",'rowid':"row2",'colid':"col1",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
    svgtileparameters_broth_O.update(svgparameters_broth_O);
    svgparameters_filtrate_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data1_keymap],'svgid':'svg2',
        "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },"svgwidth":250,"svgheight":250,
        "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        'svgformtileid':'filtermenu1','svgresetbuttonid':'reset1','svgsubmitbuttonid':'submit1'};
    svgtileparameters_filtrate_O = {'tileheader':'Filtrate data','tiletype':'svg','tileid':"tile2",'rowid':"row2",'colid':"col2",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
    svgtileparameters_filtrate_O.update(svgparameters_filtrate_O);
    svgparameters_combined_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data1_keymap],'svgid':'svg3',
        "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },"svgwidth":250,"svgheight":250,
        "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        'svgformtileid':'filtermenu1','svgresetbuttonid':'reset1','svgsubmitbuttonid':'submit1'};
    svgtileparameters_combined_O = {'tileheader':'Broth-Filtrate data','tiletype':'svg','tileid':"tile3",'rowid':"row2",'colid':"col3",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
    svgtileparameters_combined_O.update(svgparameters_combined_O);
    # make the svg objects for the averages data
    svgparameters_averages_broth_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data2_keymap],'svgid':'svg4',
        "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },"svgwidth":250,"svgheight":250,
        "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
    svgtileparameters_averages_broth_O = {'tileheader':'Broth data','tiletype':'svg','tileid':"tile4",'rowid':"row3",'colid':"col1",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
    svgtileparameters_averages_broth_O.update(svgparameters_averages_broth_O);
    svgparameters_averages_filtrate_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data3_keymap],'svgid':'svg5',
        "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },"svgwidth":250,"svgheight":250,
        "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
    svgtileparameters_averages_filtrate_O = {'tileheader':'Filtrate data','tiletype':'svg','tileid':"tile5",'rowid':"row3",'colid':"col2",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
    svgtileparameters_averages_filtrate_O.update(svgparameters_averages_filtrate_O);
    svgparameters_averages_combined_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data4_keymap],'svgid':'svg6',
        "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },"svgwidth":250,"svgheight":250,
        "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
    svgtileparameters_averages_combined_O = {'tileheader':'Broth-Filtrate data','tiletype':'svg','tileid':"tile6",'rowid':"row3",'colid':"col3",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
    svgtileparameters_averages_combined_O.update(svgparameters_averages_combined_O);
    # make the tables for the normalized and averages data
    tableparameters_normalized_O = {"tabletype":'responsivetable_01','tableid':'table1',"tablefilters":None,
        "tableclass":"table table-condensed table-hover",
        'tableformtileid':'filtermenu1','tableresetbuttonid':'reset1','tablesubmitbuttonid':'submit1'};
    tabletileparameters_normalized_O = {'tileheader':'normalized data','tiletype':'table','tileid':"tile7",'rowid':"row4",'colid':"col1",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-12"};
    tabletileparameters_normalized_O.update(tableparameters_normalized_O);
    tableparameters_averages_O = {"tabletype":'responsivetable_01','tableid':'table2',"tablefilters":None,
        "tableclass":"table table-condensed table-hover",
        'tableformtileid':'filtermenu2','tableresetbuttonid':'reset2','tablesubmitbuttonid':'submit2'};
    tabletileparameters_averages_O = {'tileheader':'averages data','tiletype':'table','tileid':"tile8",'rowid':"row5",'colid':"col1",
        'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-12"};
    tabletileparameters_averages_O.update(tableparameters_averages_O);
    parametersobject_O = [formtileparameters_normalized_O,formtileparameters_averages_O,
        svgtileparameters_broth_O,svgtileparameters_filtrate_O,svgtileparameters_combined_O,
        svgtileparameters_averages_broth_O,svgtileparameters_averages_filtrate_O,svgtileparameters_averages_combined_O,
        tabletileparameters_normalized_O,tabletileparameters_averages_O];
    tile2datamap_O = {"filtermenu1":[2],"filtermenu2":[5],
        "tile1":[0],"tile2":[1],"tile3":[2],
        "tile4":[3],"tile5":[4],"tile6":[5],
        "tile7":[2],"tile8":[5]};
    filtermenuobject_O = [{"filtermenuid":"filtermenu1","filtermenuhtmlid":"filtermenuform1",
        "filtermenusubmitbuttonid":"submit1","filtermenuresetbuttonid":"reset1","filtermenuupdatebuttonid":"update1"},
        {"filtermenuid":"filtermenu2","filtermenuhtmlid":"filtermenuform2",
        "filtermenusubmitbuttonid":"submit2","filtermenuresetbuttonid":"reset2","filtermenuupdatebuttonid":"update2"}];
    # dump the data to a json file
    data_str = 'var ' + 'data' + ' = ' + json.dumps(dataobject_O) + ';';
    parameters_str = 'var ' + 'parameters' + ' = ' + json.dumps(parametersobject_O) + ';';
    tile2datamap_str = 'var ' + 'tile2datamap' + ' = ' + json.dumps(tile2datamap_O) + ';';
    filtermenu_str = 'var ' + 'filtermenu' + ' = ' + json.dumps(filtermenuobject_O) + ';';
    ddtutilities = ddt_container(parameters_I=parametersobject_O,data_I=dataobject_O,tile2datamap_I=tile2datamap_O,filtermenu_I=filtermenuobject_O);
    if data_dir_I=='tmp':
        filename_str = self.settings['visualization_data'] + '/tmp/ddt_data.js'
    elif data_dir_I=='data_json':
        data_json_O = ddtutilities.get_allObjects_js();
        return data_json_O;
    else:
        raise ValueError('data_dir_I of ' + str(data_dir_I) + ' is not supported'); # guard against an undefined filename_str
    with open(filename_str,'w') as file:
        file.write(ddtutilities.get_allObjects());
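# The export above writes each object as a 'var <name> = <json>;' statement so
# the ddt front end can load it with a <script> tag; a minimal sketch of that
# file format (write_js_var is illustrative, not part of this class):
import json

def write_js_var(path, name, obj):
    """Serialize obj to a JavaScript variable assignment in a .js file."""
    with open(path, 'w') as f:
        f.write('var ' + name + ' = ' + json.dumps(obj) + ';')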
def execute_calculateGeoAverages_replicates_v1(self,experiment_id_I,sample_name_abbreviations_I=[]):
    '''Calculate the averages from replicates MI in ln space'''
    calc = calculate_interface();
    print('execute_calculateGeoAverages_replicates...')
    data_O = [];
    # get sample_name_abbreviations
    if sample_name_abbreviations_I:
        sample_name_abbreviations = sample_name_abbreviations_I;
    else:
        sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01ReplicatesMI(experiment_id_I);
    for sna in sample_name_abbreviations:
        print('calculating the geometric average from replicates for sample_name_abbreviation ' + sna);
        # get component names
        component_names = self.get_componentNames_experimentIDAndSampleNameAbbreviation_dataStage01ReplicatesMI(experiment_id_I,sna);
        # get time points
        time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01ReplicatesMI(experiment_id_I,sna);
        for cn in component_names:
            print('calculating the geometric average from replicates for component_name ' + cn);
            component_group_name = self.get_componentGroupName_experimentIDAndComponentName_dataStage01Normalized(experiment_id_I,cn);
            for tp in time_points:
                print('calculating the geometric average from replicates for time_point ' + tp);
                # get sample names short
                sample_names_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndComponentNameAndTimePoint_dataStage01ReplicatesMI(experiment_id_I,sna,cn,tp);
                concs = [];
                conc_units = None;
                for sns in sample_names_short:
                    # concentrations and units
                    conc, conc_unit = self.get_concAndConcUnits_experimentIDAndSampleNameShortAndTimePointAndComponentName_dataStage01ReplicatesMI(experiment_id_I,sns,tp,cn);
                    if (not(conc) or conc==0): continue;
                    # convert to M from mM or uM prior to taking the ln of the concentration
                    if (conc_unit == 'mM'):
                        conc_units = 'M';
                        conc = conc*1e-3;
                    elif (conc_unit == 'uM'):
                        conc_units = 'M';
                        conc = conc*1e-6;
                    elif (conc_unit == 'umol*gDW-1'):
                        conc_units = 'mol*gDW-1';
                        conc = conc*1e-6;
                    elif (conc_unit == 'height_ratio' or conc_unit == 'area_ratio'):
                        continue;
                    else:
                        print('units of ' + str(conc_unit) + ' are not supported')
                        exit(-1);
                    concs.append(conc);
                n_replicates = len(concs);
                conc_average = 0.0;
                conc_var = 0.0;
                conc_lb = 0.0;
                conc_ub = 0.0;
                # calculate the geometric average and variance of the concentrations
                if (not(concs)): continue
                elif n_replicates<2: continue
                else:
                    conc_average, conc_var, conc_lb, conc_ub = calc.calculate_ave_var_geometric(concs);
                # add data to the session
                row = {"experiment_id":experiment_id_I,
                    "sample_name_abbreviation":sna,
                    "time_point":tp,
                    "component_group_name":component_group_name,
                    "component_name":cn,
                    "n_replicates":n_replicates,
                    "calculated_concentration_average":conc_average,
                    "calculated_concentration_var":conc_var,
                    "calculated_concentration_lb":conc_lb,
                    "calculated_concentration_ub":conc_ub,
                    "calculated_concentration_units":conc_units,
                    "used_":True};
                data_O.append(row);
    self.add_rows_table('data_stage01_quantification_averagesMIgeo',data_O)
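# calculate_ave_var_geometric is assumed to compute the statistics in ln space
# and exponentiate back, i.e. the geometric mean with a t-interval on the log
# values; a sketch of that behavior (not the verified implementation):
import numpy
from scipy import stats

def geometric_ave_ci(values, confidence=0.95):
    """Geometric mean, ln-space variance, and confidence bounds; values > 0, n > 1."""
    logs = numpy.log(numpy.asarray(values, dtype=float))
    n = len(logs)
    ave, var = logs.mean(), logs.var(ddof=1)
    h = numpy.sqrt(var / n) * stats.t.ppf((1.0 + confidence) / 2.0, n - 1)
    return numpy.exp(ave), var, numpy.exp(ave - h), numpy.exp(ave + h)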
def execute_analyzeQCs(self,experiment_id_I,sample_types_I=['QC']):
    '''calculate the average and coefficient of variation for QCs
    NOTE: analytical replicates are those samples with the same sample_id (but different sample_name)
    INPUT:
        experiment_id
    OUTPUT:
        sample_name, component_group_name, component_name,
        n_replicates, conc_average, conc_CV, conc_units
    '''
    calc = calculate_interface();
    print('execute_analyzeQCs...')
    # get sample name abbreviations
    sample_name_abbreviations = [];
    data_O = [];
    for st in sample_types_I:
        sample_name_abbreviations_tmp = self.get_sampleNameAbbreviations_experimentIDAndSampleType(experiment_id_I,st);
        sample_name_abbreviations.extend(sample_name_abbreviations_tmp);
    # calculate the QC statistics
    for sna in sample_name_abbreviations:
        # get dilutions
        sample_dilutions = self.get_sampleDilution_experimentIDAndSampleNameAbbreviation(experiment_id_I,sna);
        # get component names
        component_names = self.get_componentsNames_experimentIDAndSampleNameAbbreviation(experiment_id_I,sna);
        for cn in component_names:
            component_group_name = self.get_componentGroupName_experimentIDAndComponentName(experiment_id_I,cn);
            for sd in sample_dilutions:
                # get sample names
                sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDilution(experiment_id_I,sna,sd);
                if len(sample_names)<2: continue;
                concs = [];
                conc_units = None;
                for sn in sample_names:
                    # concentrations and units
                    conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName(sn,cn);
                    if not(conc): continue
                    if (conc_unit): conc_units = conc_unit;
                    concs.append(conc);
                n_replicates = len(concs);
                # calculate average and CV of concentrations
                if (not(concs) or n_replicates<2): continue
                conc_average, data_var_O, conc_CV, data_lb_O, data_ub_O = calc.calculate_ave_var_cv(concs);
                data_O.append({'experiment_id':experiment_id_I,
                    'sample_name_abbreviation':sna,
                    'sample_dilution':sd,
                    'component_group_name':component_group_name,
                    'component_name':cn,
                    'n_replicates':n_replicates,
                    'calculated_concentration_average':conc_average,
                    'calculated_concentration_CV':conc_CV,
                    'calculated_concentration_units':conc_units});
    self.add_dataStage01_quantification_QCs(data_O);
def execute_physiologicalRatios_averages(self,experiment_id_I):
    '''Calculate physiologicalRatios_averages from physiologicalRatios_replicates'''
    calc = calculate_interface();
    print('execute_physiologicalRatios_averages...')
    data_O = [];
    # get sample_name_abbreviations
    sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01PhysiologicalRatiosReplicates(experiment_id_I);
    for sna in sample_name_abbreviations:
        print('calculating physiologicalRatios from replicates for sample_name_abbreviation ' + sna);
        # get time points
        time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,sna);
        for tp in time_points:
            print('calculating physiologicalRatios from replicates for time_point ' + tp);
            # get ratio information
            ratio_info = self.get_ratioIDs_experimentIDAndTimePoint_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,tp)
            for k,v in ratio_info.items():
                print('calculating physiologicalRatios from replicates for ratio ' + k);
                # get sample names short
                sample_names_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndRatioIDAndTimePoint_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,sna,k,tp);
                ratios = [];
                for sns in sample_names_short:
                    # get ratios
                    ratio = self.get_ratio_experimentIDAndSampleNameShortAndTimePointAndRatioID_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,sns,tp,k);
                    if not ratio: continue;
                    ratios.append(ratio);
                n_replicates = len(ratios);
                ratio_average = 0.0;
                ratio_var = 0.0;
                ratio_cv = 0.0;
                ratio_lb = 0.0;
                ratio_ub = 0.0;
                # calculate average and CV of ratios
                if (not(ratios)): continue
                elif n_replicates<2: continue
                else:
                    ratio_average,ratio_var,ratio_lb,ratio_ub = calc.calculate_ave_var(ratios);
                    if (ratio_average <= 0): ratio_cv = 0;
                    else: ratio_cv = sqrt(ratio_var)/ratio_average*100;
                # add data to the session
                row = {"experiment_id":experiment_id_I,
                    "sample_name_abbreviation":sna,
                    "time_point":tp,
                    "physiologicalratio_id":k,
                    "physiologicalratio_name":v['name'],
                    "physiologicalratio_value_ave":ratio_average,
                    "physiologicalratio_value_cv":ratio_cv,
                    "physiologicalratio_value_lb":ratio_lb,
                    "physiologicalratio_value_ub":ratio_ub,
                    "physiologicalratio_description":v['description'],
                    "used_":True,
                    "comment_":None};
                data_O.append(row);
    self.add_rows_table('data_stage01_quantification_physiologicalRatios_averages',data_O);